Objective-C
Objective-C
Avoiding Outbound Links Matching Patterns
See more Spider Examples
The spider accumulates outbound links as it crawls. Your program may specify any number of "avoid patterns"; any link that matches at least one of these wildcard patterns is not added to the list of outbound links.
Chilkat Objective-C Downloads
#import <CkoSpider.h>
#import <NSString.h>
// Result of the most recent CrawlNext call; reused by the second crawl below.
BOOL success = NO;
CkoSpider *spider = [[CkoSpider alloc] init];
// --------------------------------------------------------------------
// Note: The URLs in this example are no longer valid.
// You should replace the URLs with URLs from a site of your
// own choosing -- preferably your own site if testing.
// (Google's Directory no longer exists.)
// --------------------------------------------------------------------
// First, we'll get the outbound links for a page in the
// Google directory. Then we'll add some avoid patterns
// and then re-fetch, to see it work...
[spider Initialize: @"directory.google.com"];
[spider AddUnspidered: @"http://directory.google.com/Top/Recreation/Food/Cheese/"];
success = [spider CrawlNext];
// Loop index; declared at this scope because the second listing loop
// further down reuses it.
int i;
if (success) {
    // Display the outbound links. The count is read once up front rather
    // than on every pass through the loop.
    int numLinks = [spider.NumOutboundLinks intValue];
    for (i = 0; i < numLinks; i++) {
        NSLog(@"%@",[spider GetOutboundLink: [NSNumber numberWithInt: i]]);
    }
} else {
    // CrawlNext returned NO, so there is nothing to list. Print Chilkat's
    // diagnostic text instead of silently showing an empty result.
    NSLog(@"%@",spider.LastErrorText);
}
// The output:
// http://www.cheese.com/
// http://www.cheesediaries.com/
// http://www.WisDairy.com/
// http://www.newenglandcheese.com
// http://www.ilovecheese.com
// http://www.cheesefromspain.com
// http://www.realcaliforniacheese.com/
// http://www.frencheese.co.uk/
// http://www.cheesesociety.org/
// http://www.specialcheese.com/queso.htm
// http://www.franceway.com/cheese/intro.htm
// http://www.foodsubs.com/Chesfirm.html
// http://www.cheeseboard.co.uk/
// http://www.thecheeseweb.com/
// http://www.vtcheese.com/
// http://www.coldbacon.com/cheese.html
// http://www.norwegiancheeses.co.uk/
// http://www.reluctantgourmet.com/cheese.htm
// http://www.lancewood.co.za/
// http://www.switzerlandcheese.ca
// http://www.frenchcheese.dk/
// http://www.dolcevita.com/cuisine/cheese/cheese.htm
// http://cheeseisland.net/
// http://www.cheestrings.ca/
// http://www.dreamcheese.co.uk
// http://hgic.clemson.edu/factsheets/HGIC3506.htm
// http://www.epicurious.com/cooking/how_to/food_dictionary/entry?id=1815
// http://www.mousetrapcheese.co.uk
// http://taquitos.net/yum/gc.shtml
// http://www.greek-recipe.com/static/greek-cheese
// http://www.park.org/Netherlands/pavilions/food_and_markets/cheese/introduction.html
// http://www.dairyfarmers.org/engl/recipes/4_1.asp
// http://www.prairieridgecheese.com/wischeesguid.html
// http://dmoz.org/cgi-bin/add.cgi?where=Recreation/Food/Cheese
// http://dmoz.org/about.html
// http://dmoz.org/cgi-bin/apply.cgi?where=Recreation/Food/Cheese
// Do it again, but this time with avoid patterns.
[spider Initialize: @"directory.google.com"];
[spider AddUnspidered: @"http://directory.google.com/Top/Recreation/Food/Cheese/"];
// Add some avoid patterns. Each one is a wildcard pattern; an outbound
// link that matches any of them is left out of the outbound link list.
[spider AddAvoidOutboundLinkPattern: @"*dmoz.org*"];
[spider AddAvoidOutboundLinkPattern: @"*?id=*"];
[spider AddAvoidOutboundLinkPattern: @"*.co.uk*"];
success = [spider CrawlNext];
// Separator between the first listing and this one.
NSLog(@"%@",@"-----------------------");
if (success) {
    // Display the outbound links that remain after filtering. The count
    // is read once up front rather than on every pass through the loop.
    int numFilteredLinks = [spider.NumOutboundLinks intValue];
    for (i = 0; i < numFilteredLinks; i++) {
        NSLog(@"%@",[spider GetOutboundLink: [NSNumber numberWithInt: i]]);
    }
} else {
    // CrawlNext returned NO, so there is nothing to list. Print Chilkat's
    // diagnostic text instead of silently showing an empty result.
    NSLog(@"%@",spider.LastErrorText);
}
// Output:
// http://www.cheese.com/
// http://www.cheesediaries.com/
// http://www.WisDairy.com/
// http://www.newenglandcheese.com
// http://www.ilovecheese.com
// http://www.cheesefromspain.com
// http://www.realcaliforniacheese.com/
// http://www.cheesesociety.org/
// http://www.specialcheese.com/queso.htm
// http://www.franceway.com/cheese/intro.htm
// http://www.foodsubs.com/Chesfirm.html
// http://www.thecheeseweb.com/
// http://www.vtcheese.com/
// http://www.coldbacon.com/cheese.html
// http://www.reluctantgourmet.com/cheese.htm
// http://www.lancewood.co.za/
// http://www.switzerlandcheese.ca
// http://www.frenchcheese.dk/
// http://www.dolcevita.com/cuisine/cheese/cheese.htm
// http://cheeseisland.net/
// http://www.cheestrings.ca/
// http://hgic.clemson.edu/factsheets/HGIC3506.htm
// http://taquitos.net/yum/gc.shtml
// http://www.greek-recipe.com/static/greek-cheese
// http://www.park.org/Netherlands/pavilions/food_and_markets/cheese/introduction.html
// http://www.dairyfarmers.org/engl/recipes/4_1.asp
// http://www.prairieridgecheese.com/wischeesguid.html