From e39ed3b68139fa38449cc8948938b59b0d0ff9f1 Mon Sep 17 00:00:00 2001
From: "Jip J. Dekker"
Date: Tue, 1 Apr 2014 20:56:32 +0200
Subject: [PATCH] Added a way for parsers to access the spider.

---
 FourmiCrawler/parsers/parser.py | 12 ++++++++++--
 FourmiCrawler/spider.py         | 13 +++++++------
 2 files changed, 17 insertions(+), 8 deletions(-)

diff --git a/FourmiCrawler/parsers/parser.py b/FourmiCrawler/parsers/parser.py
index 7097ee3..78d9dc1 100644
--- a/FourmiCrawler/parsers/parser.py
+++ b/FourmiCrawler/parsers/parser.py
@@ -5,8 +5,16 @@ class Parser:
     '''
     website should be an regular expression of websites you want to parse.
     '''
-    website = "http://localhost/*"
+    website = "http://something/*"
+    __spider = None
 
     def parse(self, reponse):
         log.msg("The parse function of the empty parser was used.", level=log.WARNING)
-        pass
\ No newline at end of file
+        pass
+
+    def generate_search_url(self, compound):
+        # return website[:-1] + compound
+        pass
+
+    def set_spider(self, spider):
+        self.__spider = spider
diff --git a/FourmiCrawler/spider.py b/FourmiCrawler/spider.py
index d2711c4..9b356f8 100644
--- a/FourmiCrawler/spider.py
+++ b/FourmiCrawler/spider.py
@@ -16,11 +16,12 @@ class FourmiSpider(Spider):
             if re.match(parser.website, reponse.url):
                 log.msg("Url: " + reponse.url + " -> Parser: " + parser.website, level=log.DEBUG)
                 return parser.parse(reponse)
-        return none
-
-
-    def add_parser(self, parser):
-        self.parsers.append(parser)
+        return None
 
     def add_parsers(self, parsers):
-        self.parsers.extend(parsers)
+        for parser in parsers:
+            self.add_parser(parser)
+
+    def add_parser(self, parser):
+        self.parsers.add(parser)
+        parser.set_spider(self)
\ No newline at end of file
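
Below is a minimal usage sketch of the hook this patch introduces, assuming the
FourmiSpider constructor can be called without arguments; ExampleParser, its
website pattern, and its parse body are illustrative names only and are not part
of this commit.

    from FourmiCrawler.parsers.parser import Parser
    from FourmiCrawler.spider import FourmiSpider


    class ExampleParser(Parser):
        # Hypothetical source-specific parser; the pattern below is a placeholder.
        website = "http://example.com/*"

        def parse(self, reponse):
            # A real parser would extract data from the response here.
            pass


    spider = FourmiSpider()                # constructor arguments are an assumption
    spider.add_parsers([ExampleParser()])
    # add_parsers() delegates to add_parser(), which registers the parser with the
    # spider and calls parser.set_spider(spider), so every registered parser keeps
    # a back-reference to the running spider.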