Added a way for parsers to access the spider.
This commit is contained in:
parent
4d9e5307bf
commit
e39ed3b681
@ -5,8 +5,16 @@ class Parser:
|
|||||||
'''
|
'''
|
||||||
website should be a regular expression of websites you want to parse.
|
website should be a regular expression of websites you want to parse.
|
||||||
'''
|
'''
|
||||||
website = "http://localhost/*"
|
website = "http://something/*"
|
||||||
|
__spider = None
|
||||||
|
|
||||||
def parse(self, reponse):
|
def parse(self, reponse):
|
||||||
log.msg("The parse function of the empty parser was used.", level=log.WARNING)
|
log.msg("The parse function of the empty parser was used.", level=log.WARNING)
|
||||||
pass
|
pass
|
||||||
|
|
||||||
|
def generate_search_url(self, compound):
|
||||||
|
# return website[:-1] + compound
|
||||||
|
pass
|
||||||
|
|
||||||
|
def set_spider(self, spider):
|
||||||
|
self.__spider = spider
|
||||||
|
@ -16,11 +16,12 @@ class FourmiSpider(Spider):
|
|||||||
if re.match(parser.website, reponse.url):
|
if re.match(parser.website, reponse.url):
|
||||||
log.msg("Url: " + reponse.url + " -> Parser: " + parser.website, level=log.DEBUG)
|
log.msg("Url: " + reponse.url + " -> Parser: " + parser.website, level=log.DEBUG)
|
||||||
return parser.parse(reponse)
|
return parser.parse(reponse)
|
||||||
return none
|
return None
|
||||||
|
|
||||||
|
|
||||||
def add_parser(self, parser):
|
|
||||||
self.parsers.append(parser)
|
|
||||||
|
|
||||||
def add_parsers(self, parsers):
|
def add_parsers(self, parsers):
|
||||||
self.parsers.extend(parsers)
|
for parser in parsers:
|
||||||
|
self.add_parser(parser)
|
||||||
|
|
||||||
|
def add_parser(self, parser):
|
||||||
|
self.parsers.add(parser)
|
||||||
|
parser.set_spider(self)
|
Reference in New Issue
Block a user