Added a structure to get requests for all websites for a new synonym
This commit is contained in:
parent e39ed3b681
commit f93dc2d160
@@ -1,9 +1,10 @@
 from scrapy import log
+from scrapy.http import Request


 class Parser:
     '''
-    website should be an regular expression of websites you want to parse.
+    website should be an regular expression of the urls of request the parser is able to parse.
     '''
     website = "http://something/*"
     __spider = None
@@ -12,8 +13,8 @@ class Parser:
         log.msg("The parse function of the empty parser was used.", level=log.WARNING)
         pass

-    def generate_search_url(self, compound):
-        # return website[:-1] + compound
+    def new_compound_request(self, compound):
+        # return Request(url=self.website[:-1] + compound, callable=self.parse)
         pass

     def set_spider(self, spider):

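The base class leaves both parse and new_compound_request as stubs; a concrete parser is expected to set `website` for its own source and fill in the request generation along the lines of the commented-out return. A minimal sketch of such a subclass follows (the class name, url, and import path are assumptions, and Scrapy's Request takes the keyword `callback`, not the `callable` written in the comment):

from scrapy import log
from scrapy.http import Request

from parser import Parser  # assumed module path for the Parser base class above


class ExampleParser(Parser):
    # Illustrative source site; the trailing '*' is stripped when building the request url.
    website = "http://www.example.com/*"

    def parse(self, response):
        # A real parser would extract synonyms and properties from the response here.
        log.msg("Parsing " + response.url, level=log.DEBUG)

    def new_compound_request(self, compound):
        # The commented-out line from the base class, using Scrapy's `callback`
        # keyword (Request has no `callable` argument).
        return Request(url=self.website[:-1] + compound, callback=self.parse)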
@@ -5,7 +5,7 @@ import re

 class FourmiSpider(Spider):
     name = "FourmiSpider"
-    parsers = []
+    __parsers = []

     def __init__(self, compound=None, *args, **kwargs):
         super(FourmiSpider, self).__init__(*args, **kwargs)
@@ -18,10 +18,17 @@ class FourmiSpider(Spider):
                 return parser.parse(reponse)
         return None

+    def get_synonym_requests(self, compound):
+        requests = []
+        for parser in self.parsers:
+            requests.append(parser.new_compound_request(compound))
+        return requests
+
+
     def add_parsers(self, parsers):
         for parser in parsers:
             self.add_parser(parser)

     def add_parser(self, parser):
-        self.parsers.add(parser)
+        self.__parsers.add(parser)
         parser.set_spider(self)