Added a structure to generate requests to all websites for a new synonym
This commit is contained in:
parent
e39ed3b681
commit
f93dc2d160
@ -1,9 +1,10 @@
|
|||||||
from scrapy import log
|
from scrapy import log
|
||||||
|
from scrapy.http import Request
|
||||||
|
|
||||||
|
|
||||||
class Parser:
|
class Parser:
|
||||||
'''
|
'''
|
||||||
website should be an regular expression of websites you want to parse.
|
website should be a regular expression matching the URLs of the requests the parser is able to parse.
|
||||||
'''
|
'''
|
||||||
website = "http://something/*"
|
website = "http://something/*"
|
||||||
__spider = None
|
__spider = None
|
||||||
@ -12,8 +13,8 @@ class Parser:
|
|||||||
log.msg("The parse function of the empty parser was used.", level=log.WARNING)
|
log.msg("The parse function of the empty parser was used.", level=log.WARNING)
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def generate_search_url(self, compound):
|
def new_compound_request(self, compound):
|
||||||
# return website[:-1] + compound
|
# return Request(url=self.website[:-1] + compound, callback=self.parse)
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def set_spider(self, spider):
|
def set_spider(self, spider):
|
||||||
|
@ -5,7 +5,7 @@ import re
|
|||||||
|
|
||||||
class FourmiSpider(Spider):
|
class FourmiSpider(Spider):
|
||||||
name = "FourmiSpider"
|
name = "FourmiSpider"
|
||||||
parsers = []
|
__parsers = []
|
||||||
|
|
||||||
def __init__(self, compound=None, *args, **kwargs):
|
def __init__(self, compound=None, *args, **kwargs):
|
||||||
super(FourmiSpider, self).__init__(*args, **kwargs)
|
super(FourmiSpider, self).__init__(*args, **kwargs)
|
||||||
@ -18,10 +18,17 @@ class FourmiSpider(Spider):
|
|||||||
return parser.parse(reponse)
|
return parser.parse(reponse)
|
||||||
return None
|
return None
|
||||||
|
|
||||||
|
def get_synonym_requests(self, compound):
|
||||||
|
requests = []
|
||||||
|
for parser in self.parsers:
|
||||||
|
requests.append(parser.new_compound_request(compound))
|
||||||
|
return requests
|
||||||
|
|
||||||
|
|
||||||
def add_parsers(self, parsers):
|
def add_parsers(self, parsers):
|
||||||
for parser in parsers:
|
for parser in parsers:
|
||||||
self.add_parser(parser)
|
self.add_parser(parser)
|
||||||
|
|
||||||
def add_parser(self, parser):
|
def add_parser(self, parser):
|
||||||
self.parsers.add(parser)
|
self.__parsers.add(parser)
|
||||||
parser.set_spider(self)
|
parser.set_spider(self)
|
Reference in New Issue
Block a user