Archived
1
0

Added a structure to get requests for all websites for a new synonym

This commit is contained in:
Jip J. Dekker 2014-04-01 21:07:36 +02:00
parent e39ed3b681
commit f93dc2d160
2 changed files with 13 additions and 5 deletions

View File

@ -1,9 +1,10 @@
from scrapy import log from scrapy import log
from scrapy.http import Request
class Parser: class Parser:
''' '''
website should be an regular expression of websites you want to parse. website should be an regular expression of the urls of request the parser is able to parse.
''' '''
website = "http://something/*" website = "http://something/*"
__spider = None __spider = None
@ -12,8 +13,8 @@ class Parser:
log.msg("The parse function of the empty parser was used.", level=log.WARNING) log.msg("The parse function of the empty parser was used.", level=log.WARNING)
pass pass
def generate_search_url(self, compound): def new_compound_request(self, compound):
# return website[:-1] + compound # return Request(url=self.website[:-1] + compound, callable=self.parse)
pass pass
def set_spider(self, spider): def set_spider(self, spider):

View File

@ -5,7 +5,7 @@ import re
class FourmiSpider(Spider): class FourmiSpider(Spider):
name = "FourmiSpider" name = "FourmiSpider"
parsers = [] __parsers = []
def __init__(self, compound=None, *args, **kwargs): def __init__(self, compound=None, *args, **kwargs):
super(FourmiSpider, self).__init__(*args, **kwargs) super(FourmiSpider, self).__init__(*args, **kwargs)
@ -18,10 +18,17 @@ class FourmiSpider(Spider):
return parser.parse(reponse) return parser.parse(reponse)
return None return None
def get_synonym_requests(self, compound):
requests = []
for parser in self.parsers:
requests.append(parser.new_compound_request(compound))
return requests
def add_parsers(self, parsers): def add_parsers(self, parsers):
for parser in parsers: for parser in parsers:
self.add_parser(parser) self.add_parser(parser)
def add_parser(self, parser): def add_parser(self, parser):
self.parsers.add(parser) self.__parsers.add(parser)
parser.set_spider(self) parser.set_spider(self)