Archived
1
0

Added a structure to get requests from all websites for a new synonym

This commit is contained in:
Jip J. Dekker 2014-04-01 21:07:36 +02:00
parent e39ed3b681
commit f93dc2d160
2 changed files with 13 additions and 5 deletions

View File

@ -1,9 +1,10 @@
from scrapy import log
from scrapy.http import Request
class Parser:
'''
website should be an regular expression of websites you want to parse.
website should be a regular expression matching the urls of requests the parser is able to parse.
'''
website = "http://something/*"
__spider = None
@ -12,8 +13,8 @@ class Parser:
log.msg("The parse function of the empty parser was used.", level=log.WARNING)
pass
def generate_search_url(self, compound):
# return website[:-1] + compound
def new_compound_request(self, compound):
    """Stub: build the request that fetches data for *compound*.

    Intentionally unimplemented in this base parser; concrete parsers
    are expected to override it.  Returns None as written.
    """
    # Sketch of the intended implementation. NOTE(review): scrapy's
    # Request takes `callback=`, not `callable=` — fix when enabling:
    # return Request(url=self.website[:-1] + compound, callback=self.parse)
    pass
def set_spider(self, spider):

View File

@ -5,7 +5,7 @@ import re
class FourmiSpider(Spider):
name = "FourmiSpider"
parsers = []
__parsers = []
def __init__(self, compound=None, *args, **kwargs):
super(FourmiSpider, self).__init__(*args, **kwargs)
@ -18,10 +18,17 @@ class FourmiSpider(Spider):
return parser.parse(reponse)
return None
def get_synonym_requests(self, compound):
    """Ask every registered parser for a request for *compound*.

    Returns the list of request objects produced by calling
    new_compound_request(compound) on each parser, in registration order.
    """
    # NOTE(review): this reads self.parsers while add_parser writes
    # self.__parsers — confirm which attribute is canonical.
    return [parser.new_compound_request(compound) for parser in self.parsers]
def add_parsers(self, parsers):
    """Register every parser in the *parsers* iterable with this spider."""
    # Delegate so the per-parser bookkeeping lives in one place.
    for candidate in parsers:
        self.add_parser(candidate)
def add_parser(self, parser):
    """Register a single *parser* and point it back at this spider.

    Appends the parser to the spider's parser list and hands the parser a
    reference to this spider via parser.set_spider(self).
    """
    # Bug fix: `list` has no .add() method (that is a `set` method);
    # the original `self.__parsers.add(parser)` raised AttributeError.
    # NOTE(review): get_synonym_requests iterates self.parsers while this
    # writes self.__parsers — confirm which attribute is canonical.
    self.__parsers.append(parser)
    parser.set_spider(self)