Archived
1
0

Sources don't need to be mangled

This commit is contained in:
Jip J. Dekker 2014-06-04 18:34:31 +02:00
parent 1557d17877
commit f128c54312

View File

@ -9,7 +9,7 @@ class FourmiSpider(Spider):
A spider written for the Fourmi Project which calls upon all available sources to request and scrape data. A spider written for the Fourmi Project which calls upon all available sources to request and scrape data.
""" """
name = "FourmiSpider" name = "FourmiSpider"
__sources = [] _sources = []
synonyms = [] synonyms = []
def __init__(self, compound=None, selected_attributes=[".*"], *args, **kwargs): def __init__(self, compound=None, selected_attributes=[".*"], *args, **kwargs):
@ -29,7 +29,7 @@ class FourmiSpider(Spider):
:param response: A Scrapy Response object that should be parsed :param response: A Scrapy Response object that should be parsed
:return: A list of Result items and new Request to be handled by the scrapy core. :return: A list of Result items and new Request to be handled by the scrapy core.
""" """
for source in self.__sources: for source in self._sources:
if re.match(source.website, response.url): if re.match(source.website, response.url):
log.msg("Url: " + response.url + " -> Source: " + source.website, level=log.DEBUG) log.msg("Url: " + response.url + " -> Source: " + source.website, level=log.DEBUG)
return source.parse(response) return source.parse(response)
@ -42,7 +42,7 @@ class FourmiSpider(Spider):
:return: A list of Scrapy Request objects :return: A list of Scrapy Request objects
""" """
requests = [] requests = []
for parser in self.__sources: for parser in self._sources:
parser_requests = parser.new_compound_request(compound) parser_requests = parser.new_compound_request(compound)
if parser_requests is not None: if parser_requests is not None:
requests.append(parser_requests) requests.append(parser_requests)
@ -71,5 +71,5 @@ class FourmiSpider(Spider):
A function that adds a new Parser object to the list of available parsers. A function that adds a new Parser object to the list of available parsers.
:param source: A Source Object :param source: A Source Object
""" """
self.__sources.append(source) self._sources.append(source)
source.set_spider(self) source.set_spider(self)