from scrapy import log
# from scrapy.http import Request


class Source:
    website = "http://something/.*"  # Regex of URIs the source is able to parse
    _spider = None

    def __init__(self, config=None):
        """
        Initialization of a new Source.
        :param config: A dictionary with source-specific configuration values.
        """
        self.cfg = {}
        if config is not None:
            self.cfg = config

    def parse(self, response):
        """
        This function should be able to parse all Scrapy Response objects with a URL matching the website regex.
        :param response: A Scrapy Response object
        :return: A list of Result items and new Scrapy Requests
        """
        log.msg("The parse function of the empty source was used.", level=log.WARNING)

    def new_compound_request(self, compound):
        """
        This function should return a Scrapy Request for the given compound.
        :param compound: A compound name.
        :return: A new Scrapy Request
        """
        # return Request(url=self.website[:-2].replace("\\", "") + compound, callback=self.parse)
        pass

    def set_spider(self, spider):
        """
        A function to save the associated spider.
        :param spider: A FourmiSpider object
        """
        self._spider = spider
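

# --- Illustrative sketch, not part of the original file ---
# A minimal example of how a concrete source might subclass Source; the class
# name "ExampleSource" and its URL pattern are assumptions made purely for
# illustration, not part of the Fourmi codebase.
class ExampleSource(Source):
    website = "http://www\\.example\\.com/compound/.*"  # hypothetical URL pattern

    def parse(self, response):
        results = []
        # ... extract data from the response here and append Result items
        # and/or new Scrapy Requests for follow-up pages ...
        return results

    def new_compound_request(self, compound):
        from scrapy.http import Request
        # Strip the trailing ".*" and unescape the regex to obtain a concrete URL.
        return Request(url=self.website[:-2].replace("\\", "") + compound,
                       callback=self.parse)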