Archived
1
0

each source now receives a configuration dictionary

This commit is contained in:
RTB 2014-06-05 16:30:48 +02:00
parent fb3c760249
commit ff3b81b813
5 changed files with 9 additions and 9 deletions

View File

@ -20,8 +20,8 @@ class ChemSpider(Source):
somewhere.
"""
def __init__(self):
Source.__init__(self)
def __init__(self, config):
Source.__init__(self, config)
website = 'http://www.chemspider.com/*'

View File

@ -24,8 +24,8 @@ class NIST(Source):
ignore_list = set()
def __init__(self):
Source.__init__(self)
def __init__(self, config):
Source.__init__(self, config)
def parse(self, response):
sel = Selector(response)

View File

@ -19,8 +19,8 @@ class WikipediaParser(Source):
__spider = None
searched_compounds = []
def __init__(self):
Source.__init__(self)
def __init__(self, config):
Source.__init__(self, config)
def parse(self, response):
""" Distributes the above described behaviour """
@ -116,4 +116,4 @@ class WikipediaParser(Source):
""" find external links, named 'Identifiers' to different sources. """
links = sel.xpath('//span[contains(concat(" ",normalize-space(@class)," "),"reflink")]/a'
'[contains(concat(" ",normalize-space(@class)," "),"external")]/@href').extract()
return links
return links

View File

@ -6,7 +6,7 @@ class Source:
website = "http://something/*" # Regex of URIs the source is able to parse
_spider = None
def __init__(self):
def __init__(self, config):
"""
Initialization of a new Source
"""

View File

@ -29,7 +29,7 @@ class SourceLoader:
sourcecfg = dict()
if config.has_section(cls.__name__):
sourcecfg = dict(config.items(cls.__name__))
self.sources.append(cls()) # [review] - Would we ever need arguments for the parsers?
self.sources.append(cls(sourcecfg))
known_parser.add(cls)
def include(self, source_names):