diff --git a/FourmiCrawler/sources/ChemSpider.py b/FourmiCrawler/sources/ChemSpider.py index 8c0bd8b..147b9b1 100644 --- a/FourmiCrawler/sources/ChemSpider.py +++ b/FourmiCrawler/sources/ChemSpider.py @@ -20,8 +20,8 @@ class ChemSpider(Source): somewhere. """ - def __init__(self): - Source.__init__(self) + def __init__(self, config): + Source.__init__(self, config) website = 'http://www.chemspider.com/*' diff --git a/FourmiCrawler/sources/NIST.py b/FourmiCrawler/sources/NIST.py index 6e8fabb..b125790 100644 --- a/FourmiCrawler/sources/NIST.py +++ b/FourmiCrawler/sources/NIST.py @@ -24,8 +24,8 @@ class NIST(Source): ignore_list = set() - def __init__(self): - Source.__init__(self) + def __init__(self, config): + Source.__init__(self, config) def parse(self, response): sel = Selector(response) diff --git a/FourmiCrawler/sources/WikipediaParser.py b/FourmiCrawler/sources/WikipediaParser.py index 868b49f..56adc4c 100644 --- a/FourmiCrawler/sources/WikipediaParser.py +++ b/FourmiCrawler/sources/WikipediaParser.py @@ -19,8 +19,8 @@ class WikipediaParser(Source): __spider = None searched_compounds = [] - def __init__(self): - Source.__init__(self) + def __init__(self, config): + Source.__init__(self, config) def parse(self, response): """ Distributes the above described behaviour """ @@ -116,4 +116,4 @@ class WikipediaParser(Source): """ find external links, named 'Identifiers' to different sources. """ links = sel.xpath('//span[contains(concat(" ",normalize-space(@class)," "),"reflink")]/a' '[contains(concat(" ",normalize-space(@class)," "),"external")]/@href').extract() - return links \ No newline at end of file + return links diff --git a/FourmiCrawler/sources/source.py b/FourmiCrawler/sources/source.py index d289d72..603d91f 100644 --- a/FourmiCrawler/sources/source.py +++ b/FourmiCrawler/sources/source.py @@ -6,7 +6,7 @@ class Source: website = "http://something/*" # Regex of URI's the source is able to parse _spider = None - def __init__(self): + def __init__(self, config): """ Initiation of a new Source """ diff --git a/sourceloader.py b/sourceloader.py index 5ee22df..512ca7a 100644 --- a/sourceloader.py +++ b/sourceloader.py @@ -29,7 +29,7 @@ class SourceLoader: sourcecfg = dict() if config.has_section(cls.__name__): sourcecfg = dict(config.items(cls.__name__)) - self.sources.append(cls()) # [review] - Would we ever need arguments for the parsers? + self.sources.append(cls(sourcecfg)) known_parser.add(cls) def include(self, source_names):