diff --git a/FourmiCrawler/sources/ChemSpider.py b/FourmiCrawler/sources/ChemSpider.py
index 87a6ee7..fb51a4a 100644
--- a/FourmiCrawler/sources/ChemSpider.py
+++ b/FourmiCrawler/sources/ChemSpider.py
@@ -26,9 +26,12 @@ class ChemSpider(Source):
     structure = 'Chemical-Structure.%s.html'
     extendedinfo = 'MassSpecAPI.asmx/GetExtendedCompoundInfo?csid=%s&token='
 
-    def __init__(self, config={}):
+    def __init__(self, config=None):
         Source.__init__(self, config)
-        self.cfg = config
+        if config is None:
+            self.cfg = {}
+        else:
+            self.cfg = config
         self.ignore_list = []
         if 'token' not in self.cfg or self.cfg['token'] == '':
             log.msg('ChemSpider token not set or empty, search/MassSpec API '
diff --git a/FourmiCrawler/sources/NIST.py b/FourmiCrawler/sources/NIST.py
index 3c323ef..d71d08f 100644
--- a/FourmiCrawler/sources/NIST.py
+++ b/FourmiCrawler/sources/NIST.py
@@ -22,12 +22,13 @@ class NIST(Source):
 
     search = 'cgi/cbook.cgi?Name=%s&Units=SI&cTP=on'
 
-    cfg = {}
-
-    def __init__(self, config={}):
+    def __init__(self, config=None):
         Source.__init__(self, config)
         self.ignore_list = set()
-        self.cfg = config
+        if config is None:
+            self.cfg = {}
+        else:
+            self.cfg = config
 
     def parse(self, response):
         sel = Selector(response)
diff --git a/FourmiCrawler/sources/WikipediaParser.py b/FourmiCrawler/sources/WikipediaParser.py
index 4aa49b2..b995f30 100644
--- a/FourmiCrawler/sources/WikipediaParser.py
+++ b/FourmiCrawler/sources/WikipediaParser.py
@@ -1,9 +1,11 @@
+import re
+
 from scrapy.http import Request
 from scrapy import log
-from source import Source
 from scrapy.selector import Selector
+
+from source import Source
 from FourmiCrawler.items import Result
-import re
 
 
 class WikipediaParser(Source):
@@ -17,11 +19,13 @@ class WikipediaParser(Source):
     __spider = None
     searched_compounds = []
 
-    cfg = {}
 
-    def __init__(self, config={}):
+    def __init__(self, config=None):
         Source.__init__(self, config)
-        self.cfg = config
+        if config is None:
+            self.cfg = {}
+        else:
+            self.cfg = config
 
     def parse(self, response):
         """
diff --git a/FourmiCrawler/sources/source.py b/FourmiCrawler/sources/source.py
index a609bb9..fe36784 100644
--- a/FourmiCrawler/sources/source.py
+++ b/FourmiCrawler/sources/source.py
@@ -6,7 +6,7 @@ class Source:
     website = "http://something/*" # Regex of URI's the source is able to parse
     _spider = None
 
-    def __init__(self, config={}):
+    def __init__(self, config=None):
         """
         Initiation of a new Source
         """
diff --git a/FourmiCrawler/spider.py b/FourmiCrawler/spider.py
index 5c09f07..7552c7d 100644
--- a/FourmiCrawler/spider.py
+++ b/FourmiCrawler/spider.py
@@ -10,7 +10,7 @@ class FourmiSpider(Spider):
     """
     name = "FourmiSpider"
 
-    def __init__(self, compound=None, selected_attributes=[".*"], *args, **kwargs):
+    def __init__(self, compound=None, selected_attributes=None, *args, **kwargs):
         """
         Initiation of the Spider
         :param compound: compound that will be searched.
@@ -20,7 +20,10 @@ class FourmiSpider(Spider):
         self.synonyms = set()
         super(FourmiSpider, self).__init__(*args, **kwargs)
         self.synonyms.add(compound)
-        self.selected_attributes = selected_attributes
+        if selected_attributes is None:
+            self.selected_attributes = [".*"]
+        else:
+            self.selected_attributes = selected_attributes
 
     def parse(self, response):
         """