From 828928f1ddc23b24fd2cbfa3d225a8922bc5be1d Mon Sep 17 00:00:00 2001 From: RTB Date: Thu, 5 Jun 2014 15:55:01 +0200 Subject: [PATCH 01/40] added sources.cfg to git ignore --- .gitignore | 3 +++ 1 file changed, 3 insertions(+) diff --git a/.gitignore b/.gitignore index 158ef41..14c4e72 100644 --- a/.gitignore +++ b/.gitignore @@ -4,6 +4,9 @@ #Python Specific ignores *.pyc +#may contain authentication information +sources.cfg + #THINGS WE WOULD NEVER EVER WANT! #ignore thumbnails created by windows Thumbs.db From fb3c7602497d864b06a893f93ddedf5f965cb6c3 Mon Sep 17 00:00:00 2001 From: RTB Date: Thu, 5 Jun 2014 16:22:52 +0200 Subject: [PATCH 02/40] sourceloader now reads sources.cfg for source initialization --- sourceloader.py | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/sourceloader.py b/sourceloader.py index 2ed50a8..5ee22df 100644 --- a/sourceloader.py +++ b/sourceloader.py @@ -1,6 +1,7 @@ import inspect import os import re +import ConfigParser from FourmiCrawler.sources.source import Source @@ -17,11 +18,17 @@ class SourceLoader: path += "/" + rel_dir known_parser = set() + config = ConfigParser.ConfigParser() + config.read('sources.cfg') + for py in [f[:-3] for f in os.listdir(path) if f.endswith('.py') and f != '__init__.py']: mod = __import__('.'.join([rel_dir.replace("/", "."), py]), fromlist=[py]) classes = [getattr(mod, x) for x in dir(mod) if inspect.isclass(getattr(mod, x))] for cls in classes: if issubclass(cls, Source) and cls not in known_parser: + sourcecfg = dict() + if config.has_section(cls.__name__): + sourcecfg = dict(config.items(cls.__name__)) self.sources.append(cls()) # [review] - Would we ever need arguments for the parsers? 
known_parser.add(cls) @@ -55,4 +62,4 @@ class SourceLoader: string += "Source: " + src.__class__.__name__ string += " - " string += "URI: " + src.website + "\n" - return string \ No newline at end of file + return string From ff3b81b81375cfc2c2f23b0cc9236e1a7356ff47 Mon Sep 17 00:00:00 2001 From: RTB Date: Thu, 5 Jun 2014 16:30:48 +0200 Subject: [PATCH 03/40] each source now receives a configuration dictionary --- FourmiCrawler/sources/ChemSpider.py | 4 ++-- FourmiCrawler/sources/NIST.py | 4 ++-- FourmiCrawler/sources/WikipediaParser.py | 6 +++--- FourmiCrawler/sources/source.py | 2 +- sourceloader.py | 2 +- 5 files changed, 9 insertions(+), 9 deletions(-) diff --git a/FourmiCrawler/sources/ChemSpider.py b/FourmiCrawler/sources/ChemSpider.py index 8c0bd8b..147b9b1 100644 --- a/FourmiCrawler/sources/ChemSpider.py +++ b/FourmiCrawler/sources/ChemSpider.py @@ -20,8 +20,8 @@ class ChemSpider(Source): somewhere. """ - def __init__(self): - Source.__init__(self) + def __init__(self, config): + Source.__init__(self, config) website = 'http://www.chemspider.com/*' diff --git a/FourmiCrawler/sources/NIST.py b/FourmiCrawler/sources/NIST.py index 6e8fabb..b125790 100644 --- a/FourmiCrawler/sources/NIST.py +++ b/FourmiCrawler/sources/NIST.py @@ -24,8 +24,8 @@ class NIST(Source): ignore_list = set() - def __init__(self): - Source.__init__(self) + def __init__(self, config): + Source.__init__(self, config) def parse(self, response): sel = Selector(response) diff --git a/FourmiCrawler/sources/WikipediaParser.py b/FourmiCrawler/sources/WikipediaParser.py index 868b49f..56adc4c 100644 --- a/FourmiCrawler/sources/WikipediaParser.py +++ b/FourmiCrawler/sources/WikipediaParser.py @@ -19,8 +19,8 @@ class WikipediaParser(Source): __spider = None searched_compounds = [] - def __init__(self): - Source.__init__(self) + def __init__(self, config): + Source.__init__(self, config) def parse(self, response): """ Distributes the above described behaviour """ @@ -116,4 +116,4 @@ class 
WikipediaParser(Source): """ find external links, named 'Identifiers' to different sources. """ links = sel.xpath('//span[contains(concat(" ",normalize-space(@class)," "),"reflink")]/a' '[contains(concat(" ",normalize-space(@class)," "),"external")]/@href').extract() - return links \ No newline at end of file + return links diff --git a/FourmiCrawler/sources/source.py b/FourmiCrawler/sources/source.py index d289d72..603d91f 100644 --- a/FourmiCrawler/sources/source.py +++ b/FourmiCrawler/sources/source.py @@ -6,7 +6,7 @@ class Source: website = "http://something/*" # Regex of URI's the source is able to parse _spider = None - def __init__(self): + def __init__(self, config): """ Initiation of a new Source """ diff --git a/sourceloader.py b/sourceloader.py index 5ee22df..512ca7a 100644 --- a/sourceloader.py +++ b/sourceloader.py @@ -29,7 +29,7 @@ class SourceLoader: sourcecfg = dict() if config.has_section(cls.__name__): sourcecfg = dict(config.items(cls.__name__)) - self.sources.append(cls()) # [review] - Would we ever need arguments for the parsers? + self.sources.append(cls(sourcecfg)) known_parser.add(cls) def include(self, source_names): From eb3eee77a0e898e7a8a424674600ebabcc045c5f Mon Sep 17 00:00:00 2001 From: RTB Date: Thu, 5 Jun 2014 16:50:13 +0200 Subject: [PATCH 04/40] updated function description for __init__ --- sourceloader.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/sourceloader.py b/sourceloader.py index 512ca7a..8a6f8b4 100644 --- a/sourceloader.py +++ b/sourceloader.py @@ -12,6 +12,8 @@ class SourceLoader: def __init__(self, rel_dir="FourmiCrawler/sources"): """ The initiation of a SourceLoader, selects and indexes a directory for usable sources. + Also loads a configuration file for Sources and passes the arguments in + the named section to the source :param rel_dir: A relative path to a directory. 
""" path = os.path.dirname(os.path.abspath(__file__)) From df4ba2f784643fb4095bb0ce3a501393cb3ee1a8 Mon Sep 17 00:00:00 2001 From: RTB Date: Fri, 6 Jun 2014 12:48:30 +0200 Subject: [PATCH 05/40] changed __init__ of all sources to have an empty dictionary as default config value --- FourmiCrawler/sources/ChemSpider.py | 2 +- FourmiCrawler/sources/NIST.py | 2 +- FourmiCrawler/sources/WikipediaParser.py | 2 +- FourmiCrawler/sources/source.py | 2 +- 4 files changed, 4 insertions(+), 4 deletions(-) diff --git a/FourmiCrawler/sources/ChemSpider.py b/FourmiCrawler/sources/ChemSpider.py index 147b9b1..6332530 100644 --- a/FourmiCrawler/sources/ChemSpider.py +++ b/FourmiCrawler/sources/ChemSpider.py @@ -20,7 +20,7 @@ class ChemSpider(Source): somewhere. """ - def __init__(self, config): + def __init__(self, config={}): Source.__init__(self, config) website = 'http://www.chemspider.com/*' diff --git a/FourmiCrawler/sources/NIST.py b/FourmiCrawler/sources/NIST.py index b125790..2d152e4 100644 --- a/FourmiCrawler/sources/NIST.py +++ b/FourmiCrawler/sources/NIST.py @@ -24,7 +24,7 @@ class NIST(Source): ignore_list = set() - def __init__(self, config): + def __init__(self, config={}): Source.__init__(self, config) def parse(self, response): diff --git a/FourmiCrawler/sources/WikipediaParser.py b/FourmiCrawler/sources/WikipediaParser.py index 56adc4c..d0dfd03 100644 --- a/FourmiCrawler/sources/WikipediaParser.py +++ b/FourmiCrawler/sources/WikipediaParser.py @@ -19,7 +19,7 @@ class WikipediaParser(Source): __spider = None searched_compounds = [] - def __init__(self, config): + def __init__(self, config={}): Source.__init__(self, config) def parse(self, response): diff --git a/FourmiCrawler/sources/source.py b/FourmiCrawler/sources/source.py index 603d91f..a609bb9 100644 --- a/FourmiCrawler/sources/source.py +++ b/FourmiCrawler/sources/source.py @@ -6,7 +6,7 @@ class Source: website = "http://something/*" # Regex of URI's the source is able to parse _spider = None - def 
__init__(self, config): + def __init__(self, config={}): """ Initiation of a new Source """ From 217fb3e9cd27d25964bea5c29a6f023d37336b40 Mon Sep 17 00:00:00 2001 From: RTB Date: Fri, 6 Jun 2014 16:17:46 +0200 Subject: [PATCH 06/40] ChemSpider now uses the token from sources.cfg with checks --- FourmiCrawler/sources/ChemSpider.py | 28 ++++++++++++++++++---------- 1 file changed, 18 insertions(+), 10 deletions(-) diff --git a/FourmiCrawler/sources/ChemSpider.py b/FourmiCrawler/sources/ChemSpider.py index 6332530..1d79019 100644 --- a/FourmiCrawler/sources/ChemSpider.py +++ b/FourmiCrawler/sources/ChemSpider.py @@ -9,7 +9,7 @@ from FourmiCrawler.items import Result # [TODO] - Maybe clean up usage of '.extract()[0]', because of possible IndexError exception. - +# [TODO] - Add checks at search request and extendedCompoundInfo on whether the token was valid or not class ChemSpider(Source): """ChemSpider scraper for synonyms and properties @@ -20,20 +20,28 @@ class ChemSpider(Source): somewhere. """ - def __init__(self, config={}): - Source.__init__(self, config) - website = 'http://www.chemspider.com/*' - # [TODO] - Save and access token of specific user. 
- search = ('Search.asmx/SimpleSearch?query=%s&token=' - '052bfd06-5ce4-43d6-bf12-89eabefd2338') + search = 'Search.asmx/SimpleSearch?query=%s&token=' structure = 'Chemical-Structure.%s.html' - extendedinfo = ('MassSpecAPI.asmx/GetExtendedCompoundInfo?csid=%s&token=' - '052bfd06-5ce4-43d6-bf12-89eabefd2338') + extendedinfo = 'MassSpecAPI.asmx/GetExtendedCompoundInfo?csid=%s&token=' + cfg = {} ignore_list = [] + def __init__(self, config={}): + Source.__init__(self, config) + self.cfg = config + if 'reliability' not in self.cfg: + log.msg('Reliability not set for ChemSpider', level=log.WARNING) + if 'token' not in self.cfg or self.cfg['token'] == '': + log.msg('ChemSpider token not set or empty, search/MassSpec API ' + 'not available', level=log.WARNING) + self.cfg['token'] = '' + self.search += self.cfg['token'] + self.extendedinfo += self.cfg['token'] + + def parse(self, response): sel = Selector(response) requests = [] @@ -224,7 +232,7 @@ class ChemSpider(Source): callback=self.parse_extendedinfo)] def new_compound_request(self, compound): - if compound in self.ignore_list: # [TODO] - add regular expression + if compound in self.ignore_list or self.cfg['token'] == '': return None searchurl = self.website[:-1] + self.search % compound log.msg('chemspider compound', level=log.DEBUG) From 755c981efa83adb97a8826b07e14431729d982f5 Mon Sep 17 00:00:00 2001 From: RTB Date: Fri, 6 Jun 2014 18:12:31 +0200 Subject: [PATCH 07/40] created newresult function that uses the config for reliability --- FourmiCrawler/sources/ChemSpider.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/FourmiCrawler/sources/ChemSpider.py b/FourmiCrawler/sources/ChemSpider.py index 1d79019..0357477 100644 --- a/FourmiCrawler/sources/ChemSpider.py +++ b/FourmiCrawler/sources/ChemSpider.py @@ -210,6 +210,15 @@ class ChemSpider(Source): properties.append(result) return properties + def newresult(self, attribute, value, conditions, source='ChemSpider'): + return Result({ + 'attribute': 
attribute, + 'value': value, + 'source': source, + 'reliability': self.cfg['reliability'], + 'conditions': conditions + }) + def parse_searchrequest(self, response): """Parse the initial response of the ChemSpider Search API """ sel = Selector(response) From b3b879d2adb9afa2cf703a7d641e05d75e2b170e Mon Sep 17 00:00:00 2001 From: RTB Date: Fri, 6 Jun 2014 18:17:55 +0200 Subject: [PATCH 08/40] updated parse_extendedinfo to use the newresult function --- FourmiCrawler/sources/ChemSpider.py | 14 ++++++-------- 1 file changed, 6 insertions(+), 8 deletions(-) diff --git a/FourmiCrawler/sources/ChemSpider.py b/FourmiCrawler/sources/ChemSpider.py index 0357477..ce28930 100644 --- a/FourmiCrawler/sources/ChemSpider.py +++ b/FourmiCrawler/sources/ChemSpider.py @@ -199,18 +199,16 @@ class ChemSpider(Source): names = sel.xpath('*').xpath('name()').extract() values = sel.xpath('*').xpath('text()').extract() for (name, value) in zip(names, values): - result = Result({ - 'attribute': name, - 'value': value, # These values have no unit! - 'source': 'ChemSpider ExtendedCompoundInfo', - 'reliability': 'Unknown', - 'conditions': '' - }) + result = self.newresult( + attribute=name, + value=value, # These values have no unit! 
+ source='ChemSpider ExtendedCompoundInfo', + ) if result['value']: properties.append(result) return properties - def newresult(self, attribute, value, conditions, source='ChemSpider'): + def newresult(self, attribute, value, conditions='', source='ChemSpider'): return Result({ 'attribute': attribute, 'value': value, From 0021953a9a0bf580287923a82fa7aee217da809d Mon Sep 17 00:00:00 2001 From: RTB Date: Fri, 6 Jun 2014 18:20:29 +0200 Subject: [PATCH 09/40] updated parse_properties to use newresult function --- FourmiCrawler/sources/ChemSpider.py | 26 +++++++++++--------------- 1 file changed, 11 insertions(+), 15 deletions(-) diff --git a/FourmiCrawler/sources/ChemSpider.py b/FourmiCrawler/sources/ChemSpider.py index ce28930..38e7684 100644 --- a/FourmiCrawler/sources/ChemSpider.py +++ b/FourmiCrawler/sources/ChemSpider.py @@ -84,13 +84,12 @@ class ChemSpider(Source): prop_value = m.group(1) prop_conditions = m.group(2) - new_prop = Result({ - 'attribute': prop_name, - 'value': prop_value, - 'source': 'ChemSpider Predicted - ACD/Labs Tab', - 'reliability': 'Unknown', - 'conditions': prop_conditions - }) + new_prop = self.newresult( + attribute=prop_name, + value=prop_value, + source='ChemSpider Predicted - ACD/Labs Tab', + conditions=prop_conditions + ) properties.append(new_prop) log.msg('CS prop: |%s| |%s| |%s|' % (new_prop['attribute'], new_prop['value'], new_prop['source']), @@ -108,14 +107,11 @@ class ChemSpider(Source): if line.xpath('span/text()'): property_name = line.xpath('span/text()').extract()[0].rstrip() else: - new_prop = Result({ - 'attribute': property_name[:-1], - 'value': line.xpath('text()').extract()[0].rstrip(), - 'source': line.xpath( - 'strong/text()').extract()[0].rstrip(), - 'reliability': 'Unknown', - 'conditions': '' - }) + new_prop = self.newresult( + attribute=property_name[:-1], + value=line.xpath('text()').extract()[0].rstrip(), + source=line.xpath('strong/text()').extract()[0].rstrip(), + ) properties.append(new_prop) log.msg('CS 
prop: |%s| |%s| |%s|' % (new_prop['attribute'], new_prop['value'], From de21891bff3ae5bf567ebcb20fc57016af3fc9d1 Mon Sep 17 00:00:00 2001 From: RTB Date: Fri, 6 Jun 2014 18:52:18 +0200 Subject: [PATCH 10/40] created newresult function in NIST.py --- FourmiCrawler/sources/NIST.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/FourmiCrawler/sources/NIST.py b/FourmiCrawler/sources/NIST.py index 2d152e4..016b704 100644 --- a/FourmiCrawler/sources/NIST.py +++ b/FourmiCrawler/sources/NIST.py @@ -269,6 +269,15 @@ class NIST(Source): return results + def newresult(self, attribute, value, conditions=''): + return Result({ + 'attribute': attribute, + 'value': value, + 'source': 'NIST', + 'reliability': self.cfg['reliability'], + 'conditions': conditions + }) + def new_compound_request(self, compound): if compound not in self.ignore_list: self.ignore_list.update(compound) From e347b7538d1c77fe2ca612dd6847a8b45c7d1d09 Mon Sep 17 00:00:00 2001 From: RTB Date: Fri, 6 Jun 2014 18:54:38 +0200 Subject: [PATCH 11/40] updated parse_individual_datapoints to use newresult function --- FourmiCrawler/sources/NIST.py | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/FourmiCrawler/sources/NIST.py b/FourmiCrawler/sources/NIST.py index 016b704..209df56 100644 --- a/FourmiCrawler/sources/NIST.py +++ b/FourmiCrawler/sources/NIST.py @@ -230,8 +230,7 @@ class NIST(Source): return results - @staticmethod - def parse_individual_datapoints(response): + def parse_individual_datapoints(self, response): """Parses the page linked from aggregate data""" sel = Selector(response) table = sel.xpath('//table[@class="data"]')[0] @@ -258,13 +257,11 @@ class NIST(Source): if m: uncertainty = '+- %s ' % m.group(1) # [TODO]: get the plusminus sign working in here - result = Result({ - 'attribute': name, - 'value': '%s %s%s' % (tds[0], uncertainty, unit), - 'source': 'NIST', - 'reliability': 'Unknown', - 'conditions': condition - }) + result = self.newresult( + 
attribute=name, + value='%s %s%s' % (tds[0], uncertainty, unit), + conditions=condition + ) results.append(result) return results From a272f9f6d6bd14e0345df2f70237d942d1bc1e70 Mon Sep 17 00:00:00 2001 From: RTB Date: Fri, 6 Jun 2014 18:58:33 +0200 Subject: [PATCH 12/40] updated parse_antoine_data to use newresult function --- FourmiCrawler/sources/NIST.py | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/FourmiCrawler/sources/NIST.py b/FourmiCrawler/sources/NIST.py index 209df56..f20d5ba 100644 --- a/FourmiCrawler/sources/NIST.py +++ b/FourmiCrawler/sources/NIST.py @@ -212,20 +212,17 @@ class NIST(Source): results.append(result) return results - @staticmethod - def parse_antoine_data(table, summary): + def parse_antoine_data(self, table, summary): """Parse table containing parameters for the Antione equation""" results = [] for tr in table.xpath('tr[td]'): tds = tr.xpath('td/text()').extract() - result = Result({ - 'attribute': summary, - 'value': 'A=%s, B=%s, C=%s' % (tds[1], tds[2], tds[3]), - 'source': 'NIST', - 'reliability': 'Unknown', - 'conditions': '%s K' % tds[0] - }) + result = self.newresult( + attribute=summary, + value='A=%s, B=%s, C=%s' % (tds[1], tds[2], tds[3]), + conditions='%s K' % tds[0] + ) results.append(result) return results From ed53889018889201984879ae933b8741b1dffa1d Mon Sep 17 00:00:00 2001 From: RTB Date: Fri, 6 Jun 2014 19:00:04 +0200 Subject: [PATCH 13/40] updated parse_generic_data to use newresult function --- FourmiCrawler/sources/NIST.py | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/FourmiCrawler/sources/NIST.py b/FourmiCrawler/sources/NIST.py index f20d5ba..1d8c91f 100644 --- a/FourmiCrawler/sources/NIST.py +++ b/FourmiCrawler/sources/NIST.py @@ -185,8 +185,7 @@ class NIST(Source): return results - @staticmethod - def parse_generic_data(table, summary): + def parse_generic_data(self, table, summary): """Parses the common tables of 4 and 5 rows. 
Assumes they are of the form: Symbol (unit)|Temperature (K)|Method|Reference|Comment @@ -202,13 +201,11 @@ class NIST(Source): for tr in table.xpath('tr[td]'): tds = tr.xpath('td/text()').extract() - result = Result({ - 'attribute': summary, - 'value': tds[0] + ' ' + unit, - 'source': 'NIST', - 'reliability': 'Unknown', - 'conditions': '%s K' % tds[1] - }) + result = self.newresult( + attribute=summary, + value=tds[0] + ' ' + unit, + conditions='%s K' % tds[1] + ) results.append(result) return results From c49d76cb660bda1b6dc441164a48a70bc1a3a07f Mon Sep 17 00:00:00 2001 From: RTB Date: Fri, 6 Jun 2014 19:03:51 +0200 Subject: [PATCH 14/40] updated parse_transition_data to use newresult function --- FourmiCrawler/sources/NIST.py | 15 ++++++--------- 1 file changed, 6 insertions(+), 9 deletions(-) diff --git a/FourmiCrawler/sources/NIST.py b/FourmiCrawler/sources/NIST.py index 1d8c91f..19c18a5 100644 --- a/FourmiCrawler/sources/NIST.py +++ b/FourmiCrawler/sources/NIST.py @@ -161,8 +161,7 @@ class NIST(Source): results.append(result) return results - @staticmethod - def parse_transition_data(table, summary): + def parse_transition_data(self, table, summary): """Parses the table containing properties regarding phase changes""" results = [] @@ -174,13 +173,11 @@ class NIST(Source): for tr in table.xpath('tr[td]'): tds = tr.xpath('td/text()').extract() - result = Result({ - 'attribute': summary, - 'value': tds[0] + ' ' + unit, - 'source': 'NIST', - 'reliability': 'Unknown', - 'conditions': '%s K, (%s -> %s)' % (tds[1], tds[2], tds[3]) - }) + result = self.newresult( + attribute=summary, + value=tds[0] + ' ' + unit, + conditions='%s K, (%s -> %s)' % (tds[1], tds[2], tds[3]) + ) results.append(result) return results From 80770de5c0d42d31127b99e5aec3e4627dc46e5c Mon Sep 17 00:00:00 2001 From: RTB Date: Fri, 6 Jun 2014 19:06:22 +0200 Subject: [PATCH 15/40] updated parse_aggregate_data to use newresult function --- FourmiCrawler/sources/NIST.py | 12 +++++------- 1 file 
changed, 5 insertions(+), 7 deletions(-) diff --git a/FourmiCrawler/sources/NIST.py b/FourmiCrawler/sources/NIST.py index 19c18a5..c684e2c 100644 --- a/FourmiCrawler/sources/NIST.py +++ b/FourmiCrawler/sources/NIST.py @@ -150,13 +150,11 @@ class NIST(Source): name = m.group(1) condition = m.group(2) - result = Result({ - 'attribute': name, - 'value': data[1] + ' ' + data[2], - 'source': 'NIST', - 'reliability': 'Unknown', - 'conditions': condition - }) + result = Result( + attribute=name, + value=data[1] + ' ' + data[2], + conditions=condition + ) log.msg('NIST: |%s|' % data, level=log.DEBUG) results.append(result) return results From a77eafe5130914aacdea630cb2c64c5994b54187 Mon Sep 17 00:00:00 2001 From: RTB Date: Fri, 6 Jun 2014 19:08:22 +0200 Subject: [PATCH 16/40] updated parse_generic_info to use newresult function --- FourmiCrawler/sources/NIST.py | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/FourmiCrawler/sources/NIST.py b/FourmiCrawler/sources/NIST.py index c684e2c..8f8338a 100644 --- a/FourmiCrawler/sources/NIST.py +++ b/FourmiCrawler/sources/NIST.py @@ -114,13 +114,10 @@ class NIST(Source): requests = [] for key, value in data.iteritems(): - result = Result({ - 'attribute': key, - 'value': value, - 'source': 'NIST', - 'reliability': 'Unknown', - 'conditions': '' - }) + result = self.newresult( + attribute=key, + value=value + ) requests.append(result) return requests From f6f5c5f6fe86eaaaac0a1bc6a32abecd52d0993b Mon Sep 17 00:00:00 2001 From: RTB Date: Fri, 6 Jun 2014 19:13:25 +0200 Subject: [PATCH 17/40] added config to NIST along with reliability check --- FourmiCrawler/sources/NIST.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/FourmiCrawler/sources/NIST.py b/FourmiCrawler/sources/NIST.py index 8f8338a..afafae1 100644 --- a/FourmiCrawler/sources/NIST.py +++ b/FourmiCrawler/sources/NIST.py @@ -23,9 +23,14 @@ class NIST(Source): search = 'cgi/cbook.cgi?Name=%s&Units=SI&cTP=on' ignore_list = set() + cfg = {} def 
__init__(self, config={}): Source.__init__(self, config) + self.cfg = config + if 'reliability' not in self.cfg or self.cfg['reliability'] == '': + log.msg('Reliability not set for NIST', level=log.WARNING) + self.cfg['reliability'] = '' def parse(self, response): sel = Selector(response) From 981615c6b3539bbad66d34c62b7bca19ed8a274f Mon Sep 17 00:00:00 2001 From: RTB Date: Fri, 6 Jun 2014 19:14:24 +0200 Subject: [PATCH 18/40] chemspider __init__ now sets reliability to empty string if it does not exist in config --- FourmiCrawler/sources/ChemSpider.py | 1 + 1 file changed, 1 insertion(+) diff --git a/FourmiCrawler/sources/ChemSpider.py b/FourmiCrawler/sources/ChemSpider.py index 38e7684..faad02d 100644 --- a/FourmiCrawler/sources/ChemSpider.py +++ b/FourmiCrawler/sources/ChemSpider.py @@ -34,6 +34,7 @@ class ChemSpider(Source): self.cfg = config if 'reliability' not in self.cfg: log.msg('Reliability not set for ChemSpider', level=log.WARNING) + self.cfg['reliability'] = '' if 'token' not in self.cfg or self.cfg['token'] == '': log.msg('ChemSpider token not set or empty, search/MassSpec API ' 'not available', level=log.WARNING) From 68139b483931cbddb44091c1c958003d6e9619a1 Mon Sep 17 00:00:00 2001 From: RTB Date: Fri, 6 Jun 2014 19:27:27 +0200 Subject: [PATCH 19/40] added config to wikipedia along with reliability check --- FourmiCrawler/sources/WikipediaParser.py | 7 +++++++ 1 file changed, 7 insertions(+) diff --git a/FourmiCrawler/sources/WikipediaParser.py b/FourmiCrawler/sources/WikipediaParser.py index d0dfd03..dd27e34 100644 --- a/FourmiCrawler/sources/WikipediaParser.py +++ b/FourmiCrawler/sources/WikipediaParser.py @@ -19,8 +19,15 @@ class WikipediaParser(Source): __spider = None searched_compounds = [] + cfg = {} + def __init__(self, config={}): Source.__init__(self, config) + self.cfg = config + if 'reliability' not in self.cfg or self.cfg['reliability'] == '': + log.msg('Reliability not set for Wikipedia', level=log.WARNING) + self.cfg['reliability'] = 
'' + def parse(self, response): """ Distributes the above described behaviour """ From 69664d3ac0fefa036ad5dfe99c2641a13a693ad8 Mon Sep 17 00:00:00 2001 From: RTB Date: Fri, 6 Jun 2014 19:30:31 +0200 Subject: [PATCH 20/40] added newresult function to WikipediaParser.py --- FourmiCrawler/sources/WikipediaParser.py | 9 +++++++++ 1 file changed, 9 insertions(+) diff --git a/FourmiCrawler/sources/WikipediaParser.py b/FourmiCrawler/sources/WikipediaParser.py index dd27e34..34b51c0 100644 --- a/FourmiCrawler/sources/WikipediaParser.py +++ b/FourmiCrawler/sources/WikipediaParser.py @@ -124,3 +124,12 @@ class WikipediaParser(Source): links = sel.xpath('//span[contains(concat(" ",normalize-space(@class)," "),"reflink")]/a' '[contains(concat(" ",normalize-space(@class)," "),"external")]/@href').extract() return links + + def newresult(self, attribute, value): + return Result({ + 'attribute': attribute, + 'value': value, + 'source': 'Wikipedia', + 'reliability': self.cfg['reliability'], + 'conditions': '' + }) From 30f00b676d0b1396b538eb95e0127bea2393c12b Mon Sep 17 00:00:00 2001 From: RTB Date: Fri, 6 Jun 2014 20:16:25 +0200 Subject: [PATCH 21/40] updated parse to use newresult function --- FourmiCrawler/sources/WikipediaParser.py | 23 ++++++++--------------- 1 file changed, 8 insertions(+), 15 deletions(-) diff --git a/FourmiCrawler/sources/WikipediaParser.py b/FourmiCrawler/sources/WikipediaParser.py index 34b51c0..781d08f 100644 --- a/FourmiCrawler/sources/WikipediaParser.py +++ b/FourmiCrawler/sources/WikipediaParser.py @@ -28,7 +28,6 @@ class WikipediaParser(Source): log.msg('Reliability not set for Wikipedia', level=log.WARNING) self.cfg['reliability'] = '' - def parse(self, response): """ Distributes the above described behaviour """ log.msg('A response from %s just arrived!' 
% response.url, level=log.DEBUG) @@ -51,13 +50,10 @@ class WikipediaParser(Source): prop_names = tr_list[::2] prop_values = tr_list[1::2] for i, prop_name in enumerate(prop_names): - item = Result({ - 'attribute': prop_name.extract().encode('utf-8'), - 'value': prop_values[i].extract().encode('utf-8'), - 'source': "Wikipedia", - 'reliability': "Unknown", - 'conditions': "" - }) + item = self.newresult( + attribute=prop_name.extract().encode('utf-8'), + value=prop_values[i].extract().encode('utf-8') + ) items.append(item) log.msg('Wiki prop: |%s| |%s| |%s|' % (item['attribute'], item['value'], item['source']), level=log.DEBUG) @@ -68,13 +64,10 @@ class WikipediaParser(Source): log.msg('item: %s' % tablerow.xpath('./th').xpath('normalize-space(string())'), level=log.DEBUG) if tablerow.xpath('./th').xpath('normalize-space(string())') and tablerow.xpath('./td').xpath( 'normalize-space(string())'): - item = Result({ - 'attribute': tablerow.xpath('./th').xpath('normalize-space(string())').extract()[0].encode('utf-8'), - 'value': tablerow.xpath('./td').xpath('normalize-space(string())').extract()[0].encode('utf-8'), - 'source': "Wikipedia", - 'reliability': "Unknown", - 'conditions': "" - }) + item = self.newresult( + attribute=tablerow.xpath('./th').xpath('normalize-space(string())').extract()[0].encode('utf-8'), + value=tablerow.xpath('./td').xpath('normalize-space(string())').extract()[0].encode('utf-8'), + ) items.append(item) log.msg( 'Wiki prop: |attribute: %s| |value: %s| |%s|' % (item['attribute'], item['value'], item['source']), From 4eeabd7aba20ccfa1d73fc59a4a9ba38eb71c6df Mon Sep 17 00:00:00 2001 From: RTB Date: Fri, 6 Jun 2014 23:16:22 +0200 Subject: [PATCH 22/40] removed erroneous @staticmethod for parse_properties --- FourmiCrawler/sources/ChemSpider.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/FourmiCrawler/sources/ChemSpider.py b/FourmiCrawler/sources/ChemSpider.py index faad02d..7834077 100644 --- 
a/FourmiCrawler/sources/ChemSpider.py +++ b/FourmiCrawler/sources/ChemSpider.py @@ -53,8 +53,7 @@ class ChemSpider(Source): return requests - @staticmethod - def parse_properties(sel): + def parse_properties(self, sel): """scrape Experimental Data and Predicted ACD/Labs tabs""" properties = [] From a12add5e4c8d034d3590927ed85c3cc56319dea3 Mon Sep 17 00:00:00 2001 From: RTB Date: Fri, 6 Jun 2014 23:29:32 +0200 Subject: [PATCH 23/40] removed @staticmethod from parse_extendedinfo --- FourmiCrawler/sources/ChemSpider.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/FourmiCrawler/sources/ChemSpider.py b/FourmiCrawler/sources/ChemSpider.py index 7834077..8078347 100644 --- a/FourmiCrawler/sources/ChemSpider.py +++ b/FourmiCrawler/sources/ChemSpider.py @@ -187,8 +187,7 @@ class ChemSpider(Source): } return synonym - @staticmethod - def parse_extendedinfo(response): + def parse_extendedinfo(self, response): """Scrape data from the ChemSpider GetExtendedCompoundInfo API""" sel = Selector(response) properties = [] From 012267c31c31cc72ce8377540b7c86533aebb6e7 Mon Sep 17 00:00:00 2001 From: RTB Date: Fri, 6 Jun 2014 23:32:03 +0200 Subject: [PATCH 24/40] fixed result in parse_aggregate_data --- FourmiCrawler/sources/NIST.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/FourmiCrawler/sources/NIST.py b/FourmiCrawler/sources/NIST.py index afafae1..10496ab 100644 --- a/FourmiCrawler/sources/NIST.py +++ b/FourmiCrawler/sources/NIST.py @@ -152,7 +152,7 @@ class NIST(Source): name = m.group(1) condition = m.group(2) - result = Result( + result = self.newresult( attribute=name, value=data[1] + ' ' + data[2], conditions=condition From b847d2d5912288d4f2a860c55f1f6fc2ad81fa87 Mon Sep 17 00:00:00 2001 From: RTB Date: Fri, 6 Jun 2014 23:49:38 +0200 Subject: [PATCH 25/40] replaced ChemSpider() with NIST() due to token issues --- tests/test_spider.py | 8 ++++---- 1 file changed, 4 insertions(+), 4 deletions(-) diff --git a/tests/test_spider.py 
b/tests/test_spider.py index 89d6cfc..589a571 100644 --- a/tests/test_spider.py +++ b/tests/test_spider.py @@ -3,7 +3,7 @@ import unittest from scrapy.http import Request from FourmiCrawler import spider -from FourmiCrawler.sources.ChemSpider import ChemSpider +from FourmiCrawler.sources.NIST import NIST from FourmiCrawler.sources.source import Source @@ -41,7 +41,7 @@ class TestFoumiSpider(unittest.TestCase): self.spi.add_source(src) self.assertEqual(self.spi.start_requests(), []) - src2 = ChemSpider() + src2 = NIST() self.spi.add_source(src2) requests = self.spi.start_requests() self.assertGreater(len(requests), 0) @@ -57,8 +57,8 @@ class TestFoumiSpider(unittest.TestCase): self.assertEqual(self.spi.get_synonym_requests("new_compound"), []) self.assertIn("new_compound", self.spi.synonyms) - src2 = ChemSpider() + src2 = NIST() self.spi.add_source(src2) self.assertIsInstance(self.spi.get_synonym_requests("other_compound")[0], Request) self.assertIn("other_compound", self.spi.synonyms) - self.assertEqual(self.spi.get_synonym_requests("other_compound"), []) \ No newline at end of file + self.assertEqual(self.spi.get_synonym_requests("other_compound"), []) From cbab2ac7a4f0ff6c4521fe87d0baa3db3eae94c9 Mon Sep 17 00:00:00 2001 From: "Jip J. 
Dekker" Date: Sat, 7 Jun 2014 12:21:00 +0200 Subject: [PATCH 26/40] The difference between class and object variables --- FourmiCrawler/sources/NIST.py | 2 +- FourmiCrawler/spider.py | 4 ++-- 2 files changed, 3 insertions(+), 3 deletions(-) diff --git a/FourmiCrawler/sources/NIST.py b/FourmiCrawler/sources/NIST.py index 10496ab..3e061ae 100644 --- a/FourmiCrawler/sources/NIST.py +++ b/FourmiCrawler/sources/NIST.py @@ -22,11 +22,11 @@ class NIST(Source): search = 'cgi/cbook.cgi?Name=%s&Units=SI&cTP=on' - ignore_list = set() cfg = {} def __init__(self, config={}): Source.__init__(self, config) + self.ignore_list = set() self.cfg = config if 'reliability' not in self.cfg or self.cfg['reliability'] == '': log.msg('Reliability not set for NIST', level=log.WARNING) diff --git a/FourmiCrawler/spider.py b/FourmiCrawler/spider.py index 60f7363..5c09f07 100644 --- a/FourmiCrawler/spider.py +++ b/FourmiCrawler/spider.py @@ -9,8 +9,6 @@ class FourmiSpider(Spider): A spider writen for the Fourmi Project which calls upon all available sources to request and scrape data. """ name = "FourmiSpider" - _sources = [] - synonyms = set() def __init__(self, compound=None, selected_attributes=[".*"], *args, **kwargs): """ @@ -18,6 +16,8 @@ class FourmiSpider(Spider): :param compound: compound that will be searched. :param selected_attributes: A list of regular expressions that the attributes should match. 
""" + self._sources = [] + self.synonyms = set() super(FourmiSpider, self).__init__(*args, **kwargs) self.synonyms.add(compound) self.selected_attributes = selected_attributes From 96a7f5acd43e82c7da98da15219349282d9c2a6d Mon Sep 17 00:00:00 2001 From: RTB Date: Sun, 8 Jun 2014 18:52:29 +0200 Subject: [PATCH 27/40] added get_section function to grab sections from sources.cfg --- utils/configurator.py | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/utils/configurator.py b/utils/configurator.py index 90e0320..db6c111 100644 --- a/utils/configurator.py +++ b/utils/configurator.py @@ -1,6 +1,6 @@ from scrapy import log from scrapy.utils.project import get_project_settings - +import ConfigParser class Configurator: """ @@ -47,3 +47,13 @@ class Configurator: log.start(logstdout=False, loglevel=log.DEBUG) else: log.start(logstdout=True, loglevel=log.WARNING) + + def get_section(self, config, sourcename): + section = dict() + if config.has_section(sourcename): + section = dict(config.items(sourcename)) + if 'reliability' not in section: + log.msg('Reliability not set for %s' % sourcename, + level=log.WARNING) + section['reliability'] = '' + return section From 87ec6e6506d2da848a7a34d52a582e2d86fa6dfe Mon Sep 17 00:00:00 2001 From: RTB Date: Sun, 8 Jun 2014 19:03:01 +0200 Subject: [PATCH 28/40] added read_sourceconfiguration function --- utils/configurator.py | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/utils/configurator.py b/utils/configurator.py index db6c111..658cf4b 100644 --- a/utils/configurator.py +++ b/utils/configurator.py @@ -48,6 +48,11 @@ class Configurator: else: log.start(logstdout=True, loglevel=log.WARNING) + def read_sourceconfiguration(self): + config = ConfigParser.ConfigParser() + config.read('sources.cfg') # [TODO]: should be softcoded eventually + return config + def get_section(self, config, sourcename): section = dict() if config.has_section(sourcename): From f93ff4a309eebfd5129e652e806e3979e65c0d60 Mon Sep 
17 00:00:00 2001 From: RTB Date: Sun, 8 Jun 2014 19:14:33 +0200 Subject: [PATCH 29/40] made read_sourceconfiguration() and get_section() static --- utils/configurator.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/utils/configurator.py b/utils/configurator.py index 658cf4b..a87b28a 100644 --- a/utils/configurator.py +++ b/utils/configurator.py @@ -48,12 +48,14 @@ class Configurator: else: log.start(logstdout=True, loglevel=log.WARNING) - def read_sourceconfiguration(self): + @staticmethod + def read_sourceconfiguration(): config = ConfigParser.ConfigParser() config.read('sources.cfg') # [TODO]: should be softcoded eventually return config - def get_section(self, config, sourcename): + @staticmethod + def get_section(config, sourcename): section = dict() if config.has_section(sourcename): section = dict(config.items(sourcename)) From 3278de2b3a0588814b955f5ca8fa5e92fe507f21 Mon Sep 17 00:00:00 2001 From: RTB Date: Sun, 8 Jun 2014 19:15:07 +0200 Subject: [PATCH 30/40] made sourceloader use static Configurator methods --- utils/sourceloader.py | 10 +++------- 1 file changed, 3 insertions(+), 7 deletions(-) diff --git a/utils/sourceloader.py b/utils/sourceloader.py index 07f966f..9b33657 100644 --- a/utils/sourceloader.py +++ b/utils/sourceloader.py @@ -1,10 +1,9 @@ import inspect import os import re -import ConfigParser from FourmiCrawler.sources.source import Source - +from utils.configurator import Configurator class SourceLoader: sources = [] @@ -20,17 +19,14 @@ class SourceLoader: path += "/" + rel_dir known_parser = set() - config = ConfigParser.ConfigParser() - config.read('sources.cfg') + config = Configurator.read_sourceconfiguration() for py in [f[:-3] for f in os.listdir(path) if f.endswith('.py') and f != '__init__.py']: mod = __import__('.'.join([rel_dir.replace("../", "").replace("/", "."), py]), fromlist=[py]) classes = [getattr(mod, x) for x in dir(mod) if inspect.isclass(getattr(mod, x))] for cls in classes: if 
issubclass(cls, Source) and cls not in known_parser: - sourcecfg = dict() - if config.has_section(cls.__name__): - sourcecfg = dict(config.items(cls.__name__)) + sourcecfg = Configurator.get_section(config, cls.__name__) self.sources.append(cls(sourcecfg)) known_parser.add(cls) From 09ab4249baf89dbdc51676d298b6af31a7db4fd0 Mon Sep 17 00:00:00 2001 From: RTB Date: Sun, 8 Jun 2014 19:21:56 +0200 Subject: [PATCH 31/40] added function descriptions of read_sourceconfiguration and get_section --- utils/configurator.py | 12 ++++++++++++ 1 file changed, 12 insertions(+) diff --git a/utils/configurator.py b/utils/configurator.py index a87b28a..83dee1b 100644 --- a/utils/configurator.py +++ b/utils/configurator.py @@ -50,12 +50,24 @@ class Configurator: @staticmethod def read_sourceconfiguration(): + """ + This function reads sources.cfg in the main folder for configuration + variables for sources + :return a ConfigParser object of sources.cfg + """ config = ConfigParser.ConfigParser() config.read('sources.cfg') # [TODO]: should be softcoded eventually return config @staticmethod def get_section(config, sourcename): + """ + This function reads a config section labeled in variable sourcename and + tests whether the reliability variable is set else set to empty string + :param config: a ConfigParser object + :param sourcename: the name of the section to be read + :return a dictionary of the section in the config labeled in sourcename + """ section = dict() if config.has_section(sourcename): section = dict(config.items(sourcename)) From a6fb27f8a702afc5bea9603fcd50520a0802ead7 Mon Sep 17 00:00:00 2001 From: RTB Date: Sun, 8 Jun 2014 19:29:29 +0200 Subject: [PATCH 32/40] the difference between class and object variables... 
--- FourmiCrawler/sources/ChemSpider.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/FourmiCrawler/sources/ChemSpider.py b/FourmiCrawler/sources/ChemSpider.py index 8078347..a9894bb 100644 --- a/FourmiCrawler/sources/ChemSpider.py +++ b/FourmiCrawler/sources/ChemSpider.py @@ -26,12 +26,10 @@ class ChemSpider(Source): structure = 'Chemical-Structure.%s.html' extendedinfo = 'MassSpecAPI.asmx/GetExtendedCompoundInfo?csid=%s&token=' - cfg = {} - ignore_list = [] - def __init__(self, config={}): Source.__init__(self, config) self.cfg = config + self.ignore_list = [] if 'reliability' not in self.cfg: log.msg('Reliability not set for ChemSpider', level=log.WARNING) self.cfg['reliability'] = '' From f01ff62d99e045c0afa0d4cb9b0471593500874c Mon Sep 17 00:00:00 2001 From: RTB Date: Sun, 8 Jun 2014 19:30:05 +0200 Subject: [PATCH 33/40] removed test for existence of reliability in config from chemspider init --- FourmiCrawler/sources/ChemSpider.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/FourmiCrawler/sources/ChemSpider.py b/FourmiCrawler/sources/ChemSpider.py index a9894bb..87a6ee7 100644 --- a/FourmiCrawler/sources/ChemSpider.py +++ b/FourmiCrawler/sources/ChemSpider.py @@ -30,9 +30,6 @@ class ChemSpider(Source): Source.__init__(self, config) self.cfg = config self.ignore_list = [] - if 'reliability' not in self.cfg: - log.msg('Reliability not set for ChemSpider', level=log.WARNING) - self.cfg['reliability'] = '' if 'token' not in self.cfg or self.cfg['token'] == '': log.msg('ChemSpider token not set or empty, search/MassSpec API ' 'not available', level=log.WARNING) From bbc9abadb890694909e914bc84c10bd91eaa079a Mon Sep 17 00:00:00 2001 From: RTB Date: Sun, 8 Jun 2014 19:30:59 +0200 Subject: [PATCH 34/40] removed test for existence of reliability in config from NIST init --- FourmiCrawler/sources/NIST.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/FourmiCrawler/sources/NIST.py b/FourmiCrawler/sources/NIST.py index 3e061ae..3c323ef 
100644 --- a/FourmiCrawler/sources/NIST.py +++ b/FourmiCrawler/sources/NIST.py @@ -28,9 +28,6 @@ class NIST(Source): Source.__init__(self, config) self.ignore_list = set() self.cfg = config - if 'reliability' not in self.cfg or self.cfg['reliability'] == '': - log.msg('Reliability not set for NIST', level=log.WARNING) - self.cfg['reliability'] = '' def parse(self, response): sel = Selector(response) From 806b816c302f0cf7efd119ce3bdb485071f0d50e Mon Sep 17 00:00:00 2001 From: RTB Date: Sun, 8 Jun 2014 19:31:34 +0200 Subject: [PATCH 35/40] removed test for existence of reliability in config from WikipediaParser init --- FourmiCrawler/sources/WikipediaParser.py | 3 --- 1 file changed, 3 deletions(-) diff --git a/FourmiCrawler/sources/WikipediaParser.py b/FourmiCrawler/sources/WikipediaParser.py index 781d08f..8722cef 100644 --- a/FourmiCrawler/sources/WikipediaParser.py +++ b/FourmiCrawler/sources/WikipediaParser.py @@ -24,9 +24,6 @@ class WikipediaParser(Source): def __init__(self, config={}): Source.__init__(self, config) self.cfg = config - if 'reliability' not in self.cfg or self.cfg['reliability'] == '': - log.msg('Reliability not set for Wikipedia', level=log.WARNING) - self.cfg['reliability'] = '' def parse(self, response): """ Distributes the above described behaviour """ From a62b40a21fa37a888a8e4ccfba37b73174fe80e2 Mon Sep 17 00:00:00 2001 From: RTB Date: Sun, 8 Jun 2014 20:16:30 +0200 Subject: [PATCH 36/40] get_section in configurator returns the default section if the requested section does not exist --- utils/configurator.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/utils/configurator.py b/utils/configurator.py index 83dee1b..dfc6330 100644 --- a/utils/configurator.py +++ b/utils/configurator.py @@ -63,7 +63,8 @@ class Configurator: def get_section(config, sourcename): """ This function reads a config section labeled in variable sourcename and - tests whether the reliability variable is set else set to empty string + tests 
whether the reliability variable is set else set to empty string. + Return the default section if the labeled config section does not exist :param config: a ConfigParser object :param sourcename: the name of the section to be read :return a dictionary of the section in the config labeled in sourcename @@ -71,6 +72,8 @@ class Configurator: section = dict() if config.has_section(sourcename): section = dict(config.items(sourcename)) + elif config.defaults(): + section = config.defaults() if 'reliability' not in section: log.msg('Reliability not set for %s' % sourcename, level=log.WARNING) From 8cb6bb8d417160b36d5357cd6467ba712353b9e7 Mon Sep 17 00:00:00 2001 From: RTB Date: Sun, 8 Jun 2014 21:13:35 +0200 Subject: [PATCH 37/40] added simple tests for read_sourceconfiguration and get_section --- tests/test_configurator.py | 25 ++++++++++++++++++++++++- 1 file changed, 24 insertions(+), 1 deletion(-) diff --git a/tests/test_configurator.py b/tests/test_configurator.py index 8cc61ea..93e28df 100644 --- a/tests/test_configurator.py +++ b/tests/test_configurator.py @@ -1,6 +1,7 @@ import unittest from utils.configurator import Configurator +import ConfigReader class TestConfigurator(unittest.TestCase): @@ -24,4 +25,26 @@ class TestConfigurator(unittest.TestCase): # self.conf.start_log("test.log", True) # self.conf.start_log("test.log", False) # self.conf.start_log(None, True) - # self.conf.start_log(None, False) \ No newline at end of file + # self.conf.start_log(None, False) + + def test_read_sourceconfiguration(self): + config = self.conf.read_sourceconfiguration() + self.assertIsInstance(config, ConfigReader) + + def test_get_section(self): + config = ConfigReader.ConfigReader() + section = self.conf.get_section(config, 'test') + self.assertIn(section, 'reliability') + self.assertEquals(section['reliability'], '') + + config.set('DEFAULT', 'reliability', 'Low') + + section = self.conf.get_section(config, 'test') + self.assertEquals(section['reliability'] = 'Low') + + 
config.add_section('test') + config.set('test', 'var', 'Maybe') + + section = self.conf.get_section(config, 'test') + self.assertEquals(section['reliability'] = 'Low') + self.assertEqual(section['var'], 'Maybe') From d141bb9f4fcd8a268b5b9cac1048a8718d041795 Mon Sep 17 00:00:00 2001 From: RTB Date: Sun, 8 Jun 2014 21:19:58 +0200 Subject: [PATCH 38/40] replaced erroneous equality signs with commas --- tests/test_configurator.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_configurator.py b/tests/test_configurator.py index 93e28df..236597b 100644 --- a/tests/test_configurator.py +++ b/tests/test_configurator.py @@ -40,11 +40,11 @@ class TestConfigurator(unittest.TestCase): config.set('DEFAULT', 'reliability', 'Low') section = self.conf.get_section(config, 'test') - self.assertEquals(section['reliability'] = 'Low') + self.assertEquals(section['reliability'], 'Low') config.add_section('test') config.set('test', 'var', 'Maybe') section = self.conf.get_section(config, 'test') - self.assertEquals(section['reliability'] = 'Low') + self.assertEquals(section['reliability'], 'Low') self.assertEqual(section['var'], 'Maybe') From a43d90ae69668dd580cf298393133d51ce17db87 Mon Sep 17 00:00:00 2001 From: RTB Date: Sun, 8 Jun 2014 21:26:17 +0200 Subject: [PATCH 39/40] replaced erroneous ConfigReader with ConfigParser... 
--- tests/test_configurator.py | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/tests/test_configurator.py b/tests/test_configurator.py index 236597b..533db5d 100644 --- a/tests/test_configurator.py +++ b/tests/test_configurator.py @@ -1,7 +1,7 @@ import unittest from utils.configurator import Configurator -import ConfigReader +import ConfigParser class TestConfigurator(unittest.TestCase): @@ -29,10 +29,10 @@ class TestConfigurator(unittest.TestCase): def test_read_sourceconfiguration(self): config = self.conf.read_sourceconfiguration() - self.assertIsInstance(config, ConfigReader) + self.assertIsInstance(config, ConfigParser) def test_get_section(self): - config = ConfigReader.ConfigReader() + config = ConfigParser.ConfigParser() section = self.conf.get_section(config, 'test') self.assertIn(section, 'reliability') self.assertEquals(section['reliability'], '') From 326413effa07cffa2388c7ecd414e699f7cef4cf Mon Sep 17 00:00:00 2001 From: RTB Date: Sun, 8 Jun 2014 21:42:38 +0200 Subject: [PATCH 40/40] fixed syntax of erroneous assertIn and assertIsInstance commands --- tests/test_configurator.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/tests/test_configurator.py b/tests/test_configurator.py index 533db5d..eb43cb7 100644 --- a/tests/test_configurator.py +++ b/tests/test_configurator.py @@ -29,12 +29,12 @@ class TestConfigurator(unittest.TestCase): def test_read_sourceconfiguration(self): config = self.conf.read_sourceconfiguration() - self.assertIsInstance(config, ConfigParser) + self.assertIsInstance(config, ConfigParser.ConfigParser) def test_get_section(self): config = ConfigParser.ConfigParser() section = self.conf.get_section(config, 'test') - self.assertIn(section, 'reliability') + self.assertIn('reliability', section) self.assertEquals(section['reliability'], '') config.set('DEFAULT', 'reliability', 'Low')