From 74e7152d5fc2d35ad109f2660bb2385cdc04526d Mon Sep 17 00:00:00 2001 From: "Jip J. Dekker" Date: Sun, 15 Jun 2014 20:45:35 +0200 Subject: [PATCH] A lot of PEP-8 fixes --- FourmiCrawler/sources/ChemSpider.py | 6 +++--- FourmiCrawler/sources/NIST.py | 14 +++++++------- FourmiCrawler/sources/WikipediaParser.py | 17 ++++++++--------- FourmiCrawler/spider.py | 2 +- fourmi.py | 6 ++++-- tests/test_spider.py | 1 - utils/configurator.py | 5 +---- utils/sourceloader.py | 1 + 8 files changed, 25 insertions(+), 27 deletions(-) diff --git a/FourmiCrawler/sources/ChemSpider.py b/FourmiCrawler/sources/ChemSpider.py index fb51a4a..3f1538f 100644 --- a/FourmiCrawler/sources/ChemSpider.py +++ b/FourmiCrawler/sources/ChemSpider.py @@ -40,7 +40,6 @@ class ChemSpider(Source): self.search += self.cfg['token'] self.extendedinfo += self.cfg['token'] - def parse(self, response): sel = Selector(response) requests = [] @@ -202,13 +201,14 @@ class ChemSpider(Source): return properties def newresult(self, attribute, value, conditions='', source='ChemSpider'): - return Result({ + return Result( + { 'attribute': attribute, 'value': value, 'source': source, 'reliability': self.cfg['reliability'], 'conditions': conditions - }) + }) def parse_searchrequest(self, response): """Parse the initial response of the ChemSpider Search API """ diff --git a/FourmiCrawler/sources/NIST.py b/FourmiCrawler/sources/NIST.py index d71d08f..e81db5a 100644 --- a/FourmiCrawler/sources/NIST.py +++ b/FourmiCrawler/sources/NIST.py @@ -89,7 +89,6 @@ class NIST(Source): InChiKey, CAS number """ ul = sel.xpath('body/ul[li/strong="IUPAC Standard InChI:"]') - li = ul.xpath('li') raw_synonyms = ul.xpath('li[strong="Other names:"]/text()').extract() for synonym in raw_synonyms[0].strip().split(';\n'): @@ -256,12 +255,13 @@ class NIST(Source): return results def newresult(self, attribute, value, conditions=''): - return Result({ - 'attribute': attribute, - 'value': value, - 'source': 'NIST', - 'reliability': self.cfg['reliability'], - 'conditions': conditions + return Result( + { + 'attribute': attribute, + 'value': value, + 'source': 'NIST', + 'reliability': self.cfg['reliability'], + 'conditions': conditions }) def new_compound_request(self, compound): diff --git a/FourmiCrawler/sources/WikipediaParser.py b/FourmiCrawler/sources/WikipediaParser.py index b995f30..cfd2555 100644 --- a/FourmiCrawler/sources/WikipediaParser.py +++ b/FourmiCrawler/sources/WikipediaParser.py @@ -19,7 +19,6 @@ class WikipediaParser(Source): __spider = None searched_compounds = [] - def __init__(self, config=None): Source.__init__(self, config) if config is None: @@ -57,7 +56,7 @@ class WikipediaParser(Source): # scrape the chembox (wikipedia template) items = self.parse_chembox(sel, items) - #scrape the drugbox (wikipedia template) + # scrape the drugbox (wikipedia template) items = self.parse_drugbox(sel, items) items = filter(lambda a: a['value'] != '', items) # remove items with an empty value @@ -127,7 +126,6 @@ class WikipediaParser(Source): level=log.DEBUG) return items - def new_compound_request(self, compound): return Request(url=self.website[:-1] + compound, callback=self.parse) @@ -165,10 +163,11 @@ class WikipediaParser(Source): return links def newresult(self, attribute, value): - return Result({ - 'attribute': attribute, - 'value': value, - 'source': 'Wikipedia', - 'reliability': self.cfg['reliability'], - 'conditions': '' + return Result( + { + 'attribute': attribute, + 'value': value, + 'source': 'Wikipedia', + 'reliability': self.cfg['reliability'], + 'conditions': '' }) diff --git a/FourmiCrawler/spider.py b/FourmiCrawler/spider.py index 7552c7d..ebfd2cf 100644 --- a/FourmiCrawler/spider.py +++ b/FourmiCrawler/spider.py @@ -21,7 +21,7 @@ class FourmiSpider(Spider): super(FourmiSpider, self).__init__(*args, **kwargs) self.synonyms.add(compound) if selected_attributes is None: - self.selected_attributes = [".*"] + self.selected_attributes = [".*"] else: self.selected_attributes = selected_attributes diff --git a/fourmi.py b/fourmi.py index 1b9237c..2a422ef 100755 --- a/fourmi.py +++ b/fourmi.py @@ -60,8 +60,10 @@ def search(docopt_arguments, source_loader): conf = Configurator() conf.set_logging(docopt_arguments["--log"], docopt_arguments["-v"]) conf.set_output(docopt_arguments["--output"], docopt_arguments["--format"]) - setup_crawler(docopt_arguments[""], conf.scrapy_settings, source_loader, docopt_arguments["--attributes"].split(',')) - log.start(conf.scrapy_settings.get("LOG_FILE"), conf.scrapy_settings.get("LOG_LEVEL"), conf.scrapy_settings.get("LOG_STDOUT")) + setup_crawler(docopt_arguments[""], conf.scrapy_settings, + source_loader, docopt_arguments["--attributes"].split(',')) + log.start(conf.scrapy_settings.get("LOG_FILE"), + conf.scrapy_settings.get("LOG_LEVEL"), conf.scrapy_settings.get("LOG_STDOUT")) reactor.run() diff --git a/tests/test_spider.py b/tests/test_spider.py index 589a571..1ee40b1 100644 --- a/tests/test_spider.py +++ b/tests/test_spider.py @@ -47,7 +47,6 @@ class TestFoumiSpider(unittest.TestCase): self.assertGreater(len(requests), 0) self.assertIsInstance(requests[0], Request) - def test_synonym_requests(self): # A test for the synonym request function self.spi._sources = [] diff --git a/utils/configurator.py b/utils/configurator.py index 7dc27c5..62987c6 100644 --- a/utils/configurator.py +++ b/utils/configurator.py @@ -12,7 +12,6 @@ class Configurator: def __init__(self): self.scrapy_settings = get_project_settings() - def set_output(self, filename, fileformat): """ This function manipulates the Scrapy output file settings that normally would be set in the settings file. @@ -31,7 +30,6 @@ class Configurator: if fileformat is not None: self.scrapy_settings.overrides["FEED_FORMAT"] = fileformat - def set_logging(self, logfile=None, verbose=0): """ This function changes the default settings of Scapy's logging functionality @@ -61,7 +59,6 @@ class Configurator: else: self.scrapy_settings.overrides["LOG_FILE"] = None - @staticmethod def read_sourceconfiguration(): """ @@ -70,7 +67,7 @@ class Configurator: :return a ConfigParser object of sources.cfg """ config = ConfigParser.ConfigParser() - config.read('sources.cfg') # [TODO]: should be softcoded eventually + config.read('sources.cfg') # [TODO]: should be softcoded eventually return config @staticmethod diff --git a/utils/sourceloader.py b/utils/sourceloader.py index 9b33657..8c54464 100644 --- a/utils/sourceloader.py +++ b/utils/sourceloader.py @@ -5,6 +5,7 @@ import re from FourmiCrawler.sources.source import Source from utils.configurator import Configurator + class SourceLoader: sources = []