Archived
1
0

A lot of PEP-8 fixes

This commit is contained in:
Jip J. Dekker 2014-06-15 20:45:35 +02:00
parent 66f2384747
commit 74e7152d5f
8 changed files with 25 additions and 27 deletions

View File

@ -40,7 +40,6 @@ class ChemSpider(Source):
self.search += self.cfg['token'] self.search += self.cfg['token']
self.extendedinfo += self.cfg['token'] self.extendedinfo += self.cfg['token']
def parse(self, response): def parse(self, response):
sel = Selector(response) sel = Selector(response)
requests = [] requests = []
@ -202,13 +201,14 @@ class ChemSpider(Source):
return properties return properties
def newresult(self, attribute, value, conditions='', source='ChemSpider'): def newresult(self, attribute, value, conditions='', source='ChemSpider'):
return Result({ return Result(
{
'attribute': attribute, 'attribute': attribute,
'value': value, 'value': value,
'source': source, 'source': source,
'reliability': self.cfg['reliability'], 'reliability': self.cfg['reliability'],
'conditions': conditions 'conditions': conditions
}) })
def parse_searchrequest(self, response): def parse_searchrequest(self, response):
"""Parse the initial response of the ChemSpider Search API """ """Parse the initial response of the ChemSpider Search API """

View File

@ -89,7 +89,6 @@ class NIST(Source):
InChiKey, CAS number InChiKey, CAS number
""" """
ul = sel.xpath('body/ul[li/strong="IUPAC Standard InChI:"]') ul = sel.xpath('body/ul[li/strong="IUPAC Standard InChI:"]')
li = ul.xpath('li')
raw_synonyms = ul.xpath('li[strong="Other names:"]/text()').extract() raw_synonyms = ul.xpath('li[strong="Other names:"]/text()').extract()
for synonym in raw_synonyms[0].strip().split(';\n'): for synonym in raw_synonyms[0].strip().split(';\n'):
@ -256,12 +255,13 @@ class NIST(Source):
return results return results
def newresult(self, attribute, value, conditions=''): def newresult(self, attribute, value, conditions=''):
return Result({ return Result(
'attribute': attribute, {
'value': value, 'attribute': attribute,
'source': 'NIST', 'value': value,
'reliability': self.cfg['reliability'], 'source': 'NIST',
'conditions': conditions 'reliability': self.cfg['reliability'],
'conditions': conditions
}) })
def new_compound_request(self, compound): def new_compound_request(self, compound):

View File

@ -19,7 +19,6 @@ class WikipediaParser(Source):
__spider = None __spider = None
searched_compounds = [] searched_compounds = []
def __init__(self, config=None): def __init__(self, config=None):
Source.__init__(self, config) Source.__init__(self, config)
if config is None: if config is None:
@ -57,7 +56,7 @@ class WikipediaParser(Source):
# scrape the chembox (wikipedia template) # scrape the chembox (wikipedia template)
items = self.parse_chembox(sel, items) items = self.parse_chembox(sel, items)
#scrape the drugbox (wikipedia template) # scrape the drugbox (wikipedia template)
items = self.parse_drugbox(sel, items) items = self.parse_drugbox(sel, items)
items = filter(lambda a: a['value'] != '', items) # remove items with an empty value items = filter(lambda a: a['value'] != '', items) # remove items with an empty value
@ -127,7 +126,6 @@ class WikipediaParser(Source):
level=log.DEBUG) level=log.DEBUG)
return items return items
def new_compound_request(self, compound): def new_compound_request(self, compound):
return Request(url=self.website[:-1] + compound, callback=self.parse) return Request(url=self.website[:-1] + compound, callback=self.parse)
@ -165,10 +163,11 @@ class WikipediaParser(Source):
return links return links
def newresult(self, attribute, value): def newresult(self, attribute, value):
return Result({ return Result(
'attribute': attribute, {
'value': value, 'attribute': attribute,
'source': 'Wikipedia', 'value': value,
'reliability': self.cfg['reliability'], 'source': 'Wikipedia',
'conditions': '' 'reliability': self.cfg['reliability'],
'conditions': ''
}) })

View File

@ -21,7 +21,7 @@ class FourmiSpider(Spider):
super(FourmiSpider, self).__init__(*args, **kwargs) super(FourmiSpider, self).__init__(*args, **kwargs)
self.synonyms.add(compound) self.synonyms.add(compound)
if selected_attributes is None: if selected_attributes is None:
self.selected_attributes = [".*"] self.selected_attributes = [".*"]
else: else:
self.selected_attributes = selected_attributes self.selected_attributes = selected_attributes

View File

@ -60,8 +60,10 @@ def search(docopt_arguments, source_loader):
conf = Configurator() conf = Configurator()
conf.set_logging(docopt_arguments["--log"], docopt_arguments["-v"]) conf.set_logging(docopt_arguments["--log"], docopt_arguments["-v"])
conf.set_output(docopt_arguments["--output"], docopt_arguments["--format"]) conf.set_output(docopt_arguments["--output"], docopt_arguments["--format"])
setup_crawler(docopt_arguments["<compound>"], conf.scrapy_settings, source_loader, docopt_arguments["--attributes"].split(',')) setup_crawler(docopt_arguments["<compound>"], conf.scrapy_settings,
log.start(conf.scrapy_settings.get("LOG_FILE"), conf.scrapy_settings.get("LOG_LEVEL"), conf.scrapy_settings.get("LOG_STDOUT")) source_loader, docopt_arguments["--attributes"].split(','))
log.start(conf.scrapy_settings.get("LOG_FILE"),
conf.scrapy_settings.get("LOG_LEVEL"), conf.scrapy_settings.get("LOG_STDOUT"))
reactor.run() reactor.run()

View File

@ -47,7 +47,6 @@ class TestFoumiSpider(unittest.TestCase):
self.assertGreater(len(requests), 0) self.assertGreater(len(requests), 0)
self.assertIsInstance(requests[0], Request) self.assertIsInstance(requests[0], Request)
def test_synonym_requests(self): def test_synonym_requests(self):
# A test for the synonym request function # A test for the synonym request function
self.spi._sources = [] self.spi._sources = []

View File

@ -12,7 +12,6 @@ class Configurator:
def __init__(self): def __init__(self):
self.scrapy_settings = get_project_settings() self.scrapy_settings = get_project_settings()
def set_output(self, filename, fileformat): def set_output(self, filename, fileformat):
""" """
This function manipulates the Scrapy output file settings that normally would be set in the settings file. This function manipulates the Scrapy output file settings that normally would be set in the settings file.
@ -31,7 +30,6 @@ class Configurator:
if fileformat is not None: if fileformat is not None:
self.scrapy_settings.overrides["FEED_FORMAT"] = fileformat self.scrapy_settings.overrides["FEED_FORMAT"] = fileformat
def set_logging(self, logfile=None, verbose=0): def set_logging(self, logfile=None, verbose=0):
""" """
This function changes the default settings of Scapy's logging functionality This function changes the default settings of Scapy's logging functionality
@ -61,7 +59,6 @@ class Configurator:
else: else:
self.scrapy_settings.overrides["LOG_FILE"] = None self.scrapy_settings.overrides["LOG_FILE"] = None
@staticmethod @staticmethod
def read_sourceconfiguration(): def read_sourceconfiguration():
""" """
@ -70,7 +67,7 @@ class Configurator:
:return a ConfigParser object of sources.cfg :return a ConfigParser object of sources.cfg
""" """
config = ConfigParser.ConfigParser() config = ConfigParser.ConfigParser()
config.read('sources.cfg') # [TODO]: should be softcoded eventually config.read('sources.cfg') # [TODO]: should be softcoded eventually
return config return config
@staticmethod @staticmethod

View File

@ -5,6 +5,7 @@ import re
from FourmiCrawler.sources.source import Source from FourmiCrawler.sources.source import Source
from utils.configurator import Configurator from utils.configurator import Configurator
class SourceLoader: class SourceLoader:
sources = [] sources = []