A lot of PEP-8 fixes
This commit is contained in:
parent
66f2384747
commit
74e7152d5f
@ -40,7 +40,6 @@ class ChemSpider(Source):
|
|||||||
self.search += self.cfg['token']
|
self.search += self.cfg['token']
|
||||||
self.extendedinfo += self.cfg['token']
|
self.extendedinfo += self.cfg['token']
|
||||||
|
|
||||||
|
|
||||||
def parse(self, response):
|
def parse(self, response):
|
||||||
sel = Selector(response)
|
sel = Selector(response)
|
||||||
requests = []
|
requests = []
|
||||||
@ -202,13 +201,14 @@ class ChemSpider(Source):
|
|||||||
return properties
|
return properties
|
||||||
|
|
||||||
def newresult(self, attribute, value, conditions='', source='ChemSpider'):
|
def newresult(self, attribute, value, conditions='', source='ChemSpider'):
|
||||||
return Result({
|
return Result(
|
||||||
|
{
|
||||||
'attribute': attribute,
|
'attribute': attribute,
|
||||||
'value': value,
|
'value': value,
|
||||||
'source': source,
|
'source': source,
|
||||||
'reliability': self.cfg['reliability'],
|
'reliability': self.cfg['reliability'],
|
||||||
'conditions': conditions
|
'conditions': conditions
|
||||||
})
|
})
|
||||||
|
|
||||||
def parse_searchrequest(self, response):
|
def parse_searchrequest(self, response):
|
||||||
"""Parse the initial response of the ChemSpider Search API """
|
"""Parse the initial response of the ChemSpider Search API """
|
||||||
|
@ -89,7 +89,6 @@ class NIST(Source):
|
|||||||
InChiKey, CAS number
|
InChiKey, CAS number
|
||||||
"""
|
"""
|
||||||
ul = sel.xpath('body/ul[li/strong="IUPAC Standard InChI:"]')
|
ul = sel.xpath('body/ul[li/strong="IUPAC Standard InChI:"]')
|
||||||
li = ul.xpath('li')
|
|
||||||
|
|
||||||
raw_synonyms = ul.xpath('li[strong="Other names:"]/text()').extract()
|
raw_synonyms = ul.xpath('li[strong="Other names:"]/text()').extract()
|
||||||
for synonym in raw_synonyms[0].strip().split(';\n'):
|
for synonym in raw_synonyms[0].strip().split(';\n'):
|
||||||
@ -256,12 +255,13 @@ class NIST(Source):
|
|||||||
return results
|
return results
|
||||||
|
|
||||||
def newresult(self, attribute, value, conditions=''):
|
def newresult(self, attribute, value, conditions=''):
|
||||||
return Result({
|
return Result(
|
||||||
'attribute': attribute,
|
{
|
||||||
'value': value,
|
'attribute': attribute,
|
||||||
'source': 'NIST',
|
'value': value,
|
||||||
'reliability': self.cfg['reliability'],
|
'source': 'NIST',
|
||||||
'conditions': conditions
|
'reliability': self.cfg['reliability'],
|
||||||
|
'conditions': conditions
|
||||||
})
|
})
|
||||||
|
|
||||||
def new_compound_request(self, compound):
|
def new_compound_request(self, compound):
|
||||||
|
@ -19,7 +19,6 @@ class WikipediaParser(Source):
|
|||||||
__spider = None
|
__spider = None
|
||||||
searched_compounds = []
|
searched_compounds = []
|
||||||
|
|
||||||
|
|
||||||
def __init__(self, config=None):
|
def __init__(self, config=None):
|
||||||
Source.__init__(self, config)
|
Source.__init__(self, config)
|
||||||
if config is None:
|
if config is None:
|
||||||
@ -57,7 +56,7 @@ class WikipediaParser(Source):
|
|||||||
# scrape the chembox (wikipedia template)
|
# scrape the chembox (wikipedia template)
|
||||||
items = self.parse_chembox(sel, items)
|
items = self.parse_chembox(sel, items)
|
||||||
|
|
||||||
#scrape the drugbox (wikipedia template)
|
# scrape the drugbox (wikipedia template)
|
||||||
items = self.parse_drugbox(sel, items)
|
items = self.parse_drugbox(sel, items)
|
||||||
|
|
||||||
items = filter(lambda a: a['value'] != '', items) # remove items with an empty value
|
items = filter(lambda a: a['value'] != '', items) # remove items with an empty value
|
||||||
@ -127,7 +126,6 @@ class WikipediaParser(Source):
|
|||||||
level=log.DEBUG)
|
level=log.DEBUG)
|
||||||
return items
|
return items
|
||||||
|
|
||||||
|
|
||||||
def new_compound_request(self, compound):
|
def new_compound_request(self, compound):
|
||||||
return Request(url=self.website[:-1] + compound, callback=self.parse)
|
return Request(url=self.website[:-1] + compound, callback=self.parse)
|
||||||
|
|
||||||
@ -165,10 +163,11 @@ class WikipediaParser(Source):
|
|||||||
return links
|
return links
|
||||||
|
|
||||||
def newresult(self, attribute, value):
|
def newresult(self, attribute, value):
|
||||||
return Result({
|
return Result(
|
||||||
'attribute': attribute,
|
{
|
||||||
'value': value,
|
'attribute': attribute,
|
||||||
'source': 'Wikipedia',
|
'value': value,
|
||||||
'reliability': self.cfg['reliability'],
|
'source': 'Wikipedia',
|
||||||
'conditions': ''
|
'reliability': self.cfg['reliability'],
|
||||||
|
'conditions': ''
|
||||||
})
|
})
|
||||||
|
@ -21,7 +21,7 @@ class FourmiSpider(Spider):
|
|||||||
super(FourmiSpider, self).__init__(*args, **kwargs)
|
super(FourmiSpider, self).__init__(*args, **kwargs)
|
||||||
self.synonyms.add(compound)
|
self.synonyms.add(compound)
|
||||||
if selected_attributes is None:
|
if selected_attributes is None:
|
||||||
self.selected_attributes = [".*"]
|
self.selected_attributes = [".*"]
|
||||||
else:
|
else:
|
||||||
self.selected_attributes = selected_attributes
|
self.selected_attributes = selected_attributes
|
||||||
|
|
||||||
|
@ -60,8 +60,10 @@ def search(docopt_arguments, source_loader):
|
|||||||
conf = Configurator()
|
conf = Configurator()
|
||||||
conf.set_logging(docopt_arguments["--log"], docopt_arguments["-v"])
|
conf.set_logging(docopt_arguments["--log"], docopt_arguments["-v"])
|
||||||
conf.set_output(docopt_arguments["--output"], docopt_arguments["--format"])
|
conf.set_output(docopt_arguments["--output"], docopt_arguments["--format"])
|
||||||
setup_crawler(docopt_arguments["<compound>"], conf.scrapy_settings, source_loader, docopt_arguments["--attributes"].split(','))
|
setup_crawler(docopt_arguments["<compound>"], conf.scrapy_settings,
|
||||||
log.start(conf.scrapy_settings.get("LOG_FILE"), conf.scrapy_settings.get("LOG_LEVEL"), conf.scrapy_settings.get("LOG_STDOUT"))
|
source_loader, docopt_arguments["--attributes"].split(','))
|
||||||
|
log.start(conf.scrapy_settings.get("LOG_FILE"),
|
||||||
|
conf.scrapy_settings.get("LOG_LEVEL"), conf.scrapy_settings.get("LOG_STDOUT"))
|
||||||
reactor.run()
|
reactor.run()
|
||||||
|
|
||||||
|
|
||||||
|
@ -47,7 +47,6 @@ class TestFoumiSpider(unittest.TestCase):
|
|||||||
self.assertGreater(len(requests), 0)
|
self.assertGreater(len(requests), 0)
|
||||||
self.assertIsInstance(requests[0], Request)
|
self.assertIsInstance(requests[0], Request)
|
||||||
|
|
||||||
|
|
||||||
def test_synonym_requests(self):
|
def test_synonym_requests(self):
|
||||||
# A test for the synonym request function
|
# A test for the synonym request function
|
||||||
self.spi._sources = []
|
self.spi._sources = []
|
||||||
|
@ -12,7 +12,6 @@ class Configurator:
|
|||||||
def __init__(self):
|
def __init__(self):
|
||||||
self.scrapy_settings = get_project_settings()
|
self.scrapy_settings = get_project_settings()
|
||||||
|
|
||||||
|
|
||||||
def set_output(self, filename, fileformat):
|
def set_output(self, filename, fileformat):
|
||||||
"""
|
"""
|
||||||
This function manipulates the Scrapy output file settings that normally would be set in the settings file.
|
This function manipulates the Scrapy output file settings that normally would be set in the settings file.
|
||||||
@ -31,7 +30,6 @@ class Configurator:
|
|||||||
if fileformat is not None:
|
if fileformat is not None:
|
||||||
self.scrapy_settings.overrides["FEED_FORMAT"] = fileformat
|
self.scrapy_settings.overrides["FEED_FORMAT"] = fileformat
|
||||||
|
|
||||||
|
|
||||||
def set_logging(self, logfile=None, verbose=0):
|
def set_logging(self, logfile=None, verbose=0):
|
||||||
"""
|
"""
|
||||||
This function changes the default settings of Scrapy's logging functionality
|
This function changes the default settings of Scrapy's logging functionality
|
||||||
@ -61,7 +59,6 @@ class Configurator:
|
|||||||
else:
|
else:
|
||||||
self.scrapy_settings.overrides["LOG_FILE"] = None
|
self.scrapy_settings.overrides["LOG_FILE"] = None
|
||||||
|
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def read_sourceconfiguration():
|
def read_sourceconfiguration():
|
||||||
"""
|
"""
|
||||||
@ -70,7 +67,7 @@ class Configurator:
|
|||||||
:return a ConfigParser object of sources.cfg
|
:return a ConfigParser object of sources.cfg
|
||||||
"""
|
"""
|
||||||
config = ConfigParser.ConfigParser()
|
config = ConfigParser.ConfigParser()
|
||||||
config.read('sources.cfg') # [TODO]: should be softcoded eventually
|
config.read('sources.cfg') # [TODO]: should be softcoded eventually
|
||||||
return config
|
return config
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
|
@ -5,6 +5,7 @@ import re
|
|||||||
from FourmiCrawler.sources.source import Source
|
from FourmiCrawler.sources.source import Source
|
||||||
from utils.configurator import Configurator
|
from utils.configurator import Configurator
|
||||||
|
|
||||||
|
|
||||||
class SourceLoader:
|
class SourceLoader:
|
||||||
sources = []
|
sources = []
|
||||||
|
|
||||||
|
Reference in New Issue
Block a user