Apply PEP 8 style standards to a lot of things
This commit is contained in:
parent
0da2d74e2c
commit
648b23e466
@ -5,7 +5,8 @@ from scrapy.selector import Selector
|
|||||||
from FourmiCrawler.items import Result
|
from FourmiCrawler.items import Result
|
||||||
import re
|
import re
|
||||||
|
|
||||||
# [TODO] - Maybe clean up usage of '.extract()[0]', because it will raise an IndexError exception if the xpath matches nothing.
|
# [TODO] - Maybe clean up usage of '.extract()[0]', because of possible IndexError exception.
|
||||||
|
|
||||||
|
|
||||||
class ChemSpider(Parser):
|
class ChemSpider(Parser):
|
||||||
"""ChemSpider scraper for synonyms and properties
|
"""ChemSpider scraper for synonyms and properties
|
||||||
@ -15,6 +16,10 @@ class ChemSpider(Parser):
|
|||||||
The token required for the API should be in a configuration file
|
The token required for the API should be in a configuration file
|
||||||
somewhere.
|
somewhere.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
|
def __init__(self):
|
||||||
|
pass
|
||||||
|
|
||||||
website = 'http://www.chemspider.com/*'
|
website = 'http://www.chemspider.com/*'
|
||||||
|
|
||||||
# [TODO] - Save and access token of specific user.
|
# [TODO] - Save and access token of specific user.
|
||||||
@ -36,9 +41,9 @@ class ChemSpider(Parser):
|
|||||||
|
|
||||||
return requests
|
return requests
|
||||||
|
|
||||||
def parse_properties(self, sel):
|
@staticmethod
|
||||||
|
def parse_properties(sel):
|
||||||
"""scrape Experimental Data and Predicted ACD/Labs tabs"""
|
"""scrape Experimental Data and Predicted ACD/Labs tabs"""
|
||||||
requests = []
|
|
||||||
properties = []
|
properties = []
|
||||||
|
|
||||||
# Predicted - ACD/Labs tab
|
# Predicted - ACD/Labs tab
|
||||||
@ -130,7 +135,7 @@ class ChemSpider(Parser):
|
|||||||
|
|
||||||
# [TODO] - confirm if English User-Validated synonyms are OK too
|
# [TODO] - confirm if English User-Validated synonyms are OK too
|
||||||
for syn in synonyms:
|
for syn in synonyms:
|
||||||
if (syn['category'] == 'expert' and syn['language'] == 'English'):
|
if syn['category'] == 'expert' and syn['language'] == 'English':
|
||||||
log.msg('CS emit synonym: %s' % syn['name'], level=log.DEBUG)
|
log.msg('CS emit synonym: %s' % syn['name'], level=log.DEBUG)
|
||||||
self._Parser__spider.get_synonym_requests(syn['name'])
|
self._Parser__spider.get_synonym_requests(syn['name'])
|
||||||
|
|
||||||
@ -172,7 +177,8 @@ class ChemSpider(Parser):
|
|||||||
}
|
}
|
||||||
return synonym
|
return synonym
|
||||||
|
|
||||||
def parse_extendedinfo(self, response):
|
@staticmethod
|
||||||
|
def parse_extendedinfo(response):
|
||||||
"""Scrape data from the ChemSpider GetExtendedCompoundInfo API"""
|
"""Scrape data from the ChemSpider GetExtendedCompoundInfo API"""
|
||||||
sel = Selector(response)
|
sel = Selector(response)
|
||||||
properties = []
|
properties = []
|
||||||
@ -181,7 +187,7 @@ class ChemSpider(Parser):
|
|||||||
for (name, value) in zip(names, values):
|
for (name, value) in zip(names, values):
|
||||||
result = Result({
|
result = Result({
|
||||||
'attribute': name,
|
'attribute': name,
|
||||||
'value': value, #These values have no unit!
|
'value': value, # These values have no unit!
|
||||||
'source': 'ChemSpider ExtendedCompoundInfo',
|
'source': 'ChemSpider ExtendedCompoundInfo',
|
||||||
'reliability': 'Unknown',
|
'reliability': 'Unknown',
|
||||||
'conditions': ''
|
'conditions': ''
|
||||||
@ -205,7 +211,7 @@ class ChemSpider(Parser):
|
|||||||
callback=self.parse_extendedinfo)]
|
callback=self.parse_extendedinfo)]
|
||||||
|
|
||||||
def new_compound_request(self, compound):
|
def new_compound_request(self, compound):
|
||||||
if compound in self.ignore_list: #[TODO] - add regular expression
|
if compound in self.ignore_list: # [TODO] - add regular expression
|
||||||
return None
|
return None
|
||||||
searchurl = self.website[:-1] + self.search % compound
|
searchurl = self.website[:-1] + self.search % compound
|
||||||
log.msg('chemspider compound', level=log.DEBUG)
|
log.msg('chemspider compound', level=log.DEBUG)
|
||||||
|
Reference in New Issue
Block a user