Archived
1
0

PEP-8 standards for a lot of things

This commit is contained in:
Jip J. Dekker 2014-04-22 18:54:10 +02:00
parent 0da2d74e2c
commit 648b23e466

View File

@ -5,7 +5,8 @@ from scrapy.selector import Selector
from FourmiCrawler.items import Result from FourmiCrawler.items import Result
import re import re
# [TODO] - Maybe clean up usage of '.extract()[0]', because it will raise an IndexError exception if the xpath matches nothing. # [TODO] - Maybe clean up usage of '.extract()[0]', because of possible IndexError exception.
class ChemSpider(Parser): class ChemSpider(Parser):
"""ChemSpider scraper for synonyms and properties """ChemSpider scraper for synonyms and properties
@ -15,6 +16,10 @@ class ChemSpider(Parser):
The token required for the API should be in a configuration file The token required for the API should be in a configuration file
somewhere. somewhere.
""" """
def __init__(self):
pass
website = 'http://www.chemspider.com/*' website = 'http://www.chemspider.com/*'
# [TODO] - Save and access token of specific user. # [TODO] - Save and access token of specific user.
@ -36,9 +41,9 @@ class ChemSpider(Parser):
return requests return requests
def parse_properties(self, sel): @staticmethod
def parse_properties(sel):
"""scrape Experimental Data and Predicted ACD/Labs tabs""" """scrape Experimental Data and Predicted ACD/Labs tabs"""
requests = []
properties = [] properties = []
# Predicted - ACD/Labs tab # Predicted - ACD/Labs tab
@ -130,7 +135,7 @@ class ChemSpider(Parser):
# [TODO] - confirm if English User-Validated synonyms are OK too # [TODO] - confirm if English User-Validated synonyms are OK too
for syn in synonyms: for syn in synonyms:
if (syn['category'] == 'expert' and syn['language'] == 'English'): if syn['category'] == 'expert' and syn['language'] == 'English':
log.msg('CS emit synonym: %s' % syn['name'], level=log.DEBUG) log.msg('CS emit synonym: %s' % syn['name'], level=log.DEBUG)
self._Parser__spider.get_synonym_requests(syn['name']) self._Parser__spider.get_synonym_requests(syn['name'])
@ -172,7 +177,8 @@ class ChemSpider(Parser):
} }
return synonym return synonym
def parse_extendedinfo(self, response): @staticmethod
def parse_extendedinfo(response):
"""Scrape data from the ChemSpider GetExtendedCompoundInfo API""" """Scrape data from the ChemSpider GetExtendedCompoundInfo API"""
sel = Selector(response) sel = Selector(response)
properties = [] properties = []
@ -181,7 +187,7 @@ class ChemSpider(Parser):
for (name, value) in zip(names, values): for (name, value) in zip(names, values):
result = Result({ result = Result({
'attribute': name, 'attribute': name,
'value': value, #These values have no unit! 'value': value, # These values have no unit!
'source': 'ChemSpider ExtendedCompoundInfo', 'source': 'ChemSpider ExtendedCompoundInfo',
'reliability': 'Unknown', 'reliability': 'Unknown',
'conditions': '' 'conditions': ''
@ -205,7 +211,7 @@ class ChemSpider(Parser):
callback=self.parse_extendedinfo)] callback=self.parse_extendedinfo)]
def new_compound_request(self, compound): def new_compound_request(self, compound):
if compound in self.ignore_list: #[TODO] - add regular expression if compound in self.ignore_list: # [TODO] - add regular expression
return None return None
searchurl = self.website[:-1] + self.search % compound searchurl = self.website[:-1] + self.search % compound
log.msg('chemspider compound', level=log.DEBUG) log.msg('chemspider compound', level=log.DEBUG)