From 22ca4afa33058781330fd125b61e23281dcb0c4d Mon Sep 17 00:00:00 2001
From: "Jip J. Dekker"
Date: Fri, 20 Jun 2014 11:21:26 +0200
Subject: [PATCH] Code inspection

---
 FourmiCrawler/sources/ChemSpider.py | 18 +++++++++---------
 FourmiCrawler/sources/NIST.py       | 13 +++++++------
 FourmiCrawler/sources/PubChem.py    | 26 ++++++++++++++------------
 fourmi.py                           |  2 +-
 4 files changed, 31 insertions(+), 28 deletions(-)

diff --git a/FourmiCrawler/sources/ChemSpider.py b/FourmiCrawler/sources/ChemSpider.py
index b4bf6f0..e95d067 100644
--- a/FourmiCrawler/sources/ChemSpider.py
+++ b/FourmiCrawler/sources/ChemSpider.py
@@ -89,7 +89,7 @@ class ChemSpider(Source):
 
             # Test for properties without values, with one hardcoded exception
             if (not re.match(r'^\d', prop_value) or
-            (prop_name == 'Polarizability' and prop_value == '10-24cm3')):
+                (prop_name == 'Polarizability' and prop_value == '10-24cm3')):
                 continue
 
             m = re.match(r'(.*) \((.*)\)', prop_name)
@@ -122,12 +122,12 @@ class ChemSpider(Source):
 
         properties = []
         scraped_list = sel.xpath('.//li[span="Experimental Physico-chemical '
-                                'Properties"]//li/table/tr/td')
+                                 'Properties"]//li/table/tr/td')
         if not scraped_list:
             return properties
         # Format is: property name followed by a list of values
         property_name = scraped_list.pop(0).xpath(
-                'span/text()').extract()[0].rstrip()
+            'span/text()').extract()[0].rstrip()
         for line in scraped_list:
             if line.xpath('span/text()'):
                 property_name = line.xpath('span/text()').extract()[0].rstrip()
@@ -251,12 +251,12 @@ class ChemSpider(Source):
         :return: A Result item
         """
         return Result({
-                          'attribute': attribute,
-                          'value': value,
-                          'source': source,
-                          'reliability': self.cfg['reliability'],
-                          'conditions': conditions
-                      })
+            'attribute': attribute,
+            'value': value,
+            'source': source,
+            'reliability': self.cfg['reliability'],
+            'conditions': conditions
+        })
 
     def parse_searchrequest(self, response):
         """
diff --git a/FourmiCrawler/sources/NIST.py b/FourmiCrawler/sources/NIST.py
index 691b062..52f1332 100644
--- a/FourmiCrawler/sources/NIST.py
+++ b/FourmiCrawler/sources/NIST.py
@@ -313,12 +313,13 @@ class NIST(Source):
         :param conditions: optional conditions regarding the value
         :return: A Result item
         """
-        return Result({
-            'attribute': attribute,
-            'value': value,
-            'source': 'NIST',
-            'reliability': self.cfg['reliability'],
-            'conditions': conditions
+        return Result(
+            {
+                'attribute': attribute,
+                'value': value,
+                'source': 'NIST',
+                'reliability': self.cfg['reliability'],
+                'conditions': conditions
             })
 
     def new_compound_request(self, compound):
diff --git a/FourmiCrawler/sources/PubChem.py b/FourmiCrawler/sources/PubChem.py
index 15fa3f9..4cd5304 100644
--- a/FourmiCrawler/sources/PubChem.py
+++ b/FourmiCrawler/sources/PubChem.py
@@ -15,7 +15,7 @@ class PubChem(Source):
 
     including sources of the values of properties.
     """
-    #PubChem has its data on compound name, properties and their values on different html pages, so different URLs used
+    # PubChem has its data on compound name, properties and their values on different html pages, so different URLs used
     website = 'http://.*\\.ncbi\\.nlm\\.nih\\.gov/.*'
     website_www = 'http://www.ncbi.nlm.nih.gov/*'
     website_pubchem = 'http://pubchem.ncbi.nlm.nih.gov/.*'
@@ -54,14 +54,16 @@ class PubChem(Source):
         n = re.search(r'cid=(\d+)', response.url)
         if n:
             cid = n.group(1)
-            log.msg('cid: %s' % cid, level=log.DEBUG) #getting the right id of the compound with which it can reach
-                                                      # the seperate html page which contains the properties and their values
+            log.msg('cid: %s' % cid, level=log.DEBUG)  # getting the right id of the compound with which it can reach
+            # the seperate html page which contains the properties and their values
 
-            #using this cid to get the right url and scrape it
-            requests.append(Request(url=self.website_pubchem[:-2].replace("\\","") + self.data_url % cid, callback=self.parse_data))
+            # using this cid to get the right url and scrape it
+            requests.append(
+                Request(url=self.website_pubchem[:-2].replace("\\", "") + self.data_url % cid, callback=self.parse_data))
         return requests
 
-    def parse_data(self, response):
+    @staticmethod
+    def parse_data(response):
         """
         Parse data found in 'Chemical and Physical properties' part of a substance page.
         :param response: The response with the page to parse
@@ -74,8 +76,8 @@ class PubChem(Source):
         props = sel.xpath('//div')
 
         for prop in props:
-            prop_name = ''.join(prop.xpath('b/text()').extract()) # name of property that it is parsing
-            if prop.xpath('a'): # parsing for single value in property
+            prop_name = ''.join(prop.xpath('b/text()').extract())  # name of property that it is parsing
+            if prop.xpath('a'):  # parsing for single value in property
                 prop_source = ''.join(prop.xpath('a/@title').extract())
                 prop_value = ''.join(prop.xpath('a/text()').extract())
                 new_prop = Result({
@@ -89,7 +91,7 @@ class PubChem(Source):
                         (new_prop['attribute'], new_prop['value'], new_prop['source']),
                         level=log.DEBUG)
                 requests.append(new_prop)
-            elif prop.xpath('ul'): # parsing for multiple values (list) in property
+            elif prop.xpath('ul'):  # parsing for multiple values (list) in property
                 prop_values = prop.xpath('ul//li')
                 for prop_li in prop_values:
                     prop_value = ''.join(prop_li.xpath('a/text()').extract())
@@ -102,8 +104,8 @@ class PubChem(Source):
                         'conditions': ''
                     })
                     log.msg('PubChem prop: |%s| |%s| |%s|' %
-                        (new_prop['attribute'], new_prop['value'],
-                         new_prop['source']), level=log.DEBUG)
+                            (new_prop['attribute'], new_prop['value'],
+                             new_prop['source']), level=log.DEBUG)
                     requests.append(new_prop)
 
         return requests
@@ -116,7 +118,7 @@ class PubChem(Source):
         case the search request forwarded to the compound page
         """
 
-        #check if pubchem forwarded straight to compound page
+        # check if pubchem forwarded straight to compound page
        m = re.match(self.website_pubchem, response.url)
         if m:
             log.msg('PubChem search forwarded to compound page',
diff --git a/fourmi.py b/fourmi.py
index d6d5fd9..f0caa05 100755
--- a/fourmi.py
+++ b/fourmi.py
@@ -63,7 +63,7 @@ def search(docopt_arguments, source_loader):
                   source_loader, docopt_arguments["--attributes"].split(','))
     if conf.scrapy_settings.getbool("LOG_ENABLED"):
         log.start(conf.scrapy_settings.get("LOG_FILE"),
-            conf.scrapy_settings.get("LOG_LEVEL"), conf.scrapy_settings.get("LOG_STDOUT"))
+                  conf.scrapy_settings.get("LOG_LEVEL"), conf.scrapy_settings.get("LOG_STDOUT"))
     reactor.run()
 
 