
Code reformat

Jip J. Dekker 2014-06-04 19:34:23 +02:00
parent b3c230e835
commit 046fbed3cd
9 changed files with 19 additions and 23 deletions

View File

@@ -4,8 +4,8 @@ import re
 from scrapy.exceptions import DropItem
 
-class RemoveNonePipeline(object):
+class RemoveNonePipeline(object):
     def __init__(self):
         pass
@@ -21,8 +21,8 @@ class RemoveNonePipeline(object):
                 item[key] = ""
         return item
 
-class DuplicatePipeline(object):
+class DuplicatePipeline(object):
     def __init__(self):
         self.known_values = set()
@@ -35,13 +35,13 @@ class DuplicatePipeline(object):
         """
         value = (item['attribute'], item['value'], item['conditions'])
         if value in self.known_values:
-            raise DropItem("Duplicate item found: %s" % item) #[todo] append sources of first item.
+            raise DropItem("Duplicate item found: %s" % item) # [todo] append sources of first item.
         else:
             self.known_values.add(value)
             return item
 
-class AttributeSelectionPipeline(object):
+class AttributeSelectionPipeline(object):
     def __init__(self):
         pass;
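The DuplicatePipeline hunk above shows the deduplication pattern these pipelines rely on: remember every (attribute, value, conditions) tuple seen so far and raise DropItem on a repeat. A minimal self-contained sketch of that pattern follows; the class name DedupPipeline is hypothetical, not the repository's.

from scrapy.exceptions import DropItem


class DedupPipeline(object):
    def __init__(self):
        # Tuples of field values already seen during this crawl.
        self.known_values = set()

    def process_item(self, item, spider):
        key = (item['attribute'], item['value'], item['conditions'])
        if key in self.known_values:
            # DropItem stops all further pipeline processing for this item.
            raise DropItem("Duplicate item found: %s" % item)
        self.known_values.add(key)
        return item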

View File

@@ -3,7 +3,7 @@
 # For simplicity, this file contains only the most important settings by
 # default. All the other settings are documented here:
 #
-# http://doc.scrapy.org/en/latest/topics/settings.html
+# http://doc.scrapy.org/en/latest/topics/settings.html
 #
 
 BOT_NAME = 'FourmiCrawler'
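The hunk above only re-wraps a comment; BOT_NAME is the sole setting shown. For context, a Scrapy settings module is also where the pipelines from the first file get enabled. The entries and priorities below are hypothetical, since the diff does not show the repository's actual pipeline configuration:

BOT_NAME = 'FourmiCrawler'

# Hypothetical wiring; lower numbers run first.
ITEM_PIPELINES = {
    'FourmiCrawler.pipelines.RemoveNonePipeline': 100,
    'FourmiCrawler.pipelines.DuplicatePipeline': 200,
    'FourmiCrawler.pipelines.AttributeSelectionPipeline': 300,
}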

View File

@@ -63,7 +63,7 @@ class ChemSpider(Source):
             # Test for properties without values, with one hardcoded exception
             if (not re.match(r'^\d', prop_value) or
                 (prop_name == 'Polarizability' and
-                 prop_value == '10-24cm3')):
+                 prop_value == '10-24cm3')):
                 continue
 
             # Match for condition in parentheses
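The re-indented condition is a value filter: any property whose value does not start with a digit is skipped, and the hardcoded Polarizability unit string is skipped as well despite its leading digit. A small illustration with invented sample data:

import re

samples = [('Boiling Point', '118.1 C'), ('Appearance', 'colourless liquid'),
           ('Polarizability', '10-24cm3')]
for prop_name, prop_value in samples:
    if (not re.match(r'^\d', prop_value) or
            (prop_name == 'Polarizability' and prop_value == '10-24cm3')):
        continue  # non-numeric value, or the hardcoded exception: skip it
    print('%s: %s' % (prop_name, prop_value))  # only the Boiling Point row prints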

View File

@@ -10,7 +10,7 @@ from FourmiCrawler.items import Result
 # [TODO]: values can be '128.', perhaps remove the dot in that case?
 # [TODO]: properties have references and comments which do not exist in the
-# Result item, but should be included eventually.
+# Result item, but should be included eventually.
 
 class NIST(Source):
     """NIST Scraper plugin
@@ -18,7 +18,7 @@ class NIST(Source):
     This plugin manages searching for a chemical on the NIST website
     and parsing the resulting page if the chemical exists on NIST.
     """
-    website = "http://webbook.nist.gov/*"
+    website = "http://webbook.nist.gov/*"
 
     search = 'cgi/cbook.cgi?Name=%s&Units=SI&cTP=on'
@@ -78,7 +78,7 @@ class NIST(Source):
                 requests.extend(self.parse_generic_data(table, summary))
             else:
                 log.msg('NIST table: NOT SUPPORTED', level=log.WARNING)
-                continue #Assume unsupported
+                continue #Assume unsupported
         return requests
 
     def parse_generic_info(self, sel):
@@ -106,7 +106,7 @@ class NIST(Source):
         data['IUPAC Standard InChI'] = raw_inchi.extract()[0]
 
         raw_inchikey = ul.xpath('li[strong="IUPAC Standard InChIKey:"]'
-                                '/tt/text()')
+                                '/tt/text()')
         data['IUPAC Standard InChIKey'] = raw_inchikey.extract()[0]
 
         raw_cas_number = ul.xpath('li[strong="CAS Registry Number:"]/text()')
@@ -132,10 +132,10 @@ class NIST(Source):
         results = []
         for tr in table.xpath('tr[td]'):
             extra_data_url = tr.xpath('td[last()][a="Individual data points"]'
-                                      '/a/@href').extract()
+                                      '/a/@href').extract()
             if extra_data_url:
                 request = Request(url=self.website[:-1] + extra_data_url[0],
-                                  callback=self.parse_individual_datapoints)
+                                  callback=self.parse_individual_datapoints)
                 results.append(request)
                 continue
             data = []
@@ -183,7 +183,6 @@ class NIST(Source):
                 })
             results.append(result)
         return results
 
-
     @staticmethod
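The two hunks at lines 132 and 78 re-align a standard Scrapy idiom: when a table row links to "Individual data points", the scraper turns the wildcard-terminated website string into a base URL and queues a Request whose callback parses the linked page. A sketch of just that URL-and-callback step; the href value is hypothetical:

from scrapy.http import Request

def parse_individual_datapoints(response):
    pass  # stand-in for the real parsing callback

website = "http://webbook.nist.gov/*"
extra_data_url = ['cgi/cbook.cgi?ID=C64175&Type=ANTOINE']  # hypothetical href

# website[:-1] drops the trailing '*' wildcard, leaving the base URL; Scrapy
# fetches the Request and hands the response to the callback.
request = Request(url=website[:-1] + extra_data_url[0],
                  callback=parse_individual_datapoints)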

View File

@@ -38,7 +38,7 @@ class WikipediaParser(Source):
         """ scrape data from infobox on wikipedia. """
         items = []
 
-        #be sure to get chembox (wikipedia template)
+        # be sure to get chembox (wikipedia template)
         tr_list = sel.xpath('.//table[@class="infobox bordered"]//td[not(@colspan)]'). \
             xpath('normalize-space(string())')
         prop_names = tr_list[::2]
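The tr_list[::2] slice works because the infobox cells arrive as a flat alternating sequence, names at even indices and values at odd ones; the matching [1::2] slice is assumed here, since the hunk cuts off before it. A toy illustration:

cells = ['Molecular formula', 'C2H6O', 'Molar mass', '46.07 g/mol']
prop_names = cells[::2]    # even indices: property names
prop_values = cells[1::2]  # odd indices: the matching values
for name, value in zip(prop_names, prop_values):
    print('%s: %s' % (name, value))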

View File

@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+# !/usr/bin/env python
 """
 Fourmi, a web scraper build to search specific information for a given compound (and it's pseudonyms).

View File

@@ -7,12 +7,11 @@ from FourmiCrawler import pipelines, spider, items
 class TestPipelines(unittest.TestCase):
     def setUp(self):
         self.testItem = items.Result()
 
-
     def test_NonePipeline(self):
-        #Testing the pipeline that replaces the None values in items.
+        # Testing the pipeline that replaces the None values in items.
         self.testItem["value"] = "abc"
         pipe = pipelines.RemoveNonePipeline()
         processed = pipe.process_item(self.testItem, spider.FourmiSpider())
@@ -25,7 +24,7 @@ class TestPipelines(unittest.TestCase):
             self.assertIs(processed[key], "")
 
     def test_DuplicatePipeline(self):
-        #Testing the pipeline that removes duplicates.
+        # Testing the pipeline that removes duplicates.
         self.testItem["attribute"] = "test"
         self.testItem["value"] = "test"
         self.testItem["conditions"] = "test"
@@ -39,7 +38,7 @@ class TestPipelines(unittest.TestCase):
         self.assertEqual(pipe.process_item(otherItem, spider.FourmiSpider()), otherItem)
 
     def test_AttributeSelection(self):
-        #Testing the pipeline that selects attributes.
+        # Testing the pipeline that selects attributes.
         item1 = copy.deepcopy(self.testItem)
         item2 = copy.deepcopy(self.testItem)
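These tests drive a pipeline exactly as Scrapy would: build an item, pass it to process_item together with a spider instance, and assert on the result. A condensed sketch of the pattern, reusing the imports from the hunk header:

import unittest

from FourmiCrawler import pipelines, spider, items


class TestRemoveNone(unittest.TestCase):
    def test_value_kept(self):
        item = items.Result()
        item["value"] = "abc"
        pipe = pipelines.RemoveNonePipeline()
        # The pipeline is plain Python, so it can be called directly.
        processed = pipe.process_item(item, spider.FourmiSpider())
        self.assertEqual(processed["value"], "abc")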

View File

@@ -4,7 +4,6 @@ from sourceloader import SourceLoader
 class TestSourceloader(unittest.TestCase):
     def setUp(self):
         self.loader = SourceLoader()
 
-
@@ -16,7 +15,7 @@ class TestSourceloader(unittest.TestCase):
         self.assertIn("Source: WikipediaParser", str(self.loader))
 
     def test_include(self):
-        #Tests for the include functionality.
+        # Tests for the include functionality.
         self.loader.include(["So.rc.*"])
 
         self.assertIn("Source: Source", str(self.loader))
@@ -25,7 +24,7 @@ class TestSourceloader(unittest.TestCase):
         self.assertNotIn("Source: WikipediaParser", str(self.loader))
 
     def test_exclude(self):
-        #Tests for the exclude functionality.
+        # Tests for the exclude functionality.
         self.loader.exclude(["So.rc.*"])
 
         self.assertNotIn("Source: Source", str(self.loader))

View File

@@ -8,7 +8,6 @@ from FourmiCrawler.sources.source import Source
 class TestFoumiSpider(unittest.TestCase):
-
     def setUp(self):
         self.compound = "test_compound"
         self.attributes = ["a.*", ".*a"]