diff --git a/FourmiCrawler/pipelines.py b/FourmiCrawler/pipelines.py index dd4e11d..55b0f49 100644 --- a/FourmiCrawler/pipelines.py +++ b/FourmiCrawler/pipelines.py @@ -9,7 +9,8 @@ class RemoveNonePipeline(object): def __init__(self): pass - def process_item(self, item, spider): + @staticmethod + def process_item(item, spider): """ Processing the items so None values are replaced by empty strings :param item: The incoming item @@ -43,9 +44,10 @@ class DuplicatePipeline(object): class AttributeSelectionPipeline(object): def __init__(self): - pass; + pass - def process_item(self, item, spider): + @staticmethod + def process_item(item, spider): """ The items are processed using the selected attribute list available in the spider, items that don't match the selected items are dropped. diff --git a/FourmiCrawler/sources/ChemSpider.py b/FourmiCrawler/sources/ChemSpider.py index dfada5f..8c0bd8b 100644 --- a/FourmiCrawler/sources/ChemSpider.py +++ b/FourmiCrawler/sources/ChemSpider.py @@ -61,9 +61,7 @@ class ChemSpider(Source): prop_conditions = '' # Test for properties without values, with one hardcoded exception - if (not re.match(r'^\d', prop_value) or - (prop_name == 'Polarizability' and - prop_value == '10-24cm3')): + if not re.match(r'^\d', prop_value) or (prop_name == 'Polarizability' and prop_value == '10-24cm3'): continue # Match for condition in parentheses diff --git a/FourmiCrawler/sources/NIST.py b/FourmiCrawler/sources/NIST.py index a5f784d..6e8fabb 100644 --- a/FourmiCrawler/sources/NIST.py +++ b/FourmiCrawler/sources/NIST.py @@ -78,7 +78,7 @@ class NIST(Source): requests.extend(self.parse_generic_data(table, summary)) else: log.msg('NIST table: NOT SUPPORTED', level=log.WARNING) - continue #Assume unsupported + continue # Assume unsupported return requests def parse_generic_info(self, sel): @@ -230,7 +230,8 @@ class NIST(Source): return results - def parse_individual_datapoints(self, response): + @staticmethod + def parse_individual_datapoints(response): """Parses the page linked from aggregate data""" sel = Selector(response) table = sel.xpath('//table[@class="data"]')[0] diff --git a/tests/test_pipeline.py b/tests/test_pipeline.py index ab97954..dfb8e83 100644 --- a/tests/test_pipeline.py +++ b/tests/test_pipeline.py @@ -10,7 +10,7 @@ class TestPipelines(unittest.TestCase): def setUp(self): self.testItem = items.Result() - def test_NonePipeline(self): + def test_none_pipeline(self): # Testing the pipeline that replaces the None values in items. self.testItem["value"] = "abc" pipe = pipelines.RemoveNonePipeline() @@ -23,7 +23,7 @@ class TestPipelines(unittest.TestCase): if key is not "value": self.assertIs(processed[key], "") - def test_DuplicatePipeline(self): + def test_duplicate_pipeline(self): # Testing the pipeline that removes duplicates. self.testItem["attribute"] = "test" self.testItem["value"] = "test" @@ -33,11 +33,11 @@ class TestPipelines(unittest.TestCase): self.assertEqual(pipe.process_item(self.testItem, spider.FourmiSpider()), self.testItem) self.assertRaises(DropItem, pipe.process_item, self.testItem, spider.FourmiSpider()) - otherItem = copy.deepcopy(self.testItem) - otherItem["value"] = "test1" - self.assertEqual(pipe.process_item(otherItem, spider.FourmiSpider()), otherItem) + other_item = copy.deepcopy(self.testItem) + other_item["value"] = "test1" + self.assertEqual(pipe.process_item(other_item, spider.FourmiSpider()), other_item) - def test_AttributeSelection(self): + def test_attribute_selection(self): # Testing the pipeline that selects attributes. item1 = copy.deepcopy(self.testItem) item2 = copy.deepcopy(self.testItem)