diff --git a/FourmiCrawler/pipelines.py b/FourmiCrawler/pipelines.py
index 2dfd531..1bcba3a 100644
--- a/FourmiCrawler/pipelines.py
+++ b/FourmiCrawler/pipelines.py
@@ -35,7 +35,7 @@ class DuplicatePipeline(object):
         """
         value = (item['attribute'], item['value'], item['conditions'])
         if value in self.known_values:
-            raise DropItem("Duplicate item found: %s" % item)  # #[todo] append sources of first item.
+            raise DropItem("Duplicate item found: %s" % item)  #[todo] append sources of first item.
         else:
             self.known_values.add(value)
             return item
diff --git a/tests/test_pipeline.py b/tests/test_pipeline.py
new file mode 100644
index 0000000..9500272
--- /dev/null
+++ b/tests/test_pipeline.py
@@ -0,0 +1,55 @@
+import copy
+import unittest
+from FourmiCrawler import pipelines, spider, items
+from scrapy.exceptions import DropItem
+
+
+class TestPipelines(unittest.TestCase):
+    """Tests for the item pipelines in FourmiCrawler.pipelines."""
+
+    def setUp(self):
+        """Give every test a fresh Result item."""
+        self.testItem = items.Result()
+
+    def test_NonePipeline(self):
+        """Processed item keeps "value", has no None fields, others equal ""."""
+        self.testItem["value"] = "abc"
+        pipe = pipelines.RemoveNonePipeline()
+        processed = pipe.process_item(self.testItem, spider.FourmiSpider())
+
+        self.assertEqual(processed["value"], "abc")
+
+        for key in self.testItem:
+            self.assertIsNotNone(processed[key])
+            # Compare by value: identity checks on strings depend on interning.
+            if key != "value":
+                self.assertEqual(processed[key], "")
+
+    def test_DuplicatePipeline(self):
+        """A repeated item raises DropItem; an item with another value passes."""
+        self.testItem["attribute"] = "test"
+        self.testItem["value"] = "test"
+        self.testItem["conditions"] = "test"
+
+        pipe = pipelines.DuplicatePipeline()
+        self.assertEqual(pipe.process_item(self.testItem, spider.FourmiSpider()), self.testItem)
+        self.assertRaises(DropItem, pipe.process_item, self.testItem, spider.FourmiSpider())
+
+        otherItem = copy.deepcopy(self.testItem)
+        otherItem["value"] = "test1"
+        self.assertEqual(pipe.process_item(otherItem, spider.FourmiSpider()), otherItem)
+
+    def test_AttributeSelection(self):
+        """Items pass or raise DropItem depending on the spider's selection."""
+        item1 = copy.deepcopy(self.testItem)
+        item2 = copy.deepcopy(self.testItem)
+
+        item1["attribute"] = "abd"
+        item2["attribute"] = "abc"
+
+        # With the selection "a.d", "abd" must pass and "abc" must be dropped.
+        s = spider.FourmiSpider(selected_attributes=["a.d"])
+        pipe = pipelines.AttributeSelectionPipeline()
+
+        self.assertEqual(pipe.process_item(item1, s), item1)
+        self.assertRaises(DropItem, pipe.process_item, item2, s)
\ No newline at end of file