Archived
1
0

Added documentation to the pipeline.

This commit is contained in:
Jip J. Dekker 2014-05-08 15:35:18 +02:00
parent c705133194
commit 2e654255c5
2 changed files with 7 additions and 1 deletions

View File

@ -31,6 +31,13 @@ class AttributeSelectionPipeline(object):
pass;
def process_item(self, item, spider):
"""
The items are processed using the selected attribute list available in the spider;
items that don't match any of the selected attributes are dropped.
:param item: The incoming item
:param spider: The spider which scraped the item. Should have an attribute "selected_attributes".
:return: :raise DropItem: Returns the item if it matches a selected attribute, else it is dropped.
"""
if [x for x in spider.selected_attributes if re.match(x, item["attribute"])]:
return item
else:

View File

@ -22,7 +22,6 @@ Options:
--include=<regex> Include only sources that match these regular expressions split by a comma.
--exclude=<regex> Exclude the sources that match these regular expressions split by a comma.
"""
import re
from twisted.internet import reactor
from scrapy.crawler import Crawler