Added documentation to the pipeline.
This commit is contained in:
parent
c705133194
commit
2e654255c5
@ -31,6 +31,13 @@ class AttributeSelectionPipeline(object):
|
||||
pass;
|
||||
|
||||
def process_item(self, item, spider):
|
||||
"""
|
||||
The items are processed using the selected attribute list available in the spider,
|
||||
items that don't match the selected attributes are dropped.
|
||||
:param item: The incoming item
|
||||
:param spider: The spider which scraped the item. Should have an attribute "selected_attributes".
|
||||
:return: :raise DropItem: Returns the item if it matches a selected attribute, else it is dropped.
|
||||
"""
|
||||
if [x for x in spider.selected_attributes if re.match(x, item["attribute"])]:
|
||||
return item
|
||||
else:
|
||||
|
@ -22,7 +22,6 @@ Options:
|
||||
--include=<regex> Include only sources that match these regular expressions split by a comma.
|
||||
--exclude=<regex> Exclude the sources that match these regular expressions split by a comma.
|
||||
"""
|
||||
import re
|
||||
|
||||
from twisted.internet import reactor
|
||||
from scrapy.crawler import Crawler
|
||||
|
Reference in New Issue
Block a user