Archived
1
0
This repository has been archived on 2025-03-03. You can view files and clone it, but cannot push or open issues or pull requests.
Fourmi/FourmiCrawler/settings.py
2014-05-08 15:45:42 +02:00

25 lines
665 B
Python

# Scrapy settings for Fourmi project
#
# For simplicity, this file contains only the most important settings by
# default. All the other settings are documented here:
#
# http://doc.scrapy.org/en/latest/topics/settings.html
#
# --- Project identity -------------------------------------------------------
BOT_NAME = "FourmiCrawler"

# Package(s) searched for spiders, and the package new spiders are created in.
SPIDER_MODULES = ["FourmiCrawler"]
NEWSPIDER_MODULE = "FourmiCrawler"

# --- Item pipelines ---------------------------------------------------------
# Maps each pipeline's import path to its order value; per the Scrapy
# ITEM_PIPELINES convention, items pass through pipelines from the lowest
# value to the highest.
ITEM_PIPELINES = {
    "FourmiCrawler.pipelines.AttributeSelectionPipeline": 100,
    "FourmiCrawler.pipelines.DuplicatePipeline": 200,
}

# --- Feed export ------------------------------------------------------------
# Output target and serialization format for scraped items.
FEED_URI = "results.json"
FEED_FORMAT = "jsonlines"
# Crawl responsibly by identifying yourself (and your website) on the
# user-agent
# USER_AGENT = 'FourmiCrawler (+http://www.yourdomain.com)'