From fb41d772f203b420784582732ea64fd45d96c51d Mon Sep 17 00:00:00 2001 From: Nout van Deijck Date: Wed, 21 May 2014 16:11:02 +0200 Subject: [PATCH] Added custom user-agent because the site would otherwise block requests, as it is not amused by scrapers --- FourmiCrawler/settings.py | 2 ++ 1 file changed, 2 insertions(+) diff --git a/FourmiCrawler/settings.py b/FourmiCrawler/settings.py index be91fef..490a3a5 100644 --- a/FourmiCrawler/settings.py +++ b/FourmiCrawler/settings.py @@ -16,6 +16,8 @@ ITEM_PIPELINES = { FEED_URI = 'results.json' FEED_FORMAT = 'jsonlines' +USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.137 Safari/537.36' + # Crawl responsibly by identifying yourself (and your website) on the # user-agent