From 7355de1b20b9444879f743e20f03533ed19f192b Mon Sep 17 00:00:00 2001
From: "Jip J. Dekker"
Date: Tue, 18 Mar 2014 18:03:22 +0100
Subject: [PATCH] Added an simple script to run a spider

---
Reviewer notes (this section is ignored when the patch is applied):
- Fourmi.py is a flat script: it builds one ChemspiderSpider with the
  hard-coded compound "Aspirin", wires reactor.stop to the spider_closed
  signal, starts the crawler and logging, then blocks in reactor.run().
- The new file is added without a trailing newline (see the marker at the
  end of the hunk).

 Fourmi.py | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)
 create mode 100644 Fourmi.py

diff --git a/Fourmi.py b/Fourmi.py
new file mode 100644
index 0000000..4ed2c95
--- /dev/null
+++ b/Fourmi.py
@@ -0,0 +1,21 @@
+"""
+Fourmi - An internet webcrawler searching for information on chemical compounds.
+[todo] - Add some more useful text here.
+"""
+
+from twisted.internet import reactor
+from scrapy.crawler import Crawler
+from scrapy import log, signals
+from FourmiCrawler.spiders.Chemspider import ChemspiderSpider # [review] - There should be an easy way to import all spiders!
+from scrapy.utils.project import get_project_settings
+
+# [todo] - Add something to add all spiders, with the right references
+spider = ChemspiderSpider(compound = "Aspirin")
+settings = get_project_settings()
+crawler = Crawler(settings)
+crawler.signals.connect(reactor.stop, signal=signals.spider_closed)
+crawler.configure()
+crawler.crawl(spider)
+crawler.start()
+log.start()
+reactor.run()
\ No newline at end of file