# Origin: commit 0da286c90753f021e33fa37c2e3bb27fe12b25c8 by RTB,
# Tue, 8 Apr 2014 12:08:45 +0200
# Subject: "created basic structure of ChemSpider search parser"
# Target path: FourmiCrawler/parsers/ChemSpider.py
"""
This parser will manage searching for chemicals through the ChemSpider API,
and parsing the resulting ChemSpider page.
The token required for the API should be in a configuration file somewhere.
"""
from scrapy import log
from scrapy.http import Request
from scrapy.selector import Selector
from FourmiCrawler.items import Result
from ChemSpider_token import TOKEN #TODO: move the token elsewhere


class ChemSpider:
    """Skeleton parser that issues ChemSpider search requests.

    Only the request construction is implemented so far; ``parse`` is a
    placeholder that logs its invocation and extracts nothing yet.
    """

    # URL pattern for the site. The trailing "*" is stripped in
    # new_compound_request to obtain the base URL.
    # NOTE(review): presumably also used elsewhere to match response URLs
    # against this parser -- confirm against the caller.
    website = "http://www.chemspider.com/*"
    # Double leading underscore: name-mangled to _ChemSpider__spider.
    __spider = 'ChemSpider'

    # Path and query template appended to the base URL; the two %s slots are
    # filled with the compound name and the API token, in that order.
    search = "Search.asmx/SimpleSearch?query=%s&token=%s"

    # These two statements sit in the class body, so they run once when the
    # class is defined (i.e. at import time), not per instance.
    print("ChemSpider start")
    log.msg('chemspider start', level=log.DEBUG)

    def parse(self, response):
        """Callback for responses to requests made by new_compound_request.

        Currently a stub: it wraps the response in a Selector, emits debug
        output and returns None.

        :param response: the response object handed to the callback.
        """
        # Placeholder for the upcoming extraction logic; not used yet.
        sel = Selector(response)
        log.msg('chemspider parse', level=log.DEBUG)
        print("ChemSpider parse")

    def new_compound_request(self, compound):
        """Build the search request for a compound.

        :param compound: compound name, inserted into the ``query`` parameter.
        :return: a Request for the search URL with ``self.parse`` as callback.
        """
        # Class attributes are not visible as bare names inside a method body;
        # they must be reached through self (the bare names raised NameError).
        # NOTE(review): compound and TOKEN are interpolated without URL
        # quoting, so names containing "&", "#" or "+" would corrupt the
        # query string -- consider quoting once the input type is settled.
        searchurl = self.website[:-1] + self.search % (compound, TOKEN)
        log.msg('chemspider compound', level=log.DEBUG)
        print("ChemSpider compound")
        return Request(url=searchurl, callback=self.parse)