Archived
1
0

created basic structure of ChemSpider search parser

This commit is contained in:
RTB 2014-04-08 12:08:45 +02:00
parent 3a074467e6
commit 0da286c907

View File

@ -0,0 +1,32 @@
from scrapy import log
from scrapy.http import Request
from scrapy.selector import Selector
from FourmiCrawler.items import Result
from ChemSpider_token import TOKEN #TODO: move the token elsewhere
"""
This parser will manage searching for chemicals through the ChemSpider API,
and parsing the resulting ChemSpider page.
The token required for the API should be in a configuration file somewhere.
"""
class ChemSpider:
    """Skeleton parser that searches ChemSpider for a compound.

    A search request URL is assembled from ``website``, ``search`` and the
    imported ``TOKEN``. Parsing of the returned page is not implemented yet;
    ``parse`` currently only emits debug output.
    """

    # URL pattern of the site; the trailing '*' is stripped before the
    # search path is appended (see new_compound_request).
    website = "http://www.chemspider.com/*"
    # Name-mangled to _ChemSpider__spider; not read by the code in this class.
    __spider = 'ChemSpider'
    # Search path template, filled with (compound, TOKEN).
    search = "Search.asmx/SimpleSearch?query=%s&token=%s"

    # Debug output emitted once, when the class body is executed.
    print("ChemSpider start")
    log.msg('chemspider start', level=log.DEBUG)

    def parse(self, response):
        """Callback for the search request; currently a stub.

        Wraps *response* in a Selector and emits debug output. Nothing is
        extracted yet, so the method returns None.
        """
        # Not used yet; kept as the starting point for the parsing logic.
        sel = Selector(response)
        log.msg('chemspider parse', level=log.DEBUG)
        print("ChemSpider parse")

    def new_compound_request(self, compound):
        """Return a Request for the ChemSpider search results of *compound*.

        *compound* is interpolated into the query string as given; no
        escaping is done here. The response is handed to ``parse``.
        """
        # Class attributes are not in scope as bare names inside a method;
        # they have to be looked up through self (bare names raise NameError).
        searchurl = self.website[:-1] + self.search % (compound, TOKEN)
        log.msg('chemspider compound', level=log.DEBUG)
        print("ChemSpider compound")
        return Request(url=searchurl, callback=self.parse)