Archived
1
0

added parse_searchrequest function

This commit is contained in:
RTB 2014-04-12 19:41:36 +02:00
parent 246463b450
commit 22fa67735d

View File

@ -15,12 +15,23 @@ class ChemSpider(Parser):
# Identifier of this parser. Inside a class body the leading double
# underscore makes Python name-mangle the attribute.
__spider = 'ChemSpider'
# Relative URL template of the SimpleSearch endpoint; %s is replaced with the
# compound being searched for (see new_compound_request).
# NOTE(review): the API token is hard-coded in this URL -- consider loading
# it from configuration instead of keeping it in source control.
search = "Search.asmx/SimpleSearch?query=%s&token=052bfd06-5ce4-43d6-bf12-89eabefd2338"
# Relative URL template of a structure page; %s is replaced with the csid
# read from the search response (see parse_searchrequest).
structure = "Chemical-Structure.%s.html"
def parse(self, response):
    """Response callback: wrap the response in a Selector and log the call.

    Nothing is extracted from the selector yet and no value is returned.
    """
    selector = Selector(response)  # built but not queried so far
    log.msg('chemspider parse', level=log.WARNING)
def parse_searchrequest(self, response):
    """Turn a search response into a request for the structure page.

    Reads the first ``cs:int`` text node (the csid) from the response,
    builds the structure-page URL from it and returns a request for that
    URL whose callback is ``self.parse``.

    Args:
        response: the response received for the search URL built by
            ``new_compound_request``.

    Returns:
        A Request for the structure page, or None when the response does
        not contain any csid.
    """
    sel = Selector(response)
    log.msg('chemspider parse_searchrequest', level=log.WARNING)
    # The <int> elements live in the ChemSpider namespace, so it has to be
    # registered before the XPath query can address them.
    sel.register_namespace('cs', 'http://www.chemspider.com/')
    csids = sel.xpath('.//cs:int/text()').extract()
    if not csids:
        # Without this guard, indexing [0] on an empty result raises
        # IndexError for every search term that has no hit.
        log.msg('chemspider search returned no csid', level=log.WARNING)
        return None
    # TODO: handle multiple csids in case of vague search term
    csid = csids[0]
    # self.website[:-1] drops the last character of the base URL
    # (presumably a trailing wildcard -- confirm against the class).
    structure_url = self.website[:-1] + self.structure % csid
    log.msg('chemspider URL: %s' % structure_url, level=log.WARNING)
    return Request(structure_url, callback=self.parse)
def new_compound_request(self, compound):
    """Build the search request for a compound.

    Args:
        compound: the search term substituted into the ``search`` URL
            template.

    Returns:
        A Request for the search URL whose response is handled by
        ``parse_searchrequest``.
    """
    # self.website[:-1] drops the last character of the base URL
    # (presumably a trailing wildcard -- confirm against the class).
    searchurl = self.website[:-1] + self.search % compound
    log.msg('chemspider compound', level=log.WARNING)
    # The search response only carries the csid, so it must be handled by
    # parse_searchrequest, which then requests the structure page. The
    # earlier `callback=self.parse` return made this one unreachable.
    return Request(url=searchurl, callback=self.parse_searchrequest)