created basic structure of ChemSpider search parser
This commit is contained in:
parent
3a074467e6
commit
0da286c907
32
FourmiCrawler/parsers/ChemSpider.py
Normal file
32
FourmiCrawler/parsers/ChemSpider.py
Normal file
@ -0,0 +1,32 @@
|
||||
from scrapy import log
|
||||
from scrapy.http import Request
|
||||
from scrapy.selector import Selector
|
||||
from FourmiCrawler.items import Result
|
||||
from ChemSpider_token import TOKEN #TODO: move the token elsewhere
|
||||
|
||||
"""
|
||||
This parser will manage searching for chemicals through the ChemSpider API,
|
||||
and parsing the resulting ChemSpider page.
|
||||
The token required for the API should be in a configuration file somewhere.
|
||||
"""
|
||||
class ChemSpider:
    """Skeleton parser that searches ChemSpider for a compound.

    ``new_compound_request`` builds a request against the SimpleSearch
    endpoint and routes the response to ``parse``, which is still a stub.
    """

    # URL pattern of the site; the trailing '*' is a wildcard and is stripped
    # before the pattern is used as a base URL.
    website = "http://www.chemspider.com/*"
    __spider = 'ChemSpider'

    # Path template appended to the base URL: (search query, API token).
    search = "Search.asmx/SimpleSearch?query=%s&token=%s"

    # These run once, when the class body is executed (i.e. at import time).
    # The parenthesised single-argument print emits the same text on Python 2
    # and also parses on Python 3.
    print("ChemSpider start")
    log.msg('chemspider start', level=log.DEBUG)

    def parse(self, response):
        """Handle a ChemSpider response (stub: only logs, returns None).

        :param response: the response passed in by the request callback.
        """
        # Selector is built for the upcoming parsing logic; not used yet.
        sel = Selector(response)
        log.msg('chemspider parse', level=log.DEBUG)
        print("ChemSpider parse")

    def new_compound_request(self, compound):
        """Build the search request for *compound*.

        :param compound: search term substituted into the query string.
        :return: a ``Request`` for the search URL with ``self.parse`` as
            its callback.
        """
        # Class attributes are not visible as bare names inside a method;
        # they must be read through ``self`` (bare access raised NameError).
        base_url = self.website[:-1]  # drop the trailing '*' wildcard
        searchurl = base_url + self.search % (compound, TOKEN)
        log.msg('chemspider compound', level=log.DEBUG)
        print("ChemSpider compound")
        return Request(url=searchurl, callback=self.parse)
|
Reference in New Issue
Block a user