Added an ignore list to new_compound_request for synonyms found by the ChemSpider parser
This commit is contained in:
parent
b1b969a16c
commit
564dbc3292
@ -16,6 +16,8 @@ class ChemSpider(Parser):
|
||||
search = "Search.asmx/SimpleSearch?query=%s&token=052bfd06-5ce4-43d6-bf12-89eabefd2338"
|
||||
structure = "Chemical-Structure.%s.html"
|
||||
|
||||
ignore_list = []
|
||||
|
||||
def parse(self, response):
|
||||
sel = Selector(response)
|
||||
requests = []
|
||||
@ -36,6 +38,8 @@ class ChemSpider(Parser):
|
||||
for syn in sel.xpath('//p[@class="syn"]/span[@class=""]/text()').extract():
|
||||
synonyms.append( self.new_synonym( syn, 'low' ) )
|
||||
|
||||
self.ignore_list.extend(synonyms)
|
||||
|
||||
return requests
|
||||
|
||||
def new_synonym(self, name, reliability):
|
||||
@ -60,6 +64,8 @@ class ChemSpider(Parser):
|
||||
return Request(structure_url, callback=self.parse)
|
||||
|
||||
def new_compound_request(self, compound):
    """Build the ChemSpider search request for a compound name.

    Returns None when ``compound`` is already in ``self.ignore_list``
    (the list that ``parse`` extends with the synonyms it collects).
    Otherwise returns a Request for the SimpleSearch URL whose callback
    is ``self.parse_searchrequest``.
    """
    # TODO: add regular expression
    if compound in self.ignore_list:
        return None

    # Local import so the block works on both Python 2 and Python 3
    # without touching the file's import section.
    try:
        from urllib.parse import quote
    except ImportError:
        from urllib import quote

    # Percent-encode the search term: compound names can contain '&',
    # '#', '+' or '%', which would otherwise truncate or alter the
    # ``query=`` parameter. Encoding to UTF-8 bytes first keeps quote()
    # safe for non-ASCII unicode input on Python 2 as well.
    if isinstance(compound, bytes):
        raw_term = compound
    else:
        raw_term = compound.encode('utf-8')
    encoded_term = quote(raw_term, safe='')

    # self.website[:-1] drops the last character of self.website
    # (presumably a trailing wildcard in the URL pattern - TODO confirm).
    searchurl = self.website[:-1] + self.search % encoded_term
    log.msg('chemspider compound', level=log.WARNING)
    return Request(url=searchurl, callback=self.parse_searchrequest)
|
||||
|
Reference in New Issue
Block a user