ChemSpider parser now scrapes reference data for synonyms
This commit is contained in:
parent
119d48890d
commit
ae21fa7c67
@ -102,13 +102,26 @@ class ChemSpider(Parser):
|
|||||||
language = language[0][1:-1]
|
language = language[0][1:-1]
|
||||||
else:
|
else:
|
||||||
language = 'English'
|
language = 'English'
|
||||||
|
log.msg('CS synonym: %s (%s) (%s)' % (name, category, language),
|
||||||
|
level=log.DEBUG)
|
||||||
|
references = []
|
||||||
|
for ref in sel.xpath('span[@class="synonym_ref"]'):
|
||||||
|
refname = ref.xpath('normalize-space(string())')
|
||||||
|
references.append({'name': refname.extract()[0][1:-1], 'URI': ''})
|
||||||
|
for ref in sel.xpath('a[@class="synonym_ref"]'):
|
||||||
|
references.append({
|
||||||
|
'name': ref.xpath('@title').extract()[0],
|
||||||
|
'URI': ref.xpath('@href').extract()[0]
|
||||||
|
})
|
||||||
|
for ref in references:
|
||||||
|
log.msg('CS synonym ref: %s %s' % (ref['name'], ref['URI']),
|
||||||
|
level=log.DEBUG)
|
||||||
synonym = {
|
synonym = {
|
||||||
'name': name,
|
'name': name,
|
||||||
'category': category,
|
'category': category,
|
||||||
'language': language
|
'language': language,
|
||||||
|
'references': references
|
||||||
}
|
}
|
||||||
log.msg('CS synonym: %s (%s) (%s)' % (name, category, language),
|
|
||||||
level=log.DEBUG)
|
|
||||||
return synonym
|
return synonym
|
||||||
|
|
||||||
def parse_extendedinfo(self, response):
|
def parse_extendedinfo(self, response):
|
||||||
|
Reference in New Issue
Block a user