added scraping for generic info except for synonyms
This commit is contained in:
parent
50c79e3b1f
commit
98f58ea4e2
@ -18,6 +18,8 @@ class NIST(Source):
|
|||||||
|
|
||||||
requests = []
|
requests = []
|
||||||
|
|
||||||
|
requests.extend(self.parse_generic_info(sel))
|
||||||
|
|
||||||
symbol_table = {}
|
symbol_table = {}
|
||||||
tds = sel.xpath('//table[@class="symbol_table"]/tr/td')
|
tds = sel.xpath('//table[@class="symbol_table"]/tr/td')
|
||||||
for (symbol_td, name_td) in zip(tds[::2], tds[1::2]):
|
for (symbol_td, name_td) in zip(tds[::2], tds[1::2]):
|
||||||
@ -60,6 +62,41 @@ class NIST(Source):
|
|||||||
continue #Assume unsupported
|
continue #Assume unsupported
|
||||||
return requests
|
return requests
|
||||||
|
|
||||||
|
def parse_generic_info(self, sel):
    """Scrape the generic compound information from a NIST page.

    Looks up the ``<ul>`` under ``body`` that contains the
    "IUPAC Standard InChI:" item and reads the chemical formula,
    molecular weight, InChI, InChIKey and CAS registry number from it.

    :param sel: selector for the page; must provide ``xpath()`` whose
        result in turn provides ``xpath()`` and ``extract()``.
    :return: a list of ``Result`` items, one per attribute that was found
        on the page, in the fixed order listed above. Attributes that are
        absent from the page (or empty) are skipped instead of raising
        ``IndexError``.
    """
    ul = sel.xpath('body/ul[li/strong="IUPAC Standard InChI:"]')

    def first_text(query, strip=False):
        # Return the first node matched by `query` inside the info list,
        # or None when nothing matches.
        extracted = ul.xpath(query).extract()
        if not extracted:
            return None
        return extracted[0].strip() if strip else extracted[0]

    # The first two text nodes of the formula item are dropped
    # (presumably the "Formula" label and its separator -- TODO confirm
    # against the page markup); the rest is joined into the formula.
    raw_formula = ul.xpath('li[strong/a="Formula"]//text()').extract()

    # An ordered list of pairs (rather than a dict) keeps the output order
    # deterministic and avoids the Python 2 only dict.iteritems().
    data = [
        ('Chemical formula', ''.join(raw_formula[2:]).strip()),
        ('Molecular weight',
         first_text('li[strong/a="Molecular weight"]/text()', strip=True)),
        ('IUPAC Standard InChI',
         first_text('li[strong="IUPAC Standard InChI:"]//tt/text()')),
        ('IUPAC Standard InChIKey',
         first_text('li[strong="IUPAC Standard InChIKey:"]'
                    '/tt/text()')),
        ('CAS Registry Number',
         first_text('li[strong="CAS Registry Number:"]/text()',
                    strip=True)),
    ]

    results = []
    for key, value in data:
        if not value:
            # Attribute missing on this page: skip it, keep the others.
            continue
        results.append(Result({
            'attribute': key,
            'value': value,
            'source': 'NIST',
            'reliability': 'Unknown',
            'conditions': ''
        }))

    return results
|
||||||
|
|
||||||
def parse_aggregate_data(self, table, symbol_table):
|
def parse_aggregate_data(self, table, symbol_table):
|
||||||
results = []
|
results = []
|
||||||
for tr in table.xpath('tr[td]'):
|
for tr in table.xpath('tr[td]'):
|
||||||
|
Reference in New Issue
Block a user