Archived
1
0

Switched to a general loop over all values, thereby extracting every element from the Wikipedia infobox (except for cells with a colspan, because these break the alternating name/value pairing).

This commit is contained in:
Bas Vb 2014-04-16 14:56:32 +02:00
parent f1280dd66d
commit ce3105f3c1

View File

@ -19,18 +19,22 @@ class WikipediaParser(Parser):
print response.url print response.url
#self.log('A response from %s just arrived!' % response.url) #self.log('A response from %s just arrived!' % response.url)
sel = Selector(response) sel = Selector(response)
items = [] items = self.parse_infobox(sel)
density = self.getdensity(sel) return items
items.append(density)
meltingpoint = self.getmeltingpoint(sel) def parse_infobox(self, sel):
items.append(meltingpoint) items=[]
boilingpoint = self.getboilingpoint(sel) tr_list = sel.xpath('.//table[@class="infobox bordered"]//td[not(@colspan)]').xpath('normalize-space(string())')
chemlink = self.getchemspider(sel) prop_names = tr_list[::2]
items.append(boilingpoint) prop_values = tr_list[1::2]
heatcapacity = self.getheatcapacity(sel) for i, prop_name in enumerate(prop_names):
items.append(heatcapacity) item = Result()
molarentropy = self.getmolarentropy(sel) item['attribute'] = prop_name.extract().encode('utf-8')
items.append(molarentropy) item['value'] = prop_values[i].extract().encode('utf-8')
item['source'] = "Wikipedia"
items.append(item)
print "new: " + item['attribute']
print item['value']
return items return items
def new_compound_request(self, compound): def new_compound_request(self, compound):