Archived
1
0

Now also (finally) scrapes property values and names, but they are not yet coupled together and not yet returned.

This commit is contained in:
Nout van Deijck 2014-06-02 09:26:36 +02:00
parent 8083d0c7bc
commit ba8f845178

View File

@ -60,12 +60,20 @@ class PubChem(Source):
log.msg('parsing data', level=log.DEBUG)
requests = []
sel = Selector(response)
# props = sel.xpath('.//div')
prop_values = sel.xpath('//div//a/text()').extract()
prop_names = sel.xpath('//div//a/ancestor::div/b/text()').extract()
print prop_values
print prop_names
# print props
return requests
# NOTE: this (old) definition is kept only as a reference for myself
def parse_properties(self, sel):
""" scrape data from 'Chemical and Physical Properties' box on PubChem. """
items = []
@ -95,9 +103,9 @@ class PubChem(Source):
items = filter(lambda a: a['value'] != '', items) # remove items with an empty value
# item_list = self.clean_items(items)
return items
def new_compound_request(self, compound):
    """
    Build the Request that searches this source for the given compound.

    The URL is self.website_www with its last character dropped, followed
    by self.search with `compound` substituted in via %-formatting. The
    response is handed to self.parse.

    :param compound: value interpolated into the self.search template
    :return: a Request whose callback is self.parse
    """
    # NOTE(review): the [:-1] presumably strips a trailing slash from
    # website_www so it is not doubled by the search path -- confirm.
    base_url = self.website_www[:-1]
    search_path = self.search % compound
    return Request(url=base_url + search_path, callback=self.parse)