From 1ca3593ae1c85f4dcdb758ec84e4714f15f927cb Mon Sep 17 00:00:00 2001
From: Bas Vb
Date: Wed, 16 Apr 2014 00:35:19 +0200
Subject: [PATCH] Parse is runnable now.

---
 FourmiCrawler/parsers/WikipediaParser.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/FourmiCrawler/parsers/WikipediaParser.py b/FourmiCrawler/parsers/WikipediaParser.py
index 724fb79..784fccf 100644
--- a/FourmiCrawler/parsers/WikipediaParser.py
+++ b/FourmiCrawler/parsers/WikipediaParser.py
@@ -18,14 +18,20 @@ class WikipediaParser(Parser):
         items = []
         item = Result()
         item['attribute']="Melting point"
-        item['value']="value1" # sel.xpath('//tr[contains(@href, "/wiki/Melting_point")]/text()').extract()
+        item['value']= sel.xpath('//tr/td/a[@title="Melting point"]/../../td[2]/text()').extract() # ('//tr[contains(@href, "/wiki/Melting_point")]/text()').extract()
         item['source']= "Wikipedia"
         items.append(item)
         print item['attribute']
         print item['value']
         print item['source']
-        print "test"
         return items
 
+    def getmeltingpoint(self, sel):
+        item=Result()
+        item['attribute']="Melting point"
+        item['value']= sel.xpath('//tr/td/a[@title="Melting point"]/../../td[2]/text()').extract() # ('//tr[contains(@href, "/wiki/Melting_point")]/text()').extract()
+        item['source']= "Wikipedia"
+        return item
+
     def new_compound_request(self, compound):
         return Request(url=self.website[:-1] + compound, callback=self.parse)
\ No newline at end of file