diff --git a/FourmiCrawler/parsers/WikipediaParser.py b/FourmiCrawler/parsers/WikipediaParser.py
index d88f4f1..b3dc36f 100644
--- a/FourmiCrawler/parsers/WikipediaParser.py
+++ b/FourmiCrawler/parsers/WikipediaParser.py
@@ -1,3 +1,32 @@
-__author__ = 'Bas'
-__author__ = 'Nout'
-#new branch
\ No newline at end of file
+import parser
+from scrapy.selector import Selector
+from FourmiCrawler.items import Result
+
+
+class WikipediaParser:
+    """Scrape chemical property data from an English Wikipedia article."""
+
+    # Article whose URL is recorded as the source of every Result.
+    # TODO: build this from the compound being crawled instead of
+    # hard-coding the Methane article.
+    website = "http://en.wikipedia.org/wiki/Methane"
+    __spider = "WikipediaParser"
+
+    def parse(self, response):
+        """Extract the melting point from a downloaded Wikipedia page.
+
+        :param response: the Scrapy response holding the article HTML
+        :return: a list containing a single Result whose 'value' is the
+            list of text nodes found in the melting point table row
+            (empty when the page has no such row)
+        """
+        sel = Selector(response)
+        item = Result()
+        item['attribute'] = "Melting point"
+        # Locate the row through its link to the Melting_point article:
+        # a <tr> element itself never carries an href attribute.
+        item['value'] = sel.xpath(
+            '//tr[.//a[contains(@href, "/wiki/Melting_point")]]//text()'
+        ).extract()
+        item['source'] = self.website
+        return [item]