From 30f00b676d0b1396b538eb95e0127bea2393c12b Mon Sep 17 00:00:00 2001 From: RTB Date: Fri, 6 Jun 2014 20:16:25 +0200 Subject: [PATCH] updated parse to use newresult function --- FourmiCrawler/sources/WikipediaParser.py | 23 ++++++++--------------- 1 file changed, 8 insertions(+), 15 deletions(-) diff --git a/FourmiCrawler/sources/WikipediaParser.py b/FourmiCrawler/sources/WikipediaParser.py index 34b51c0..781d08f 100644 --- a/FourmiCrawler/sources/WikipediaParser.py +++ b/FourmiCrawler/sources/WikipediaParser.py @@ -28,7 +28,6 @@ class WikipediaParser(Source): log.msg('Reliability not set for Wikipedia', level=log.WARNING) self.cfg['reliability'] = '' - def parse(self, response): """ Distributes the above described behaviour """ log.msg('A response from %s just arrived!' % response.url, level=log.DEBUG) @@ -51,13 +50,10 @@ class WikipediaParser(Source): prop_names = tr_list[::2] prop_values = tr_list[1::2] for i, prop_name in enumerate(prop_names): - item = Result({ - 'attribute': prop_name.extract().encode('utf-8'), - 'value': prop_values[i].extract().encode('utf-8'), - 'source': "Wikipedia", - 'reliability': "Unknown", - 'conditions': "" - }) + item = self.newresult( + attribute=prop_name.extract().encode('utf-8'), + value=prop_values[i].extract().encode('utf-8') + ) items.append(item) log.msg('Wiki prop: |%s| |%s| |%s|' % (item['attribute'], item['value'], item['source']), level=log.DEBUG) @@ -68,13 +64,10 @@ class WikipediaParser(Source): log.msg('item: %s' % tablerow.xpath('./th').xpath('normalize-space(string())'), level=log.DEBUG) if tablerow.xpath('./th').xpath('normalize-space(string())') and tablerow.xpath('./td').xpath( 'normalize-space(string())'): - item = Result({ - 'attribute': tablerow.xpath('./th').xpath('normalize-space(string())').extract()[0].encode('utf-8'), - 'value': tablerow.xpath('./td').xpath('normalize-space(string())').extract()[0].encode('utf-8'), - 'source': "Wikipedia", - 'reliability': "Unknown", - 'conditions': "" - }) + item = self.newresult( + attribute=tablerow.xpath('./th').xpath('normalize-space(string())').extract()[0].encode('utf-8'), + value=tablerow.xpath('./td').xpath('normalize-space(string())').extract()[0].encode('utf-8'), + ) items.append(item) log.msg( 'Wiki prop: |attribute: %s| |value: %s| |%s|' % (item['attribute'], item['value'], item['source']),