From b5c83125f7f5fea42677761785598f4894c1a731 Mon Sep 17 00:00:00 2001 From: Nout van Deijck Date: Wed, 23 Apr 2014 12:27:53 +0200 Subject: [PATCH] Added extra request for chemspider link retrieved from Wikipedia --- FourmiCrawler/parsers/WikipediaParser.py | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/FourmiCrawler/parsers/WikipediaParser.py b/FourmiCrawler/parsers/WikipediaParser.py index b339a73..b60b98d 100644 --- a/FourmiCrawler/parsers/WikipediaParser.py +++ b/FourmiCrawler/parsers/WikipediaParser.py @@ -46,7 +46,10 @@ class WikipediaParser(Parser): items.append(item) log.msg('Wiki prop: |%s| |%s| |%s|' % (item['attribute'], item['value'], item['source']), level=log.DEBUG) items=filter(lambda a: a['value']!='', items) #remove items with an empty value - return self.cleanitems(items) + itemlist=self.cleanitems(items) + request=Request(self.getchemspider(sel)) + itemlist.append(request) + return itemlist def new_compound_request(self, compound): return Request(url=self.website[:-1] + compound, callback=self.parse)