From 0ad98905e3cfca43e34d92cede65cfa7ec727469 Mon Sep 17 00:00:00 2001 From: RTB Date: Sun, 13 Apr 2014 23:35:25 +0200 Subject: [PATCH] added scraping for wikipedia links in synonym tab --- FourmiCrawler/parsers/ChemSpider.py | 3 +++ 1 file changed, 3 insertions(+) diff --git a/FourmiCrawler/parsers/ChemSpider.py b/FourmiCrawler/parsers/ChemSpider.py index 01eb274..679e4ca 100644 --- a/FourmiCrawler/parsers/ChemSpider.py +++ b/FourmiCrawler/parsers/ChemSpider.py @@ -22,6 +22,9 @@ class ChemSpider(Parser): requests = [] requests_synonyms = self.parse_synonyms(sel) requests.extend(requests_synonyms) + for wiki_url in sel.xpath('.//a[@title="Wiki"]/@href').extract(): + requests.append( Request(url=wiki_url) ) + return requests def parse_synonyms(self, sel):