From 7fc980befe42132ba8468dfa9b6f19be5754ba0d Mon Sep 17 00:00:00 2001 From: Rob tB Date: Wed, 16 Apr 2014 15:02:37 +0200 Subject: [PATCH] chemspider should now only generate new Requests for wikipedia links from 'expert confirmed' synonyms --- FourmiCrawler/parsers/ChemSpider.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/FourmiCrawler/parsers/ChemSpider.py b/FourmiCrawler/parsers/ChemSpider.py index 4c45c92..a36a42f 100644 --- a/FourmiCrawler/parsers/ChemSpider.py +++ b/FourmiCrawler/parsers/ChemSpider.py @@ -25,7 +25,7 @@ class ChemSpider(Parser): requests.extend(requests_synonyms) requests_properties = self.parse_properties(sel) requests.extend(requests_properties) - for wiki_url in sel.xpath('.//a[@title="Wiki"]/@href').extract(): + for wiki_url in sel.xpath('.//p[@class="syn"][strong]/a[@title="Wiki"]/@href').extract(): requests.append( Request(url=wiki_url) ) return requests