From bf4a5bb41f44448adfac478f961b0b5c2a2d3f37 Mon Sep 17 00:00:00 2001
From: RTB
Date: Fri, 18 Apr 2014 13:36:33 +0200
Subject: [PATCH] added scraping of synonyms labeled as 'synonym_cn'

---
 FourmiCrawler/parsers/ChemSpider.py | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/FourmiCrawler/parsers/ChemSpider.py b/FourmiCrawler/parsers/ChemSpider.py
index 7f01323..fd2b84c 100644
--- a/FourmiCrawler/parsers/ChemSpider.py
+++ b/FourmiCrawler/parsers/ChemSpider.py
@@ -74,6 +74,9 @@ class ChemSpider(Parser):
     def parse_synonyms(self, sel):
         requests = []
         synonyms = []
+        for syn in sel.xpath('//p[@class="syn"][span[@class="synonym_cn"]]'):
+            name = syn.xpath('span[@class="synonym_cn"]/text()').extract()[0]
+            synonyms.append(self.new_synonym(syn, name, 'expert'))
         for syn in sel.xpath('//p[@class="syn"][strong]'):
             name = syn.xpath('strong/text()').extract()[0]
             synonyms.append(self.new_synonym(syn, name, 'expert'))