From 4f2c046c9c1d8d7a37e9d8214ec59ead11e1b95c Mon Sep 17 00:00:00 2001
From: RTB
Date: Thu, 17 Apr 2014 22:06:45 +0200
Subject: [PATCH] rewrote parse_synonyms and new_synonym to use an internal dictionary structure

---
Review note: new_synonym() receives the label through its `reliability`
parameter, but the added dict referenced an undefined name `category`
(NameError on the first synonym parsed). The 'category' entry now takes its
value from the `reliability` argument; the signature is left unchanged.

 FourmiCrawler/parsers/ChemSpider.py | 20 +++++++++-----------
 1 file changed, 9 insertions(+), 11 deletions(-)

diff --git a/FourmiCrawler/parsers/ChemSpider.py b/FourmiCrawler/parsers/ChemSpider.py
index 8dc0103..7141839 100644
--- a/FourmiCrawler/parsers/ChemSpider.py
+++ b/FourmiCrawler/parsers/ChemSpider.py
@@ -78,31 +78,29 @@ class ChemSpider(Parser):
         synonyms = []
         for syn in sel.xpath('//p[@class="syn"][strong]'):
             name = syn.xpath('strong/text()').extract()[0]
-            synonyms.append(self.new_synonym(syn, name, 'high'))
+            synonyms.append(self.new_synonym(syn, name, 'expert'))
         for syn in sel.xpath(
                 '//p[@class="syn"][span[@class="synonym_confirmed"]]'):
             name = syn.xpath(
                 'span[@class="synonym_confirmed"]/text()').extract()[0]
-            synonyms.append(self.new_synonym(syn, name, 'medium'))
+            synonyms.append(self.new_synonym(syn, name, 'user'))
         for syn in sel.xpath('//p[@class="syn"][span[@class=""]]'):
             name = syn.xpath('span[@class=""]/text()').extract()[0]
-            synonyms.append(self.new_synonym(syn, name, 'low'))
+            synonyms.append(self.new_synonym(syn, name, 'nonvalidated'))
 
         for synonym in synonyms:
-            if synonym['reliability'] == 'high':
-                self._Parser__spider.get_synonym_requests(synonym['value'])
+            if synonym['category'] == 'expert':
+                self._Parser__spider.get_synonym_requests(synonym['name'])
 
         return requests
 
     def new_synonym(self, sel, name, reliability):
         log.msg('CS synonym: %s (%s)' % (name, reliability), level=log.DEBUG)
         self.ignore_list.append(name)
-        synonym = Result()
-        synonym['attribute'] = 'synonym'
-        synonym['value'] = name
-        synonym['source'] = 'ChemSpider'
-        synonym['reliability'] = reliability
-        synonym['conditions'] = ''
+        synonym = {
+            'name': name,
+            'category': reliability,
+        }
         return synonym
 
     def parse_extendedinfo(self, response):