From 2ac6d1711d9665cab322c8c201dca8046abf4472 Mon Sep 17 00:00:00 2001 From: RTB Date: Fri, 18 Apr 2014 17:11:04 +0200 Subject: [PATCH] added comments for chemspider new_synonym --- FourmiCrawler/parsers/ChemSpider.py | 4 ++++ 1 file changed, 4 insertions(+) diff --git a/FourmiCrawler/parsers/ChemSpider.py b/FourmiCrawler/parsers/ChemSpider.py index 2d98544..6fde538 100644 --- a/FourmiCrawler/parsers/ChemSpider.py +++ b/FourmiCrawler/parsers/ChemSpider.py @@ -134,15 +134,19 @@ somewhere. return requests def new_synonym(self, sel, name, category): + """Scrape for a single synonym at a given HTML tag""" self.ignore_list.append(name) language = sel.xpath('span[@class="synonym_language"]/text()') if language: + # The [1:-1] is to remove brackets around the language name language = language.extract()[0][1:-1] else: + # If language is not given, English is assumed, TODO: confirm language = 'English' log.msg('CS synonym: %s (%s) (%s)' % (name, category, language), level=log.DEBUG) references = [] + # A synonym can have multiple references, each optionally with link for ref in sel.xpath('span[@class="synonym_ref"]'): refname = ref.xpath('normalize-space(string())') references.append({