From 3bf8dccf18d5d7f7b5a7d7b5138ffc82d51b310a Mon Sep 17 00:00:00 2001 From: RTB Date: Fri, 18 Apr 2014 14:59:56 +0200 Subject: [PATCH] properties from Predicted - ACD/Labs tab now include conditions from attribute variable --- FourmiCrawler/parsers/ChemSpider.py | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/FourmiCrawler/parsers/ChemSpider.py b/FourmiCrawler/parsers/ChemSpider.py index 169e836..3c88728 100644 --- a/FourmiCrawler/parsers/ChemSpider.py +++ b/FourmiCrawler/parsers/ChemSpider.py @@ -38,15 +38,21 @@ class ChemSpider(Parser): prop_names = td_list[::2] prop_values = td_list[1::2] for (prop_name, prop_value) in zip(prop_names, prop_values): - prop_name = prop_name.extract().encode('utf-8') + prop_name = prop_name.extract().encode('utf-8')[:-1] prop_value = prop_value.extract().encode('utf-8') + prop_conditions = '' + + m = re.match(r'(.*) \((.*)\)', prop_name) + if m: + prop_name = m.group(1) + prop_conditions = m.group(2) new_prop = Result({ - 'attribute': prop_name[:-1], + 'attribute': prop_name, 'value': prop_value, 'source': 'ChemSpider Predicted - ACD/Labs Tab', 'reliability': 'Unknown', - 'conditions': '' + 'conditions': prop_conditions }) properties.append(new_prop) log.msg('CS prop: |%s| |%s| |%s|' \