From 7abb491d3fdcd92661b9c7dc8a7ebfbd686e4cbb Mon Sep 17 00:00:00 2001
From: RTB
Date: Wed, 7 May 2014 22:08:43 +0200
Subject: [PATCH] added function for most generic tables

---
 FourmiCrawler/sources/NIST.py | 49 +++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 49 insertions(+)

diff --git a/FourmiCrawler/sources/NIST.py b/FourmiCrawler/sources/NIST.py
index 0191fee..5757546 100644
--- a/FourmiCrawler/sources/NIST.py
+++ b/FourmiCrawler/sources/NIST.py
@@ -46,9 +46,13 @@ class NIST(Source):
             elif len(tables.xpath('tr[1]/th')) == 5:
                 log.msg('NIST table: generic 5 columns', level=log.DEBUG)
                 # Symbol (unit) Temperature (K) Method Reference Comment
+                requests.extend(
+                    self.parse_generic_data(tables))
             elif len(tables.xpath('tr[1]/th')) == 4:
                 log.msg('NIST table: generic 4 columns', level=log.DEBUG)
                 # Symbol (unit) Temperature (K) Reference Comment
+                requests.extend(
+                    self.parse_generic_data(tables))
             else:
                 log.msg('NIST table: NOT SUPPORTED', level=log.WARNING)
                 continue #Assume unsupported
@@ -94,6 +98,51 @@ class NIST(Source):
 
         return results
 
+    @staticmethod
+    def parse_generic_data(table):
+        """Parse a generic NIST data table into Result items.
+
+        Only the first two cells of every data row are read: the value
+        and the temperature. The attribute name is the table's summary
+        attribute and the unit comes from the first header cell.
+
+        :param table: selector of the <table> element to parse
+        :return: list of Result items, one per usable data row
+        """
+        results = []
+
+        summary = table.xpath('@summary').extract()
+        if not summary:
+            log.msg('NIST generic table: missing summary',
+                    level=log.WARNING)
+            return results
+        name = summary[0]
+
+        # Presumably the last node of the first header cell reads
+        # ' (unit)'; the slice drops the first two and the last character.
+        header = table.xpath('tr[1]/th[1]/node()').extract()
+        unit = header[-1][2:-1] if header else ''
+
+        for tr in table.xpath('tr[td]'):
+            # Read cell by cell: 'td/text()' yields nothing for an empty
+            # cell, which would shift the temperature out of index 1.
+            tds = [''.join(td.xpath('text()').extract()).strip()
+                   for td in tr.xpath('td')]
+            if len(tds) < 2 or not tds[0]:
+                log.msg('NIST generic table: skipped incomplete row',
+                        level=log.DEBUG)
+                continue
+            result = Result({
+                'attribute': name,
+                'value': ('%s %s' % (tds[0], unit)).strip(),
+                'source': 'NIST',
+                'reliability': 'Unknown',
+                'conditions': '%s K' % tds[1] if tds[1] else ''
+            })
+            log.msg('NIST: |%s|' % result, level=log.DEBUG)
+            results.append(result)
+        return results
+
     def new_compound_request(self, compound):
         return Request(url=self.website[:-1] + self.search % compound,
                        callback=self.parse)