From 95e24f9c44778a18d764e6bfc09d99d16b2fdb87 Mon Sep 17 00:00:00 2001
From: RTB
Date: Wed, 7 May 2014 17:09:42 +0200
Subject: [PATCH] added code to recognize various table formats

---
Review notes: the table loop no longer indexes extract()[0] on @summary
directly, so a data table without a summary attribute is classified by
the remaining checks instead of raising IndexError. The header-cell
count is computed once per table, and the log messages share a single
'NIST table: ' prefix. Hunk counts and the diffstat reflect the revised
body; the index hashes are carried over from the original patch.

 FourmiCrawler/sources/NIST.py | 26 ++++++++++++++++++++++++++
 1 file changed, 26 insertions(+)

diff --git a/FourmiCrawler/sources/NIST.py b/FourmiCrawler/sources/NIST.py
index cd049a0..37f8d04 100644
--- a/FourmiCrawler/sources/NIST.py
+++ b/FourmiCrawler/sources/NIST.py
@@ -25,6 +25,32 @@ class NIST(Source):
         log.msg('NIST symbol: |%s|, name: |%s|' % (symbol, name),
                 level=log.DEBUG)
 
+        for table in sel.xpath('//table[@class="data"]'):
+            # A table without a summary attribute yields an empty list, so
+            # guard the lookup instead of indexing [0] unconditionally.
+            summaries = table.xpath('@summary').extract()
+            summary = summaries[0] if summaries else ''
+            header_count = len(table.xpath('tr[1]/th'))
+            if summary == 'One dimensional data':
+                log.msg('NIST table: Aggregate data', level=log.DEBUG)
+            elif table.xpath('tr/th="Initial Phase"').extract()[0] == '1':
+                log.msg('NIST table: Enthalpy/entropy of phase transition',
+                        level=log.DEBUG)
+            elif table.xpath('tr[1]/td'):
+                log.msg('NIST table: Horizontal table', level=log.DEBUG)
+            elif summary == 'Antoine Equation Parameters':
+                log.msg('NIST table: Antoine Equation Parameters',
+                        level=log.DEBUG)
+            elif header_count == 5:
+                log.msg('NIST table: generic 5 columns', level=log.DEBUG)
+                # Symbol (unit) Temperature (K) Method Reference Comment
+            elif header_count == 4:
+                log.msg('NIST table: generic 4 columns', level=log.DEBUG)
+                # Symbol (unit) Temperature (K) Reference Comment
+            else:
+                log.msg('NIST table: NOT SUPPORTED', level=log.WARNING)
+                continue  # Assume unsupported
+
     def new_compound_request(self, compound):
         return Request(url=self.website[:-1] + self.search % compound,
                        callback=self.parse)