diff --git a/FourmiCrawler/sources/NIST.py b/FourmiCrawler/sources/NIST.py
index 6e884ef..0191fee 100644
--- a/FourmiCrawler/sources/NIST.py
+++ b/FourmiCrawler/sources/NIST.py
@@ -3,7 +3,7 @@ from scrapy import log
 from scrapy.http import Request
 from scrapy.selector import Selector
 from FourmiCrawler.items import Result
-
+import re
 
 class NIST(Source):
     website = "http://webbook.nist.gov/*"
@@ -35,6 +35,8 @@ class NIST(Source):
             elif tables.xpath('tr/th="Initial Phase"').extract()[0] == '1':
                 log.msg('NIST table; Enthalpy/entropy of phase transition',
                         level=log.DEBUG)
+                requests.extend(
+                    self.parse_transition_data(tables, symbol_table))
             elif tables.xpath('tr[1]/td'):
                 log.msg('NIST table: Horizontal table', level=log.DEBUG)
             elif (tables.xpath('@summary').extract()[0] ==
@@ -70,6 +72,46 @@ class NIST(Source):
             results.append(result)
         return results
 
+    @staticmethod
+    def parse_transition_data(table, symbol_table):
+        """Extract phase-transition results from a NIST data table.
+
+        Every table row that contains data cells yields one Result. Its
+        value is the first cell followed by the unit read from the first
+        header cell; its conditions are built from the next three cells
+        as '<second> K, (<third> -> <fourth>)'.
+
+        :param table: selector for the table element to parse
+        :param symbol_table: passed by the caller; not used by this parser
+        :return: list of Result items, one per complete data row
+        """
+        results = []
+
+        name = table.xpath('@summary').extract()[0]
+        # The unit is the last node of the first header cell; the slice
+        # drops its two leading characters and its final character.
+        unit = table.xpath('tr[1]/th[1]/node()').extract()[-1][2:-1]
+
+        for tr in table.xpath('tr[td]'):
+            tds = tr.xpath('td/text()').extract()
+            # A row with fewer than four text cells cannot fill the
+            # template below; skip it rather than raise IndexError.
+            if len(tds) < 4:
+                log.msg('NIST: skipping incomplete transition row',
+                        level=log.DEBUG)
+                continue
+            result = Result({
+                'attribute': name,
+                'value': tds[0] + ' ' + unit,
+                'source': 'NIST',
+                'reliability': 'Unknown',
+                'conditions': '%s K, (%s -> %s)' % (tds[1], tds[2], tds[3])
+            })
+            log.msg('NIST: |%s|' % result, level=log.DEBUG)
+            results.append(result)
+
+        return results
+
     def new_compound_request(self, compound):
         return Request(url=self.website[:-1] + self.search % compound,
                        callback=self.parse)