Archived
1
0

added function to scrape transition tables

This commit is contained in:
RTB 2014-05-07 21:58:52 +02:00
parent 85595ecf35
commit 10dd74e026

View File

@ -3,7 +3,7 @@ from scrapy import log
from scrapy.http import Request
from scrapy.selector import Selector
from FourmiCrawler.items import Result
import re
class NIST(Source):
website = "http://webbook.nist.gov/*"
@ -35,6 +35,8 @@ class NIST(Source):
elif tables.xpath('tr/th="Initial Phase"').extract()[0] == '1':
log.msg('NIST table; Enthalpy/entropy of phase transition',
level=log.DEBUG)
requests.extend(
self.parse_transition_data(tables, symbol_table))
elif tables.xpath('tr[1]/td'):
log.msg('NIST table: Horizontal table', level=log.DEBUG)
elif (tables.xpath('@summary').extract()[0] ==
@ -70,6 +72,28 @@ class NIST(Source):
results.append(result)
return results
@staticmethod
def parse_transition_data(table, symbol_table):
    """Build one Result per data row of a phase-transition table.

    The attribute name is read from the table's ``summary`` attribute.
    The unit is taken from the last child node of the first header cell
    in the first row, with its first two characters and its final
    character removed (presumably a surrounding " (" and ")" -- confirm
    against the actual NIST markup).

    Rows whose ``td`` cells yield fewer than four text nodes are skipped
    with a warning instead of raising IndexError, so a single malformed
    row does not discard the results already gathered from the table.

    :param table: selector for a single table (must support ``xpath``)
    :param symbol_table: not used by this function
    :return: list of Result items, one per well-formed data row
    :raises IndexError: if the table has no ``summary`` attribute or the
        first header cell has no child nodes
    """
    results = []
    name = table.xpath('@summary').extract()[0]
    unit = table.xpath('tr[1]/th[1]/node()').extract()[-1][2:-1]

    # Only rows that contain at least one <td> are data rows.
    for tr in table.xpath('tr[td]'):
        tds = tr.xpath('td/text()').extract()

        # Value, temperature, initial phase and final phase are all
        # required; an empty cell produces no text node and would shift or
        # truncate the list.
        if len(tds) < 4:
            log.msg('NIST: skipping transition row with %d text cells'
                    % len(tds), level=log.WARNING)
            continue

        value, temperature, initial, final = tds[:4]
        result = Result({
            'attribute': name,
            'value': value + ' ' + unit,
            'source': 'NIST',
            'reliability': 'Unknown',
            'conditions': '%s K, (%s -> %s)' % (temperature, initial, final)
        })
        log.msg('NIST: |%s|' % result, level=log.DEBUG)
        results.append(result)

    return results
def new_compound_request(self, compound):
    """Create the search Request for *compound*.

    The URL is ``self.website`` without its last character, followed by
    ``self.search`` %-formatted with *compound*. The response is handed
    to ``self.parse``.

    :param compound: value substituted into ``self.search``
    :return: a Request whose callback is ``self.parse``
    """
    base_url = self.website[:-1]
    search_path = self.search % compound
    return Request(url=base_url + search_path, callback=self.parse)