added function to scrape transition tables
This commit is contained in:
parent
85595ecf35
commit
10dd74e026
@ -3,7 +3,7 @@ from scrapy import log
|
||||
from scrapy.http import Request
|
||||
from scrapy.selector import Selector
|
||||
from FourmiCrawler.items import Result
|
||||
|
||||
import re
|
||||
|
||||
class NIST(Source):
|
||||
website = "http://webbook.nist.gov/*"
|
||||
@ -35,6 +35,8 @@ class NIST(Source):
|
||||
elif tables.xpath('tr/th="Initial Phase"').extract()[0] == '1':
|
||||
log.msg('NIST table; Enthalpy/entropy of phase transition',
|
||||
level=log.DEBUG)
|
||||
requests.extend(
|
||||
self.parse_transition_data(tables, symbol_table))
|
||||
elif tables.xpath('tr[1]/td'):
|
||||
log.msg('NIST table: Horizontal table', level=log.DEBUG)
|
||||
elif (tables.xpath('@summary').extract()[0] ==
|
||||
@ -70,6 +72,28 @@ class NIST(Source):
|
||||
results.append(result)
|
||||
return results
|
||||
|
||||
@staticmethod
def parse_transition_data(table, symbol_table):
    """Scrape a phase-transition table into a list of Result items.

    :param table: selector for a single <table>; its ``summary`` attribute
        becomes the ``attribute`` of every Result
    :param symbol_table: not used by this parser; kept so existing callers
        that pass it keep working
    :return: a list of Result items, one per data row that has at least
        four text cells
    """
    results = []

    name = table.xpath('@summary').extract()[0]
    # The last node of the first header cell is sliced to drop its first
    # two characters and its final one; presumably this strips a leading
    # " (" and trailing ")" around the unit -- confirm against the markup.
    unit = table.xpath('tr[1]/th[1]/node()').extract()[-1][2:-1]

    # Only rows that contain <td> cells are data rows; header rows are
    # made of <th> and are skipped by the predicate.
    for tr in table.xpath('tr[td]'):
        tds = tr.xpath('td/text()').extract()

        # Cells 0-3 are all required below. 'td/text()' yields nothing for
        # a cell without a direct text node, so a row can come up short;
        # skip it instead of letting an IndexError discard every result
        # collected so far.
        if len(tds) < 4:
            log.msg('NIST: skipping transition row with %d text cells'
                    % len(tds), level=log.WARNING)
            continue

        result = Result({
            'attribute': name,
            'value': tds[0] + ' ' + unit,
            'source': 'NIST',
            'reliability': 'Unknown',
            'conditions': '%s K, (%s -> %s)' % (tds[1], tds[2], tds[3])
        })
        log.msg('NIST: |%s|' % result, level=log.DEBUG)
        results.append(result)

    return results
|
||||
|
||||
def new_compound_request(self, compound):
    """Build the search Request for ``compound``.

    The URL is ``self.website`` without its final character, followed by
    ``self.search`` formatted with ``compound``. The response is routed
    to ``self.parse``.

    :param compound: value substituted into the ``self.search`` template
    :return: a Request for the resulting URL
    """
    base_url = self.website[:-1]
    search_path = self.search % compound
    return Request(url=base_url + search_path, callback=self.parse)
|
||||
|
Reference in New Issue
Block a user