Archived
1
0
This repository has been archived on 2025-03-03. You can view files and clone it, but cannot push or open issues or pull requests.

31 lines
995 B
Python

from source import Source
from scrapy import log
from scrapy.http import Request
from scrapy.selector import Selector
from FourmiCrawler.items import Result
class NIST(Source):
website = "http://webbook.nist.gov/*"
search = 'cgi/cbook.cgi?Name=%s&Units=SI&cTG=on&cTC=on&cTP=on'
def __init__(self):
Source.__init__(self)
def parse(self, response):
sel = Selector(response)
symbol_table = {}
tds = sel.xpath('//table[@class="symbol_table"]/tr/td')
for (symbol_td, name_td) in zip(tds[::2], tds[1::2]):
symbol = ''.join(symbol_td.xpath('node()').extract())
name = name_td.xpath('text()').extract()[0]
symbol_table[symbol] = name
log.msg('NIST symbol: |%s|, name: |%s|' % (symbol, name),
level=log.DEBUG)
def new_compound_request(self, compound):
return Request(url=self.website[:-1] + self.search % compound,
callback=self.parse)