From 930eb6cad588d49a46b2dea51d0cbe72565c4763 Mon Sep 17 00:00:00 2001
From: Rob tB
Date: Sun, 4 May 2014 21:20:46 +0200
Subject: [PATCH] NIST now scrapes the symbol table for later use

---
Notes (placed after the three-dash separator, so they are not part of the
commit message):

What the single hunk below changes in class NIST:
  * Renames the parameter of parse() from `reponse` to `response`.
  * Replaces the `pass` stub with code that wraps the response in a
    Selector and selects every <td> under <tr> under a <table> whose
    class attribute is exactly "symbol_table".
  * Pairs the selected cells by position: even-indexed cells are treated
    as symbols, odd-indexed cells as names. zip() stops at the shorter
    slice, so a trailing unpaired cell is silently dropped.
  * For each pair, the symbol is the concatenation of everything extracted
    from the symbol cell's child nodes; the name is the first extracted
    text node of the name cell. The pair is stored in the local dict
    `symbol_table` and logged at DEBUG level.

NOTE(review): `symbol_table` is a local variable and this hunk neither
returns nor stores it, so parse() still returns None here; the subject
line suggests it is meant to be consumed by a later change - confirm.

NOTE(review): `name_td.xpath('text()').extract()[0]` presumably raises
IndexError for a name cell without a text node - verify against the
pages being scraped.

NOTE(review): `Selector` and `log` are not imported inside this hunk;
presumably they are imported elsewhere in NIST.py - confirm.

NOTE(review): line breaks and indentation in this copy of the patch were
reconstructed from a whitespace-collapsed version (4-space methods,
8-space bodies); compare with the repository before applying.

 FourmiCrawler/sources/NIST.py | 13 +++++++++++--
 1 file changed, 11 insertions(+), 2 deletions(-)

diff --git a/FourmiCrawler/sources/NIST.py b/FourmiCrawler/sources/NIST.py
index 2c4337c..44a8037 100644
--- a/FourmiCrawler/sources/NIST.py
+++ b/FourmiCrawler/sources/NIST.py
@@ -13,8 +13,17 @@ class NIST(Source):
     def __init__(self):
         Source.__init__(self)
 
-    def parse(self, reponse):
-        pass
+    def parse(self, response):
+        sel = Selector(response)
+
+        symbol_table = {}
+        tds = sel.xpath('//table[@class="symbol_table"]/tr/td')
+        for (symbol_td, name_td) in zip(tds[::2], tds[1::2]):
+            symbol = ''.join(symbol_td.xpath('node()').extract())
+            name = name_td.xpath('text()').extract()[0]
+            symbol_table[symbol] = name
+            log.msg('NIST symbol: |%s|, name: |%s|' % (symbol, name),
+                    level=log.DEBUG)
 
     def new_compound_request(self, compound):
         return Request(url=self.website[:-1] + self.search % compound,