Patch summary: implement NIST.parse in FourmiCrawler/sources/NIST.py.
The stub (whose parameter was misspelled "reponse") is replaced by code that
pairs up the cells of the table with class "symbol_table" into a
symbol -> name dict and logs every pair at DEBUG level.
Note: line breaks and indentation were reconstructed from a flattened copy of
this patch (4-space indentation assumed); the hunk below has 8 old and 17 new
lines, matching its header.

diff --git a/FourmiCrawler/sources/NIST.py b/FourmiCrawler/sources/NIST.py
index 2c4337c..44a8037 100644
--- a/FourmiCrawler/sources/NIST.py
+++ b/FourmiCrawler/sources/NIST.py
@@ -13,8 +13,17 @@ class NIST(Source):
     def __init__(self):
         Source.__init__(self)
 
-    def parse(self, reponse):
-        pass
+    def parse(self, response):
+        sel = Selector(response)
+
+        symbol_table = {}
+        tds = sel.xpath('//table[@class="symbol_table"]/tr/td')
+        for (symbol_td, name_td) in zip(tds[::2], tds[1::2]):
+            symbol = ''.join(symbol_td.xpath('node()').extract())
+            name = name_td.xpath('text()').extract()[0]
+            symbol_table[symbol] = name
+            log.msg('NIST symbol: |%s|, name: |%s|' % (symbol, name),
+                    level=log.DEBUG)
 
     def new_compound_request(self, compound):
         return Request(url=self.website[:-1] + self.search % compound,