From 775a920b9bc72f2d6f7e08624a0203af4f0b0a22 Mon Sep 17 00:00:00 2001
From: RTB
Date: Fri, 9 May 2014 13:00:22 +0200
Subject: [PATCH] NIST scraper now handles urls with individual data points

---
 FourmiCrawler/sources/NIST.py | 44 +++++++++++++++++++++++++++++++++++++++++++-
 1 file changed, 43 insertions(+), 1 deletion(-)

diff --git a/FourmiCrawler/sources/NIST.py b/FourmiCrawler/sources/NIST.py
index 1222c63..6ae6862 100644
--- a/FourmiCrawler/sources/NIST.py
+++ b/FourmiCrawler/sources/NIST.py
@@ -152,7 +152,49 @@ class NIST(Source):
         return results
 
     def parse_individual_datapoints(self, response):
-        pass
+        """
+        Parse an 'individual data points' page into a list of Results.
+
+        The attribute name comes from the data table's 'summary' attribute
+        and the unit from the parenthesised part of the first column header;
+        '!' is used as the unit when the header contains no parentheses.
+
+        :param response: the response of the page to be parsed
+        :return: a list of Result objects, one per data row; empty when the
+                 page has no data table or the table has no summary
+        """
+        results = []
+
+        tables = Selector(response).xpath('//table[@class="data"]')
+        if not tables:
+            # No data table on this page: nothing to scrape.
+            return results
+        table = tables[0]
+
+        summary = table.xpath('@summary').extract()
+        if not summary:
+            # Without a summary there is no attribute name for the results.
+            return results
+        name = summary[0]
+
+        tr_unit = ''.join(table.xpath('tr[1]/th[1]/node()').extract())
+        m = re.search(r'\((.*)\)', tr_unit)
+        unit = m.group(1) if m else '!'
+
+        for tr in table.xpath('tr[td]'):
+            tds = tr.xpath('td/text()').extract()
+            if not tds:
+                # Row whose cells hold no text nodes: skip it.
+                continue
+            results.append(Result({
+                'attribute': name,
+                'value': '%s %s' % (tds[0], unit),
+                'source': 'NIST',
+                'reliability': 'Unknown',
+                'conditions': ''
+            }))
+
+        return results
 
     def new_compound_request(self, compound):
         return Request(url=self.website[:-1] + self.search % compound,