NIST scraper now handles URLs with individual data points

2014-05-09 13:00:22 +02:00 · 2014-05-09 13:00:22 +02:00 · 775a920b9b
commit 775a920b9b
parent 5e067fd572
1 changed files with 24 additions and 1 deletions
--- a/FourmiCrawler/sources/NIST.py
+++ b/FourmiCrawler/sources/NIST.py
@@ -152,7 +152,30 @@ class NIST(Source):
        return results
    def parse_individual_datapoints(self, response):
        """Parse a NIST page that lists individual data points in a table.

        The first ``<table class="data">`` on the page is read. Its
        ``summary`` attribute is used as the attribute name, the unit is
        taken from the parenthesised part of the first header cell, and
        every row that contains ``<td>`` cells yields one Result built from
        the first text node found in those cells.

        :param response: the response of the page holding the data points
        :return: a list of Result objects; the list is empty when the page
                 has no data table or the table carries no summary
        """
        sel = Selector(response)

        # Guard the lookups instead of indexing blindly: a page without the
        # expected table (or summary) yields no results rather than an
        # IndexError.
        tables = sel.xpath('//table[@class="data"]')
        if not tables:
            return []
        table = tables[0]

        summaries = table.xpath('@summary').extract()
        if not summaries:
            return []
        name = summaries[0]

        # The unit is whatever sits between parentheses in the first header
        # cell; '!' is kept as the placeholder when no unit can be found.
        tr_unit = ''.join(table.xpath('tr[1]/th[1]/node()').extract())
        m = re.search(r'\((.*)\)', tr_unit)
        unit = m.group(1) if m else '!'

        results = []
        for tr in table.xpath('tr[td]'):
            tds = tr.xpath('td/text()').extract()
            if not tds:
                # Row has cells but no direct text content; nothing to report.
                continue
            results.append(Result({
                'attribute': name,
                'value': '%s %s' % (tds[0], unit),
                'source': 'NIST',
                'reliability': 'Unknown',
                'conditions': ''
            }))
        return results
    def new_compound_request(self, compound):
        return Request(url=self.website[:-1] + self.search % compound,