Archived
1
0

added ignore list

This commit is contained in:
RTB 2014-05-17 14:09:10 +02:00
parent 98f58ea4e2
commit 56ee6b1ad3

View File

@ -5,11 +5,15 @@ from scrapy.selector import Selector
from FourmiCrawler.items import Result
import re
# [TODO]: values can be '128.', perhaps remove the dot in that case?
class NIST(Source):
website = "http://webbook.nist.gov/*"
search = 'cgi/cbook.cgi?Name=%s&Units=SI&cTP=on'
ignore_list = set()
def __init__(self):
Source.__init__(self)
@ -235,5 +239,7 @@ class NIST(Source):
return results
def new_compound_request(self, compound):
    """Build the search request for a compound, at most once per compound.

    :param compound: compound name substituted into the search URL
    :return: a Request for the search page with self.parse as callback,
        or None when the compound is already in self.ignore_list
    """
    if compound in self.ignore_list:
        return None
    # set.update() would iterate the string and store its individual
    # characters; add() stores the compound name itself so the membership
    # check above can actually match on a later call.
    self.ignore_list.add(compound)
    # Drop the last character of the website pattern before appending the
    # search query.
    return Request(url=self.website[:-1] + self.search % compound,
                   callback=self.parse)