added ignore list
This commit is contained in:
parent
98f58ea4e2
commit
56ee6b1ad3
@ -5,11 +5,15 @@ from scrapy.selector import Selector
|
|||||||
from FourmiCrawler.items import Result
|
from FourmiCrawler.items import Result
|
||||||
import re
|
import re
|
||||||
|
|
||||||
|
# [TODO]: values can be '128.', perhaps remove the dot in that case?
|
||||||
|
|
||||||
class NIST(Source):
|
class NIST(Source):
|
||||||
website = "http://webbook.nist.gov/*"
|
website = "http://webbook.nist.gov/*"
|
||||||
|
|
||||||
search = 'cgi/cbook.cgi?Name=%s&Units=SI&cTP=on'
|
search = 'cgi/cbook.cgi?Name=%s&Units=SI&cTP=on'
|
||||||
|
|
||||||
|
ignore_list = set()
|
||||||
|
|
||||||
def __init__(self):
|
def __init__(self):
|
||||||
Source.__init__(self)
|
Source.__init__(self)
|
||||||
|
|
||||||
@ -235,5 +239,7 @@ class NIST(Source):
|
|||||||
return results
|
return results
|
||||||
|
|
||||||
def new_compound_request(self, compound):
    """Return a search Request for *compound*, at most once per name.

    :param compound: name of the compound to look up; it is substituted
        into ``self.search`` to form the query string.
    :return: a ``Request`` whose callback is ``self.parse``, or ``None``
        when this compound is already present in ``self.ignore_list``.
    """
    # Guard clause: a compound that was already requested is skipped.
    if compound in self.ignore_list:
        return None

    # set.add() stores the whole name as one element.  set.update() would
    # iterate over the string and insert its individual characters, so the
    # name itself would never be recorded and unrelated single-character
    # names would be wrongly ignored.
    self.ignore_list.add(compound)

    # website[:-1] drops the last character of the site pattern (the
    # trailing '*' wildcard) before the search path is appended.
    return Request(url=self.website[:-1] + self.search % compound,
                   callback=self.parse)
|
||||||
|
Reference in New Issue
Block a user