fixed comments
This commit is contained in:
parent
150fc5bea7
commit
9cbdf57238
@ -22,9 +22,10 @@ class WikipediaParser(Parser):
|
|||||||
pass
|
pass
|
||||||
|
|
||||||
def parse(self, response):
|
def parse(self, response):
|
||||||
|
""" Distributes the above described behaviour """
|
||||||
log.msg('A response from %s just arrived!' % response.url, level=log.DEBUG)
|
log.msg('A response from %s just arrived!' % response.url, level=log.DEBUG)
|
||||||
sel = Selector(response)
|
sel = Selector(response)
|
||||||
compound = sel.xpath('//h1[@id="firstHeading"]//span/text()').extract()[0]
|
compound = sel.xpath('//h1[@id="firstHeading"]//span/text()').extract()[0] # makes sure to use main page
|
||||||
if compound in self.searched_compounds:
|
if compound in self.searched_compounds:
|
||||||
return None
|
return None
|
||||||
else:
|
else:
|
||||||
@ -33,7 +34,7 @@ class WikipediaParser(Parser):
|
|||||||
return items
|
return items
|
||||||
|
|
||||||
def parse_infobox(self, sel):
|
def parse_infobox(self, sel):
|
||||||
#scrape data from infobox on wikipedia.
|
""" scrape data from infobox on wikipedia. """
|
||||||
items = []
|
items = []
|
||||||
|
|
||||||
#be sure to get both chembox (wikipedia template) and drugbox (wikipedia template) to scrape
|
#be sure to get both chembox (wikipedia template) and drugbox (wikipedia template) to scrape
|
||||||
@ -78,7 +79,7 @@ class WikipediaParser(Parser):
|
|||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def cleanitems(items):
|
def cleanitems(items):
|
||||||
#clean up properties using regex, makes it possible to split the values from the units
|
""" clean up properties using regex, makes it possible to split the values from the units """
|
||||||
for item in items:
|
for item in items:
|
||||||
value = item['value']
|
value = item['value']
|
||||||
m = re.search('F;\s(\d+[\.,]?\d*)', value) # clean up numerical Kelvin value (after F)
|
m = re.search('F;\s(\d+[\.,]?\d*)', value) # clean up numerical Kelvin value (after F)
|
||||||
@ -91,7 +92,7 @@ class WikipediaParser(Parser):
|
|||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def get_identifiers(sel):
|
def get_identifiers(sel):
|
||||||
#find external links, named 'Identifiers' to different sources.
|
""" find external links, named 'Identifiers' to different sources. """
|
||||||
links = sel.xpath('//span[contains(concat(" ",normalize-space(@class)," "),"reflink")]/a'
|
links = sel.xpath('//span[contains(concat(" ",normalize-space(@class)," "),"reflink")]/a'
|
||||||
'[contains(concat(" ",normalize-space(@class)," "),"external")]/@href').extract()
|
'[contains(concat(" ",normalize-space(@class)," "),"external")]/@href').extract()
|
||||||
return links
|
return links
|
Reference in New Issue
Block a user