Other occurrences of website REGEX
This commit is contained in:
parent
a3e973ecad
commit
093eba8b04
@ -277,8 +277,8 @@ class ChemSpider(Source):
|
|||||||
log.msg('ChemSpider found multiple substances, taking first '
|
log.msg('ChemSpider found multiple substances, taking first '
|
||||||
'element', level=log.DEBUG)
|
'element', level=log.DEBUG)
|
||||||
csid = csids[0]
|
csid = csids[0]
|
||||||
structure_url = self.website[:-1] + self.structure % csid
|
structure_url = self.website[:-2].replace("\\", "") + self.structure % csid
|
||||||
extendedinfo_url = self.website[:-1] + self.extendedinfo % csid
|
extendedinfo_url = self.website[:-2].replace("\\", "") + self.extendedinfo % csid
|
||||||
log.msg('chemspider URL: %s' % structure_url, level=log.DEBUG)
|
log.msg('chemspider URL: %s' % structure_url, level=log.DEBUG)
|
||||||
return [Request(url=structure_url,
|
return [Request(url=structure_url,
|
||||||
callback=self.parse),
|
callback=self.parse),
|
||||||
|
@ -164,7 +164,7 @@ class NIST(Source):
|
|||||||
extra_data_url = tr.xpath('td[last()][a="Individual data points"]'
|
extra_data_url = tr.xpath('td[last()][a="Individual data points"]'
|
||||||
'/a/@href').extract()
|
'/a/@href').extract()
|
||||||
if extra_data_url:
|
if extra_data_url:
|
||||||
request = Request(url=self.website[:-1] + extra_data_url[0],
|
request = Request(url=self.website[:-2].replace("\\", "") + extra_data_url[0],
|
||||||
callback=self.parse_individual_datapoints)
|
callback=self.parse_individual_datapoints)
|
||||||
results.append(request)
|
results.append(request)
|
||||||
continue
|
continue
|
||||||
|
@ -51,7 +51,7 @@ class PubChem(Source):
|
|||||||
self._spider.get_synonym_requests(synonym)
|
self._spider.get_synonym_requests(synonym)
|
||||||
log.msg('Raw synonyms found: %s' % raw_synonyms, level=log.DEBUG)
|
log.msg('Raw synonyms found: %s' % raw_synonyms, level=log.DEBUG)
|
||||||
|
|
||||||
n = re.search(r'cid=(\d+)',response.url)
|
n = re.search(r'cid=(\d+)', response.url)
|
||||||
if n:
|
if n:
|
||||||
cid = n.group(1)
|
cid = n.group(1)
|
||||||
log.msg('cid: %s' % cid, level=log.DEBUG) #getting the right id of the compound with which it can reach
|
log.msg('cid: %s' % cid, level=log.DEBUG) #getting the right id of the compound with which it can reach
|
||||||
|
Reference in New Issue
Block a user