Archived
1
0

Refactored class name

This commit is contained in:
Jip J. Dekker 2014-04-23 15:57:10 +02:00
parent e18e4b4b26
commit 90f03734a6
4 changed files with 7 additions and 7 deletions

View File

@ -1,4 +1,4 @@
from source import Parser from source import Source
from scrapy import log from scrapy import log
from scrapy.http import Request from scrapy.http import Request
from scrapy.selector import Selector from scrapy.selector import Selector
@ -8,7 +8,7 @@ import re
# [TODO] - Maybe clean up usage of '.extract()[0]', because of possible IndexError exception. # [TODO] - Maybe clean up usage of '.extract()[0]', because of possible IndexError exception.
class ChemSpider(Parser): class ChemSpider(Source):
"""ChemSpider scraper for synonyms and properties """ChemSpider scraper for synonyms and properties
This parser will manage searching for chemicals through the This parser will manage searching for chemicals through the
@ -18,7 +18,7 @@ class ChemSpider(Parser):
""" """
def __init__(self): def __init__(self):
Parser.__init__(self) Source.__init__(self)
website = 'http://www.chemspider.com/*' website = 'http://www.chemspider.com/*'

View File

@ -2,7 +2,7 @@ from scrapy import log
# from scrapy.http import Request # from scrapy.http import Request
class Parser: class Source:
website = "http://something/*" # Regex of URI's the source is able to parse website = "http://something/*" # Regex of URI's the source is able to parse
_spider = None _spider = None

View File

@ -15,7 +15,7 @@ class FourmiSpider(Spider):
def parse(self, reponse): def parse(self, reponse):
for parser in self.__parsers: for parser in self.__parsers:
if re.match(parser.website, reponse.url): if re.match(parser.website, reponse.url):
log.msg("Url: " + reponse.url + " -> Parser: " + parser.website, level=log.DEBUG) log.msg("Url: " + reponse.url + " -> Source: " + parser.website, level=log.DEBUG)
return parser.parse(reponse) return parser.parse(reponse)
return None return None

View File

@ -1,7 +1,7 @@
import inspect import inspect
import os import os
import re import re
from FourmiCrawler.sources.source import Parser from FourmiCrawler.sources.source import Source
class SourceLoader: class SourceLoader:
@ -16,7 +16,7 @@ class SourceLoader:
mod = __import__('.'.join([rel_dir.replace("/", "."), py]), fromlist=[py]) mod = __import__('.'.join([rel_dir.replace("/", "."), py]), fromlist=[py])
classes = [getattr(mod, x) for x in dir(mod) if inspect.isclass(getattr(mod, x))] classes = [getattr(mod, x) for x in dir(mod) if inspect.isclass(getattr(mod, x))]
for cls in classes: for cls in classes:
if issubclass(cls, Parser) and cls not in known_parser: if issubclass(cls, Source) and cls not in known_parser:
self.sources.append(cls()) # [review] - Would we ever need arguments for the parsers? self.sources.append(cls()) # [review] - Would we ever need arguments for the parsers?
known_parser.add(cls) known_parser.add(cls)