Added the configuration of the scrapy settings as a new module
This commit is contained in:
parent
007549aad8
commit
90129f41cc
46
fourmi.py
46
fourmi.py
@ -30,6 +30,7 @@ from scrapy.utils.project import get_project_settings
|
||||
import docopt
|
||||
|
||||
from FourmiCrawler.spider import FourmiSpider
|
||||
from utils.configurator import Configurator
|
||||
from utils.sourceloader import SourceLoader
|
||||
|
||||
|
||||
@ -50,53 +51,16 @@ def setup_crawler(compound, settings, source_loader, attributes):
|
||||
crawler.start()
|
||||
|
||||
|
||||
def scrapy_settings_manipulation(docopt_arguments):
    """
    Build the Scrapy settings for this run from the command line arguments.

    In the Fourmi project the feed location and feed format are given on the command line instead of in the
    Scrapy settings file, so they are written into the overrides of the project settings here.
    :param docopt_arguments: A dictionary generated by docopt containing all CLI arguments.
    :return: The project settings object with the feed overrides applied.
    """
    settings = get_project_settings()
    output = docopt_arguments["--output"]
    feed_format = docopt_arguments["--format"]

    # NOTE(review): 'result.*format*' is presumably the docopt default of --output, i.e. "no file name given".
    if output != 'result.*format*':
        settings.overrides["FEED_URI"] = output
    elif feed_format == "jsonlines":
        # The jsonlines format gets a .json file name instead of a .jsonlines one.
        settings.overrides["FEED_URI"] = "results.json"
    elif feed_format is not None:
        settings.overrides["FEED_URI"] = "results." + feed_format

    if feed_format is not None:
        settings.overrides["FEED_FORMAT"] = feed_format

    return settings
|
||||
|
||||
|
||||
def start_log(docopt_arguments):
    """
    Start the logging functionality of Scrapy with the options selected on the command line.

    With --verbose the log level is DEBUG and logstdout is False, otherwise the log level is WARNING and
    logstdout is True. The logfile argument is only passed on when --log was given.
    :param docopt_arguments: A dictionary generated by docopt containing all CLI arguments.
    """
    logfile = docopt_arguments["--log"]

    if docopt_arguments["--verbose"]:
        options = {"logstdout": False, "loglevel": log.DEBUG}
    else:
        options = {"logstdout": True, "loglevel": log.WARNING}

    if logfile is not None:
        options["logfile"] = logfile

    log.start(**options)
|
||||
|
||||
|
||||
def search(docopt_arguments, source_loader):
    """
    The function that facilitates the search for a specific compound.

    Logging and the Scrapy output settings are configured through a single Configurator, after which one
    crawler is set up for the compound and the reactor is started.
    :param docopt_arguments: A dictionary generated by docopt containing all CLI arguments.
    :param source_loader: An initiated SourceLoader object pointed at the directory with the sources.
    """
    conf = Configurator()
    conf.start_log(docopt_arguments["--log"], docopt_arguments["--verbose"])
    conf.set_output(docopt_arguments["--output"], docopt_arguments["--format"])

    # Set the crawler up exactly once, using the settings prepared by the Configurator.
    attributes = docopt_arguments["--attributes"].split(',')
    setup_crawler(docopt_arguments["<compound>"], conf.scrapy_settings, source_loader, attributes)
    reactor.run()
|
||||
|
||||
|
||||
|
43
utils/configurator.py
Normal file
43
utils/configurator.py
Normal file
@ -0,0 +1,43 @@
|
||||
from scrapy import log
|
||||
from scrapy.utils.project import get_project_settings
|
||||
|
||||
|
||||
class Configurator(object):
    """
    Holds the Scrapy project settings and applies the command line options of Fourmi to them.

    The resulting settings are available as the scrapy_settings attribute.
    """

    def __init__(self):
        self.scrapy_settings = get_project_settings()

    def set_output(self, filename, format):
        """
        This function manipulates the Scrapy settings that normally would be set in the settings file. In the
        Fourmi project these are command line arguments.
        :param filename: The file name (feed URI) the results are written to. The placeholder
            'result.*format*' means that the name has to be derived from the format.
        :param format: The feed format of the output, or None to leave FEED_URI (unless an explicit filename
            was given) and FEED_FORMAT untouched.
        """
        overrides = self.scrapy_settings.overrides

        if filename != 'result.*format*':
            # An explicit file name was given: the feed goes to that file, not to the format string.
            overrides["FEED_URI"] = filename
        elif format == "jsonlines":
            # The jsonlines format gets a .json file name instead of a .jsonlines one.
            overrides["FEED_URI"] = "results.json"
        elif format is not None:
            overrides["FEED_URI"] = "results." + format

        if format is not None:
            overrides["FEED_FORMAT"] = format

    def start_log(self, logfile, verbose):
        """
        This function starts the logging functionality of Scrapy using the settings given by the CLI.
        :param logfile: The location of the log file, or None to start the log without a logfile argument.
        :param verbose: When true the log level is DEBUG and logstdout is False, otherwise the log level is
            WARNING and logstdout is True.
        """
        if verbose:
            options = {"logstdout": False, "loglevel": log.DEBUG}
        else:
            options = {"logstdout": True, "loglevel": log.WARNING}

        # Only pass the logfile argument along when a log file was actually requested.
        if logfile is not None:
            options["logfile"] = logfile

        log.start(**options)
|
Reference in New Issue
Block a user