Archived
1
0

Added the configuration of the scrapy settings as a new module

This commit is contained in:
Jip J. Dekker 2014-06-08 12:42:21 +02:00
parent 007549aad8
commit 90129f41cc
2 changed files with 48 additions and 41 deletions

View File

@ -30,6 +30,7 @@ from scrapy.utils.project import get_project_settings
import docopt import docopt
from FourmiCrawler.spider import FourmiSpider from FourmiCrawler.spider import FourmiSpider
from utils.configurator import Configurator
from utils.sourceloader import SourceLoader from utils.sourceloader import SourceLoader
@ -50,53 +51,16 @@ def setup_crawler(compound, settings, source_loader, attributes):
crawler.start() crawler.start()
def scrapy_settings_manipulation(docopt_arguments):
"""
This function manipulates the Scrapy settings that normally would be set in the settings file. In the Fourmi
project these are command line arguments.
:param docopt_arguments: A dictionary generated by docopt containing all CLI arguments.
"""
settings = get_project_settings()
if docopt_arguments["--output"] != 'result.*format*':
settings.overrides["FEED_URI"] = docopt_arguments["--output"]
elif docopt_arguments["--format"] == "jsonlines":
settings.overrides["FEED_URI"] = "results.json"
elif docopt_arguments["--format"] is not None:
settings.overrides["FEED_URI"] = "results." + docopt_arguments["--format"]
if docopt_arguments["--format"] is not None:
settings.overrides["FEED_FORMAT"] = docopt_arguments["--format"]
return settings
def start_log(docopt_arguments):
"""
This function starts the logging functionality of Scrapy using the settings given by the CLI.
:param docopt_arguments: A dictionary generated by docopt containing all CLI arguments.
"""
if docopt_arguments["--log"] is not None:
if docopt_arguments["--verbose"]:
log.start(logfile=docopt_arguments["--log"], logstdout=False, loglevel=log.DEBUG)
else:
log.start(logfile=docopt_arguments["--log"], logstdout=True, loglevel=log.WARNING)
else:
if docopt_arguments["--verbose"]:
log.start(logstdout=False, loglevel=log.DEBUG)
else:
log.start(logstdout=True, loglevel=log.WARNING)
def search(docopt_arguments, source_loader): def search(docopt_arguments, source_loader):
""" """
The function that facilitates the search for a specific compound. The function that facilitates the search for a specific compound.
:param docopt_arguments: A dictionary generated by docopt containing all CLI arguments. :param docopt_arguments: A dictionary generated by docopt containing all CLI arguments.
:param source_loader: An initiated SourceLoader object pointed at the directory with the sources. :param source_loader: An initiated SourceLoader object pointed at the directory with the sources.
""" """
start_log(docopt_arguments) conf = Configurator()
settings = scrapy_settings_manipulation(docopt_arguments) conf.start_log(docopt_arguments["--log"], docopt_arguments["--verbose"])
setup_crawler(docopt_arguments["<compound>"], settings, source_loader, docopt_arguments["--attributes"].split(',')) conf.set_output(docopt_arguments["--output"], docopt_arguments["--format"])
setup_crawler(docopt_arguments["<compound>"], conf.scrapy_settings, source_loader, docopt_arguments["--attributes"].split(','))
reactor.run() reactor.run()

43
utils/configurator.py Normal file
View File

@ -0,0 +1,43 @@
from scrapy import log
from scrapy.utils.project import get_project_settings
class Configurator:
    """
    Holds the Scrapy configuration of a run. Values that would normally be set in the Scrapy settings file
    are passed in by the caller and stored as overrides on the project settings.
    """

    def __init__(self):
        # Start from the project settings; set_output() records its overrides on this object.
        self.scrapy_settings = get_project_settings()

    def set_output(self, filename, format):
        """
        This function manipulates the Scrapy feed settings that normally would be set in the settings file.
        :param filename: The name of the output file. The value 'result.*format*' is treated as "no explicit
        name given" (presumably the CLI default -- confirm against the caller), in which case the name is
        derived from the format.
        :param format: The feed format, e.g. "jsonlines", or None to leave the feed format untouched.
        """
        overrides = self.scrapy_settings.overrides

        if filename != 'result.*format*':
            # An explicitly given file name takes precedence over any name derived from the format.
            overrides["FEED_URI"] = filename
        elif format == "jsonlines":
            # The jsonlines format is written to a file with a plain .json extension.
            overrides["FEED_URI"] = "results.json"
        elif format is not None:
            overrides["FEED_URI"] = "results." + format

        if format is not None:
            overrides["FEED_FORMAT"] = format

    def start_log(self, logfile, verbose):
        """
        This function starts the logging functionality of Scrapy.
        :param logfile: The file passed to Scrapy as logfile, or None to start the log without a logfile.
        :param verbose: If true the log is started at DEBUG level with logstdout=False, otherwise at WARNING
        level with logstdout=True.
        """
        if verbose:
            options = {"logstdout": False, "loglevel": log.DEBUG}
        else:
            options = {"logstdout": True, "loglevel": log.WARNING}

        # Only pass a logfile when one was given, so Scrapy's own default applies otherwise.
        if logfile is not None:
            options["logfile"] = logfile

        log.start(**options)