Archived
1
0

Edited the actual functions

This commit is contained in:
Jip J. Dekker 2014-06-20 11:14:51 +02:00
parent 335f558aca
commit 6c0b55dab6
2 changed files with 7 additions and 6 deletions

View File

@ -17,7 +17,7 @@ Options:
--version Show version. --version Show version.
-v Verbose logging output. (Multiple occurrences increase logging level) -v Verbose logging output. (Multiple occurrences increase logging level)
--log=<file> Save log to an file. --log=<file> Save log to an file.
-o <file> --output=<file> Output file [default: results.*format*] -o <file> --output=<file> Output file [default: <compound>.*format*]
-f <format> --format=<format> Output formats (supported: csv, json, jsonlines, xml) [default: csv] -f <format> --format=<format> Output formats (supported: csv, json, jsonlines, xml) [default: csv]
--include=<regex> Include only sources that match these regular expressions split by a comma. --include=<regex> Include only sources that match these regular expressions split by a comma.
--exclude=<regex> Exclude the sources that match these regular expressions split by a comma. --exclude=<regex> Exclude the sources that match these regular expressions split by a comma.
@ -58,7 +58,7 @@ def search(docopt_arguments, source_loader):
""" """
conf = Configurator() conf = Configurator()
conf.set_logging(docopt_arguments["--log"], docopt_arguments["-v"]) conf.set_logging(docopt_arguments["--log"], docopt_arguments["-v"])
conf.set_output(docopt_arguments["--output"], docopt_arguments["--format"]) conf.set_output(docopt_arguments["--output"], docopt_arguments["--format"], docopt_arguments["<compound>"])
setup_crawler(docopt_arguments["<compound>"], conf.scrapy_settings, setup_crawler(docopt_arguments["<compound>"], conf.scrapy_settings,
source_loader, docopt_arguments["--attributes"].split(',')) source_loader, docopt_arguments["--attributes"].split(','))
if conf.scrapy_settings.getbool("LOG_ENABLED"): if conf.scrapy_settings.getbool("LOG_ENABLED"):

View File

@ -3,6 +3,7 @@ import os
from scrapy.utils.project import get_project_settings from scrapy.utils.project import get_project_settings
class Configurator: class Configurator:
""" """
A helper class in the fourmi class. This class is used to process the settings as set A helper class in the fourmi class. This class is used to process the settings as set
@ -12,7 +13,7 @@ class Configurator:
def __init__(self): def __init__(self):
self.scrapy_settings = get_project_settings() self.scrapy_settings = get_project_settings()
def set_output(self, filename, fileformat): def set_output(self, filename, fileformat, compound):
""" """
This function manipulates the Scrapy output file settings that normally would be set in the settings file. This function manipulates the Scrapy output file settings that normally would be set in the settings file.
In the Fourmi project these are command line arguments. In the Fourmi project these are command line arguments.
@ -20,12 +21,12 @@ class Configurator:
:param fileformat: The format in which the output will be. :param fileformat: The format in which the output will be.
""" """
if filename != 'results.*format*': if filename != '<compound>.*format*':
self.scrapy_settings.overrides["FEED_URI"] = filename self.scrapy_settings.overrides["FEED_URI"] = filename
elif fileformat == "jsonlines": elif fileformat == "jsonlines":
self.scrapy_settings.overrides["FEED_URI"] = "results.json" self.scrapy_settings.overrides["FEED_URI"] = compound + ".json"
elif fileformat is not None: elif fileformat is not None:
self.scrapy_settings.overrides["FEED_URI"] = "results." + fileformat self.scrapy_settings.overrides["FEED_URI"] = compound + "." + fileformat
if fileformat is not None: if fileformat is not None:
self.scrapy_settings.overrides["FEED_FORMAT"] = fileformat self.scrapy_settings.overrides["FEED_FORMAT"] = fileformat