diff --git a/fourmi.py b/fourmi.py index e45d605..d6d5fd9 100755 --- a/fourmi.py +++ b/fourmi.py @@ -17,7 +17,7 @@ Options: --version Show version. -v Verbose logging output. (Multiple occurrences increase logging level) --log= Save log to an file. - -o --output= Output file [default: results.*format*] + -o --output= Output file [default: .*format*] -f --format= Output formats (supported: csv, json, jsonlines, xml) [default: csv] --include= Include only sources that match these regular expressions split by a comma. --exclude= Exclude the sources that match these regular expressions split by a comma. @@ -58,7 +58,7 @@ def search(docopt_arguments, source_loader): """ conf = Configurator() conf.set_logging(docopt_arguments["--log"], docopt_arguments["-v"]) - conf.set_output(docopt_arguments["--output"], docopt_arguments["--format"]) + conf.set_output(docopt_arguments["--output"], docopt_arguments["--format"], docopt_arguments[""]) setup_crawler(docopt_arguments[""], conf.scrapy_settings, source_loader, docopt_arguments["--attributes"].split(',')) if conf.scrapy_settings.getbool("LOG_ENABLED"): diff --git a/utils/configurator.py b/utils/configurator.py index 358adc7..2db7cdb 100644 --- a/utils/configurator.py +++ b/utils/configurator.py @@ -3,6 +3,7 @@ import os from scrapy.utils.project import get_project_settings + class Configurator: """ A helper class in the fourmi class. This class is used to process the settings as set @@ -12,7 +13,7 @@ class Configurator: def __init__(self): self.scrapy_settings = get_project_settings() - def set_output(self, filename, fileformat): + def set_output(self, filename, fileformat, compound): """ This function manipulates the Scrapy output file settings that normally would be set in the settings file. In the Fourmi project these are command line arguments. @@ -20,12 +21,12 @@ class Configurator: :param fileformat: The format in which the output will be. """ - if filename != 'results.*format*': + if filename != '.*format*': self.scrapy_settings.overrides["FEED_URI"] = filename elif fileformat == "jsonlines": - self.scrapy_settings.overrides["FEED_URI"] = "results.json" + self.scrapy_settings.overrides["FEED_URI"] = compound + ".json" elif fileformat is not None: - self.scrapy_settings.overrides["FEED_URI"] = "results." + fileformat + self.scrapy_settings.overrides["FEED_URI"] = compound + "." + fileformat if fileformat is not None: self.scrapy_settings.overrides["FEED_FORMAT"] = fileformat