
Merge branch 'develop' into feature/GUI

Harmen Prins 2014-06-17 17:14:36 +02:00
commit 738e1afb36
16 changed files with 331 additions and 76 deletions

Changelog.md (new file)

@@ -0,0 +1,12 @@
### v0.5.3
- FIX: It is now again possible to use both verbose and the source inclusion/exclusion options
- FIX: Logging is now "actually" disabled if not using the verbose option.
- FEATURE: Added support for PubChem
### v0.5.2
- FIX: The signature used to contain untracked and older files; the current signature
should be correct.
### v0.5.1
- UPDATED: Logging functionality from command line
- DEV: Code cleanup and extra tests

FourmiCrawler/settings.py

@@ -18,8 +18,10 @@ ITEM_PIPELINES = {
 FEED_URI = 'results.json'
 FEED_FORMAT = 'jsonlines'
 # Crawl responsibly by identifying yourself (and your website) on the
 # user-agent
+# [todo] - Check for repercussions on spoofing the user agent
 # USER_AGENT = 'FourmiCrawler (+http://www.yourdomain.com)'
+USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.137 Safari/537.36'

FourmiCrawler/sources/ChemSpider.py

@@ -26,9 +26,8 @@ class ChemSpider(Source):
     structure = 'Chemical-Structure.%s.html'
     extendedinfo = 'MassSpecAPI.asmx/GetExtendedCompoundInfo?csid=%s&token='

-    def __init__(self, config={}):
+    def __init__(self, config=None):
         Source.__init__(self, config)
-        self.cfg = config
         self.ignore_list = []
         if 'token' not in self.cfg or self.cfg['token'] == '':
             log.msg('ChemSpider token not set or empty, search/MassSpec API '
@@ -37,7 +36,6 @@ class ChemSpider(Source):
         self.search += self.cfg['token']
         self.extendedinfo += self.cfg['token']

     def parse(self, response):
         sel = Selector(response)
         requests = []
@@ -199,13 +197,14 @@ class ChemSpider(Source):
         return properties

     def newresult(self, attribute, value, conditions='', source='ChemSpider'):
-        return Result({
+        return Result(
+            {
                 'attribute': attribute,
                 'value': value,
                 'source': source,
                 'reliability': self.cfg['reliability'],
                 'conditions': conditions
             })

     def parse_searchrequest(self, response):
         """Parse the initial response of the ChemSpider Search API """

FourmiCrawler/sources/NIST.py

@@ -22,12 +22,9 @@ class NIST(Source):
     search = 'cgi/cbook.cgi?Name=%s&Units=SI&cTP=on'

-    cfg = {}
-
-    def __init__(self, config={}):
+    def __init__(self, config=None):
         Source.__init__(self, config)
         self.ignore_list = set()
-        self.cfg = config

     def parse(self, response):
         sel = Selector(response)
@@ -88,7 +85,6 @@ class NIST(Source):
         InChiKey, CAS number
         """
         ul = sel.xpath('body/ul[li/strong="IUPAC Standard InChI:"]')
-        li = ul.xpath('li')
         raw_synonyms = ul.xpath('li[strong="Other names:"]/text()').extract()
         for synonym in raw_synonyms[0].strip().split(';\n'):
@@ -255,12 +251,13 @@ class NIST(Source):
         return results

     def newresult(self, attribute, value, conditions=''):
-        return Result({
+        return Result(
+            {
                 'attribute': attribute,
                 'value': value,
                 'source': 'NIST',
                 'reliability': self.cfg['reliability'],
                 'conditions': conditions
             })

     def new_compound_request(self, compound):

FourmiCrawler/sources/PubChem.py (new file)

@@ -0,0 +1,111 @@
from scrapy.http import Request
from scrapy import log
from source import Source
from scrapy.selector import Selector
from FourmiCrawler.items import Result
import re


class PubChem(Source):
    """ PubChem scraper for chemical properties

    This parser parses the part on PubChem pages that gives Chemical and Physical properties of a substance,
    including sources of the values of properties.
    """
    # PubChem has its data on compound name, properties and their values on different html pages,
    # so different URLs are used
    website = 'https://*.ncbi.nlm.nih.gov/*'
    website_www = 'https://www.ncbi.nlm.nih.gov/*'
    website_pubchem = 'https://pubchem.ncbi.nlm.nih.gov/*'
    search = 'pccompound?term=%s'
    data_url = 'toc/summary_toc.cgi?tocid=27&cid=%s'

    __spider = None
    searched_compounds = set()

    def __init__(self, config):
        Source.__init__(self, config)
        self.cfg = config

    def parse(self, response):
        """
        Distributes the above described behaviour
        :param response: The incoming search request
        :return: Returns the found properties if the response is unique, or None if it's already known
        """
        requests = []
        log.msg('A response from %s just arrived!' % response.url, level=log.DEBUG)

        sel = Selector(response)
        compound = sel.xpath('//h1/text()').extract()[0]
        if compound in self.searched_compounds:
            return None

        self.searched_compounds.update(compound)
        raw_synonyms = sel.xpath('//div[@class="smalltext"]/text()').extract()[0]
        for synonym in raw_synonyms.strip().split(', '):
            log.msg('PubChem synonym found: %s' % synonym, level=log.DEBUG)
            self.searched_compounds.update(synonym)
            self._spider.get_synonym_requests(synonym)
        log.msg('Raw synonyms found: %s' % raw_synonyms, level=log.DEBUG)

        n = re.search(r'cid=(\d+)', response.url)
        if n:
            cid = n.group(1)
            log.msg('cid: %s' % cid, level=log.DEBUG)
            # getting the right id of the compound with which it can reach the separate
            # html page which contains the properties and their values
            # using this cid to get the right url and scrape it
            requests.append(Request(url=self.website_pubchem[:-1] + self.data_url % cid, callback=self.parse_data))
        return requests

    def parse_data(self, response):
        """
        Parse data found in 'Chemical and Physical properties' part of a substance page.
        :param response: The response with the page to parse
        :return: requests: Returns a list of properties with their values, source, etc.
        """
        log.msg('parsing data', level=log.DEBUG)
        requests = []

        sel = Selector(response)
        props = sel.xpath('//div')

        for prop in props:
            prop_name = ''.join(prop.xpath('b/text()').extract())  # name of property that it is parsing
            if prop.xpath('a'):  # parsing for single value in property
                prop_source = ''.join(prop.xpath('a/@title').extract())
                prop_value = ''.join(prop.xpath('a/text()').extract())
                new_prop = Result({
                    'attribute': prop_name,
                    'value': prop_value,
                    'source': prop_source,
                    'reliability': 'Unknown',
                    'conditions': ''
                })
                log.msg('PubChem prop: |%s| |%s| |%s|' %
                        (new_prop['attribute'], new_prop['value'],
                         new_prop['source']), level=log.DEBUG)
                requests.append(new_prop)
            elif prop.xpath('ul'):  # parsing for multiple values (list) in property
                prop_values = prop.xpath('ul//li')
                for prop_li in prop_values:
                    prop_value = ''.join(prop_li.xpath('a/text()').extract())
                    prop_source = ''.join(prop_li.xpath('a/@title').extract())
                    new_prop = Result({
                        'attribute': prop_name,
                        'value': prop_value,
                        'source': prop_source,
                        'reliability': 'Unknown',
                        'conditions': ''
                    })
                    log.msg('PubChem prop: |%s| |%s| |%s|' %
                            (new_prop['attribute'], new_prop['value'],
                             new_prop['source']), level=log.DEBUG)
                    requests.append(new_prop)
        return requests

    def new_compound_request(self, compound):
        return Request(url=self.website_www[:-1] + self.search % compound, callback=self.parse)

FourmiCrawler/sources/WikipediaParser.py

@@ -1,9 +1,11 @@
+import re
 from scrapy.http import Request
 from scrapy import log
-from source import Source
 from scrapy.selector import Selector
+from source import Source
 from FourmiCrawler.items import Result
-import re


 class WikipediaParser(Source):
@@ -17,11 +19,8 @@ class WikipediaParser(Source):
     __spider = None
     searched_compounds = []

-    cfg = {}
-
-    def __init__(self, config={}):
+    def __init__(self, config=None):
         Source.__init__(self, config)
-        self.cfg = config

     def parse(self, response):
         """
@@ -53,7 +52,7 @@ class WikipediaParser(Source):
         # scrape the chembox (wikipedia template)
         items = self.parse_chembox(sel, items)

-        #scrape the drugbox (wikipedia template)
+        # scrape the drugbox (wikipedia template)
         items = self.parse_drugbox(sel, items)

         items = filter(lambda a: a['value'] != '', items)  # remove items with an empty value
@@ -123,7 +122,6 @@ class WikipediaParser(Source):
                 level=log.DEBUG)
         return items

     def new_compound_request(self, compound):
         return Request(url=self.website[:-1] + compound, callback=self.parse)
@@ -161,10 +159,11 @@ class WikipediaParser(Source):
         return links

     def newresult(self, attribute, value):
-        return Result({
+        return Result(
+            {
                 'attribute': attribute,
                 'value': value,
                 'source': 'Wikipedia',
                 'reliability': self.cfg['reliability'],
                 'conditions': ''
             })

FourmiCrawler/sources/source.py

@@ -6,10 +6,13 @@ class Source:
     website = "http://something/*"  # Regex of URI's the source is able to parse
     _spider = None

-    def __init__(self, config={}):
+    def __init__(self, config=None):
         """
         Initiation of a new Source
         """
+        self.cfg = {}
+        if config is not None:
+            self.cfg = config
         pass

     def parse(self, response):
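The `config=None` default adopted here (and in the ChemSpider, NIST, WikipediaParser and spider constructors above) avoids Python's shared mutable default argument. A minimal sketch, not part of the commit, of why `config={}` is risky; the class names are made up for illustration:

```python
# Minimal sketch: a mutable default is evaluated once and shared by every call,
# so state written through one instance leaks into every other instance.
class Risky(object):
    def __init__(self, config={}):        # one dict shared by all instances
        self.cfg = config


class Safe(object):
    def __init__(self, config=None):      # fresh dict unless one is supplied
        self.cfg = {} if config is None else config


a, b = Risky(), Risky()
a.cfg['token'] = 'abc'
print(b.cfg)   # {'token': 'abc'}  -- leaked into b

c, d = Safe(), Safe()
c.cfg['token'] = 'abc'
print(d.cfg)   # {}
```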

FourmiCrawler/spider.py

@@ -10,7 +10,7 @@ class FourmiSpider(Spider):
     """
     name = "FourmiSpider"

-    def __init__(self, compound=None, selected_attributes=[".*"], *args, **kwargs):
+    def __init__(self, compound=None, selected_attributes=None, *args, **kwargs):
         """
         Initiation of the Spider
         :param compound: compound that will be searched.
@@ -20,7 +20,10 @@ class FourmiSpider(Spider):
         self.synonyms = set()
         super(FourmiSpider, self).__init__(*args, **kwargs)
         self.synonyms.add(compound)
-        self.selected_attributes = selected_attributes
+        if selected_attributes is None:
+            self.selected_attributes = [".*"]
+        else:
+            self.selected_attributes = selected_attributes

     def parse(self, response):
         """

README.md

@@ -23,21 +23,21 @@ documentation](http://doc.scrapy.org/en/latest/index.html).
 ### Installing

-If you're installing Fourmi, please take a look at our [installation guide](...)
-on our wiki. When you've installed the application, make sure to check our
-[usage guide](...).
+If you're installing Fourmi, please take a look at our installation guides
+on our [wiki](https://github.com/jjdekker/Fourmi/wiki). When you've installed the application, make sure to check our
+usage guide on the [Command Line Interface](https://github.com/jjdekker/Fourmi/wiki/CLI) and on the [Graphical User Interface](https://github.com/jjdekker/Fourmi/wiki/GUI).

 ### Using the Source

 To use the Fourmi source code multiple dependencies are required. Take a look at
-the [wiki page](...) on using the application source code for a step by step
+our [wiki pages](https://github.com/jjdekker/Fourmi/wiki) on using the application source code in our a step by step
 installation guide.

 When developing for the Fourmi project keep in mind that code readability is a
 must. To maintain the readability, code should be conform with the
 [PEP-8](http://legacy.python.org/dev/peps/pep-0008/) style guide for Python
 code. More information about the different structures and principles of the
-Fourmi application can be found on our [wiki](...).
+Fourmi application can be found on our [wiki](https://github.com/jjdekker/Fourmi/wiki).

 ### To Do
@@ -45,13 +45,9 @@ The Fourmi project has the following goals for the nearby future:
 __Main goals:__

-- Improve our documentation and guides. (Assignee: Dekker)
 - Build an graphical user interface(GUI) as alternative for the command line
 interface(CLI). (Assignee: Harmen)
 - Compiling the source into an windows executable. (Assignee: Bas)
-- Create an configuration file to hold logins and API keys.
-- Determine reliability of our data point.
-- Create an module to gather data from NIST. (Assignee: Rob)
 - Create an module to gather data from PubChem. (Assignee: Nout)

 __Side goals:__

SIGNED.md (new file)

@@ -0,0 +1,101 @@
##### Signed by https://keybase.io/jdekker
```
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.4.11 (GNU/Linux)
iQIcBAABAgAGBQJTn3GgAAoJEJrQ9RIUCT6/CI4P/RSAQrd6JugGZoQu/gNdW6eB
MYCybqYGZiieVhUaGOnFNVlp68YpXH+sP/Uc6hXEX30UQEsDmhMeT5NA7ZMS+zJ9
MNHGQdJq22lGb3+VoVBV4RTMdkQXOXvx6p5biskjIEtM3tfTxP529GvAX2TFUNnt
gGWk28EDr30M95XwDxwWo+57Xv8VtSb3VSvXEbrdwGYf8EoQo9oPtzYQ0YcdupcC
ET8bukYVcwpAjoTnPlEy89TiHHohwmimr2ASXeQ64Ks5wfjzcF7NENCAmaAfR+KI
VLLuGqdWMBx1ewVuAXTCZ0Mga/kBoRUaO0PC13UmL8LhhZY9Z3cwD4UnPU35/RQi
IbLfQcZHf/gEvyMeiTYCsyWpm+/xxn1+EfHol4/Q9VSXzZgRBX05Ik6tqeCvjdgG
4PyHBaJTTm/HfMNdg3mr1mbyjTv5UxglEyPv+Y4NdfoVfepkXsXbzvNSyVffZ3Bw
UaFp7KzIC4Jugdpv63FleiAdDY0+iZ5shH86wD1+HJ0/a87kn5Ao1yESby7J7U+f
poZQYeMFeuC0T5hY/3iYoyvZ68oH918ESESiucSulp5BvfwuqGL2+xo5uJIwGYXE
3IDQC7xbA14JHX86IVJlSHAD33iWyiC+5yjw4/bRRVl37KPsLdHiXH3YIRnF5I2I
ZbM/uDYyJdZbBe4UoCoF
=AMhi
-----END PGP SIGNATURE-----
```
<!-- END SIGNATURES -->
### Begin signed statement
#### Expect
```
size exec file contents
./
375 .gitignore d2e475a6a4fa51422cac0a07495914e776858fb9ab9c8937a4d491a3e042d6b1
464 .travis.yml 3063ba078607b8d16bd6467afc15fbbaa4b26c1e30be5ce7cef453cfccbaa95c
428 Changelog.md c7791d1914ddca9ff1549d90468a79787a7feafe94cecd756e3d7cbd4bcbc7df
FourmiCrawler/
0 __init__.py e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855
304 items.py b00d49a3d53fa13306c7f8b023adb93ab88423c4fce46600689814f6b02bb806
2178 pipelines.py f9b7b84938060751e15e45de5133dffe50c798bff2a20019206fe7c9d677ad49
914 settings.py 0be2eaf8e83e85ed27754c896421180fc80cb5ce44449aa9f1048e465d1a96f2
sources/
9991 ChemSpider.py 847013e34c5c3683ec66a337837287512b4bab9fbea2ece12e4130ab0dbf264d
9898 NIST.py 97abc84fce85c47b789822715a1945ab84cc052a32340c861141c1af66bab644
4754 PubChem.py 58ed4c92519e385f2768cf8034b006b18f8a21632cb1c5a0849b1a329a8c6ffb
6907 WikipediaParser.py 5d6de911c773129a34b76c40a9b547aafc67644a15f39cd0be6afc7a16fb0f97
0 __init__.py e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855
1262 source.py 16c4cdfca849b7dc2bc89d7a6f7ad021f4aa1d04234394312f1d0edf0fd9c5a4
3026 spider.py 1ffba2512988b7a6b535a4a31a4ef688ece4f8c595c3d50355c34ef46b23e44a
1081 LICENSE 36951e5f1910bad3e008ab7228f35ad8933192e52d3c3ae6a5e875765e27192c
3965 README.md d21236d6a175be28ef8e2fee8a256e95b6a513163e3f1071c26c62e9093db7f3
3676 x fourmi.py 2ff89f97fd2a49d08417d9ab6cf08e88944d0c45f54ec84550b530be48676c23
261 scrapy.cfg 624c068fd06303daa65b8e0d0d3ef88ac1f123be2694ef5b4f3f9a9dcd983f85
tests/
1 __init__.py 01ba4719c80b6fe911b091a7c05124b64eeece964e09c058ef8f9805daca546b
2837 test_configurator.py 4a0eb6e7121eb09a63ab5cb797570d1a42080c5346c3b8b365da56eefa599e80
1892 test_pipeline.py 387a336b0f36722a20e712aa033e5771c44f9e92561dd73acffd53d622c52031
1260 test_sourceloader.py b108b4b80adcdb7401273a9823b1f1a19eb5178776186eb5a9976aed8b1ee869
2113 test_spider.py 300f280377b522737be0d8e4a80031ab118a4011bdbb92131e9c400fcdab6299
utils/
0 __init__.py e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855
3552 configurator.py e2b7e0ee6c1fef4373785dfe5df8ec6950f31ce6a5d9632b69a66ea3d1eaf921
2537 sourceloader.py f5a5ac2a6aba0658dbe11361f465caabcf3c06c5c8dc9a631874211cc19d2d37
```
#### Ignore
```
/SIGNED.md
```
#### Presets
```
git # ignore .git and anything as described by .gitignore files
dropbox # ignore .dropbox-cache and other Dropbox-related files
kb # ignore anything as described by .kbignore files
```
<!-- summarize version = 0.0.9 -->
### End signed statement
<hr>
#### Notes
With keybase you can sign any directory's contents, whether it's a git repo,
source code distribution, or a personal documents folder. It aims to replace the drudgery of:
1. comparing a zipped file to a detached statement
2. downloading a public key
3. confirming it is in fact the author's by reviewing public statements they've made, using it
All in one simple command:
```bash
keybase dir verify
```
There are lots of options, including assertions for automating your checks.
For more info, check out https://keybase.io/docs/command_line/code_signing

fourmi.py

@@ -5,7 +5,7 @@ Fourmi, a web scraper build to search specific information for a given compound
 Usage:
     fourmi search <compound>
     fourmi [options] search <compound>
-    fourmi [options] [--include=<sourcename> | --exclude=<sourcename>] search <compound>
+    fourmi [options] [-v | -vv | -vvv] [--include=<sourcename> | --exclude=<sourcename>] search <compound>
     fourmi list
     fourmi [--include=<sourcename> | --exclude=<sourcename>] list
     fourmi -h | --help
@@ -15,7 +15,7 @@ Options:
     --attributes=<regex>     Include only that match these regular expressions split by a comma. [default: .*]
     -h --help                Show this screen.
     --version                Show version.
-    --verbose                Verbose logging output.
+    -v                       Verbose logging output. (Multiple occurrences increase logging level)
     --log=<file>             Save log to an file.
     -o <file> --output=<file> Output file [default: results.*format*]
     -f <format> --format=<format> Output formats (supported: csv, json, jsonlines, xml) [default: csv]
@@ -25,8 +25,7 @@ Options:
 from twisted.internet import reactor
 from scrapy.crawler import Crawler
-from scrapy import log, signals
-from scrapy.utils.project import get_project_settings
+from scrapy import signals, log
 import docopt

 from FourmiCrawler.spider import FourmiSpider
@@ -58,15 +57,19 @@ def search(docopt_arguments, source_loader):
     :param source_loader: An initiated SourceLoader object pointed at the directory with the sources.
     """
     conf = Configurator()
-    conf.start_log(docopt_arguments["--log"], docopt_arguments["--verbose"])
+    conf.set_logging(docopt_arguments["--log"], docopt_arguments["-v"])
     conf.set_output(docopt_arguments["--output"], docopt_arguments["--format"])
-    setup_crawler(docopt_arguments["<compound>"], conf.scrapy_settings, source_loader, docopt_arguments["--attributes"].split(','))
+    setup_crawler(docopt_arguments["<compound>"], conf.scrapy_settings,
+                  source_loader, docopt_arguments["--attributes"].split(','))
+    if conf.scrapy_settings.getbool("LOG_ENABLED"):
+        log.start(conf.scrapy_settings.get("LOG_FILE"),
+                  conf.scrapy_settings.get("LOG_LEVEL"), conf.scrapy_settings.get("LOG_STDOUT"))
     reactor.run()


 # The start for the Fourmi Command Line interface.
 if __name__ == '__main__':
-    arguments = docopt.docopt(__doc__, version='Fourmi - V0.5.0')
+    arguments = docopt.docopt(__doc__, version='Fourmi - V0.5.3')
     loader = SourceLoader()

     if arguments["--include"]:
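Because the usage pattern now lists `[-v | -vv | -vvv]`, docopt reports the flag as an occurrence count, which is the integer handed to `Configurator.set_logging` in `search()` above. A minimal sketch of that behaviour, not taken from the repository; the program name and argument values are made up:

```python
# Minimal sketch: docopt counts repeated short flags when the pattern allows repetition.
import docopt

usage = """
Usage:
    prog [-v | -vv | -vvv] search <compound>

Options:
    -v    Verbose logging output. (Multiple occurrences increase logging level)
"""

args = docopt.docopt(usage, argv=['-vv', 'search', 'caffeine'])
print(args['-v'])          # 2 -> set_logging() would select the "INFO" log level
print(args['<compound>'])  # 'caffeine'
```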

tests/test_configurator.py

@@ -1,7 +1,8 @@
 import unittest
+import ConfigParser

 from utils.configurator import Configurator
-import ConfigParser


 class TestConfigurator(unittest.TestCase):
@@ -21,11 +22,28 @@ class TestConfigurator(unittest.TestCase):
         self.assertEqual(self.conf.scrapy_settings["FEED_URI"], "results.csv")
         self.assertEqual(self.conf.scrapy_settings["FEED_FORMAT"], "csv")

-    # def test_start_log(self):
-    #     self.conf.start_log("test.log", True)
-    #     self.conf.start_log("test.log", False)
-    #     self.conf.start_log(None, True)
-    #     self.conf.start_log(None, False)
+    def test_start_log(self):
+        for i in range(0, 3):
+            self.conf.set_logging("TEST", i)
+            self.assertEqual(self.conf.scrapy_settings.get("LOG_FILE"), "TEST")
+            if i > 0:
+                self.assertEqual(self.conf.scrapy_settings.get("LOG_ENABLED"), True)
+                if i > 1:
+                    self.assertEqual(self.conf.scrapy_settings.get("LOG_STDOUT"), False)
+                else:
+                    self.assertEqual(self.conf.scrapy_settings.get("LOG_STDOUT"), True)
+            else:
+                self.assertEqual(self.conf.scrapy_settings.get("LOG_ENABLED"), False)
+                self.assertEqual(self.conf.scrapy_settings.get("LOG_STDOUT"), True)
+            if i == 1:
+                self.assertEqual(self.conf.scrapy_settings.get("LOG_LEVEL"), "WARNING")
+            elif i == 2:
+                self.assertEqual(self.conf.scrapy_settings.get("LOG_LEVEL"), "INFO")
+            elif i == 3:
+                self.assertEqual(self.conf.scrapy_settings.get("LOG_LEVEL"), "DEBUG")
+
+            self.conf.set_logging(verbose=i)
+            self.assertEqual(self.conf.scrapy_settings.get("LOG_FILE"), None)

     def test_read_sourceconfiguration(self):
         config = self.conf.read_sourceconfiguration()

tests/test_pipeline.py

@@ -13,6 +13,7 @@ class TestPipelines(unittest.TestCase):
     def test_none_pipeline(self):
         # Testing the pipeline that replaces the None values in items.
         self.testItem["value"] = "abc"
+        self.testItem["source"] = None
         pipe = pipelines.RemoveNonePipeline()
         processed = pipe.process_item(self.testItem, spider.FourmiSpider())

tests/test_spider.py

@@ -47,7 +47,6 @@ class TestFoumiSpider(unittest.TestCase):
         self.assertGreater(len(requests), 0)
         self.assertIsInstance(requests[0], Request)

     def test_synonym_requests(self):
         # A test for the synonym request function
         self.spi._sources = []

utils/configurator.py

@@ -1,7 +1,8 @@
-from scrapy import log
-from scrapy.utils.project import get_project_settings
 import ConfigParser
+
+from scrapy.utils.project import get_project_settings


 class Configurator:
     """
     A helper class in the fourmi class. This class is used to process the settings as set
@@ -11,7 +12,6 @@ class Configurator:
     def __init__(self):
         self.scrapy_settings = get_project_settings()

     def set_output(self, filename, fileformat):
         """
         This function manipulates the Scrapy output file settings that normally would be set in the settings file.
@@ -30,23 +30,34 @@ class Configurator:
         if fileformat is not None:
             self.scrapy_settings.overrides["FEED_FORMAT"] = fileformat

-    def start_log(self, logfile, verbose):
+    def set_logging(self, logfile=None, verbose=0):
         """
-        This function starts the logging functionality of Scrapy using the settings given by the CLI.
+        This function changes the default settings of Scapy's logging functionality
+        using the settings given by the CLI.
         :param logfile: The location where the logfile will be saved.
-        :param verbose: A boolean value to switch between loglevels.
+        :param verbose: A integer value to switch between loglevels.
         """
-        if logfile is not None:
-            if verbose:
-                log.start(logfile=logfile, logstdout=False, loglevel=log.DEBUG)
-            else:
-                log.start(logfile=logfile, logstdout=True, loglevel=log.WARNING)
-        else:
-            if verbose:
-                log.start(logstdout=False, loglevel=log.DEBUG)
-            else:
-                log.start(logstdout=True, loglevel=log.WARNING)
+        if verbose != 0:
+            self.scrapy_settings.overrides["LOG_ENABLED"] = True
+        else:
+            self.scrapy_settings.overrides["LOG_ENABLED"] = False
+
+        if verbose == 1:
+            self.scrapy_settings.overrides["LOG_LEVEL"] = "WARNING"
+        elif verbose == 2:
+            self.scrapy_settings.overrides["LOG_LEVEL"] = "INFO"
+        else:
+            self.scrapy_settings.overrides["LOG_LEVEL"] = "DEBUG"
+
+        if verbose > 1:
+            self.scrapy_settings.overrides["LOG_STDOUT"] = False
+        else:
+            self.scrapy_settings.overrides["LOG_STDOUT"] = True
+
+        if logfile is not None:
+            self.scrapy_settings.overrides["LOG_FILE"] = logfile
+        else:
+            self.scrapy_settings.overrides["LOG_FILE"] = None

     @staticmethod
     def read_sourceconfiguration():
@@ -56,7 +67,7 @@ class Configurator:
         :return a ConfigParser object of sources.cfg
         """
         config = ConfigParser.ConfigParser()
         config.read('sources.cfg')  # [TODO]: should be softcoded eventually
         return config

     @staticmethod
@@ -75,7 +86,6 @@ class Configurator:
         elif config.defaults():
             section = config.defaults()
         if 'reliability' not in section:
-            log.msg('Reliability not set for %s' % sourcename,
-                    level=log.WARNING)
+            print 'WARNING: Reliability not set for %s' % sourcename
             section['reliability'] = ''
         return section
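A short usage sketch, not part of the commit, of how the `-v` count maps onto Scrapy settings through the new `set_logging` branches shown above; the log file name is made up, and it assumes the repository layout listed in SIGNED.md:

```python
# Minimal sketch: the -v occurrence count selects LOG_ENABLED, LOG_LEVEL and LOG_STDOUT.
from utils.configurator import Configurator

conf = Configurator()

conf.set_logging("fourmi.log", 0)  # LOG_ENABLED False, LOG_LEVEL "DEBUG",   LOG_STDOUT True
conf.set_logging("fourmi.log", 1)  # LOG_ENABLED True,  LOG_LEVEL "WARNING", LOG_STDOUT True
conf.set_logging("fourmi.log", 2)  # LOG_ENABLED True,  LOG_LEVEL "INFO",    LOG_STDOUT False
conf.set_logging(verbose=3)        # LOG_ENABLED True,  LOG_LEVEL "DEBUG",   LOG_STDOUT False, LOG_FILE None

print(conf.scrapy_settings.get("LOG_LEVEL"))  # "DEBUG"
```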

utils/sourceloader.py

@@ -5,6 +5,7 @@ import re
 from FourmiCrawler.sources.source import Source
 from utils.configurator import Configurator


 class SourceLoader:
     sources = []