From bfa78f4697bb4b49f8855d0ccefef2121abe64f2 Mon Sep 17 00:00:00 2001
From: Bas Vb
Date: Tue, 10 Jun 2014 22:30:59 +0200
Subject: [PATCH 01/21] Clean up documentation in Wikipedia parsers

---
 FourmiCrawler/sources/WikipediaParser.py | 40 ++++++++++++++++++------
 1 file changed, 31 insertions(+), 9 deletions(-)

diff --git a/FourmiCrawler/sources/WikipediaParser.py b/FourmiCrawler/sources/WikipediaParser.py
index 8722cef..344f836 100644
--- a/FourmiCrawler/sources/WikipediaParser.py
+++ b/FourmiCrawler/sources/WikipediaParser.py
@@ -1,11 +1,9 @@
-import re
-
 from scrapy.http import Request
 from scrapy import log
-from scrapy.selector import Selector
-
 from source import Source
+from scrapy.selector import Selector
 from FourmiCrawler.items import Result
+import re
 
 
 class WikipediaParser(Source):
@@ -26,7 +24,11 @@ class WikipediaParser(Source):
         self.cfg = config
 
     def parse(self, response):
-        """ Distributes the above described behaviour """
+        """
+        Distributes the above described behaviour
+        :param response: The incoming search request
+        :return: the found properties if the response is new, or None if the page is already known
+        """
         log.msg('A response from %s just arrived!' % response.url, level=log.DEBUG)
         sel = Selector(response)
         compound = sel.xpath('//h1[@id="firstHeading"]//span/text()').extract()[0]  # makes sure to use main page
@@ -38,7 +40,14 @@ class WikipediaParser(Source):
         return items
 
     def parse_infobox(self, sel):
-        """ scrape data from infobox on wikipedia. """
+        """
+        Scrape data from the infobox on Wikipedia.
+
+        Data from two types of infoboxes, class="infobox bordered" and class="infobox", is scraped.
+        :param sel: The selector with the html-information of the page to parse
+        :return: item_list: a list of properties with their values, source, etc.
+        """
+
         items = []
 
         # be sure to get chembox (wikipedia template)
@@ -54,7 +63,7 @@ class WikipediaParser(Source):
             items.append(item)
             log.msg('Wiki prop: |%s| |%s| |%s|' % (item['attribute'], item['value'], item['source']), level=log.DEBUG)
 
-        #scrape the drugbox (wikipedia template)
+        #scrape the drugbox (wikipedia template)
         tr_list2 = sel.xpath('.//table[@class="infobox"]//tr')
         log.msg('dit: %s' % tr_list2, level=log.DEBUG)
         for tablerow in tr_list2:
             log.msg('item: %s' % tablerow.xpath('./th').xpath('normalize-space(string())'), level=log.DEBUG)
             if tablerow.xpath('./th').xpath('normalize-space(string())') and tablerow.xpath('./td').xpath(
                     'normalize-space(string())'):
                 item = self.newresult(
                     attribute=tablerow.xpath('./th').xpath('normalize-space(string())').extract()[0].encode('utf-8'),
                     value=tablerow.xpath('./td').xpath('normalize-space(string())').extract()[0].encode('utf-8'),
                 )
                 items.append(item)
                 log.msg(
                     'Wiki prop: |attribute: %s| |value: %s| |%s|' % (item['attribute'], item['value'], item['source']),
                     level=log.DEBUG)
@@ -97,7 +106,15 @@ class WikipediaParser(Source):
 
     @staticmethod
     def clean_items(items):
-        """ clean up properties using regex, makes it possible to split the values from the units """
+
+        """
+        Clean up properties using regex; this makes it possible to split the values from the units.
+
+        Almost not in use; it only cleans J/K/mol values and boiling/melting points.
+
+        :param items: List of properties with their values, source, etc.
+        :return: items: List of the now cleaned-up items
+        """
         for item in items:
             value = item['value']
             m = re.search('F;\s(\d+[\.,]?\d*)', value)  # clean up numerical Kelvin value (after F)
@@ -110,7 +127,12 @@ class WikipediaParser(Source):
 
     @staticmethod
     def get_identifiers(sel):
-        """ find external links, named 'Identifiers' to different sources. """
+        """
+        Find external links, named 'Identifiers', to different sources.
+ + :param sel: The selector with the html-information of the page to parse + :return: links: New links which can be used to expand the crawlers search + """ links = sel.xpath('//span[contains(concat(" ",normalize-space(@class)," "),"reflink")]/a' '[contains(concat(" ",normalize-space(@class)," "),"external")]/@href').extract() return links From 6621b3028c161e2809058f5fdff755c13eddcc4b Mon Sep 17 00:00:00 2001 From: Bas Vb Date: Tue, 10 Jun 2014 22:32:03 +0200 Subject: [PATCH 02/21] small typography --- FourmiCrawler/sources/WikipediaParser.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/FourmiCrawler/sources/WikipediaParser.py b/FourmiCrawler/sources/WikipediaParser.py index 344f836..94dc9d5 100644 --- a/FourmiCrawler/sources/WikipediaParser.py +++ b/FourmiCrawler/sources/WikipediaParser.py @@ -10,7 +10,7 @@ class WikipediaParser(Source): """ Wikipedia scraper for chemical properties This parser parses Wikipedia info boxes (also bordered) to obtain properties and their values. - It also returns requests with other external sources which contain information on parsed subject. + It also returns requests with other external sources which contain information on parsed subject. """ website = "http://en.wikipedia.org/wiki/*" From e9a5fc08e5944b630eaee091663439e29e430ea0 Mon Sep 17 00:00:00 2001 From: Bas Vb Date: Tue, 10 Jun 2014 22:41:32 +0200 Subject: [PATCH 03/21] Splitting up parse function --- FourmiCrawler/sources/WikipediaParser.py | 61 ++++++++++++++---------- 1 file changed, 35 insertions(+), 26 deletions(-) diff --git a/FourmiCrawler/sources/WikipediaParser.py b/FourmiCrawler/sources/WikipediaParser.py index 94dc9d5..6ea222d 100644 --- a/FourmiCrawler/sources/WikipediaParser.py +++ b/FourmiCrawler/sources/WikipediaParser.py @@ -50,34 +50,11 @@ class WikipediaParser(Source): items = [] - # be sure to get chembox (wikipedia template) - tr_list = sel.xpath('.//table[@class="infobox bordered"]//td[not(@colspan)]'). 
\ - xpath('normalize-space(string())') - prop_names = tr_list[::2] - prop_values = tr_list[1::2] - for i, prop_name in enumerate(prop_names): - item = self.newresult( - attribute=prop_name.extract().encode('utf-8'), - value=prop_values[i].extract().encode('utf-8') - ) - items.append(item) - log.msg('Wiki prop: |%s| |%s| |%s|' % (item['attribute'], item['value'], item['source']), level=log.DEBUG) + # scrape the chembox (wikipedia template) + parse_chembox(sel,items) #scrape the drugbox (wikipedia template) - tr_list2 = sel.xpath('.//table[@class="infobox"]//tr') - log.msg('dit: %s' % tr_list2, level=log.DEBUG) - for tablerow in tr_list2: - log.msg('item: %s' % tablerow.xpath('./th').xpath('normalize-space(string())'), level=log.DEBUG) - if tablerow.xpath('./th').xpath('normalize-space(string())') and tablerow.xpath('./td').xpath( - 'normalize-space(string())'): - item = self.newresult( - attribute=tablerow.xpath('./th').xpath('normalize-space(string())').extract()[0].encode('utf-8'), - value=tablerow.xpath('./td').xpath('normalize-space(string())').extract()[0].encode('utf-8'), - ) - items.append(item) - log.msg( - 'Wiki prop: |attribute: %s| |value: %s| |%s|' % (item['attribute'], item['value'], item['source']), - level=log.DEBUG) + parse_drugbox(sel,items) items = filter(lambda a: a['value'] != '', items) # remove items with an empty value item_list = self.clean_items(items) @@ -101,6 +78,38 @@ class WikipediaParser(Source): return item_list + def parse_chembox(self, sel, items): + tr_list = sel.xpath('.//table[@class="infobox bordered"]//td[not(@colspan)]'). \ + xpath('normalize-space(string())') + prop_names = tr_list[::2] + prop_values = tr_list[1::2] + for i, prop_name in enumerate(prop_names): + item = self.newresult( + attribute=prop_name.extract().encode('utf-8'), + value=prop_values[i].extract().encode('utf-8') + ) + items.append(item) + log.msg('Wiki prop: |%s| |%s| |%s|' % (item['attribute'], item['value'], item['source']), level=log.DEBUG) + return items + + def parse_drugbox(self, sel, items): + tr_list2 = sel.xpath('.//table[@class="infobox"]//tr') + log.msg('dit: %s' % tr_list2, level=log.DEBUG) + for tablerow in tr_list2: + log.msg('item: %s' % tablerow.xpath('./th').xpath('normalize-space(string())'), level=log.DEBUG) + if tablerow.xpath('./th').xpath('normalize-space(string())') and tablerow.xpath('./td').xpath( + 'normalize-space(string())'): + item = self.newresult( + attribute=tablerow.xpath('./th').xpath('normalize-space(string())').extract()[0].encode('utf-8'), + value=tablerow.xpath('./td').xpath('normalize-space(string())').extract()[0].encode('utf-8'), + ) + items.append(item) + log.msg( + 'Wiki prop: |attribute: %s| |value: %s| |%s|' % (item['attribute'], item['value'], item['source']), + level=log.DEBUG) + return items + + def new_compound_request(self, compound): return Request(url=self.website[:-1] + compound, callback=self.parse) From de474fea31d75b0f77fd010c457635f06b034664 Mon Sep 17 00:00:00 2001 From: Bas Vb Date: Tue, 10 Jun 2014 22:42:45 +0200 Subject: [PATCH 04/21] small fixes --- FourmiCrawler/sources/WikipediaParser.py | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/FourmiCrawler/sources/WikipediaParser.py b/FourmiCrawler/sources/WikipediaParser.py index 6ea222d..38ed836 100644 --- a/FourmiCrawler/sources/WikipediaParser.py +++ b/FourmiCrawler/sources/WikipediaParser.py @@ -51,10 +51,10 @@ class WikipediaParser(Source): items = [] # scrape the chembox (wikipedia template) - parse_chembox(sel,items) + items = 
self.parse_chembox(sel, items)
 
         #scrape the drugbox (wikipedia template)
-        parse_drugbox(sel,items)
+        items = self.parse_drugbox(sel, items)
 
         items = filter(lambda a: a['value'] != '', items)  # remove items with an empty value
         item_list = self.clean_items(items)

From a1859f2ec2a2986b4dc94fccf044e62243193bd0 Mon Sep 17 00:00:00 2001
From: Bas Vb
Date: Tue, 10 Jun 2014 22:46:50 +0200
Subject: [PATCH 05/21] final documentation

---
 FourmiCrawler/sources/WikipediaParser.py | 14 ++++++++++++++
 1 file changed, 14 insertions(+)

diff --git a/FourmiCrawler/sources/WikipediaParser.py b/FourmiCrawler/sources/WikipediaParser.py
index 38ed836..4aa49b2 100644
--- a/FourmiCrawler/sources/WikipediaParser.py
+++ b/FourmiCrawler/sources/WikipediaParser.py
@@ -79,6 +79,13 @@ class WikipediaParser(Source):
         return item_list
 
     def parse_chembox(self, sel, items):
+        """
+        Scrape data from the chembox infobox on Wikipedia.
+
+        :param sel: The selector with the html-information of the page to parse
+        :param items: the list in which the results have to be stored
+        :return: items: the list of items, including the newly found ones
+        """
         tr_list = sel.xpath('.//table[@class="infobox bordered"]//td[not(@colspan)]'). \
             xpath('normalize-space(string())')
         prop_names = tr_list[::2]
@@ -93,6 +100,13 @@ class WikipediaParser(Source):
         return items
 
     def parse_drugbox(self, sel, items):
+        """
+        Scrape data from the drugbox infobox on Wikipedia.
+
+        :param sel: The selector with the html-information of the page to parse
+        :param items: the list in which the results have to be stored
+        :return: items: the list of items, including the newly found ones
+        """
         tr_list2 = sel.xpath('.//table[@class="infobox"]//tr')
         log.msg('dit: %s' % tr_list2, level=log.DEBUG)
         for tablerow in tr_list2:

From ee7f1ab739a4b3004635914a02c14baa5b5510b5 Mon Sep 17 00:00:00 2001
From: "Jip J. Dekker"
Date: Sun, 15 Jun 2014 19:26:13 +0200
Subject: [PATCH 06/21] Updated the Objectives and linkage to the wiki

---
 README.md | 14 +++++---------
 1 file changed, 5 insertions(+), 9 deletions(-)

diff --git a/README.md b/README.md
index 48b0419..f09f77c 100644
--- a/README.md
+++ b/README.md
@@ -23,21 +23,21 @@ documentation](http://doc.scrapy.org/en/latest/index.html).
 
 ### Installing
 
-If you're installing Fourmi, please take a look at our [installation guide](...)
-on our wiki. When you've installed the application, make sure to check our
-[usage guide](...).
+If you're installing Fourmi, please take a look at our installation guides
+on our [wiki](https://github.com/jjdekker/Fourmi/wiki). When you've installed the application, make sure to check our
+usage guide on the [Command Line Interface](https://github.com/jjdekker/Fourmi/wiki/CLI) and on the [Graphical User Interface](https://github.com/jjdekker/Fourmi/wiki/GUI).
 
 ### Using the Source
 
 To use the Fourmi source code multiple dependencies are required. Take a look at
-the [wiki page](...) on using the application source code for a step by step
+our [wiki pages](https://github.com/jjdekker/Fourmi/wiki) on using the application source code in our step-by-step
 installation guide. When developing for the Fourmi project keep in mind that
 code readability is a must. To maintain the readability, code should conform
 to the [PEP-8](http://legacy.python.org/dev/peps/pep-0008/) style guide for
 Python code. More information about the different structures and principles of the
-Fourmi application can be found on our [wiki](...).
+Fourmi application can be found on our [wiki](https://github.com/jjdekker/Fourmi/wiki).
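
For readers working from source, the helpers touched by this series can also be driven directly from Python. A minimal sketch (illustrative only; `Configurator.set_logging` only exists after patches 12 and 13, and the call signatures are taken from the diffs in this series, not from a supported API):

```python
# Sketch: a programmatic equivalent of `fourmi search`, pieced together
# from the fourmi.py shown in this series. Treat as illustration.
from utils.configurator import Configurator
from utils.sourceloader import SourceLoader

conf = Configurator()
conf.set_output("results.csv", "csv")      # FEED_URI / FEED_FORMAT overrides
conf.set_logging(logfile=None, verbose=1)  # equivalent of a single -v flag
loader = SourceLoader()
loader.include(["Wikipedia"])              # restrict scraping to one source
```
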
+Fourmi application can be found on our [wiki](https://github.com/jjdekker/Fourmi/wiki). ### To Do @@ -45,13 +45,9 @@ The Fourmi project has the following goals for the nearby future: __Main goals:__ -- Improve our documentation and guides. (Assignee: Dekker) - Build an graphical user interface(GUI) as alternative for the command line interface(CLI). (Assignee: Harmen) - Compiling the source into an windows executable. (Assignee: Bas) -- Create an configuration file to hold logins and API keys. -- Determine reliability of our data point. -- Create an module to gather data from NIST. (Assignee: Rob) - Create an module to gather data from PubChem. (Assignee: Nout) __Side goals:__ From 2eb8f3e0af18cad1adafeb8d6e2783b483539c35 Mon Sep 17 00:00:00 2001 From: "Jip J. Dekker" Date: Sun, 15 Jun 2014 19:38:52 +0200 Subject: [PATCH 07/21] Changed logging CL option --- fourmi.py | 8 +++++--- 1 file changed, 5 insertions(+), 3 deletions(-) diff --git a/fourmi.py b/fourmi.py index e6d7e9a..ab4baef 100755 --- a/fourmi.py +++ b/fourmi.py @@ -5,6 +5,7 @@ Fourmi, a web scraper build to search specific information for a given compound Usage: fourmi search fourmi [options] search + fourmi [-v | -vv | -vvv] [options] search fourmi [options] [--include= | --exclude=] search fourmi list fourmi [--include= | --exclude=] list @@ -15,7 +16,7 @@ Options: --attributes= Include only that match these regular expressions split by a comma. [default: .*] -h --help Show this screen. --version Show version. - --verbose Verbose logging output. + -v Verbose logging output. (Multiple occurrences increase logging level) --log= Save log to an file. -o --output= Output file [default: results.*format*] -f --format= Output formats (supported: csv, json, jsonlines, xml) [default: csv] @@ -25,8 +26,7 @@ Options: from twisted.internet import reactor from scrapy.crawler import Crawler -from scrapy import log, signals -from scrapy.utils.project import get_project_settings +from scrapy import signals import docopt from FourmiCrawler.spider import FourmiSpider @@ -69,6 +69,8 @@ if __name__ == '__main__': arguments = docopt.docopt(__doc__, version='Fourmi - V0.5.0') loader = SourceLoader() + print arguments["-v"] + if arguments["--include"]: loader.include(arguments["--include"].split(',')) elif arguments["--exclude"]: From 4672903c9b9b39a3b64cb3f56e1c5530f89890ae Mon Sep 17 00:00:00 2001 From: "Jip J. Dekker" Date: Sun, 15 Jun 2014 19:50:31 +0200 Subject: [PATCH 08/21] The logging now using the scrapy setting overrides --- fourmi.py | 4 +--- utils/configurator.py | 39 +++++++++++++++++++++++++++------------ 2 files changed, 28 insertions(+), 15 deletions(-) diff --git a/fourmi.py b/fourmi.py index ab4baef..1fd54e7 100755 --- a/fourmi.py +++ b/fourmi.py @@ -58,7 +58,7 @@ def search(docopt_arguments, source_loader): :param source_loader: An initiated SourceLoader object pointed at the directory with the sources. 
""" conf = Configurator() - conf.start_log(docopt_arguments["--log"], docopt_arguments["--verbose"]) + conf.start_log(docopt_arguments["--log"], docopt_arguments["-v"]) conf.set_output(docopt_arguments["--output"], docopt_arguments["--format"]) setup_crawler(docopt_arguments[""], conf.scrapy_settings, source_loader, docopt_arguments["--attributes"].split(',')) reactor.run() @@ -69,8 +69,6 @@ if __name__ == '__main__': arguments = docopt.docopt(__doc__, version='Fourmi - V0.5.0') loader = SourceLoader() - print arguments["-v"] - if arguments["--include"]: loader.include(arguments["--include"].split(',')) elif arguments["--exclude"]: diff --git a/utils/configurator.py b/utils/configurator.py index dfc6330..25a4883 100644 --- a/utils/configurator.py +++ b/utils/configurator.py @@ -1,6 +1,8 @@ +import ConfigParser + from scrapy import log from scrapy.utils.project import get_project_settings -import ConfigParser + class Configurator: """ @@ -33,20 +35,33 @@ class Configurator: def start_log(self, logfile, verbose): """ - This function starts the logging functionality of Scrapy using the settings given by the CLI. + This function changes the default settings of Scapy's logging functionality + using the settings given by the CLI. :param logfile: The location where the logfile will be saved. - :param verbose: A boolean value to switch between loglevels. + :param verbose: A integer value to switch between loglevels. """ - if logfile is not None: - if verbose: - log.start(logfile=logfile, logstdout=False, loglevel=log.DEBUG) - else: - log.start(logfile=logfile, logstdout=True, loglevel=log.WARNING) + if verbose != 0: + self.scrapy_settings.overrides["LOG_ENABLED"] = True else: - if verbose: - log.start(logstdout=False, loglevel=log.DEBUG) - else: - log.start(logstdout=True, loglevel=log.WARNING) + self.scrapy_settings.overrides["LOG_ENABLED"] = False + + if verbose == 1: + self.scrapy_settings.overrides["LOG_LEVEL"] = "WARNING" + elif verbose == 2: + self.scrapy_settings.overrides["LOG_LEVEL"] = "INFO" + else: + self.scrapy_settings.overrides["LOG_LEVEL"] = "DEBUG" + + if verbose > 1: + self.scrapy_settings.overrides["LOG_STDOUT"] = False + else: + self.scrapy_settings.overrides["LOG_STDOUT"] = True + + if logfile is not None: + self.scrapy_settings.overrides["LOG_FILE"] = logfile + else: + self.scrapy_settings.overrides["LOG_FILE"] = None + @staticmethod def read_sourceconfiguration(): From f604c3efcc62b39b139651b440ce46761204a0d9 Mon Sep 17 00:00:00 2001 From: "Jip J. Dekker" Date: Sun, 15 Jun 2014 20:07:11 +0200 Subject: [PATCH 09/21] Utils can't use the logging facilities as they aren't started yet --- utils/configurator.py | 4 +--- 1 file changed, 1 insertion(+), 3 deletions(-) diff --git a/utils/configurator.py b/utils/configurator.py index 25a4883..7c1aaa8 100644 --- a/utils/configurator.py +++ b/utils/configurator.py @@ -1,6 +1,5 @@ import ConfigParser -from scrapy import log from scrapy.utils.project import get_project_settings @@ -90,7 +89,6 @@ class Configurator: elif config.defaults(): section = config.defaults() if 'reliability' not in section: - log.msg('Reliability not set for %s' % sourcename, - level=log.WARNING) + print 'Reliability not set for %s' % sourcename section['reliability'] = '' return section From 3ea950b93662d741f2b0d971ba43f3c9804c55eb Mon Sep 17 00:00:00 2001 From: "Jip J. Dekker" Date: Sun, 15 Jun 2014 20:09:40 +0200 Subject: [PATCH 10/21] Logging facility is working again. 
--- fourmi.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/fourmi.py b/fourmi.py index 1fd54e7..95fc53a 100755 --- a/fourmi.py +++ b/fourmi.py @@ -26,7 +26,7 @@ Options: from twisted.internet import reactor from scrapy.crawler import Crawler -from scrapy import signals +from scrapy import signals, log import docopt from FourmiCrawler.spider import FourmiSpider @@ -61,6 +61,7 @@ def search(docopt_arguments, source_loader): conf.start_log(docopt_arguments["--log"], docopt_arguments["-v"]) conf.set_output(docopt_arguments["--output"], docopt_arguments["--format"]) setup_crawler(docopt_arguments[""], conf.scrapy_settings, source_loader, docopt_arguments["--attributes"].split(',')) + log.start(conf.scrapy_settings.get("LOG_FILE"), conf.scrapy_settings.get("LOG_LEVEL"), conf.scrapy_settings.get("LOG_STDOUT")) reactor.run() From 3fe2cde892ba1889d7d845a71c2e41a8037781be Mon Sep 17 00:00:00 2001 From: "Jip J. Dekker" Date: Sun, 15 Jun 2014 20:10:17 +0200 Subject: [PATCH 11/21] Error message clearly labeled as a warning --- utils/configurator.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/utils/configurator.py b/utils/configurator.py index 7c1aaa8..5cde4d5 100644 --- a/utils/configurator.py +++ b/utils/configurator.py @@ -89,6 +89,6 @@ class Configurator: elif config.defaults(): section = config.defaults() if 'reliability' not in section: - print 'Reliability not set for %s' % sourcename + print 'WARNING: Reliability not set for %s' % sourcename section['reliability'] = '' return section From e3d6087ed43e6c38ab6c156ea9926447e7867028 Mon Sep 17 00:00:00 2001 From: "Jip J. Dekker" Date: Sun, 15 Jun 2014 20:12:23 +0200 Subject: [PATCH 12/21] renamed logging function --- tests/test_configurator.py | 11 +++++------ utils/configurator.py | 2 +- 2 files changed, 6 insertions(+), 7 deletions(-) diff --git a/tests/test_configurator.py b/tests/test_configurator.py index eb43cb7..cf54132 100644 --- a/tests/test_configurator.py +++ b/tests/test_configurator.py @@ -1,7 +1,8 @@ import unittest +import ConfigParser + from utils.configurator import Configurator -import ConfigParser class TestConfigurator(unittest.TestCase): @@ -21,11 +22,9 @@ class TestConfigurator(unittest.TestCase): self.assertEqual(self.conf.scrapy_settings["FEED_URI"], "results.csv") self.assertEqual(self.conf.scrapy_settings["FEED_FORMAT"], "csv") - # def test_start_log(self): - # self.conf.start_log("test.log", True) - # self.conf.start_log("test.log", False) - # self.conf.start_log(None, True) - # self.conf.start_log(None, False) + def test_start_log(self): + for i in range(0 ,3): + self.conf.set_logging() def test_read_sourceconfiguration(self): config = self.conf.read_sourceconfiguration() diff --git a/utils/configurator.py b/utils/configurator.py index 5cde4d5..03ef38f 100644 --- a/utils/configurator.py +++ b/utils/configurator.py @@ -32,7 +32,7 @@ class Configurator: self.scrapy_settings.overrides["FEED_FORMAT"] = fileformat - def start_log(self, logfile, verbose): + def set_logging(self, logfile, verbose): """ This function changes the default settings of Scapy's logging functionality using the settings given by the CLI. From 435356c3212e5f6656fd7f560217c398f2a26d16 Mon Sep 17 00:00:00 2001 From: "Jip J. 
Dekker" Date: Sun, 15 Jun 2014 20:32:24 +0200 Subject: [PATCH 13/21] Added default values to the logging function --- fourmi.py | 2 +- utils/configurator.py | 2 +- 2 files changed, 2 insertions(+), 2 deletions(-) diff --git a/fourmi.py b/fourmi.py index 95fc53a..1b9237c 100755 --- a/fourmi.py +++ b/fourmi.py @@ -58,7 +58,7 @@ def search(docopt_arguments, source_loader): :param source_loader: An initiated SourceLoader object pointed at the directory with the sources. """ conf = Configurator() - conf.start_log(docopt_arguments["--log"], docopt_arguments["-v"]) + conf.set_logging(docopt_arguments["--log"], docopt_arguments["-v"]) conf.set_output(docopt_arguments["--output"], docopt_arguments["--format"]) setup_crawler(docopt_arguments[""], conf.scrapy_settings, source_loader, docopt_arguments["--attributes"].split(',')) log.start(conf.scrapy_settings.get("LOG_FILE"), conf.scrapy_settings.get("LOG_LEVEL"), conf.scrapy_settings.get("LOG_STDOUT")) diff --git a/utils/configurator.py b/utils/configurator.py index 03ef38f..7dc27c5 100644 --- a/utils/configurator.py +++ b/utils/configurator.py @@ -32,7 +32,7 @@ class Configurator: self.scrapy_settings.overrides["FEED_FORMAT"] = fileformat - def set_logging(self, logfile, verbose): + def set_logging(self, logfile=None, verbose=0): """ This function changes the default settings of Scapy's logging functionality using the settings given by the CLI. From fa42562b8e63bc049cac5a8769b02f7dd72a97c1 Mon Sep 17 00:00:00 2001 From: "Jip J. Dekker" Date: Sun, 15 Jun 2014 20:33:58 +0200 Subject: [PATCH 14/21] Tests for the Logging Functionality --- tests/test_configurator.py | 23 +++++++++++++++++++++-- 1 file changed, 21 insertions(+), 2 deletions(-) diff --git a/tests/test_configurator.py b/tests/test_configurator.py index cf54132..df29da9 100644 --- a/tests/test_configurator.py +++ b/tests/test_configurator.py @@ -23,8 +23,27 @@ class TestConfigurator(unittest.TestCase): self.assertEqual(self.conf.scrapy_settings["FEED_FORMAT"], "csv") def test_start_log(self): - for i in range(0 ,3): - self.conf.set_logging() + for i in range(0, 3): + self.conf.set_logging("TEST", i) + self.assertEqual(self.conf.scrapy_settings.get("LOG_FILE"), "TEST") + if i > 0: + self.assertEqual(self.conf.scrapy_settings.get("LOG_ENABLED"), True) + if i > 1: + self.assertEqual(self.conf.scrapy_settings.get("LOG_STDOUT"), False) + else: + self.assertEqual(self.conf.scrapy_settings.get("LOG_STDOUT"), True) + else: + self.assertEqual(self.conf.scrapy_settings.get("LOG_ENABLED"), False) + self.assertEqual(self.conf.scrapy_settings.get("LOG_STDOUT"), True) + if i == 1: + self.assertEqual(self.conf.scrapy_settings.get("LOG_LEVEL"), "WARNING") + elif i == 2: + self.assertEqual(self.conf.scrapy_settings.get("LOG_LEVEL"), "INFO") + elif i == 3: + self.assertEqual(self.conf.scrapy_settings.get("LOG_LEVEL"), "DEBUG") + + self.conf.set_logging(verbose=i) + self.assertEqual(self.conf.scrapy_settings.get("LOG_FILE"), None) def test_read_sourceconfiguration(self): config = self.conf.read_sourceconfiguration() From 66f2384747a5a86aba034729f532794e7c06e8fe Mon Sep 17 00:00:00 2001 From: "Jip J. 
Dekker" Date: Sun, 15 Jun 2014 20:41:19 +0200 Subject: [PATCH 15/21] Default arguments can't be mutable --- FourmiCrawler/sources/ChemSpider.py | 7 +++++-- FourmiCrawler/sources/NIST.py | 9 +++++---- FourmiCrawler/sources/WikipediaParser.py | 14 +++++++++----- FourmiCrawler/sources/source.py | 2 +- FourmiCrawler/spider.py | 7 +++++-- 5 files changed, 25 insertions(+), 14 deletions(-) diff --git a/FourmiCrawler/sources/ChemSpider.py b/FourmiCrawler/sources/ChemSpider.py index 87a6ee7..fb51a4a 100644 --- a/FourmiCrawler/sources/ChemSpider.py +++ b/FourmiCrawler/sources/ChemSpider.py @@ -26,9 +26,12 @@ class ChemSpider(Source): structure = 'Chemical-Structure.%s.html' extendedinfo = 'MassSpecAPI.asmx/GetExtendedCompoundInfo?csid=%s&token=' - def __init__(self, config={}): + def __init__(self, config=None): Source.__init__(self, config) - self.cfg = config + if self.cfg is None: + self.cfg = {} + else: + self.cfg = config self.ignore_list = [] if 'token' not in self.cfg or self.cfg['token'] == '': log.msg('ChemSpider token not set or empty, search/MassSpec API ' diff --git a/FourmiCrawler/sources/NIST.py b/FourmiCrawler/sources/NIST.py index 3c323ef..d71d08f 100644 --- a/FourmiCrawler/sources/NIST.py +++ b/FourmiCrawler/sources/NIST.py @@ -22,12 +22,13 @@ class NIST(Source): search = 'cgi/cbook.cgi?Name=%s&Units=SI&cTP=on' - cfg = {} - - def __init__(self, config={}): + def __init__(self, config=None): Source.__init__(self, config) self.ignore_list = set() - self.cfg = config + if config is None: + self.cfg = {} + else: + self.cfg = config def parse(self, response): sel = Selector(response) diff --git a/FourmiCrawler/sources/WikipediaParser.py b/FourmiCrawler/sources/WikipediaParser.py index 4aa49b2..b995f30 100644 --- a/FourmiCrawler/sources/WikipediaParser.py +++ b/FourmiCrawler/sources/WikipediaParser.py @@ -1,9 +1,11 @@ +import re + from scrapy.http import Request from scrapy import log -from source import Source from scrapy.selector import Selector + +from source import Source from FourmiCrawler.items import Result -import re class WikipediaParser(Source): @@ -17,11 +19,13 @@ class WikipediaParser(Source): __spider = None searched_compounds = [] - cfg = {} - def __init__(self, config={}): + def __init__(self, config=None): Source.__init__(self, config) - self.cfg = config + if config is None: + self.cfg = {} + else: + self.cfg = config def parse(self, response): """ diff --git a/FourmiCrawler/sources/source.py b/FourmiCrawler/sources/source.py index a609bb9..fe36784 100644 --- a/FourmiCrawler/sources/source.py +++ b/FourmiCrawler/sources/source.py @@ -6,7 +6,7 @@ class Source: website = "http://something/*" # Regex of URI's the source is able to parse _spider = None - def __init__(self, config={}): + def __init__(self, config=None): """ Initiation of a new Source """ diff --git a/FourmiCrawler/spider.py b/FourmiCrawler/spider.py index 5c09f07..7552c7d 100644 --- a/FourmiCrawler/spider.py +++ b/FourmiCrawler/spider.py @@ -10,7 +10,7 @@ class FourmiSpider(Spider): """ name = "FourmiSpider" - def __init__(self, compound=None, selected_attributes=[".*"], *args, **kwargs): + def __init__(self, compound=None, selected_attributes=None, *args, **kwargs): """ Initiation of the Spider :param compound: compound that will be searched. 
@@ -20,7 +20,10 @@ class FourmiSpider(Spider): self.synonyms = set() super(FourmiSpider, self).__init__(*args, **kwargs) self.synonyms.add(compound) - self.selected_attributes = selected_attributes + if selected_attributes is None: + self.selected_attributes = [".*"] + else: + self.selected_attributes = selected_attributes def parse(self, response): """ From 74e7152d5fc2d35ad109f2660bb2385cdc04526d Mon Sep 17 00:00:00 2001 From: "Jip J. Dekker" Date: Sun, 15 Jun 2014 20:45:35 +0200 Subject: [PATCH 16/21] A lot of PEP-8 fixes --- FourmiCrawler/sources/ChemSpider.py | 6 +++--- FourmiCrawler/sources/NIST.py | 14 +++++++------- FourmiCrawler/sources/WikipediaParser.py | 17 ++++++++--------- FourmiCrawler/spider.py | 2 +- fourmi.py | 6 ++++-- tests/test_spider.py | 1 - utils/configurator.py | 5 +---- utils/sourceloader.py | 1 + 8 files changed, 25 insertions(+), 27 deletions(-) diff --git a/FourmiCrawler/sources/ChemSpider.py b/FourmiCrawler/sources/ChemSpider.py index fb51a4a..3f1538f 100644 --- a/FourmiCrawler/sources/ChemSpider.py +++ b/FourmiCrawler/sources/ChemSpider.py @@ -40,7 +40,6 @@ class ChemSpider(Source): self.search += self.cfg['token'] self.extendedinfo += self.cfg['token'] - def parse(self, response): sel = Selector(response) requests = [] @@ -202,13 +201,14 @@ class ChemSpider(Source): return properties def newresult(self, attribute, value, conditions='', source='ChemSpider'): - return Result({ + return Result( + { 'attribute': attribute, 'value': value, 'source': source, 'reliability': self.cfg['reliability'], 'conditions': conditions - }) + }) def parse_searchrequest(self, response): """Parse the initial response of the ChemSpider Search API """ diff --git a/FourmiCrawler/sources/NIST.py b/FourmiCrawler/sources/NIST.py index d71d08f..e81db5a 100644 --- a/FourmiCrawler/sources/NIST.py +++ b/FourmiCrawler/sources/NIST.py @@ -89,7 +89,6 @@ class NIST(Source): InChiKey, CAS number """ ul = sel.xpath('body/ul[li/strong="IUPAC Standard InChI:"]') - li = ul.xpath('li') raw_synonyms = ul.xpath('li[strong="Other names:"]/text()').extract() for synonym in raw_synonyms[0].strip().split(';\n'): @@ -256,12 +255,13 @@ class NIST(Source): return results def newresult(self, attribute, value, conditions=''): - return Result({ - 'attribute': attribute, - 'value': value, - 'source': 'NIST', - 'reliability': self.cfg['reliability'], - 'conditions': conditions + return Result( + { + 'attribute': attribute, + 'value': value, + 'source': 'NIST', + 'reliability': self.cfg['reliability'], + 'conditions': conditions }) def new_compound_request(self, compound): diff --git a/FourmiCrawler/sources/WikipediaParser.py b/FourmiCrawler/sources/WikipediaParser.py index b995f30..cfd2555 100644 --- a/FourmiCrawler/sources/WikipediaParser.py +++ b/FourmiCrawler/sources/WikipediaParser.py @@ -19,7 +19,6 @@ class WikipediaParser(Source): __spider = None searched_compounds = [] - def __init__(self, config=None): Source.__init__(self, config) if config is None: @@ -57,7 +56,7 @@ class WikipediaParser(Source): # scrape the chembox (wikipedia template) items = self.parse_chembox(sel, items) - #scrape the drugbox (wikipedia template) + # scrape the drugbox (wikipedia template) items = self.parse_drugbox(sel, items) items = filter(lambda a: a['value'] != '', items) # remove items with an empty value @@ -127,7 +126,6 @@ class WikipediaParser(Source): level=log.DEBUG) return items - def new_compound_request(self, compound): return Request(url=self.website[:-1] + compound, callback=self.parse) @@ -165,10 +163,11 @@ class 
WikipediaParser(Source): return links def newresult(self, attribute, value): - return Result({ - 'attribute': attribute, - 'value': value, - 'source': 'Wikipedia', - 'reliability': self.cfg['reliability'], - 'conditions': '' + return Result( + { + 'attribute': attribute, + 'value': value, + 'source': 'Wikipedia', + 'reliability': self.cfg['reliability'], + 'conditions': '' }) diff --git a/FourmiCrawler/spider.py b/FourmiCrawler/spider.py index 7552c7d..ebfd2cf 100644 --- a/FourmiCrawler/spider.py +++ b/FourmiCrawler/spider.py @@ -21,7 +21,7 @@ class FourmiSpider(Spider): super(FourmiSpider, self).__init__(*args, **kwargs) self.synonyms.add(compound) if selected_attributes is None: - self.selected_attributes = [".*"] + self.selected_attributes = [".*"] else: self.selected_attributes = selected_attributes diff --git a/fourmi.py b/fourmi.py index 1b9237c..2a422ef 100755 --- a/fourmi.py +++ b/fourmi.py @@ -60,8 +60,10 @@ def search(docopt_arguments, source_loader): conf = Configurator() conf.set_logging(docopt_arguments["--log"], docopt_arguments["-v"]) conf.set_output(docopt_arguments["--output"], docopt_arguments["--format"]) - setup_crawler(docopt_arguments[""], conf.scrapy_settings, source_loader, docopt_arguments["--attributes"].split(',')) - log.start(conf.scrapy_settings.get("LOG_FILE"), conf.scrapy_settings.get("LOG_LEVEL"), conf.scrapy_settings.get("LOG_STDOUT")) + setup_crawler(docopt_arguments[""], conf.scrapy_settings, + source_loader, docopt_arguments["--attributes"].split(',')) + log.start(conf.scrapy_settings.get("LOG_FILE"), + conf.scrapy_settings.get("LOG_LEVEL"), conf.scrapy_settings.get("LOG_STDOUT")) reactor.run() diff --git a/tests/test_spider.py b/tests/test_spider.py index 589a571..1ee40b1 100644 --- a/tests/test_spider.py +++ b/tests/test_spider.py @@ -47,7 +47,6 @@ class TestFoumiSpider(unittest.TestCase): self.assertGreater(len(requests), 0) self.assertIsInstance(requests[0], Request) - def test_synonym_requests(self): # A test for the synonym request function self.spi._sources = [] diff --git a/utils/configurator.py b/utils/configurator.py index 7dc27c5..62987c6 100644 --- a/utils/configurator.py +++ b/utils/configurator.py @@ -12,7 +12,6 @@ class Configurator: def __init__(self): self.scrapy_settings = get_project_settings() - def set_output(self, filename, fileformat): """ This function manipulates the Scrapy output file settings that normally would be set in the settings file. @@ -31,7 +30,6 @@ class Configurator: if fileformat is not None: self.scrapy_settings.overrides["FEED_FORMAT"] = fileformat - def set_logging(self, logfile=None, verbose=0): """ This function changes the default settings of Scapy's logging functionality @@ -61,7 +59,6 @@ class Configurator: else: self.scrapy_settings.overrides["LOG_FILE"] = None - @staticmethod def read_sourceconfiguration(): """ @@ -70,7 +67,7 @@ class Configurator: :return a ConfigParser object of sources.cfg """ config = ConfigParser.ConfigParser() - config.read('sources.cfg') # [TODO]: should be softcoded eventually + config.read('sources.cfg') # [TODO]: should be softcoded eventually return config @staticmethod diff --git a/utils/sourceloader.py b/utils/sourceloader.py index 9b33657..8c54464 100644 --- a/utils/sourceloader.py +++ b/utils/sourceloader.py @@ -5,6 +5,7 @@ import re from FourmiCrawler.sources.source import Source from utils.configurator import Configurator + class SourceLoader: sources = [] From 79cf15b95c30cd937a9394df92ed87a3a635c07e Mon Sep 17 00:00:00 2001 From: "Jip J. 
Dekker" Date: Sun, 15 Jun 2014 20:50:00 +0200 Subject: [PATCH 17/21] Refractoring double code --- FourmiCrawler/sources/ChemSpider.py | 4 ---- FourmiCrawler/sources/NIST.py | 4 ---- FourmiCrawler/sources/WikipediaParser.py | 4 ---- FourmiCrawler/sources/source.py | 3 +++ 4 files changed, 3 insertions(+), 12 deletions(-) diff --git a/FourmiCrawler/sources/ChemSpider.py b/FourmiCrawler/sources/ChemSpider.py index 3f1538f..0110e57 100644 --- a/FourmiCrawler/sources/ChemSpider.py +++ b/FourmiCrawler/sources/ChemSpider.py @@ -28,10 +28,6 @@ class ChemSpider(Source): def __init__(self, config=None): Source.__init__(self, config) - if self.cfg is None: - self.cfg = {} - else: - self.cfg = config self.ignore_list = [] if 'token' not in self.cfg or self.cfg['token'] == '': log.msg('ChemSpider token not set or empty, search/MassSpec API ' diff --git a/FourmiCrawler/sources/NIST.py b/FourmiCrawler/sources/NIST.py index e81db5a..934b457 100644 --- a/FourmiCrawler/sources/NIST.py +++ b/FourmiCrawler/sources/NIST.py @@ -25,10 +25,6 @@ class NIST(Source): def __init__(self, config=None): Source.__init__(self, config) self.ignore_list = set() - if config is None: - self.cfg = {} - else: - self.cfg = config def parse(self, response): sel = Selector(response) diff --git a/FourmiCrawler/sources/WikipediaParser.py b/FourmiCrawler/sources/WikipediaParser.py index cfd2555..401698c 100644 --- a/FourmiCrawler/sources/WikipediaParser.py +++ b/FourmiCrawler/sources/WikipediaParser.py @@ -21,10 +21,6 @@ class WikipediaParser(Source): def __init__(self, config=None): Source.__init__(self, config) - if config is None: - self.cfg = {} - else: - self.cfg = config def parse(self, response): """ diff --git a/FourmiCrawler/sources/source.py b/FourmiCrawler/sources/source.py index fe36784..36218b0 100644 --- a/FourmiCrawler/sources/source.py +++ b/FourmiCrawler/sources/source.py @@ -10,6 +10,9 @@ class Source: """ Initiation of a new Source """ + self.cfg = {} + if config is not None: + self.cfg = config pass def parse(self, response): From 147b148dbdfa102de5b1b6d002480cb6acfca39d Mon Sep 17 00:00:00 2001 From: "Jip J. Dekker" Date: Sun, 15 Jun 2014 21:00:36 +0200 Subject: [PATCH 18/21] Force a attribute of the test item to be None --- tests/test_pipeline.py | 1 + 1 file changed, 1 insertion(+) diff --git a/tests/test_pipeline.py b/tests/test_pipeline.py index dfb8e83..eb2b070 100644 --- a/tests/test_pipeline.py +++ b/tests/test_pipeline.py @@ -13,6 +13,7 @@ class TestPipelines(unittest.TestCase): def test_none_pipeline(self): # Testing the pipeline that replaces the None values in items. self.testItem["value"] = "abc" + self.testItem["source"] = None pipe = pipelines.RemoveNonePipeline() processed = pipe.process_item(self.testItem, spider.FourmiSpider()) From a27e1e4bdd30b402a2c0ec99f8556c777bf57197 Mon Sep 17 00:00:00 2001 From: "Jip J. Dekker" Date: Sun, 15 Jun 2014 21:09:43 +0200 Subject: [PATCH 19/21] Bumped version number --- fourmi.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/fourmi.py b/fourmi.py index 2a422ef..55a3c20 100755 --- a/fourmi.py +++ b/fourmi.py @@ -69,7 +69,7 @@ def search(docopt_arguments, source_loader): # The start for the Fourmi Command Line interface. if __name__ == '__main__': - arguments = docopt.docopt(__doc__, version='Fourmi - V0.5.0') + arguments = docopt.docopt(__doc__, version='Fourmi - V0.5.1') loader = SourceLoader() if arguments["--include"]: From 5f3ade8ff9f29d36e86a6e5d6b598cdc9870d60e Mon Sep 17 00:00:00 2001 From: "Jip J. 
Dekker" Date: Sun, 15 Jun 2014 21:11:30 +0200 Subject: [PATCH 20/21] Added a changelog --- Changelog.md | 3 +++ 1 file changed, 3 insertions(+) create mode 100644 Changelog.md diff --git a/Changelog.md b/Changelog.md new file mode 100644 index 0000000..2a63786 --- /dev/null +++ b/Changelog.md @@ -0,0 +1,3 @@ +### v0.5.1 +- UPDATED: Logging functionality from command line +- DEV: Code cleanup and extra tests \ No newline at end of file From 9c9aba55d8a32ba716d83ad7aa1f5816db61fe63 Mon Sep 17 00:00:00 2001 From: "Jip J. Dekker" Date: Sun, 15 Jun 2014 21:12:19 +0200 Subject: [PATCH 21/21] Added my signature, confirming validity of current files --- SIGNED.md | 103 ++++++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 103 insertions(+) create mode 100644 SIGNED.md diff --git a/SIGNED.md b/SIGNED.md new file mode 100644 index 0000000..79e66cb --- /dev/null +++ b/SIGNED.md @@ -0,0 +1,103 @@ +##### Signed by https://keybase.io/jdekker +``` +-----BEGIN PGP SIGNATURE----- +Version: GnuPG v1.4.11 (GNU/Linux) + +iQIcBAABAgAGBQJTnfAAAAoJEJrQ9RIUCT6/KZIQAME07yzAG5hnqsQof5ESoeQs +5wBxAhiBIX/0yn3qIT/eMh0ubCKUZsqJ3/PzUljeMJ6CGtwxFYfTWkgjYlOoAz9G +fS7CjPmRPyiu+MFo5he+oVRmLUMqfuLUrCyuIxJwMXq5YbQvzyqiffvxr8VRULtV +3c0drWfQMX1ZeAWSIYN0xuMndzvaqIAQU6o4tSQf/rUiKlM2NnTDNUHu2PY9FED/ +IJwM/IgAMAkJARyL7ltq6pHzORsu7sd2Nhv0esa0Gs2GSuRjKueeMZvJzpDAufy9 +bWn9EqKhVwPR6zWnXRmNj9Ymj1w167hIUYcBdFhC7kie5zv9+pDE6d/s7pw/Rejd +L0k8LKBGtJ8o7SKYR9kcNLDWXEnHjfCraD+14FMYqQPcz2ekoV6Exv/mP8qRPwUc +b+FtjJtW8fEiOMAyjMOvLTzYbCVwjdErAqgNdHeSByi1nxfrphjajRiNUt7fVimJ +++QZzKCj6xN2MuTJ41KbZ8teiUXwQB4OKKij0fgoy0RBwW0vqH6MF7cCKm1zT1Qa +9FGlBU2jSybQqUu4lJ/eUjO/3tQMhJErQJU/i+6lwi7OMnS9J/g17Heghp5Hxyhc +VWvhR56pbWLIL2XQqDGGEqPDIzXohHnbRJ1N71b06akIvIIrTqc6Glu4PJeUG/Pe +EF8/jBwydxbKUOyKRSQS +=xWbc +-----END PGP SIGNATURE----- + +``` + + + +### Begin signed statement + +#### Expect + +``` +size exec file contents + ./ +17591 .coverage 1dd1207846db74e407d3a4a1951b8e81934a4693385d39f6c337a224375bad39|1b7ead09cf213b5a9545557be982aaa30238b689bb54adf604f82b12ef521eb2 +375 .gitignore d2e475a6a4fa51422cac0a07495914e776858fb9ab9c8937a4d491a3e042d6b1 +464 .travis.yml 3063ba078607b8d16bd6467afc15fbbaa4b26c1e30be5ce7cef453cfccbaa95c +97 Changelog.md bcbce9a33bbbbcd18fd7788e6dc3a9c4b13dff7128ea99968994c1b290ddc931 + FourmiCrawler/ +0 __init__.py e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855 +304 items.py b00d49a3d53fa13306c7f8b023adb93ab88423c4fce46600689814f6b02bb806 +2178 pipelines.py f9b7b84938060751e15e45de5133dffe50c798bff2a20019206fe7c9d677ad49 +716 settings.py 37a8f63e123bccc77076d574617a522b30c1d7c5e893ec3d78cc40e1563dd8a6 + sources/ +9991 ChemSpider.py 847013e34c5c3683ec66a337837287512b4bab9fbea2ece12e4130ab0dbf264d +9898 NIST.py 97abc84fce85c47b789822715a1945ab84cc052a32340c861141c1af66bab644 +6907 WikipediaParser.py 5d6de911c773129a34b76c40a9b547aafc67644a15f39cd0be6afc7a16fb0f97 +0 __init__.py e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855 +1262 source.py 16c4cdfca849b7dc2bc89d7a6f7ad021f4aa1d04234394312f1d0edf0fd9c5a4 +3026 spider.py 1ffba2512988b7a6b535a4a31a4ef688ece4f8c595c3d50355c34ef46b23e44a +1081 LICENSE 36951e5f1910bad3e008ab7228f35ad8933192e52d3c3ae6a5e875765e27192c +3965 README.md d21236d6a175be28ef8e2fee8a256e95b6a513163e3f1071c26c62e9093db7f3 +3659 x fourmi.py 81781ed7299e447e6fc551fba69e62cd7a1d63f27dfa063927f4c5c10f5ac331 +200850 log.txt d76e741f9e7b67c2574e9cdbbe499ea4861f6e0bd11e5962fdaf9d8720effef8 +184692 results.csv 
31132f7f394babeb5dfd249aaa714756017b2e1b314b6715f57e6ad9524e5be8|d0bb724f6d714ec7a4a1ad2052f70dd4510b5ac08d616e24b5e9a903dedab586 +261 scrapy.cfg 624c068fd06303daa65b8e0d0d3ef88ac1f123be2694ef5b4f3f9a9dcd983f85 + tests/ +1 __init__.py 01ba4719c80b6fe911b091a7c05124b64eeece964e09c058ef8f9805daca546b +2837 test_configurator.py 4a0eb6e7121eb09a63ab5cb797570d1a42080c5346c3b8b365da56eefa599e80 +1892 test_pipeline.py 387a336b0f36722a20e712aa033e5771c44f9e92561dd73acffd53d622c52031 +1260 test_sourceloader.py b108b4b80adcdb7401273a9823b1f1a19eb5178776186eb5a9976aed8b1ee869 +2113 test_spider.py 300f280377b522737be0d8e4a80031ab118a4011bdbb92131e9c400fcdab6299 + utils/ +0 __init__.py e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855 +3552 configurator.py e2b7e0ee6c1fef4373785dfe5df8ec6950f31ce6a5d9632b69a66ea3d1eaf921 +2537 sourceloader.py f5a5ac2a6aba0658dbe11361f465caabcf3c06c5c8dc9a631874211cc19d2d37 +``` + +#### Ignore + +``` +/SIGNED.md +``` + +#### Presets + +``` +git # ignore .git and anything as described by .gitignore files +dropbox # ignore .dropbox-cache and other Dropbox-related files +kb # ignore anything as described by .kbignore files +``` + + + +### End signed statement + +
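
Each entry in the signed manifest above pairs a file's size and path with its SHA-256 digest (two digests separated by `|` where a file has more than one recorded form). To spot-check a single entry without keybase, one can recompute the digest locally; a minimal sketch in plain Python (not part of the signed tree):

```python
import hashlib

def sha256_of(path, chunk_size=8192):
    """Stream a file through SHA-256 and return its hex digest."""
    digest = hashlib.sha256()
    with open(path, 'rb') as handle:
        for block in iter(lambda: handle.read(chunk_size), b''):
            digest.update(block)
    return digest.hexdigest()

# Compare against the manifest entry for fourmi.py above.
print(sha256_of('fourmi.py'))
```
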

#### Notes

With keybase you can sign any directory's contents, whether it's a git repo,
source code distribution, or a personal documents folder. It aims to replace the drudgery of:

  1. comparing a zipped file to a detached statement
  2. downloading a public key
  3. confirming it is in fact the author's by reviewing public statements they've made, using it

All in one simple command:

```bash
keybase dir verify
```

There are lots of options, including assertions for automating your checks.

For more info, check out https://keybase.io/docs/command_line/code_signing
\ No newline at end of file
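
As a closing note on the series itself: the behavioural core of patches 07 through 14 is the mapping from the counted `-v` flag to Scrapy's log settings. A standalone sketch of that mapping, distilled from the `set_logging` diff in patch 08 (illustrative only; it returns plain data instead of touching `scrapy_settings.overrides`):

```python
def log_settings(verbose=0, logfile=None):
    """Mirror of Configurator.set_logging from patch 08, as pure data."""
    settings = {
        "LOG_ENABLED": verbose != 0,
        "LOG_STDOUT": verbose <= 1,  # -vv and up keep stdout clean
        "LOG_FILE": logfile,
    }
    if verbose == 1:
        settings["LOG_LEVEL"] = "WARNING"
    elif verbose == 2:
        settings["LOG_LEVEL"] = "INFO"
    else:
        settings["LOG_LEVEL"] = "DEBUG"
    return settings

assert log_settings(verbose=2)["LOG_LEVEL"] == "INFO"
```
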