From ca90796904b8d3ff0b0cd0c00f87b1761ddca7ad Mon Sep 17 00:00:00 2001
From: "Jip J. Dekker"
Date: Sun, 1 Jun 2014 19:53:37 +0200
Subject: [PATCH 01/37] Added documentation to the Executable Python file

---
 fourmi.py | 24 +++++++++++++++++++++++-
 1 file changed, 23 insertions(+), 1 deletion(-)

diff --git a/fourmi.py b/fourmi.py
index efa4e54..c09087d 100755
--- a/fourmi.py
+++ b/fourmi.py
@@ -34,6 +34,13 @@ from sourceloader import SourceLoader
 
 
 def setup_crawler(searchable, settings, source_loader, attributes):
+    """
+    This function prepares and start the crawler which starts the actual search on the internet
+    :param searchable: The compound which should be searched
+    :param settings: A scrapy settings object
+    :param source_loader: A fully functional SourceLoader object which contains only the sources that should be used.
+    :param attributes: A list of regular expressions which the attribute names should match.
+    """
     spider = FourmiSpider(compound=searchable, selected_attributes=attributes)
     spider.add_parsers(source_loader.sources)
     crawler = Crawler(settings)
@@ -44,8 +51,13 @@ def setup_crawler(searchable, settings, source_loader, attributes):
 
 
 def scrapy_settings_manipulation(docopt_arguments):
+    """
+    This function manipulates the Scrapy settings that normally would be set in the settings file. In the Fourmi
+    project these are command line arguments.
+    :param docopt_arguments: A dictionary generated by docopt containing all CLI arguments.
+    """
     settings = get_project_settings()
-    # [todo] - add at least a warning for files that already exist
+
     if docopt_arguments["--output"] != 'result.*format*':
         settings.overrides["FEED_URI"] = docopt_arguments["--output"]
     elif docopt_arguments["--format"] == "jsonlines":
@@ -60,6 +72,10 @@
 
 
 def start_log(docopt_arguments):
+    """
+    This function starts the logging functionality of Scrapy using the settings given by the CLI.
+    :param docopt_arguments: A dictionary generated by docopt containing all CLI arguments.
+    """
     if docopt_arguments["--log"] is not None:
         if docopt_arguments["--verbose"]:
             log.start(logfile=docopt_arguments["--log"], logstdout=False, loglevel=log.DEBUG)
@@ -73,12 +89,18 @@
 
 
 def search(docopt_arguments, source_loader):
+    """
+    The function that facilitates the search for a specific compound.
+    :param docopt_arguments: A dictionary generated by docopt containing all CLI arguments.
+    :param source_loader: An initiated SourceLoader object pointed at the directory with the sources.
+    """
     start_log(docopt_arguments)
     settings = scrapy_settings_manipulation(docopt_arguments)
     setup_crawler(docopt_arguments[""], settings, source_loader, docopt_arguments["--attributes"].split(','))
     reactor.run()
 
 
+# The start for the Fourmi Command Line interface.
 if __name__ == '__main__':
     arguments = docopt.docopt(__doc__, version='Fourmi - V0.3.0')
     loader = SourceLoader()

From e272c9f3425d42446abd1f428448edc944f22319 Mon Sep 17 00:00:00 2001
From: "Jip J. Dekker"
Date: Sun, 1 Jun 2014 19:55:10 +0200
Subject: [PATCH 02/37] Changed a parameter name for clarification

---
 fourmi.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/fourmi.py b/fourmi.py
index c09087d..9f32cff 100755
--- a/fourmi.py
+++ b/fourmi.py
@@ -33,15 +33,15 @@ from FourmiCrawler.spider import FourmiSpider
 from sourceloader import SourceLoader
 
 
-def setup_crawler(searchable, settings, source_loader, attributes):
+def setup_crawler(compound, settings, source_loader, attributes):
     """
     This function prepares and start the crawler which starts the actual search on the internet
-    :param searchable: The compound which should be searched
+    :param compound: The compound which should be searched
     :param settings: A scrapy settings object
     :param source_loader: A fully functional SourceLoader object which contains only the sources that should be used.
     :param attributes: A list of regular expressions which the attribute names should match.
     """
-    spider = FourmiSpider(compound=searchable, selected_attributes=attributes)
+    spider = FourmiSpider(compound=compound, selected_attributes=attributes)
     spider.add_parsers(source_loader.sources)
     crawler = Crawler(settings)
     crawler.signals.connect(reactor.stop, signal=signals.spider_closed)

From a040bc7a0263aed473ab1b5ce2f294aeaad81d2b Mon Sep 17 00:00:00 2001
From: "Jip J. Dekker"
Date: Sun, 1 Jun 2014 20:01:19 +0200
Subject: [PATCH 03/37] Added documentation for the sourceloader

---
 sourceloader.py | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/sourceloader.py b/sourceloader.py
index 9957a70..2ed50a8 100644
--- a/sourceloader.py
+++ b/sourceloader.py
@@ -1,6 +1,7 @@
 import inspect
 import os
 import re
+
 from FourmiCrawler.sources.source import Source
 
 
@@ -8,6 +9,10 @@ class SourceLoader:
     sources = []
 
     def __init__(self, rel_dir="FourmiCrawler/sources"):
+        """
+        The initiation of a SourceLoader, selects and indexes a directory for usable sources.
+        :param rel_dir: A relative path to a directory.
+        """
         path = os.path.dirname(os.path.abspath(__file__))
         path += "/" + rel_dir
         known_parser = set()
@@ -21,18 +26,30 @@ class SourceLoader:
                 known_parser.add(cls)
 
     def include(self, source_names):
+        """
+        This function excludes all sources that don't match the given regular expressions.
+        :param source_names: A list of regular expression (strings)
+        """
         new = set()
         for name in source_names:
             new.update([src for src in self.sources if re.match(name, src.__class__.__name__)])
         self.sources = list(new)
 
     def exclude(self, source_names):
+        """
+        This function excludes all sources that match the given regular expressions.
+        :param source_names: A list of regular expression (strings)
+        """
         exclude = []
         for name in source_names:
             exclude.extend([src for src in self.sources if re.match(name, src.__class__.__name__)])
         self.sources = [src for src in self.sources if src not in exclude]
 
     def __str__(self):
+        """
+        This function returns a string with all sources currently available in the SourceLoader.
+        :return: a string with all available sources.
+        """
         string = ""
         for src in self.sources:
             string += "Source: " + src.__class__.__name__

From c4876f029baa41dd17197f0fb72fc5c466f71d1d Mon Sep 17 00:00:00 2001
From: "Jip J. Dekker"
Date: Sun, 1 Jun 2014 20:14:47 +0200
Subject: [PATCH 04/37] Added documentation to the FourmiSpider

---
 FourmiCrawler/spider.py | 34 +++++++++++++++++++++++++++++++++-
 1 file changed, 33 insertions(+), 1 deletion(-)

diff --git a/FourmiCrawler/spider.py b/FourmiCrawler/spider.py
index 87f22c6..8ec18cc 100644
--- a/FourmiCrawler/spider.py
+++ b/FourmiCrawler/spider.py
@@ -1,19 +1,34 @@
+import re
+
 from scrapy.spider import Spider
 from scrapy import log
-import re
 
 
 class FourmiSpider(Spider):
+    """
+    A spider writen for the Fourmi Project which calls upon all available sources to request and scrape data.
+    """
     name = "FourmiSpider"
     __parsers = []
     synonyms = []
 
     def __init__(self, compound=None, selected_attributes=[".*"], *args, **kwargs):
+        """
+        Initiation of the Spider
+        :param compound: compound that will be searched.
+        :param selected_attributes: A list of regular expressions that the attributes should match.
+        """
         super(FourmiSpider, self).__init__(*args, **kwargs)
         self.synonyms.append(compound)
         self.selected_attributes = selected_attributes;
 
     def parse(self, reponse):
+        """
+        The function that is called when a response to a request is available. This function distributes this to a
+        parser which should be able to handle parsing the data.
+        :param reponse: A Scrapy Response object that should be parsed
+        :return: A list of Result items and new Request to be handled by the scrapy core.
+        """
         for parser in self.__parsers:
             if re.match(parser.website, reponse.url):
                 log.msg("Url: " + reponse.url + " -> Source: " + parser.website, level=log.DEBUG)
@@ -21,6 +36,11 @@ class FourmiSpider(Spider):
         return None
 
     def get_synonym_requests(self, compound):
+        """
+        A function that generates new Scrapy Request for each source given a new synonym of a compound.
+        :param compound: A compound name
+        :return: A list of Scrapy Request objects
+        """
         requests = []
         for parser in self.__parsers:
             parser_requests = parser.new_compound_request(compound)
@@ -29,15 +49,27 @@ class FourmiSpider(Spider):
         return requests
 
     def start_requests(self):
+        """
+        The function called by Scrapy for it's first Requests
+        :return: A list of Scrapy Request generated from the known synonyms using the available sources.
+        """
        requests = []
        for synonym in self.synonyms:
            requests.extend(self.get_synonym_requests(synonym))
        return requests
 
     def add_parsers(self, parsers):
+        """
+        A function to add a new Parser objects to the list of available parsers.
+        :param parsers: A list of Parser Objects.
+        """
         for parser in parsers:
             self.add_parser(parser)
 
     def add_parser(self, parser):
+        """
+        A function add a new Parser object to the list of available parsers.
+        :param parser: A Parser Object
+        """
         self.__parsers.append(parser)
         parser.set_spider(self)
\ No newline at end of file

From 3499946e97be70b98de89566a30999ba0d1666b8 Mon Sep 17 00:00:00 2001
From: "Jip J. Dekker"
Date: Sun, 1 Jun 2014 20:15:15 +0200
Subject: [PATCH 05/37] Fixed a typo

---
 FourmiCrawler/spider.py | 10 +++++-----
 1 file changed, 5 insertions(+), 5 deletions(-)

diff --git a/FourmiCrawler/spider.py b/FourmiCrawler/spider.py
index 8ec18cc..a58b6ea 100644
--- a/FourmiCrawler/spider.py
+++ b/FourmiCrawler/spider.py
@@ -22,17 +22,17 @@ class FourmiSpider(Spider):
         self.synonyms.append(compound)
         self.selected_attributes = selected_attributes;
 
-    def parse(self, reponse):
+    def parse(self, response):
         """
         The function that is called when a response to a request is available. This function distributes this to a
         parser which should be able to handle parsing the data.
-        :param reponse: A Scrapy Response object that should be parsed
+        :param response: A Scrapy Response object that should be parsed
         :return: A list of Result items and new Request to be handled by the scrapy core.
         """
         for parser in self.__parsers:
-            if re.match(parser.website, reponse.url):
-                log.msg("Url: " + reponse.url + " -> Source: " + parser.website, level=log.DEBUG)
-                return parser.parse(reponse)
+            if re.match(parser.website, response.url):
+                log.msg("Url: " + response.url + " -> Source: " + parser.website, level=log.DEBUG)
+                return parser.parse(response)
         return None
 
     def get_synonym_requests(self, compound):

From c27a875d681d0f912570bef4a583b85ea483bdbe Mon Sep 17 00:00:00 2001
From: "Jip J. Dekker"
Date: Sun, 1 Jun 2014 20:18:03 +0200
Subject: [PATCH 06/37] Parser/Source consistency

---
 FourmiCrawler/spider.py | 32 ++++++++++++++++----------------
 fourmi.py               |  2 +-
 2 files changed, 17 insertions(+), 17 deletions(-)

diff --git a/FourmiCrawler/spider.py b/FourmiCrawler/spider.py
index a58b6ea..08abb6b 100644
--- a/FourmiCrawler/spider.py
+++ b/FourmiCrawler/spider.py
@@ -9,7 +9,7 @@ class FourmiSpider(Spider):
     A spider writen for the Fourmi Project which calls upon all available sources to request and scrape data.
     """
     name = "FourmiSpider"
-    __parsers = []
+    __sources = []
     synonyms = []
 
     def __init__(self, compound=None, selected_attributes=[".*"], *args, **kwargs):
@@ -25,14 +25,14 @@ class FourmiSpider(Spider):
     def parse(self, response):
         """
         The function that is called when a response to a request is available. This function distributes this to a
-        parser which should be able to handle parsing the data.
+        source which should be able to handle parsing the data.
         :param response: A Scrapy Response object that should be parsed
         :return: A list of Result items and new Request to be handled by the scrapy core.
         """
-        for parser in self.__parsers:
-            if re.match(parser.website, response.url):
-                log.msg("Url: " + response.url + " -> Source: " + parser.website, level=log.DEBUG)
-                return parser.parse(response)
+        for source in self.__sources:
+            if re.match(source.website, response.url):
+                log.msg("Url: " + response.url + " -> Source: " + source.website, level=log.DEBUG)
+                return source.parse(response)
         return None
 
     def get_synonym_requests(self, compound):
@@ -42,7 +42,7 @@ class FourmiSpider(Spider):
         :return: A list of Scrapy Request objects
         """
         requests = []
-        for parser in self.__parsers:
+        for parser in self.__sources:
             parser_requests = parser.new_compound_request(compound)
             if parser_requests is not None:
                 requests.append(parser_requests)
@@ -58,18 +58,18 @@ class FourmiSpider(Spider):
         requests.extend(self.get_synonym_requests(synonym))
         return requests
 
-    def add_parsers(self, parsers):
+    def add_sources(self, sources):
         """
-        A function to add a new Parser objects to the list of available parsers.
-        :param parsers: A list of Parser Objects.
+        A function to add a new Parser objects to the list of available sources.
+        :param sources: A list of Source Objects.
         """
-        for parser in parsers:
-            self.add_parser(parser)
+        for parser in sources:
+            self.add_source(parser)
 
-    def add_parser(self, parser):
+    def add_source(self, source):
         """
         A function add a new Parser object to the list of available parsers.
-        :param parser: A Parser Object
+        :param source: A Source Object
         """
-        self.__parsers.append(parser)
-        parser.set_spider(self)
\ No newline at end of file
+        self.__sources.append(source)
+        source.set_spider(self)
\ No newline at end of file
diff --git a/fourmi.py b/fourmi.py
index 9f32cff..945c8a2 100755
--- a/fourmi.py
+++ b/fourmi.py
@@ -42,7 +42,7 @@ def setup_crawler(compound, settings, source_loader, attributes):
     :param attributes: A list of regular expressions which the attribute names should match.
     """
     spider = FourmiSpider(compound=compound, selected_attributes=attributes)
-    spider.add_parsers(source_loader.sources)
+    spider.add_sources(source_loader.sources)
     crawler = Crawler(settings)
     crawler.signals.connect(reactor.stop, signal=signals.spider_closed)
     crawler.configure()

From f7d0fb4a450c10ab6ce147406f216e537f474c32 Mon Sep 17 00:00:00 2001
From: "Jip J. Dekker"
Date: Sun, 1 Jun 2014 20:24:54 +0200
Subject: [PATCH 07/37] Added documentation to the basic Source

---
 FourmiCrawler/sources/source.py | 17 +++++++++++++++++
 1 file changed, 17 insertions(+)

diff --git a/FourmiCrawler/sources/source.py b/FourmiCrawler/sources/source.py
index 3c51724..1ac0b9e 100644
--- a/FourmiCrawler/sources/source.py
+++ b/FourmiCrawler/sources/source.py
@@ -7,15 +7,32 @@ class Source:
     _spider = None
 
     def __init__(self):
+        """
+        Initiation of a new Source
+        """
         pass
 
     def parse(self, reponse):
+        """
+        This function should be able to parse all Scrapy Response objects with a URL matching the website Regex.
+        :param reponse: A Scrapy Response object
+        :return: A list of Result items and new Scrapy Requests
+        """
         log.msg("The parse function of the empty parser was used.", level=log.WARNING)
         pass
 
     def new_compound_request(self, compound):
+        """
+        This function should return a Scrapy Request for the given compound request.
+        :param compound: A compound name.
+        :return: A new Scrapy Request
+        """
         # return Request(url=self.website[:-1] + compound, callback=self.parse)
         pass
 
     def set_spider(self, spider):
+        """
+        A Function to save the associated spider.
+        :param spider: A FourmiSpider object
+        """
         self._spider = spider

From f81b1c950074a8ab181b3f91034f58db9c2b8c54 Mon Sep 17 00:00:00 2001
From: "Jip J. Dekker"
Date: Sun, 1 Jun 2014 20:25:46 +0200
Subject: [PATCH 08/37] Fixed a typo

---
 FourmiCrawler/sources/source.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/FourmiCrawler/sources/source.py b/FourmiCrawler/sources/source.py
index 1ac0b9e..d289d72 100644
--- a/FourmiCrawler/sources/source.py
+++ b/FourmiCrawler/sources/source.py
@@ -12,13 +12,13 @@ class Source:
         """
         pass
 
-    def parse(self, reponse):
+    def parse(self, response):
         """
         This function should be able to parse all Scrapy Response objects with a URL matching the website Regex.
-        :param reponse: A Scrapy Response object
+        :param response: A Scrapy Response object
         :return: A list of Result items and new Scrapy Requests
         """
-        log.msg("The parse function of the empty parser was used.", level=log.WARNING)
+        log.msg("The parse function of the empty source was used.", level=log.WARNING)
         pass
 
     def new_compound_request(self, compound):

From aac0a7c79c661db1c452bc5d31c9b2c77589701c Mon Sep 17 00:00:00 2001
From: "Jip J. Dekker"
Date: Sun, 1 Jun 2014 20:29:51 +0200
Subject: [PATCH 09/37] References to the main Scrapy documentation

---
 FourmiCrawler/items.py     | 4 +---
 FourmiCrawler/pipelines.py | 7 +++----
 2 files changed, 4 insertions(+), 7 deletions(-)

diff --git a/FourmiCrawler/items.py b/FourmiCrawler/items.py
index c7fd41c..9f9a516 100644
--- a/FourmiCrawler/items.py
+++ b/FourmiCrawler/items.py
@@ -1,6 +1,4 @@
-# Define here the models for your scraped items
-#
-# See documentation in:
+# For more information on item definitions, see the Scrapy documentation in:
 # http://doc.scrapy.org/en/latest/topics/items.html
 
 from scrapy.item import Item, Field
diff --git a/FourmiCrawler/pipelines.py b/FourmiCrawler/pipelines.py
index e1dadbf..ff7ceed 100644
--- a/FourmiCrawler/pipelines.py
+++ b/FourmiCrawler/pipelines.py
@@ -1,8 +1,7 @@
-# Define your item pipelines here
-#
-# Don't forget to add your pipeline to the ITEM_PIPELINES setting
-# See: http://doc.scrapy.org/en/latest/topics/item-pipeline.html
+# For more information on item pipelines, see the Scrapy documentation in:
+# http://doc.scrapy.org/en/latest/topics/item-pipeline.html
 import re
+
 from scrapy.exceptions import DropItem

From d4a0ffdff3216aa0af7273e639e24fb2034adced Mon Sep 17 00:00:00 2001
From: "Jip J. Dekker"
Date: Wed, 4 Jun 2014 12:01:05 +0200
Subject: [PATCH 10/37] Optimized imports

---
 FourmiCrawler/sources/ChemSpider.py      | 7 +++++--
 FourmiCrawler/sources/NIST.py            | 7 +++++--
 FourmiCrawler/sources/WikipediaParser.py | 6 ++++--
 3 files changed, 14 insertions(+), 6 deletions(-)

diff --git a/FourmiCrawler/sources/ChemSpider.py b/FourmiCrawler/sources/ChemSpider.py
index 2fcd07c..254c1a5 100644
--- a/FourmiCrawler/sources/ChemSpider.py
+++ b/FourmiCrawler/sources/ChemSpider.py
@@ -1,9 +1,12 @@
-from source import Source
+import re
+
 from scrapy import log
 from scrapy.http import Request
 from scrapy.selector import Selector
+
+from source import Source
 from FourmiCrawler.items import Result
-import re
+
 
 # [TODO] - Maybe clean up usage of '.extract()[0]', because of possible IndexError exception.
diff --git a/FourmiCrawler/sources/NIST.py b/FourmiCrawler/sources/NIST.py
index 0b75b17..2fe5966 100644
--- a/FourmiCrawler/sources/NIST.py
+++ b/FourmiCrawler/sources/NIST.py
@@ -1,9 +1,12 @@
-from source import Source
+import re
+
 from scrapy import log
 from scrapy.http import Request
 from scrapy.selector import Selector
+
+from source import Source
 from FourmiCrawler.items import Result
-import re
+
 
 # [TODO]: values can be '128.', perhaps remove the dot in that case?
 # [TODO]: properties have references and comments which do not exist in the
diff --git a/FourmiCrawler/sources/WikipediaParser.py b/FourmiCrawler/sources/WikipediaParser.py
index cb7d0b9..c4f7a0f 100644
--- a/FourmiCrawler/sources/WikipediaParser.py
+++ b/FourmiCrawler/sources/WikipediaParser.py
@@ -1,9 +1,11 @@
+import re
+
 from scrapy.http import Request
 from scrapy import log
-from source import Source
 from scrapy.selector import Selector
+
+from source import Source
 from FourmiCrawler.items import Result
-import re
 
 
 class WikipediaParser(Source):

From 7a8c0fe6adefd23cda38218ec63b58454d0a2344 Mon Sep 17 00:00:00 2001
From: "Jip J. Dekker"
Date: Wed, 4 Jun 2014 12:18:15 +0200
Subject: [PATCH 11/37] Added a basic testing structure

---
 .travis.yml                | 15 +++++++++++++++
 tests/__init__.py          |  1 +
 tests/test_sourceloader.py |  5 +++++
 3 files changed, 21 insertions(+)
 create mode 100644 .travis.yml
 create mode 100644 tests/__init__.py
 create mode 100644 tests/test_sourceloader.py

diff --git a/.travis.yml b/.travis.yml
new file mode 100644
index 0000000..7a243ba
--- /dev/null
+++ b/.travis.yml
@@ -0,0 +1,15 @@
+# Config file for automatic testing at travis-ci.org
+
+language: python
+python: 2.7
+
+# command to install dependencies, e.g. pip install -r requirements.txt --use-mirrors
+install:
+  - pip install flake8 Scrapy docopt
+
+# command to run tests, e.g. python setup.py test
+script:
+  - nosetests tests
+  - make lint
+
+after_success: coveralls
\ No newline at end of file
diff --git a/tests/__init__.py b/tests/__init__.py
new file mode 100644
index 0000000..34a27d6
--- /dev/null
+++ b/tests/__init__.py
@@ -0,0 +1 @@
+__author__ = 'jdekker'
diff --git a/tests/test_sourceloader.py b/tests/test_sourceloader.py
new file mode 100644
index 0000000..c7ccff9
--- /dev/null
+++ b/tests/test_sourceloader.py
@@ -0,0 +1,5 @@
+import unittest
+
+
+class TestSourceloader(unittest.TestCase):
+    pass
\ No newline at end of file

From c3d2bf92e5c98e3dc5e1990bc1fae074dcca5fe9 Mon Sep 17 00:00:00 2001
From: "Jip J. Dekker"
Date: Wed, 4 Jun 2014 12:43:33 +0200
Subject: [PATCH 12/37] Added tests for the source loader

---
 tests/test_sourceloader.py | 31 ++++++++++++++++++++++++++++++-
 1 file changed, 30 insertions(+), 1 deletion(-)

diff --git a/tests/test_sourceloader.py b/tests/test_sourceloader.py
index c7ccff9..d97578b 100644
--- a/tests/test_sourceloader.py
+++ b/tests/test_sourceloader.py
@@ -1,5 +1,34 @@
 import unittest
 
+from sourceloader import SourceLoader
+
 
 class TestSourceloader(unittest.TestCase):
-    pass
\ No newline at end of file
+
+    def setUp(self):
+        self.loader = SourceLoader()
+
+    def test_init(self):
+        # Test if sourceloader points to the right directory, where the sources are present.
+        self.assertIn("Source: Source", str(self.loader))
+        self.assertIn("Source: NIST", str(self.loader))
+        self.assertIn("Source: ChemSpider", str(self.loader))
+        self.assertIn("Source: WikipediaParser", str(self.loader))
+
+    def test_include(self):
+        #Tests for the include functionality.
+        self.loader.include(["So.rc.*"])
+
+        self.assertIn("Source: Source", str(self.loader))
+        self.assertNotIn("Source: NIST", str(self.loader))
+        self.assertNotIn("Source: ChemSpider", str(self.loader))
+        self.assertNotIn("Source: WikipediaParser", str(self.loader))
+
+    def test_exclude(self):
+        #Tests for the exclude functionality.
+        self.loader.exclude(["So.rc.*"])
+
+        self.assertNotIn("Source: Source", str(self.loader))
+        self.assertIn("Source: NIST", str(self.loader))
+        self.assertIn("Source: ChemSpider", str(self.loader))
+        self.assertIn("Source: WikipediaParser", str(self.loader))

From 704c5c25deebc473b117d65d5180c251af9c1121 Mon Sep 17 00:00:00 2001
From: "Jip J. Dekker"
Date: Wed, 4 Jun 2014 12:51:15 +0200
Subject: [PATCH 13/37] Travis CI (hopefully working settings)

---
 .travis.yml | 7 ++-----
 1 file changed, 2 insertions(+), 5 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index 7a243ba..2c4e998 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -5,11 +5,8 @@ python: 2.7
 
 # command to install dependencies, e.g. pip install -r requirements.txt --use-mirrors
 install:
-  - pip install flake8 Scrapy docopt
+  - pip install Scrapy docopt
 
 # command to run tests, e.g. python setup.py test
 script:
-  - nosetests tests
-  - make lint
-
-after_success: coveralls
\ No newline at end of file
+  - nosetests tests
\ No newline at end of file

From 55130ea38884c18c8df12003cf0b9dcf3e80c3ad Mon Sep 17 00:00:00 2001
From: "Jip J. Dekker"
Date: Wed, 4 Jun 2014 12:59:22 +0200
Subject: [PATCH 14/37] Added Travis badges to the README

---
 README.md | 3 +++
 1 file changed, 3 insertions(+)

diff --git a/README.md b/README.md
index e9150a6..7769216 100644
--- a/README.md
+++ b/README.md
@@ -1,5 +1,8 @@
 # Fourmi
 
+**Master branch**: [![Build Status](https://travis-ci.org/Recondor/Fourmi.svg?branch=master)](https://travis-ci.org/Recondor/Fourmi)
+**Developing branch**: [![Build Status](https://travis-ci.org/Recondor/Fourmi.svg?branch=develop)](https://travis-ci.org/Recondor/Fourmi)
+
 Fourmi is an web scraper for chemical substances. The program is designed to be
 used as a search engine to search multiple chemical databases for a specific
 substance. The program will produce all available attributes of the substance

From b9252cc3fd626fe198280c142cc06972e9f16c38 Mon Sep 17 00:00:00 2001
From: "Jip J. Dekker"
Date: Wed, 4 Jun 2014 15:18:27 +0200
Subject: [PATCH 15/37] Removed name from __init__ file

---
 tests/__init__.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/tests/__init__.py b/tests/__init__.py
index 34a27d6..8b13789 100644
--- a/tests/__init__.py
+++ b/tests/__init__.py
@@ -1 +1 @@
-__author__ = 'jdekker'
+

From b6afb3b2b56b8886e99f8c058ee634a6fa6d9503 Mon Sep 17 00:00:00 2001
From: "Jip J. Dekker"
Date: Wed, 4 Jun 2014 15:22:18 +0200
Subject: [PATCH 16/37] Made Fourmi a python package

---
 __init__.py                | 1 +
 tests/test_sourceloader.py | 1 -
 2 files changed, 1 insertion(+), 1 deletion(-)
 create mode 100644 __init__.py

diff --git a/__init__.py b/__init__.py
new file mode 100644
index 0000000..8b13789
--- /dev/null
+++ b/__init__.py
@@ -0,0 +1 @@
+
diff --git a/tests/test_sourceloader.py b/tests/test_sourceloader.py
index d97578b..cf5ed0f 100644
--- a/tests/test_sourceloader.py
+++ b/tests/test_sourceloader.py
@@ -2,7 +2,6 @@ import unittest
 
 from sourceloader import SourceLoader
 
-
 class TestSourceloader(unittest.TestCase):
 
     def setUp(self):

From 26702666b61c3b8734f9d96d9fd851d0252e77a8 Mon Sep 17 00:00:00 2001
From: "Jip J. Dekker"
Date: Wed, 4 Jun 2014 16:06:41 +0200
Subject: [PATCH 17/37] Added travis notification to the slack channel

---
 .travis.yml | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/.travis.yml b/.travis.yml
index 2c4e998..63c9412 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -9,4 +9,7 @@ install:
 
 # command to run tests, e.g. python setup.py test
 script:
-  - nosetests tests
\ No newline at end of file
+  - nosetests tests
+
+notifications:
+  slack: descartes2:6sgCzx3PvrO9IIMwKxj12dDM
\ No newline at end of file

From c48c4ec697520e9f73fe367a8d7b810cfdf6e277 Mon Sep 17 00:00:00 2001
From: "Jip J. Dekker"
Date: Wed, 4 Jun 2014 16:09:55 +0200
Subject: [PATCH 18/37] None pipeline doesn't need a set

---
 FourmiCrawler/pipelines.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/FourmiCrawler/pipelines.py b/FourmiCrawler/pipelines.py
index 0d4a405..2dfd531 100644
--- a/FourmiCrawler/pipelines.py
+++ b/FourmiCrawler/pipelines.py
@@ -7,7 +7,7 @@ from scrapy.exceptions import DropItem
 class RemoveNonePipeline(object):
 
     def __init__(self):
-        self.known_values = set()
+        pass
 
     def process_item(self, item, spider):
         """

From 75c0be1fea2e35be255c01499dec6c3906c5c868 Mon Sep 17 00:00:00 2001
From: "Jip J. Dekker"
Date: Wed, 4 Jun 2014 16:50:14 +0200
Subject: [PATCH 19/37] Added tests for the pipeline

---
 FourmiCrawler/pipelines.py |  2 +-
 tests/test_pipeline.py     | 48 ++++++++++++++++++++++++++++++++++++++
 2 files changed, 49 insertions(+), 1 deletion(-)
 create mode 100644 tests/test_pipeline.py

diff --git a/FourmiCrawler/pipelines.py b/FourmiCrawler/pipelines.py
index 2dfd531..1bcba3a 100644
--- a/FourmiCrawler/pipelines.py
+++ b/FourmiCrawler/pipelines.py
@@ -35,7 +35,7 @@ class DuplicatePipeline(object):
         """
         value = (item['attribute'], item['value'], item['conditions'])
         if value in self.known_values:
-            raise DropItem("Duplicate item found: %s" % item)  # #[todo] append sources of first item.
+            raise DropItem("Duplicate item found: %s" % item)  #[todo] append sources of first item.
         else:
             self.known_values.add(value)
         return item
diff --git a/tests/test_pipeline.py b/tests/test_pipeline.py
new file mode 100644
index 0000000..9500272
--- /dev/null
+++ b/tests/test_pipeline.py
@@ -0,0 +1,48 @@
+import copy
+import unittest
+from FourmiCrawler import pipelines, spider, items
+from scrapy.exceptions import DropItem
+
+
+class TestPipelines(unittest.TestCase):
+
+    def setUp(self):
+        self.testItem = items.Result()
+
+    def test_NonePipeline(self):
+        self.testItem["value"] = "abc"
+        pipe = pipelines.RemoveNonePipeline()
+        processed = pipe.process_item(self.testItem, spider.FourmiSpider())
+
+        self.assertTrue(processed["value"] == "abc")
+
+        for key in self.testItem:
+            self.assertIsNotNone(processed[key])
+            if key is not "value":
+                self.assertIs(processed[key], "")
+
+    def test_DuplicatePipeline(self):
+        self.testItem["attribute"] = "test"
+        self.testItem["value"] = "test"
+        self.testItem["conditions"] = "test"
+
+        pipe = pipelines.DuplicatePipeline()
+        self.assertEqual(pipe.process_item(self.testItem, spider.FourmiSpider()), self.testItem)
+        self.assertRaises(DropItem, pipe.process_item, self.testItem, spider.FourmiSpider())
+
+        otherItem = copy.deepcopy(self.testItem)
+        otherItem["value"] = "test1"
+        self.assertEqual(pipe.process_item(otherItem, spider.FourmiSpider()), otherItem)
+
+    def test_AttributeSelection(self):
+        item1 = copy.deepcopy(self.testItem)
+        item2 = copy.deepcopy(self.testItem)
+
+        item1["attribute"] = "abd"
+        item2["attribute"] = "abc"
+
+        s = spider.FourmiSpider(selected_attributes=["a.d"])
+        pipe = pipelines.AttributeSelectionPipeline()
+
+        self.assertEqual(pipe.process_item(item1, s), item1)
+        self.assertRaises(DropItem, pipe.process_item, item2, s)
\ No newline at end of file

From 743989edb875c5824c02a450418308142cc29d66 Mon Sep 17 00:00:00 2001
From: "Jip J. Dekker"
Date: Wed, 4 Jun 2014 17:46:03 +0200
Subject: [PATCH 20/37] Second badge on a new line

---
 README.md | 1 +
 1 file changed, 1 insertion(+)

diff --git a/README.md b/README.md
index 7769216..2b286a0 100644
--- a/README.md
+++ b/README.md
@@ -1,6 +1,7 @@
 # Fourmi
 
 **Master branch**: [![Build Status](https://travis-ci.org/Recondor/Fourmi.svg?branch=master)](https://travis-ci.org/Recondor/Fourmi)
+
 **Developing branch**: [![Build Status](https://travis-ci.org/Recondor/Fourmi.svg?branch=develop)](https://travis-ci.org/Recondor/Fourmi)
 
 Fourmi is an web scraper for chemical substances. The program is designed to be

From b9a8c65d24f9f96258254251eaac47e7a3012744 Mon Sep 17 00:00:00 2001
From: "Jip J. Dekker"
Date: Wed, 4 Jun 2014 17:46:33 +0200
Subject: [PATCH 21/37] For testing, Fourmi should not be a package

---
 __init__.py | 1 -
 1 file changed, 1 deletion(-)
 delete mode 100644 __init__.py

diff --git a/__init__.py b/__init__.py
deleted file mode 100644
index 8b13789..0000000
--- a/__init__.py
+++ /dev/null
@@ -1 +0,0 @@
-

From 1557d1787756d099094eada65ac49b09864ec95f Mon Sep 17 00:00:00 2001
From: "Jip J. Dekker"
Date: Wed, 4 Jun 2014 18:22:28 +0200
Subject: [PATCH 22/37] Added documentation to the test cases

---
 tests/test_pipeline.py | 7 ++++++-
 1 file changed, 6 insertions(+), 1 deletion(-)

diff --git a/tests/test_pipeline.py b/tests/test_pipeline.py
index 9500272..f1fab36 100644
--- a/tests/test_pipeline.py
+++ b/tests/test_pipeline.py
@@ -1,8 +1,10 @@
 import copy
 import unittest
-from FourmiCrawler import pipelines, spider, items
+
 from scrapy.exceptions import DropItem
 
+from FourmiCrawler import pipelines, spider, items
+
 
 class TestPipelines(unittest.TestCase):
 
@@ -10,6 +12,7 @@ class TestPipelines(unittest.TestCase):
         self.testItem = items.Result()
 
     def test_NonePipeline(self):
+        #Testing the pipeline that replaces the None values in items.
         self.testItem["value"] = "abc"
         pipe = pipelines.RemoveNonePipeline()
         processed = pipe.process_item(self.testItem, spider.FourmiSpider())
@@ -22,6 +25,7 @@ class TestPipelines(unittest.TestCase):
             self.assertIs(processed[key], "")
 
     def test_DuplicatePipeline(self):
+        #Testing the pipeline that removes duplicates.
         self.testItem["attribute"] = "test"
         self.testItem["value"] = "test"
         self.testItem["conditions"] = "test"
@@ -35,6 +39,7 @@ class TestPipelines(unittest.TestCase):
         self.assertEqual(pipe.process_item(otherItem, spider.FourmiSpider()), otherItem)
 
     def test_AttributeSelection(self):
+        #Testing the pipeline that selects attributes.
         item1 = copy.deepcopy(self.testItem)
         item2 = copy.deepcopy(self.testItem)

From f128c5431215f5011c3ae1d26f65a8191e4a0c2d Mon Sep 17 00:00:00 2001
From: "Jip J. Dekker"
Date: Wed, 4 Jun 2014 18:34:31 +0200
Subject: [PATCH 23/37] Sources don't need to be mangled

---
 FourmiCrawler/spider.py | 8 ++++----
 1 file changed, 4 insertions(+), 4 deletions(-)

diff --git a/FourmiCrawler/spider.py b/FourmiCrawler/spider.py
index 08abb6b..6a9a12e 100644
--- a/FourmiCrawler/spider.py
+++ b/FourmiCrawler/spider.py
@@ -9,7 +9,7 @@ class FourmiSpider(Spider):
     A spider writen for the Fourmi Project which calls upon all available sources to request and scrape data.
     """
     name = "FourmiSpider"
-    __sources = []
+    _sources = []
     synonyms = []
 
     def __init__(self, compound=None, selected_attributes=[".*"], *args, **kwargs):
@@ -29,7 +29,7 @@ class FourmiSpider(Spider):
         :param response: A Scrapy Response object that should be parsed
         :return: A list of Result items and new Request to be handled by the scrapy core.
         """
-        for source in self.__sources:
+        for source in self._sources:
             if re.match(source.website, response.url):
                 log.msg("Url: " + response.url + " -> Source: " + source.website, level=log.DEBUG)
                 return source.parse(response)
         return None
@@ -42,7 +42,7 @@ class FourmiSpider(Spider):
         :return: A list of Scrapy Request objects
         """
         requests = []
-        for parser in self.__sources:
+        for parser in self._sources:
             parser_requests = parser.new_compound_request(compound)
             if parser_requests is not None:
                 requests.append(parser_requests)
@@ -71,5 +71,5 @@ class FourmiSpider(Spider):
         A function add a new Parser object to the list of available parsers.
         :param source: A Source Object
         """
-        self.__sources.append(source)
+        self._sources.append(source)
         source.set_spider(self)
\ No newline at end of file

From 0c9862d836a2cd9cf41c022fe183190cbf21ea48 Mon Sep 17 00:00:00 2001
From: "Jip J. Dekker"
Date: Wed, 4 Jun 2014 18:54:29 +0200
Subject: [PATCH 24/37] Damn you semicolon!

---
 FourmiCrawler/spider.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/FourmiCrawler/spider.py b/FourmiCrawler/spider.py
index 6a9a12e..fa1c5e2 100644
--- a/FourmiCrawler/spider.py
+++ b/FourmiCrawler/spider.py
@@ -20,7 +20,7 @@ class FourmiSpider(Spider):
         """
         super(FourmiSpider, self).__init__(*args, **kwargs)
         self.synonyms.append(compound)
-        self.selected_attributes = selected_attributes;
+        self.selected_attributes = selected_attributes
 
     def parse(self, response):
         """

From eb727bd6c4d9cc6dded03a03debc6506b14d7020 Mon Sep 17 00:00:00 2001
From: "Jip J. Dekker"
Date: Wed, 4 Jun 2014 19:12:08 +0200
Subject: [PATCH 25/37] No two requests shall be the same!

---
 FourmiCrawler/spider.py | 14 ++++++++------
 1 file changed, 8 insertions(+), 6 deletions(-)

diff --git a/FourmiCrawler/spider.py b/FourmiCrawler/spider.py
index fa1c5e2..d1b99a7 100644
--- a/FourmiCrawler/spider.py
+++ b/FourmiCrawler/spider.py
@@ -10,7 +10,7 @@ class FourmiSpider(Spider):
     """
     name = "FourmiSpider"
     _sources = []
-    synonyms = []
+    synonyms = set()
 
     def __init__(self, compound=None, selected_attributes=[".*"], *args, **kwargs):
         """
@@ -19,7 +19,7 @@ class FourmiSpider(Spider):
         :param selected_attributes: A list of regular expressions that the attributes should match.
         """
         super(FourmiSpider, self).__init__(*args, **kwargs)
-        self.synonyms.append(compound)
+        self.synonyms.add(compound)
         self.selected_attributes = selected_attributes
 
     def parse(self, response):
@@ -42,10 +42,12 @@ class FourmiSpider(Spider):
         :return: A list of Scrapy Request objects
         """
         requests = []
-        for parser in self._sources:
-            parser_requests = parser.new_compound_request(compound)
-            if parser_requests is not None:
-                requests.append(parser_requests)
+        if compound not in self.synonyms:
+            self.synonyms.add(compound)
+            for parser in self._sources:
+                parser_requests = parser.new_compound_request(compound)
+                if parser_requests is not None:
+                    requests.append(parser_requests)
         return requests
 
     def start_requests(self):

From 918d6729b6828a475923ffe5c5c47851ddd34a91 Mon Sep 17 00:00:00 2001
From: "Jip J. Dekker"
Date: Wed, 4 Jun 2014 19:21:44 +0200
Subject: [PATCH 26/37] Added tests for the Spider

---
 tests/test_spider.py | 57 ++++++++++++++++++++++++++++++++++++++
 1 file changed, 57 insertions(+)
 create mode 100644 tests/test_spider.py

diff --git a/tests/test_spider.py b/tests/test_spider.py
new file mode 100644
index 0000000..086f1c8
--- /dev/null
+++ b/tests/test_spider.py
@@ -0,0 +1,57 @@
+import unittest
+
+from scrapy.http import Request
+
+from FourmiCrawler import spider
+from FourmiCrawler.sources.ChemSpider import ChemSpider
+from FourmiCrawler.sources.source import Source
+
+
+class TestFoumiSpider(unittest.TestCase):
+
+    def setUp(self):
+        self.compound = "test_compound"
+        self.attributes = ["a.*", ".*a"]
+        self.spi = spider.FourmiSpider(self.compound, self.attributes)
+
+    def test_init(self):
+        self.assertIn(self.compound, self.spi.synonyms)
+        for attr in self.attributes:
+            self.assertIn(attr, self.spi.selected_attributes)
+
+    def test_add_source(self):
+        src = Source()
+        self.spi.add_source(src)
+        self.assertIn(src, self.spi._sources)
+
+    def test_add_sources(self):
+        srcs = [Source(), Source(), Source()]
+        self.spi.add_sources(srcs)
+
+        for src in srcs:
+            self.assertIn(src, self.spi._sources)
+
+    def test_start_requests(self):
+        self.spi._sources = []
+
+        src = Source()
+        self.spi.add_source(src)
+        self.assertEqual(self.spi.start_requests(), [])
+
+        src2 = ChemSpider()
+        self.spi.add_source(src2)
+        self.assertIsNotNone(self.spi.start_requests())
+
+    def test_synonym_requests(self):
+        self.spi._sources = []
+
+        src = Source()
+        self.spi.add_source(src)
+        self.assertEqual(self.spi.get_synonym_requests("new_compound"), [])
+        self.assertIn("new_compound", self.spi.synonyms)
+
+        src2 = ChemSpider()
+        self.spi.add_source(src2)
+        self.assertIsInstance(self.spi.get_synonym_requests("other_compound")[0], Request)
+        self.assertIn("other_compound", self.spi.synonyms)
+        self.assertEqual(self.spi.get_synonym_requests("other_compound"), [])
\ No newline at end of file

From e1c01c7af6d7dc41fb90ab5fd2dda207ac98e35c Mon Sep 17 00:00:00 2001
From: "Jip J. Dekker"
Date: Wed, 4 Jun 2014 19:24:55 +0200
Subject: [PATCH 27/37] Added some documentation for the synonyms request

---
 tests/test_spider.py | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/tests/test_spider.py b/tests/test_spider.py
index 086f1c8..f5c8116 100644
--- a/tests/test_spider.py
+++ b/tests/test_spider.py
@@ -15,16 +15,19 @@ class TestFoumiSpider(unittest.TestCase):
         self.spi = spider.FourmiSpider(self.compound, self.attributes)
 
     def test_init(self):
+        # Test the initiation of the Fourmi spider
         self.assertIn(self.compound, self.spi.synonyms)
         for attr in self.attributes:
             self.assertIn(attr, self.spi.selected_attributes)
 
     def test_add_source(self):
+        # Testing the source adding function of the Fourmi spider
         src = Source()
         self.spi.add_source(src)
         self.assertIn(src, self.spi._sources)
 
     def test_add_sources(self):
+        # Testing the function that adds multiple sources
         srcs = [Source(), Source(), Source()]
         self.spi.add_sources(srcs)
 
@@ -32,6 +35,7 @@ class TestFoumiSpider(unittest.TestCase):
             self.assertIn(src, self.spi._sources)
 
     def test_start_requests(self):
+        # A test for the function that generates the start requests
         self.spi._sources = []
 
         src = Source()
@@ -43,6 +47,7 @@ class TestFoumiSpider(unittest.TestCase):
         self.assertIsNotNone(self.spi.start_requests())
 
     def test_synonym_requests(self):
+        # A test for the synonym request function
         self.spi._sources = []
 
         src = Source()

From b3c230e83585606467ddee2d00381690b37fccd3 Mon Sep 17 00:00:00 2001
From: "Jip J. Dekker"
Date: Wed, 4 Jun 2014 19:32:41 +0200
Subject: [PATCH 28/37] Import optimization

---
 tests/test_sourceloader.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/tests/test_sourceloader.py b/tests/test_sourceloader.py
index cf5ed0f..b130e8d 100644
--- a/tests/test_sourceloader.py
+++ b/tests/test_sourceloader.py
@@ -1,4 +1,5 @@
 import unittest
+
 from sourceloader import SourceLoader
 
 

From 046fbed3cd40ae463fb5a9c76e3e291ba9fcc2c9 Mon Sep 17 00:00:00 2001
From: "Jip J. Dekker"
Date: Wed, 4 Jun 2014 19:34:23 +0200
Subject: [PATCH 29/37] Code reformat

---
 FourmiCrawler/pipelines.py               |  8 ++++----
 FourmiCrawler/settings.py                |  2 +-
 FourmiCrawler/sources/ChemSpider.py      |  2 +-
 FourmiCrawler/sources/NIST.py            | 13 ++++++-------
 FourmiCrawler/sources/WikipediaParser.py |  2 +-
 fourmi.py                                |  2 +-
 tests/test_pipeline.py                   |  7 +++----
 tests/test_sourceloader.py               |  5 ++---
 tests/test_spider.py                     |  1 -
 9 files changed, 19 insertions(+), 23 deletions(-)

diff --git a/FourmiCrawler/pipelines.py b/FourmiCrawler/pipelines.py
index 1bcba3a..dd4e11d 100644
--- a/FourmiCrawler/pipelines.py
+++ b/FourmiCrawler/pipelines.py
@@ -4,8 +4,8 @@ import re
 
 from scrapy.exceptions import DropItem
 
-class RemoveNonePipeline(object):
 
+class RemoveNonePipeline(object):
     def __init__(self):
         pass
@@ -21,8 +21,8 @@ class RemoveNonePipeline(object):
                 item[key] = ""
         return item
 
-class DuplicatePipeline(object):
 
+class DuplicatePipeline(object):
     def __init__(self):
         self.known_values = set()
@@ -35,13 +35,13 @@ class DuplicatePipeline(object):
         """
         value = (item['attribute'], item['value'], item['conditions'])
         if value in self.known_values:
-            raise DropItem("Duplicate item found: %s" % item)  #[todo] append sources of first item.
+            raise DropItem("Duplicate item found: %s" % item)  # [todo] append sources of first item.
         else:
             self.known_values.add(value)
         return item
 
-class AttributeSelectionPipeline(object):
 
+class AttributeSelectionPipeline(object):
     def __init__(self):
         pass;
diff --git a/FourmiCrawler/settings.py b/FourmiCrawler/settings.py
index be7c451..8c1df07 100644
--- a/FourmiCrawler/settings.py
+++ b/FourmiCrawler/settings.py
@@ -3,7 +3,7 @@
 # For simplicity, this file contains only the most important settings by
 # default. All the other settings are documented here:
 #
-#     http://doc.scrapy.org/en/latest/topics/settings.html
+# http://doc.scrapy.org/en/latest/topics/settings.html
 #
 
 BOT_NAME = 'FourmiCrawler'
diff --git a/FourmiCrawler/sources/ChemSpider.py b/FourmiCrawler/sources/ChemSpider.py
index 254c1a5..dfada5f 100644
--- a/FourmiCrawler/sources/ChemSpider.py
+++ b/FourmiCrawler/sources/ChemSpider.py
@@ -63,7 +63,7 @@ class ChemSpider(Source):
             # Test for properties without values, with one hardcoded exception
             if (not re.match(r'^\d', prop_value) or
                 (prop_name == 'Polarizability' and
-                 prop_value == '10-24cm3')):
+                     prop_value == '10-24cm3')):
                 continue
 
             # Match for condition in parentheses
diff --git a/FourmiCrawler/sources/NIST.py b/FourmiCrawler/sources/NIST.py
index 2fe5966..a5f784d 100644
--- a/FourmiCrawler/sources/NIST.py
+++ b/FourmiCrawler/sources/NIST.py
@@ -10,7 +10,7 @@ from FourmiCrawler.items import Result
 
 # [TODO]: values can be '128.', perhaps remove the dot in that case?
 # [TODO]: properties have references and comments which do not exist in the
-#   Result item, but should be included eventually.
+# Result item, but should be included eventually.
 
 class NIST(Source):
     """NIST Scraper plugin
@@ -18,7 +18,7 @@ class NIST(Source):
     This plugin manages searching for a chemical on the NIST website and
     parsing the resulting page if the chemical exists on NIST.
     """
-    website = "http://webbook.nist.gov/*" 
+    website = "http://webbook.nist.gov/*"
 
     search = 'cgi/cbook.cgi?Name=%s&Units=SI&cTP=on'
@@ -78,7 +78,7 @@ class NIST(Source):
             requests.extend(self.parse_generic_data(table, summary))
         else:
             log.msg('NIST table: NOT SUPPORTED', level=log.WARNING)
-            continue #Assume unsupported
+            continue # Assume unsupported
         return requests
 
     def parse_generic_info(self, sel):
@@ -106,7 +106,7 @@ class NIST(Source):
         data['IUPAC Standard InChI'] = raw_inchi.extract()[0]
 
         raw_inchikey = ul.xpath('li[strong="IUPAC Standard InChIKey:"]'
-                               '/tt/text()')
+                                '/tt/text()')
         data['IUPAC Standard InChIKey'] = raw_inchikey.extract()[0]
 
         raw_cas_number = ul.xpath('li[strong="CAS Registry Number:"]/text()')
@@ -132,10 +132,10 @@ class NIST(Source):
         results = []
         for tr in table.xpath('tr[td]'):
             extra_data_url = tr.xpath('td[last()][a="Individual data points"]'
-                                     '/a/@href').extract()
+                                      '/a/@href').extract()
             if extra_data_url:
                 request = Request(url=self.website[:-1] + extra_data_url[0],
-                                 callback=self.parse_individual_datapoints)
+                                  callback=self.parse_individual_datapoints)
                 results.append(request)
                 continue
             data = []
@@ -183,7 +183,6 @@ class NIST(Source):
                 })
                 results.append(result)
-
         return results
 
     @staticmethod
diff --git a/FourmiCrawler/sources/WikipediaParser.py b/FourmiCrawler/sources/WikipediaParser.py
index c4f7a0f..868b49f 100644
--- a/FourmiCrawler/sources/WikipediaParser.py
+++ b/FourmiCrawler/sources/WikipediaParser.py
@@ -38,7 +38,7 @@ class WikipediaParser(Source):
         """ scrape data from infobox on wikipedia. """
         items = []
 
-        #be sure to get chembox (wikipedia template)
+        # be sure to get chembox (wikipedia template)
         tr_list = sel.xpath('.//table[@class="infobox bordered"]//td[not(@colspan)]'). \
             xpath('normalize-space(string())')
         prop_names = tr_list[::2]
diff --git a/fourmi.py b/fourmi.py
index b4c2b48..683e257 100755
--- a/fourmi.py
+++ b/fourmi.py
@@ -1,4 +1,4 @@
-#!/usr/bin/env python
+# !/usr/bin/env python
 """
 Fourmi, a web scraper build to search specific information for a given compound (and it's pseudonyms).
diff --git a/tests/test_pipeline.py b/tests/test_pipeline.py
index f1fab36..ab97954 100644
--- a/tests/test_pipeline.py
+++ b/tests/test_pipeline.py
@@ -7,12 +7,11 @@ from FourmiCrawler import pipelines, spider, items
 
 
 class TestPipelines(unittest.TestCase):
-
     def setUp(self):
         self.testItem = items.Result()
 
     def test_NonePipeline(self):
-        #Testing the pipeline that replaces the None values in items.
+        # Testing the pipeline that replaces the None values in items.
         self.testItem["value"] = "abc"
         pipe = pipelines.RemoveNonePipeline()
         processed = pipe.process_item(self.testItem, spider.FourmiSpider())
@@ -25,7 +24,7 @@ class TestPipelines(unittest.TestCase):
             self.assertIs(processed[key], "")
 
     def test_DuplicatePipeline(self):
-        #Testing the pipeline that removes duplicates.
+        # Testing the pipeline that removes duplicates.
         self.testItem["attribute"] = "test"
         self.testItem["value"] = "test"
         self.testItem["conditions"] = "test"
@@ -39,7 +38,7 @@ class TestPipelines(unittest.TestCase):
         self.assertEqual(pipe.process_item(otherItem, spider.FourmiSpider()), otherItem)
 
     def test_AttributeSelection(self):
-        #Testing the pipeline that selects attributes.
+        # Testing the pipeline that selects attributes.
         item1 = copy.deepcopy(self.testItem)
         item2 = copy.deepcopy(self.testItem)
diff --git a/tests/test_sourceloader.py b/tests/test_sourceloader.py
index b130e8d..1afca2d 100644
--- a/tests/test_sourceloader.py
+++ b/tests/test_sourceloader.py
@@ -4,7 +4,6 @@ from sourceloader import SourceLoader
 
 
 class TestSourceloader(unittest.TestCase):
-
     def setUp(self):
         self.loader = SourceLoader()
 
@@ -16,7 +15,7 @@ class TestSourceloader(unittest.TestCase):
         self.assertIn("Source: WikipediaParser", str(self.loader))
 
     def test_include(self):
-        #Tests for the include functionality.
+        # Tests for the include functionality.
         self.loader.include(["So.rc.*"])
 
         self.assertIn("Source: Source", str(self.loader))
@@ -25,7 +24,7 @@ class TestSourceloader(unittest.TestCase):
         self.assertNotIn("Source: WikipediaParser", str(self.loader))
 
     def test_exclude(self):
-        #Tests for the exclude functionality.
+        # Tests for the exclude functionality.
         self.loader.exclude(["So.rc.*"])
 
         self.assertNotIn("Source: Source", str(self.loader))
diff --git a/tests/test_spider.py b/tests/test_spider.py
index f5c8116..66878eb 100644
--- a/tests/test_spider.py
+++ b/tests/test_spider.py
@@ -8,7 +8,6 @@ from FourmiCrawler.sources.source import Source
 
 
 class TestFoumiSpider(unittest.TestCase):
-
     def setUp(self):
         self.compound = "test_compound"
         self.attributes = ["a.*", ".*a"]

From 242e0bf628b2492314447c59a974f45b2cc0fc69 Mon Sep 17 00:00:00 2001
From: "Jip J. Dekker"
Date: Wed, 4 Jun 2014 19:43:33 +0200
Subject: [PATCH 30/37] Code inspection

---
 FourmiCrawler/pipelines.py          |  8 +++++---
 FourmiCrawler/sources/ChemSpider.py |  4 +---
 FourmiCrawler/sources/NIST.py       |  5 +++--
 tests/test_pipeline.py              | 12 ++++++------
 4 files changed, 15 insertions(+), 14 deletions(-)

diff --git a/FourmiCrawler/pipelines.py b/FourmiCrawler/pipelines.py
index dd4e11d..55b0f49 100644
--- a/FourmiCrawler/pipelines.py
+++ b/FourmiCrawler/pipelines.py
@@ -9,7 +9,8 @@ class RemoveNonePipeline(object):
     def __init__(self):
         pass
 
-    def process_item(self, item, spider):
+    @staticmethod
+    def process_item(item, spider):
         """
         Processing the items so None values are replaced by empty strings
         :param item: The incoming item
@@ -43,9 +44,10 @@ class DuplicatePipeline(object):
 
 class AttributeSelectionPipeline(object):
     def __init__(self):
-        pass;
+        pass
 
-    def process_item(self, item, spider):
+    @staticmethod
+    def process_item(item, spider):
         """
         The items are processed using the selected attribute list available in the spider,
         items that don't match the selected items are dropped.
diff --git a/FourmiCrawler/sources/ChemSpider.py b/FourmiCrawler/sources/ChemSpider.py
index dfada5f..8c0bd8b 100644
--- a/FourmiCrawler/sources/ChemSpider.py
+++ b/FourmiCrawler/sources/ChemSpider.py
@@ -61,9 +61,7 @@ class ChemSpider(Source):
             prop_conditions = ''
 
             # Test for properties without values, with one hardcoded exception
-            if (not re.match(r'^\d', prop_value) or
-                (prop_name == 'Polarizability' and
-                     prop_value == '10-24cm3')):
+            if not re.match(r'^\d', prop_value) or (prop_name == 'Polarizability' and prop_value == '10-24cm3'):
                 continue
 
             # Match for condition in parentheses
diff --git a/FourmiCrawler/sources/NIST.py b/FourmiCrawler/sources/NIST.py
index a5f784d..6e8fabb 100644
--- a/FourmiCrawler/sources/NIST.py
+++ b/FourmiCrawler/sources/NIST.py
@@ -78,7 +78,7 @@ class NIST(Source):
             requests.extend(self.parse_generic_data(table, summary))
         else:
             log.msg('NIST table: NOT SUPPORTED', level=log.WARNING)
-            continue # Assume unsupported
+            continue  # Assume unsupported
         return requests
 
     def parse_generic_info(self, sel):
@@ -230,7 +230,8 @@ class NIST(Source):
 
         return results
 
-    def parse_individual_datapoints(self, response):
+    @staticmethod
+    def parse_individual_datapoints(response):
         """Parses the page linked from aggregate data"""
         sel = Selector(response)
         table = sel.xpath('//table[@class="data"]')[0]
diff --git a/tests/test_pipeline.py b/tests/test_pipeline.py
index ab97954..dfb8e83 100644
--- a/tests/test_pipeline.py
+++ b/tests/test_pipeline.py
@@ -10,7 +10,7 @@ class TestPipelines(unittest.TestCase):
     def setUp(self):
         self.testItem = items.Result()
 
-    def test_NonePipeline(self):
+    def test_none_pipeline(self):
         # Testing the pipeline that replaces the None values in items.
         self.testItem["value"] = "abc"
         pipe = pipelines.RemoveNonePipeline()
@@ -23,7 +23,7 @@ class TestPipelines(unittest.TestCase):
             if key is not "value":
                 self.assertIs(processed[key], "")
 
-    def test_DuplicatePipeline(self):
+    def test_duplicate_pipeline(self):
         # Testing the pipeline that removes duplicates.
         self.testItem["attribute"] = "test"
         self.testItem["value"] = "test"
         self.testItem["conditions"] = "test"
@@ -33,11 +33,11 @@ class TestPipelines(unittest.TestCase):
         self.assertEqual(pipe.process_item(self.testItem, spider.FourmiSpider()), self.testItem)
         self.assertRaises(DropItem, pipe.process_item, self.testItem, spider.FourmiSpider())
 
-        otherItem = copy.deepcopy(self.testItem)
-        otherItem["value"] = "test1"
-        self.assertEqual(pipe.process_item(otherItem, spider.FourmiSpider()), otherItem)
+        other_item = copy.deepcopy(self.testItem)
+        other_item["value"] = "test1"
+        self.assertEqual(pipe.process_item(other_item, spider.FourmiSpider()), other_item)
 
-    def test_AttributeSelection(self):
+    def test_attribute_selection(self):
         # Testing the pipeline that selects attributes.
         item1 = copy.deepcopy(self.testItem)
         item2 = copy.deepcopy(self.testItem)

From 9ea8dfbe41b4b247b27b5efdf2632d6c89ce3868 Mon Sep 17 00:00:00 2001
From: "Jip J. Dekker"
Date: Wed, 4 Jun 2014 19:52:54 +0200
Subject: [PATCH 31/37] Bumped the version number

---
 fourmi.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fourmi.py b/fourmi.py
index 683e257..57146ab 100755
--- a/fourmi.py
+++ b/fourmi.py
@@ -102,7 +102,7 @@ def search(docopt_arguments, source_loader):
 
 # The start for the Fourmi Command Line interface.
 if __name__ == '__main__':
-    arguments = docopt.docopt(__doc__, version='Fourmi - V0.4.0')
+    arguments = docopt.docopt(__doc__, version='Fourmi - V0.4.1')
     loader = SourceLoader()
 
     if arguments["--include"]:

From 7a2ba29e77e322c553626a29268120dcf99f73ed Mon Sep 17 00:00:00 2001
From: "Jip J. Dekker"
Date: Wed, 4 Jun 2014 20:17:30 +0200
Subject: [PATCH 32/37] Added coverage

---
 .travis.yml | 8 ++++++--
 1 file changed, 6 insertions(+), 2 deletions(-)

diff --git a/.travis.yml b/.travis.yml
index 63c9412..099f3e1 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -6,10 +6,14 @@ python: 2.7
 # command to install dependencies, e.g. pip install -r requirements.txt --use-mirrors
 install:
   - pip install Scrapy docopt
+  - pip install coverall
 
 # command to run tests, e.g. python setup.py test
 script:
-  - nosetests tests
+  - nosetests --with-coverage --cover-package=FourmiCrawler tests
 
 notifications:
-  slack: descartes2:6sgCzx3PvrO9IIMwKxj12dDM
\ No newline at end of file
+  slack: descartes2:6sgCzx3PvrO9IIMwKxj12dDM
+
+after_succes:
+  coveralls
\ No newline at end of file

From 90b8ac3285524beb9ba68d2c71bd483889951912 Mon Sep 17 00:00:00 2001
From: "Jip J. Dekker"
Date: Wed, 4 Jun 2014 20:35:36 +0200
Subject: [PATCH 33/37] A little typo

---
 .travis.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.travis.yml b/.travis.yml
index 099f3e1..ca5ec2e 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -6,7 +6,7 @@ python: 2.7
 # command to install dependencies, e.g. pip install -r requirements.txt --use-mirrors
 install:
   - pip install Scrapy docopt
-  - pip install coverall
+  - pip install coveralls
 
 # command to run tests, e.g. python setup.py test
 script:

From 56624e4647e765f71c9ae91bb4b4c5565d4f4740 Mon Sep 17 00:00:00 2001
From: "Jip J. Dekker"
Date: Wed, 4 Jun 2014 20:54:41 +0200
Subject: [PATCH 34/37] Not yet uploading stats

---
 .travis.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.travis.yml b/.travis.yml
index ca5ec2e..0dd67f5 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -16,4 +16,4 @@ notifications:
   slack: descartes2:6sgCzx3PvrO9IIMwKxj12dDM
 
 after_succes:
-  coveralls
\ No newline at end of file
+  coveralls --verbose
\ No newline at end of file

From 21d6fbfb2be303172f2393798a7250baf6686a39 Mon Sep 17 00:00:00 2001
From: "Jip J. Dekker"
Date: Wed, 4 Jun 2014 20:59:13 +0200
Subject: [PATCH 35/37] And again it was a typo

---
 .travis.yml | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/.travis.yml b/.travis.yml
index 0dd67f5..34d3a88 100644
--- a/.travis.yml
+++ b/.travis.yml
@@ -15,5 +15,5 @@ script:
 notifications:
   slack: descartes2:6sgCzx3PvrO9IIMwKxj12dDM
 
-after_succes:
+after_success:
   coveralls --verbose
\ No newline at end of file

From 31790cc10c9ce7219c970df20c02023f9de95a80 Mon Sep 17 00:00:00 2001
From: "Jip J. Dekker"
Date: Thu, 5 Jun 2014 15:45:10 +0200
Subject: [PATCH 36/37] Broken script parameter by the code inspector

---
 fourmi.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fourmi.py b/fourmi.py
index 57146ab..1913c9c 100755
--- a/fourmi.py
+++ b/fourmi.py
@@ -1,4 +1,4 @@
-# !/usr/bin/env python
+#!/usr/bin/env python
 """
 Fourmi, a web scraper build to search specific information for a given compound (and it's pseudonyms).

From b68a4e474b2c502c6e34721be3d5dfb8b05b68af Mon Sep 17 00:00:00 2001
From: "Jip J. Dekker"
Date: Thu, 5 Jun 2014 15:46:29 +0200
Subject: [PATCH 37/37] 0.4.2

---
 fourmi.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fourmi.py b/fourmi.py
index 1913c9c..3596cf3 100755
--- a/fourmi.py
+++ b/fourmi.py
@@ -102,7 +102,7 @@ def search(docopt_arguments, source_loader):
 
 # The start for the Fourmi Command Line interface.
 if __name__ == '__main__':
-    arguments = docopt.docopt(__doc__, version='Fourmi - V0.4.1')
+    arguments = docopt.docopt(__doc__, version='Fourmi - V0.4.2')
     loader = SourceLoader()
 
     if arguments["--include"]: