From 971552110a82dadd54b2ec006c3ee165fc17ca35 Mon Sep 17 00:00:00 2001
From: RTB
Date: Tue, 29 Apr 2014 16:30:09 +0200
Subject: [PATCH 01/14] Added test for empty values on properties in ACD/Labs
 tab

---
 FourmiCrawler/sources/ChemSpider.py | 9 +++++++--
 1 file changed, 7 insertions(+), 2 deletions(-)

diff --git a/FourmiCrawler/sources/ChemSpider.py b/FourmiCrawler/sources/ChemSpider.py
index a62f6dd..332c036 100644
--- a/FourmiCrawler/sources/ChemSpider.py
+++ b/FourmiCrawler/sources/ChemSpider.py
@@ -47,7 +47,6 @@ class ChemSpider(Source):
         properties = []
 
         # Predicted - ACD/Labs tab
-        # [TODO] - test if tab contains data, some chemicals do not have data here
         td_list = sel.xpath('.//table[@id="acdlabs-table"]//td').xpath(
             'normalize-space(string())')
         prop_names = td_list[::2]
@@ -58,6 +57,12 @@ class ChemSpider(Source):
             prop_value = prop_value.extract().encode('utf-8')
             prop_conditions = ''
 
+            # Test for properties without values, with one hardcoded exception
+            if (not re.match(r'^\d', prop_value) or
+                    (prop_name == 'Polarizability' and
+                     prop_value == '10-24cm3')):
+                continue
+
             # Match for condition in parentheses
             m = re.match(r'(.*) \((.*)\)', prop_name)
             if m:
@@ -215,4 +220,4 @@ class ChemSpider(Source):
             return None
         searchurl = self.website[:-1] + self.search % compound
         log.msg('chemspider compound', level=log.DEBUG)
-        return Request(url=searchurl, callback=self.parse_searchrequest)
\ No newline at end of file
+        return Request(url=searchurl, callback=self.parse_searchrequest)

From 73753a6294ea7388b58a0ddc167e2df93b4256f5 Mon Sep 17 00:00:00 2001
From: RTB
Date: Thu, 1 May 2014 12:04:44 +0200
Subject: [PATCH 02/14] chemspider source now handles vague search requests

---
 FourmiCrawler/sources/ChemSpider.py | 10 ++++++++--
 1 file changed, 8 insertions(+), 2 deletions(-)

diff --git a/FourmiCrawler/sources/ChemSpider.py b/FourmiCrawler/sources/ChemSpider.py
index 332c036..d7d125b 100644
--- a/FourmiCrawler/sources/ChemSpider.py
+++ b/FourmiCrawler/sources/ChemSpider.py
@@ -205,8 +205,14 @@ class ChemSpider(Source):
         sel = Selector(response)
         log.msg('chemspider parse_searchrequest', level=log.DEBUG)
         sel.register_namespace('cs', 'http://www.chemspider.com/')
-        csid = sel.xpath('.//cs:int/text()').extract()[0]
-        # [TODO] - handle multiple csids in case of vague search term
+        csids = sel.xpath('.//cs:int/text()').extract()
+        if len(csids) == 0:
+            log.msg('ChemSpider found nothing', level=log.ERROR)
+            return
+        elif len(csids) > 1:
+            log.msg('ChemSpider found multiple substances, taking first '
+                    'element', level=log.DEBUG)
+        csid = csids[0]
         structure_url = self.website[:-1] + self.structure % csid
         extendedinfo_url = self.website[:-1] + self.extendedinfo % csid
         log.msg('chemspider URL: %s' % structure_url, level=log.DEBUG)

From ca0a22ae7b7debebb283dece518e29ecbfca15f9 Mon Sep 17 00:00:00 2001
From: RTB
Date: Thu, 1 May 2014 12:14:52 +0200
Subject: [PATCH 03/14] Added test for empty values on properties in
 ChemSpider ExtendedCompoundInfo API

---
 FourmiCrawler/sources/ChemSpider.py | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/FourmiCrawler/sources/ChemSpider.py b/FourmiCrawler/sources/ChemSpider.py
index d7d125b..2fcd07c 100644
--- a/FourmiCrawler/sources/ChemSpider.py
+++ b/FourmiCrawler/sources/ChemSpider.py
@@ -197,7 +197,8 @@ class ChemSpider(Source):
             'reliability': 'Unknown',
             'conditions': ''
         })
-        properties.append(result)
+        if result['value']:
+            properties.append(result)
         return properties
 
     def parse_searchrequest(self, response):
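Note: the empty-value filter introduced in PATCH 01 is plain regex logic and can be exercised outside the crawler. A minimal, self-contained sketch (the sample property data below is invented for illustration):

```python
import re

# Mirrors the filter from PATCH 01: a property is skipped when its value does
# not start with a digit, or when it is the hardcoded 'Polarizability' unit
# fragment '10-24cm3' that ChemSpider reports without a number.
samples = [
    ('Boiling Point (760 mmHg)', '78.4 deg C'),  # kept: starts with a digit
    ('Density', ''),                             # skipped: empty value
    ('LogP', 'n/a'),                             # skipped: no leading digit
    ('Polarizability', '10-24cm3'),              # skipped: hardcoded exception
]
for prop_name, prop_value in samples:
    skip = (not re.match(r'^\d', prop_value) or
            (prop_name == 'Polarizability' and prop_value == '10-24cm3'))
    print('%-25s %-14r -> %s' % (prop_name, prop_value, 'skip' if skip else 'keep'))
```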
From f8d390d3e604bedc2a428cc24824830a8bc31d5a Mon Sep 17 00:00:00 2001
From: Bas Vb
Date: Thu, 1 May 2014 15:04:11 +0200
Subject: [PATCH 04/14] Started fixing the WikipediaParser

---
 FourmiCrawler/sources/WikipediaParser.py | 19 ++++++++++++++++++-
 1 file changed, 18 insertions(+), 1 deletion(-)

diff --git a/FourmiCrawler/sources/WikipediaParser.py b/FourmiCrawler/sources/WikipediaParser.py
index c251fca..8d8cded 100644
--- a/FourmiCrawler/sources/WikipediaParser.py
+++ b/FourmiCrawler/sources/WikipediaParser.py
@@ -37,7 +37,7 @@ class WikipediaParser(Source):
         items = []
 
         #be sure to get both chembox (wikipedia template) and drugbox (wikipedia template) to scrape
-        tr_list = sel.xpath('.//table[@class="infobox bordered" or @class="infobox"]//td[not(@colspan)]').\
+        tr_list = sel.xpath('.//table[@class="infobox bordered"]//td[not(@colspan)]').\
            xpath('normalize-space(string())')
         prop_names = tr_list[::2]
         prop_values = tr_list[1::2]
@@ -51,6 +51,23 @@ class WikipediaParser(Source):
             })
             items.append(item)
             log.msg('Wiki prop: |%s| |%s| |%s|' % (item['attribute'], item['value'], item['source']), level=log.DEBUG)
+
+        tr_list2 = sel.xpath('.//table[@class="infobox"]//tr').\
+            xpath('normalize-space(string())')
+        log.msg('%s' %tr_list2,level=log.DEBUG)
+        #prop_names = tr_list2[::2]
+        #prop_values = tr_list2[1::2]
+        #for i, prop_name in enumerate(prop_names):
+        #    item = Result({
+        #        'attribute': prop_name.extract().encode('utf-8'),
+        #        'value': prop_values[i].extract().encode('utf-8'),
+        #        'source': "Wikipedia",
+        #        'reliability': "",
+        #        'conditions': ""
+        #    })
+        #    items.append(item)
+        #    log.msg('Wiki prop: |%s| |%s| |%s|' % (item['attribute'], item['value'], item['source']), level=log.DEBUG)
+
         items = filter(lambda a: a['value'] != '', items)  # remove items with an empty value
 
         item_list = self.clean_items(items)
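Note: the chembox scraping that PATCH 04 narrows down relies on the infobox cells forming an alternating name/value sequence, which the `tr_list[::2]` / `tr_list[1::2]` slicing exploits. A toy illustration with invented cell texts, independent of Scrapy:

```python
# Even indices hold property names, odd indices the matching values -- the
# same pairing the patch applies to the extracted td cell texts.
cells = ['Molar mass', '46.07 g/mol', 'Density', '0.789 g/cm3']
prop_names = cells[::2]
prop_values = cells[1::2]
for i, prop_name in enumerate(prop_names):
    print('%s = %s' % (prop_name, prop_values[i]))
```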
From 03e652d454e34dbc30d9f2fa3c6f32ef57845e01 Mon Sep 17 00:00:00 2001
From: Bas Vb
Date: Thu, 1 May 2014 16:05:37 +0200
Subject: [PATCH 05/14] WikipediaParser now works on drugboxes as well

---
 FourmiCrawler/sources/WikipediaParser.py | 31 ++++++++++++------------
 1 file changed, 15 insertions(+), 16 deletions(-)

diff --git a/FourmiCrawler/sources/WikipediaParser.py b/FourmiCrawler/sources/WikipediaParser.py
index 8d8cded..2964567 100644
--- a/FourmiCrawler/sources/WikipediaParser.py
+++ b/FourmiCrawler/sources/WikipediaParser.py
@@ -46,27 +46,26 @@ class WikipediaParser(Source):
                 'attribute': prop_name.extract().encode('utf-8'),
                 'value': prop_values[i].extract().encode('utf-8'),
                 'source': "Wikipedia",
-                'reliability': "",
+                'reliability': "Unknown",
                 'conditions': ""
             })
             items.append(item)
             log.msg('Wiki prop: |%s| |%s| |%s|' % (item['attribute'], item['value'], item['source']), level=log.DEBUG)
 
-        tr_list2 = sel.xpath('.//table[@class="infobox"]//tr').\
-            xpath('normalize-space(string())')
-        log.msg('%s' %tr_list2,level=log.DEBUG)
-        #prop_names = tr_list2[::2]
-        #prop_values = tr_list2[1::2]
-        #for i, prop_name in enumerate(prop_names):
-        #    item = Result({
-        #        'attribute': prop_name.extract().encode('utf-8'),
-        #        'value': prop_values[i].extract().encode('utf-8'),
-        #        'source': "Wikipedia",
-        #        'reliability': "",
-        #        'conditions': ""
-        #    })
-        #    items.append(item)
-        #    log.msg('Wiki prop: |%s| |%s| |%s|' % (item['attribute'], item['value'], item['source']), level=log.DEBUG)
+        tr_list2 = sel.xpath('.//table[@class="infobox"]//tr')#.xpath('normalize-space(string())')
+        log.msg('this: %s' %tr_list2,level=log.DEBUG)
+        for tablerow in tr_list2:
+            log.msg('item: %s' %tablerow.xpath('./th').xpath('normalize-space(string())'),level=log.DEBUG)
+            if tablerow.xpath('./th').xpath('normalize-space(string())') and tablerow.xpath('./td').xpath('normalize-space(string())'):
+                item = Result({
+                    'attribute': tablerow.xpath('./th').xpath('normalize-space(string())').extract()[0].encode('utf-8'),
+                    'value': tablerow.xpath('./td').xpath('normalize-space(string())').extract()[0].encode('utf-8'),
+                    'source': "Wikipedia",
+                    'reliability': "Unknown",
+                    'conditions': ""
+                })
+                items.append(item)
+                log.msg('Wiki prop: |attribute: %s| |value: %s| |%s|' % (item['attribute'], item['value'], item['source']), level=log.DEBUG)
 
         items = filter(lambda a: a['value'] != '', items)  # remove items with an empty value
 
         item_list = self.clean_items(items)

From 2fcec009bb3569da9ee788d01d178cf27b9b891f Mon Sep 17 00:00:00 2001
From: "Jip J. Dekker"
Date: Thu, 8 May 2014 15:20:17 +0200
Subject: [PATCH 06/14] Added a Pipeline to deal with attribute selection

---
 FourmiCrawler/pipelines.py | 12 ++++++++++++
 FourmiCrawler/settings.py  |  3 ++-
 FourmiCrawler/spider.py    |  3 ++-
 3 files changed, 16 insertions(+), 2 deletions(-)

diff --git a/FourmiCrawler/pipelines.py b/FourmiCrawler/pipelines.py
index 5f2b68f..cbf50d3 100644
--- a/FourmiCrawler/pipelines.py
+++ b/FourmiCrawler/pipelines.py
@@ -2,6 +2,7 @@
 #
 # Don't forget to add your pipeline to the ITEM_PIPELINES setting
 # See: http://doc.scrapy.org/en/latest/topics/item-pipeline.html
+import re
 
 from scrapy.exceptions import DropItem
 
@@ -23,3 +24,14 @@ class FourmiPipeline(object):
         else:
             self.known_values.add(value)
             return item
+
+
+class AttributeSelectionPipeline(object):
+
+    def __init__(self):
+        pass
+
+    def process_item(self, item, spider):
+        if [x for x in spider.selected_attributes if re.match(x, item["attribute"])]:
+            return item
+        else:
+            raise DropItem("Attribute not selected by user: %s" % item)
\ No newline at end of file
diff --git a/FourmiCrawler/settings.py b/FourmiCrawler/settings.py
index be91fef..a28cf9a 100644
--- a/FourmiCrawler/settings.py
+++ b/FourmiCrawler/settings.py
@@ -11,7 +11,8 @@ BOT_NAME = 'FourmiCrawler'
 SPIDER_MODULES = ['FourmiCrawler']
 NEWSPIDER_MODULE = 'FourmiCrawler'
 ITEM_PIPELINES = {
-    'FourmiCrawler.pipelines.FourmiPipeline': 100
+    'FourmiCrawler.pipelines.AttributeSelectionPipeline': 100,
+    'FourmiCrawler.pipelines.FourmiPipeline': 200,
 }
 FEED_URI = 'results.json'
 FEED_FORMAT = 'jsonlines'
diff --git a/FourmiCrawler/spider.py b/FourmiCrawler/spider.py
index 9f92a84..87f22c6 100644
--- a/FourmiCrawler/spider.py
+++ b/FourmiCrawler/spider.py
@@ -8,9 +8,10 @@ class FourmiSpider(Spider):
     __parsers = []
     synonyms = []
 
-    def __init__(self, compound=None, *args, **kwargs):
+    def __init__(self, compound=None, selected_attributes=[".*"], *args, **kwargs):
         super(FourmiSpider, self).__init__(*args, **kwargs)
         self.synonyms.append(compound)
+        self.selected_attributes = selected_attributes
 
     def parse(self, reponse):
         for parser in self.__parsers:
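Note: the selection rule in the new AttributeSelectionPipeline is ordinary regex matching and can be tried without running Scrapy. A minimal sketch (the patterns and item dicts are made-up examples):

```python
import re

# Mimics AttributeSelectionPipeline.process_item from PATCH 06: an item
# survives when at least one selected pattern matches its 'attribute' field.
selected_attributes = ['melting.*', 'boiling.*']  # hypothetical user input
items = [{'attribute': 'melting point'},
         {'attribute': 'density'},
         {'attribute': 'boiling point'}]
for item in items:
    if [x for x in selected_attributes if re.match(x, item['attribute'])]:
        print('kept:    %s' % item['attribute'])
    else:
        print('dropped: %s' % item['attribute'])  # the real pipeline raises DropItem here
```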
From c7051331946ddd1e2beceb6f67c0b3160ba24a39 Mon Sep 17 00:00:00 2001
From: "Jip J. Dekker"
Date: Thu, 8 May 2014 15:29:47 +0200
Subject: [PATCH 07/14] Added CLI functionality to deal with attribute
 selection

---
 fourmi.py | 12 +++++++-----
 1 file changed, 7 insertions(+), 5 deletions(-)

diff --git a/fourmi.py b/fourmi.py
index e33a833..c8afab5 100755
--- a/fourmi.py
+++ b/fourmi.py
@@ -12,15 +12,17 @@ Usage:
     fourmi --version
 
 Options:
+    --attributes=<regex>       Include only attributes that match these regular expressions split by a comma. [default: .*]
     -h --help                  Show this screen.
     --version                  Show version.
     --verbose                  Verbose logging output.
     --log=<file>               Save log to a file.
     -o --output=<file>         Output file [default: result.*format*]
     -f --format=<format>       Output formats (supported: csv, json, jsonlines, xml) [default: jsonlines]
-    --include=<regex>          Include only sources that match these regular expressions split by a comma.
-    --exclude=<regex>          Exclude the sources that match these regular expressions split by a comma.
+    --include=<regex>          Include only sources that match these regular expressions split by a comma.
+    --exclude=<regex>          Exclude the sources that match these regular expressions split by a comma.
 """
+import re
 
 from twisted.internet import reactor
 from scrapy.crawler import Crawler
@@ -32,8 +34,8 @@
 from FourmiCrawler.spider import FourmiSpider
 from sourceloader import SourceLoader
 
-def setup_crawler(searchable, settings, source_loader):
-    spider = FourmiSpider(compound=searchable)
+def setup_crawler(searchable, settings, source_loader, attributes):
+    spider = FourmiSpider(compound=searchable, selected_attributes=attributes)
     spider.add_parsers(source_loader.sources)
     crawler = Crawler(settings)
     crawler.signals.connect(reactor.stop, signal=signals.spider_closed)
@@ -74,7 +76,7 @@ def start_log(docopt_arguments):
 
 def search(docopt_arguments, source_loader):
     start_log(docopt_arguments)
     settings = scrapy_settings_manipulation(docopt_arguments)
-    setup_crawler(docopt_arguments["<compound>"], settings, source_loader)
+    setup_crawler(docopt_arguments["<compound>"], settings, source_loader, docopt_arguments["--attributes"].split(','))
     reactor.run()

From 2e654255c59ff238cae2a374eec27b5c2e6f98bf Mon Sep 17 00:00:00 2001
From: "Jip J. Dekker"
Date: Thu, 8 May 2014 15:35:18 +0200
Subject: [PATCH 08/14] Added documentation to the pipeline.

---
 FourmiCrawler/pipelines.py | 7 +++++++
 fourmi.py                  | 1 -
 2 files changed, 7 insertions(+), 1 deletion(-)

diff --git a/FourmiCrawler/pipelines.py b/FourmiCrawler/pipelines.py
index cbf50d3..34217ac 100644
--- a/FourmiCrawler/pipelines.py
+++ b/FourmiCrawler/pipelines.py
@@ -31,6 +31,13 @@ class AttributeSelectionPipeline(object):
         pass
 
     def process_item(self, item, spider):
+        """
+        The items are processed using the selected attribute list available in the spider;
+        items whose attribute does not match the selected list are dropped.
+        :param item: The incoming item
+        :param spider: The spider which scraped the item. Should have an attribute "selected_attributes".
+        :return: :raise DropItem: Returns the item if it matches a selected attribute, else it is dropped.
+        """
         if [x for x in spider.selected_attributes if re.match(x, item["attribute"])]:
             return item
         else:
             raise DropItem("Attribute not selected by user: %s" % item)
\ No newline at end of file
diff --git a/fourmi.py b/fourmi.py
index c8afab5..a9c1d68 100755
--- a/fourmi.py
+++ b/fourmi.py
@@ -22,7 +22,6 @@ Options:
     --include=<regex>          Include only sources that match these regular expressions split by a comma.
     --exclude=<regex>          Exclude the sources that match these regular expressions split by a comma.
 """
-import re
 
 from twisted.internet import reactor
 from scrapy.crawler import Crawler
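Note: after PATCH 07/08 the attribute selection is wired from the command line down to the spider. A sketch of that flow using a made-up docopt result instead of a real invocation (the patterns are illustrative only):

```python
# The CLI value is a comma-separated list of regular expressions; fourmi.py
# splits it and hands the list to FourmiSpider as selected_attributes.
docopt_arguments = {'--attributes': '^melting.*,^boiling.*'}  # hypothetical
attributes = docopt_arguments['--attributes'].split(',')
print(attributes)  # ['^melting.*', '^boiling.*'] -> passed to setup_crawler()
```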
From f193aac24a6cad32534998af5afa3bfee0eded6f Mon Sep 17 00:00:00 2001
From: "Jip J. Dekker"
Date: Thu, 8 May 2014 15:45:42 +0200
Subject: [PATCH 09/14] Fixed Duplicate Pipeline + rename

---
 FourmiCrawler/pipelines.py | 4 ++--
 FourmiCrawler/settings.py  | 2 +-
 2 files changed, 3 insertions(+), 3 deletions(-)

diff --git a/FourmiCrawler/pipelines.py b/FourmiCrawler/pipelines.py
index 34217ac..e1dadbf 100644
--- a/FourmiCrawler/pipelines.py
+++ b/FourmiCrawler/pipelines.py
@@ -6,7 +6,7 @@ import re
 from scrapy.exceptions import DropItem
 
 
-class FourmiPipeline(object):
+class DuplicatePipeline(object):
 
     def __init__(self):
         self.known_values = set()
@@ -18,7 +18,7 @@ class FourmiPipeline(object):
         :param spider: The spider which scraped the item
         :return: :raise DropItem: Returns the item if unique or drops it if it is already known
         """
-        value = item['attribute'], item['value']
+        value = (item['attribute'], item['value'], item['conditions'])
         if value in self.known_values:
             raise DropItem("Duplicate item found: %s" % item)  # #[todo] append sources of first item.
         else:
diff --git a/FourmiCrawler/settings.py b/FourmiCrawler/settings.py
index a28cf9a..d7ac212 100644
--- a/FourmiCrawler/settings.py
+++ b/FourmiCrawler/settings.py
@@ -12,7 +12,7 @@ SPIDER_MODULES = ['FourmiCrawler']
 NEWSPIDER_MODULE = 'FourmiCrawler'
 ITEM_PIPELINES = {
     'FourmiCrawler.pipelines.AttributeSelectionPipeline': 100,
-    'FourmiCrawler.pipelines.FourmiPipeline': 200,
+    'FourmiCrawler.pipelines.DuplicatePipeline': 200,
 }
 FEED_URI = 'results.json'
 FEED_FORMAT = 'jsonlines'
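Note: the widened duplicate key from PATCH 09 can be sanity-checked in isolation. A sketch with invented rows showing why 'conditions' now belongs in the key:

```python
# After the patch the key is (attribute, value, conditions), so an identical
# attribute/value pair measured under different conditions is no longer
# treated as a duplicate.
known_values = set()
rows = [
    ('boiling point', '100 C', '1 atm'),
    ('boiling point', '100 C', '0.5 atm'),  # different conditions: kept
    ('boiling point', '100 C', '1 atm'),    # true duplicate: dropped
]
for value in rows:
    if value in known_values:
        print('drop: %s, %s, %s' % value)
    else:
        known_values.add(value)
        print('keep: %s, %s, %s' % value)
```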
From b54568bab0281ab80ef9ce2e4ec3a94138322447 Mon Sep 17 00:00:00 2001
From: Bas Vb
Date: Tue, 13 May 2014 16:18:32 +0200
Subject: [PATCH 10/14] Small fixes

---
 FourmiCrawler/sources/WikipediaParser.py | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/FourmiCrawler/sources/WikipediaParser.py b/FourmiCrawler/sources/WikipediaParser.py
index 2964567..cb7d0b9 100644
--- a/FourmiCrawler/sources/WikipediaParser.py
+++ b/FourmiCrawler/sources/WikipediaParser.py
@@ -36,8 +36,8 @@ class WikipediaParser(Source):
         """ scrape data from infobox on wikipedia. """
         items = []
 
-        #be sure to get both chembox (wikipedia template) and drugbox (wikipedia template) to scrape
-        tr_list = sel.xpath('.//table[@class="infobox bordered"]//td[not(@colspan)]').\
+        #be sure to get chembox (wikipedia template)
+        tr_list = sel.xpath('.//table[@class="infobox bordered"]//td[not(@colspan)]'). \
             xpath('normalize-space(string())')
         prop_names = tr_list[::2]
         prop_values = tr_list[1::2]
@@ -52,11 +52,13 @@ class WikipediaParser(Source):
             items.append(item)
             log.msg('Wiki prop: |%s| |%s| |%s|' % (item['attribute'], item['value'], item['source']), level=log.DEBUG)
 
-        tr_list2 = sel.xpath('.//table[@class="infobox"]//tr')#.xpath('normalize-space(string())')
-        log.msg('this: %s' %tr_list2,level=log.DEBUG)
+        #scrape the drugbox (wikipedia template)
+        tr_list2 = sel.xpath('.//table[@class="infobox"]//tr')
+        log.msg('this: %s' % tr_list2, level=log.DEBUG)
         for tablerow in tr_list2:
-            log.msg('item: %s' %tablerow.xpath('./th').xpath('normalize-space(string())'),level=log.DEBUG)
-            if tablerow.xpath('./th').xpath('normalize-space(string())') and tablerow.xpath('./td').xpath('normalize-space(string())'):
+            log.msg('item: %s' % tablerow.xpath('./th').xpath('normalize-space(string())'), level=log.DEBUG)
+            if tablerow.xpath('./th').xpath('normalize-space(string())') and tablerow.xpath('./td').xpath(
+                    'normalize-space(string())'):
                 item = Result({
                     'attribute': tablerow.xpath('./th').xpath('normalize-space(string())').extract()[0].encode('utf-8'),
                     'value': tablerow.xpath('./td').xpath('normalize-space(string())').extract()[0].encode('utf-8'),
@@ -65,7 +67,9 @@ class WikipediaParser(Source):
                     'conditions': ""
                 })
                 items.append(item)
-                log.msg('Wiki prop: |attribute: %s| |value: %s| |%s|' % (item['attribute'], item['value'], item['source']), level=log.DEBUG)
+                log.msg(
+                    'Wiki prop: |attribute: %s| |value: %s| |%s|' % (item['attribute'], item['value'], item['source']),
+                    level=log.DEBUG)
 
         items = filter(lambda a: a['value'] != '', items)  # remove items with an empty value
 
         item_list = self.clean_items(items)
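Note: the drugbox handling cleaned up in PATCH 10 pairs each row's th header with its td value and skips incomplete rows. A self-contained illustration with an invented HTML snippet, assuming a Scrapy version whose Selector accepts a `text` argument:

```python
from scrapy.selector import Selector

# Same th/td pattern as the patch; the second row has no td cell and is
# therefore skipped, just like rows the crawler cannot pair up.
html = '''<table class="infobox">
<tr><th>Bioavailability</th><td>100%</td></tr>
<tr><th>Routes of administration</th></tr>
</table>'''
sel = Selector(text=html)
for tablerow in sel.xpath('.//table[@class="infobox"]//tr'):
    th = tablerow.xpath('./th').xpath('normalize-space(string())').extract()
    td = tablerow.xpath('./td').xpath('normalize-space(string())').extract()
    if th and td and th[0] and td[0]:
        print('%s = %s' % (th[0], td[0]))
```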
From 0a2bfeb14990f5b217b122ba8bc256574a0a11bd Mon Sep 17 00:00:00 2001
From: "Jip J. Dekker"
Date: Tue, 13 May 2014 21:43:16 +0200
Subject: [PATCH 11/14] I'm more experienced with Markdown

---
 README.rst => README.md | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename README.rst => README.md (100%)

diff --git a/README.rst b/README.md
similarity index 100%
rename from README.rst
rename to README.md

From b6ae4977d90f4f427786048cb11de26ffbe49d85 Mon Sep 17 00:00:00 2001
From: "Jip J. Dekker"
Date: Tue, 13 May 2014 23:26:31 +0200
Subject: [PATCH 12/14] Complete rewrite of the README

---
 README.md | 96 +++++++++++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 87 insertions(+), 9 deletions(-)

diff --git a/README.md b/README.md
index c251791..4732c56 100644
--- a/README.md
+++ b/README.md
@@ -1,16 +1,94 @@
-We are the team Descartes 2.
-----------------------------
-
-Our team members are:
-
-+ Rob ten Berge
-
-+ Bas van Berkel
-
-+ Nout van Deijck
-
-+ Jip J. Dekker
-
-+ Michail Kuznetcov
-
-+ Harmen Prins
\ No newline at end of file
+# Fourmi
+
+Fourmi is a web scraper for chemical substances. The program is designed to be
+used as a search engine to search multiple chemical databases for a specific
+substance. The program will produce all available attributes of the substance
+and the conditions associated with those attributes. Fourmi also attempts to
+estimate the reliability of each data point to assist the user in deciding
+which data should be used.
+
+The Fourmi project is an open source project licensed under the MIT license.
+Feel free to contribute!
+
+Fourmi is based on the [Scrapy framework](http://scrapy.org/), an open source
+web scraping framework for Python. Most of the functionality of this project
+can be traced to this framework. Should the documentation for this
+application fall short, we suggest you take a close look at the
+[Scrapy architecture](http://doc.scrapy.org/en/latest/topics/architecture.html)
+and the [Scrapy documentation](http://doc.scrapy.org/en/latest/index.html).
+
+### Installing
+
+If you're installing Fourmi, please take a look at our
+[installation guide](...) on our wiki. When you've installed the application,
+make sure to check our [usage guide](...).
+
+### Using the Source
+
+To use the Fourmi source code, multiple dependencies are required. Take a look
+at the [wiki page](...) on using the application source code for a step by
+step installation guide.
+
+When developing for the Fourmi project, keep in mind that code readability is
+a must. To maintain the readability, code should conform to the
+[PEP-8](http://legacy.python.org/dev/peps/pep-0008/) style guide for Python
+code. More information about the different structures and principles of the
+Fourmi application can be found on our [wiki](...).
+
+### To Do
+
+The Fourmi project has the following goals for the near future:
+
+** Main goals: **
+
+- Improve our documentation and guides. (Assignee: Dekker)
+
+- Build a graphical user interface (GUI) as an alternative to the command
+line interface (CLI). (Assignee: Harmen)
+
+- Compile the source into a Windows executable. (Assignee: Bas)
+
+- Create a configuration file to hold logins and API keys.
+
+- Determine the reliability of our data points.
+
+- Create a module to gather data from NIST. (Assignee: Rob)
+
+- Create a module to gather data from PubChem. (Assignee: Rob)
+
+** Side goals: **
+
+- Clean and unify data.
+
+- Extensive reliability analysis using statistical tests.
+
+- Test data with Descartes 1.
+
+### Project Origin
+
+The Fourmi project was started in February of 2014 as part of a software
+engineering course at the Radboud University for students studying Computer
+Science, Information Science or Artificial Intelligence. Students participate
+in a real software development project as part of the
+[Giphouse](http://www.giphouse.nl/).
+
+This particular project was started on behalf of Ivo B. Rietveld. As a
+chemist he was in need of an application to automatically search information
+on chemical substances and create a phase diagram. The so-called "Descartes"
+project was split into two teams, each creating a different application that
+covers part of the functionality. We are team Descartes 2, and as we were
+responsible for creating a web crawler, we've named our application Fourmi
+(English: ant).
+
+The following people were part of the original team:
+
+- [Jip J. Dekker](http://jip.dekker.li)
+
+- Rob ten Berge
+
+- Harmen Prins
+
+- Bas van Berkel
+
+- Nout van Deijck
+
+- Michail Kuznetcov
\ No newline at end of file

From c380b740461d4b1d07482511f3dc8a2432df43cc Mon Sep 17 00:00:00 2001
From: "Jip J. Dekker"
Date: Tue, 13 May 2014 23:28:56 +0200
Subject: [PATCH 13/14] Making things bold, removing breaklines

---
 README.md | 17 ++---------------
 1 file changed, 2 insertions(+), 15 deletions(-)

diff --git a/README.md b/README.md
index 4732c56..af012fd 100644
--- a/README.md
+++ b/README.md
@@ -39,29 +39,21 @@ Fourmi application can be found on our [wiki](...).
 
 The Fourmi project has the following goals for the near future:
 
-** Main goals: **
+__Main goals:__
 
 - Improve our documentation and guides. (Assignee: Dekker)
-
 - Build a graphical user interface (GUI) as an alternative to the command
 line interface (CLI). (Assignee: Harmen)
-
 - Compile the source into a Windows executable. (Assignee: Bas)
-
 - Create a configuration file to hold logins and API keys.
-
 - Determine the reliability of our data points.
-
 - Create a module to gather data from NIST. (Assignee: Rob)
-
 - Create a module to gather data from PubChem. (Assignee: Rob)
 
-** Side goals: **
+__Side goals:__
 
 - Clean and unify data.
-
 - Extensive reliability analysis using statistical tests.
-
 - Test data with Descartes 1.
@@ -82,13 +74,8 @@ creating a web crawler, we've named our application Fourmi (English: ant).
 
 The following people were part of the original team:
 
 - [Jip J. Dekker](http://jip.dekker.li)
-
 - Rob ten Berge
-
 - Harmen Prins
-
 - Bas van Berkel
-
 - Nout van Deijck
-
 - Michail Kuznetcov
\ No newline at end of file

From 284d24c7830d96bf15f7386b9e4f0e13c9dbb0e6 Mon Sep 17 00:00:00 2001
From: "Jip J. Dekker"
Date: Tue, 13 May 2014 23:35:12 +0200
Subject: [PATCH 14/14] Bumped the version number

---
 fourmi.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fourmi.py b/fourmi.py
index a9c1d68..efa4e54 100755
--- a/fourmi.py
+++ b/fourmi.py
@@ -80,7 +80,7 @@ def search(docopt_arguments, source_loader):
 
 if __name__ == '__main__':
-    arguments = docopt.docopt(__doc__, version='Fourmi - V0.2.6')
+    arguments = docopt.docopt(__doc__, version='Fourmi - V0.3.0')
     loader = SourceLoader()
 
     if arguments["--include"]: