From f8d390d3e604bedc2a428cc24824830a8bc31d5a Mon Sep 17 00:00:00 2001
From: Bas Vb <bas.berkel@student.ru.nl>
Date: Thu, 1 May 2014 15:04:11 +0200
Subject: [PATCH 1/8] Starting with fixing the wikiparser

---
 FourmiCrawler/sources/WikipediaParser.py | 19 ++++++++++++++++++-
 1 file changed, 18 insertions(+), 1 deletion(-)

diff --git a/FourmiCrawler/sources/WikipediaParser.py b/FourmiCrawler/sources/WikipediaParser.py
index c251fca..8d8cded 100644
--- a/FourmiCrawler/sources/WikipediaParser.py
+++ b/FourmiCrawler/sources/WikipediaParser.py
@@ -37,7 +37,7 @@ class WikipediaParser(Source):
         items = []
 
         #be sure to get both chembox (wikipedia template) and drugbox (wikipedia template) to scrape
-        tr_list = sel.xpath('.//table[@class="infobox bordered" or @class="infobox"]//td[not(@colspan)]').\
+        tr_list = sel.xpath('.//table[@class="infobox bordered"]//td[not(@colspan)]').\
             xpath('normalize-space(string())')
         prop_names = tr_list[::2]
         prop_values = tr_list[1::2]
@@ -51,6 +51,23 @@ class WikipediaParser(Source):
             })
             items.append(item)
             log.msg('Wiki prop: |%s| |%s| |%s|' % (item['attribute'], item['value'], item['source']), level=log.DEBUG)
+
+        tr_list2 = sel.xpath('.//table[@class="infobox"]//tr').\
+            xpath('normalize-space(string())')
+        log.msg('%s' %tr_list2,level=log.DEBUG)
+        #prop_names = tr_list2[::2]
+        #prop_values = tr_list2[1::2]
+        #for i, prop_name in enumerate(prop_names):
+        #    item = Result({
+        #        'attribute': prop_name.extract().encode('utf-8'),
+        #        'value': prop_values[i].extract().encode('utf-8'),
+        #        'source': "Wikipedia",
+        #        'reliability': "",
+        #        'conditions': ""
+        #    })
+        #    items.append(item)
+        #    log.msg('Wiki prop: |%s| |%s| |%s|' % (item['attribute'], item['value'], item['source']), level=log.DEBUG)
+
         items = filter(lambda a: a['value'] != '', items)  # remove items with an empty value
         item_list = self.clean_items(items)
 

From 03e652d454e34dbc30d9f2fa3c6f32ef57845e01 Mon Sep 17 00:00:00 2001
From: Bas Vb <bas.berkel@student.ru.nl>
Date: Thu, 1 May 2014 16:05:37 +0200
Subject: [PATCH 2/8] Wikipediaparser now works on chemboxes as well

---
 FourmiCrawler/sources/WikipediaParser.py | 31 ++++++++++++------------
 1 file changed, 15 insertions(+), 16 deletions(-)

diff --git a/FourmiCrawler/sources/WikipediaParser.py b/FourmiCrawler/sources/WikipediaParser.py
index 8d8cded..2964567 100644
--- a/FourmiCrawler/sources/WikipediaParser.py
+++ b/FourmiCrawler/sources/WikipediaParser.py
@@ -46,27 +46,26 @@ class WikipediaParser(Source):
                 'attribute': prop_name.extract().encode('utf-8'),
                 'value': prop_values[i].extract().encode('utf-8'),
                 'source': "Wikipedia",
-                'reliability': "",
+                'reliability': "Unknown",
                 'conditions': ""
             })
             items.append(item)
             log.msg('Wiki prop: |%s| |%s| |%s|' % (item['attribute'], item['value'], item['source']), level=log.DEBUG)
 
-        tr_list2 = sel.xpath('.//table[@class="infobox"]//tr').\
-            xpath('normalize-space(string())')
-        log.msg('%s' %tr_list2,level=log.DEBUG)
-        #prop_names = tr_list2[::2]
-        #prop_values = tr_list2[1::2]
-        #for i, prop_name in enumerate(prop_names):
-        #    item = Result({
-        #        'attribute': prop_name.extract().encode('utf-8'),
-        #        'value': prop_values[i].extract().encode('utf-8'),
-        #        'source': "Wikipedia",
-        #        'reliability': "",
-        #        'conditions': ""
-        #    })
-        #    items.append(item)
-        #    log.msg('Wiki prop: |%s| |%s| |%s|' % (item['attribute'], item['value'], item['source']), level=log.DEBUG)
+        tr_list2 = sel.xpath('.//table[@class="infobox"]//tr')#.xpath('normalize-space(string())')
+        log.msg('dit: %s' %tr_list2,level=log.DEBUG)
+        for tablerow in tr_list2:
+            log.msg('item: %s' %tablerow.xpath('./th').xpath('normalize-space(string())'),level=log.DEBUG)
+            if tablerow.xpath('./th').xpath('normalize-space(string())') and tablerow.xpath('./td').xpath('normalize-space(string())'):
+                item = Result({
+                    'attribute': tablerow.xpath('./th').xpath('normalize-space(string())').extract()[0].encode('utf-8'),
+                    'value': tablerow.xpath('./td').xpath('normalize-space(string())').extract()[0].encode('utf-8'),
+                    'source': "Wikipedia",
+                    'reliability': "Unknown",
+                    'conditions': ""
+                })
+                items.append(item)
+                log.msg('Wiki prop: |attribute: %s| |value: %s| |%s|' % (item['attribute'], item['value'], item['source']), level=log.DEBUG)
 
         items = filter(lambda a: a['value'] != '', items)  # remove items with an empty value
         item_list = self.clean_items(items)

From b54568bab0281ab80ef9ce2e4ec3a94138322447 Mon Sep 17 00:00:00 2001
From: Bas Vb <bas.berkel@student.ru.nl>
Date: Tue, 13 May 2014 16:18:32 +0200
Subject: [PATCH 3/8] Small fixes

---
 FourmiCrawler/sources/WikipediaParser.py | 18 +++++++++++-------
 1 file changed, 11 insertions(+), 7 deletions(-)

diff --git a/FourmiCrawler/sources/WikipediaParser.py b/FourmiCrawler/sources/WikipediaParser.py
index 2964567..cb7d0b9 100644
--- a/FourmiCrawler/sources/WikipediaParser.py
+++ b/FourmiCrawler/sources/WikipediaParser.py
@@ -36,8 +36,8 @@ class WikipediaParser(Source):
         """ scrape data from infobox on wikipedia. """
         items = []
 
-        #be sure to get both chembox (wikipedia template) and drugbox (wikipedia template) to scrape
-        tr_list = sel.xpath('.//table[@class="infobox bordered"]//td[not(@colspan)]').\
+        #be sure to get chembox (wikipedia template)
+        tr_list = sel.xpath('.//table[@class="infobox bordered"]//td[not(@colspan)]'). \
             xpath('normalize-space(string())')
         prop_names = tr_list[::2]
         prop_values = tr_list[1::2]
@@ -52,11 +52,13 @@ class WikipediaParser(Source):
             items.append(item)
             log.msg('Wiki prop: |%s| |%s| |%s|' % (item['attribute'], item['value'], item['source']), level=log.DEBUG)
 
-        tr_list2 = sel.xpath('.//table[@class="infobox"]//tr')#.xpath('normalize-space(string())')
-        log.msg('dit: %s' %tr_list2,level=log.DEBUG)
+        #scrape the  drugbox (wikipedia template)
+        tr_list2 = sel.xpath('.//table[@class="infobox"]//tr')
+        log.msg('dit: %s' % tr_list2, level=log.DEBUG)
         for tablerow in tr_list2:
-            log.msg('item: %s' %tablerow.xpath('./th').xpath('normalize-space(string())'),level=log.DEBUG)
-            if tablerow.xpath('./th').xpath('normalize-space(string())') and tablerow.xpath('./td').xpath('normalize-space(string())'):
+            log.msg('item: %s' % tablerow.xpath('./th').xpath('normalize-space(string())'), level=log.DEBUG)
+            if tablerow.xpath('./th').xpath('normalize-space(string())') and tablerow.xpath('./td').xpath(
+                    'normalize-space(string())'):
                 item = Result({
                     'attribute': tablerow.xpath('./th').xpath('normalize-space(string())').extract()[0].encode('utf-8'),
                     'value': tablerow.xpath('./td').xpath('normalize-space(string())').extract()[0].encode('utf-8'),
@@ -65,7 +67,9 @@ class WikipediaParser(Source):
                     'conditions': ""
                 })
                 items.append(item)
-                log.msg('Wiki prop: |attribute: %s| |value: %s| |%s|' % (item['attribute'], item['value'], item['source']), level=log.DEBUG)
+                log.msg(
+                    'Wiki prop: |attribute: %s| |value: %s| |%s|' % (item['attribute'], item['value'], item['source']),
+                    level=log.DEBUG)
 
         items = filter(lambda a: a['value'] != '', items)  # remove items with an empty value
         item_list = self.clean_items(items)

From 0a2bfeb14990f5b217b122ba8bc256574a0a11bd Mon Sep 17 00:00:00 2001
From: "Jip J. Dekker" <jip@dekker.li>
Date: Tue, 13 May 2014 21:43:16 +0200
Subject: [PATCH 4/8] I'm more experienced with Markdown

---
 README.rst => README.md | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 rename README.rst => README.md (100%)

diff --git a/README.rst b/README.md
similarity index 100%
rename from README.rst
rename to README.md

From b6ae4977d90f4f427786048cb11de26ffbe49d85 Mon Sep 17 00:00:00 2001
From: "Jip J. Dekker" <jip@dekker.li>
Date: Tue, 13 May 2014 23:26:31 +0200
Subject: [PATCH 5/8] Complete rewrite of the README

---
 README.md | 96 +++++++++++++++++++++++++++++++++++++++++++++++++------
 1 file changed, 87 insertions(+), 9 deletions(-)

diff --git a/README.md b/README.md
index c251791..4732c56 100644
--- a/README.md
+++ b/README.md
@@ -1,16 +1,94 @@
-We are the team Descartes 2.
-----------------------------
+# Fourmi
 
-Our team members are:
+Fourmi is a web scraper for chemical substances. The program is designed to be
+used as a search engine to search multiple chemical databases for a specific
+substance. The program will produce all available attributes of the substance
+and conditions associated with the attributes. Fourmi also attempts to estimate
+the reliability of each data point to assist the user in deciding which data
+should be used.
 
-+ Rob ten Berge
+The Fourmi project is an open source project licensed under the MIT license. Feel
+free to contribute!
 
-+ Bas van Berkel
+Fourmi is based on the [Scrapy framework](http://scrapy.org/), an open source
+web scraping framework for Python. Most of the functionality of this project can
+be traced to this framework. Should the documentation for this application fall
+short, we suggest you take a close look at the
+[Scrapy architecture](http://doc.scrapy.org/en/latest/topics/architecture.html)
+and the [Scrapy documentation](http://doc.scrapy.org/en/latest/index.html).
 
-+ Nout van Deijck
+### Installing 
 
-+ Jip J. Dekker
+If you're installing Fourmi, please take a look at our [installation guide](...)
+on our wiki. When you've installed the application, make sure to check our
+[usage guide](...).
 
-+ Michail Kuznetcov
+### Using the Source
 
-+ Harmen Prins
\ No newline at end of file
+To use the Fourmi source code multiple dependencies are required. Take a look at
+the [wiki page](...) on using the application source code for a step by step
+installation guide.
+
+When developing for the Fourmi project keep in mind that code readability is a
+must. To maintain the readability, code should conform to the
+[PEP-8](http://legacy.python.org/dev/peps/pep-0008/) style guide for Python
+code. More information about the different structures and principles of the
+Fourmi application can be found on our [wiki](...).
+
+### To Do
+
+The Fourmi project has the following goals for the nearby future:
+
+** Main goals: **
+
+- Improve our documentation and guides. (Assignee: Dekker)
+
+- Build an graphical user interface(GUI) as alternative for the command line
+interface(CLI). (Assignee: Harmen)
+
+- Compiling the source into an windows executable. (Assignee: Bas)
+
+- Create an configuration file to hold logins and API keys.
+
+- Determine reliability of our data point.
+
+- Create an module to gather data from NIST. (Assignee: Rob)
+
+- Create an module to gather data from PubChem. (Assignee: Rob)
+
+** Side goals: **
+
+- Clean and unify data.
+
+- Extensive reliability analysis using statistical tests.
+
+- Test data with Descartes 1.
+
+### Project Origin
+
+The Fourmi project was started in February of 2014 as part of a software
+engineering course at the Radboud University for students studying Computer
+Science, Information Science or Artificial Intelligence. Students participate in
+a real software development project as part of the
+[Giphouse](http://www.giphouse.nl/).
+
+This particular project was started on behalf of Ivo B. Rietveld. As a chemist
+he was in need of an application to automatically search information on chemical
+substances and create a phase diagram. The so-called "Descartes" project was
+split into two teams each creating a different application that has part of the
+functionality. We are the team Descartes 2 and as we were responsible for
+creating a web crawler, we've named our application Fourmi (English: Ant).
+
+The following people were part of the original team:
+
+- [Jip J. Dekker](http://jip.dekker.li)
+
+- Rob ten Berge
+
+- Harmen Prins
+
+- Bas van Berkel
+
+- Nout van Deijck
+
+- Michail Kuznetcov
\ No newline at end of file

From c380b740461d4b1d07482511f3dc8a2432df43cc Mon Sep 17 00:00:00 2001
From: "Jip J. Dekker" <jip@dekker.li>
Date: Tue, 13 May 2014 23:28:56 +0200
Subject: [PATCH 6/8] Making things bold, removing breaklines

---
 README.md | 17 ++---------------
 1 file changed, 2 insertions(+), 15 deletions(-)

diff --git a/README.md b/README.md
index 4732c56..af012fd 100644
--- a/README.md
+++ b/README.md
@@ -39,29 +39,21 @@ Fourmi application can be found on our [wiki](...).
 
 The Fourmi project has the following goals for the nearby future:
 
-** Main goals: **
+__Main goals:__
 
 - Improve our documentation and guides. (Assignee: Dekker)
-
 - Build an graphical user interface(GUI) as alternative for the command line
 interface(CLI). (Assignee: Harmen)
-
 - Compiling the source into an windows executable. (Assignee: Bas)
-
 - Create an configuration file to hold logins and API keys.
-
 - Determine reliability of our data point.
-
 - Create an module to gather data from NIST. (Assignee: Rob)
-
 - Create an module to gather data from PubChem. (Assignee: Rob)
 
-** Side goals: **
+__Side goals:__
 
 - Clean and unify data.
-
 - Extensive reliability analysis using statistical tests.
-
 - Test data with Descartes 1.
 
 ### Project Origin
@@ -82,13 +74,8 @@ creating a web crawler, we've named our application Fourmi (Englis: Ants).
 The following people were part of the original team:
 
 - [Jip J. Dekker](http://jip.dekker.li)
-
 - Rob ten Berge
-
 - Harmen Prins
-
 - Bas van Berkel
-
 - Nout van Deijck
-
 - Michail Kuznetcov
\ No newline at end of file

From 284d24c7830d96bf15f7386b9e4f0e13c9dbb0e6 Mon Sep 17 00:00:00 2001
From: "Jip J. Dekker" <jip@dekker.li>
Date: Tue, 13 May 2014 23:35:12 +0200
Subject: [PATCH 7/8] Bumped the version number

---
 fourmi.py | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/fourmi.py b/fourmi.py
index a9c1d68..efa4e54 100755
--- a/fourmi.py
+++ b/fourmi.py
@@ -80,7 +80,7 @@ def search(docopt_arguments, source_loader):
 
 
 if __name__ == '__main__':
-    arguments = docopt.docopt(__doc__, version='Fourmi - V0.2.6')
+    arguments = docopt.docopt(__doc__, version='Fourmi - V0.3.0')
     loader = SourceLoader()
 
     if arguments["--include"]:

From ee92e25ab4b25dbed46fd823b8de1a54ca0ea0a4 Mon Sep 17 00:00:00 2001
From: "Jip J. Dekker" <jip@dekker.li>
Date: Tue, 13 May 2014 23:43:37 +0200
Subject: [PATCH 8/8] Fixed the right assignees

---
 README.md | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/README.md b/README.md
index af012fd..e9150a6 100644
--- a/README.md
+++ b/README.md
@@ -48,7 +48,7 @@ interface(CLI). (Assignee: Harmen)
 - Create an configuration file to hold logins and API keys.
 - Determine reliability of our data point.
 - Create an module to gather data from NIST. (Assignee: Rob)
-- Create an module to gather data from PubChem. (Assignee: Rob)
+- Create an module to gather data from PubChem. (Assignee: Nout)
 
 __Side goals:__