From 6182d4104eef9a68c6958d3320656d7c6ff77900 Mon Sep 17 00:00:00 2001
From: "Jip J. Dekker"
Date: Sun, 16 Mar 2014 22:54:34 +0100
Subject: [PATCH 01/20] Added a result item which the spiders will return.

---
 Scrapy/items.py | 10 ++++++----
 1 file changed, 6 insertions(+), 4 deletions(-)

diff --git a/Scrapy/items.py b/Scrapy/items.py
index 17b9d3d..5fedc36 100644
--- a/Scrapy/items.py
+++ b/Scrapy/items.py
@@ -5,7 +5,9 @@
 
 from scrapy.item import Item, Field
 
-class FourmiItem(Item):
-    # define the fields for your item here like:
-    # name = Field()
-    pass
+class Result(Item):
+    attribute = Field()
+    value = Field()
+    source = Field()
+    reliability = Field()
+    conditions = Field()
\ No newline at end of file

From 35481128388b8537d9505bf59adb790410a8a3a3 Mon Sep 17 00:00:00 2001
From: "Jip J. Dekker"
Date: Sun, 16 Mar 2014 23:11:30 +0100
Subject: [PATCH 02/20] Removed all .pyc files and added them to the ignore
 list

---
 .gitignore                  |   3 +++
 Scrapy/__init__.pyc         | Bin 143 -> 0 bytes
 Scrapy/settings.pyc         | Bin 251 -> 0 bytes
 Scrapy/spiders/__init__.pyc | Bin 151 -> 0 bytes
 4 files changed, 3 insertions(+)
 delete mode 100644 Scrapy/__init__.pyc
 delete mode 100644 Scrapy/settings.pyc
 delete mode 100644 Scrapy/spiders/__init__.pyc

diff --git a/.gitignore b/.gitignore
index c1549e0..158ef41 100644
--- a/.gitignore
+++ b/.gitignore
@@ -1,6 +1,9 @@
 #EDITOR AND IDE SPECIFIC SETTINGFILES
 .idea
 
+#Python Specific ignores
+*.pyc
+
 #THINGS WE WOULD NEVER EVER WANT!
 #ignore thumbnails created by windows
 Thumbs.db

diff --git a/Scrapy/__init__.pyc b/Scrapy/__init__.pyc
deleted file mode 100644
index f1096fd5de3c353baeeef45805c9e5c90c6fb80c..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

[base85 binary delta of the deleted 143-byte .pyc omitted]

diff --git a/Scrapy/settings.pyc b/Scrapy/settings.pyc
deleted file mode 100644
GIT binary patch
literal 0
HcmV?d00001

[base85 binary delta of the deleted 251-byte .pyc omitted]

diff --git a/Scrapy/spiders/__init__.pyc b/Scrapy/spiders/__init__.pyc
deleted file mode 100644
index c2fd93959faa39176f3f6aca2ae6dd4d68149d9b..0000000000000000000000000000000000000000
GIT binary patch
literal 0
HcmV?d00001

[base85 binary delta of the deleted 151-byte .pyc omitted]
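A quick sketch of how a spider callback would fill the Result item introduced in PATCH 01; only the field names come from the patch, the compound data below is invented for illustration:

    from Scrapy.items import Result  # package path as of this patch; renamed in later patches

    result = Result()
    result['attribute'] = 'boiling point'       # invented example data
    result['value'] = '-161.5 degrees Celsius'  # invented example data
    result['source'] = 'ExampleSource'
    result['reliability'] = 'unconfirmed'
    result['conditions'] = 'at 1 atm'
    # Scrapy items behave like dicts but reject undeclared fields:
    # assigning result['colour'] would raise a KeyError.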
From 8dd2c168d2d915f8487dc1176198845523bf2a01 Mon Sep 17 00:00:00 2001
From: "Jip J. Dekker"
Date: Sun, 16 Mar 2014 23:14:59 +0100
Subject: [PATCH 03/20] Added the basic structure for the first two spiders

---
 Scrapy/spiders/Chemspider.py | 11 +++++++++++
 Scrapy/spiders/Wikipedia.py  | 11 +++++++++++
 2 files changed, 22 insertions(+)
 create mode 100644 Scrapy/spiders/Chemspider.py
 create mode 100644 Scrapy/spiders/Wikipedia.py

diff --git a/Scrapy/spiders/Chemspider.py b/Scrapy/spiders/Chemspider.py
new file mode 100644
index 0000000..3fc74a0
--- /dev/null
+++ b/Scrapy/spiders/Chemspider.py
@@ -0,0 +1,11 @@
+from scrapy.spider import Spider
+
+class ChemspiderSpider(Spider):
+    name = "Chemspider"
+    allowed_domains = ["chemspider.com"]
+    start_urls = (
+        'http://www.chemspider.com/',
+    )
+
+    def parse(self, response):
+        pass

diff --git a/Scrapy/spiders/Wikipedia.py b/Scrapy/spiders/Wikipedia.py
new file mode 100644
index 0000000..03b202b
--- /dev/null
+++ b/Scrapy/spiders/Wikipedia.py
@@ -0,0 +1,11 @@
+from scrapy.spider import Spider
+
+class WikipediaSpider(Spider):
+    name = "Wikipedia"
+    allowed_domains = ["wikipedia.org"]
+    start_urls = (
+        'http://www.wikipedia.org/',
+    )
+
+    def parse(self, response):
+        pass

From 55843d320c54b7c7a39c398170d657d07ee80c71 Mon Sep 17 00:00:00 2001
From: "Jip J. Dekker"
Date: Mon, 17 Mar 2014 16:25:48 +0100
Subject: [PATCH 04/20] Added a formal pipeline to make sure that we don't
 supply duplicate values.

---
 Scrapy/pipelines.py | 19 ++++++++++++++++++-
 Scrapy/settings.py  |  3 +++
 2 files changed, 21 insertions(+), 1 deletion(-)

diff --git a/Scrapy/pipelines.py b/Scrapy/pipelines.py
index 3345787..3194d7e 100644
--- a/Scrapy/pipelines.py
+++ b/Scrapy/pipelines.py
@@ -2,7 +2,24 @@
 #
 # Don't forget to add your pipeline to the ITEM_PIPELINES setting
 # See: http://doc.scrapy.org/en/latest/topics/item-pipeline.html
+from scrapy.exceptions import DropItem
+
 
 class FourmiPipeline(object):
+
+    def __init__(self):
+        self.known_values = set()
+
     def process_item(self, item, spider):
-        return item
+        """
+        Processing the items so exact duplicates are dropped
+        :param item: The incoming item
+        :param spider: The spider which scraped the item
+        :return: :raise DropItem: Returns the item if it is unique, raises DropItem if it is already known
+        """
+        value = item['attribute'], item['value']
+        if value in self.known_values:
+            raise DropItem("Duplicate item found: %s" % item)
+        else:
+            self.known_values.add(value)
+            return item

diff --git a/Scrapy/settings.py b/Scrapy/settings.py
index e43aa2b..fd379a9 100644
--- a/Scrapy/settings.py
+++ b/Scrapy/settings.py
@@ -10,6 +10,9 @@ BOT_NAME = 'Fourmi'
 
 SPIDER_MODULES = ['Scrapy.spiders']
 NEWSPIDER_MODULE = 'Scrapy.spiders'
+ITEM_PIPELINES = {
+    'Scrapy.pipelines.FourmiPipeline': 100
+}
 
 # Crawl responsibly by identifying yourself (and your website) on the user-agent
 #USER_AGENT = 'Fourmi (+http://www.yourdomain.com)'
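The pipeline in PATCH 04 keys its duplicate check on the (attribute, value) pair alone, so the same pair reported by two sources yields a single item; differing source or conditions fields do not make an item unique. A minimal sketch of the behaviour (item data invented):

    from scrapy.exceptions import DropItem
    from Scrapy.items import Result
    from Scrapy.pipelines import FourmiPipeline

    pipeline = FourmiPipeline()
    item = Result(attribute='density', value='0.656 g/L')  # invented data
    pipeline.process_item(item, spider=None)               # unique: returned
    try:
        pipeline.process_item(Result(attribute='density', value='0.656 g/L'), spider=None)
    except DropItem:
        pass                                               # exact double: dropped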
From 2cb21c6b810624cf5569f95828b326fc1ccc1996 Mon Sep 17 00:00:00 2001
From: "Jip J. Dekker"
Date: Mon, 17 Mar 2014 16:38:13 +0100
Subject: [PATCH 05/20] Moved the Scrapy code to another namespace; should
 prevent some import faults.

---
 {Scrapy => Fourmi}/__init__.py           | 0
 {Scrapy => Fourmi}/items.py              | 0
 {Scrapy => Fourmi}/pipelines.py          | 0
 {Scrapy => Fourmi}/settings.py           | 6 +++---
 {Scrapy => Fourmi}/spiders/Chemspider.py | 0
 {Scrapy => Fourmi}/spiders/Wikipedia.py  | 0
 {Scrapy => Fourmi}/spiders/__init__.py   | 0
 7 files changed, 3 insertions(+), 3 deletions(-)
 rename {Scrapy => Fourmi}/__init__.py (100%)
 rename {Scrapy => Fourmi}/items.py (100%)
 rename {Scrapy => Fourmi}/pipelines.py (100%)
 rename {Scrapy => Fourmi}/settings.py (77%)
 rename {Scrapy => Fourmi}/spiders/Chemspider.py (100%)
 rename {Scrapy => Fourmi}/spiders/Wikipedia.py (100%)
 rename {Scrapy => Fourmi}/spiders/__init__.py (100%)

diff --git a/Scrapy/__init__.py b/Fourmi/__init__.py
similarity index 100%
rename from Scrapy/__init__.py
rename to Fourmi/__init__.py
diff --git a/Scrapy/items.py b/Fourmi/items.py
similarity index 100%
rename from Scrapy/items.py
rename to Fourmi/items.py
diff --git a/Scrapy/pipelines.py b/Fourmi/pipelines.py
similarity index 100%
rename from Scrapy/pipelines.py
rename to Fourmi/pipelines.py
diff --git a/Scrapy/settings.py b/Fourmi/settings.py
similarity index 77%
rename from Scrapy/settings.py
rename to Fourmi/settings.py
index fd379a9..a24e6f6 100644
--- a/Scrapy/settings.py
+++ b/Fourmi/settings.py
@@ -8,10 +8,10 @@
 
 BOT_NAME = 'Fourmi'
 
-SPIDER_MODULES = ['Scrapy.spiders']
-NEWSPIDER_MODULE = 'Scrapy.spiders'
+SPIDER_MODULES = ['Fourmi.spiders']
+NEWSPIDER_MODULE = 'Fourmi.spiders'
 ITEM_PIPELINES = {
-    'Scrapy.pipelines.FourmiPipeline': 100
+    'Fourmi.pipelines.FourmiPipeline': 100
 }
 
 # Crawl responsibly by identifying yourself (and your website) on the user-agent
diff --git a/Scrapy/spiders/Chemspider.py b/Fourmi/spiders/Chemspider.py
similarity index 100%
rename from Scrapy/spiders/Chemspider.py
rename to Fourmi/spiders/Chemspider.py
diff --git a/Scrapy/spiders/Wikipedia.py b/Fourmi/spiders/Wikipedia.py
similarity index 100%
rename from Scrapy/spiders/Wikipedia.py
rename to Fourmi/spiders/Wikipedia.py
diff --git a/Scrapy/spiders/__init__.py b/Fourmi/spiders/__init__.py
similarity index 100%
rename from Scrapy/spiders/__init__.py
rename to Fourmi/spiders/__init__.py
From 4f5b66fff69b68666d4e114afa2d53382476b985 Mon Sep 17 00:00:00 2001
From: "Jip J. Dekker"
Date: Tue, 18 Mar 2014 17:28:49 +0100
Subject: [PATCH 06/20] Basic structure to make sure the spiders use an
 argument

---
 Fourmi/spiders/Chemspider.py | 7 ++++---
 Fourmi/spiders/Wikipedia.py  | 7 ++++---
 2 files changed, 8 insertions(+), 6 deletions(-)

diff --git a/Fourmi/spiders/Chemspider.py b/Fourmi/spiders/Chemspider.py
index 3fc74a0..b85b44d 100644
--- a/Fourmi/spiders/Chemspider.py
+++ b/Fourmi/spiders/Chemspider.py
@@ -3,9 +3,10 @@ from scrapy.spider import Spider
 class ChemspiderSpider(Spider):
     name = "Chemspider"
     allowed_domains = ["chemspider.com"]
-    start_urls = (
-        'http://www.chemspider.com/',
-    )
+
+    def __init__(self, compound=None, *args, **kwargs):
+        super(ChemspiderSpider, self).__init__(*args, **kwargs)
+        self.start_urls = ["http://chemspiderapiurl/something/%s" % compound]  # [TODO] - Give a logical start url.
 
     def parse(self, response):
         pass

diff --git a/Fourmi/spiders/Wikipedia.py b/Fourmi/spiders/Wikipedia.py
index 03b202b..62ed026 100644
--- a/Fourmi/spiders/Wikipedia.py
+++ b/Fourmi/spiders/Wikipedia.py
@@ -3,9 +3,10 @@ from scrapy.spider import Spider
 class WikipediaSpider(Spider):
     name = "Wikipedia"
     allowed_domains = ["wikipedia.org"]
-    start_urls = (
-        'http://www.wikipedia.org/',
-    )
+
+    def __init__(self, compound=None, *args, **kwargs):
+        super(WikipediaSpider, self).__init__(*args, **kwargs)
+        self.start_urls = ["http://wikipediaurl/something/%s" % compound]  # [TODO] - Give a logical start url.
 
     def parse(self, response):
         pass

From b1840d3a658918281e3bfcd831c375efcb625841 Mon Sep 17 00:00:00 2001
From: "Jip J. Dekker"
Date: Tue, 18 Mar 2014 17:41:40 +0100
Subject: [PATCH 07/20] Another name change to accommodate an executable
 script

---
 {Fourmi => FourmiCrawler}/__init__.py           |  0
 {Fourmi => FourmiCrawler}/items.py              |  0
 {Fourmi => FourmiCrawler}/pipelines.py          |  0
 {Fourmi => FourmiCrawler}/settings.py           | 10 +++++-----
 {Fourmi => FourmiCrawler}/spiders/Chemspider.py |  0
 {Fourmi => FourmiCrawler}/spiders/Wikipedia.py  |  0
 {Fourmi => FourmiCrawler}/spiders/__init__.py   |  0
 scrapy.cfg                                      |  2 +-
 8 files changed, 6 insertions(+), 6 deletions(-)
 rename {Fourmi => FourmiCrawler}/__init__.py (100%)
 rename {Fourmi => FourmiCrawler}/items.py (100%)
 rename {Fourmi => FourmiCrawler}/pipelines.py (100%)
 rename {Fourmi => FourmiCrawler}/settings.py (60%)
 rename {Fourmi => FourmiCrawler}/spiders/Chemspider.py (100%)
 rename {Fourmi => FourmiCrawler}/spiders/Wikipedia.py (100%)
 rename {Fourmi => FourmiCrawler}/spiders/__init__.py (100%)

diff --git a/Fourmi/__init__.py b/FourmiCrawler/__init__.py
similarity index 100%
rename from Fourmi/__init__.py
rename to FourmiCrawler/__init__.py
diff --git a/Fourmi/items.py b/FourmiCrawler/items.py
similarity index 100%
rename from Fourmi/items.py
rename to FourmiCrawler/items.py
diff --git a/Fourmi/pipelines.py b/FourmiCrawler/pipelines.py
similarity index 100%
rename from Fourmi/pipelines.py
rename to FourmiCrawler/pipelines.py
diff --git a/Fourmi/settings.py b/FourmiCrawler/settings.py
similarity index 60%
rename from Fourmi/settings.py
rename to FourmiCrawler/settings.py
index a24e6f6..0f5eae8 100644
--- a/Fourmi/settings.py
+++ b/FourmiCrawler/settings.py
@@ -6,13 +6,13 @@
 # http://doc.scrapy.org/en/latest/topics/settings.html
 #
 
-BOT_NAME = 'Fourmi'
+BOT_NAME = 'FourmiCrawler'
 
-SPIDER_MODULES = ['Fourmi.spiders']
-NEWSPIDER_MODULE = 'Fourmi.spiders'
+SPIDER_MODULES = ['FourmiCrawler.spiders']
+NEWSPIDER_MODULE = 'FourmiCrawler.spiders'
 ITEM_PIPELINES = {
-    'Fourmi.pipelines.FourmiPipeline': 100
+    'FourmiCrawler.pipelines.FourmiPipeline': 100
 }
 
 # Crawl responsibly by identifying yourself (and your website) on the user-agent
-#USER_AGENT = 'Fourmi (+http://www.yourdomain.com)'
+#USER_AGENT = 'FourmiCrawler (+http://www.yourdomain.com)'
diff --git a/Fourmi/spiders/Chemspider.py b/FourmiCrawler/spiders/Chemspider.py
similarity index 100%
rename from Fourmi/spiders/Chemspider.py
rename to FourmiCrawler/spiders/Chemspider.py
diff --git a/Fourmi/spiders/Wikipedia.py b/FourmiCrawler/spiders/Wikipedia.py
similarity index 100%
rename from Fourmi/spiders/Wikipedia.py
rename to FourmiCrawler/spiders/Wikipedia.py
diff --git a/Fourmi/spiders/__init__.py b/FourmiCrawler/spiders/__init__.py
similarity index 100%
rename from Fourmi/spiders/__init__.py
rename to FourmiCrawler/spiders/__init__.py
diff --git a/scrapy.cfg b/scrapy.cfg
index 6f432fb..2226c7c 100644
--- a/scrapy.cfg
+++ b/scrapy.cfg
@@ -4,7 +4,7 @@
 # http://doc.scrapy.org/en/latest/topics/scrapyd.html
 
 [settings]
-default = Scrapy.settings
+default = FourmiCrawler.settings
 
 [deploy]
 #url = http://localhost:6800/
From 7355de1b20b9444879f743e20f03533ed19f192b Mon Sep 17 00:00:00 2001
From: "Jip J. Dekker"
Date: Tue, 18 Mar 2014 18:03:22 +0100
Subject: [PATCH 08/20] Added a simple script to run a spider

---
 Fourmi.py | 21 +++++++++++++++++++++
 1 file changed, 21 insertions(+)
 create mode 100644 Fourmi.py

diff --git a/Fourmi.py b/Fourmi.py
new file mode 100644
index 0000000..4ed2c95
--- /dev/null
+++ b/Fourmi.py
@@ -0,0 +1,21 @@
+"""
+Fourmi - An internet webcrawler searching for information on chemical compounds.
+[todo] - Add some more useful text here.
+"""
+
+from twisted.internet import reactor
+from scrapy.crawler import Crawler
+from scrapy import log, signals
+from FourmiCrawler.spiders.Chemspider import ChemspiderSpider  # [review] - There should be an easy way to import all spiders!
+from scrapy.utils.project import get_project_settings
+
+# [todo] - Add something to add all spiders, with the right references
+spider = ChemspiderSpider(compound = "Aspirin")
+settings = get_project_settings()
+crawler = Crawler(settings)
+crawler.signals.connect(reactor.stop, signal=signals.spider_closed)
+crawler.configure()
+crawler.crawl(spider)
+crawler.start()
+log.start()
+reactor.run()
\ No newline at end of file

From 826937e25e366fac55e478740eb6b55b8a990c6e Mon Sep 17 00:00:00 2001
From: "Jip J. Dekker"
Date: Tue, 18 Mar 2014 18:05:44 +0100
Subject: [PATCH 09/20] Unix machines should be able to execute this without
 any problems.

---
 Fourmi.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Fourmi.py b/Fourmi.py
index 4ed2c95..16029f9 100644
--- a/Fourmi.py
+++ b/Fourmi.py
@@ -1,3 +1,4 @@
+#!/usr/bin/env python
 """
 Fourmi - An internet webcrawler searching for information on chemical compounds.
 [todo] - Add some more useful text here.

From 328cb3808c28237eb5f56713fe827a2d0807e166 Mon Sep 17 00:00:00 2001
From: "Jip J. Dekker"
Date: Tue, 18 Mar 2014 18:05:44 +0100
Subject: [PATCH 10/20] Unix machines should be able to execute this without
 any problems.

---
 Fourmi.py | 1 +
 1 file changed, 1 insertion(+)
 mode change 100644 => 100755 Fourmi.py

diff --git a/Fourmi.py b/Fourmi.py
old mode 100644
new mode 100755
index 4ed2c95..16029f9
--- a/Fourmi.py
+++ b/Fourmi.py
@@ -1,3 +1,4 @@
+#!/usr/bin/env python
 """
 Fourmi - An internet webcrawler searching for information on chemical compounds.
 [todo] - Add some more useful text here.

From 306a37db1a9be535a0d624b8bf5e1004f218f43c Mon Sep 17 00:00:00 2001
From: "Jip J. Dekker"
Date: Sat, 22 Mar 2014 15:48:08 +0100
Subject: [PATCH 11/20] A better structure which is able to start multiple
 spiders.

---
 Fourmi.py | 27 +++++++++++++++++----------
 1 file changed, 17 insertions(+), 10 deletions(-)

diff --git a/Fourmi.py b/Fourmi.py
index 16029f9..640f9f7 100755
--- a/Fourmi.py
+++ b/Fourmi.py
@@ -10,13 +10,20 @@ from scrapy import log, signals
 from FourmiCrawler.spiders.Chemspider import ChemspiderSpider  # [review] - There should be an easy way to import all spiders!
 from scrapy.utils.project import get_project_settings
 
-# [todo] - Add something to add all spiders, with the right references
-spider = ChemspiderSpider(compound = "Aspirin")
-settings = get_project_settings()
-crawler = Crawler(settings)
-crawler.signals.connect(reactor.stop, signal=signals.spider_closed)
-crawler.configure()
-crawler.crawl(spider)
-crawler.start()
-log.start()
-reactor.run()
\ No newline at end of file
+defined_spiders = [ChemspiderSpider(compound = "Methane")]
+
+def setup_crawler(Spider, compound):
+	spider = FollowAllSpider(domain=domain)  # [todo] - Do something smart to get the different spiders to work here.
+	settings = get_project_settings()
+	crawler = Crawler(settings)
+	crawler.configure()
+	crawler.crawl(spider)
+	crawler.start()
+
+def start():
+	for spider in defined_spiders:
+		setup_crawler(spider, compound)
+	log.start()
+	reactor.run()
+
+start()
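PATCH 11 introduces the multi-spider loop but still instantiates a placeholder FollowAllSpider and drops the spider_closed/reactor.stop hookup from PATCH 08. Under this pre-1.0 Scrapy API every spider needs its own Crawler, and the Twisted reactor is started once after all of them. A corrected sketch of the intended loop (the spider list and compound are example inputs, not code from the patches):

    from twisted.internet import reactor
    from scrapy.crawler import Crawler
    from scrapy import log
    from scrapy.utils.project import get_project_settings

    def setup_crawler(spider):
        # one Crawler per spider; all of them share the running reactor
        crawler = Crawler(get_project_settings())
        crawler.configure()
        crawler.crawl(spider)
        crawler.start()

    def start(spider_classes, compound):
        for spider_class in spider_classes:  # e.g. [ChemspiderSpider, WikipediaSpider]
            setup_crawler(spider_class(compound=compound))
        log.start()
        reactor.run()  # blocks until something calls reactor.stop()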
From 8175e02f6c54e7b92f35f794a52655963e077e3a Mon Sep 17 00:00:00 2001
From: "Jip J. Dekker"
Date: Thu, 27 Mar 2014 13:08:46 +0100
Subject: [PATCH 12/20] New structure, splitting on parsers instead of spiders

---
 Fourmi.py                             | 11 ++++-------
 FourmiCrawler/spiders/Chemspider.py   | 12 ------------
 FourmiCrawler/spiders/Fourmispider.py | 12 ++++++++++++
 FourmiCrawler/spiders/Wikipedia.py    | 12 ------------
 4 files changed, 16 insertions(+), 31 deletions(-)
 delete mode 100644 FourmiCrawler/spiders/Chemspider.py
 create mode 100644 FourmiCrawler/spiders/Fourmispider.py
 delete mode 100644 FourmiCrawler/spiders/Wikipedia.py

diff --git a/Fourmi.py b/Fourmi.py
index 640f9f7..a0a9ead 100755
--- a/Fourmi.py
+++ b/Fourmi.py
@@ -7,13 +7,11 @@ Fourmi - An internet webcrawler searching for information on chemical compounds.
 from twisted.internet import reactor
 from scrapy.crawler import Crawler
 from scrapy import log, signals
-from FourmiCrawler.spiders.Chemspider import ChemspiderSpider  # [review] - There should be an easy way to import all spiders!
+from FourmiCrawler.spiders.Fourmispider import FourmiSpider
 from scrapy.utils.project import get_project_settings
 
-defined_spiders = [ChemspiderSpider(compound = "Methane")]
-
-def setup_crawler(Spider, compound):
-	spider = FollowAllSpider(domain=domain)  # [todo] - Do something smart to get the different spiders to work here.
+def setup_crawler(compound):
+	spider = FourmiSpider(domain=domain)  # [todo] - Do something smart to get the different spiders to work here.
 	settings = get_project_settings()
 	crawler = Crawler(settings)
 	crawler.configure()
@@ -21,8 +19,7 @@ def setup_crawler(compound):
 	crawler.start()
 
 def start():
-	for spider in defined_spiders:
-		setup_crawler(spider, compound)
+	setup_crawler(compound)
 	log.start()
 	reactor.run()

diff --git a/FourmiCrawler/spiders/Chemspider.py b/FourmiCrawler/spiders/Chemspider.py
deleted file mode 100644
index b85b44d..0000000
--- a/FourmiCrawler/spiders/Chemspider.py
+++ /dev/null
@@ -1,12 +0,0 @@
-from scrapy.spider import Spider
-
-class ChemspiderSpider(Spider):
-    name = "Chemspider"
-    allowed_domains = ["chemspider.com"]
-
-    def __init__(self, compound=None, *args, **kwargs):
-        super(ChemspiderSpider, self).__init__(*args, **kwargs)
-        self.start_urls = ["http://chemspiderapiurl/something/%s" % compound]  # [TODO] - Give a logical start url.
-
-    def parse(self, response):
-        pass

diff --git a/FourmiCrawler/spiders/Fourmispider.py b/FourmiCrawler/spiders/Fourmispider.py
new file mode 100644
index 0000000..f7b64bd
--- /dev/null
+++ b/FourmiCrawler/spiders/Fourmispider.py
@@ -0,0 +1,12 @@
+from scrapy.spider import Spider
+
+class FourmiSpider(Spider):
+	name="FourmiSpider"
+
+	def __init__(self, compound=None, *args, **kwargs):
+		super(FourmiSpider, self).__init__(*args, **kwargs)
+		# [TODO] - Initiate all parsers for the different websites and get allowed URLs.
+
+	def parse(self, response):
+		# [TODO] - This function should delegate its functionality to other parsers.
+		pass

diff --git a/FourmiCrawler/spiders/Wikipedia.py b/FourmiCrawler/spiders/Wikipedia.py
deleted file mode 100644
index 62ed026..0000000
--- a/FourmiCrawler/spiders/Wikipedia.py
+++ /dev/null
@@ -1,12 +0,0 @@
-from scrapy.spider import Spider
-
-class WikipediaSpider(Spider):
-    name = "Wikipedia"
-    allowed_domains = ["wikipedia.org"]
-
-    def __init__(self, compound=None, *args, **kwargs):
-        super(WikipediaSpider, self).__init__(*args, **kwargs)
-        self.start_urls = ["http://wikipediaurl/something/%s" % compound]  # [TODO] - Give a logical start url.
-
-    def parse(self, response):
-        pass
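PATCH 12 replaces the per-site spiders with a single FourmiSpider whose parse method is meant to delegate to per-site parsers. One plausible shape for that delegation, matching a parser's website pattern against the responding URL (the regex matching and the parsers list are assumptions; the later parser patches only fix the website attribute and the parse signature):

    import re
    from scrapy.spider import Spider

    class FourmiSpider(Spider):
        name = "FourmiSpider"

        def __init__(self, compound=None, *args, **kwargs):
            super(FourmiSpider, self).__init__(*args, **kwargs)
            self.compound = compound
            self.parsers = []  # filled by add_parser, see PATCH 18

        def parse(self, response):
            # hand the response to the first parser whose website
            # pattern matches the URL that produced it
            for parser in self.parsers:
                if re.match(parser.website, response.url):
                    return parser.parse(response)
            return None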
From bdcf359da7c5fdda98dade75e5de908edb4d1f32 Mon Sep 17 00:00:00 2001
From: "Jip J. Dekker"
Date: Thu, 27 Mar 2014 13:12:27 +0100
Subject: [PATCH 13/20] Logical fixes to have some "working" case

---
 Fourmi.py | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/Fourmi.py b/Fourmi.py
index a0a9ead..9bdec24 100755
--- a/Fourmi.py
+++ b/Fourmi.py
@@ -10,8 +10,8 @@ from scrapy import log, signals
 from FourmiCrawler.spiders.Fourmispider import FourmiSpider
 from scrapy.utils.project import get_project_settings
 
-def setup_crawler(compound):
-	spider = FourmiSpider(domain=domain)  # [todo] - Do something smart to get the different spiders to work here.
+def setup_crawler(searchable):
+	spider = FourmiSpider(compound=searchable)  # [todo] - Do something smart to get the different spiders to work here.
 	settings = get_project_settings()
 	crawler = Crawler(settings)
 	crawler.configure()
@@ -19,7 +19,7 @@ def setup_crawler(searchable):
 	crawler.crawl(spider)
 	crawler.start()
 
 def start():
-	setup_crawler(compound)
+	setup_crawler("Methane")
 	log.start()
 	reactor.run()
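Because the compound is now a plain constructor keyword (PATCH 06, fixed up here), the spider can also be driven by the stock Scrapy command line, which forwards -a key=value pairs to the spider constructor:

    scrapy crawl FourmiSpider -a compound=Methane

This is the documented Scrapy mechanism for spider arguments and needs no custom runner script.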
From 8e9314e753c2390485c96a56364d69dbc0e4f80c Mon Sep 17 00:00:00 2001
From: "Jip J. Dekker"
Date: Thu, 27 Mar 2014 13:18:55 +0100
Subject: [PATCH 14/20] One spider should have its own folder

---
 FourmiCrawler/settings.py                            | 4 ++--
 FourmiCrawler/{spiders/Fourmispider.py => spider.py} | 0
 FourmiCrawler/spiders/__init__.py                    | 4 ----
 3 files changed, 2 insertions(+), 6 deletions(-)
 rename FourmiCrawler/{spiders/Fourmispider.py => spider.py} (100%)
 delete mode 100644 FourmiCrawler/spiders/__init__.py

diff --git a/FourmiCrawler/settings.py b/FourmiCrawler/settings.py
index 0f5eae8..28272d0 100644
--- a/FourmiCrawler/settings.py
+++ b/FourmiCrawler/settings.py
@@ -8,8 +8,8 @@
 
 BOT_NAME = 'FourmiCrawler'
 
-SPIDER_MODULES = ['FourmiCrawler.spiders']
-NEWSPIDER_MODULE = 'FourmiCrawler.spiders'
+SPIDER_MODULES = ['FourmiCrawler']
+NEWSPIDER_MODULE = 'FourmiCrawler'
 ITEM_PIPELINES = {
     'FourmiCrawler.pipelines.FourmiPipeline': 100
 }
diff --git a/FourmiCrawler/spiders/Fourmispider.py b/FourmiCrawler/spider.py
similarity index 100%
rename from FourmiCrawler/spiders/Fourmispider.py
rename to FourmiCrawler/spider.py
diff --git a/FourmiCrawler/spiders/__init__.py b/FourmiCrawler/spiders/__init__.py
deleted file mode 100644
index ebd689a..0000000
--- a/FourmiCrawler/spiders/__init__.py
+++ /dev/null
@@ -1,4 +0,0 @@
-# This package will contain the spiders of your Scrapy project
-#
-# Please refer to the documentation for information on how to create and manage
-# your spiders.

From 5b17627504672dc90f0f744d942d2c5c9a055d78 Mon Sep 17 00:00:00 2001
From: "Jip J. Dekker"
Date: Thu, 27 Mar 2014 13:23:03 +0100
Subject: [PATCH 15/20] The parsers however could use their own folder

---
 FourmiCrawler/parsers/__init__.py | 0
 1 file changed, 0 insertions(+), 0 deletions(-)
 create mode 100644 FourmiCrawler/parsers/__init__.py

diff --git a/FourmiCrawler/parsers/__init__.py b/FourmiCrawler/parsers/__init__.py
new file mode 100644
index 0000000..e69de29
From 87d10415177ceae3485956bc13450891d3f51182 Mon Sep 17 00:00:00 2001
From: "Jip J. Dekker"
Date: Fri, 28 Mar 2014 14:11:36 +0100
Subject: [PATCH 16/20] Made all Python files PEP-8 compliant

---
 Fourmi.py                 | 24 +++++++++++++-----------
 FourmiCrawler/items.py    |  3 ++-
 FourmiCrawler/settings.py |  6 ++++--
 FourmiCrawler/spider.py   | 21 ++++++++++++---------
 4 files changed, 31 insertions(+), 23 deletions(-)

diff --git a/Fourmi.py b/Fourmi.py
index 9bdec24..96c808e 100755
--- a/Fourmi.py
+++ b/Fourmi.py
@@ -1,7 +1,7 @@
 #!/usr/bin/env python
 """
-Fourmi - An internet webcrawler searching for information on chemical compounds.
-[todo] - Add some more useful text here.
+Fourmi - An internet webcrawler searching for information on chemical
+compounds. [todo] - Add some more useful text here.
 """
 
 from twisted.internet import reactor
@@ -10,17 +10,19 @@ from scrapy import log, signals
 from FourmiCrawler.spiders.Fourmispider import FourmiSpider
 from scrapy.utils.project import get_project_settings
 
+
 def setup_crawler(searchable):
-	spider = FourmiSpider(compound=searchable)  # [todo] - Do something smart to get the different spiders to work here.
-	settings = get_project_settings()
-	crawler = Crawler(settings)
-	crawler.configure()
-	crawler.crawl(spider)
-	crawler.start()
+    spider = FourmiSpider(compound=searchable)
+    settings = get_project_settings()
+    crawler = Crawler(settings)
+    crawler.configure()
+    crawler.crawl(spider)
+    crawler.start()
+
 
 def start():
-	setup_crawler("Methane")
-	log.start()
-	reactor.run()
+    setup_crawler("Methane")
+    log.start()
+    reactor.run()
 
 start()
diff --git a/FourmiCrawler/items.py b/FourmiCrawler/items.py
index 5fedc36..c7fd41c 100644
--- a/FourmiCrawler/items.py
+++ b/FourmiCrawler/items.py
@@ -5,9 +5,10 @@
 
 from scrapy.item import Item, Field
 
+
 class Result(Item):
     attribute = Field()
     value = Field()
     source = Field()
     reliability = Field()
-    conditions = Field()
\ No newline at end of file
+    conditions = Field()
diff --git a/FourmiCrawler/settings.py b/FourmiCrawler/settings.py
index 28272d0..b025167 100644
--- a/FourmiCrawler/settings.py
+++ b/FourmiCrawler/settings.py
@@ -14,5 +14,7 @@ ITEM_PIPELINES = {
     'FourmiCrawler.pipelines.FourmiPipeline': 100
 }
 
-# Crawl responsibly by identifying yourself (and your website) on the user-agent
-#USER_AGENT = 'FourmiCrawler (+http://www.yourdomain.com)'
+# Crawl responsibly by identifying yourself (and your website) on the
+# user-agent
+
+# USER_AGENT = 'FourmiCrawler (+http://www.yourdomain.com)'
diff --git a/FourmiCrawler/spider.py b/FourmiCrawler/spider.py
index f7b64bd..2805c8e 100644
--- a/FourmiCrawler/spider.py
+++ b/FourmiCrawler/spider.py
@@ -1,12 +1,15 @@
 from scrapy.spider import Spider
 
-class FourmiSpider(Spider):
-	name="FourmiSpider"
-
-	def __init__(self, compound=None, *args, **kwargs):
-		super(FourmiSpider, self).__init__(*args, **kwargs)
-		# [TODO] - Initiate all parsers for the different websites and get allowed URLs.
-
-	def parse(self, response):
-		# [TODO] - This function should delegate its functionality to other parsers.
-		pass
+
+class FourmiSpider(Spider):
+    name = "FourmiSpider"
+
+    def __init__(self, compound=None, *args, **kwargs):
+        super(FourmiSpider, self).__init__(*args, **kwargs)
+        # [TODO] - Initiate all parsers for the different websites and get
+        # allowed URLs.
+
+    def parse(self, response):
+        # [TODO] - This function should delegate its functionality to other
+        # parsers.
+        pass

From d91706d6e52892ac1b8bccbe74792bdcf9255ffe Mon Sep 17 00:00:00 2001
From: "Jip J. Dekker"
Date: Fri, 28 Mar 2014 14:14:39 +0100
Subject: [PATCH 17/20] The script should stop at some point; added a stopping
 signal

---
 Fourmi.py | 1 +
 1 file changed, 1 insertion(+)

diff --git a/Fourmi.py b/Fourmi.py
index 96c808e..533240e 100755
--- a/Fourmi.py
+++ b/Fourmi.py
@@ -15,6 +15,7 @@ def setup_crawler(searchable):
     spider = FourmiSpider(compound=searchable)
     settings = get_project_settings()
     crawler = Crawler(settings)
+    crawler.signals.connect(reactor.stop, signal=signals.spider_closed)
     crawler.configure()
     crawler.crawl(spider)
     crawler.start()
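Wiring reactor.stop straight to spider_closed (PATCH 17) stops the process as soon as the first spider closes, which is correct while there is exactly one spider; if several crawlers ever share the reactor again, as in PATCH 11, the first one to finish would kill the rest. A common workaround, sketched here and not part of these patches, is to count closures before stopping:

    from twisted.internet import reactor
    from scrapy import signals

    class SpiderCounter(object):
        def __init__(self, expected):
            self.remaining = expected  # number of spiders that were started

        def spider_closed(self):
            self.remaining -= 1
            if self.remaining == 0:
                reactor.stop()  # stop only after the last spider closes

    # for every crawler that is set up:
    #   crawler.signals.connect(counter.spider_closed, signal=signals.spider_closed)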
From 325febe834feaff06f9ceab4462fab17720902ce Mon Sep 17 00:00:00 2001
From: "Jip J. Dekker"
Date: Fri, 28 Mar 2014 14:43:22 +0100
Subject: [PATCH 18/20] Added a basic parser class to extend; next step is
 implementing the global function

---
 Fourmi.py               | 2 ++
 FourmiCrawler/spider.py | 5 +++--
 2 files changed, 5 insertions(+), 2 deletions(-)

diff --git a/Fourmi.py b/Fourmi.py
index 533240e..f1bf1ba 100755
--- a/Fourmi.py
+++ b/Fourmi.py
@@ -12,6 +12,8 @@ from scrapy.utils.project import get_project_settings
 
 
 def setup_crawler(searchable):
+    # [TODO] - Initiate all parsers for the different websites and get
+    # allowed URLs.
     spider = FourmiSpider(compound=searchable)
     settings = get_project_settings()
     crawler = Crawler(settings)
diff --git a/FourmiCrawler/spider.py b/FourmiCrawler/spider.py
index 2805c8e..4c25df9 100644
--- a/FourmiCrawler/spider.py
+++ b/FourmiCrawler/spider.py
@@ -6,10 +6,11 @@ class FourmiSpider(Spider):
 
     def __init__(self, compound=None, *args, **kwargs):
         super(FourmiSpider, self).__init__(*args, **kwargs)
-        # [TODO] - Initiate all parsers for the different websites and get
-        # allowed URLs.
 
     def parse(self, response):
         # [TODO] - This function should delegate its functionality to other
         # parsers.
         pass
+
+    def add_parser(self, parser):
+        self.parsers.add(parser)

From 32cedecf2e3ebab0c965457d003a5293f1115d91 Mon Sep 17 00:00:00 2001
From: "Jip J. Dekker"
Date: Fri, 28 Mar 2014 14:43:22 +0100
Subject: [PATCH 19/20] Added a basic parser class to extend; next step is
 implementing the global function

---
 Fourmi.py                       | 2 ++
 FourmiCrawler/parsers/parser.py | 9 +++++++++
 FourmiCrawler/spider.py         | 5 +++--
 3 files changed, 14 insertions(+), 2 deletions(-)
 create mode 100644 FourmiCrawler/parsers/parser.py

diff --git a/Fourmi.py b/Fourmi.py
index 533240e..f1bf1ba 100755
--- a/Fourmi.py
+++ b/Fourmi.py
@@ -12,6 +12,8 @@ from scrapy.utils.project import get_project_settings
 
 
 def setup_crawler(searchable):
+    # [TODO] - Initiate all parsers for the different websites and get
+    # allowed URLs.
     spider = FourmiSpider(compound=searchable)
     settings = get_project_settings()
     crawler = Crawler(settings)
diff --git a/FourmiCrawler/parsers/parser.py b/FourmiCrawler/parsers/parser.py
new file mode 100644
index 0000000..3362d59
--- /dev/null
+++ b/FourmiCrawler/parsers/parser.py
@@ -0,0 +1,9 @@
+from scrapy import log
+
+
+class Parser:
+    website = "http://localhost/*"
+
+    def parse(self, response):
+        log.msg("The parse function of the empty parser was used.", level=log.WARNING)
+        pass
diff --git a/FourmiCrawler/spider.py b/FourmiCrawler/spider.py
index 2805c8e..4c25df9 100644
--- a/FourmiCrawler/spider.py
+++ b/FourmiCrawler/spider.py
@@ -6,10 +6,11 @@ class FourmiSpider(Spider):
 
     def __init__(self, compound=None, *args, **kwargs):
         super(FourmiSpider, self).__init__(*args, **kwargs)
-        # [TODO] - Initiate all parsers for the different websites and get
-        # allowed URLs.
 
     def parse(self, response):
         # [TODO] - This function should delegate its functionality to other
         # parsers.
         pass
+
+    def add_parser(self, parser):
+        self.parsers.add(parser)

From e210ce85588af22f4408f54776c313b8130f8dc8 Mon Sep 17 00:00:00 2001
From: "Jip J. Dekker"
Date: Sun, 30 Mar 2014 22:08:21 +0200
Subject: [PATCH 20/20] Merge branch 'develop', remote-tracking branch
 'origin/develop' into develop
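The empty Parser class from PATCH 19 fixes the intended contract: a website pattern plus a parse method that produces Result items. A hypothetical subclass, to show how a site-specific parser would slot into this structure (the URL pattern and the extracted values are invented):

    from FourmiCrawler.items import Result
    from FourmiCrawler.parsers.parser import Parser


    class ExampleParser(Parser):
        website = "http://example.com/*"  # hypothetical source site

        def parse(self, response):
            # a real parser would run selectors against the site's markup;
            # the values below are placeholders
            result = Result()
            result['attribute'] = 'melting point'
            result['value'] = 'example value'
            result['source'] = 'example.com'
            result['reliability'] = 'unknown'
            result['conditions'] = ''
            return [result]

A FourmiSpider would then register an instance via add_parser from PATCH 18; note that the patches never initialise the parsers collection that add_parser writes to, so a follow-up commit still has to add it.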