From 6182d4104eef9a68c6958d3320656d7c6ff77900 Mon Sep 17 00:00:00 2001 From: "Jip J. Dekker" Date: Sun, 16 Mar 2014 22:54:34 +0100 Subject: [PATCH 1/3] Added an result item which the spiders will return. --- Scrapy/items.py | 10 ++++++---- 1 file changed, 6 insertions(+), 4 deletions(-) diff --git a/Scrapy/items.py b/Scrapy/items.py index 17b9d3d..5fedc36 100644 --- a/Scrapy/items.py +++ b/Scrapy/items.py @@ -5,7 +5,9 @@ from scrapy.item import Item, Field -class FourmiItem(Item): - # define the fields for your item here like: - # name = Field() - pass +class Result(Item): + attribute = Field() + value = Field() + source = Field() + reliability = Field() + conditions = Field() \ No newline at end of file From 35481128388b8537d9505bf59adb790410a8a3a3 Mon Sep 17 00:00:00 2001 From: "Jip J. Dekker" Date: Sun, 16 Mar 2014 23:11:30 +0100 Subject: [PATCH 2/3] Removed all .pyc files and added to the ignore list --- .gitignore | 3 +++ Scrapy/__init__.pyc | Bin 143 -> 0 bytes Scrapy/settings.pyc | Bin 251 -> 0 bytes Scrapy/spiders/__init__.pyc | Bin 151 -> 0 bytes 4 files changed, 3 insertions(+) delete mode 100644 Scrapy/__init__.pyc delete mode 100644 Scrapy/settings.pyc delete mode 100644 Scrapy/spiders/__init__.pyc diff --git a/.gitignore b/.gitignore index c1549e0..158ef41 100644 --- a/.gitignore +++ b/.gitignore @@ -1,6 +1,9 @@ #EDITOR AND IDE SPECIFIC SETTINGFILES .idea +#Python Specific ignores +*.pyc + #THINGS WE WOULD NEVER EVER WANT! #ignore thumbnails created by windows Thumbs.db diff --git a/Scrapy/__init__.pyc b/Scrapy/__init__.pyc deleted file mode 100644 index f1096fd5de3c353baeeef45805c9e5c90c6fb80c..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 143 zcmZSn%*%DEQaU)90SXv_v;z^JG&>$#_wg|jPxQklpinAU(^`rX(jTzLyeDh}B z9G>~=b;r*U56QPE&IL9M5rPmONl8SBPix^DL>PqICc4@}tW{a03G`9&u(;;c1oA}4 z0=(v@jmBz!3cTgc)t|4!#&M6qZttA8F4qI}J@>-@uQuMu4=}}*#VVETCglAt3+AY> qOl78l9V#8Gj5RS>6M=!$I>{oK-J diff --git a/Scrapy/spiders/__init__.pyc b/Scrapy/spiders/__init__.pyc deleted file mode 100644 index c2fd93959faa39176f3f6aca2ae6dd4d68149d9b..0000000000000000000000000000000000000000 GIT binary patch literal 0 HcmV?d00001 literal 151 zcmZSn%*z#8Cmo#300oRd+5w1*S%5?e14FO|NW@PANHCxg#STC*{fzwFRQ;@!)a>lk zB7K+qbP{rLFIyv&mLc)fzk W5)PmtHo5sJr8%i~AghXjm;nG#4I(uF From 8dd2c168d2d915f8487dc1176198845523bf2a01 Mon Sep 17 00:00:00 2001 From: "Jip J. Dekker" Date: Sun, 16 Mar 2014 23:14:59 +0100 Subject: [PATCH 3/3] Added the basic structure for the first two spiders --- Scrapy/spiders/Chemspider.py | 11 +++++++++++ Scrapy/spiders/Wikipedia.py | 11 +++++++++++ 2 files changed, 22 insertions(+) create mode 100644 Scrapy/spiders/Chemspider.py create mode 100644 Scrapy/spiders/Wikipedia.py diff --git a/Scrapy/spiders/Chemspider.py b/Scrapy/spiders/Chemspider.py new file mode 100644 index 0000000..3fc74a0 --- /dev/null +++ b/Scrapy/spiders/Chemspider.py @@ -0,0 +1,11 @@ +from scrapy.spider import Spider + +class ChemspiderSpider(Spider): + name = "Chemspider" + allowed_domains = ["chemspider.com"] + start_urls = ( + 'http://www.chemspider.com/', + ) + + def parse(self, response): + pass diff --git a/Scrapy/spiders/Wikipedia.py b/Scrapy/spiders/Wikipedia.py new file mode 100644 index 0000000..03b202b --- /dev/null +++ b/Scrapy/spiders/Wikipedia.py @@ -0,0 +1,11 @@ +from scrapy.spider import Spider + +class WikipediaSpider(Spider): + name = "Wikipedia" + allowed_domains = ["wikipedia.org"] + start_urls = ( + 'http://www.wikipedia.org/', + ) + + def parse(self, response): + pass