From be63315ca2c28208cd7657535c6b23f001e8a808 Mon Sep 17 00:00:00 2001
From: Bas Vb
Date: Wed, 16 Apr 2014 17:01:35 +0200
Subject: [PATCH] regex

---
Review notes (free-form commentary between the "---" line and the diff):
* Adds `import re` to WikipediaParser.py and changes cleanitems() so the
  debug print is no longer unconditional: a value is printed only when
  re.match('3(...)', value) succeeds, i.e. when it starts with "3"
  followed by at least three more non-newline characters. The method
  still returns `items` as received.
* `print value` is Python 2 statement syntax, matching the removed line.
* NOTE(review): the line breaks of this patch were restored from the hunk
  headers; the leading whitespace of the hunk lines was reconstructed
  assuming 4-space indentation -- confirm against blob 38d42f8 before
  applying.

 FourmiCrawler/parsers/WikipediaParser.py | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/FourmiCrawler/parsers/WikipediaParser.py b/FourmiCrawler/parsers/WikipediaParser.py
index 1ceeccb..38d42f8 100644
--- a/FourmiCrawler/parsers/WikipediaParser.py
+++ b/FourmiCrawler/parsers/WikipediaParser.py
@@ -2,6 +2,7 @@ from scrapy.http import Request
 from parser import Parser
 from scrapy.selector import Selector
 from FourmiCrawler.items import Result
+import re
 
 
 class WikipediaParser(Parser):
@@ -45,7 +46,9 @@ class WikipediaParser(Parser):
 
     def cleanitems(self, items):
         for item in items:
-            print item['value']
+            value=item['value']
+            if re.match('3(...)', value):
+                print value
         return items
 
     def getboilingpoint(self, sel):