
Merge branch 'release/v0.6.0'

This commit is contained in:
Jip J. Dekker 2014-06-21 01:40:27 +02:00
commit 50e6835116
23 changed files with 647 additions and 151 deletions

.gitignore vendored

@ -6,6 +6,8 @@
#may contain authentication information
sources.cfg
#Another of our config files
GUI.cfg
#THINGS WE WOULD NEVER EVER WANT!
#ignore thumbnails created by windows


@ -3,6 +3,10 @@
language: python
python: 2.7
before_install:
- "export DISPLAY=:99.0"
- "sh -e /etc/init.d/xvfb start"
# command to install dependencies, e.g. pip install -r requirements.txt --use-mirrors
install:
- pip install Scrapy docopt
@ -10,10 +14,10 @@ install:
# command to run tests, e.g. python setup.py test
script:
- nosetests --with-coverage --cover-package=FourmiCrawler,utils tests
- nosetests --with-coverage --cover-package=FourmiCrawler,utils,GUI tests
notifications:
slack: descartes2:6sgCzx3PvrO9IIMwKxj12dDM
after_success:
coveralls --verbose
coveralls --verbose


@ -1,3 +1,11 @@
### v0.6.0
- Feature: Added a Graphical User Interface
- Feature: Automatic config file creation from config samples
- FIX: The default name of the output files will now consist of the compound name and the file format when using the CLI
- FIX: Many bugfixes for the PubChem plugin, as it wasn't working as it should
- FIX: Using absolute paths for configuration files
- DEV: General code cleanup in the documentation
### v0.5.3
- FIX: It is now again possible to use both verbose and the source inclusion/exclusion options
- FIX: Logging is now "actually" disabled if not using the verbose option.


@ -21,7 +21,4 @@ FEED_FORMAT = 'jsonlines'
# Crawl responsibly by identifying yourself (and your website) on the
# user-agent
# [todo] - Check for repercussions on spoofing the user agent
# USER_AGENT = 'FourmiCrawler (+http://www.yourdomain.com)'
USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.137 Safari/537.36'
USER_AGENT = 'Fourmi'


@ -9,24 +9,28 @@ from FourmiCrawler.items import Result
# [TODO] - Maybe clean up usage of '.extract()[0]', because of possible IndexError exception.
# [TODO] - Add checks at search request and extendedCompoundInfo on whether the token was valid or not
class ChemSpider(Source):
"""ChemSpider scraper for synonyms and properties
"""
ChemSpider scraper for synonyms and properties
This parser will manage searching for chemicals through the
ChemSpider API, and parsing the resulting ChemSpider page.
The token required for the API should be in a configuration file
somewhere.
"""
website = 'http://www.chemspider.com/*'
website = 'http://www\\.chemspider\\.com/.*'
search = 'Search.asmx/SimpleSearch?query=%s&token='
structure = 'Chemical-Structure.%s.html'
extendedinfo = 'MassSpecAPI.asmx/GetExtendedCompoundInfo?csid=%s&token='
def __init__(self, config=None):
"""
Initialization of ChemSpider scraper
:param config: a dictionary of settings for this scraper, must contain
'reliability' key
"""
Source.__init__(self, config)
self.ignore_list = []
if 'token' not in self.cfg or self.cfg['token'] == '':
@ -37,6 +41,12 @@ class ChemSpider(Source):
self.extendedinfo += self.cfg['token']
def parse(self, response):
"""
This function is called when a Response matching the variable
'website' is available for parsing the Response object.
:param response: the Scrapy Response object to be parsed
:return: a list of Result items and Request objects
"""
sel = Selector(response)
requests = []
requests_synonyms = self.parse_synonyms(sel)
@ -47,10 +57,26 @@ class ChemSpider(Source):
return requests
def parse_properties(self, sel):
"""scrape Experimental Data and Predicted ACD/Labs tabs"""
"""
This function scrapes the Experimental Data and Predicted ACD/Labs tabs
:param sel: a Selector object of the whole page
:return: a list of Result items
"""
properties = []
properties.extend(self.parse_acdlabstab(sel))
properties.extend(self.parse_experimentaldatatab(sel))
return properties
def parse_acdlabstab(self, sel):
"""
This function scrapes the 'Predicted ACD/Labs tab' under Properties
:param sel: a Selector object of the whole page
:return: a list of Request objects
"""
properties = []
# Predicted - ACD/Labs tab
td_list = sel.xpath('.//table[@id="acdlabs-table"]//td').xpath(
'normalize-space(string())')
prop_names = td_list[::2]
@ -62,16 +88,15 @@ class ChemSpider(Source):
prop_conditions = ''
# Test for properties without values, with one hardcoded exception
if not re.match(r'^\d', prop_value) or (prop_name == 'Polarizability' and prop_value == '10-24cm3'):
if (not re.match(r'^\d', prop_value) or
(prop_name == 'Polarizability' and prop_value == '10-24cm3')):
continue
# Match for condition in parentheses
m = re.match(r'(.*) \((.*)\)', prop_name)
if m:
prop_name = m.group(1)
prop_conditions = m.group(2)
# Match for condition in value separated by an 'at'
m = re.match(r'(.*) at (.*)', prop_value)
if m:
prop_value = m.group(1)
@ -84,11 +109,18 @@ class ChemSpider(Source):
conditions=prop_conditions
)
properties.append(new_prop)
log.msg('CS prop: |%s| |%s| |%s|' %
(new_prop['attribute'], new_prop['value'], new_prop['source']),
level=log.DEBUG)
# Experimental Data Tab, Physico-chemical properties in particular
return properties
def parse_experimentaldatatab(self, sel):
"""
This function scrapes Experimental Data tab, Physico-chemical
properties in particular.
:param sel: a Selector object of the whole page
:return: a list of Result items
"""
properties = []
scraped_list = sel.xpath('.//li[span="Experimental Physico-chemical '
'Properties"]//li/table/tr/td')
if not scraped_list:
@ -105,15 +137,16 @@ class ChemSpider(Source):
value=line.xpath('text()').extract()[0].rstrip(),
source=line.xpath('strong/text()').extract()[0].rstrip(),
)
properties.append(new_prop)
log.msg('CS prop: |%s| |%s| |%s|' %
(new_prop['attribute'], new_prop['value'],
new_prop['source']), level=log.DEBUG)
properties.append(new_prop)
return properties
def parse_synonyms(self, sel):
"""Scrape list of Names and Identifiers"""
"""
This function scrapes the list of Names and Identifiers
:param sel: a Selector object of the whole page
:return: a list of Requests
"""
requests = []
synonyms = []
@ -145,7 +178,13 @@ class ChemSpider(Source):
return requests
def new_synonym(self, sel, name, category):
"""Scrape for a single synonym at a given HTML tag"""
"""
This function scrapes for a single synonym at a given HTML tag
:param sel: a Selector object of the given HTML tag
:param name: the name of the synonym in the tag
:param category: the name of the category the synonym is labeled as
:return: a dictionary containing data on the synonym
"""
self.ignore_list.append(name)
language = sel.xpath('span[@class="synonym_language"]/text()')
if language:
@ -181,7 +220,12 @@ class ChemSpider(Source):
return synonym
def parse_extendedinfo(self, response):
"""Scrape data from the ChemSpider GetExtendedCompoundInfo API"""
"""
This function scrapes data from the ChemSpider GetExtendedCompoundInfo
API, if a token is present in the configuration settings
:param response: a Response object to be parsed
:return: a list of Result items
"""
sel = Selector(response)
properties = []
names = sel.xpath('*').xpath('name()').extract()
@ -197,17 +241,31 @@ class ChemSpider(Source):
return properties
def newresult(self, attribute, value, conditions='', source='ChemSpider'):
return Result(
{
'attribute': attribute,
'value': value,
'source': source,
'reliability': self.cfg['reliability'],
'conditions': conditions
})
"""
This function abstracts away the Result item and provides default
values.
:param attribute: the name of the attribute
:param value: the value of the attribute
:param conditions: optional conditions regarding the value
:param source: the name of the source if it is not ChemSpider
:return: A Result item
"""
return Result({
'attribute': attribute,
'value': value,
'source': source,
'reliability': self.cfg['reliability'],
'conditions': conditions
})
def parse_searchrequest(self, response):
"""Parse the initial response of the ChemSpider Search API """
"""
This function parses the initial response of the ChemSpider Search API.
Requires a valid token to function.
:param response: the Response object to be parsed
:return: A Request for the information page and a Request for the
extendedinfo API call
"""
sel = Selector(response)
log.msg('chemspider parse_searchrequest', level=log.DEBUG)
sel.register_namespace('cs', 'http://www.chemspider.com/')
@ -219,8 +277,8 @@ class ChemSpider(Source):
log.msg('ChemSpider found multiple substances, taking first '
'element', level=log.DEBUG)
csid = csids[0]
structure_url = self.website[:-1] + self.structure % csid
extendedinfo_url = self.website[:-1] + self.extendedinfo % csid
structure_url = self.website[:-2].replace("\\", "") + self.structure % csid
extendedinfo_url = self.website[:-2].replace("\\", "") + self.extendedinfo % csid
log.msg('chemspider URL: %s' % structure_url, level=log.DEBUG)
return [Request(url=structure_url,
callback=self.parse),
@ -228,8 +286,13 @@ class ChemSpider(Source):
callback=self.parse_extendedinfo)]
def new_compound_request(self, compound):
"""
This function is called when a new synonym is returned to the spider
to generate new requests
:param compound: the name of the compound to search for
"""
if compound in self.ignore_list or self.cfg['token'] == '':
return None
searchurl = self.website[:-1] + self.search % compound
searchurl = self.website[:-2].replace("\\", "") + self.search % compound
log.msg('chemspider compound', level=log.DEBUG)
return Request(url=searchurl, callback=self.parse_searchrequest)
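Throughout this release the `website` fields change from glob-style wildcards to escaped regular expressions, and outgoing request URLs are rebuilt from that same pattern via `self.website[:-2].replace("\\", "")`. A minimal sketch (not part of the commit; the compound name is illustrative) of the two roles the pattern now plays:

```
import re

website = 'http://www\\.chemspider\\.com/.*'         # escaped regex, as in this commit
search = 'Search.asmx/SimpleSearch?query=%s&token='

# Role 1: match an incoming response URL against the pattern.
assert re.match(website, 'http://www.chemspider.com/Chemical-Structure.171.html')

# Role 2: recover a plain base URL by dropping the trailing '.*' and unescaping.
base = website[:-2].replace('\\', '')
print base + search % 'methanol'
# -> http://www.chemspider.com/Search.asmx/SimpleSearch?query=methanol&token=
```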


@ -13,20 +13,31 @@ from FourmiCrawler.items import Result
# Result item, but should be included eventually.
class NIST(Source):
"""NIST Scraper plugin
"""
NIST Scraper plugin
This plugin manages searching for a chemical on the NIST website
and parsing the resulting page if the chemical exists on NIST.
"""
website = "http://webbook.nist.gov/*"
website = "http://webbook\\.nist\\.gov/.*"
search = 'cgi/cbook.cgi?Name=%s&Units=SI&cTP=on'
def __init__(self, config=None):
"""
Initialization of NIST scraper
:param config: configuration variables for this scraper, must contain
'reliability' key.
"""
Source.__init__(self, config)
self.ignore_list = set()
def parse(self, response):
"""
This function is called when a Response matching the variable
'website' is available for parsing the Response object.
:param response: The Scrapy Response object to be parsed
:return: a list of Result items and Request objects
"""
sel = Selector(response)
title = sel.xpath('head/title/text()').extract()[0]
@ -51,6 +62,21 @@ class NIST(Source):
log.msg('NIST symbol: |%s|, name: |%s|' % (symbol, name),
level=log.DEBUG)
requests.extend(self.parse_tables(sel, symbol_table))
return requests
def parse_tables(self, sel, symbol_table):
"""
This function identifies and distributes parsing of tables to other
functions below.
:param sel: A Selector object of the whole page
:param symbol_table: a dictionary containing translations of raw HTML
tags to human readable names
:return: a list of Result items and Requests
"""
requests = []
for table in sel.xpath('//table[@class="data"]'):
summary = table.xpath('@summary').extract()[0]
if summary == 'One dimensional data':
@ -81,8 +107,12 @@ class NIST(Source):
return requests
def parse_generic_info(self, sel):
"""Parses: synonyms, chemical formula, molecular weight, InChI,
InChiKey, CAS number
"""
This function parses: synonyms, chemical formula, molecular weight,
InChI, InChIKey, CAS number
:param sel: A Selector object of the entire page in the original
response
:return: a list of Result items
"""
ul = sel.xpath('body/ul[li/strong="IUPAC Standard InChI:"]')
@ -121,15 +151,20 @@ class NIST(Source):
return requests
def parse_aggregate_data(self, table, symbol_table):
"""Parses the table(s) which contain possible links to individual
data points
"""
This function parses the table(s) which contain possible links to
individual data points
:param table: a Selector object of the table to be parsed
:param symbol_table: a dictionary containing translations of raw HTML
tags to human readable names
:return: a list of Result items and Request objects
"""
results = []
for tr in table.xpath('tr[td]'):
extra_data_url = tr.xpath('td[last()][a="Individual data points"]'
'/a/@href').extract()
if extra_data_url:
request = Request(url=self.website[:-1] + extra_data_url[0],
request = Request(url=self.website[:-2].replace("\\", "") + extra_data_url[0],
callback=self.parse_individual_datapoints)
results.append(request)
continue
@ -155,14 +190,16 @@ class NIST(Source):
return results
def parse_transition_data(self, table, summary):
"""Parses the table containing properties regarding phase changes"""
"""
This function parses the table containing properties regarding phase
changes
:param table: a Selector object of the table to be parsed
:param summary: the name of the property
:return: a list of Result items
"""
results = []
tr_unit = ''.join(table.xpath('tr[1]/th[1]/node()').extract())
m = re.search(r'\((.*)\)', tr_unit)
unit = '!'
if m:
unit = m.group(1)
unit = self.get_unit(table)
for tr in table.xpath('tr[td]'):
tds = tr.xpath('td/text()').extract()
@ -176,18 +213,18 @@ class NIST(Source):
return results
def parse_generic_data(self, table, summary):
"""Parses the common tables of 4 and 5 rows. Assumes they are of the
"""
Parses the common tables of 4 and 5 rows. Assumes they are of the
form:
Symbol (unit)|Temperature (K)|Method|Reference|Comment
Symbol (unit)|Temperature (K)|Reference|Comment
:param table: a Selector object of the table to be parsed
:param summary: the name of the property
:return: a list of Result items
"""
results = []
tr_unit = ''.join(table.xpath('tr[1]/th[1]/node()').extract())
m = re.search(r'\((.*)\)', tr_unit)
unit = '!'
if m:
unit = m.group(1)
unit = self.get_unit(table)
for tr in table.xpath('tr[td]'):
tds = tr.xpath('td/text()').extract()
@ -200,7 +237,13 @@ class NIST(Source):
return results
def parse_antoine_data(self, table, summary):
"""Parse table containing parameters for the Antione equation"""
"""
This function parses the table containing parameters for the Antione
equation
:param table: a Selector object of the table to be parsed
:param summary: the name of the property
:return: a list of Result items
"""
results = []
for tr in table.xpath('tr[td]'):
@ -215,7 +258,12 @@ class NIST(Source):
return results
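For reference, the Antoine equation in its standard textbook form (general chemistry knowledge, not taken from this commit) relates vapour pressure to temperature, which is why each table row carries three coefficients plus a validity range:

```
def antoine_pressure(A, B, C, T):
    """Standard Antoine form: log10(P) = A - B / (T + C)."""
    return 10 ** (A - B / (T + C))

# Water near its normal boiling point; parameter values assumed for illustration.
print antoine_pressure(4.6543, 1435.264, -64.848, 373.15)  # ~1.0 (bar)
```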
def parse_individual_datapoints(self, response):
"""Parses the page linked from aggregate data"""
"""
This function parses the 'individual data points' page linked from
the aggregate data table(s)
:param response: the Scrapy Response object to be parsed
:return: a list of Result items
"""
sel = Selector(response)
table = sel.xpath('//table[@class="data"]')[0]
@ -228,11 +276,7 @@ class NIST(Source):
name = m.group(1)
condition = m.group(2)
tr_unit = ''.join(table.xpath('tr[1]/th[1]/node()').extract())
m = re.search(r'\((.*)\)', tr_unit)
unit = '!'
if m:
unit = m.group(1)
unit = self.get_unit(table)
for tr in table.xpath('tr[td]'):
tds = tr.xpath('td/text()').extract()
@ -250,7 +294,25 @@ class NIST(Source):
return results
@staticmethod
def get_unit(table):
tr_unit = ''.join(table.xpath('tr[1]/th[1]/node()').extract())
m = re.search(r'\((.*)\)', tr_unit)
unit = '!'
if m:
unit = m.group(1)
return unit
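A quick illustration of the extracted `get_unit` helper; the header strings below are assumptions, not actual NIST table headers:

```
import re

for tr_unit in ['Tboil (K)', 'deltaH (kJ/mol)', 'Reference']:
    m = re.search(r'\((.*)\)', tr_unit)
    print tr_unit, '->', m.group(1) if m else '!'
# Tboil (K) -> K
# deltaH (kJ/mol) -> kJ/mol
# Reference -> !
```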
def newresult(self, attribute, value, conditions=''):
"""
This function abstracts away the Result item and provides default
values.
:param attribute: the name of the attribute
:param value: the value of the attribute
:param conditions: optional conditions regarding the value
:return: A Result item
"""
return Result(
{
'attribute': attribute,
@ -261,7 +323,12 @@ class NIST(Source):
})
def new_compound_request(self, compound):
"""
This function is called when a new synonym is returned to the spider
to generate new requests
:param compound: the name of the compound to search for
"""
if compound not in self.ignore_list:
self.ignore_list.update(compound)
return Request(url=self.website[:-1] + self.search % compound,
return Request(url=self.website[:-2].replace("\\", "") + self.search % compound,
callback=self.parse)


@ -1,9 +1,11 @@
import re
from scrapy.http import Request
from scrapy import log
from source import Source
from scrapy.selector import Selector
from source import Source
from FourmiCrawler.items import Result
import re
class PubChem(Source):
@ -13,10 +15,10 @@ class PubChem(Source):
including sources of the values of properties.
"""
#PubChem has its data on compound name, properties and their values on different html pages, so different URLs used
website = 'https://*.ncbi.nlm.nih.gov/*'
website_www = 'https://www.ncbi.nlm.nih.gov/*'
website_pubchem = 'https://pubchem.ncbi.nlm.nih.gov/*'
# PubChem has its data on compound name, properties and their values on different HTML pages, so different URLs are used
website = 'http://.*\\.ncbi\\.nlm\\.nih\\.gov/.*'
website_www = 'http://www.ncbi.nlm.nih.gov/*'
website_pubchem = 'http://pubchem.ncbi.nlm.nih.gov/.*'
search = 'pccompound?term=%s'
data_url = 'toc/summary_toc.cgi?tocid=27&cid=%s'
@ -49,14 +51,15 @@ class PubChem(Source):
self._spider.get_synonym_requests(synonym)
log.msg('Raw synonyms found: %s' % raw_synonyms, level=log.DEBUG)
n = re.search(r'cid=(\d+)',response.url)
n = re.search(r'cid=(\d+)', response.url)
if n:
cid = n.group(1)
log.msg('cid: %s' % cid, level=log.DEBUG) #getting the right id of the compound with which it can reach
# the separate html page which contains the properties and their values
log.msg('cid: %s' % cid, level=log.DEBUG) # getting the right id of the compound with which it can reach
# the separate html page which contains the properties and their values
#using this cid to get the right url and scrape it
requests.append(Request(url=self.website_pubchem[:-1] + self.data_url % cid, callback=self.parse_data))
# using this cid to get the right url and scrape it
requests.append(
Request(url=self.website_pubchem[:-2].replace("\\", "") + self.data_url % cid, callback=self.parse_data))
return requests
def parse_data(self, response):
@ -72,22 +75,22 @@ class PubChem(Source):
props = sel.xpath('//div')
for prop in props:
prop_name = ''.join(prop.xpath('b/text()').extract()) # name of property that it is parsing
if prop.xpath('a'): # parsing for single value in property
prop_name = ''.join(prop.xpath('b/text()').extract()) # name of property that it is parsing
if prop.xpath('a'): # parsing for single value in property
prop_source = ''.join(prop.xpath('a/@title').extract())
prop_value = ''.join(prop.xpath('a/text()').extract())
new_prop = Result({
'attribute': prop_name,
'value': prop_value,
'source': prop_source,
'reliability': 'Unknown',
'reliability': self.cfg['reliability'],
'conditions': ''
})
log.msg('PubChem prop: |%s| |%s| |%s|' %
(new_prop['attribute'], new_prop['value'],
new_prop['source']), level=log.DEBUG)
requests.append(new_prop)
elif prop.xpath('ul'): # parsing for multiple values (list) in property
elif prop.xpath('ul'): # parsing for multiple values (list) in property
prop_values = prop.xpath('ul//li')
for prop_li in prop_values:
prop_value = ''.join(prop_li.xpath('a/text()').extract())
@ -96,16 +99,51 @@ class PubChem(Source):
'attribute': prop_name,
'value': prop_value,
'source': prop_source,
'reliability': 'Unknown',
'reliability': self.cfg['reliability'],
'conditions': ''
})
log.msg('PubChem prop: |%s| |%s| |%s|' %
(new_prop['attribute'], new_prop['value'],
new_prop['source']), level=log.DEBUG)
(new_prop['attribute'], new_prop['value'],
new_prop['source']), level=log.DEBUG)
requests.append(new_prop)
return requests
def parse_searchrequest(self, response):
"""
This function parses the response to the new_compound_request Request
:param response: the Response object to be parsed
:return: A Request for the compound page or what self.parse returns in
case the search request forwarded to the compound page
"""
# check if pubchem forwarded straight to compound page
m = re.match(self.website_pubchem, response.url)
if m:
log.msg('PubChem search forwarded to compound page',
level=log.DEBUG)
return self.parse(response)
sel = Selector(response)
results = sel.xpath('//div[@class="rsltcont"]')
if results:
url = results[0].xpath('div/p/a[1]/@href')
else:
log.msg('PubChem search found nothing or xpath failed',
level=log.DEBUG)
return None
if url:
url = 'http:' + ''.join(url[0].extract())
log.msg('PubChem compound page: %s' % url, level=log.DEBUG)
else:
log.msg('PubChem search found results, but no url in first result',
level=log.DEBUG)
return None
return Request(url=url, callback=self.parse)
def new_compound_request(self, compound):
return Request(url=self.website_www[:-1] + self.search % compound, callback=self.parse)
return Request(url=self.website_www[:-1] + self.search % compound,
callback=self.parse_searchrequest)


@ -15,7 +15,7 @@ class WikipediaParser(Source):
It also returns requests with other external sources which contain information on the parsed subject.
"""
website = "http://en.wikipedia.org/wiki/*"
website = "http://en\\.wikipedia\\.org/wiki/.*"
__spider = None
searched_compounds = []
@ -123,7 +123,7 @@ class WikipediaParser(Source):
return items
def new_compound_request(self, compound):
return Request(url=self.website[:-1] + compound, callback=self.parse)
return Request(url=self.website[:-2].replace("\\", "") + compound, callback=self.parse)
@staticmethod
def clean_items(items):


@ -3,7 +3,7 @@ from scrapy import log
class Source:
website = "http://something/*" # Regex of URI's the source is able to parse
website = "http://something/.*" # Regex of URI's the source is able to parse
_spider = None
def __init__(self, config=None):
@ -30,7 +30,7 @@ class Source:
:param compound: A compound name.
:return: A new Scrapy Request
"""
# return Request(url=self.website[:-1] + compound, callback=self.parse)
# return Request(url=self.website[:-2].replace("\\", "") + compound, callback=self.parse)
pass
def set_spider(self, spider):


@ -34,8 +34,9 @@ class FourmiSpider(Spider):
"""
for source in self._sources:
if re.match(source.website, response.url):
log.msg("Url: " + response.url + " -> Source: " + source.website, level=log.DEBUG)
log.msg("URL: " + response.url + " -> Source: " + source.website, level=log.DEBUG)
return source.parse(response)
log.msg("URL: " + response.url + " -> No compatible source", level=log.INFO)
return None
def get_synonym_requests(self, compound, force=False):

GUI.cfg.sample Normal file

@ -0,0 +1,10 @@
[GUI]
# Personalize options in your User Interface
# Commonly used parameters are listed in the GUI for easy selection
CommonParameters = Weight, Polarity, Viscosity, Solubility, Name
# Parameters that are always used in the search
AlwaysParameters = Name
OutputTypes = csv, json, jsonlines, xml

GUI/__init__.py Normal file

@ -0,0 +1 @@
import gui

GUI/configImporter.py Normal file

@ -0,0 +1,30 @@
import ConfigParser
class ConfigImporter():
def __init__(self, filename):
"""Read the filename into the parser."""
self.filename = filename
self.parser = ConfigParser.ConfigParser()
self.parser.read(self.filename)
def load_common_attributes(self):
"""Loads common attributes from the initialized file."""
try:
return self.parser.get('GUI', 'CommonParameters')
except:
return 'One, Two, Three'
def load_output_types(self):
"""Loads output types from the initialized file."""
try:
return self.parser.get('GUI', 'OutputTypes')
except:
return 'csv'
def load_always_attributes(self):
"""Loads attributes that are always searched for from the initialized file."""
try:
return self.parser.get('GUI', 'AlwaysParameters')
except:
return 'Name, Weight'
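A usage sketch for `ConfigImporter`, assuming a GUI.cfg copied from the GUI.cfg.sample above; each loader returns the raw comma-separated string, which the GUI splits and strips later:

```
from configImporter import ConfigImporter

importer = ConfigImporter('GUI.cfg')
print importer.load_common_attributes()  # 'Weight, Polarity, Viscosity, Solubility, Name'
print importer.load_output_types()       # 'csv, json, jsonlines, xml'
print importer.load_always_attributes()  # 'Name'
```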

GUI/gui.py Normal file

@ -0,0 +1,196 @@
from Tkinter import *
import os
import shutil
import sys
from tkFileDialog import asksaveasfilename
from configImporter import *
class GUI():
def __init__(self, search, config_file='GUI.cfg', sourceloader=None, in_source=True):
"""Boots the window, configuration."""
if not in_source:
current_dir = os.path.dirname(os.path.abspath(__file__))
config_file = current_dir + '/../' + config_file
if not os.path.isfile(config_file):
try:
shutil.copyfile(os.path.dirname(os.path.abspath(__file__)) + "/../GUI.cfg.sample", config_file)
except IOError:
print "GUI configuration couldn't be found and couldn't be created."
sys.exit()
self.configurator = ConfigImporter(config_file)
self.sourceloader = sourceloader
self.finish_with_search = False
self.values = {}
self.required_variables = ['substance']
self.search = search
self.window, self.variables = self.generate_window(self.load_common_attributes(), self.load_output_types())
def load_common_attributes(self):
"""Calls the configuration parser for common attributes."""
return [x.strip() for x in self.configurator.load_common_attributes().split(',')]
def load_output_types(self):
"""Calls the configuration parser for output types."""
return [x.strip() for x in self.configurator.load_output_types().split(',')]
def load_always_attributes(self):
"""Calls the configuration parser for attributes that are always used."""
return ','.join([x.strip() for x in self.configurator.load_always_attributes().split(',')])
def set_output(self):
self.variable_output_name.set(asksaveasfilename())
self.button_output_name.config(text=self.variable_output_name.get())
def generate_window(self, common_attributes, output_types):
"""Creates all widgets and variables in the window."""
window = Tk()
window.wm_title("Fourmi Crawler")
variables = {}
variable_substance = StringVar(window)
frame_substance = Frame(window)
label_substance = Label(frame_substance, text="Substance: ")
input_substance = Entry(frame_substance, font=("Helvetica", 12), width=25, textvariable=variable_substance)
variables.update({"substance": variable_substance})
frame_substance.pack(side=TOP)
label_substance.pack()
input_substance.pack()
input_substance.focus()
frame_all_attributes = Frame(window)
frame_selecting_attributes = Frame(frame_all_attributes)
frame_new_attributes = Frame(frame_selecting_attributes)
label_new_attributes = Label(frame_new_attributes, text="Parameters: ")
input_new_attributes = Text(frame_new_attributes, font=("Helvetica", 8), width=25, height=7, padx=5, pady=5)
variables.update({"new_attributes": input_new_attributes})
frame_new_attributes.pack(side=LEFT)
label_new_attributes.pack()
input_new_attributes.pack()
frame_common_attributes = Frame(frame_selecting_attributes)
label_common_attributes = Label(frame_common_attributes, text="Common Parameters: ")
input_common_attributes = Listbox(frame_common_attributes, selectmode=MULTIPLE, height=7)
scrollbar_common_attributes = Scrollbar(frame_common_attributes)
input_common_attributes.config(yscrollcommand=scrollbar_common_attributes.set)
scrollbar_common_attributes.config(command=input_common_attributes.yview)
if common_attributes and len(common_attributes) > 0:
input_common_attributes.insert(END, *common_attributes)
variables.update({"common_attributes": input_common_attributes})
frame_common_attributes.pack(side=RIGHT)
label_common_attributes.pack(side=TOP)
input_common_attributes.pack(side=LEFT)
scrollbar_common_attributes.pack(side=RIGHT, fill=Y)
frame_selecting_attributes.pack()
frame_last = Frame(window)
search_button = Button(frame_last, text="Start search", command=self.prepare_search)
cancel_button = Button(frame_last, text="Cancel", command=window.destroy)
frame_last.pack(side=BOTTOM)
search_button.pack(side=LEFT)
cancel_button.pack(side=RIGHT)
frame_name = Frame(window)
frame_output_name = Frame(frame_name)
label_output_name = Label(frame_output_name, text='Output file:')
self.variable_output_name = StringVar()
self.variable_output_name.set('results.csv')
variables.update({'output_name':self.variable_output_name})
self.button_output_name = Button(frame_output_name, command=self.set_output, text="Select file")
frame_output_name.pack(side=LEFT)
label_output_name.pack()
self.button_output_name.pack()
frame_name.pack(side=BOTTOM)
frame_checkboxes = Frame(window)
frame_checkbox_attributes = Frame(frame_checkboxes)
variable_all_attributes = BooleanVar()
variable_all_attributes.set(True)
input_all_attributes = Checkbutton(frame_checkbox_attributes, text="Search ALL parameters",
variable=variable_all_attributes)
variables.update({"all_attributes": variable_all_attributes})
frame_checkbox_attributes.pack(side=LEFT)
input_all_attributes.pack()
frame_logging = Frame(frame_checkboxes)
variable_logging = BooleanVar()
variable_logging.set(False)
input_logging = Checkbutton(frame_logging, text="Verbose logging", variable=variable_logging)
variables.update({'logging':variable_logging})
frame_logging.pack(side=RIGHT)
frame_checkboxes.pack(side=BOTTOM)
input_logging.pack()
frame_all_attributes.pack()
return window, variables
def prepare_search(self):
"""Saves the values from the window for later retrieval."""
variables = self.variables
values = {}
values.update({"Always attributes": self.load_always_attributes()})
for name, var in variables.iteritems():
if var.__class__ is StringVar:
values.update({name: var.get()})
elif var.__class__ is BooleanVar:
values.update({name: var.get()})
elif var.__class__ is Text:
values.update({name: str(var.get("1.0", END)).strip()})
elif var.__class__ is Listbox:
values.update({name: ", ".join([var.get(int(i)) for i in var.curselection()])})
else:
print "No known class, {}, {}".format(name, var)
values.update({'output_name':self.variable_output_name.get()})
values.update({'output_type':self.check_output_type(values.get('output_name'))})
self.values = values
if all([values.get(i) != '' for i in self.required_variables]):
self.finish_with_search = True
self.window.destroy()
else:
self.finish_with_search = False
#tkMessageBox.showinfo('Not all required information was entered!')
def execute_search(self):
"""Calls the Fourmi crawler with the values from the GUI"""
if self.values.get('all_attributes'):
attributes = ".*"
else:
attribute_types = ['new_attributes', 'common_attributes', 'Always attributes']
attributes = ','.join([str(self.values.get(attribute)) for attribute in attribute_types])
output_file = "file://" + str(self.values.get('output_name')) #Dealing with absolute paths
arguments = {'--attributes': attributes,
'--exclude': None,
'--format': self.values.get('output_type'),
'--help': False,
'--include': None,
'--log': 'log.txt',
'--output': output_file,
'-v': 0 if self.values.get('logging') else 3,
'--version': False,
'<compound>': self.values.get('substance'),
'list': False,
'search': True}
self.search(arguments, self.sourceloader)
def run(self):
"""Starts the window and the search."""
self.window.mainloop()
if self.finish_with_search:
self.execute_search()
def check_output_type(self, filename):
parts = str(filename).split('.')
output_types = self.load_output_types()
extension = parts[-1]
for type in output_types:
if extension==type:
return extension
return output_types[0]
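`check_output_type` condensed into a standalone sketch (a free function here, since the real method lives on a Tk-backed GUI instance): the file extension wins when it is a configured output type, otherwise the first configured type is the fallback:

```
def check_output_type(filename, output_types):
    extension = str(filename).split('.')[-1]
    return extension if extension in output_types else output_types[0]

types = ['csv', 'json', 'jsonlines', 'xml']
print check_output_type('results.json', types)  # json
print check_output_type('results.dat', types)   # csv (fallback to first type)
```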


@ -48,7 +48,6 @@ __Main goals:__
- Build a graphical user interface (GUI) as an alternative to the command line
interface (CLI). (Assignee: Harmen)
- Compiling the source into a Windows executable. (Assignee: Bas)
- Create a module to gather data from PubChem. (Assignee: Nout)
__Side goals:__


@ -3,19 +3,19 @@
-----BEGIN PGP SIGNATURE-----
Version: GnuPG v1.4.11 (GNU/Linux)
iQIcBAABAgAGBQJTn3GgAAoJEJrQ9RIUCT6/CI4P/RSAQrd6JugGZoQu/gNdW6eB
MYCybqYGZiieVhUaGOnFNVlp68YpXH+sP/Uc6hXEX30UQEsDmhMeT5NA7ZMS+zJ9
MNHGQdJq22lGb3+VoVBV4RTMdkQXOXvx6p5biskjIEtM3tfTxP529GvAX2TFUNnt
gGWk28EDr30M95XwDxwWo+57Xv8VtSb3VSvXEbrdwGYf8EoQo9oPtzYQ0YcdupcC
ET8bukYVcwpAjoTnPlEy89TiHHohwmimr2ASXeQ64Ks5wfjzcF7NENCAmaAfR+KI
VLLuGqdWMBx1ewVuAXTCZ0Mga/kBoRUaO0PC13UmL8LhhZY9Z3cwD4UnPU35/RQi
IbLfQcZHf/gEvyMeiTYCsyWpm+/xxn1+EfHol4/Q9VSXzZgRBX05Ik6tqeCvjdgG
4PyHBaJTTm/HfMNdg3mr1mbyjTv5UxglEyPv+Y4NdfoVfepkXsXbzvNSyVffZ3Bw
UaFp7KzIC4Jugdpv63FleiAdDY0+iZ5shH86wD1+HJ0/a87kn5Ao1yESby7J7U+f
poZQYeMFeuC0T5hY/3iYoyvZ68oH918ESESiucSulp5BvfwuqGL2+xo5uJIwGYXE
3IDQC7xbA14JHX86IVJlSHAD33iWyiC+5yjw4/bRRVl37KPsLdHiXH3YIRnF5I2I
ZbM/uDYyJdZbBe4UoCoF
=AMhi
iQIcBAABAgAGBQJTpMZAAAoJEJrQ9RIUCT6/Hf8P/AyX9ZD5zj6rBi2CwDOTs5aa
flVqw9syvdqTzVfXQaR4UrCSOuyuOeAkiqub0BMjxyCurqAwN/SCPf3uOJ/tGXmt
ZPtYVHjevJ4mbojLhZiJ2av8LC9VOh3Zl+reR3L2cLuBD4rVSrfUMJtczbbtNlk+
+mczRcTpzNvHQW6mKqyUoKn8xqNnLC7C+p5ybNZ5EADUfoKIF1xyTN6je6fpYZ1U
IHxiUzeOvfX9ohmbfnfkpkuSll1nUJWsTgUPKhthJuxEhwCQ1xMdWhxfcyZJaMT2
Pxgo8C8S6lzAk4PxBRBoePjgWAeaFmbr317WXHvw6SSHPIdzToKZgDiDC5LWvKxb
RRdLZ6w7tg0/FSUexekrUafGT8Je0oIoLUQlNaEQzrPNhDpma1uHFfZg0vb2m4Hq
WHLLKTCr6FMczhP1TmuIEtdjKtymT+rO+Ls4ciw+654R7MtBYcmTr+RqmAd+GadJ
vJNmGDod2oPwCydEps8bYAbksqRhMmk3xwco/g6dWYh5/+1GzCr80J7fYpqtoPFH
V5qKyDQovF5jPlb/buq4mH8XYVT1z4Sx8azKVctMLig57zRnvN0WyskpT09oY7dK
TPvIqwTixekndYLcM3QacVq/NhVOOQPFvD0PwU18eKs4EfD2L7iWd2XjV9Az++aD
jUY6EwEuOzDCexWP4eM8
=h6TK
-----END PGP SIGNATURE-----
```
@ -27,38 +27,45 @@ ZbM/uDYyJdZbBe4UoCoF
#### Expect
```
size exec file contents
./
375 .gitignore d2e475a6a4fa51422cac0a07495914e776858fb9ab9c8937a4d491a3e042d6b1
464 .travis.yml 3063ba078607b8d16bd6467afc15fbbaa4b26c1e30be5ce7cef453cfccbaa95c
428 Changelog.md c7791d1914ddca9ff1549d90468a79787a7feafe94cecd756e3d7cbd4bcbc7df
FourmiCrawler/
0 __init__.py e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855
304 items.py b00d49a3d53fa13306c7f8b023adb93ab88423c4fce46600689814f6b02bb806
2178 pipelines.py f9b7b84938060751e15e45de5133dffe50c798bff2a20019206fe7c9d677ad49
914 settings.py 0be2eaf8e83e85ed27754c896421180fc80cb5ce44449aa9f1048e465d1a96f2
sources/
9991 ChemSpider.py 847013e34c5c3683ec66a337837287512b4bab9fbea2ece12e4130ab0dbf264d
9898 NIST.py 97abc84fce85c47b789822715a1945ab84cc052a32340c861141c1af66bab644
4754 PubChem.py 58ed4c92519e385f2768cf8034b006b18f8a21632cb1c5a0849b1a329a8c6ffb
6907 WikipediaParser.py 5d6de911c773129a34b76c40a9b547aafc67644a15f39cd0be6afc7a16fb0f97
0 __init__.py e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855
1262 source.py 16c4cdfca849b7dc2bc89d7a6f7ad021f4aa1d04234394312f1d0edf0fd9c5a4
3026 spider.py 1ffba2512988b7a6b535a4a31a4ef688ece4f8c595c3d50355c34ef46b23e44a
1081 LICENSE 36951e5f1910bad3e008ab7228f35ad8933192e52d3c3ae6a5e875765e27192c
3965 README.md d21236d6a175be28ef8e2fee8a256e95b6a513163e3f1071c26c62e9093db7f3
3676 x fourmi.py 2ff89f97fd2a49d08417d9ab6cf08e88944d0c45f54ec84550b530be48676c23
261 scrapy.cfg 624c068fd06303daa65b8e0d0d3ef88ac1f123be2694ef5b4f3f9a9dcd983f85
tests/
1 __init__.py 01ba4719c80b6fe911b091a7c05124b64eeece964e09c058ef8f9805daca546b
2837 test_configurator.py 4a0eb6e7121eb09a63ab5cb797570d1a42080c5346c3b8b365da56eefa599e80
1892 test_pipeline.py 387a336b0f36722a20e712aa033e5771c44f9e92561dd73acffd53d622c52031
1260 test_sourceloader.py b108b4b80adcdb7401273a9823b1f1a19eb5178776186eb5a9976aed8b1ee869
2113 test_spider.py 300f280377b522737be0d8e4a80031ab118a4011bdbb92131e9c400fcdab6299
utils/
0 __init__.py e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855
3552 configurator.py e2b7e0ee6c1fef4373785dfe5df8ec6950f31ce6a5d9632b69a66ea3d1eaf921
2537 sourceloader.py f5a5ac2a6aba0658dbe11361f465caabcf3c06c5c8dc9a631874211cc19d2d37
size exec file contents
./
412 .gitignore 25059da2ee328837ece01b979cd5c1083ed1679372f06c14c1c58035d8120614
548 .travis.yml 7f11bc58a8e94276ef949afeb107f9f1e184c0dbb84f821705ea2245902ed546
846 Changelog.md 345f9aea4812b37b1b2714703ea0d5edd27414c0f839ec3e322450ad5ec5c6ed
FourmiCrawler/
0 __init__.py e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855
304 items.py b00d49a3d53fa13306c7f8b023adb93ab88423c4fce46600689814f6b02bb806
2178 pipelines.py f9b7b84938060751e15e45de5133dffe50c798bff2a20019206fe7c9d677ad49
677 settings.py f1e7d21b899ffc2523516c0ebe67d967dc62495b90c2fe34651042a3049fcd94
sources/
12103 ChemSpider.py f647d70acf9b3f1ee7bde75586aa45156331f977ca7fe836ceac4477a2c0d4ce
12400 NIST.py cdb4c423355ac8fb1097197a9f8df44f667925a785c6bae7c583820da08908ee
6121 PubChem.py 8f8ad40459090b818a384a202e739fe4696a04154df2b8419aee896b0fa02481
6930 WikipediaParser.py ae9f57bbf2aad9c371abcd143fd2dda5995a196cb700734a5035dd94b1988870
0 __init__.py e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855
1281 source.py 7927fda259ff2c8096fa526db1f08586de6e04473a491e19a07b092fdeed81fc
3111 spider.py ec7c946907fea10c17ee6dd88a506f3e3bf2cd748e3eb09200487fcec2ae7ba3
GUI/
11 __init__.py 40567015c415e853210425c1b4f3834dbc2a3165e3713e04dd3424b79bc90aa3
940 configImporter.py 5d731d63a3117b25b7e556a746a1dd5b16e8cbb60e57be46de333c31c8c00271
8776 gui.py 20b2220bc3ca55ebfd6d04e8c0bebbf1ae316c85a54db60b8fc02d22642f19d5
299 GUI.cfg.sample 4ee27f7099d588c21358cd645a21621e631d80712f1b514dad898faa5fee2483
1081 LICENSE 36951e5f1910bad3e008ab7228f35ad8933192e52d3c3ae6a5e875765e27192c
3900 README.md f4a1e3ea1700d2b415acfad661cb45f960fe8e8ffbe98dbecb6c7ed071a101ac
3846 x fourmi.py f0b11f5f153f96f6af2e504cdf369e43c04316752de131a659eb6246fd80212a
261 scrapy.cfg 624c068fd06303daa65b8e0d0d3ef88ac1f123be2694ef5b4f3f9a9dcd983f85
416 sources.cfg.sample 11cd0fc18693da17883c98d25a384ae1b6158adfef13778b6dd02b878f6b8a70
tests/
107 __init__.py ce90e54e58a0912cadbe3adcf5166dc72477bf9ce289bf427f8e2f5b25406670
2870 test_configurator.py 318d542b1cda5075a2a9a6be97e9e7a79372ee58e1ab3014c161534094f7364d
1315 test_gui.py 0fb95d0b542765bf52bcebb037bf2ed1299209beab23448af741a93c9fbb1ca8
1892 test_pipeline.py 387a336b0f36722a20e712aa033e5771c44f9e92561dd73acffd53d622c52031
1260 test_sourceloader.py b108b4b80adcdb7401273a9823b1f1a19eb5178776186eb5a9976aed8b1ee869
2113 test_spider.py 300f280377b522737be0d8e4a80031ab118a4011bdbb92131e9c400fcdab6299
utils/
40 __init__.py f1237ae74693e2ec1b3154e57aec27438a80a735e5ccf2411aecd194ef443b6a
4047 configurator.py 8b566a0435a9f105a8ec616b16c3e21edb9b82f8debe1ef9f1df6bbbf20949d5
2537 sourceloader.py f5a5ac2a6aba0658dbe11361f465caabcf3c06c5c8dc9a631874211cc19d2d37
```
#### Ignore


@ -1,8 +1,9 @@
#!/usr/bin/env python
"""
Fourmi, a web scraper build to search specific information for a given compound (and it's pseudonyms).
Fourmi, a web scraper built to search specific information for a given compound (and its pseudonyms).
Usage:
fourmi
fourmi search <compound>
fourmi [options] search <compound>
fourmi [options] [-v | -vv | -vvv] [--include=<sourcename> | --exclude=<sourcename>] search <compound>
@ -17,7 +18,7 @@ Options:
--version Show version.
-v Verbose logging output. (Multiple occurrences increase logging level)
--log=<file> Save log to a file.
-o <file> --output=<file> Output file [default: results.*format*]
-o <file> --output=<file> Output file [default: <compound>.*format*]
-f <format> --format=<format> Output formats (supported: csv, json, jsonlines, xml) [default: csv]
--include=<regex> Include only sources that match these regular expressions split by a comma.
--exclude=<regex> Exclude the sources that match these regular expressions split by a comma.
@ -31,6 +32,7 @@ import docopt
from FourmiCrawler.spider import FourmiSpider
from utils.configurator import Configurator
from utils.sourceloader import SourceLoader
from GUI import gui
def setup_crawler(compound, settings, source_loader, attributes):
@ -58,18 +60,18 @@ def search(docopt_arguments, source_loader):
"""
conf = Configurator()
conf.set_logging(docopt_arguments["--log"], docopt_arguments["-v"])
conf.set_output(docopt_arguments["--output"], docopt_arguments["--format"])
conf.set_output(docopt_arguments["--output"], docopt_arguments["--format"], docopt_arguments["<compound>"])
setup_crawler(docopt_arguments["<compound>"], conf.scrapy_settings,
source_loader, docopt_arguments["--attributes"].split(','))
if conf.scrapy_settings.getbool("LOG_ENABLED"):
log.start(conf.scrapy_settings.get("LOG_FILE"),
conf.scrapy_settings.get("LOG_LEVEL"), conf.scrapy_settings.get("LOG_STDOUT"))
conf.scrapy_settings.get("LOG_LEVEL"), conf.scrapy_settings.get("LOG_STDOUT"))
reactor.run()
# The start for the Fourmi Command Line interface.
if __name__ == '__main__':
arguments = docopt.docopt(__doc__, version='Fourmi - V0.5.3')
arguments = docopt.docopt(__doc__, version='Fourmi - V0.6.0')
loader = SourceLoader()
if arguments["--include"]:
@ -82,3 +84,6 @@ if __name__ == '__main__':
elif arguments["list"]:
print "-== Available Sources ==-"
print str(loader)
else:
gui_window = gui.GUI(search, sourceloader=SourceLoader())
gui_window.run()

sources.cfg.sample Normal file

@ -0,0 +1,19 @@
[DEFAULT]
reliability = Unknown
#For each source listed in FourmiCrawler/sources there should be a section
#named exactly as the filename in here. If not present, the DEFAULT value is
#used for reliability of that source.
[ChemSpider]
reliability = High
#token=Paste ChemSpider API token here and remove the hashtag
[NIST]
reliability = High
[WikipediaParser]
reliability = Medium
[PubChem]
reliability = High
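A sketch of how the `[DEFAULT]` fallback behaves with ConfigParser (assuming a sources.cfg copied from this sample): values in `[DEFAULT]` are inherited by every section, so a source section without its own reliability key resolves to Unknown:

```
import ConfigParser

config = ConfigParser.ConfigParser()
config.read('sources.cfg')
print config.get('ChemSpider', 'reliability')  # High
# A hypothetical bare [NewSource] section with no keys of its own would
# inherit from [DEFAULT]: config.get('NewSource', 'reliability') -> 'Unknown'
```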


@ -1 +1,6 @@
import test_configurator
import test_gui
import test_pipeline
import test_sourceloader
import test_spider


@ -10,16 +10,16 @@ class TestConfigurator(unittest.TestCase):
self.conf = Configurator()
def test_set_output(self):
self.conf.set_output(filename="test.txt", fileformat="csv")
self.conf.set_output(filename="test.txt", fileformat="csv", compound="test")
self.assertEqual(self.conf.scrapy_settings["FEED_URI"], "test.txt")
self.assertEqual(self.conf.scrapy_settings["FEED_FORMAT"], "csv")
self.conf.set_output("results.*format*", "jsonlines")
self.assertEqual(self.conf.scrapy_settings["FEED_URI"], "results.json")
self.conf.set_output("<compound>.*format*", "jsonlines", "test")
self.assertEqual(self.conf.scrapy_settings["FEED_URI"], "test.json")
self.assertEqual(self.conf.scrapy_settings["FEED_FORMAT"], "jsonlines")
self.conf.set_output("results.*format*", "csv")
self.assertEqual(self.conf.scrapy_settings["FEED_URI"], "results.csv")
self.conf.set_output("<compound>.*format*", "csv", "test")
self.assertEqual(self.conf.scrapy_settings["FEED_URI"], "test.csv")
self.assertEqual(self.conf.scrapy_settings["FEED_FORMAT"], "csv")
def test_start_log(self):

tests/test_gui.py Normal file

@ -0,0 +1,32 @@
import unittest
from GUI import gui
class TestGUI(unittest.TestCase):
def setUp(self):
pass
def test_empty_attributes(self):
self.test_gui = gui.GUI(None, config_file="../GUI.cfg.sample", in_source=True)
self.test_gui.window.after(9, self.test_gui.prepare_search)
self.test_gui.window.after(11, self.test_gui.window.destroy)
self.test_gui.run()
output_type = self.test_gui.configurator.load_output_types().split(',')[0]
self.assertEqual(self.test_gui.values.get('substance'), '')
self.assertEqual(self.test_gui.values.get('output_type'), output_type)
self.assertEqual(self.test_gui.values.get('output_name'), 'results.csv')
def test_no_configurations(self):
self.test_gui = gui.GUI(None, config_file="../GUI.cfg.sample")
self.test_gui.configurator = gui.ConfigImporter('')
self.test_gui.finish_with_search = True
self.test_gui.window.after(9, self.test_gui.prepare_search)
self.test_gui.window.after(11, self.test_gui.window.destroy)
self.test_gui.run()
self.assertEqual(self.test_gui.values.get('substance'), '')
self.assertEqual(self.test_gui.values.get('output_type'), 'csv')
self.assertEqual(self.test_gui.values.get('output_name'), 'results.csv')


@ -0,0 +1,2 @@
import configurator
import sourceloader


@ -1,4 +1,6 @@
import ConfigParser
import os
import shutil
from scrapy.utils.project import get_project_settings
@ -12,7 +14,7 @@ class Configurator:
def __init__(self):
self.scrapy_settings = get_project_settings()
def set_output(self, filename, fileformat):
def set_output(self, filename, fileformat, compound):
"""
This function manipulates the Scrapy output file settings that normally would be set in the settings file.
In the Fourmi project these are command line arguments.
@ -20,12 +22,12 @@ class Configurator:
:param fileformat: The format in which the output will be.
"""
if filename != 'results.*format*':
if filename != '<compound>.*format*':
self.scrapy_settings.overrides["FEED_URI"] = filename
elif fileformat == "jsonlines":
self.scrapy_settings.overrides["FEED_URI"] = "results.json"
self.scrapy_settings.overrides["FEED_URI"] = compound + ".json"
elif fileformat is not None:
self.scrapy_settings.overrides["FEED_URI"] = "results." + fileformat
self.scrapy_settings.overrides["FEED_URI"] = compound + "." + fileformat
if fileformat is not None:
self.scrapy_settings.overrides["FEED_FORMAT"] = fileformat
@ -66,8 +68,16 @@ class Configurator:
variables for sources
:return a ConfigParser object of sources.cfg
"""
current_dir = os.path.dirname(os.path.abspath(__file__))
config_path = current_dir + '/../sources.cfg'
# [TODO]: location of sources.cfg should be softcoded eventually
if not os.path.isfile(config_path):
try:
shutil.copyfile(os.path.dirname(os.path.abspath(__file__)) + "/../sources.cfg.sample", config_path)
except IOError:
print "WARNING: Source configuration couldn't be found and couldn't be created."
config = ConfigParser.ConfigParser()
config.read('sources.cfg') # [TODO]: should be softcoded eventually
config.read(config_path)
return config
@staticmethod