Merge branch 'release/v0.6.0'
This commit is contained in:
commit
50e6835116
2
.gitignore
vendored
2
.gitignore
vendored
@ -6,6 +6,8 @@
|
|||||||
|
|
||||||
#may contain authentication information
|
#may contain authentication information
|
||||||
sources.cfg
|
sources.cfg
|
||||||
|
#Another of our config files
|
||||||
|
GUI.cfg
|
||||||
|
|
||||||
#THINGS WE WOULD NEVER EVER WANT!
|
#THINGS WE WOULD NEVER EVER WANT!
|
||||||
#ignore thumbnails created by windows
|
#ignore thumbnails created by windows
|
||||||
|
@ -3,6 +3,10 @@
|
|||||||
language: python
|
language: python
|
||||||
python: 2.7
|
python: 2.7
|
||||||
|
|
||||||
|
before_install:
|
||||||
|
- "export DISPLAY=:99.0"
|
||||||
|
- "sh -e /etc/init.d/xvfb start"
|
||||||
|
|
||||||
# command to install dependencies, e.g. pip install -r requirements.txt --use-mirrors
|
# command to install dependencies, e.g. pip install -r requirements.txt --use-mirrors
|
||||||
install:
|
install:
|
||||||
- pip install Scrapy docopt
|
- pip install Scrapy docopt
|
||||||
@ -10,10 +14,10 @@ install:
|
|||||||
|
|
||||||
# command to run tests, e.g. python setup.py test
|
# command to run tests, e.g. python setup.py test
|
||||||
script:
|
script:
|
||||||
- nosetests --with-coverage --cover-package=FourmiCrawler,utils tests
|
- nosetests --with-coverage --cover-package=FourmiCrawler,utils,GUI tests
|
||||||
|
|
||||||
notifications:
|
notifications:
|
||||||
slack: descartes2:6sgCzx3PvrO9IIMwKxj12dDM
|
slack: descartes2:6sgCzx3PvrO9IIMwKxj12dDM
|
||||||
|
|
||||||
after_success:
|
after_success:
|
||||||
coveralls --verbose
|
coveralls --verbose
|
||||||
|
@ -1,3 +1,11 @@
|
|||||||
|
### v0.6.0
|
||||||
|
- Feature: Added a Graphical User interface
|
||||||
|
- Feature: Automatic config file createion from config samples
|
||||||
|
- FIX: The default name of the output files will now consist of the compound name and the file format when using the CLI
|
||||||
|
- FIX: A lot of bugfixes of the PubChem plugin, as is wasn't working as it should
|
||||||
|
- FIX: Using absolute path for configuration files
|
||||||
|
- DEV: General Code cleanup in documentation
|
||||||
|
|
||||||
### v0.5.3
|
### v0.5.3
|
||||||
- FIX: It is now again possible to use both verbose and the source inclusion/exclusion options
|
- FIX: It is now again possible to use both verbose and the source inclusion/exclusion options
|
||||||
- FIX: Logging is now "actually" disabled if not using the verbose option.
|
- FIX: Logging is now "actually" disabled if not using the verbose option.
|
||||||
|
@ -21,7 +21,4 @@ FEED_FORMAT = 'jsonlines'
|
|||||||
# Crawl responsibly by identifying yourself (and your website) on the
|
# Crawl responsibly by identifying yourself (and your website) on the
|
||||||
# user-agent
|
# user-agent
|
||||||
|
|
||||||
# [todo] - Check for repercussions on spoofing the user agent
|
USER_AGENT = 'Fourmi'
|
||||||
|
|
||||||
# USER_AGENT = 'FourmiCrawler (+http://www.yourdomain.com)'
|
|
||||||
USER_AGENT = 'Mozilla/5.0 (Macintosh; Intel Mac OS X 10_8_4) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/34.0.1847.137 Safari/537.36'
|
|
||||||
|
@ -9,24 +9,28 @@ from FourmiCrawler.items import Result
|
|||||||
|
|
||||||
|
|
||||||
# [TODO] - Maybe clean up usage of '.extract()[0]', because of possible IndexError exception.
|
# [TODO] - Maybe clean up usage of '.extract()[0]', because of possible IndexError exception.
|
||||||
# [TODO] - Add checks at search request and extendedCompoundInfo on whether the token was valid or not
|
|
||||||
|
|
||||||
class ChemSpider(Source):
|
class ChemSpider(Source):
|
||||||
"""ChemSpider scraper for synonyms and properties
|
"""
|
||||||
|
ChemSpider scraper for synonyms and properties
|
||||||
This parser will manage searching for chemicals through the
|
This parser will manage searching for chemicals through the
|
||||||
ChemsSpider API, and parsing the resulting ChemSpider page.
|
ChemsSpider API, and parsing the resulting ChemSpider page.
|
||||||
The token required for the API should be in a configuration file
|
The token required for the API should be in a configuration file
|
||||||
somewhere.
|
somewhere.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
website = 'http://www.chemspider.com/*'
|
website = 'http://www\\.chemspider\\.com/.*'
|
||||||
|
|
||||||
search = 'Search.asmx/SimpleSearch?query=%s&token='
|
search = 'Search.asmx/SimpleSearch?query=%s&token='
|
||||||
structure = 'Chemical-Structure.%s.html'
|
structure = 'Chemical-Structure.%s.html'
|
||||||
extendedinfo = 'MassSpecAPI.asmx/GetExtendedCompoundInfo?csid=%s&token='
|
extendedinfo = 'MassSpecAPI.asmx/GetExtendedCompoundInfo?csid=%s&token='
|
||||||
|
|
||||||
def __init__(self, config=None):
|
def __init__(self, config=None):
|
||||||
|
"""
|
||||||
|
Initialization of ChemSpider scraper
|
||||||
|
:param config: a dictionary of settings for this scraper, must contain
|
||||||
|
'reliability' key
|
||||||
|
"""
|
||||||
Source.__init__(self, config)
|
Source.__init__(self, config)
|
||||||
self.ignore_list = []
|
self.ignore_list = []
|
||||||
if 'token' not in self.cfg or self.cfg['token'] == '':
|
if 'token' not in self.cfg or self.cfg['token'] == '':
|
||||||
@ -37,6 +41,12 @@ class ChemSpider(Source):
|
|||||||
self.extendedinfo += self.cfg['token']
|
self.extendedinfo += self.cfg['token']
|
||||||
|
|
||||||
def parse(self, response):
|
def parse(self, response):
|
||||||
|
"""
|
||||||
|
This function is called when a Response matching the variable
|
||||||
|
'website' is available for parsing the Response object.
|
||||||
|
:param response: the Scrapy Response object to be parsed
|
||||||
|
:return: a list of Result items and Request objects
|
||||||
|
"""
|
||||||
sel = Selector(response)
|
sel = Selector(response)
|
||||||
requests = []
|
requests = []
|
||||||
requests_synonyms = self.parse_synonyms(sel)
|
requests_synonyms = self.parse_synonyms(sel)
|
||||||
@ -47,10 +57,26 @@ class ChemSpider(Source):
|
|||||||
return requests
|
return requests
|
||||||
|
|
||||||
def parse_properties(self, sel):
|
def parse_properties(self, sel):
|
||||||
"""scrape Experimental Data and Predicted ACD/Labs tabs"""
|
"""
|
||||||
|
This function scrapes the Experimental Data and Predicted ACD/Labs tabs
|
||||||
|
:param sel: a Selector object of the whole page
|
||||||
|
:return: a list of Result items
|
||||||
|
"""
|
||||||
|
properties = []
|
||||||
|
|
||||||
|
properties.extend(self.parse_acdlabstab(sel))
|
||||||
|
properties.extend(self.parse_experimentaldatatab(sel))
|
||||||
|
|
||||||
|
return properties
|
||||||
|
|
||||||
|
def parse_acdlabstab(self, sel):
|
||||||
|
"""
|
||||||
|
This function scrapes the 'Predicted ACD/Labs tab' under Properties
|
||||||
|
:param sel: a Selector object of the whole page
|
||||||
|
:return: a list of Request objects
|
||||||
|
"""
|
||||||
properties = []
|
properties = []
|
||||||
|
|
||||||
# Predicted - ACD/Labs tab
|
|
||||||
td_list = sel.xpath('.//table[@id="acdlabs-table"]//td').xpath(
|
td_list = sel.xpath('.//table[@id="acdlabs-table"]//td').xpath(
|
||||||
'normalize-space(string())')
|
'normalize-space(string())')
|
||||||
prop_names = td_list[::2]
|
prop_names = td_list[::2]
|
||||||
@ -62,16 +88,15 @@ class ChemSpider(Source):
|
|||||||
prop_conditions = ''
|
prop_conditions = ''
|
||||||
|
|
||||||
# Test for properties without values, with one hardcoded exception
|
# Test for properties without values, with one hardcoded exception
|
||||||
if not re.match(r'^\d', prop_value) or (prop_name == 'Polarizability' and prop_value == '10-24cm3'):
|
if (not re.match(r'^\d', prop_value) or
|
||||||
|
(prop_name == 'Polarizability' and prop_value == '10-24cm3')):
|
||||||
continue
|
continue
|
||||||
|
|
||||||
# Match for condition in parentheses
|
|
||||||
m = re.match(r'(.*) \((.*)\)', prop_name)
|
m = re.match(r'(.*) \((.*)\)', prop_name)
|
||||||
if m:
|
if m:
|
||||||
prop_name = m.group(1)
|
prop_name = m.group(1)
|
||||||
prop_conditions = m.group(2)
|
prop_conditions = m.group(2)
|
||||||
|
|
||||||
# Match for condition in value seperated by an 'at'
|
|
||||||
m = re.match(r'(.*) at (.*)', prop_value)
|
m = re.match(r'(.*) at (.*)', prop_value)
|
||||||
if m:
|
if m:
|
||||||
prop_value = m.group(1)
|
prop_value = m.group(1)
|
||||||
@ -84,11 +109,18 @@ class ChemSpider(Source):
|
|||||||
conditions=prop_conditions
|
conditions=prop_conditions
|
||||||
)
|
)
|
||||||
properties.append(new_prop)
|
properties.append(new_prop)
|
||||||
log.msg('CS prop: |%s| |%s| |%s|' %
|
|
||||||
(new_prop['attribute'], new_prop['value'], new_prop['source']),
|
|
||||||
level=log.DEBUG)
|
|
||||||
|
|
||||||
# Experimental Data Tab, Physico-chemical properties in particular
|
return properties
|
||||||
|
|
||||||
|
def parse_experimentaldatatab(self, sel):
|
||||||
|
"""
|
||||||
|
This function scrapes Experimental Data tab, Physico-chemical
|
||||||
|
properties in particular.
|
||||||
|
:param sel: a Selector object of the whole page
|
||||||
|
:return: a list of Result items
|
||||||
|
"""
|
||||||
|
properties = []
|
||||||
|
|
||||||
scraped_list = sel.xpath('.//li[span="Experimental Physico-chemical '
|
scraped_list = sel.xpath('.//li[span="Experimental Physico-chemical '
|
||||||
'Properties"]//li/table/tr/td')
|
'Properties"]//li/table/tr/td')
|
||||||
if not scraped_list:
|
if not scraped_list:
|
||||||
@ -105,15 +137,16 @@ class ChemSpider(Source):
|
|||||||
value=line.xpath('text()').extract()[0].rstrip(),
|
value=line.xpath('text()').extract()[0].rstrip(),
|
||||||
source=line.xpath('strong/text()').extract()[0].rstrip(),
|
source=line.xpath('strong/text()').extract()[0].rstrip(),
|
||||||
)
|
)
|
||||||
properties.append(new_prop)
|
properties.append(new_prop)
|
||||||
log.msg('CS prop: |%s| |%s| |%s|' %
|
|
||||||
(new_prop['attribute'], new_prop['value'],
|
|
||||||
new_prop['source']), level=log.DEBUG)
|
|
||||||
|
|
||||||
return properties
|
return properties
|
||||||
|
|
||||||
def parse_synonyms(self, sel):
|
def parse_synonyms(self, sel):
|
||||||
"""Scrape list of Names and Identifiers"""
|
"""
|
||||||
|
This function scrapes the list of Names and Identifiers
|
||||||
|
:param sel: a Selector object of the whole page
|
||||||
|
:return: a list of Requests
|
||||||
|
"""
|
||||||
requests = []
|
requests = []
|
||||||
synonyms = []
|
synonyms = []
|
||||||
|
|
||||||
@ -145,7 +178,13 @@ class ChemSpider(Source):
|
|||||||
return requests
|
return requests
|
||||||
|
|
||||||
def new_synonym(self, sel, name, category):
|
def new_synonym(self, sel, name, category):
|
||||||
"""Scrape for a single synonym at a given HTML tag"""
|
"""
|
||||||
|
This function scrapes for a single synonym at a given HTML tag
|
||||||
|
:param sel: a Selector object of the given HTML tag
|
||||||
|
:param name: the name of the synonym in the tag
|
||||||
|
:param category: the name of the category the synonym is labeled as
|
||||||
|
:return: a dictionary containing data on the synonym
|
||||||
|
"""
|
||||||
self.ignore_list.append(name)
|
self.ignore_list.append(name)
|
||||||
language = sel.xpath('span[@class="synonym_language"]/text()')
|
language = sel.xpath('span[@class="synonym_language"]/text()')
|
||||||
if language:
|
if language:
|
||||||
@ -181,7 +220,12 @@ class ChemSpider(Source):
|
|||||||
return synonym
|
return synonym
|
||||||
|
|
||||||
def parse_extendedinfo(self, response):
|
def parse_extendedinfo(self, response):
|
||||||
"""Scrape data from the ChemSpider GetExtendedCompoundInfo API"""
|
"""
|
||||||
|
This function scrapes data from the ChemSpider GetExtendedCompoundInfo
|
||||||
|
API, if a token is present in the configuration settings
|
||||||
|
:param response: a Response object to be parsed
|
||||||
|
:return: a list of Result items
|
||||||
|
"""
|
||||||
sel = Selector(response)
|
sel = Selector(response)
|
||||||
properties = []
|
properties = []
|
||||||
names = sel.xpath('*').xpath('name()').extract()
|
names = sel.xpath('*').xpath('name()').extract()
|
||||||
@ -197,17 +241,31 @@ class ChemSpider(Source):
|
|||||||
return properties
|
return properties
|
||||||
|
|
||||||
def newresult(self, attribute, value, conditions='', source='ChemSpider'):
|
def newresult(self, attribute, value, conditions='', source='ChemSpider'):
|
||||||
return Result(
|
"""
|
||||||
{
|
This function abstracts from the Result item and provides default
|
||||||
'attribute': attribute,
|
values.
|
||||||
'value': value,
|
:param attribute: the name of the attribute
|
||||||
'source': source,
|
:param value: the value of the attribute
|
||||||
'reliability': self.cfg['reliability'],
|
:param conditions: optional conditions regarding the value
|
||||||
'conditions': conditions
|
:param source: the name of the source if it is not ChemSpider
|
||||||
})
|
:return: A Result item
|
||||||
|
"""
|
||||||
|
return Result({
|
||||||
|
'attribute': attribute,
|
||||||
|
'value': value,
|
||||||
|
'source': source,
|
||||||
|
'reliability': self.cfg['reliability'],
|
||||||
|
'conditions': conditions
|
||||||
|
})
|
||||||
|
|
||||||
def parse_searchrequest(self, response):
|
def parse_searchrequest(self, response):
|
||||||
"""Parse the initial response of the ChemSpider Search API """
|
"""
|
||||||
|
This function parses the initial response of the ChemSpider Search API
|
||||||
|
Requires a valid token to function.
|
||||||
|
:param response: the Response object to be parsed
|
||||||
|
:return: A Request for the information page and a Request for the
|
||||||
|
extendedinfo API call
|
||||||
|
"""
|
||||||
sel = Selector(response)
|
sel = Selector(response)
|
||||||
log.msg('chemspider parse_searchrequest', level=log.DEBUG)
|
log.msg('chemspider parse_searchrequest', level=log.DEBUG)
|
||||||
sel.register_namespace('cs', 'http://www.chemspider.com/')
|
sel.register_namespace('cs', 'http://www.chemspider.com/')
|
||||||
@ -219,8 +277,8 @@ class ChemSpider(Source):
|
|||||||
log.msg('ChemSpider found multiple substances, taking first '
|
log.msg('ChemSpider found multiple substances, taking first '
|
||||||
'element', level=log.DEBUG)
|
'element', level=log.DEBUG)
|
||||||
csid = csids[0]
|
csid = csids[0]
|
||||||
structure_url = self.website[:-1] + self.structure % csid
|
structure_url = self.website[:-2].replace("\\", "") + self.structure % csid
|
||||||
extendedinfo_url = self.website[:-1] + self.extendedinfo % csid
|
extendedinfo_url = self.website[:-2].replace("\\", "") + self.extendedinfo % csid
|
||||||
log.msg('chemspider URL: %s' % structure_url, level=log.DEBUG)
|
log.msg('chemspider URL: %s' % structure_url, level=log.DEBUG)
|
||||||
return [Request(url=structure_url,
|
return [Request(url=structure_url,
|
||||||
callback=self.parse),
|
callback=self.parse),
|
||||||
@ -228,8 +286,13 @@ class ChemSpider(Source):
|
|||||||
callback=self.parse_extendedinfo)]
|
callback=self.parse_extendedinfo)]
|
||||||
|
|
||||||
def new_compound_request(self, compound):
|
def new_compound_request(self, compound):
|
||||||
|
"""
|
||||||
|
This function is called when a new synonym is returned to the spider
|
||||||
|
to generate new requests
|
||||||
|
:param compound: the name of the compound to search for
|
||||||
|
"""
|
||||||
if compound in self.ignore_list or self.cfg['token'] == '':
|
if compound in self.ignore_list or self.cfg['token'] == '':
|
||||||
return None
|
return None
|
||||||
searchurl = self.website[:-1] + self.search % compound
|
searchurl = self.website[:-2].replace("\\", "") + self.search % compound
|
||||||
log.msg('chemspider compound', level=log.DEBUG)
|
log.msg('chemspider compound', level=log.DEBUG)
|
||||||
return Request(url=searchurl, callback=self.parse_searchrequest)
|
return Request(url=searchurl, callback=self.parse_searchrequest)
|
||||||
|
@ -13,20 +13,31 @@ from FourmiCrawler.items import Result
|
|||||||
# Result item, but should be included eventually.
|
# Result item, but should be included eventually.
|
||||||
|
|
||||||
class NIST(Source):
|
class NIST(Source):
|
||||||
"""NIST Scraper plugin
|
"""
|
||||||
|
NIST Scraper plugin
|
||||||
This plugin manages searching for a chemical on the NIST website
|
This plugin manages searching for a chemical on the NIST website
|
||||||
and parsing the resulting page if the chemical exists on NIST.
|
and parsing the resulting page if the chemical exists on NIST.
|
||||||
"""
|
"""
|
||||||
website = "http://webbook.nist.gov/*"
|
website = "http://webbook\\.nist\\.gov/.*"
|
||||||
|
|
||||||
search = 'cgi/cbook.cgi?Name=%s&Units=SI&cTP=on'
|
search = 'cgi/cbook.cgi?Name=%s&Units=SI&cTP=on'
|
||||||
|
|
||||||
def __init__(self, config=None):
|
def __init__(self, config=None):
|
||||||
|
"""
|
||||||
|
Initialization of NIST scraper
|
||||||
|
:param config: configuration variables for this scraper, must contain
|
||||||
|
'reliability' key.
|
||||||
|
"""
|
||||||
Source.__init__(self, config)
|
Source.__init__(self, config)
|
||||||
self.ignore_list = set()
|
self.ignore_list = set()
|
||||||
|
|
||||||
def parse(self, response):
|
def parse(self, response):
|
||||||
|
"""
|
||||||
|
This function is called when a Response matching the variable
|
||||||
|
'website' is available for parsing the Response object.
|
||||||
|
:param response: The Scrapy Response object to be parsed
|
||||||
|
:return: a list of Result items and Request objects
|
||||||
|
"""
|
||||||
sel = Selector(response)
|
sel = Selector(response)
|
||||||
|
|
||||||
title = sel.xpath('head/title/text()').extract()[0]
|
title = sel.xpath('head/title/text()').extract()[0]
|
||||||
@ -51,6 +62,21 @@ class NIST(Source):
|
|||||||
log.msg('NIST symbol: |%s|, name: |%s|' % (symbol, name),
|
log.msg('NIST symbol: |%s|, name: |%s|' % (symbol, name),
|
||||||
level=log.DEBUG)
|
level=log.DEBUG)
|
||||||
|
|
||||||
|
requests.extend(self.parse_tables(sel, symbol_table))
|
||||||
|
|
||||||
|
return requests
|
||||||
|
|
||||||
|
def parse_tables(self, sel, symbol_table):
|
||||||
|
"""
|
||||||
|
This function identifies and distributes parsing of tables to other
|
||||||
|
functions below.
|
||||||
|
:param sel: A Selector object of the whole page
|
||||||
|
:param symbol_table: a dictionary containing translations of raw HTML
|
||||||
|
tags to human readable names
|
||||||
|
:return: a list of Result items and Requests
|
||||||
|
"""
|
||||||
|
requests = []
|
||||||
|
|
||||||
for table in sel.xpath('//table[@class="data"]'):
|
for table in sel.xpath('//table[@class="data"]'):
|
||||||
summary = table.xpath('@summary').extract()[0]
|
summary = table.xpath('@summary').extract()[0]
|
||||||
if summary == 'One dimensional data':
|
if summary == 'One dimensional data':
|
||||||
@ -81,8 +107,12 @@ class NIST(Source):
|
|||||||
return requests
|
return requests
|
||||||
|
|
||||||
def parse_generic_info(self, sel):
|
def parse_generic_info(self, sel):
|
||||||
"""Parses: synonyms, chemical formula, molecular weight, InChI,
|
"""
|
||||||
InChiKey, CAS number
|
This function parses: synonyms, chemical formula, molecular weight,
|
||||||
|
InChI, InChiKey, CAS number
|
||||||
|
:param sel: A Selector object of the entire page in the original
|
||||||
|
response
|
||||||
|
:return: a list of Result items
|
||||||
"""
|
"""
|
||||||
ul = sel.xpath('body/ul[li/strong="IUPAC Standard InChI:"]')
|
ul = sel.xpath('body/ul[li/strong="IUPAC Standard InChI:"]')
|
||||||
|
|
||||||
@ -121,15 +151,20 @@ class NIST(Source):
|
|||||||
return requests
|
return requests
|
||||||
|
|
||||||
def parse_aggregate_data(self, table, symbol_table):
|
def parse_aggregate_data(self, table, symbol_table):
|
||||||
"""Parses the table(s) which contain possible links to individual
|
"""
|
||||||
data points
|
This function parses the table(s) which contain possible links to
|
||||||
|
individual data points
|
||||||
|
:param table: a Selector object of the table to be parsed
|
||||||
|
:param symbol_table: a dictionary containing translations of raw HTML
|
||||||
|
tags to human readable names
|
||||||
|
:return: a list of Result items and Request objects
|
||||||
"""
|
"""
|
||||||
results = []
|
results = []
|
||||||
for tr in table.xpath('tr[td]'):
|
for tr in table.xpath('tr[td]'):
|
||||||
extra_data_url = tr.xpath('td[last()][a="Individual data points"]'
|
extra_data_url = tr.xpath('td[last()][a="Individual data points"]'
|
||||||
'/a/@href').extract()
|
'/a/@href').extract()
|
||||||
if extra_data_url:
|
if extra_data_url:
|
||||||
request = Request(url=self.website[:-1] + extra_data_url[0],
|
request = Request(url=self.website[:-2].replace("\\", "") + extra_data_url[0],
|
||||||
callback=self.parse_individual_datapoints)
|
callback=self.parse_individual_datapoints)
|
||||||
results.append(request)
|
results.append(request)
|
||||||
continue
|
continue
|
||||||
@ -155,14 +190,16 @@ class NIST(Source):
|
|||||||
return results
|
return results
|
||||||
|
|
||||||
def parse_transition_data(self, table, summary):
|
def parse_transition_data(self, table, summary):
|
||||||
"""Parses the table containing properties regarding phase changes"""
|
"""
|
||||||
|
This function parses the table containing properties regarding phase
|
||||||
|
changes
|
||||||
|
:param table: a Selector object of the table to be parsed
|
||||||
|
:param summary: the name of the property
|
||||||
|
:return: a list of Result items
|
||||||
|
"""
|
||||||
results = []
|
results = []
|
||||||
|
|
||||||
tr_unit = ''.join(table.xpath('tr[1]/th[1]/node()').extract())
|
unit = self.get_unit(table)
|
||||||
m = re.search(r'\((.*)\)', tr_unit)
|
|
||||||
unit = '!'
|
|
||||||
if m:
|
|
||||||
unit = m.group(1)
|
|
||||||
|
|
||||||
for tr in table.xpath('tr[td]'):
|
for tr in table.xpath('tr[td]'):
|
||||||
tds = tr.xpath('td/text()').extract()
|
tds = tr.xpath('td/text()').extract()
|
||||||
@ -176,18 +213,18 @@ class NIST(Source):
|
|||||||
return results
|
return results
|
||||||
|
|
||||||
def parse_generic_data(self, table, summary):
|
def parse_generic_data(self, table, summary):
|
||||||
"""Parses the common tables of 4 and 5 rows. Assumes they are of the
|
"""
|
||||||
|
Parses the common tables of 4 and 5 rows. Assumes they are of the
|
||||||
form:
|
form:
|
||||||
Symbol (unit)|Temperature (K)|Method|Reference|Comment
|
Symbol (unit)|Temperature (K)|Method|Reference|Comment
|
||||||
Symbol (unit)|Temperature (K)|Reference|Comment
|
Symbol (unit)|Temperature (K)|Reference|Comment
|
||||||
|
:param table: a Selector object of the table to be parsed
|
||||||
|
:param summary: the name of the property
|
||||||
|
:return: a list of Result items
|
||||||
"""
|
"""
|
||||||
results = []
|
results = []
|
||||||
|
|
||||||
tr_unit = ''.join(table.xpath('tr[1]/th[1]/node()').extract())
|
unit = self.get_unit(table)
|
||||||
m = re.search(r'\((.*)\)', tr_unit)
|
|
||||||
unit = '!'
|
|
||||||
if m:
|
|
||||||
unit = m.group(1)
|
|
||||||
|
|
||||||
for tr in table.xpath('tr[td]'):
|
for tr in table.xpath('tr[td]'):
|
||||||
tds = tr.xpath('td/text()').extract()
|
tds = tr.xpath('td/text()').extract()
|
||||||
@ -200,7 +237,13 @@ class NIST(Source):
|
|||||||
return results
|
return results
|
||||||
|
|
||||||
def parse_antoine_data(self, table, summary):
|
def parse_antoine_data(self, table, summary):
|
||||||
"""Parse table containing parameters for the Antione equation"""
|
"""
|
||||||
|
This function parses the table containing parameters for the Antione
|
||||||
|
equation
|
||||||
|
:param table: a Selector object of the table to be parsed
|
||||||
|
:param summary: the name of the property
|
||||||
|
:return: a list of Result items
|
||||||
|
"""
|
||||||
results = []
|
results = []
|
||||||
|
|
||||||
for tr in table.xpath('tr[td]'):
|
for tr in table.xpath('tr[td]'):
|
||||||
@ -215,7 +258,12 @@ class NIST(Source):
|
|||||||
return results
|
return results
|
||||||
|
|
||||||
def parse_individual_datapoints(self, response):
|
def parse_individual_datapoints(self, response):
|
||||||
"""Parses the page linked from aggregate data"""
|
"""
|
||||||
|
This function parses the 'individual data points' page linked from
|
||||||
|
the aggregate data table(s)
|
||||||
|
:param response: the Scrapy Response object to be parsed
|
||||||
|
:return: a list of Result items
|
||||||
|
"""
|
||||||
sel = Selector(response)
|
sel = Selector(response)
|
||||||
table = sel.xpath('//table[@class="data"]')[0]
|
table = sel.xpath('//table[@class="data"]')[0]
|
||||||
|
|
||||||
@ -228,11 +276,7 @@ class NIST(Source):
|
|||||||
name = m.group(1)
|
name = m.group(1)
|
||||||
condition = m.group(2)
|
condition = m.group(2)
|
||||||
|
|
||||||
tr_unit = ''.join(table.xpath('tr[1]/th[1]/node()').extract())
|
unit = self.get_unit(table)
|
||||||
m = re.search(r'\((.*)\)', tr_unit)
|
|
||||||
unit = '!'
|
|
||||||
if m:
|
|
||||||
unit = m.group(1)
|
|
||||||
|
|
||||||
for tr in table.xpath('tr[td]'):
|
for tr in table.xpath('tr[td]'):
|
||||||
tds = tr.xpath('td/text()').extract()
|
tds = tr.xpath('td/text()').extract()
|
||||||
@ -250,7 +294,25 @@ class NIST(Source):
|
|||||||
|
|
||||||
return results
|
return results
|
||||||
|
|
||||||
|
@staticmethod
|
||||||
|
def get_unit(table):
|
||||||
|
tr_unit = ''.join(table.xpath('tr[1]/th[1]/node()').extract())
|
||||||
|
m = re.search(r'\((.*)\)', tr_unit)
|
||||||
|
unit = '!'
|
||||||
|
if m:
|
||||||
|
unit = m.group(1)
|
||||||
|
|
||||||
|
return unit
|
||||||
|
|
||||||
def newresult(self, attribute, value, conditions=''):
|
def newresult(self, attribute, value, conditions=''):
|
||||||
|
"""
|
||||||
|
This function abstracts from the Result item and provides default
|
||||||
|
values
|
||||||
|
:param attribute: the name of the attribute
|
||||||
|
:param value: the value of the attribute
|
||||||
|
:param conditions: optional conditions regarding the value
|
||||||
|
:return: A Result item
|
||||||
|
"""
|
||||||
return Result(
|
return Result(
|
||||||
{
|
{
|
||||||
'attribute': attribute,
|
'attribute': attribute,
|
||||||
@ -261,7 +323,12 @@ class NIST(Source):
|
|||||||
})
|
})
|
||||||
|
|
||||||
def new_compound_request(self, compound):
|
def new_compound_request(self, compound):
|
||||||
|
"""
|
||||||
|
This function is called when a new synonym is returned to the spider
|
||||||
|
to generate new requests
|
||||||
|
:param compound: the name of the compound to search for
|
||||||
|
"""
|
||||||
if compound not in self.ignore_list:
|
if compound not in self.ignore_list:
|
||||||
self.ignore_list.update(compound)
|
self.ignore_list.update(compound)
|
||||||
return Request(url=self.website[:-1] + self.search % compound,
|
return Request(url=self.website[:-2].replace("\\", "") + self.search % compound,
|
||||||
callback=self.parse)
|
callback=self.parse)
|
||||||
|
@ -1,9 +1,11 @@
|
|||||||
|
import re
|
||||||
|
|
||||||
from scrapy.http import Request
|
from scrapy.http import Request
|
||||||
from scrapy import log
|
from scrapy import log
|
||||||
from source import Source
|
|
||||||
from scrapy.selector import Selector
|
from scrapy.selector import Selector
|
||||||
|
|
||||||
|
from source import Source
|
||||||
from FourmiCrawler.items import Result
|
from FourmiCrawler.items import Result
|
||||||
import re
|
|
||||||
|
|
||||||
|
|
||||||
class PubChem(Source):
|
class PubChem(Source):
|
||||||
@ -13,10 +15,10 @@ class PubChem(Source):
|
|||||||
including sources of the values of properties.
|
including sources of the values of properties.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
#PubChem has its data on compound name, properties and their values on different html pages, so different URLs used
|
# PubChem has its data on compound name, properties and their values on different html pages, so different URLs used
|
||||||
website = 'https://*.ncbi.nlm.nih.gov/*'
|
website = 'http://.*\\.ncbi\\.nlm\\.nih\\.gov/.*'
|
||||||
website_www = 'https://www.ncbi.nlm.nih.gov/*'
|
website_www = 'http://www.ncbi.nlm.nih.gov/*'
|
||||||
website_pubchem = 'https://pubchem.ncbi.nlm.nih.gov/*'
|
website_pubchem = 'http://pubchem.ncbi.nlm.nih.gov/.*'
|
||||||
search = 'pccompound?term=%s'
|
search = 'pccompound?term=%s'
|
||||||
data_url = 'toc/summary_toc.cgi?tocid=27&cid=%s'
|
data_url = 'toc/summary_toc.cgi?tocid=27&cid=%s'
|
||||||
|
|
||||||
@ -49,14 +51,15 @@ class PubChem(Source):
|
|||||||
self._spider.get_synonym_requests(synonym)
|
self._spider.get_synonym_requests(synonym)
|
||||||
log.msg('Raw synonyms found: %s' % raw_synonyms, level=log.DEBUG)
|
log.msg('Raw synonyms found: %s' % raw_synonyms, level=log.DEBUG)
|
||||||
|
|
||||||
n = re.search(r'cid=(\d+)',response.url)
|
n = re.search(r'cid=(\d+)', response.url)
|
||||||
if n:
|
if n:
|
||||||
cid = n.group(1)
|
cid = n.group(1)
|
||||||
log.msg('cid: %s' % cid, level=log.DEBUG) #getting the right id of the compound with which it can reach
|
log.msg('cid: %s' % cid, level=log.DEBUG) # getting the right id of the compound with which it can reach
|
||||||
# the seperate html page which contains the properties and their values
|
# the seperate html page which contains the properties and their values
|
||||||
|
|
||||||
#using this cid to get the right url and scrape it
|
# using this cid to get the right url and scrape it
|
||||||
requests.append(Request(url=self.website_pubchem[:-1] + self.data_url % cid, callback=self.parse_data))
|
requests.append(
|
||||||
|
Request(url=self.website_pubchem[:-2].replace("\\", "") + self.data_url % cid, callback=self.parse_data))
|
||||||
return requests
|
return requests
|
||||||
|
|
||||||
def parse_data(self, response):
|
def parse_data(self, response):
|
||||||
@ -72,22 +75,22 @@ class PubChem(Source):
|
|||||||
props = sel.xpath('//div')
|
props = sel.xpath('//div')
|
||||||
|
|
||||||
for prop in props:
|
for prop in props:
|
||||||
prop_name = ''.join(prop.xpath('b/text()').extract()) # name of property that it is parsing
|
prop_name = ''.join(prop.xpath('b/text()').extract()) # name of property that it is parsing
|
||||||
if prop.xpath('a'): # parsing for single value in property
|
if prop.xpath('a'): # parsing for single value in property
|
||||||
prop_source = ''.join(prop.xpath('a/@title').extract())
|
prop_source = ''.join(prop.xpath('a/@title').extract())
|
||||||
prop_value = ''.join(prop.xpath('a/text()').extract())
|
prop_value = ''.join(prop.xpath('a/text()').extract())
|
||||||
new_prop = Result({
|
new_prop = Result({
|
||||||
'attribute': prop_name,
|
'attribute': prop_name,
|
||||||
'value': prop_value,
|
'value': prop_value,
|
||||||
'source': prop_source,
|
'source': prop_source,
|
||||||
'reliability': 'Unknown',
|
'reliability': self.cfg['reliability'],
|
||||||
'conditions': ''
|
'conditions': ''
|
||||||
})
|
})
|
||||||
log.msg('PubChem prop: |%s| |%s| |%s|' %
|
log.msg('PubChem prop: |%s| |%s| |%s|' %
|
||||||
(new_prop['attribute'], new_prop['value'],
|
(new_prop['attribute'], new_prop['value'],
|
||||||
new_prop['source']), level=log.DEBUG)
|
new_prop['source']), level=log.DEBUG)
|
||||||
requests.append(new_prop)
|
requests.append(new_prop)
|
||||||
elif prop.xpath('ul'): # parsing for multiple values (list) in property
|
elif prop.xpath('ul'): # parsing for multiple values (list) in property
|
||||||
prop_values = prop.xpath('ul//li')
|
prop_values = prop.xpath('ul//li')
|
||||||
for prop_li in prop_values:
|
for prop_li in prop_values:
|
||||||
prop_value = ''.join(prop_li.xpath('a/text()').extract())
|
prop_value = ''.join(prop_li.xpath('a/text()').extract())
|
||||||
@ -96,16 +99,51 @@ class PubChem(Source):
|
|||||||
'attribute': prop_name,
|
'attribute': prop_name,
|
||||||
'value': prop_value,
|
'value': prop_value,
|
||||||
'source': prop_source,
|
'source': prop_source,
|
||||||
'reliability': 'Unknown',
|
'reliability': self.cfg['reliability'],
|
||||||
'conditions': ''
|
'conditions': ''
|
||||||
})
|
})
|
||||||
log.msg('PubChem prop: |%s| |%s| |%s|' %
|
log.msg('PubChem prop: |%s| |%s| |%s|' %
|
||||||
(new_prop['attribute'], new_prop['value'],
|
(new_prop['attribute'], new_prop['value'],
|
||||||
new_prop['source']), level=log.DEBUG)
|
new_prop['source']), level=log.DEBUG)
|
||||||
requests.append(new_prop)
|
requests.append(new_prop)
|
||||||
|
|
||||||
return requests
|
return requests
|
||||||
|
|
||||||
|
def parse_searchrequest(self, response):
|
||||||
|
"""
|
||||||
|
This function parses the response to the new_compound_request Request
|
||||||
|
:param response: the Response object to be parsed
|
||||||
|
:return: A Request for the compound page or what self.parse returns in
|
||||||
|
case the search request forwarded to the compound page
|
||||||
|
"""
|
||||||
|
|
||||||
|
# check if pubchem forwarded straight to compound page
|
||||||
|
m = re.match(self.website_pubchem, response.url)
|
||||||
|
if m:
|
||||||
|
log.msg('PubChem search forwarded to compound page',
|
||||||
|
level=log.DEBUG)
|
||||||
|
return self.parse(response)
|
||||||
|
|
||||||
|
sel = Selector(response)
|
||||||
|
|
||||||
|
results = sel.xpath('//div[@class="rsltcont"]')
|
||||||
|
if results:
|
||||||
|
url = results[0].xpath('div/p/a[1]/@href')
|
||||||
|
else:
|
||||||
|
log.msg('PubChem search found nothing or xpath failed',
|
||||||
|
level=log.DEBUG)
|
||||||
|
return None
|
||||||
|
|
||||||
|
if url:
|
||||||
|
url = 'http:' + ''.join(url[0].extract())
|
||||||
|
log.msg('PubChem compound page: %s' % url, level=log.DEBUG)
|
||||||
|
else:
|
||||||
|
log.msg('PubChem search found results, but no url in first result',
|
||||||
|
level=log.DEBUG)
|
||||||
|
return None
|
||||||
|
|
||||||
|
return Request(url=url, callback=self.parse)
|
||||||
|
|
||||||
def new_compound_request(self, compound):
|
def new_compound_request(self, compound):
|
||||||
return Request(url=self.website_www[:-1] + self.search % compound, callback=self.parse)
|
return Request(url=self.website_www[:-1] + self.search % compound,
|
||||||
|
callback=self.parse_searchrequest)
|
||||||
|
@ -15,7 +15,7 @@ class WikipediaParser(Source):
|
|||||||
It also returns requests with other external sources which contain information on parsed subject.
|
It also returns requests with other external sources which contain information on parsed subject.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
website = "http://en.wikipedia.org/wiki/*"
|
website = "http://en\\.wikipedia\\.org/wiki/.*"
|
||||||
__spider = None
|
__spider = None
|
||||||
searched_compounds = []
|
searched_compounds = []
|
||||||
|
|
||||||
@ -123,7 +123,7 @@ class WikipediaParser(Source):
|
|||||||
return items
|
return items
|
||||||
|
|
||||||
def new_compound_request(self, compound):
|
def new_compound_request(self, compound):
|
||||||
return Request(url=self.website[:-1] + compound, callback=self.parse)
|
return Request(url=self.website[:-2].replace("\\", "") + compound, callback=self.parse)
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def clean_items(items):
|
def clean_items(items):
|
||||||
|
@ -3,7 +3,7 @@ from scrapy import log
|
|||||||
|
|
||||||
|
|
||||||
class Source:
|
class Source:
|
||||||
website = "http://something/*" # Regex of URI's the source is able to parse
|
website = "http://something/.*" # Regex of URI's the source is able to parse
|
||||||
_spider = None
|
_spider = None
|
||||||
|
|
||||||
def __init__(self, config=None):
|
def __init__(self, config=None):
|
||||||
@ -30,7 +30,7 @@ class Source:
|
|||||||
:param compound: A compound name.
|
:param compound: A compound name.
|
||||||
:return: A new Scrapy Request
|
:return: A new Scrapy Request
|
||||||
"""
|
"""
|
||||||
# return Request(url=self.website[:-1] + compound, callback=self.parse)
|
# return Request(url=self.website[:-2].replace("\\", "") + compound, callback=self.parse)
|
||||||
pass
|
pass
|
||||||
|
|
||||||
def set_spider(self, spider):
|
def set_spider(self, spider):
|
||||||
|
@ -34,8 +34,9 @@ class FourmiSpider(Spider):
|
|||||||
"""
|
"""
|
||||||
for source in self._sources:
|
for source in self._sources:
|
||||||
if re.match(source.website, response.url):
|
if re.match(source.website, response.url):
|
||||||
log.msg("Url: " + response.url + " -> Source: " + source.website, level=log.DEBUG)
|
log.msg("URL: " + response.url + " -> Source: " + source.website, level=log.DEBUG)
|
||||||
return source.parse(response)
|
return source.parse(response)
|
||||||
|
log.msg("URL: " + response.url + " -> No compatible source", level=log.INFO)
|
||||||
return None
|
return None
|
||||||
|
|
||||||
def get_synonym_requests(self, compound, force=False):
|
def get_synonym_requests(self, compound, force=False):
|
||||||
|
10
GUI.cfg.sample
Normal file
10
GUI.cfg.sample
Normal file
@ -0,0 +1,10 @@
|
|||||||
|
[GUI]
|
||||||
|
# Personalize options in your User Interface
|
||||||
|
|
||||||
|
# Commonly used parameters are listed in the GUI for easy selection
|
||||||
|
CommonParameters = Weight, Polarity, Viscosity, Solubility, Name
|
||||||
|
|
||||||
|
# Parameters that are always used in the search
|
||||||
|
AlwaysParameters = Name
|
||||||
|
|
||||||
|
OutputTypes = csv, json, jsonlines, xml
|
1
GUI/__init__.py
Normal file
1
GUI/__init__.py
Normal file
@ -0,0 +1 @@
|
|||||||
|
import gui
|
30
GUI/configImporter.py
Normal file
30
GUI/configImporter.py
Normal file
@ -0,0 +1,30 @@
|
|||||||
|
import ConfigParser
|
||||||
|
|
||||||
|
|
||||||
|
class ConfigImporter():
|
||||||
|
def __init__(self, filename):
|
||||||
|
"""Read the filename into the parser."""
|
||||||
|
self.filename = filename
|
||||||
|
self.parser = ConfigParser.ConfigParser()
|
||||||
|
self.parser.read(self.filename)
|
||||||
|
|
||||||
|
def load_common_attributes(self):
|
||||||
|
"""Loads common attributes from the initialized file."""
|
||||||
|
try:
|
||||||
|
return self.parser.get('GUI', 'CommonParameters')
|
||||||
|
except:
|
||||||
|
return 'One, Two, Three'
|
||||||
|
|
||||||
|
def load_output_types(self):
|
||||||
|
"""Loads output types from the initialized file."""
|
||||||
|
try:
|
||||||
|
return self.parser.get('GUI', 'OutputTypes')
|
||||||
|
except:
|
||||||
|
return 'csv'
|
||||||
|
|
||||||
|
def load_always_attributes(self):
|
||||||
|
"""Loads attributes that are always searched for from the initialized file."""
|
||||||
|
try:
|
||||||
|
return self.parser.get('GUI', 'AlwaysParameters')
|
||||||
|
except:
|
||||||
|
return 'Name, Weight'
|
196
GUI/gui.py
Normal file
196
GUI/gui.py
Normal file
@ -0,0 +1,196 @@
|
|||||||
|
from Tkinter import *
|
||||||
|
import os
|
||||||
|
import shutil
|
||||||
|
from tkFileDialog import asksaveasfilename
|
||||||
|
|
||||||
|
from configImporter import *
|
||||||
|
|
||||||
|
|
||||||
|
class GUI():
|
||||||
|
def __init__(self, search, config_file='GUI.cfg', sourceloader=None, in_source=True):
|
||||||
|
"""Boots the window, configuration."""
|
||||||
|
if not in_source:
|
||||||
|
current_dir = os.path.dirname(os.path.abspath(__file__))
|
||||||
|
config_file = current_dir + '../' + config_file
|
||||||
|
if not os.path.isfile(config_file):
|
||||||
|
try:
|
||||||
|
shutil.copyfile(os.path.dirname(os.path.abspath(__file__)) + "/../GUI.cfg.sample", config_file)
|
||||||
|
except IOError:
|
||||||
|
print "GUI configuration couldn't be found and couldn't be created."
|
||||||
|
sys.exit()
|
||||||
|
self.configurator = ConfigImporter(config_file)
|
||||||
|
self.sourceloader = sourceloader
|
||||||
|
self.finish_with_search = False
|
||||||
|
self.values = {}
|
||||||
|
self.required_variables = ['substance']
|
||||||
|
self.search = search
|
||||||
|
self.window, self.variables = self.generate_window(self.load_common_attributes(), self.load_output_types())
|
||||||
|
|
||||||
|
def load_common_attributes(self):
|
||||||
|
"""Calls the configuration parser for common attributes."""
|
||||||
|
return [x.strip() for x in self.configurator.load_common_attributes().split(',')]
|
||||||
|
|
||||||
|
def load_output_types(self):
|
||||||
|
"""Calls the configuration parser for output types."""
|
||||||
|
return [x.strip() for x in self.configurator.load_output_types().split(',')]
|
||||||
|
|
||||||
|
def load_always_attributes(self):
|
||||||
|
"""Calls the configuration parser for attributes that are always used."""
|
||||||
|
return ','.join([x.strip() for x in self.configurator.load_always_attributes().split(',')])
|
||||||
|
|
||||||
|
def set_output(self):
|
||||||
|
self.variable_output_name.set(asksaveasfilename())
|
||||||
|
self.button_output_name.config(text=self.variable_output_name.get())
|
||||||
|
|
||||||
|
def generate_window(self, common_attributes, output_types):
|
||||||
|
"""Creates all widgets and variables in the window."""
|
||||||
|
window = Tk()
|
||||||
|
window.wm_title("Fourmi Crawler")
|
||||||
|
|
||||||
|
variables = {}
|
||||||
|
|
||||||
|
variable_substance = StringVar(window)
|
||||||
|
frame_substance = Frame(window)
|
||||||
|
label_substance = Label(frame_substance, text="Substance: ")
|
||||||
|
input_substance = Entry(frame_substance, font=("Helvetica", 12), width=25, textvariable=variable_substance)
|
||||||
|
variables.update({"substance": variable_substance})
|
||||||
|
frame_substance.pack(side=TOP)
|
||||||
|
label_substance.pack()
|
||||||
|
input_substance.pack()
|
||||||
|
input_substance.focus()
|
||||||
|
|
||||||
|
frame_all_attributes = Frame(window)
|
||||||
|
frame_selecting_attributes = Frame(frame_all_attributes)
|
||||||
|
frame_new_attributes = Frame(frame_selecting_attributes)
|
||||||
|
label_new_attributes = Label(frame_new_attributes, text="Parameters: ")
|
||||||
|
input_new_attributes = Text(frame_new_attributes, font=("Helvetica", 8), width=25, height=7, padx=5, pady=5)
|
||||||
|
variables.update({"new_attributes": input_new_attributes})
|
||||||
|
frame_new_attributes.pack(side=LEFT)
|
||||||
|
label_new_attributes.pack()
|
||||||
|
input_new_attributes.pack()
|
||||||
|
|
||||||
|
frame_common_attributes = Frame(frame_selecting_attributes)
|
||||||
|
label_common_attributes = Label(frame_common_attributes, text="Common Parameters: ")
|
||||||
|
input_common_attributes = Listbox(frame_common_attributes, selectmode=MULTIPLE, height=7)
|
||||||
|
scrollbar_common_attributes = Scrollbar(frame_common_attributes)
|
||||||
|
input_common_attributes.config(yscrollcommand=scrollbar_common_attributes.set)
|
||||||
|
scrollbar_common_attributes.config(command=input_common_attributes.yview)
|
||||||
|
if common_attributes and len(common_attributes) > 0:
|
||||||
|
input_common_attributes.insert(END, *common_attributes)
|
||||||
|
variables.update({"common_attributes": input_common_attributes})
|
||||||
|
frame_common_attributes.pack(side=RIGHT)
|
||||||
|
label_common_attributes.pack(side=TOP)
|
||||||
|
input_common_attributes.pack(side=LEFT)
|
||||||
|
scrollbar_common_attributes.pack(side=RIGHT, fill=Y)
|
||||||
|
frame_selecting_attributes.pack()
|
||||||
|
|
||||||
|
frame_last = Frame(window)
|
||||||
|
search_button = Button(frame_last, text="Start search", command=self.prepare_search)
|
||||||
|
cancel_button = Button(frame_last, text="Cancel", command=window.destroy)
|
||||||
|
frame_last.pack(side=BOTTOM)
|
||||||
|
search_button.pack(side=LEFT)
|
||||||
|
cancel_button.pack(side=RIGHT)
|
||||||
|
|
||||||
|
frame_name = Frame(window)
|
||||||
|
frame_output_name = Frame(frame_name)
|
||||||
|
label_output_name = Label(frame_output_name, text='Output file:')
|
||||||
|
self.variable_output_name = StringVar()
|
||||||
|
self.variable_output_name.set('results.csv')
|
||||||
|
variables.update({'output_name':self.variable_output_name})
|
||||||
|
self.button_output_name = Button(frame_output_name, command=self.set_output, text="Select file")
|
||||||
|
frame_output_name.pack(side=LEFT)
|
||||||
|
label_output_name.pack()
|
||||||
|
self.button_output_name.pack()
|
||||||
|
frame_name.pack(side=BOTTOM)
|
||||||
|
|
||||||
|
|
||||||
|
frame_checkboxes = Frame(window)
|
||||||
|
frame_checkbox_attributes = Frame(frame_checkboxes)
|
||||||
|
variable_all_attributes = BooleanVar()
|
||||||
|
variable_all_attributes.set(True)
|
||||||
|
input_all_attributes = Checkbutton(frame_checkbox_attributes, text="Search ALL parameters",
|
||||||
|
variable=variable_all_attributes)
|
||||||
|
variables.update({"all_attributes": variable_all_attributes})
|
||||||
|
frame_checkbox_attributes.pack(side=LEFT)
|
||||||
|
input_all_attributes.pack()
|
||||||
|
|
||||||
|
frame_logging = Frame(frame_checkboxes)
|
||||||
|
variable_logging = BooleanVar()
|
||||||
|
variable_logging.set(False)
|
||||||
|
input_logging = Checkbutton(frame_logging, text="Verbose logging", variable=variable_logging)
|
||||||
|
variables.update({'logging':variable_logging})
|
||||||
|
frame_logging.pack(side=RIGHT)
|
||||||
|
frame_checkboxes.pack(side=BOTTOM)
|
||||||
|
input_logging.pack()
|
||||||
|
frame_all_attributes.pack()
|
||||||
|
|
||||||
|
return window, variables
|
||||||
|
|
||||||
|
def prepare_search(self):
|
||||||
|
"""Saves the values from the window for later retrieval."""
|
||||||
|
variables = self.variables
|
||||||
|
values = {}
|
||||||
|
|
||||||
|
values.update({"Always attributes": self.load_always_attributes()})
|
||||||
|
for name, var in variables.iteritems():
|
||||||
|
if var.__class__ is StringVar:
|
||||||
|
values.update({name: var.get()})
|
||||||
|
elif var.__class__ is BooleanVar:
|
||||||
|
values.update({name: var.get()})
|
||||||
|
elif var.__class__ is Text:
|
||||||
|
values.update({name: str(var.get("1.0", END)).strip()})
|
||||||
|
elif var.__class__ is Listbox:
|
||||||
|
values.update({name: ", ".join([var.get(int(i)) for i in var.curselection()])})
|
||||||
|
else:
|
||||||
|
print "No known class, {}, {}".format(name, var)
|
||||||
|
|
||||||
|
values.update({'output_name':self.variable_output_name.get()})
|
||||||
|
values.update({'output_type':self.check_output_type(values.get('output_name'))})
|
||||||
|
|
||||||
|
self.values = values
|
||||||
|
if all([values.get(i) != '' for i in self.required_variables]):
|
||||||
|
self.finish_with_search = True
|
||||||
|
self.window.destroy()
|
||||||
|
else:
|
||||||
|
self.finish_with_search = False
|
||||||
|
#tkMessageBox.showinfo('Not all required information was entered!')
|
||||||
|
|
||||||
|
def execute_search(self):
|
||||||
|
"""Calls the Fourmi crawler with the values from the GUI"""
|
||||||
|
if self.values.get('all_attributes'):
|
||||||
|
attributes = ".*"
|
||||||
|
else:
|
||||||
|
attribute_types = ['attributes', 'Common attributes', 'Always attributes']
|
||||||
|
attributes = ','.join([str(self.values.get(attribute)) for attribute in attribute_types])
|
||||||
|
output_file = "file://" + str(self.values.get('output_name')) #Dealing with absolute paths
|
||||||
|
|
||||||
|
arguments = {'--attributes': attributes,
|
||||||
|
'--exclude': None,
|
||||||
|
'--format': self.values.get('output_type'),
|
||||||
|
'--help': False,
|
||||||
|
'--include': None,
|
||||||
|
'--log': 'log.txt',
|
||||||
|
'--output': output_file,
|
||||||
|
'-v': 0 if self.values.get('logging') else 3,
|
||||||
|
'--version': False,
|
||||||
|
'<compound>': self.values.get('substance'),
|
||||||
|
'list': False,
|
||||||
|
'search': True}
|
||||||
|
|
||||||
|
self.search(arguments, self.sourceloader)
|
||||||
|
|
||||||
|
def run(self):
|
||||||
|
"""Starts the window and the search."""
|
||||||
|
self.window.mainloop()
|
||||||
|
if self.finish_with_search:
|
||||||
|
self.execute_search()
|
||||||
|
|
||||||
|
def check_output_type(self, filename):
|
||||||
|
parts = str(filename).split('.')
|
||||||
|
output_types = self.load_output_types()
|
||||||
|
extension = parts[-1]
|
||||||
|
|
||||||
|
for type in output_types:
|
||||||
|
if extension==type:
|
||||||
|
return extension
|
||||||
|
return output_types[0]
|
@ -48,7 +48,6 @@ __Main goals:__
|
|||||||
- Build an graphical user interface(GUI) as alternative for the command line
|
- Build an graphical user interface(GUI) as alternative for the command line
|
||||||
interface(CLI). (Assignee: Harmen)
|
interface(CLI). (Assignee: Harmen)
|
||||||
- Compiling the source into an windows executable. (Assignee: Bas)
|
- Compiling the source into an windows executable. (Assignee: Bas)
|
||||||
- Create an module to gather data from PubChem. (Assignee: Nout)
|
|
||||||
|
|
||||||
__Side goals:__
|
__Side goals:__
|
||||||
|
|
||||||
|
97
SIGNED.md
97
SIGNED.md
@ -3,19 +3,19 @@
|
|||||||
-----BEGIN PGP SIGNATURE-----
|
-----BEGIN PGP SIGNATURE-----
|
||||||
Version: GnuPG v1.4.11 (GNU/Linux)
|
Version: GnuPG v1.4.11 (GNU/Linux)
|
||||||
|
|
||||||
iQIcBAABAgAGBQJTn3GgAAoJEJrQ9RIUCT6/CI4P/RSAQrd6JugGZoQu/gNdW6eB
|
iQIcBAABAgAGBQJTpMZAAAoJEJrQ9RIUCT6/Hf8P/AyX9ZD5zj6rBi2CwDOTs5aa
|
||||||
MYCybqYGZiieVhUaGOnFNVlp68YpXH+sP/Uc6hXEX30UQEsDmhMeT5NA7ZMS+zJ9
|
flVqw9syvdqTzVfXQaR4UrCSOuyuOeAkiqub0BMjxyCurqAwN/SCPf3uOJ/tGXmt
|
||||||
MNHGQdJq22lGb3+VoVBV4RTMdkQXOXvx6p5biskjIEtM3tfTxP529GvAX2TFUNnt
|
ZPtYVHjevJ4mbojLhZiJ2av8LC9VOh3Zl+reR3L2cLuBD4rVSrfUMJtczbbtNlk+
|
||||||
gGWk28EDr30M95XwDxwWo+57Xv8VtSb3VSvXEbrdwGYf8EoQo9oPtzYQ0YcdupcC
|
+mczRcTpzNvHQW6mKqyUoKn8xqNnLC7C+p5ybNZ5EADUfoKIF1xyTN6je6fpYZ1U
|
||||||
ET8bukYVcwpAjoTnPlEy89TiHHohwmimr2ASXeQ64Ks5wfjzcF7NENCAmaAfR+KI
|
IHxiUzeOvfX9ohmbfnfkpkuSll1nUJWsTgUPKhthJuxEhwCQ1xMdWhxfcyZJaMT2
|
||||||
VLLuGqdWMBx1ewVuAXTCZ0Mga/kBoRUaO0PC13UmL8LhhZY9Z3cwD4UnPU35/RQi
|
Pxgo8C8S6lzAk4PxBRBoePjgWAeaFmbr317WXHvw6SSHPIdzToKZgDiDC5LWvKxb
|
||||||
IbLfQcZHf/gEvyMeiTYCsyWpm+/xxn1+EfHol4/Q9VSXzZgRBX05Ik6tqeCvjdgG
|
RRdLZ6w7tg0/FSUexekrUafGT8Je0oIoLUQlNaEQzrPNhDpma1uHFfZg0vb2m4Hq
|
||||||
4PyHBaJTTm/HfMNdg3mr1mbyjTv5UxglEyPv+Y4NdfoVfepkXsXbzvNSyVffZ3Bw
|
WHLLKTCr6FMczhP1TmuIEtdjKtymT+rO+Ls4ciw+654R7MtBYcmTr+RqmAd+GadJ
|
||||||
UaFp7KzIC4Jugdpv63FleiAdDY0+iZ5shH86wD1+HJ0/a87kn5Ao1yESby7J7U+f
|
vJNmGDod2oPwCydEps8bYAbksqRhMmk3xwco/g6dWYh5/+1GzCr80J7fYpqtoPFH
|
||||||
poZQYeMFeuC0T5hY/3iYoyvZ68oH918ESESiucSulp5BvfwuqGL2+xo5uJIwGYXE
|
V5qKyDQovF5jPlb/buq4mH8XYVT1z4Sx8azKVctMLig57zRnvN0WyskpT09oY7dK
|
||||||
3IDQC7xbA14JHX86IVJlSHAD33iWyiC+5yjw4/bRRVl37KPsLdHiXH3YIRnF5I2I
|
TPvIqwTixekndYLcM3QacVq/NhVOOQPFvD0PwU18eKs4EfD2L7iWd2XjV9Az++aD
|
||||||
ZbM/uDYyJdZbBe4UoCoF
|
jUY6EwEuOzDCexWP4eM8
|
||||||
=AMhi
|
=h6TK
|
||||||
-----END PGP SIGNATURE-----
|
-----END PGP SIGNATURE-----
|
||||||
|
|
||||||
```
|
```
|
||||||
@ -27,38 +27,45 @@ ZbM/uDYyJdZbBe4UoCoF
|
|||||||
#### Expect
|
#### Expect
|
||||||
|
|
||||||
```
|
```
|
||||||
size exec file contents
|
size exec file contents
|
||||||
./
|
./
|
||||||
375 .gitignore d2e475a6a4fa51422cac0a07495914e776858fb9ab9c8937a4d491a3e042d6b1
|
412 .gitignore 25059da2ee328837ece01b979cd5c1083ed1679372f06c14c1c58035d8120614
|
||||||
464 .travis.yml 3063ba078607b8d16bd6467afc15fbbaa4b26c1e30be5ce7cef453cfccbaa95c
|
548 .travis.yml 7f11bc58a8e94276ef949afeb107f9f1e184c0dbb84f821705ea2245902ed546
|
||||||
428 Changelog.md c7791d1914ddca9ff1549d90468a79787a7feafe94cecd756e3d7cbd4bcbc7df
|
846 Changelog.md 345f9aea4812b37b1b2714703ea0d5edd27414c0f839ec3e322450ad5ec5c6ed
|
||||||
FourmiCrawler/
|
FourmiCrawler/
|
||||||
0 __init__.py e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855
|
0 __init__.py e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855
|
||||||
304 items.py b00d49a3d53fa13306c7f8b023adb93ab88423c4fce46600689814f6b02bb806
|
304 items.py b00d49a3d53fa13306c7f8b023adb93ab88423c4fce46600689814f6b02bb806
|
||||||
2178 pipelines.py f9b7b84938060751e15e45de5133dffe50c798bff2a20019206fe7c9d677ad49
|
2178 pipelines.py f9b7b84938060751e15e45de5133dffe50c798bff2a20019206fe7c9d677ad49
|
||||||
914 settings.py 0be2eaf8e83e85ed27754c896421180fc80cb5ce44449aa9f1048e465d1a96f2
|
677 settings.py f1e7d21b899ffc2523516c0ebe67d967dc62495b90c2fe34651042a3049fcd94
|
||||||
sources/
|
sources/
|
||||||
9991 ChemSpider.py 847013e34c5c3683ec66a337837287512b4bab9fbea2ece12e4130ab0dbf264d
|
12103 ChemSpider.py f647d70acf9b3f1ee7bde75586aa45156331f977ca7fe836ceac4477a2c0d4ce
|
||||||
9898 NIST.py 97abc84fce85c47b789822715a1945ab84cc052a32340c861141c1af66bab644
|
12400 NIST.py cdb4c423355ac8fb1097197a9f8df44f667925a785c6bae7c583820da08908ee
|
||||||
4754 PubChem.py 58ed4c92519e385f2768cf8034b006b18f8a21632cb1c5a0849b1a329a8c6ffb
|
6121 PubChem.py 8f8ad40459090b818a384a202e739fe4696a04154df2b8419aee896b0fa02481
|
||||||
6907 WikipediaParser.py 5d6de911c773129a34b76c40a9b547aafc67644a15f39cd0be6afc7a16fb0f97
|
6930 WikipediaParser.py ae9f57bbf2aad9c371abcd143fd2dda5995a196cb700734a5035dd94b1988870
|
||||||
0 __init__.py e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855
|
0 __init__.py e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855
|
||||||
1262 source.py 16c4cdfca849b7dc2bc89d7a6f7ad021f4aa1d04234394312f1d0edf0fd9c5a4
|
1281 source.py 7927fda259ff2c8096fa526db1f08586de6e04473a491e19a07b092fdeed81fc
|
||||||
3026 spider.py 1ffba2512988b7a6b535a4a31a4ef688ece4f8c595c3d50355c34ef46b23e44a
|
3111 spider.py ec7c946907fea10c17ee6dd88a506f3e3bf2cd748e3eb09200487fcec2ae7ba3
|
||||||
1081 LICENSE 36951e5f1910bad3e008ab7228f35ad8933192e52d3c3ae6a5e875765e27192c
|
GUI/
|
||||||
3965 README.md d21236d6a175be28ef8e2fee8a256e95b6a513163e3f1071c26c62e9093db7f3
|
11 __init__.py 40567015c415e853210425c1b4f3834dbc2a3165e3713e04dd3424b79bc90aa3
|
||||||
3676 x fourmi.py 2ff89f97fd2a49d08417d9ab6cf08e88944d0c45f54ec84550b530be48676c23
|
940 configImporter.py 5d731d63a3117b25b7e556a746a1dd5b16e8cbb60e57be46de333c31c8c00271
|
||||||
261 scrapy.cfg 624c068fd06303daa65b8e0d0d3ef88ac1f123be2694ef5b4f3f9a9dcd983f85
|
8776 gui.py 20b2220bc3ca55ebfd6d04e8c0bebbf1ae316c85a54db60b8fc02d22642f19d5
|
||||||
tests/
|
299 GUI.cfg.sample 4ee27f7099d588c21358cd645a21621e631d80712f1b514dad898faa5fee2483
|
||||||
1 __init__.py 01ba4719c80b6fe911b091a7c05124b64eeece964e09c058ef8f9805daca546b
|
1081 LICENSE 36951e5f1910bad3e008ab7228f35ad8933192e52d3c3ae6a5e875765e27192c
|
||||||
2837 test_configurator.py 4a0eb6e7121eb09a63ab5cb797570d1a42080c5346c3b8b365da56eefa599e80
|
3900 README.md f4a1e3ea1700d2b415acfad661cb45f960fe8e8ffbe98dbecb6c7ed071a101ac
|
||||||
1892 test_pipeline.py 387a336b0f36722a20e712aa033e5771c44f9e92561dd73acffd53d622c52031
|
3846 x fourmi.py f0b11f5f153f96f6af2e504cdf369e43c04316752de131a659eb6246fd80212a
|
||||||
1260 test_sourceloader.py b108b4b80adcdb7401273a9823b1f1a19eb5178776186eb5a9976aed8b1ee869
|
261 scrapy.cfg 624c068fd06303daa65b8e0d0d3ef88ac1f123be2694ef5b4f3f9a9dcd983f85
|
||||||
2113 test_spider.py 300f280377b522737be0d8e4a80031ab118a4011bdbb92131e9c400fcdab6299
|
416 sources.cfg.sample 11cd0fc18693da17883c98d25a384ae1b6158adfef13778b6dd02b878f6b8a70
|
||||||
utils/
|
tests/
|
||||||
0 __init__.py e3b0c44298fc1c149afbf4c8996fb92427ae41e4649b934ca495991b7852b855
|
107 __init__.py ce90e54e58a0912cadbe3adcf5166dc72477bf9ce289bf427f8e2f5b25406670
|
||||||
3552 configurator.py e2b7e0ee6c1fef4373785dfe5df8ec6950f31ce6a5d9632b69a66ea3d1eaf921
|
2870 test_configurator.py 318d542b1cda5075a2a9a6be97e9e7a79372ee58e1ab3014c161534094f7364d
|
||||||
2537 sourceloader.py f5a5ac2a6aba0658dbe11361f465caabcf3c06c5c8dc9a631874211cc19d2d37
|
1315 test_gui.py 0fb95d0b542765bf52bcebb037bf2ed1299209beab23448af741a93c9fbb1ca8
|
||||||
|
1892 test_pipeline.py 387a336b0f36722a20e712aa033e5771c44f9e92561dd73acffd53d622c52031
|
||||||
|
1260 test_sourceloader.py b108b4b80adcdb7401273a9823b1f1a19eb5178776186eb5a9976aed8b1ee869
|
||||||
|
2113 test_spider.py 300f280377b522737be0d8e4a80031ab118a4011bdbb92131e9c400fcdab6299
|
||||||
|
utils/
|
||||||
|
40 __init__.py f1237ae74693e2ec1b3154e57aec27438a80a735e5ccf2411aecd194ef443b6a
|
||||||
|
4047 configurator.py 8b566a0435a9f105a8ec616b16c3e21edb9b82f8debe1ef9f1df6bbbf20949d5
|
||||||
|
2537 sourceloader.py f5a5ac2a6aba0658dbe11361f465caabcf3c06c5c8dc9a631874211cc19d2d37
|
||||||
```
|
```
|
||||||
|
|
||||||
#### Ignore
|
#### Ignore
|
||||||
|
fourmi.py (15 changed lines)
@@ -1,8 +1,9 @@
 #!/usr/bin/env python
 """
-Fourmi, a web scraper build to search specific information for a given compound (and it's pseudonyms).
+Fourmi, a web scraper build to search specific information for a given compound (and its pseudonyms).

 Usage:
+    fourmi
     fourmi search <compound>
     fourmi [options] search <compound>
     fourmi [options] [-v | -vv | -vvv] [--include=<sourcename> | --exclude=<sourcename>] search <compound>
@@ -17,7 +18,7 @@ Options:
     --version                       Show version.
     -v                              Verbose logging output. (Multiple occurrences increase logging level)
     --log=<file>                    Save log to an file.
-    -o <file> --output=<file>       Output file [default: results.*format*]
+    -o <file> --output=<file>       Output file [default: <compound>.*format*]
     -f <format> --format=<format>   Output formats (supported: csv, json, jsonlines, xml) [default: csv]
     --include=<regex>               Include only sources that match these regular expressions split by a comma.
     --exclude=<regex>               Exclude the sources that match these regular expressions split by a comma.
@@ -31,6 +32,7 @@ import docopt
 from FourmiCrawler.spider import FourmiSpider
 from utils.configurator import Configurator
 from utils.sourceloader import SourceLoader
+from GUI import gui


 def setup_crawler(compound, settings, source_loader, attributes):
@@ -58,18 +60,18 @@ def search(docopt_arguments, source_loader):
     """
     conf = Configurator()
     conf.set_logging(docopt_arguments["--log"], docopt_arguments["-v"])
-    conf.set_output(docopt_arguments["--output"], docopt_arguments["--format"])
+    conf.set_output(docopt_arguments["--output"], docopt_arguments["--format"], docopt_arguments["<compound>"])
     setup_crawler(docopt_arguments["<compound>"], conf.scrapy_settings,
                   source_loader, docopt_arguments["--attributes"].split(','))
     if conf.scrapy_settings.getbool("LOG_ENABLED"):
         log.start(conf.scrapy_settings.get("LOG_FILE"),
                   conf.scrapy_settings.get("LOG_LEVEL"), conf.scrapy_settings.get("LOG_STDOUT"))
     reactor.run()


 # The start for the Fourmi Command Line interface.
 if __name__ == '__main__':
-    arguments = docopt.docopt(__doc__, version='Fourmi - V0.5.3')
+    arguments = docopt.docopt(__doc__, version='Fourmi - V0.6.0')
     loader = SourceLoader()

     if arguments["--include"]:
@@ -82,3 +84,6 @@ if __name__ == '__main__':
     elif arguments["list"]:
         print "-== Available Sources ==-"
         print str(loader)
+    else:
+        gui_window = gui.GUI(search, sourceloader=SourceLoader())
+        gui_window.run()
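With the new bare `fourmi` usage line, running the tool without a sub-command no longer ends in a docopt usage error; both `arguments["search"]` and `arguments["list"]` come back False, so the new `else:` branch above opens the GUI. A small sketch of that docopt behaviour follows; the usage string is a trimmed stand-in for the real docstring, and "caffeine" is just an example compound.

```
# Sketch only: a trimmed usage string showing how docopt routes a bare
# `fourmi` call to the GUI branch and `fourmi search <compound>` to the CLI.
import docopt

usage = """
Usage:
    fourmi
    fourmi search <compound>
    fourmi list
"""

args = docopt.docopt(usage, argv=[])
print args["search"], args["list"]          # False False -> GUI branch
args = docopt.docopt(usage, argv=["search", "caffeine"])
print args["search"], args["<compound>"]    # True caffeine -> CLI search
```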
sources.cfg.sample (new file, 19 lines)
@@ -0,0 +1,19 @@
+[DEFAULT]
+reliability = Unknown
+
+#For each source listed in FourmiCrawler/sources there should be a section
+#named exactly as the filename in here. If not present, the DEFAULT value is
+#used for reliability of that source.
+
+[ChemSpider]
+reliability = High
+#token=Paste ChemSpider API token here and remove the hashtag
+
+[NIST]
+reliability = High
+
+[WikipediaParser]
+reliability = Medium
+
+[PubChem]
+reliability = High
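The sample relies on ConfigParser's [DEFAULT] section: any source that has no section of its own, or no reliability key, falls back to "Unknown". A minimal sketch of that lookup, assuming a sources.cfg copied from the sample above ("SomeNewSource" is a hypothetical, unlisted source):

```
import ConfigParser

# Python 2, matching the project. Reads a sources.cfg copied from the sample.
config = ConfigParser.ConfigParser()
config.read('sources.cfg')

for source in ['ChemSpider', 'NIST', 'WikipediaParser', 'PubChem', 'SomeNewSource']:
    if config.has_section(source):
        # Comes from the source's own section, or from [DEFAULT] if missing.
        print source, config.get(source, 'reliability')
    else:
        # No section at all: fall back to the DEFAULT value explicitly.
        print source, config.defaults().get('reliability', 'Unknown')
```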
tests/__init__.py
@@ -1 +1,6 @@
+import test_configurator
+import test_gui
+import test_pipeline
+import test_sourceloader
+import test_spider
+
tests/test_configurator.py
@@ -10,16 +10,16 @@ class TestConfigurator(unittest.TestCase):
         self.conf = Configurator()

     def test_set_output(self):
-        self.conf.set_output(filename="test.txt", fileformat="csv")
+        self.conf.set_output(filename="test.txt", fileformat="csv", compound="test")
         self.assertEqual(self.conf.scrapy_settings["FEED_URI"], "test.txt")
         self.assertEqual(self.conf.scrapy_settings["FEED_FORMAT"], "csv")

-        self.conf.set_output("results.*format*", "jsonlines")
-        self.assertEqual(self.conf.scrapy_settings["FEED_URI"], "results.json")
+        self.conf.set_output("<compound>.*format*", "jsonlines", "test")
+        self.assertEqual(self.conf.scrapy_settings["FEED_URI"], "test.json")
         self.assertEqual(self.conf.scrapy_settings["FEED_FORMAT"], "jsonlines")

-        self.conf.set_output("results.*format*", "csv")
-        self.assertEqual(self.conf.scrapy_settings["FEED_URI"], "results.csv")
+        self.conf.set_output("<compound>.*format*", "csv", "test")
+        self.assertEqual(self.conf.scrapy_settings["FEED_URI"], "test.csv")
         self.assertEqual(self.conf.scrapy_settings["FEED_FORMAT"], "csv")

     def test_start_log(self):
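These tests pin down the new compound-based file naming. When iterating on just this file, the plain unittest runner works as well as the project's nosetests invocation; a minimal sketch, assuming it is run from the repository root:

```
# Minimal sketch: run only the configurator tests, without nose.
import unittest

from tests.test_configurator import TestConfigurator

suite = unittest.TestLoader().loadTestsFromTestCase(TestConfigurator)
unittest.TextTestRunner(verbosity=2).run(suite)
```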
tests/test_gui.py (new file, 32 lines)
@@ -0,0 +1,32 @@
+import unittest
+
+from GUI import gui
+
+
+class TestGUI(unittest.TestCase):
+    def setUp(self):
+        pass
+
+    def test_empty_attributes(self):
+        self.test_gui = gui.GUI(None, config_file="../GUI.cfg.sample", in_source=True)
+        self.test_gui.window.after(9, self.test_gui.prepare_search)
+        self.test_gui.window.after(11, self.test_gui.window.destroy)
+        self.test_gui.run()
+
+        output_type = self.test_gui.configurator.load_output_types().split(',')[0]
+
+        self.assertEqual(self.test_gui.values.get('substance'), '')
+        self.assertEqual(self.test_gui.values.get('output_type'), output_type)
+        self.assertEqual(self.test_gui.values.get('output_name'), 'results.csv')
+
+    def test_no_configurations(self):
+        self.test_gui = gui.GUI(None, config_file="../GUI.cfg.sample")
+        self.test_gui.configurator = gui.ConfigImporter('')
+        self.test_gui.finish_with_search = True
+        self.test_gui.window.after(9, self.test_gui.prepare_search)
+        self.test_gui.window.after(11, self.test_gui.window.destroy)
+        self.test_gui.run()
+
+        self.assertEqual(self.test_gui.values.get('substance'), '')
+        self.assertEqual(self.test_gui.values.get('output_type'), 'csv')
+        self.assertEqual(self.test_gui.values.get('output_name'), 'results.csv')
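The testing pattern used here is worth noting: because `run()` blocks in the Tkinter mainloop, each test schedules the action it wants to exercise with `window.after(...)` before starting the loop, and schedules `window.destroy` slightly later so the loop terminates on its own. A self-contained sketch of the same idea with plain Tkinter (not the Fourmi GUI classes):

```
import Tkinter as tk  # Python 2; on Python 3 this would be `import tkinter as tk`


def demo_mainloop_test():
    window = tk.Tk()
    results = []

    # Schedule the "interaction" a few milliseconds after the loop starts...
    window.after(9, lambda: results.append('clicked'))
    # ...and schedule destroy() slightly later so mainloop() returns by itself.
    window.after(11, window.destroy)

    window.mainloop()  # blocks until destroy() fires
    assert results == ['clicked']


if __name__ == '__main__':
    demo_mainloop_test()
```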
utils/__init__.py
@@ -0,0 +1,2 @@
+import configurator
+import sourceloader
utils/configurator.py
@@ -1,4 +1,6 @@
 import ConfigParser
+import os
+import shutil

 from scrapy.utils.project import get_project_settings

@@ -12,7 +14,7 @@ class Configurator:
     def __init__(self):
         self.scrapy_settings = get_project_settings()

-    def set_output(self, filename, fileformat):
+    def set_output(self, filename, fileformat, compound):
         """
         This function manipulates the Scrapy output file settings that normally would be set in the settings file.
         In the Fourmi project these are command line arguments.
@@ -20,12 +22,12 @@ class Configurator:
         :param fileformat: The format in which the output will be.
         """

-        if filename != 'results.*format*':
+        if filename != '<compound>.*format*':
             self.scrapy_settings.overrides["FEED_URI"] = filename
         elif fileformat == "jsonlines":
-            self.scrapy_settings.overrides["FEED_URI"] = "results.json"
+            self.scrapy_settings.overrides["FEED_URI"] = compound + ".json"
         elif fileformat is not None:
-            self.scrapy_settings.overrides["FEED_URI"] = "results." + fileformat
+            self.scrapy_settings.overrides["FEED_URI"] = compound + "." + fileformat

         if fileformat is not None:
             self.scrapy_settings.overrides["FEED_FORMAT"] = fileformat
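In effect, when `--output` is left at its default placeholder the output file is now named after the compound, while an explicit `--output` still wins. A quick illustration of the intended behaviour, run from the project root; the expected values mirror the updated tests, and "methane" is just an example compound:

```
from utils.configurator import Configurator

conf = Configurator()

# Explicit filename: used as-is, regardless of compound.
conf.set_output("my_results.csv", "csv", "methane")
print conf.scrapy_settings["FEED_URI"]     # my_results.csv

# Default placeholder: the compound name becomes the file name.
conf.set_output("<compound>.*format*", "jsonlines", "methane")
print conf.scrapy_settings["FEED_URI"]     # methane.json
print conf.scrapy_settings["FEED_FORMAT"]  # jsonlines
```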
@@ -66,8 +68,16 @@ class Configurator:
         variables for sources
         :return a ConfigParser object of sources.cfg
         """
+        current_dir = os.path.dirname(os.path.abspath(__file__))
+        config_path = current_dir + '/../sources.cfg'
+        # [TODO]: location of sources.cfg should be softcoded eventually
+        if not os.path.isfile(config_path):
+            try:
+                shutil.copyfile(os.path.dirname(os.path.abspath(__file__)) + "/../sources.cfg.sample", config_path)
+            except IOError:
+                print "WARNING: Source configuration couldn't be found and couldn't be created."
         config = ConfigParser.ConfigParser()
-        config.read('sources.cfg')  # [TODO]: should be softcoded eventually
+        config.read(config_path)
         return config

     @staticmethod
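This "copy the .sample file on first use" step is what the changelog's automatic config file creation refers to: the configuration is now resolved next to the package with an absolute path, and bootstrapped from the sample when missing. A generic sketch of the pattern; the `ensure_config` helper below is illustrative only, not part of the Fourmi code base:

```
import os
import shutil


def ensure_config(config_path, sample_path):
    """Return config_path, copying sample_path into place if it is missing."""
    if not os.path.isfile(config_path):
        try:
            shutil.copyfile(sample_path, config_path)
        except IOError:
            print "WARNING: %s couldn't be found and couldn't be created." % config_path
    return config_path


here = os.path.dirname(os.path.abspath(__file__))
ensure_config(here + '/../sources.cfg', here + '/../sources.cfg.sample')
```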