diff --git a/.gitignore b/.gitignore index 14c4e72..e568415 100644 --- a/.gitignore +++ b/.gitignore @@ -6,6 +6,8 @@ #may contain authentication information sources.cfg +#Another of our config files +GUI.cfg #THINGS WE WOULD NEVER EVER WANT! #ignore thumbnails created by windows diff --git a/.travis.yml b/.travis.yml index 24c5dc5..d01fbbd 100644 --- a/.travis.yml +++ b/.travis.yml @@ -3,6 +3,10 @@ language: python python: 2.7 +before_install: + - "export DISPLAY=:99.0" + - "sh -e /etc/init.d/xvfb start" + # command to install dependencies, e.g. pip install -r requirements.txt --use-mirrors install: - pip install Scrapy docopt @@ -10,10 +14,10 @@ install: # command to run tests, e.g. python setup.py test script: - - nosetests --with-coverage --cover-package=FourmiCrawler,utils tests + - nosetests --with-coverage --cover-package=FourmiCrawler,utils,GUI tests notifications: slack: descartes2:6sgCzx3PvrO9IIMwKxj12dDM after_success: - coveralls --verbose \ No newline at end of file + coveralls --verbose diff --git a/FourmiCrawler/sources/PubChem.py b/FourmiCrawler/sources/PubChem.py index 186aff1..18633a0 100644 --- a/FourmiCrawler/sources/PubChem.py +++ b/FourmiCrawler/sources/PubChem.py @@ -62,8 +62,7 @@ class PubChem(Source): Request(url=self.website_pubchem[:-2].replace("\\", "") + self.data_url % cid, callback=self.parse_data)) return requests - @staticmethod - def parse_data(response): + def parse_data(self, response): """ Parse data found in 'Chemical and Physical properties' part of a substance page. 
# Reconstructed from a whitespace-collapsed patch.  In the repository these
# definitions live in GUI/configImporter.py (ConfigImporter) and GUI/gui.py
# (GUI); gui.py obtains ConfigImporter through `from configImporter import *`.
import os
import shutil
import sys

try:  # Python 2 module names, as used by the original code
    import ConfigParser as configparser
    from Tkinter import *
    from tkFileDialog import asksaveasfilename
except ImportError:  # the same modules under their Python 3 names
    import configparser
    from tkinter import *
    from tkinter.filedialog import asksaveasfilename


class ConfigImporter(object):
    """Read the GUI preferences from the ``[GUI]`` section of a config file."""

    SECTION = 'GUI'

    def __init__(self, filename):
        """Read the filename into the parser.

        :param filename: Path of the configuration file to read.
        """
        self.filename = filename
        self.parser = configparser.ConfigParser()
        self.parser.read(self.filename)

    def _get(self, option, default):
        """Return the raw value of ``option``, or ``default`` if it cannot be read."""
        try:
            return self.parser.get(self.SECTION, option)
        except configparser.Error:
            # Missing section/option or a malformed value: use the default.
            return default

    def load_common_attributes(self):
        """Loads common attributes from the initialized file.

        :return: The comma separated ``CommonParameters`` value.
        """
        return self._get('CommonParameters', 'One, Two, Three')

    def load_output_types(self):
        """Loads output types from the initialized file.

        :return: The comma separated ``OutputTypes`` value.
        """
        return self._get('OutputTypes', 'csv')

    def load_always_attributes(self):
        """Loads attributes that are always searched for from the initialized file.

        :return: The comma separated ``AlwaysParameters`` value.
        """
        return self._get('AlwaysParameters', 'Name, Weight')


def _split_stripped(text):
    """Split a comma separated string and strip whitespace from every piece."""
    return [piece.strip() for piece in text.split(',')]


class GUI(object):
    """Tkinter front-end that collects search options and starts the search."""

    def __init__(self, search, config_file='GUI.cfg', sourceloader=None, in_source=True):
        """Boots the window, configuration.

        :param search: Callable invoked by execute_search as ``search(arguments, sourceloader)``.
        :param config_file: Path of the GUI configuration file.
        :param sourceloader: Object handed through to ``search`` unchanged.
        :param in_source: When False, ``config_file`` is resolved relative to the
            directory above this package instead of being used as given.
        """
        package_dir = os.path.dirname(os.path.abspath(__file__))
        if not in_source:
            # os.path.join supplies the separator the old string concatenation lacked.
            config_file = os.path.join(package_dir, os.pardir, config_file)
        if not os.path.isfile(config_file):
            # First run: seed the configuration from the shipped sample.
            sample_file = os.path.join(package_dir, os.pardir, 'GUI.cfg.sample')
            try:
                shutil.copyfile(sample_file, config_file)
            except IOError:
                print("GUI configuration couldn't be found and couldn't be created.")
                sys.exit()
        self.configurator = ConfigImporter(config_file)
        self.sourceloader = sourceloader
        self.finish_with_search = False
        self.values = {}
        self.required_variables = ['substance']
        self.search = search
        self.window, self.variables = self.generate_window(self.load_common_attributes(), self.load_output_types())

    def load_common_attributes(self):
        """Calls the configuration parser for common attributes.

        :return: List of attribute names.
        """
        return _split_stripped(self.configurator.load_common_attributes())

    def load_output_types(self):
        """Calls the configuration parser for output types.

        :return: List of output type names.
        """
        return _split_stripped(self.configurator.load_output_types())

    def load_always_attributes(self):
        """Calls the configuration parser for attributes that are always used.

        :return: The attribute names joined by commas, without surrounding spaces.
        """
        return ','.join(_split_stripped(self.configurator.load_always_attributes()))

    def set_output(self):
        """Ask for an output file and show the choice on the select button."""
        filename = asksaveasfilename()
        if not filename:
            # Dialog was cancelled: keep the previously selected output file.
            return
        self.variable_output_name.set(filename)
        self.button_output_name.config(text=filename)

    def generate_window(self, common_attributes, output_types):
        """Creates all widgets and variables in the window.

        :param common_attributes: Names offered in the "Common Parameters" list.
        :param output_types: Accepted for interface compatibility; not used here.
        :return: Tuple of the Tk window and a dict mapping value names to the
            Tk variables/widgets that hold them.
        """
        window = Tk()
        window.wm_title("Fourmi Crawler")

        variables = {}

        # Substance entry.
        variable_substance = StringVar(window)
        frame_substance = Frame(window)
        label_substance = Label(frame_substance, text="Substance: ")
        input_substance = Entry(frame_substance, font=("Helvetica", 12), width=25,
                                textvariable=variable_substance)
        variables["substance"] = variable_substance
        frame_substance.pack(side=TOP)
        label_substance.pack()
        input_substance.pack()
        input_substance.focus()

        # Free-text parameters.
        frame_all_attributes = Frame(window)
        frame_selecting_attributes = Frame(frame_all_attributes)
        frame_new_attributes = Frame(frame_selecting_attributes)
        label_new_attributes = Label(frame_new_attributes, text="Parameters: ")
        input_new_attributes = Text(frame_new_attributes, font=("Helvetica", 8), width=25, height=7,
                                    padx=5, pady=5)
        variables["new_attributes"] = input_new_attributes
        frame_new_attributes.pack(side=LEFT)
        label_new_attributes.pack()
        input_new_attributes.pack()

        # Multi-select list of the configured common parameters.
        frame_common_attributes = Frame(frame_selecting_attributes)
        label_common_attributes = Label(frame_common_attributes, text="Common Parameters: ")
        input_common_attributes = Listbox(frame_common_attributes, selectmode=MULTIPLE, height=7)
        scrollbar_common_attributes = Scrollbar(frame_common_attributes)
        input_common_attributes.config(yscrollcommand=scrollbar_common_attributes.set)
        scrollbar_common_attributes.config(command=input_common_attributes.yview)
        if common_attributes:
            input_common_attributes.insert(END, *common_attributes)
        variables["common_attributes"] = input_common_attributes
        frame_common_attributes.pack(side=RIGHT)
        label_common_attributes.pack(side=TOP)
        input_common_attributes.pack(side=LEFT)
        scrollbar_common_attributes.pack(side=RIGHT, fill=Y)
        frame_selecting_attributes.pack()

        # Start / cancel buttons.
        frame_last = Frame(window)
        search_button = Button(frame_last, text="Start search", command=self.prepare_search)
        cancel_button = Button(frame_last, text="Cancel", command=window.destroy)
        frame_last.pack(side=BOTTOM)
        search_button.pack(side=LEFT)
        cancel_button.pack(side=RIGHT)

        # Output file selection.
        frame_name = Frame(window)
        frame_output_name = Frame(frame_name)
        label_output_name = Label(frame_output_name, text='Output file:')
        self.variable_output_name = StringVar(window)
        self.variable_output_name.set('results.csv')
        variables['output_name'] = self.variable_output_name
        self.button_output_name = Button(frame_output_name, command=self.set_output, text="Select file")
        frame_output_name.pack(side=LEFT)
        label_output_name.pack()
        self.button_output_name.pack()
        frame_name.pack(side=BOTTOM)

        # Checkboxes.
        frame_checkboxes = Frame(window)
        frame_checkbox_attributes = Frame(frame_checkboxes)
        variable_all_attributes = BooleanVar(window)
        variable_all_attributes.set(True)
        input_all_attributes = Checkbutton(frame_checkbox_attributes, text="Search ALL parameters",
                                           variable=variable_all_attributes)
        variables["all_attributes"] = variable_all_attributes
        frame_checkbox_attributes.pack(side=LEFT)
        input_all_attributes.pack()

        frame_logging = Frame(frame_checkboxes)
        variable_logging = BooleanVar(window)
        variable_logging.set(False)
        input_logging = Checkbutton(frame_logging, text="Verbose logging", variable=variable_logging)
        variables['logging'] = variable_logging
        frame_logging.pack(side=RIGHT)
        frame_checkboxes.pack(side=BOTTOM)
        input_logging.pack()
        frame_all_attributes.pack()

        return window, variables

    def prepare_search(self):
        """Saves the values from the window for later retrieval.

        The window is closed only when every required value is non-empty.
        """
        values = {"Always attributes": self.load_always_attributes()}
        for name, var in self.variables.items():
            if isinstance(var, (StringVar, BooleanVar)):
                values[name] = var.get()
            elif isinstance(var, Text):
                values[name] = str(var.get("1.0", END)).strip()
            elif isinstance(var, Listbox):
                values[name] = ", ".join([var.get(int(i)) for i in var.curselection()])
            else:
                print("No known class, {}, {}".format(name, var))

        values['output_name'] = self.variable_output_name.get()
        values['output_type'] = self.check_output_type(values.get('output_name'))

        self.values = values
        self.finish_with_search = all(values.get(name) != '' for name in self.required_variables)
        if self.finish_with_search:
            self.window.destroy()
        # TODO: tell the user which required information is missing
        # (the original had a disabled tkMessageBox.showinfo call here).

    def execute_search(self):
        """Calls the Fourmi crawler with the values from the GUI"""
        if self.values.get('all_attributes'):
            attributes = ".*"
        else:
            # These are the keys prepare_search actually stores; the previous
            # key names never matched, so the selection became "None,None,...".
            attribute_keys = ['new_attributes', 'common_attributes', 'Always attributes']
            selected = []
            for key in attribute_keys:
                raw = str(self.values.get(key) or '')
                # Drop blanks so an empty field does not add an empty entry.
                selected.extend(part.strip() for part in raw.split(',') if part.strip())
            attributes = ','.join(selected)
        output_file = "file://" + str(self.values.get('output_name'))  # Dealing with absolute paths

        arguments = {'--attributes': attributes,
                     '--exclude': None,
                     '--format': self.values.get('output_type'),
                     '--help': False,
                     '--include': None,
                     '--log': 'log.txt',
                     '--output': output_file,
                     # NOTE(review): "Verbose logging" checked maps to 0 and
                     # unchecked to 3 -- confirm this is not inverted.
                     '-v': 0 if self.values.get('logging') else 3,
                     '--version': False,
                     # NOTE(review): the empty-string key carries the substance;
                     # it looks like a positional-argument key whose name was
                     # lost -- confirm against the usage text in fourmi.py.
                     '': self.values.get('substance'),
                     'list': False,
                     'search': True}

        self.search(arguments, self.sourceloader)

    def run(self):
        """Starts the window and the search."""
        self.window.mainloop()
        if self.finish_with_search:
            self.execute_search()

    def check_output_type(self, filename):
        """Return the output type matching the extension of ``filename``.

        :param filename: Name of the output file.
        :return: The file extension when it is a configured output type,
            otherwise the first configured output type.
        """
        output_types = self.load_output_types()
        # splitext keeps a dot-less name such as "json" from counting as an extension.
        extension = os.path.splitext(str(filename))[1].lstrip('.')
        if extension in output_types:
            return extension
        return output_types[0]
# Reconstructed from a whitespace-collapsed patch (tests/test_gui.py).
import os
import unittest

from GUI import gui

# Resolve the sample configuration relative to this file so the tests do not
# depend on the directory the test runner is started from.
SAMPLE_CONFIG = os.path.join(os.path.dirname(os.path.abspath(__file__)), os.pardir, 'GUI.cfg.sample')


class TestGUI(unittest.TestCase):
    """Drive the GUI through its Tk event loop without user interaction."""

    def _collect_values_and_close(self):
        """Schedule prepare_search, then close the window, and run the GUI."""
        # prepare_search must fire before the window is destroyed, hence the
        # two staggered timers.
        self.test_gui.window.after(9, self.test_gui.prepare_search)
        self.test_gui.window.after(11, self.test_gui.window.destroy)
        self.test_gui.run()

    def test_empty_attributes(self):
        """An untouched window yields an empty substance and default output."""
        self.test_gui = gui.GUI(None, config_file=SAMPLE_CONFIG, in_source=True)
        self._collect_values_and_close()

        output_type = self.test_gui.configurator.load_output_types().split(',')[0]

        self.assertEqual(self.test_gui.values.get('substance'), '')
        self.assertEqual(self.test_gui.values.get('output_type'), output_type)
        self.assertEqual(self.test_gui.values.get('output_name'), 'results.csv')

    def test_no_configurations(self):
        """Without a readable configuration the built-in defaults are used."""
        self.test_gui = gui.GUI(None, config_file=SAMPLE_CONFIG)
        # An importer for a non-existent file falls back to its defaults.
        self.test_gui.configurator = gui.ConfigImporter('')
        self.test_gui.finish_with_search = True
        self._collect_values_and_close()

        self.assertEqual(self.test_gui.values.get('substance'), '')
        self.assertEqual(self.test_gui.values.get('output_type'), 'csv')
        self.assertEqual(self.test_gui.values.get('output_name'), 'results.csv')