Merge branch 'master' of https://github.com/Dekker1/ResearchMethods
This commit is contained in: commit abb5c60939
@@ -21,6 +21,14 @@
  year={1995},
  publisher={Springer}
}

@inproceedings{svmnonlinear,
  title={A training algorithm for optimal margin classifiers},
  author={Boser, Bernhard E and Guyon, Isabelle M and Vapnik, Vladimir N},
  booktitle={Proceedings of the fifth annual workshop on Computational learning theory},
  pages={144--152},
  year={1992},
  organization={ACM}
}

@article{naivebayes,
  title={Idiot's Bayes—not so stupid after all?},
  author={Hand, David J and Yu, Keming},
@@ -24,11 +24,23 @@

\begin{document}
\title{What is Waldo?}
\author{Kelvin Davis \and Jip J. Dekker \and Anthony Silvestere}
\maketitle

\begin{abstract}
%
The famous brand of picture puzzles ``Where's Waldo?'' relates well to many
unsolved image classification problems. This offers us the opportunity to
test different image classification methods on a data set that is both small
enough to compute in a reasonable time span and easy for humans to
understand. In this report we compare the well known machine learning
methods Naive Bayes, Support Vector Machines, $k$-Nearest Neighbors, and
Random Forest against the Neural Network architectures LeNet and Fully
Convolutional Neural Networks.
\todo{I don't like this big summation but I think it is the important
information}
Our comparison shows that \todo{...}
%
\end{abstract}

\section{Introduction}
@@ -87,7 +99,7 @@
architectures, as this method is currently the most used for image
classification.

\todo{
\\A couple of papers that may be useful (if needed):
- LeNet: http://yann.lecun.com/exdb/publis/pdf/lecun-01a.pdf
- AlexNet: http://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks
@@ -106,7 +118,17 @@

\paragraph{Naive Bayes Classifier}

\cite{naivebayes} is a classification method based on Bayes' theorem, shown
in \Cref{eq:bayes}. Bayes' theorem allows us to calculate the probability of
an event while taking into account prior knowledge of the conditions
surrounding that event. In classification this allows us to calculate the
probability that a new instance has a certain class based on its features;
we then assign the class with the highest probability.

\begin{equation}
  \label{eq:bayes}
  P(A\mid B)=\frac{P(B\mid A)\,P(A)}{P(B)}
\end{equation}

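For a feature vector $x = (x_1, \ldots, x_d)$, the naive
conditional-independence assumption turns \Cref{eq:bayes} into the decision
rule
\begin{equation}
  \hat{y} = \arg\max_{c} \; P(c) \prod_{i=1}^{d} P(x_i \mid c),
\end{equation}
a standard formulation included here only to illustrate how the theorem is
evaluated for each candidate class.
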
\paragraph{$k$-Nearest Neighbors}

@@ -120,11 +142,26 @@

\paragraph{Support Vector Machine}

(SVM) \cite{svm} has been very successful in many classification tasks. The
method is based on finding boundaries between the different classes. The
boundaries are defined as functions on the features of the instances, and
are optimized to leave as much space as possible between the boundaries and
the training instances on both sides. Originally the boundaries were linear
functions, but more recent developments allow for the training of non-linear
boundaries~\cite{svmnonlinear}. Once training has defined the boundaries,
new instances are classified according to the side of the boundary on which
they fall.
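For linearly separable data with labels $y_{i} \in \{-1, +1\}$, this margin
maximization is the standard optimization problem
\begin{equation}
  \min_{w,\, b} \ \frac{1}{2} \| w \|^{2}
  \qquad \mbox{subject to} \qquad
  y_{i} (w \cdot x_{i} + b) \geq 1 \ \mbox{for all } i,
\end{equation}
where the resulting boundary is the hyperplane $w \cdot x + b = 0$; it is
given here only to make the geometric description above concrete.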

\paragraph{Random Forest}

\cite{randomforest} is a method based on classification decision trees. In a
decision tree a new instance is classified by going down a (binary) tree:
each non-leaf node contains a selection criterion for its branches, and
every leaf node contains the class that will be assigned to the instance if
that node is reached. On their own, decision trees have a tendency to
overfit, but in a random forest a multitude of decision trees is trained
with a certain degree of randomness and the mean of these trees is used,
which avoids this problem.
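With $T$ trained trees $h_{1}, \ldots, h_{T}$, the forest assigns to an
instance $x$ the class that receives the most votes,
\begin{equation}
  \hat{y} = \arg\max_{c} \sum_{t=1}^{T} \mathbf{1}\!\left[ h_{t}(x) = c \right],
\end{equation}
and it is this averaging over many randomized trees that counteracts the
overfitting of any single tree (a standard formulation, included here for
illustration).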

\subsection{Neural Network Architectures}
\tab There are many well established architectures for Neural Networks depending on the task being performed.
@@ -238,9 +275,6 @@

\clearpage % Ensures that the references are on a separate page
\pagebreak
\bibliographystyle{alpha}
\bibliography{references}
\end{document}
mini_proj/traditionals.py (new file, 56 lines)
@@ -0,0 +1,56 @@
import numpy as np
import time as t
from sklearn import svm, ensemble, naive_bayes, neighbors
from _image_classifier import ImageClassifier


def precision(y_true, y_pred):
    # Fraction of predicted positives that are actual positives
    y_pred = np.round(y_pred)
    num = np.sum(np.logical_and(y_true, y_pred))
    den = np.sum(y_pred)
    return np.divide(num, den)


def recall(y_true, y_pred):
    # Fraction of actual positives that were predicted as positive
    y_pred = np.round(y_pred)
    num = np.sum(np.logical_and(y_true, y_pred))
    den = np.sum(y_true)
    return np.divide(num, den)


def f_measure(y_true, y_pred):
    # Harmonic mean of precision and recall
    p = precision(y_true, y_pred)
    r = recall(y_true, y_pred)
    return 2 * p * r / (p + r)


def metric_test(iclf, metric, test_X, test_Y):
    # Evaluate a fitted classifier on the test set with the given metric
    return metric(test_Y, iclf.predict(test_X))


## Open data
im_train = np.load('Waldo_train_data.npy')
im_test = np.load('Waldo_test_data.npy')

lbl_train = np.load('Waldo_train_lbl.npy')
lbl_test = np.load('Waldo_test_lbl.npy')

# lbl_train = to_categorical(lbl_train) # One hot encoding the labels
# lbl_test = to_categorical(lbl_test)

my_metric_test = lambda iclf, f: metric_test(iclf, f, im_test, lbl_test)

## Define models
svm_iclf = ImageClassifier(svm.SVC)                               # Support Vector Machine
tree_iclf = ImageClassifier(neighbors.KNeighborsClassifier)       # k-Nearest Neighbors
naive_bayes_iclf = ImageClassifier(naive_bayes.GaussianNB)        # Naive Bayes
ensemble_iclf = ImageClassifier(ensemble.RandomForestClassifier)  # Random Forest

classifiers = [
    svm_iclf,
    tree_iclf,
    naive_bayes_iclf,
    ensemble_iclf,
]

for clf in classifiers:
    start = t.time()  # Records time before training
    clf.fit(im_train, lbl_train)
    end = t.time()  # Records time after training
    print("training time:", end - start)
    print(clf.score(im_test, lbl_test))
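
The `_image_classifier` module is not part of this commit, so the
`ImageClassifier` wrapper itself is not shown. Based only on how it is used
above (constructed from an sklearn estimator class and driven through `fit`,
`predict` and `score` on image arrays), a minimal sketch could look like the
following; the flattening step and the constructor signature are assumptions,
not the repository's actual implementation.

# Hypothetical sketch of ImageClassifier, inferred from its usage above;
# the real _image_classifier.py in the repository may differ.
import numpy as np


class ImageClassifier:
    def __init__(self, estimator_cls, **kwargs):
        # Wrap an sklearn estimator class, e.g. svm.SVC or naive_bayes.GaussianNB
        self.model = estimator_cls(**kwargs)

    @staticmethod
    def _flatten(images):
        # sklearn estimators expect 2-D input of shape (n_samples, n_features),
        # so the image dimensions are collapsed into one feature axis (assumption)
        images = np.asarray(images)
        return images.reshape(len(images), -1)

    def fit(self, images, labels):
        self.model.fit(self._flatten(images), np.ravel(labels))
        return self

    def predict(self, images):
        return self.model.predict(self._flatten(images))

    def score(self, images, labels):
        return self.model.score(self._flatten(images), np.ravel(labels))

Under that assumption, `my_metric_test(svm_iclf, f_measure)` would report the
F-measure of the SVM on the held-out Waldo images, complementing the accuracy
printed by `clf.score` in the loop above.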