Merge branch 'master' of https://github.com/Dekker1/ResearchMethods
This commit is contained in: commit abb5c60939
@@ -21,6 +21,14 @@
  year={1995},
  publisher={Springer}
}

@inproceedings{svmnonlinear,
  title={A training algorithm for optimal margin classifiers},
  author={Boser, Bernhard E and Guyon, Isabelle M and Vapnik, Vladimir N},
  booktitle={Proceedings of the fifth annual workshop on Computational learning theory},
  pages={144--152},
  year={1992},
  organization={ACM}
}

@article{naivebayes,
  title={Idiot's Bayes—not so stupid after all?},
  author={Hand, David J and Yu, Keming},
@@ -24,11 +24,23 @@

\begin{document}
\title{What is Waldo?}
\author{Kelvin Davis \and Jip J. Dekker \and Anthony Silvestere}
\maketitle

\begin{abstract}
%
The famous brand of picture puzzles ``Where's Waldo?'' relates well to many
unsolved image classification problems. This offers us the opportunity to
test different image classification methods on a data set that is both small
enough to compute in a reasonable time span and easy for humans to
understand. In this report we compare the well known machine learning
methods Naive Bayes, Support Vector Machines, $k$-Nearest Neighbors, and
Random Forest against the Neural Network architectures LeNet and Fully
Convolutional Neural Networks.
\todo{I don't like this big summation but I think it is the important
information}
Our comparison shows that \todo{...}
%
\end{abstract}

\section{Introduction}
@@ -87,7 +99,7 @@
architectures, as this method is currently the most used for image
classification.

\todo{
\\A couple of papers that may be useful (if needed):
- LeNet: http://yann.lecun.com/exdb/publis/pdf/lecun-01a.pdf
- AlexNet: http://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks
@@ -106,7 +118,17 @@

\paragraph{Naive Bayes Classifier}

\cite{naivebayes} is a classification method based on Bayes' theorem, shown
in \Cref{eq:bayes}. Bayes' theorem allows us to calculate the probability of
an event while taking into account prior knowledge of the conditions
surrounding that event. In classification this allows us to calculate the
probability that a new instance has a certain class based on its features;
we then assign the class with the highest probability.

\begin{equation}
  \label{eq:bayes}
  P(A\mid B)=\frac{P(B\mid A)\,P(A)}{P(B)}
\end{equation}

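For a feature vector $x = (x_1, \ldots, x_d)$, the naive
conditional-independence assumption turns \Cref{eq:bayes} into the decision
rule
\begin{equation}
  \hat{y} = \arg\max_{c} \; P(c) \prod_{i=1}^{d} P(x_i \mid c),
\end{equation}
a standard formulation included here only to illustrate how the theorem is
evaluated for each candidate class.
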
\paragraph{$k$-Nearest Neighbors}

@@ -120,11 +142,26 @@

\paragraph{Support Vector Machine}

(SVM) \cite{svm} has been very successful in many classification tasks. The
method is based on finding boundaries between the different classes. The
boundaries are defined as functions on the features of the instances, and
are optimized to leave as much space as possible between the boundaries and
the training instances on both sides. Originally the boundaries were linear
functions, but more recent developments allow for the training of non-linear
boundaries~\cite{svmnonlinear}. Once training has defined the boundaries,
new instances are classified according to the side of the boundary on which
they fall.
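For linearly separable data with labels $y_{i} \in \{-1, +1\}$, this margin
maximization is the standard optimization problem
\begin{equation}
  \min_{w,\, b} \ \frac{1}{2} \| w \|^{2}
  \qquad \mbox{subject to} \qquad
  y_{i} (w \cdot x_{i} + b) \geq 1 \ \mbox{for all } i,
\end{equation}
where the resulting boundary is the hyperplane $w \cdot x + b = 0$; it is
given here only to make the geometric description above concrete.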

\paragraph{Random Forest}

\cite{randomforest} is a method based on classification decision trees. In a
decision tree a new instance is classified by going down a (binary) tree:
each non-leaf node contains a selection criterion for its branches, and
every leaf node contains the class that will be assigned to the instance if
that node is reached. On their own, decision trees have a tendency to
overfit, but in a random forest a multitude of decision trees is trained
with a certain degree of randomness and the mean of these trees is used,
which avoids this problem.
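With $T$ trained trees $h_{1}, \ldots, h_{T}$, the forest assigns to an
instance $x$ the class that receives the most votes,
\begin{equation}
  \hat{y} = \arg\max_{c} \sum_{t=1}^{T} \mathbf{1}\!\left[ h_{t}(x) = c \right],
\end{equation}
and it is this averaging over many randomized trees that counteracts the
overfitting of any single tree (a standard formulation, included here for
illustration).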

\subsection{Neural Network Architectures}
\tab There are many well established architectures for Neural Networks depending on the task being performed.
@@ -238,9 +275,6 @@

\clearpage % Ensures that the references are on a separate page
\pagebreak
\bibliographystyle{alpha}
\bibliography{references}
\end{document}
mini_proj/traditionals.py (new file, 56 lines)
@@ -0,0 +1,56 @@
import numpy as np
import time as t
from sklearn import svm, ensemble, naive_bayes, neighbors
from _image_classifier import ImageClassifier


def precision(y_true, y_pred):
    # Fraction of predicted positives that are actual positives
    y_pred = np.round(y_pred)
    num = np.sum(np.logical_and(y_true, y_pred))
    den = np.sum(y_pred)
    return np.divide(num, den)


def recall(y_true, y_pred):
    # Fraction of actual positives that were predicted as positive
    y_pred = np.round(y_pred)
    num = np.sum(np.logical_and(y_true, y_pred))
    den = np.sum(y_true)
    return np.divide(num, den)


def f_measure(y_true, y_pred):
    # Harmonic mean of precision and recall
    p = precision(y_true, y_pred)
    r = recall(y_true, y_pred)
    return 2 * p * r / (p + r)


def metric_test(iclf, metric, test_X, test_Y):
    # Evaluate a fitted classifier on the test set with the given metric
    return metric(test_Y, iclf.predict(test_X))


## Open data
im_train = np.load('Waldo_train_data.npy')
im_test = np.load('Waldo_test_data.npy')

lbl_train = np.load('Waldo_train_lbl.npy')
lbl_test = np.load('Waldo_test_lbl.npy')

# lbl_train = to_categorical(lbl_train) # One hot encoding the labels
# lbl_test = to_categorical(lbl_test)

my_metric_test = lambda iclf, f: metric_test(iclf, f, im_test, lbl_test)

## Define models
svm_iclf = ImageClassifier(svm.SVC)                               # Support Vector Machine
tree_iclf = ImageClassifier(neighbors.KNeighborsClassifier)       # k-Nearest Neighbors
naive_bayes_iclf = ImageClassifier(naive_bayes.GaussianNB)        # Naive Bayes
ensemble_iclf = ImageClassifier(ensemble.RandomForestClassifier)  # Random Forest

classifiers = [
    svm_iclf,
    tree_iclf,
    naive_bayes_iclf,
    ensemble_iclf,
]

for clf in classifiers:
    start = t.time()  # Records time before training
    clf.fit(im_train, lbl_train)
    end = t.time()  # Records time after training
    print("training time:", end - start)
    print(clf.score(im_test, lbl_test))
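
The `_image_classifier` module is not part of this commit, so the
`ImageClassifier` wrapper itself is not shown. Based only on how it is used
above (constructed from an sklearn estimator class and driven through `fit`,
`predict` and `score` on image arrays), a minimal sketch could look like the
following; the flattening step and the constructor signature are assumptions,
not the repository's actual implementation.

# Hypothetical sketch of ImageClassifier, inferred from its usage above;
# the real _image_classifier.py in the repository may differ.
import numpy as np


class ImageClassifier:
    def __init__(self, estimator_cls, **kwargs):
        # Wrap an sklearn estimator class, e.g. svm.SVC or naive_bayes.GaussianNB
        self.model = estimator_cls(**kwargs)

    @staticmethod
    def _flatten(images):
        # sklearn estimators expect 2-D input of shape (n_samples, n_features),
        # so the image dimensions are collapsed into one feature axis (assumption)
        images = np.asarray(images)
        return images.reshape(len(images), -1)

    def fit(self, images, labels):
        self.model.fit(self._flatten(images), np.ravel(labels))
        return self

    def predict(self, images):
        return self.model.predict(self._flatten(images))

    def score(self, images, labels):
        return self.model.score(self._flatten(images), np.ravel(labels))

Under that assumption, `my_metric_test(svm_iclf, f_measure)` would report the
F-measure of the SVM on the held-out Waldo images, complementing the accuracy
printed by `clf.score` in the loop above.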