Silver-T 2018-05-25 13:43:49 +10:00
commit abb5c60939
3 changed files with 112 additions and 14 deletions


@@ -1,8 +1,8 @@
@misc{openData,
title={Open Database License (ODbL) v1.0},
url={https://opendatacommons.org/licenses/odbl/1.0/},
journal={Open Data Commons},
year={2018},
@misc{openData,
title={Open Database License (ODbL) v1.0},
url={https://opendatacommons.org/licenses/odbl/1.0/},
journal={Open Data Commons},
year={2018},
month={Feb}
}
@techreport{knn,
@@ -21,6 +21,14 @@
year={1995},
publisher={Springer}
}
@inproceedings{svmnonlinear,
title={A training algorithm for optimal margin classifiers},
author={Boser, Bernhard E and Guyon, Isabelle M and Vapnik, Vladimir N},
booktitle={Proceedings of the fifth annual workshop on Computational learning theory},
pages={144--152},
year={1992},
organization={ACM}
}
@article{naivebayes,
title={Idiot's Bayes—not so stupid after all?},
author={Hand, David J and Yu, Keming},


@@ -24,11 +24,23 @@
\begin{document}
\title{What is Waldo?}
\author{Kelvin Davis \and Jip J. Dekker\and Anthony Silvestere}
\author{Kelvin Davis \and Jip J. Dekker \and Anthony Silvestere}
\maketitle
\begin{abstract}
%
The famous brand of picture puzzles ``Where's Waldo?'' relates well to many
unsolved image classification problems. This offers us the opportunity to
test different image classification methods on a data set that is both small
enough to process in a reasonable time span and easy for humans to
understand. In this report we compare the well-known machine learning
methods Naive Bayes, Support Vector Machines, $k$-Nearest Neighbors, and
Random Forest against the neural network architectures LeNet and Fully
Convolutional Neural Networks.
\todo{I don't like this big summation but I think it is the important
information}
Our comparison shows that \todo{...}
%
\end{abstract}
\section{Introduction}
@@ -87,7 +99,7 @@
architectures, as this method is currently the most widely used for image
classification.
\textbf{
\todo{
\\A couple of papers that may be useful (if needed):
- LeNet: http://yann.lecun.com/exdb/publis/pdf/lecun-01a.pdf
- AlexNet: http://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks
@@ -106,7 +118,17 @@
\paragraph{Naive Bayes Classifier}
\cite{naivebayes}
\cite{naivebayes} is a classification method based on Bayes' theorem, shown
in \Cref{eq:bayes}. Bayes' theorem allows us to calculate the probability of
an event while taking into account prior knowledge of the conditions related
to that event. In classification this allows us to calculate the probability
that a new instance belongs to a certain class based on its features. We
then assign the class with the highest probability.
\begin{equation}
\label{eq:bayes}
P(A\mid B)=\frac {P(B\mid A)\,P(A)}{P(B)}
\end{equation}
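Under the additional assumption that the features $x_1, \dots, x_n$ of an
instance are conditionally independent given its class (the ``naive''
assumption), the predicted class $\hat{y}$ is the one that maximizes this
probability:
\begin{equation}
  \label{eq:bayesrule}
  \hat{y} = \arg\max_{c} P(c) \prod_{i=1}^{n} P(x_i \mid c)
\end{equation}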
\paragraph{$k$-Nearest Neighbors}
@@ -120,11 +142,26 @@
\paragraph{Support Vector Machine}
\cite{svm}
(SVM) \cite{svm} has been very successful in many classification tasks. The
method is based on finding boundaries between the different classes. These
boundaries are defined as functions of the features of the instances and are
optimized to maximize the margin, the amount of space between the boundary
and the training instances on either side. Originally the boundaries were
linear functions, but later developments allow for the training of
non-linear boundaries~\cite{svmnonlinear}. Once training has defined the
boundaries, new instances are classified according to which side of the
boundary they fall on.
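For a linear boundary $w^\top x + b = 0$ and training instances $(x_i, y_i)$
with labels $y_i \in \{-1, +1\}$, this margin maximization can be expressed
as the optimization problem
\begin{equation}
  \label{eq:svm}
  \min_{w, b} \; \frac{1}{2}\|w\|^2
  \quad \mbox{subject to} \quad y_i(w^\top x_i + b) \geq 1 \mbox{ for all } i,
\end{equation}
while the non-linear variants replace the inner products with a kernel
function~\cite{svmnonlinear}.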
\paragraph{Random Forest}
\cite{randomforest}
\cite{randomforest} is a method that is based on classification decision
trees. In a decision tree a new instance is classified by walking down a
(binary) tree. Each non-leaf node contains a selection criterion that
determines which of its branches is followed. Every leaf node contains the
class that will be assigned to the instance if that node is reached. Trained
on their own, decision trees have a tendency to overfit, but in a random
forest a multitude of decision trees is trained with a certain degree of
randomness and their predictions are averaged, which avoids this problem.
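As an illustration, such an ensemble can be built with scikit-learn's
\texttt{RandomForestClassifier}, the implementation used in our experiments;
the data variables in this sketch are placeholders:
\begin{verbatim}
from sklearn.ensemble import RandomForestClassifier

# 100 trees, each fitted on a bootstrap sample of the training data,
# with a random subset of the features considered at every split.
forest = RandomForestClassifier(n_estimators=100)
forest.fit(X_train, y_train)          # X_train: features, y_train: labels
predictions = forest.predict(X_test)  # prediction aggregated over all trees
\end{verbatim}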
\subsection{Neural Network Architectures}
\tab There are many well-established architectures for neural networks, depending on the task being performed.
@@ -238,9 +275,6 @@
\clearpage % Ensures that the references are on a separate page
\pagebreak
% References
\section{References}
\renewcommand{\refname}{}
\bibliographystyle{alpha}
\bibliography{references}
\end{document}

mini_proj/traditionals.py Normal file

@@ -0,0 +1,56 @@
import numpy as np
import time as t
from sklearn import svm, ensemble, naive_bayes, neighbors

from _image_classifier import ImageClassifier


def precision(y_true, y_pred):
    """Fraction of predicted positives that are true positives."""
    y_pred = np.round(y_pred)
    num = np.sum(np.logical_and(y_true, y_pred))
    den = np.sum(y_pred)
    return np.divide(num, den)


def recall(y_true, y_pred):
    """Fraction of actual positives that are predicted as positive."""
    y_pred = np.round(y_pred)
    num = np.sum(np.logical_and(y_true, y_pred))
    den = np.sum(y_true)
    return np.divide(num, den)


def f_measure(y_true, y_pred):
    """Harmonic mean of precision and recall (F1 score)."""
    p = precision(y_true, y_pred)
    r = recall(y_true, y_pred)
    return 2 * p * r / (p + r)


def metric_test(iclf, metric, test_X, test_Y):
    """Evaluate `metric` on a fitted classifier's test-set predictions."""
    return metric(test_Y, iclf.predict(test_X))
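# Quick sanity check of the metric helpers on toy labels (illustrative only):
#   y_true = [1, 0, 1, 1], y_pred = [1, 1, 1, 0]
#   -> precision = 2/3, recall = 2/3, f_measure = 2/3 (their harmonic mean)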
## Open data
im_train = np.load('Waldo_train_data.npy')
im_test = np.load('Waldo_test_data.npy')
lbl_train = np.load('Waldo_train_lbl.npy')
lbl_test = np.load('Waldo_test_lbl.npy')
# lbl_train = to_categorical(lbl_train) # One-hot encoding the labels
# lbl_test = to_categorical(lbl_test)

# Evaluate a given metric for a fitted classifier on the held-out test set
my_metric_test = lambda iclf, f: metric_test(iclf, f, im_test, lbl_test)

## Define models
svm_iclf = ImageClassifier(svm.SVC)
knn_iclf = ImageClassifier(neighbors.KNeighborsClassifier)
naive_bayes_iclf = ImageClassifier(naive_bayes.GaussianNB)
ensemble_iclf = ImageClassifier(ensemble.RandomForestClassifier)

classifiers = [
    svm_iclf,
    knn_iclf,
    naive_bayes_iclf,
    ensemble_iclf,
]
for clf in classifiers:
    start = t.time()  # Records time before training
    clf.fit(im_train, lbl_train)
    end = t.time()  # Records time after training
    print("training time:", end - start)
    print(clf.score(im_test, lbl_test))
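# A possible extension (illustrative): also report the F-measure for each
# classifier by reusing the helpers above, e.g. inside the loop:
#     print("f-measure:", my_metric_test(clf, f_measure))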