Merge branch 'master' of https://github.com/Dekker1/ResearchMethods
commit abb5c60939
@@ -21,6 +21,14 @@
  year={1995},
  publisher={Springer}
}
@inproceedings{svmnonlinear,
  title={A training algorithm for optimal margin classifiers},
  author={Boser, Bernhard E and Guyon, Isabelle M and Vapnik, Vladimir N},
  booktitle={Proceedings of the fifth annual workshop on Computational learning theory},
  pages={144--152},
  year={1992},
  organization={ACM}
}
@article{naivebayes,
  title={Idiot's Bayes—not so stupid after all?},
  author={Hand, David J and Yu, Keming},
@@ -28,7 +28,19 @@
\maketitle

\begin{abstract}

%
The famous brand of picture puzzles ``Where's Waldo?'' relates well to many
unsolved image classification problems. This offers us the opportunity to
test different image classification methods on a data set that is both small
enough to compute in a reasonable time span and easy for humans to
understand. In this report we compare the well-known machine learning
methods Naive Bayes, Support Vector Machines, $k$-Nearest Neighbors, and
Random Forest against the neural network architectures LeNet and Fully
Convolutional Neural Networks.
\todo{I don't like this big summation but I think it is the important
information}
Our comparison shows that \todo{...}
%
\end{abstract}

\section{Introduction}
@@ -87,7 +99,7 @@
architectures, as this method is currently the most widely used for image
classification.

\textbf{
\todo{
\\A couple of papers that may be useful (if needed):
- LeNet: http://yann.lecun.com/exdb/publis/pdf/lecun-01a.pdf
- AlexNet: http://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks
@@ -106,7 +118,17 @@

\paragraph{Naive Bayes Classifier}

\cite{naivebayes}
\cite{naivebayes} is a classification method based on Bayes' theorem, shown
in \Cref{eq:bayes}. Bayes' theorem allows us to calculate the probability of
an event while taking into account prior knowledge of the conditions
surrounding that event. In classification this allows us to calculate the
probability that a new instance has a certain class based on its features.
We then assign the class that has the highest probability.

\begin{equation}
\label{eq:bayes}
P(A\mid B)=\frac{P(B\mid A)\,P(A)}{P(B)}
\end{equation}
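
Under the additional (``naive'') assumption that the features
$x_1, \ldots, x_n$ of an instance are conditionally independent given the
class, this amounts to assigning the class $C$ that maximizes the product of
the class prior and the per-feature likelihoods, sketched below:

\begin{equation}
\hat{y} = \mathop{\mathrm{arg\,max}}_{C} \; P(C) \prod_{i=1}^{n} P(x_i \mid C)
\end{equation}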

\paragraph{$k$-Nearest Neighbors}

@@ -120,11 +142,26 @@

\paragraph{Support Vector Machine}

\cite{svm}
(SVM) \cite{svm} has been very successful in many classification tasks. The
method is based on finding boundaries between the different classes. The
boundaries are defined as functions of the features of the instances and are
optimized so that the margin between the boundary and the training instances
on either side is as large as possible. Originally the boundaries were
linear functions, but more recent developments allow for the training of
non-linear boundaries~\cite{svmnonlinear}. Once training has defined the
boundaries, new instances are classified according to which side of the
boundary they fall on.
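
For a linear boundary $w^{\top} x + b = 0$ with training instances
$(x_i, y_i)$, $y_i \in \{-1, +1\}$, this margin maximization can be sketched
(in the hard-margin case, assuming the classes are linearly separable) as:

\begin{equation}
\min_{w, b} \; \frac{1}{2} \| w \|^{2} \quad \mbox{subject to} \quad
y_i (w^{\top} x_i + b) \geq 1 \;\; \mbox{for all } i
\end{equation}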

\paragraph{Random Forest}

\cite{randomforest}
\cite{randomforest} is a method that is based on classification decision
trees. In a decision tree a new instance is classified by walking down a
(binary) tree: each non-leaf node contains a selection criterion that
decides which branch to follow, and every leaf node contains the class that
is assigned to the instance if that node is reached. Individually trained
decision trees have a tendency to overfit, but in a random forest a
multitude of decision trees is trained with a certain degree of randomness
and the averaged prediction of these trees is used, which avoids this
problem.
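
For a classification task this averaging typically amounts to a majority
vote over the $B$ trained trees $h_1, \ldots, h_B$:

\begin{equation}
\hat{y}(x) = \mathrm{mode}\{\, h_1(x), \ldots, h_B(x) \,\}
\end{equation}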

\subsection{Neural Network Architectures}
\tab There are many well-established architectures for neural networks, depending on the task being performed.
@@ -238,9 +275,6 @@

\clearpage % Ensures that the references are on a separate page
\pagebreak
% References
\section{References}
\renewcommand{\refname}{}
\bibliographystyle{alpha}
\bibliography{references}
\end{document}

mini_proj/traditionals.py (new file, 56 lines)
@@ -0,0 +1,56 @@
import numpy as np
import time as t
from sklearn import svm, ensemble, naive_bayes, neighbors
from _image_classifier import ImageClassifier

def precision(y_true, y_pred):
    # Fraction of predicted positives that are true positives.
    y_pred = np.round(y_pred)
    num = np.sum(np.logical_and(y_true, y_pred))
    den = np.sum(y_pred)
    return np.divide(num, den)

def recall(y_true, y_pred):
    # Fraction of actual positives that are correctly predicted.
    y_pred = np.round(y_pred)
    num = np.sum(np.logical_and(y_true, y_pred))
    den = np.sum(y_true)
    return np.divide(num, den)

def f_measure(y_true, y_pred):
    # Harmonic mean of precision and recall.
    p = precision(y_true, y_pred)
    r = recall(y_true, y_pred)
    return 2 * p * r / (p + r)
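
# Illustrative check (not part of the original script): with
# y_true = [1, 0, 1, 1] and y_pred = [1, 0, 0, 1] there are 2 true positives,
# 2 predicted positives and 3 actual positives, so precision = 1.0,
# recall = 2/3 and f_measure = 2 * 1.0 * (2/3) / (1.0 + 2/3) = 0.8.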

def metric_test(iclf, metric, test_X, test_Y):
    # Evaluate a fitted classifier on the test set with the given metric.
    return metric(test_Y, iclf.predict(test_X))

## Open data
im_train = np.load('Waldo_train_data.npy')
im_test = np.load('Waldo_test_data.npy')

lbl_train = np.load('Waldo_train_lbl.npy')
lbl_test = np.load('Waldo_test_lbl.npy')

# lbl_train = to_categorical(lbl_train) # One hot encoding the labels
# lbl_test = to_categorical(lbl_test)

my_metric_test = lambda iclf, f: metric_test(iclf, f, im_test, lbl_test)

# ## Define models
svm_iclf = ImageClassifier(svm.SVC)
knn_iclf = ImageClassifier(neighbors.KNeighborsClassifier)
naive_bayes_iclf = ImageClassifier(naive_bayes.GaussianNB)
ensemble_iclf = ImageClassifier(ensemble.RandomForestClassifier)

classifiers = [
    svm_iclf,
    knn_iclf,
    naive_bayes_iclf,
    ensemble_iclf,
]

for clf in classifiers:
    start = t.time()  # Records time before training
    clf.fit(im_train, lbl_train)
    end = t.time()  # Records time after training
    print("training time:", end - start)
    print(clf.score(im_test, lbl_test))
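    # Sketch (not part of the original script): reuse the metric helpers
    # defined above to also report precision, recall and F-measure for this
    # classifier; assumes clf.predict returns labels comparable to lbl_test.
    for metric_name, metric in [("precision", precision),
                                ("recall", recall),
                                ("f-measure", f_measure)]:
        print(metric_name + ":", my_metric_test(clf, metric))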