Merged changes

commit 72d8dfa3f3

@@ -1 +1,58 @@
@misc{openData,
  title={Open Database License (ODbL) v1.0},
  url={https://opendatacommons.org/licenses/odbl/1.0/},
  journal={Open Data Commons},
  year={2018},
  month={Feb}
}

@techreport{knn,
  title={Discriminatory analysis--nonparametric discrimination: consistency properties},
  author={Fix, Evelyn and Hodges Jr, Joseph L},
  year={1951},
  institution={California Univ Berkeley}
}

@article{svm,
  title={Support-vector networks},
  author={Cortes, Corinna and Vapnik, Vladimir},
  journal={Machine Learning},
  volume={20},
  number={3},
  pages={273--297},
  year={1995},
  publisher={Springer}
}

@article{naivebayes,
  title={Idiot's Bayes—not so stupid after all?},
  author={Hand, David J and Yu, Keming},
  journal={International Statistical Review},
  volume={69},
  number={3},
  pages={385--398},
  year={2001},
  publisher={Wiley Online Library}
}

@article{randomforest,
  title={Classification and regression by randomForest},
  author={Liaw, Andy and Wiener, Matthew},
  journal={R News},
  volume={2},
  number={3},
  pages={18--22},
  year={2002}
}

@article{Kotsiantis2007,
  title={Supervised machine learning: A review of classification techniques},
  author={Kotsiantis, Sotiris B.},
  journal={Informatica},
  volume={31},
  pages={249--268},
  year={2007}
}

BIN  mini_proj/report/waldo.png (new file, 86 KiB; binary file not shown)
@@ -18,10 +18,12 @@

% Easier compilation
\usepackage{bookmark}
\usepackage{natbib}
\bibliographystyle{ieeetr}

\usepackage{xcolor}
\newcommand{\todo}[1]{\marginpar{{\textsf{TODO}}}{\textbf{\color{red}[#1]}}}

\begin{document}
\title{What is Waldo?}
\author{Kelvin Davis \and Jip J. Dekker \and Anthony Silvestere}
\maketitle

@@ -31,17 +33,110 @@

\section{Introduction}

Almost every child around the world knows about ``Where's Waldo?'', also
known as ``Where's Wally?'' in some countries. This famous puzzle book has
spread across the world and is published in more than 25 different
languages. The idea behind the books is to find the character ``Waldo'',
shown in \Cref{fig:waldo}, in the different pictures in the book. This is,
however, not as easy as it sounds. Every picture in the book is full of
tiny details, and Waldo is only one of many. The puzzle is made even
harder by the fact that Waldo is not always fully depicted; sometimes only
his head or his torso pops out from behind something else. Lastly, the
reason that even adults have trouble spotting Waldo is that the pictures
are full of ``red herrings'': things that look like (or are colored as)
Waldo, but are not actually Waldo.

\begin{figure}[ht]
  \centering
  \includegraphics[scale=0.35]{waldo}
  \caption{
    A headshot of the character ``Waldo'', or ``Wally''. Pictures of Waldo
    are copyrighted by Martin Handford and are used under fair use.
  }
  \label{fig:waldo}
\end{figure}

The task of finding Waldo relates to many real-life image recognition
tasks. Fields like mining, astronomy, surveillance, radiology, and
microbiology often have to analyse images (or scans) to find the tiniest
details, sometimes undetectable by the human eye. These tasks are
especially hard when the things you are looking for are similar to the
rest of the image. Such tasks are thus generally performed using computers
to identify possible matches.

``Where's Waldo?'' offers us a great tool to study this kind of problem in
a setting that is humanly tangible. In this report we will try to identify
Waldo in the puzzle images using different classification methods. Every
image will be split into segments, and every segment will have to be
classified as either ``Waldo'' or ``not Waldo''. We will compare various
classification methods, from more classical machine learning, like naive
Bayes classifiers, to the current state of the art, neural networks. In
\Cref{sec:background} we will introduce the different classification
methods, \Cref{sec:method} will explain the way in which these methods are
trained and how they will be evaluated, \Cref{sec:results} will discuss
the results, and \Cref{sec:conclusion} will offer our final conclusions.

\section{Background} \label{sec:background}

The classification methods used can be separated into two groups:
classical machine learning methods and neural network architectures. Many
of the classical machine learning algorithms have variations and
improvements for various purposes, but for this report we will use only
their basic versions. In contrast, we will use several different neural
network architectures, as neural networks are currently the most widely
used method for image classification.

\textbf{A couple of papers that may be useful (if needed):}
\begin{itemize}
  \item LeNet: \url{http://yann.lecun.com/exdb/publis/pdf/lecun-01a.pdf}
  \item AlexNet:
    \url{http://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks}
  \item A general comparison of GoogLeNet and AlexNet: ``On the
    Performance of GoogLeNet and AlexNet Applied to Sketches'', Pedro
    Ballester and Ricardo Matsumura Araujo
  \item Deep NN architectures:
    \url{https://www-sciencedirect-com.ezproxy.lib.monash.edu.au/science/article/pii/S0925231216315533}
\end{itemize}

\subsection{Classical Machine Learning Methods}

The following paragraphs give only brief descriptions of the classical
machine learning methods used in this report. For further reading we
recommend ``Supervised machine learning: A review of classification
techniques'' \cite{Kotsiantis2007}.

\paragraph{Naive Bayes Classifier}
A naive Bayes classifier applies Bayes' theorem under the simplifying
(``naive'') assumption that all features are conditionally independent
given the class label, and assigns each instance to the class with the
highest resulting posterior probability \cite{naivebayes}.

\paragraph{$k$-Nearest Neighbors}
$k$-nearest neighbors ($k$-NN) classifies an instance by a majority vote
among the $k$ training instances that lie closest to it in feature space
\cite{knn}.

\paragraph{Support Vector Machine}
A support vector machine (SVM) finds the hyperplane that separates the two
classes with the largest margin, optionally after mapping the data into a
higher-dimensional space using a kernel function \cite{svm}.

\paragraph{Random Forest}
A random forest is an ensemble of decision trees, each trained on a
bootstrap sample of the data using random subsets of the features; the
majority vote over the trees gives the final classification
\cite{randomforest}.

\subsection{Neural Network Architectures}
We implemented three architectures. We started with the LeNet
architecture, then improved on it with a fairly standard convolutional
neural network (CNN) that is deeper, extracts more features, and condenses
the image information further. Finally, we implemented a more fully
convolutional network (FCN), which contains only a single dense layer for
the final binary classification step. The FCN adds an extra convolutional
layer, meaning that before classifying each image, the network abstracts
the data more than the other two.
\begin{itemize}
  \item LeNet
  \item CNN
  \item FCN
\end{itemize}

\paragraph{Convolutional Neural Networks}

\paragraph{LeNet}
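As a concrete illustration, below is a minimal sketch of a LeNet-style
binary classifier. The framework (Keras), the $64 \times 64$ RGB input
size, and the layer sizes are our assumptions for illustration; the report
does not fix these details.

\begin{verbatim}
# Minimal LeNet-style binary classifier (sketch only; framework,
# input size, and layer sizes are illustrative assumptions).
from tensorflow import keras
from tensorflow.keras import layers

lenet = keras.Sequential([
    layers.Conv2D(6, 5, activation="tanh", input_shape=(64, 64, 3)),
    layers.AveragePooling2D(2),             # downsample feature maps
    layers.Conv2D(16, 5, activation="tanh"),
    layers.AveragePooling2D(2),
    layers.Flatten(),
    layers.Dense(120, activation="tanh"),
    layers.Dense(84, activation="tanh"),
    layers.Dense(1, activation="sigmoid"),  # Waldo / not Waldo
])
lenet.compile(optimizer="adam",
              loss="binary_crossentropy",
              metrics=["accuracy"])
\end{verbatim}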

\paragraph{Fully Convolutional Neural Networks}
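To contrast with the LeNet sketch above, the following sketch shows the
idea of the fully convolutional variant described earlier: convolutional
layers do all of the feature abstraction, and a single dense layer
performs the final binary classification. Layer sizes are again
illustrative assumptions.

\begin{verbatim}
# FCN-style variant (sketch): convolutions only, then one dense layer.
from tensorflow import keras
from tensorflow.keras import layers

fcn = keras.Sequential([
    layers.Conv2D(32, 3, activation="relu", input_shape=(64, 64, 3)),
    layers.MaxPooling2D(2),
    layers.Conv2D(64, 3, activation="relu"),
    layers.MaxPooling2D(2),
    layers.Conv2D(128, 3, activation="relu"),  # the extra conv layer
    layers.GlobalAveragePooling2D(),           # no Flatten, no big dense
    layers.Dense(1, activation="sigmoid"),     # single dense classifier
])
\end{verbatim}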
\section{Method} \label{sec:method}

In order to effectively utilize the aforementioned modelling and
classification techniques, a key consideration is the data on which they
act. A dataset containing Waldo and non-Waldo images was obtained from an
Open Database\footnote{``The Open Database License (ODbL) is a license
agreement intended to allow users to freely share, modify, and use [a]
Database while maintaining [the] same freedom for
others''\cite{openData}} hosted on the predictive modelling and analytics
competition framework, Kaggle.

@@ -72,14 +167,65 @@

Despite the additional data, there were still over ten times as many
non-Waldo images as Waldo images. Therefore, it was necessary to cull the
non-Waldo data so that there was an even split of Waldo and non-Waldo
images, improving the representation of true positives in the image data
set.
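As a sketch of this culling step, the non-Waldo images can be undersampled
to match the number of Waldo images. The directory layout below is
hypothetical; only the even-split idea comes from the text.

\begin{verbatim}
# Undersample non-Waldo images to an even class split (sketch;
# the data/waldo and data/notwaldo paths are hypothetical).
import random
from pathlib import Path

waldo = sorted(Path("data/waldo").glob("*.jpg"))
not_waldo = sorted(Path("data/notwaldo").glob("*.jpg"))

random.seed(0)  # make the culling reproducible
dataset = waldo + random.sample(not_waldo, k=len(waldo))
\end{verbatim}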

% Kelvin Start
\subsection{Benchmarking}\label{benchmarking}

In order to benchmark the neural networks, their performance is evaluated
against other machine learning algorithms. We use Support Vector Machine,
$k$-Nearest Neighbours ($k = 5$), Gaussian Naive Bayes, and Random Forest
classifiers, as provided in Scikit-Learn.
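The following sketch shows how these benchmark classifiers can be
instantiated in Scikit-Learn. Apart from $k = 5$, which the text fixes,
all hyperparameters are left at the library defaults.

\begin{verbatim}
# Benchmark classifiers; library defaults except k=5 for k-NN.
from sklearn.svm import SVC
from sklearn.neighbors import KNeighborsClassifier
from sklearn.naive_bayes import GaussianNB
from sklearn.ensemble import RandomForestClassifier

classifiers = {
    "SVM": SVC(),
    "k-NN": KNeighborsClassifier(n_neighbors=5),
    "Naive Bayes": GaussianNB(),
    "Random Forest": RandomForestClassifier(),
}
# Each classifier is then fit on flattened image features:
#   clf.fit(X_train, y_train); clf.predict(X_test)
\end{verbatim}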

\subsection{Performance Metrics}\label{performance-metrics}

To evaluate the performance of the models, we record the time each model
takes to train on the training data, as well as statistics about the
predictions the models make on the test data. These prediction statistics
include:

\begin{itemize}
\item
  \textbf{Accuracy:}
  \[a = \dfrac{|correct\ predictions|}{|predictions|} = \dfrac{tp + tn}{tp + tn + fp + fn}\]
\item
  \textbf{Precision:}
  \[p = \dfrac{|Waldo\ predicted\ as\ Waldo|}{|predicted\ as\ Waldo|} = \dfrac{tp}{tp + fp}\]
\item
  \textbf{Recall:}
  \[r = \dfrac{|Waldo\ predicted\ as\ Waldo|}{|actually\ Waldo|} = \dfrac{tp}{tp + fn}\]
\item
  \textbf{F1 Measure:} \[f_1 = \dfrac{2pr}{p + r}\] where \(tp\) is the
  number of true positives, \(tn\) is the number of true negatives,
  \(fp\) is the number of false positives, and \(fn\) is the number of
  false negatives.
\end{itemize}

Accuracy is a common performance metric in machine learning; however, in
classification problems where the training data is heavily biased toward
one category, a model can learn to optimize its accuracy by classifying
all instances as that category. That is, the classifier will classify all
images that do not contain Waldo as not containing Waldo, but will also
classify all images containing Waldo as not containing Waldo. Thus we use
other metrics to measure performance as well.

\emph{Precision} is the percentage of instances classified as Waldo that
are actually Waldo. \emph{Recall} is the percentage of actual Waldos that
are predicted as Waldo. In the case of a classifier that classifies
nothing as Waldo, the recall would be 0. The \emph{F1-measure} combines
precision and recall in a way that heavily penalises classifiers that
perform poorly in either precision or recall.
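A minimal sketch of computing these four metrics with Scikit-Learn
follows; the label arrays are hypothetical examples (1 = Waldo, 0 = not
Waldo).

\begin{verbatim}
# Prediction statistics via Scikit-Learn (example labels only).
from sklearn.metrics import (accuracy_score, precision_score,
                             recall_score, f1_score)

y_true = [1, 0, 0, 1, 0]   # ground truth
y_pred = [1, 0, 0, 0, 0]   # model output

print("accuracy :", accuracy_score(y_true, y_pred))   # (tp+tn)/total
print("precision:", precision_score(y_true, y_pred))  # tp/(tp+fp)
print("recall   :", recall_score(y_true, y_pred))     # tp/(tp+fn)
print("f1       :", f1_score(y_true, y_pred))         # 2pr/(p+r)
\end{verbatim}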
% Kelvin End

\section{Results} \label{sec:results}

\section{Conclusion} \label{sec:conclusion}

\clearpage % Ensures that the references are on a separate page

% References
\section{References}
\renewcommand{\refname}{}
\bibliography{references}
\end{document}