From 98d096af4933b491061676e270206b5ea54bf7ac Mon Sep 17 00:00:00 2001
From: Kelvin Davis <273degreeskelvin@gmail.com>
Date: Fri, 25 May 2018 11:30:38 +1000
Subject: [PATCH 1/6] Added traditional ml script

---
 mini_proj/traditionals.py | 62 ++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 62 insertions(+)
 create mode 100644 mini_proj/traditionals.py

diff --git a/mini_proj/traditionals.py b/mini_proj/traditionals.py
new file mode 100644
index 0000000..77ede83
--- /dev/null
+++ b/mini_proj/traditionals.py
@@ -0,0 +1,62 @@
+import numpy as np
+import time as t
+from sklearn import svm, ensemble, naive_bayes, neighbors
+from _image_classifier import ImageClassifier
+
+def precision(y_true, y_pred):
+    y_pred = np.round(y_pred)
+    num = np.sum(np.logical_and(y_true, y_pred))
+    den = np.sum(y_pred)
+    return np.divide(num, den)
+
+def recall(y_true, y_pred):
+    y_pred = np.round(y_pred)
+    num = np.sum(np.logical_and(y_true, y_pred))
+    den = np.sum(y_true)
+    return np.divide(num, den)
+
+def f_measure(y_true, y_pred):
+    p = precision(y_true, y_pred)
+    r = recall(y_true, y_pred)
+    return 2 * p * r / (p + r)
+
+def metric_test(iclf, metric, test_X, test_Y):
+    return metric(test_Y, iclf.predict(test_X))
+
+## Open data
+im_train = np.load('Waldo_train_data.npy')
+im_test = np.load('Waldo_test_data.npy')
+
+lbl_train = np.load('Waldo_train_lbl.npy')
+lbl_test = np.load('Waldo_test_lbl.npy')
+
+# lbl_train = to_categorical(lbl_train) # One hot encoding the labels
+# lbl_test = to_categorical(lbl_test)
+
+my_metric_test = lambda iclf, f: metric_test(iclf, f, im_test, lbl_test)
+
+## Define models
+svm_iclf = ImageClassifier(svm.SVC)
+knn_iclf = ImageClassifier(neighbors.KNeighborsClassifier)
+naive_bayes_iclf = ImageClassifier(naive_bayes.GaussianNB)
+ensemble_iclf = ImageClassifier(ensemble.RandomForestClassifier)
+
+classifiers = [
+    svm_iclf,
+    knn_iclf,
+    naive_bayes_iclf,
+    ensemble_iclf,
+]
+
+for clf in classifiers:
+    start = t.time()  # Record time before training
+    clf.fit(im_train, lbl_train)
+    end = t.time()  # Record time after training
+    print("training time:", end - start)
+    print(clf.score(im_test, lbl_test))
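+    # Also report the metrics defined above, next to the plain accuracy
+    # score. A minimal sketch: this assumes ImageClassifier.predict
+    # returns binary 0/1 labels of the same shape as lbl_test.
+    print("precision:", my_metric_test(clf, precision))
+    print("recall:", my_metric_test(clf, recall))
+    print("f-measure:", my_metric_test(clf, f_measure))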

From 1ef305861d931a98b410371c6d61b959d8e365b7 Mon Sep 17 00:00:00 2001
From: "Jip J. Dekker"
Date: Fri, 25 May 2018 12:13:57 +1000
Subject: [PATCH 2/6] Add initial abstract

---
 mini_proj/report/waldo.tex | 17 +++++++++++++++--
 1 file changed, 15 insertions(+), 2 deletions(-)

diff --git a/mini_proj/report/waldo.tex b/mini_proj/report/waldo.tex
index 2b101de..b4b1e81 100644
--- a/mini_proj/report/waldo.tex
+++ b/mini_proj/report/waldo.tex
@@ -24,11 +24,24 @@
 
 \begin{document}
   \title{What is Waldo?}
-  \author{Kelvin Davis \and Jip J. Dekker\and Anthony Silvestere}
+  \author{Kelvin Davis \and Jip J. Dekker \and Anthony Silvestere}
   \maketitle
 
   \begin{abstract}
-
+%
+    The famous brand of picture puzzles ``Where's Waldo?'' relates well
+    to many unsolved image classification problems. This offers us the
+    opportunity to test different image classification methods on a data
+    set that is both small enough to process in a reasonable time span
+    and easy for humans to understand. In this report we compare the
+    well-known machine learning methods Naive Bayes, Support Vector
+    Machines, $k$-Nearest Neighbors, and Random Forest against the
+    neural network architectures LeNet, Fully Connected Neural Networks,
+    and Fully Convolutional Neural Networks.
+    \todo{I don't like this big summation but I think it is the
+    important information}
+    Our comparison shows that \todo{...}
+%
   \end{abstract}
 
   \section{Introduction}

From e4cf37d25a4d1e54788d0818ea2b7990fcdb3671 Mon Sep 17 00:00:00 2001
From: "Jip J. Dekker"
Date: Fri, 25 May 2018 12:32:39 +1000
Subject: [PATCH 3/6] Naive Bayes description

---
 mini_proj/report/waldo.tex | 22 +++++++++++++++++++++-
 1 file changed, 21 insertions(+), 1 deletion(-)

diff --git a/mini_proj/report/waldo.tex b/mini_proj/report/waldo.tex
index 8e21bbe..d101572 100644
--- a/mini_proj/report/waldo.tex
+++ b/mini_proj/report/waldo.tex
@@ -118,7 +118,27 @@
 
   \paragraph{Naive Bayes Classifier}
 
-  \cite{naivebayes}
+  \cite{naivebayes} is a classification method based on Bayes' theorem,
+  shown in \Cref{eq:bayes}. Bayes' theorem allows us to calculate the
+  probability of an event while taking into account prior knowledge of
+  conditions related to the event. In classification this allows us to
+  calculate the probability that a new instance belongs to a certain
+  class based on its features. We then assign the class that has the
+  highest probability.
+
+  \begin{equation}
+    \label{eq:bayes}
+    P(A \mid B) = \frac{P(B \mid A)\,P(A)}{P(B)}
+  \end{equation}
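+
+  Concretely, under the ``naive'' assumption that the features
+  $x_1, \dots, x_n$ of an instance are independent given its class, a
+  new instance is assigned the class that maximizes this probability:
+
+  \begin{equation}
+    \label{eq:nbrule}
+    \hat{y} = \operatorname*{arg\,max}_{c} P(c) \prod_{i=1}^{n} P(x_i \mid c)
+  \end{equation}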
Dekker" Date: Fri, 25 May 2018 13:23:21 +1000 Subject: [PATCH 5/6] Add description for SVM --- mini_proj/report/references.bib | 18 +++++++++++++----- mini_proj/report/waldo.tex | 10 +++++++++- 2 files changed, 22 insertions(+), 6 deletions(-) diff --git a/mini_proj/report/references.bib b/mini_proj/report/references.bib index a6ad907..a8b643e 100644 --- a/mini_proj/report/references.bib +++ b/mini_proj/report/references.bib @@ -1,8 +1,8 @@ -@misc{openData, - title={Open Database License (ODbL) v1.0}, - url={https://opendatacommons.org/licenses/odbl/1.0/}, - journal={Open Data Commons}, - year={2018}, +@misc{openData, + title={Open Database License (ODbL) v1.0}, + url={https://opendatacommons.org/licenses/odbl/1.0/}, + journal={Open Data Commons}, + year={2018}, month={Feb} } @techreport{knn, @@ -21,6 +21,14 @@ year={1995}, publisher={Springer} } +@inproceedings{svmnonlinear, + title={A training algorithm for optimal margin classifiers}, + author={Boser, Bernhard E and Guyon, Isabelle M and Vapnik, Vladimir N}, + booktitle={Proceedings of the fifth annual workshop on Computational learning theory}, + pages={144--152}, + year={1992}, + organization={ACM} +} @article{naivebayes, title={Idiot's Bayes—not so stupid after all?}, author={Hand, David J and Yu, Keming}, diff --git a/mini_proj/report/waldo.tex b/mini_proj/report/waldo.tex index 6f0f623..6ea06d2 100644 --- a/mini_proj/report/waldo.tex +++ b/mini_proj/report/waldo.tex @@ -142,7 +142,15 @@ \paragraph{Support Vector Machine} - \cite{svm} + (SVM) \cite{svm} has been very successful in many classification tasks. The + method is based on finding boundaries between the different classes. The + boundaries are defined as functions on the features of the instances. The + boundaries are optimized to have the most amount of space between the + boundaries and the training instances on both sides. Originally the + boundaries where linear functions, but more recent development allows for + the training of non-linear boundaries~\cite{svmnonlinear}. Once the training + has defined the boundaries new instances are classified according to on + which side of the boundary they belong. \paragraph{Random Forest} From ab59f456e2aee73da6e65e79bd292a3bd6b4d4c3 Mon Sep 17 00:00:00 2001 From: "Jip J. Dekker" Date: Fri, 25 May 2018 13:37:07 +1000 Subject: [PATCH 6/6] Add paragraph on random forest --- mini_proj/report/waldo.tex | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/mini_proj/report/waldo.tex b/mini_proj/report/waldo.tex index 6ea06d2..b5fb94e 100644 --- a/mini_proj/report/waldo.tex +++ b/mini_proj/report/waldo.tex @@ -154,7 +154,14 @@ \paragraph{Random Forest} - \cite{randomforest} + \cite{randomforest} is a method that is based on classifications decision + trees. In a decision tree a new instances is classified by going down a + (binary) tree. Each non-leaf node contain a selection criteria to its + branches. Every leaf node contains the class that will be assigned to the + instance if the node is reached. In other training methods, decision trees + have the tendency to overfit, but in random forest a multitude of decision + tree is trained with a certain degree of randomness and the mean of these + trees is used which avoids this problem. \subsection{Neural Network Architectures} \todo{Did we only do the three in the end? (Alexnet?)}