From 9b4c4bf9094e0e9e6d82f4868eff7918f2ff5bd7 Mon Sep 17 00:00:00 2001 From: "Jip J. Dekker" Date: Fri, 25 May 2018 13:48:11 +1000 Subject: [PATCH 01/11] Organise references --- mini_proj/report/references.bib | 75 ++++++++++++++++++++++++--------- mini_proj/report/waldo.tex | 12 +++--- 2 files changed, 59 insertions(+), 28 deletions(-) diff --git a/mini_proj/report/references.bib b/mini_proj/report/references.bib index 0bf6a63..795a143 100644 --- a/mini_proj/report/references.bib +++ b/mini_proj/report/references.bib @@ -1,9 +1,11 @@ -@misc{openData, - title={Open Database License (ODbL) v1.0}, - url={https://opendatacommons.org/licenses/odbl/1.0/}, - journal={Open Data Commons}, - year={2018}, - month={Feb} +Classical Machine Learning +@article{MLReview, + title={Supervised machine learning: A review of classification techniques}, + author={Kotsiantis, Sotiris B and Zaharakis, I and Pintelas, P}, + journal={Emerging artificial intelligence applications in computer engineering}, + volume={160}, + pages={3--24}, + year={2007} } @techreport{knn, title={Discriminatory analysis-nonparametric discrimination: consistency properties}, @@ -48,21 +50,52 @@ pages={18--22}, year={2002} } -@article{Kotsiantis2007, -abstract = {Supervised machine learning is the search for algorithms that reason from externally supplied instances to produce general hypotheses, which then make predictions about future instances. In other words, the goal of supervised learning is to build a concise model of the distribution of class labels in terms of predictor features. The resulting classifier is then used to assign class labels to the testing instances where the values of the predictor features are known, but the value of the class label is unknown. This paper describes various supervised machine learning classification techniques. 
Of course, a single article cannot be a complete review of all supervised machine learning classification algorithms (also known induction classification algorithms), yet we hope that the references cited will cover the major theoretical issues, guiding the researcher in interesting research directions and suggesting possible bias combinations that have yet to be explored.}, -author = {Kotsiantis, Sotiris B.}, -doi = {10.1115/1.1559160}, -file = {:home/kelvin/.local/share/data/Mendeley Ltd./Mendeley Desktop/Downloaded/Kotsiantis - 2007 - Supervised machine learning A review of classification techniques.pdf:pdf}, -isbn = {1586037803}, -issn = {09226389}, -journal = {Informatica}, -keywords = {algorithms analysis classifiers computational conn,classifiers,data mining techniques,intelligent data analysis,learning algorithms}, -mendeley-groups = {CS Proj/ML,CS Proj,Thesis,Thesis/ML}, -pages = {249--268}, -title = {{Supervised machine learning: A review of classification techniques}}, -url = {http://books.google.com/books?hl=en{\&}lr={\&}id=vLiTXDHr{\_}sYC{\&}oi=fnd{\&}pg=PA3{\&}dq=survey+machine+learning{\&}ots=CVsyuwYHjo{\&}sig=A6wYWvywU8XTc7Dzp8ZdKJaW7rc{\%}5Cnpapers://5e3e5e59-48a2-47c1-b6b1-a778137d3ec1/Paper/p800{\%}5Cnhttp://www.informatica.si/PDF/31-3/11{\_}Kotsiantis - S}, -volume = {31}, -year = {2007} + +Neural Networks +@article{lenet, + title={Gradient-based learning applied to document recognition}, + author={LeCun, Yann and Bottou, L{\'e}on and Bengio, Yoshua and Haffner, Patrick}, + journal={Proceedings of the IEEE}, + volume={86}, + number={11}, + pages={2278--2324}, + year={1998}, + publisher={IEEE} +} +@inproceedings{alexnet, + title={Imagenet classification with deep convolutional neural networks}, + author={Krizhevsky, Alex and Sutskever, Ilya and Hinton, Geoffrey E}, + booktitle={Advances in neural information processing systems}, + pages={1097--1105}, + year={2012} +} +@inproceedings{lenetVSalexnet, + title={On the Performance of GoogLeNet and 
AlexNet Applied to Sketches.}, + author={Ballester, Pedro and de Ara{\'u}jo, Ricardo Matsumura}, + booktitle={AAAI}, + pages={1124--1128}, + year={2016} +} +@article{deepNN, + title = "A survey of deep neural network architectures and their applications", + journal = "Neurocomputing", + volume = "234", + pages = "11 - 26", + year = "2017", + issn = "0925-2312", + doi = "https://doi.org/10.1016/j.neucom.2016.12.038", + url = "http://www.sciencedirect.com/science/article/pii/S0925231216315533", + author = "Weibo Liu and Zidong Wang and Xiaohui Liu and Nianyin Zeng and Yurong Liu and Fuad E. Alsaadi", + keywords = "Autoencoder, Convolutional neural network, Deep learning, Deep belief network, Restricted Boltzmann machine" +} + +MISC +@misc{openData, + title={Open Database License (ODbL) v1.0}, + url={https://opendatacommons.org/licenses/odbl/1.0/}, + journal={Open Data Commons}, + year={2018}, + month={Feb} } @incollection{NIPS2012_4824, title = {ImageNet Classification with Deep Convolutional Neural Networks}, diff --git a/mini_proj/report/waldo.tex b/mini_proj/report/waldo.tex index 056385c..c0d960a 100644 --- a/mini_proj/report/waldo.tex +++ b/mini_proj/report/waldo.tex @@ -101,12 +101,10 @@ \todo{ \\A couple of papers that may be useful (if needed): - - LeNet: http://yann.lecun.com/exdb/publis/pdf/lecun-01a.pdf - - AlexNet: http://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks - - General comparison of LeNet and AlexNet: - "On the Performance of GoogLeNet and AlexNet Applied to Sketches", Pedro Ballester and Ricardo Matsumura Araujo - - Deep NN Architecture: - https://www-sciencedirect-com.ezproxy.lib.monash.edu.au/science/article/pii/S0925231216315533 + - LeNet: \cite{lenet} + - AlexNet: \cite{alexnet} + - General comparison of LeNet and AlexNet:\cite{lenetVSalexnet} + - Deep NN Architecture:\cite{deepNN} } \subsection{Classical Machine Learning Methods} @@ -114,7 +112,7 @@ The following paragraphs will give only brief 
descriptions of the different classical machine learning methods used in this reports. For further reading we recommend reading ``Supervised machine learning: A review of - classification techniques'' \cite{Kotsiantis2007}. + classification techniques'' \cite{MLReview}. \paragraph{Naive Bayes Classifier} From 2a00c812fb0f94a08d2a1d5a5b3770755c4f94f7 Mon Sep 17 00:00:00 2001 From: "Jip J. Dekker" Date: Fri, 25 May 2018 14:37:32 +1000 Subject: [PATCH 02/11] Add some steps --- mini_proj/test_nn.py | 24 +++++++++++++++++++++--- 1 file changed, 21 insertions(+), 3 deletions(-) diff --git a/mini_proj/test_nn.py b/mini_proj/test_nn.py index 9acdc4e..f33f837 100644 --- a/mini_proj/test_nn.py +++ b/mini_proj/test_nn.py @@ -3,6 +3,7 @@ from keras.models import Model, load_model from keras.utils import to_categorical import cv2 from skimage import color, exposure +from _cutter import image_cut def man_result_check(): pred_y = np.load("predicted_results.npy") @@ -14,7 +15,7 @@ def man_result_check(): z = 0 for i in range(0, len(test_y)): print(pred_y[i], test_y[i], file=f) - # Calculates correct predictions + # Calculates correct predictions if pred_y[i][0] == test_y[i][0]: z+=1 @@ -25,7 +26,7 @@ def man_result_check(): Purpose:Loads a trained neural network model (using Keras) to classify an image Input: path/to/trained_model image [or] path/to/image [if from_file=True] -Returns:Boolean variable +Returns:Boolean variable ''' def is_Wally(trained_model_path, image, from_file=False): if from_file: @@ -50,5 +51,22 @@ def is_Wally(trained_model_path, image, from_file=False): # Mark Wally image somehow (colour the border) # Stitch original image back together +if __name__ == '__main__': + # Read image + image = cv2.imread("10.jpg") + # Split image + cuts = image_cut(image, 64, 64) + for i in len(cuts): + # Transform block + hsv = color.rgb2hsv(cuts[i]) + hsv[:, :, 2] = exposure.equalize_hist(hsv[:, :, 2]) + block = color.hsv2rgb(hsv) + block = np.rollaxis(block, -1) + if 
is_Wally("Waldo.h5", block): + # Border block + cuts[i] = cv2.copyMakeBorder(cuts[i],5,5,5,5,cv2.BORDER_CONSTANT,value=RED) -is_Wally("Waldo.h5", image) \ No newline at end of file + # Stitch image TODO! + # Show image + cv.imshow('Image', image) + cv.waitKey(0) From 0955aee4cbc6a33391d0e30928cdcb89a01ca015 Mon Sep 17 00:00:00 2001 From: "Jip J. Dekker" Date: Fri, 25 May 2018 14:41:48 +1000 Subject: [PATCH 03/11] Write image instead of show --- mini_proj/test_nn.py | 3 +-- 1 file changed, 1 insertion(+), 2 deletions(-) diff --git a/mini_proj/test_nn.py b/mini_proj/test_nn.py index f33f837..0d8ec2e 100644 --- a/mini_proj/test_nn.py +++ b/mini_proj/test_nn.py @@ -68,5 +68,4 @@ if __name__ == '__main__': # Stitch image TODO! # Show image - cv.imshow('Image', image) - cv.waitKey(0) + cv2.imwrite('output.png',image) From 2dbb5f9e76e6002d09bd136836808b647ec69df9 Mon Sep 17 00:00:00 2001 From: "Jip J. Dekker" Date: Fri, 25 May 2018 14:46:14 +1000 Subject: [PATCH 04/11] Add numpy citation --- mini_proj/report/references.bib | 29 ++++++++++------- mini_proj/report/waldo.tex | 55 ++++++++++++++++----------------- 2 files changed, 45 insertions(+), 39 deletions(-) diff --git a/mini_proj/report/references.bib b/mini_proj/report/references.bib index 795a143..05c27c3 100644 --- a/mini_proj/report/references.bib +++ b/mini_proj/report/references.bib @@ -107,15 +107,22 @@ year = {2012}, publisher = {Curran Associates, Inc.}, url = {http://papers.nips.cc/paper/4824-imagenet-classification-with-deep-convolutional-neural-networks.pdf} } -@ARTICLE{726791, -author={Y. Lecun and L. Bottou and Y. Bengio and P. 
Haffner}, -journal={Proceedings of the IEEE}, -title={Gradient-based learning applied to document recognition}, -year={1998}, -volume={86}, -number={11}, -pages={2278-2324}, -keywords={backpropagation;convolution;multilayer perceptrons;optical character recognition;2D shape variability;GTN;back-propagation;cheque reading;complex decision surface synthesis;convolutional neural network character recognizers;document recognition;document recognition systems;field extraction;gradient based learning technique;gradient-based learning;graph transformer networks;handwritten character recognition;handwritten digit recognition task;high-dimensional patterns;language modeling;multilayer neural networks;multimodule systems;performance measure minimization;segmentation recognition;Character recognition;Feature extraction;Hidden Markov models;Machine learning;Multi-layer neural network;Neural networks;Optical character recognition software;Optical computing;Pattern recognition;Principal component analysis}, -doi={10.1109/5.726791}, -ISSN={0018-9219}, +@ARTICLE{726791, +author={Y. Lecun and L. Bottou and Y. Bengio and P. 
Haffner}, +journal={Proceedings of the IEEE}, +title={Gradient-based learning applied to document recognition}, +year={1998}, +volume={86}, +number={11}, +pages={2278-2324}, +keywords={backpropagation;convolution;multilayer perceptrons;optical character recognition;2D shape variability;GTN;back-propagation;cheque reading;complex decision surface synthesis;convolutional neural network character recognizers;document recognition;document recognition systems;field extraction;gradient based learning technique;gradient-based learning;graph transformer networks;handwritten character recognition;handwritten digit recognition task;high-dimensional patterns;language modeling;multilayer neural networks;multimodule systems;performance measure minimization;segmentation recognition;Character recognition;Feature extraction;Hidden Markov models;Machine learning;Multi-layer neural network;Neural networks;Optical character recognition software;Optical computing;Pattern recognition;Principal component analysis}, +doi={10.1109/5.726791}, +ISSN={0018-9219}, month={Nov},} +@book{numpy, + title={A guide to NumPy}, + author={Oliphant, Travis E}, + volume={1}, + year={2006}, + publisher={Trelgol Publishing USA} +} diff --git a/mini_proj/report/waldo.tex b/mini_proj/report/waldo.tex index c0d960a..502f94d 100644 --- a/mini_proj/report/waldo.tex +++ b/mini_proj/report/waldo.tex @@ -162,59 +162,58 @@ trees is used which avoids this problem. \subsection{Neural Network Architectures} - \tab There are many well established architectures for Neural Networks depending on the task being performed. - In this paper, the focus is placed on convolution neural networks, which have been proven to effectively classify images \cite{NIPS2012_4824}. - One of the pioneering works in the field, the LeNet \cite{726791}architecture, will be implemented to compare against two rudimentary networks with more depth. 
- These networks have been constructed to improve on the LeNet architecture by extracting more features, condensing image information, and allowing for more parameters in the network. - The difference between the two network use of convolutional and dense layers. - The convolutional neural network contains dense layers in the final stages of the network. - The Fully Convolutional Network (FCN) contains only one dense layer for the final binary classification step. + \tab There are many well established architectures for Neural Networks depending on the task being performed. + In this paper, the focus is placed on convolution neural networks, which have been proven to effectively classify images \cite{NIPS2012_4824}. + One of the pioneering works in the field, the LeNet \cite{726791}architecture, will be implemented to compare against two rudimentary networks with more depth. + These networks have been constructed to improve on the LeNet architecture by extracting more features, condensing image information, and allowing for more parameters in the network. + The difference between the two network use of convolutional and dense layers. + The convolutional neural network contains dense layers in the final stages of the network. + The Fully Convolutional Network (FCN) contains only one dense layer for the final binary classification step. The FCN instead consists of an extra convolutional layer, resulting in an increased ability for the network to abstract the input data relative to the other two configurations. \\ \textbf{Insert image of LeNet from slides} \section{Method} \label{sec:method} \tab - In order to effectively utilize the aforementioned modelling and classification techniques, a key consideration is the data they are acting on. 
- A dataset containing Waldo and non-Waldo images was obtained from an Open Database\footnote{``The Open Database License (ODbL) is a license agreement intended to allow users to freely share, modify, and use [a] Database while maintaining [the] same freedom for others"\cite{openData}}hosted on the predictive modelling and analytics competition framework, Kaggle. - The distinction between images containing Waldo, and those that do not, was providied by the separation of the images in different sub-directories. + In order to effectively utilize the aforementioned modelling and classification techniques, a key consideration is the data they are acting on. + A dataset containing Waldo and non-Waldo images was obtained from an Open Database\footnote{``The Open Database License (ODbL) is a license agreement intended to allow users to freely share, modify, and use [a] Database while maintaining [the] same freedom for others"\cite{openData}}hosted on the predictive modelling and analytics competition framework, Kaggle. + The distinction between images containing Waldo, and those that do not, was providied by the separation of the images in different sub-directories. It was therefore necessary to preprocess these images before they could be utilised by the proposed machine learning algorithms. - + \subsection{Image Processing} \label{imageProcessing} \tab - The Waldo image database consists of images of size 64$\times$64, 128$\times$128, and 256$\times$256 pixels obtained by dividing complete Where's Waldo? puzzles. - Within each set of images, those containing Waldo are located in a folder called `waldo', and those not containing Waldo, in a folder called `not\_waldo'. - Since Where's Waldo? puzzles are usually densely populated and contain fine details, the 64$\times$64 pixel set of images were selected to train and evaluate the machine learning models. 
+ The Waldo image database consists of images of size 64$\times$64, 128$\times$128, and 256$\times$256 pixels obtained by dividing complete Where's Waldo? puzzles. + Within each set of images, those containing Waldo are located in a folder called `waldo', and those not containing Waldo, in a folder called `not\_waldo'. + Since Where's Waldo? puzzles are usually densely populated and contain fine details, the 64$\times$64 pixel set of images were selected to train and evaluate the machine learning models. These images provide the added benefit of containing the most individual images of the three size groups. \\ \par - Each of the 64$\times$64 pixel images were inserted into a Numpy - \footnote{Numpy is a popular Python programming library for scientific computing} - array of images, and a binary value was inserted into a seperate list at the same index. - These binary values form the labels for each image (waldo or not waldo). + Each of the 64$\times$64 pixel images were inserted into a Numpy~\cite{numpy} + array of images, and a binary value was inserted into a seperate list at the same index. + These binary values form the labels for each image (waldo or not waldo). Colour normalisation was performed on each so that artefacts in an image's colour profile correspond to meaningful features of the image (rather than photographic method). \\ \par - Each original puzzle is broken down into many images, and only contains one Waldo. Although Waldo might span multiple 64$\times$64 pixel squares, this means that the non-Waldo data far outnumbers the Waldo data. - To combat the bias introduced by the skewed data, all Waldo images were artificially augmented by performing random rotations, reflections, and introducing random noise in the image to produce news images. - In this way, each original Waldo image was used to produce an additional 10 variations of the image, inserted into the image array. 
+ Each original puzzle is broken down into many images, and only contains one Waldo. Although Waldo might span multiple 64$\times$64 pixel squares, this means that the non-Waldo data far outnumbers the Waldo data. + To combat the bias introduced by the skewed data, all Waldo images were artificially augmented by performing random rotations, reflections, and introducing random noise in the image to produce news images. + In this way, each original Waldo image was used to produce an additional 10 variations of the image, inserted into the image array. This provided more variation in the true positives of the data set and assists in the development of more robust methods by exposing each technique to variations of the image during the training phase. \\ \par - Despite the additional data, there were still over ten times as many non-Waldo images than Waldo images. + Despite the additional data, there were still over ten times as many non-Waldo images than Waldo images. Therefore, it was necessary to cull the no-Waldo data, so that there was an even split of Waldo and non-Waldo images, improving the representation of true positives in the image data set. Following preprocessing, the images (and associated labels) were divided into a training and a test set with a 3:1 split. \\ \subsection{Neural Network Training}\label{nnTraining} \tab The neural networks used to classify the images were supervised learning models; requiring training on a dataset of typical images. - Each network was trained using the preprocessed training dataset and labels, for 25 epochs (one forward and backward pass of all data) in batches of 150. - The number of epochs was chosen to maximise training time and prevent overfitting\footnote{Overfitting occurs when a model learns from the data too specifically, and loses its ability to generalise its predictions for new data (resulting in loss of prediction accuracy)} of the training data, given current model parameters. 
- The batch size is the number of images sent through each pass of the network. Using the entire dataset would train the network quickly, but decrease the network's ability to learn unique features from the data. - Passing one image at a time may allow the model to learn more about each image, however it would also increase the training time and risk of overfitting the data. + Each network was trained using the preprocessed training dataset and labels, for 25 epochs (one forward and backward pass of all data) in batches of 150. + The number of epochs was chosen to maximise training time and prevent overfitting\footnote{Overfitting occurs when a model learns from the data too specifically, and loses its ability to generalise its predictions for new data (resulting in loss of prediction accuracy)} of the training data, given current model parameters. + The batch size is the number of images sent through each pass of the network. Using the entire dataset would train the network quickly, but decrease the network's ability to learn unique features from the data. + Passing one image at a time may allow the model to learn more about each image, however it would also increase the training time and risk of overfitting the data. Therefore the batch size was chosen to maintain training accuracy while minimising training time. \subsection{Neural Network Testing}\label{nnTesting} - \tab After training each network, a separate test set of images (and labels) was used to evaluate the models. - The result of this testing was expressed primarily in the form of an accuracy (percentage). + \tab After training each network, a separate test set of images (and labels) was used to evaluate the models. + The result of this testing was expressed primarily in the form of an accuracy (percentage). These results as well as the other methods presented in this paper are given in Figure \textbf{[insert ref to results here]} of the Results section. 
\textbf{***********} % Kelvin Start From 259c4b92437341e70489185811d5f199b68d7e74 Mon Sep 17 00:00:00 2001 From: Kelvin Davis <273degreeskelvin@gmail.com> Date: Fri, 25 May 2018 14:51:59 +1000 Subject: [PATCH 05/11] changed cutter --- mini_proj/_cutter.py | 6 +++--- mini_proj/report/waldo.tex | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/mini_proj/_cutter.py b/mini_proj/_cutter.py index f6414e0..4ff5c7c 100644 --- a/mini_proj/_cutter.py +++ b/mini_proj/_cutter.py @@ -8,19 +8,19 @@ def image_cut(image, size0, size1): dims = image.shape assert dims[0] >= size0 assert dims[1] >= size1 - return np.array([image[size0 * i:size0 * (i+1), size1 * j:size1 * (j+1)] \ + return np.array([image[size0 * i:size0 * (i+1), size1 * j:size1 * (j+1), :] \ for i in range(dims[0] // size0) for j in range(dims[1] // size1)] + \ [image[size0 * i:size0 * (i+1), dims[1]-size1:] \ for i in range(dims[0] // size0) if dims[1] % size1 != 0] + \ [image[dims[0]-size0:, size1 * j:size1 * (j+1)] \ - for j in range(dims[1] // size1) if dims[0] % size0 != \ + for j in range(dims[1] // size1) if dims[0] % size0 != 0] \ ) if __name__ == '__main__': # test = np.random.rand(5,4,3) test = np.array([[ - k + 4*j for k in range(4) + [k + 4*j, k + 4*j] for k in range(4) ] for j in range(5)]) print(test) print(image_cut(test, 2, 2)) diff --git a/mini_proj/report/waldo.tex b/mini_proj/report/waldo.tex index d101572..d23c873 100644 --- a/mini_proj/report/waldo.tex +++ b/mini_proj/report/waldo.tex @@ -59,7 +59,7 @@ as) Waldo, but are not actually Waldo. \begin{figure}[ht] - \includegraphics[scale=0.35]{waldo} + \includegraphics[scale=0.35]{waldo.png} \centering \caption{ A headshot of the character ``Waldo'', or ``Wally''. Pictures of Waldo From bf5b88c46d3229286211bd779d6fade617c31afc Mon Sep 17 00:00:00 2001 From: "Jip J. 
Dekker" Date: Fri, 25 May 2018 14:53:03 +1000 Subject: [PATCH 06/11] A lot of cleanup + spell check --- mini_proj/report/references.bib | 11 +++ mini_proj/report/waldo.tex | 163 ++++++++++++++++++++------------ 2 files changed, 111 insertions(+), 63 deletions(-) diff --git a/mini_proj/report/references.bib b/mini_proj/report/references.bib index 05c27c3..1a35b55 100644 --- a/mini_proj/report/references.bib +++ b/mini_proj/report/references.bib @@ -126,3 +126,14 @@ month={Nov},} year={2006}, publisher={Trelgol Publishing USA} } +@article{scikit-learn, + title={Scikit-learn: Machine Learning in {P}ython}, + author={Pedregosa, F. and Varoquaux, G. and Gramfort, A. and Michel, V. + and Thirion, B. and Grisel, O. and Blondel, M. and Prettenhofer, P. + and Weiss, R. and Dubourg, V. and Vanderplas, J. and Passos, A. and + Cournapeau, D. and Brucher, M. and Perrot, M. and Duchesnay, E.}, + journal={Journal of Machine Learning Research}, + volume={12}, + pages={2825--2830}, + year={2011} +} diff --git a/mini_proj/report/waldo.tex b/mini_proj/report/waldo.tex index 502f94d..856f002 100644 --- a/mini_proj/report/waldo.tex +++ b/mini_proj/report/waldo.tex @@ -28,7 +28,7 @@ \maketitle \begin{abstract} -% + The famous brand of picture puzzles ``Where's Waldo?'' relates well to many unsolved image classification problem. This offers us the opportunity to test different image classification methods on a data set that is both small @@ -40,7 +40,7 @@ \todo{I don't like this big summation but I think it is the important information} Our comparison shows that \todo{...} -% + \end{abstract} \section{Introduction} @@ -98,15 +98,7 @@ their basic versions. In contrast, we will use different neural network architectures, as this method is currently the most used for image classification. 
- - \todo{ - \\A couple of papers that may be useful (if needed): - - LeNet: \cite{lenet} - - AlexNet: \cite{alexnet} - - General comparison of LeNet and AlexNet:\cite{lenetVSalexnet} - - Deep NN Architecture:\cite{deepNN} - } - + \subsection{Classical Machine Learning Methods} The following paragraphs will give only brief descriptions of the different @@ -162,67 +154,112 @@ trees is used which avoids this problem. \subsection{Neural Network Architectures} - \tab There are many well established architectures for Neural Networks depending on the task being performed. - In this paper, the focus is placed on convolution neural networks, which have been proven to effectively classify images \cite{NIPS2012_4824}. - One of the pioneering works in the field, the LeNet \cite{726791}architecture, will be implemented to compare against two rudimentary networks with more depth. - These networks have been constructed to improve on the LeNet architecture by extracting more features, condensing image information, and allowing for more parameters in the network. - The difference between the two network use of convolutional and dense layers. - The convolutional neural network contains dense layers in the final stages of the network. - The Fully Convolutional Network (FCN) contains only one dense layer for the final binary classification step. - The FCN instead consists of an extra convolutional layer, resulting in an increased ability for the network to abstract the input data relative to the other two configurations. - \\ - \textbf{Insert image of LeNet from slides} + + There are many well established architectures for Neural Networks depending + on the task being performed. In this paper, the focus is placed on + convolution neural networks, which have been proven to effectively classify + images \cite{NIPS2012_4824}. 
One of the pioneering works in the field, the + LeNet architecture~\cite{726791}, will be implemented to compare against two + rudimentary networks with more depth. These networks have been constructed + to improve on the LeNet architecture by extracting more features, condensing + image information, and allowing for more parameters in the network. The + difference between the two networks is their use of convolutional and dense layers. + The convolutional neural network contains dense layers in the final stages + of the network. The Fully Convolutional Network (FCN) contains only one + dense layer for the final binary classification step. The FCN instead + consists of an extra convolutional layer, resulting in an increased ability + for the network to abstract the input data relative to the other two + configurations. + + \todo{Insert image of LeNet from slides} \section{Method} \label{sec:method} - \tab - In order to effectively utilize the aforementioned modelling and classification techniques, a key consideration is the data they are acting on. - A dataset containing Waldo and non-Waldo images was obtained from an Open Database\footnote{``The Open Database License (ODbL) is a license agreement intended to allow users to freely share, modify, and use [a] Database while maintaining [the] same freedom for others"\cite{openData}}hosted on the predictive modelling and analytics competition framework, Kaggle. - The distinction between images containing Waldo, and those that do not, was providied by the separation of the images in different sub-directories. - It was therefore necessary to preprocess these images before they could be utilised by the proposed machine learning algorithms. + + In order to effectively utilize the aforementioned modeling and + classification techniques, a key consideration is the data they are acting + on. 
A dataset containing Waldo and non-Waldo images was obtained from an + Open Database\footnote{``The Open Database License (ODbL) is a license + agreement intended to allow users to freely share, modify, and use [a] + Database while maintaining [the] same freedom for + others''~\cite{openData}} hosted on the predictive modeling and analytics + competition framework, Kaggle. The distinction between images containing + Waldo, and those that do not, was provided by the separation of the images + in different sub-directories. It was therefore necessary to preprocess these + images before they could be utilized by the proposed machine learning + algorithms. \subsection{Image Processing} \label{imageProcessing} - \tab - The Waldo image database consists of images of size 64$\times$64, 128$\times$128, and 256$\times$256 pixels obtained by dividing complete Where's Waldo? puzzles. - Within each set of images, those containing Waldo are located in a folder called `waldo', and those not containing Waldo, in a folder called `not\_waldo'. - Since Where's Waldo? puzzles are usually densely populated and contain fine details, the 64$\times$64 pixel set of images were selected to train and evaluate the machine learning models. - These images provide the added benefit of containing the most individual images of the three size groups. - \\ - \par - Each of the 64$\times$64 pixel images were inserted into a Numpy~\cite{numpy} - array of images, and a binary value was inserted into a seperate list at the same index. - These binary values form the labels for each image (waldo or not waldo). - Colour normalisation was performed on each so that artefacts in an image's colour profile correspond to meaningful features of the image (rather than photographic method). - \\ - \par - Each original puzzle is broken down into many images, and only contains one Waldo. Although Waldo might span multiple 64$\times$64 pixel squares, this means that the non-Waldo data far outnumbers the Waldo data. 
- To combat the bias introduced by the skewed data, all Waldo images were artificially augmented by performing random rotations, reflections, and introducing random noise in the image to produce news images. - In this way, each original Waldo image was used to produce an additional 10 variations of the image, inserted into the image array. - This provided more variation in the true positives of the data set and assists in the development of more robust methods by exposing each technique to variations of the image during the training phase. - \\ - \par - Despite the additional data, there were still over ten times as many non-Waldo images than Waldo images. - Therefore, it was necessary to cull the no-Waldo data, so that there was an even split of Waldo and non-Waldo images, improving the representation of true positives in the image data set. Following preprocessing, the images (and associated labels) were divided into a training and a test set with a 3:1 split. - \\ + + The Waldo image database consists of images of size 64$\times$64, + 128$\times$128, and 256$\times$256 pixels obtained by dividing complete + Where's Waldo? puzzles. Within each set of images, those containing Waldo + are located in a folder called `waldo', and those not containing Waldo, in a + folder called `not\_waldo'. Since Where's Waldo? puzzles are usually densely + populated and contain fine details, the 64$\times$64 pixel set of images + were selected to train and evaluate the machine learning models. These + images provide the added benefit of containing the most individual images of + the three size groups. \\ + + Each of the 64$\times$64 pixel images was inserted into a + Numpy~\cite{numpy} array of images, and a binary value was inserted into a + separate list at the same index. These binary values form the labels for + each image (waldo or not waldo). 
Colour normalisation was performed on each + so that artefacts in an image's colour profile correspond to meaningful + features of the image (rather than photographic method).\\ + + + Each original puzzle is broken down into many images, and only contains one + Waldo. Although Waldo might span multiple 64$\times$64 pixel squares, this + means that the non-Waldo data far outnumbers the Waldo data. To combat the + bias introduced by the skewed data, all Waldo images were artificially + augmented by performing random rotations, reflections, and introducing + random noise in the image to produce news images. In this way, each original + Waldo image was used to produce an additional 10 variations of the image, + inserted into the image array. This provided more variation in the true + positives of the data set and assists in the development of more robust + methods by exposing each technique to variations of the image during the + training phase. \\ + + Despite the additional data, there were still over ten times as many + non-Waldo images than Waldo images. Therefore, it was necessary to cull the + no-Waldo data, so that there was an even split of Waldo and non-Waldo + images, improving the representation of true positives in the image data + set. Following preprocessing, the images (and associated labels) were + divided into a training and a test set with a 3:1 split. \\ + \subsection{Neural Network Training}\label{nnTraining} - \tab The neural networks used to classify the images were supervised learning models; requiring training on a dataset of typical images. - Each network was trained using the preprocessed training dataset and labels, for 25 epochs (one forward and backward pass of all data) in batches of 150. 
- The number of epochs was chosen to maximise training time and prevent overfitting\footnote{Overfitting occurs when a model learns from the data too specifically, and loses its ability to generalise its predictions for new data (resulting in loss of prediction accuracy)} of the training data, given current model parameters. - The batch size is the number of images sent through each pass of the network. Using the entire dataset would train the network quickly, but decrease the network's ability to learn unique features from the data. - Passing one image at a time may allow the model to learn more about each image, however it would also increase the training time and risk of overfitting the data. - Therefore the batch size was chosen to maintain training accuracy while minimising training time. + + The neural networks used to classify the images were supervised learning + models; requiring training on a dataset of typical images. Each network was + trained using the preprocessed training dataset and labels, for 25 epochs + (one forward and backward pass of all data) in batches of 150. The number of + epochs was chosen to maximise training time and prevent + overfitting\footnote{Overfitting occurs when a model learns from the data + too specifically, and loses its ability to generalise its predictions for + new data (resulting in loss of prediction accuracy)} of the training data, + given current model parameters. The batch size is the number of images sent + through each pass of the network. Using the entire dataset would train the + network quickly, but decrease the network's ability to learn unique features + from the data. Passing one image at a time may allow the model to learn more + about each image, however it would also increase the training time and risk + of overfitting the data. Therefore the batch size was chosen to maintain + training accuracy while minimising training time. 
\subsection{Neural Network Testing}\label{nnTesting} - \tab After training each network, a separate test set of images (and labels) was used to evaluate the models. - The result of this testing was expressed primarily in the form of an accuracy (percentage). - These results as well as the other methods presented in this paper are given in Figure \textbf{[insert ref to results here]} of the Results section. - \textbf{***********} - % Kelvin Start + + After training each network, a separate test set of images (and labels) was + used to evaluate the models. The result of this testing was expressed + primarily in the form of an accuracy (percentage). These results as well as + the other methods presented in this paper are given in Figure + \todo{insert ref to results here} of the Results section. + \todo{***********} + \subsection{Benchmarking}\label{benchmarking} In order to benchmark the Neural Networks, the performance of these - algorithms are evaluated against other Machine Learning algorithms. We - use Support Vector Machines, K-Nearest Neighbours (\(K=5\)), Gaussian - Naive Bayes and Random Forest classifiers, as provided in Scikit-Learn. + algorithms are evaluated against other Machine Learning algorithms. We use + Support Vector Machines, K-Nearest Neighbors (\(K=5\)), Naive Bayes and + Random Forest classifiers, as provided in Scikit-Learn~\cite{scikit-learn}. \subsection{Performance Metrics}\label{performance-metrics} @@ -262,7 +299,7 @@ are actually Waldo. \emph{Recall} returns the percentage of Waldos that were actually predicted as Waldo. In the case of a classifier that classifies all things as Waldo, the recall would be 0. \emph{F1-Measure} - returns a combination of precision and recall that heavily penalises + returns a combination of precision and recall that heavily penalizes classifiers that perform poorly in either precision or recall. % Kelvin End From f93594007dffd12738446ef29c3c0af99c3e8353 Mon Sep 17 00:00:00 2001 From: "Jip J. 
Dekker" Date: Fri, 25 May 2018 14:55:13 +1000 Subject: [PATCH 07/11] A little more cleanup --- mini_proj/report/waldo.tex | 31 ++++++++++++++----------------- 1 file changed, 14 insertions(+), 17 deletions(-) diff --git a/mini_proj/report/waldo.tex b/mini_proj/report/waldo.tex index 28d2689..4c1881c 100644 --- a/mini_proj/report/waldo.tex +++ b/mini_proj/report/waldo.tex @@ -98,7 +98,7 @@ their basic versions. In contrast, we will use different neural network architectures, as this method is currently the most used for image classification. - + \subsection{Classical Machine Learning Methods} The following paragraphs will give only brief descriptions of the different @@ -286,22 +286,19 @@ \end{itemize} \emph{Accuracy} is a common performance metric used in Machine Learning, - however in classification problems where the training data is heavily - biased toward one category, sometimes a model will learn to optimize its - accuracy by classifying all instances as one category. I.e. the - classifier will classify all images that do not contain Waldo as not - containing Waldo, but will also classify all images containing Waldo as - not containing Waldo. Thus we use, other metrics to measure performance - as well. - \\ - \par - \emph{Precision} returns the percentage of classifications of Waldo that - are actually Waldo. \emph{Recall} returns the percentage of Waldos that - were actually predicted as Waldo. In the case of a classifier that - classifies all things as Waldo, the recall would be 0. \emph{F1-Measure} - returns a combination of precision and recall that heavily penalizes - classifiers that perform poorly in either precision or recall. - % Kelvin End + however in classification problems where the training data is heavily biased + toward one category, sometimes a model will learn to optimize its accuracy + by classifying all instances as one category. I.e. 
the classifier will + classify all images that do not contain Waldo as not containing Waldo, but + will also classify all images containing Waldo as not containing Waldo. Thus, + we use other metrics to measure performance as well. \\ + + \emph{Precision} returns the percentage of classifications of Waldo that are + actually Waldo. \emph{Recall} returns the percentage of Waldos that were + actually predicted as Waldo. In the case of a classifier that classifies all + things as not Waldo, the recall would be 0. \emph{F1-Measure} returns a + combination of precision and recall that heavily penalizes classifiers that + perform poorly in either precision or recall. \section{Results} \label{sec:results} From 08813f9b9fc355fa647a63c16151d177b8679e39 Mon Sep 17 00:00:00 2001 From: "Jip J. Dekker" Date: Fri, 25 May 2018 14:57:37 +1000 Subject: [PATCH 08/11] Move overfitting footnote to it's first occurrence --- mini_proj/report/waldo.tex | 27 +++++++++++++-------------- 1 file changed, 13 insertions(+), 14 deletions(-) diff --git a/mini_proj/report/waldo.tex b/mini_proj/report/waldo.tex index 4c1881c..36e76bb 100644 --- a/mini_proj/report/waldo.tex +++ b/mini_proj/report/waldo.tex @@ -149,9 +149,11 @@ (binary) tree. Each non-leaf node contain a selection criteria to its branches. Every leaf node contains the class that will be assigned to the instance if the node is reached. In other training methods, decision trees - have the tendency to overfit, but in random forest a multitude of decision - tree is trained with a certain degree of randomness and the mean of these - trees is used which avoids this problem.
+ have the tendency to overfit\footnote{Overfitting occurs when a model learns + from the data too specifically, and loses its ability to generalise its + predictions for new data (resulting in loss of prediction accuracy)}, but in + random forest a multitude of decision trees are trained with a certain degree + of randomness and the mean of these trees is used, which avoids this problem. \subsection{Neural Network Architectures} @@ -233,17 +235,14 @@ models; requiring training on a dataset of typical images. Each network was trained using the preprocessed training dataset and labels, for 25 epochs (one forward and backward pass of all data) in batches of 150. The number of - epochs was chosen to maximise training time and prevent - overfitting\footnote{Overfitting occurs when a model learns from the data - too specifically, and loses its ability to generalise its predictions for - new data (resulting in loss of prediction accuracy)} of the training data, - given current model parameters. The batch size is the number of images sent - through each pass of the network. Using the entire dataset would train the - network quickly, but decrease the network's ability to learn unique features - from the data. Passing one image at a time may allow the model to learn more - about each image, however it would also increase the training time and risk - of overfitting the data. Therefore the batch size was chosen to maintain - training accuracy while minimising training time. + epochs was chosen to maximise training time and prevent overfitting of the + training data, given current model parameters. The batch size is the number + of images sent through each pass of the network. Using the entire dataset + would train the network quickly, but decrease the network's ability to learn + unique features from the data. Passing one image at a time may allow the + model to learn more about each image, however it would also increase the + training time and risk of overfitting the data.
Therefore the batch size was + chosen to maintain training accuracy while minimising training time. \subsection{Neural Network Testing}\label{nnTesting} From 7ccbb169cadd3c291d76d6435df4d6052a472d4d Mon Sep 17 00:00:00 2001 From: "Jip J. Dekker" Date: Fri, 25 May 2018 15:09:41 +1000 Subject: [PATCH 09/11] Making the text more constant --- mini_proj/report/waldo.tex | 41 +++++++++++++++++++------------------- 1 file changed, 20 insertions(+), 21 deletions(-) diff --git a/mini_proj/report/waldo.tex b/mini_proj/report/waldo.tex index 36e76bb..ee035da 100644 --- a/mini_proj/report/waldo.tex +++ b/mini_proj/report/waldo.tex @@ -194,21 +194,20 @@ The Waldo image database consists of images of size 64$\times$64, 128$\times$128, and 256$\times$256 pixels obtained by dividing complete - Where's Waldo? puzzles. Within each set of images, those containing Waldo - are located in a folder called `waldo', and those not containing Waldo, in a - folder called `not\_waldo'. Since Where's Waldo? puzzles are usually densely - populated and contain fine details, the 64$\times$64 pixel set of images - were selected to train and evaluate the machine learning models. These - images provide the added benefit of containing the most individual images of - the three size groups. \\ + ``Where's Waldo?'' puzzles. Within each set of images, those containing + Waldo are located in a folder called \texttt{waldo}, and those not containing + Waldo, in a folder called \texttt{not\_waldo}. Since ``Where's Waldo?'' + puzzles are usually densely populated and contain fine details, the + 64$\times$64 pixel set of images were selected to train and evaluate the + machine learning models. These images provide the added benefit of + containing the most individual images of the three size groups. \\ Each of the 64$\times$64 pixel images were inserted into a Numpy~\cite{numpy} array of images, and a binary value was inserted into a - seperate list at the same index. 
These binary values form the labels for - each image (waldo or not waldo). Colour normalisation was performed on each - so that artefacts in an image's colour profile correspond to meaningful - features of the image (rather than photographic method).\\ - + separate list at the same index. These binary values form the labels for + each image (``Waldo'' or ``not Waldo''). Color normalization was performed + on each so that artifacts in an image's color profile correspond to + meaningful features of the image (rather than photographic method).\\ Each original puzzle is broken down into many images, and only contains one Waldo. Although Waldo might span multiple 64$\times$64 pixel squares, this @@ -222,27 +221,27 @@ methods by exposing each technique to variations of the image during the training phase. \\ - Despite the additional data, there were still over ten times as many - non-Waldo images than Waldo images. Therefore, it was necessary to cull the - no-Waldo data, so that there was an even split of Waldo and non-Waldo - images, improving the representation of true positives in the image data - set. Following preprocessing, the images (and associated labels) were - divided into a training and a test set with a 3:1 split. \\ + Despite the additional data, there were still ten times more ``non-Waldo'' + images than Waldo images. Therefore, it was necessary to cull the + ``non-Waldo'' data, so that there was an even split of ``Waldo'' and + ``non-Waldo'' images, improving the representation of true positives in the + image data set. Following preprocessing, the images (and associated labels) + were divided into a training and a test set with a 3:1 split. \\ \subsection{Neural Network Training}\label{nnTraining} The neural networks used to classify the images were supervised learning models; requiring training on a dataset of typical images. 
Each network was - trained using the preprocessed training dataset and labels, for 25 epochs + trained using the preprocessed training dataset and labels for 25 epochs (one forward and backward pass of all data) in batches of 150. The number of - epochs was chosen to maximise training time and prevent overfitting of the + epochs was chosen to maximize training time and prevent overfitting of the training data, given current model parameters. The batch size is the number of images sent through each pass of the network. Using the entire dataset would train the network quickly, but decrease the network's ability to learn unique features from the data. Passing one image at a time may allow the model to learn more about each image, however it would also increase the training time and risk of overfitting the data. Therefore the batch size was - chosen to maintain training accuracy while minimising training time. + chosen to maintain training accuracy while minimizing training time. \subsection{Neural Network Testing}\label{nnTesting} From ea18a4ca3eecb03ab47372e28d946a4163e80c7d Mon Sep 17 00:00:00 2001 From: "Jip J. Dekker" Date: Fri, 25 May 2018 15:11:00 +1000 Subject: [PATCH 10/11] Missed one --- mini_proj/report/waldo.tex | 18 +++++++++--------- 1 file changed, 9 insertions(+), 9 deletions(-) diff --git a/mini_proj/report/waldo.tex b/mini_proj/report/waldo.tex index ee035da..a7a730b 100644 --- a/mini_proj/report/waldo.tex +++ b/mini_proj/report/waldo.tex @@ -211,15 +211,15 @@ Each original puzzle is broken down into many images, and only contains one Waldo. Although Waldo might span multiple 64$\times$64 pixel squares, this - means that the non-Waldo data far outnumbers the Waldo data. To combat the - bias introduced by the skewed data, all Waldo images were artificially - augmented by performing random rotations, reflections, and introducing - random noise in the image to produce news images. 
In this way, each original - Waldo image was used to produce an additional 10 variations of the image, - inserted into the image array. This provided more variation in the true - positives of the data set and assists in the development of more robust - methods by exposing each technique to variations of the image during the - training phase. \\ + means that the ``non-Waldo'' data far outnumbers the ``Waldo'' data. To + combat the bias introduced by the skewed data, all Waldo images were + artificially augmented by performing random rotations, reflections, and + introducing random noise in the image to produce new images. In this way, + each original Waldo image was used to produce an additional 10 variations of + the image, inserted into the image array. This provided more variation in + the true positives of the data set and assisted in the development of more + robust methods by exposing each technique to variations of the image during + the training phase. \\ Despite the additional data, there were still ten times more ``non-Waldo'' images than Waldo images. Therefore, it was necessary to cull the From 1208bb3246c9a545b405860ee21b67e5a9c47c49 Mon Sep 17 00:00:00 2001 From: "Jip J.
Dekker" Date: Fri, 25 May 2018 16:15:44 +1000 Subject: [PATCH 11/11] Correctly putting the borders in place --- mini_proj/test_nn.py | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/mini_proj/test_nn.py b/mini_proj/test_nn.py index 0d8ec2e..987b6b5 100644 --- a/mini_proj/test_nn.py +++ b/mini_proj/test_nn.py @@ -56,15 +56,17 @@ if __name__ == '__main__': image = cv2.imread("10.jpg") # Split image cuts = image_cut(image, 64, 64) - for i in len(cuts): + for i in range(len(cuts)): # Transform block hsv = color.rgb2hsv(cuts[i]) hsv[:, :, 2] = exposure.equalize_hist(hsv[:, :, 2]) block = color.hsv2rgb(hsv) block = np.rollaxis(block, -1) if is_Wally("Waldo.h5", block): + # if True: # Border block - cuts[i] = cv2.copyMakeBorder(cuts[i],5,5,5,5,cv2.BORDER_CONSTANT,value=RED) + GREEN = [0, 255, 0] + cuts[i] = cv2.copyMakeBorder(cuts[i][1:61,1:61],2,2,2,2,cv2.BORDER_CONSTANT,value=GREEN) # Stitch image TODO! # Show image