diff --git a/mini_proj/benchmark_results.csv b/mini_proj/benchmark_results.csv
new file mode 100644
index 0000000..280fff5
--- /dev/null
+++ b/mini_proj/benchmark_results.csv
@@ -0,0 +1,5 @@
+name,time,accuracy
+svm,7.871559143066406,0.8446601941747572
+tree,0.25446152687072754,0.7087378640776699
+naive_bayes,0.12949371337890625,0.8252427184466019
+forest,0.2792677879333496,0.9514563106796117
diff --git a/mini_proj/traditionals.py b/mini_proj/traditionals.py
index 77ede83..08668d2 100644
--- a/mini_proj/traditionals.py
+++ b/mini_proj/traditionals.py
@@ -3,19 +3,27 @@ import time as t
 from sklearn import svm, ensemble, naive_bayes, neighbors
 from _image_classifier import ImageClassifier
 
+def accuracy(y_true, y_pred):
+    """returns the accuracy"""
+    y_pred = np.round(y_pred)
+    return (y_true == y_pred).mean()
+
 def precision(y_true, y_pred):
+    """returns the precision"""
     y_pred = np.round(y_pred)
     num = np.sum(np.logical_and(y_true, y_pred))
     den = np.sum(y_pred)
     return np.divide(num, den)
 
 def recall(y_true, y_pred):
+    """returns the recall"""
     y_pred = np.round(y_pred)
     num = np.sum(np.logical_and(y_true, y_pred))
     den = np.sum(y_true)
     return np.divide(num, den)
 
 def f_measure(y_true, y_pred):
+    """returns the F1 measure"""
     p = precision(y_true, y_pred)
     r = recall(y_true, y_pred)
     return 2 * p * r / (p + r)
@@ -39,18 +47,40 @@ my_metric_test = lambda iclf, f: metric_test(iclf, f, im_test, lbl_test)
 svm_iclf = ImageClassifier(svm.SVC)
 tree_iclf = ImageClassifier(neighbors.KNeighborsClassifier)
 naive_bayes_iclf = ImageClassifier(naive_bayes.GaussianNB)
-ensemble_iclf = ImageClassifier(ensemble.RandomForestClassifier)
+forest_iclf = ImageClassifier(ensemble.RandomForestClassifier)
 
 classifiers = [
     svm_iclf,
     tree_iclf,
     naive_bayes_iclf,
-    ensemble_iclf,
+    forest_iclf,
 ]
 
-for clf in classifiers:
+classifier_names = [
+    "svm",
+    "tree",
+    "naive_bayes",
+    "forest",
+]
+
+# print("name,time,accuracy,precision,recall,f_measure")
+print("name,time,accuracy")
+for clf, name in zip(classifiers, classifier_names):
     start = t.time() # Records time before training
     clf.fit(im_train, lbl_train)
     end = t.time() # Records time after tranining
-    print("training time:", end-start)
-    print(clf.score(im_test, lbl_test))
+    y_pred = clf.predict(im_test)
+    print(
+        name,
+        end-start,
+        clf.score(im_test, lbl_test),
+        # precision(lbl_test, y_pred),
+        # recall(lbl_test, y_pred),
+        # f_measure(lbl_test, y_pred),
+        sep=","
+    )
+    # print("training time:\t", end-start)
+    # print("Accuracy:\t", clf.score(im_test, lbl_test))
+    # print("Precision:\t", precision(lbl_test, y_pred))
+    # print("Recall:\t\t", recall(lbl_test, y_pred))
+    # print("F1-measure:\t", f_measure(lbl_test, y_pred))
diff --git a/mini_proj/vis.py b/mini_proj/vis.py
new file mode 100644
index 0000000..df23c72
--- /dev/null
+++ b/mini_proj/vis.py
@@ -0,0 +1,20 @@
+import pandas as pd
+import matplotlib.pyplot as plt
+
+df = pd.read_csv("benchmark_results.csv")
+names = df.name
+df = df.drop("name", axis=1)
+
+plt.figure()
+plt.bar(names, df.time)
+plt.title("Training times")
+plt.xlabel("Classifier")
+plt.ylabel("Time (s)")
+
+plt.figure()
+plt.bar(names, df.accuracy)
+plt.title("Prediction Accuracy")
+plt.xlabel("Classifier")
+plt.ylabel("Accuracy")
+
+plt.show()