diff --git a/wk9/pearson.png b/wk9/pearson.png
new file mode 100644
index 0000000..3fc0e5e
Binary files /dev/null and b/wk9/pearson.png differ
diff --git a/wk9/spearman.png b/wk9/spearman.png
new file mode 100644
index 0000000..73e3ed1
Binary files /dev/null and b/wk9/spearman.png differ
diff --git a/wk9/week9.tex b/wk9/week9.tex
index edec3d2..361097b 100644
--- a/wk9/week9.tex
+++ b/wk9/week9.tex
@@ -12,6 +12,7 @@
\usepackage[utf8]{inputenc} %support umlauts in the input
% Easier compilation
\usepackage{bookmark}
+\usepackage{graphicx}
\begin{document}
\title{Week 9 - Correlation and Regression}
@@ -35,6 +36,8 @@
samples and samples of ranking ranges within the top 200. To this end, we made
use of Microsoft Excel tools and functions of the Python library SciPy.
+ TODO: Describe Python method as a sanity check
+
\section{Results} \label{sec:results}
We performed separate statistical analyses on 10 different samples of the
population, as well as the population itself. This included 11 separate
@@ -75,6 +78,15 @@
places}
\end{table}
+ \begin{figure}[ht]
+ \centering
+ \includegraphics[width=0.6\textwidth]{pearson.png}
+ \includegraphics[width=0.6\textwidth]{spearman.png}
+ \caption{The Pearson (top) and Spearman (bottom) correlation coefficients
+ of the data set as computed by the Pandas Python library}
+ \label{fig:scipy}
+ \end{figure}
+
\section{Discussion} \label{sec:discussion}
The results generally indicate that there is a fairly strong positive
correlation between the height and weight of an individual tennis player,
diff --git a/wk9/wk9.ipynb b/wk9/wk9.ipynb
new file mode 100644
index 0000000..35b7f21
--- /dev/null
+++ b/wk9/wk9.ipynb
@@ -0,0 +1,252 @@
+{
+ "cells": [
+ {
+ "cell_type": "code",
+ "execution_count": 1,
+ "metadata": {},
+ "outputs": [
+ {
+ "name": "stdout",
+ "output_type": "stream",
+ "text": [
+ "Using matplotlib backend: MacOSX\n",
+ "Populating the interactive namespace from numpy and matplotlib\n"
+ ]
+ }
+ ],
+ "source": [
+ "%pylab\n",
+ "%matplotlib inline\n",
+ "import pandas as pd\n",
+ "import numpy as np\n",
+ "import matplotlib.pyplot as plt\n",
+ "from scipy import stats\n",
+ "from matplotlib import colors\n",
+ "\n",
+ "data = pd.read_csv(\"Tennis players 2017-09.csv\")"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 2,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ " \n",
+ "
\n",
+ " \n",
+ " | \n",
+ " DOB | \n",
+ " RANK | \n",
+ " HEIGHT | \n",
+ " Weight | \n",
+ "
\n",
+ " \n",
+ " DOB | \n",
+ " 1 | \n",
+ " 0.277766 | \n",
+ " 0.139684 | \n",
+ " -0.030479 | \n",
+ "
\n",
+ " RANK | \n",
+ " 0.277766 | \n",
+ " 1 | \n",
+ " -0.16755 | \n",
+ " -0.121946 | \n",
+ "
\n",
+ " HEIGHT | \n",
+ " 0.139684 | \n",
+ " -0.16755 | \n",
+ " 1 | \n",
+ " 0.779526 | \n",
+ "
\n",
+ " Weight | \n",
+ " -0.030479 | \n",
+ " -0.121946 | \n",
+ " 0.779526 | \n",
+ " 1 | \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 2,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "def background_gradient(s, m, M, cmap='Wistia', low=0, high=0):\n",
+ "    \"\"\"Return one CSS `background-color` rule per value of `s`.\n",
+ "\n",
+ "    Written to be passed to `Styler.apply`, with the scale bounds and\n",
+ "    colormap supplied as keyword arguments.\n",
+ "\n",
+ "    s:         pandas Series of numbers to colour.\n",
+ "    m, M:      lower and upper bound of the colour scale.\n",
+ "    cmap:      name of the matplotlib colormap to sample.\n",
+ "    low, high: fractions of (M - m) by which the scale is widened below m\n",
+ "               and above M before values are normalised.\n",
+ "    \"\"\"\n",
+ "    span = M - m\n",
+ "    norm = colors.Normalize(m - span * low, M + span * high)\n",
+ "    # plt.get_cmap works across matplotlib releases; plt.cm.get_cmap was\n",
+ "    # deprecated in 3.7 and removed in 3.9.\n",
+ "    rgba = plt.get_cmap(cmap)(norm(s.values))\n",
+ "    return ['background-color: %s' % colors.rgb2hex(c) for c in rgba]\n",
+ "\n",
+ "# Keep only the columns of interest; `data` is reassigned because the\n",
+ "# Spearman cell reads the same name.\n",
+ "data = data[[\"SEX\", \"DOB\", \"RANK\", \"HANDED\", \"Country\", \"HEIGHT\", \"Weight\"]]\n",
+ "# drop_duplicates returns a new frame, so it has to be called and the\n",
+ "# result kept; a bare `data.drop_duplicates` does nothing.\n",
+ "data = data.drop_duplicates()\n",
+ "\n",
+ "# Correlate numeric columns only: pandas >= 2.0 raises on non-numeric\n",
+ "# columns instead of silently skipping them.\n",
+ "pearson = data.select_dtypes(include=\"number\").corr()\n",
+ "pearson.style.apply(background_gradient,\n",
+ "                    cmap='Wistia',\n",
+ "                    m=pearson.min().min(),\n",
+ "                    M=pearson.max().max())"
+ ]
+ },
+ {
+ "cell_type": "code",
+ "execution_count": 3,
+ "metadata": {},
+ "outputs": [
+ {
+ "data": {
+ "text/html": [
+ " \n",
+ " \n",
+ " \n",
+ " | \n",
+ " DOB | \n",
+ " RANK | \n",
+ " HEIGHT | \n",
+ " Weight | \n",
+ "
\n",
+ " \n",
+ " DOB | \n",
+ " 1 | \n",
+ " 0.280386 | \n",
+ " 0.122412 | \n",
+ " 0.00769861 | \n",
+ "
\n",
+ " RANK | \n",
+ " 0.280386 | \n",
+ " 1 | \n",
+ " -0.160006 | \n",
+ " -0.0908714 | \n",
+ "
\n",
+ " HEIGHT | \n",
+ " 0.122412 | \n",
+ " -0.160006 | \n",
+ " 1 | \n",
+ " 0.739246 | \n",
+ "
\n",
+ " Weight | \n",
+ " 0.00769861 | \n",
+ " -0.0908714 | \n",
+ " 0.739246 | \n",
+ " 1 | \n",
+ "
\n",
+ "
"
+ ],
+ "text/plain": [
+ ""
+ ]
+ },
+ "execution_count": 3,
+ "metadata": {},
+ "output_type": "execute_result"
+ }
+ ],
+ "source": [
+ "# Rank-based (Spearman) correlation of the numeric columns. The frame is\n",
+ "# filtered first because pandas >= 2.0 raises on non-numeric columns\n",
+ "# instead of silently skipping them.\n",
+ "spearman = data.select_dtypes(include=\"number\").corr(method=\"spearman\")\n",
+ "spearman.style.apply(background_gradient,\n",
+ "                     cmap='Wistia',\n",
+ "                     m=spearman.min().min(),\n",
+ "                     M=spearman.max().max())"
+ ]
+ }
+ ],
+ "metadata": {
+ "kernelspec": {
+ "display_name": "Python 3",
+ "language": "python",
+ "name": "python3"
+ },
+ "language_info": {
+ "codemirror_mode": {
+ "name": "ipython",
+ "version": 3
+ },
+ "file_extension": ".py",
+ "mimetype": "text/x-python",
+ "name": "python",
+ "nbconvert_exporter": "python",
+ "pygments_lexer": "ipython3",
+ "version": "3.6.4"
+ }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}