diff --git a/wk9/pearson.png b/wk9/pearson.png new file mode 100644 index 0000000..3fc0e5e Binary files /dev/null and b/wk9/pearson.png differ diff --git a/wk9/spearman.png b/wk9/spearman.png new file mode 100644 index 0000000..73e3ed1 Binary files /dev/null and b/wk9/spearman.png differ diff --git a/wk9/week9.tex b/wk9/week9.tex index edec3d2..361097b 100644 --- a/wk9/week9.tex +++ b/wk9/week9.tex @@ -12,6 +12,7 @@ \usepackage[utf8]{inputenc} %support umlauts in the input % Easier compilation \usepackage{bookmark} +\usepackage{graphicx} \begin{document} \title{Week 9 - Correlation and Regression} @@ -35,6 +36,8 @@ samples and samples of ranking ranges within the top 200. To this end, we made use of Microsoft Excel tools and functions of the Python library SciPy. + TODO: Describe Python method as a sanity check + \section{Results} \label{sec:results} We performed separate statistical analyses on 10 different samples of the population, as well as the population itself. This included 11 separate @@ -75,6 +78,15 @@ places} \end{table} + \begin{figure}[ht] + \centering + \includegraphics[width=0.6\textwidth]{pearson.png} + \includegraphics[width=0.6\textwidth]{spearman.png} + \caption{The Pearson (top) and Spearman (bottom) correlation coefficients + of the data set as computed by the Pandas Python library} + \label{fig:scipy} + \end{figure} + \section{Discussion} \label{sec:discussion} The results generally indicate that there is a fairly strong positive correlation between the weight and weight of an individual tennis player, diff --git a/wk9/wk9.ipynb b/wk9/wk9.ipynb new file mode 100644 index 0000000..35b7f21 --- /dev/null +++ b/wk9/wk9.ipynb @@ -0,0 +1,252 @@ +{ + "cells": [ + { + "cell_type": "code", + "execution_count": 1, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Using matplotlib backend: MacOSX\n", + "Populating the interactive namespace from numpy and matplotlib\n" + ] + } + ], + "source": [ + 
"%pylab\n", + "%matplotlib inline\n", + "import pandas as pd\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "from scipy import stats\n", + "from matplotlib import colors\n", + "\n", + "data = pd.read_csv(\"Tennis players 2017-09.csv\")" + ] + }, + { + "cell_type": "code", + "execution_count": 2, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
DOBRANKHEIGHTWeight
DOB10.2777660.139684-0.030479
RANK0.2777661-0.16755-0.121946
HEIGHT0.139684-0.1675510.779526
Weight-0.030479-0.1219460.7795261
" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 2, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "def background_gradient(s, m, M, cmap='Wistia', low=0, high=0):\n", + " rng = M - m\n", + " norm = colors.Normalize(m - (rng * low),\n", + " M + (rng * high))\n", + " normed = norm(s.values)\n", + " c = [colors.rgb2hex(x) for x in plt.cm.get_cmap(cmap)(normed)]\n", + " return ['background-color: %s' % color for color in c]\n", + "\n", + "data = data[[\"SEX\", \"DOB\", \"RANK\", \"HANDED\", \"Country\", \"HEIGHT\", \"Weight\"]]\n", + "data.drop_duplicates\n", + "\n", + "pearson = data.corr()\n", + "pearson.style.apply(background_gradient,\n", + " cmap='Wistia',\n", + " m=pearson.min().min(),\n", + " M=pearson.max().max()\n", + ")" + ] + }, + { + "cell_type": "code", + "execution_count": 3, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
DOBRANKHEIGHTWeight
DOB10.2803860.1224120.00769861
RANK0.2803861-0.160006-0.0908714
HEIGHT0.122412-0.16000610.739246
Weight0.00769861-0.09087140.7392461
" + ], + "text/plain": [ + "" + ] + }, + "execution_count": 3, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "spearman = data.corr(method=\"spearman\")\n", + "spearman.style.apply(background_gradient,\n", + " cmap='Wistia',\n", + " m=spearman.min().min(),\n", + " M=spearman.max().max()\n", + ")" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.6.4" + } + }, + "nbformat": 4, + "nbformat_minor": 2 +}