1
0

Add python checks to the report

This commit is contained in:
Jip J. Dekker 2018-05-04 11:57:16 +10:00
parent 787989ca2d
commit f73766cfa2
No known key found for this signature in database
GPG Key ID: 9CB1B58997DD0D58
4 changed files with 264 additions and 0 deletions

BIN
wk9/pearson.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 90 KiB

BIN
wk9/spearman.png Normal file

Binary file not shown.

After

Width:  |  Height:  |  Size: 88 KiB

View File

@ -12,6 +12,7 @@
\usepackage[utf8]{inputenc} %support umlauts in the input \usepackage[utf8]{inputenc} %support umlauts in the input
% Easier compilation % Easier compilation
\usepackage{bookmark} \usepackage{bookmark}
\usepackage{graphicx}
\begin{document} \begin{document}
\title{Week 9 - Correlation and Regression} \title{Week 9 - Correlation and Regression}
@ -35,6 +36,8 @@
samples and samples of ranking ranges within the top 200. To this end, we made samples and samples of ranking ranges within the top 200. To this end, we made
use of Microsoft Excel tools and functions of the Python library SciPy. use of Microsoft Excel tools and functions of the Python library SciPy.
TODO: Describe Python method as a sanity check
\section{Results} \label{sec:results} \section{Results} \label{sec:results}
We performed separate statistical analyses on 10 different samples of the We performed separate statistical analyses on 10 different samples of the
population, as well as the population itself. This included 11 separate population, as well as the population itself. This included 11 separate
@ -75,6 +78,15 @@
places} places}
\end{table} \end{table}
\begin{figure}[ht]
\centering
\includegraphics[width=0.6\textwidth]{pearson.png}
\includegraphics[width=0.6\textwidth]{spearman.png}
\caption{The Pearson (top) and Spearman (bottom) correlation coefficients
of the data set as computed by the Pandas Python library}
\label{fig:scipy}
\end{figure}
\section{Discussion} \label{sec:discussion} \section{Discussion} \label{sec:discussion}
The results generally indicate that there is a fairly strong positive The results generally indicate that there is a fairly strong positive
correlation between the height and weight of an individual tennis player, correlation between the height and weight of an individual tennis player,

252
wk9/wk9.ipynb Normal file
View File

@ -0,0 +1,252 @@
{
"cells": [
{
"cell_type": "code",
"execution_count": 1,
"metadata": {},
"outputs": [
{
"name": "stdout",
"output_type": "stream",
"text": [
"Using matplotlib backend: MacOSX\n",
"Populating the interactive namespace from numpy and matplotlib\n"
]
}
],
"source": [
"%pylab\n",
"%matplotlib inline\n",
"import pandas as pd\n",
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"from scipy import stats\n",
"from matplotlib import colors\n",
"\n",
"data = pd.read_csv(\"Tennis players 2017-09.csv\")"
]
},
{
"cell_type": "code",
"execution_count": 2,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<style type=\"text/css\" >\n",
" #T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row0_col0 {\n",
" background-color: #fc7f00;\n",
" } #T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row0_col1 {\n",
" background-color: #ffd20c;\n",
" } #T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row0_col2 {\n",
" background-color: #ffe619;\n",
" } #T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row0_col3 {\n",
" background-color: #f1f44d;\n",
" } #T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row1_col0 {\n",
" background-color: #ffd20c;\n",
" } #T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row1_col1 {\n",
" background-color: #fc7f00;\n",
" } #T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row1_col2 {\n",
" background-color: #e4ff7a;\n",
" } #T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row1_col3 {\n",
" background-color: #e8fc6c;\n",
" } #T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row2_col0 {\n",
" background-color: #ffe619;\n",
" } #T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row2_col1 {\n",
" background-color: #e4ff7a;\n",
" } #T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row2_col2 {\n",
" background-color: #fc7f00;\n",
" } #T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row2_col3 {\n",
" background-color: #fe9800;\n",
" } #T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row3_col0 {\n",
" background-color: #f1f44d;\n",
" } #T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row3_col1 {\n",
" background-color: #e8fc6c;\n",
" } #T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row3_col2 {\n",
" background-color: #fe9800;\n",
" } #T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row3_col3 {\n",
" background-color: #fc7f00;\n",
" }</style> \n",
"<table id=\"T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2\" > \n",
"<thead> <tr> \n",
" <th class=\"blank level0\" ></th> \n",
" <th class=\"col_heading level0 col0\" >DOB</th> \n",
" <th class=\"col_heading level0 col1\" >RANK</th> \n",
" <th class=\"col_heading level0 col2\" >HEIGHT</th> \n",
" <th class=\"col_heading level0 col3\" >Weight</th> \n",
" </tr></thead> \n",
"<tbody> <tr> \n",
" <th id=\"T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2level0_row0\" class=\"row_heading level0 row0\" >DOB</th> \n",
" <td id=\"T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row0_col0\" class=\"data row0 col0\" >1</td> \n",
" <td id=\"T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row0_col1\" class=\"data row0 col1\" >0.277766</td> \n",
" <td id=\"T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row0_col2\" class=\"data row0 col2\" >0.139684</td> \n",
" <td id=\"T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row0_col3\" class=\"data row0 col3\" >-0.030479</td> \n",
" </tr> <tr> \n",
" <th id=\"T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2level0_row1\" class=\"row_heading level0 row1\" >RANK</th> \n",
" <td id=\"T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row1_col0\" class=\"data row1 col0\" >0.277766</td> \n",
" <td id=\"T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row1_col1\" class=\"data row1 col1\" >1</td> \n",
" <td id=\"T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row1_col2\" class=\"data row1 col2\" >-0.16755</td> \n",
" <td id=\"T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row1_col3\" class=\"data row1 col3\" >-0.121946</td> \n",
" </tr> <tr> \n",
" <th id=\"T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2level0_row2\" class=\"row_heading level0 row2\" >HEIGHT</th> \n",
" <td id=\"T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row2_col0\" class=\"data row2 col0\" >0.139684</td> \n",
" <td id=\"T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row2_col1\" class=\"data row2 col1\" >-0.16755</td> \n",
" <td id=\"T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row2_col2\" class=\"data row2 col2\" >1</td> \n",
" <td id=\"T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row2_col3\" class=\"data row2 col3\" >0.779526</td> \n",
" </tr> <tr> \n",
" <th id=\"T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2level0_row3\" class=\"row_heading level0 row3\" >Weight</th> \n",
" <td id=\"T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row3_col0\" class=\"data row3 col0\" >-0.030479</td> \n",
" <td id=\"T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row3_col1\" class=\"data row3 col1\" >-0.121946</td> \n",
" <td id=\"T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row3_col2\" class=\"data row3 col2\" >0.779526</td> \n",
" <td id=\"T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row3_col3\" class=\"data row3 col3\" >1</td> \n",
" </tr></tbody> \n",
"</table> "
],
"text/plain": [
"<pandas.io.formats.style.Styler at 0x1a197d7b38>"
]
},
"execution_count": 2,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"def background_gradient(s, m, M, cmap='Wistia', low=0, high=0):\n",
"    \"\"\"Return one CSS ``background-color`` rule per value in ``s``.\n",
"\n",
"    Intended for ``Styler.apply``. Values are normalised against the range\n",
"    ``[m - (M - m) * low, M + (M - m) * high]`` and looked up in the\n",
"    matplotlib colormap named ``cmap``.\n",
"\n",
"    s    -- object exposing ``.values`` (the values to colour)\n",
"    m, M -- lower / upper bound of the colour scale\n",
"    low, high -- fraction of ``M - m`` by which to widen the scale at each end\n",
"    \"\"\"\n",
"    rng = M - m\n",
"    norm = colors.Normalize(m - (rng * low),\n",
"                            M + (rng * high))\n",
"    normed = norm(s.values)\n",
"    # plt.get_cmap works on old and new matplotlib; plt.cm.get_cmap was\n",
"    # deprecated in matplotlib 3.7 and removed in 3.9.\n",
"    c = [colors.rgb2hex(x) for x in plt.get_cmap(cmap)(normed)]\n",
"    return ['background-color: %s' % color for color in c]\n",
"\n",
"data = data[[\"SEX\", \"DOB\", \"RANK\", \"HANDED\", \"Country\", \"HEIGHT\", \"Weight\"]]\n",
"# drop_duplicates has to be called and its result kept; the bare attribute\n",
"# access `data.drop_duplicates` did nothing.\n",
"data = data.drop_duplicates()\n",
"\n",
"# Correlate the numeric columns only: recent pandas raises on the non-numeric\n",
"# columns (SEX, HANDED, Country) instead of silently skipping them.\n",
"pearson = data.select_dtypes(include=\"number\").corr()\n",
"pearson.style.apply(background_gradient,\n",
"                    cmap='Wistia',\n",
"                    m=pearson.min().min(),\n",
"                    M=pearson.max().max()\n",
")"
]
},
{
"cell_type": "code",
"execution_count": 3,
"metadata": {},
"outputs": [
{
"data": {
"text/html": [
"<style type=\"text/css\" >\n",
" #T_727bef98_4f3e_11e8_a315_787b8ab7acb2row0_col0 {\n",
" background-color: #fc7f00;\n",
" } #T_727bef98_4f3e_11e8_a315_787b8ab7acb2row0_col1 {\n",
" background-color: #ffd20c;\n",
" } #T_727bef98_4f3e_11e8_a315_787b8ab7acb2row0_col2 {\n",
" background-color: #fee91d;\n",
" } #T_727bef98_4f3e_11e8_a315_787b8ab7acb2row0_col3 {\n",
" background-color: #f4f242;\n",
" } #T_727bef98_4f3e_11e8_a315_787b8ab7acb2row1_col0 {\n",
" background-color: #ffd20c;\n",
" } #T_727bef98_4f3e_11e8_a315_787b8ab7acb2row1_col1 {\n",
" background-color: #fc7f00;\n",
" } #T_727bef98_4f3e_11e8_a315_787b8ab7acb2row1_col2 {\n",
" background-color: #e4ff7a;\n",
" } #T_727bef98_4f3e_11e8_a315_787b8ab7acb2row1_col3 {\n",
" background-color: #eafa63;\n",
" } #T_727bef98_4f3e_11e8_a315_787b8ab7acb2row2_col0 {\n",
" background-color: #fee91d;\n",
" } #T_727bef98_4f3e_11e8_a315_787b8ab7acb2row2_col1 {\n",
" background-color: #e4ff7a;\n",
" } #T_727bef98_4f3e_11e8_a315_787b8ab7acb2row2_col2 {\n",
" background-color: #fc7f00;\n",
" } #T_727bef98_4f3e_11e8_a315_787b8ab7acb2row2_col3 {\n",
" background-color: #ff9d00;\n",
" } #T_727bef98_4f3e_11e8_a315_787b8ab7acb2row3_col0 {\n",
" background-color: #f4f242;\n",
" } #T_727bef98_4f3e_11e8_a315_787b8ab7acb2row3_col1 {\n",
" background-color: #eafa63;\n",
" } #T_727bef98_4f3e_11e8_a315_787b8ab7acb2row3_col2 {\n",
" background-color: #ff9d00;\n",
" } #T_727bef98_4f3e_11e8_a315_787b8ab7acb2row3_col3 {\n",
" background-color: #fc7f00;\n",
" }</style> \n",
"<table id=\"T_727bef98_4f3e_11e8_a315_787b8ab7acb2\" > \n",
"<thead> <tr> \n",
" <th class=\"blank level0\" ></th> \n",
" <th class=\"col_heading level0 col0\" >DOB</th> \n",
" <th class=\"col_heading level0 col1\" >RANK</th> \n",
" <th class=\"col_heading level0 col2\" >HEIGHT</th> \n",
" <th class=\"col_heading level0 col3\" >Weight</th> \n",
" </tr></thead> \n",
"<tbody> <tr> \n",
" <th id=\"T_727bef98_4f3e_11e8_a315_787b8ab7acb2level0_row0\" class=\"row_heading level0 row0\" >DOB</th> \n",
" <td id=\"T_727bef98_4f3e_11e8_a315_787b8ab7acb2row0_col0\" class=\"data row0 col0\" >1</td> \n",
" <td id=\"T_727bef98_4f3e_11e8_a315_787b8ab7acb2row0_col1\" class=\"data row0 col1\" >0.280386</td> \n",
" <td id=\"T_727bef98_4f3e_11e8_a315_787b8ab7acb2row0_col2\" class=\"data row0 col2\" >0.122412</td> \n",
" <td id=\"T_727bef98_4f3e_11e8_a315_787b8ab7acb2row0_col3\" class=\"data row0 col3\" >0.00769861</td> \n",
" </tr> <tr> \n",
" <th id=\"T_727bef98_4f3e_11e8_a315_787b8ab7acb2level0_row1\" class=\"row_heading level0 row1\" >RANK</th> \n",
" <td id=\"T_727bef98_4f3e_11e8_a315_787b8ab7acb2row1_col0\" class=\"data row1 col0\" >0.280386</td> \n",
" <td id=\"T_727bef98_4f3e_11e8_a315_787b8ab7acb2row1_col1\" class=\"data row1 col1\" >1</td> \n",
" <td id=\"T_727bef98_4f3e_11e8_a315_787b8ab7acb2row1_col2\" class=\"data row1 col2\" >-0.160006</td> \n",
" <td id=\"T_727bef98_4f3e_11e8_a315_787b8ab7acb2row1_col3\" class=\"data row1 col3\" >-0.0908714</td> \n",
" </tr> <tr> \n",
" <th id=\"T_727bef98_4f3e_11e8_a315_787b8ab7acb2level0_row2\" class=\"row_heading level0 row2\" >HEIGHT</th> \n",
" <td id=\"T_727bef98_4f3e_11e8_a315_787b8ab7acb2row2_col0\" class=\"data row2 col0\" >0.122412</td> \n",
" <td id=\"T_727bef98_4f3e_11e8_a315_787b8ab7acb2row2_col1\" class=\"data row2 col1\" >-0.160006</td> \n",
" <td id=\"T_727bef98_4f3e_11e8_a315_787b8ab7acb2row2_col2\" class=\"data row2 col2\" >1</td> \n",
" <td id=\"T_727bef98_4f3e_11e8_a315_787b8ab7acb2row2_col3\" class=\"data row2 col3\" >0.739246</td> \n",
" </tr> <tr> \n",
" <th id=\"T_727bef98_4f3e_11e8_a315_787b8ab7acb2level0_row3\" class=\"row_heading level0 row3\" >Weight</th> \n",
" <td id=\"T_727bef98_4f3e_11e8_a315_787b8ab7acb2row3_col0\" class=\"data row3 col0\" >0.00769861</td> \n",
" <td id=\"T_727bef98_4f3e_11e8_a315_787b8ab7acb2row3_col1\" class=\"data row3 col1\" >-0.0908714</td> \n",
" <td id=\"T_727bef98_4f3e_11e8_a315_787b8ab7acb2row3_col2\" class=\"data row3 col2\" >0.739246</td> \n",
" <td id=\"T_727bef98_4f3e_11e8_a315_787b8ab7acb2row3_col3\" class=\"data row3 col3\" >1</td> \n",
" </tr></tbody> \n",
"</table> "
],
"text/plain": [
"<pandas.io.formats.style.Styler at 0x111a3b198>"
]
},
"execution_count": 3,
"metadata": {},
"output_type": "execute_result"
}
],
"source": [
"# Spearman (rank) correlation matrix, restricted to the numeric columns so it\n",
"# also runs on pandas versions that no longer skip non-numeric columns.\n",
"spearman = data.select_dtypes(include=\"number\").corr(method=\"spearman\")\n",
"spearman.style.apply(background_gradient,\n",
"                     cmap='Wistia',\n",
"                     m=spearman.min().min(),\n",
"                     M=spearman.max().max()\n",
")"
]
}
],
"metadata": {
"kernelspec": {
"display_name": "Python 3",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.6.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}