Merge branch 'master' of https://github.com/Dekker1/ResearchMethods

2018-05-04 16:06:10 +10:00 · 2018-05-04 16:06:10 +10:00 · 89eeff700e
commit 89eeff700e
parent 75496dbd13 03beb98766
9 changed files with 1277 additions and 341 deletions
--- a/wk7/week7.tex
+++ b/wk7/week7.tex
@ -36,6 +36,76 @@

 	\section{Results} \label{sec:results}

+	\subsection{The advantage of height}
+
+	\begin{table}[ht]
+		\centering
+		\label{tab:chi-height}
+		\begin{tabular}{|l|r|r|r|r|}
+			\hline
+												& \textbf{M: 168 - 188} & \textbf{M: 189 - 210} & \textbf{F: 155 - 171} & \textbf{F: 172 - 189} \\ \hline
+		\textbf{1 - 99}    & 67 / 73               & 32 / 26               & 38 / 42               & 60 / 55               \\
+		\textbf{100 - 199} & 69 / 72               & 30 / 26               & 31 / 27               & 32 / 36               \\
+		\textbf{200 - 299} & 75 / 68               & 17 / 25               & 18 / 17               & 22 / 23               \\
+		\textbf{300 - 399} & 61 / 60               & 21 / 23               & 11 / 12               & 17 / 16               \\
+		\textbf{400 - 499} & 59 / 60               & 22 / 22               & 7 / 6
+		& 7 / 8 \\
+		\hline
+		\end{tabular}
+		\caption{Observed / Expected values used for the $\chi^2$-test. The groups are divided by their rank (vertical) and, per gender, their height (horizontal).}
+	\end{table}
+
+	$$
+		\chi^2 \approx 7.697606186049128
+	$$
+
+	$$
+		df = (5-1)(4-1) = 12
+	$$
+
+	$$
+		p = P(\chi^2_{12} \geq 7.69\dots) \approx 0.8082925814979871
+	$$
+
+
+	\textbf {t-test men:} T score: 1.711723, P score: 0.043815
+
+	\textbf {t-test women:} T score: 1.860241, P score: 0.032030
+
+
+	\subsection{The advantage of left-handedness}
+
+	\begin{table}[ht]
+		\centering
+		\label{tab:chi-hand}
+		\begin{tabular}{|l|l|l|l|l|l|}
+		\hline
+								& \textbf{1 - 99} & \textbf{100 - 199} & \textbf{200 - 299} & \textbf{300 - 399} & \textbf{400 - 499} \\
+		\hline
+		\textbf{L} & 22 / 21         & 23 / 18            & 17 / 15            & 6  / 12            & 8 / 10             \\
+		\textbf{R} & 174 / 177       & 139 / 144          & 117 / 119          &
+		105 / 98           & 88 / 86 \\
+		\hline
+		\end{tabular}
+		\caption{Observed / Expected values used for the $\chi^2$-test. The groups are divided by which hand they use (vertical) and their rank (horizontal).}
+	\end{table}
+
+
+	$$
+		\chi^2 \approx 6.467312944404331
+	$$
+
+	$$
+		df = (2-1)(5-1) = 4
+	$$
+
+	$$
+		p = P(\chi^2_{4} \geq 6.46\dots) \approx 0.1668616190847413
+	$$
+
+	\textbf {t-test:} T score: 0.451694, P score: 0.325815
+
+
 	\section{Discussion} \label{sec:discussion}

 \end{document}
--- a/wk7/wk7.ipynb
+++ b/wk7/wk7.ipynb
--- a/wk8/A1_data.xlsx
+++ b/wk8/A1_data.xlsx
--- a/wk8/week8.tex
+++ b/wk8/week8.tex
@ -12,6 +12,8 @@
 \usepackage[utf8]{inputenc} %support umlauts in the input
 % Easier compilation
 \usepackage{bookmark}
+\usepackage{natbib}
+\usepackage{graphicx}

 \begin{document}
 	\title{Week 8 - Quantitative data analysis}
@ -25,8 +27,161 @@

 	\section{Method} \label{sec:method}

+	The purpose of this report is to re-analyse the data presented in the paper by
+	\cite{dong2018methods}, which investigates the effect that protests (as an
+	example of disruptive social behaviours in general) have on consumer
+	behaviours. \cite{dong2018methods} hypothesise that protests decrease
+	consumer behaviour in the surrounding area of the event, and suggest that
+	consumer spending could be used as an additional non-traditional economic
+	indicator and as a gauge of consumer sentiment. Consumer spending was analysed
+	using credit card transaction data from a metropolitan area within a country
+	that is part of The Organisation for Economic Co-operation and Development
+	(OECD). Although \cite{dong2018methods} investigate temporal and spatial
+	effects on consumer spending, for the purposes of this analysis, only the
+	spatial effect of variables (with relation to the geographical distance from
+	the event) is considered. The dataset consists of variables measured as a
+	function of the distance from the event (in km), including: the number of
+	customers, the median spending amount, the number of transactions, and the
+	total sales amount.
+
+	The re-analysis is conducted on the data provided in the
+	paper~\citep{dong2018methods}, using Python in conjunction with packages such as
+	pandas, matplotlib, numpy and seaborn, to process and visualise the data. As
+	noted, only the spatial data and the variables listed above are
+	considered, for the reference days and the change occurring on Day 62 (the day of
+	the first socially disruptive event). The distribution of the difference between
+	the reference period and Day 62 is visualised by plotting a histogram for each
+	variable. Since the decrease of each of the variables from the reference period
+	to Day 62 is provided, the mean and the median of these distributions can be
+	used to perform a one-sample (as we are given the difference) hypothesis
+	test to assess whether the protests on Day 62 had a discernible effect.
+
+	Assuming the mean of each variable over the reference period is the midpoint
+	between their respective maximum and minimum values, we can reconstruct
+	approximate actual values for Day 62 (given the decrease in value on Day 62
+	from the reference period). By comparing these values to the range over the
+	reference period, another assessment can be made to determine whether the data
+	presents a discernible effect on consumer spending as a result of social
+	disruption, scaling with distance.
+
+	Although time series data was not explicitly provided, by extrapolating
+	information from a graph in \cite{dong2018methods} we can quantify the decrease
+	in number of customers and median spending on Day 62 using information about the
+	reference days (from 43 to 61). After collecting the values for each of the
+	reference days (43-61), the mean and standard deviation of this sample can be
+	calculated. Assuming a normal distribution of the data, we can calculate a
+	z-score for each observation on Day 62, and use this to assess the original
+	hypothesis.
+
+	By performing each of the above tests, a re-analysis will be conducted on
+	\cite{dong2018methods}'s paper hypothesising that consumer spending decreases
+	as a result of social events such as protests. In the Results section, we will
+	perform the statistical analyses described above. The results of these tests
+	will then be explored in the Discussion section, along with assumptions and
+	limitations of the tests and what can be concluded from them.
+
 	\section{Results} \label{sec:results}

+	For each of the variables in the given data (number of customers, median
+	spending amount, number of transactions, and sales totals) we construct a
+	histogram of the decrease of each (on Day 62). We then compute the mean and
+	median of the data so we can proceed to perform a one-sample hypothesis test.
+
+	\begin{figure}[ht]
+		\centering
+		\includegraphics[width=\textwidth]{distr.png}
+		\caption{Distribution of each of the variables recorded in the data, as a function of the distance from an event}
+		\label{fig:distr}
+	\end{figure}
+
+	Using a mean/median of the reference period, obtained by taking the midpoint of the minimum and maximum values for each distance measure, a value can be reconstructed for the measurement on Day 62 (for each location) using:
+
+	\begin{equation}
+		\textrm{value} = \frac{\textrm{min} + \text{max}}{2} - \textrm{decrease.}
+		\tag{1}
+	\end{equation}
+\\
+	We can then plot the maximum and minimum values for the reference period, as well as the reconstructed Day 62 variables to observe the behaviour of consumer spending after the event.
+
+	\begin{figure}[ht]
+		\centering
+		\includegraphics[width=\textwidth]{effect.png}
+		\caption{The reconstructed values for Day 62 of each variable plotted against their respective minimums and maximums over the reference period}
+		\label{fig:effect}
+	\end{figure}
+
+	Using the data recorded, for each of the three distances recorded, the mean and standard deviation of the reference period can be calculated. The z-score for each observed value on Day 62 can be computed using:
+
+	\begin{equation}
+	\textrm{Z} = \frac{\textrm{X} - \mu}{\sigma},
+	\tag{2}
+	\end{equation}
+\\
+	where X is the observed value, $\mu$ and $\sigma$ are the mean and standard deviation (respectively) of the reference period.
+
+	\begin{table}[ht]
+		\centering
+		\label{my-label}
+		\begin{tabular}{|l|l|r|r|}
+		\hline
+		\textbf{Variable}        & \textbf{Distance} & \textbf{X} & \textbf{Z} \\
+		\hline
+		\textbf{Customers}       & \textless 2km     & -0.600     & -6.87798   \\
+		\textbf{Customers}       & 2km - 4km         & -0.200     & -3.33253   \\
+		\textbf{Customers}       & \textgreater 4km  & -0.100     & -3.70740   \\
+		\textbf{Median Spending} & \textless 2km     & -0.200     & -3.05849   \\
+		\textbf{Median Spending} & 2km - 4km         & -0.100     & -1.46508   \\
+		\textbf{Median Spending} & \textgreater 4km  & -0.035     & -1.99199   \\
+		\hline
+		\end{tabular}
+		\caption{The $Z$ score computed using equation 2 and the temporal data}
+	\end{table}
+
 	\section{Discussion} \label{sec:discussion}

+	As shown in each of the subplots of Figure 1, the mean and median values of
+	the decrease in each of the distributions are greater than zero (note: higher
+	values of the decrease variable indicate a larger decrease/negative change).
+	These mean and median values can be used to perform a one-sample hypothesis
+	test: since each of the mean/median values is greater than
+	zero, we can infer that the event had a net decreasing effect on the number of
+	customers, median spending amount, number of transactions, and total sales
+	amount.
+
+	In Figure \ref{fig:effect} values were approximated for each variable on Day
+	62, using Equation 1, and plotted against the minimum and maximum values of
+	the respective variables. This allows us to visually assess whether the
+	reconstructed value for Day 62 lies outside the range of recorded values for
+	the reference period, and presents uncharacteristic behaviour. A decrease is
+	evident in each of the variables after the event has occurred (on Day 62)
+	within a distance of approximately 2 km, and appears to stabilise thereafter.
+	This provides support to \cite{dong2018methods}'s hypothesis that consumer
+	spending is affected by socially disruptive events, and also provides evidence
+	to the notion of spatial scaling of this effect (based on the event location).
+	It is important to note that the approximation used in this technique is
+	subject to a level of error due to the ideal calculation of the mean/median of
+	the reference data as the midpoint between the minimum and maximum values
+	provided.
+
+	Extrapolating data from a graph in \cite{dong2018methods} provided time series
+	data (divided into three radii) to analyse. This data was collected by
+	visually estimating the values from the graph, which will inherently introduce
+	a source of error. However, by computing the z-score as described in Equation
+	2, the table of z-scores in Section~\ref{sec:results} was constructed. Each of the z-score values
+	in the table is negative, indicating a decrease in both the number of
+	customers and median spending on Day 62. The much larger magnitude of z-scores
+	for the \textless 2km distance ring for both variables is in agreement with earlier
+	discussion, strengthening the hypothesis of the spatial correlation of
+	consumer spending.
+
+	Each of the above tests have agreed on the spatial and temporal correlation of
+	consumer spending and socially disruptive events. With the limited data
+	available, we can therefore concur with the hypothesis of Dong et al. that
+	consumer spending decreases in the area around disruptive social behaviour,
+	after finding the temporal correlation on Day 62, as well as the spatially
+	decreasing effect further from the event.
+
+	\bibliographystyle{humannat}
+	\bibliography{references}
+
 \end{document}
--- a/wk8/wk8.ipynb
+++ b/wk8/wk8.ipynb
--- a/wk9/pearson.png
+++ b/wk9/pearson.png
--- a/wk9/spearman.png
+++ b/wk9/spearman.png
--- a/wk9/week9.tex
+++ b/wk9/week9.tex
@ -12,6 +12,7 @@
 \usepackage[utf8]{inputenc} %support umlauts in the input
 % Easier compilation
 \usepackage{bookmark}
+\usepackage{graphicx}

 \begin{document}
 	\title{Week 9 - Correlation and Regression}
@ -22,6 +23,12 @@
 	\maketitle

 	\section{Introduction} \label{sec:introduction}
+	We present a report on the relationship between the heights and weights of the
+	top tennis players as catalogued in provided data. We use statistical analysis 
+	techniques to numerically describe the characteristics of the data, to see how 
+	trends are exhibited within the data set. We conclude the report with a brief 
+	discussion of the implications of the analysis and provide insights on 
+	potential correlations that may exist.  

 	\section{Method} \label{sec:method}
 	Provided with a set of 132 unique records of the top 200 male tennis players,
@ -34,21 +41,33 @@
 	samples and samples of ranking ranges within the top 200. To this end, we made
 	use of Microsoft Excel tools and functions of the Python library SciPy.

+	We specifically have made use of these separate statistical analysis tools in the 
+	interest of sanity checking our findings. To do this, we simply replicated the 
+	correlation tests within other software environments. 
+
 	\section{Results} \label{sec:results}
-	We performed seperate statistical analyses on 10 different samples of the
-	population, as well as the population itself. This included 5 separate subsets
-	of the rankings (top 20 and 50, middle 20, bottom 20 and 50) and 5 seperate
-	randomly chosen samples of 20 players.
-\\ \\
-	\Cref{tab:excel-results} shows the the results for the conducted tests.
+	We performed separate statistical analyses on 10 different samples of the
+	population, as well as the population itself. This included 10 separate
+	samples drawn from the rankings:
+	\begin{itemize}
+		\item The top 20 entries
+		\item The middle 20 entries
+		\item The bottom 20 entries
+		\item The top 50 entries
+		\item The bottom 50 entries
+		\item 5 randomly chosen sets of 20 entries
+	\end{itemize}
+\vspace{1em}
+	Table~\ref{tab:excel_results} shows the results for the conducted tests.

 	\begin{table}[ht]
 		\centering
+		\label{tab:excel_results}
 		\begin{tabular}{|l|r|r|}
 		\hline
 		\textbf{Test Set}       & \textbf{Pearson's Coefficient} & \textbf{Spearman's Coefficient} \\
 		\hline
-		\textbf{Population}     & 0.77953                        & 0.73925                         \\
+		\textbf{Full Population}     & 0.77953                        & 0.73925                         \\
 		\textbf{Top 20}         & 0.80743                        & 0.80345                         \\
 		\textbf{Middle 20}      & 0.54134                        & 0.36565                         \\
 		\textbf{Bottom 20}      & 0.84046                        & 0.88172                         \\
@ -61,11 +80,20 @@
 		\textbf{Random Set \#5} & 0.86203                        & 0.77832
 		\\ \hline
 		\end{tabular}
-		\caption{TODO: Insert better caption for this table. All data is rounded to 5 decimal
+		\caption{Table showing the correlation coefficients between height and
+		weight using different test sets. All data is rounded to 5 decimal
 		places}
-		\label{tab:excel-results}
 	\end{table}

+	\begin{figure}[ht]
+		\centering
+		\includegraphics[width=0.6\textwidth]{pearson.png}
+		\includegraphics[width=0.6\textwidth]{spearman.png}
+		\caption{The Pearson (top) and Spearman (bottom) correlation coefficients
+		of the data set as computed by the Pandas Python library}
+		\label{fig:scipy}
+	\end{figure}
+
 	\section{Discussion} \label{sec:discussion}
 	The results generally indicate that there is a fairly strong positive
 	correlation between the height and weight of an individual tennis player,
--- a/wk9/wk9.ipynb
+++ b/wk9/wk9.ipynb
@ -0,0 +1,252 @@
+{
+ "cells": [
+  {
+   "cell_type": "code",
+   "execution_count": 1,
+   "metadata": {},
+   "outputs": [
+    {
+     "name": "stdout",
+     "output_type": "stream",
+     "text": [
+      "Using matplotlib backend: MacOSX\n",
+      "Populating the interactive namespace from numpy and matplotlib\n"
+     ]
+    }
+   ],
+   "source": [
+    "%pylab\n",
+    "%matplotlib inline\n",
+    "import pandas as pd\n",
+    "import numpy as np\n",
+    "import matplotlib.pyplot as plt\n",
+    "from scipy import stats\n",
+    "from matplotlib import colors\n",
+    "\n",
+    "data = pd.read_csv(\"Tennis players 2017-09.csv\")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 2,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<style  type=\"text/css\" >\n",
+       "    #T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row0_col0 {\n",
+       "            background-color:  #fc7f00;\n",
+       "        }    #T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row0_col1 {\n",
+       "            background-color:  #ffd20c;\n",
+       "        }    #T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row0_col2 {\n",
+       "            background-color:  #ffe619;\n",
+       "        }    #T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row0_col3 {\n",
+       "            background-color:  #f1f44d;\n",
+       "        }    #T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row1_col0 {\n",
+       "            background-color:  #ffd20c;\n",
+       "        }    #T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row1_col1 {\n",
+       "            background-color:  #fc7f00;\n",
+       "        }    #T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row1_col2 {\n",
+       "            background-color:  #e4ff7a;\n",
+       "        }    #T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row1_col3 {\n",
+       "            background-color:  #e8fc6c;\n",
+       "        }    #T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row2_col0 {\n",
+       "            background-color:  #ffe619;\n",
+       "        }    #T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row2_col1 {\n",
+       "            background-color:  #e4ff7a;\n",
+       "        }    #T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row2_col2 {\n",
+       "            background-color:  #fc7f00;\n",
+       "        }    #T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row2_col3 {\n",
+       "            background-color:  #fe9800;\n",
+       "        }    #T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row3_col0 {\n",
+       "            background-color:  #f1f44d;\n",
+       "        }    #T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row3_col1 {\n",
+       "            background-color:  #e8fc6c;\n",
+       "        }    #T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row3_col2 {\n",
+       "            background-color:  #fe9800;\n",
+       "        }    #T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row3_col3 {\n",
+       "            background-color:  #fc7f00;\n",
+       "        }</style>  \n",
+       "<table id=\"T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2\" > \n",
+       "<thead>    <tr> \n",
+       "        <th class=\"blank level0\" ></th> \n",
+       "        <th class=\"col_heading level0 col0\" >DOB</th> \n",
+       "        <th class=\"col_heading level0 col1\" >RANK</th> \n",
+       "        <th class=\"col_heading level0 col2\" >HEIGHT</th> \n",
+       "        <th class=\"col_heading level0 col3\" >Weight</th> \n",
+       "    </tr></thead> \n",
+       "<tbody>    <tr> \n",
+       "        <th id=\"T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2level0_row0\" class=\"row_heading level0 row0\" >DOB</th> \n",
+       "        <td id=\"T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row0_col0\" class=\"data row0 col0\" >1</td> \n",
+       "        <td id=\"T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row0_col1\" class=\"data row0 col1\" >0.277766</td> \n",
+       "        <td id=\"T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row0_col2\" class=\"data row0 col2\" >0.139684</td> \n",
+       "        <td id=\"T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row0_col3\" class=\"data row0 col3\" >-0.030479</td> \n",
+       "    </tr>    <tr> \n",
+       "        <th id=\"T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2level0_row1\" class=\"row_heading level0 row1\" >RANK</th> \n",
+       "        <td id=\"T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row1_col0\" class=\"data row1 col0\" >0.277766</td> \n",
+       "        <td id=\"T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row1_col1\" class=\"data row1 col1\" >1</td> \n",
+       "        <td id=\"T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row1_col2\" class=\"data row1 col2\" >-0.16755</td> \n",
+       "        <td id=\"T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row1_col3\" class=\"data row1 col3\" >-0.121946</td> \n",
+       "    </tr>    <tr> \n",
+       "        <th id=\"T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2level0_row2\" class=\"row_heading level0 row2\" >HEIGHT</th> \n",
+       "        <td id=\"T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row2_col0\" class=\"data row2 col0\" >0.139684</td> \n",
+       "        <td id=\"T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row2_col1\" class=\"data row2 col1\" >-0.16755</td> \n",
+       "        <td id=\"T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row2_col2\" class=\"data row2 col2\" >1</td> \n",
+       "        <td id=\"T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row2_col3\" class=\"data row2 col3\" >0.779526</td> \n",
+       "    </tr>    <tr> \n",
+       "        <th id=\"T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2level0_row3\" class=\"row_heading level0 row3\" >Weight</th> \n",
+       "        <td id=\"T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row3_col0\" class=\"data row3 col0\" >-0.030479</td> \n",
+       "        <td id=\"T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row3_col1\" class=\"data row3 col1\" >-0.121946</td> \n",
+       "        <td id=\"T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row3_col2\" class=\"data row3 col2\" >0.779526</td> \n",
+       "        <td id=\"T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row3_col3\" class=\"data row3 col3\" >1</td> \n",
+       "    </tr></tbody> \n",
+       "</table> "
+      ],
+      "text/plain": [
+       "<pandas.io.formats.style.Styler at 0x1a197d7b38>"
+      ]
+     },
+     "execution_count": 2,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "def background_gradient(s, m, M, cmap='Wistia', low=0, high=0):\n",
+    "    rng = M - m\n",
+    "    norm = colors.Normalize(m - (rng * low),\n",
+    "                            M + (rng * high))\n",
+    "    normed = norm(s.values)\n",
+    "    c = [colors.rgb2hex(x) for x in plt.cm.get_cmap(cmap)(normed)]\n",
+    "    return ['background-color: %s' % color for color in c]\n",
+    "\n",
+    "data = data[[\"SEX\", \"DOB\", \"RANK\", \"HANDED\", \"Country\", \"HEIGHT\", \"Weight\"]]\n",
+    "data = data.drop_duplicates()\n",
+    "\n",
+    "pearson = data.corr()\n",
+    "pearson.style.apply(background_gradient,\n",
+    "               cmap='Wistia',\n",
+    "               m=pearson.min().min(),\n",
+    "               M=pearson.max().max()\n",
+    ")"
+   ]
+  },
+  {
+   "cell_type": "code",
+   "execution_count": 3,
+   "metadata": {},
+   "outputs": [
+    {
+     "data": {
+      "text/html": [
+       "<style  type=\"text/css\" >\n",
+       "    #T_727bef98_4f3e_11e8_a315_787b8ab7acb2row0_col0 {\n",
+       "            background-color:  #fc7f00;\n",
+       "        }    #T_727bef98_4f3e_11e8_a315_787b8ab7acb2row0_col1 {\n",
+       "            background-color:  #ffd20c;\n",
+       "        }    #T_727bef98_4f3e_11e8_a315_787b8ab7acb2row0_col2 {\n",
+       "            background-color:  #fee91d;\n",
+       "        }    #T_727bef98_4f3e_11e8_a315_787b8ab7acb2row0_col3 {\n",
+       "            background-color:  #f4f242;\n",
+       "        }    #T_727bef98_4f3e_11e8_a315_787b8ab7acb2row1_col0 {\n",
+       "            background-color:  #ffd20c;\n",
+       "        }    #T_727bef98_4f3e_11e8_a315_787b8ab7acb2row1_col1 {\n",
+       "            background-color:  #fc7f00;\n",
+       "        }    #T_727bef98_4f3e_11e8_a315_787b8ab7acb2row1_col2 {\n",
+       "            background-color:  #e4ff7a;\n",
+       "        }    #T_727bef98_4f3e_11e8_a315_787b8ab7acb2row1_col3 {\n",
+       "            background-color:  #eafa63;\n",
+       "        }    #T_727bef98_4f3e_11e8_a315_787b8ab7acb2row2_col0 {\n",
+       "            background-color:  #fee91d;\n",
+       "        }    #T_727bef98_4f3e_11e8_a315_787b8ab7acb2row2_col1 {\n",
+       "            background-color:  #e4ff7a;\n",
+       "        }    #T_727bef98_4f3e_11e8_a315_787b8ab7acb2row2_col2 {\n",
+       "            background-color:  #fc7f00;\n",
+       "        }    #T_727bef98_4f3e_11e8_a315_787b8ab7acb2row2_col3 {\n",
+       "            background-color:  #ff9d00;\n",
+       "        }    #T_727bef98_4f3e_11e8_a315_787b8ab7acb2row3_col0 {\n",
+       "            background-color:  #f4f242;\n",
+       "        }    #T_727bef98_4f3e_11e8_a315_787b8ab7acb2row3_col1 {\n",
+       "            background-color:  #eafa63;\n",
+       "        }    #T_727bef98_4f3e_11e8_a315_787b8ab7acb2row3_col2 {\n",
+       "            background-color:  #ff9d00;\n",
+       "        }    #T_727bef98_4f3e_11e8_a315_787b8ab7acb2row3_col3 {\n",
+       "            background-color:  #fc7f00;\n",
+       "        }</style>  \n",
+       "<table id=\"T_727bef98_4f3e_11e8_a315_787b8ab7acb2\" > \n",
+       "<thead>    <tr> \n",
+       "        <th class=\"blank level0\" ></th> \n",
+       "        <th class=\"col_heading level0 col0\" >DOB</th> \n",
+       "        <th class=\"col_heading level0 col1\" >RANK</th> \n",
+       "        <th class=\"col_heading level0 col2\" >HEIGHT</th> \n",
+       "        <th class=\"col_heading level0 col3\" >Weight</th> \n",
+       "    </tr></thead> \n",
+       "<tbody>    <tr> \n",
+       "        <th id=\"T_727bef98_4f3e_11e8_a315_787b8ab7acb2level0_row0\" class=\"row_heading level0 row0\" >DOB</th> \n",
+       "        <td id=\"T_727bef98_4f3e_11e8_a315_787b8ab7acb2row0_col0\" class=\"data row0 col0\" >1</td> \n",
+       "        <td id=\"T_727bef98_4f3e_11e8_a315_787b8ab7acb2row0_col1\" class=\"data row0 col1\" >0.280386</td> \n",
+       "        <td id=\"T_727bef98_4f3e_11e8_a315_787b8ab7acb2row0_col2\" class=\"data row0 col2\" >0.122412</td> \n",
+       "        <td id=\"T_727bef98_4f3e_11e8_a315_787b8ab7acb2row0_col3\" class=\"data row0 col3\" >0.00769861</td> \n",
+       "    </tr>    <tr> \n",
+       "        <th id=\"T_727bef98_4f3e_11e8_a315_787b8ab7acb2level0_row1\" class=\"row_heading level0 row1\" >RANK</th> \n",
+       "        <td id=\"T_727bef98_4f3e_11e8_a315_787b8ab7acb2row1_col0\" class=\"data row1 col0\" >0.280386</td> \n",
+       "        <td id=\"T_727bef98_4f3e_11e8_a315_787b8ab7acb2row1_col1\" class=\"data row1 col1\" >1</td> \n",
+       "        <td id=\"T_727bef98_4f3e_11e8_a315_787b8ab7acb2row1_col2\" class=\"data row1 col2\" >-0.160006</td> \n",
+       "        <td id=\"T_727bef98_4f3e_11e8_a315_787b8ab7acb2row1_col3\" class=\"data row1 col3\" >-0.0908714</td> \n",
+       "    </tr>    <tr> \n",
+       "        <th id=\"T_727bef98_4f3e_11e8_a315_787b8ab7acb2level0_row2\" class=\"row_heading level0 row2\" >HEIGHT</th> \n",
+       "        <td id=\"T_727bef98_4f3e_11e8_a315_787b8ab7acb2row2_col0\" class=\"data row2 col0\" >0.122412</td> \n",
+       "        <td id=\"T_727bef98_4f3e_11e8_a315_787b8ab7acb2row2_col1\" class=\"data row2 col1\" >-0.160006</td> \n",
+       "        <td id=\"T_727bef98_4f3e_11e8_a315_787b8ab7acb2row2_col2\" class=\"data row2 col2\" >1</td> \n",
+       "        <td id=\"T_727bef98_4f3e_11e8_a315_787b8ab7acb2row2_col3\" class=\"data row2 col3\" >0.739246</td> \n",
+       "    </tr>    <tr> \n",
+       "        <th id=\"T_727bef98_4f3e_11e8_a315_787b8ab7acb2level0_row3\" class=\"row_heading level0 row3\" >Weight</th> \n",
+       "        <td id=\"T_727bef98_4f3e_11e8_a315_787b8ab7acb2row3_col0\" class=\"data row3 col0\" >0.00769861</td> \n",
+       "        <td id=\"T_727bef98_4f3e_11e8_a315_787b8ab7acb2row3_col1\" class=\"data row3 col1\" >-0.0908714</td> \n",
+       "        <td id=\"T_727bef98_4f3e_11e8_a315_787b8ab7acb2row3_col2\" class=\"data row3 col2\" >0.739246</td> \n",
+       "        <td id=\"T_727bef98_4f3e_11e8_a315_787b8ab7acb2row3_col3\" class=\"data row3 col3\" >1</td> \n",
+       "    </tr></tbody> \n",
+       "</table> "
+      ],
+      "text/plain": [
+       "<pandas.io.formats.style.Styler at 0x111a3b198>"
+      ]
+     },
+     "execution_count": 3,
+     "metadata": {},
+     "output_type": "execute_result"
+    }
+   ],
+   "source": [
+    "spearman = data.corr(method=\"spearman\")\n",
+    "spearman.style.apply(background_gradient,\n",
+    "               cmap='Wistia',\n",
+    "               m=spearman.min().min(),\n",
+    "               M=spearman.max().max()\n",
+    ")"
+   ]
+  }
+ ],
+ "metadata": {
+  "kernelspec": {
+   "display_name": "Python 3",
+   "language": "python",
+   "name": "python3"
+  },
+  "language_info": {
+   "codemirror_mode": {
+    "name": "ipython",
+    "version": 3
+   },
+   "file_extension": ".py",
+   "mimetype": "text/x-python",
+   "name": "python",
+   "nbconvert_exporter": "python",
+   "pygments_lexer": "ipython3",
+   "version": "3.6.4"
+  }
+ },
+ "nbformat": 4,
+ "nbformat_minor": 2
+}