Merge branch 'master' of https://github.com/Dekker1/ResearchMethods
This commit is contained in:
commit
89eeff700e
@ -36,6 +36,76 @@
|
||||
|
||||
\section{Results} \label{sec:results}
|
||||
|
||||
\subsection{The advantage of height}
|
||||
|
||||
\begin{table}[ht]
|
||||
\centering
|
||||
\label{tab:chi-height}
|
||||
\begin{tabular}{|l|r|r|r|r|}
|
||||
\hline
|
||||
& \textbf{M: 168 - 188} & \textbf{M: 189 - 210} & \textbf{F: 155 - 171} & \textbf{F: 172 - 189} \\ \hline
|
||||
\textbf{1 - 99} & 67 / 73 & 32 / 26 & 38 / 42 & 60 / 55 \\
|
||||
\textbf{100 - 199} & 69 / 72 & 30 / 26 & 31 / 27 & 32 / 36 \\
|
||||
\textbf{200 - 299} & 75 / 68 & 17 / 25 & 18 / 17 & 22 / 23 \\
|
||||
\textbf{300 - 399} & 61 / 60 & 21 / 23 & 11 / 12 & 17 / 16 \\
|
||||
\textbf{400 - 499} & 59 / 60 & 22 / 22 & 7 / 6
|
||||
& 7 / 8 \\
|
||||
\hline
|
||||
\end{tabular}
|
||||
\caption{Observed / Expected values used for the $\chi^2$-test. The groups are divided by their rank (vertical) and, per gender, their height (horizontal).}
|
||||
\end{table}
|
||||
|
||||
$$
|
||||
\chi^2 \approx 7.697606186049128
|
||||
$$
|
||||
|
||||
$$
|
||||
df = (5-1)(4-1) = 12
|
||||
$$
|
||||
|
||||
$$
|
||||
\chi^2(7.69\dots,12) \approx 0.8082925814979871
|
||||
$$
|
||||
|
||||
|
||||
\textbf {t-test men:} T score: 1.711723, P score: 0.043815
|
||||
|
||||
\textbf {t-test women:} T score: 1.860241, P score: 0.032030
|
||||
|
||||
|
||||
\subsection{The advantage of left-handedness}
|
||||
|
||||
\begin{table}[ht]
|
||||
\centering
|
||||
\label{tab:chi-hand}
|
||||
\begin{tabular}{|l|l|l|l|l|l|}
|
||||
\hline
|
||||
& \textbf{1 - 99} & \textbf{100 - 199} & \textbf{200 - 299} & \textbf{300 - 399} & \textbf{400 - 499} \\
|
||||
\hline
|
||||
\textbf{L} & 22 / 21 & 23 / 18 & 17 / 15 & 6 / 12 & 8 / 10 \\
|
||||
\textbf{R} & 174 / 177 & 139 / 144 & 117 / 119 &
|
||||
105 / 98 & 88 / 86 \\
|
||||
\hline
|
||||
\end{tabular}
|
||||
\caption{Observed / Expected values used for the $\chi^2$-test. The groups are divided by which hand they use (vertical) and their rank (horizontal).}
|
||||
\end{table}
|
||||
|
||||
|
||||
$$
|
||||
\chi^2 \approx 6.467312944404331
|
||||
$$
|
||||
|
||||
$$
|
||||
df = (2-1)(5-1) = 4
|
||||
$$
|
||||
|
||||
$$
|
||||
\chi^2(6.46\dots,4) \approx 0.1668616190847413
|
||||
$$
|
||||
|
||||
\textbf {t-test:} T score: 0.451694, P score: 0.325815
|
||||
|
||||
|
||||
\section{Discussion} \label{sec:discussion}
|
||||
|
||||
\end{document}
|
||||
|
754
wk7/wk7.ipynb
754
wk7/wk7.ipynb
File diff suppressed because one or more lines are too long
BIN
wk8/A1_data.xlsx
BIN
wk8/A1_data.xlsx
Binary file not shown.
155
wk8/week8.tex
155
wk8/week8.tex
@ -12,6 +12,8 @@
|
||||
\usepackage[utf8]{inputenc} %support umlauts in the input
|
||||
% Easier compilation
|
||||
\usepackage{bookmark}
|
||||
\usepackage{natbib}
|
||||
\usepackage{graphicx}
|
||||
|
||||
\begin{document}
|
||||
\title{Week 8 - Quantitative data analysis}
|
||||
@ -25,8 +27,161 @@
|
||||
|
||||
\section{Method} \label{sec:method}
|
||||
|
||||
The purpose of this report is to re-analyse the data presented in the paper by
|
||||
\cite{dong2018methods}, which investigates the effect that protests (as an
|
||||
example of disruptive social behaviours in general) have on consumer
|
||||
behaviours. \cite{dong2018methods} hypothesise that protests decrease
|
||||
consumer behaviour in the surrounding area of the event, and suggest that
|
||||
consumer spending could be used as an additional non-traditional economic
|
||||
indicator and as a gauge of consumer sentiment. Consumer spending was analysed
|
||||
using credit card transaction data from a metropolitan area within a country
|
||||
that is part of The Organisation for Economic Co-operation and Development
|
||||
(OECD). Although \cite{dong2018methods} investigate temporal and spatial
|
||||
effects on consumer spending, for the purposes of this analysis, only the
|
||||
spatial effect of variables (with relation to the geographical distance from
|
||||
the event) is considered. The dataset consists of variables measured as a
|
||||
function of the distance from the event (in km), including: the number of
|
||||
customers, the median spending amount, the number of transactions, and the
|
||||
total sales amount.
|
||||
|
||||
The re-analysis is conducted on the data provided in the
|
||||
paper~\citep{dong2018methods}, using Python in conjunction with packages such as
|
||||
pandas, matplotlib, numpy and seaborn, to process and visualise the data. As
|
||||
aforementioned, only spatial data and the variables mentioned above are
|
||||
considered, for the reference days and the change occurring on Day 62 (day of
|
||||
first socially disruptive event). The distribution of the difference between
|
||||
the reference period and Day 62 is visualised by plotting a histogram for each
|
||||
variable. Since the decrease of each of the variables from the reference period
|
||||
to Day 62 is provided, the mean and the median of these distributions can be
|
||||
used to perform a one-sample (as we are given the difference) hypothesis
|
||||
test to assess whether the protests on Day 62 had a discernible effect.
|
||||
|
||||
Assuming the mean of each variable over the reference period is the midpoint
|
||||
between their respective maximum and minimum values, we can reconstruct
|
||||
approximate actual values for Day 62 (given the decrease in value on Day 62
|
||||
from the reference period). By comparing these values to the range over the
|
||||
reference period, another assessment can be made to determine whether the data
|
||||
presents a discernible effect on consumer spending as a result of social
|
||||
disruption, scaling with distance.
|
||||
|
||||
Although time series data was not explicitly provided, by extrapolating
|
||||
information from a graph in \cite{dong2018methods} we can quantify the decrease
|
||||
in number of customers and median spending on Day 62 using information about the
|
||||
reference days (from 43 to 61). After collecting the values for each of the
|
||||
reference days (43-61), the mean and standard deviation of this sample can be
|
||||
calculated. Assuming a normal distribution of the data, we can calculate a
|
||||
z-score for each observation on Day 62, and use this to assess the original
|
||||
hypothesis.
|
||||
|
||||
By performing each of the above tests, a re-analysis will be conducted on
|
||||
\cite{dong2018methods}'s paper hypothesising that consumer spending decreases
|
||||
as a result of social events such as protests. In the Results section, we will
|
||||
perform the statistical analyses described above. The results of these tests
|
||||
will then be explored in the Discussion section, along with assumptions and
|
||||
limitations of the tests and what can be concluded from them.
|
||||
|
||||
\section{Results} \label{sec:results}
|
||||
|
||||
For each of the variables in the given data (number of customers, median
|
||||
spending amount, number of transactions, and sales totals) we construct a
|
||||
histogram of the decrease of each (on Day 62). We then compute the mean and
|
||||
median of the data so we can proceed to perform a one-sample hypothesis test.
|
||||
|
||||
\begin{figure}[ht]
|
||||
\centering
|
||||
\label{fig:distr}
|
||||
\includegraphics[width=\textwidth]{distr.png}
|
||||
\caption{Distribution of each of the variables recorded in the data, as a function of the distance from an event}
|
||||
\end{figure}
|
||||
|
||||
Using a mean/median of the reference period, obtained by taking the midpoint of the minimum and maximum values for each distance measure, a value can be reconstructed for the measurement on Day 62 (for each location) using:
|
||||
|
||||
\begin{equation}
|
||||
\textrm{value} = \frac{\textrm{min} + \textrm{max}}{2} - \textrm{decrease}.
|
||||
\tag{1}
|
||||
\end{equation}
|
||||
\\
|
||||
We can then plot the maximum and minimum values for the reference period, as well as the reconstructed Day 62 variables to observe the behaviour of consumer spending after the event.
|
||||
|
||||
\begin{figure}[ht]
|
||||
\centering
|
||||
\includegraphics[width=\textwidth]{effect.png}
|
||||
\caption{The reconstructed values for Day 62 of each variable plotted against their respective minimums and maximums over the reference period}
|
||||
\label{fig:effect}
|
||||
\end{figure}
|
||||
|
||||
Using the data recorded, for each of the three distances recorded, the mean and standard deviation of the reference period can be calculated. The z-score for each observed value on Day 62 can be computed using:
|
||||
|
||||
\begin{equation}
|
||||
\textrm{Z} = \frac{\textrm{X} - \mu}{\sigma},
|
||||
\tag{2}
|
||||
\end{equation}
|
||||
\\
|
||||
where X is the observed value, $\mu$ and $\sigma$ are the mean and standard deviation (respectively) of the reference period.
|
||||
|
||||
\begin{table}[ht]
|
||||
\centering
|
||||
\label{my-label}
|
||||
\begin{tabular}{|l|l|r|r|}
|
||||
\hline
|
||||
\textbf{Variable} & \textbf{Distance} & \textbf{X} & \textbf{Z} \\
|
||||
\hline
|
||||
\textbf{Customers} & \textless 2km & -0.600 & -6.87798 \\
|
||||
\textbf{Customers} & 2km - 4km & -0.200 & -3.33253 \\
|
||||
\textbf{Customers} & \textgreater 4km & -0.100 & -3.70740 \\
|
||||
\textbf{Median Spending} & \textless 2km & -0.200 & -3.05849 \\
|
||||
\textbf{Median Spending} & 2km - 4km & -0.100 & -1.46508 \\
|
||||
\textbf{Median Spending} & \textgreater 4km & -0.035 & -1.99199 \\
|
||||
\hline
|
||||
\end{tabular}
|
||||
\caption{The $Z$ score computed using equation 2 and the temporal data}
|
||||
\end{table}
|
||||
|
||||
\section{Discussion} \label{sec:discussion}
|
||||
|
||||
As shown in each of the subplots of Figure 1, the mean and median values of
|
||||
the decrease in each of the distributions are greater than zero (note: higher
|
||||
values of the decrease variable indicate a larger decrease/negative change).
|
||||
These mean and median values can be used to perform a one-sample hypothesis
|
||||
test, which finds that since each of the mean/median values is greater than
|
||||
zero, we can infer that the event had a net decreasing effect on the number of
|
||||
customers, median spending amount, number of transactions, and total sales
|
||||
amount.
|
||||
|
||||
In Figure~\ref{fig:effect} values were approximated for each variable on Day
|
||||
62, using Equation 1, and plotted against the minimum and maximum values of
|
||||
the respective variables. This allows us to visually assess whether the
|
||||
reconstructed value for Day 62 lies outside the range of recorded values for
|
||||
the reference period, and presents uncharacteristic behaviour. A decrease is
|
||||
evident in each of the variables after the event has occurred (on Day 62)
|
||||
within a distance of approximately 2 km, and appears to stabilise thereafter.
|
||||
This provides support to \cite{dong2018methods}'s hypothesis that consumer
|
||||
spending is affected by socially disruptive events, and also provides evidence
|
||||
to the notion of spatial scaling of this effect (based on the event location).
|
||||
It is important to note that the approximation used in this technique is
|
||||
subject to a level of error due to the ideal calculation of the mean/median of
|
||||
the reference data as the midpoint between the minimum and maximum values
|
||||
provided.
|
||||
|
||||
Extrapolating data from a graph in \cite{dong2018methods} provided time series
|
||||
data (divided into three radii) to analyse. This data was collected by
|
||||
visually estimating the values from the graph which will inherently introduce
|
||||
a source of error. However, by computing the z-score as described in Equation
|
||||
2, Table 1 was constructed. Each of the z-score values
|
||||
in the table are negative, indicating a decrease in both the number of
|
||||
customers and median spending on Day 62. The much larger magnitude of z-scores
|
||||
for the \textless 2km distance ring for both variables is in agreement with earlier
|
||||
discussion, strengthening the hypothesis of the spatial correlation of
|
||||
consumer spending.
|
||||
|
||||
Each of the above tests have agreed on the spatial and temporal correlation of
|
||||
consumer spending and socially disruptive events. With the limited data
|
||||
available, we can therefore concur with the hypothesis of Dong et al. that
|
||||
consumer spending decreases in the area around disruptive social behaviour,
|
||||
after finding the temporal correlation on Day 62, as well as the spatially
|
||||
decreasing effect further from the event.
|
||||
|
||||
\bibliographystyle{humannat}
|
||||
\bibliography{references}
|
||||
|
||||
\end{document}
|
||||
|
341
wk8/wk8.ipynb
341
wk8/wk8.ipynb
File diff suppressed because one or more lines are too long
BIN
wk9/pearson.png
Normal file
BIN
wk9/pearson.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 90 KiB |
BIN
wk9/spearman.png
Normal file
BIN
wk9/spearman.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 88 KiB |
@ -12,6 +12,7 @@
|
||||
\usepackage[utf8]{inputenc} %support umlauts in the input
|
||||
% Easier compilation
|
||||
\usepackage{bookmark}
|
||||
\usepackage{graphicx}
|
||||
|
||||
\begin{document}
|
||||
\title{Week 9 - Correlation and Regression}
|
||||
@ -22,6 +23,12 @@
|
||||
\maketitle
|
||||
|
||||
\section{Introduction} \label{sec:introduction}
|
||||
We present a report on the relationship between the heights and weights of the
|
||||
top tennis players as catalogued in provided data. We use statistical analysis
|
||||
techniques to numerically describe the characteristics of the data, to see how
|
||||
trends are exhibited within the data set. We conclude the report with a brief
|
||||
discussion of the implications of the analysis and provide insights on
|
||||
potential correlations that may exist.
|
||||
|
||||
\section{Method} \label{sec:method}
|
||||
Provided with a set of 132 unique records of the top 200 male tennis players,
|
||||
@ -34,21 +41,33 @@
|
||||
samples and samples of ranking ranges within the top 200. To this end, we made
|
||||
use of Microsoft Excel tools and functions of the Python library SciPy.
|
||||
|
||||
We specifically have made use of these separate statistical analysis tools in the
|
||||
interest of sanity checking our findings. To do this, we simply replicated the
|
||||
correlation tests within other software environments.
|
||||
|
||||
\section{Results} \label{sec:results}
|
||||
We performed seperate statistical analyses on 10 different samples of the
|
||||
population, as well as the population itself. This included 5 separate subsets
|
||||
of the rankings (top 20 and 50, middle 20, bottom 20 and 50) and 5 seperate
|
||||
randomly chosen samples of 20 players.
|
||||
\\ \\
|
||||
\Cref{tab:excel-results} shows the the results for the conducted tests.
|
||||
We performed separate statistical analyses on 10 different samples of the
|
||||
population, as well as the population itself. This included 11 separate
|
||||
subsets of the rankings:
|
||||
\begin{itemize}
|
||||
\item The top 20 entries
|
||||
\item The middle 20 entries
|
||||
\item The bottom 20 entries
|
||||
\item The top 50 entries
|
||||
\item The bottom 50 entries
|
||||
\item 5 randomly chosen sets of 20 entries
|
||||
\end{itemize}
|
||||
\vspace{1em}
|
||||
Table~\ref{tab:excel_results} shows the results for the conducted tests.
|
||||
|
||||
\begin{table}[ht]
|
||||
\centering
|
||||
\label{tab:excel_results}
|
||||
\begin{tabular}{|l|r|r|}
|
||||
\hline
|
||||
\textbf{Test Set} & \textbf{Pearson's Coefficient} & \textbf{Spearman's Coefficient} \\
|
||||
\hline
|
||||
\textbf{Population} & 0.77953 & 0.73925 \\
|
||||
\textbf{Full Population} & 0.77953 & 0.73925 \\
|
||||
\textbf{Top 20} & 0.80743 & 0.80345 \\
|
||||
\textbf{Middle 20} & 0.54134 & 0.36565 \\
|
||||
\textbf{Bottom 20} & 0.84046 & 0.88172 \\
|
||||
@ -61,11 +80,20 @@
|
||||
\textbf{Random Set \#5} & 0.86203 & 0.77832
|
||||
\\ \hline
|
||||
\end{tabular}
|
||||
\caption{TODO: Insert better caption for this table. All data is rounded to 5 decimal
|
||||
\caption{Table showing the correlation coefficients between height and
|
||||
weight using different test sets. All data is rounded to 5 decimal
|
||||
places}
|
||||
\label{tab:excel-results}
|
||||
\end{table}
|
||||
|
||||
\begin{figure}[ht]
|
||||
\centering
|
||||
\label{fig:scipy}
|
||||
\includegraphics[width=0.6\textwidth]{pearson.png}
|
||||
\includegraphics[width=0.6\textwidth]{spearman.png}
|
||||
\caption{The Pearson (top) and Spearman (bottom) correlation coefficients
|
||||
of the data set as computed by the Pandas Python library}
|
||||
\end{figure}
|
||||
|
||||
\section{Discussion} \label{sec:discussion}
|
||||
The results generally indicate that there is a fairly strong positive
|
||||
correlation between the height and weight of an individual tennis player,
|
||||
|
252
wk9/wk9.ipynb
Normal file
252
wk9/wk9.ipynb
Normal file
@ -0,0 +1,252 @@
|
||||
{
|
||||
"cells": [
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 1,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"name": "stdout",
|
||||
"output_type": "stream",
|
||||
"text": [
|
||||
"Using matplotlib backend: MacOSX\n",
|
||||
"Populating the interactive namespace from numpy and matplotlib\n"
|
||||
]
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"%pylab\n",
|
||||
"%matplotlib inline\n",
|
||||
"import pandas as pd\n",
|
||||
"import numpy as np\n",
|
||||
"import matplotlib.pyplot as plt\n",
|
||||
"from scipy import stats\n",
|
||||
"from matplotlib import colors\n",
|
||||
"\n",
|
||||
"data = pd.read_csv(\"Tennis players 2017-09.csv\")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<style type=\"text/css\" >\n",
|
||||
" #T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row0_col0 {\n",
|
||||
" background-color: #fc7f00;\n",
|
||||
" } #T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row0_col1 {\n",
|
||||
" background-color: #ffd20c;\n",
|
||||
" } #T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row0_col2 {\n",
|
||||
" background-color: #ffe619;\n",
|
||||
" } #T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row0_col3 {\n",
|
||||
" background-color: #f1f44d;\n",
|
||||
" } #T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row1_col0 {\n",
|
||||
" background-color: #ffd20c;\n",
|
||||
" } #T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row1_col1 {\n",
|
||||
" background-color: #fc7f00;\n",
|
||||
" } #T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row1_col2 {\n",
|
||||
" background-color: #e4ff7a;\n",
|
||||
" } #T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row1_col3 {\n",
|
||||
" background-color: #e8fc6c;\n",
|
||||
" } #T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row2_col0 {\n",
|
||||
" background-color: #ffe619;\n",
|
||||
" } #T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row2_col1 {\n",
|
||||
" background-color: #e4ff7a;\n",
|
||||
" } #T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row2_col2 {\n",
|
||||
" background-color: #fc7f00;\n",
|
||||
" } #T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row2_col3 {\n",
|
||||
" background-color: #fe9800;\n",
|
||||
" } #T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row3_col0 {\n",
|
||||
" background-color: #f1f44d;\n",
|
||||
" } #T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row3_col1 {\n",
|
||||
" background-color: #e8fc6c;\n",
|
||||
" } #T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row3_col2 {\n",
|
||||
" background-color: #fe9800;\n",
|
||||
" } #T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row3_col3 {\n",
|
||||
" background-color: #fc7f00;\n",
|
||||
" }</style> \n",
|
||||
"<table id=\"T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2\" > \n",
|
||||
"<thead> <tr> \n",
|
||||
" <th class=\"blank level0\" ></th> \n",
|
||||
" <th class=\"col_heading level0 col0\" >DOB</th> \n",
|
||||
" <th class=\"col_heading level0 col1\" >RANK</th> \n",
|
||||
" <th class=\"col_heading level0 col2\" >HEIGHT</th> \n",
|
||||
" <th class=\"col_heading level0 col3\" >Weight</th> \n",
|
||||
" </tr></thead> \n",
|
||||
"<tbody> <tr> \n",
|
||||
" <th id=\"T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2level0_row0\" class=\"row_heading level0 row0\" >DOB</th> \n",
|
||||
" <td id=\"T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row0_col0\" class=\"data row0 col0\" >1</td> \n",
|
||||
" <td id=\"T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row0_col1\" class=\"data row0 col1\" >0.277766</td> \n",
|
||||
" <td id=\"T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row0_col2\" class=\"data row0 col2\" >0.139684</td> \n",
|
||||
" <td id=\"T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row0_col3\" class=\"data row0 col3\" >-0.030479</td> \n",
|
||||
" </tr> <tr> \n",
|
||||
" <th id=\"T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2level0_row1\" class=\"row_heading level0 row1\" >RANK</th> \n",
|
||||
" <td id=\"T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row1_col0\" class=\"data row1 col0\" >0.277766</td> \n",
|
||||
" <td id=\"T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row1_col1\" class=\"data row1 col1\" >1</td> \n",
|
||||
" <td id=\"T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row1_col2\" class=\"data row1 col2\" >-0.16755</td> \n",
|
||||
" <td id=\"T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row1_col3\" class=\"data row1 col3\" >-0.121946</td> \n",
|
||||
" </tr> <tr> \n",
|
||||
" <th id=\"T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2level0_row2\" class=\"row_heading level0 row2\" >HEIGHT</th> \n",
|
||||
" <td id=\"T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row2_col0\" class=\"data row2 col0\" >0.139684</td> \n",
|
||||
" <td id=\"T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row2_col1\" class=\"data row2 col1\" >-0.16755</td> \n",
|
||||
" <td id=\"T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row2_col2\" class=\"data row2 col2\" >1</td> \n",
|
||||
" <td id=\"T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row2_col3\" class=\"data row2 col3\" >0.779526</td> \n",
|
||||
" </tr> <tr> \n",
|
||||
" <th id=\"T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2level0_row3\" class=\"row_heading level0 row3\" >Weight</th> \n",
|
||||
" <td id=\"T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row3_col0\" class=\"data row3 col0\" >-0.030479</td> \n",
|
||||
" <td id=\"T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row3_col1\" class=\"data row3 col1\" >-0.121946</td> \n",
|
||||
" <td id=\"T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row3_col2\" class=\"data row3 col2\" >0.779526</td> \n",
|
||||
" <td id=\"T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row3_col3\" class=\"data row3 col3\" >1</td> \n",
|
||||
" </tr></tbody> \n",
|
||||
"</table> "
|
||||
],
|
||||
"text/plain": [
|
||||
"<pandas.io.formats.style.Styler at 0x1a197d7b38>"
|
||||
]
|
||||
},
|
||||
"execution_count": 2,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"def background_gradient(s, m, M, cmap='Wistia', low=0, high=0):\n",
|
||||
" rng = M - m\n",
|
||||
" norm = colors.Normalize(m - (rng * low),\n",
|
||||
" M + (rng * high))\n",
|
||||
" normed = norm(s.values)\n",
|
||||
" c = [colors.rgb2hex(x) for x in plt.cm.get_cmap(cmap)(normed)]\n",
|
||||
" return ['background-color: %s' % color for color in c]\n",
|
||||
"\n",
|
||||
"data = data[[\"SEX\", \"DOB\", \"RANK\", \"HANDED\", \"Country\", \"HEIGHT\", \"Weight\"]]\n",
|
||||
"data.drop_duplicates\n",
|
||||
"\n",
|
||||
"pearson = data.corr()\n",
|
||||
"pearson.style.apply(background_gradient,\n",
|
||||
" cmap='Wistia',\n",
|
||||
" m=pearson.min().min(),\n",
|
||||
" M=pearson.max().max()\n",
|
||||
")"
|
||||
]
|
||||
},
|
||||
{
|
||||
"cell_type": "code",
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"outputs": [
|
||||
{
|
||||
"data": {
|
||||
"text/html": [
|
||||
"<style type=\"text/css\" >\n",
|
||||
" #T_727bef98_4f3e_11e8_a315_787b8ab7acb2row0_col0 {\n",
|
||||
" background-color: #fc7f00;\n",
|
||||
" } #T_727bef98_4f3e_11e8_a315_787b8ab7acb2row0_col1 {\n",
|
||||
" background-color: #ffd20c;\n",
|
||||
" } #T_727bef98_4f3e_11e8_a315_787b8ab7acb2row0_col2 {\n",
|
||||
" background-color: #fee91d;\n",
|
||||
" } #T_727bef98_4f3e_11e8_a315_787b8ab7acb2row0_col3 {\n",
|
||||
" background-color: #f4f242;\n",
|
||||
" } #T_727bef98_4f3e_11e8_a315_787b8ab7acb2row1_col0 {\n",
|
||||
" background-color: #ffd20c;\n",
|
||||
" } #T_727bef98_4f3e_11e8_a315_787b8ab7acb2row1_col1 {\n",
|
||||
" background-color: #fc7f00;\n",
|
||||
" } #T_727bef98_4f3e_11e8_a315_787b8ab7acb2row1_col2 {\n",
|
||||
" background-color: #e4ff7a;\n",
|
||||
" } #T_727bef98_4f3e_11e8_a315_787b8ab7acb2row1_col3 {\n",
|
||||
" background-color: #eafa63;\n",
|
||||
" } #T_727bef98_4f3e_11e8_a315_787b8ab7acb2row2_col0 {\n",
|
||||
" background-color: #fee91d;\n",
|
||||
" } #T_727bef98_4f3e_11e8_a315_787b8ab7acb2row2_col1 {\n",
|
||||
" background-color: #e4ff7a;\n",
|
||||
" } #T_727bef98_4f3e_11e8_a315_787b8ab7acb2row2_col2 {\n",
|
||||
" background-color: #fc7f00;\n",
|
||||
" } #T_727bef98_4f3e_11e8_a315_787b8ab7acb2row2_col3 {\n",
|
||||
" background-color: #ff9d00;\n",
|
||||
" } #T_727bef98_4f3e_11e8_a315_787b8ab7acb2row3_col0 {\n",
|
||||
" background-color: #f4f242;\n",
|
||||
" } #T_727bef98_4f3e_11e8_a315_787b8ab7acb2row3_col1 {\n",
|
||||
" background-color: #eafa63;\n",
|
||||
" } #T_727bef98_4f3e_11e8_a315_787b8ab7acb2row3_col2 {\n",
|
||||
" background-color: #ff9d00;\n",
|
||||
" } #T_727bef98_4f3e_11e8_a315_787b8ab7acb2row3_col3 {\n",
|
||||
" background-color: #fc7f00;\n",
|
||||
" }</style> \n",
|
||||
"<table id=\"T_727bef98_4f3e_11e8_a315_787b8ab7acb2\" > \n",
|
||||
"<thead> <tr> \n",
|
||||
" <th class=\"blank level0\" ></th> \n",
|
||||
" <th class=\"col_heading level0 col0\" >DOB</th> \n",
|
||||
" <th class=\"col_heading level0 col1\" >RANK</th> \n",
|
||||
" <th class=\"col_heading level0 col2\" >HEIGHT</th> \n",
|
||||
" <th class=\"col_heading level0 col3\" >Weight</th> \n",
|
||||
" </tr></thead> \n",
|
||||
"<tbody> <tr> \n",
|
||||
" <th id=\"T_727bef98_4f3e_11e8_a315_787b8ab7acb2level0_row0\" class=\"row_heading level0 row0\" >DOB</th> \n",
|
||||
" <td id=\"T_727bef98_4f3e_11e8_a315_787b8ab7acb2row0_col0\" class=\"data row0 col0\" >1</td> \n",
|
||||
" <td id=\"T_727bef98_4f3e_11e8_a315_787b8ab7acb2row0_col1\" class=\"data row0 col1\" >0.280386</td> \n",
|
||||
" <td id=\"T_727bef98_4f3e_11e8_a315_787b8ab7acb2row0_col2\" class=\"data row0 col2\" >0.122412</td> \n",
|
||||
" <td id=\"T_727bef98_4f3e_11e8_a315_787b8ab7acb2row0_col3\" class=\"data row0 col3\" >0.00769861</td> \n",
|
||||
" </tr> <tr> \n",
|
||||
" <th id=\"T_727bef98_4f3e_11e8_a315_787b8ab7acb2level0_row1\" class=\"row_heading level0 row1\" >RANK</th> \n",
|
||||
" <td id=\"T_727bef98_4f3e_11e8_a315_787b8ab7acb2row1_col0\" class=\"data row1 col0\" >0.280386</td> \n",
|
||||
" <td id=\"T_727bef98_4f3e_11e8_a315_787b8ab7acb2row1_col1\" class=\"data row1 col1\" >1</td> \n",
|
||||
" <td id=\"T_727bef98_4f3e_11e8_a315_787b8ab7acb2row1_col2\" class=\"data row1 col2\" >-0.160006</td> \n",
|
||||
" <td id=\"T_727bef98_4f3e_11e8_a315_787b8ab7acb2row1_col3\" class=\"data row1 col3\" >-0.0908714</td> \n",
|
||||
" </tr> <tr> \n",
|
||||
" <th id=\"T_727bef98_4f3e_11e8_a315_787b8ab7acb2level0_row2\" class=\"row_heading level0 row2\" >HEIGHT</th> \n",
|
||||
" <td id=\"T_727bef98_4f3e_11e8_a315_787b8ab7acb2row2_col0\" class=\"data row2 col0\" >0.122412</td> \n",
|
||||
" <td id=\"T_727bef98_4f3e_11e8_a315_787b8ab7acb2row2_col1\" class=\"data row2 col1\" >-0.160006</td> \n",
|
||||
" <td id=\"T_727bef98_4f3e_11e8_a315_787b8ab7acb2row2_col2\" class=\"data row2 col2\" >1</td> \n",
|
||||
" <td id=\"T_727bef98_4f3e_11e8_a315_787b8ab7acb2row2_col3\" class=\"data row2 col3\" >0.739246</td> \n",
|
||||
" </tr> <tr> \n",
|
||||
" <th id=\"T_727bef98_4f3e_11e8_a315_787b8ab7acb2level0_row3\" class=\"row_heading level0 row3\" >Weight</th> \n",
|
||||
" <td id=\"T_727bef98_4f3e_11e8_a315_787b8ab7acb2row3_col0\" class=\"data row3 col0\" >0.00769861</td> \n",
|
||||
" <td id=\"T_727bef98_4f3e_11e8_a315_787b8ab7acb2row3_col1\" class=\"data row3 col1\" >-0.0908714</td> \n",
|
||||
" <td id=\"T_727bef98_4f3e_11e8_a315_787b8ab7acb2row3_col2\" class=\"data row3 col2\" >0.739246</td> \n",
|
||||
" <td id=\"T_727bef98_4f3e_11e8_a315_787b8ab7acb2row3_col3\" class=\"data row3 col3\" >1</td> \n",
|
||||
" </tr></tbody> \n",
|
||||
"</table> "
|
||||
],
|
||||
"text/plain": [
|
||||
"<pandas.io.formats.style.Styler at 0x111a3b198>"
|
||||
]
|
||||
},
|
||||
"execution_count": 3,
|
||||
"metadata": {},
|
||||
"output_type": "execute_result"
|
||||
}
|
||||
],
|
||||
"source": [
|
||||
"spearman = data.corr(method=\"spearman\")\n",
|
||||
"spearman.style.apply(background_gradient,\n",
|
||||
" cmap='Wistia',\n",
|
||||
" m=spearman.min().min(),\n",
|
||||
" M=spearman.max().max()\n",
|
||||
")"
|
||||
]
|
||||
}
|
||||
],
|
||||
"metadata": {
|
||||
"kernelspec": {
|
||||
"display_name": "Python 3",
|
||||
"language": "python",
|
||||
"name": "python3"
|
||||
},
|
||||
"language_info": {
|
||||
"codemirror_mode": {
|
||||
"name": "ipython",
|
||||
"version": 3
|
||||
},
|
||||
"file_extension": ".py",
|
||||
"mimetype": "text/x-python",
|
||||
"name": "python",
|
||||
"nbconvert_exporter": "python",
|
||||
"pygments_lexer": "ipython3",
|
||||
"version": "3.6.4"
|
||||
}
|
||||
},
|
||||
"nbformat": 4,
|
||||
"nbformat_minor": 2
|
||||
}
|
Reference in New Issue
Block a user