Merge branch 'master' of https://github.com/Dekker1/ResearchMethods
This commit is contained in:
commit
89eeff700e
@ -36,6 +36,76 @@
|
|||||||
|
|
||||||
\section{Results} \label{sec:results}
|
\section{Results} \label{sec:results}
|
||||||
|
|
||||||
|
\subsection{The advantage of height}
|
||||||
|
|
||||||
|
\begin{table}[ht]
|
||||||
|
\centering
|
||||||
|
\label{tab:chi-height}
|
||||||
|
\begin{tabular}{|l|r|r|r|r|}
|
||||||
|
\hline
|
||||||
|
& \textbf{M: 168 - 188} & \textbf{M: 189 - 210} & \textbf{F: 155 - 171} & \textbf{F: 172 - 189} \\ \hline
|
||||||
|
\textbf{1 - 99} & 67 / 73 & 32 / 26 & 38 / 42 & 60 / 55 \\
|
||||||
|
\textbf{100 - 199} & 69 / 72 & 30 / 26 & 31 / 27 & 32 / 36 \\
|
||||||
|
\textbf{200 - 299} & 75 / 68 & 17 / 25 & 18 / 17 & 22 / 23 \\
|
||||||
|
\textbf{300 - 399} & 61 / 60 & 21 / 23 & 11 / 12 & 17 / 16 \\
|
||||||
|
\textbf{400 - 499} & 59 / 60 & 22 / 22 & 7 / 6
|
||||||
|
& 7 / 8 \\
|
||||||
|
\hline
|
||||||
|
\end{tabular}
|
||||||
|
\caption{Observed / Expected values used for the $\chi^2$-test. The groups are divided by their rank (vertical) and, per gender, their height (horizontal).}
|
||||||
|
\end{table}
|
||||||
|
|
||||||
|
$$
|
||||||
|
\chi^2 \approx 7.697606186049128
|
||||||
|
$$
|
||||||
|
|
||||||
|
$$
|
||||||
|
df = (5-1)(4-1) = 12
|
||||||
|
$$
|
||||||
|
|
||||||
|
$$
|
||||||
|
\chi^2(7.69\dots,12) \approx 0.8082925814979871
|
||||||
|
$$
|
||||||
|
|
||||||
|
|
||||||
|
\textbf {t-test men:} T score: 1.711723, P score: 0.043815
|
||||||
|
|
||||||
|
\textbf {t-test women:} T score: 1.860241, P score: 0.032030
|
||||||
|
|
||||||
|
|
||||||
|
\subsection{The advantage of left-handedness}
|
||||||
|
|
||||||
|
\begin{table}[ht]
|
||||||
|
\centering
|
||||||
|
\label{tab:chi-hand}
|
||||||
|
\begin{tabular}{|l|l|l|l|l|l|}
|
||||||
|
\hline
|
||||||
|
& \textbf{1 - 99} & \textbf{100 - 199} & \textbf{200 - 299} & \textbf{300 - 399} & \textbf{400 - 499} \\
|
||||||
|
\hline
|
||||||
|
\textbf{L} & 22 / 21 & 23 / 18 & 17 / 15 & 6 / 12 & 8 / 10 \\
|
||||||
|
\textbf{R} & 174 / 177 & 139 / 144 & 117 / 119 &
|
||||||
|
105 / 98 & 88 / 86 \\
|
||||||
|
\hline
|
||||||
|
\end{tabular}
|
||||||
|
\caption{Observed / Expected values used for the $\chi^2$-test. The groups are divided by which hand they use (vertical) and their rank (horizontal).}
|
||||||
|
\end{table}
|
||||||
|
|
||||||
|
|
||||||
|
$$
|
||||||
|
\chi^2 \approx 6.467312944404331
|
||||||
|
$$
|
||||||
|
|
||||||
|
$$
|
||||||
|
df = (2-1)(5-1) = 4
|
||||||
|
$$
|
||||||
|
|
||||||
|
$$
|
||||||
|
\chi^2(6.46\dots,4) \approx 0.1668616190847413
|
||||||
|
$$
|
||||||
|
|
||||||
|
\textbf {t-test:} T score: 0.451694, P score: 0.325815
|
||||||
|
|
||||||
|
|
||||||
\section{Discussion} \label{sec:discussion}
|
\section{Discussion} \label{sec:discussion}
|
||||||
|
|
||||||
\end{document}
|
\end{document}
|
||||||
|
754
wk7/wk7.ipynb
754
wk7/wk7.ipynb
File diff suppressed because one or more lines are too long
BIN
wk8/A1_data.xlsx
BIN
wk8/A1_data.xlsx
Binary file not shown.
155
wk8/week8.tex
155
wk8/week8.tex
@ -12,6 +12,8 @@
|
|||||||
\usepackage[utf8]{inputenc} %support umlauts in the input
|
\usepackage[utf8]{inputenc} %support umlauts in the input
|
||||||
% Easier compilation
|
% Easier compilation
|
||||||
\usepackage{bookmark}
|
\usepackage{bookmark}
|
||||||
|
\usepackage{natbib}
|
||||||
|
\usepackage{graphicx}
|
||||||
|
|
||||||
\begin{document}
|
\begin{document}
|
||||||
\title{Week 8 - Quantitative data analysis}
|
\title{Week 8 - Quantitative data analysis}
|
||||||
@ -25,8 +27,161 @@
|
|||||||
|
|
||||||
\section{Method} \label{sec:method}
|
\section{Method} \label{sec:method}
|
||||||
|
|
||||||
|
The purpose of this report is to re-analyse the data presented in the paper by
|
||||||
|
\cite{dong2018methods}, which investigates the effect that protests (as an
|
||||||
|
example of disruptive social behaviours in general) have on consumer
|
||||||
|
behaviours. \cite{dong2018methods} hypothesise that protests decrease
|
||||||
|
consumer behaviour in the surrounding area of the event, and suggest that
|
||||||
|
consumer spending could be used as an additional non-traditional economic
|
||||||
|
indicator and as a gauge of consumer sentiment. Consumer spending was analysed
|
||||||
|
using credit card transaction data from a metropolitan area within a country
|
||||||
|
that is part of The Organisation for Economic Co-operation and Development
|
||||||
|
(OECD). Although \cite{dong2018methods} investigate temporal and spatial
|
||||||
|
effects on consumer spending, for the purposes of this analysis, only the
|
||||||
|
spatial effect of variables (with relation to the geographical distance from
|
||||||
|
the event) is considered. The dataset consists of variables measured as a
|
||||||
|
function of the distance from the event (in km), including: the number of
|
||||||
|
customers, the median spending amount, the number of transactions, and the
|
||||||
|
total sales amount.
|
||||||
|
|
||||||
|
The re-analysis is conducted on the data provided in the
|
||||||
|
paper~\cite{dong2018methods}, using Python in conjunction with packages such as
|
||||||
|
pandas, matplotlib, numpy and seaborn, to process and visualise the data. As
|
||||||
|
aforementioned, only spatial data and the variables mentioned above are
|
||||||
|
considered, for the reference days and the change occurring on Day 62 (day of
|
||||||
|
first socially disruptive event). The distribution of the difference between
|
||||||
|
the reference period and Day 62 is visualised by plotting a histogram for each
|
||||||
|
variable. Since the decrease of each of the variables from the reference period
|
||||||
|
to Day 62 is provided, the mean and the median of these distributions can be
|
||||||
|
used to perform a one-sample (as we are given the difference) hypothesis
|
||||||
|
test to assess whether the protests on Day 62 had a discernible effect.
|
||||||
|
|
||||||
|
Assuming the mean of each variable over the reference period is the midpoint
|
||||||
|
between their respective maximum and minimum values, we can reconstruct
|
||||||
|
approximate actual values for Day 62 (given the decrease in value on Day 62
|
||||||
|
from the reference period). By comparing these values to the range over the
|
||||||
|
reference period, another assessment can be made to determine whether the data
|
||||||
|
presents a discernible effect on consumer spending as a result of social
|
||||||
|
disruption, scaling with distance.
|
||||||
|
|
||||||
|
Although time series data was not explicitly provided, by extrapolating
|
||||||
|
information from a graph in \cite{dong2018methods} we can quantify the decrease
|
||||||
|
in number of customers and median spending on Day 62 using information about the
|
||||||
|
reference days (from 43 to 61). After collecting the values for each of the
|
||||||
|
reference days (43-61), the mean and standard deviation of this sample can be
|
||||||
|
calculated. Assuming a normal distribution of the data, we can calculate a
|
||||||
|
z-score for each observation on Day 62, and use this to assess the original
|
||||||
|
hypothesis.
|
||||||
|
|
||||||
|
By performing each of the above tests, a re-analysis will be conducted on
|
||||||
|
\cite{dong2018methods}'s paper hypothesising that consumer spending decreases
|
||||||
|
as a result of social events such as protests. In the Results section, we will
|
||||||
|
perform the statistical analyses described above. The results of these tests
|
||||||
|
will then be explored in the Discussion section, along with assumptions and
|
||||||
|
limitations of the tests and what can be concluded from them.
|
||||||
|
|
||||||
\section{Results} \label{sec:results}
|
\section{Results} \label{sec:results}
|
||||||
|
|
||||||
|
For each of the variables in the given data (number of customers, median
|
||||||
|
spending amount, number of transactions, and sales totals) we construct a
|
||||||
|
histogram of the decrease of each (on Day 62). We then compute the mean and
|
||||||
|
median of the data so we can proceed to perform a one-sample hypothesis test.
|
||||||
|
|
||||||
|
\begin{figure}[ht]
|
||||||
|
\centering
|
||||||
|
\label{fig:distr}
|
||||||
|
\includegraphics[width=\textwidth]{distr.png}
|
||||||
|
\caption{Distribution of each of the variables recorded in the data, as a function of the distance from an event}
|
||||||
|
\end{figure}
|
||||||
|
|
||||||
|
Using a mean/median of the reference period, obtained by taking the midpoint of the minimum and maximum values for each distance measure, a value can be reconstructed for the measurement on Day 62 (for each location) using:
|
||||||
|
|
||||||
|
\begin{equation}
|
||||||
|
\textrm{value} = \frac{\textrm{min} + \textrm{max}}{2} - \textrm{decrease}.
|
||||||
|
\tag{1}
|
||||||
|
\end{equation}
|
||||||
|
\\
|
||||||
|
We can then plot the maximum and minimum values for the reference period, as well as the reconstructed Day 62 variables to observe the behaviour of consumer spending after the event.
|
||||||
|
|
||||||
|
\begin{figure}[ht]
|
||||||
|
\centering
|
||||||
|
\includegraphics[width=\textwidth]{effect.png}
|
||||||
|
\caption{The reconstructed values for Day 62 of each variable plotted against their respective minimums and maximums over the reference period}
|
||||||
|
\label{fig:effect}
|
||||||
|
\end{figure}
|
||||||
|
|
||||||
|
Using the data recorded, for each of the three distances recorded, the mean and standard deviation of the reference period can be calculated. The z-score for each observed value on Day 62 can be computed using:
|
||||||
|
|
||||||
|
\begin{equation}
|
||||||
|
\textrm{Z} = \frac{\textrm{X} - \mu}{\sigma},
|
||||||
|
\tag{2}
|
||||||
|
\end{equation}
|
||||||
|
\\
|
||||||
|
where X is the observed value, $\mu$ and $\sigma$ are the mean and standard deviation (respectively) of the reference period.
|
||||||
|
|
||||||
|
\begin{table}[ht]
|
||||||
|
\centering
|
||||||
|
\label{my-label}
|
||||||
|
\begin{tabular}{|l|l|r|r|}
|
||||||
|
\hline
|
||||||
|
\textbf{Variable} & \textbf{Distance} & \textbf{X} & \textbf{Z} \\
|
||||||
|
\hline
|
||||||
|
\textbf{Customers} & \textless 2km & -0.600 & -6.87798 \\
|
||||||
|
\textbf{Customers} & 2km - 4km & -0.200 & -3.33253 \\
|
||||||
|
\textbf{Customers} & \textgreater 4km & -0.100 & -3.70740 \\
|
||||||
|
\textbf{Median Spending} & \textless 2km & -0.200 & -3.05849 \\
|
||||||
|
\textbf{Median Spending} & 2km - 4km & -0.100 & -1.46508 \\
|
||||||
|
\textbf{Median Spending} & \textgreater 4km & -0.035 & -1.99199 \\
|
||||||
|
\hline
|
||||||
|
\end{tabular}
|
||||||
|
\caption{The $Z$ score computed using equation 2 and the temporal data}
|
||||||
|
\end{table}
|
||||||
|
|
||||||
\section{Discussion} \label{sec:discussion}
|
\section{Discussion} \label{sec:discussion}
|
||||||
|
|
||||||
|
As shown in each of the subplots of Figure 1, the mean and median values of
|
||||||
|
the decrease in each of the distributions are greater than zero (note: higher
|
||||||
|
values of the decrease variable indicate a larger decrease/negative change).
|
||||||
|
These mean and median values can be used to perform a one-sample hypothesis
|
||||||
|
test, which finds that since each of the mean/median values is greater than
|
||||||
|
zero, we can infer that the event had a net decreasing effect on the number of
|
||||||
|
customers, median spending amount, number of transactions, and total sales
|
||||||
|
amount.
|
||||||
|
|
||||||
|
In Figure \ref{fig:effect} values were approximated for each variable on Day
|
||||||
|
62, using Equation 1, and plotted against the minimum and maximum values of
|
||||||
|
the respective variables. This allows us to visually assess whether the
|
||||||
|
reconstructed value for Day 62 lies outside the range of recorded values for
|
||||||
|
the reference period, and presents uncharacteristic behaviour. A decrease is
|
||||||
|
evident in each of the variables after the event has occurred (on Day 62)
|
||||||
|
within a distance of approximately 2 km, and appears to stabilise thereafter.
|
||||||
|
This provides support to \cite{dong2018methods}'s hypothesis that consumer
|
||||||
|
spending is affected by socially disruptive events, and also provides evidence
|
||||||
|
to the notion of spatial scaling of this effect (based on the event location).
|
||||||
|
It is important to note that the approximation used in this technique is
|
||||||
|
subject to a level of error due to the ideal calculation of the mean/median of
|
||||||
|
the reference data as the midpoint between the minimum and maximum values
|
||||||
|
provided.
|
||||||
|
|
||||||
|
Extrapolating data from a graph in \cite{dong2018methods} provided time series
|
||||||
|
data (divided into three radii) to analyse. This data was collected by
|
||||||
|
visually estimating the values from the graph which will inherently introduce
|
||||||
|
a source of error. However, by computing the z-score as described in Equation
|
||||||
|
2, the table provided in Figure 3 was constructed. Each of the z-score values
|
||||||
|
in the table are negative, indicating a decrease in both the number of
|
||||||
|
customers and median spending on Day 62. The much larger magnitude of z-scores
|
||||||
|
for the \textless 2km distance ring for both variables is in agreement with earlier
|
||||||
|
discussion, strengthening the hypothesis of the spatial correlation of
|
||||||
|
consumer spending.
|
||||||
|
|
||||||
|
Each of the above tests have agreed on the spatial and temporal correlation of
|
||||||
|
consumer spending and socially disruptive events. With the limited data
|
||||||
|
available, we can therefore concur with the hypothesis of Dong et al. that
|
||||||
|
consumer spending decreases in the area around disruptive social behaviour,
|
||||||
|
after finding the temporal correlation on Day 62, as well as the spatially
|
||||||
|
decreasing effect further from the event.
|
||||||
|
|
||||||
|
\bibliographystyle{humannat}
|
||||||
|
\bibliography{references}
|
||||||
|
|
||||||
\end{document}
|
\end{document}
|
||||||
|
341
wk8/wk8.ipynb
341
wk8/wk8.ipynb
File diff suppressed because one or more lines are too long
BIN
wk9/pearson.png
Normal file
BIN
wk9/pearson.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 90 KiB |
BIN
wk9/spearman.png
Normal file
BIN
wk9/spearman.png
Normal file
Binary file not shown.
After Width: | Height: | Size: 88 KiB |
@ -12,6 +12,7 @@
|
|||||||
\usepackage[utf8]{inputenc} %support umlauts in the input
|
\usepackage[utf8]{inputenc} %support umlauts in the input
|
||||||
% Easier compilation
|
% Easier compilation
|
||||||
\usepackage{bookmark}
|
\usepackage{bookmark}
|
||||||
|
\usepackage{graphicx}
|
||||||
|
|
||||||
\begin{document}
|
\begin{document}
|
||||||
\title{Week 9 - Correlation and Regression}
|
\title{Week 9 - Correlation and Regression}
|
||||||
@ -22,6 +23,12 @@
|
|||||||
\maketitle
|
\maketitle
|
||||||
|
|
||||||
\section{Introduction} \label{sec:introduction}
|
\section{Introduction} \label{sec:introduction}
|
||||||
|
We present a report on the relationship between the heights and weights of the
|
||||||
|
top tennis players as catalogued in provided data. We use statistical analysis
|
||||||
|
techniques to numerically describe the characteristics of the data, to see how
|
||||||
|
trends are exhibited within the data set. We conclude the report with a brief
|
||||||
|
discussion of the implications of the analysis and provide insights on
|
||||||
|
potential correlations that may exist.
|
||||||
|
|
||||||
\section{Method} \label{sec:method}
|
\section{Method} \label{sec:method}
|
||||||
Provided with a set of 132 unique records of the top 200 male tennis players,
|
Provided with a set of 132 unique records of the top 200 male tennis players,
|
||||||
@ -34,21 +41,33 @@
|
|||||||
samples and samples of ranking ranges within the top 200. To this end, we made
|
samples and samples of ranking ranges within the top 200. To this end, we made
|
||||||
use of Microsoft Excel tools and functions of the Python library SciPy.
|
use of Microsoft Excel tools and functions of the Python library SciPy.
|
||||||
|
|
||||||
|
We specifically have made use of these separate statistical analysis tools in the
|
||||||
|
interest of sanity checking our findings. To do this, we simply replicated the
|
||||||
|
correlation tests within other software environments.
|
||||||
|
|
||||||
\section{Results} \label{sec:results}
|
\section{Results} \label{sec:results}
|
||||||
We performed seperate statistical analyses on 10 different samples of the
|
We performed separate statistical analyses on 10 different samples of the
|
||||||
population, as well as the population itself. This included 5 separate subsets
|
population, as well as the population itself. This included 11 separate
|
||||||
of the rankings (top 20 and 50, middle 20, bottom 20 and 50) and 5 seperate
|
subsets of the rankings:
|
||||||
randomly chosen samples of 20 players.
|
\begin{itemize}
|
||||||
\\ \\
|
\item The top 20 entries
|
||||||
\Cref{tab:excel-results} shows the the results for the conducted tests.
|
\item The middle 20 entries
|
||||||
|
\item The bottom 20 entries
|
||||||
|
\item The top 50 entries
|
||||||
|
\item The bottom 50 entries
|
||||||
|
\item 5 randomly chosen sets of 20 entries
|
||||||
|
\end{itemize}
|
||||||
|
\vspace{1em}
|
||||||
|
Table~\ref{tab:excel_results} shows the results for the conducted tests.
|
||||||
|
|
||||||
\begin{table}[ht]
|
\begin{table}[ht]
|
||||||
\centering
|
\centering
|
||||||
|
\label{tab:excel_results}
|
||||||
\begin{tabular}{|l|r|r|}
|
\begin{tabular}{|l|r|r|}
|
||||||
\hline
|
\hline
|
||||||
\textbf{Test Set} & \textbf{Pearson's Coefficient} & \textbf{Spearman's Coefficient} \\
|
\textbf{Test Set} & \textbf{Pearson's Coefficient} & \textbf{Spearman's Coefficient} \\
|
||||||
\hline
|
\hline
|
||||||
\textbf{Population} & 0.77953 & 0.73925 \\
|
\textbf{Full Population} & 0.77953 & 0.73925 \\
|
||||||
\textbf{Top 20} & 0.80743 & 0.80345 \\
|
\textbf{Top 20} & 0.80743 & 0.80345 \\
|
||||||
\textbf{Middle 20} & 0.54134 & 0.36565 \\
|
\textbf{Middle 20} & 0.54134 & 0.36565 \\
|
||||||
\textbf{Bottom 20} & 0.84046 & 0.88172 \\
|
\textbf{Bottom 20} & 0.84046 & 0.88172 \\
|
||||||
@ -61,11 +80,20 @@
|
|||||||
\textbf{Random Set \#5} & 0.86203 & 0.77832
|
\textbf{Random Set \#5} & 0.86203 & 0.77832
|
||||||
\\ \hline
|
\\ \hline
|
||||||
\end{tabular}
|
\end{tabular}
|
||||||
\caption{TODO: Insert better caption for this table. All data is rounded to 5 decimal
|
\caption{Table showing the correlation coefficients between height and
|
||||||
|
weight using different test sets. All data is rounded to 5 decimal
|
||||||
places}
|
places}
|
||||||
\label{tab:excel-results}
|
|
||||||
\end{table}
|
\end{table}
|
||||||
|
|
||||||
|
\begin{figure}[ht]
|
||||||
|
\centering
|
||||||
|
\label{fig:scipy}
|
||||||
|
\includegraphics[width=0.6\textwidth]{pearson.png}
|
||||||
|
\includegraphics[width=0.6\textwidth]{spearman.png}
|
||||||
|
\caption{The Pearson (top) and Spearman (bottom) correlation coefficients
|
||||||
|
of the data set as computed by the Pandas Python library}
|
||||||
|
\end{figure}
|
||||||
|
|
||||||
\section{Discussion} \label{sec:discussion}
|
\section{Discussion} \label{sec:discussion}
|
||||||
The results generally indicate that there is a fairly strong positive
|
The results generally indicate that there is a fairly strong positive
|
||||||
correlation between the height and weight of an individual tennis player,
|
correlation between the height and weight of an individual tennis player,
|
||||||
|
252
wk9/wk9.ipynb
Normal file
252
wk9/wk9.ipynb
Normal file
@ -0,0 +1,252 @@
|
|||||||
|
{
|
||||||
|
"cells": [
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 1,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"name": "stdout",
|
||||||
|
"output_type": "stream",
|
||||||
|
"text": [
|
||||||
|
"Using matplotlib backend: MacOSX\n",
|
||||||
|
"Populating the interactive namespace from numpy and matplotlib\n"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"%pylab\n",
|
||||||
|
"%matplotlib inline\n",
|
||||||
|
"import pandas as pd\n",
|
||||||
|
"import numpy as np\n",
|
||||||
|
"import matplotlib.pyplot as plt\n",
|
||||||
|
"from scipy import stats\n",
|
||||||
|
"from matplotlib import colors\n",
|
||||||
|
"\n",
|
||||||
|
"data = pd.read_csv(\"Tennis players 2017-09.csv\")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 2,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/html": [
|
||||||
|
"<style type=\"text/css\" >\n",
|
||||||
|
" #T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row0_col0 {\n",
|
||||||
|
" background-color: #fc7f00;\n",
|
||||||
|
" } #T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row0_col1 {\n",
|
||||||
|
" background-color: #ffd20c;\n",
|
||||||
|
" } #T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row0_col2 {\n",
|
||||||
|
" background-color: #ffe619;\n",
|
||||||
|
" } #T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row0_col3 {\n",
|
||||||
|
" background-color: #f1f44d;\n",
|
||||||
|
" } #T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row1_col0 {\n",
|
||||||
|
" background-color: #ffd20c;\n",
|
||||||
|
" } #T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row1_col1 {\n",
|
||||||
|
" background-color: #fc7f00;\n",
|
||||||
|
" } #T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row1_col2 {\n",
|
||||||
|
" background-color: #e4ff7a;\n",
|
||||||
|
" } #T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row1_col3 {\n",
|
||||||
|
" background-color: #e8fc6c;\n",
|
||||||
|
" } #T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row2_col0 {\n",
|
||||||
|
" background-color: #ffe619;\n",
|
||||||
|
" } #T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row2_col1 {\n",
|
||||||
|
" background-color: #e4ff7a;\n",
|
||||||
|
" } #T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row2_col2 {\n",
|
||||||
|
" background-color: #fc7f00;\n",
|
||||||
|
" } #T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row2_col3 {\n",
|
||||||
|
" background-color: #fe9800;\n",
|
||||||
|
" } #T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row3_col0 {\n",
|
||||||
|
" background-color: #f1f44d;\n",
|
||||||
|
" } #T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row3_col1 {\n",
|
||||||
|
" background-color: #e8fc6c;\n",
|
||||||
|
" } #T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row3_col2 {\n",
|
||||||
|
" background-color: #fe9800;\n",
|
||||||
|
" } #T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row3_col3 {\n",
|
||||||
|
" background-color: #fc7f00;\n",
|
||||||
|
" }</style> \n",
|
||||||
|
"<table id=\"T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2\" > \n",
|
||||||
|
"<thead> <tr> \n",
|
||||||
|
" <th class=\"blank level0\" ></th> \n",
|
||||||
|
" <th class=\"col_heading level0 col0\" >DOB</th> \n",
|
||||||
|
" <th class=\"col_heading level0 col1\" >RANK</th> \n",
|
||||||
|
" <th class=\"col_heading level0 col2\" >HEIGHT</th> \n",
|
||||||
|
" <th class=\"col_heading level0 col3\" >Weight</th> \n",
|
||||||
|
" </tr></thead> \n",
|
||||||
|
"<tbody> <tr> \n",
|
||||||
|
" <th id=\"T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2level0_row0\" class=\"row_heading level0 row0\" >DOB</th> \n",
|
||||||
|
" <td id=\"T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row0_col0\" class=\"data row0 col0\" >1</td> \n",
|
||||||
|
" <td id=\"T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row0_col1\" class=\"data row0 col1\" >0.277766</td> \n",
|
||||||
|
" <td id=\"T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row0_col2\" class=\"data row0 col2\" >0.139684</td> \n",
|
||||||
|
" <td id=\"T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row0_col3\" class=\"data row0 col3\" >-0.030479</td> \n",
|
||||||
|
" </tr> <tr> \n",
|
||||||
|
" <th id=\"T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2level0_row1\" class=\"row_heading level0 row1\" >RANK</th> \n",
|
||||||
|
" <td id=\"T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row1_col0\" class=\"data row1 col0\" >0.277766</td> \n",
|
||||||
|
" <td id=\"T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row1_col1\" class=\"data row1 col1\" >1</td> \n",
|
||||||
|
" <td id=\"T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row1_col2\" class=\"data row1 col2\" >-0.16755</td> \n",
|
||||||
|
" <td id=\"T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row1_col3\" class=\"data row1 col3\" >-0.121946</td> \n",
|
||||||
|
" </tr> <tr> \n",
|
||||||
|
" <th id=\"T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2level0_row2\" class=\"row_heading level0 row2\" >HEIGHT</th> \n",
|
||||||
|
" <td id=\"T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row2_col0\" class=\"data row2 col0\" >0.139684</td> \n",
|
||||||
|
" <td id=\"T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row2_col1\" class=\"data row2 col1\" >-0.16755</td> \n",
|
||||||
|
" <td id=\"T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row2_col2\" class=\"data row2 col2\" >1</td> \n",
|
||||||
|
" <td id=\"T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row2_col3\" class=\"data row2 col3\" >0.779526</td> \n",
|
||||||
|
" </tr> <tr> \n",
|
||||||
|
" <th id=\"T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2level0_row3\" class=\"row_heading level0 row3\" >Weight</th> \n",
|
||||||
|
" <td id=\"T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row3_col0\" class=\"data row3 col0\" >-0.030479</td> \n",
|
||||||
|
" <td id=\"T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row3_col1\" class=\"data row3 col1\" >-0.121946</td> \n",
|
||||||
|
" <td id=\"T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row3_col2\" class=\"data row3 col2\" >0.779526</td> \n",
|
||||||
|
" <td id=\"T_7277b07a_4f3e_11e8_b8a3_787b8ab7acb2row3_col3\" class=\"data row3 col3\" >1</td> \n",
|
||||||
|
" </tr></tbody> \n",
|
||||||
|
"</table> "
|
||||||
|
],
|
||||||
|
"text/plain": [
|
||||||
|
"<pandas.io.formats.style.Styler at 0x1a197d7b38>"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 2,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"def background_gradient(s, m, M, cmap='Wistia', low=0, high=0):\n",
|
||||||
|
" rng = M - m\n",
|
||||||
|
" norm = colors.Normalize(m - (rng * low),\n",
|
||||||
|
" M + (rng * high))\n",
|
||||||
|
" normed = norm(s.values)\n",
|
||||||
|
" c = [colors.rgb2hex(x) for x in plt.cm.get_cmap(cmap)(normed)]\n",
|
||||||
|
" return ['background-color: %s' % color for color in c]\n",
|
||||||
|
"\n",
|
||||||
|
"data = data[[\"SEX\", \"DOB\", \"RANK\", \"HANDED\", \"Country\", \"HEIGHT\", \"Weight\"]]\n",
|
||||||
|
"data.drop_duplicates\n",
|
||||||
|
"\n",
|
||||||
|
"pearson = data.corr()\n",
|
||||||
|
"pearson.style.apply(background_gradient,\n",
|
||||||
|
" cmap='Wistia',\n",
|
||||||
|
" m=pearson.min().min(),\n",
|
||||||
|
" M=pearson.max().max()\n",
|
||||||
|
")"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
{
|
||||||
|
"cell_type": "code",
|
||||||
|
"execution_count": 3,
|
||||||
|
"metadata": {},
|
||||||
|
"outputs": [
|
||||||
|
{
|
||||||
|
"data": {
|
||||||
|
"text/html": [
|
||||||
|
"<style type=\"text/css\" >\n",
|
||||||
|
" #T_727bef98_4f3e_11e8_a315_787b8ab7acb2row0_col0 {\n",
|
||||||
|
" background-color: #fc7f00;\n",
|
||||||
|
" } #T_727bef98_4f3e_11e8_a315_787b8ab7acb2row0_col1 {\n",
|
||||||
|
" background-color: #ffd20c;\n",
|
||||||
|
" } #T_727bef98_4f3e_11e8_a315_787b8ab7acb2row0_col2 {\n",
|
||||||
|
" background-color: #fee91d;\n",
|
||||||
|
" } #T_727bef98_4f3e_11e8_a315_787b8ab7acb2row0_col3 {\n",
|
||||||
|
" background-color: #f4f242;\n",
|
||||||
|
" } #T_727bef98_4f3e_11e8_a315_787b8ab7acb2row1_col0 {\n",
|
||||||
|
" background-color: #ffd20c;\n",
|
||||||
|
" } #T_727bef98_4f3e_11e8_a315_787b8ab7acb2row1_col1 {\n",
|
||||||
|
" background-color: #fc7f00;\n",
|
||||||
|
" } #T_727bef98_4f3e_11e8_a315_787b8ab7acb2row1_col2 {\n",
|
||||||
|
" background-color: #e4ff7a;\n",
|
||||||
|
" } #T_727bef98_4f3e_11e8_a315_787b8ab7acb2row1_col3 {\n",
|
||||||
|
" background-color: #eafa63;\n",
|
||||||
|
" } #T_727bef98_4f3e_11e8_a315_787b8ab7acb2row2_col0 {\n",
|
||||||
|
" background-color: #fee91d;\n",
|
||||||
|
" } #T_727bef98_4f3e_11e8_a315_787b8ab7acb2row2_col1 {\n",
|
||||||
|
" background-color: #e4ff7a;\n",
|
||||||
|
" } #T_727bef98_4f3e_11e8_a315_787b8ab7acb2row2_col2 {\n",
|
||||||
|
" background-color: #fc7f00;\n",
|
||||||
|
" } #T_727bef98_4f3e_11e8_a315_787b8ab7acb2row2_col3 {\n",
|
||||||
|
" background-color: #ff9d00;\n",
|
||||||
|
" } #T_727bef98_4f3e_11e8_a315_787b8ab7acb2row3_col0 {\n",
|
||||||
|
" background-color: #f4f242;\n",
|
||||||
|
" } #T_727bef98_4f3e_11e8_a315_787b8ab7acb2row3_col1 {\n",
|
||||||
|
" background-color: #eafa63;\n",
|
||||||
|
" } #T_727bef98_4f3e_11e8_a315_787b8ab7acb2row3_col2 {\n",
|
||||||
|
" background-color: #ff9d00;\n",
|
||||||
|
" } #T_727bef98_4f3e_11e8_a315_787b8ab7acb2row3_col3 {\n",
|
||||||
|
" background-color: #fc7f00;\n",
|
||||||
|
" }</style> \n",
|
||||||
|
"<table id=\"T_727bef98_4f3e_11e8_a315_787b8ab7acb2\" > \n",
|
||||||
|
"<thead> <tr> \n",
|
||||||
|
" <th class=\"blank level0\" ></th> \n",
|
||||||
|
" <th class=\"col_heading level0 col0\" >DOB</th> \n",
|
||||||
|
" <th class=\"col_heading level0 col1\" >RANK</th> \n",
|
||||||
|
" <th class=\"col_heading level0 col2\" >HEIGHT</th> \n",
|
||||||
|
" <th class=\"col_heading level0 col3\" >Weight</th> \n",
|
||||||
|
" </tr></thead> \n",
|
||||||
|
"<tbody> <tr> \n",
|
||||||
|
" <th id=\"T_727bef98_4f3e_11e8_a315_787b8ab7acb2level0_row0\" class=\"row_heading level0 row0\" >DOB</th> \n",
|
||||||
|
" <td id=\"T_727bef98_4f3e_11e8_a315_787b8ab7acb2row0_col0\" class=\"data row0 col0\" >1</td> \n",
|
||||||
|
" <td id=\"T_727bef98_4f3e_11e8_a315_787b8ab7acb2row0_col1\" class=\"data row0 col1\" >0.280386</td> \n",
|
||||||
|
" <td id=\"T_727bef98_4f3e_11e8_a315_787b8ab7acb2row0_col2\" class=\"data row0 col2\" >0.122412</td> \n",
|
||||||
|
" <td id=\"T_727bef98_4f3e_11e8_a315_787b8ab7acb2row0_col3\" class=\"data row0 col3\" >0.00769861</td> \n",
|
||||||
|
" </tr> <tr> \n",
|
||||||
|
" <th id=\"T_727bef98_4f3e_11e8_a315_787b8ab7acb2level0_row1\" class=\"row_heading level0 row1\" >RANK</th> \n",
|
||||||
|
" <td id=\"T_727bef98_4f3e_11e8_a315_787b8ab7acb2row1_col0\" class=\"data row1 col0\" >0.280386</td> \n",
|
||||||
|
" <td id=\"T_727bef98_4f3e_11e8_a315_787b8ab7acb2row1_col1\" class=\"data row1 col1\" >1</td> \n",
|
||||||
|
" <td id=\"T_727bef98_4f3e_11e8_a315_787b8ab7acb2row1_col2\" class=\"data row1 col2\" >-0.160006</td> \n",
|
||||||
|
" <td id=\"T_727bef98_4f3e_11e8_a315_787b8ab7acb2row1_col3\" class=\"data row1 col3\" >-0.0908714</td> \n",
|
||||||
|
" </tr> <tr> \n",
|
||||||
|
" <th id=\"T_727bef98_4f3e_11e8_a315_787b8ab7acb2level0_row2\" class=\"row_heading level0 row2\" >HEIGHT</th> \n",
|
||||||
|
" <td id=\"T_727bef98_4f3e_11e8_a315_787b8ab7acb2row2_col0\" class=\"data row2 col0\" >0.122412</td> \n",
|
||||||
|
" <td id=\"T_727bef98_4f3e_11e8_a315_787b8ab7acb2row2_col1\" class=\"data row2 col1\" >-0.160006</td> \n",
|
||||||
|
" <td id=\"T_727bef98_4f3e_11e8_a315_787b8ab7acb2row2_col2\" class=\"data row2 col2\" >1</td> \n",
|
||||||
|
" <td id=\"T_727bef98_4f3e_11e8_a315_787b8ab7acb2row2_col3\" class=\"data row2 col3\" >0.739246</td> \n",
|
||||||
|
" </tr> <tr> \n",
|
||||||
|
" <th id=\"T_727bef98_4f3e_11e8_a315_787b8ab7acb2level0_row3\" class=\"row_heading level0 row3\" >Weight</th> \n",
|
||||||
|
" <td id=\"T_727bef98_4f3e_11e8_a315_787b8ab7acb2row3_col0\" class=\"data row3 col0\" >0.00769861</td> \n",
|
||||||
|
" <td id=\"T_727bef98_4f3e_11e8_a315_787b8ab7acb2row3_col1\" class=\"data row3 col1\" >-0.0908714</td> \n",
|
||||||
|
" <td id=\"T_727bef98_4f3e_11e8_a315_787b8ab7acb2row3_col2\" class=\"data row3 col2\" >0.739246</td> \n",
|
||||||
|
" <td id=\"T_727bef98_4f3e_11e8_a315_787b8ab7acb2row3_col3\" class=\"data row3 col3\" >1</td> \n",
|
||||||
|
" </tr></tbody> \n",
|
||||||
|
"</table> "
|
||||||
|
],
|
||||||
|
"text/plain": [
|
||||||
|
"<pandas.io.formats.style.Styler at 0x111a3b198>"
|
||||||
|
]
|
||||||
|
},
|
||||||
|
"execution_count": 3,
|
||||||
|
"metadata": {},
|
||||||
|
"output_type": "execute_result"
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"source": [
|
||||||
|
"spearman = data.corr(method=\"spearman\")\n",
|
||||||
|
"spearman.style.apply(background_gradient,\n",
|
||||||
|
" cmap='Wistia',\n",
|
||||||
|
" m=spearman.min().min(),\n",
|
||||||
|
" M=spearman.max().max()\n",
|
||||||
|
")"
|
||||||
|
]
|
||||||
|
}
|
||||||
|
],
|
||||||
|
"metadata": {
|
||||||
|
"kernelspec": {
|
||||||
|
"display_name": "Python 3",
|
||||||
|
"language": "python",
|
||||||
|
"name": "python3"
|
||||||
|
},
|
||||||
|
"language_info": {
|
||||||
|
"codemirror_mode": {
|
||||||
|
"name": "ipython",
|
||||||
|
"version": 3
|
||||||
|
},
|
||||||
|
"file_extension": ".py",
|
||||||
|
"mimetype": "text/x-python",
|
||||||
|
"name": "python",
|
||||||
|
"nbconvert_exporter": "python",
|
||||||
|
"pygments_lexer": "ipython3",
|
||||||
|
"version": "3.6.4"
|
||||||
|
}
|
||||||
|
},
|
||||||
|
"nbformat": 4,
|
||||||
|
"nbformat_minor": 2
|
||||||
|
}
|
Reference in New Issue
Block a user