diff --git a/README.md b/README.md index 8726d5b..177282e 100644 --- a/README.md +++ b/README.md @@ -19,6 +19,7 @@ We need need to separate the groups by gender the. - Jupyter notebook/lab - python packages - numpy + - scipy - matplotlib - pandas - seaborn diff --git a/wk7/wk7.ipynb b/wk7/wk7.ipynb index c5d84b4..ed1dd7c 100644 --- a/wk7/wk7.ipynb +++ b/wk7/wk7.ipynb @@ -9,7 +9,7 @@ }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 1, "metadata": {}, "outputs": [ { @@ -17,9 +17,9 @@ "evalue": "name 'pylab' is not defined", "output_type": "error", "traceback": [ - "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", - "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", - "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mpylab\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0mget_ipython\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrun_line_magic\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'matplotlib'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'inline'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", + "\u001b[0;31m--------------------------------------------------------------------------\u001b[0m", + "\u001b[0;31mNameError\u001b[0m Traceback (most recent call last)", + "\u001b[0;32m\u001b[0m in \u001b[0;36m\u001b[0;34m()\u001b[0m\n\u001b[0;32m----> 1\u001b[0;31m \u001b[0mpylab\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n\u001b[0m\u001b[1;32m 2\u001b[0m \u001b[0mget_ipython\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m.\u001b[0m\u001b[0mrun_line_magic\u001b[0m\u001b[0;34m(\u001b[0m\u001b[0;34m'matplotlib'\u001b[0m\u001b[0;34m,\u001b[0m \u001b[0;34m'inline'\u001b[0m\u001b[0;34m)\u001b[0m\u001b[0;34m\u001b[0m\u001b[0m\n", "\u001b[0;31mNameError\u001b[0m: name 'pylab' is not defined" ] } @@ -31,17 +31,20 @@ }, { "cell_type": "code", - "execution_count": 2, + "execution_count": 37, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", - "import seaborn as sn" + "import seaborn as sn\n", + "import numpy as np\n", + "import matplotlib.pyplot as plt\n", + "from scipy import stats" ] }, { "cell_type": "code", - "execution_count": 3, + "execution_count": 7, "metadata": {}, "outputs": [], "source": [ @@ -50,7 +53,7 @@ }, { "cell_type": "code", - "execution_count": 4, + "execution_count": 8, "metadata": {}, "outputs": [ { @@ -82,7 +85,7 @@ }, { "cell_type": "code", - "execution_count": 7, + "execution_count": 9, "metadata": {}, "outputs": [ { @@ -106,92 +109,111 @@ " \n", " \n", " \n", - " SEX\n", - " SURNAME\n", - " HANDED\n", - " HEIGHT\n", - " Weight\n", - " Country\n", - " RANK\n", - " Firstname\n", - " DOB\n", + " ranking\n", + " country\n", + " player\n", + " age\n", + " points\n", + " tournplayed\n", + " born\n", + " weight\n", + " height\n", + " hand\n", + " gender\n", " \n", " \n", " \n", " \n", " 0\n", - " FEMALE\n", - " Pliskova\n", - " RIGHT\n", - " 186.0\n", - " NaN\n", - " CZE\n", - " 4\n", - " Karolina\n", - " 1992.0\n", + " 1\n", + " ESP\n", + " Rafael Nadal\n", + " 31\n", + " 8770\n", + " 14\n", + " 1986\n", + " 85.0\n", + " 185.0\n", + " L\n", + " M\n", " \n", " \n", " 1\n", - " FEMALE\n", - " Halep\n", - " RIGHT\n", - " 168.0\n", - " NaN\n", - " Rou\n", " 2\n", - " Simona\n", - " 1991.0\n", + " SUI\n", + " Roger Federer\n", + " 36\n", + " 8670\n", + " 17\n", + " 1981\n", + " 85.0\n", + " 185.0\n", + " R\n", + " M\n", " \n", " \n", " 2\n", - " FEMALE\n", - " Garbine\n", - " RIGHT\n", - " 182.0\n", - " NaN\n", - " ESP\n", - " 1\n", - " Muguruza\n", - " 1993.0\n", + " 3\n", + " CRO\n", + " Marin Cilic\n", + " 29\n", + " 4985\n", + " 20\n", + " 1988\n", + " 89.0\n", + " 198.0\n", + " R\n", + " M\n", " \n", " \n", " 3\n", - " FEMALE\n", - " Svitolina\n", - " RIGHT\n", - " 174.0\n", - " NaN\n", - " UKR\n", - " 3\n", - " Elina\n", - " 1994.0\n", + " 4\n", + " GER\n", + " Alexander Zverev\n", + " 20\n", + " 4925\n", + " 24\n", + " 1997\n", + " 86.0\n", + " 198.0\n", + " R\n", + " M\n", " \n", " \n", " 4\n", - " FEMALE\n", - " Wozniacki\n", - " RIGHT\n", - " 177.0\n", - " NaN\n", - " DEN\n", - " 6\n", - " Caroline\n", - " 1990.0\n", + " 5\n", + " BUL\n", + " Grigor Dimitrov\n", + " 26\n", + " 4635\n", + " 22\n", + " 1991\n", + " 80.0\n", + " 191.0\n", + " R\n", + " M\n", " \n", " \n", "\n", "" ], "text/plain": [ - " SEX SURNAME HANDED HEIGHT Weight Country RANK Firstname DOB\n", - "0 FEMALE Pliskova RIGHT 186.0 NaN CZE 4 Karolina 1992.0\n", - "1 FEMALE Halep RIGHT 168.0 NaN Rou 2 Simona 1991.0\n", - "2 FEMALE Garbine RIGHT 182.0 NaN ESP 1 Muguruza 1993.0\n", - "3 FEMALE Svitolina RIGHT 174.0 NaN UKR 3 Elina 1994.0\n", - "4 FEMALE Wozniacki RIGHT 177.0 NaN DEN 6 Caroline 1990.0" + " ranking country player age points tournplayed born weight \\\n", + "0 1 ESP Rafael Nadal 31 8770 14 1986 85.0 \n", + "1 2 SUI Roger Federer 36 8670 17 1981 85.0 \n", + "2 3 CRO Marin Cilic 29 4985 20 1988 89.0 \n", + "3 4 GER Alexander Zverev 20 4925 24 1997 86.0 \n", + "4 5 BUL Grigor Dimitrov 26 4635 22 1991 80.0 \n", + "\n", + " height hand gender \n", + "0 185.0 L M \n", + "1 185.0 R M \n", + "2 198.0 R M \n", + "3 198.0 R M \n", + "4 191.0 R M " ] }, - "execution_count": 7, + "execution_count": 9, "metadata": {}, "output_type": "execute_result" } @@ -202,113 +224,113 @@ }, { "cell_type": "code", - "execution_count": 8, + "execution_count": 16, "metadata": {}, "outputs": [ { "data": { "text/html": [ " \n", - " \n", + "
\n", " \n", " \n", " \n", @@ -320,76 +342,76 @@ " \n", " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", - " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", " \n", "
rankingheight
ranking1-0.227788-0.593329-0.3253730.236752-0.1050340.0094234ranking1-0.227788-0.593329-0.3253730.236752-0.1050340.0094234
age-0.22778810.168840.015939-0.9941930.111297-0.0215108age-0.22778810.168840.015939-0.9941930.111297-0.0215108
points-0.5933290.1688410.043939-0.1765840.152671-0.00995652points-0.5933290.1688410.043939-0.1765840.152671-0.00995652
tournplayed-0.3253730.0159390.0439391-0.0219477-0.0960524-0.0567095tournplayed-0.3253730.0159390.0439391-0.0219477-0.0960524-0.0567095
born0.236752-0.994193-0.176584-0.02194771-0.1203760.0264355born0.236752-0.994193-0.176584-0.02194771-0.1203760.0264355
weight-0.1050340.1112970.152671-0.0960524-0.12037610.635301weight-0.1050340.1112970.152671-0.0960524-0.12037610.635301
height0.0094234-0.0215108-0.00995652-0.05670950.02643550.6353011height0.0094234-0.0215108-0.00995652-0.05670950.02643550.6353011
" ], "text/plain": [ - "" + "" ] }, - "execution_count": 8, + "execution_count": 16, "metadata": {}, "output_type": "execute_result" } @@ -400,7 +422,7 @@ }, { "cell_type": "code", - "execution_count": 15, + "execution_count": 17, "metadata": {}, "outputs": [ { @@ -417,6 +439,1072 @@ "print(\"Average rank of LH:\", df.loc[df[\"hand\"] == \"L\"][\"ranking\"].mean())" ] }, + { + "cell_type": "code", + "execution_count": 18, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "Average rank of RH: 595.3157076205288\n", + "Average rank of LH: 642.2278481012659\n" + ] + } + ], + "source": [ + "print(\"Average rank of RH:\", df.loc[df[\"hand\"] == \"R\"][\"points\"].mean())\n", + "print(\"Average rank of LH:\", df.loc[df[\"hand\"] == \"L\"][\"points\"].mean())" + ] + }, + { + "cell_type": "code", + "execution_count": 19, + "metadata": {}, + "outputs": [], + "source": [ + "dfM = df.loc[df[\"gender\"] == \"M\"]\n" + ] + }, + { + "cell_type": "code", + "execution_count": 21, + "metadata": {}, + "outputs": [ + { + "data": { + "text/html": [ + "
\n", + "\n", + "\n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + " \n", + "
rankingcountryplayeragepointstournplayedbornweightheighthandgender
01ESPRafael Nadal31877014198685.0185.0LM
12SUIRoger Federer36867017198185.0185.0RM
23CROMarin Cilic29498520198889.0198.0RM
34GERAlexander Zverev20492524199786.0198.0RM
45BULGrigor Dimitrov26463522199180.0191.0RM
56ARGJuan Martin del Potro29447020198897.0198.0RM
67AUTDominic Thiem24366525199382.0185.0RM
78RSAKevin Anderson31339022198693.0203.0RM
89USAJohn Isner323125251985108.0208.0RM
910BELDavid Goffin27311024199068.0180.0RM
1011FRALucas Pouille24241024199481.0185.0RM
1112ESPPablo Carreno Busta26239525199178.0188.0RM
1213SRBNovak Djokovic30231015198777.0188.0RM
1314USASam Querrey30222023198795.0198.0RM
1415ARGDiego Schwartzman25222026199264.0170.0RM
1516ESPRoberto Bautista Agut30217525198876.0183.0RM
1617USAJack Sock25215522199284.0191.0RM
1718CZETomas Berdych32214019198591.0196.0RM
1819KORHyeon Chung21189722199687.0188.0RM
1920ITAFabio Fognini30184024198774.0178.0RM
2021SUIStan Wawrinka33178514198581.0183.0RM
2122CANMilos Raonic27176520199098.0196.0RM
2223GBRKyle Edmund23175725199583.0188.0RM
2324ESPAlbert Ramos-Vinolas30174529198880.0188.0LM
2425AUSNick Kyrgios22172019199585.0193.0RM
2526FRAAdrian Mannarino29165526198870.0180.0LM
2627SRBFilip Krajinovic26161625199275.0185.0RM
2728LUXGilles Muller34146522198389.0193.0LM
2829GBRAndy Murray30145014198784.0191.0RM
2930ESPFeliciano Lopez36142024198188.0188.0LM
....................................
470471HUNZsombor Piros187617199965.0178.0RM
471472AUTDavid Pichler227625199670.0178.0RM
472473ITAPietro Rondoni247626199378.0185.0RM
473474IRLJames McGee30758198780.0183.0RM
474475GBRTom Farquharson2675131992NaNNaNNaNM
475476FRALaurent Lokoli2375171994NaNNaNRM
476477USARyan Shane247416199489.0193.0RM
477478GBRLloyd Glasspool247417199385.0191.0RM
478479ISRIgor Smilansky237423199585.0188.0RM
479480ZIMBenjamin Lock257431199386.0201.0RM
480481FRAGianni Mina267318199285.0188.0RM
481482BELClement Geens227323199668.0180.0RM
482483AUTPascal Brunner287218198975.0180.0RM
483484GERJulian Lenz257117199382.0188.0RM
484485NEDBotic Van de Zandschulp227117199583.0188.0RM
485486GERPeter Torebko307020198884.0185.0RM
486487IRLSam Barry267020199283.0191.0RM
487488USAUlises Blanch207022199878.0191.0RM
488489JPNKento Takeuchi3070241987NaNNaNNaNM
489490USARonnie Schneider237025199470.0175.0RM
490491FRAJonathan Kanar237029199472.0180.0LM
491492ITAEdoardo Eremin246913199395.0185.0RM
492493AUSBlake Ellis196916199975.0191.0RM
493494JPNShuichi Sekiguchi266919199166.0168.0RM
494495ESPAndres Artunedo Martinavarro246920199375.0183.0RM
495496NEDJelle Sels226923199583.0188.0RM
496497USASekou Bangoura266925199177.0183.0RM
497498GERElmar Ejupovic256928199390.0193.0RM
498499NEDGijs Brouwer226821199672.0191.0LM
499500BRAJoao Menezes216822199680.0185.0RM
\n", + "

500 rows × 11 columns

\n", + "
" + ], + "text/plain": [ + " ranking country player age points tournplayed \\\n", + "0 1 ESP Rafael Nadal 31 8770 14 \n", + "1 2 SUI Roger Federer 36 8670 17 \n", + "2 3 CRO Marin Cilic 29 4985 20 \n", + "3 4 GER Alexander Zverev 20 4925 24 \n", + "4 5 BUL Grigor Dimitrov 26 4635 22 \n", + "5 6 ARG Juan Martin del Potro 29 4470 20 \n", + "6 7 AUT Dominic Thiem 24 3665 25 \n", + "7 8 RSA Kevin Anderson 31 3390 22 \n", + "8 9 USA John Isner 32 3125 25 \n", + "9 10 BEL David Goffin 27 3110 24 \n", + "10 11 FRA Lucas Pouille 24 2410 24 \n", + "11 12 ESP Pablo Carreno Busta 26 2395 25 \n", + "12 13 SRB Novak Djokovic 30 2310 15 \n", + "13 14 USA Sam Querrey 30 2220 23 \n", + "14 15 ARG Diego Schwartzman 25 2220 26 \n", + "15 16 ESP Roberto Bautista Agut 30 2175 25 \n", + "16 17 USA Jack Sock 25 2155 22 \n", + "17 18 CZE Tomas Berdych 32 2140 19 \n", + "18 19 KOR Hyeon Chung 21 1897 22 \n", + "19 20 ITA Fabio Fognini 30 1840 24 \n", + "20 21 SUI Stan Wawrinka 33 1785 14 \n", + "21 22 CAN Milos Raonic 27 1765 20 \n", + "22 23 GBR Kyle Edmund 23 1757 25 \n", + "23 24 ESP Albert Ramos-Vinolas 30 1745 29 \n", + "24 25 AUS Nick Kyrgios 22 1720 19 \n", + "25 26 FRA Adrian Mannarino 29 1655 26 \n", + "26 27 SRB Filip Krajinovic 26 1616 25 \n", + "27 28 LUX Gilles Muller 34 1465 22 \n", + "28 29 GBR Andy Murray 30 1450 14 \n", + "29 30 ESP Feliciano Lopez 36 1420 24 \n", + ".. ... ... ... ... ... ... \n", + "470 471 HUN Zsombor Piros 18 76 17 \n", + "471 472 AUT David Pichler 22 76 25 \n", + "472 473 ITA Pietro Rondoni 24 76 26 \n", + "473 474 IRL James McGee 30 75 8 \n", + "474 475 GBR Tom Farquharson 26 75 13 \n", + "475 476 FRA Laurent Lokoli 23 75 17 \n", + "476 477 USA Ryan Shane 24 74 16 \n", + "477 478 GBR Lloyd Glasspool 24 74 17 \n", + "478 479 ISR Igor Smilansky 23 74 23 \n", + "479 480 ZIM Benjamin Lock 25 74 31 \n", + "480 481 FRA Gianni Mina 26 73 18 \n", + "481 482 BEL Clement Geens 22 73 23 \n", + "482 483 AUT Pascal Brunner 28 72 18 \n", + "483 484 GER Julian Lenz 25 71 17 \n", + "484 485 NED Botic Van de Zandschulp 22 71 17 \n", + "485 486 GER Peter Torebko 30 70 20 \n", + "486 487 IRL Sam Barry 26 70 20 \n", + "487 488 USA Ulises Blanch 20 70 22 \n", + "488 489 JPN Kento Takeuchi 30 70 24 \n", + "489 490 USA Ronnie Schneider 23 70 25 \n", + "490 491 FRA Jonathan Kanar 23 70 29 \n", + "491 492 ITA Edoardo Eremin 24 69 13 \n", + "492 493 AUS Blake Ellis 19 69 16 \n", + "493 494 JPN Shuichi Sekiguchi 26 69 19 \n", + "494 495 ESP Andres Artunedo Martinavarro 24 69 20 \n", + "495 496 NED Jelle Sels 22 69 23 \n", + "496 497 USA Sekou Bangoura 26 69 25 \n", + "497 498 GER Elmar Ejupovic 25 69 28 \n", + "498 499 NED Gijs Brouwer 22 68 21 \n", + "499 500 BRA Joao Menezes 21 68 22 \n", + "\n", + " born weight height hand gender \n", + "0 1986 85.0 185.0 L M \n", + "1 1981 85.0 185.0 R M \n", + "2 1988 89.0 198.0 R M \n", + "3 1997 86.0 198.0 R M \n", + "4 1991 80.0 191.0 R M \n", + "5 1988 97.0 198.0 R M \n", + "6 1993 82.0 185.0 R M \n", + "7 1986 93.0 203.0 R M \n", + "8 1985 108.0 208.0 R M \n", + "9 1990 68.0 180.0 R M \n", + "10 1994 81.0 185.0 R M \n", + "11 1991 78.0 188.0 R M \n", + "12 1987 77.0 188.0 R M \n", + "13 1987 95.0 198.0 R M \n", + "14 1992 64.0 170.0 R M \n", + "15 1988 76.0 183.0 R M \n", + "16 1992 84.0 191.0 R M \n", + "17 1985 91.0 196.0 R M \n", + "18 1996 87.0 188.0 R M \n", + "19 1987 74.0 178.0 R M \n", + "20 1985 81.0 183.0 R M \n", + "21 1990 98.0 196.0 R M \n", + "22 1995 83.0 188.0 R M \n", + "23 1988 80.0 188.0 L M \n", + "24 1995 85.0 193.0 R M \n", + "25 1988 70.0 180.0 L M \n", + "26 1992 75.0 185.0 R M \n", + "27 1983 89.0 193.0 L M \n", + "28 1987 84.0 191.0 R M \n", + "29 1981 88.0 188.0 L M \n", + ".. ... ... ... ... ... \n", + "470 1999 65.0 178.0 R M \n", + "471 1996 70.0 178.0 R M \n", + "472 1993 78.0 185.0 R M \n", + "473 1987 80.0 183.0 R M \n", + "474 1992 NaN NaN NaN M \n", + "475 1994 NaN NaN R M \n", + "476 1994 89.0 193.0 R M \n", + "477 1993 85.0 191.0 R M \n", + "478 1995 85.0 188.0 R M \n", + "479 1993 86.0 201.0 R M \n", + "480 1992 85.0 188.0 R M \n", + "481 1996 68.0 180.0 R M \n", + "482 1989 75.0 180.0 R M \n", + "483 1993 82.0 188.0 R M \n", + "484 1995 83.0 188.0 R M \n", + "485 1988 84.0 185.0 R M \n", + "486 1992 83.0 191.0 R M \n", + "487 1998 78.0 191.0 R M \n", + "488 1987 NaN NaN NaN M \n", + "489 1994 70.0 175.0 R M \n", + "490 1994 72.0 180.0 L M \n", + "491 1993 95.0 185.0 R M \n", + "492 1999 75.0 191.0 R M \n", + "493 1991 66.0 168.0 R M \n", + "494 1993 75.0 183.0 R M \n", + "495 1995 83.0 188.0 R M \n", + "496 1991 77.0 183.0 R M \n", + "497 1993 90.0 193.0 R M \n", + "498 1996 72.0 191.0 L M \n", + "499 1996 80.0 185.0 R M \n", + "\n", + "[500 rows x 11 columns]" + ] + }, + "execution_count": 21, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "dfM" + ] + }, { "cell_type": "code", "execution_count": 11, @@ -750,6 +1838,13 @@ "ylabel(\"points\")" ] }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [] + }, { "cell_type": "code", "execution_count": 55, @@ -800,12 +1895,114 @@ ], "source": [] }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "# Handedness t-test" + ] + }, + { + "cell_type": "code", + "execution_count": 23, + "metadata": {}, + "outputs": [], + "source": [ + "scoresL = df.loc[df[\"hand\"] == \"L\"][\"points\"]\n", + "scoresR = df.loc[df[\"hand\"] == \"R\"][\"points\"]" + ] + }, + { + "cell_type": "code", + "execution_count": 31, + "metadata": {}, + "outputs": [ + { + "name": "stdout", + "output_type": "stream", + "text": [ + "left-handed mean: 642.2278481012659\n", + "left-handed var: 1196169.9474196683\n", + "right-handed mean: 595.3157076205288\n", + "right-handed var: 885851.1384912046\n" + ] + } + ], + "source": [ + "print(\"left-handed mean:\", scoresL.mean())\n", + "print(\"left-handed var:\", scoresL.var())\n", + "print(\"right-handed mean:\", scoresR.mean())\n", + "print(\"right-handed var:\", scoresR.var())" + ] + }, + { + "cell_type": "code", + "execution_count": 33, + "metadata": {}, + "outputs": [], + "source": [ + "ttest = (scoresL.mean() - scoresR.mean()) / (np.sqrt(scoresL.var()/(len(scoresL)-1) + scoresR.var()/(len(scoresR)-1)))" + ] + }, + { + "cell_type": "code", + "execution_count": 34, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "0.3628504016994108" + ] + }, + "execution_count": 34, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "ttest" + ] + }, + { + "cell_type": "code", + "execution_count": 36, + "metadata": {}, + "outputs": [ + { + "data": { + "text/plain": [ + "720" + ] + }, + "execution_count": 36, + "metadata": {}, + "output_type": "execute_result" + } + ], + "source": [ + "degfree = (len(scoresL) + len(scoresR)) - 2\n", + "degfree" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "Links to the converters:\n", + "- https://surfstat.anu.edu.au/surfstat-home/tables/t.php\n", + "- http://www.socscistatistics.com/pvalues/tdistribution.aspx\n", + "- https://goodcalculators.com/student-t-value-calculator/" + ] + }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], - "source": [] + "source": [ + "stats." + ] } ], "metadata": {