{ "cells": [ { "cell_type": "code", "execution_count": 479, "metadata": {}, "outputs": [], "source": [ "import pandas as pd\n", "\n", "# Assume you've already read all files into a list of DataFrames\n", "dataframes = [\n", " pd.read_csv(\"/Users/laraschuman/Desktop/CTP-Project/combined_sorted_notes/sorted_notes_with_injury_torn_mcl.csv\"),\n", " pd.read_csv(\"/Users/laraschuman/Desktop/CTP-Project/combined_sorted_notes/sorted_notes_with_injury_torn_hamstring.csv\"),\n", " pd.read_csv(\"/Users/laraschuman/Desktop/CTP-Project/combined_sorted_notes/sorted_notes_with_injury_sprained_mcl.csv\"),\n", " pd.read_csv(\"/Users/laraschuman/Desktop/CTP-Project/combined_sorted_notes/sorted_notes_with_injury_shoulder_sprain.csv\"),\n", " pd.read_csv(\"/Users/laraschuman/Desktop/CTP-Project/combined_sorted_notes/sorted_notes_with_injury_shoulder_labrum.csv\"),\n", " pd.read_csv(\"/Users/laraschuman/Desktop/CTP-Project/combined_sorted_notes/sorted_notes_with_injury_rotator_cuff.csv\"),\n", " pd.read_csv(\"/Users/laraschuman/Desktop/CTP-Project/combined_sorted_notes/sorted_notes_with_injury_hip_labrum.csv\"),\n", " pd.read_csv(\"/Users/laraschuman/Desktop/CTP-Project/combined_sorted_notes/sorted_notes_with_injury_hip_flexor_surgery.csv\"),\n", " pd.read_csv(\"/Users/laraschuman/Desktop/CTP-Project/combined_sorted_notes/sorted_notes_with_injury_hip_flexor_strain.csv\"),\n", " pd.read_csv(\"/Users/laraschuman/Desktop/CTP-Project/combined_sorted_notes/sorted_notes_with_injury_calf_strain.csv\"),\n", " pd.read_csv(\"/Users/laraschuman/Desktop/CTP-Project/combined_sorted_notes/sorted_notes_with_injury_dislocated_shoulder.csv\"),\n", " pd.read_csv(\"/Users/laraschuman/Desktop/CTP-Project/combined_sorted_notes/sorted_notes_with_injury_back_surgery.csv\"),\n", " pd.read_csv(\"/Users/laraschuman/Desktop/CTP-Project/combined_sorted_notes/sorted_notes_with_injury_back_spasm.csv\"),\n", " pd.read_csv(\"/Users/laraschuman/Desktop/CTP-Project/combined_sorted_notes/sorted_notes_with_injury_quad.csv\"),\n", " pd.read_csv(\"/Users/laraschuman/Desktop/CTP-Project/combined_sorted_notes/sorted_notes_with_injury_meniscus.csv\"),\n", " pd.read_csv(\"/Users/laraschuman/Desktop/CTP-Project/combined_sorted_notes/sorted_notes_with_injury_leg_fractured.csv\"),\n", " pd.read_csv(\"/Users/laraschuman/Desktop/CTP-Project/combined_sorted_notes/sorted_notes_with_injury_hand_finger_fractured.csv\"),\n", " pd.read_csv(\"/Users/laraschuman/Desktop/CTP-Project/combined_sorted_notes/sorted_notes_with_injury_hamstring.csv\"),\n", " pd.read_csv(\"/Users/laraschuman/Desktop/CTP-Project/combined_sorted_notes/sorted_notes_with_injury_foot_sprain.csv\"),\n", " pd.read_csv(\"/Users/laraschuman/Desktop/CTP-Project/combined_sorted_notes/sorted_notes_with_injury_foot_fracture.csv\"),\n", " pd.read_csv(\"/Users/laraschuman/Desktop/CTP-Project/combined_sorted_notes/sorted_notes_with_injury_bone_spurs.csv\"),\n", " pd.read_csv(\"/Users/laraschuman/Desktop/CTP-Project/combined_sorted_notes/sorted_notes_with_injury_arm.csv\"),\n", " pd.read_csv(\"/Users/laraschuman/Desktop/CTP-Project/combined_sorted_notes/sorted_notes_with_injury_ankle_sprain.csv\"),\n", " pd.read_csv(\"/Users/laraschuman/Desktop/CTP-Project/combined_sorted_notes/sorted_notes_with_injury_ankle_fracture (1).csv\"),\n", " pd.read_csv(\"/Users/laraschuman/Desktop/CTP-Project/combined_sorted_notes/sorted_notes_with_injury_acl (1).csv\"),\n", " pd.read_csv(\"/Users/laraschuman/Desktop/CTP-Project/combined_sorted_notes/sorted_notes_with_injury_achilles.csv\"),\n", " pd.read_csv(\"/Users/laraschuman/Desktop/CTP-Project/combined_sorted_notes/sorted_notes_with_injury_ankle_fracture.csv\"),\n", "]\n", "\n", "# Concatenate all DataFrames\n", "combined_df = pd.concat(dataframes, ignore_index=True)\n", "\n", "\n", "\n" ] }, { "cell_type": "code", "execution_count": 480, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Unnamed: 0.1Unnamed: 0InjuredNotesNameActivatedClassificationsTop ClassificationTop ScoreSpecific Injurydays_injured
00130482015-10-27placed on IL with torn left AchillesBrandon Jenningsnone{'sequence': 'placed on IL with torn left Achi...achilles tear0.475043left achilles tear injury250
11171642017-01-07placed on IL with Achilles injury / sprained a...Avery Bradleynone{'sequence': 'placed on IL with Achilles injur...achilles tear0.474971achilles tear injury250
22163362016-11-21placed on IL with sore/strained left AchillesTreveon Grahamnone{'sequence': 'placed on IL with sore/strained ...achilles tear0.472060left achilles tear injury250
33117242015-01-27placed on IL with torn left Achilles (out for ...Brandon Jenningsnone{'sequence': 'placed on IL with torn left Achi...achilles tear0.471717left achilles tear injury250
44169142016-12-23placed on IL with sore left AchillesJose Bareanone{'sequence': 'placed on IL with sore left Achi...achilles tear0.471639left achilles tear injury250
....................................
258258203802017-12-30strained right Achilles tendon (DTD)Austin Riversnone{'sequence': 'strained right Achilles tendon (...achilles tear0.456959right achilles tear injury250
259259150532016-03-12strained right Achilles tendon (DTD)Richaun Holmesnone{'sequence': 'strained right Achilles tendon (...achilles tear0.456959right achilles tear injury250
26026011322011-01-12Achilles tendon injury (DTD)Reggie Williamsnone{'sequence': 'Achilles tendon injury (DTD)', '...achilles tear0.456949achilles tear injury250
26126152010-10-08surgery to repair torn right Achilles tendonJonas Jerebkonone{'sequence': 'surgery to repair torn right Ach...achilles tear0.456810right achilles tear injury250
26226220502011-03-19right Achilles tendon injury (DTD)Al Harringtonnone{'sequence': 'right Achilles tendon injury (DT...achilles tear0.455742right achilles tear injury250
\n", "

263 rows × 11 columns

\n", "
" ], "text/plain": [ " Unnamed: 0.1 Unnamed: 0 Injured \\\n", "0 0 13048 2015-10-27 \n", "1 1 17164 2017-01-07 \n", "2 2 16336 2016-11-21 \n", "3 3 11724 2015-01-27 \n", "4 4 16914 2016-12-23 \n", ".. ... ... ... \n", "258 258 20380 2017-12-30 \n", "259 259 15053 2016-03-12 \n", "260 260 1132 2011-01-12 \n", "261 261 5 2010-10-08 \n", "262 262 2050 2011-03-19 \n", "\n", " Notes Name \\\n", "0 placed on IL with torn left Achilles Brandon Jennings \n", "1 placed on IL with Achilles injury / sprained a... Avery Bradley \n", "2 placed on IL with sore/strained left Achilles Treveon Graham \n", "3 placed on IL with torn left Achilles (out for ... Brandon Jennings \n", "4 placed on IL with sore left Achilles Jose Barea \n", ".. ... ... \n", "258 strained right Achilles tendon (DTD) Austin Rivers \n", "259 strained right Achilles tendon (DTD) Richaun Holmes \n", "260 Achilles tendon injury (DTD) Reggie Williams \n", "261 surgery to repair torn right Achilles tendon Jonas Jerebko \n", "262 right Achilles tendon injury (DTD) Al Harrington \n", "\n", " Activated Classifications \\\n", "0 none {'sequence': 'placed on IL with torn left Achi... \n", "1 none {'sequence': 'placed on IL with Achilles injur... \n", "2 none {'sequence': 'placed on IL with sore/strained ... \n", "3 none {'sequence': 'placed on IL with torn left Achi... \n", "4 none {'sequence': 'placed on IL with sore left Achi... \n", ".. ... ... \n", "258 none {'sequence': 'strained right Achilles tendon (... \n", "259 none {'sequence': 'strained right Achilles tendon (... \n", "260 none {'sequence': 'Achilles tendon injury (DTD)', '... \n", "261 none {'sequence': 'surgery to repair torn right Ach... \n", "262 none {'sequence': 'right Achilles tendon injury (DT... \n", "\n", " Top Classification Top Score Specific Injury days_injured \n", "0 achilles tear 0.475043 left achilles tear injury 250 \n", "1 achilles tear 0.474971 achilles tear injury 250 \n", "2 achilles tear 0.472060 left achilles tear injury 250 \n", "3 achilles tear 0.471717 left achilles tear injury 250 \n", "4 achilles tear 0.471639 left achilles tear injury 250 \n", ".. ... ... ... ... \n", "258 achilles tear 0.456959 right achilles tear injury 250 \n", "259 achilles tear 0.456959 right achilles tear injury 250 \n", "260 achilles tear 0.456949 achilles tear injury 250 \n", "261 achilles tear 0.456810 right achilles tear injury 250 \n", "262 achilles tear 0.455742 right achilles tear injury 250 \n", "\n", "[263 rows x 11 columns]" ] }, "execution_count": 480, "metadata": {}, "output_type": "execute_result" } ], "source": [ "achilles = pd.read_csv(\"/Users/laraschuman/Desktop/CTP-Project/combined_sorted_notes/sorted_notes_with_injury_achilles.csv\")\n", "achilles" ] }, { "cell_type": "code", "execution_count": 481, "metadata": {}, "outputs": [ { "ename": "KeyError", "evalue": "\"['Unnamed: 0.2'] not found in axis\"", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", "Cell \u001b[0;32mIn[481], line 2\u001b[0m\n\u001b[1;32m 1\u001b[0m \u001b[38;5;28;01mimport\u001b[39;00m \u001b[38;5;21;01mnumpy\u001b[39;00m \u001b[38;5;28;01mas\u001b[39;00m \u001b[38;5;21;01mnp\u001b[39;00m\n\u001b[0;32m----> 2\u001b[0m achilles \u001b[38;5;241m=\u001b[39m achilles\u001b[38;5;241m.\u001b[39mdrop(columns\u001b[38;5;241m=\u001b[39m{\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mUnnamed: 0.2\u001b[39m\u001b[38;5;124m'\u001b[39m,\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mUnnamed: 0.1\u001b[39m\u001b[38;5;124m'\u001b[39m})\n", "File \u001b[0;32m/opt/anaconda3/lib/python3.12/site-packages/pandas/core/frame.py:5581\u001b[0m, in \u001b[0;36mDataFrame.drop\u001b[0;34m(self, labels, axis, index, columns, level, inplace, errors)\u001b[0m\n\u001b[1;32m 5433\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mdrop\u001b[39m(\n\u001b[1;32m 5434\u001b[0m \u001b[38;5;28mself\u001b[39m,\n\u001b[1;32m 5435\u001b[0m labels: IndexLabel \u001b[38;5;241m|\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 5442\u001b[0m errors: IgnoreRaise \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mraise\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 5443\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m DataFrame \u001b[38;5;241m|\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 5444\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 5445\u001b[0m \u001b[38;5;124;03m Drop specified labels from rows or columns.\u001b[39;00m\n\u001b[1;32m 5446\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 5579\u001b[0m \u001b[38;5;124;03m weight 1.0 0.8\u001b[39;00m\n\u001b[1;32m 5580\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m-> 5581\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28msuper\u001b[39m()\u001b[38;5;241m.\u001b[39mdrop(\n\u001b[1;32m 5582\u001b[0m labels\u001b[38;5;241m=\u001b[39mlabels,\n\u001b[1;32m 5583\u001b[0m axis\u001b[38;5;241m=\u001b[39maxis,\n\u001b[1;32m 5584\u001b[0m index\u001b[38;5;241m=\u001b[39mindex,\n\u001b[1;32m 5585\u001b[0m columns\u001b[38;5;241m=\u001b[39mcolumns,\n\u001b[1;32m 5586\u001b[0m level\u001b[38;5;241m=\u001b[39mlevel,\n\u001b[1;32m 5587\u001b[0m inplace\u001b[38;5;241m=\u001b[39minplace,\n\u001b[1;32m 5588\u001b[0m errors\u001b[38;5;241m=\u001b[39merrors,\n\u001b[1;32m 5589\u001b[0m )\n", "File \u001b[0;32m/opt/anaconda3/lib/python3.12/site-packages/pandas/core/generic.py:4788\u001b[0m, in \u001b[0;36mNDFrame.drop\u001b[0;34m(self, labels, axis, index, columns, level, inplace, errors)\u001b[0m\n\u001b[1;32m 4786\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m axis, labels \u001b[38;5;129;01min\u001b[39;00m axes\u001b[38;5;241m.\u001b[39mitems():\n\u001b[1;32m 4787\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m labels \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m-> 4788\u001b[0m obj \u001b[38;5;241m=\u001b[39m obj\u001b[38;5;241m.\u001b[39m_drop_axis(labels, axis, level\u001b[38;5;241m=\u001b[39mlevel, errors\u001b[38;5;241m=\u001b[39merrors)\n\u001b[1;32m 4790\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m inplace:\n\u001b[1;32m 4791\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_update_inplace(obj)\n", "File \u001b[0;32m/opt/anaconda3/lib/python3.12/site-packages/pandas/core/generic.py:4830\u001b[0m, in \u001b[0;36mNDFrame._drop_axis\u001b[0;34m(self, labels, axis, level, errors, only_slice)\u001b[0m\n\u001b[1;32m 4828\u001b[0m new_axis \u001b[38;5;241m=\u001b[39m axis\u001b[38;5;241m.\u001b[39mdrop(labels, level\u001b[38;5;241m=\u001b[39mlevel, errors\u001b[38;5;241m=\u001b[39merrors)\n\u001b[1;32m 4829\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 4830\u001b[0m new_axis \u001b[38;5;241m=\u001b[39m axis\u001b[38;5;241m.\u001b[39mdrop(labels, errors\u001b[38;5;241m=\u001b[39merrors)\n\u001b[1;32m 4831\u001b[0m indexer \u001b[38;5;241m=\u001b[39m axis\u001b[38;5;241m.\u001b[39mget_indexer(new_axis)\n\u001b[1;32m 4833\u001b[0m \u001b[38;5;66;03m# Case for non-unique axis\u001b[39;00m\n\u001b[1;32m 4834\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n", "File \u001b[0;32m/opt/anaconda3/lib/python3.12/site-packages/pandas/core/indexes/base.py:7070\u001b[0m, in \u001b[0;36mIndex.drop\u001b[0;34m(self, labels, errors)\u001b[0m\n\u001b[1;32m 7068\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m mask\u001b[38;5;241m.\u001b[39many():\n\u001b[1;32m 7069\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m errors \u001b[38;5;241m!=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mignore\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[0;32m-> 7070\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mlabels[mask]\u001b[38;5;241m.\u001b[39mtolist()\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m not found in axis\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 7071\u001b[0m indexer \u001b[38;5;241m=\u001b[39m indexer[\u001b[38;5;241m~\u001b[39mmask]\n\u001b[1;32m 7072\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdelete(indexer)\n", "\u001b[0;31mKeyError\u001b[0m: \"['Unnamed: 0.2'] not found in axis\"" ] } ], "source": [ "import numpy as np\n", "achilles = achilles.drop(columns={'Unnamed: 0.2','Unnamed: 0.1'})\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "ename": "KeyError", "evalue": "\"['Out_For_The_Season', 'Has_Injury'] not found in axis\"", "output_type": "error", "traceback": [ "\u001b[0;31m---------------------------------------------------------------------------\u001b[0m", "\u001b[0;31mKeyError\u001b[0m Traceback (most recent call last)", "Cell \u001b[0;32mIn[455], line 1\u001b[0m\n\u001b[0;32m----> 1\u001b[0m achilles \u001b[38;5;241m=\u001b[39m achilles\u001b[38;5;241m.\u001b[39mdrop(columns\u001b[38;5;241m=\u001b[39m{\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mOut_For_The_Season\u001b[39m\u001b[38;5;124m'\u001b[39m,\u001b[38;5;124m'\u001b[39m\u001b[38;5;124mHas_Injury\u001b[39m\u001b[38;5;124m'\u001b[39m})\n\u001b[1;32m 3\u001b[0m achilles\u001b[38;5;241m.\u001b[39mto_csv(\u001b[38;5;124m'\u001b[39m\u001b[38;5;124m/Users/laraschuman/Desktop/CTP-Project/combined_sorted_notes/sorted_notes_with_injury_achilles.csv\u001b[39m\u001b[38;5;124m'\u001b[39m)\n", "File \u001b[0;32m/opt/anaconda3/lib/python3.12/site-packages/pandas/core/frame.py:5581\u001b[0m, in \u001b[0;36mDataFrame.drop\u001b[0;34m(self, labels, axis, index, columns, level, inplace, errors)\u001b[0m\n\u001b[1;32m 5433\u001b[0m \u001b[38;5;28;01mdef\u001b[39;00m \u001b[38;5;21mdrop\u001b[39m(\n\u001b[1;32m 5434\u001b[0m \u001b[38;5;28mself\u001b[39m,\n\u001b[1;32m 5435\u001b[0m labels: IndexLabel \u001b[38;5;241m|\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m \u001b[38;5;241m=\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m,\n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 5442\u001b[0m errors: IgnoreRaise \u001b[38;5;241m=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mraise\u001b[39m\u001b[38;5;124m\"\u001b[39m,\n\u001b[1;32m 5443\u001b[0m ) \u001b[38;5;241m-\u001b[39m\u001b[38;5;241m>\u001b[39m DataFrame \u001b[38;5;241m|\u001b[39m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[1;32m 5444\u001b[0m \u001b[38;5;250m \u001b[39m\u001b[38;5;124;03m\"\"\"\u001b[39;00m\n\u001b[1;32m 5445\u001b[0m \u001b[38;5;124;03m Drop specified labels from rows or columns.\u001b[39;00m\n\u001b[1;32m 5446\u001b[0m \n\u001b[0;32m (...)\u001b[0m\n\u001b[1;32m 5579\u001b[0m \u001b[38;5;124;03m weight 1.0 0.8\u001b[39;00m\n\u001b[1;32m 5580\u001b[0m \u001b[38;5;124;03m \"\"\"\u001b[39;00m\n\u001b[0;32m-> 5581\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28msuper\u001b[39m()\u001b[38;5;241m.\u001b[39mdrop(\n\u001b[1;32m 5582\u001b[0m labels\u001b[38;5;241m=\u001b[39mlabels,\n\u001b[1;32m 5583\u001b[0m axis\u001b[38;5;241m=\u001b[39maxis,\n\u001b[1;32m 5584\u001b[0m index\u001b[38;5;241m=\u001b[39mindex,\n\u001b[1;32m 5585\u001b[0m columns\u001b[38;5;241m=\u001b[39mcolumns,\n\u001b[1;32m 5586\u001b[0m level\u001b[38;5;241m=\u001b[39mlevel,\n\u001b[1;32m 5587\u001b[0m inplace\u001b[38;5;241m=\u001b[39minplace,\n\u001b[1;32m 5588\u001b[0m errors\u001b[38;5;241m=\u001b[39merrors,\n\u001b[1;32m 5589\u001b[0m )\n", "File \u001b[0;32m/opt/anaconda3/lib/python3.12/site-packages/pandas/core/generic.py:4788\u001b[0m, in \u001b[0;36mNDFrame.drop\u001b[0;34m(self, labels, axis, index, columns, level, inplace, errors)\u001b[0m\n\u001b[1;32m 4786\u001b[0m \u001b[38;5;28;01mfor\u001b[39;00m axis, labels \u001b[38;5;129;01min\u001b[39;00m axes\u001b[38;5;241m.\u001b[39mitems():\n\u001b[1;32m 4787\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m labels \u001b[38;5;129;01mis\u001b[39;00m \u001b[38;5;129;01mnot\u001b[39;00m \u001b[38;5;28;01mNone\u001b[39;00m:\n\u001b[0;32m-> 4788\u001b[0m obj \u001b[38;5;241m=\u001b[39m obj\u001b[38;5;241m.\u001b[39m_drop_axis(labels, axis, level\u001b[38;5;241m=\u001b[39mlevel, errors\u001b[38;5;241m=\u001b[39merrors)\n\u001b[1;32m 4790\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m inplace:\n\u001b[1;32m 4791\u001b[0m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39m_update_inplace(obj)\n", "File \u001b[0;32m/opt/anaconda3/lib/python3.12/site-packages/pandas/core/generic.py:4830\u001b[0m, in \u001b[0;36mNDFrame._drop_axis\u001b[0;34m(self, labels, axis, level, errors, only_slice)\u001b[0m\n\u001b[1;32m 4828\u001b[0m new_axis \u001b[38;5;241m=\u001b[39m axis\u001b[38;5;241m.\u001b[39mdrop(labels, level\u001b[38;5;241m=\u001b[39mlevel, errors\u001b[38;5;241m=\u001b[39merrors)\n\u001b[1;32m 4829\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n\u001b[0;32m-> 4830\u001b[0m new_axis \u001b[38;5;241m=\u001b[39m axis\u001b[38;5;241m.\u001b[39mdrop(labels, errors\u001b[38;5;241m=\u001b[39merrors)\n\u001b[1;32m 4831\u001b[0m indexer \u001b[38;5;241m=\u001b[39m axis\u001b[38;5;241m.\u001b[39mget_indexer(new_axis)\n\u001b[1;32m 4833\u001b[0m \u001b[38;5;66;03m# Case for non-unique axis\u001b[39;00m\n\u001b[1;32m 4834\u001b[0m \u001b[38;5;28;01melse\u001b[39;00m:\n", "File \u001b[0;32m/opt/anaconda3/lib/python3.12/site-packages/pandas/core/indexes/base.py:7070\u001b[0m, in \u001b[0;36mIndex.drop\u001b[0;34m(self, labels, errors)\u001b[0m\n\u001b[1;32m 7068\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m mask\u001b[38;5;241m.\u001b[39many():\n\u001b[1;32m 7069\u001b[0m \u001b[38;5;28;01mif\u001b[39;00m errors \u001b[38;5;241m!=\u001b[39m \u001b[38;5;124m\"\u001b[39m\u001b[38;5;124mignore\u001b[39m\u001b[38;5;124m\"\u001b[39m:\n\u001b[0;32m-> 7070\u001b[0m \u001b[38;5;28;01mraise\u001b[39;00m \u001b[38;5;167;01mKeyError\u001b[39;00m(\u001b[38;5;124mf\u001b[39m\u001b[38;5;124m\"\u001b[39m\u001b[38;5;132;01m{\u001b[39;00mlabels[mask]\u001b[38;5;241m.\u001b[39mtolist()\u001b[38;5;132;01m}\u001b[39;00m\u001b[38;5;124m not found in axis\u001b[39m\u001b[38;5;124m\"\u001b[39m)\n\u001b[1;32m 7071\u001b[0m indexer \u001b[38;5;241m=\u001b[39m indexer[\u001b[38;5;241m~\u001b[39mmask]\n\u001b[1;32m 7072\u001b[0m \u001b[38;5;28;01mreturn\u001b[39;00m \u001b[38;5;28mself\u001b[39m\u001b[38;5;241m.\u001b[39mdelete(indexer)\n", "\u001b[0;31mKeyError\u001b[0m: \"['Out_For_The_Season', 'Has_Injury'] not found in axis\"" ] } ], "source": [ "achilles = achilles.drop(columns={'Out_For_The_Season','Has_Injury'})\n", "\n", "achilles.to_csv('/Users/laraschuman/Desktop/CTP-Project/combined_sorted_notes/sorted_notes_with_injury_achilles.csv')\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "achilles = achilles.rename(columns={'injured':'Injured','Activated_From_IL':'Activated'})\n", "achilles['Activated'] = 'none'\n", "achilles['days_injured'] = 250\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Unnamed: 0.1Unnamed: 0InjuredNotesNameActivatedClassificationsTop ClassificationTop ScoreSpecific Injurydays_injured
00130482015-10-27placed on IL with torn left AchillesBrandon Jenningsnone{'sequence': 'placed on IL with torn left Achi...achilles tear0.475043left achilles tear injury250
11171642017-01-07placed on IL with Achilles injury / sprained a...Avery Bradleynone{'sequence': 'placed on IL with Achilles injur...achilles tear0.474971achilles tear injury250
22163362016-11-21placed on IL with sore/strained left AchillesTreveon Grahamnone{'sequence': 'placed on IL with sore/strained ...achilles tear0.472060left achilles tear injury250
33117242015-01-27placed on IL with torn left Achilles (out for ...Brandon Jenningsnone{'sequence': 'placed on IL with torn left Achi...achilles tear0.471717left achilles tear injury250
44169142016-12-23placed on IL with sore left AchillesJose Bareanone{'sequence': 'placed on IL with sore left Achi...achilles tear0.471639left achilles tear injury250
....................................
258258203802017-12-30strained right Achilles tendon (DTD)Austin Riversnone{'sequence': 'strained right Achilles tendon (...achilles tear0.456959right achilles tear injury250
259259150532016-03-12strained right Achilles tendon (DTD)Richaun Holmesnone{'sequence': 'strained right Achilles tendon (...achilles tear0.456959right achilles tear injury250
26026011322011-01-12Achilles tendon injury (DTD)Reggie Williamsnone{'sequence': 'Achilles tendon injury (DTD)', '...achilles tear0.456949achilles tear injury250
26126152010-10-08surgery to repair torn right Achilles tendonJonas Jerebkonone{'sequence': 'surgery to repair torn right Ach...achilles tear0.456810right achilles tear injury250
26226220502011-03-19right Achilles tendon injury (DTD)Al Harringtonnone{'sequence': 'right Achilles tendon injury (DT...achilles tear0.455742right achilles tear injury250
\n", "

263 rows × 11 columns

\n", "
" ], "text/plain": [ " Unnamed: 0.1 Unnamed: 0 Injured \\\n", "0 0 13048 2015-10-27 \n", "1 1 17164 2017-01-07 \n", "2 2 16336 2016-11-21 \n", "3 3 11724 2015-01-27 \n", "4 4 16914 2016-12-23 \n", ".. ... ... ... \n", "258 258 20380 2017-12-30 \n", "259 259 15053 2016-03-12 \n", "260 260 1132 2011-01-12 \n", "261 261 5 2010-10-08 \n", "262 262 2050 2011-03-19 \n", "\n", " Notes Name \\\n", "0 placed on IL with torn left Achilles Brandon Jennings \n", "1 placed on IL with Achilles injury / sprained a... Avery Bradley \n", "2 placed on IL with sore/strained left Achilles Treveon Graham \n", "3 placed on IL with torn left Achilles (out for ... Brandon Jennings \n", "4 placed on IL with sore left Achilles Jose Barea \n", ".. ... ... \n", "258 strained right Achilles tendon (DTD) Austin Rivers \n", "259 strained right Achilles tendon (DTD) Richaun Holmes \n", "260 Achilles tendon injury (DTD) Reggie Williams \n", "261 surgery to repair torn right Achilles tendon Jonas Jerebko \n", "262 right Achilles tendon injury (DTD) Al Harrington \n", "\n", " Activated Classifications \\\n", "0 none {'sequence': 'placed on IL with torn left Achi... \n", "1 none {'sequence': 'placed on IL with Achilles injur... \n", "2 none {'sequence': 'placed on IL with sore/strained ... \n", "3 none {'sequence': 'placed on IL with torn left Achi... \n", "4 none {'sequence': 'placed on IL with sore left Achi... \n", ".. ... ... \n", "258 none {'sequence': 'strained right Achilles tendon (... \n", "259 none {'sequence': 'strained right Achilles tendon (... \n", "260 none {'sequence': 'Achilles tendon injury (DTD)', '... \n", "261 none {'sequence': 'surgery to repair torn right Ach... \n", "262 none {'sequence': 'right Achilles tendon injury (DT... \n", "\n", " Top Classification Top Score Specific Injury days_injured \n", "0 achilles tear 0.475043 left achilles tear injury 250 \n", "1 achilles tear 0.474971 achilles tear injury 250 \n", "2 achilles tear 0.472060 left achilles tear injury 250 \n", "3 achilles tear 0.471717 left achilles tear injury 250 \n", "4 achilles tear 0.471639 left achilles tear injury 250 \n", ".. ... ... ... ... \n", "258 achilles tear 0.456959 right achilles tear injury 250 \n", "259 achilles tear 0.456959 right achilles tear injury 250 \n", "260 achilles tear 0.456949 achilles tear injury 250 \n", "261 achilles tear 0.456810 right achilles tear injury 250 \n", "262 achilles tear 0.455742 right achilles tear injury 250 \n", "\n", "[263 rows x 11 columns]" ] }, "execution_count": 457, "metadata": {}, "output_type": "execute_result" } ], "source": [ "achilles" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "new_order = ['Unnamed: 0','Name','Notes','Injured','Activated','days_injured','Classifications','Top Classification','Top Score','Specific Injury']\n", "achilles = achilles[new_order]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/var/folders/8t/t11lp0b952n0xtfmnwbzxzvw0000gn/T/ipykernel_11952/3813116858.py:1: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame.\n", "Try using .loc[row_indexer,col_indexer] = value instead\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " achilles['Injured'] = pd.to_datetime(achilles['Injured'], errors='coerce')\n" ] }, { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Unnamed: 0NameNotesInjuredActivateddays_injuredClassificationsTop ClassificationTop ScoreSpecific Injury
013048Brandon Jenningsplaced on IL with torn left Achilles2015-10-27none250{'sequence': 'placed on IL with torn left Achi...achilles tear0.475043left achilles tear injury
117164Avery Bradleyplaced on IL with Achilles injury / sprained a...2017-01-07none250{'sequence': 'placed on IL with Achilles injur...achilles tear0.474971achilles tear injury
216336Treveon Grahamplaced on IL with sore/strained left Achilles2016-11-21none250{'sequence': 'placed on IL with sore/strained ...achilles tear0.472060left achilles tear injury
311724Brandon Jenningsplaced on IL with torn left Achilles (out for ...2015-01-27none250{'sequence': 'placed on IL with torn left Achi...achilles tear0.471717left achilles tear injury
416914Jose Bareaplaced on IL with sore left Achilles2016-12-23none250{'sequence': 'placed on IL with sore left Achi...achilles tear0.471639left achilles tear injury
.................................
25820380Austin Riversstrained right Achilles tendon (DTD)2017-12-30none250{'sequence': 'strained right Achilles tendon (...achilles tear0.456959right achilles tear injury
25915053Richaun Holmesstrained right Achilles tendon (DTD)2016-03-12none250{'sequence': 'strained right Achilles tendon (...achilles tear0.456959right achilles tear injury
2601132Reggie WilliamsAchilles tendon injury (DTD)2011-01-12none250{'sequence': 'Achilles tendon injury (DTD)', '...achilles tear0.456949achilles tear injury
2615Jonas Jerebkosurgery to repair torn right Achilles tendon2010-10-08none250{'sequence': 'surgery to repair torn right Ach...achilles tear0.456810right achilles tear injury
2622050Al Harringtonright Achilles tendon injury (DTD)2011-03-19none250{'sequence': 'right Achilles tendon injury (DT...achilles tear0.455742right achilles tear injury
\n", "

263 rows × 10 columns

\n", "
" ], "text/plain": [ " Unnamed: 0 Name \\\n", "0 13048 Brandon Jennings \n", "1 17164 Avery Bradley \n", "2 16336 Treveon Graham \n", "3 11724 Brandon Jennings \n", "4 16914 Jose Barea \n", ".. ... ... \n", "258 20380 Austin Rivers \n", "259 15053 Richaun Holmes \n", "260 1132 Reggie Williams \n", "261 5 Jonas Jerebko \n", "262 2050 Al Harrington \n", "\n", " Notes Injured Activated \\\n", "0 placed on IL with torn left Achilles 2015-10-27 none \n", "1 placed on IL with Achilles injury / sprained a... 2017-01-07 none \n", "2 placed on IL with sore/strained left Achilles 2016-11-21 none \n", "3 placed on IL with torn left Achilles (out for ... 2015-01-27 none \n", "4 placed on IL with sore left Achilles 2016-12-23 none \n", ".. ... ... ... \n", "258 strained right Achilles tendon (DTD) 2017-12-30 none \n", "259 strained right Achilles tendon (DTD) 2016-03-12 none \n", "260 Achilles tendon injury (DTD) 2011-01-12 none \n", "261 surgery to repair torn right Achilles tendon 2010-10-08 none \n", "262 right Achilles tendon injury (DTD) 2011-03-19 none \n", "\n", " days_injured Classifications \\\n", "0 250 {'sequence': 'placed on IL with torn left Achi... \n", "1 250 {'sequence': 'placed on IL with Achilles injur... \n", "2 250 {'sequence': 'placed on IL with sore/strained ... \n", "3 250 {'sequence': 'placed on IL with torn left Achi... \n", "4 250 {'sequence': 'placed on IL with sore left Achi... \n", ".. ... ... \n", "258 250 {'sequence': 'strained right Achilles tendon (... \n", "259 250 {'sequence': 'strained right Achilles tendon (... \n", "260 250 {'sequence': 'Achilles tendon injury (DTD)', '... \n", "261 250 {'sequence': 'surgery to repair torn right Ach... \n", "262 250 {'sequence': 'right Achilles tendon injury (DT... \n", "\n", " Top Classification Top Score Specific Injury \n", "0 achilles tear 0.475043 left achilles tear injury \n", "1 achilles tear 0.474971 achilles tear injury \n", "2 achilles tear 0.472060 left achilles tear injury \n", "3 achilles tear 0.471717 left achilles tear injury \n", "4 achilles tear 0.471639 left achilles tear injury \n", ".. ... ... ... \n", "258 achilles tear 0.456959 right achilles tear injury \n", "259 achilles tear 0.456959 right achilles tear injury \n", "260 achilles tear 0.456949 achilles tear injury \n", "261 achilles tear 0.456810 right achilles tear injury \n", "262 achilles tear 0.455742 right achilles tear injury \n", "\n", "[263 rows x 10 columns]" ] }, "execution_count": 459, "metadata": {}, "output_type": "execute_result" } ], "source": [ "achilles['Injured'] = pd.to_datetime(achilles['Injured'], errors='coerce')\n", "achilles" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "achilles.to_csv('/Users/laraschuman/Desktop/CTP-Project/combined_sorted_notes/sorted_notes_with_injury_achilles.csv')\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Unnamed: 0injuredNotesNameActivatedClassificationsTop ClassificationTop ScoreSpecific Injury
01304827/10/2015placed on IL with torn left AchillesBrandon JenningsFalse{'sequence': 'placed on IL with torn left Achi...achilles tear0.475043left achilles tear injury
11716407/01/2017placed on IL with Achilles injury / sprained a...Avery BradleyFalse{'sequence': 'placed on IL with Achilles injur...achilles tear0.474971achilles tear injury
21633621/11/2016placed on IL with sore/strained left AchillesTreveon GrahamFalse{'sequence': 'placed on IL with sore/strained ...achilles tear0.472060left achilles tear injury
31172427/01/2015placed on IL with torn left Achilles (out for ...Brandon JenningsFalse{'sequence': 'placed on IL with torn left Achi...achilles tear0.471717left achilles tear injury
41691423/12/2016placed on IL with sore left AchillesJose BareaFalse{'sequence': 'placed on IL with sore left Achi...achilles tear0.471639left achilles tear injury
..............................
2582038030/12/2017strained right Achilles tendon (DTD)Austin RiversFalse{'sequence': 'strained right Achilles tendon (...achilles tear0.456959right achilles tear injury
2591505312/03/2016strained right Achilles tendon (DTD)Richaun HolmesFalse{'sequence': 'strained right Achilles tendon (...achilles tear0.456959right achilles tear injury
260113212/01/2011Achilles tendon injury (DTD)Reggie WilliamsFalse{'sequence': 'Achilles tendon injury (DTD)', '...achilles tear0.456949achilles tear injury
261508/10/2010surgery to repair torn right Achilles tendonJonas JerebkoFalse{'sequence': 'surgery to repair torn right Ach...achilles tear0.456810right achilles tear injury
262205019/03/2011right Achilles tendon injury (DTD)Al HarringtonFalse{'sequence': 'right Achilles tendon injury (DT...achilles tear0.455742right achilles tear injury
\n", "

263 rows × 9 columns

\n", "
" ], "text/plain": [ " Unnamed: 0 injured \\\n", "0 13048 27/10/2015 \n", "1 17164 07/01/2017 \n", "2 16336 21/11/2016 \n", "3 11724 27/01/2015 \n", "4 16914 23/12/2016 \n", ".. ... ... \n", "258 20380 30/12/2017 \n", "259 15053 12/03/2016 \n", "260 1132 12/01/2011 \n", "261 5 08/10/2010 \n", "262 2050 19/03/2011 \n", "\n", " Notes Name \\\n", "0 placed on IL with torn left Achilles Brandon Jennings \n", "1 placed on IL with Achilles injury / sprained a... Avery Bradley \n", "2 placed on IL with sore/strained left Achilles Treveon Graham \n", "3 placed on IL with torn left Achilles (out for ... Brandon Jennings \n", "4 placed on IL with sore left Achilles Jose Barea \n", ".. ... ... \n", "258 strained right Achilles tendon (DTD) Austin Rivers \n", "259 strained right Achilles tendon (DTD) Richaun Holmes \n", "260 Achilles tendon injury (DTD) Reggie Williams \n", "261 surgery to repair torn right Achilles tendon Jonas Jerebko \n", "262 right Achilles tendon injury (DTD) Al Harrington \n", "\n", " Activated Classifications \\\n", "0 False {'sequence': 'placed on IL with torn left Achi... \n", "1 False {'sequence': 'placed on IL with Achilles injur... \n", "2 False {'sequence': 'placed on IL with sore/strained ... \n", "3 False {'sequence': 'placed on IL with torn left Achi... \n", "4 False {'sequence': 'placed on IL with sore left Achi... \n", ".. ... ... \n", "258 False {'sequence': 'strained right Achilles tendon (... \n", "259 False {'sequence': 'strained right Achilles tendon (... \n", "260 False {'sequence': 'Achilles tendon injury (DTD)', '... \n", "261 False {'sequence': 'surgery to repair torn right Ach... \n", "262 False {'sequence': 'right Achilles tendon injury (DT... \n", "\n", " Top Classification Top Score Specific Injury \n", "0 achilles tear 0.475043 left achilles tear injury \n", "1 achilles tear 0.474971 achilles tear injury \n", "2 achilles tear 0.472060 left achilles tear injury \n", "3 achilles tear 0.471717 left achilles tear injury \n", "4 achilles tear 0.471639 left achilles tear injury \n", ".. ... ... ... \n", "258 achilles tear 0.456959 right achilles tear injury \n", "259 achilles tear 0.456959 right achilles tear injury \n", "260 achilles tear 0.456949 achilles tear injury \n", "261 achilles tear 0.456810 right achilles tear injury \n", "262 achilles tear 0.455742 right achilles tear injury \n", "\n", "[263 rows x 9 columns]" ] }, "execution_count": 396, "metadata": {}, "output_type": "execute_result" } ], "source": [ "achilles" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Unnamed: 0NameNotesInjuredActivateddays_injuredClassificationsTop ClassificationTop ScoreSpecific InjuryUnnamed: 0.1
045518Sergey Karasevsurgery on right knee to repair torn MCL, disl...2015-03-122015-12-08271.0{'sequence': 'surgery on right knee to repair ...torn mcl0.467165right torn mcl injuryNaN
146401Sergey Karasevsurgery on right knee to repair torn MCL, disl...2015-03-122015-12-08271.0{'sequence': 'surgery on right knee to repair ...torn mcl0.467165right torn mcl injuryNaN
250960Sergey Karasevsurgery on right knee to repair torn MCL, disl...2015-03-122015-12-08271.0{'sequence': 'surgery on right knee to repair ...torn mcl0.467165right torn mcl injuryNaN
354657Sergey Karasevsurgery on right knee to repair torn MCL, disl...2015-03-122015-12-08271.0{'sequence': 'surgery on right knee to repair ...torn mcl0.467165right torn mcl injuryNaN
455315Sergey Karasevsurgery on right knee to repair torn MCL, disl...2015-03-122015-12-08271.0{'sequence': 'surgery on right knee to repair ...torn mcl0.467165right torn mcl injuryNaN
....................................
328746878Shane Larkinfractured right ankle (out indefinitely)2013-07-122013-11-18129.0{'sequence': 'fractured right ankle (out indef...ankle fracture0.455315right ankle fracture injuryNaN
3288203837Shane Larkinfractured right ankle (out indefinitely)2013-07-122013-11-18129.0{'sequence': 'fractured right ankle (out indef...ankle fracture0.455315right ankle fracture injuryNaN
328983193Shane Larkinfractured right ankle (out indefinitely)2013-07-122013-11-18129.0{'sequence': 'fractured right ankle (out indef...ankle fracture0.455315right ankle fracture injuryNaN
3290107300Shane Larkinfractured right ankle (out indefinitely)2013-07-122013-11-18129.0{'sequence': 'fractured right ankle (out indef...ankle fracture0.455315right ankle fracture injuryNaN
3291184549Shane Larkinfractured right ankle (out indefinitely)2013-07-122013-11-18129.0{'sequence': 'fractured right ankle (out indef...ankle fracture0.455315right ankle fracture injuryNaN
\n", "

3292 rows × 11 columns

\n", "
" ], "text/plain": [ " Unnamed: 0 Name \\\n", "0 45518 Sergey Karasev \n", "1 46401 Sergey Karasev \n", "2 50960 Sergey Karasev \n", "3 54657 Sergey Karasev \n", "4 55315 Sergey Karasev \n", "... ... ... \n", "3287 46878 Shane Larkin \n", "3288 203837 Shane Larkin \n", "3289 83193 Shane Larkin \n", "3290 107300 Shane Larkin \n", "3291 184549 Shane Larkin \n", "\n", " Notes Injured \\\n", "0 surgery on right knee to repair torn MCL, disl... 2015-03-12 \n", "1 surgery on right knee to repair torn MCL, disl... 2015-03-12 \n", "2 surgery on right knee to repair torn MCL, disl... 2015-03-12 \n", "3 surgery on right knee to repair torn MCL, disl... 2015-03-12 \n", "4 surgery on right knee to repair torn MCL, disl... 2015-03-12 \n", "... ... ... \n", "3287 fractured right ankle (out indefinitely) 2013-07-12 \n", "3288 fractured right ankle (out indefinitely) 2013-07-12 \n", "3289 fractured right ankle (out indefinitely) 2013-07-12 \n", "3290 fractured right ankle (out indefinitely) 2013-07-12 \n", "3291 fractured right ankle (out indefinitely) 2013-07-12 \n", "\n", " Activated days_injured \\\n", "0 2015-12-08 271.0 \n", "1 2015-12-08 271.0 \n", "2 2015-12-08 271.0 \n", "3 2015-12-08 271.0 \n", "4 2015-12-08 271.0 \n", "... ... ... \n", "3287 2013-11-18 129.0 \n", "3288 2013-11-18 129.0 \n", "3289 2013-11-18 129.0 \n", "3290 2013-11-18 129.0 \n", "3291 2013-11-18 129.0 \n", "\n", " Classifications Top Classification \\\n", "0 {'sequence': 'surgery on right knee to repair ... torn mcl \n", "1 {'sequence': 'surgery on right knee to repair ... torn mcl \n", "2 {'sequence': 'surgery on right knee to repair ... torn mcl \n", "3 {'sequence': 'surgery on right knee to repair ... torn mcl \n", "4 {'sequence': 'surgery on right knee to repair ... torn mcl \n", "... ... ... \n", "3287 {'sequence': 'fractured right ankle (out indef... ankle fracture \n", "3288 {'sequence': 'fractured right ankle (out indef... ankle fracture \n", "3289 {'sequence': 'fractured right ankle (out indef... ankle fracture \n", "3290 {'sequence': 'fractured right ankle (out indef... ankle fracture \n", "3291 {'sequence': 'fractured right ankle (out indef... ankle fracture \n", "\n", " Top Score Specific Injury Unnamed: 0.1 \n", "0 0.467165 right torn mcl injury NaN \n", "1 0.467165 right torn mcl injury NaN \n", "2 0.467165 right torn mcl injury NaN \n", "3 0.467165 right torn mcl injury NaN \n", "4 0.467165 right torn mcl injury NaN \n", "... ... ... ... \n", "3287 0.455315 right ankle fracture injury NaN \n", "3288 0.455315 right ankle fracture injury NaN \n", "3289 0.455315 right ankle fracture injury NaN \n", "3290 0.455315 right ankle fracture injury NaN \n", "3291 0.455315 right ankle fracture injury NaN \n", "\n", "[3292 rows x 11 columns]" ] }, "execution_count": 460, "metadata": {}, "output_type": "execute_result" } ], "source": [ "combined_df" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "combined_df = combined_df.drop(columns=['Unnamed: 0','Classifications','Notes','Top Classification','Top Score'])\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Name Injured Activated days_injured Specific Injury \n", "Andrew Bogut 2012-01-26 2012-11-05 284.0 left ankle fracture injury 58\n", " Tony Parker 2017-05-04 2017-11-27 207.0 left quad injury injury 49\n", "Derrick Rose 2017-04-05 2017-10-29 207.0 left meniscus tear injury 39\n", "Chandler Parsons 2017-03-13 2017-12-01 263.0 left meniscus tear injury 39\n", " 2016-03-25 2016-11-06 226.0 right meniscus tear injury 39\n", " ..\n", "Alan Williams 2017-09-25 2018-03-26 182.0 right meniscus tear injury 3\n", "Michael Frazier II 2020-03-10 2020-07-31 143.0 left arm injury injury 2\n", "Jason Thompson 2011-06-15 2012-03-28 287.0 right foot fracture injury 2\n", "Ronnie Brewer 2012-09-07 2013-02-27 173.0 left meniscus tear injury 1\n", "Justin Patton 2017-07-04 2018-02-24 235.0 left foot fracture injury 1\n", "Name: count, Length: 187, dtype: int64" ] }, "execution_count": 39, "metadata": {}, "output_type": "execute_result" } ], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
NameInjuredActivateddays_injuredSpecific InjuryUnnamed: 0.1
0Sergey Karasev2015-03-122015-12-08271.0right torn mcl injuryNaN
1Sergey Karasev2015-03-122015-12-08271.0right torn mcl injuryNaN
2Sergey Karasev2015-03-122015-12-08271.0right torn mcl injuryNaN
3Sergey Karasev2015-03-122015-12-08271.0right torn mcl injuryNaN
4Sergey Karasev2015-03-122015-12-08271.0right torn mcl injuryNaN
.....................
3287Shane Larkin2013-07-122013-11-18129.0right ankle fracture injuryNaN
3288Shane Larkin2013-07-122013-11-18129.0right ankle fracture injuryNaN
3289Shane Larkin2013-07-122013-11-18129.0right ankle fracture injuryNaN
3290Shane Larkin2013-07-122013-11-18129.0right ankle fracture injuryNaN
3291Shane Larkin2013-07-122013-11-18129.0right ankle fracture injuryNaN
\n", "

3292 rows × 6 columns

\n", "
" ], "text/plain": [ " Name Injured Activated days_injured \\\n", "0 Sergey Karasev 2015-03-12 2015-12-08 271.0 \n", "1 Sergey Karasev 2015-03-12 2015-12-08 271.0 \n", "2 Sergey Karasev 2015-03-12 2015-12-08 271.0 \n", "3 Sergey Karasev 2015-03-12 2015-12-08 271.0 \n", "4 Sergey Karasev 2015-03-12 2015-12-08 271.0 \n", "... ... ... ... ... \n", "3287 Shane Larkin 2013-07-12 2013-11-18 129.0 \n", "3288 Shane Larkin 2013-07-12 2013-11-18 129.0 \n", "3289 Shane Larkin 2013-07-12 2013-11-18 129.0 \n", "3290 Shane Larkin 2013-07-12 2013-11-18 129.0 \n", "3291 Shane Larkin 2013-07-12 2013-11-18 129.0 \n", "\n", " Specific Injury Unnamed: 0.1 \n", "0 right torn mcl injury NaN \n", "1 right torn mcl injury NaN \n", "2 right torn mcl injury NaN \n", "3 right torn mcl injury NaN \n", "4 right torn mcl injury NaN \n", "... ... ... \n", "3287 right ankle fracture injury NaN \n", "3288 right ankle fracture injury NaN \n", "3289 right ankle fracture injury NaN \n", "3290 right ankle fracture injury NaN \n", "3291 right ankle fracture injury NaN \n", "\n", "[3292 rows x 6 columns]" ] }, "execution_count": 463, "metadata": {}, "output_type": "execute_result" } ], "source": [ "combined_df = combined_df.rename(columns={'Specfic Injury': 'Injury'})\n", "combined_df\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "combined_df = combined_df.drop(columns='Unnamed: 0.1')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
NameInjuredActivateddays_injuredSpecific Injury
0Sergey Karasev2015-03-122015-12-08271.0right torn mcl injury
14Jordan Farmar2013-12-022014-04-06125.0left torn hamstring injury
16Khris Middleton2016-09-212017-02-08140.0left torn hamstring injury
34Khris Middleton2016-09-292017-02-08132.0left torn hamstring injury
41Solomon Hill2017-08-282018-03-18202.0left torn hamstring injury
..................
3216Jerome Robinson2020-03-08none250.0left achilles tear injury
3219Eric Bledsoe2019-02-02none250.0left achilles tear injury
3223Isaiah Hartenstein2019-03-11none250.0right achilles tear injury
3227Rudy Fernandez2012-01-21none250.0right achilles tear injury
3230Reggie Williams2011-01-12none250.0achilles tear injury
\n", "

296 rows × 5 columns

\n", "
" ], "text/plain": [ " Name Injured Activated days_injured \\\n", "0 Sergey Karasev 2015-03-12 2015-12-08 271.0 \n", "14 Jordan Farmar 2013-12-02 2014-04-06 125.0 \n", "16 Khris Middleton 2016-09-21 2017-02-08 140.0 \n", "34 Khris Middleton 2016-09-29 2017-02-08 132.0 \n", "41 Solomon Hill 2017-08-28 2018-03-18 202.0 \n", "... ... ... ... ... \n", "3216 Jerome Robinson 2020-03-08 none 250.0 \n", "3219 Eric Bledsoe 2019-02-02 none 250.0 \n", "3223 Isaiah Hartenstein 2019-03-11 none 250.0 \n", "3227 Rudy Fernandez 2012-01-21 none 250.0 \n", "3230 Reggie Williams 2011-01-12 none 250.0 \n", "\n", " Specific Injury \n", "0 right torn mcl injury \n", "14 left torn hamstring injury \n", "16 left torn hamstring injury \n", "34 left torn hamstring injury \n", "41 left torn hamstring injury \n", "... ... \n", "3216 left achilles tear injury \n", "3219 left achilles tear injury \n", "3223 right achilles tear injury \n", "3227 right achilles tear injury \n", "3230 achilles tear injury \n", "\n", "[296 rows x 5 columns]" ] }, "execution_count": 468, "metadata": {}, "output_type": "execute_result" } ], "source": [ "combined_df_test = combined_df.drop_duplicates(subset=['Name', 'Specific Injury', 'days_injured'], keep='first')\n", "combined_df_test\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/var/folders/8t/t11lp0b952n0xtfmnwbzxzvw0000gn/T/ipykernel_11952/3471448739.py:9: FutureWarning: \n", "\n", "Passing `palette` without assigning `hue` is deprecated and will be removed in v0.14.0. Assign the `x` variable to `hue` and set `legend=False` for the same effect.\n", "\n", " sns.barplot(x=injury_counts.index, y=injury_counts.values, palette='viridis')\n" ] }, { "data": { "image/png": "", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "import matplotlib.pyplot as plt\n", "import seaborn as sns\n", "\n", "# Count the occurrences of each injury type\n", "injury_counts = combined_df_test['Specific Injury'].value_counts()\n", "\n", "# Create a bar chart\n", "plt.figure(figsize=(10, 6))\n", "sns.barplot(x=injury_counts.index, y=injury_counts.values, palette='viridis')\n", "\n", "# Add labels and title\n", "plt.xlabel('Injury Type')\n", "plt.ylabel('Count')\n", "plt.title('Frequency of Injuries')\n", "\n", "# Rotate x-axis labels if needed\n", "plt.xticks(rotation=45, ha='right')\n", "\n", "# Show the plot\n", "plt.tight_layout()\n", "plt.show()\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Specific Injury\n", "left achilles tear injury 46\n", "right achilles tear injury 45\n", "achilles tear injury 18\n", "left ankle sprain injury 15\n", "right foot fracture injury 14\n", "right meniscus tear injury 13\n", "left meniscus tear injury 12\n", "left acl tear injury 11\n", "right fractured hand injury 9\n", "back surgery injury 7\n", "right ankle sprain injury 7\n", "left hamstring injury injury 6\n", "left fractured leg injury 6\n", "left hip labrum injury 6\n", "right fractured leg injury 5\n", "left torn hamstring injury 4\n", "left foot fracture injury 4\n", "left fractured hand injury 4\n", "right hip labrum injury 4\n", "right torn shoulder labrum injury 4\n", "right sprained mcl injury 4\n", "left dislocated shoulder injury 4\n", "left sprained mcl injury 4\n", "left quad injury injury 3\n", "right quad injury injury 3\n", "right acl tear injury 3\n", "right ankle fracture injury 3\n", "left shoulder sprain injury 3\n", "right hamstring injury injury 3\n", "right shoulder sprain injury 2\n", "left bone spurs injury 2\n", "right bone spurs injury 2\n", "right hip flexor strain injury 2\n", "right foot sprain injury 2\n", "right torn rotator cuff injury injury 2\n", "right hip flexor surgery injury 2\n", "right calf strain injury 2\n", "left calf strain injury 1\n", "left arm injury injury 1\n", "right arm injury injury 1\n", "left torn shoulder labrum injury 1\n", "fractured leg injury 1\n", "left ankle fracture injury 1\n", "left hip flexor surgery injury 1\n", "acl tear injury 1\n", "lower back spasm injury 1\n", "right torn mcl injury 1\n", "Name: count, dtype: int64" ] }, "execution_count": 470, "metadata": {}, "output_type": "execute_result" } ], "source": [ "combined_df_test['Specific Injury'].value_counts()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "False" ] }, "execution_count": 72, "metadata": {}, "output_type": "execute_result" } ], "source": [ "acl_tear_injuries = combined_df_test[combined_df_test['Specific Injury'].str.contains('achilles tear', case=False, na=False)]\n", "acl_tear_injuries\n", "player_name = \"Klay Thompson\"\n", "exists = player_name in acl_tear_injuries['Name'].values\n", "exists" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Specific Injury\n", "achilles tear injury NaN\n", "acl tear injury 178.000000\n", "back surgery injury 201.285714\n", "fractured leg injury 205.000000\n", "left achilles tear injury NaN\n", "left acl tear injury 283.909091\n", "left ankle fracture injury 284.000000\n", "left ankle sprain injury 250.266667\n", "left arm injury injury 143.000000\n", "left bone spurs injury 194.500000\n", "left calf strain injury 337.000000\n", "left dislocated shoulder injury 269.000000\n", "left foot fracture injury 182.250000\n", "left fractured hand injury 240.000000\n", "left fractured leg injury 213.666667\n", "left hamstring injury injury 187.666667\n", "left hip flexor surgery injury 96.000000\n", "left hip labrum injury 243.166667\n", "left meniscus tear injury 209.666667\n", "left quad injury injury 251.666667\n", "left shoulder sprain injury 286.000000\n", "left sprained mcl injury 243.000000\n", "left torn hamstring injury 149.750000\n", "left torn shoulder labrum injury 246.000000\n", "lower back spasm injury 234.000000\n", "right achilles tear injury NaN\n", "right acl tear injury 254.000000\n", "right ankle fracture injury 119.333333\n", "right ankle sprain injury 234.714286\n", "right arm injury injury 291.000000\n", "right bone spurs injury 151.500000\n", "right calf strain injury 236.000000\n", "right foot fracture injury 244.285714\n", "right foot sprain injury 294.000000\n", "right fractured hand injury 175.666667\n", "right fractured leg injury 232.000000\n", "right hamstring injury injury 209.666667\n", "right hip flexor strain injury 249.500000\n", "right hip flexor surgery injury 258.000000\n", "right hip labrum injury 296.500000\n", "right meniscus tear injury 217.076923\n", "right quad injury injury 283.000000\n", "right shoulder sprain injury 259.500000\n", "right sprained mcl injury 203.750000\n", "right torn mcl injury 271.000000\n", "right torn rotator cuff injury injury 251.500000\n", "right torn shoulder labrum injury 194.500000\n", "Name: days_injured, dtype: float64" ] }, "execution_count": 290, "metadata": {}, "output_type": "execute_result" } ], "source": [ "avg_days_injured = combined_df_test.groupby('Specific Injury')['days_injured'].mean()\n", "avg_days_injured" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Nameteam_abbreviationageplayer_heightplayer_weightcountrydraft_yeardraft_rounddraft_numbergpptsrebastnet_ratingoreb_pctdreb_pctusg_pctts_pctast_pctseason
0JR SmithDenver Nuggets25.0198.1299.790240USA20041187912.34.12.25.80.0280.1620.2200.5500.1412010
1JaVale McGeeWashington Wizards23.0213.36114.305184USA20081187910.18.00.5-4.40.1160.2140.1650.5660.0272010
2Jamaal MagloireMiami Heat33.0210.82115.665960Canada2000119181.93.40.27.20.1310.3520.1050.5850.0312010
3James PoseyIndiana Pacers34.0203.2098.429464USA1999118494.93.00.7-3.70.0130.1930.1460.4850.0682010
4Jamario MoonLos Angeles Clippers31.0203.2092.986360USAUndraftedUndraftedUndrafted594.32.80.9-6.50.0250.1540.1200.4990.0742010
\n", "
" ], "text/plain": [ " Name team_abbreviation age player_height player_weight \\\n", "0 JR Smith Denver Nuggets 25.0 198.12 99.790240 \n", "1 JaVale McGee Washington Wizards 23.0 213.36 114.305184 \n", "2 Jamaal Magloire Miami Heat 33.0 210.82 115.665960 \n", "3 James Posey Indiana Pacers 34.0 203.20 98.429464 \n", "4 Jamario Moon Los Angeles Clippers 31.0 203.20 92.986360 \n", "\n", " country draft_year draft_round draft_number gp pts reb ast net_rating \\\n", "0 USA 2004 1 18 79 12.3 4.1 2.2 5.8 \n", "1 USA 2008 1 18 79 10.1 8.0 0.5 -4.4 \n", "2 Canada 2000 1 19 18 1.9 3.4 0.2 7.2 \n", "3 USA 1999 1 18 49 4.9 3.0 0.7 -3.7 \n", "4 USA Undrafted Undrafted Undrafted 59 4.3 2.8 0.9 -6.5 \n", "\n", " oreb_pct dreb_pct usg_pct ts_pct ast_pct season \n", "0 0.028 0.162 0.220 0.550 0.141 2010 \n", "1 0.116 0.214 0.165 0.566 0.027 2010 \n", "2 0.131 0.352 0.105 0.585 0.031 2010 \n", "3 0.013 0.193 0.146 0.485 0.068 2010 \n", "4 0.025 0.154 0.120 0.499 0.074 2010 " ] }, "execution_count": 210, "metadata": {}, "output_type": "execute_result" } ], "source": [ "df_players = pd.read_csv('/Users/laraschuman/Desktop/CTP-Project/player_data.csv')\n", "df_players.head()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/var/folders/8t/t11lp0b952n0xtfmnwbzxzvw0000gn/T/ipykernel_11952/3076390773.py:2: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame.\n", "Try using .loc[row_indexer,col_indexer] = value instead\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " combined_df_test['season'] = pd.to_datetime(combined_df_test['Injured']).dt.year\n" ] }, { "data": { "text/plain": [ "Index(['Name', 'team_abbreviation', 'age', 'player_height', 'player_weight',\n", " 'country', 'draft_year', 'draft_round', 'draft_number', 'gp', 'pts',\n", " 'reb', 'ast', 'net_rating', 'oreb_pct', 'dreb_pct', 'usg_pct', 'ts_pct',\n", " 'ast_pct', 'season', 'Injured', 'Activated', 'days_injured',\n", " 'Specific Injury'],\n", " dtype='object')" ] }, "execution_count": 212, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Extract the year from the 'Date' column in new_df\n", "combined_df_test['season'] = pd.to_datetime(combined_df_test['Injured']).dt.year\n", "\n", "# Merge the two dataframes on 'Name' and 'Year'\n", "result_df = pd.merge(df_players, combined_df_test, how=\"left\", on=[\"Name\", \"season\"])\n", "\n", "\n", "# Show the first few rows of the result\n", "result_df.columns\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Index(['Name', 'team_abbreviation', 'age', 'player_height', 'player_weight',\n", " 'country', 'draft_year', 'draft_round', 'draft_number', 'gp', 'pts',\n", " 'reb', 'ast', 'net_rating', 'oreb_pct', 'dreb_pct', 'usg_pct', 'ts_pct',\n", " 'ast_pct', 'season', 'Injured', 'Activated', 'days_injured',\n", " 'Specific Injury'],\n", " dtype='object')" ] }, "execution_count": 213, "metadata": {}, "output_type": "execute_result" } ], "source": [ "result_df.columns\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "result_df['days_injured'] = result_df['days_injured'].fillna(0)\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "0 12.3\n", "1 10.1\n", "2 1.9\n", "3 4.9\n", "4 4.3\n", " ... \n", "5494 3.1\n", "5495 3.9\n", "5496 10.4\n", "5497 5.3\n", "5498 14.1\n", "Name: pts, Length: 5499, dtype: float64" ] }, "execution_count": 215, "metadata": {}, "output_type": "execute_result" } ], "source": [ "result_df['pts']" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# Ensure that 'Activated' is in datetime format if it's not already\n", "result_df['Activated'] = pd.to_datetime(result_df['Activated'])\n", "\n", "# Define the injury season (assuming 'Activated' gives the injury date)\n", "result_df['injury_season'] = result_df['Activated'].dt.year\n", "\n", "# Mark rows as 'before injury' or 'after injury' based on the injury season\n", "result_df['before_after_injury'] = result_df.apply(\n", " lambda row: 'before_injury' if row['season'] < row['injury_season'] else ('after_injury' if row['season'] > row['injury_season'] else 'injury_season'),\n", " axis=1\n", ")\n", "\n", "# Separate the data into before injury, after injury, and injury season\n", "before_injury = result_df[result_df['before_after_injury'] == 'before_injury']\n", "after_injury = result_df[result_df['before_after_injury'] == 'after_injury']\n", "injury_season = result_df[result_df['before_after_injury'] == 'injury_season']\n", "\n", "# Check the resulting datasets\n", "\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Nameteam_abbreviationageplayer_heightplayer_weightcountrydraft_yeardraft_rounddraft_numbergp...usg_pctts_pctast_pctseasonInjuredActivateddays_injuredSpecific Injuryinjury_seasonbefore_after_injury
390Reggie EvansToronto Raptors31.0203.20111.130040USA20103030...0.1020.4660.07020102010-11-262011-03-09103.0right foot fracture injury2011.0before_injury
489Jason ThompsonSacramento Kings25.0210.82113.398000USA200811264...0.1530.5580.07020112011-06-152012-03-28287.0right foot fracture injury2012.0before_injury
662Eric BledsoeLos Angeles Clippers22.0185.4288.450440USA201011840...0.1970.4540.22820112011-10-072012-01-26111.0right meniscus tear injury2012.0before_injury
683Dominique JonesDallas Mavericks23.0195.5897.522280USA201012533...0.2010.4930.29820112011-02-112012-01-23346.0right foot fracture injury2012.0before_injury
684Dominique JonesDallas Mavericks23.0195.5897.522280USA201012533...0.2010.4930.29820112011-02-082012-01-23349.0right foot fracture injury2012.0before_injury
..................................................................
4759Stephen CurryGolden State Warriors32.0190.5083.914520USA2009175...0.2860.5570.35520192019-10-312020-03-04125.0left fractured hand injury2020.0before_injury
4762Svi MykhailiukDetroit Pistons23.0200.6692.986360Ukraine201824756...0.1650.5780.12020192019-04-052020-02-12313.0left fractured hand injury2020.0before_injury
4827Zion WilliamsonNew Orleans Pelicans19.0198.12128.820128USA20191124...0.2910.6160.11920192019-10-182020-01-2296.0right meniscus tear injury2020.0before_injury
4828Zion WilliamsonNew Orleans Pelicans19.0198.12128.820128USA20191124...0.2910.6160.11920192019-10-212020-01-2293.0right meniscus tear injury2020.0before_injury
4928Richaun HolmesSacramento Kings26.0208.28106.594120USA201523744...0.1560.6810.05020192019-04-052020-03-07337.0left ankle sprain injury2020.0before_injury
\n", "

74 rows × 26 columns

\n", "
" ], "text/plain": [ " Name team_abbreviation age player_height \\\n", "390 Reggie Evans Toronto Raptors 31.0 203.20 \n", "489 Jason Thompson Sacramento Kings 25.0 210.82 \n", "662 Eric Bledsoe Los Angeles Clippers 22.0 185.42 \n", "683 Dominique Jones Dallas Mavericks 23.0 195.58 \n", "684 Dominique Jones Dallas Mavericks 23.0 195.58 \n", "... ... ... ... ... \n", "4759 Stephen Curry Golden State Warriors 32.0 190.50 \n", "4762 Svi Mykhailiuk Detroit Pistons 23.0 200.66 \n", "4827 Zion Williamson New Orleans Pelicans 19.0 198.12 \n", "4828 Zion Williamson New Orleans Pelicans 19.0 198.12 \n", "4928 Richaun Holmes Sacramento Kings 26.0 208.28 \n", "\n", " player_weight country draft_year draft_round draft_number gp ... \\\n", "390 111.130040 USA 2010 3 0 30 ... \n", "489 113.398000 USA 2008 1 12 64 ... \n", "662 88.450440 USA 2010 1 18 40 ... \n", "683 97.522280 USA 2010 1 25 33 ... \n", "684 97.522280 USA 2010 1 25 33 ... \n", "... ... ... ... ... ... .. ... \n", "4759 83.914520 USA 2009 1 7 5 ... \n", "4762 92.986360 Ukraine 2018 2 47 56 ... \n", "4827 128.820128 USA 2019 1 1 24 ... \n", "4828 128.820128 USA 2019 1 1 24 ... \n", "4928 106.594120 USA 2015 2 37 44 ... \n", "\n", " usg_pct ts_pct ast_pct season Injured Activated days_injured \\\n", "390 0.102 0.466 0.070 2010 2010-11-26 2011-03-09 103.0 \n", "489 0.153 0.558 0.070 2011 2011-06-15 2012-03-28 287.0 \n", "662 0.197 0.454 0.228 2011 2011-10-07 2012-01-26 111.0 \n", "683 0.201 0.493 0.298 2011 2011-02-11 2012-01-23 346.0 \n", "684 0.201 0.493 0.298 2011 2011-02-08 2012-01-23 349.0 \n", "... ... ... ... ... ... ... ... \n", "4759 0.286 0.557 0.355 2019 2019-10-31 2020-03-04 125.0 \n", "4762 0.165 0.578 0.120 2019 2019-04-05 2020-02-12 313.0 \n", "4827 0.291 0.616 0.119 2019 2019-10-18 2020-01-22 96.0 \n", "4828 0.291 0.616 0.119 2019 2019-10-21 2020-01-22 93.0 \n", "4928 0.156 0.681 0.050 2019 2019-04-05 2020-03-07 337.0 \n", "\n", " Specific Injury injury_season before_after_injury \n", "390 right foot fracture injury 2011.0 before_injury \n", "489 right foot fracture injury 2012.0 before_injury \n", "662 right meniscus tear injury 2012.0 before_injury \n", "683 right foot fracture injury 2012.0 before_injury \n", "684 right foot fracture injury 2012.0 before_injury \n", "... ... ... ... \n", "4759 left fractured hand injury 2020.0 before_injury \n", "4762 left fractured hand injury 2020.0 before_injury \n", "4827 right meniscus tear injury 2020.0 before_injury \n", "4828 right meniscus tear injury 2020.0 before_injury \n", "4928 left ankle sprain injury 2020.0 before_injury \n", "\n", "[74 rows x 26 columns]" ] }, "execution_count": 364, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Step 1: Filter before_injury DataFrame to only keep rows with players in after_injury\n", "before_injury_filtered = before_injury[before_injury['Name'].isin(after_injury['Name'])]\n", "\n", "# Step 2: Concatenate the filtered before_injury and after_injury data\n", "injury_data_cleaned = pd.concat([before_injury_filtered, after_injury])\n", "\n", "# Step 3: Combine the original before_injury and after_injury into one final dataset\n", "injury_data_cleaned = pd.concat([before_injury, after_injury])\n", "\n", "# Step 4: Replace 'Undrafted' in the 'draft_round' and 'draft_number' columns\n", "injury_data_cleaned['draft_round'] = injury_data_cleaned['draft_round'].replace('Undrafted', 3)\n", "injury_data_cleaned['draft_number'] = injury_data_cleaned['draft_number'].replace('Undrafted', 0)\n", "\n", "# Step 5: Replace 'Undrafted' in the 'draft_year' column with the value from 'season'\n", "injury_data_cleaned.loc[injury_data_cleaned['draft_year'] == 'Undrafted', 'draft_year'] = injury_data_cleaned['season']\n", "\n", "# Display the final dataset\n", "injury_data_cleaned\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/var/folders/8t/t11lp0b952n0xtfmnwbzxzvw0000gn/T/ipykernel_11952/4162268832.py:14: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame.\n", "Try using .loc[row_indexer,col_indexer] = value instead\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " before_injury['Injury_Status'] = 'Before'\n", "/var/folders/8t/t11lp0b952n0xtfmnwbzxzvw0000gn/T/ipykernel_11952/4162268832.py:15: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame.\n", "Try using .loc[row_indexer,col_indexer] = value instead\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " after_injury['Injury_Status'] = 'After'\n" ] } ], "source": [ "import pandas as pd\n", "from sklearn.model_selection import train_test_split\n", "from sklearn.linear_model import LinearRegression\n", "from sklearn.ensemble import RandomForestRegressor\n", "from sklearn.metrics import mean_absolute_error, r2_score\n", "\n", "# Assuming 'result_df' has the required data\n", "# Split the dataset into before and after injury\n", "before_injury = result_df[result_df['days_injured'] == 0] # Before injury (assuming 0 means no injury)\n", "after_injury = result_df[result_df['days_injured'] > 0] # After injury\n", "\n", "# Combine the before and after data into one dataset\n", "# Add a column to indicate before/after injury\n", "before_injury['Injury_Status'] = 'Before'\n", "after_injury['Injury_Status'] = 'After'\n", "\n", "# Get the list of players that exist in both before_injury and after_injury\n", "common_players = after_injury['Name'].isin(before_injury['Name'])\n", "\n", "# Filter the before_injury DataFrame to only keep rows with players that exist in after_injury\n", "before_injury_filtered = before_injury[before_injury['Name'].isin(after_injury['Name'])]\n", "\n", "# Concatenate the filtered before_injury and after_injury data\n", "injury_data = pd.concat([before_injury_filtered, after_injury])\n", "\n", "# Combine into a single dataset\n", "injury_data = pd.concat([before_injury, after_injury])\n", "injury_data['draft_round'] = injury_data['draft_round'].replace('Undrafted', 3)\n", "injury_data['draft_number'] = injury_data['draft_number'].replace('Undrafted', 0)\n", "injury_data.loc[injury_data['draft_year'] == 'Undrafted', 'draft_year'] = injury_data['season']\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Namegpptsrebastnet_ratingoreb_pctdreb_pctusg_pctts_pct
\n", "
" ], "text/plain": [ "Empty DataFrame\n", "Columns: [Name, gp, pts, reb, ast, net_rating, oreb_pct, dreb_pct, usg_pct, ts_pct]\n", "Index: []" ] }, "execution_count": 366, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# List of tangible stats you want to aggregate\n", "tangible_stats = ['gp', 'pts', 'reb', 'ast', 'net_rating', 'oreb_pct', 'dreb_pct', 'usg_pct', 'ts_pct']\n", "\n", "# Group by player Name and calculate the mean for all tangible stats\n", "before_injury_avg = before_injury_filtered.groupby('Name')[tangible_stats].mean().round(2).reset_index()\n", "\n", "# Display the result\n", "before_injury_avg.head()\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Namegpptsrebastnet_ratingoreb_pctdreb_pctusg_pctts_pct
0Al Harrington10.05.12.71.0-16.00.060.210.250.43
1Alan Williams5.04.04.41.60.10.040.260.180.46
2Alec Burks31.013.33.52.0-2.00.020.140.250.52
3Alex Len69.06.36.60.50.50.100.220.140.54
4Alonzo Gee13.00.81.20.51.40.050.140.110.31
.................................
121Toney Douglas38.06.21.92.0-0.30.030.100.240.39
122Tony Allen22.04.72.10.4-2.00.070.090.180.51
123Wilson Chandler43.013.05.11.39.30.050.170.230.56
124Zach LaVine24.016.73.93.0-12.10.010.140.290.50
125Zion Williamson24.022.56.32.15.10.090.110.290.62
\n", "

126 rows × 10 columns

\n", "
" ], "text/plain": [ " Name gp pts reb ast net_rating oreb_pct dreb_pct \\\n", "0 Al Harrington 10.0 5.1 2.7 1.0 -16.0 0.06 0.21 \n", "1 Alan Williams 5.0 4.0 4.4 1.6 0.1 0.04 0.26 \n", "2 Alec Burks 31.0 13.3 3.5 2.0 -2.0 0.02 0.14 \n", "3 Alex Len 69.0 6.3 6.6 0.5 0.5 0.10 0.22 \n", "4 Alonzo Gee 13.0 0.8 1.2 0.5 1.4 0.05 0.14 \n", ".. ... ... ... ... ... ... ... ... \n", "121 Toney Douglas 38.0 6.2 1.9 2.0 -0.3 0.03 0.10 \n", "122 Tony Allen 22.0 4.7 2.1 0.4 -2.0 0.07 0.09 \n", "123 Wilson Chandler 43.0 13.0 5.1 1.3 9.3 0.05 0.17 \n", "124 Zach LaVine 24.0 16.7 3.9 3.0 -12.1 0.01 0.14 \n", "125 Zion Williamson 24.0 22.5 6.3 2.1 5.1 0.09 0.11 \n", "\n", " usg_pct ts_pct \n", "0 0.25 0.43 \n", "1 0.18 0.46 \n", "2 0.25 0.52 \n", "3 0.14 0.54 \n", "4 0.11 0.31 \n", ".. ... ... \n", "121 0.24 0.39 \n", "122 0.18 0.51 \n", "123 0.23 0.56 \n", "124 0.29 0.50 \n", "125 0.29 0.62 \n", "\n", "[126 rows x 10 columns]" ] }, "execution_count": 218, "metadata": {}, "output_type": "execute_result" } ], "source": [ "after_injury_avg = after_injury.groupby('Name')[tangible_stats].mean().round(2).reset_index()\n", "after_injury_avg" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Namegp_beforepts_beforereb_beforeast_beforenet_rating_beforeoreb_pct_beforedreb_pct_beforeusg_pct_beforets_pct_beforegp_afterpts_afterreb_afterast_afternet_rating_afteroreb_pct_afterdreb_pct_afterusg_pct_afterts_pct_after
0Al Harrington57.0010.434.331.204.200.050.180.230.5210.05.12.71.0-16.00.060.210.250.43
1Alan Williams20.674.634.600.538.870.150.360.210.555.04.04.41.60.10.040.260.180.46
2Alec Burks57.0010.333.391.87-2.960.030.140.210.5331.013.33.52.0-2.00.020.140.250.52
3Alex Len66.007.605.640.84-6.710.100.210.180.5669.06.36.60.50.50.100.220.140.54
4Alonzo Gee65.506.633.281.05-7.050.050.120.150.5313.00.81.20.51.40.050.140.110.31
............................................................
121Toney Douglas50.006.672.202.221.070.030.120.180.5038.06.21.92.0-0.30.030.100.240.39
122Tony Allen66.008.964.231.374.560.070.120.180.5222.04.72.10.4-2.00.070.090.180.51
123Wilson Chandler56.3811.225.221.76-1.450.040.160.180.5243.013.05.11.39.30.050.170.230.56
124Zach LaVine64.5019.933.923.88-5.880.020.110.260.5724.016.73.93.0-12.10.010.140.290.50
125Zion Williamson61.0027.007.203.702.100.080.130.290.6524.022.56.32.15.10.090.110.290.62
\n", "

126 rows × 19 columns

\n", "
" ], "text/plain": [ " Name gp_before pts_before reb_before ast_before \\\n", "0 Al Harrington 57.00 10.43 4.33 1.20 \n", "1 Alan Williams 20.67 4.63 4.60 0.53 \n", "2 Alec Burks 57.00 10.33 3.39 1.87 \n", "3 Alex Len 66.00 7.60 5.64 0.84 \n", "4 Alonzo Gee 65.50 6.63 3.28 1.05 \n", ".. ... ... ... ... ... \n", "121 Toney Douglas 50.00 6.67 2.20 2.22 \n", "122 Tony Allen 66.00 8.96 4.23 1.37 \n", "123 Wilson Chandler 56.38 11.22 5.22 1.76 \n", "124 Zach LaVine 64.50 19.93 3.92 3.88 \n", "125 Zion Williamson 61.00 27.00 7.20 3.70 \n", "\n", " net_rating_before oreb_pct_before dreb_pct_before usg_pct_before \\\n", "0 4.20 0.05 0.18 0.23 \n", "1 8.87 0.15 0.36 0.21 \n", "2 -2.96 0.03 0.14 0.21 \n", "3 -6.71 0.10 0.21 0.18 \n", "4 -7.05 0.05 0.12 0.15 \n", ".. ... ... ... ... \n", "121 1.07 0.03 0.12 0.18 \n", "122 4.56 0.07 0.12 0.18 \n", "123 -1.45 0.04 0.16 0.18 \n", "124 -5.88 0.02 0.11 0.26 \n", "125 2.10 0.08 0.13 0.29 \n", "\n", " ts_pct_before gp_after pts_after reb_after ast_after \\\n", "0 0.52 10.0 5.1 2.7 1.0 \n", "1 0.55 5.0 4.0 4.4 1.6 \n", "2 0.53 31.0 13.3 3.5 2.0 \n", "3 0.56 69.0 6.3 6.6 0.5 \n", "4 0.53 13.0 0.8 1.2 0.5 \n", ".. ... ... ... ... ... \n", "121 0.50 38.0 6.2 1.9 2.0 \n", "122 0.52 22.0 4.7 2.1 0.4 \n", "123 0.52 43.0 13.0 5.1 1.3 \n", "124 0.57 24.0 16.7 3.9 3.0 \n", "125 0.65 24.0 22.5 6.3 2.1 \n", "\n", " net_rating_after oreb_pct_after dreb_pct_after usg_pct_after \\\n", "0 -16.0 0.06 0.21 0.25 \n", "1 0.1 0.04 0.26 0.18 \n", "2 -2.0 0.02 0.14 0.25 \n", "3 0.5 0.10 0.22 0.14 \n", "4 1.4 0.05 0.14 0.11 \n", ".. ... ... ... ... \n", "121 -0.3 0.03 0.10 0.24 \n", "122 -2.0 0.07 0.09 0.18 \n", "123 9.3 0.05 0.17 0.23 \n", "124 -12.1 0.01 0.14 0.29 \n", "125 5.1 0.09 0.11 0.29 \n", "\n", " ts_pct_after \n", "0 0.43 \n", "1 0.46 \n", "2 0.52 \n", "3 0.54 \n", "4 0.31 \n", ".. ... \n", "121 0.39 \n", "122 0.51 \n", "123 0.56 \n", "124 0.50 \n", "125 0.62 \n", "\n", "[126 rows x 19 columns]" ] }, "execution_count": 219, "metadata": {}, "output_type": "execute_result" } ], "source": [ "performance_data = pd.merge(before_injury_avg, after_injury_avg, on='Name', suffixes=('_before', '_after'))\n", "performance_data\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "performance_data['points_change'] = performance_data['pts_after'] - performance_data['pts_before']\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Namegp_beforepts_beforereb_beforeast_beforenet_rating_beforeoreb_pct_beforedreb_pct_beforeusg_pct_beforets_pct_before...ts_pct_changegp_changepts_changereb_changeast_changenet_rating_changeoreb_pct_changedreb_pct_changeusg_pct_changets_pct_change
0Al Harrington57.0010.434.331.204.200.050.180.230.52...-0.09-47.00-5.33-1.63-0.20-20.200.010.030.02-0.09
1Alan Williams20.674.634.600.538.870.150.360.210.55...-0.09-15.67-0.63-0.201.07-8.77-0.11-0.10-0.03-0.09
2Alec Burks57.0010.333.391.87-2.960.030.140.210.53...-0.01-26.002.970.110.130.96-0.010.000.04-0.01
3Alex Len66.007.605.640.84-6.710.100.210.180.56...-0.023.00-1.300.96-0.347.210.000.01-0.04-0.02
4Alonzo Gee65.506.633.281.05-7.050.050.120.150.53...-0.22-52.50-5.83-2.08-0.558.450.000.02-0.04-0.22
..................................................................
121Toney Douglas50.006.672.202.221.070.030.120.180.50...-0.11-12.00-0.47-0.30-0.22-1.370.00-0.020.06-0.11
122Tony Allen66.008.964.231.374.560.070.120.180.52...-0.01-44.00-4.26-2.13-0.97-6.560.00-0.030.00-0.01
123Wilson Chandler56.3811.225.221.76-1.450.040.160.180.52...0.04-13.381.78-0.12-0.4610.750.010.010.050.04
124Zach LaVine64.5019.933.923.88-5.880.020.110.260.57...-0.07-40.50-3.23-0.02-0.88-6.22-0.010.030.03-0.07
125Zion Williamson61.0027.007.203.702.100.080.130.290.65...-0.03-37.00-4.50-0.90-1.603.000.01-0.020.00-0.03
\n", "

126 rows × 38 columns

\n", "
" ], "text/plain": [ " Name gp_before pts_before reb_before ast_before \\\n", "0 Al Harrington 57.00 10.43 4.33 1.20 \n", "1 Alan Williams 20.67 4.63 4.60 0.53 \n", "2 Alec Burks 57.00 10.33 3.39 1.87 \n", "3 Alex Len 66.00 7.60 5.64 0.84 \n", "4 Alonzo Gee 65.50 6.63 3.28 1.05 \n", ".. ... ... ... ... ... \n", "121 Toney Douglas 50.00 6.67 2.20 2.22 \n", "122 Tony Allen 66.00 8.96 4.23 1.37 \n", "123 Wilson Chandler 56.38 11.22 5.22 1.76 \n", "124 Zach LaVine 64.50 19.93 3.92 3.88 \n", "125 Zion Williamson 61.00 27.00 7.20 3.70 \n", "\n", " net_rating_before oreb_pct_before dreb_pct_before usg_pct_before \\\n", "0 4.20 0.05 0.18 0.23 \n", "1 8.87 0.15 0.36 0.21 \n", "2 -2.96 0.03 0.14 0.21 \n", "3 -6.71 0.10 0.21 0.18 \n", "4 -7.05 0.05 0.12 0.15 \n", ".. ... ... ... ... \n", "121 1.07 0.03 0.12 0.18 \n", "122 4.56 0.07 0.12 0.18 \n", "123 -1.45 0.04 0.16 0.18 \n", "124 -5.88 0.02 0.11 0.26 \n", "125 2.10 0.08 0.13 0.29 \n", "\n", " ts_pct_before ... ts_pct_change gp_change pts_change reb_change \\\n", "0 0.52 ... -0.09 -47.00 -5.33 -1.63 \n", "1 0.55 ... -0.09 -15.67 -0.63 -0.20 \n", "2 0.53 ... -0.01 -26.00 2.97 0.11 \n", "3 0.56 ... -0.02 3.00 -1.30 0.96 \n", "4 0.53 ... -0.22 -52.50 -5.83 -2.08 \n", ".. ... ... ... ... ... ... \n", "121 0.50 ... -0.11 -12.00 -0.47 -0.30 \n", "122 0.52 ... -0.01 -44.00 -4.26 -2.13 \n", "123 0.52 ... 0.04 -13.38 1.78 -0.12 \n", "124 0.57 ... -0.07 -40.50 -3.23 -0.02 \n", "125 0.65 ... -0.03 -37.00 -4.50 -0.90 \n", "\n", " ast_change net_rating_change oreb_pct_change dreb_pct_change \\\n", "0 -0.20 -20.20 0.01 0.03 \n", "1 1.07 -8.77 -0.11 -0.10 \n", "2 0.13 0.96 -0.01 0.00 \n", "3 -0.34 7.21 0.00 0.01 \n", "4 -0.55 8.45 0.00 0.02 \n", ".. ... ... ... ... \n", "121 -0.22 -1.37 0.00 -0.02 \n", "122 -0.97 -6.56 0.00 -0.03 \n", "123 -0.46 10.75 0.01 0.01 \n", "124 -0.88 -6.22 -0.01 0.03 \n", "125 -1.60 3.00 0.01 -0.02 \n", "\n", " usg_pct_change ts_pct_change \n", "0 0.02 -0.09 \n", "1 -0.03 -0.09 \n", "2 0.04 -0.01 \n", "3 -0.04 -0.02 \n", "4 -0.04 -0.22 \n", ".. ... ... \n", "121 0.06 -0.11 \n", "122 0.00 -0.01 \n", "123 0.05 0.04 \n", "124 0.03 -0.07 \n", "125 0.00 -0.03 \n", "\n", "[126 rows x 38 columns]" ] }, "execution_count": 222, "metadata": {}, "output_type": "execute_result" } ], "source": [ "performance_changes = pd.DataFrame()\n", "\n", "# Calculate the change in each stat\n", "for stat in tangible_stats:\n", " performance_changes[f'{stat}_change'] = performance_data[f'{stat}_after'] - performance_data[f'{stat}_before']\n", "\n", "# Combine the performance_changes DataFrame with the original performance_data\n", "performance_data = pd.concat([performance_data, performance_changes], axis=1)\n", "performance_data" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Index(['Name', 'gp_before', 'pts_before', 'reb_before', 'ast_before',\n", " 'net_rating_before', 'oreb_pct_before', 'dreb_pct_before',\n", " 'usg_pct_before', 'ts_pct_before', 'gp_after', 'pts_after', 'reb_after',\n", " 'ast_after', 'net_rating_after', 'oreb_pct_after', 'dreb_pct_after',\n", " 'usg_pct_after', 'ts_pct_after', 'points_change', 'gp_change',\n", " 'pts_change', 'reb_change', 'ast_change', 'net_rating_change',\n", " 'oreb_pct_change', 'dreb_pct_change', 'usg_pct_change', 'ts_pct_change',\n", " 'gp_change', 'pts_change', 'reb_change', 'ast_change',\n", " 'net_rating_change', 'oreb_pct_change', 'dreb_pct_change',\n", " 'usg_pct_change', 'ts_pct_change'],\n", " dtype='object')" ] }, "execution_count": 191, "metadata": {}, "output_type": "execute_result" } ], "source": [ "performance_data.columns" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "injury_data_merged= pd.merge(injury_data, performance_data[['Name', 'points_change', 'gp_change',\n", " 'pts_change', 'reb_change', 'ast_change', 'net_rating_change',\n", " 'oreb_pct_change', 'dreb_pct_change', 'usg_pct_change', 'ts_pct_change',\n", " 'gp_change', 'pts_change', 'reb_change', 'ast_change',\n", " 'net_rating_change', 'oreb_pct_change', 'dreb_pct_change',\n", " 'usg_pct_change', 'ts_pct_change']], on='Name')\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Index(['Name', 'team_abbreviation', 'age', 'player_height', 'player_weight',\n", " 'country', 'draft_year', 'draft_round', 'draft_number', 'gp', 'pts',\n", " 'reb', 'ast', 'net_rating', 'oreb_pct', 'dreb_pct', 'usg_pct', 'ts_pct',\n", " 'ast_pct', 'season', 'Injured', 'Activated', 'days_injured',\n", " 'Specific Injury', 'Injury_Status', 'points_change', 'gp_change',\n", " 'gp_change', 'pts_change', 'pts_change', 'reb_change', 'reb_change',\n", " 'ast_change', 'ast_change', 'net_rating_change', 'net_rating_change',\n", " 'oreb_pct_change', 'oreb_pct_change', 'dreb_pct_change',\n", " 'dreb_pct_change', 'usg_pct_change', 'usg_pct_change', 'ts_pct_change',\n", " 'ts_pct_change', 'gp_change', 'gp_change', 'pts_change', 'pts_change',\n", " 'reb_change', 'reb_change', 'ast_change', 'ast_change',\n", " 'net_rating_change', 'net_rating_change', 'oreb_pct_change',\n", " 'oreb_pct_change', 'dreb_pct_change', 'dreb_pct_change',\n", " 'usg_pct_change', 'usg_pct_change', 'ts_pct_change', 'ts_pct_change'],\n", " dtype='object')" ] }, "execution_count": 224, "metadata": {}, "output_type": "execute_result" } ], "source": [ "injury_data_merged.columns" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "injury_data_merged = injury_data_merged.loc[:, ~injury_data_merged.columns.duplicated()]\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Nameteam_abbreviationageplayer_heightplayer_weightcountrydraft_yeardraft_rounddraft_numbergp...points_changegp_changepts_changereb_changeast_changenet_rating_changeoreb_pct_changedreb_pct_changeusg_pct_changets_pct_change
0Jameer NelsonOrlando Magic29.0182.8886.182480USA200412076...-6.10-12.00-6.10-0.91-2.13-4.79-0.01-0.01-0.06-0.02
1Jason ThompsonSacramento Kings24.0210.82113.398000USA200811275...1.90-9.401.901.180.32-1.100.03-0.01-0.010.03
2Gary NealSan Antonio Spurs26.0193.0495.254320USA20103080...1.17-12.001.170.22-0.33-6.070.010.01-0.010.08
3Glen DavisBoston Celtics25.0205.74131.088088USA200723578...-0.626.25-0.620.12-0.05-5.080.01-0.01-0.020.00
4Greivis VasquezMemphis Grizzlies24.0198.1295.707912Venezuela201012870...-2.27-40.00-2.27-0.23-0.35-5.120.000.000.00-0.05
..................................................................
932Svi MykhailiukDetroit Pistons23.0200.6692.986360Ukraine201824756...3.152.003.150.200.603.75-0.01-0.01-0.010.09
933Zion WilliamsonNew Orleans Pelicans19.0198.12128.820128USA20191124...-4.50-37.00-4.50-0.90-1.603.000.01-0.020.00-0.03
934Zion WilliamsonNew Orleans Pelicans19.0198.12128.820128USA20191124...-4.50-37.00-4.50-0.90-1.603.000.01-0.020.00-0.03
935Richaun HolmesSacramento Kings26.0208.28106.594120USA201523744...3.44-13.403.443.00-0.104.560.010.02-0.010.06
936Jakob PoeltlSan Antonio Spurs25.0215.90111.130040Austria20161969...3.32-0.753.323.180.92-1.35-0.010.000.00-0.02
\n", "

937 rows × 35 columns

\n", "
" ], "text/plain": [ " Name team_abbreviation age player_height \\\n", "0 Jameer Nelson Orlando Magic 29.0 182.88 \n", "1 Jason Thompson Sacramento Kings 24.0 210.82 \n", "2 Gary Neal San Antonio Spurs 26.0 193.04 \n", "3 Glen Davis Boston Celtics 25.0 205.74 \n", "4 Greivis Vasquez Memphis Grizzlies 24.0 198.12 \n", ".. ... ... ... ... \n", "932 Svi Mykhailiuk Detroit Pistons 23.0 200.66 \n", "933 Zion Williamson New Orleans Pelicans 19.0 198.12 \n", "934 Zion Williamson New Orleans Pelicans 19.0 198.12 \n", "935 Richaun Holmes Sacramento Kings 26.0 208.28 \n", "936 Jakob Poeltl San Antonio Spurs 25.0 215.90 \n", "\n", " player_weight country draft_year draft_round draft_number gp ... \\\n", "0 86.182480 USA 2004 1 20 76 ... \n", "1 113.398000 USA 2008 1 12 75 ... \n", "2 95.254320 USA 2010 3 0 80 ... \n", "3 131.088088 USA 2007 2 35 78 ... \n", "4 95.707912 Venezuela 2010 1 28 70 ... \n", ".. ... ... ... ... ... .. ... \n", "932 92.986360 Ukraine 2018 2 47 56 ... \n", "933 128.820128 USA 2019 1 1 24 ... \n", "934 128.820128 USA 2019 1 1 24 ... \n", "935 106.594120 USA 2015 2 37 44 ... \n", "936 111.130040 Austria 2016 1 9 69 ... \n", "\n", " points_change gp_change pts_change reb_change ast_change \\\n", "0 -6.10 -12.00 -6.10 -0.91 -2.13 \n", "1 1.90 -9.40 1.90 1.18 0.32 \n", "2 1.17 -12.00 1.17 0.22 -0.33 \n", "3 -0.62 6.25 -0.62 0.12 -0.05 \n", "4 -2.27 -40.00 -2.27 -0.23 -0.35 \n", ".. ... ... ... ... ... \n", "932 3.15 2.00 3.15 0.20 0.60 \n", "933 -4.50 -37.00 -4.50 -0.90 -1.60 \n", "934 -4.50 -37.00 -4.50 -0.90 -1.60 \n", "935 3.44 -13.40 3.44 3.00 -0.10 \n", "936 3.32 -0.75 3.32 3.18 0.92 \n", "\n", " net_rating_change oreb_pct_change dreb_pct_change usg_pct_change \\\n", "0 -4.79 -0.01 -0.01 -0.06 \n", "1 -1.10 0.03 -0.01 -0.01 \n", "2 -6.07 0.01 0.01 -0.01 \n", "3 -5.08 0.01 -0.01 -0.02 \n", "4 -5.12 0.00 0.00 0.00 \n", ".. ... ... ... ... \n", "932 3.75 -0.01 -0.01 -0.01 \n", "933 3.00 0.01 -0.02 0.00 \n", "934 3.00 0.01 -0.02 0.00 \n", "935 4.56 0.01 0.02 -0.01 \n", "936 -1.35 -0.01 0.00 0.00 \n", "\n", " ts_pct_change \n", "0 -0.02 \n", "1 0.03 \n", "2 0.08 \n", "3 0.00 \n", "4 -0.05 \n", ".. ... \n", "932 0.09 \n", "933 -0.03 \n", "934 -0.03 \n", "935 0.06 \n", "936 -0.02 \n", "\n", "[937 rows x 35 columns]" ] }, "execution_count": 256, "metadata": {}, "output_type": "execute_result" } ], "source": [ "injury_data_merged" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "grouped_data = injury_data_merged.groupby(['Name','player_height','player_weight','age']).agg({\n", " 'pts_change': 'mean',\n", " 'gp_change': 'mean',\n", " 'reb_change': 'mean',\n", " 'ast_change': 'mean',\n", " 'net_rating_change': 'mean',\n", " 'oreb_pct_change': 'mean',\n", " 'dreb_pct_change': 'mean',\n", " 'usg_pct_change': 'mean',\n", " 'ts_pct_change': 'mean'\n", "}).round(2).reset_index()\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "injury_grouped_data = injury_data_merged.groupby(['Specific Injury','Name']).agg({\n", " 'pts_change': 'mean',\n", " 'gp_change': 'mean',\n", " 'reb_change': 'mean',\n", " 'ast_change': 'mean',\n", " 'net_rating_change': 'mean',\n", " 'oreb_pct_change': 'mean',\n", " 'dreb_pct_change': 'mean',\n", " 'usg_pct_change': 'mean',\n", " 'ts_pct_change': 'mean'\n", "}).round(2).reset_index()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Specific InjuryNamepts_changegp_changereb_changeast_changenet_rating_changeoreb_pct_changedreb_pct_changeusg_pct_changets_pct_change
0back surgery injuryDonatas Motiejunas0.28-5.80-0.220.2617.24-0.010.000.000.00
1back surgery injuryMarquis Daniels-2.30-16.00-0.700.001.700.000.010.02-0.08
2back surgery injuryMartell Webster-5.20-27.17-0.80-0.73-6.550.010.00-0.01-0.10
3back surgery injuryMarvin Williams1.03-15.220.23-0.044.260.020.010.03-0.02
4fractured leg injuryThabo Sefolosha0.7918.780.880.22-2.98-0.010.020.000.03
....................................
145right torn rotator cuff injury injuryKobe Bryant-10.00-27.25-1.45-0.67-13.48-0.01-0.01-0.03-0.04
146right torn shoulder labrum injuryDelon Wright-1.92-34.40-1.38-0.906.680.00-0.040.00-0.01
147right torn shoulder labrum injuryMichael Carter-Williams-2.43-7.57-0.94-0.862.45-0.010.020.01-0.03
148right torn shoulder labrum injuryMichael Kidd-Gilchrist4.90-55.711.230.2014.69-0.01-0.010.020.09
149right torn shoulder labrum injuryToney Douglas-0.47-12.00-0.30-0.22-1.370.00-0.020.06-0.11
\n", "

150 rows × 11 columns

\n", "
" ], "text/plain": [ " Specific Injury Name \\\n", "0 back surgery injury Donatas Motiejunas \n", "1 back surgery injury Marquis Daniels \n", "2 back surgery injury Martell Webster \n", "3 back surgery injury Marvin Williams \n", "4 fractured leg injury Thabo Sefolosha \n", ".. ... ... \n", "145 right torn rotator cuff injury injury Kobe Bryant \n", "146 right torn shoulder labrum injury Delon Wright \n", "147 right torn shoulder labrum injury Michael Carter-Williams \n", "148 right torn shoulder labrum injury Michael Kidd-Gilchrist \n", "149 right torn shoulder labrum injury Toney Douglas \n", "\n", " pts_change gp_change reb_change ast_change net_rating_change \\\n", "0 0.28 -5.80 -0.22 0.26 17.24 \n", "1 -2.30 -16.00 -0.70 0.00 1.70 \n", "2 -5.20 -27.17 -0.80 -0.73 -6.55 \n", "3 1.03 -15.22 0.23 -0.04 4.26 \n", "4 0.79 18.78 0.88 0.22 -2.98 \n", ".. ... ... ... ... ... \n", "145 -10.00 -27.25 -1.45 -0.67 -13.48 \n", "146 -1.92 -34.40 -1.38 -0.90 6.68 \n", "147 -2.43 -7.57 -0.94 -0.86 2.45 \n", "148 4.90 -55.71 1.23 0.20 14.69 \n", "149 -0.47 -12.00 -0.30 -0.22 -1.37 \n", "\n", " oreb_pct_change dreb_pct_change usg_pct_change ts_pct_change \n", "0 -0.01 0.00 0.00 0.00 \n", "1 0.00 0.01 0.02 -0.08 \n", "2 0.01 0.00 -0.01 -0.10 \n", "3 0.02 0.01 0.03 -0.02 \n", "4 -0.01 0.02 0.00 0.03 \n", ".. ... ... ... ... \n", "145 -0.01 -0.01 -0.03 -0.04 \n", "146 0.00 -0.04 0.00 -0.01 \n", "147 -0.01 0.02 0.01 -0.03 \n", "148 -0.01 -0.01 0.02 0.09 \n", "149 0.00 -0.02 0.06 -0.11 \n", "\n", "[150 rows x 11 columns]" ] }, "execution_count": 308, "metadata": {}, "output_type": "execute_result" } ], "source": [ "injury_grouped_data" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Name player_height player_weight age pts_change gp_change reb_change ast_change net_rating_change oreb_pct_change dreb_pct_change usg_pct_change ts_pct_change\n", "Al Harrington 205.74 111.130040 33.0 -5.33 -47.00 -1.63 -0.20 -20.20 0.01 0.03 0.02 -0.09 1\n", "Luc Mbah a Moute 203.20 104.326160 30.0 -2.99 24.00 -1.01 -0.33 11.28 0.00 -0.02 -0.04 -0.01 1\n", " 32.0 -2.99 24.00 -1.01 -0.33 11.28 0.00 -0.02 -0.04 -0.01 1\n", " 33.0 -2.99 24.00 -1.01 -0.33 11.28 0.00 -0.02 -0.04 -0.01 1\n", "Luke Babbitt 205.74 102.058200 22.0 0.53 2.71 -0.41 -0.01 1.23 -0.02 -0.05 -0.01 0.06 1\n", " ..\n", "Gary Neal 193.04 95.254320 32.0 1.17 -12.00 0.22 -0.33 -6.07 0.01 0.01 -0.01 0.08 1\n", "Glen Davis 205.74 131.088088 25.0 -0.62 6.25 0.12 -0.05 -5.08 0.01 -0.01 -0.02 0.00 1\n", " 26.0 -0.62 6.25 0.12 -0.05 -5.08 0.01 -0.01 -0.02 0.00 1\n", " 27.0 -0.62 6.25 0.12 -0.05 -5.08 0.01 -0.01 -0.02 0.00 1\n", "Zion Williamson 200.66 128.820128 20.0 -4.50 -37.00 -0.90 -1.60 3.00 0.01 -0.02 0.00 -0.03 1\n", "Name: count, Length: 912, dtype: int64" ] }, "execution_count": 298, "metadata": {}, "output_type": "execute_result" } ], "source": [ "grouped_data.value_counts()" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "merged_data_group = pd.merge(injury_data, grouped_data, on=['Name'], suffixes=('', '_grouped'))\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "merged_data_group_injury = pd.merge(injury_data, injury_grouped_data, on=['Name'], suffixes=('', '_grouped'))\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Nameteam_abbreviationageplayer_heightplayer_weightcountrydraft_yeardraft_rounddraft_numbergp...Injury_Statuspts_changegp_changereb_changeast_changenet_rating_changeoreb_pct_changedreb_pct_changeusg_pct_changets_pct_change
0Jameer NelsonOrlando Magic29.0182.8886.182480USA200412076...Before-6.10-12.00-0.91-2.13-4.79-0.01-0.01-0.06-0.02
1Jason ThompsonSacramento Kings24.0210.82113.398000USA200811275...Before1.90-9.401.180.32-1.100.03-0.01-0.010.03
2Gary NealSan Antonio Spurs26.0193.0495.254320USA20103080...Before1.17-12.000.22-0.33-6.070.010.01-0.010.08
3Glen DavisBoston Celtics25.0205.74131.088088USA200723578...Before-0.626.250.12-0.05-5.080.01-0.01-0.020.00
4Greivis VasquezMemphis Grizzlies24.0198.1295.707912Venezuela201012870...Before-2.27-40.00-0.23-0.35-5.120.000.000.00-0.05
..................................................................
932Svi MykhailiukDetroit Pistons23.0200.6692.986360Ukraine201824756...After3.152.000.200.603.75-0.01-0.01-0.010.09
933Zion WilliamsonNew Orleans Pelicans19.0198.12128.820128USA20191124...After-4.50-37.00-0.90-1.603.000.01-0.020.00-0.03
934Zion WilliamsonNew Orleans Pelicans19.0198.12128.820128USA20191124...After-4.50-37.00-0.90-1.603.000.01-0.020.00-0.03
935Richaun HolmesSacramento Kings26.0208.28106.594120USA201523744...After3.44-13.403.00-0.104.560.010.02-0.010.06
936Jakob PoeltlSan Antonio Spurs25.0215.90111.130040Austria20161969...After3.32-0.753.180.92-1.35-0.010.000.00-0.02
\n", "

937 rows × 34 columns

\n", "
" ], "text/plain": [ " Name team_abbreviation age player_height \\\n", "0 Jameer Nelson Orlando Magic 29.0 182.88 \n", "1 Jason Thompson Sacramento Kings 24.0 210.82 \n", "2 Gary Neal San Antonio Spurs 26.0 193.04 \n", "3 Glen Davis Boston Celtics 25.0 205.74 \n", "4 Greivis Vasquez Memphis Grizzlies 24.0 198.12 \n", ".. ... ... ... ... \n", "932 Svi Mykhailiuk Detroit Pistons 23.0 200.66 \n", "933 Zion Williamson New Orleans Pelicans 19.0 198.12 \n", "934 Zion Williamson New Orleans Pelicans 19.0 198.12 \n", "935 Richaun Holmes Sacramento Kings 26.0 208.28 \n", "936 Jakob Poeltl San Antonio Spurs 25.0 215.90 \n", "\n", " player_weight country draft_year draft_round draft_number gp ... \\\n", "0 86.182480 USA 2004 1 20 76 ... \n", "1 113.398000 USA 2008 1 12 75 ... \n", "2 95.254320 USA 2010 3 0 80 ... \n", "3 131.088088 USA 2007 2 35 78 ... \n", "4 95.707912 Venezuela 2010 1 28 70 ... \n", ".. ... ... ... ... ... .. ... \n", "932 92.986360 Ukraine 2018 2 47 56 ... \n", "933 128.820128 USA 2019 1 1 24 ... \n", "934 128.820128 USA 2019 1 1 24 ... \n", "935 106.594120 USA 2015 2 37 44 ... \n", "936 111.130040 Austria 2016 1 9 69 ... \n", "\n", " Injury_Status pts_change gp_change reb_change ast_change \\\n", "0 Before -6.10 -12.00 -0.91 -2.13 \n", "1 Before 1.90 -9.40 1.18 0.32 \n", "2 Before 1.17 -12.00 0.22 -0.33 \n", "3 Before -0.62 6.25 0.12 -0.05 \n", "4 Before -2.27 -40.00 -0.23 -0.35 \n", ".. ... ... ... ... ... \n", "932 After 3.15 2.00 0.20 0.60 \n", "933 After -4.50 -37.00 -0.90 -1.60 \n", "934 After -4.50 -37.00 -0.90 -1.60 \n", "935 After 3.44 -13.40 3.00 -0.10 \n", "936 After 3.32 -0.75 3.18 0.92 \n", "\n", " net_rating_change oreb_pct_change dreb_pct_change usg_pct_change \\\n", "0 -4.79 -0.01 -0.01 -0.06 \n", "1 -1.10 0.03 -0.01 -0.01 \n", "2 -6.07 0.01 0.01 -0.01 \n", "3 -5.08 0.01 -0.01 -0.02 \n", "4 -5.12 0.00 0.00 0.00 \n", ".. ... ... ... ... \n", "932 3.75 -0.01 -0.01 -0.01 \n", "933 3.00 0.01 -0.02 0.00 \n", "934 3.00 0.01 -0.02 0.00 \n", "935 4.56 0.01 0.02 -0.01 \n", "936 -1.35 -0.01 0.00 0.00 \n", "\n", " ts_pct_change \n", "0 -0.02 \n", "1 0.03 \n", "2 0.08 \n", "3 0.00 \n", "4 -0.05 \n", ".. ... \n", "932 0.09 \n", "933 -0.03 \n", "934 -0.03 \n", "935 0.06 \n", "936 -0.02 \n", "\n", "[937 rows x 34 columns]" ] }, "execution_count": 294, "metadata": {}, "output_type": "execute_result" } ], "source": [ "merged_data_group" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "drop_duplicated = merged_data_group.drop_duplicates(subset=['Specific Injury', 'Name', 'season'])\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "drop_duplicated_injury = merged_data_group_injury.drop_duplicates(subset=['Specific Injury', 'Name', 'season'])\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "drop_duplicated_injury.to_csv('/Users/laraschuman/Desktop/CTP-Project/drop_duplicated.csv')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Nameteam_abbreviationageplayer_heightplayer_weightcountrydraft_yeardraft_rounddraft_numbergp...Specific Injury_groupedpts_changegp_changereb_changeast_changenet_rating_changeoreb_pct_changedreb_pct_changeusg_pct_changets_pct_change
0Jameer NelsonOrlando Magic29.0182.8886.182480USA200412076...right calf strain injury-6.10-12.00-0.91-2.13-4.79-0.01-0.01-0.06-0.02
1Jason ThompsonSacramento Kings24.0210.82113.398000USA200811275...right foot fracture injury1.90-9.401.180.32-1.100.03-0.01-0.010.03
2Gary NealSan Antonio Spurs26.0193.0495.254320USA20103080...left ankle sprain injury1.17-12.000.22-0.33-6.070.010.01-0.010.08
3Glen DavisBoston Celtics25.0205.74131.088088USA200723578...left foot fracture injury-0.626.250.12-0.05-5.080.01-0.01-0.020.00
4Greivis VasquezMemphis Grizzlies24.0198.1295.707912Venezuela201012870...right bone spurs injury-2.27-40.00-0.23-0.35-5.120.000.000.00-0.05
..................................................................
1166Stephen CurryGolden State Warriors32.0190.5083.914520USA2009175...left fractured hand injury-3.71-62.700.630.14-25.340.000.040.01-0.07
1167Svi MykhailiukDetroit Pistons23.0200.6692.986360Ukraine201824756...left fractured hand injury3.152.000.200.603.75-0.01-0.01-0.010.09
1168Zion WilliamsonNew Orleans Pelicans19.0198.12128.820128USA20191124...right meniscus tear injury-4.50-37.00-0.90-1.603.000.01-0.020.00-0.03
1170Richaun HolmesSacramento Kings26.0208.28106.594120USA201523744...left ankle sprain injury3.44-13.403.00-0.104.560.010.02-0.010.06
1171Jakob PoeltlSan Antonio Spurs25.0215.90111.130040Austria20161969...right sprained mcl injury3.32-0.753.180.92-1.35-0.010.000.00-0.02
\n", "

920 rows × 35 columns

\n", "
" ], "text/plain": [ " Name team_abbreviation age player_height \\\n", "0 Jameer Nelson Orlando Magic 29.0 182.88 \n", "1 Jason Thompson Sacramento Kings 24.0 210.82 \n", "2 Gary Neal San Antonio Spurs 26.0 193.04 \n", "3 Glen Davis Boston Celtics 25.0 205.74 \n", "4 Greivis Vasquez Memphis Grizzlies 24.0 198.12 \n", "... ... ... ... ... \n", "1166 Stephen Curry Golden State Warriors 32.0 190.50 \n", "1167 Svi Mykhailiuk Detroit Pistons 23.0 200.66 \n", "1168 Zion Williamson New Orleans Pelicans 19.0 198.12 \n", "1170 Richaun Holmes Sacramento Kings 26.0 208.28 \n", "1171 Jakob Poeltl San Antonio Spurs 25.0 215.90 \n", "\n", " player_weight country draft_year draft_round draft_number gp ... \\\n", "0 86.182480 USA 2004 1 20 76 ... \n", "1 113.398000 USA 2008 1 12 75 ... \n", "2 95.254320 USA 2010 3 0 80 ... \n", "3 131.088088 USA 2007 2 35 78 ... \n", "4 95.707912 Venezuela 2010 1 28 70 ... \n", "... ... ... ... ... ... .. ... \n", "1166 83.914520 USA 2009 1 7 5 ... \n", "1167 92.986360 Ukraine 2018 2 47 56 ... \n", "1168 128.820128 USA 2019 1 1 24 ... \n", "1170 106.594120 USA 2015 2 37 44 ... \n", "1171 111.130040 Austria 2016 1 9 69 ... \n", "\n", " Specific Injury_grouped pts_change gp_change reb_change \\\n", "0 right calf strain injury -6.10 -12.00 -0.91 \n", "1 right foot fracture injury 1.90 -9.40 1.18 \n", "2 left ankle sprain injury 1.17 -12.00 0.22 \n", "3 left foot fracture injury -0.62 6.25 0.12 \n", "4 right bone spurs injury -2.27 -40.00 -0.23 \n", "... ... ... ... ... \n", "1166 left fractured hand injury -3.71 -62.70 0.63 \n", "1167 left fractured hand injury 3.15 2.00 0.20 \n", "1168 right meniscus tear injury -4.50 -37.00 -0.90 \n", "1170 left ankle sprain injury 3.44 -13.40 3.00 \n", "1171 right sprained mcl injury 3.32 -0.75 3.18 \n", "\n", " ast_change net_rating_change oreb_pct_change dreb_pct_change \\\n", "0 -2.13 -4.79 -0.01 -0.01 \n", "1 0.32 -1.10 0.03 -0.01 \n", "2 -0.33 -6.07 0.01 0.01 \n", "3 -0.05 -5.08 0.01 -0.01 \n", "4 -0.35 -5.12 0.00 0.00 \n", "... ... ... ... ... \n", "1166 0.14 -25.34 0.00 0.04 \n", "1167 0.60 3.75 -0.01 -0.01 \n", "1168 -1.60 3.00 0.01 -0.02 \n", "1170 -0.10 4.56 0.01 0.02 \n", "1171 0.92 -1.35 -0.01 0.00 \n", "\n", " usg_pct_change ts_pct_change \n", "0 -0.06 -0.02 \n", "1 -0.01 0.03 \n", "2 -0.01 0.08 \n", "3 -0.02 0.00 \n", "4 0.00 -0.05 \n", "... ... ... \n", "1166 0.01 -0.07 \n", "1167 -0.01 0.09 \n", "1168 0.00 -0.03 \n", "1170 -0.01 0.06 \n", "1171 0.00 -0.02 \n", "\n", "[920 rows x 35 columns]" ] }, "execution_count": 345, "metadata": {}, "output_type": "execute_result" } ], "source": [ "drop_duplicated_injury" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
ptspts_changeSpecific Injuryseasondays_injured
72127.0-4.5NaN20200.0
93322.5-4.5right meniscus tear injury201996.0
\n", "
" ], "text/plain": [ " pts pts_change Specific Injury season days_injured\n", "721 27.0 -4.5 NaN 2020 0.0\n", "933 22.5 -4.5 right meniscus tear injury 2019 96.0" ] }, "execution_count": 371, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Filter the data for 'Name' equal to 'Zion'\n", "zion_data = drop_duplicated[drop_duplicated['Name'] == 'Zion Williamson']\n", "\n", "# Drop duplicates within the 'zion_data'\n", "zion_data_unique = zion_data.drop_duplicates()\n", "\n", "# Display the unique data for Zion\n", "# Select 'pts' and 'pts_change' columns\n", "zion_pts_data = zion_data_unique[['pts', 'pts_change','Specific Injury','season','days_injured']]\n", "\n", "# Display the selected columns\n", "zion_pts_data" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# One-hot encode the 'Specific Injury' column on the merged data\n", "merged_data_encoded = pd.get_dummies(drop_duplicated_injury, columns=['Specific Injury_grouped'], drop_first=True)\n", "\n", "\n", "# Define the features for training (including the one-hot encoded 'Specific Injury' columns)\n", "features = ['age', 'player_height', 'player_weight', 'season'] + [col for col in merged_data_encoded.columns if col.startswith('Specific Injury_grouped')]\n", "\n", "# Define the multiple target columns\n", "targets = ['pts_change', 'ast_change', 'reb_change']\n", "\n", "# Set up the features (X) and target (y) for training\n", "X_merged = merged_data_encoded[features]\n", "y_merged = merged_data_encoded[targets]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "# One-hot encode the 'Specific Injury' column on the merged data\n", "merged_data_encoded = pd.get_dummies(drop_duplicated, columns=['Specific Injury'], drop_first=True)\n", "\n", "\n", "# Define the features for training (including the one-hot encoded 'Specific Injury' columns)\n", "features = ['age', 'player_height', 'player_weight', 'season'] + [col for col in merged_data_encoded.columns if col.startswith('Specific Injury')]\n", "\n", "# Define the multiple target columns\n", "targets = ['pts_change', 'ast_change', 'reb_change']\n", "\n", "# Set up the features (X) and target (y) for training\n", "X_merged = merged_data_encoded[features]\n", "y_merged = merged_data_encoded[targets]" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Mean Absolute Error: 0.47548278985507225\n", "R²: 0.8035047608572542\n", "Model saved successfully!\n" ] } ], "source": [ "from sklearn.model_selection import train_test_split\n", "from sklearn.linear_model import LinearRegression\n", "from sklearn.ensemble import RandomForestRegressor\n", "from sklearn.metrics import mean_absolute_error, r2_score\n", "import pickle\n", "\n", "\n", "# Split into training and testing sets\n", "X_train, X_test, y_train, y_test = train_test_split(X_merged, y_merged, test_size=0.2, random_state=42)\n", "\n", "# Example: Using Random Forest Regressor\n", "model = RandomForestRegressor(random_state=42)\n", "model.fit(X_train, y_train)\n", "\n", "# Make predictions\n", "y_pred = model.predict(X_test)\n", "\n", "# Evaluate the model\n", "mae = mean_absolute_error(y_test, y_pred)\n", "r2 = r2_score(y_test, y_pred)\n", "\n", "print(f'Mean Absolute Error: {mae}')\n", "print(f'R²: {r2}')\n", "\n", "with open('/Users/laraschuman/Desktop/CTP-Project/injury_model.pkl', 'wb') as f:\n", " pickle.dump(model, f)\n", "\n", "print(\"Model saved successfully!\")\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Model loaded successfully!\n", "Predicted values: [[-5.8681 -2.0426 -0.8884]\n", " [ 1.5225 0.2629 1.013 ]\n", " [ 0.9691 -0.26 0.1798]\n", " ...\n", " [-3.653 -1.1847 -0.7626]\n", " [ 2.9848 -0.1239 2.6627]\n", " [ 2.4768 0.6983 2.3908]]\n" ] } ], "source": [ "import pandas as pd\n", "\n", "# Load the pre-trained model\n", "with open('/Users/laraschuman/Desktop/CTP-Project/injury_model.pkl', 'rb') as f:\n", " loaded_model = pickle.load(f)\n", "\n", "print(\"Model loaded successfully!\")\n", "\n", "\n", "# Ensure the input data matches the training feature set\n", "# Reorder or drop columns as necessary to align with X_train during training\n", "training_features = X_train.columns # Use the features from your training set\n", "new_data = merged_data_encoded[training_features] # Select only the relevant columns\n", "\n", "# Apply any preprocessing (e.g., scaling or encoding) used during training, if applicable\n", "\n", "# Make predictions\n", "predictions = loaded_model.predict(new_data)\n", "\n", "print(f\"Predicted values: {predictions}\")\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Model saved successfully!\n", "Mean Absolute Error: 0.9993055525362317\n", "R²: 0.4734527115674096\n", "Model loaded successfully!\n", "Predicted values:\n", " pts_change ast_change reb_change\n", "0 -6.1649 -2.0992 -0.9387\n", "1 0.4630 0.1794 0.1823\n", "2 0.2398 -0.3065 0.0635\n", "3 -1.0060 -0.0507 -0.2680\n", "4 -2.1998 -0.3124 -0.3197\n", ".. ... ... ...\n", "915 -1.6661 -0.1137 0.2257\n", "916 2.9123 0.5944 0.2251\n", "917 -3.2647 -1.0888 -0.5737\n", "918 2.2471 -0.0985 1.8709\n", "919 2.1491 0.6155 2.1662\n", "\n", "[920 rows x 3 columns]\n" ] } ], "source": [ "import pandas as pd\n", "from sklearn.model_selection import train_test_split\n", "from sklearn.ensemble import RandomForestRegressor\n", "from sklearn.metrics import mean_absolute_error, r2_score\n", "import pickle\n", "\n", "# One-hot encode the 'Specific Injury' column\n", "merged_data_encoded = pd.get_dummies(drop_duplicated, columns=['Specific Injury'], drop_first=True)\n", "\n", "# Define features and targets\n", "features = ['age', 'player_height', 'player_weight', 'season'] + \\\n", " [col for col in merged_data_encoded.columns if col.startswith('Specific Injury')]\n", "targets = ['pts_change', 'ast_change', 'reb_change']\n", "\n", "X_merged = merged_data_encoded[features]\n", "y_merged = merged_data_encoded[targets]\n", "\n", "# Split into training and testing sets\n", "X_train, X_test, y_train, y_test = train_test_split(X_merged, y_merged, test_size=0.2, random_state=42)\n", "\n", "# Train the Random Forest Regressor\n", "model = RandomForestRegressor(random_state=42)\n", "model.fit(X_train, y_train)\n", "\n", "# Save the trained model\n", "model_path = '/Users/laraschuman/Desktop/CTP-Project/injury_model.pkl'\n", "with open(model_path, 'wb') as f:\n", " pickle.dump(model, f)\n", "print(\"Model saved successfully!\")\n", "\n", "# Evaluate the model\n", "y_pred = model.predict(X_test)\n", "mae = mean_absolute_error(y_test, y_pred)\n", "r2 = r2_score(y_test, y_pred)\n", "\n", "print(f\"Mean Absolute Error: {mae}\")\n", "print(f\"R²: {r2}\")\n", "\n", "# Load the saved model\n", "with open(model_path, 'rb') as f:\n", " loaded_model = pickle.load(f)\n", "print(\"Model loaded successfully!\")\n", "\n", "# Align new data with training features\n", "training_features = X_train.columns\n", "new_data = merged_data_encoded[training_features] # Ensure the columns match exactly\n", "\n", "# Reindex `new_data` to align with `training_features`\n", "new_data = new_data.reindex(columns=training_features, fill_value=0)\n", "\n", "# Make predictions\n", "predictions = loaded_model.predict(new_data)\n", "\n", "# Format predictions\n", "predicted_df = pd.DataFrame(predictions, columns=targets)\n", "print(\"Predicted values:\")\n", "print(predicted_df)\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Specific Injury\n", "left ankle sprain injury 14\n", "right foot fracture injury 11\n", "right meniscus tear injury 10\n", "left meniscus tear injury 8\n", "left acl tear injury 8\n", "right ankle sprain injury 7\n", "right fractured hand injury 7\n", "left fractured leg injury 6\n", "back surgery injury 5\n", "left hamstring injury injury 5\n", "right fractured leg injury 5\n", "right torn shoulder labrum injury 4\n", "left fractured hand injury 4\n", "right sprained mcl injury 4\n", "left foot fracture injury 4\n", "left torn hamstring injury 3\n", "left sprained mcl injury 3\n", "right hamstring injury injury 3\n", "left shoulder sprain injury 3\n", "left dislocated shoulder injury 3\n", "right quad injury injury 3\n", "left hip labrum injury 3\n", "right shoulder sprain injury 2\n", "right torn rotator cuff injury injury 2\n", "right calf strain injury 2\n", "right foot sprain injury 2\n", "left quad injury injury 2\n", "right ankle fracture injury 2\n", "right hip flexor strain injury 2\n", "left bone spurs injury 2\n", "right hip flexor surgery injury 2\n", "right hip labrum injury 2\n", "right bone spurs injury 2\n", "right torn mcl injury 1\n", "fractured leg injury 1\n", "right acl tear injury 1\n", "left hip flexor surgery injury 1\n", "left ankle fracture injury 1\n", "right arm injury injury 1\n", "left calf strain injury 1\n", "left torn shoulder labrum injury 1\n", "lower back spasm injury 1\n", "Name: count, dtype: int64" ] }, "execution_count": 440, "metadata": {}, "output_type": "execute_result" } ], "source": [ "drop_duplicated['Specific Injury'].value_counts()" ] }, { "cell_type": "code", "execution_count": 489, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/var/folders/8t/t11lp0b952n0xtfmnwbzxzvw0000gn/T/ipykernel_11952/613144370.py:1: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame.\n", "Try using .loc[row_indexer,col_indexer] = value instead\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " drop_duplicated_injury['specific_injury'] = drop_duplicated_injury['Specific Injury_grouped'].str.replace(\"left \", \"\", regex=False)\n", "/var/folders/8t/t11lp0b952n0xtfmnwbzxzvw0000gn/T/ipykernel_11952/613144370.py:2: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame.\n", "Try using .loc[row_indexer,col_indexer] = value instead\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " drop_duplicated_injury['specific_injury'] = drop_duplicated_injury['Specific Injury_grouped'].str.replace(\"right \", \"\", regex=False)\n" ] }, { "data": { "text/plain": [ "0 calf strain injury\n", "1 foot fracture injury\n", "2 left ankle sprain injury\n", "3 left foot fracture injury\n", "4 bone spurs injury\n", " ... \n", "1166 left fractured hand injury\n", "1167 left fractured hand injury\n", "1168 meniscus tear injury\n", "1170 left ankle sprain injury\n", "1171 sprained mcl injury\n", "Name: specific_injury, Length: 920, dtype: object" ] }, "execution_count": 489, "metadata": {}, "output_type": "execute_result" } ], "source": [ "drop_duplicated_injury['specific_injury'] = drop_duplicated_injury['Specific Injury_grouped'].str.replace(\"left \", \"\", regex=False)\n", "drop_duplicated_injury['specific_injury'] = drop_duplicated_injury['Specific Injury_grouped'].str.replace(\"right \", \"\", regex=False)\n", "\n", "# Drop duplicate rows based on 'specific_injury' (if needed)\n", "drop_duplicated_injury['specific_injury']" ] }, { "cell_type": "code", "execution_count": 485, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Nameteam_abbreviationageplayer_heightplayer_weightcountrydraft_yeardraft_rounddraft_numbergp...Specific Injury_groupedpts_changegp_changereb_changeast_changenet_rating_changeoreb_pct_changedreb_pct_changeusg_pct_changets_pct_change
0Jameer NelsonOrlando Magic29.0182.8886.182480USA200412076...right calf strain injury-6.10-12.00-0.91-2.13-4.79-0.01-0.01-0.06-0.02
1Jason ThompsonSacramento Kings24.0210.82113.398000USA200811275...right foot fracture injury1.90-9.401.180.32-1.100.03-0.01-0.010.03
2Gary NealSan Antonio Spurs26.0193.0495.254320USA20103080...left ankle sprain injury1.17-12.000.22-0.33-6.070.010.01-0.010.08
3Glen DavisBoston Celtics25.0205.74131.088088USA200723578...left foot fracture injury-0.626.250.12-0.05-5.080.01-0.01-0.020.00
4Greivis VasquezMemphis Grizzlies24.0198.1295.707912Venezuela201012870...right bone spurs injury-2.27-40.00-0.23-0.35-5.120.000.000.00-0.05
..................................................................
1166Stephen CurryGolden State Warriors32.0190.5083.914520USA2009175...left fractured hand injury-3.71-62.700.630.14-25.340.000.040.01-0.07
1167Svi MykhailiukDetroit Pistons23.0200.6692.986360Ukraine201824756...left fractured hand injury3.152.000.200.603.75-0.01-0.01-0.010.09
1168Zion WilliamsonNew Orleans Pelicans19.0198.12128.820128USA20191124...right meniscus tear injury-4.50-37.00-0.90-1.603.000.01-0.020.00-0.03
1170Richaun HolmesSacramento Kings26.0208.28106.594120USA201523744...left ankle sprain injury3.44-13.403.00-0.104.560.010.02-0.010.06
1171Jakob PoeltlSan Antonio Spurs25.0215.90111.130040Austria20161969...right sprained mcl injury3.32-0.753.180.92-1.35-0.010.000.00-0.02
\n", "

920 rows × 35 columns

\n", "
" ], "text/plain": [ " Name team_abbreviation age player_height \\\n", "0 Jameer Nelson Orlando Magic 29.0 182.88 \n", "1 Jason Thompson Sacramento Kings 24.0 210.82 \n", "2 Gary Neal San Antonio Spurs 26.0 193.04 \n", "3 Glen Davis Boston Celtics 25.0 205.74 \n", "4 Greivis Vasquez Memphis Grizzlies 24.0 198.12 \n", "... ... ... ... ... \n", "1166 Stephen Curry Golden State Warriors 32.0 190.50 \n", "1167 Svi Mykhailiuk Detroit Pistons 23.0 200.66 \n", "1168 Zion Williamson New Orleans Pelicans 19.0 198.12 \n", "1170 Richaun Holmes Sacramento Kings 26.0 208.28 \n", "1171 Jakob Poeltl San Antonio Spurs 25.0 215.90 \n", "\n", " player_weight country draft_year draft_round draft_number gp ... \\\n", "0 86.182480 USA 2004 1 20 76 ... \n", "1 113.398000 USA 2008 1 12 75 ... \n", "2 95.254320 USA 2010 3 0 80 ... \n", "3 131.088088 USA 2007 2 35 78 ... \n", "4 95.707912 Venezuela 2010 1 28 70 ... \n", "... ... ... ... ... ... .. ... \n", "1166 83.914520 USA 2009 1 7 5 ... \n", "1167 92.986360 Ukraine 2018 2 47 56 ... \n", "1168 128.820128 USA 2019 1 1 24 ... \n", "1170 106.594120 USA 2015 2 37 44 ... \n", "1171 111.130040 Austria 2016 1 9 69 ... \n", "\n", " Specific Injury_grouped pts_change gp_change reb_change \\\n", "0 right calf strain injury -6.10 -12.00 -0.91 \n", "1 right foot fracture injury 1.90 -9.40 1.18 \n", "2 left ankle sprain injury 1.17 -12.00 0.22 \n", "3 left foot fracture injury -0.62 6.25 0.12 \n", "4 right bone spurs injury -2.27 -40.00 -0.23 \n", "... ... ... ... ... \n", "1166 left fractured hand injury -3.71 -62.70 0.63 \n", "1167 left fractured hand injury 3.15 2.00 0.20 \n", "1168 right meniscus tear injury -4.50 -37.00 -0.90 \n", "1170 left ankle sprain injury 3.44 -13.40 3.00 \n", "1171 right sprained mcl injury 3.32 -0.75 3.18 \n", "\n", " ast_change net_rating_change oreb_pct_change dreb_pct_change \\\n", "0 -2.13 -4.79 -0.01 -0.01 \n", "1 0.32 -1.10 0.03 -0.01 \n", "2 -0.33 -6.07 0.01 0.01 \n", "3 -0.05 -5.08 0.01 -0.01 \n", "4 -0.35 -5.12 0.00 0.00 \n", "... ... ... ... ... \n", "1166 0.14 -25.34 0.00 0.04 \n", "1167 0.60 3.75 -0.01 -0.01 \n", "1168 -1.60 3.00 0.01 -0.02 \n", "1170 -0.10 4.56 0.01 0.02 \n", "1171 0.92 -1.35 -0.01 0.00 \n", "\n", " usg_pct_change ts_pct_change \n", "0 -0.06 -0.02 \n", "1 -0.01 0.03 \n", "2 -0.01 0.08 \n", "3 -0.02 0.00 \n", "4 0.00 -0.05 \n", "... ... ... \n", "1166 0.01 -0.07 \n", "1167 -0.01 0.09 \n", "1168 0.00 -0.03 \n", "1170 -0.01 0.06 \n", "1171 0.00 -0.02 \n", "\n", "[920 rows x 35 columns]" ] }, "execution_count": 485, "metadata": {}, "output_type": "execute_result" } ], "source": [ "drop_duplicated_injury" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "Specific Injury\n", "foot fracture injury 1\n", "hip flexor surgery injury 1\n", "calf strain injury 1\n", "quad injury injury 1\n", "shoulder sprain injury 1\n", "foot sprain injury 1\n", "torn rotator cuff injury injury 1\n", "torn mcl injury 1\n", "hip flexor strain injury 1\n", "fractured leg injury 1\n", "sprained mcl injury 1\n", "ankle sprain injury 1\n", "hamstring injury injury 1\n", "meniscus tear injury 1\n", "torn hamstring injury 1\n", "dislocated shoulder injury 1\n", "ankle fracture injury 1\n", "fractured hand injury 1\n", "bone spurs injury 1\n", "acl tear injury 1\n", "hip labrum injury 1\n", "back surgery injury 1\n", "arm injury injury 1\n", "torn shoulder labrum injury 1\n", "lower back spasm injury 1\n", "Name: count, dtype: int64" ] }, "execution_count": 448, "metadata": {}, "output_type": "execute_result" } ], "source": [ "drop_duplicated['Specific Injury'].value_counts()" ] }, { "cell_type": "code", "execution_count": 492, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Evaluation Metrics by Target:\n", "pts_change: MAE = 1.273210045289855, R² = 0.6798948156812019\n", "ast_change: MAE = 0.31011294038992415, R² = 0.7708349254437783\n", "reb_change: MAE = 0.43051706521739136, R² = 0.6194001893032414\n", "Top Features by Importance:\n", " Feature Importance\n", "2 player_weight 0.341395\n", "1 player_height 0.223940\n", "0 age 0.117367\n", "12 specific_injury_fractured leg injury 0.053004\n", "13 specific_injury_sprained mcl injury 0.031181\n", "Model saved successfully to /Users/laraschuman/Desktop/CTP-Project/rf_injury_change_model.pkl!\n" ] } ], "source": [ "import pandas as pd\n", "from sklearn.model_selection import train_test_split\n", "from sklearn.ensemble import RandomForestRegressor\n", "from sklearn.multioutput import MultiOutputRegressor\n", "from sklearn.metrics import mean_absolute_error, r2_score\n", "from sklearn.impute import SimpleImputer\n", "import pickle\n", "\n", "# Step 1: Preprocess the Data\n", "# One-hot encode 'Specific Injury' column\n", "merged_data_encoded = pd.get_dummies(drop_duplicated_injury, columns=['specific_injury'], drop_first=True)\n", "\n", "# Features for training (including one-hot encoded 'Specific Injury')\n", "features = ['age', 'player_height', 'player_weight', 'days_injured'] + [\n", " col for col in merged_data_encoded.columns if col.startswith('specific_injury_')\n", "]\n", "\n", "# Targets\n", "targets = ['pts_change', 'ast_change', 'reb_change']\n", "\n", "# Set up features (X) and targets (y)\n", "X_merged = merged_data_encoded[features]\n", "y_merged = merged_data_encoded[targets]\n", "\n", "# Handle missing values\n", "imputer = SimpleImputer(strategy=\"median\")\n", "X_merged = pd.DataFrame(imputer.fit_transform(X_merged), columns=X_merged.columns)\n", "\n", "# Step 2: Train-Test Split\n", "X_train, X_test, y_train, y_test = train_test_split(X_merged, y_merged, test_size=0.2, random_state=42)\n", "\n", "# Step 3: Train the Model\n", "rf_model = MultiOutputRegressor(RandomForestRegressor(random_state=42))\n", "rf_model.fit(X_train, y_train)\n", "\n", "# Step 4: Evaluate the Model\n", "y_pred = rf_model.predict(X_test)\n", "\n", "# Calculate Mean Absolute Error (MAE) and R² for each target\n", "evaluation_metrics = {\n", " target: {\n", " 'MAE': mean_absolute_error(y_test[target], y_pred[:, idx]),\n", " 'R²': r2_score(y_test[target], y_pred[:, idx])\n", " }\n", " for idx, target in enumerate(targets)\n", "}\n", "\n", "print(\"Evaluation Metrics by Target:\")\n", "for target, metrics in evaluation_metrics.items():\n", " print(f\"{target}: MAE = {metrics['MAE']}, R² = {metrics['R²']}\")\n", "\n", "# Step 5: Feature Importance\n", "# Get feature importance from the first Random Forest Regressor as a proxy\n", "feature_importances = pd.DataFrame({\n", " 'Feature': X_merged.columns,\n", " 'Importance': rf_model.estimators_[0].feature_importances_\n", "}).sort_values(by='Importance', ascending=False)\n", "\n", "print(\"Top Features by Importance:\")\n", "print(feature_importances.head())\n", "\n", "# Step 6: Save the Model\n", "model_path = '/Users/laraschuman/Desktop/CTP-Project/rf_injury_change_model.pkl'\n", "with open(model_path, 'wb') as f:\n", " pickle.dump(rf_model, f)\n", "\n", "print(f\"Model saved successfully to {model_path}!\")\n" ] }, { "cell_type": "code", "execution_count": 493, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/var/folders/8t/t11lp0b952n0xtfmnwbzxzvw0000gn/T/ipykernel_11952/1065402976.py:42: SettingWithCopyWarning: \n", "A value is trying to be set on a copy of a slice from a DataFrame.\n", "Try using .loc[row_indexer,col_indexer] = value instead\n", "\n", "See the caveats in the documentation: https://pandas.pydata.org/pandas-docs/stable/user_guide/indexing.html#returning-a-view-versus-a-copy\n", " drop_duplicated_injury['specific_injury'] = pd.Categorical(drop_duplicated_injury['specific_injury'], categories=injury_list, ordered=False)\n" ] }, { "name": "stdout", "output_type": "stream", "text": [ "Evaluation Metrics by Target:\n", "pts_change: MAE = 1.225611992753623, R² = 0.6870166424159725\n", "ast_change: MAE = 0.30058603260869576, R² = 0.7719661450788522\n", "reb_change: MAE = 0.41204285196687374, R² = 0.6485851920112147\n", "Top Injury Features by Importance:\n", " Feature Importance\n", "13 fractured leg injury 0.053007\n", "14 sprained mcl injury 0.030865\n", "7 quad injury injury 0.026076\n", "15 ankle sprain injury 0.025429\n", "4 foot fracture injury 0.023935\n", "17 meniscus tear injury 0.022673\n", "16 hamstring injury injury 0.022511\n", "5 hip flexor surgery injury 0.021272\n", "6 calf strain injury 0.017689\n", "27 torn shoulder labrum injury 0.012570\n" ] }, { "data": { "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" }, { "data": { "image/png": "", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" }, { "name": "stdout", "output_type": "stream", "text": [ "Model saved successfully to /Users/laraschuman/Desktop/CTP-Project/rf_injury_change_model.pkl!\n" ] } ], "source": [ "import pandas as pd\n", "from sklearn.model_selection import train_test_split\n", "from sklearn.ensemble import RandomForestRegressor\n", "from sklearn.multioutput import MultiOutputRegressor\n", "from sklearn.metrics import mean_absolute_error, r2_score\n", "from sklearn.impute import SimpleImputer\n", "import pickle\n", "import matplotlib.pyplot as plt\n", "\n", "# List of all possible injuries to ensure consistent encoding\n", "injury_list = [\n", " \"foot fracture injury\",\n", " \"hip flexor surgery injury\",\n", " \"calf strain injury\",\n", " \"quad injury injury\",\n", " \"shoulder sprain injury\",\n", " \"foot sprain injury\",\n", " \"torn rotator cuff injury injury\",\n", " \"torn mcl injury\",\n", " \"hip flexor strain injury\",\n", " \"fractured leg injury\",\n", " \"sprained mcl injury\",\n", " \"ankle sprain injury\",\n", " \"hamstring injury injury\",\n", " \"meniscus tear injury\",\n", " \"torn hamstring injury\",\n", " \"dislocated shoulder injury\",\n", " \"ankle fracture injury\",\n", " \"fractured hand injury\",\n", " \"bone spurs injury\",\n", " \"acl tear injury\",\n", " \"hip labrum injury\",\n", " \"back surgery injury\",\n", " \"arm injury injury\",\n", " \"torn shoulder labrum injury\",\n", " \"lower back spasm injury\"\n", "]\n", "\n", "\n", "# Step 1: Preprocess the Data\n", "# One-hot encode 'specific_injury' column using the injury_list\n", "drop_duplicated_injury['specific_injury'] = pd.Categorical(drop_duplicated_injury['specific_injury'], categories=injury_list, ordered=False)\n", "merged_data_encoded = pd.get_dummies(drop_duplicated_injury, columns=['specific_injury'], prefix='', prefix_sep='')\n", "\n", "# Features for training (ensuring all injuries are included)\n", "features = ['age', 'player_height', 'player_weight', 'days_injured'] + [\n", " injury for injury in injury_list if injury in merged_data_encoded.columns\n", "]\n", "\n", "# Targets\n", "targets = ['pts_change', 'ast_change', 'reb_change']\n", "\n", "# Set up features (X) and targets (y)\n", "X_merged = merged_data_encoded[features]\n", "y_merged = merged_data_encoded[targets]\n", "\n", "# Handle missing values\n", "imputer = SimpleImputer(strategy=\"median\")\n", "X_merged = pd.DataFrame(imputer.fit_transform(X_merged), columns=X_merged.columns)\n", "\n", "# Step 2: Train-Test Split\n", "X_train, X_test, y_train, y_test = train_test_split(X_merged, y_merged, test_size=0.2, random_state=42)\n", "\n", "# Step 3: Train the Model\n", "rf_model = MultiOutputRegressor(RandomForestRegressor(random_state=42))\n", "rf_model.fit(X_train, y_train)\n", "\n", "# Step 4: Evaluate the Model\n", "y_pred = rf_model.predict(X_test)\n", "\n", "# Calculate Mean Absolute Error (MAE) and R² for each target\n", "evaluation_metrics = {\n", " target: {\n", " 'MAE': mean_absolute_error(y_test[target], y_pred[:, idx]),\n", " 'R²': r2_score(y_test[target], y_pred[:, idx])\n", " }\n", " for idx, target in enumerate(targets)\n", "}\n", "\n", "print(\"Evaluation Metrics by Target:\")\n", "for target, metrics in evaluation_metrics.items():\n", " print(f\"{target}: MAE = {metrics['MAE']}, R² = {metrics['R²']}\")\n", "\n", "# Step 5: Feature Importance\n", "# Get feature importance from the first Random Forest Regressor as a proxy\n", "feature_importances = pd.DataFrame({\n", " 'Feature': X_merged.columns,\n", " 'Importance': rf_model.estimators_[0].feature_importances_\n", "}).sort_values(by='Importance', ascending=False)\n", "\n", "# Display and analyze specific injury impact\n", "injury_importances = feature_importances[feature_importances['Feature'].isin(injury_list)]\n", "\n", "print(\"Top Injury Features by Importance:\")\n", "print(injury_importances.head(10))\n", "\n", "# Plot injury feature importances\n", "plt.figure(figsize=(12, 6))\n", "injury_importances.head(10).plot(kind='barh', x='Feature', y='Importance', legend=False, title=\"Top 10 Injury Features by Importance\")\n", "plt.xlabel(\"Importance\")\n", "plt.ylabel(\"Injury Type\")\n", "plt.show()\n", "\n", "# Save feature importance\n", "feature_importances.to_csv('/Users/laraschuman/Desktop/CTP-Project/feature_importances.csv', index=False)\n", "\n", "# Step 6: Save the Model\n", "model_path = '/Users/laraschuman/Desktop/CTP-Project/rf_injury_change_model.pkl'\n", "with open(model_path, 'wb') as f:\n", " pickle.dump(rf_model, f)\n", "\n", "print(f\"Model saved successfully to {model_path}!\")\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "threshold = 0" ] }, { "cell_type": "code", "execution_count": 498, "metadata": {}, "outputs": [ { "data": { "text/plain": [ "days_injured\n", "0.0 766\n", "260.0 5\n", "125.0 4\n", "129.0 3\n", "246.0 3\n", " ... \n", "242.0 1\n", "136.0 1\n", "184.0 1\n", "191.0 1\n", "152.0 1\n", "Name: count, Length: 115, dtype: int64" ] }, "execution_count": 498, "metadata": {}, "output_type": "execute_result" } ], "source": [ "drop_duplicated_injury['days_injured'].value_counts()" ] }, { "cell_type": "code", "execution_count": 500, "metadata": {}, "outputs": [ { "data": { "text/html": [ "
\n", "\n", "\n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", " \n", "
Nameteam_abbreviationageplayer_heightplayer_weightcountrydraft_yeardraft_rounddraft_numbergp...pts_changegp_changereb_changeast_changenet_rating_changeoreb_pct_changedreb_pct_changeusg_pct_changets_pct_changespecific_injury
934Reggie EvansToronto Raptors31.0203.20111.130040USA20103030...0.87-29.254.380.82-6.350.030.01-0.02-0.03foot fracture injury
935Jason ThompsonSacramento Kings25.0210.82113.398000USA200811264...1.90-9.401.180.32-1.100.03-0.01-0.010.03foot fracture injury
936Eric BledsoeLos Angeles Clippers22.0185.4288.450440USA201011840...-11.91-26.20-2.49-3.342.060.03-0.01-0.03-0.10meniscus tear injury
937Dominique JonesDallas Mavericks23.0195.5897.522280USA201012533...-0.459.50-0.20-0.70-6.35-0.020.01-0.030.07foot fracture injury
939Toney DouglasNew York Knicks26.0187.9683.914520USA200912938...-0.47-12.00-0.30-0.22-1.370.00-0.020.06-0.11torn shoulder labrum injury
..................................................................
1166Stephen CurryGolden State Warriors32.0190.5083.914520USA2009175...-3.71-62.700.630.14-25.340.000.040.01-0.07NaN
1167Svi MykhailiukDetroit Pistons23.0200.6692.986360Ukraine201824756...3.152.000.200.603.75-0.01-0.01-0.010.09NaN
1168Zion WilliamsonNew Orleans Pelicans19.0198.12128.820128USA20191124...-4.50-37.00-0.90-1.603.000.01-0.020.00-0.03meniscus tear injury
1170Richaun HolmesSacramento Kings26.0208.28106.594120USA201523744...3.44-13.403.00-0.104.560.010.02-0.010.06NaN
1171Jakob PoeltlSan Antonio Spurs25.0215.90111.130040Austria20161969...3.32-0.753.180.92-1.35-0.010.000.00-0.02sprained mcl injury
\n", "

154 rows × 36 columns

\n", "
" ], "text/plain": [ " Name team_abbreviation age player_height \\\n", "934 Reggie Evans Toronto Raptors 31.0 203.20 \n", "935 Jason Thompson Sacramento Kings 25.0 210.82 \n", "936 Eric Bledsoe Los Angeles Clippers 22.0 185.42 \n", "937 Dominique Jones Dallas Mavericks 23.0 195.58 \n", "939 Toney Douglas New York Knicks 26.0 187.96 \n", "... ... ... ... ... \n", "1166 Stephen Curry Golden State Warriors 32.0 190.50 \n", "1167 Svi Mykhailiuk Detroit Pistons 23.0 200.66 \n", "1168 Zion Williamson New Orleans Pelicans 19.0 198.12 \n", "1170 Richaun Holmes Sacramento Kings 26.0 208.28 \n", "1171 Jakob Poeltl San Antonio Spurs 25.0 215.90 \n", "\n", " player_weight country draft_year draft_round draft_number gp ... \\\n", "934 111.130040 USA 2010 3 0 30 ... \n", "935 113.398000 USA 2008 1 12 64 ... \n", "936 88.450440 USA 2010 1 18 40 ... \n", "937 97.522280 USA 2010 1 25 33 ... \n", "939 83.914520 USA 2009 1 29 38 ... \n", "... ... ... ... ... ... .. ... \n", "1166 83.914520 USA 2009 1 7 5 ... \n", "1167 92.986360 Ukraine 2018 2 47 56 ... \n", "1168 128.820128 USA 2019 1 1 24 ... \n", "1170 106.594120 USA 2015 2 37 44 ... \n", "1171 111.130040 Austria 2016 1 9 69 ... \n", "\n", " pts_change gp_change reb_change ast_change net_rating_change \\\n", "934 0.87 -29.25 4.38 0.82 -6.35 \n", "935 1.90 -9.40 1.18 0.32 -1.10 \n", "936 -11.91 -26.20 -2.49 -3.34 2.06 \n", "937 -0.45 9.50 -0.20 -0.70 -6.35 \n", "939 -0.47 -12.00 -0.30 -0.22 -1.37 \n", "... ... ... ... ... ... \n", "1166 -3.71 -62.70 0.63 0.14 -25.34 \n", "1167 3.15 2.00 0.20 0.60 3.75 \n", "1168 -4.50 -37.00 -0.90 -1.60 3.00 \n", "1170 3.44 -13.40 3.00 -0.10 4.56 \n", "1171 3.32 -0.75 3.18 0.92 -1.35 \n", "\n", " oreb_pct_change dreb_pct_change usg_pct_change ts_pct_change \\\n", "934 0.03 0.01 -0.02 -0.03 \n", "935 0.03 -0.01 -0.01 0.03 \n", "936 0.03 -0.01 -0.03 -0.10 \n", "937 -0.02 0.01 -0.03 0.07 \n", "939 0.00 -0.02 0.06 -0.11 \n", "... ... ... ... ... \n", "1166 0.00 0.04 0.01 -0.07 \n", "1167 -0.01 -0.01 -0.01 0.09 \n", "1168 0.01 -0.02 0.00 -0.03 \n", "1170 0.01 0.02 -0.01 0.06 \n", "1171 -0.01 0.00 0.00 -0.02 \n", "\n", " specific_injury \n", "934 foot fracture injury \n", "935 foot fracture injury \n", "936 meniscus tear injury \n", "937 foot fracture injury \n", "939 torn shoulder labrum injury \n", "... ... \n", "1166 NaN \n", "1167 NaN \n", "1168 meniscus tear injury \n", "1170 NaN \n", "1171 sprained mcl injury \n", "\n", "[154 rows x 36 columns]" ] }, "execution_count": 500, "metadata": {}, "output_type": "execute_result" } ], "source": [ "# Filter out rows where 'days_injured' is equal to zero\n", "filtered_injury_data = drop_duplicated_injury[drop_duplicated_injury['days_injured'] != 0]\n", "\n", "# Display the value counts for the filtered data\n", "filtered_injury_data" ] }, { "cell_type": "code", "execution_count": 501, "metadata": {}, "outputs": [ { "name": "stderr", "output_type": "stream", "text": [ "/var/folders/8t/t11lp0b952n0xtfmnwbzxzvw0000gn/T/ipykernel_11952/1296445815.py:1: FutureWarning: The default of observed=False is deprecated and will be changed to True in a future version of pandas. Pass observed=False to retain current behavior or observed=True to adopt the future default and silence this warning.\n", " avg_days_injured = filtered_injury_data.groupby('specific_injury')['days_injured'].mean()\n" ] }, { "data": { "text/plain": [ "specific_injury\n", "foot fracture injury 207.666667\n", "hip flexor surgery injury 256.000000\n", "calf strain injury 236.000000\n", "quad injury injury 283.000000\n", "shoulder sprain injury 259.500000\n", "foot sprain injury 294.000000\n", "torn rotator cuff injury injury NaN\n", "torn mcl injury 271.000000\n", "hip flexor strain injury 253.000000\n", "fractured leg injury 250.250000\n", "sprained mcl injury 228.666667\n", "ankle sprain injury 231.333333\n", "hamstring injury injury 220.000000\n", "meniscus tear injury 201.250000\n", "torn hamstring injury NaN\n", "dislocated shoulder injury NaN\n", "ankle fracture injury 114.500000\n", "fractured hand injury 169.142857\n", "bone spurs injury 151.500000\n", "acl tear injury 268.000000\n", "hip labrum injury 247.500000\n", "back surgery injury 215.800000\n", "arm injury injury 303.666667\n", "torn shoulder labrum injury 195.666667\n", "lower back spasm injury NaN\n", "Name: days_injured, dtype: float64" ] }, "execution_count": 501, "metadata": {}, "output_type": "execute_result" } ], "source": [ "avg_days_injured = filtered_injury_data.groupby('specific_injury')['days_injured'].mean()\n", "avg_days_injured" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "y_test_class = (y_test['pts_change'] > threshold).astype(int)\n", "\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], "source": [ "if y_pred.ndim == 1: # For single-output regression\n", " y_pred_class = (y_pred > threshold).astype(int)\n", "else: # Multi-output regression\n", " y_pred_class = (y_pred[:, 0] > threshold).astype(int)\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "name": "stdout", "output_type": "stream", "text": [ "Accuracy: 0.91\n", "Recall: 0.86\n", "F1-Score: 0.89\n" ] } ], "source": [ "from sklearn.metrics import accuracy_score, recall_score, f1_score\n", "\n", "# Classification Metrics\n", "accuracy = accuracy_score(y_test_class, y_pred_class)\n", "recall = recall_score(y_test_class, y_pred_class)\n", "f1 = f1_score(y_test_class, y_pred_class)\n", "\n", "print(f'Accuracy: {accuracy:.2f}')\n", "print(f'Recall: {recall:.2f}')\n", "print(f'F1-Score: {f1:.2f}')" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "image/png": "", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "import seaborn as sns\n", "import matplotlib.pyplot as plt\n", "\n", "sns.barplot(data=injury_grouped_data, x='Specific Injury', y='pts_change')\n", "plt.xticks(rotation=90)\n", "plt.title('Performance Change by Injury Type')\n", "plt.xlabel('Injury Type')\n", "plt.ylabel('Average Points Change')\n", "plt.show()\n" ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [ { "data": { "image/png": "", "text/plain": [ "
" ] }, "metadata": {}, "output_type": "display_data" } ], "source": [ "corr_matrix = merged_data_encoded[['pts_change', 'gp_change', 'reb_change', 'ast_change', 'net_rating_change']].corr()\n", "plt.figure(figsize=(8,6))\n", "sns.heatmap(corr_matrix, annot=True, cmap='coolwarm', fmt='.2f', cbar=True)\n", "plt.title('Correlation Matrix of Performance Changes')\n", "plt.show()\n" ] } ], "metadata": { "kernelspec": { "display_name": "base", "language": "python", "name": "python3" }, "language_info": { "codemirror_mode": { "name": "ipython", "version": 3 }, "file_extension": ".py", "mimetype": "text/x-python", "name": "python", "nbconvert_exporter": "python", "pygments_lexer": "ipython3", "version": "3.12.4" } }, "nbformat": 4, "nbformat_minor": 2 }