{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Compute features and labels\n",
    "\n",
    "Reads the SPADL actions of every game from `spadl-statsbomb.h5` and stores one feature table and one label table per game in `features.h5` and `labels.h5`."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 1,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2023-12-30T16:22:50.898496Z",
     "iopub.status.busy": "2023-12-30T16:22:50.897517Z",
     "iopub.status.idle": "2023-12-30T16:22:51.655894Z",
     "shell.execute_reply": "2023-12-30T16:22:51.655400Z"
    }
   },
   "outputs": [],
   "source": [
    "import os\n",
    "import warnings\n",
    "import tqdm\n",
    "import pandas as pd\n",
    "warnings.simplefilter(action='ignore', category=pd.errors.PerformanceWarning)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 2,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2023-12-30T16:22:51.657898Z",
     "iopub.status.busy": "2023-12-30T16:22:51.657688Z",
     "iopub.status.idle": "2023-12-30T16:22:53.073634Z",
     "shell.execute_reply": "2023-12-30T16:22:53.073077Z"
    }
   },
   "outputs": [],
   "source": [
    "%load_ext autoreload\n",
    "%autoreload 2\n",
    "import socceraction.spadl as spadl\n",
    "import socceraction.vaep.features as fs\n",
    "import socceraction.vaep.labels as lab"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Select data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 3,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2023-12-30T16:22:53.075787Z",
     "iopub.status.busy": "2023-12-30T16:22:53.075566Z",
     "iopub.status.idle": "2023-12-30T16:22:53.099754Z",
     "shell.execute_reply": "2023-12-30T16:22:53.099105Z"
    }
   },
   "outputs": [],
   "source": [
    "# Configure file and folder names\n",
    "datafolder = \"../data-fifa\"\n",
    "spadl_h5 = os.path.join(datafolder, \"spadl-statsbomb.h5\")\n",
    "features_h5 = os.path.join(datafolder, \"features.h5\")\n",
    "labels_h5 = os.path.join(datafolder, \"labels.h5\")\n",
    "\n",
    "# Second argument handed to fs.gamestates for every game.\n",
    "# NOTE(review): presumably the number of consecutive actions that make up\n",
    "# one game state - confirm against the socceraction documentation.\n",
    "nb_prev_actions = 3"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2023-12-30T16:22:53.102252Z",
     "iopub.status.busy": "2023-12-30T16:22:53.102092Z",
     "iopub.status.idle": "2023-12-30T16:22:54.357338Z",
     "shell.execute_reply": "2023-12-30T16:22:54.356656Z"
    }
   },
   "outputs": [
    {
     "name": "stdout",
     "output_type": "stream",
     "text": [
      "nb of games: 64\n"
     ]
    }
   ],
   "source": [
    "# One row per game; the loops below read game_id and home_team_id from it\n",
    "games = pd.read_hdf(spadl_h5, \"games\")\n",
    "print(\"nb of games:\", len(games))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Compute features"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2023-12-30T16:22:54.359778Z",
     "iopub.status.busy": "2023-12-30T16:22:54.359474Z",
     "iopub.status.idle": "2023-12-30T16:23:15.688875Z",
     "shell.execute_reply": "2023-12-30T16:23:15.688214Z"
    }
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Generating and storing features in ../data-fifa/features.h5: 100%|█████████████████████| 64/64 [00:20<00:00, 3.20it/s]\n"
     ]
    }
   ],
   "source": [
    "# Feature functions: each one is called on the game states of a game and\n",
    "# the resulting frames are concatenated column-wise into one table.\n",
    "xfns = [\n",
    "    fs.actiontype,\n",
    "    fs.actiontype_onehot,\n",
    "    fs.bodypart,\n",
    "    fs.bodypart_onehot,\n",
    "    fs.result,\n",
    "    fs.result_onehot,\n",
    "    fs.goalscore,\n",
    "    fs.startlocation,\n",
    "    fs.endlocation,\n",
    "    fs.movement,\n",
    "    fs.space_delta,\n",
    "    fs.startpolar,\n",
    "    fs.endpolar,\n",
    "    fs.team,\n",
    "    fs.time,\n",
    "    fs.time_delta\n",
    "]\n",
    "\n",
    "# The SPADL store is only read here, so it is opened read-only\n",
    "# (pd.HDFStore defaults to append mode).\n",
    "with pd.HDFStore(spadl_h5, mode=\"r\") as spadlstore, pd.HDFStore(features_h5) as featurestore:\n",
    "    for game in tqdm.tqdm(list(games.itertuples()), desc=f\"Generating and storing features in {features_h5}\"):\n",
    "        actions = spadlstore[f\"actions/game_{game.game_id}\"]\n",
    "        gamestates = fs.gamestates(spadl.add_names(actions), nb_prev_actions)\n",
    "        gamestates = fs.play_left_to_right(gamestates, game.home_team_id)\n",
    "\n",
    "        X = pd.concat([fn(gamestates) for fn in xfns], axis=1)\n",
    "        # One table per game, keyed by game id\n",
    "        featurestore.put(f\"game_{game.game_id}\", X, format='table')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Compute labels"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {
    "execution": {
     "iopub.execute_input": "2023-12-30T16:23:15.691809Z",
     "iopub.status.busy": "2023-12-30T16:23:15.691615Z",
     "iopub.status.idle": "2023-12-30T16:23:21.874808Z",
     "shell.execute_reply": "2023-12-30T16:23:21.873954Z"
    }
   },
   "outputs": [
    {
     "name": "stderr",
     "output_type": "stream",
     "text": [
      "Computing and storing labels in ../data-fifa/labels.h5: 100%|██████████████████████████| 64/64 [00:06<00:00, 10.56it/s]\n"
     ]
    }
   ],
   "source": [
    "# Label functions: each one is called on the actions of a game and the\n",
    "# resulting frames are concatenated column-wise into one table.\n",
    "yfns = [lab.scores, lab.concedes, lab.goal_from_shot]\n",
    "\n",
    "# The SPADL store is only read here, so it is opened read-only\n",
    "# (pd.HDFStore defaults to append mode).\n",
    "with pd.HDFStore(spadl_h5, mode=\"r\") as spadlstore, pd.HDFStore(labels_h5) as labelstore:\n",
    "    for game in tqdm.tqdm(list(games.itertuples()), desc=f\"Computing and storing labels in {labels_h5}\"):\n",
    "        actions = spadlstore[f\"actions/game_{game.game_id}\"]\n",
    "        # Call spadl.add_names once per game instead of once per label function\n",
    "        named_actions = spadl.add_names(actions)\n",
    "        Y = pd.concat([fn(named_actions) for fn in yfns], axis=1)\n",
    "        # One table per game, keyed by game id\n",
    "        labelstore.put(f\"game_{game.game_id}\", Y, format='table')"
   ]
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "socceraction",
   "language": "python",
   "name": "socceraction"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.1"
  },
  "toc": {
   "base_numbering": 1,
   "nav_menu": {},
   "number_sections": true,
   "sideBar": true,
   "skip_h1_title": false,
   "title_cell": "Table of Contents",
   "title_sidebar": "Contents",
   "toc_cell": false,
   "toc_position": {},
   "toc_section_display": true,
   "toc_window_display": true
  },
  "varInspector": {
   "cols": {
    "lenName": 16,
    "lenType": 16,
    "lenVar": 40
   },
   "kernels_config": {
    "python": {
     "delete_cmd_postfix": "",
     "delete_cmd_prefix": "del ",
     "library": "var_list.py",
     "varRefreshCmd": "print(var_dic_list())"
    },
    "r": {
     "delete_cmd_postfix": ") ",
     "delete_cmd_prefix": "rm(",
     "library": "var_list.r",
     "varRefreshCmd": "cat(var_dic_list()) "
    }
   },
   "types_to_exclude": [
    "module",
    "function",
    "builtin_function_or_method",
    "instance",
    "_Feature"
   ],
   "window_display": false
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}