File size: 4,022 Bytes
d6ea71e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 |
"""Implements the label tranformers of the VAEP framework."""
import pandas as pd # type: ignore
from pandera.typing import DataFrame
import socceraction.spadl.config as spadl
from socceraction.spadl.schema import SPADLSchema
def scores(actions: DataFrame[SPADLSchema], nr_actions: int = 10) -> pd.DataFrame:
    """Determine whether the team possessing the ball scored a goal within the next x actions.

    The look-ahead window consists of the current action and the
    ``nr_actions - 1`` actions that follow it. Where the window runs past the
    end of the game, it is padded by repeating the last action.

    Parameters
    ----------
    actions : pd.DataFrame
        The actions of a game.
    nr_actions : int, default=10  # noqa: DAR103
        Size of the look-ahead window, counting the current action. With a
        value of 1 or less only the current action is considered.

    Returns
    -------
    pd.DataFrame
        A dataframe with a column 'scores' and a row for each action set to
        True if a goal was scored by the team possessing the ball within the
        look-ahead window; otherwise False. The index of `actions` is kept.
    """
    is_shot = actions["type_name"].str.contains("shot")
    goals = is_shot & (actions["result_id"] == spadl.results.index("success"))
    owngoals = is_shot & (actions["result_id"] == spadl.results.index("owngoal"))
    team_ids = actions["team_id"]

    # A goal on the current action always counts for the acting team.
    res = goals

    # An empty game has no last action to pad with, so skip the look-ahead.
    if len(actions) > 0:
        last_team = team_ids.iloc[-1]
        last_goal = goals.iloc[-1]
        last_owngoal = owngoals.iloc[-1]
        for i in range(1, nr_actions):
            # Value of each column i actions ahead; rows past the end of the
            # game see the last action repeated.
            future_team = team_ids.shift(-i, fill_value=last_team)
            future_goal = goals.shift(-i, fill_value=last_goal)
            future_owngoal = owngoals.shift(-i, fill_value=last_owngoal)

            # Either the same team scores, or the opponent scores an own goal.
            gi = future_goal & (future_team == team_ids)
            ogi = future_owngoal & (future_team != team_ids)
            res = res | gi | ogi

    # Build from a dict: passing a *named* Series together with `columns=`
    # selects by name and would yield an all-NaN column.
    return pd.DataFrame({"scores": res})
def concedes(actions: DataFrame[SPADLSchema], nr_actions: int = 10) -> pd.DataFrame:
    """Determine whether the team possessing the ball conceded a goal within the next x actions.

    The look-ahead window consists of the current action and the
    ``nr_actions - 1`` actions that follow it. Where the window runs past the
    end of the game, it is padded by repeating the last action.

    Parameters
    ----------
    actions : pd.DataFrame
        The actions of a game.
    nr_actions : int, default=10  # noqa: DAR103
        Size of the look-ahead window, counting the current action. With a
        value of 1 or less only the current action is considered.

    Returns
    -------
    pd.DataFrame
        A dataframe with a column 'concedes' and a row for each action set to
        True if a goal was conceded by the team possessing the ball within the
        look-ahead window; otherwise False. The index of `actions` is kept.
    """
    is_shot = actions["type_name"].str.contains("shot")
    goals = is_shot & (actions["result_id"] == spadl.results.index("success"))
    owngoals = is_shot & (actions["result_id"] == spadl.results.index("owngoal"))
    team_ids = actions["team_id"]

    # An own goal on the current action is conceded by the acting team.
    res = owngoals

    # An empty game has no last action to pad with, so skip the look-ahead.
    if len(actions) > 0:
        last_team = team_ids.iloc[-1]
        last_goal = goals.iloc[-1]
        last_owngoal = owngoals.iloc[-1]
        for i in range(1, nr_actions):
            # Value of each column i actions ahead; rows past the end of the
            # game see the last action repeated.
            future_team = team_ids.shift(-i, fill_value=last_team)
            future_goal = goals.shift(-i, fill_value=last_goal)
            future_owngoal = owngoals.shift(-i, fill_value=last_owngoal)

            # Either the opponent scores, or the same team scores an own goal.
            gi = future_goal & (future_team != team_ids)
            ogi = future_owngoal & (future_team == team_ids)
            res = res | gi | ogi

    # Build from a dict: passing a *named* Series together with `columns=`
    # selects by name and would yield an all-NaN column.
    return pd.DataFrame({"concedes": res})
def goal_from_shot(actions: DataFrame[SPADLSchema]) -> pd.DataFrame:
    """Determine whether a goal was scored from the current action.

    This label can be used to train an xG model.

    Parameters
    ----------
    actions : pd.DataFrame
        The actions of a game.

    Returns
    -------
    pd.DataFrame
        A dataframe with a column 'goal_from_shot' and a row for each action
        set to True if a goal was scored from the current action; otherwise
        False. The index of `actions` is kept.
    """
    is_shot = actions["type_name"].str.contains("shot")
    is_success = actions["result_id"] == spadl.results.index("success")
    # Build from a dict so the column name never depends on the name of the
    # combined Series.
    return pd.DataFrame({"goal_from_shot": is_shot & is_success})
|