File size: 9,356 Bytes
d6ea71e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 205 206 207 208 209 210 211 212 213 214 215 216 217 218 219 220 221 222 223 224 225 226 227 228 229 230 231 232 233 234 235 236 237 238 239 240 241 242 243 244 245 246 247 248 249 250 251 252 253 254 255 256 257 258 259 260 261 262 263 264 265 266 267 268 269 270 271 272 273 274 275 276 277 278 279 280 281 282 283 284 285 286 287 288 289 290 291 292 293 294 295 296 297 |
"""Opta event stream data to SPADL converter."""
from typing import Any, cast
import pandas as pd # type: ignore
from pandera.typing import DataFrame
from . import config as spadlconfig
from .base import (
_add_dribbles,
_fix_clearances,
_fix_direction_of_play,
min_dribble_length,
)
from .schema import SPADLSchema
def convert_to_actions(events: pd.DataFrame, home_team_id: int) -> DataFrame[SPADLSchema]:
    """
    Convert Opta events to SPADL actions.

    The events are first mapped one-to-one onto candidate actions (ids,
    clock, pitch coordinates, type, result and body part). Rows that end up
    as ``non_action`` are then dropped, the remainder is sorted by game,
    period and time, and the SPADL post-processing fixes are applied.

    Parameters
    ----------
    events : pd.DataFrame
        DataFrame containing Opta events from a single game.
    home_team_id : int
        ID of the home team in the corresponding game.

    Returns
    -------
    actions : pd.DataFrame
        DataFrame with corresponding SPADL actions.
    """
    actions = pd.DataFrame()
    actions["game_id"] = events.game_id
    actions["original_event_id"] = events.event_id.astype(object)
    actions["period_id"] = events.period_id

    # Turn the running match clock into seconds within the period: every
    # period after the 1st and 2nd removes 45 minutes, every period after the
    # 3rd and 4th removes another 15 minutes.
    regular_period = 45 * 60
    extra_period = 15 * 60
    period = events.period_id
    elapsed = 60 * events.minute + events.second
    for completed_period, duration in (
        (1, regular_period),
        (2, regular_period),
        (3, extra_period),
        (4, extra_period),
    ):
        elapsed = elapsed - (period > completed_period) * duration
    actions["time_seconds"] = elapsed

    actions["team_id"] = events.team_id
    actions["player_id"] = events.player_id

    # Opta coordinates are clipped to [0, 100] and rescaled to the SPADL
    # pitch dimensions.
    for column, pitch_size in (
        ("start_x", spadlconfig.field_length),
        ("end_x", spadlconfig.field_length),
        ("start_y", spadlconfig.field_width),
        ("end_y", spadlconfig.field_width),
    ):
        actions[column] = events[column].clip(0, 100) / 100 * pitch_size

    # Type, result and body part are all derived row by row from the same
    # three event columns.
    event_args = events[["type_name", "outcome", "qualifiers"]]
    for column, row_mapper in (
        ("type_id", _get_type_id),
        ("result_id", _get_result_id),
        ("bodypart_id", _get_bodypart_id),
    ):
        actions[column] = event_args.apply(row_mapper, axis=1)

    # These two fixes need the original Opta columns, so they run while the
    # actions are still aligned row-for-row with the events.
    actions = _fix_recoveries(actions, events.type_name)
    actions = _fix_unintentional_ball_touches(actions, events.type_name, events.outcome)

    # Drop everything that is not an action; mergesort keeps the original
    # order of rows that share the same sort key.
    is_action = actions.type_id != spadlconfig.actiontypes.index("non_action")
    actions = actions[is_action]
    actions = actions.sort_values(["game_id", "period_id", "time_seconds"], kind="mergesort")
    actions = actions.reset_index(drop=True)

    actions = _fix_owngoals(actions)
    actions = _fix_direction_of_play(actions, home_team_id)
    actions = _fix_clearances(actions)
    actions = _fix_interceptions(actions)

    actions["action_id"] = range(len(actions))
    actions = _add_dribbles(actions)

    return cast(DataFrame[SPADLSchema], actions)
def _get_bodypart_id(args: tuple[str, bool, dict[int, Any]]) -> int:
    """Determine the SPADL body part of a single Opta event.

    Parameters
    ----------
    args : tuple[str, bool, dict[int, Any]]
        The event's ``(type_name, outcome, qualifiers)`` triple. The outcome
        is not used.

    Returns
    -------
    int
        Index of the body part in ``spadlconfig.bodyparts``.
    """
    event_type, _outcome, qualifiers = args

    # Qualifier rules in priority order: the first rule with any of its
    # qualifier ids present decides the body part.
    qualifier_rules = (
        ((15, 3, 168), "head"),
        ((21,), "other"),
        ((20,), "foot_right"),
        ((72,), "foot_left"),
        ((107,), "other"),  # throw-in
    )
    for qualifier_ids, bodypart in qualifier_rules:
        if any(qualifier_id in qualifiers for qualifier_id in qualifier_ids):
            return spadlconfig.bodyparts.index(bodypart)

    # No body part qualifier: the listed keeper events default to "other",
    # everything else to "foot".
    keeper_event = event_type in ("save", "claim", "punch", "keeper pick-up")
    return spadlconfig.bodyparts.index("other" if keeper_event else "foot")
def _get_result_id(args: tuple[str, bool, dict[int, Any]]) -> int:
    """Determine the SPADL result of a single Opta event.

    Parameters
    ----------
    args : tuple[str, bool, dict[int, Any]]
        The event's ``(type_name, outcome, qualifiers)`` triple.

    Returns
    -------
    int
        Index of the result in ``spadlconfig.results``.
    """
    event_type, outcome, qualifiers = args

    if event_type == "offside pass":
        result = "offside"
    elif event_type == "goal":
        # Qualifier 28 marks an own goal; its x and y must be switched later.
        result = "owngoal" if 28 in qualifiers else "success"
    elif event_type in ("foul", "attempt saved", "miss", "post", "ball touch"):
        # These event types fail regardless of the recorded outcome.
        result = "fail"
    else:
        result = "success" if outcome else "fail"

    return spadlconfig.results.index(result)
def _get_type_id(args: tuple[str, bool, dict[int, Any]]) -> int:  # noqa: C901
    """Determine the SPADL action type of a single Opta event.

    Parameters
    ----------
    args : tuple[str, bool, dict[int, Any]]
        The event's ``(type_name, outcome, qualifiers)`` triple.

    Returns
    -------
    int
        Index of the action type in ``spadlconfig.actiontypes``. Events that
        do not correspond to any SPADL action get the index of
        ``"non_action"``.
    """
    event_type, outcome, qualifiers = args
    type_index = spadlconfig.actiontypes.index

    # The fair-play qualifier (238) overrides every other rule.
    if 238 in qualifiers:
        return type_index("non_action")

    if event_type in ("pass", "offside pass"):
        is_cross = 2 in qualifiers
        if 107 in qualifiers:  # throw-in
            return type_index("throw_in")
        if 5 in qualifiers:  # free kick
            # A cross, long ball (1) or chipped ball (155) makes it "crossed".
            lofted = is_cross or 1 in qualifiers or 155 in qualifiers
            return type_index("freekick_crossed" if lofted else "freekick_short")
        if 6 in qualifiers:  # corner
            return type_index("corner_crossed" if is_cross else "corner_short")
        if is_cross:
            return type_index("cross")
        if 124 in qualifiers:  # goal kick
            return type_index("goalkick")
        return type_index("pass")

    if event_type in ("miss", "post", "attempt saved", "goal"):
        if 9 in qualifiers:
            return type_index("shot_penalty")
        if 26 in qualifiers:
            return type_index("shot_freekick")
        return type_index("shot")

    if event_type == "save":
        # Saves carrying qualifier 94 are not kept as actions.
        return type_index("non_action" if 94 in qualifiers else "keeper_save")

    # Event types that translate directly, whatever their outcome.
    direct_types = {
        "take on": "take_on",
        "tackle": "tackle",
        "interception": "interception",
        "blocked pass": "interception",
        "claim": "keeper_claim",
        "punch": "keeper_punch",
        "keeper pick-up": "keeper_pick_up",
        "clearance": "clearance",
    }
    if event_type in direct_types:
        return type_index(direct_types[event_type])

    # Fouls and ball touches only become actions when the outcome is
    # exactly False.
    if outcome is False:
        if event_type == "foul":
            return type_index("foul")
        if event_type == "ball touch":
            return type_index("bad_touch")

    return type_index("non_action")
def _fix_owngoals(actions: pd.DataFrame) -> pd.DataFrame:
    """Rewrite shots that resulted in an own goal.

    For every action of type 'shot' with result 'owngoal', the end location
    is mirrored on both axes and the action type is changed to 'bad_touch'.
    The dataframe is modified in place.

    Parameters
    ----------
    actions : pd.DataFrame
        SPADL actions dataframe.

    Returns
    -------
    pd.DataFrame
        The same dataframe with own-goal shots rewritten.
    """
    shot_id = spadlconfig.actiontypes.index("shot")
    owngoal_id = spadlconfig.results.index("owngoal")
    is_owngoal_shot = (actions.result_id == owngoal_id) & (actions.type_id == shot_id)

    # Mirror the end location across the pitch on both axes.
    mirrored_x = spadlconfig.field_length - actions.loc[is_owngoal_shot, "end_x"].values
    actions.loc[is_owngoal_shot, "end_x"] = mirrored_x
    mirrored_y = spadlconfig.field_width - actions.loc[is_owngoal_shot, "end_y"].values
    actions.loc[is_owngoal_shot, "end_y"] = mirrored_y

    actions.loc[is_owngoal_shot, "type_id"] = spadlconfig.actiontypes.index("bad_touch")
    return actions
def _fix_recoveries(df_actions: pd.DataFrame, opta_types: pd.Series) -> pd.DataFrame:
    """Convert ball recovery events to dribbles.

    An Opta 'ball recovery' event (type_id 49) becomes a dribble that ends
    where the next action starts; rows of type 'non_action' are skipped when
    looking for that next action. If the recovery already ends within
    ``min_dribble_length`` of that start location on both axes, it becomes a
    'non_action' instead. The dataframe is modified in place.

    Parameters
    ----------
    df_actions : pd.DataFrame
        Opta actions dataframe
    opta_types : pd.Series
        Original Opta event types

    Returns
    -------
    pd.DataFrame
        Opta event dataframe without any ball recovery events
    """
    non_action_id = spadlconfig.actiontypes.index("non_action")
    dribble_id = spadlconfig.actiontypes.index("dribble")

    # Row i of `following` describes the first later row that is not a
    # non_action: blank out the non_action rows, then back-fill the gaps.
    following = df_actions.shift(-1)
    following = following.mask(following.type_id == non_action_id).bfill()

    is_recovery = opta_types == "ball recovery"

    gap_x = (df_actions["end_x"] - following["start_x"]).abs()
    gap_y = (df_actions["end_y"] - following["start_y"]).abs()
    stays_in_place = (gap_x < min_dribble_length) & (gap_y < min_dribble_length)

    df_actions.loc[is_recovery & ~stays_in_place, "type_id"] = dribble_id
    df_actions.loc[is_recovery & stays_in_place, "type_id"] = non_action_id

    # Every recovery ends where the following action starts.
    next_start = following.loc[is_recovery, ["start_x", "start_y"]].values
    df_actions.loc[is_recovery, ["end_x", "end_y"]] = next_start

    return df_actions
def _fix_interceptions(df_actions: pd.DataFrame) -> pd.DataFrame:
    """Set the result of interceptions to 'fail' if they do not regain possession.

    An interception counts as not regaining possession when the next row in
    the dataframe does not have the same team. The dataframe is modified in
    place.

    Parameters
    ----------
    df_actions : pd.DataFrame
        Opta actions dataframe.

    Returns
    -------
    pd.DataFrame
        The same dataframe, with the result of such interceptions set to
        'fail'.
    """
    interception_id = spadlconfig.actiontypes.index("interception")
    fail_id = spadlconfig.results.index("fail")

    is_interception = df_actions["type_id"] == interception_id
    # The last row has no successor, so it compares as "different team".
    next_team = df_actions["team_id"].shift(-1)
    other_team_next = df_actions["team_id"] != next_team

    df_actions.loc[is_interception & other_team_next, "result_id"] = fail_id
    return df_actions
def _fix_unintentional_ball_touches(
    df_actions: pd.DataFrame, opta_type: pd.Series, opta_outcome: pd.Series
) -> pd.DataFrame:
    """Discard unintentional ball touches.

    Passes that are deflected but still reach their target are registered as
    successful passes. The (unintentional) deflection is not recorded as an
    action, because players should not be credited for it.

    Concretely, an action counts as deflected when the next Opta event is a
    'ball touch' with a truthy outcome. Its end location is replaced by the
    start location of the row two positions ahead, and if the action is a
    pass and that row has the same team, its result becomes 'success'. The
    dataframe is modified in place.

    Parameters
    ----------
    df_actions : pd.DataFrame
        Opta actions dataframe
    opta_type : pd.Series
        Original Opta event types
    opta_outcome : pd.Series
        Original Opta event outcomes

    Returns
    -------
    pd.DataFrame
        Opta event dataframe without any unintentional ball touches.
    """
    pass_id = spadlconfig.actiontypes.index("pass")
    success_id = spadlconfig.results.index("success")

    # The row after the deflecting ball touch, i.e. two rows ahead.
    after_touch = df_actions.shift(-2)

    next_type = opta_type.shift(-1)
    next_outcome = opta_outcome.shift(-1)
    is_deflected = (next_type == "ball touch") & (next_outcome)

    is_pass = df_actions["type_id"] == pass_id
    same_team_after = df_actions["team_id"] == after_touch["team_id"]

    # A deflected action ends where the action after the touch starts.
    new_end = after_touch.loc[is_deflected, ["start_x", "start_y"]].values
    df_actions.loc[is_deflected, ["end_x", "end_y"]] = new_end

    df_actions.loc[is_pass & is_deflected & same_team_after, "result_id"] = success_id
    return df_actions
|