|
"""Opta event stream data to SPADL converter.""" |
|
|
|
from typing import Any, cast |
|
|
|
import pandas as pd |
|
from pandera.typing import DataFrame |
|
|
|
from . import config as spadlconfig |
|
from .base import ( |
|
_add_dribbles, |
|
_fix_clearances, |
|
_fix_direction_of_play, |
|
min_dribble_length, |
|
) |
|
from .schema import SPADLSchema |
|
|
|
|
|
def convert_to_actions(events: pd.DataFrame, home_team_id: int) -> DataFrame[SPADLSchema]:
    """
    Convert Opta events to SPADL actions.

    Every event is first translated into a candidate action (identifiers,
    time, location, type, result and body part). Ball recoveries and
    deflections are then post-processed, events typed as 'non_action' are
    dropped, and the remaining actions are sorted, corrected and numbered
    before dribbles are added.

    Parameters
    ----------
    events : pd.DataFrame
        DataFrame containing Opta events from a single game.
    home_team_id : int
        ID of the home team in the corresponding game.

    Returns
    -------
    actions : pd.DataFrame
        DataFrame with corresponding SPADL actions.

    """
    non_action_id = spadlconfig.actiontypes.index("non_action")
    period = events.period_id

    # Make the clock relative to the start of the current period by removing
    # the nominal length (45, 45, 15 and 15 minutes) of each earlier period.
    clock = 60 * events.minute + events.second
    for earlier_period, minutes in ((1, 45), (2, 45), (3, 15), (4, 15)):
        clock = clock - ((period > earlier_period) * minutes * 60)

    actions = pd.DataFrame()
    actions["game_id"] = events.game_id
    actions["original_event_id"] = events.event_id.astype(object)
    actions["period_id"] = period
    actions["time_seconds"] = clock
    actions["team_id"] = events.team_id
    actions["player_id"] = events.player_id

    # Coordinates are clipped to [0, 100] and rescaled to the SPADL pitch size.
    pitch_scaling = (
        (("start_x", "end_x"), spadlconfig.field_length),
        (("start_y", "end_y"), spadlconfig.field_width),
    )
    for columns, extent in pitch_scaling:
        for column in columns:
            actions[column] = events[column].clip(0, 100) / 100 * extent

    # Type, result and body part are all derived row by row from the same
    # three event attributes.
    descriptors = events[["type_name", "outcome", "qualifiers"]]
    classifiers = (
        ("type_id", _get_type_id),
        ("result_id", _get_result_id),
        ("bodypart_id", _get_bodypart_id),
    )
    for column, classify in classifiers:
        actions[column] = descriptors.apply(classify, axis=1)

    # These two fixes look at neighbouring raw events, so they have to run
    # while the rows still line up one-to-one with `events`.
    actions = _fix_recoveries(actions, events.type_name)
    actions = _fix_unintentional_ball_touches(actions, events.type_name, events.outcome)

    is_action = actions.type_id != non_action_id
    actions = actions[is_action]
    # mergesort is stable: events sharing a timestamp keep their input order.
    actions = actions.sort_values(["game_id", "period_id", "time_seconds"], kind="mergesort")
    actions = actions.reset_index(drop=True)

    actions = _fix_owngoals(actions)
    actions = _fix_direction_of_play(actions, home_team_id)
    actions = _fix_clearances(actions)
    actions = _fix_interceptions(actions)
    actions["action_id"] = range(len(actions))

    return cast(DataFrame[SPADLSchema], _add_dribbles(actions))
|
|
|
|
|
def _get_bodypart_id(args: tuple[str, bool, dict[int, Any]]) -> int:
    """Map an Opta event to the index of its SPADL body part.

    Parameters
    ----------
    args : tuple[str, bool, dict[int, Any]]
        The event's type name, outcome and qualifiers (keyed by qualifier
        ID). The outcome is not used.

    Returns
    -------
    int
        Position of the selected body part in ``spadlconfig.bodyparts``.
    """
    event_name, _, qualifiers = args

    # Evaluated top to bottom; the first rule with a matching qualifier wins.
    qualifier_rules = (
        ((15, 3, 168), "head"),
        ((21,), "other"),
        ((20,), "foot_right"),
        ((72,), "foot_left"),
        ((107,), "other"),
    )
    for qualifier_ids, bodypart in qualifier_rules:
        if any(qualifier_id in qualifiers for qualifier_id in qualifier_ids):
            return spadlconfig.bodyparts.index(bodypart)

    # No body-part qualifier present: decide from the event type instead.
    keeper_events = ("save", "claim", "punch", "keeper pick-up")
    fallback = "other" if event_name in keeper_events else "foot"
    return spadlconfig.bodyparts.index(fallback)
|
|
|
|
|
def _get_result_id(args: tuple[str, bool, dict[int, Any]]) -> int:
    """Map an Opta event to the index of its SPADL result.

    Parameters
    ----------
    args : tuple[str, bool, dict[int, Any]]
        The event's type name, outcome and qualifiers (keyed by qualifier
        ID).

    Returns
    -------
    int
        Position of the selected result in ``spadlconfig.results``.
    """
    event_name, outcome, qualifiers = args

    # Event types that count as a failure regardless of the recorded outcome.
    always_failed = ("foul", "attempt saved", "miss", "post", "ball touch")

    if event_name == "offside pass":
        result = "offside"
    elif event_name == "goal":
        # Qualifier 28 on a goal makes it an own goal.
        result = "owngoal" if 28 in qualifiers else "success"
    elif event_name in always_failed:
        result = "fail"
    else:
        # Everything else simply follows the event's own outcome flag.
        result = "success" if outcome else "fail"
    return spadlconfig.results.index(result)
|
|
|
|
|
def _get_type_id(args: tuple[str, bool, dict[int, Any]]) -> int:
    """Map an Opta event to the index of its SPADL action type.

    Parameters
    ----------
    args : tuple[str, bool, dict[int, Any]]
        The event's type name, outcome and qualifiers (keyed by qualifier
        ID).

    Returns
    -------
    int
        Position of the selected action type in ``spadlconfig.actiontypes``.
        Events that do not correspond to an on-the-ball action map to
        'non_action'.
    """
    eventname, outcome, q = args

    # Event names that translate one-to-one, whatever the outcome/qualifiers.
    direct_types = {
        "take on": "take_on",
        "tackle": "tackle",
        "interception": "interception",
        "blocked pass": "interception",
        "claim": "keeper_claim",
        "punch": "keeper_punch",
        "keeper pick-up": "keeper_pick_up",
        "clearance": "clearance",
    }

    if 238 in q:
        # Fair-play qualifier: never an action, whatever the event type.
        action = "non_action"
    elif eventname in ("pass", "offside pass"):
        is_cross = 2 in q
        if 107 in q:  # throw-in
            action = "throw_in"
        elif 5 in q:  # free kick
            # "Crossed" when flagged as a cross, a long ball or a chipped ball.
            lofted = is_cross or 1 in q or 155 in q
            action = "freekick_crossed" if lofted else "freekick_short"
        elif 6 in q:  # corner
            action = "corner_crossed" if is_cross else "corner_short"
        elif is_cross:
            action = "cross"
        elif 124 in q:  # goal kick
            action = "goalkick"
        else:
            action = "pass"
    elif eventname in ("miss", "post", "attempt saved", "goal"):
        if 9 in q:
            action = "shot_penalty"
        elif 26 in q:
            action = "shot_freekick"
        else:
            action = "shot"
    elif eventname == "save":
        # Saves carrying qualifier 94 are not kept as keeper actions.
        action = "non_action" if 94 in q else "keeper_save"
    elif eventname in ("foul", "ball touch"):
        # Only kept when the outcome is exactly False; any other outcome
        # value turns the event into a non-action.
        if outcome is False:
            action = "foul" if eventname == "foul" else "bad_touch"
        else:
            action = "non_action"
    else:
        action = direct_types.get(eventname, "non_action")
    return spadlconfig.actiontypes.index(action)
|
|
|
|
|
def _fix_owngoals(actions: pd.DataFrame) -> pd.DataFrame:
    """Turn shots that resulted in an own goal into bad touches.

    The end location of such a shot is mirrored on both axes, and its type
    is changed from 'shot' to 'bad_touch'. The result stays 'owngoal'.

    Parameters
    ----------
    actions : pd.DataFrame
        SPADL actions; modified in place.

    Returns
    -------
    pd.DataFrame
        The same dataframe with own-goal shots rewritten.
    """
    is_owngoal_shot = (actions.result_id == spadlconfig.results.index("owngoal")) & (
        actions.type_id == spadlconfig.actiontypes.index("shot")
    )

    # Mirror the end location through the centre of the pitch.
    mirrored_axes = (
        ("end_x", spadlconfig.field_length),
        ("end_y", spadlconfig.field_width),
    )
    for column, extent in mirrored_axes:
        current = actions.loc[is_owngoal_shot, column].to_numpy()
        actions.loc[is_owngoal_shot, column] = extent - current

    actions.loc[is_owngoal_shot, "type_id"] = spadlconfig.actiontypes.index("bad_touch")
    return actions
|
|
|
|
|
def _fix_recoveries(df_actions: pd.DataFrame, opta_types: pd.Series) -> pd.DataFrame:
    """Convert ball recovery events to dribbles.

    This function converts the Opta 'ball recovery' event (type_id 49) into
    a dribble that ends where the next action starts. "Next action" means
    the first later row that is not typed as 'non_action'. A recovery whose
    end location lies within ``min_dribble_length`` of that start location
    on both axes is typed as 'non_action' instead, because there is no
    movement to record.

    A recovery that is not followed by any action (e.g. at the very end of
    the event stream) keeps its own end location instead of having it
    overwritten with missing values.

    Parameters
    ----------
    df_actions : pd.DataFrame
        Opta actions dataframe, aligned row by row with ``opta_types``.
        Modified in place.
    opta_types : pd.Series
        Original Opta event types

    Returns
    -------
    pd.DataFrame
        The same dataframe, in which every ball recovery is typed as either
        'dribble' or 'non_action'.
    """
    non_action_id = spadlconfig.actiontypes.index("non_action")
    dribble_id = spadlconfig.actiontypes.index("dribble")

    # For every row, look up the next row that is an actual action: blank out
    # upcoming non-actions and back-fill them from the rows that follow.
    df_actions_next = df_actions.shift(-1)
    df_actions_next = df_actions_next.mask(df_actions_next.type_id == non_action_id).bfill()
    next_start = df_actions_next[["start_x", "start_y"]]

    selector_recovery = opta_types == "ball recovery"

    # Comparisons against a missing next location are False, so a recovery
    # without a following action is never considered "same location".
    same_x = abs(df_actions["end_x"] - next_start["start_x"]) < min_dribble_length
    same_y = abs(df_actions["end_y"] - next_start["start_y"]) < min_dribble_length
    same_loc = same_x & same_y

    df_actions.loc[selector_recovery & ~same_loc, "type_id"] = dribble_id
    df_actions.loc[selector_recovery & same_loc, "type_id"] = non_action_id

    # Only move the end location when a next start location is known;
    # otherwise valid coordinates would be replaced by NaN.
    selector_relocate = selector_recovery & next_start.notna().all(axis=1)
    df_actions.loc[selector_relocate, ["end_x", "end_y"]] = next_start.loc[
        selector_relocate
    ].values

    return df_actions
|
|
|
|
|
def _fix_interceptions(df_actions: pd.DataFrame) -> pd.DataFrame:
    """Set the result of interceptions to 'fail' if they do not regain possession.

    An interception counts as regaining possession only when the action in
    the next row is performed by the same team. An interception in the last
    row has no next action and is therefore also set to 'fail'.

    Parameters
    ----------
    df_actions : pd.DataFrame
        Opta actions dataframe. Modified in place.

    Returns
    -------
    pd.DataFrame
        The same dataframe with the result of unsuccessful interceptions
        set to 'fail'.
    """
    interception_id = spadlconfig.actiontypes.index("interception")
    fail_id = spadlconfig.results.index("fail")

    is_interception = df_actions.type_id == interception_id
    # Team performing the action in the following row (missing for the last row).
    next_team = df_actions.team_id.shift(-1)
    possession_lost = df_actions.team_id != next_team

    df_actions.loc[is_interception & possession_lost, "result_id"] = fail_id
    return df_actions
|
|
|
|
|
def _fix_unintentional_ball_touches(
    df_actions: pd.DataFrame, opta_type: pd.Series, opta_outcome: pd.Series
) -> pd.DataFrame:
    """Discard unintentional ball touches.

    Passes that are deflected but still reach their target are registered as
    successful passes. The (unintentional) deflection is not recorded as an
    action, because players should not be credited for it.

    Concretely, for every event that is directly followed by a 'ball touch'
    event with a truthy outcome:

    - its end location is moved to the start location of the event that
      comes right after the ball touch, provided that location is known;
    - if it is a pass and that later event belongs to the same team, its
      result is set to 'success'.

    Parameters
    ----------
    df_actions : pd.DataFrame
        Opta actions dataframe, aligned row by row with ``opta_type`` and
        ``opta_outcome``. Modified in place.
    opta_type : pd.Series
        Original Opta event types
    opta_outcome : pd.Series
        Original Opta event outcomes

    Returns
    -------
    pd.DataFrame
        The same dataframe with deflected actions corrected.
    """
    pass_id = spadlconfig.actiontypes.index("pass")
    success_id = spadlconfig.results.index("success")

    # Row i of this frame holds the event at row i + 2, i.e. the one right
    # after a potential deflection at row i + 1.
    df_actions_next = df_actions.shift(-2)
    next_start = df_actions_next[["start_x", "start_y"]]

    selector_pass = df_actions["type_id"] == pass_id
    selector_deflected = (opta_type.shift(-1) == "ball touch") & (opta_outcome.shift(-1))
    selector_same_team = df_actions["team_id"] == df_actions_next["team_id"]

    # Near the end of the event stream there is no event two rows ahead.
    # Skip the relocation there, so that valid end coordinates are not
    # replaced by NaN.
    selector_relocate = selector_deflected & next_start.notna().all(axis=1)
    df_actions.loc[selector_relocate, ["end_x", "end_y"]] = next_start.loc[
        selector_relocate
    ].values

    df_actions.loc[
        selector_pass & selector_deflected & selector_same_team, "result_id"
    ] = success_id
    return df_actions
|
|