|
"""StatsBomb event stream data to SPADL converter.""" |
|
|
|
import warnings |
|
from typing import Any, Optional, cast |
|
|
|
import numpy as np |
|
import numpy.typing as npt |
|
import pandas as pd |
|
from pandera.typing import DataFrame |
|
|
|
from . import config as spadlconfig |
|
from .base import _add_dribbles, _fix_clearances, _fix_direction_of_play |
|
from .schema import SPADLSchema |
|
|
|
|
|
def convert_to_actions(
    events: pd.DataFrame,
    home_team_id: int,
    xy_fidelity_version: Optional[int] = None,
    shot_fidelity_version: Optional[int] = None,
) -> DataFrame[SPADLSchema]:
    """
    Convert StatsBomb events to SPADL actions.

    Parameters
    ----------
    events : pd.DataFrame
        DataFrame containing StatsBomb events from a single game.
    home_team_id : int
        ID of the home team in the corresponding game.
    xy_fidelity_version : int, optional
        Fidelity of the coordinates of all events except shots: 1 for
        low fidelity (1-yard cells), 2 for high fidelity (0.1-yard cells).
        If not specified, the fidelity version is inferred from the data.
    shot_fidelity_version : int, optional
        Fidelity of the coordinates of shots (1 or 2, as above). If not
        specified, it is set to 2 when ``xy_fidelity_version`` is 2 and
        inferred from the data otherwise.

    Returns
    -------
    actions : pd.DataFrame
        DataFrame with corresponding SPADL actions.

    Raises
    ------
    AssertionError
        If an explicitly given fidelity version is neither 1 nor 2.

    Warns
    -----
    UserWarning
        Whenever a fidelity version had to be inferred from the data.
    """
    actions = pd.DataFrame()

    # Determine the coordinate fidelity. The helper returns the shot version
    # first and the general xy version second, so unpack in that order.
    inferred_shot_fidelity_version, inferred_xy_fidelity_version = _infer_xy_fidelity_versions(
        events
    )
    if xy_fidelity_version is None:
        xy_fidelity_version = inferred_xy_fidelity_version
        warnings.warn(
            f"Inferred xy_fidelity_version={inferred_xy_fidelity_version}."
            + " If this is incorrect, please specify the correct version"
            + " using the xy_fidelity_version argument"
        )
    else:
        assert xy_fidelity_version in [1, 2], "xy_fidelity_version must be 1 or 2"
    if shot_fidelity_version is None:
        if xy_fidelity_version == 2:
            # High-fidelity event coordinates imply high-fidelity shots.
            shot_fidelity_version = 2
        else:
            shot_fidelity_version = inferred_shot_fidelity_version
            warnings.warn(
                f"Inferred shot_fidelity_version={inferred_shot_fidelity_version}."
                + " If this is incorrect, please specify the correct version"
                + " using the shot_fidelity_version argument"
            )
    else:
        assert shot_fidelity_version in [1, 2], "shot_fidelity_version must be 1 or 2"

    events = events.copy()
    # Replace missing extras by an empty dict *before* anything dereferences
    # them. ``Series.fillna({})`` cannot be used for this: a dict value is
    # interpreted as an index -> value mapping, so an empty dict fills nothing.
    events["extra"] = [e if isinstance(e, dict) else {} for e in events["extra"]]
    events = _insert_interception_passes(events)

    actions["game_id"] = events.game_id
    actions["original_event_id"] = events.event_id
    actions["period_id"] = events.period_id
    actions["time_seconds"] = pd.to_timedelta(events.timestamp).dt.total_seconds()
    actions["team_id"] = events.team_id
    actions["player_id"] = events.player_id

    # The end location comes from the event's extra data, falling back to
    # the start location when the event has none.
    end_location = events[["location", "extra"]].apply(_get_end_location, axis=1)

    # Convert StatsBomb coordinates to SPADL coordinates. Shots and all other
    # events can be recorded at a different fidelity, so convert them apart.
    is_shot = events.type_name == "Shot"
    is_other = events.type_name != "Shot"
    actions.loc[is_shot, ["start_x", "start_y"]] = _convert_locations(
        events.loc[is_shot, "location"],
        shot_fidelity_version,
    )
    actions.loc[is_other, ["start_x", "start_y"]] = _convert_locations(
        events.loc[is_other, "location"],
        xy_fidelity_version,
    )
    actions.loc[is_shot, ["end_x", "end_y"]] = _convert_locations(
        end_location.loc[is_shot],
        shot_fidelity_version,
    )
    actions.loc[is_other, ["end_x", "end_y"]] = _convert_locations(
        end_location.loc[is_other],
        xy_fidelity_version,
    )

    actions[["type_id", "result_id", "bodypart_id"]] = events[["type_name", "extra"]].apply(
        _parse_event, axis=1, result_type="expand"
    )

    # Drop events that do not map to a SPADL action. The stable sort keeps
    # the original order of events that share a timestamp.
    actions = (
        actions[actions.type_id != spadlconfig.actiontypes.index("non_action")]
        .sort_values(["game_id", "period_id", "time_seconds"], kind="mergesort")
        .reset_index(drop=True)
    )
    actions = _fix_direction_of_play(actions, home_team_id)
    actions = _fix_clearances(actions)

    actions["action_id"] = range(len(actions))
    actions = _add_dribbles(actions)

    return cast(DataFrame[SPADLSchema], actions)
|
|
|
|
|
# Type alias for a pair of floats; used below to annotate the start and end
# locations handled by `_get_end_location`.
Location = tuple[float, float]
|
|
|
|
|
def _insert_interception_passes(df_events: pd.DataFrame) -> pd.DataFrame: |
|
"""Insert interception actions before passes. |
|
|
|
This function converts passes that are also interceptions (type 64) in the |
|
StatsBomb event data into two separate events, first an interception and |
|
then a pass. |
|
|
|
Parameters |
|
---------- |
|
df_events : pd.DataFrame |
|
StatsBomb event dataframe |
|
|
|
Returns |
|
------- |
|
pd.DataFrame |
|
StatsBomb event dataframe in which passes that were also denoted as |
|
interceptions in the StatsBomb notation are transformed into two events. |
|
""" |
|
|
|
def is_interception_pass(x: dict) -> bool: |
|
return x.get("extra", {}).get("pass", {}).get("type", {}).get("name") == "Interception" |
|
|
|
df_events_interceptions = df_events[df_events.apply(is_interception_pass, axis=1)].copy() |
|
|
|
if not df_events_interceptions.empty: |
|
df_events_interceptions["type_name"] = "Interception" |
|
df_events_interceptions["extra"] = [ |
|
{"interception": {"outcome": {"id": 16, "name": "Success In Play"}}} |
|
] * len(df_events_interceptions) |
|
|
|
df_events = pd.concat([df_events_interceptions, df_events], ignore_index=True) |
|
df_events = df_events.sort_values(["timestamp"], kind="mergesort") |
|
df_events = df_events.reset_index(drop=True) |
|
|
|
return df_events |
|
|
|
|
|
def _infer_xy_fidelity_versions(events: pd.DataFrame) -> tuple[int, int]: |
|
"""Find out if x and y are integers disguised as floats.""" |
|
mask_shot = events.type_name == "Shot" |
|
mask_other = events.type_name != "Shot" |
|
locations = events.location.apply(pd.Series) |
|
mask_valid_location = locations.notna().any(axis=1) |
|
high_fidelity_shots = (locations.loc[mask_valid_location & mask_shot] % 1 != 0).any(axis=None) |
|
high_fidelity_other = (locations.loc[mask_valid_location & mask_other] % 1 != 0).any(axis=None) |
|
xy_fidelity_version = 2 if high_fidelity_other else 1 |
|
shot_fidelity_version = 2 if high_fidelity_shots else xy_fidelity_version |
|
return shot_fidelity_version, xy_fidelity_version |
|
|
|
|
|
def _convert_locations(locations: pd.Series, fidelity_version: int) -> npt.NDArray[np.float64]:
    """Convert StatsBomb locations to spadl coordinates.

    StatsBomb coordinates are cell-based, using a 120x80 grid, so 1,1 is the
    top-left square 'yard' of the field (in landscape), even though 0,0 is the
    true coordinate of the corner flag.

    Some matches have metadata like "xy_fidelity_version" : "2", which means
    the grid has higher granularity. In this case 0.1,0.1 is the top left
    cell.

    Parameters
    ----------
    locations : pd.Series
        StatsBomb locations; each recognised entry is a list of two or three
        numbers. Any other entry (e.g. a missing location) is left as NaN.
    fidelity_version : int
        2 for the 0.1-sized cells, anything else for the 1.0-sized cells.

    Returns
    -------
    np.ndarray
        Float array of shape ``(len(locations), 2)`` holding the x and y
        coordinates, with the y-axis flipped and both axes clipped to the
        SPADL field dimensions. Rows without a recognised location are NaN.
    """
    cell_side = 0.1 if fidelity_version == 2 else 1.0
    # Shift every coordinate to the centre of its grid cell.
    cell_relative_center = cell_side / 2
    # Start from NaN rather than uninitialised memory, so that rows without
    # a usable location yield a deterministic "missing" value.
    coordinates = np.full((len(locations), 2), np.nan, dtype=float)
    for i, loc in enumerate(locations):
        if not isinstance(loc, list):
            continue
        if len(loc) == 2:
            coordinates[i, 0] = (loc[0] - cell_relative_center) / 120 * spadlconfig.field_length
            coordinates[i, 1] = (
                spadlconfig.field_width
                - (loc[1] - cell_relative_center) / 80 * spadlconfig.field_width
            )
        elif len(loc) == 3:
            # Three-element locations always use a 0.05 offset for y, i.e. half
            # of a 0.1 cell, whatever the fidelity version is.
            # NOTE(review): presumably these are shot end locations in the
            # goal frame, recorded to a tenth of a yard -- confirm.
            coordinates[i, 0] = (loc[0] - cell_relative_center) / 120 * spadlconfig.field_length
            coordinates[i, 1] = (
                spadlconfig.field_width - (loc[1] - 0.05) / 80 * spadlconfig.field_width
            )
    # NaN entries pass through the clipping unchanged.
    coordinates[:, 0] = np.clip(coordinates[:, 0], 0, spadlconfig.field_length)
    coordinates[:, 1] = np.clip(coordinates[:, 1], 0, spadlconfig.field_width)
    return coordinates
|
|
|
|
|
def _get_end_location(q: tuple[Location, dict[str, Any]]) -> Location:
    """Return the end location of an event.

    ``q`` holds the event's start location and its extra data. The first
    ``end_location`` found under the "pass", "shot" or "carry" entry (checked
    in that order) is returned; without one, the start location is returned.
    """
    origin, qualifiers = q
    found = (
        qualifiers[key]["end_location"]
        for key in ("pass", "shot", "carry")
        if key in qualifiers and "end_location" in qualifiers[key]
    )
    # The generator is lazy, so only the first match is ever looked up.
    return next(found, origin)
|
|
|
|
|
def _parse_event(q: tuple[str, dict[str, Any]]) -> tuple[int, int, int]:
    """Map a StatsBomb event to SPADL action type, result and body part ids.

    ``q`` holds the event's type name and its extra data. Event types without
    a dedicated parser are parsed as a non-action. The names returned by the
    parser are translated to their index in the corresponding SPADL list.
    """
    type_name, extra = q
    parsers = {
        "Pass": _parse_pass_event,
        "Dribble": _parse_dribble_event,
        "Carry": _parse_carry_event,
        "Foul Committed": _parse_foul_event,
        "Duel": _parse_duel_event,
        "Interception": _parse_interception_event,
        "Shot": _parse_shot_event,
        "Own Goal Against": _parse_own_goal_event,
        "Goal Keeper": _parse_goalkeeper_event,
        "Clearance": _parse_clearance_event,
        "Miscontrol": _parse_miscontrol_event,
    }
    parse = parsers.get(type_name, _parse_event_as_non_action)
    action_name, result_name, bodypart_name = parse(extra)
    return (
        spadlconfig.actiontypes.index(action_name),
        spadlconfig.results.index(result_name),
        spadlconfig.bodyparts.index(bodypart_name),
    )
|
|
|
|
|
def _parse_event_as_non_action(_extra: dict[str, Any]) -> tuple[str, str, str]: |
|
a = "non_action" |
|
r = "success" |
|
b = "foot" |
|
return a, r, b |
|
|
|
|
|
def _parse_pass_event(extra: dict[str, Any]) -> tuple[str, str, str]: |
|
a = "pass" |
|
b = "foot" |
|
p = extra.get("pass", {}) |
|
ptype = p.get("type", {}).get("name") |
|
height = p.get("height", {}).get("name") |
|
cross = p.get("cross") |
|
if ptype == "Free Kick": |
|
if height == "High Pass" or cross: |
|
a = "freekick_crossed" |
|
else: |
|
a = "freekick_short" |
|
elif ptype == "Corner": |
|
if height == "High Pass" or cross: |
|
a = "corner_crossed" |
|
else: |
|
a = "corner_short" |
|
elif ptype == "Goal Kick": |
|
a = "goalkick" |
|
elif ptype == "Throw-in": |
|
a = "throw_in" |
|
b = "other" |
|
elif cross: |
|
a = "cross" |
|
else: |
|
a = "pass" |
|
|
|
pass_outcome = extra.get("pass", {}).get("outcome", {}).get("name") |
|
if pass_outcome in ["Incomplete", "Out"]: |
|
r = "fail" |
|
elif pass_outcome == "Pass Offside": |
|
r = "offside" |
|
elif pass_outcome in ["Injury Clearance", "Unknown"]: |
|
|
|
a = "non_action" |
|
r = "success" |
|
else: |
|
r = "success" |
|
|
|
bp = extra.get("pass", {}).get("body_part", {}).get("name") |
|
if bp is not None: |
|
if "Head" in bp: |
|
b = "head" |
|
elif bp == "Left Foot": |
|
b = "foot_left" |
|
elif bp == "Right Foot": |
|
b = "foot_right" |
|
elif "Foot" in bp or bp == "Drop Kick": |
|
b = "foot" |
|
else: |
|
b = "other" |
|
|
|
return a, r, b |
|
|
|
|
|
def _parse_dribble_event(extra: dict[str, Any]) -> tuple[str, str, str]: |
|
a = "take_on" |
|
|
|
dribble_outcome = extra.get("dribble", {}).get("outcome", {}).get("name") |
|
if dribble_outcome == "Incomplete": |
|
r = "fail" |
|
elif dribble_outcome == "Complete": |
|
r = "success" |
|
else: |
|
r = "success" |
|
|
|
b = "foot" |
|
|
|
return a, r, b |
|
|
|
|
|
def _parse_carry_event(_extra: dict[str, Any]) -> tuple[str, str, str]: |
|
a = "dribble" |
|
r = "success" |
|
b = "foot" |
|
return a, r, b |
|
|
|
|
|
def _parse_foul_event(extra: dict[str, Any]) -> tuple[str, str, str]: |
|
a = "foul" |
|
|
|
foul_card = extra.get("foul_committed", {}).get("card", {}).get("name", "") |
|
if "Yellow" in foul_card: |
|
r = "yellow_card" |
|
elif "Red" in foul_card: |
|
r = "red_card" |
|
else: |
|
r = "fail" |
|
|
|
b = "foot" |
|
|
|
return a, r, b |
|
|
|
|
|
def _parse_duel_event(extra: dict[str, Any]) -> tuple[str, str, str]: |
|
if extra.get("duel", {}).get("type", {}).get("name") == "Tackle": |
|
a = "tackle" |
|
duel_outcome = extra.get("duel", {}).get("outcome", {}).get("name") |
|
if duel_outcome in ["Lost In Play", "Lost Out"]: |
|
r = "fail" |
|
elif duel_outcome in ["Success in Play", "Won"]: |
|
r = "success" |
|
else: |
|
r = "success" |
|
|
|
b = "foot" |
|
return a, r, b |
|
return _parse_event_as_non_action(extra) |
|
|
|
|
|
def _parse_interception_event(extra: dict[str, Any]) -> tuple[str, str, str]: |
|
a = "interception" |
|
interception_outcome = extra.get("interception", {}).get("outcome", {}).get("name") |
|
if interception_outcome in ["Lost In Play", "Lost Out"]: |
|
r = "fail" |
|
elif interception_outcome == "Won": |
|
r = "success" |
|
else: |
|
r = "success" |
|
b = "foot" |
|
return a, r, b |
|
|
|
|
|
def _parse_shot_event(extra: dict[str, Any]) -> tuple[str, str, str]: |
|
extra_type = extra.get("shot", {}).get("type", {}).get("name") |
|
if extra_type == "Free Kick": |
|
a = "shot_freekick" |
|
elif extra_type == "Penalty": |
|
a = "shot_penalty" |
|
else: |
|
a = "shot" |
|
|
|
shot_outcome = extra.get("shot", {}).get("outcome", {}).get("name") |
|
if shot_outcome == "Goal": |
|
r = "success" |
|
elif shot_outcome in ["Blocked", "Off T", "Post", "Saved", "Wayward"]: |
|
r = "fail" |
|
else: |
|
r = "fail" |
|
|
|
bp = extra.get("shot", {}).get("body_part", {}).get("name") |
|
if bp is None: |
|
b = "foot" |
|
elif "Head" in bp: |
|
b = "head" |
|
elif bp == "Left Foot": |
|
b = "foot_left" |
|
elif bp == "Right Foot": |
|
b = "foot_right" |
|
elif "Foot" in bp: |
|
b = "foot" |
|
else: |
|
b = "other" |
|
|
|
return a, r, b |
|
|
|
|
|
def _parse_own_goal_event(_extra: dict[str, Any]) -> tuple[str, str, str]: |
|
a = "bad_touch" |
|
r = "owngoal" |
|
b = "foot" |
|
return a, r, b |
|
|
|
|
|
def _parse_goalkeeper_event(extra: dict[str, Any]) -> tuple[str, str, str]: |
|
extra_type = extra.get("goalkeeper", {}).get("type", {}).get("name") |
|
if extra_type == "Shot Saved": |
|
a = "keeper_save" |
|
elif extra_type in ("Collected", "Keeper Sweeper"): |
|
a = "keeper_claim" |
|
elif extra_type == "Punch": |
|
a = "keeper_punch" |
|
else: |
|
a = "non_action" |
|
|
|
goalkeeper_outcome = extra.get("goalkeeper", {}).get("outcome", {}).get("name", "x") |
|
if goalkeeper_outcome in [ |
|
"Claim", |
|
"Clear", |
|
"Collected Twice", |
|
"In Play Safe", |
|
"Success", |
|
"Touched Out", |
|
]: |
|
r = "success" |
|
elif goalkeeper_outcome in ["In Play Danger", "No Touch"]: |
|
r = "fail" |
|
else: |
|
r = "success" |
|
|
|
bp = extra.get("goalkeeper", {}).get("body_part", {}).get("name") |
|
if bp is None: |
|
b = "other" |
|
elif "Head" in bp: |
|
b = "head" |
|
elif bp == "Left Foot": |
|
b = "foot_left" |
|
elif bp == "Right Foot": |
|
b = "foot_right" |
|
elif "Foot" in bp or bp == "Drop Kick": |
|
b = "foot" |
|
else: |
|
b = "other" |
|
|
|
return a, r, b |
|
|
|
|
|
def _parse_clearance_event(extra: dict[str, Any]) -> tuple[str, str, str]: |
|
a = "clearance" |
|
r = "success" |
|
bp = extra.get("clearance", {}).get("body_part", {}).get("name") |
|
if bp is None: |
|
b = "foot" |
|
elif "Head" in bp: |
|
b = "head" |
|
elif bp == "Left Foot": |
|
b = "foot_left" |
|
elif bp == "Right Foot": |
|
b = "foot_right" |
|
elif "Foot" in bp: |
|
b = "foot" |
|
else: |
|
b = "other" |
|
return a, r, b |
|
|
|
|
|
def _parse_miscontrol_event(_extra: dict[str, Any]) -> tuple[str, str, str]: |
|
a = "bad_touch" |
|
r = "fail" |
|
b = "foot" |
|
return a, r, b |
|
|