"""Wyscout event stream data to SPADL converter.""" from typing import Any, Optional, cast import pandas as pd # type: ignore from pandera.typing import DataFrame from . import config as spadlconfig from .base import ( _add_dribbles, _fix_clearances, _fix_direction_of_play, min_dribble_length, ) from .schema import SPADLSchema ################################### # WARNING: HERE BE DRAGONS # This code for converting wyscout data was organically grown over a long period of time. # It works for now, but needs to be cleaned up in the future. # Enter at your own risk. ################################### def convert_to_actions(events: pd.DataFrame, home_team_id: int) -> DataFrame[SPADLSchema]: """ Convert Wyscout events to SPADL actions. Parameters ---------- events : pd.DataFrame DataFrame containing Wyscout events from a single game. home_team_id : int ID of the home team in the corresponding game. Returns ------- actions : pd.DataFrame DataFrame with corresponding SPADL actions. """ events = pd.concat([events, get_tagsdf(events)], axis=1) events = make_new_positions(events) events = fix_wyscout_events(events) actions = create_df_actions(events) actions = fix_actions(actions) actions = _fix_direction_of_play(actions, home_team_id) actions = _fix_clearances(actions) actions["action_id"] = range(len(actions)) actions = _add_dribbles(actions) return cast(DataFrame[SPADLSchema], actions) def _get_tag_set(tags: list[dict[str, Any]]) -> set[int]: return {tag["id"] for tag in tags} def get_tagsdf(events: pd.DataFrame) -> pd.DataFrame: """Represent Wyscout tags as a boolean dataframe. Parameters ---------- events : pd.DataFrame Wyscout event dataframe Returns ------- pd.DataFrame A dataframe with a column for each tag. """ tags = events.tags.apply(_get_tag_set) tagsdf = pd.DataFrame() for tag_id, column in wyscout_tags: tagsdf[column] = tags.apply(lambda x, tag=tag_id: tag in x) return tagsdf wyscout_tags = [ (101, "goal"), (102, "own_goal"), (301, "assist"), (302, "key_pass"), (1901, "counter_attack"), (401, "left_foot"), (402, "right_foot"), (403, "head/body"), (1101, "direct"), (1102, "indirect"), (2001, "dangerous_ball_lost"), (2101, "blocked"), (801, "high"), (802, "low"), (1401, "interception"), (1501, "clearance"), (201, "opportunity"), (1301, "feint"), (1302, "missed_ball"), (501, "free_space_right"), (502, "free_space_left"), (503, "take_on_left"), (504, "take_on_right"), (1601, "sliding_tackle"), (601, "anticipated"), (602, "anticipation"), (1701, "red_card"), (1702, "yellow_card"), (1703, "second_yellow_card"), (1201, "position_goal_low_center"), (1202, "position_goal_low_right"), (1203, "position_goal_mid_center"), (1204, "position_goal_mid_left"), (1205, "position_goal_low_left"), (1206, "position_goal_mid_right"), (1207, "position_goal_high_center"), (1208, "position_goal_high_left"), (1209, "position_goal_high_right"), (1210, "position_out_low_right"), (1211, "position_out_mid_left"), (1212, "position_out_low_left"), (1213, "position_out_mid_right"), (1214, "position_out_high_center"), (1215, "position_out_high_left"), (1216, "position_out_high_right"), (1217, "position_post_low_right"), (1218, "position_post_mid_left"), (1219, "position_post_low_left"), (1220, "position_post_mid_right"), (1221, "position_post_high_center"), (1222, "position_post_high_left"), (1223, "position_post_high_right"), (901, "through"), (1001, "fairplay"), (701, "lost"), (702, "neutral"), (703, "won"), (1801, "accurate"), (1802, "not_accurate"), ] def _make_position_vars(event_id: int, positions: list[dict[str, Optional[float]]]) -> pd.Series: if len(positions) == 2: # if less than 2 then action is removed start_x = positions[0]["x"] start_y = positions[0]["y"] end_x = positions[1]["x"] end_y = positions[1]["y"] elif len(positions) == 1: start_x = positions[0]["x"] start_y = positions[0]["y"] end_x = start_x end_y = start_y else: start_x = None start_y = None end_x = None end_y = None return pd.Series([event_id, start_x, start_y, end_x, end_y]) def make_new_positions(events: pd.DataFrame) -> pd.DataFrame: """Extract the start and end coordinates for each action. Parameters ---------- events : pd.DataFrame Wyscout event dataframe Returns ------- pd.DataFrame Wyscout event dataframe with start and end coordinates for each action. """ new_positions = events[["event_id", "positions"]].apply( lambda row: _make_position_vars(row["event_id"], row["positions"]), axis=1 ) new_positions.columns = ["event_id", "start_x", "start_y", "end_x", "end_y"] events = pd.merge(events, new_positions, left_on="event_id", right_on="event_id") events[["start_x", "end_x"]] = events[["start_x", "end_x"]].astype(float) events[["start_y", "end_y"]] = events[["start_y", "end_y"]].astype(float) events = events.drop("positions", axis=1) return events def fix_wyscout_events(df_events: pd.DataFrame) -> pd.DataFrame: """Perform some fixes on the Wyscout events such that the spadl action dataframe can be built. Parameters ---------- df_events : pd.DataFrame Wyscout event dataframe Returns ------- pd.DataFrame Wyscout event dataframe with an extra column 'offside' """ df_events = create_shot_coordinates(df_events) df_events = convert_duels(df_events) df_events = insert_interceptions(df_events) df_events = add_offside_variable(df_events) df_events = convert_touches(df_events) df_events = convert_simulations(df_events) return df_events def create_shot_coordinates(df_events: pd.DataFrame) -> pd.DataFrame: """Create shot coordinates (estimates) from the Wyscout tags. Parameters ---------- df_events : pd.DataFrame Wyscout event dataframe Returns ------- pd.DataFrame Wyscout event dataframe with end coordinates for shots """ shot = df_events.subtype_id.isin([33, 100]) pas = df_events.type_id == 8 goal_center_idx = ( df_events["position_goal_low_center"] | df_events["position_goal_mid_center"] | df_events["position_goal_high_center"] ) df_events.loc[shot & goal_center_idx, "end_x"] = 100.0 df_events.loc[shot & goal_center_idx, "end_y"] = 50.0 goal_right_idx = ( df_events["position_goal_low_right"] | df_events["position_goal_mid_right"] | df_events["position_goal_high_right"] ) df_events.loc[shot & goal_right_idx, "end_x"] = 100.0 df_events.loc[shot & goal_right_idx, "end_y"] = 55.0 goal_left_idx = ( df_events["position_goal_mid_left"] | df_events["position_goal_low_left"] | df_events["position_goal_high_left"] ) df_events.loc[shot & goal_left_idx, "end_x"] = 100.0 df_events.loc[shot & goal_left_idx, "end_y"] = 45.0 out_center_idx = df_events["position_out_high_center"] | df_events["position_post_high_center"] df_events.loc[shot & out_center_idx, "end_x"] = 100.0 df_events.loc[shot & out_center_idx, "end_y"] = 50.0 out_right_idx = ( df_events["position_out_low_right"] | df_events["position_out_mid_right"] | df_events["position_out_high_right"] ) df_events.loc[shot & out_right_idx, "end_x"] = 100.0 df_events.loc[shot & out_right_idx, "end_y"] = 60.0 out_left_idx = ( df_events["position_out_mid_left"] | df_events["position_out_low_left"] | df_events["position_out_high_left"] ) df_events.loc[shot & out_left_idx, "end_x"] = 100.0 df_events.loc[shot & out_left_idx, "end_y"] = 40.0 post_left_idx = ( df_events["position_post_mid_left"] | df_events["position_post_low_left"] | df_events["position_post_high_left"] ) df_events.loc[shot & post_left_idx, "end_x"] = 100.0 df_events.loc[shot & post_left_idx, "end_y"] = 55.38 post_right_idx = ( df_events["position_post_low_right"] | df_events["position_post_mid_right"] | df_events["position_post_high_right"] ) df_events.loc[shot & post_right_idx, "end_x"] = 100.0 df_events.loc[shot & post_right_idx, "end_y"] = 44.62 blocked_idx = df_events["blocked"] df_events.loc[(shot | pas) & blocked_idx, "end_x"] = df_events.loc[blocked_idx, "start_x"] df_events.loc[(shot | pas) & blocked_idx, "end_y"] = df_events.loc[blocked_idx, "start_y"] return df_events def convert_duels(df_events: pd.DataFrame) -> pd.DataFrame: """Convert duel events. This function converts Wyscout duels that end with the ball out of field (subtype_id 50) into a pass for the player winning the duel to the location of where the ball went out of field. The remaining duels are removed as they are not on-the-ball actions. Parameters ---------- df_events : pd.DataFrame Wyscout event dataframe Returns ------- pd.DataFrame Wyscout event dataframe in which the duels are either removed or transformed into a pass """ # Shift events dataframe by one and two time steps df_events1 = df_events.shift(-1) df_events2 = df_events.shift(-2) # Define selector for same period id selector_same_period = df_events["period_id"] == df_events2["period_id"] # Define selector for duels that are followed by an 'out of field' event selector_duel_out_of_field = ( (df_events["type_id"] == 1) & (df_events1["type_id"] == 1) & (df_events2["subtype_id"] == 50) & selector_same_period ) # Define selectors for current time step selector0_duel_won = selector_duel_out_of_field & ( df_events["team_id"] != df_events2["team_id"] ) selector0_duel_won_air = selector0_duel_won & (df_events["subtype_id"] == 10) selector0_duel_won_not_air = selector0_duel_won & (df_events["subtype_id"] != 10) # Define selectors for next time step selector1_duel_won = selector_duel_out_of_field & ( df_events1["team_id"] != df_events2["team_id"] ) selector1_duel_won_air = selector1_duel_won & (df_events1["subtype_id"] == 10) selector1_duel_won_not_air = selector1_duel_won & (df_events1["subtype_id"] != 10) # Aggregate selectors selector_duel_won = selector0_duel_won | selector1_duel_won selector_duel_won_air = selector0_duel_won_air | selector1_duel_won_air selector_duel_won_not_air = selector0_duel_won_not_air | selector1_duel_won_not_air # Set types and subtypes df_events.loc[selector_duel_won, "type_id"] = 8 df_events.loc[selector_duel_won_air, "subtype_id"] = 82 df_events.loc[selector_duel_won_not_air, "subtype_id"] = 85 # set end location equal to ball out of field location df_events.loc[selector_duel_won, "accurate"] = False df_events.loc[selector_duel_won, "not_accurate"] = True df_events.loc[selector_duel_won, "end_x"] = 100 - df_events2.loc[selector_duel_won, "start_x"] df_events.loc[selector_duel_won, "end_y"] = 100 - df_events2.loc[selector_duel_won, "start_y"] # df_events.loc[selector_duel_won, 'end_x'] = df_events2.loc[selector_duel_won, 'start_x'] # df_events.loc[selector_duel_won, 'end_y'] = df_events2.loc[selector_duel_won, 'start_y'] # Define selector for ground attacking duels with take on selector_attacking_duel = df_events["subtype_id"] == 11 selector_take_on = (df_events["take_on_left"]) | (df_events["take_on_right"]) selector_att_duel_take_on = selector_attacking_duel & selector_take_on # Set take ons type to 0 df_events.loc[selector_att_duel_take_on, "type_id"] = 0 # Set sliding tackles type to 0 df_events.loc[df_events["sliding_tackle"], "type_id"] = 0 # Remove the remaining duels df_events = df_events[df_events["type_id"] != 1] # Reset the index df_events = df_events.reset_index(drop=True) return df_events def insert_interceptions(df_events: pd.DataFrame) -> pd.DataFrame: """Insert interception actions before passes, clearances and dribbles. This function converts passes (type_id 8), clearances (subtype_id 71) and accelerations (subtype_id 70) that are also interceptions (tag interception) in the Wyscout event data into two separate events, first an interception and then a pass/clearance/dribble. Parameters ---------- df_events : pd.DataFrame Wyscout event dataframe Returns ------- pd.DataFrame Wyscout event dataframe in which passes that were also denoted as interceptions in the Wyscout notation are transformed into two events """ df_events_interceptions = df_events[ df_events["interception"] & ( (df_events["type_id"] == 8) | (df_events["subtype_id"] == 70) | (df_events["subtype_id"] == 71) ) ].copy() if not df_events_interceptions.empty: df_events_interceptions.loc[:, [t[1] for t in wyscout_tags]] = False df_events_interceptions["interception"] = True df_events_interceptions["type_id"] = 0 df_events_interceptions["subtype_id"] = 0 df_events_interceptions[["end_x", "end_y"]] = df_events_interceptions[ ["start_x", "start_y"] ] df_events = pd.concat([df_events_interceptions, df_events], ignore_index=True) df_events = df_events.sort_values(["period_id", "milliseconds"], kind="mergesort") df_events = df_events.reset_index(drop=True) return df_events def add_offside_variable(df_events: pd.DataFrame) -> pd.DataFrame: """Attach offside events to the previous action. This function removes the offside events in the Wyscout event data and adds sets offside to 1 for the previous event (if this was a passing event) Parameters ---------- df_events : pd.DataFrame Wyscout event dataframe Returns ------- pd.DataFrame Wyscout event dataframe with an extra column 'offside' """ # Create a new column for the offside variable df_events["offside"] = 0 # Shift events dataframe by one timestep df_events1 = df_events.shift(-1) # Select offside passes selector_offside = (df_events1["type_id"] == 6) & (df_events["type_id"] == 8) # Set variable 'offside' to 1 for all offside passes df_events.loc[selector_offside, "offside"] = 1 # Remove offside events df_events = df_events[df_events["type_id"] != 6] # Reset index df_events = df_events.reset_index(drop=True) return df_events def convert_simulations(df_events: pd.DataFrame) -> pd.DataFrame: """Convert simulations to failed take-ons. Parameters ---------- df_events : pd.DataFrame Wyscout event dataframe Returns ------- pd.DataFrame Wyscout event dataframe in which simulation events are either transformed into a failed take-on """ prev_events = df_events.shift(1) # Select simulations selector_simulation = df_events["subtype_id"] == 25 # Select actions preceded by a failed take-on selector_previous_is_failed_take_on = ( (prev_events["take_on_left"]) | (prev_events["take_on_right"]) & prev_events["not_accurate"] ) # Transform simulations not preceded by a failed take-on to a failed take-on df_events.loc[selector_simulation & ~selector_previous_is_failed_take_on, "type_id"] = 0 df_events.loc[selector_simulation & ~selector_previous_is_failed_take_on, "subtype_id"] = 0 df_events.loc[selector_simulation & ~selector_previous_is_failed_take_on, "accurate"] = False df_events.loc[selector_simulation & ~selector_previous_is_failed_take_on, "not_accurate"] = ( True ) # Set take_on_left or take_on_right to True df_events.loc[selector_simulation & ~selector_previous_is_failed_take_on, "take_on_left"] = ( True ) # Remove simulation events which are preceded by a failed take-on df_events = df_events[~(selector_simulation & selector_previous_is_failed_take_on)] # Reset index df_events = df_events.reset_index(drop=True) return df_events def convert_touches(df_events: pd.DataFrame) -> pd.DataFrame: """Convert touch events to dribbles or passes. This function converts the Wyscout 'touch' event (sub_type_id 72) into either a dribble or a pass (accurate or not depending on receiver) Parameters ---------- df_events : pd.DataFrame Wyscout event dataframe Returns ------- pd.DataFrame Wyscout event dataframe without any touch events """ df_events1 = df_events.shift(-1) selector_touch = ( (df_events["subtype_id"] == 72) & ~df_events["interception"] & ~df_events["missed_ball"] ) selector_same_player = df_events["player_id"] == df_events1["player_id"] selector_same_team = df_events["team_id"] == df_events1["team_id"] # selector_touch_same_player = selector_touch & selector_same_player selector_touch_same_team = selector_touch & ~selector_same_player & selector_same_team selector_touch_other = selector_touch & ~selector_same_player & ~selector_same_team same_x = abs(df_events["end_x"] - df_events1["start_x"]) < min_dribble_length same_y = abs(df_events["end_y"] - df_events1["start_y"]) < min_dribble_length same_loc = same_x & same_y # df_events.loc[selector_touch_same_player & same_loc, 'subtype_id'] = 70 # df_events.loc[selector_touch_same_player & same_loc, 'accurate'] = True # df_events.loc[selector_touch_same_player & same_loc, 'not_accurate'] = False df_events.loc[selector_touch_same_team & same_loc, "type_id"] = 8 df_events.loc[selector_touch_same_team & same_loc, "subtype_id"] = 85 df_events.loc[selector_touch_same_team & same_loc, "accurate"] = True df_events.loc[selector_touch_same_team & same_loc, "not_accurate"] = False df_events.loc[selector_touch_other & same_loc, "type_id"] = 8 df_events.loc[selector_touch_other & same_loc, "subtype_id"] = 85 df_events.loc[selector_touch_other & same_loc, "accurate"] = False df_events.loc[selector_touch_other & same_loc, "not_accurate"] = True return df_events def create_df_actions(df_events: pd.DataFrame) -> pd.DataFrame: """Create the SciSports action dataframe. Parameters ---------- df_events : pd.DataFrame Wyscout event dataframe Returns ------- pd.DataFrame SciSports action dataframe """ df_events["time_seconds"] = df_events["milliseconds"] / 1000 df_actions = df_events[ [ "game_id", "period_id", "time_seconds", "team_id", "player_id", "start_x", "start_y", "end_x", "end_y", ] ].copy() df_actions["original_event_id"] = df_events["event_id"].astype(object) df_actions["bodypart_id"] = df_events.apply(determine_bodypart_id, axis=1) df_actions["type_id"] = df_events.apply(determine_type_id, axis=1) df_actions["result_id"] = df_events.apply(determine_result_id, axis=1) df_actions = remove_non_actions(df_actions) # remove all non-actions left return df_actions def determine_bodypart_id(event: pd.DataFrame) -> int: """Determint eht body part for each action. Parameters ---------- event : pd.Series Wyscout event Series Returns ------- int id of the body part used for the action """ if event["subtype_id"] in [81, 36, 21, 90, 91]: body_part = "other" elif event["subtype_id"] == 82: body_part = "head" elif event["type_id"] == 10 and event["head/body"]: body_part = "head/other" elif event["left_foot"]: body_part = "foot_left" elif event["right_foot"]: body_part = "foot_right" else: # all other cases body_part = "foot" return spadlconfig.bodyparts.index(body_part) def determine_type_id(event: pd.DataFrame) -> int: # noqa: C901 """Determine the type of each action. This function transforms the Wyscout events, sub_events and tags into the corresponding SciSports action type Parameters ---------- event : pd.Series A series from the Wyscout event dataframe Returns ------- int id of the action type """ if event["fairplay"]: action_type = "non_action" elif event["own_goal"]: action_type = "bad_touch" elif event["type_id"] == 8: if event["subtype_id"] == 80: action_type = "cross" else: action_type = "pass" elif event["subtype_id"] == 36: action_type = "throw_in" elif event["subtype_id"] == 30: if event["high"]: action_type = "corner_crossed" else: action_type = "corner_short" elif event["subtype_id"] == 32: action_type = "freekick_crossed" elif event["subtype_id"] == 31: action_type = "freekick_short" elif event["subtype_id"] == 34: action_type = "goalkick" elif event["type_id"] == 2 and (event["subtype_id"] not in [22, 23, 24, 26]): action_type = "foul" elif event["type_id"] == 10: action_type = "shot" elif event["subtype_id"] == 35: action_type = "shot_penalty" elif event["subtype_id"] == 33: action_type = "shot_freekick" elif event["type_id"] == 9: action_type = "keeper_save" elif event["subtype_id"] == 71: action_type = "clearance" elif event["subtype_id"] == 72 and event["not_accurate"]: action_type = "bad_touch" elif event["subtype_id"] == 70: action_type = "dribble" elif event["take_on_left"] or event["take_on_right"]: action_type = "take_on" elif event["sliding_tackle"]: action_type = "tackle" elif event["interception"] and (event["subtype_id"] in [0, 10, 11, 12, 13, 72]): action_type = "interception" else: action_type = "non_action" return spadlconfig.actiontypes.index(action_type) def determine_result_id(event: pd.DataFrame) -> int: # noqa: C901 """Determine the result of each event. Parameters ---------- event : pd.Series Wyscout event Series Returns ------- int result of the action """ if event["offside"] == 1: return 2 if event["type_id"] == 2: # foul if event["yellow_card"]: return 4 elif event["second_yellow_card"] or event["red_card"]: return 5 return 0 if event["goal"]: # goal return 1 if event["own_goal"]: # own goal return 3 if event["subtype_id"] in [100, 33, 35]: # no goal, so 0 return 0 if event["accurate"]: return 1 if event["not_accurate"]: return 0 if ( event["interception"] or event["clearance"] or event["subtype_id"] == 71 ): # interception or clearance always success return 1 if event["type_id"] == 9: # keeper save always success return 1 # no idea, assume it was successful return 1 def remove_non_actions(df_actions: pd.DataFrame) -> pd.DataFrame: """Remove the remaining non_actions from the action dataframe. Parameters ---------- df_actions : pd.DataFrame SciSports action dataframe Returns ------- pd.DataFrame SciSports action dataframe without non-actions """ df_actions = df_actions[df_actions["type_id"] != spadlconfig.actiontypes.index("non_action")] # remove remaining ball out of field, whistle and goalkeeper from line df_actions = df_actions.reset_index(drop=True) return df_actions def fix_actions(df_actions: pd.DataFrame) -> pd.DataFrame: """Fix the generated actions. Parameters ---------- df_actions : pd.DataFrame SPADL actions dataframe Returns ------- pd.DataFrame SpADL actions dataframe with end coordinates for shots """ df_actions["start_x"] = (df_actions["start_x"] * spadlconfig.field_length / 100).clip( 0, spadlconfig.field_length ) df_actions["start_y"] = ( (100 - df_actions["start_y"]) * spadlconfig.field_width / 100 # y is from top to bottom in Wyscout ).clip(0, spadlconfig.field_width) df_actions["end_x"] = (df_actions["end_x"] * spadlconfig.field_length / 100).clip( 0, spadlconfig.field_length ) df_actions["end_y"] = ( (100 - df_actions["end_y"]) * spadlconfig.field_width / 100 # y is from top to bottom in Wyscout ).clip(0, spadlconfig.field_width) df_actions = fix_goalkick_coordinates(df_actions) df_actions = adjust_goalkick_result(df_actions) df_actions = fix_foul_coordinates(df_actions) df_actions = fix_keeper_save_coordinates(df_actions) df_actions = remove_keeper_goal_actions(df_actions) df_actions.reset_index(drop=True, inplace=True) return df_actions def fix_goalkick_coordinates(df_actions: pd.DataFrame) -> pd.DataFrame: """Fix goalkick coordinates. This function sets the goalkick start coordinates to (5,34) Parameters ---------- df_actions : pd.DataFrame SciSports action dataframe with start coordinates for goalkicks in the corner of the pitch Returns ------- pd.DataFrame SciSports action dataframe including start coordinates for goalkicks """ goalkicks_idx = df_actions["type_id"] == spadlconfig.actiontypes.index("goalkick") df_actions.loc[goalkicks_idx, "start_x"] = 5.0 df_actions.loc[goalkicks_idx, "start_y"] = 34.0 return df_actions def fix_foul_coordinates(df_actions: pd.DataFrame) -> pd.DataFrame: """Fix fould coordinates. This function sets foul end coordinates equal to the foul start coordinates Parameters ---------- df_actions : pd.DataFrame SciSports action dataframe with no end coordinates for fouls Returns ------- pd.DataFrame SciSports action dataframe including start coordinates for goalkicks """ fouls_idx = df_actions["type_id"] == spadlconfig.actiontypes.index("foul") df_actions.loc[fouls_idx, "end_x"] = df_actions.loc[fouls_idx, "start_x"] df_actions.loc[fouls_idx, "end_y"] = df_actions.loc[fouls_idx, "start_y"] return df_actions def fix_keeper_save_coordinates(df_actions: pd.DataFrame) -> pd.DataFrame: """Fix keeper save coordinates. This function sets keeper_save start coordinates equal to keeper_save end coordinates. It also inverts the shot coordinates to the own goal. Parameters ---------- df_actions : pd.DataFrame SciSports action dataframe with start coordinates in the corner of the pitch Returns ------- pd.DataFrame SciSports action dataframe with correct keeper_save coordinates """ saves_idx = df_actions["type_id"] == spadlconfig.actiontypes.index("keeper_save") # invert the coordinates df_actions.loc[saves_idx, "end_x"] = ( spadlconfig.field_length - df_actions.loc[saves_idx, "end_x"] ) df_actions.loc[saves_idx, "end_y"] = ( spadlconfig.field_width - df_actions.loc[saves_idx, "end_y"] ) # set start coordinates equal to start coordinates df_actions.loc[saves_idx, "start_x"] = df_actions.loc[saves_idx, "end_x"] df_actions.loc[saves_idx, "start_y"] = df_actions.loc[saves_idx, "end_y"] return df_actions def remove_keeper_goal_actions(df_actions: pd.DataFrame) -> pd.DataFrame: """Remove keeper goal-saving actions. This function removes keeper_save actions that appear directly after a goal Parameters ---------- df_actions : pd.DataFrame SciSports action dataframe with keeper actions directly after a goal Returns ------- pd.DataFrame SciSports action dataframe without keeper actions directly after a goal """ prev_actions = df_actions.shift(1) same_phase = prev_actions.time_seconds + 10 > df_actions.time_seconds shot_goals = (prev_actions.type_id == spadlconfig.actiontypes.index("shot")) & ( prev_actions.result_id == 1 ) penalty_goals = (prev_actions.type_id == spadlconfig.actiontypes.index("shot_penalty")) & ( prev_actions.result_id == 1 ) freekick_goals = (prev_actions.type_id == spadlconfig.actiontypes.index("shot_freekick")) & ( prev_actions.result_id == 1 ) goals = shot_goals | penalty_goals | freekick_goals keeper_save = df_actions["type_id"] == spadlconfig.actiontypes.index("keeper_save") goals_keepers_idx = same_phase & goals & keeper_save df_actions = df_actions.drop(df_actions.index[goals_keepers_idx]) df_actions = df_actions.reset_index(drop=True) return df_actions def adjust_goalkick_result(df_actions: pd.DataFrame) -> pd.DataFrame: """Adjust goalkick results. This function adjusts goalkick results depending on whether the next action is performed by the same team or not Parameters ---------- df_actions : pd.DataFrame SciSports action dataframe with incorrect goalkick results Returns ------- pd.DataFrame SciSports action dataframe with correct goalkick results """ nex_actions = df_actions.shift(-1) goalkicks = df_actions["type_id"] == spadlconfig.actiontypes.index("goalkick") same_team = df_actions["team_id"] == nex_actions["team_id"] accurate = same_team & goalkicks not_accurate = ~same_team & goalkicks df_actions.loc[accurate, "result_id"] = 1 df_actions.loc[not_accurate, "result_id"] = 0 return df_actions