File size: 9,356 Bytes
d6ea71e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
"""Opta event stream data to SPADL converter."""

from typing import Any, cast

import pandas as pd  # type: ignore
from pandera.typing import DataFrame

from . import config as spadlconfig
from .base import (
    _add_dribbles,
    _fix_clearances,
    _fix_direction_of_play,
    min_dribble_length,
)
from .schema import SPADLSchema


def convert_to_actions(events: pd.DataFrame, home_team_id: int) -> DataFrame[SPADLSchema]:
    """
    Translate the Opta events of one game into SPADL actions.

    Parameters
    ----------
    events : pd.DataFrame
        Opta events from a single game. The columns read here are
        ``game_id``, ``event_id``, ``period_id``, ``minute``, ``second``,
        ``team_id``, ``player_id``, ``start_x``, ``start_y``, ``end_x``,
        ``end_y``, ``type_name``, ``outcome`` and ``qualifiers``.
    home_team_id : int
        ID of the home team in the corresponding game.

    Returns
    -------
    actions : pd.DataFrame
        DataFrame with the corresponding SPADL actions.

    """
    # Subtract the nominal duration of every earlier period (45, 45, 15 and
    # 15 minutes) from the event clock.
    # NOTE(review): presumably ``minute``/``second`` are cumulative match-clock
    # values, which makes the result relative to the period start -- confirm.
    clock = 60 * events.minute + events.second
    for finished_period, minutes in ((1, 45), (2, 45), (3, 15), (4, 15)):
        clock = clock - (events.period_id > finished_period) * minutes * 60

    actions = pd.DataFrame()
    actions["game_id"] = events.game_id
    actions["original_event_id"] = events.event_id.astype(object)
    actions["period_id"] = events.period_id
    actions["time_seconds"] = clock
    actions["team_id"] = events.team_id
    actions["player_id"] = events.player_id

    # Coordinates are clipped to [0, 100] and rescaled to the pitch size.
    pitch_axes = (
        (("start_x", "end_x"), spadlconfig.field_length),
        (("start_y", "end_y"), spadlconfig.field_width),
    )
    for columns, extent in pitch_axes:
        for column in columns:
            actions[column] = events[column].clip(0, 100) / 100 * extent

    # Each mapper receives one (type_name, outcome, qualifiers) row.
    raw_event_info = events[["type_name", "outcome", "qualifiers"]]
    row_mappers = (
        ("type_id", _get_type_id),
        ("result_id", _get_result_id),
        ("bodypart_id", _get_bodypart_id),
    )
    for target_column, mapper in row_mappers:
        actions[target_column] = raw_event_info.apply(mapper, axis=1)

    # These two fixes look at the original Opta columns, so they have to run
    # while ``actions`` is still row-aligned with ``events``.
    actions = _fix_recoveries(actions, events.type_name)
    actions = _fix_unintentional_ball_touches(actions, events.type_name, events.outcome)

    # Drop everything that is not an action and order what remains
    # chronologically (mergesort is a stable sort).
    is_action = actions.type_id != spadlconfig.actiontypes.index("non_action")
    actions = actions[is_action]
    actions = actions.sort_values(["game_id", "period_id", "time_seconds"], kind="mergesort")
    actions = actions.reset_index(drop=True)

    actions = _fix_owngoals(actions)
    actions = _fix_direction_of_play(actions, home_team_id)
    actions = _fix_clearances(actions)
    actions = _fix_interceptions(actions)
    # Number the actions before ``_add_dribbles`` runs on the frame.
    actions["action_id"] = range(len(actions))
    actions = _add_dribbles(actions)

    return cast(DataFrame[SPADLSchema], actions)


def _get_bodypart_id(args: tuple[str, bool, dict[int, Any]]) -> int:
    """Return the SPADL body part index for one Opta event.

    Parameters
    ----------
    args : tuple[str, bool, dict[int, Any]]
        The event's ``(type_name, outcome, qualifiers)``. The outcome is not
        used by this function.

    Returns
    -------
    int
        Position of the body part name in ``spadlconfig.bodyparts``.
    """
    event_name, _outcome, qualifiers = args

    # Checked from top to bottom; the first rule with a matching qualifier
    # id decides the body part.
    qualifier_rules = (
        ((15, 3, 168), "head"),
        ((21,), "other"),
        ((20,), "foot_right"),
        ((72,), "foot_left"),
        ((107,), "other"),  # throw-in
    )
    for qualifier_ids, bodypart in qualifier_rules:
        if any(qualifier_id in qualifiers for qualifier_id in qualifier_ids):
            return spadlconfig.bodyparts.index(bodypart)

    # No body part qualifier present: these keeper events map to "other",
    # everything else to "foot".
    keeper_events = ("save", "claim", "punch", "keeper pick-up")
    fallback = "other" if event_name in keeper_events else "foot"
    return spadlconfig.bodyparts.index(fallback)


def _get_result_id(args: tuple[str, bool, dict[int, Any]]) -> int:
    """Return the SPADL result index for one Opta event.

    Parameters
    ----------
    args : tuple[str, bool, dict[int, Any]]
        The event's ``(type_name, outcome, qualifiers)``.

    Returns
    -------
    int
        Position of the result name in ``spadlconfig.results``.
    """
    event_name, outcome, qualifiers = args

    if event_name == "offside pass":
        result = "offside"
    elif event_name == "goal":
        # Qualifier 28 marks an own goal; x and y must be switched for those.
        result = "owngoal" if 28 in qualifiers else "success"
    elif event_name in ("foul", "attempt saved", "miss", "post", "ball touch"):
        # These event types fail regardless of the recorded outcome.
        result = "fail"
    else:
        # Every other event type simply follows the Opta outcome flag.
        result = "success" if outcome else "fail"

    return spadlconfig.results.index(result)


def _get_type_id(args: tuple[str, bool, dict[int, Any]]) -> int:  # noqa: C901
    """Return the SPADL action type index for one Opta event.

    Parameters
    ----------
    args : tuple[str, bool, dict[int, Any]]
        The event's ``(type_name, outcome, qualifiers)``.

    Returns
    -------
    int
        Position of the action type name in ``spadlconfig.actiontypes``.
        Events that do not map to a SPADL action get the index of
        ``"non_action"``.
    """
    eventname, outcome, q = args

    # Event names that translate one-to-one, whatever the qualifiers say.
    direct_types = {
        "take on": "take_on",
        "tackle": "tackle",
        "interception": "interception",
        "blocked pass": "interception",
        "claim": "keeper_claim",
        "punch": "keeper_punch",
        "keeper pick-up": "keeper_pick_up",
        "clearance": "clearance",
    }

    if 238 in q:
        # Fair-play events are never actions.
        action = "non_action"
    elif eventname in ("pass", "offside pass"):
        # Qualifier ids used below: 107 throw-in, 5 free kick, 6 corner,
        # 2 cross, 1 long ball, 155 chipped, 124 goal kick.
        if 107 in q:
            action = "throw_in"
        elif 5 in q:
            lofted = 2 in q or 1 in q or 155 in q
            action = "freekick_crossed" if lofted else "freekick_short"
        elif 6 in q:
            action = "corner_crossed" if 2 in q else "corner_short"
        elif 2 in q:
            action = "cross"
        elif 124 in q:
            action = "goalkick"
        else:
            action = "pass"
    elif eventname in ("miss", "post", "attempt saved", "goal"):
        if 9 in q:
            action = "shot_penalty"
        elif 26 in q:
            action = "shot_freekick"
        else:
            action = "shot"
    elif eventname == "save":
        # Saves carrying qualifier 94 are not counted as keeper saves.
        action = "non_action" if 94 in q else "keeper_save"
    elif eventname in ("foul", "ball touch"):
        # Only the row whose outcome is literally ``False`` becomes an action.
        if outcome is False:
            action = "foul" if eventname == "foul" else "bad_touch"
        else:
            action = "non_action"
    else:
        action = direct_types.get(eventname, "non_action")

    return spadlconfig.actiontypes.index(action)


def _fix_owngoals(actions: pd.DataFrame) -> pd.DataFrame:
    """Turn own-goal shots into bad touches with a mirrored end location.

    Rows of type ``"shot"`` whose result is ``"owngoal"`` get their end
    coordinates mirrored over both pitch axes and are relabelled as
    ``"bad_touch"``. The result is left untouched.

    Parameters
    ----------
    actions : pd.DataFrame
        SPADL actions dataframe; modified in place.

    Returns
    -------
    pd.DataFrame
        The same dataframe that was passed in.
    """
    owngoal_result = spadlconfig.results.index("owngoal")
    shot_type = spadlconfig.actiontypes.index("shot")
    is_owngoal_shot = (actions.result_id == owngoal_result) & (actions.type_id == shot_type)

    # Mirror the end location: x over the pitch length, y over the width.
    mirrored_axes = (
        ("end_x", spadlconfig.field_length),
        ("end_y", spadlconfig.field_width),
    )
    for column, extent in mirrored_axes:
        actions.loc[is_owngoal_shot, column] = extent - actions.loc[is_owngoal_shot, column].values

    actions.loc[is_owngoal_shot, "type_id"] = spadlconfig.actiontypes.index("bad_touch")
    return actions


def _fix_recoveries(df_actions: pd.DataFrame, opta_types: pd.Series) -> pd.DataFrame:
    """Convert ball recovery events to dribbles.

    Every Opta 'ball recovery' event is compared with the next row that is
    not a non-action. When the recovery ends within ``min_dribble_length``
    of that row's start location on both axes, it is labelled a non-action;
    otherwise it is labelled a dribble. In both cases the end location of
    the recovery is replaced by the start location of that next row.

    Parameters
    ----------
    df_actions : pd.DataFrame
        Opta actions dataframe; modified in place.
    opta_types : pd.Series
        Original Opta event types, row-aligned with ``df_actions``.

    Returns
    -------
    pd.DataFrame
        The same dataframe, with every ball recovery relabelled as either
        a dribble or a non-action.
    """
    non_action_id = spadlconfig.actiontypes.index("non_action")
    dribble_id = spadlconfig.actiontypes.index("dribble")

    # Row i of ``following`` holds the next real action after row i:
    # non-action rows are blanked out and then back-filled from below.
    following = df_actions.shift(-1)
    following = following.mask(following.type_id == non_action_id).bfill()

    is_recovery = opta_types == "ball recovery"

    gap_x = (df_actions["end_x"] - following["start_x"]).abs()
    gap_y = (df_actions["end_y"] - following["start_y"]).abs()
    stays_put = (gap_x < min_dribble_length) & (gap_y < min_dribble_length)

    # The two masks are disjoint, so each recovery is relabelled exactly once.
    df_actions.loc[is_recovery & stays_put, "type_id"] = non_action_id
    df_actions.loc[is_recovery & ~stays_put, "type_id"] = dribble_id

    next_start = following.loc[is_recovery, ["start_x", "start_y"]]
    df_actions.loc[is_recovery, ["end_x", "end_y"]] = next_start.values

    return df_actions


def _fix_interceptions(df_actions: pd.DataFrame) -> pd.DataFrame:
    """Set the result of interceptions to 'fail' if they do not regain possession.

    An interception counts as regaining possession when the next row
    belongs to the same team. The last row has no successor, so an
    interception in that position is marked as failed as well.

    Parameters
    ----------
    df_actions : pd.DataFrame
        Opta actions dataframe; modified in place.

    Returns
    -------
    pd.DataFrame
        The same dataframe, with the result of every interception that is
        not followed by an action of the same team set to 'fail'.
    """
    interception_id = spadlconfig.actiontypes.index("interception")
    is_interception = df_actions.type_id == interception_id

    # Team of the following row; missing for the final row.
    next_team = df_actions["team_id"].shift(-1)
    keeps_ball = df_actions.team_id == next_team

    failed_interception = is_interception & ~keeps_ball
    df_actions.loc[failed_interception, "result_id"] = spadlconfig.results.index("fail")
    return df_actions


def _fix_unintentional_ball_touches(
    df_actions: pd.DataFrame, opta_type: pd.Series, opta_outcome: pd.Series
) -> pd.DataFrame:
    """Discard unintentional ball touches.

    A row counts as deflected when the next Opta event is a 'ball touch'
    with a truthy outcome. The end location of every deflected row is
    moved to the start location of the row two positions ahead. A deflected
    row of type 'pass' is additionally marked as successful when the row two
    positions ahead belongs to the same team. The deflection itself is not
    recorded as an action, because players should not be credited for it.

    Parameters
    ----------
    df_actions : pd.DataFrame
        Opta actions dataframe; modified in place.
    opta_type : pd.Series
        Original Opta event types, row-aligned with ``df_actions``.
    opta_outcome : pd.Series
        Original Opta event outcomes, row-aligned with ``df_actions``.

    Returns
    -------
    pd.DataFrame
        The same dataframe, with end locations and pass results adjusted
        around unintentional ball touches.
    """
    pass_id = spadlconfig.actiontypes.index("pass")
    success_id = spadlconfig.results.index("success")

    # Row i of ``two_ahead`` is the row that comes after the deflection.
    two_ahead = df_actions.shift(-2)

    is_pass = df_actions["type_id"] == pass_id
    next_is_touch = opta_type.shift(-1) == "ball touch"
    is_deflected = next_is_touch & (opta_outcome.shift(-1))
    keeps_ball = df_actions["team_id"] == two_ahead["team_id"]

    # Skip over the deflection: the row ends where the row after it starts.
    landing_spot = two_ahead.loc[is_deflected, ["start_x", "start_y"]]
    df_actions.loc[is_deflected, ["end_x", "end_y"]] = landing_spot.values

    df_actions.loc[is_pass & is_deflected & keeps_ball, "result_id"] = success_id
    return df_actions