File size: 4,208 Bytes
d6ea71e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 |
"""SPADL schema for StatsBomb data."""
from typing import Optional
import pandera as pa
from pandera.typing import Object, Series, Timedelta
from socceraction.data.schema import (
CompetitionSchema,
EventSchema,
GameSchema,
PlayerSchema,
TeamSchema,
)
class StatsBombCompetitionSchema(CompetitionSchema):
    """Schema for a dataframe listing StatsBomb competitions and seasons.

    Builds on ``CompetitionSchema`` and declares two additional string
    columns: ``country_name`` and ``competition_gender``.
    """

    # Neither column is given a custom ``pa.Field``.
    country_name: Series[str]
    """The name of the country the competition relates to."""

    competition_gender: Series[str]
    """The gender of the players competing in the competition."""
class StatsBombGameSchema(GameSchema):
    """Schema for a dataframe listing StatsBomb games.

    Builds on ``GameSchema`` and declares the competition stage, the final
    score of both teams, and the (nullable) venue and referee names.
    """

    # --- Competition context -------------------------------------------
    competition_stage: Series[str]
    """The name of the phase of the competition this game is in."""

    # --- Final result ---------------------------------------------------
    home_score: Series[int]
    """The final score of the home team."""

    away_score: Series[int]
    """The final score of the away team."""

    # --- Match officials and location (both declared nullable) ---------
    venue: Series[str] = pa.Field(nullable=True)
    """The name of the stadium where the game was played."""

    referee: Series[str] = pa.Field(nullable=True)
    """The name of the referee."""
class StatsBombPlayerSchema(PlayerSchema):
    """Schema for a dataframe listing the players of a StatsBomb game.

    Builds on ``PlayerSchema`` and declares a nullable nickname plus the
    identifier and name of each player's starting position.
    """

    # The nickname is the only column here that is declared nullable.
    nickname: Series[str] = pa.Field(nullable=True)
    """The nickname of the player on the team."""

    # --- Starting position (id / name pair) -----------------------------
    starting_position_id: Series[int]
    """The unique identifier for the starting position of the player on the team."""

    starting_position_name: Series[str]
    """The name of the starting position of the player on the team."""
class StatsBombTeamSchema(TeamSchema):
    """Schema for a dataframe listing the teams of a StatsBomb game.

    Declares no columns of its own; everything is inherited from
    ``TeamSchema``.
    """
class StatsBombEventSchema(EventSchema):
    """Schema for a dataframe holding the event stream of a StatsBomb game.

    Builds on ``EventSchema`` and declares the StatsBomb-specific columns
    below. Columns assigned ``pa.Field(nullable=True)`` accept missing
    values; the two ``*_360`` columns are additionally annotated as
    ``Optional``.
    """

    # --- Ordering and timing --------------------------------------------
    index: Series[int]
    """Sequence notation for the ordering of events within each match."""

    timestamp: Series[Timedelta]
    """Time in the match the event takes place, recorded to the millisecond."""

    minute: Series[int]
    """The minutes on the clock at the time of this event."""

    # Constrained to the inclusive range 0..59.
    second: Series[int] = pa.Field(ge=0, le=59)
    """The second part of the timestamp."""

    # --- Possession -----------------------------------------------------
    possession: Series[int]
    """Indicates the current unique possession in the game."""

    possession_team_id: Series[int]
    """The ID of the team that started this possession in control of the ball."""

    possession_team_name: Series[str]
    """The name of the team that started this possession in control of the ball."""

    # --- Play pattern ---------------------------------------------------
    play_pattern_id: Series[int]
    """The ID of the play pattern relevant to this event."""

    play_pattern_name: Series[str]
    """The name of the play pattern relevant to this event."""

    # --- Event attributes -----------------------------------------------
    team_name: Series[str]
    """The name of the team this event relates to."""

    duration: Series[float] = pa.Field(nullable=True)
    """If relevant, the length in seconds the event lasted."""

    # Stored with the generic object dtype.
    extra: Series[Object]
    """A JSON string containing type-specific information."""

    # Stored with the generic object dtype.
    related_events: Series[Object]
    """A comma separated list of the IDs of related events."""

    # --- Player and position (all declared nullable) --------------------
    player_name: Series[str] = pa.Field(nullable=True)
    """The name of the player this event relates to."""

    # Typed as float rather than int; presumably so that missing values
    # can be represented -- confirm against the loader.
    position_id: Series[float] = pa.Field(nullable=True)
    """The ID of the position the player was in at the time of this event."""

    position_name: Series[str] = pa.Field(nullable=True)
    """The name of the position the player was in at the time of this event."""

    # --- Location and pressure flags (all declared nullable) ------------
    location: Series[Object] = pa.Field(nullable=True)
    """Array containing the x and y coordinates of the event."""

    under_pressure: Series[bool] = pa.Field(nullable=True)
    """Whether the action was performed while being pressured by an opponent."""

    counterpress: Series[bool] = pa.Field(nullable=True)
    """Pressing actions within 5 seconds of an open play turnover."""

    # --- 360 data -------------------------------------------------------
    # Annotated as ``Optional``: in pandera's class-based API this marks
    # a column that does not have to be present in the dataframe.
    visible_area_360: Optional[Series[Object]] = pa.Field(nullable=True)
    """An array of coordinates describing the polygon visible to the camera / in the 360 frame."""

    freeze_frame_360: Optional[Series[Object]] = pa.Field(nullable=True)
    """An array of freeze frame objects."""
|