File size: 3,381 Bytes
d6ea71e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 |
"""SPADL schema for Opta data."""
from typing import Optional
import pandas as pd
import pandera as pa
from pandera.typing import DateTime, Object, Series
from socceraction.data.schema import (
CompetitionSchema,
EventSchema,
GameSchema,
PlayerSchema,
TeamSchema,
)
class OptaCompetitionSchema(CompetitionSchema):
    """Definition of a dataframe containing a list of competitions and seasons.

    Declares no fields of its own: the class body consists solely of this
    docstring, so the schema is whatever ``CompetitionSchema`` defines.
    """
class OptaGameSchema(GameSchema):
    """Definition of a dataframe containing a list of games.

    Extends ``GameSchema`` with the Opta-specific columns declared below.
    Every column added here is annotated ``Optional[...]`` and declared with
    ``pa.Field(nullable=True)``.
    """

    # NOTE(review): the integer columns below (home_score, away_score,
    # duration, attendance) combine ``nullable=True`` with a plain ``int``
    # dtype, whereas ``OptaEventSchema.related_player_id`` uses
    # ``pd.Int64Dtype`` for a nullable integer column. Confirm that missing
    # values in these columns validate as intended.

    # --- Result and match length ---
    home_score: Optional[Series[int]] = pa.Field(nullable=True)
    """The final score of the home team."""
    away_score: Optional[Series[int]] = pa.Field(nullable=True)
    """The final score of the away team."""
    duration: Optional[Series[int]] = pa.Field(nullable=True)
    """The total duration of the game in minutes."""

    # --- Match officials and location ---
    referee: Optional[Series[str]] = pa.Field(nullable=True)
    """The name of the referee."""
    venue: Optional[Series[str]] = pa.Field(nullable=True)
    """The name of the stadium where the game was played."""
    attendance: Optional[Series[int]] = pa.Field(nullable=True)
    """The number of people who attended the game."""

    # --- Team managers ---
    home_manager: Optional[Series[str]] = pa.Field(nullable=True)
    """The name of the manager of the home team."""
    away_manager: Optional[Series[str]] = pa.Field(nullable=True)
    """The name of the manager of the away team."""
class OptaPlayerSchema(PlayerSchema):
    """Definition of a dataframe containing the list of players of a game.

    Extends ``PlayerSchema`` with a single Opta-specific column,
    ``starting_position``.
    """

    # Unlike the extra columns in OptaGameSchema, this one is not wrapped in
    # ``Optional`` and has no ``pa.Field(nullable=True)``.
    starting_position: Series[str]
    """The starting position of the player."""
class OptaTeamSchema(TeamSchema):
    """Definition of a dataframe containing the list of teams of a game.

    Declares no fields of its own: the class body consists solely of this
    docstring, so the schema is whatever ``TeamSchema`` defines.
    """
class OptaEventSchema(EventSchema):
    """Definition of a dataframe containing event stream data of a game.

    Extends ``EventSchema`` with the Opta-specific columns declared below.
    The columns fall into four groups: timing, outcome and location, the raw
    qualifiers, and a set of optional boolean flags plus an optional related
    player reference.
    """

    # --- Timing ---
    timestamp: Series[DateTime]
    """Time in the match the event takes place, recorded to the millisecond."""
    minute: Series[int]
    """The minutes on the clock at the time of this event."""
    # Restricted to the closed range [0, 59] by the field checks.
    second: Series[int] = pa.Field(ge=0, le=59)
    """The second part of the timestamp."""

    # --- Outcome and location ---
    outcome: Series[bool]
    """Whether the event had a successful outcome or not."""
    # All four coordinates are declared nullable.
    start_x: Series[float] = pa.Field(nullable=True)
    """The x coordinate of the location where the event started."""
    start_y: Series[float] = pa.Field(nullable=True)
    """The y coordinate of the location where the event started."""
    end_x: Series[float] = pa.Field(nullable=True)
    """The x coordinate of the location where the event ended."""
    end_y: Series[float] = pa.Field(nullable=True)
    """The y coordinate of the location where the event ended."""

    # --- Raw Opta qualifiers ---
    qualifiers: Series[Object]
    """A JSON object containing the Opta qualifiers of the event."""

    # --- Optional boolean flags (annotated ``Optional``, no extra checks) ---
    assist: Optional[Series[bool]]
    """Whether the event was an assist or not."""
    keypass: Optional[Series[bool]]
    """Whether the event was a keypass or not."""
    goal: Optional[Series[bool]]
    """Whether the event was a goal or not."""
    shot: Optional[Series[bool]]
    """Whether the event was a shot or not."""
    touch: Optional[Series[bool]]
    """Whether the event was an on-the-ball action or not."""

    # --- Optional related player ---
    # Typed with pandas' nullable integer extension dtype and declared
    # nullable, so IDs stay integers where no second player is recorded.
    related_player_id: Optional[Series[pd.Int64Dtype]] = pa.Field(nullable=True)
    """The ID of a second player that was involved in this event."""
|