File size: 3,381 Bytes
d6ea71e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
"""SPADL schema for Opta data."""

from typing import Optional

import pandas as pd
import pandera as pa
from pandera.typing import DateTime, Object, Series

from socceraction.data.schema import (
    CompetitionSchema,
    EventSchema,
    GameSchema,
    PlayerSchema,
    TeamSchema,
)


class OptaCompetitionSchema(CompetitionSchema):
    """Definition of a dataframe containing a list of competitions and seasons.

    This subclass declares no fields of its own; every column definition is
    inherited from ``CompetitionSchema``.
    """


class OptaGameSchema(GameSchema):
    """Definition of a dataframe containing a list of games.

    Extends ``GameSchema`` with eight additional columns. Each one is
    annotated ``Optional[...]``, so the column may be absent from the
    dataframe, and is declared with ``nullable=True``, so missing values are
    accepted when the column is present.
    """

    # NOTE(review): the integer columns below (home_score, away_score,
    # duration, attendance) combine plain ``int`` with ``nullable=True``.
    # A NumPy-backed integer column cannot hold NaN, so confirm whether a
    # nullable integer dtype (e.g. ``pd.Int64Dtype``) is intended here.
    home_score: Optional[Series[int]] = pa.Field(nullable=True)
    """The final score of the home team."""
    away_score: Optional[Series[int]] = pa.Field(nullable=True)
    """The final score of the away team."""
    duration: Optional[Series[int]] = pa.Field(nullable=True)
    """The total duration of the game in minutes."""
    referee: Optional[Series[str]] = pa.Field(nullable=True)
    """The name of the referee."""
    venue: Optional[Series[str]] = pa.Field(nullable=True)
    """The name of the stadium where the game was played."""
    attendance: Optional[Series[int]] = pa.Field(nullable=True)
    """The number of people who attended the game."""
    home_manager: Optional[Series[str]] = pa.Field(nullable=True)
    """The name of the manager of the home team."""
    away_manager: Optional[Series[str]] = pa.Field(nullable=True)
    """The name of the manager of the away team."""


class OptaPlayerSchema(PlayerSchema):
    """Definition of a dataframe containing the list of players of a game.

    Extends ``PlayerSchema`` with one additional string column,
    ``starting_position``.
    """

    # Declared without ``Optional`` and without a ``pa.Field`` override, so
    # the column must be present and pandera's default field settings apply.
    starting_position: Series[str]
    """The starting position of the player."""


class OptaTeamSchema(TeamSchema):
    """Definition of a dataframe containing the list of teams of a game.

    This subclass declares no fields of its own; every column definition is
    inherited from ``TeamSchema``.
    """


class OptaEventSchema(EventSchema):
    """Definition of a dataframe containing event stream data of a game.

    Extends ``EventSchema`` with three groups of columns:

    * required columns (``timestamp``, ``minute``, ``second``, ``outcome``,
      the four coordinate columns and ``qualifiers``);
    * optional boolean flag columns (``assist``, ``keypass``, ``goal``,
      ``shot``, ``touch``) that may be absent from the dataframe;
    * an optional, nullable ``related_player_id`` column.
    """

    timestamp: Series[DateTime]
    """Time in the match the event takes place, recorded to the millisecond."""
    minute: Series[int]
    """The minutes on the clock at the time of this event."""
    # The field checks restrict values to the inclusive range 0..59.
    second: Series[int] = pa.Field(ge=0, le=59)
    """The second part of the timestamp."""
    outcome: Series[bool]
    """Whether the event had a successful outcome or not."""
    # The coordinate columns must be present (no ``Optional``) but accept
    # missing values because of ``nullable=True``.
    start_x: Series[float] = pa.Field(nullable=True)
    """The x coordinate of the location where the event started."""
    start_y: Series[float] = pa.Field(nullable=True)
    """The y coordinate of the location where the event started."""
    end_x: Series[float] = pa.Field(nullable=True)
    """The x coordinate of the location where the event ended."""
    end_y: Series[float] = pa.Field(nullable=True)
    """The y coordinate of the location where the event ended."""
    # Validated against the generic ``Object`` dtype only; the structure of
    # each value is not checked by this schema.
    qualifiers: Series[Object]
    """A JSON object containing the Opta qualifiers of the event."""
    # The boolean flag columns below are annotated ``Optional``: they may be
    # left out of the dataframe. No ``pa.Field`` is given, so pandera's
    # default field settings apply when a column is present.
    assist: Optional[Series[bool]]
    """Whether the event was an assist or not."""
    keypass: Optional[Series[bool]]
    """Whether the event was a keypass or not."""
    goal: Optional[Series[bool]]
    """Whether the event was a goal or not."""
    shot: Optional[Series[bool]]
    """Whether the event was a shot or not."""
    touch: Optional[Series[bool]]
    """Whether the event was a on-the-ball action or not."""
    # Uses pandas' nullable integer extension dtype together with
    # ``nullable=True`` so that a missing ID can be stored in an integer
    # column.
    related_player_id: Optional[Series[pd.Int64Dtype]] = pa.Field(nullable=True)
    """The ID of a second player that was involved in this event."""