"""Base class and utility functions for all event stream data serializers.

A serializer should extend the 'EventDataLoader' class to (down)load event
stream data.
"""
|
|
|
import base64 |
|
import json |
|
import warnings |
|
from abc import ABC, abstractmethod |
|
from typing import Any, Union |
|
from urllib import request |
|
|
|
from pandera.typing import DataFrame |
|
|
|
# Type alias for a decoded JSON document: a scalar (str, int, float, bool or
# None), an object (dict with string keys) or an array (list). It is used as
# the return annotation of the JSON loading helpers in this module.
JSONType = Union[str, int, float, bool, None, dict[str, Any], list[Any]]
|
|
|
|
|
class ParseError(Exception):
    """Exception raised when a file is not correctly formatted.

    This is a plain :class:`Exception` subclass; it adds no attributes or
    behavior of its own.
    """
|
|
|
|
|
class MissingDataError(Exception):
    """Exception raised when a field is missing in the input data.

    This is a plain :class:`Exception` subclass; it adds no attributes or
    behavior of its own.
    """
|
|
|
|
|
class NoAuthWarning(UserWarning):
    """Warning raised when no user credentials are provided.

    It is emitted through :func:`warnings.warn` by ``_has_auth`` when the
    "user" or "passwd" entry of the credentials is missing or empty. Being a
    :class:`UserWarning` subclass, it can be filtered with the standard
    :mod:`warnings` machinery.
    """
|
|
|
|
|
def _remoteloadjson(path: str) -> JSONType: |
|
"""Load JSON data from a URL. |
|
|
|
Parameters |
|
---------- |
|
path : str |
|
URL of the data source. |
|
|
|
Returns |
|
------- |
|
JSONType |
|
A dictionary with the loaded JSON data. |
|
""" |
|
return json.loads(request.urlopen(path).read()) |
|
|
|
|
|
def _auth_remoteloadjson(user: str, passwd: str) -> None: |
|
"""Add a Authorization header to all requests. |
|
|
|
Parameters |
|
---------- |
|
user : str |
|
Username. |
|
passwd : str |
|
Password. |
|
""" |
|
auth = base64.b64encode(f"{user}:{passwd}".encode()) |
|
opener = request.build_opener() |
|
opener.addheaders = [("Authorization", f"Basic {auth.decode()}")] |
|
request.install_opener(opener) |
|
|
|
|
|
def _localloadjson(path: str) -> JSONType: |
|
"""Load a dictionary from a JSON's filepath. |
|
|
|
Parameters |
|
---------- |
|
path : str |
|
JSON's filepath. |
|
|
|
Returns |
|
------- |
|
JSONType |
|
A dictionary with the data loaded. |
|
""" |
|
with open(path, encoding="utf-8") as fh: |
|
return json.load(fh) |
|
|
|
|
|
def _has_auth(creds: dict[str, str]) -> bool: |
|
"""Check if user credentials are provided. |
|
|
|
Parameters |
|
---------- |
|
creds : dict |
|
A dictionary with user credentials. It should contain "user" and |
|
"passwd" keys. |
|
|
|
Returns |
|
------- |
|
bool |
|
True if user credentials are provided, False otherwise. |
|
""" |
|
if creds.get("user") in [None, ""] or creds.get("passwd") in [None, ""]: |
|
warnings.warn("Credentials were not supplied. Public data access only.", NoAuthWarning) |
|
return False |
|
return True |
|
|
|
|
|
def _expand_minute(minute: int, periods_duration: list[int]) -> int: |
|
"""Expand a timestamp with injury time of previous periods. |
|
|
|
Parameters |
|
---------- |
|
minute : int |
|
Timestamp in minutes. |
|
periods_duration : List[int] |
|
Total duration of each period in minutes. |
|
|
|
Returns |
|
------- |
|
int |
|
Timestamp expanded with injury time. |
|
""" |
|
expanded_minute = minute |
|
periods_regular = [45, 45, 15, 15, 0] |
|
for period in range(len(periods_duration) - 1): |
|
if minute > sum(periods_regular[: period + 1]): |
|
expanded_minute += periods_duration[period] - periods_regular[period] |
|
else: |
|
break |
|
return expanded_minute |
|
|
|
|
|
class EventDataLoader(ABC):
    """Load event data either from a remote location or from a local folder.

    This abstract base class only declares the loader interface. All methods
    below are abstract and must be implemented by a concrete subclass before
    it can be instantiated.

    Parameters
    ----------
    root : str
        Root-path of the data.
    getter : str
        "remote" or "local"

    Notes
    -----
    No ``__init__`` is defined on this base class, so the parameters listed
    above are presumably accepted by the concrete loaders -- confirm against
    the individual subclasses.
    """

    @abstractmethod
    def competitions(self) -> DataFrame[Any]:
        """Return a dataframe with all available competitions and seasons.

        Abstract: subclasses must provide the implementation.

        Returns
        -------
        pd.DataFrame
            A dataframe containing all available competitions and seasons. See
            :class:`~socceraction.spadl.base.CompetitionSchema` for the schema.
        """

    @abstractmethod
    def games(self, competition_id: int, season_id: int) -> DataFrame[Any]:
        """Return a dataframe with all available games in a season.

        Abstract: subclasses must provide the implementation.

        Parameters
        ----------
        competition_id : int
            The ID of the competition.
        season_id : int
            The ID of the season.

        Returns
        -------
        pd.DataFrame
            A dataframe containing all available games. See
            :class:`~socceraction.spadl.base.GameSchema` for the schema.
        """

    @abstractmethod
    def teams(self, game_id: int) -> DataFrame[Any]:
        """Return a dataframe with both teams that participated in a game.

        Abstract: subclasses must provide the implementation.

        Parameters
        ----------
        game_id : int
            The ID of the game.

        Returns
        -------
        pd.DataFrame
            A dataframe containing both teams. See
            :class:`~socceraction.spadl.base.TeamSchema` for the schema.
        """

    @abstractmethod
    def players(self, game_id: int) -> DataFrame[Any]:
        """Return a dataframe with all players that participated in a game.

        Abstract: subclasses must provide the implementation.

        Parameters
        ----------
        game_id : int
            The ID of the game.

        Returns
        -------
        pd.DataFrame
            A dataframe containing all players. See
            :class:`~socceraction.spadl.base.PlayerSchema` for the schema.
        """

    @abstractmethod
    def events(self, game_id: int) -> DataFrame[Any]:
        """Return a dataframe with the event stream of a game.

        Abstract: subclasses must provide the implementation.

        Parameters
        ----------
        game_id : int
            The ID of the game.

        Returns
        -------
        pd.DataFrame
            A dataframe containing the event stream. See
            :class:`~socceraction.spadl.base.EventSchema` for the schema.
        """
|
|