|
"""JSON parser for Opta F24 feeds.""" |
|
|
|
from datetime import datetime |
|
from typing import Any |
|
|
|
from ...base import MissingDataError |
|
from .base import OptaJSONParser, _get_end_x, _get_end_y, assertget |
|
|
|
|
|
class F24JSONParser(OptaJSONParser):
    """Extract data from an Opta F24 data stream.

    Parameters
    ----------
    path : str
        Path of the data file.
    """

    # Format of the ``locale`` timestamp strings read from game and event
    # attributes.
    _TIMESTAMP_FORMAT = "%Y-%m-%dT%H:%M:%S.%fZ"

    def _get_doc(self) -> dict[str, Any]:
        """Return the first node of the feed whose ``data`` contains ``Games``.

        Nodes that are not dictionaries, or whose ``data`` entry is missing
        or not a dictionary, are skipped instead of aborting the search.

        Returns
        -------
        dict
            The matching node of ``self.root``.

        Raises
        ------
        MissingDataError
            If no node of ``self.root`` contains a ``Games`` entry.
        """
        for node in self.root:
            data = node.get("data") if isinstance(node, dict) else None
            if isinstance(data, dict) and "Games" in data:
                return node
        raise MissingDataError

    def _get_game_node(self) -> dict[str, Any]:
        """Return the ``Game`` entry found under ``data`` -> ``Games``.

        Shared by :meth:`extract_games` and :meth:`extract_events`, which
        both start from the same location in the document.
        """
        f24 = self._get_doc()
        data = assertget(f24, "data")
        games = assertget(data, "Games")
        return assertget(games, "Game")

    def extract_games(self) -> dict[int, dict[str, Any]]:
        """Return a dictionary with all available games.

        Returns
        -------
        dict
            A mapping between game IDs and the information available about
            each game in the data stream.

        Raises
        ------
        MissingDataError
            If the feed contains no document with a ``Games`` entry.
        """
        attr = assertget(self._get_game_node(), "@attributes")

        game_id = int(assertget(attr, "id"))
        return {
            game_id: {
                "game_id": game_id,
                "season_id": int(assertget(attr, "season_id")),
                "competition_id": int(assertget(attr, "competition_id")),
                "game_day": int(assertget(attr, "matchday")),
                "game_date": datetime.strptime(
                    assertget(assertget(attr, "game_date"), "locale"),
                    self._TIMESTAMP_FORMAT,
                ).replace(tzinfo=None),
                "home_team_id": int(assertget(attr, "home_team_id")),
                "away_team_id": int(assertget(attr, "away_team_id")),
            }
        }

    def extract_events(self) -> dict[tuple[int, int], dict[str, Any]]:
        """Return a dictionary with all available events.

        Returns
        -------
        dict
            A mapping between (game ID, event ID) tuples and the information
            available about each event in the data stream. An event's
            ``timestamp`` is ``None`` when the event has no ``TimeStamp``
            attribute or that attribute has no ``locale`` value.

        Raises
        ------
        MissingDataError
            If the feed contains no document with a ``Games`` entry.
        """
        game = self._get_game_node()
        game_attr = assertget(game, "@attributes")
        game_id = int(assertget(game_attr, "id"))

        events = {}
        for element in assertget(game, "Event"):
            attr = element["@attributes"]

            # The timestamp is optional: only parse it when a value is
            # present, since ``datetime.strptime`` raises ``TypeError`` when
            # handed ``None``.
            timestamp_attr = attr.get("TimeStamp")
            raw_timestamp = timestamp_attr.get("locale") if timestamp_attr else None
            timestamp = (
                datetime.strptime(raw_timestamp, self._TIMESTAMP_FORMAT)
                if raw_timestamp is not None
                else None
            )

            # Map each qualifier ID to its value.
            qualifiers = {
                int(q["@attributes"]["qualifier_id"]): q["@attributes"]["value"]
                for q in element.get("Q", [])
            }

            start_x = float(assertget(attr, "x"))
            start_y = float(assertget(attr, "y"))
            end_x = _get_end_x(qualifiers)
            end_y = _get_end_y(qualifiers)

            event_id = int(assertget(attr, "id"))
            events[(game_id, event_id)] = {
                "game_id": game_id,
                "event_id": event_id,
                "period_id": int(assertget(attr, "period_id")),
                "team_id": int(assertget(attr, "team_id")),
                "player_id": int(assertget(attr, "player_id")),
                "type_id": int(assertget(attr, "type_id")),
                "timestamp": timestamp,
                "minute": int(assertget(attr, "min")),
                "second": int(assertget(attr, "sec")),
                # A missing ``outcome`` attribute is treated as 1 (True).
                "outcome": bool(int(attr.get("outcome", 1))),
                "start_x": start_x,
                "start_y": start_y,
                # Fall back to the start location when the end-coordinate
                # helpers return ``None``.
                "end_x": end_x if end_x is not None else start_x,
                "end_y": end_y if end_y is not None else start_y,
                "qualifiers": qualifiers,
                # Missing ``assist``/``keypass`` attributes are treated as 0
                # (False).
                "assist": bool(int(attr.get("assist", 0))),
                "keypass": bool(int(attr.get("keypass", 0))),
            }
        return events
|
|