|
"""XML parser for Opta F24 feeds.""" |
|
|
|
from datetime import datetime |
|
from typing import Any |
|
|
|
from lxml import objectify |
|
|
|
from .base import OptaXMLParser, _get_end_x, _get_end_y, assertget |
|
|
|
|
|
class F24XMLParser(OptaXMLParser):
    """Extract data from an Opta F24 data stream.

    Parameters
    ----------
    path : str
        Path of the data file.
    """

    def _get_doc(self) -> objectify.ObjectifiedElement:
        """Return the document root, i.e. ``self.root``."""
        return self.root

    def extract_games(self) -> dict[int, dict[str, Any]]:
        """Return a dictionary with all available games.

        Only the first ``Game`` element found under the document root is
        read, so the returned mapping contains a single entry.

        Returns
        -------
        dict
            A mapping between game IDs and the information available about
            each game in the data stream.
        """
        # NOTE(review): ``find`` yields None when there is no "Game" child,
        # in which case the attribute access below fails — confirm that
        # every feed handled here carries one.
        game_node = self._get_doc().find("Game")
        game_attrs = game_node.attrib
        game_id = int(assertget(game_attrs, "id"))

        # All attributes are fetched through ``assertget`` (helper from
        # ``.base``; presumably raises when the key is missing — verify).
        game_info = dict(
            game_id=game_id,
            season_id=int(assertget(game_attrs, "season_id")),
            competition_id=int(assertget(game_attrs, "competition_id")),
            game_day=int(assertget(game_attrs, "matchday")),
            game_date=datetime.strptime(
                assertget(game_attrs, "game_date"), "%Y-%m-%dT%H:%M:%S"
            ),
            home_team_id=int(assertget(game_attrs, "home_team_id")),
            away_team_id=int(assertget(game_attrs, "away_team_id")),
            home_score=int(assertget(game_attrs, "home_score")),
            away_score=int(assertget(game_attrs, "away_score")),
        )
        return {game_id: game_info}

    def extract_events(self) -> dict[tuple[int, int], dict[str, Any]]:
        """Return a dictionary with all available events.

        Each ``Event`` child of the ``Game`` element becomes one record.
        The ``Q`` children of an event are collected into a
        ``{qualifier_id: value}`` dictionary stored under ``"qualifiers"``.

        Returns
        -------
        dict
            A mapping between (game ID, event ID) tuples and the information
            available about each event in the data stream.
        """
        game_node = self._get_doc().find("Game")
        game_id = int(assertget(game_node.attrib, "id"))

        records: dict[tuple[int, int], dict[str, Any]] = {}
        for event_node in game_node.iterchildren("Event"):
            # Work on a plain dict copy of the element's attributes.
            fields = dict(event_node.attrib)
            event_id = int(assertget(fields, "id"))

            # "qualifier_id" is mandatory on each Q element; "value" is
            # optional and stored as None when absent.
            qualifiers = {}
            for q_node in event_node.iterchildren("Q"):
                q_attrs = q_node.attrib
                qualifier_id = int(q_attrs["qualifier_id"])
                qualifiers[qualifier_id] = q_attrs.get("value")

            start_x = float(assertget(fields, "x"))
            start_y = float(assertget(fields, "y"))

            # End coordinates come from the qualifiers; when the helpers
            # return None the event is treated as ending where it started.
            end_x = _get_end_x(qualifiers)
            end_y = _get_end_y(qualifiers)
            if end_x is None:
                end_x = start_x
            if end_y is None:
                end_y = start_y

            # Optional attributes: kept as None when not present.
            raw_player = fields.get("player_id")
            raw_outcome = fields.get("outcome")

            records[(game_id, event_id)] = dict(
                game_id=game_id,
                event_id=event_id,
                period_id=int(assertget(fields, "period_id")),
                team_id=int(assertget(fields, "team_id")),
                player_id=None if raw_player is None else int(raw_player),
                type_id=int(assertget(fields, "type_id")),
                timestamp=datetime.strptime(
                    assertget(fields, "timestamp"), "%Y-%m-%dT%H:%M:%S.%f"
                ),
                minute=int(assertget(fields, "min")),
                second=int(assertget(fields, "sec")),
                outcome=None if raw_outcome is None else bool(int(raw_outcome)),
                start_x=start_x,
                start_y=start_y,
                end_x=end_x,
                end_y=end_y,
                qualifiers=qualifiers,
                # Flags default to False when the attribute is missing.
                assist=bool(int(fields.get("assist", 0))),
                keypass=bool(int(fields.get("keypass", 0))),
            )
        return records
|
|