File size: 3,889 Bytes
d6ea71e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
"""JSON parser for Opta F1 feeds."""

from datetime import datetime
from typing import Any

from ...base import MissingDataError
from .base import OptaJSONParser, assertget


class F1JSONParser(OptaJSONParser):
    """Extract data from an Opta F1 data stream.

    Parameters
    ----------
    path : str
        Path of the data file.
    """

    def _get_feed(self) -> dict[str, Any]:
        """Return the first node of the stream that holds an ``OptaFeed``.

        Returns
        -------
        dict
            The first node in ``self.root`` whose ``"data"`` entry contains
            an ``"OptaFeed"`` key.

        Raises
        ------
        MissingDataError
            If no node in the stream contains an ``OptaFeed``.
        """
        for node in self.root:
            # A node without a (non-empty) "data" entry cannot hold the feed;
            # skip it so that the failure mode is always MissingDataError
            # rather than an incidental KeyError.
            if "OptaFeed" in (node.get("data") or {}):
                return node
        raise MissingDataError

    def _get_doc(self) -> dict[str, Any]:
        """Return the ``OptaDocument`` element of the F1 feed.

        The document is located at ``data -> OptaFeed -> OptaDocument``
        inside the node returned by :meth:`_get_feed`. Each level is read
        with ``assertget``, so a missing level is reported by that helper.
        """
        f1 = self._get_feed()
        data = assertget(f1, "data")
        optafeed = assertget(data, "OptaFeed")
        return assertget(optafeed, "OptaDocument")

    def extract_competitions(self) -> dict[tuple[int, int], dict[str, Any]]:
        """Return a dictionary with all available competitions.

        Returns
        -------
        dict
            A mapping between (competition ID, season ID) tuples and the
            information available about each competition in the data stream.
            The stream describes a single competition/season pair, so the
            mapping holds exactly one entry.
        """
        optadocument = self._get_doc()
        attr = assertget(optadocument, "@attributes")
        competition_id = int(assertget(attr, "competition_id"))
        season_id = int(assertget(attr, "season_id"))
        competition = {
            # Fields required by the base schema
            "season_id": season_id,
            # The season name is the raw season ID value, as a string.
            "season_name": str(assertget(attr, "season_id")),
            "competition_id": competition_id,
            "competition_name": assertget(attr, "competition_name"),
        }
        return {(competition_id, season_id): competition}

    def extract_games(self) -> dict[int, dict[str, Any]]:
        """Return a dictionary with all available games.

        Returns
        -------
        dict
            A mapping between game IDs and the information available about
            each game in the data stream.
        """
        optadocument = self._get_doc()
        attr = assertget(optadocument, "@attributes")
        matchdata = assertget(optadocument, "MatchData")
        # Competition and season are document-level attributes shared by all
        # games, so they are converted once instead of once per match.
        competition_id = int(assertget(attr, "competition_id"))
        season_id = int(assertget(attr, "season_id"))

        matches = {}
        for match in matchdata:
            matchattr = assertget(match, "@attributes")
            matchinfo = assertget(match, "MatchInfo")
            matchinfoattr = assertget(matchinfo, "@attributes")
            # The first character of the uID is dropped; the remainder is
            # the numeric game ID.
            game_id = int(assertget(matchattr, "uID")[1:])
            game = {
                # Fields required by the base schema
                "game_id": game_id,
                "competition_id": competition_id,
                "season_id": season_id,
                "game_day": int(assertget(matchinfoattr, "MatchDay")),
                "game_date": datetime.strptime(assertget(matchinfo, "Date"), "%Y-%m-%d %H:%M:%S"),
                # home_team_id / away_team_id and home_score / away_score
                # are filled in from TeamData below. The optional fields
                # duration, referee, venue, attendance, home_manager and
                # away_manager are not populated by this parser.
            }
            matches[game_id] = game

            for team in assertget(match, "TeamData"):
                teamattr = assertget(team, "@attributes")
                side = assertget(teamattr, "Side")
                teamid = assertget(teamattr, "TeamRef")
                score = assertget(teamattr, "Score")
                # Any side other than "Home" is treated as the away team.
                prefix = "home" if side == "Home" else "away"
                # As with uID, the first character of TeamRef is dropped.
                game[f"{prefix}_team_id"] = int(teamid[1:])
                game[f"{prefix}_score"] = int(score)
        return matches