import os
from datetime import datetime

import pandas as pd
from pytest import fixture
from socceraction.data.opta import (
    OptaCompetitionSchema,
    OptaGameSchema,
    OptaPlayerSchema,
    OptaTeamSchema,
)
from socceraction.data.opta.parsers import F7XMLParser


@fixture()
def f7xml_parser() -> F7XMLParser:
    """Build an ``F7XMLParser`` for the bundled F7 match-results sample file.

    The file is looked up relative to this test module: three directory
    levels up, then ``datasets/opta``.
    """
    here = os.path.dirname(__file__)
    opta_dir = os.path.join(
        here,
        os.pardir,
        os.pardir,
        os.pardir,
        "datasets",
        "opta",
    )
    path = os.path.join(opta_dir, "f7-23-2018-1009316-matchresults.xml")
    return F7XMLParser(str(path))


def test_extract_competitions(f7xml_parser: F7XMLParser) -> None:
    """The sample file yields one competition record that passes the schema."""
    expected = {
        "competition_id": 23,
        "season_id": 2018,
        "competition_name": "Spanish La Liga",
        "season_name": "Season 2018/2019",
    }

    competitions = f7xml_parser.extract_competitions()

    assert len(competitions) == 1
    # Records are keyed by a (competition_id, season_id) tuple.
    assert competitions[(23, 2018)] == expected

    frame = pd.DataFrame.from_dict(competitions, orient="index")
    OptaCompetitionSchema.validate(frame)


def test_extract_games(f7xml_parser: F7XMLParser) -> None:
    """The sample file yields one game record that passes the schema."""
    expected = {
        "game_id": 1009316,
        "season_id": 2018,
        "competition_id": 23,
        "game_day": 1,
        "game_date": datetime(2018, 8, 20, 21, 0),
        "home_team_id": 174,
        "away_team_id": 957,
        "home_score": 2,
        "away_score": 1,
        "duration": 96,
        "referee": "Adrián Cordero Vega",
        "venue": "San Mamés",
        "attendance": 38575,
        "home_manager": "Eduardo Berizzo",
        "away_manager": "Mauricio Pellegrino",
    }

    games = f7xml_parser.extract_games()

    assert len(games) == 1
    # Records are keyed by game id.
    assert games[1009316] == expected

    frame = pd.DataFrame.from_dict(games, orient="index")
    OptaGameSchema.validate(frame)


def test_extract_teams(f7xml_parser: F7XMLParser) -> None:
    """The sample file yields two team records that pass the schema."""
    expected_away = {
        "team_id": 957,
        "team_name": "Leganés",
    }
    expected_home = {
        "team_id": 174,
        "team_name": "Athletic Club",
    }

    teams = f7xml_parser.extract_teams()

    assert len(teams) == 2
    # Records are keyed by team id.
    assert teams[957] == expected_away
    assert teams[174] == expected_home

    frame = pd.DataFrame.from_dict(teams, orient="index")
    OptaTeamSchema.validate(frame)


def test_extract_players(f7xml_parser: F7XMLParser) -> None:
    """The sample file yields 36 player records that pass the schema."""
    expected = {
        "game_id": 1009316,
        "team_id": 174,
        "player_id": 242831,
        "player_name": "Peru Nolaskoain",
        "is_starter": True,
        "minutes_played": 96,
        "jersey_number": 31,
        "starting_position": "Defender",
    }

    players = f7xml_parser.extract_players()

    assert len(players) == 36
    # Records are keyed by a (game_id, player_id) tuple.
    assert players[(1009316, 242831)] == expected

    frame = pd.DataFrame.from_dict(players, orient="index")
    OptaPlayerSchema.validate(frame)