# socr / tests /data /opta /parsers /test_ma3_json.py
# scfive's picture
# Upload 203 files
# d6ea71e verified
import os
from datetime import datetime
import pandas as pd
from pytest import fixture
from socceraction.data.opta import (
OptaCompetitionSchema,
OptaEventSchema,
OptaGameSchema,
OptaPlayerSchema,
OptaTeamSchema,
)
from socceraction.data.opta.parsers import MA3JSONParser
@fixture()
def ma3json_parser() -> MA3JSONParser:
    """Return an ``MA3JSONParser`` constructed from the sample MA3 feed path.

    The feed file is located relative to this module: three directory
    levels up, then inside ``datasets/opta``.
    """
    # Climb three levels from the directory that holds this test module.
    base_dir = os.path.join(os.path.dirname(__file__), *([os.pardir] * 3))
    feed_file = os.path.join(
        base_dir, "datasets", "opta", "ma3_bl2020-21-0000000066.json"
    )
    return MA3JSONParser(feed_file)
def test_extract_competitions(ma3json_parser: MA3JSONParser) -> None:
    """Check the single competition/season record returned by the parser."""
    competition_id = "722fdbecxzcq9788l6jqclzlw"
    season_id = "7u6i088r32wrl84442qxr0gh6"
    expected = {
        "competition_id": competition_id,
        "season_id": season_id,
        "competition_name": "2. Bundesliga",
        "season_name": "2020/2021",
    }

    extracted = ma3json_parser.extract_competitions()

    assert len(extracted) == 1
    # Records are keyed by the (competition_id, season_id) pair.
    assert extracted[(competition_id, season_id)] == expected

    # The extracted mapping must also satisfy the competition schema.
    frame = pd.DataFrame.from_dict(extracted, orient="index")
    OptaCompetitionSchema.validate(frame)
def test_extract_games(ma3json_parser: MA3JSONParser) -> None:
    """Check the single game record returned by the parser."""
    game_id = "bl2020-21-0000000066"
    expected = {
        "game_id": game_id,
        "season_id": "7u6i088r32wrl84442qxr0gh6",
        "competition_id": "722fdbecxzcq9788l6jqclzlw",
        "game_day": 8,
        "game_date": datetime(2020, 11, 21, 13, 0),
        "home_team_id": "kxpw3rqn4ukt7nqmtjj62lbn",
        "away_team_id": "aojwbjr39s1w2mcd9l2bf2dhk",
        "home_score": 2,
        "away_score": 2,
        "duration": 93,
        "venue": "Wildparkstadion",
    }

    extracted = ma3json_parser.extract_games()

    assert len(extracted) == 1
    assert extracted[game_id] == expected

    # The extracted mapping must also satisfy the game schema.
    frame = pd.DataFrame.from_dict(extracted, orient="index")
    OptaGameSchema.validate(frame)
def test_extract_teams(ma3json_parser: MA3JSONParser) -> None:
    """Check that both teams are extracted with the expected id and name."""
    expected_names = {
        "kxpw3rqn4ukt7nqmtjj62lbn": "Eintracht Braunschweig",
        "aojwbjr39s1w2mcd9l2bf2dhk": "Karlsruher SC",
    }

    extracted = ma3json_parser.extract_teams()

    assert len(extracted) == 2
    # Each record is keyed by team id and repeats that id in its payload.
    for team_id, team_name in expected_names.items():
        assert extracted[team_id] == {"team_id": team_id, "team_name": team_name}

    # The extracted mapping must also satisfy the team schema.
    frame = pd.DataFrame.from_dict(extracted, orient="index")
    OptaTeamSchema.validate(frame)
def test_extract_players(ma3json_parser: MA3JSONParser) -> None:
    """Check the player count, one full player record and a red-card case."""
    game_id = "bl2020-21-0000000066"
    substitute_key = (game_id, "yuw4a34cpasw5e4vqsg6ex1x")
    red_card_key = (game_id, "2175hvbfk4jn4lnj3cetfpp1")
    expected_substitute = {
        "game_id": game_id,
        "player_id": "yuw4a34cpasw5e4vqsg6ex1x",
        "player_name": "D. Diamantakos",
        "team_id": "aojwbjr39s1w2mcd9l2bf2dhk",
        "jersey_number": 9,
        "minutes_played": 36,
        "starting_position": "Substitute",
        "is_starter": False,
    }

    extracted = ma3json_parser.extract_players()

    assert len(extracted) == 28
    # Records are keyed by the (game_id, player_id) pair.
    assert extracted[substitute_key] == expected_substitute

    # Player who received a red card.
    red_card_record = extracted[red_card_key]
    assert red_card_record["minutes_played"] == 59

    # The extracted mapping must also satisfy the player schema.
    frame = pd.DataFrame.from_dict(extracted, orient="index")
    OptaPlayerSchema.validate(frame)
def test_extract_events(ma3json_parser: MA3JSONParser) -> None:
    """Check the event count and one fully specified event record."""
    game_id = "bl2020-21-0000000066"
    event_id = 1760864446
    expected_qualifiers = {
        1: None,
        5: None,
        56: "Right",
        140: "80.0",
        213: "0.0",
        152: None,
        141: "5.6",
        157: None,
        212: "50.7",
        307: "793",
    }
    expected = {
        "game_id": game_id,
        "event_id": event_id,
        "period_id": 2,
        "team_id": "kxpw3rqn4ukt7nqmtjj62lbn",
        "player_id": "61xxo4zsk6hby0swa756l3wlx",
        "type_id": 1,
        "timestamp": datetime(2016, 2, 20, 13, 14, 21, 606000),
        "minute": 56,
        "second": 40,
        "outcome": False,
        "start_x": 31.8,
        "start_y": 2.6,
        "end_x": 80.0,
        "end_y": 5.6,
        "qualifiers": expected_qualifiers,
        "assist": False,
        "keypass": False,
    }

    extracted = ma3json_parser.extract_events()

    assert len(extracted) == 1955
    # Records are keyed by the (game_id, event_id) pair.
    assert extracted[(game_id, event_id)] == expected

    frame = pd.DataFrame.from_dict(extracted, orient="index")
    # A placeholder "type_name" column is added before schema validation;
    # the expected record above has no such key.
    frame["type_name"] = "Added later"
    OptaEventSchema.validate(frame)