File size: 4,150 Bytes
d6ea71e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 |
import os
from datetime import datetime
import pandas as pd
from pytest import fixture
from socceraction.data.opta import (
OptaCompetitionSchema,
OptaEventSchema,
OptaGameSchema,
OptaPlayerSchema,
OptaTeamSchema,
)
from socceraction.data.opta.parsers import MA3JSONParser
@fixture()
def ma3json_parser() -> MA3JSONParser:
    """Create an ``MA3JSONParser`` for the sample MA3 feed used by these tests.

    The feed path is built relative to this test module: three directory
    levels up, then ``datasets/opta/ma3_bl2020-21-0000000066.json``.
    """
    test_dir = os.path.dirname(__file__)
    relative_parts = (
        os.pardir,
        os.pardir,
        os.pardir,
        "datasets",
        "opta",
        "ma3_bl2020-21-0000000066.json",
    )
    # os.path.join already yields a str, so it is handed to the parser as-is.
    feed_path = os.path.join(test_dir, *relative_parts)
    return MA3JSONParser(feed_path)
def test_extract_competitions(ma3json_parser: MA3JSONParser) -> None:
    """Check the competition record extracted from the sample feed.

    Exactly one entry is expected, keyed by ``(competition_id, season_id)``,
    and the full result must pass ``OptaCompetitionSchema`` validation.
    """
    competition_id = "722fdbecxzcq9788l6jqclzlw"
    season_id = "7u6i088r32wrl84442qxr0gh6"
    expected_record = {
        "competition_id": competition_id,
        "season_id": season_id,
        "competition_name": "2. Bundesliga",
        "season_name": "2020/2021",
    }

    extracted = ma3json_parser.extract_competitions()

    assert len(extracted) == 1
    assert extracted[(competition_id, season_id)] == expected_record

    # One row per extracted key; the schema validates the resulting frame.
    frame = pd.DataFrame.from_dict(extracted, orient="index")
    OptaCompetitionSchema.validate(frame)
def test_extract_games(ma3json_parser: MA3JSONParser) -> None:
    """Check the game record extracted from the sample feed.

    Exactly one game is expected, keyed by its game id, and the full result
    must pass ``OptaGameSchema`` validation.
    """
    game_id = "bl2020-21-0000000066"
    expected_record = {
        "game_id": game_id,
        "season_id": "7u6i088r32wrl84442qxr0gh6",
        "competition_id": "722fdbecxzcq9788l6jqclzlw",
        "game_day": 8,
        "game_date": datetime(2020, 11, 21, 13, 0),
        "home_team_id": "kxpw3rqn4ukt7nqmtjj62lbn",
        "away_team_id": "aojwbjr39s1w2mcd9l2bf2dhk",
        "home_score": 2,
        "away_score": 2,
        "duration": 93,
        "venue": "Wildparkstadion",
    }

    extracted = ma3json_parser.extract_games()

    assert len(extracted) == 1
    assert extracted[game_id] == expected_record

    frame = pd.DataFrame.from_dict(extracted, orient="index")
    OptaGameSchema.validate(frame)
def test_extract_teams(ma3json_parser: MA3JSONParser) -> None:
    """Check the team records extracted from the sample feed.

    Two teams are expected, each keyed by its team id, and the full result
    must pass ``OptaTeamSchema`` validation.
    """
    expected_names = {
        "kxpw3rqn4ukt7nqmtjj62lbn": "Eintracht Braunschweig",
        "aojwbjr39s1w2mcd9l2bf2dhk": "Karlsruher SC",
    }

    extracted = ma3json_parser.extract_teams()

    assert len(extracted) == 2
    for team_id, team_name in expected_names.items():
        assert extracted[team_id] == {
            "team_id": team_id,
            "team_name": team_name,
        }

    frame = pd.DataFrame.from_dict(extracted, orient="index")
    OptaTeamSchema.validate(frame)
def test_extract_players(ma3json_parser: MA3JSONParser) -> None:
    """Check the player records extracted from the sample feed.

    28 entries are expected, keyed by ``(game_id, player_id)``. One substitute
    is compared field by field, a second player's minutes played are checked,
    and the full result must pass ``OptaPlayerSchema`` validation.
    """
    game_id = "bl2020-21-0000000066"
    substitute_id = "yuw4a34cpasw5e4vqsg6ex1x"
    red_card_player_id = "2175hvbfk4jn4lnj3cetfpp1"
    expected_substitute = {
        "game_id": game_id,
        "player_id": substitute_id,
        "player_name": "D. Diamantakos",
        "team_id": "aojwbjr39s1w2mcd9l2bf2dhk",
        "jersey_number": 9,
        "minutes_played": 36,
        "starting_position": "Substitute",
        "is_starter": False,
    }

    extracted = ma3json_parser.extract_players()

    assert len(extracted) == 28
    assert extracted[(game_id, substitute_id)] == expected_substitute

    # red card
    red_card_player = extracted[(game_id, red_card_player_id)]
    assert red_card_player["minutes_played"] == 59

    frame = pd.DataFrame.from_dict(extracted, orient="index")
    OptaPlayerSchema.validate(frame)
def test_extract_events(ma3json_parser: MA3JSONParser) -> None:
    """Check the event records extracted from the sample feed.

    1955 entries are expected, keyed by ``(game_id, event_id)``. One event is
    compared field by field (including its qualifiers), and the full result
    must pass ``OptaEventSchema`` validation once a ``type_name`` column has
    been added.
    """
    game_id = "bl2020-21-0000000066"
    event_id = 1760864446
    expected_qualifiers = {
        1: None,
        5: None,
        56: "Right",
        140: "80.0",
        213: "0.0",
        152: None,
        141: "5.6",
        157: None,
        212: "50.7",
        307: "793",
    }
    expected_event = {
        "game_id": game_id,
        "event_id": event_id,
        "period_id": 2,
        "team_id": "kxpw3rqn4ukt7nqmtjj62lbn",
        "player_id": "61xxo4zsk6hby0swa756l3wlx",
        "type_id": 1,
        "timestamp": datetime(2016, 2, 20, 13, 14, 21, 606000),
        "minute": 56,
        "second": 40,
        "outcome": False,
        "start_x": 31.8,
        "start_y": 2.6,
        "end_x": 80.0,
        "end_y": 5.6,
        "qualifiers": expected_qualifiers,
        "assist": False,
        "keypass": False,
    }

    extracted = ma3json_parser.extract_events()

    assert len(extracted) == 1955
    assert extracted[(game_id, event_id)] == expected_event

    frame = pd.DataFrame.from_dict(extracted, orient="index")
    # Placeholder column set before validation; presumably the schema expects
    # "type_name" and the parser output does not carry it -- TODO confirm.
    frame["type_name"] = "Added later"
    OptaEventSchema.validate(frame)
|