|
import json |
|
import os |
|
from datetime import datetime |
|
|
|
import pandas as pd |
|
import pytest |
|
from py.path import local |
|
from pytest import fixture |
|
from socceraction.data.base import MissingDataError |
|
from socceraction.data.opta import ( |
|
OptaEventSchema, |
|
OptaGameSchema, |
|
OptaPlayerSchema, |
|
OptaTeamSchema, |
|
) |
|
from socceraction.data.opta.parsers import WhoScoredParser |
|
|
|
|
|
@fixture()
def whoscored_parser() -> WhoScoredParser:
    """Build a ``WhoScoredParser`` for the bundled ``1005916.json`` dataset.

    The file is looked up in ``datasets/whoscored``, three directory levels
    above the directory holding this test module. Competition, season and
    game IDs are all passed explicitly.
    """
    here = os.path.dirname(__file__)
    upwards = [os.pardir] * 3
    datafile = os.path.join(here, *upwards, "datasets", "whoscored", "1005916.json")
    return WhoScoredParser(
        str(datafile),
        competition_id=5,
        season_id=1516,
        game_id=1005916,
    )
|
|
|
|
|
def test_extract_competition_id(tmpdir: local) -> None:
    """Check how the parser resolves ``competition_id``.

    Three expectations are verified: an explicit ID is reported unchanged,
    ``None`` yields ``5`` for the bundled dataset, and ``None`` combined with
    a file holding only an empty JSON object raises ``MissingDataError``.
    """
    datafile = os.path.join(
        os.path.dirname(__file__),
        *([os.pardir] * 3),
        "datasets",
        "whoscored",
        "1005916.json",
    )

    # (value passed to the constructor, value the parser should report)
    for given, expected in ((1234, 1234), (None, 5)):
        parser = WhoScoredParser(datafile, competition_id=given, season_id=1516, game_id=1005916)
        assert parser.competition_id == expected

    # A file containing just "{}" together with competition_id=None must fail.
    empty_file = tmpdir.join("1005916.json")
    empty_file.write(json.dumps({}))
    with pytest.raises(MissingDataError):
        WhoScoredParser(str(empty_file), competition_id=None, season_id=1516, game_id=1005916)
|
|
|
|
|
def test_extract_season_id(tmpdir: local) -> None:
    """Check how the parser resolves ``season_id``.

    Three expectations are verified: an explicit ID is reported unchanged,
    ``None`` yields ``1516`` for the bundled dataset, and ``None`` combined
    with a file holding only an empty JSON object raises
    ``MissingDataError``.
    """
    datafile = os.path.join(
        os.path.dirname(__file__),
        *([os.pardir] * 3),
        "datasets",
        "whoscored",
        "1005916.json",
    )

    # (value passed to the constructor, value the parser should report)
    for given, expected in ((1234, 1234), (None, 1516)):
        parser = WhoScoredParser(datafile, competition_id=5, season_id=given, game_id=1005916)
        assert parser.season_id == expected

    # A file containing just "{}" together with season_id=None must fail.
    empty_file = tmpdir.join("1005916.json")
    empty_file.write(json.dumps({}))
    with pytest.raises(MissingDataError):
        WhoScoredParser(str(empty_file), competition_id=5, season_id=None, game_id=1005916)
|
|
|
|
|
def test_extract_game_id(tmpdir: local) -> None:
    """Check how the parser resolves ``game_id``.

    Three expectations are verified: an explicit ID is reported unchanged,
    ``None`` yields ``1005916`` for the bundled dataset, and ``None``
    combined with a file holding only an empty JSON object raises
    ``MissingDataError``.
    """
    datafile = os.path.join(
        os.path.dirname(__file__),
        *([os.pardir] * 3),
        "datasets",
        "whoscored",
        "1005916.json",
    )

    # (value passed to the constructor, value the parser should report)
    for given, expected in ((1234, 1234), (None, 1005916)):
        parser = WhoScoredParser(datafile, competition_id=5, season_id=1516, game_id=given)
        assert parser.game_id == expected

    # A file containing just "{}" together with game_id=None must fail.
    empty_file = tmpdir.join("1005916.json")
    empty_file.write(json.dumps({}))
    with pytest.raises(MissingDataError):
        WhoScoredParser(str(empty_file), competition_id=5, season_id=1516, game_id=None)
|
|
|
|
|
def test_extract_games(whoscored_parser: WhoScoredParser) -> None:
    """Check the single game record returned by ``extract_games``.

    The result must hold exactly one entry, keyed by game ID ``1005916``,
    equal to the expected record below, and the resulting table must pass
    ``OptaGameSchema`` validation.
    """
    expected_game = {
        "game_id": 1005916,
        "season_id": 1516,
        "competition_id": 5,
        "game_day": None,
        "game_date": datetime(2015, 8, 23, 19, 45),
        "home_team_id": 272,
        "away_team_id": 267,
        "home_score": 1,
        "away_score": 3,
        "duration": 96,
        "venue": "Carlo Castellani",
        "attendance": 7309,
        "referee": "Maurizio Mariani",
        "home_manager": "Marco Giampaolo",
        "away_manager": "Rolando Maran",
    }

    games = whoscored_parser.extract_games()
    assert len(games) == 1
    assert games[1005916] == expected_game

    # One row per game, indexed by the dict keys.
    games_df = pd.DataFrame.from_dict(games, orient="index")
    OptaGameSchema.validate(games_df)
|
|
|
|
|
def test_extract_teams(whoscored_parser: WhoScoredParser) -> None:
    """Check the two team records returned by ``extract_teams``.

    Both teams must be present under their team ID with the expected name,
    and the resulting table must pass ``OptaTeamSchema`` validation.
    """
    expected_names = {272: "Empoli", 267: "Chievo"}

    teams = whoscored_parser.extract_teams()
    assert len(teams) == 2
    for team_id, team_name in expected_names.items():
        assert teams[team_id] == {
            "team_id": team_id,
            "team_name": team_name,
        }

    teams_df = pd.DataFrame.from_dict(teams, orient="index")
    OptaTeamSchema.validate(teams_df)
|
|
|
|
|
def test_extract_players(whoscored_parser: WhoScoredParser) -> None:
    """Check the player records returned by ``extract_players``.

    The result must contain ``21 + 23`` entries, the entry keyed by
    ``(1005916, 4444)`` must equal the expected record below, and the
    resulting table must pass ``OptaPlayerSchema`` validation.
    """
    expected_player = {
        "game_id": 1005916,
        "team_id": 267,
        "player_id": 4444,
        "player_name": "Albano Bizzarri",
        "is_starter": True,
        "minutes_played": 96,
        "jersey_number": 1,
        "starting_position": "GK",
    }

    players = whoscored_parser.extract_players()
    assert len(players) == 21 + 23
    assert players[(1005916, 4444)] == expected_player

    players_df = pd.DataFrame.from_dict(players, orient="index")
    OptaPlayerSchema.validate(players_df)
|
|
|
|
|
def test_extract_events(whoscored_parser: WhoScoredParser) -> None:
    """Check the event records returned by ``extract_events``.

    The result must contain 1562 events, the event keyed by
    ``(1005916, 832925173)`` must equal the expected record below, and the
    resulting table must pass ``OptaEventSchema`` validation once a
    ``type_name`` column has been added.
    """
    expected_event = {
        "game_id": 1005916,
        "event_id": 832925173,
        "period_id": 1,
        "team_id": 272,
        "player_id": 128778,
        "type_id": 1,
        "timestamp": datetime(2015, 8, 23, 19, 45, 1),
        "minute": 0,
        "second": 1,
        "outcome": True,
        "start_x": 50.9,
        "start_y": 48.8,
        "end_x": 35.9,
        "end_y": 49.8,
        "qualifiers": {56: "Back", 140: "35.9", 141: "49.8", 212: "15.8", 213: "3.1"},
        "related_player_id": None,
        "goal": False,
        "shot": False,
        "touch": True,
    }

    events = whoscored_parser.extract_events()
    assert len(events) == 1562
    assert events[(1005916, 832925173)] == expected_event

    events_df = pd.DataFrame.from_dict(events, orient="index")
    # Placeholder column set before validation; presumably the schema
    # requires ``type_name`` and it is normally filled in by a later step.
    events_df["type_name"] = "Added later"
    OptaEventSchema.validate(events_df)
|
|
|
|
|
def test_extract_substitutions(whoscored_parser: WhoScoredParser) -> None:
    """Check the substitution records returned by ``extract_substitutions``.

    The result must contain six entries and the entry keyed by
    ``(1005916, 294162)`` must equal the expected record below.
    """
    expected_substitution = {
        "game_id": 1005916,
        "team_id": 272,
        "period_id": 2,
        "period_milliseconds": 1693000,
        "player_in_id": 294162,
        "player_out_id": 260588,
    }

    substitutions = whoscored_parser.extract_substitutions()
    assert len(substitutions) == 6
    assert substitutions[(1005916, 294162)] == expected_substitution
|
|
|
|
|
def test_extract_positions(whoscored_parser: WhoScoredParser) -> None:
    """Check the position records returned by ``extract_positions``.

    The result must contain 88 entries and the entry keyed by
    ``(1005916, 4444, 0)`` must equal the expected record below.
    """
    expected_position = {
        "game_id": 1005916,
        "team_id": 267,
        "player_id": 4444,
        "period_id": 1,
        "period_milliseconds": 0,
        "start_milliseconds": 0,
        "end_milliseconds": 2520000,
        "formation_scheme": "442",
        "player_position": "GK",
        "player_position_x": 0.0,
        "player_position_y": 5.0,
    }

    positions = whoscored_parser.extract_positions()
    assert len(positions) == 88
    assert positions[(1005916, 4444, 0)] == expected_position
|
|
|
|
|
def test_extract_teamgamestats(whoscored_parser: WhoScoredParser) -> None:
    """Check the per-team statistics returned by ``extract_teamgamestats``.

    The result must contain two entries. Selected fields of the entry keyed
    by ``(1005916, 272)`` are compared against known values, and the
    ``aerials_success`` key must be absent from that entry.
    """
    teamgamestats = whoscored_parser.extract_teamgamestats()
    assert len(teamgamestats) == 2

    home_stats = teamgamestats[(1005916, 272)]
    assert home_stats["game_id"] == 1005916
    assert home_stats["team_id"] == 272
    assert home_stats["side"] == "home"
    assert home_stats["score"] == 1
    assert home_stats["shootout_score"] is None
    assert home_stats["aerials_total"] == 34
    assert home_stats["aerials_won"] == 10
    assert "aerials_success" not in home_stats
|
|
|
|
|
def test_extract_playergamestats(whoscored_parser: WhoScoredParser) -> None:
    """Check the per-player statistics returned by ``extract_playergamestats``.

    The result must contain ``21 + 23`` entries. Selected fields of the
    entry keyed by ``(1005916, 90878)`` are compared against known values,
    and the ``pass_success`` key must be absent from that entry.
    """
    playergamestats = whoscored_parser.extract_playergamestats()
    assert len(playergamestats) == 21 + 23

    player_stats = playergamestats[(1005916, 90878)]
    assert player_stats["game_id"] == 1005916
    assert player_stats["team_id"] == 272
    assert player_stats["player_id"] == 90878
    assert player_stats["mvp"] is False
    assert player_stats["minute_start"] == 0
    assert player_stats["minute_end"] == 96
    assert player_stats["minutes_played"] == 96
    assert player_stats["passes_total"] == 47
    assert player_stats["passes_accurate"] == 37
    assert "pass_success" not in player_stats
|
|