# socr/tests/data/opta/parsers/test_whoscored.py
# scfive's picture
# Upload 203 files
# d6ea71e verified
import json
import os
from datetime import datetime
import pandas as pd
import pytest
from py.path import local
from pytest import fixture
from socceraction.data.base import MissingDataError
from socceraction.data.opta import (
OptaEventSchema,
OptaGameSchema,
OptaPlayerSchema,
OptaTeamSchema,
)
from socceraction.data.opta.parsers import WhoScoredParser
@fixture()
def whoscored_parser() -> WhoScoredParser:
    """Return a parser for the WhoScored sample file ``1005916.json``.

    The file is resolved relative to this module: three directory levels
    up, then ``datasets/whoscored``. Competition, season and game IDs are
    all passed explicitly.
    """
    module_dir = os.path.dirname(__file__)
    sample_file = os.path.join(
        module_dir, *([os.pardir] * 3), "datasets", "whoscored", "1005916.json"
    )
    return WhoScoredParser(
        sample_file,
        competition_id=5,
        season_id=1516,
        game_id=1005916,
    )
def test_extract_competition_id(tmpdir: local) -> None:
    """Check how the parser's ``competition_id`` is determined.

    Three cases are covered: an explicit argument is kept, ``None`` yields
    the ID 5 for the sample file, and ``None`` combined with an empty JSON
    document raises ``MissingDataError``.
    """
    sample_file = os.path.join(
        os.path.dirname(__file__),
        *([os.pardir] * 3),
        "datasets",
        "whoscored",
        "1005916.json",
    )
    other_ids = {"season_id": 1516, "game_id": 1005916}

    # An explicitly supplied ID takes precedence.
    explicit = WhoScoredParser(sample_file, competition_id=1234, **other_ids)
    assert explicit.competition_id == 1234

    # With no ID supplied, the sample file is expected to provide it.
    inferred = WhoScoredParser(sample_file, competition_id=None, **other_ids)
    assert inferred.competition_id == 5

    # An empty JSON object offers nothing to fall back on.
    empty_file = tmpdir.join("1005916.json")
    empty_file.write(json.dumps({}))
    with pytest.raises(MissingDataError):
        WhoScoredParser(str(empty_file), competition_id=None, **other_ids)
def test_extract_season_id(tmpdir: local) -> None:
    """Check how the parser's ``season_id`` is determined.

    Three cases are covered: an explicit argument is kept, ``None`` yields
    the ID 1516 for the sample file, and ``None`` combined with an empty
    JSON document raises ``MissingDataError``.
    """
    sample_file = os.path.join(
        os.path.dirname(__file__),
        *([os.pardir] * 3),
        "datasets",
        "whoscored",
        "1005916.json",
    )
    other_ids = {"competition_id": 5, "game_id": 1005916}

    # An explicitly supplied ID takes precedence.
    explicit = WhoScoredParser(sample_file, season_id=1234, **other_ids)
    assert explicit.season_id == 1234

    # With no ID supplied, the sample file is expected to provide it.
    inferred = WhoScoredParser(sample_file, season_id=None, **other_ids)
    assert inferred.season_id == 1516

    # An empty JSON object offers nothing to fall back on.
    empty_file = tmpdir.join("1005916.json")
    empty_file.write(json.dumps({}))
    with pytest.raises(MissingDataError):
        WhoScoredParser(str(empty_file), season_id=None, **other_ids)
def test_extract_game_id(tmpdir: local) -> None:
    """Check how the parser's ``game_id`` is determined.

    Three cases are covered: an explicit argument is kept, ``None`` yields
    the ID 1005916 for the sample file, and ``None`` combined with an empty
    JSON document raises ``MissingDataError``.
    """
    sample_file = os.path.join(
        os.path.dirname(__file__),
        *([os.pardir] * 3),
        "datasets",
        "whoscored",
        "1005916.json",
    )
    other_ids = {"competition_id": 5, "season_id": 1516}

    # An explicitly supplied ID takes precedence.
    explicit = WhoScoredParser(sample_file, game_id=1234, **other_ids)
    assert explicit.game_id == 1234

    # With no ID supplied, the sample file is expected to provide it.
    inferred = WhoScoredParser(sample_file, game_id=None, **other_ids)
    assert inferred.game_id == 1005916

    # An empty JSON object offers nothing to fall back on.
    empty_file = tmpdir.join("1005916.json")
    empty_file.write(json.dumps({}))
    with pytest.raises(MissingDataError):
        WhoScoredParser(str(empty_file), game_id=None, **other_ids)
def test_extract_games(whoscored_parser: WhoScoredParser) -> None:
    """Verify the single game record extracted from the sample file.

    The record keyed by game ID 1005916 is compared field by field against
    the expected values, and the full result is then validated against
    ``OptaGameSchema``.
    """
    expected_game = {
        "game_id": 1005916,
        "season_id": 1516,
        "competition_id": 5,
        "game_day": None,
        "game_date": datetime(2015, 8, 23, 19, 45),
        "home_team_id": 272,
        "away_team_id": 267,
        "home_score": 1,
        "away_score": 3,
        "duration": 96,
        "venue": "Carlo Castellani",
        "attendance": 7309,
        "referee": "Maurizio Mariani",
        "home_manager": "Marco Giampaolo",
        "away_manager": "Rolando Maran",
    }

    games = whoscored_parser.extract_games()

    assert len(games) == 1
    assert games[1005916] == expected_game

    # One row per game, indexed by the dictionary key.
    games_df = pd.DataFrame.from_dict(games, orient="index")
    OptaGameSchema.validate(games_df)
def test_extract_teams(whoscored_parser: WhoScoredParser) -> None:
    """Verify both team records extracted from the sample file.

    Each record must hold exactly its ``team_id`` and ``team_name``; the
    full result is then validated against ``OptaTeamSchema``.
    """
    expected_names = {272: "Empoli", 267: "Chievo"}

    teams = whoscored_parser.extract_teams()

    assert len(teams) == 2
    for team_id, team_name in expected_names.items():
        assert teams[team_id] == {
            "team_id": team_id,
            "team_name": team_name,
        }

    teams_df = pd.DataFrame.from_dict(teams, orient="index")
    OptaTeamSchema.validate(teams_df)
def test_extract_players(whoscored_parser: WhoScoredParser) -> None:
    """Verify the player records extracted from the sample file.

    Checks the total number of records, the full record of player 4444
    (keyed by ``(game_id, player_id)``), and validates the result against
    ``OptaPlayerSchema``.
    """
    player_key = (1005916, 4444)
    expected_player = {
        "game_id": 1005916,
        "team_id": 267,
        "player_id": 4444,
        "player_name": "Albano Bizzarri",
        "is_starter": True,
        "minutes_played": 96,
        "jersey_number": 1,
        "starting_position": "GK",
    }

    players = whoscored_parser.extract_players()

    # NOTE(review): 21 + 23 presumably are the per-team player counts — confirm.
    assert len(players) == 21 + 23
    assert players[player_key] == expected_player

    players_df = pd.DataFrame.from_dict(players, orient="index")
    OptaPlayerSchema.validate(players_df)
def test_extract_events(whoscored_parser: WhoScoredParser) -> None:
    """Verify the event records extracted from the sample file.

    Checks the total number of events, the full record of event 832925173
    (keyed by ``(game_id, event_id)``), and validates the result against
    ``OptaEventSchema`` after adding a placeholder ``type_name`` column.
    """
    event_key = (1005916, 832925173)
    expected_event = {
        "game_id": 1005916,
        "event_id": 832925173,
        "period_id": 1,
        "team_id": 272,
        "player_id": 128778,
        "type_id": 1,
        "timestamp": datetime(2015, 8, 23, 19, 45, 1),
        "minute": 0,
        "second": 1,
        "outcome": True,
        "start_x": 50.9,
        "start_y": 48.8,
        "end_x": 35.9,
        "end_y": 49.8,
        "qualifiers": {56: "Back", 140: "35.9", 141: "49.8", 212: "15.8", 213: "3.1"},
        "related_player_id": None,
        "goal": False,
        "shot": False,
        "touch": True,
    }

    events = whoscored_parser.extract_events()

    assert len(events) == 1562
    assert events[event_key] == expected_event

    events_df = pd.DataFrame.from_dict(events, orient="index")
    # The extracted records carry no "type_name"; a placeholder is filled in
    # before validation (presumably the schema expects it — confirm).
    events_df["type_name"] = "Added later"
    OptaEventSchema.validate(events_df)
def test_extract_substitutions(whoscored_parser: WhoScoredParser) -> None:
    """Verify the substitution records extracted from the sample file.

    Checks the number of substitutions and the full record stored under
    the key ``(1005916, 294162)``, whose second element equals the
    record's ``player_in_id``.
    """
    substitution_key = (1005916, 294162)
    expected_substitution = {
        "game_id": 1005916,
        "team_id": 272,
        "period_id": 2,
        "period_milliseconds": 1693000,
        "player_in_id": 294162,
        "player_out_id": 260588,
    }

    substitutions = whoscored_parser.extract_substitutions()

    assert len(substitutions) == 6
    assert substitutions[substitution_key] == expected_substitution
def test_extract_positions(whoscored_parser: WhoScoredParser) -> None:
    """Verify the position records extracted from the sample file.

    Checks the number of records and the full record stored under the key
    ``(1005916, 4444, 0)``.
    """
    position_key = (1005916, 4444, 0)
    expected_position = {
        "game_id": 1005916,
        "team_id": 267,
        "player_id": 4444,
        "period_id": 1,
        "period_milliseconds": 0,
        "start_milliseconds": 0,
        "end_milliseconds": 2520000,
        "formation_scheme": "442",
        "player_position": "GK",
        "player_position_x": 0.0,
        "player_position_y": 5.0,
    }

    positions = whoscored_parser.extract_positions()

    assert len(positions) == 88
    assert positions[position_key] == expected_position
def test_extract_teamgamestats(whoscored_parser: WhoScoredParser) -> None:
    """Verify the per-team game statistics extracted from the sample file.

    Checks that there are two records and spot-checks selected fields of
    the one keyed by ``(1005916, 272)``. The ``aerials_success`` key must
    be absent from that record.
    """
    teamgamestats = whoscored_parser.extract_teamgamestats()
    assert len(teamgamestats) == 2

    home_stats = teamgamestats[(1005916, 272)]
    assert home_stats["game_id"] == 1005916
    assert home_stats["team_id"] == 272
    assert home_stats["side"] == "home"
    assert home_stats["score"] == 1
    assert home_stats["shootout_score"] is None
    assert home_stats["aerials_total"] == 34
    assert home_stats["aerials_won"] == 10
    assert "aerials_success" not in home_stats
def test_extract_playergamestats(whoscored_parser: WhoScoredParser) -> None:
    """Verify the per-player game statistics extracted from the sample file.

    Checks the total number of records and spot-checks selected fields of
    the one keyed by ``(1005916, 90878)``. The ``pass_success`` key must be
    absent from that record.
    """
    playergamestats = whoscored_parser.extract_playergamestats()
    # NOTE(review): 21 + 23 presumably are the per-team player counts — confirm.
    assert len(playergamestats) == 21 + 23

    player_stats = playergamestats[(1005916, 90878)]
    assert player_stats["game_id"] == 1005916
    assert player_stats["team_id"] == 272
    assert player_stats["player_id"] == 90878
    assert player_stats["mvp"] is False
    assert player_stats["minute_start"] == 0
    assert player_stats["minute_end"] == 96
    assert player_stats["minutes_played"] == 96
    assert player_stats["passes_total"] == 47
    assert player_stats["passes_accurate"] == 37
    assert "pass_success" not in player_stats