File size: 3,818 Bytes
d6ea71e |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 |
import os
import pytest
from socceraction.data import wyscout as wy
from socceraction.data.wyscout import (
WyscoutCompetitionSchema,
WyscoutEventSchema,
WyscoutGameSchema,
WyscoutPlayerSchema,
WyscoutTeamSchema,
)
class TestPublicWyscoutLoader:
    """Tests for ``wy.PublicWyscoutLoader`` run against the local raw public dataset."""

    def setup_method(self) -> None:
        """Build a loader rooted at ``datasets/wyscout_public/raw`` with downloads disabled."""
        here = os.path.dirname(__file__)
        raw_dir = os.path.join(here, os.pardir, "datasets", "wyscout_public", "raw")
        self.WSL = wy.PublicWyscoutLoader(root=raw_dir, download=False)

    def _minutes_by_player(self, game_id: int):
        """Return the ``minutes_played`` column of ``game_id``, indexed by ``player_id``."""
        lineup = self.WSL.players(game_id)
        return lineup.set_index("player_id")["minutes_played"]

    def test_competitions(self) -> None:
        """The competitions table is non-empty and passes its schema."""
        competitions = self.WSL.competitions()
        n_competitions = len(competitions)
        assert n_competitions > 0
        WyscoutCompetitionSchema.validate(competitions)

    def test_matches(self) -> None:
        """The requested competition/season yields 64 games that pass the schema."""
        # World Cup, 2018
        competition_id, season_id = 28, 10078
        games = self.WSL.games(competition_id, season_id)
        assert len(games) == 64
        WyscoutGameSchema.validate(games)

    def test_teams(self) -> None:
        """A single game lists exactly two teams."""
        game_id = 2058007
        teams = self.WSL.teams(game_id)
        assert len(teams) == 2
        WyscoutTeamSchema.validate(teams)

    def test_players(self) -> None:
        """Player count and total minutes played match the expected values."""
        game_id = 2058007
        players = self.WSL.players(game_id)
        assert len(players) == 26
        total_minutes = players["minutes_played"].sum()
        assert total_minutes == 22 * 96
        WyscoutPlayerSchema.validate(players)

    def test_players_with_missing_id(self) -> None:
        """Loading a game with a missing substituted player emits a ``UserWarning``."""
        # The substituted player(s) are sometimes missing
        # See https://github.com/ML-KULeuven/socceraction/issues/276
        game_id = 2576016
        with pytest.warns(UserWarning):
            self.WSL.players(game_id)

    def test_minutes_played(self) -> None:
        """Minutes played handle injury time, shoot-outs and dismissals."""
        # Injury time should be added
        minutes = self._minutes_by_player(2058007)
        assert minutes.at[122] == 66
        assert minutes.at[8249] == 96 - 66

        # Penalty shoot-outs should not be added
        minutes = self._minutes_by_player(2058005)
        assert minutes.sum() / 22 == 127

        # COL - JAP: red card in '3
        minutes = self._minutes_by_player(2057997)
        assert minutes.at[26518] == 3

        # GER - SWE: double yellow card in '82 + 2' injury time
        minutes = self._minutes_by_player(2057986)
        assert minutes.at[14716] == 84

    def test_events(self) -> None:
        """The events table of a game is non-empty and passes its schema."""
        game_id = 2058007
        events = self.WSL.events(game_id)
        n_events = len(events)
        assert n_events > 0
        WyscoutEventSchema.validate(events)
class TestWyscoutLoader:
    """Tests for ``wy.WyscoutLoader`` reading local files from ``datasets/wyscout_api``."""

    def setup_method(self) -> None:
        """Build a loader that uses the ``"local"`` getter and explicit feed templates."""
        here = os.path.dirname(__file__)
        api_dir = os.path.join(here, os.pardir, "datasets", "wyscout_api")
        # NOTE(review): no "games" feed is configured here; the original kept
        # the entry `"games": "matches_{season_id}.json"` commented out.
        # Presumably this is deliberate -- confirm before re-enabling it.
        feed_templates = {
            "competitions": "competitions.json",
            "seasons": "seasons_{competition_id}.json",
            "events": "events_{game_id}.json",
        }
        self.WSL = wy.WyscoutLoader(root=api_dir, getter="local", feeds=feed_templates)

    def test_competitions(self) -> None:
        """The competitions table is non-empty and passes its schema."""
        competitions = self.WSL.competitions()
        n_competitions = len(competitions)
        assert n_competitions > 0
        WyscoutCompetitionSchema.validate(competitions)

    def test_matches(self) -> None:
        """The requested competition/season yields exactly one game."""
        competition_id, season_id = 10, 10174
        games = self.WSL.games(competition_id, season_id)
        assert len(games) == 1
        WyscoutGameSchema.validate(games)

    def test_teams(self) -> None:
        """A single game lists exactly two teams."""
        game_id = 2852835
        teams = self.WSL.teams(game_id)
        assert len(teams) == 2
        WyscoutTeamSchema.validate(teams)

    def test_players(self) -> None:
        """The game lists 30 players that pass the schema."""
        game_id = 2852835
        players = self.WSL.players(game_id)
        assert len(players) == 30
        WyscoutPlayerSchema.validate(players)

    def test_events(self) -> None:
        """The events table of a game is non-empty and passes its schema."""
        game_id = 2852835
        events = self.WSL.events(game_id)
        n_events = len(events)
        assert n_events > 0
        WyscoutEventSchema.validate(events)
|