File size: 3,818 Bytes
d6ea71e
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
import os

import pytest
from socceraction.data import wyscout as wy
from socceraction.data.wyscout import (
    WyscoutCompetitionSchema,
    WyscoutEventSchema,
    WyscoutGameSchema,
    WyscoutPlayerSchema,
    WyscoutTeamSchema,
)


class TestPublicWyscoutLoader:
    """Checks for ``wy.PublicWyscoutLoader`` run against the local public dataset."""

    def setup_method(self) -> None:
        """Build a loader rooted at ``datasets/wyscout_public/raw`` with downloading off."""
        tests_dir = os.path.dirname(__file__)
        raw_dir = os.path.join(tests_dir, os.pardir, "datasets", "wyscout_public", "raw")
        self.WSL = wy.PublicWyscoutLoader(root=raw_dir, download=False)

    def test_competitions(self) -> None:
        """The competitions table has at least one row and passes schema validation."""
        competitions = self.WSL.competitions()
        assert len(competitions) > 0
        WyscoutCompetitionSchema.validate(competitions)

    def test_matches(self) -> None:
        """The 2018 World Cup yields 64 games that pass schema validation."""
        competition_id, season_id = 28, 10078  # World Cup, 2018
        games = self.WSL.games(competition_id, season_id)
        assert len(games) == 64
        WyscoutGameSchema.validate(games)

    def test_teams(self) -> None:
        """A single game lists exactly two teams that pass schema validation."""
        game_id = 2058007
        teams = self.WSL.teams(game_id)
        assert len(teams) == 2
        WyscoutTeamSchema.validate(teams)

    def test_players(self) -> None:
        """Player count and total minutes played match the expected values for one game."""
        game_id = 2058007
        players = self.WSL.players(game_id)
        assert len(players) == 26
        total_minutes = players.minutes_played.sum()
        assert total_minutes == 22 * 96
        WyscoutPlayerSchema.validate(players)

    def test_players_with_missing_id(self) -> None:
        """Loading players for a game with missing substitutes emits a ``UserWarning``."""
        # The substituted player(s) are sometimes missing
        # See https://github.com/ML-KULeuven/socceraction/issues/276
        game_id = 2576016
        with pytest.warns(UserWarning):
            self.WSL.players(game_id)

    def test_minutes_played(self) -> None:
        """Minutes played are correct for injury time, shoot-outs and sent-off players."""
        # Injury time should be added
        by_player = self.WSL.players(2058007).set_index("player_id")
        for player_id, expected_minutes in ((122, 66), (8249, 96 - 66)):
            assert by_player.at[player_id, "minutes_played"] == expected_minutes

        # Penalty shoot-outs should not be added
        by_player = self.WSL.players(2058005).set_index("player_id")
        total_minutes = by_player.minutes_played.sum()
        assert total_minutes / 22 == 127

        # COL - JAP: red card in '3
        by_player = self.WSL.players(2057997).set_index("player_id")
        sent_off_minutes = by_player.at[26518, "minutes_played"]
        assert sent_off_minutes == 3

        # GER - SWE: double yellow card in '82 + 2' injury time
        by_player = self.WSL.players(2057986).set_index("player_id")
        sent_off_minutes = by_player.at[14716, "minutes_played"]
        assert sent_off_minutes == 84

    def test_events(self) -> None:
        """The events table for one game is non-empty and passes schema validation."""
        game_id = 2058007
        events = self.WSL.events(game_id)
        assert len(events) > 0
        WyscoutEventSchema.validate(events)


class TestWyscoutLoader:
    """Checks for ``wy.WyscoutLoader`` using the local getter on ``datasets/wyscout_api``."""

    def setup_method(self) -> None:
        """Build a local-getter loader with explicit feed file-name templates."""
        tests_dir = os.path.dirname(__file__)
        api_dir = os.path.join(tests_dir, os.pardir, "datasets", "wyscout_api")
        self.WSL = wy.WyscoutLoader(
            root=api_dir,
            getter="local",
            feeds={
                "competitions": "competitions.json",
                "seasons": "seasons_{competition_id}.json",
                # NOTE(review): the games feed is left unconfigured here;
                # presumably intentional -- confirm before re-enabling.
                # "games": "matches_{season_id}.json",
                "events": "events_{game_id}.json",
            },
        )

    def test_competitions(self) -> None:
        """The competitions table has at least one row and passes schema validation."""
        competitions = self.WSL.competitions()
        assert len(competitions) > 0
        WyscoutCompetitionSchema.validate(competitions)

    def test_matches(self) -> None:
        """The requested competition/season pair yields exactly one valid game."""
        competition_id, season_id = 10, 10174
        games = self.WSL.games(competition_id, season_id)
        assert len(games) == 1
        WyscoutGameSchema.validate(games)

    def test_teams(self) -> None:
        """A single game lists exactly two teams that pass schema validation."""
        game_id = 2852835
        teams = self.WSL.teams(game_id)
        assert len(teams) == 2
        WyscoutTeamSchema.validate(teams)

    def test_players(self) -> None:
        """A single game lists 30 players that pass schema validation."""
        game_id = 2852835
        players = self.WSL.players(game_id)
        assert len(players) == 30
        WyscoutPlayerSchema.validate(players)

    def test_events(self) -> None:
        """The events table for one game is non-empty and passes schema validation."""
        game_id = 2852835
        events = self.WSL.events(game_id)
        assert len(events) > 0
        WyscoutEventSchema.validate(events)