socr / tests /spadl /test_kloppy.py
scfive's picture
Upload 203 files
d6ea71e verified
import os
from typing import NamedTuple
import pandas as pd
import pytest
from kloppy import opta, statsbomb, wyscout
from kloppy.domain import Orientation
from pandas.testing import assert_frame_equal
from socceraction.data.opta import OptaLoader
from socceraction.data.statsbomb import StatsBombLoader
from socceraction.data.wyscout import PublicWyscoutLoader, WyscoutLoader
from socceraction.spadl import config as spadl
from socceraction.spadl import kloppy as kl
from socceraction.spadl import opta as spadl_opta
from socceraction.spadl import statsbomb as sb
from socceraction.spadl import wyscout as spadl_wyscout
# Disable pandas' column/row truncation so that DataFrames printed by the
# tests in this module are shown in full.
pd.options.display.max_columns = None
pd.options.display.max_rows = None
class Dataset(NamedTuple):
    """Pair of action frames produced for the same game by two converters.

    The fixtures in this module fill both fields from the same game so the
    tests can compare them.
    """

    # Actions obtained through ``socceraction.spadl.kloppy`` (imported as ``kl``).
    kloppy: pd.DataFrame
    # Actions obtained through the provider-specific socceraction converter.
    socceraction: pd.DataFrame
@pytest.fixture(scope="session")
def statsbomb_actions() -> Dataset:
    """Build the actions of StatsBomb game 7584 via kloppy and via socceraction.

    Both conversions read the same raw files from the local
    ``datasets/statsbomb/raw`` directory next to the tests.

    Returns
    -------
    Dataset
        The kloppy-based actions and the socceraction-based actions.
    """
    tests_dir = os.path.dirname(__file__)
    raw_dir = os.path.join(tests_dir, os.pardir, "datasets", "statsbomb", "raw")

    # Route 1: parse the raw files with kloppy, then convert the kloppy dataset.
    events_file = os.path.join(raw_dir, "events", "7584.json")
    lineups_file = os.path.join(raw_dir, "lineups", "7584.json")
    parsed = statsbomb.load(event_data=events_file, lineup_data=lineups_file)
    actions_from_kloppy = kl.convert_to_actions(parsed, game_id=7584)

    # Route 2: load the events with socceraction's own loader and converter.
    # 782 is the converter's second positional argument (the home team id in
    # socceraction's documented signature).
    loader = StatsBombLoader(root=raw_dir, getter="local")
    actions_from_socceraction = sb.convert_to_actions(loader.events(7584), 782)

    return Dataset(kloppy=actions_from_kloppy, socceraction=actions_from_socceraction)
@pytest.mark.parametrize("actiontype", spadl.actiontypes)
def test_kloppy_to_actions_statsbomb(statsbomb_actions: Dataset, actiontype: str) -> None:
    """Check that both converters yield the same StatsBomb actions of one type.

    Parameters
    ----------
    statsbomb_actions : Dataset
        Actions of the same game converted via kloppy and via socceraction.
    actiontype : str
        Name of the SPADL action type to compare; the test runs once for
        every entry of ``spadl.actiontypes``.
    """
    # columns to compare
    cols = [
        "game_id",
        "original_event_id",
        "period_id",
        "time_seconds",
        "team_id",
        "player_id",
        # 'start_x',
        # 'start_y',
        # 'end_x',
        # 'end_y',
        "type_id",
        "result_id",
        "bodypart_id",
        # 'action_id',
    ]
    type_id = spadl.actiontypes.index(actiontype)

    # Actions of this type converted by socceraction. The explicit copy keeps
    # the column assignments below from touching (or warning about) the
    # session-scoped fixture frame that every parametrized run shares.
    actions_sa = statsbomb_actions.socceraction
    sel_actions_sa = actions_sa.loc[actions_sa.type_id == type_id, cols].copy()

    # Actions of this type converted via kloppy. The "interception-" prefix is
    # stripped from the event ids so they can be matched with socceraction's.
    actions_kl = statsbomb_actions.kloppy
    sel_actions_kl = actions_kl.loc[actions_kl.type_id == type_id, cols].replace(
        {"original_event_id": {"interception-": ""}}, regex=True
    )

    # FIXME: the two converters disagree on id dtypes, so the socceraction ids
    # are cast to strings before comparing.
    sel_actions_sa["team_id"] = sel_actions_sa["team_id"].astype(str)
    sel_actions_sa["player_id"] = sel_actions_sa["player_id"].astype("Int64").astype(str)
    if actiontype in ["keeper_save", "keeper_punch"]:
        # FIXME: results of these keeper actions are forced to "success" on the
        # socceraction side before comparing.
        sel_actions_sa["result_id"] = spadl.results.index("success")

    # Diagnostics, shown by pytest when the assertion below fails.
    ids_kl = set(sel_actions_kl.original_event_id)
    ids_sa = set(sel_actions_sa.original_event_id)
    print("These events should not be included", ids_kl - ids_sa)
    print("These events are missing", ids_sa - ids_kl)

    # compare the two datasets
    assert_frame_equal(
        sel_actions_kl.set_index("original_event_id"),
        sel_actions_sa.set_index("original_event_id"),
    )
@pytest.fixture(scope="session")
def opta_actions() -> Dataset:
    """Build the actions of Opta game 1009316 via kloppy and via socceraction.

    Both conversions read the same F7 and F24 XML feeds from the local
    ``datasets/opta`` directory next to the tests.

    Returns
    -------
    Dataset
        The kloppy-based actions and the socceraction-based actions.
    """
    tests_dir = os.path.dirname(__file__)
    opta_dir = os.path.join(tests_dir, os.pardir, "datasets", "opta")

    # Route 1: parse the two feeds with kloppy, then convert the kloppy dataset.
    f7_file = os.path.join(opta_dir, "f7-23-2018-1009316-matchresults.xml")
    f24_file = os.path.join(opta_dir, "f24-23-2018-1009316-eventdetails.xml")
    parsed = opta.load(f7_data=f7_file, f24_data=f24_file)
    actions_from_kloppy = kl.convert_to_actions(parsed, game_id=1009316)

    # Route 2: socceraction's loader locates the same feeds through templates.
    feed_templates = {
        "f7": "f7-{competition_id}-{season_id}-{game_id}-matchresults.xml",
        "f24": "f24-{competition_id}-{season_id}-{game_id}-eventdetails.xml",
    }
    opta_loader = OptaLoader(root=opta_dir, parser="xml", feeds=feed_templates)
    # 174 is the converter's second positional argument (the home team id in
    # socceraction's documented signature).
    actions_from_socceraction = spadl_opta.convert_to_actions(opta_loader.events(1009316), 174)

    return Dataset(kloppy=actions_from_kloppy, socceraction=actions_from_socceraction)
# def test_dummy_opta() -> None:
# data_dir = os.path.join(os.path.dirname(__file__), os.pardir, "datasets", "opta")
# kloppy_dataset = opta.load(
# f7_data=os.path.join(data_dir, "f7-23-2018-1009316-matchresults.xml"),
# f24_data=os.path.join(data_dir, "f24-23-2018-1009316-eventdetails.xml"),
# ).transform(
# to_orientation=Orientation.HOME_AWAY, # FIXME
# to_coordinate_system=kl._SoccerActionCoordinateSystem(),
# )
#
# EVENT_ID = "1189699160"
#
# event = kloppy_dataset.get_event_by_id(EVENT_ID)
# print("KLOPPY EVENT", event.qualifiers, "\n")
# loader = OptaLoader(
# root=data_dir,
# parser="xml",
# feeds={
# "f7": "f7-{competition_id}-{season_id}-{game_id}-matchresults.xml",
# "f24": "f24-{competition_id}-{season_id}-{game_id}-eventdetails.xml",
# },
# )
# df = loader.events(1009316)
# print("RAW EVENT", df.loc[df.event_id == int(EVENT_ID)], "\n")
#
# df_actions = spadl_opta.convert_to_actions(loader.events(1009316), 174)
# print("ACTION", df_actions.loc[df_actions.original_event_id == int(EVENT_ID)], "\n")
#
# assert False
@pytest.mark.skip(reason="not yet supported")
@pytest.mark.parametrize("actiontype", spadl.actiontypes)
def test_kloppy_to_actions_opta(opta_actions: Dataset, actiontype: str) -> None:
    """Check that both converters yield the same Opta actions of one type.

    Currently skipped ("not yet supported").

    Parameters
    ----------
    opta_actions : Dataset
        Actions of the same game converted via kloppy and via socceraction.
    actiontype : str
        Name of the SPADL action type to compare; the test runs once for
        every entry of ``spadl.actiontypes``.
    """
    # columns to compare
    cols = [
        "game_id",
        "original_event_id",
        "period_id",
        # 'time_seconds',  # FIXME
        "team_id",
        "player_id",
        # "start_x",
        # "start_y",
        # "end_x",
        # "end_y",
        "type_id",
        "result_id",
        "bodypart_id",
        # 'action_id',
    ]
    type_id = spadl.actiontypes.index(actiontype)

    # Explicit copies: both selections get columns reassigned below, and the
    # session-scoped fixture frames they come from must stay untouched (this
    # also avoids pandas' SettingWithCopyWarning).
    # Opta actions of this type converted by socceraction
    actions_sa = opta_actions.socceraction
    sel_actions_sa = actions_sa.loc[actions_sa.type_id == type_id, cols].copy()
    # Opta actions of this type converted via kloppy
    actions_kl = opta_actions.kloppy
    sel_actions_kl = actions_kl.loc[actions_kl.type_id == type_id, cols].copy()

    # FIXME: the two converters disagree on id dtypes; align them before
    # comparing.
    sel_actions_kl["team_id"] = sel_actions_kl["team_id"].astype(int)
    sel_actions_kl["player_id"] = sel_actions_kl["player_id"].astype(float)
    sel_actions_sa["original_event_id"] = sel_actions_sa["original_event_id"].astype(str)

    # Diagnostics, shown by pytest when the assertion below fails.
    ids_kl = set(sel_actions_kl.original_event_id)
    ids_sa = set(sel_actions_sa.original_event_id)
    print("These events should not be included", ids_kl - ids_sa)
    print("These events are missing", ids_sa - ids_kl)

    print("These events are different")
    # Stack both selections and group on every column: a row that ends up
    # alone in its group has no identical counterpart in the other frame.
    stacked = pd.concat(
        [
            sel_actions_kl.set_index("original_event_id"),
            sel_actions_sa.set_index("original_event_id"),
        ]
    ).reset_index(drop=False)
    groups = stacked.groupby(list(stacked.columns)).groups
    unmatched = [labels[0] for labels in groups.values() if len(labels) == 1]
    print(stacked.reindex(unmatched))

    # compare the two datasets
    assert_frame_equal(
        sel_actions_kl.set_index("original_event_id"),
        sel_actions_sa.set_index("original_event_id"),
    )
@pytest.fixture(scope="session")
def wyscout_actions() -> Dataset:
    """Build the actions of Wyscout API game 2852835 via kloppy and via socceraction.

    Both conversions read the same events file from the local
    ``datasets/wyscout_api`` directory next to the tests.

    Returns
    -------
    Dataset
        The kloppy-based actions and the socceraction-based actions.
    """
    tests_dir = os.path.dirname(__file__)
    api_dir = os.path.join(tests_dir, os.pardir, "datasets", "wyscout_api")

    # Route 1: parse the events file with kloppy, then convert the kloppy dataset.
    events_file = os.path.join(api_dir, "events_2852835.json")
    parsed = wyscout.load(event_data=events_file)
    actions_from_kloppy = kl.convert_to_actions(parsed, game_id=2852835)

    # Route 2: socceraction's loader locates the local files through templates.
    feed_templates = {
        "competitions": "competitions.json",
        "seasons": "seasons_{competition_id}.json",
        # "games": "matches_{season_id}.json",
        "events": "events_{game_id}.json",
    }
    api_loader = WyscoutLoader(root=api_dir, getter="local", feeds=feed_templates)
    # 3166 is the converter's second positional argument (the home team id in
    # socceraction's documented signature).
    actions_from_socceraction = spadl_wyscout.convert_to_actions(api_loader.events(2852835), 3166)

    return Dataset(kloppy=actions_from_kloppy, socceraction=actions_from_socceraction)
@pytest.fixture(scope="session")
def public_wyscout_actions() -> Dataset:
    """Build the actions of public Wyscout game 2058007 via kloppy and via socceraction.

    Returns
    -------
    Dataset
        The kloppy-based actions and the socceraction-based actions. The
        return annotation matches the other fixtures in this module and the
        test that consumes this one, which reads the ``.kloppy`` and
        ``.socceraction`` fields.
    """
    data_dir = os.path.join(
        os.path.dirname(__file__), os.pardir, "datasets", "wyscout_public", "raw"
    )

    # kloppy side: the game comes from kloppy's open-data loader rather than
    # from ``data_dir``.
    kloppy_dataset = wyscout.load_open_data(match_id="2058007")
    # The orientation recorded in the metadata is overridden before converting.
    # NOTE(review): presumably the loader's declared orientation does not match
    # the actual coordinates of this data set -- confirm.
    kloppy_dataset.metadata.orientation = Orientation.ACTION_EXECUTING_TEAM
    df_actions_kl = kl.convert_to_actions(kloppy_dataset, game_id=2058007)

    # socceraction side: read the local raw files without downloading.
    # 5629 is the converter's second positional argument (the home team id in
    # socceraction's documented signature).
    loader = PublicWyscoutLoader(root=data_dir, download=False)
    df_actions_sa = spadl_wyscout.convert_to_actions(loader.events(2058007), 5629)

    return Dataset(df_actions_kl, df_actions_sa)
# def test_dummy_wyscout() -> None:
# kloppy_dataset = wyscout.load_open_data(match_id="2058007").transform(
# to_orientation=Orientation.FIXED_HOME_AWAY, # FIXME
# to_coordinate_system=kl._SoccerActionCoordinateSystem(normalized=False),
# )
#
# event = kloppy_dataset.get_event_by_id("261445568")
# print(event)
# print(event.qualifiers)
# print(event.coordinates)
# # print(event.end_coordinates)
# print(event.raw_event)
#
# assert False
@pytest.mark.skip(reason="not yet supported")
@pytest.mark.parametrize("actiontype", spadl.actiontypes)
def test_kloppy_to_actions_wyscout(public_wyscout_actions: Dataset, actiontype: str) -> None:
    """Check that both converters yield the same Wyscout actions of one type.

    Currently skipped ("not yet supported").

    Parameters
    ----------
    public_wyscout_actions : Dataset
        Actions of the same game converted via kloppy and via socceraction.
    actiontype : str
        Name of the SPADL action type to compare; the test runs once for
        every entry of ``spadl.actiontypes``.
    """
    # columns to compare
    cols = [
        "game_id",
        "original_event_id",
        "period_id",
        "time_seconds",
        "team_id",
        "player_id",
        "start_x",
        "start_y",
        "end_x",
        "end_y",
        "type_id",
        "result_id",
        "bodypart_id",
        # 'action_id',
    ]
    type_id = spadl.actiontypes.index(actiontype)

    # Wyscout actions of this type converted by socceraction. The explicit copy
    # keeps the column assignment below from touching (or warning about) the
    # session-scoped fixture frame that every parametrized run shares.
    actions_sa = public_wyscout_actions.socceraction
    sel_actions_sa = actions_sa.loc[actions_sa.type_id == type_id, cols].copy()

    # Wyscout actions of this type converted via kloppy. The "interception-"
    # prefix is stripped from the event ids so they can be matched with
    # socceraction's; ``replace`` returns a new frame, so the assignments
    # below do not touch the fixture either.
    actions_kl = public_wyscout_actions.kloppy
    sel_actions_kl = actions_kl.loc[actions_kl.type_id == type_id, cols].replace(
        {"original_event_id": {"interception-": ""}}, regex=True
    )

    # FIXME: the two converters disagree on id dtypes; align them before
    # comparing.
    sel_actions_kl["team_id"] = sel_actions_kl["team_id"].astype(int)
    sel_actions_kl["player_id"] = sel_actions_kl["player_id"].astype(int)
    sel_actions_sa["original_event_id"] = sel_actions_sa["original_event_id"].astype(str)

    # Diagnostics, shown by pytest when the assertion below fails.
    ids_kl = set(sel_actions_kl.original_event_id)
    ids_sa = set(sel_actions_sa.original_event_id)
    print("These events should not be included", ids_kl - ids_sa)
    print("These events are missing", ids_sa - ids_kl)

    # compare the two datasets
    assert_frame_equal(
        sel_actions_kl.set_index("original_event_id"),
        sel_actions_sa.set_index("original_event_id"),
    )