"""
The data processing is based on https://www.kaggle.com/code/sslp23/predicting-fifa-2022-world-cup-with-ml
"""
import os.path

import pandas as pd
from sklearn.model_selection import train_test_split

from configs.config import cfg
from configs.constants import DATA_ROOT


def result_finder(home, away):
    """
    Encode the outcome of a single match from its two scores.

    The returned series holds three values, in order: an outcome code
    (0 when ``home`` is greater, 1 when ``away`` is greater, 2 otherwise),
    the points for the home side and the points for the away side
    (3 for the higher score, 0 for the lower one, 1 each otherwise).

    :param home: score of the home team
    :param away: score of the away team
    :return: ``pd.Series`` of ``[outcome_code, home_points, away_points]``
    """
    if home > away:
        encoded = [0, 3, 0]
    elif home < away:
        encoded = [1, 0, 3]
    else:
        # Neither side is strictly ahead: treated as a draw.
        encoded = [2, 1, 1]
    return pd.Series(encoded)


def create_dataset(df: pd.DataFrame):
    """
    Split a model-ready frame into train and test partitions.

    Every column from the fourth one onward is used as a feature; the
    ``target`` column (kept as a one-column frame) is the label.
    22% of the rows go to the test partition and the split is seeded
    (``random_state=100``) so it is reproducible.

    :param df: frame containing a ``target`` column and the features
    :return: tuple ``(x_train, x_test, y_train, y_test)``
    """
    features = df.iloc[:, 3:]
    labels = df[["target"]]
    partitions = train_test_split(
        features, labels, test_size=0.22, random_state=100
    )
    return tuple(partitions)


def data_preparing():
    """
    Build the per-match feature table and the model-ready dataset.

    Pipeline:

    1. Load the match results (remote ``cfg.data.result_url`` first, local
       ``cfg.data.result_file`` under ``DATA_ROOT`` as fallback), drop rows
       with missing values and keep matches on or after
       ``cfg.day_get_result``.
    2. Load the ranking file, keep entries on or after ``cfg.day_get_rank``,
       rename three countries to match the results file and up-sample the
       ranking to one row per country per day (forward filled).
    3. Join the ranking of the home and of the away team onto every match
       (default inner merge, so matches without a ranking are dropped).
    4. Derive per-match values (outcome code, points, rank difference, ...).
    5. For every team and match, aggregate the values of that team's
       earlier matches (all of them and the five most recent).
    6. Flag friendly matches, one-hot encode the flag, drop rows that still
       contain missing values and add the binary ``target`` column.

    :return: tuple ``(df, model_db)`` where ``df`` is the full feature frame
        (including ``target``) and ``model_db`` is the result of
        ``create_db(df)``
    """
    # Best-effort download: any failure (network, parsing, ...) falls back to
    # the local copy of the results file.
    try:
        df = pd.read_csv(cfg.data.result_url)
    except Exception as e:
        print(e)
        df = pd.read_csv(os.path.join(DATA_ROOT, cfg.data.result_file))
    df["date"] = pd.to_datetime(df["date"])
    df = df.dropna()
    df = df[(df["date"] >= cfg.day_get_result)].reset_index(drop=True)

    # RANK data prepare
    rank = pd.read_csv(os.path.join(DATA_ROOT, cfg.data.rank_file))
    rank["rank_date"] = pd.to_datetime(rank["rank_date"])
    rank = rank[(rank["rank_date"] >= cfg.day_get_rank)].reset_index(drop=True)
    # Literal (non-regex) replacement, stated explicitly because the default
    # of ``regex`` differs between pandas versions.
    rank["country_full"] = (
        rank["country_full"]
        .str.replace("IR Iran", "Iran", regex=False)
        .str.replace("Korea Republic", "South Korea", regex=False)
        .str.replace("USA", "United States", regex=False)
    )

    # The merge is made in order to get a dataset of FIFA games and their rankings.
    # The ranking is first expanded to daily frequency per country so that
    # every match date has a row to join on.
    rank = (
        rank.set_index(["rank_date"])
        .groupby(["country_full"], group_keys=False)
        .resample("D")
        .first()
        .ffill()  # replaces the deprecated fillna(method="ffill")
        .reset_index()
    )
    rank_cols = [
        "country_full",
        "total_points",
        "previous_points",
        "rank",
        "rank_change",
        "rank_date",
    ]
    df_wc_ranked = df.merge(
        rank[rank_cols],
        left_on=["date", "home_team"],
        right_on=["rank_date", "country_full"],
    ).drop(["rank_date", "country_full"], axis=1)

    # The ranking columns now exist twice; the suffixes tell them apart.
    df_wc_ranked = df_wc_ranked.merge(
        rank[rank_cols],
        left_on=["date", "away_team"],
        right_on=["rank_date", "country_full"],
        suffixes=("_home", "_away"),
    ).drop(["rank_date", "country_full"], axis=1)

    # Featuring
    df = df_wc_ranked

    # result: 0 = home win, 1 = away win, 2 = draw (see result_finder).
    df[["result", "home_team_points", "away_team_points"]] = df.apply(
        lambda x: result_finder(x["home_score"], x["away_score"]), axis=1
    )

    # We create columns that will help in the creation of the features: ranking difference,
    # points won at the game vs. team faced rank, and goals difference in the game.
    # All features that are not differences should be created for the two teams (away and home).
    df["rank_dif"] = df["rank_home"] - df["rank_away"]
    df["sg"] = df["home_score"] - df["away_score"]
    df["points_home_by_rank"] = df["home_team_points"] / df["rank_away"]
    df["points_away_by_rank"] = df["away_team_points"] / df["rank_home"]

    # In order to create the features, the dataset is separated into a home team's and
    # an away team's dataset, which are unified to calculate the past game values.
    # After that, they are separated again and merged, retrieving the original dataset.
    home_team = df[
        [
            "date",
            "home_team",
            "home_score",
            "away_score",
            "rank_home",
            "rank_away",
            "rank_change_home",
            "total_points_home",
            "result",
            "rank_dif",
            "points_home_by_rank",
            "home_team_points",
        ]
    ]

    away_team = df[
        [
            "date",
            "away_team",
            "away_score",
            "home_score",
            "rank_away",
            "rank_home",
            "rank_change_away",
            "total_points_away",
            "result",
            "rank_dif",
            "points_away_by_rank",
            "away_team_points",
        ]
    ]
    # Rename to a common schema: the team's own values lose their home/away
    # marker, the opponent's values get a "suf" marker.
    home_team.columns = [
        h.replace("home_", "")
        .replace("_home", "")
        .replace("away_", "suf_")
        .replace("_away", "_suf")
        for h in home_team.columns
    ]

    away_team.columns = [
        a.replace("away_", "")
        .replace("_away", "")
        .replace("home_", "suf_")
        .replace("_home", "_suf")
        for a in away_team.columns
    ]
    # DataFrame.append was removed in pandas 2.0; pd.concat is the equivalent.
    # Home rows come first, away rows second (relied upon when splitting below).
    team_stats = pd.concat([home_team, away_team])

    stats_val = []

    for _, row in team_stats.iterrows():
        team = row["team"]
        date = row["date"]
        # Only strictly earlier matches of the same team, most recent first.
        past_games = team_stats.loc[
            (team_stats["team"] == team) & (team_stats["date"] < date)
        ].sort_values(by=["date"], ascending=False)
        last5 = past_games.head(5)

        goals = past_games["score"].mean()
        goals_l5 = last5["score"].mean()

        goals_suf = past_games["suf_score"].mean()
        goals_suf_l5 = last5["suf_score"].mean()

        # Mean rank of the opponents faced (named so it does not shadow the
        # ranking frame above).
        rank_faced = past_games["rank_suf"].mean()
        rank_faced_l5 = last5["rank_suf"].mean()

        if len(last5) > 0:
            # Ranking points at the most recent past match minus those at the
            # oldest one, i.e. the amount of points earned in between.
            points = (
                past_games["total_points"].values[0]
                - past_games["total_points"].values[-1]
            )
            points_l5 = (
                last5["total_points"].values[0] - last5["total_points"].values[-1]
            )
        else:
            points = 0
            points_l5 = 0

        gp = past_games["team_points"].mean()
        gp_l5 = last5["team_points"].mean()

        gp_rank = past_games["points_by_rank"].mean()
        gp_rank_l5 = last5["points_by_rank"].mean()

        stats_val.append(
            [
                goals,
                goals_l5,
                goals_suf,
                goals_suf_l5,
                rank_faced,
                rank_faced_l5,
                points,
                points_l5,
                gp,
                gp_l5,
                gp_rank,
                gp_rank_l5,
            ]
        )

    stats_cols = [
        "goals_mean",
        "goals_mean_l5",
        "goals_suf_mean",
        "goals_suf_mean_l5",
        "rank_mean",
        "rank_mean_l5",
        "points_mean",
        "points_mean_l5",
        "game_points_mean",
        "game_points_mean_l5",
        "game_points_rank_mean",
        "game_points_rank_mean_l5",
    ]

    stats_df = pd.DataFrame(stats_val, columns=stats_cols)

    full_df = pd.concat(
        [team_stats.reset_index(drop=True), stats_df], axis=1, ignore_index=False
    )

    # First half of the rows are the home teams, second half the away teams.
    half = len(full_df) // 2
    home_team_stats = full_df.iloc[:half, :]
    away_team_stats = full_df.iloc[half:, :]

    # Keep only the 12 aggregated statistics appended above.
    home_team_stats = home_team_stats[home_team_stats.columns[-12:]]
    away_team_stats = away_team_stats[away_team_stats.columns[-12:]]

    # In order to unify the database, a home/away prefix is added to each column.
    # After that, the data is ready to be merged.
    home_team_stats.columns = ["home_" + str(col) for col in home_team_stats.columns]
    away_team_stats.columns = ["away_" + str(col) for col in away_team_stats.columns]

    match_stats = pd.concat(
        [home_team_stats, away_team_stats.reset_index(drop=True)],
        axis=1,
        ignore_index=False,
    )

    full_df = pd.concat(
        [df, match_stats.reset_index(drop=True)], axis=1, ignore_index=False
    )

    # Flag friendly games and one-hot encode the flag.
    full_df["is_friendly"] = full_df["tournament"].apply(find_friendly)
    full_df = pd.get_dummies(full_df, columns=["is_friendly"])
    # get_dummies only creates a column for values that actually occur, so a
    # dataset with only friendlies (or none) would lack one of the columns
    # selected below.
    for dummy_col in ("is_friendly_0", "is_friendly_1"):
        if dummy_col not in full_df.columns:
            full_df[dummy_col] = 0

    base_df = full_df[
        [
            "date",
            "home_team",
            "away_team",
            "rank_home",
            "rank_away",
            "home_score",
            "away_score",
            "result",
            "rank_dif",
            "rank_change_home",
            "rank_change_away",
            "home_goals_mean",
            "home_goals_mean_l5",
            "home_goals_suf_mean",
            "home_goals_suf_mean_l5",
            "home_rank_mean",
            "home_rank_mean_l5",
            "home_points_mean",
            "home_points_mean_l5",
            "away_goals_mean",
            "away_goals_mean_l5",
            "away_goals_suf_mean",
            "away_goals_suf_mean_l5",
            "away_rank_mean",
            "away_rank_mean_l5",
            "away_points_mean",
            "away_points_mean_l5",
            "home_game_points_mean",
            "home_game_points_mean_l5",
            "home_game_points_rank_mean",
            "home_game_points_rank_mean_l5",
            "away_game_points_mean",
            "away_game_points_mean_l5",
            "away_game_points_rank_mean",
            "away_game_points_rank_mean_l5",
            "is_friendly_0",
            "is_friendly_1",
        ]
    ]

    # Explicit copy so that adding "target" never writes into a slice of full_df.
    df = base_df.dropna().copy()

    # Binary target: draws (2) are merged into class 1, see no_draw.
    df["target"] = df["result"].apply(no_draw)

    model_db = create_db(df)

    return df, model_db


def find_friendly(x):
    """
    Tell whether a tournament name denotes a friendly match.

    :param x: tournament name
    :return: 1 when ``x`` equals ``"Friendly"`` exactly, 0 otherwise
    """
    return 1 if x == "Friendly" else 0


def create_db(df):
    """
    Derive the model input table from the per-match feature frame.

    Each feature is the home value minus the away value of a pair of
    ``home_*`` / ``away_*`` columns; ``goals_per_ranking_dif`` is the
    difference of the goals mean divided by the rank mean of each side.
    The input frame is not modified.

    :param df: frame holding the team names, ``target``, ``rank_dif``, the
        ``home_*`` / ``away_*`` aggregate columns and the ``is_friendly_*``
        columns
    :return: frame with the team names, ``target``, ``rank_dif``, the
        difference features and the ``is_friendly_*`` columns
    """
    id_cols = ["home_team", "away_team", "target", "rank_dif"]
    friendly_cols = ["is_friendly_0", "is_friendly_1"]

    # feature name -> stem shared by its "home_" and "away_" source columns
    diff_stems = {
        "goals_dif": "goals_mean",
        "goals_dif_l5": "goals_mean_l5",
        "goals_suf_dif": "goals_suf_mean",
        "goals_suf_dif_l5": "goals_suf_mean_l5",
        "dif_rank_agst": "rank_mean",
        "dif_rank_agst_l5": "rank_mean_l5",
        "dif_points_rank": "game_points_rank_mean",
        "dif_points_rank_l5": "game_points_rank_mean_l5",
    }

    source_cols = list(id_cols)
    for stem in diff_stems.values():
        source_cols.append("home_" + stem)
        source_cols.append("away_" + stem)
    source_cols.extend(friendly_cols)

    # Selecting with a list yields a new frame, so the caller's frame stays untouched.
    base = df.loc[:, source_cols]

    for feature, stem in diff_stems.items():
        base[feature] = base["home_" + stem] - base["away_" + stem]

    home_goals_per_rank = base["home_goals_mean"] / base["home_rank_mean"]
    away_goals_per_rank = base["away_goals_mean"] / base["away_rank_mean"]
    base["goals_per_ranking_dif"] = home_goals_per_rank - away_goals_per_rank

    output_cols = id_cols + [
        "goals_dif",
        "goals_dif_l5",
        "goals_suf_dif",
        "goals_suf_dif_l5",
        "goals_per_ranking_dif",
        "dif_rank_agst",
        "dif_rank_agst_l5",
        "dif_points_rank",
        "dif_points_rank_l5",
    ] + friendly_cols
    return base[output_cols]


def no_draw(x):
    """
    Collapse the outcome code 2 into class 1.

    :param x: outcome code
    :return: 1 when ``x`` equals 2, otherwise ``x`` unchanged
    """
    return 1 if x == 2 else x