import os
import pandas as pd
import pandas as pd
import numpy as np
from forecast import get_forecast_datasets, get_forecast_data
from data_pipelines.historical_weather_data import (
    download_historical_weather_data,
    aggregate_hourly_weather_data,
)
from utils.soil_utils import find_nearest_point_to_coordinates
from utils.summary import get_meterological_summary, get_agricultural_yield_comparison


def get_meterological_past_data(
    latitude: float = 47.0,
    longitude: float = 5.0,
    start_year: int = 2010,
    end_year: int = 2025,
):
    """Download the historical weather data for one location and year range.

    The previous version took no arguments yet referenced ``latitude``,
    ``longitude``, ``start_year`` and ``end_year``, none of which existed in
    scope, so any call raised ``NameError``. They are now parameters whose
    defaults mirror the values used by ``pre_process_data``, so calling the
    function without arguments keeps working.

    Args:
        latitude: Latitude of the point of interest.
        longitude: Longitude of the point of interest.
        start_year: First year of the requested period.
        end_year: Last year of the requested period.

    Returns:
        Whatever ``download_historical_weather_data`` returns for the request
        (previously the result was discarded).
    """
    return download_historical_weather_data(
        latitude=latitude,
        longitude=longitude,
        start_year=start_year,
        end_year=end_year,
    )


def pre_process_data(scenario: str, lat: float = 47.0, lon: float = 5.0):
    """Build yearly-averaged historical and forecast weather tables.

    Historical data is downloaded for 2010-2025 and aggregated with
    ``aggregate_hourly_weather_data``; forecast data comes from
    ``get_forecast_data`` with ``shading_coef=0``. Both tables are then
    averaged per calendar year.

    Args:
        scenario: Scenario identifier forwarded to ``get_forecast_data``.
        lat: Latitude of the point of interest.
        lon: Longitude of the point of interest.

    Returns:
        A ``(historical, forecast)`` tuple of yearly-mean DataFrames. The
        historical table only keeps years strictly before 2024; forecast
        years are left unfiltered.
    """
    start_year, end_year = 2010, 2025

    raw_history = download_historical_weather_data(
        latitude=lat, longitude=lon, start_year=start_year, end_year=end_year
    )
    historical_df = aggregate_hourly_weather_data(raw_history)
    forecast_df = get_forecast_data(
        scenario=scenario, longitude=lon, latitude=lat, shading_coef=0
    )

    # --- forecast: derive the year from the timestamp, then average per year
    forecast_df["time"] = pd.to_datetime(forecast_df["time"])
    forecast_df["year"] = forecast_df["time"].dt.year
    yearly_forecast = forecast_df.groupby(by="year", as_index=False).mean()
    yearly_forecast = yearly_forecast.reset_index()

    # --- historical: expose the index as a "time" column and order by it
    historical_df = historical_df.reset_index()
    historical_df = historical_df.rename(columns={"index": "time"})
    historical_df = historical_df.sort_values(by="time")
    historical_df["year"] = historical_df["time"].dt.year
    # Divide by 3600 (seconds per hour); per the original note this expresses
    # precipitation in kg m-2 s-1, the unit used by the forecast column.
    historical_df["precipitation"] = historical_df["precipitation"].div(3600)

    yearly_history = historical_df.groupby(by="year", as_index=False).mean()
    yearly_history = yearly_history.reset_index()
    before_2024 = yearly_history["year"] < 2024

    return yearly_history[before_2024], yearly_forecast


def process_all_data_for_meterological_summary(
    historical_data: pd.DataFrame, forecast_data: pd.DataFrame
):
    """Stack historical and forecast values into one table per variable.

    For each variable the historical column is renamed to the forecast
    column's label and the two ``["year", <label>]`` frames are concatenated
    row-wise (historical rows first, original indices kept).

    Args:
        historical_data: Frame with ``year``, ``air_temperature_mean``,
            ``irradiance`` and ``precipitation`` columns.
        forecast_data: Frame with ``year`` and the three long-named forecast
            columns used below.

    Returns:
        ``(temperature_df, rain_df, irradiance_df)`` — note the order: rain
        comes before irradiance.
    """

    def _stack(historical_column: str, label: str) -> pd.DataFrame:
        # Align the historical column name with the forecast label, then append.
        past = historical_data[["year", historical_column]].rename(
            columns={historical_column: label}
        )
        future = forecast_data[["year", label]]
        return pd.concat([past, future], axis=0)

    temperature_df = _stack(
        "air_temperature_mean", "Near Surface Air Temperature (°C)"
    )
    irradiance_df = _stack(
        "irradiance", "Surface Downwelling Shortwave Radiation (W/m²)"
    )
    rain_df = _stack("precipitation", "Precipitation (kg m-2 s-1)")

    return temperature_df, rain_df, irradiance_df


def get_yield_data(
    region: str = "Bourgogne-Franche-Comté",
    culture: str = "Blé tendre d'hiver",
    past_culture: str = "Colza grain d'hiver",
):
    """Load historical and forecast yield tables for one region and crop.

    Both CSV files are read from paths relative to the current working
    directory.

    Args:
        region: Value matched exactly against the ``LIB_REG2`` column of the
            historical yield file.
        culture: Text searched for (literal substring) in the ``culture``
            column of the forecast yield file.
        past_culture: Text searched for (literal substring) in the
            ``LIB_SAA`` column of the historical yield file. This used to be
            hard-coded; the default keeps the previous value. It is separate
            from ``culture`` because the two files do not necessarily label
            crops identically — TODO confirm and unify the labels.

    Returns:
        A ``(past, forecast)`` tuple of DataFrames: ``past`` has columns
        ``year`` (int) and ``past_yield``; ``forecast`` has columns ``year``,
        ``yield_simple_forecast`` and ``yield_with_shading_forecast``.
    """
    id_columns = ["LIB_REG2", "LIB_SAA"]

    yield_past_data = pd.read_csv("data/data_yield/data_rendement.csv")
    in_region = yield_past_data["LIB_REG2"] == region
    # regex=False: crop labels are plain text, so characters such as "(" must
    # not be interpreted as a pattern. na=False: rows without a label are
    # simply excluded instead of breaking the boolean mask.
    is_past_culture = yield_past_data["LIB_SAA"].str.contains(
        past_culture, regex=False, na=False
    )
    yield_past_data = yield_past_data[in_region & is_past_culture]

    # Keep the identifiers plus every yield column (name contains "REND"),
    # then reshape from one column per year to one row per year.
    yield_columns = [col for col in yield_past_data.columns if "REND" in col]
    yield_past_data = yield_past_data[id_columns + yield_columns].melt(
        id_vars=id_columns, var_name="year", value_name="past_yield"
    )

    # Clean the "year" column: strip the "REND_" prefix and convert to int.
    yield_past_data["year"] = (
        yield_past_data["year"].str.replace("REND_", "", regex=False).astype(int)
    )

    yield_forecast_data = pd.read_csv("data/data_yield/rendement_forecast.csv")
    is_culture = yield_forecast_data["culture"].str.contains(
        culture, regex=False, na=False
    )
    yield_forecast_data = yield_forecast_data[is_culture]

    return (
        yield_past_data[["year", "past_yield"]],
        yield_forecast_data[
            ["year", "yield_simple_forecast", "yield_with_shading_forecast"]
        ],
    )


def get_summaries(
    scenario: str = "pessimist",
    lat: float = 47.0,
    lon: float = 5.0,
    culture: str = "Colza d'hiver",
    region: str = "Bourgogne-Franche-Comté",
):
    """Produce the meteorological summary and the yield-comparison summary.

    The scenario, coordinates, crop and region used to be hard-coded locals;
    they are now parameters whose defaults are the previous values, so
    ``get_summaries()`` behaves as before.

    Args:
        scenario: Scenario identifier forwarded to the forecast helpers.
        lat: Latitude of the point of interest.
        lon: Longitude of the point of interest.
        culture: Crop label used to filter the yield forecast data and passed
            to the yield comparison.
        region: Region label used to filter the historical yield data.
            TODO: derive the region from the GPS coordinates instead of
            requiring the caller to keep the two consistent.

    Returns:
        ``(meterological_summary, second_summary)`` as returned by
        ``get_meterological_summary`` and
        ``get_agricultural_yield_comparison``. Both are also printed.
    """
    historical_df, forecast_df = pre_process_data(scenario, lat, lon)

    temperature_df, rain_df, irradiance_df = process_all_data_for_meterological_summary(
        historical_df, forecast_df
    )

    meterological_summary = get_meterological_summary(
        scenario=scenario,
        temperature_df=temperature_df,
        irradiance_df=irradiance_df,
        rain_df=rain_df,
    )
    print(meterological_summary)

    # Inputs of the yield comparison: climate, soil and water deficit.
    climate_data = temperature_df.merge(rain_df, on="year").merge(
        irradiance_df, on="year"
    )
    closest_soil_data = find_nearest_point_to_coordinates(
        latitude=lat, longitude=lon
    )
    water_deficit_data = forecast_df[["year", "Water Deficit (mm/day)"]]

    # Same forecast with a shading coefficient of 0.2 (the "PV" variant),
    # averaged per year to obtain the water deficit under shading.
    forecast_df_pv = get_forecast_data(
        scenario=scenario, longitude=lon, latitude=lat, shading_coef=0.2
    )
    forecast_df_pv["time"] = pd.to_datetime(forecast_df_pv["time"])
    forecast_df_pv["year"] = forecast_df_pv["time"].dt.year
    water_deficit_data_pv = (
        forecast_df_pv.groupby(by="year", as_index=False)
        .mean()
        .reset_index()[["year", "Water Deficit (mm/day)"]]
    )

    yield_past_data, yield_forecast_data = get_yield_data(
        region=region, culture=culture
    )
    print(yield_forecast_data.tail())

    # The comparison previously received the literal "bourgogne franche comté";
    # lower-casing the region and replacing hyphens with spaces reproduces that
    # exact string for the default region.
    second_summary = get_agricultural_yield_comparison(
        culture=culture,
        region=region.lower().replace("-", " "),
        water_df=water_deficit_data,
        water_df_pv=water_deficit_data_pv,
        climate_df=climate_data,
        soil_df=closest_soil_data,
        forecast_yield_df=yield_forecast_data,
        historical_yield_df=yield_past_data,
    )

    print(yield_forecast_data.tail())
    print(second_summary)
    return meterological_summary, second_summary