Spaces:

hackathon-ombrea
/

gaia

Runtime error

File size: 8,669 Bytes

a07ee0c
a8dd5f5
 
 
a07ee0c
db975c4
 
 
 
ba70fb9
9c23216
38af3d3
9c23216
a07ee0c
 
 
db975c4
 
a07ee0c
ba70fb9
db975c4
 
 
 
 
 
 
 
a07ee0c
 
db975c4
 
 
 
a07ee0c
 
db975c4
 
 
 
 
a07ee0c
db975c4
 
 
a07ee0c
db975c4
 
 
a07ee0c
 
ba70fb9
 
a07ee0c
db975c4
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
a07ee0c
 
 
db975c4
 
 
 
ba70fb9
 
 
db975c4
 
 
 
 
 
 
 
ba70fb9
db975c4
 
 
ba70fb9
 
db975c4
 
 
ba70fb9
 
db975c4
 
 
 
 
 
 
 
 
ba70fb9
a07ee0c
fe52f62
a07ee0c
 
db975c4
ba70fb9
 
 
 
db975c4
 
 
 
e25f53d
db975c4
 
 
 
 
 
e25f53d
ba70fb9
db975c4
 
 
 
 
 
4d46cce
 
db975c4
 
 
4d46cce
db975c4
 
 
 
 
 
 
ba70fb9
db975c4
 
 
4d46cce
ba70fb9
e25f53d
db975c4
 
 
 
 
 
 
 
 
 
 
e25f53d
4d46cce
e25f53d
fe52f62
a07ee0c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
9c23216
a07ee0c
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
db975c4
 
a07ee0c
db975c4
a07ee0c
db975c4
a07ee0c

import os
import pandas as pd
import pandas as pd
import numpy as np
from forecast import get_forecast_datasets, get_forecast_data
from data_pipelines.historical_weather_data import (
    download_historical_weather_data,
    aggregate_hourly_weather_data,
)
from utils.soil_utils import find_nearest_point_to_coordinates
from utils.summary import get_meterological_summary, get_agricultural_yield_comparison


def get_meterological_past_data(
    latitude: float = 47.0,
    longitude: float = 5.0,
    start_year: int = 2010,
    end_year: int = 2025,
):
    """Download historical weather data for one location and year range.

    The original body read ``latitude``, ``longitude``, ``start_year`` and
    ``end_year`` without defining them, so every call raised ``NameError``.
    They are now parameters; the defaults are the same values
    ``pre_process_data`` uses in this module.

    Args:
        latitude: Latitude forwarded to ``download_historical_weather_data``.
        longitude: Longitude forwarded to ``download_historical_weather_data``.
        start_year: First year of the requested range.
        end_year: Last year of the requested range.

    Returns:
        Whatever ``download_historical_weather_data`` returns for these
        arguments.
    """
    return download_historical_weather_data(
        latitude=latitude,
        longitude=longitude,
        start_year=start_year,
        end_year=end_year,
    )


def pre_process_data(scenario: str, lat: float = 47.0, lon: float = 5.0):
    """Build yearly-averaged historical and forecast weather tables.

    Args:
        scenario: Scenario name forwarded to ``get_forecast_data``.
        lat: Latitude forwarded to both data sources.
        lon: Longitude forwarded to both data sources.

    Returns:
        A ``(historical, forecast)`` pair of DataFrames, each grouped by
        ``year`` with the mean of the remaining columns. The historical
        table only keeps years strictly before 2024.
    """
    first_year, last_year = 2010, 2025

    # Historical side: download, then hand the result to the hourly aggregator.
    raw_history = download_historical_weather_data(
        latitude=lat, longitude=lon, start_year=first_year, end_year=last_year
    )
    history = aggregate_hourly_weather_data(raw_history)

    # Forecast side, without shading.
    forecast = get_forecast_data(
        scenario=scenario, longitude=lon, latitude=lat, shading_coef=0
    )

    # Yearly means of the forecast.
    forecast["time"] = pd.to_datetime(forecast["time"])
    forecast["year"] = forecast["time"].dt.year
    yearly_forecast = forecast.groupby(by="year", as_index=False).mean()
    yearly_forecast = yearly_forecast.reset_index()
    # Disabled filter kept for reference:
    # yearly_forecast = yearly_forecast[yearly_forecast["year"] > 2025]

    # Turn the index into a "time" column (assumes the index is an unnamed
    # datetime index — TODO confirm against aggregate_hourly_weather_data).
    history = history.reset_index()
    history = history.rename(columns={"index": "time"})
    history = history.sort_values(by="time")
    history["year"] = history["time"].dt.year
    # Divide by 3600 (seconds per hour); the original note says this converts
    # the data to kg m-2 per s. Presumably the input is an hourly amount —
    # TODO confirm.
    history["precipitation"] = history["precipitation"] / 3600

    # Yearly means of the history, restricted to years before 2024.
    yearly_history = history.groupby(by="year", as_index=False).mean()
    yearly_history = yearly_history.reset_index()
    before_2024 = yearly_history["year"] < 2024

    return yearly_history[before_2024], yearly_forecast


def process_all_data_for_meterological_summary(
    historical_data: pd.DataFrame, forecast_data: pd.DataFrame
):
    """Stack historical and forecast values into one table per variable.

    For temperature, irradiance and precipitation, the historical column is
    renamed to the forecast column name and the forecast rows are appended
    below the historical rows. Original row indexes are kept as they are.

    Args:
        historical_data: Table with ``year``, ``air_temperature_mean``,
            ``irradiance`` and ``precipitation`` columns.
        forecast_data: Table with ``year`` and the three forecast-named
            columns used below.

    Returns:
        ``(temperature_df, rain_df, irradiance_df)`` — note that rain comes
        before irradiance in the returned tuple.
    """

    def _stack(historical_name: str, forecast_name: str) -> pd.DataFrame:
        # Align the historical column name with the forecast one, then
        # append the forecast rows.
        past = historical_data[["year", historical_name]]
        past = past.rename(columns={historical_name: forecast_name})
        future = forecast_data[["year", forecast_name]]
        return pd.concat([past, future], axis=0)

    temperature_df = _stack(
        "air_temperature_mean", "Near Surface Air Temperature (°C)"
    )
    irradiance_df = _stack(
        "irradiance", "Surface Downwelling Shortwave Radiation (W/m²)"
    )
    rain_df = _stack("precipitation", "Precipitation (kg m-2 s-1)")

    return temperature_df, rain_df, irradiance_df


def get_yield_data(
    region: str = "Bourgogne-Franche-Comté", culture: str = "Blé tendre d'hiver"
):
    """Load past and forecast crop yields from the CSV files under ``data/``.

    Past yields are read from ``data/data_yield/data_rendement.csv``, filtered
    on region and crop label, and reshaped from one ``REND_<year>`` column per
    year into long format. Forecast yields are read from
    ``data/data_yield/rendement_forecast.csv`` and filtered on ``culture``.

    Crop labels are matched as literal substrings (``regex=False``) and rows
    with a missing label are treated as non-matching (``na=False``). With the
    pandas defaults, a label containing regex metacharacters such as
    parentheses would match the wrong rows, and a missing label would put NaN
    into the boolean mask.

    Args:
        region: Value the ``LIB_REG2`` column must equal for past yields.
        culture: Substring the ``culture`` column of the forecast file must
            contain.

    Returns:
        A ``(past, forecast)`` pair of DataFrames: ``past`` has the columns
        ``year`` (int) and ``past_yield``; ``forecast`` has ``year``,
        ``yield_simple_forecast`` and ``yield_with_shading_forecast``.
    """
    yield_past_data = pd.read_csv("data/data_yield/data_rendement.csv")

    # NOTE(review): the historical crop filter is hard-coded and ignores
    # `culture`. It is kept as-is because the label passed by get_summaries
    # ("Colza d'hiver") is not a substring of this one, so the two files
    # apparently use different crop labels — confirm before parameterizing.
    past_crop_mask = yield_past_data["LIB_SAA"].str.contains(
        "Colza grain d'hiver", regex=False, na=False
    )
    yield_past_data = yield_past_data[
        (yield_past_data["LIB_REG2"] == region) & past_crop_mask
    ]

    # Keep the identifying columns plus every yield ("REND") column.
    yield_past_data = yield_past_data[
        ["LIB_REG2", "LIB_SAA"]
        + [col for col in yield_past_data.columns if "REND" in col]
    ]

    # Reshape wide -> long: one row per (region, crop, year column).
    yield_past_data = yield_past_data.melt(
        id_vars=["LIB_REG2", "LIB_SAA"], var_name="year", value_name="past_yield"
    )

    # Strip the "REND_" prefix from the former column names, leaving the year.
    yield_past_data["year"] = (
        yield_past_data["year"].str.replace("REND_", "", regex=False).astype(int)
    )

    yield_forecast_data = pd.read_csv("data/data_yield/rendement_forecast.csv")
    forecast_crop_mask = yield_forecast_data["culture"].str.contains(
        culture, regex=False, na=False
    )
    yield_forecast_data = yield_forecast_data[forecast_crop_mask]

    return (
        yield_past_data[["year", "past_yield"]],
        yield_forecast_data[
            ["year", "yield_simple_forecast", "yield_with_shading_forecast"]
        ],
    )


def get_summaries():
    """Run the full pipeline and return the two generated summaries.

    The configuration is fixed inside the function: "pessimist" scenario,
    latitude 47.0 / longitude 5.0, crop "Colza d'hiver", region
    "Bourgogne-Franche-Comté". Intermediate results are printed to stdout.

    Returns:
        ``(meterological_summary, second_summary)``: the value returned by
        ``get_meterological_summary`` followed by the value returned by
        ``get_agricultural_yield_comparison``.
    """
    scenario = "pessimist"
    lat, lon = 47.0, 5.0
    culture = "Colza d'hiver"
    region = "Bourgogne-Franche-Comté"
    water_deficit_columns = ["year", "Water Deficit (mm/day)"]

    # --- Weather: yearly history + unshaded forecast -----------------------
    historical_df, forecast_df = pre_process_data(scenario, lat, lon)
    (
        temperature_df,
        rain_df,
        irradiance_df,
    ) = process_all_data_for_meterological_summary(historical_df, forecast_df)

    meterological_summary = get_meterological_summary(
        scenario=scenario,
        temperature_df=temperature_df,
        irradiance_df=irradiance_df,
        rain_df=rain_df,
    )
    print(meterological_summary)

    # --- Inputs for the yield comparison ------------------------------------
    # Climate table: the three variables joined on "year".
    climate_data = temperature_df.merge(rain_df, on="year")
    climate_data = climate_data.merge(irradiance_df, on="year")

    closest_soil_data = find_nearest_point_to_coordinates(
        latitude=lat, longitude=lon
    )
    water_deficit_data = forecast_df[water_deficit_columns]

    # Same forecast again with a shading coefficient of 0.2 (the "PV" case),
    # averaged per year.
    shaded_forecast = get_forecast_data(
        scenario=scenario, longitude=lon, latitude=lat, shading_coef=0.2
    )
    shaded_forecast["time"] = pd.to_datetime(shaded_forecast["time"])
    shaded_forecast["year"] = shaded_forecast["time"].dt.year
    yearly_shaded = shaded_forecast.groupby(by="year", as_index=False).mean()
    yearly_shaded = yearly_shaded.reset_index()
    water_deficit_data_pv = yearly_shaded[water_deficit_columns]

    # TODO: add a step that maps the GPS coordinates to a French region so the
    # yield data can be filtered from the coordinates.
    yield_past_data, yield_forecast_data = get_yield_data(
        region=region, culture=culture
    )
    print(yield_forecast_data.tail())

    # --- Yield comparison (with and without shading) -------------------------
    second_summary = get_agricultural_yield_comparison(
        culture=culture,
        region="bourgogne franche comté",
        water_df=water_deficit_data,
        water_df_pv=water_deficit_data_pv,
        climate_df=climate_data,
        soil_df=closest_soil_data,
        forecast_yield_df=yield_forecast_data,
        historical_yield_df=yield_past_data,
    )

    print(yield_forecast_data.tail())
    print(second_summary)
    return meterological_summary, second_summary
    # from utils.soil_utils import find_nearest_point
    # city = "Bourgogne Franche Comté"
    # closest_soil_features = find_nearest_point(city)
    # print(closest_soil_features)

    # Example usage
    # import pandas as pd
    # import numpy as np

    # from utils.soil_utils import find_nearest_point

    # city = "Bourgogne Franche Comté"
    # closest_soil_features = find_nearest_point(city)
    # print(closest_soil_features)

    # # Définir la période de 4 ans dans le passé + 15 ans dans le futur (19 ans)
    # start_date = "2010-01"
    # end_date = "2029-12"

    # # Générer une série de dates mensuelles
    # dates = pd.date_range(start=start_date, end=end_date, freq='M')
    # Générer une série de dates mensuelles
    # dates = pd.date_range(start=start_date, end=end_date, freq="M")

    # # Générer des données fictives de rendement (en tonnes par hectare)
    # np.random.seed(42)  # Pour reproductibilité

    # # Tendance générale du rendement sans ombrage (augmentation progressive)
    # trend = np.linspace(2.5, 3.2, len(dates))  # Augmente légèrement sur les années

    # # Ajout de variations saisonnières et aléatoires
    # seasonality = 0.3 * np.sin(np.linspace(0, 12 * np.pi, len(dates)))  # Effet saisonnier
    # random_variation = np.random.normal(0, 0.1, len(dates))  # Bruit aléatoire

    # # Calcul du rendement sans ombrage
    # yield_no_shade = trend + seasonality + random_variation

    # # Appliquer un effet d'ombrage (réduction de 10-20% du rendement)
    # shade_factor = np.random.uniform(0.1, 0.2, len(dates))  # Entre 10% et 20% de réduction
    # yield_with_shade = yield_no_shade * (1 - shade_factor)

    # # Créer le DataFrame
    # df = pd.DataFrame({
    #     "date": dates,
    #     "yield_no_shade": yield_no_shade,
    #     "yield_with_shade": yield_with_shade
    # })
    # water_deficit_data = pd.DataFrame()
    # climate_data = pd.DataFrame()

    # print(get_agricultural_yield_comparison(culture="orge",
    #                                         region="bourgogne franche comté",
    #                                         water_df=water_deficit_data,
    #                                         climate_df=climate_data,
    #                                         soil_df=closest_soil_features,
    #                                         agri_yield_df=df))