import os

import pandas as pd
import numpy as np

from forecast import get_forecast_datasets, get_forecast_data
from data_pipelines.historical_weather_data import (
    download_historical_weather_data,
    aggregate_hourly_weather_data,
)
from utils.soil_utils import find_nearest_point_to_coordinates
from utils.summary import get_meterological_summary, get_agricultural_yield_comparison

# Default location and historical window used throughout this module.
DEFAULT_LATITUDE = 47.0
DEFAULT_LONGITUDE = 5.0
HISTORICAL_START_YEAR = 2010
HISTORICAL_END_YEAR = 2025
# Yearly historical rows are kept only for years strictly below this value.
# NOTE(review): presumably excludes incomplete recent years -- confirm.
HISTORICAL_CUTOFF_YEAR = 2024

# Column labels shared by the forecast frames and the summary inputs.
TEMPERATURE_COLUMN = "Near Surface Air Temperature (°C)"
IRRADIANCE_COLUMN = "Surface Downwelling Shortwave Radiation (W/m²)"
PRECIPITATION_COLUMN = "Precipitation (kg m-2 s-1)"
WATER_DEFICIT_COLUMN = "Water Deficit (mm/day)"


def get_meterological_past_data(
    latitude: float = DEFAULT_LATITUDE,
    longitude: float = DEFAULT_LONGITUDE,
    start_year: int = HISTORICAL_START_YEAR,
    end_year: int = HISTORICAL_END_YEAR,
):
    """Download historical weather data for a location and year range.

    The previous version read four names that were never defined, so every
    call raised ``NameError``; they are now parameters with defaults that
    match the values used by ``pre_process_data``.

    Args:
        latitude: Latitude forwarded to ``download_historical_weather_data``.
        longitude: Longitude forwarded to ``download_historical_weather_data``.
        start_year: First year forwarded to the downloader.
        end_year: Last year forwarded to the downloader.

    Returns:
        Whatever ``download_historical_weather_data`` returns.
    """
    return download_historical_weather_data(
        latitude=latitude,
        longitude=longitude,
        start_year=start_year,
        end_year=end_year,
    )


def _yearly_mean(df: pd.DataFrame) -> pd.DataFrame:
    """Average the columns of *df* per value of its ``"year"`` column."""
    # NOTE: with as_index=False the trailing reset_index() only adds an extra
    # "index" column; it is kept so the returned columns stay unchanged.
    return df.groupby(by="year", as_index=False).mean().reset_index()


def _load_yearly_forecast(
    scenario: str, lat: float, lon: float, shading_coef: float
) -> pd.DataFrame:
    """Fetch forecast data and aggregate it to one averaged row per year.

    The frame returned by ``get_forecast_data`` must expose a ``"time"``
    column that ``pd.to_datetime`` can parse.
    """
    forecast_df = get_forecast_data(
        scenario=scenario, longitude=lon, latitude=lat, shading_coef=shading_coef
    )
    forecast_df["time"] = pd.to_datetime(forecast_df["time"])
    forecast_df["year"] = forecast_df["time"].dt.year
    return _yearly_mean(forecast_df)


def pre_process_data(
    scenario: str, lat: float = DEFAULT_LATITUDE, lon: float = DEFAULT_LONGITUDE
):
    """Build yearly-averaged historical and forecast weather frames.

    Args:
        scenario: Scenario name forwarded to ``get_forecast_data``.
        lat: Latitude of the point of interest.
        lon: Longitude of the point of interest.

    Returns:
        A ``(historical, forecast)`` tuple of DataFrames, each with one row
        per year. The historical frame only contains years below
        ``HISTORICAL_CUTOFF_YEAR``; the forecast frame is requested with
        ``shading_coef=0``.
    """
    historical_df = aggregate_hourly_weather_data(
        download_historical_weather_data(
            latitude=lat,
            longitude=lon,
            start_year=HISTORICAL_START_YEAR,
            end_year=HISTORICAL_END_YEAR,
        )
    )

    new_forecast_df = _load_yearly_forecast(scenario, lat, lon, shading_coef=0)
    # Disabled filter kept for reference:
    # new_forecast_df = new_forecast_df[new_forecast_df["year"] > 2025]

    # The aggregated frame carries its timestamps in the index; expose them as
    # a "time" column so the year can be derived from it.
    historical_df = (
        historical_df.reset_index()
        .rename(columns={"index": "time"})
        .sort_values(by="time")
    )
    historical_df["year"] = historical_df["time"].dt.year
    # Convert precipitation to kg m-2 s-1 so it matches the forecast column.
    # NOTE(review): dividing by 3600 assumes hourly totals -- confirm.
    historical_df["precipitation"] = historical_df["precipitation"] / 3600

    new_historical_df = _yearly_mean(historical_df)
    new_historical_df = new_historical_df[
        new_historical_df["year"] < HISTORICAL_CUTOFF_YEAR
    ]

    return new_historical_df, new_forecast_df


def _stack_history_and_forecast(
    historical_data: pd.DataFrame,
    forecast_data: pd.DataFrame,
    historical_column: str,
    forecast_column: str,
) -> pd.DataFrame:
    """Stack one historical variable on top of its forecast counterpart.

    The historical column is renamed to the forecast label so both parts end
    up in a single ``["year", forecast_column]`` frame.
    """
    past = historical_data[["year", historical_column]].rename(
        columns={historical_column: forecast_column}
    )
    future = forecast_data[["year", forecast_column]]
    return pd.concat([past, future], axis=0)


def process_all_data_for_meterological_summary(
    historical_data: pd.DataFrame, forecast_data: pd.DataFrame
):
    """Merge historical and forecast series for the meteorological summary.

    Args:
        historical_data: Yearly frame with ``year``, ``air_temperature_mean``,
            ``irradiance`` and ``precipitation`` columns.
        forecast_data: Yearly frame with ``year`` and the three forecast
            columns named by the module-level ``*_COLUMN`` constants.

    Returns:
        A ``(temperature_df, rain_df, irradiance_df)`` tuple; each frame has
        a ``year`` column and one value column using the forecast label.
    """
    temperature_df = _stack_history_and_forecast(
        historical_data, forecast_data, "air_temperature_mean", TEMPERATURE_COLUMN
    )
    irradiance_df = _stack_history_and_forecast(
        historical_data, forecast_data, "irradiance", IRRADIANCE_COLUMN
    )
    rain_df = _stack_history_and_forecast(
        historical_data, forecast_data, "precipitation", PRECIPITATION_COLUMN
    )
    return temperature_df, rain_df, irradiance_df


def get_yield_data(
    region: str = "Bourgogne-Franche-Comté",
    culture: str = "Blé tendre d'hiver",
    historical_culture: str = "Colza grain d'hiver",
):
    """Load past and forecast yield tables filtered by region and crop.

    Args:
        region: Value matched exactly against ``LIB_REG2`` in the historical
            yield file.
        culture: Pattern matched (``str.contains``) against the ``culture``
            column of the forecast yield file.
        historical_culture: Pattern matched (``str.contains``) against
            ``LIB_SAA`` in the historical yield file. This label used to be
            hard-coded; the default keeps the previous behavior. It is a
            separate argument because the two files do not use the same crop
            labels (e.g. "Colza d'hiver" vs "Colza grain d'hiver").

    Returns:
        A ``(past, forecast)`` tuple of DataFrames: ``past`` has columns
        ``year`` and ``past_yield``; ``forecast`` has columns ``year``,
        ``yield_simple_forecast`` and ``yield_with_shading_forecast``.
    """
    yield_past_data = pd.read_csv("data/data_yield/data_rendement.csv")
    # yield_forecast_data = pd.read_csv("data/data_yield/data_rendement.csv")

    # na=False: rows with a missing crop label are excluded instead of
    # producing a mask that contains NaN.
    yield_past_data = yield_past_data[
        (yield_past_data["LIB_REG2"] == region)
        & (yield_past_data["LIB_SAA"].str.contains(historical_culture, na=False))
    ]

    yield_columns = [col for col in yield_past_data.columns if "REND" in col]
    yield_past_data = yield_past_data[["LIB_REG2", "LIB_SAA"] + yield_columns]

    # Reshape from one column per year to one row per (region, crop, year).
    yield_past_data = yield_past_data.melt(
        id_vars=["LIB_REG2", "LIB_SAA"], var_name="year", value_name="past_yield"
    )

    # Clean the "year" column by stripping the "REND_" prefix.
    yield_past_data["year"] = (
        yield_past_data["year"].str.replace("REND_", "").astype(int)
    )

    yield_forecast_data = pd.read_csv("data/data_yield/rendement_forecast.csv")
    yield_forecast_data = yield_forecast_data[
        yield_forecast_data["culture"].str.contains(culture, na=False)
    ]

    return (
        yield_past_data[["year", "past_yield"]],
        yield_forecast_data[
            ["year", "yield_simple_forecast", "yield_with_shading_forecast"]
        ],
    )


def get_summaries():
    """Produce the meteorological and agricultural-yield summaries.

    Runs the whole pipeline for a fixed configuration (scenario "pessimist",
    latitude 47.0, longitude 5.0, crop "Colza d'hiver", region
    "Bourgogne-Franche-Comté") and prints intermediate results.

    Returns:
        A ``(meterological_summary, second_summary)`` tuple holding the
        values returned by ``get_meterological_summary`` and
        ``get_agricultural_yield_comparison``.
    """
    scenario = "pessimist"
    lat, lon = 47.0, 5.0
    culture = "Colza d'hiver"
    region = "Bourgogne-Franche-Comté"

    historical_df, forecast_df = pre_process_data(scenario, lat, lon)

    temperature_df, rain_df, irradiance_df = process_all_data_for_meterological_summary(
        historical_df, forecast_df
    )

    meterological_summary = get_meterological_summary(
        scenario=scenario,
        temperature_df=temperature_df,
        irradiance_df=irradiance_df,
        rain_df=rain_df,
    )
    print(meterological_summary)

    # Weather inputs (marked "ok" by the original author).
    climate_data = temperature_df.merge(rain_df, on="year").merge(
        irradiance_df, on="year"
    )

    # Soil inputs (marked "ok" by the original author).
    closest_soil_data = find_nearest_point_to_coordinates(
        latitude=lat, longitude=lon
    )

    water_deficit_data = forecast_df[["year", WATER_DEFICIT_COLUMN]]

    # Forecast with shading_coef=0.2 (the "PV" variant); only the yearly
    # water deficit is kept from it.
    water_deficit_data_pv = _load_yearly_forecast(
        scenario, lat, lon, shading_coef=0.2
    )[["year", WATER_DEFICIT_COLUMN]]

    # TODO: add a step that maps GPS coordinates to a French region so the
    # yield data can be filtered from the coordinates.
    yield_past_data, yield_forecast_data = get_yield_data(
        region=region, culture=culture
    )
    print(yield_forecast_data.tail())

    # Yield (with and without shading).
    # NOTE: the region is passed here as the literal the original code used,
    # which differs in spelling from the `region` variable above.
    second_summary = get_agricultural_yield_comparison(
        culture=culture,
        region="bourgogne franche comté",
        water_df=water_deficit_data,
        water_df_pv=water_deficit_data_pv,
        climate_df=climate_data,
        soil_df=closest_soil_data,
        forecast_yield_df=yield_forecast_data,
        historical_yield_df=yield_past_data,
    )
    print(yield_forecast_data.tail())
    print(second_summary)

    return meterological_summary, second_summary


# from utils.soil_utils import find_nearest_point
# city = "Bourgogne Franche Comté"
# closest_soil_features = find_nearest_point(city)
# print(closest_soil_features)

# Example usage
# import pandas as pd
# import numpy as np
# from utils.soil_utils import find_nearest_point
# city = "Bourgogne Franche Comté"
# closest_soil_features = find_nearest_point(city)
# print(closest_soil_features)
# # Define the period: 4 years in the past + 15 years in the future (19 years)
# start_date = "2010-01"
# end_date = "2029-12"
# # Generate a monthly date series
# dates = pd.date_range(start=start_date, end=end_date, freq='M')  # Generate a monthly date series
# dates = pd.date_range(start=start_date, end=end_date, freq="M")
# # Generate dummy yield data (in tonnes per hectare)
# np.random.seed(42)  # For reproducibility
# # Overall trend of the yield without shading (gradual increase)
# trend = np.linspace(2.5, 3.2, len(dates))  # Increases slightly over the years
# # Add seasonal and random variations
# seasonality = 0.3 * np.sin(np.linspace(0, 12 * np.pi, len(dates)))  # Seasonal effect
# random_variation = np.random.normal(0, 0.1, len(dates))  # Random noise
# # Compute the yield without shading
# yield_no_shade = trend + seasonality + random_variation
# # Apply a shading effect (10-20% yield reduction)
# shade_factor = np.random.uniform(0.1, 0.2, len(dates))  # Between 10% and 20% reduction
# yield_with_shade = yield_no_shade * (1 - shade_factor)
# # Create the DataFrame
# df = pd.DataFrame({
#     "date": dates,
#     "yield_no_shade": yield_no_shade,
#     "yield_with_shade": yield_with_shade
# })
# water_deficit_data = pd.DataFrame()
# climate_data = pd.DataFrame()
# print(get_agricultural_yield_comparison(culture="orge",
#                                         region="bourgogne franche comté",
#                                         water_df=water_deficit_data,
#                                         climate_df=climate_data,
#                                         soil_df=closest_soil_features,
#                                         agri_yield_df=df))