# Scraped page header ("Spaces: Runtime error"); not part of the module.
import os | |
import pandas as pd | |
import pandas as pd | |
import numpy as np | |
from forecast import get_forecast_datasets, get_forecast_data | |
from data_pipelines.historical_weather_data import ( | |
download_historical_weather_data, | |
aggregate_hourly_weather_data, | |
) | |
from utils.soil_utils import find_nearest_point_to_coordinates | |
from utils.summary import get_meterological_summary, get_agricultural_yield_comparison | |
def get_meterological_past_data(
    latitude: float = 47.0,
    longitude: float = 5.0,
    start_year: int = 2010,
    end_year: int = 2025,
):
    """Download historical weather data for one location and year range.

    The defaults mirror the location (47.0, 5.0) and year range (2010-2025)
    that ``pre_process_data`` uses, so the function can still be called with
    no arguments.

    Args:
        latitude: Latitude forwarded to ``download_historical_weather_data``.
        longitude: Longitude forwarded to ``download_historical_weather_data``.
        start_year: First year forwarded to the downloader.
        end_year: Last year forwarded to the downloader.

    Returns:
        Whatever ``download_historical_weather_data`` returns for these
        arguments.
    """
    # Keyword arguments match the call made in ``pre_process_data``.
    return download_historical_weather_data(
        latitude=latitude,
        longitude=longitude,
        start_year=start_year,
        end_year=end_year,
    )
def pre_process_data(scenario: str, lat: float = 47.0, lon: float = 5.0):
    """Build yearly-mean historical and forecast weather tables for a location.

    Args:
        scenario: Scenario name forwarded to ``get_forecast_data``.
        lat: Latitude of the location.
        lon: Longitude of the location.

    Returns:
        A ``(historical, forecast)`` tuple of DataFrames, each averaged per
        ``year``. The historical table only keeps years strictly before 2024.
    """
    first_year, last_year = 2010, 2025

    # Historical side: download first, then aggregate the downloaded data.
    downloaded = download_historical_weather_data(
        latitude=lat, longitude=lon, start_year=first_year, end_year=last_year
    )
    history = aggregate_hourly_weather_data(downloaded)

    # Forecast side, requested without shading (shading_coef=0).
    forecast = get_forecast_data(
        scenario=scenario, longitude=lon, latitude=lat, shading_coef=0
    )
    forecast["time"] = pd.to_datetime(forecast["time"])
    forecast["year"] = forecast["time"].dt.year
    yearly_forecast = forecast.groupby(by="year", as_index=False).mean()
    # reset_index() on an already-flat frame adds an "index" column; this is
    # kept so the returned frame has the same columns as before.
    yearly_forecast = yearly_forecast.reset_index()

    # Turn the index into a "time" column and order the rows chronologically.
    # NOTE(review): the rename assumes the aggregated frame has an unnamed
    # index (which reset_index() exposes as "index") - confirm.
    history = history.reset_index()
    history = history.rename(columns={"index": "time"})
    history = history.sort_values(by="time")
    history["year"] = history["time"].dt.year

    # Divide by 3600: per the original comment this converts precipitation
    # to kg m-2 s-1. Presumably the input is an hourly amount - verify.
    history["precipitation"] = history["precipitation"] / 3600

    yearly_history = history.groupby(by="year", as_index=False).mean()
    yearly_history = yearly_history.reset_index()

    before_2024 = yearly_history["year"] < 2024
    return yearly_history[before_2024], yearly_forecast
def process_all_data_for_meterological_summary(
    historical_data: pd.DataFrame, forecast_data: pd.DataFrame
):
    """Stack historical and forecast values into one table per variable.

    For each variable, the historical column is renamed to the forecast
    column name and the forecast rows are appended below the historical
    rows. Row indexes are not reset.

    Args:
        historical_data: Frame with ``year``, ``air_temperature_mean``,
            ``irradiance`` and ``precipitation`` columns.
        forecast_data: Frame with ``year`` and the three long-named forecast
            columns used below.

    Returns:
        ``(temperature_df, rain_df, irradiance_df)``, each with a ``year``
        column and one value column.
    """

    def _stack(history_column: str, forecast_column: str) -> pd.DataFrame:
        # Align the historical column name with the forecast one, then append.
        past = historical_data[["year", history_column]].rename(
            columns={history_column: forecast_column}
        )
        future = forecast_data[["year", forecast_column]]
        return pd.concat([past, future], axis=0)

    temperature_df = _stack(
        "air_temperature_mean", "Near Surface Air Temperature (°C)"
    )
    irradiance_df = _stack(
        "irradiance", "Surface Downwelling Shortwave Radiation (W/m²)"
    )
    rain_df = _stack("precipitation", "Precipitation (kg m-2 s-1)")

    return temperature_df, rain_df, irradiance_df
def get_yield_data(
    region: str = "Bourgogne-Franche-Comté",
    culture: str = "Blé tendre d'hiver",
    past_culture: str = "Colza grain d'hiver",
):
    """Load historical and forecast crop yields from the bundled CSV files.

    Args:
        region: Value that ``LIB_REG2`` must equal in the historical file.
        culture: Text that the ``culture`` column of the forecast file must
            contain (literal match, not a regular expression).
        past_culture: Text that ``LIB_SAA`` must contain in the historical
            file (literal match). It is separate from ``culture`` because
            the default labels used for the two files differ; the default
            keeps the crop that was previously hard-coded.

    Returns:
        A tuple ``(past, forecast)`` where ``past`` has the columns ``year``
        and ``past_yield`` and ``forecast`` has the columns ``year``,
        ``yield_simple_forecast`` and ``yield_with_shading_forecast``.
    """
    yield_past_data = pd.read_csv("data/data_yield/data_rendement.csv")

    # regex=False: crop names are plain text, so characters such as "(" must
    # not be interpreted as a pattern. na=False: rows with a missing label
    # simply do not match instead of putting NaN in the mask.
    in_region = yield_past_data["LIB_REG2"] == region
    is_crop = yield_past_data["LIB_SAA"].str.contains(
        past_culture, regex=False, na=False
    )
    yield_past_data = yield_past_data[in_region & is_crop]

    # Keep the identifying columns plus every column whose name has "REND".
    yield_columns = [col for col in yield_past_data.columns if "REND" in col]
    yield_past_data = yield_past_data[["LIB_REG2", "LIB_SAA"] + yield_columns]

    # Wide -> long: one row per (region, crop, yield column).
    yield_past_data = yield_past_data.melt(
        id_vars=["LIB_REG2", "LIB_SAA"], var_name="year", value_name="past_yield"
    )
    # Strip the "REND_" prefix from the former column names to get the year.
    yield_past_data["year"] = (
        yield_past_data["year"].str.replace("REND_", "", regex=False).astype(int)
    )

    yield_forecast_data = pd.read_csv("data/data_yield/rendement_forecast.csv")
    matches_culture = yield_forecast_data["culture"].str.contains(
        culture, regex=False, na=False
    )
    yield_forecast_data = yield_forecast_data[matches_culture]

    return (
        yield_past_data[["year", "past_yield"]],
        yield_forecast_data[
            ["year", "yield_simple_forecast", "yield_with_shading_forecast"]
        ],
    )
def get_summaries(
    scenario: str = "pessimist",
    lat: float = 47.0,
    lon: float = 5.0,
    culture: str = "Colza d'hiver",
    region: str = "Bourgogne-Franche-Comté",
    shading_coef: float = 0.2,
):
    """Produce the meteorological summary and the yield-comparison summary.

    Every parameter defaults to the value that used to be hard-coded, so
    ``get_summaries()`` behaves as before.

    Args:
        scenario: Scenario name passed to the forecast and summary helpers.
        lat: Latitude of the location.
        lon: Longitude of the location.
        culture: Crop name used to filter forecast yields and passed to the
            yield comparison.
        region: Region name used to filter the yield data.
        shading_coef: Shading coefficient for the second forecast request,
            whose water deficit is compared with the unshaded one.

    Returns:
        ``(meterological_summary, second_summary)`` as returned by
        ``get_meterological_summary`` and
        ``get_agricultural_yield_comparison``. Both are also printed.
    """
    historical_df, forecast_df = pre_process_data(scenario, lat, lon)
    temperature_df, rain_df, irradiance_df = process_all_data_for_meterological_summary(
        historical_df, forecast_df
    )

    meterological_summary = get_meterological_summary(
        scenario=scenario,
        temperature_df=temperature_df,
        irradiance_df=irradiance_df,
        rain_df=rain_df,
    )
    print(meterological_summary)

    # One climate table keyed by year: temperature, rain and irradiance.
    climate_data = temperature_df.merge(rain_df, on="year").merge(
        irradiance_df, on="year"
    )
    closest_soil_data = find_nearest_point_to_coordinates(
        latitude=lat, longitude=lon
    )
    water_deficit_data = forecast_df[["year", "Water Deficit (mm/day)"]]

    # Same forecast, requested with shading, averaged per year.
    forecast_df_pv = get_forecast_data(
        scenario=scenario, longitude=lon, latitude=lat, shading_coef=shading_coef
    )
    forecast_df_pv["time"] = pd.to_datetime(forecast_df_pv["time"])
    forecast_df_pv["year"] = forecast_df_pv["time"].dt.year
    yearly_pv = forecast_df_pv.groupby(by="year", as_index=False).mean()
    water_deficit_data_pv = yearly_pv[["year", "Water Deficit (mm/day)"]]

    # TODO: derive the French region from the GPS coordinates so the yield
    # data can be filtered without a separate ``region`` argument.
    yield_past_data, yield_forecast_data = get_yield_data(
        region=region, culture=culture
    )
    print(yield_forecast_data.tail())

    # The comparison helper was previously given the literal
    # "bourgogne franche comté". Deriving the label from ``region`` yields
    # that exact string for the default and keeps the two in sync.
    # NOTE(review): confirm this lower-case, hyphen-free form is what the
    # helper expects for other regions.
    region_label = region.lower().replace("-", " ")

    # Yield with and without shading.
    second_summary = get_agricultural_yield_comparison(
        culture=culture,
        region=region_label,
        water_df=water_deficit_data,
        water_df_pv=water_deficit_data_pv,
        climate_df=climate_data,
        soil_df=closest_soil_data,
        forecast_yield_df=yield_forecast_data,
        historical_yield_df=yield_past_data,
    )
    # Printed a second time after the comparison call, as in the original
    # (looks like a debugging aid).
    print(yield_forecast_data.tail())
    print(second_summary)
    return meterological_summary, second_summary
# from utils.soil_utils import find_nearest_point | |
# city = "Bourgogne Franche Comté" | |
# closest_soil_features = find_nearest_point(city) | |
# print(closest_soil_features) | |
# Example usage | |
# import pandas as pd | |
# import numpy as np | |
# from utils.soil_utils import find_nearest_point | |
# city = "Bourgogne Franche Comté" | |
# closest_soil_features = find_nearest_point(city) | |
# print(closest_soil_features) | |
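# # Define the period (past years plus forecast years): 2010-01 through 2029-12, i.e. 20 years (the original note said 4 past + 15 future = 19 years, which does not match these dates)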
# start_date = "2010-01" | |
# end_date = "2029-12" | |
# # Générer une série de dates mensuelles | |
# dates = pd.date_range(start=start_date, end=end_date, freq='M') | |
# Générer une série de dates mensuelles | |
# dates = pd.date_range(start=start_date, end=end_date, freq="M") | |
# # Générer des données fictives de rendement (en tonnes par hectare) | |
# np.random.seed(42) # Pour reproductibilité | |
# # Tendance générale du rendement sans ombrage (augmentation progressive) | |
# trend = np.linspace(2.5, 3.2, len(dates)) # Augmente légèrement sur les années | |
# # Ajout de variations saisonnières et aléatoires | |
# seasonality = 0.3 * np.sin(np.linspace(0, 12 * np.pi, len(dates))) # Effet saisonnier | |
# random_variation = np.random.normal(0, 0.1, len(dates)) # Bruit aléatoire | |
# # Calcul du rendement sans ombrage | |
# yield_no_shade = trend + seasonality + random_variation | |
# # Appliquer un effet d'ombrage (réduction de 10-20% du rendement) | |
# shade_factor = np.random.uniform(0.1, 0.2, len(dates)) # Entre 10% et 20% de réduction | |
# yield_with_shade = yield_no_shade * (1 - shade_factor) | |
# # Créer le DataFrame | |
# df = pd.DataFrame({ | |
# "date": dates, | |
# "yield_no_shade": yield_no_shade, | |
# "yield_with_shade": yield_with_shade | |
# }) | |
# water_deficit_data = pd.DataFrame() | |
# climate_data = pd.DataFrame() | |
# print(get_agricultural_yield_comparison(culture="orge", | |
# region="bourgogne franche comté", | |
# water_df=water_deficit_data, | |
# climate_df=climate_data, | |
# soil_df=closest_soil_features, | |
# agri_yield_df=df)) | |