Spaces:
Runtime error
Runtime error
File size: 7,587 Bytes
af358f2 aa4cf91 af358f2 aa4cf91 af358f2 aa4cf91 af358f2 aa4cf91 af358f2 aa4cf91 af358f2 aa4cf91 af358f2 aa4cf91 af358f2 aa4cf91 af358f2 aa4cf91 af358f2 aa4cf91 |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 |
import os
import xarray as xr
import pandas as pd
from matplotlib import pyplot as plt
import docs.agro_indicators as agro_indicators
import numpy as np
from datetime import datetime
# Variable catalogue, keyed by the long (descriptive) variable name.
# Each value is a 3-tuple: (display title, unit label, NetCDF variable key).
# Title and unit are combined into DataFrame column names as "<title> (<unit>)".
VARIABLE_MAPPING = {
    'surface_downwelling_shortwave_radiation': (
        'Surface Downwelling Shortwave Radiation',
        'W/m²',
        'rsds',
    ),
    'moisture_in_upper_portion_of_soil_column': (
        'Moisture in Upper Portion of Soil Column',
        'kg m-2',
        'mrsos',
    ),
    'precipitation': (
        'Precipitation',
        'kg m-2 s-1',
        'pr',
    ),
    'near_surface_relative_humidity': (
        'Relative Humidity',
        '%',
        'hurs',
    ),
    'evaporation_including_sublimation_and_transpiration': (
        'Evaporation (including sublimation and transpiration)',
        'kg m-2 s-1',
        'evspsbl',
    ),
    'total_runoff': (
        'Total Runoff',
        'kg m-2 s-1',
        'mrro',
    ),
    'daily_minimum_near_surface_air_temperature': (
        'Daily Minimum Near Surface Air Temperature',
        '°C',
        'tasmin',
    ),
    'daily_maximum_near_surface_air_temperature': (
        'Daily Maximum Near Surface Air Temperature',
        '°C',
        'tasmax',
    ),
    'near_surface_wind_speed': (
        'Near Surface Wind Speed',
        'm/s',
        'sfcWind',
    ),
    'near_surface_air_temperature': (
        'Near Surface Air Temperature',
        '°C',
        'tas',
    ),
}
# Function to load data for a given variable from the dataset at the nearest latitude and longitude
# NetCDF variable keys whose values are shifted from Kelvin to Celsius on load.
_KELVIN_VARIABLES = frozenset({"tas", "tasmin", "tasmax"})
_KELVIN_TO_CELSIUS_OFFSET = 273.15


def load_data(variable: str, ds: "xr.Dataset", lat: float, lon: float) -> "xr.DataArray | None":
    """
    Load data for a given variable from the dataset at the nearest latitude and longitude.

    This is a best-effort loader: lookup/selection failures are reported on
    stdout and signalled with a ``None`` return instead of an exception, so a
    caller can skip the variable and carry on with the others.

    Args:
        variable (str): The variable to extract from the dataset.
        ds (xr.Dataset): The xarray dataset containing climate data.
        lat (float): Latitude for nearest data point.
        lon (float): Longitude for nearest data point.

    Returns:
        xr.DataArray | None: The variable values at the grid point nearest to
        (lat, lon), or None when the dataset is missing or the variable /
        coordinates cannot be selected. Values of "tas", "tasmin" and
        "tasmax" have 273.15 subtracted (Kelvin -> Celsius).
    """
    if ds is None:
        print(f"Error loading {variable}: no dataset provided")
        return None
    try:
        data = ds[variable].sel(lat=lat, lon=lon, method="nearest")
        # Convert temperature from Kelvin to Celsius for specific variables
        if variable in _KELVIN_VARIABLES:
            data = data - _KELVIN_TO_CELSIUS_OFFSET
    except (KeyError, IndexError, ValueError, TypeError) as e:
        # Only data-shaped failures (unknown variable, missing lat/lon
        # coordinate, non-numeric values) are swallowed; genuine programming
        # errors are allowed to propagate.
        print(f"Error loading {variable}: {e}")
        return None
    return data
# Function to load forecast datasets from NetCDF files based on variable mapping
def get_forecast_datasets(climate_sub_files: list) -> dict:
    """
    Get the forecast datasets by loading NetCDF files for each variable.

    A file is assigned to a variable when the variable's NetCDF key (the third
    element of its VARIABLE_MAPPING entry) appears as a whole token of the
    file name, where tokens are separated by "_", "-" or ".". Whole-token
    matching keeps short keys from matching inside longer ones (e.g. "tas"
    inside "tasmax", or "pr" inside "prsn").

    Args:
        climate_sub_files (list): List of file paths to the NetCDF files.

    Returns:
        dict: Dictionary with variable long names as keys and opened xarray
        datasets as values. When several files match the same variable, the
        last one in the list wins and the earlier dataset is closed.
    """
    datasets = {}
    for file_path in climate_sub_files:
        stem = os.path.splitext(os.path.basename(file_path))[0]
        tokens = set(stem.replace("-", "_").replace(".", "_").split("_"))
        for long_name, (_title, _unit, var_key) in VARIABLE_MAPPING.items():
            if var_key not in tokens:
                continue
            # Release the handle of a dataset that is about to be replaced.
            previous = datasets.get(long_name)
            if previous is not None:
                previous.close()
            datasets[long_name] = xr.open_dataset(file_path, engine="netcdf4")
    return datasets
# Function to extract climate data from forecast datasets and convert to a DataFrame
def get_forecast_data(datasets: dict, lat: float, lon: float) -> pd.DataFrame:
    """
    Extract climate data from the forecast datasets for a given location and convert to a DataFrame.

    Variables listed in VARIABLE_MAPPING that have no entry in ``datasets``,
    or that load_data fails to extract, are skipped (with a message on
    stdout) rather than aborting the whole extraction.

    Args:
        datasets (dict): Dictionary of datasets, one for each variable, keyed
            by the variable's long name.
        lat (float): Latitude of the location to extract data for.
        lon (float): Longitude of the location to extract data for.

    Returns:
        pd.DataFrame: A DataFrame with a 'time' column plus one
        "<title> (<unit>)" column per successfully loaded variable.
    """
    time_series_data = {'time': []}
    for long_name, (title, unit, variable) in VARIABLE_MAPPING.items():
        dataset = datasets.get(long_name)
        if dataset is None:
            # No file was found for this variable: skip it instead of
            # failing with a KeyError.
            print(f"Skipping {long_name}: no dataset loaded for '{variable}'")
            continue
        print(f"Processing {long_name} ({title}, {unit}, {variable})...")
        data = load_data(variable, dataset, lat, lon)
        if data is None:
            continue
        # The time axis is taken from the most recently loaded variable.
        # NOTE(review): assumes all variables share the same time axis — confirm.
        time_series_data['time'] = data.time.values
        column_name = f"{title} ({unit})"
        time_series_data[column_name] = data.values
    return pd.DataFrame(time_series_data)
# Function to compute reference evapotranspiration (ET0)
def compute_et0(df: pd.DataFrame, latitude: float, longitude: float):
    """
    Compute reference evapotranspiration using the provided climate data.

    Reads the columns ``irradiance``, ``air_temperature_min``,
    ``air_temperature_max``, ``relative_humidity_min``,
    ``relative_humidity_max``, ``wind_speed`` and ``day_of_year`` from ``df``
    and forwards them, together with the mean of the min/max temperature and
    the location, to ``agro_indicators.et0``.

    Args:
        df (pd.DataFrame): DataFrame containing climate data.
        latitude (float): Latitude of the location.
        longitude (float): Longitude of the location.

    Returns:
        arraylike: Daily reference evapotranspiration, as returned by
        ``agro_indicators.et0``.
    """
    solar = df.irradiance
    temp_low = df.air_temperature_min
    temp_high = df.air_temperature_max
    # Mean temperature is approximated as the midpoint of the daily extremes.
    temp_mean = (temp_low + temp_high) / 2
    humidity_low = df.relative_humidity_min
    humidity_high = df.relative_humidity_max
    wind = df.wind_speed
    julian_day = df.day_of_year
    return agro_indicators.et0(
        solar,
        temp_mean,
        temp_high,
        temp_low,
        humidity_low,
        humidity_high,
        wind,
        julian_day,
        latitude,
        longitude,
    )
# Main processing workflow
def main(
    folder_to_parse: str = "../climate_data_pessimist/",
    lat: float = 47.0,
    lon: float = 5.0,
    coef: float = 1,
) -> pd.DataFrame:
    """
    Run the full workflow: load the NetCDF forecasts, extract one location,
    compute ET0 and derive a water-deficit column.

    Args:
        folder_to_parse (str): Directory whose immediate sub-folders contain
            the ``.nc`` files to load.
        lat (float): Latitude of the location to extract (example default).
        lon (float): Longitude of the location to extract (example default).
        coef (float): Multiplier applied to the shortwave radiation column to
            produce the ``irradiance`` input of the ET0 computation.

    Returns:
        pd.DataFrame: The extracted time series with the derived columns
        'Evaporation (mm/day)', 'Precipitation (mm/day)' and
        'Water Deficit (mm/day)'.
    """
    # Retrieve the subfolders and files to parse. Listings are sorted so the
    # result does not depend on the filesystem's enumeration order.
    climate_sub_folder = [
        os.path.join(folder_to_parse, entry)
        for entry in sorted(os.listdir(folder_to_parse))
        if os.path.isdir(os.path.join(folder_to_parse, entry))
    ]
    climate_sub_files = [
        os.path.join(folder, name)
        for folder in climate_sub_folder
        for name in sorted(os.listdir(folder))
        if name.endswith('.nc')
    ]

    # Load the forecast datasets and extract the requested location. The
    # extraction copies the values into a DataFrame, so the NetCDF handles
    # can be released as soon as it is done (even if it fails).
    datasets = get_forecast_datasets(climate_sub_files)
    try:
        final_df = get_forecast_data(datasets, lat, lon)
    finally:
        for dataset in datasets.values():
            dataset.close()

    # Display the resulting DataFrame
    print(final_df.head())

    # Preprocess the data: expose the columns under the names compute_et0 reads.
    data_test = final_df.copy()
    data_test["irradiance"] = data_test['Surface Downwelling Shortwave Radiation (W/m²)'] * coef
    data_test["air_temperature_min"] = data_test['Daily Minimum Near Surface Air Temperature (°C)']
    data_test["air_temperature_max"] = data_test['Daily Maximum Near Surface Air Temperature (°C)']
    # Only one relative-humidity series is available, so it is used for both
    # the minimum and the maximum input.
    data_test["relative_humidity_min"] = data_test['Relative Humidity (%)']
    data_test["relative_humidity_max"] = data_test['Relative Humidity (%)']
    data_test["wind_speed"] = data_test['Near Surface Wind Speed (m/s)']

    # Convert 'time' to datetime and calculate Julian day
    data_test['time'] = pd.to_datetime(data_test['time'], errors='coerce')
    data_test['day_of_year'] = data_test['time'].dt.dayofyear

    # Compute ET0
    et0 = compute_et0(data_test, lat, lon)
    data_test['Evaporation (mm/day)'] = et0

    # Convert Precipitation from kg/m²/s to mm/day (86400 seconds per day)
    data_test['Precipitation (mm/day)'] = 86400 * data_test['Precipitation (kg m-2 s-1)']

    # Calculate Water Deficit: Water Deficit = ET0 - P + M
    # NOTE(review): M is the soil-moisture column in kg m-2 (not a per-day
    # rate) and is added rather than subtracted — confirm the intended formula.
    data_test['Water Deficit (mm/day)'] = (
        data_test['Evaporation (mm/day)']
        - data_test['Precipitation (mm/day)']
        + data_test['Moisture in Upper Portion of Soil Column (kg m-2)']
    )

    # Display the resulting DataFrame with Water Deficit
    print(data_test[['Water Deficit (mm/day)', 'Precipitation (mm/day)', 'Evaporation (mm/day)', 'Moisture in Upper Portion of Soil Column (kg m-2)']])
    return data_test


# Run the main function
if __name__ == "__main__":
    main()
|