lucaordronneau committed on
Commit
aa4cf91
·
verified ·
1 Parent(s): 38af3d3

Upload forecast.py

Browse files

Update with water deficit

Files changed (1) hide show
  1. forecast.py +87 -28
forecast.py CHANGED
@@ -1,6 +1,10 @@
1
  import os
2
  import xarray as xr
3
  import pandas as pd
 
 
 
 
4
 
5
 
6
  # Mapping of variable names to metadata (title, unit, and NetCDF variable key)
@@ -18,6 +22,7 @@ VARIABLE_MAPPING = {
18
  }
19
 
20
 
 
21
  def load_data(variable: str, ds: xr.Dataset, lat: float, lon: float) -> xr.DataArray:
22
  """
23
  Load data for a given variable from the dataset at the nearest latitude and longitude.
@@ -33,17 +38,16 @@ def load_data(variable: str, ds: xr.Dataset, lat: float, lon: float) -> xr.DataA
33
  """
34
  try:
35
  data = ds[variable].sel(lat=lat, lon=lon, method="nearest")
36
-
37
  # Convert temperature from Kelvin to Celsius for specific variables
38
  if variable in ["tas", "tasmin", "tasmax"]:
39
  data = data - 273.15
40
-
41
  return data
42
  except Exception as e:
43
  print(f"Error loading {variable}: {e}")
44
  return None
45
 
46
 
 
47
  def get_forecast_datasets(climate_sub_files: list) -> dict:
48
  """
49
  Get the forecast datasets by loading NetCDF files for each variable.
@@ -56,12 +60,10 @@ def get_forecast_datasets(climate_sub_files: list) -> dict:
56
  """
57
  datasets = {}
58
 
59
- # Iterate over each file and check if the variable exists in the filename
60
  for file_path in climate_sub_files:
61
  filename = os.path.basename(file_path)
62
-
63
  for long_name, (title, unit, var_key) in VARIABLE_MAPPING.items():
64
- if var_key in filename: # Check for presence of variable in filename
65
  if var_key in ["tas", "tasmax", "tasmin"]:
66
  if f"_{var_key}_" in f"_{filename}_" or filename.endswith(f"_{var_key}.nc"):
67
  datasets[long_name] = xr.open_dataset(file_path, engine="netcdf4")
@@ -71,6 +73,7 @@ def get_forecast_datasets(climate_sub_files: list) -> dict:
71
  return datasets
72
 
73
 
 
74
  def get_forecast_data(datasets: dict, lat: float, lon: float) -> pd.DataFrame:
75
  """
76
  Extract climate data from the forecast datasets for a given location and convert to a DataFrame.
@@ -85,41 +88,97 @@ def get_forecast_data(datasets: dict, lat: float, lon: float) -> pd.DataFrame:
85
  """
86
  time_series_data = {'time': []}
87
 
88
- # Iterate over the variable mapping to load and process data for each variable
89
  for long_name, (title, unit, variable) in VARIABLE_MAPPING.items():
90
  print(f"Processing {long_name} ({title}, {unit}, {variable})...")
91
-
92
- # Load the data for the current variable
93
  data = load_data(variable, datasets[long_name], lat, lon)
94
-
95
- if data is not None:
96
- print(f"Time values: {data.time.values[:5]}") # Preview first few time values
97
- print(f"Data values: {data.values[:5]}") # Preview first few data values
98
 
99
- # Add the time values to the 'time' list
100
  time_series_data['time'] = data.time.values
101
-
102
- # Format the column name with unit (e.g., "Precipitation (kg m-2 s-1)")
103
  column_name = f"{title} ({unit})"
104
  time_series_data[column_name] = data.values
105
 
106
- # Convert the time series data into a pandas DataFrame
107
  return pd.DataFrame(time_series_data)
108
 
109
 
110
- # Define the directory to parse
111
- folder_to_parse = "climate_data_pessimist/"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
112
 
113
- # Retrieve the subfolders and files to parse
114
- climate_sub_folder = [os.path.join(folder_to_parse, e) for e in os.listdir(folder_to_parse) if os.path.isdir(os.path.join(folder_to_parse, e))]
115
- climate_sub_files = [os.path.join(e, i) for e in climate_sub_folder for i in os.listdir(e) if i.endswith('.nc')]
116
 
117
- # Load the forecast datasets
118
- datasets = get_forecast_datasets(climate_sub_files)
119
 
120
- # Get the forecast data for a specific latitude and longitude
121
- lat, lon = 47.0, 5.0 # Example coordinates
122
- final_df = get_forecast_data(datasets, lat, lon)
123
 
124
- # Display the resulting DataFrame
125
- print(final_df.head())
 
 
1
  import os
2
  import xarray as xr
3
  import pandas as pd
4
+ from matplotlib import pyplot as plt
5
+ import docs.agro_indicators as agro_indicators
6
+ import numpy as np
7
+ from datetime import datetime
8
 
9
 
10
  # Mapping of variable names to metadata (title, unit, and NetCDF variable key)
 
22
  }
23
 
24
 
25
# Load one variable at the grid point closest to the requested coordinates
def load_data(variable: str, ds: xr.Dataset, lat: float, lon: float) -> xr.DataArray:
    """
    Select a variable's series at the grid cell nearest to a location.

    Args:
        variable (str): Key of the variable inside the dataset (e.g. "tas").
        ds (xr.Dataset): Dataset that holds the variable.
        lat (float): Target latitude, matched with method="nearest".
        lon (float): Target longitude, matched with method="nearest".

    Returns:
        xr.DataArray: The selected data. For "tas", "tasmin" and "tasmax"
        273.15 is subtracted (Kelvin to Celsius). None is returned when any
        step raises; the error is printed rather than propagated.
    """
    kelvin_variables = ("tas", "tasmin", "tasmax")
    try:
        nearest = ds[variable].sel(lat=lat, lon=lon, method="nearest")
        # Temperature variables are converted from Kelvin to Celsius
        return nearest - 273.15 if variable in kelvin_variables else nearest
    except Exception as exc:
        print(f"Error loading {variable}: {exc}")
        return None
48
 
49
 
50
+ # Function to load forecast datasets from NetCDF files based on variable mapping
51
  def get_forecast_datasets(climate_sub_files: list) -> dict:
52
  """
53
  Get the forecast datasets by loading NetCDF files for each variable.
 
60
  """
61
  datasets = {}
62
 
 
63
  for file_path in climate_sub_files:
64
  filename = os.path.basename(file_path)
 
65
  for long_name, (title, unit, var_key) in VARIABLE_MAPPING.items():
66
+ if var_key in filename:
67
  if var_key in ["tas", "tasmax", "tasmin"]:
68
  if f"_{var_key}_" in f"_{filename}_" or filename.endswith(f"_{var_key}.nc"):
69
  datasets[long_name] = xr.open_dataset(file_path, engine="netcdf4")
 
73
  return datasets
74
 
75
 
76
# Function to extract climate data from forecast datasets and convert to a DataFrame
def get_forecast_data(datasets: dict, lat: float, lon: float) -> pd.DataFrame:
    """
    Extract climate data from the forecast datasets for a given location and convert to a DataFrame.

    Args:
        datasets (dict): Opened datasets keyed by the long variable names used
            as keys of VARIABLE_MAPPING.
        lat (float): Latitude of the location.
        lon (float): Longitude of the location.

    Returns:
        pd.DataFrame: A 'time' column plus one "<title> (<unit>)" column for
        every variable that could be loaded. Variables with no dataset, or
        whose loading failed, are left out.
    """
    time_series_data = {'time': []}

    for long_name, (title, unit, variable) in VARIABLE_MAPPING.items():
        print(f"Processing {long_name} ({title}, {unit}, {variable})...")

        # A variable whose file was not found has no entry in `datasets`.
        # Skip it (with a message) instead of aborting the whole extraction
        # with a KeyError, mirroring how a failed load is already tolerated.
        ds = datasets.get(long_name)
        if ds is None:
            print(f"Error loading {variable}: no dataset available for {long_name}")
            continue

        data = load_data(variable, ds, lat, lon)
        if data is None:
            # load_data has already printed the reason
            continue

        # The time axis is overwritten by each loaded variable; the last one wins
        time_series_data['time'] = data.time.values
        column_name = f"{title} ({unit})"
        time_series_data[column_name] = data.values

    return pd.DataFrame(time_series_data)
101
 
102
 
103
# Reference evapotranspiration (ET0) from the prepared climate columns
def compute_et0(df: pd.DataFrame, latitude: float, longitude: float):
    """
    Compute reference evapotranspiration from a prepared climate DataFrame.

    The frame must expose the columns `irradiance`, `air_temperature_min`,
    `air_temperature_max`, `relative_humidity_min`, `relative_humidity_max`,
    `wind_speed` and `day_of_year`; they are forwarded to
    `agro_indicators.et0` together with the location.

    Args:
        df (pd.DataFrame): DataFrame containing climate data.
        latitude (float): Latitude of the location.
        longitude (float): Longitude of the location.

    Returns:
        arraylike: Whatever `agro_indicators.et0` returns (daily reference
        evapotranspiration according to the original documentation).
    """
    radiation = df.irradiance
    t_min = df.air_temperature_min
    t_max = df.air_temperature_max
    # Mean temperature is taken as the midpoint of the daily extremes
    t_mean = (t_min + t_max) / 2
    rh_min = df.relative_humidity_min
    rh_max = df.relative_humidity_max
    wind = df.wind_speed
    julian_day = df.day_of_year

    return agro_indicators.et0(
        radiation,
        t_mean,
        t_max,
        t_min,
        rh_min,
        rh_max,
        wind,
        julian_day,
        latitude,
        longitude,
    )
127
+
128
+
129
# Main processing workflow
def main(folder_to_parse: str = "../climate_data_pessimist/", lat: float = 47.0, lon: float = 5.0):
    """
    Build the forecast table for one location and derive its water deficit.

    Args:
        folder_to_parse (str): Directory whose immediate subfolders contain the
            NetCDF (.nc) files. Defaults to the path previously hard-coded.
        lat (float): Latitude of the location (default: example coordinate 47.0).
        lon (float): Longitude of the location (default: example coordinate 5.0).

    Returns:
        pd.DataFrame: The forecast data with the added helper columns,
        'Evaporation (mm/day)', 'Precipitation (mm/day)' and
        'Water Deficit (mm/day)'.
    """
    # Retrieve the subfolders and files to parse
    climate_sub_folder = [
        os.path.join(folder_to_parse, entry)
        for entry in os.listdir(folder_to_parse)
        if os.path.isdir(os.path.join(folder_to_parse, entry))
    ]
    climate_sub_files = [
        os.path.join(folder, name)
        for folder in climate_sub_folder
        for name in os.listdir(folder)
        if name.endswith('.nc')
    ]

    # Load the forecast datasets
    datasets = get_forecast_datasets(climate_sub_files)

    # Get the forecast data for the requested latitude and longitude
    final_df = get_forecast_data(datasets, lat, lon)

    # Scaling factor applied to the irradiance column (currently neutral)
    coef = 1

    # Display the resulting DataFrame
    print(final_df.head())

    # Preprocess the data: expose the columns under the names compute_et0 reads
    data_test = final_df.copy()
    data_test["irradiance"] = data_test['Surface Downwelling Shortwave Radiation (W/m²)'] * coef
    data_test["air_temperature_min"] = data_test['Daily Minimum Near Surface Air Temperature (°C)']
    data_test["air_temperature_max"] = data_test['Daily Maximum Near Surface Air Temperature (°C)']
    # Only one relative-humidity series is available, so it serves as both min and max
    data_test["relative_humidity_min"] = data_test['Relative Humidity (%)']
    data_test["relative_humidity_max"] = data_test['Relative Humidity (%)']
    data_test["wind_speed"] = data_test['Near Surface Wind Speed (m/s)']

    # Convert 'time' to datetime and calculate Julian day
    # (values that cannot be parsed become NaT because of errors='coerce')
    data_test['time'] = pd.to_datetime(data_test['time'], errors='coerce')
    data_test['day_of_year'] = data_test['time'].dt.dayofyear

    # Compute ET0
    et0 = compute_et0(data_test, lat, lon)
    data_test['Evaporation (mm/day)'] = et0

    # Convert Precipitation from kg/m²/s to mm/day
    data_test['Precipitation (mm/day)'] = 86400 * data_test['Precipitation (kg m-2 s-1)']

    # Calculate Water Deficit: Water Deficit = ET0 - P + M
    data_test['Water Deficit (mm/day)'] = (
        data_test['Evaporation (mm/day)']
        - data_test['Precipitation (mm/day)']
        + data_test['Moisture in Upper Portion of Soil Column (kg m-2)']
    )

    # Display the resulting DataFrame with Water Deficit
    print(data_test[['Water Deficit (mm/day)', 'Precipitation (mm/day)', 'Evaporation (mm/day)', 'Moisture in Upper Portion of Soil Column (kg m-2)']])

    return data_test


# Run the main function
if __name__ == "__main__":
    main()