Hugo Massonnat committed on
Commit
ac675c8
·
1 Parent(s): ec9d9e0

update forecast dataframe

Browse files
Files changed (2) hide show
  1. forecast.py +59 -78
  2. requirements.txt +2 -0
forecast.py CHANGED
@@ -1,11 +1,8 @@
1
  import os
2
  import xarray as xr
3
  import pandas as pd
4
- from matplotlib import pyplot as plt
5
- import docs.agro_indicators as agro_indicators
6
- import numpy as np
7
- from datetime import datetime
8
 
 
9
 
10
  # Mapping of variable names to metadata (title, unit, and NetCDF variable key)
11
  VARIABLE_MAPPING = {
@@ -23,21 +20,21 @@ VARIABLE_MAPPING = {
23
 
24
 
25
  # Function to load data for a given variable from the dataset at the nearest latitude and longitude
26
- def load_data(variable: str, ds: xr.Dataset, lat: float, lon: float) -> xr.DataArray:
27
  """
28
  Load data for a given variable from the dataset at the nearest latitude and longitude.
29
 
30
  Args:
31
  variable (str): The variable to extract from the dataset.
32
  ds (xr.Dataset): The xarray dataset containing climate data.
33
- lat (float): Latitude for nearest data point.
34
- lon (float): Longitude for nearest data point.
35
 
36
  Returns:
37
  xr.DataArray: The data array containing the variable values for the specified location.
38
  """
39
  try:
40
- data = ds[variable].sel(lat=lat, lon=lon, method="nearest")
41
  # Convert temperature from Kelvin to Celsius for specific variables
42
  if variable in ["tas", "tasmin", "tasmax"]:
43
  data = data - 273.15
@@ -74,109 +71,93 @@ def get_forecast_datasets(climate_sub_files: list) -> dict:
74
 
75
 
76
  # Function to extract climate data from forecast datasets and convert to a DataFrame
77
- def get_forecast_data(datasets: dict, lat: float, lon: float) -> pd.DataFrame:
78
  """
79
  Extract climate data from the forecast datasets for a given location and convert to a DataFrame.
80
 
81
  Args:
82
- datasets (dict): Dictionary of datasets, one for each variable.
83
- lat (float): Latitude of the location to extract data for.
84
- lon (float): Longitude of the location to extract data for.
85
 
86
  Returns:
87
  pd.DataFrame: A DataFrame containing time series data for each variable.
88
  """
 
 
 
 
 
 
 
 
 
 
 
 
 
 
89
  time_series_data = {'time': []}
90
 
91
  for long_name, (title, unit, variable) in VARIABLE_MAPPING.items():
92
  print(f"Processing {long_name} ({title}, {unit}, {variable})...")
93
- data = load_data(variable, datasets[long_name], lat, lon)
94
 
95
  if data is not None:
96
  time_series_data['time'] = data.time.values
97
  column_name = f"{title} ({unit})"
98
  time_series_data[column_name] = data.values
99
 
100
- return pd.DataFrame(time_series_data)
101
-
102
-
103
- # Function to compute reference evapotranspiration (ET0)
104
- def compute_et0(df: pd.DataFrame, latitude: float, longitude: float):
105
- """
106
- Compute reference evapotranspiration using the provided climate data.
107
-
108
- Args:
109
- df (pd.DataFrame): DataFrame containing climate data.
110
- latitude (float): Latitude of the location.
111
- longitude (float): Longitude of the location.
112
-
113
- Returns:
114
- arraylike: Daily reference evapotranspiration.
115
- """
116
- irradiance = df.irradiance
117
- Tmin = df.air_temperature_min
118
- Tmax = df.air_temperature_max
119
- T = (Tmin + Tmax) / 2 # Average temperature
120
- RHmin = df.relative_humidity_min
121
- RHmax = df.relative_humidity_max
122
- WS = df.wind_speed
123
- JJulien = df.day_of_year
124
-
125
- et0_values = agro_indicators.et0(irradiance, T, Tmax, Tmin, RHmin, RHmax, WS, JJulien, latitude, longitude)
126
- return et0_values
127
-
128
 
129
- # Main processing workflow
130
- def main():
131
- # Define the directory to parse
132
- folder_to_parse = "../climate_data_pessimist/"
133
-
134
- # Retrieve the subfolders and files to parse
135
- climate_sub_folder = [os.path.join(folder_to_parse, e) for e in os.listdir(folder_to_parse) if os.path.isdir(os.path.join(folder_to_parse, e))]
136
- climate_sub_files = [os.path.join(e, i) for e in climate_sub_folder for i in os.listdir(e) if i.endswith('.nc')]
137
-
138
- # Load the forecast datasets
139
- datasets = get_forecast_datasets(climate_sub_files)
140
-
141
- # Get the forecast data for a specific latitude and longitude
142
- lat, lon = 47.0, 5.0 # Example coordinates
143
- final_df = get_forecast_data(datasets, lat, lon)
144
 
145
- coef = 1
146
 
147
- # Display the resulting DataFrame
148
- print(final_df.head())
149
 
150
- # Preprocess the data
151
- data_test = final_df.copy()
152
- data_test["irradiance"] = data_test['Surface Downwelling Shortwave Radiation (W/m²)'] * coef
153
- data_test["air_temperature_min"] = data_test['Daily Minimum Near Surface Air Temperature (°C)']
154
- data_test["air_temperature_max"] = data_test['Daily Maximum Near Surface Air Temperature (°C)']
155
- data_test["relative_humidity_min"] = data_test['Relative Humidity (%)']
156
- data_test["relative_humidity_max"] = data_test['Relative Humidity (%)']
157
- data_test["wind_speed"] = data_test['Near Surface Wind Speed (m/s)']
158
 
159
  # Convert 'time' to datetime and calculate Julian day
160
- data_test['time'] = pd.to_datetime(data_test['time'], errors='coerce')
161
- data_test['day_of_year'] = data_test['time'].dt.dayofyear
162
 
163
  # Compute ET0
164
- et0 = compute_et0(data_test, lat, lon)
165
- data_test['Evaporation (mm/day)'] = et0
166
 
167
  # Convert Precipitation from kg/m²/s to mm/day
168
- data_test['Precipitation (mm/day)'] = 86400 * data_test['Precipitation (kg m-2 s-1)']
169
 
170
  # Calculate Water Deficit: Water Deficit = ET0 - P + M
171
- data_test['Water Deficit (mm/day)'] = (
172
- (data_test['Evaporation (mm/day)'] - (data_test['Precipitation (mm/day)']) +
173
- data_test['Moisture in Upper Portion of Soil Column (kg m-2)'])
174
  )
175
 
176
- # Display the resulting DataFrame with Water Deficit
177
- print(data_test[['Water Deficit (mm/day)', 'Precipitation (mm/day)', 'Evaporation (mm/day)', 'Moisture in Upper Portion of Soil Column (kg m-2)']])
 
 
 
 
 
 
 
 
 
 
 
 
178
 
179
- return data_test
180
 
181
 
182
  # Run the main function
 
1
  import os
2
  import xarray as xr
3
  import pandas as pd
 
 
 
 
4
 
5
+ from compute_et0_adjusted import compute_et0
6
 
7
  # Mapping of variable names to metadata (title, unit, and NetCDF variable key)
8
  VARIABLE_MAPPING = {
 
20
 
21
 
22
  # Function to load data for a given variable from the dataset at the nearest latitude and longitude
23
+ def load_data(variable: str, ds: xr.Dataset, latitude: float, longitude: float) -> xr.DataArray:
24
  """
25
  Load data for a given variable from the dataset at the nearest latitude and longitude.
26
 
27
  Args:
28
  variable (str): The variable to extract from the dataset.
29
  ds (xr.Dataset): The xarray dataset containing climate data.
30
+ latitude(float): Latitude for nearest data point.
31
+ longitude (float): Longitude for nearest data point.
32
 
33
  Returns:
34
  xr.DataArray: The data array containing the variable values for the specified location.
35
  """
36
  try:
37
+ data = ds[variable].sel(lat=latitude, lon=longitude, method="nearest")
38
  # Convert temperature from Kelvin to Celsius for specific variables
39
  if variable in ["tas", "tasmin", "tasmax"]:
40
  data = data - 273.15
 
71
 
72
 
73
  # Function to extract climate data from forecast datasets and convert to a DataFrame
74
def get_forecast_data(latitude: float, longitude: float, scenario: str, shading_coef: float) -> pd.DataFrame:
    """
    Extract climate data from the forecast datasets for a given location and convert to a DataFrame.

    NetCDF files are looked up one folder level below ``data/climate_data_<scenario>/``,
    loaded with ``get_forecast_datasets``, sampled per variable with ``load_data``,
    and the resulting table is passed through ``preprocess_forectast_data``.

    Args:
        latitude (float): Latitude of the location to extract data for.
        longitude (float): Longitude of the location to extract data for.
        scenario (str): The scenario to extract data for; must be "moderate" or "pessimist".
        shading_coef (float): Shading coefficient in [0, 1], forwarded to
            ``preprocess_forectast_data``.

    Returns:
        pd.DataFrame: The preprocessed DataFrame: one column per variable of
        ``VARIABLE_MAPPING`` (named "<title> (<unit>)"), a 'time' column, and the
        columns added by ``preprocess_forectast_data``.

    Raises:
        AssertionError: If ``scenario`` is not a known scenario or ``shading_coef``
            is outside [0, 1].
    """
    assert scenario in ["moderate", "pessimist"]
    assert 0 <= shading_coef <= 1

    # Define the directory to parse
    folder_to_parse = f"data/climate_data_{scenario}/"

    # Retrieve the subfolders and files to parse
    climate_sub_folder = [os.path.join(folder_to_parse, e) for e in os.listdir(folder_to_parse) if
                          os.path.isdir(os.path.join(folder_to_parse, e))]
    climate_sub_files = [os.path.join(e, i) for e in climate_sub_folder for i in os.listdir(e) if i.endswith('.nc')]

    # Load the forecast datasets
    datasets = get_forecast_datasets(climate_sub_files)

    time_series_data = {'time': []}

    for long_name, (title, unit, variable) in VARIABLE_MAPPING.items():
        print(f"Processing {long_name} ({title}, {unit}, {variable})...")
        data = load_data(variable, datasets[long_name], latitude, longitude)

        # Variables for which load_data yields None are simply left out of the table
        if data is not None:
            time_series_data['time'] = data.time.values
            column_name = f"{title} ({unit})"
            time_series_data[column_name] = data.values

    forecast_data = pd.DataFrame(time_series_data)
    forecast_data = preprocess_forectast_data(forecast_data, latitude, longitude, shading_coef)

    # Return the preprocessed table. Previously a fresh raw DataFrame was
    # returned here, which silently discarded the preprocessing result and
    # made shading_coef a no-op.
    return forecast_data
 
 
 
 
 
 
 
 
 
 
 
 
 
 
115
 
 
116
 
117
def preprocess_forectast_data(df: pd.DataFrame, latitude, longitude, shading_coef=0) -> pd.DataFrame:
    """
    Derive the agro-climatic columns from a raw forecast DataFrame.

    Works on a copy of ``df``; the input is not modified.

    Args:
        df (pd.DataFrame): Raw forecast table holding the "<title> (<unit>)" columns
            read below and a 'time' column.
        latitude: Latitude passed through to ``compute_et0``.
        longitude: Longitude passed through to ``compute_et0``.
        shading_coef: Value in [0, 1]; irradiance is multiplied by ``1 - shading_coef``.

    Returns:
        pd.DataFrame: The copy of ``df`` extended with the short-named input columns,
        'day_of_year', 'Evaporation (mm/day)', 'Precipitation (mm/day)' and
        'Water Deficit (mm/day)'.

    Raises:
        AssertionError: If ``shading_coef`` is outside [0, 1].
    """
    assert 0 <= shading_coef <= 1

    table = df.copy()

    # Irradiance is the only short-named column that gets rescaled (by the shading)
    table["irradiance"] = table['Surface Downwelling Shortwave Radiation (W/m²)'] * (1 - shading_coef)

    # Remaining short-named columns are plain copies of the source columns;
    # both humidity bounds are fed from the single relative-humidity column.
    short_name_sources = {
        "air_temperature_min": 'Daily Minimum Near Surface Air Temperature (°C)',
        "air_temperature_max": 'Daily Maximum Near Surface Air Temperature (°C)',
        "relative_humidity_min": 'Relative Humidity (%)',
        "relative_humidity_max": 'Relative Humidity (%)',
        "wind_speed": 'Near Surface Wind Speed (m/s)',
    }
    for short_name, source_column in short_name_sources.items():
        table[short_name] = table[source_column]

    # Convert 'time' to datetime (unparseable values become NaT) and take the day of the year
    timestamps = pd.to_datetime(table['time'], errors='coerce')
    table['time'] = timestamps
    table['day_of_year'] = table['time'].dt.dayofyear

    # Reference evapotranspiration, computed from the columns prepared above
    table['Evaporation (mm/day)'] = compute_et0(table, latitude, longitude)

    # Precipitation: kg/m²/s -> mm/day (86400 seconds per day)
    table['Precipitation (mm/day)'] = 86400 * table['Precipitation (kg m-2 s-1)']

    # Water Deficit = ET0 - P + M
    evaporation = table['Evaporation (mm/day)']
    precipitation = table['Precipitation (mm/day)']
    soil_moisture = table['Moisture in Upper Portion of Soil Column (kg m-2)']
    table['Water Deficit (mm/day)'] = evaporation - precipitation + soil_moisture

    return table
146
+
147
+
148
+ # Main processing workflow
149
def main():
    """
    Run the forecast workflow for an example location and print a preview.

    Returns:
        The DataFrame returned by ``get_forecast_data`` for the example inputs.
    """
    # Example coordinates, pessimist scenario, no shading
    example_inputs = {
        "scenario": "pessimist",
        "shading_coef": 0,
    }
    result = get_forecast_data(47.0, 5.0, **example_inputs)

    # Show the first rows, then the available columns
    for preview in (result.head(), result.columns):
        print(preview)

    return result
161
 
162
 
163
  # Run the main function
requirements.txt CHANGED
@@ -22,3 +22,5 @@ matplotlib
22
  xarray
23
  folium
24
  netcdf4
 
 
 
22
  xarray
23
  folium
24
  netcdf4
25
+ geopy
26
+ geopandas