#!/usr/bin/env python
# -*- coding: utf-8 -*-
import argparse | |
import gc | |
import glob | |
import logging | |
import os | |
import traceback | |
from datetime import timedelta | |
from typing import Dict | |
import netCDF4 | |
import numpy as np | |
import pandas as pd | |
import rasterio | |
from libs.utils import setup_logging | |
from libs.utils import verbose as vprint | |
# Configure logging once at import time, then fetch this module's logger.
setup_logging()
log = logging.getLogger(__name__)

# Module-level settings.
CONFIG = dict()
# V and V_IGNORE are forwarded to ``vprint`` as its 2nd and 3rd positional
# arguments. NOTE(review): presumably V is the verbosity level and V_IGNORE
# lists message kinds to suppress (Debug, Warning, Error) -- confirm against
# libs.utils.verbose.
V = 1
V_IGNORE = list()
def get_historic_agg(
    input_dir: str,
    historic_years: int,
    current_window_start: str,
    current_window_end: str,
    layer_name: str,
    agg_window: str = "mean",
    agg_history: str = "mean",
) -> np.ndarray:
    """Aggregate the same calendar window over a number of previous years.

    For each of the ``historic_years`` years preceding the current window, the
    window is shifted back by that many whole years and aggregated with
    ``get_range_agg``. The per-year rasters are then stacked and reduced along
    the year axis with ``agg_history``.

    Parameters
    ----------
    input_dir : str
        Path to the directory containing the ``model_<year>.nc`` files.
    historic_years : int
        Number of historic years to consider. Must be at least 1. Values that
        ``int()`` accepts (e.g. the string ``"3"``) are coerced.
    current_window_start : str
        Start date of the current window. Format: YYYY-MM-DD.
    current_window_end : str
        End date of the current window. Format: YYYY-MM-DD.
    layer_name : str
        Name of the netCDF variable (soil layer) to aggregate.
    agg_window : str
        Aggregation applied inside each yearly window. Default is "mean".
        Possible values: "mean", "median", "max", "min", "std", "var", "sum".
    agg_history : str
        Aggregation applied across the historic years. Default is "mean".
        Possible values: "mean", "median", "max", "min", "std", "var", "sum".

    Returns
    -------
    np.ndarray
        Raster holding the ``agg_history`` aggregate of the yearly window
        aggregates.

    Raises
    ------
    ValueError
        If ``historic_years`` is smaller than 1 or ``agg_history`` is not a
        supported aggregation method.
    FileNotFoundError
        If the file for the oldest requested historic year is not found.
        Possible solutions:
        - The historic year should be modelled before calling this function.
        - The path to the historic year should be changed.
        - Calculate for a more recent historic year by reducing
          historic_years value.
    """
    reducers = {
        "mean": np.mean,
        "median": np.median,
        "max": np.max,
        "min": np.min,
        "std": np.std,
        "var": np.var,
        "sum": np.sum,
    }

    # The CLI may hand this over as a string; arithmetic below needs an int.
    historic_years = int(historic_years)
    if historic_years < 1:
        raise ValueError(
            f"historic_years should be a positive integer. {historic_years} was provided."
        )

    start = pd.to_datetime(current_window_start)
    end = pd.to_datetime(current_window_end)

    # Only the oldest year is probed up front; missing intermediate years
    # surface when get_range_agg tries to open them.
    first_year = start.year - historic_years
    first_file = os.path.join(input_dir, f"model_{first_year}.nc")
    if not os.path.exists(first_file):
        raise FileNotFoundError(
            f"File not found for the historic data: {first_file}. Make sure the path is correct and the historic year for the requested year is modelled before calling this function."
        )

    # Reject a bad method before any (potentially slow) data is loaded.
    if agg_history not in reducers:
        raise ValueError(
            f"Invalid aggregation method: {agg_history}. Possible values: mean, median, max, min, std, var, sum."
        )

    yearly_aggs = []
    for years_back in range(1, historic_years + 1):
        # DateOffset clamps Feb 29 to Feb 28 in non-leap years, which plain
        # string substitution of the year cannot do.
        shift = pd.DateOffset(years=years_back)
        yearly_aggs.append(
            get_range_agg(
                input_dir=input_dir,
                window_start=(start - shift).strftime("%Y-%m-%d"),
                window_end=(end - shift).strftime("%Y-%m-%d"),
                layer_name=layer_name,
                agg=agg_window,
            )
        )

    # Stack to (year, ...) and collapse the year axis.
    stacked = np.array(yearly_aggs)
    return reducers[agg_history](stacked, axis=0)
def get_range_agg(
    input_dir: str,
    window_start: str,
    window_end: str,
    layer_name: str,
    agg: str = "mean",
) -> np.ndarray:
    """Aggregate one layer over every day of a date window.

    The window covers ``window_start`` up to, but NOT including,
    ``window_end``. Days are read from one ``model_<year>.nc`` file per
    calendar year, indexed along the first axis by zero-based day of year,
    concatenated, and reduced along that axis.

    Parameters
    ----------
    input_dir : str
        Path to the directory containing the ``model_<year>.nc`` files.
    window_start : str
        Start date of the window (inclusive). Format: YYYY-MM-DD.
    window_end : str
        End date of the window (exclusive). Format: YYYY-MM-DD.
    layer_name : str
        Name of the netCDF variable (soil layer) to aggregate.
    agg : str
        Aggregation method to use. Possible values: mean, median, max, min,
        std, var, sum.

    Returns
    -------
    np.ndarray
        Raster aggregated over the days of the window.

    Raises
    ------
    ValueError
        If ``agg`` is not supported, or the window contains no days.
    IndexError
        If a yearly file holds fewer time steps than the window requires.
    """
    reducers = {
        "mean": np.mean,
        "median": np.median,
        "max": np.max,
        "min": np.min,
        "std": np.std,
        "var": np.var,
        "sum": np.sum,
    }
    # Fail before touching any file if the method is unknown.
    if agg not in reducers:
        raise ValueError(
            f"agg should be one of mean, median, max, min, std, var, sum. {agg} was provided."
        )

    dates = pd.date_range(
        pd.to_datetime(window_start),
        pd.to_datetime(window_end) - timedelta(days=1),
        freq="D",
    )
    if len(dates) == 0:
        raise ValueError(
            f"Empty window: window_start ({window_start}) should be earlier than window_end ({window_end})."
        )

    chunks = []
    for year in sorted(set(dates.year)):
        # A daily range is contiguous inside a year, so a slice is enough.
        in_year = dates[dates.year == year]
        first = in_year[0].dayofyear - 1
        stop = in_year[-1].dayofyear
        # The context manager closes the file even if reading fails.
        with netCDF4.Dataset(os.path.join(input_dir, f"model_{int(year)}.nc")) as nc_y:
            vprint(
                1,
                V,
                V_IGNORE,
                Debug=f"getting data for year: {year} from layer: {layer_name}...",
            )
            variable = nc_y.variables[layer_name]
            # Slices truncate silently, so check the bound explicitly.
            if stop > variable.shape[0]:
                raise IndexError(
                    f"model_{int(year)}.nc holds {variable.shape[0]} time steps for {layer_name}, but day index {stop - 1} was requested."
                )
            # Read only the requested days instead of the whole year.
            chunks.append(variable[first:stop, :, :])

    # NOTE(review): np.concatenate does not preserve MaskedArray masks. If the
    # variable carries masked / fill values they will take part in the
    # aggregation -- confirm whether np.ma.concatenate is needed here.
    data_concat = np.concatenate(chunks, axis=0)
    data_agg = reducers[agg](data_concat, axis=0)
    print("done.")
    return data_agg
def save(path, array, profile):
    """Write ``array`` to ``path`` as band 1 of a new raster.

    Parameters
    ----------
    path : str
        Destination path of the raster file.
    array : np.ndarray
        Data written to the first band.
    profile : dict
        Keyword arguments forwarded to ``rasterio.open`` when creating the
        file.
    """
    sink = rasterio.open(path, "w", **profile)
    with sink:
        sink.write(array, indexes=1)
def analyse(
    input,
    window_start,
    window_end,
    historic_years: int,
    layer: str,
    match_raster: str = None,
    output: str = None,
    agg_history: str = "mean",
    agg_window: str = "mean",
    comparison: str = "diff",
) -> Dict[str, str]:
    """Compare a window of model output with the same window in past years.

    Aggregates ``layer`` over the current window and over the same window in
    each of the previous ``historic_years`` years, computes their difference,
    and writes the historic, current and delta rasters to ``output``.

    Parameters
    ----------
    input : str
        Path to the directory containing the ``model_<year>.nc`` files.
    window_start : str
        Start date of the window. Format: YYYY-MM-DD.
    window_end : str
        End date of the window. Format: YYYY-MM-DD.
    historic_years : int
        Number of historic years to use for the comparison.
    layer : str
        Name of the netCDF variable (soil layer) to compare.
    match_raster : str
        Path to a raster whose profile is reused for the outputs. Default:
        None. If None, a ``.tif`` whose name starts with the window's
        year-month (falling back to the window's year) is searched in the
        ``et_pp`` directory located in ``os.path.dirname(input)``.
    output : str
        Directory for the output rasters. Default: None. If None, an
        ``analysis`` directory inside ``input`` is used. Created if missing.
    agg_history : str
        Aggregation method to use for the historic years. Possible values:
        mean, median, max, min, std, var, sum. Default: mean.
    agg_window : str
        Aggregation method to use for the window. Possible values: mean,
        median, max, min, std, var, sum. Default: mean.
    comparison : str
        Comparison method to use. Only "diff" (current minus historic) is
        implemented. Default: diff.

    Returns
    -------
    Dict[str,str]
        Paths of the written rasters under the keys ``historic_raster``,
        ``current_raster`` and ``delta_raster``.

    Raises
    ------
    NotImplementedError
        If ``comparison`` is anything other than "diff".
    ValueError
        If ``window_start`` or ``window_end`` is missing.
    FileNotFoundError
        If no match raster can be found for the window.
    """
    # Fail fast: reject unsupported requests before creating directories or
    # running the expensive aggregations.
    if comparison != "diff":
        raise NotImplementedError(
            f"comparison should be diff. {comparison} was provided."
        )
    if not window_start or not window_end:
        raise ValueError(
            f"window_start and window_end are required. Got {window_start} / {window_end}."
        )

    if output is None:
        output = os.path.join(input, "analysis")
    os.makedirs(output, exist_ok=True)

    if match_raster is None:
        match_raster = os.path.join(os.path.dirname(input), "et_pp")
        # Try the most specific prefixes first (year-month), then widen to
        # the year. The second tuple item is the debug note to emit, if any.
        search_plan = [
            (window_start[:7], None),
            (window_end[:7], None),
            (
                window_start[:5],
                f"Expanding the search for match raster file to find e closer date to {window_start[:5]}...",
            ),
            (
                window_end[:5],
                f"Expanding the search further for match raster file to find e closer date to {window_end[:5]}...",
            ),
        ]
        files = []
        for prefix, note in search_plan:
            if note is not None:
                vprint(1, V, V_IGNORE, Debug=note)
            # glob order is arbitrary; sort so the chosen file is stable.
            files = sorted(glob.glob(os.path.join(match_raster, f"{prefix}*.tif")))
            if files:
                break
        if not files:
            raise FileNotFoundError(
                f"Could not find any matching raster in {match_raster} for the range of dates given at {window_start} / {window_end}!"
            )
        print(f"Found {len(files)} matching raster file {files[0]}.")
        match_raster = files[0]

    # NOTE(review): the profile is reused unchanged, so the outputs inherit
    # the match raster's band count and dtype -- confirm these suit the
    # aggregated arrays.
    with rasterio.open(match_raster) as src:
        profile = src.profile

    historic_data = get_historic_agg(
        input_dir=input,
        historic_years=historic_years,
        current_window_start=window_start,
        current_window_end=window_end,
        agg_window=agg_window,
        agg_history=agg_history,
        layer_name=layer,
    )
    current_data = get_range_agg(
        input_dir=input,
        window_start=window_start,
        window_end=window_end,
        agg=agg_window,
        layer_name=layer,
    )
    delta = current_data - historic_data

    # Shared file-name pieces.
    window_tag = f"{window_start.replace('-', '_')}-{window_end.replace('-', '_')}-{layer}-w_{agg_window}"
    history_tag = f"h_{agg_history}-y_{historic_years}"

    historic_raster = os.path.join(output, f"historic-{window_tag}-{history_tag}.tif")
    current_raster = os.path.join(output, f"current-{window_tag}.tif")
    delta_raster = os.path.join(output, f"delta-{window_tag}-{history_tag}.tif")

    save(historic_raster, historic_data, profile)
    save(current_raster, current_data, profile)
    save(delta_raster, delta, profile)

    return {
        "historic_raster": historic_raster,
        "current_raster": current_raster,
        "delta_raster": delta_raster,
    }
if __name__ == "__main__":
    # Command-line entry point: parse the arguments and run the analysis.
    parser = argparse.ArgumentParser(
        description="Compare a model layer aggregated over a date window with the same window in previous years.",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument(
        "-i",
        "--input",
        help="Absolute or relative path to the directory containing the model_<year>.nc files.",
        default="data.nc",
    )
    parser.add_argument(
        "-l",
        "--layer",
        help="Soil layer to visualise. Default is all. Select between SM1 to SM5 or DD.",
        default="all",
    )
    # Both dates are sliced and parsed downstream, so they must be supplied.
    parser.add_argument(
        "-s", "--window_start", help="Window start date YYYY-MM-DD.", required=True
    )
    parser.add_argument(
        "-e", "--window_end", help="Window end date YYYY-MM-DD.", required=True
    )
    # type=int: the value is used in year arithmetic.
    parser.add_argument(
        "-y",
        "--historic_years",
        help="Number of years to go back in time.",
        type=int,
        default=2,
    )
    parser.add_argument(
        "-a",
        "--agg_history",
        help="Aggregation method to use for the historic data. Possible values: mean, median, max, min, std, var, sum.",
        default="mean",
    )
    parser.add_argument(
        "-g",
        "--agg_window",
        help="Aggregation method to use for the window range data. Possible values: mean, median, max, min, std, var, sum.",
        default="mean",
    )
    # analyse() only implements "diff"; any other value raises
    # NotImplementedError, so the default has to be "diff".
    parser.add_argument(
        "-c",
        "--comparison",
        help="Comparison method to use for the window range data. Only diff is currently supported.",
        default="diff",
    )
    parser.add_argument(
        "-o",
        "--output",
        help="Output directory to save the output files. If omitted, an 'analysis' directory inside the input directory is used.",
    )
    parser.add_argument(
        "-m",
        "--match_raster",
        help="Raster whose profile the outputs are matched to. If omitted, a .tif for the window dates is searched in the 'et_pp' directory beside the input path.",
    )
    args = parser.parse_args()

    try:
        analyse(
            input=args.input,
            window_start=args.window_start,
            window_end=args.window_end,
            historic_years=args.historic_years,
            agg_window=args.agg_window,
            agg_history=args.agg_history,
            comparison=args.comparison,
            layer=args.layer,
            output=args.output,
            match_raster=args.match_raster,
        )
    except Exception as e:
        # Top-level boundary: report, print the traceback, then re-raise so
        # the process still exits with a failure.
        vprint(
            0,
            V,
            V_IGNORE,
            Error="Failed to execute the main function:",
            ErrorMessage=e,
        )
        traceback.print_exc()
        raise