File size: 15,551 Bytes
6b83428
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
225
226
227
228
229
230
231
232
233
234
235
236
237
238
239
240
241
242
243
244
245
246
247
248
249
250
251
252
253
254
255
256
257
258
259
260
261
262
263
264
265
266
267
268
269
270
271
272
273
274
275
276
277
278
279
280
281
282
283
284
285
286
287
288
289
290
291
292
293
294
295
296
297
298
299
300
301
302
303
304
305
306
307
308
309
310
311
312
313
314
315
316
317
318
319
320
321
322
323
324
325
326
327
328
329
330
331
332
333
334
335
336
337
338
339
340
341
342
343
344
345
346
347
348
349
350
351
352
353
354
355
356
357
358
359
360
361
362
363
364
365
366
367
368
369
370
371
372
373
374
375
376
377
378
379
380
381
382
383
384
385
386
387
388
389
390
391
392
393
394
395
396
397
398
399
400
401
402
403
404
405
406
407
408
409
410
411
412
413
414
415
416
417
418
419
420
421
422
423
424
425
426
427
428
429
430
431
432
433
434
435
436
437
438
439
440
441
442
443
444
445
446
447
448
449
450
451
452
453
454
455
456
457
458
459
460
461
462
463
464
465
466
467
468
469
#!/usr/bin/env python
# -*- coding: utf-8 -*-
import argparse
import gc
import glob
import logging
import os
import traceback
from datetime import timedelta
from typing import Dict

import netCDF4
import numpy as np
import pandas as pd
import rasterio

from libs.utils import setup_logging
from libs.utils import verbose as vprint

# Configure logging once at import time via the project helper.
setup_logging()
# Module-level logger named after this module.
log = logging.getLogger(__name__)
# Module-level configuration holder; it is not read anywhere in this file's visible code.
CONFIG = {}
# Passed as the second positional argument to every vprint() call below.
# NOTE(review): presumably the active verbosity level — confirm against libs.utils.verbose.
V = 1
# Passed as the third positional argument to every vprint() call below.
# NOTE(review): presumably message kinds to suppress — confirm against libs.utils.verbose.
V_IGNORE = []  # Debug, Warning, Error
# print(os.getcwd())


def get_historic_agg(
    input_dir: str,
    historic_years: int,
    current_window_start: str,
    current_window_end: str,
    layer_name: str,
    agg_window: str = "mean",
    agg_history: str = "mean",
) -> np.ndarray:
    """Aggregate the same date window over the preceding years.

    For each of the ``historic_years`` years before the current window, the
    window is shifted back by that many years and aggregated with
    ``get_range_agg`` (using ``agg_window``). The per-year rasters are then
    stacked and reduced along the year axis with ``agg_history``.

    Parameters
    ----------
    input_dir : str
        Path to the directory containing the ``model_<year>.nc`` files.
    historic_years : int
        Number of historic years to consider. Must be at least 1. Values that
        ``int()`` can convert (e.g. the string ``"3"`` coming from a command
        line) are accepted.
    current_window_start : str
        Start date of the current window. Format: YYYY-MM-DD.
    current_window_end : str
        End date of the current window. Format: YYYY-MM-DD.
    layer_name : str
        Name of the netCDF variable to aggregate.
    agg_window : str
        Aggregation method within each window. Default is "mean".
        Possible values: "mean", "median", "max", "min", "std", "var", "sum".
    agg_history : str
        Aggregation method across the historic years. Default is "mean".
        Possible values: "mean", "median", "max", "min", "std", "var", "sum".

    Returns
    -------
    np.ndarray
        The per-year window aggregates reduced over the historic years.

    Raises
    ------
    FileNotFoundError
        If the file for the oldest historic year is not found. Possible solutions:
          - The historic year should be modelled before calling this function.
          - The path to the historic year should be changed.
          - Calculate for a more recent historic year by reducing historic_years value.
    ValueError
        If ``historic_years`` is smaller than 1 or ``agg_history`` is not supported.
    """
    reducers = {
        "mean": np.mean,
        "median": np.median,
        "max": np.max,
        "min": np.min,
        "std": np.std,
        "var": np.var,
        "sum": np.sum,
    }

    # The CLI hands this over as a string when the user passes -y explicitly.
    historic_years = int(historic_years)
    if historic_years < 1:
        raise ValueError(
            f"historic_years must be at least 1. {historic_years} was provided."
        )

    window_start = pd.to_datetime(current_window_start)
    window_end = pd.to_datetime(current_window_end)

    # Only the oldest year is checked up front; newer years fail when opened.
    first_year = window_start.year - historic_years
    first_year_file = os.path.join(input_dir, f"model_{first_year}.nc")
    if not os.path.exists(first_year_file):
        raise FileNotFoundError(
            f"File not found for the historic data: {first_year_file}. Make sure the path is correct and the historic year for the requested year is modelled before calling this function."
        )

    # Reject a bad method before doing any of the expensive per-year reads.
    if agg_history not in reducers:
        raise ValueError(
            f"Invalid aggregation method: {agg_history}. Possible values: mean, median, max, min, std, var, sum."
        )

    yearly_aggs = []
    for years_back in range(1, historic_years + 1):
        # DateOffset clamps Feb 29 to Feb 28 in non-leap years, whereas
        # splicing the year into the date string would give an invalid date.
        shift = pd.DateOffset(years=years_back)
        yearly_aggs.append(
            get_range_agg(
                input_dir=input_dir,
                window_start=(window_start - shift).strftime("%Y-%m-%d"),
                window_end=(window_end - shift).strftime("%Y-%m-%d"),
                layer_name=layer_name,
                agg=agg_window,
            )
        )

    # NOTE(review): np.array() drops masks if the per-year results are masked
    # arrays — confirm whether fill values need handling here.
    stacked = np.array(yearly_aggs)
    return reducers[agg_history](stacked, axis=0)


def get_range_agg(
    input_dir: str,
    window_start: str,
    window_end: str,
    layer_name: str,
    agg: str = "mean",
) -> np.ndarray:
    """Aggregate a layer over the days of a date window.

    The window covers ``window_start`` up to, but not including,
    ``window_end``. Each calendar year touched by the window is read from
    ``<input_dir>/model_<year>.nc``; the selected days are indexed by
    zero-based day of year along the first axis of the variable.

    Parameters
    ----------
    input_dir : str
        Path to the directory containing the ``model_<year>.nc`` files.
    window_start : str
        Start date of the window (inclusive). Format: YYYY-MM-DD.
    window_end : str
        End date of the window (exclusive). Format: YYYY-MM-DD.
    layer_name : str
        Name of the netCDF variable to aggregate.
    agg : str
        Aggregation method to use. Possible values: mean, median, max, min,
        std, var, sum.

    Returns
    -------
    np.ndarray
        The selected days reduced along the first (day) axis.

    Raises
    ------
    ValueError
        If ``agg`` is not supported or the window contains no days.
    """
    reducers = {
        "mean": np.mean,
        "median": np.median,
        "max": np.max,
        "min": np.min,
        "std": np.std,
        "var": np.var,
        "sum": np.sum,
    }
    # Reject a bad method before opening any file.
    if agg not in reducers:
        raise ValueError(
            f"agg should be one of mean, median, max, min, std, var, sum. {agg} was provided."
        )

    # The end date is exclusive, hence the one-day step back.
    window_days = pd.date_range(
        pd.to_datetime(window_start),
        pd.to_datetime(window_end) - timedelta(days=1),
        freq="D",
    )
    if len(window_days) == 0:
        raise ValueError(
            f"Empty window: window_end ({window_end}) must be later than window_start ({window_start}); window_end is exclusive."
        )

    # Zero-based day-of-year indices, grouped per calendar year in ascending order.
    days_by_year = {}
    for day in window_days:
        days_by_year.setdefault(day.year, []).append(day.dayofyear - 1)

    yearly_chunks = []
    for year, day_indices in days_by_year.items():
        # The context manager closes the dataset even if the read below fails.
        with netCDF4.Dataset(os.path.join(input_dir, f"model_{year}.nc")) as nc_y:
            vprint(
                1,
                V,
                V_IGNORE,
                Debug=f"getting data for year: {year} from layer: {layer_name}...",
            )
            # Read the variable, then keep only the requested days; the
            # full-year temporary is released as soon as the subset is taken.
            yearly_chunks.append(
                nc_y.variables[layer_name][:, :, :][day_indices, :, :]
            )

    # NOTE(review): np.concatenate does not preserve the masks of masked-array
    # inputs (see the numpy docs) — confirm whether fill values need handling.
    window_stack = np.concatenate(yearly_chunks, axis=0)
    data_agg = reducers[agg](window_stack, axis=0)
    print("done.")
    return data_agg


def save(path, array, profile):
    """Write an array to disk as band 1 of a raster.

    Parameters
    ----------
    path : str
        Destination file of the raster.
    array : np.ndarray
        Values written into the first band.
    profile : dict
        Keyword arguments forwarded to ``rasterio.open`` when creating the file.
    """
    band_index = 1
    with rasterio.open(path, "w", **profile) as raster:
        raster.write(array, band_index)


def analyse(
    input,
    window_start,
    window_end,
    historic_years: int,
    layer: str,
    match_raster: str = None,
    output: str = None,
    agg_history: str = "mean",
    agg_window: str = "mean",
    comparison: str = "diff",
) -> Dict[str, str]:
    """Compare a window of one layer against the same window in previous years.

    Computes the aggregate of ``layer`` over the current window and over the
    same window in each of the ``historic_years`` preceding years, takes the
    difference (current - historic) and writes all three rasters to ``output``
    using the profile of a matching raster.

    Parameters
    ----------
    input : str
        Path to the directory containing the ``model_<year>.nc`` files.
    window_start : str
        Start date of the window. Format: YYYY-MM-DD.
    window_end : str
        End date of the window. Format: YYYY-MM-DD.
    historic_years : int
        Number of historic years to use for the comparison.
    layer : str
        Name of the layer (netCDF variable) to compare.
    match_raster : str
        Either the path to a raster file whose profile is used for the
        outputs, or a directory searched for ``<date prefix>*.tif`` files
        (month of the window start, month of the window end, then the years).
        Default: None, which searches the ``et_pp`` directory next to ``input``.
    output : str
        Directory for the output rasters. Default: None, which uses
        ``<input>/analysis``. It is created if missing.
    agg_history : str
        Aggregation method across the historic years. Possible values: mean,
        median, max, min, std, var, sum. Default: mean.
    agg_window : str
        Aggregation method within a window. Possible values: mean, median,
        max, min, std, var, sum. Default: mean.
    comparison : str
        Comparison method to use. Only "diff" is implemented. Default: diff.

    Returns
    -------
    Dict[str,str]
        Paths of the written rasters under the keys "historic_raster",
        "current_raster" and "delta_raster".

    Raises
    ------
    NotImplementedError
        If ``comparison`` is anything other than "diff".
    FileNotFoundError
        If no matching raster can be found.
    """
    # Reject unsupported comparisons before touching the disk or reading data.
    if comparison != "diff":
        raise NotImplementedError(
            f"comparison should be diff. {comparison} was provided."
        )

    if output is None:
        output = os.path.join(input, "analysis")
    os.makedirs(output, exist_ok=True)

    if match_raster is None:
        match_raster = os.path.join(os.path.dirname(input), "et_pp")

    if os.path.isfile(match_raster):
        # An explicit raster file was given: use it as is.
        files = [match_raster]
    else:
        # Search progressively wider: month of start, month of end, then the
        # years. Only the year-level fallbacks announce themselves.
        searches = (
            (window_start[:7], None),
            (window_end[:7], None),
            (
                window_start[:5],
                f"Expanding the search for match raster file to find e closer date to {window_start[:5]}...",
            ),
            (
                window_end[:5],
                f"Expanding the search further for match raster file to find e closer date  to {window_end[:5]}...",
            ),
        )
        files = []
        for prefix, debug_message in searches:
            if debug_message is not None:
                vprint(1, V, V_IGNORE, Debug=debug_message)
            # Sorted so the chosen raster does not depend on directory order.
            files = sorted(glob.glob(os.path.join(match_raster, f"{prefix}*.tif")))
            if files:
                break
    if not files:
        raise FileNotFoundError(
            f"Could not find any matching raster in {match_raster} for the range of dates given at {window_start} / {window_end}!"
        )
    print(f"Found {len(files)} matching raster file {files[0]}.")
    match_raster = files[0]

    with rasterio.open(match_raster) as src:
        profile = src.profile

    # Aggregate of the same window over the preceding years.
    historic_data = get_historic_agg(
        input_dir=input,
        historic_years=historic_years,
        current_window_start=window_start,
        current_window_end=window_end,
        agg_window=agg_window,
        agg_history=agg_history,
        layer_name=layer,
    )
    # Aggregate of the current window.
    current_data = get_range_agg(
        input_dir=input,
        window_start=window_start,
        window_end=window_end,
        agg=agg_window,
        layer_name=layer,
    )

    delta = current_data - historic_data

    # File names encode the window, the layer and the aggregation settings.
    start_tag = window_start.replace("-", "_")
    end_tag = window_end.replace("-", "_")
    historic_raster = os.path.join(
        output,
        f"historic-{start_tag}-{end_tag}-{layer}-w_{agg_window}-h_{agg_history}-y_{historic_years}.tif",
    )
    current_raster = os.path.join(
        output,
        f"current-{start_tag}-{end_tag}-{layer}-w_{agg_window}.tif",
    )
    delta_raster = os.path.join(
        output,
        f"delta-{start_tag}-{end_tag}-{layer}-w_{agg_window}-h_{agg_history}-y_{historic_years}.tif",
    )

    save(historic_raster, historic_data, profile)
    save(current_raster, current_data, profile)
    save(delta_raster, delta, profile)

    return {
        "historic_raster": historic_raster,
        "current_raster": current_raster,
        "delta_raster": delta_raster,
    }


if __name__ == "__main__":
    # Command-line entry point: parse the arguments and run one analysis.
    parser = argparse.ArgumentParser(
        description="Compare a model layer aggregated over a date window with the same window in previous years and save the resulting rasters.",
        formatter_class=argparse.ArgumentDefaultsHelpFormatter,
    )
    parser.add_argument(
        "-i",
        "--input",
        help="Absolute or relative path to the directory containing the yearly model_<year>.nc files.",
        default="data.nc",
    )
    # NOTE(review): the layer is used verbatim as the netCDF variable name, so
    # the default "all" presumably only works if such a variable exists — confirm.
    parser.add_argument(
        "-l",
        "--layer",
        help="Soil layer to visualise. Default is all. Select between SM1 to SM5 or DD.",
        default="all",
    )
    # Both window bounds are sliced and parsed downstream, so they are mandatory.
    parser.add_argument(
        "-s",
        "--window_start",
        help="Window start date YYYY-MM-DD.",
        required=True,
    )
    parser.add_argument(
        "-e",
        "--window_end",
        help="Window end date YYYY-MM-DD.",
        required=True,
    )
    # type=int: the value is used in integer arithmetic on years.
    parser.add_argument(
        "-y",
        "--historic_years",
        help="Number of years to go back in time.",
        type=int,
        default=2,
    )
    parser.add_argument(
        "-a",
        "--agg_history",
        help="Aggregation method to use for the historic data. Possible values: mean, median, max, min, std, var, sum.",
        default="mean",
    )
    parser.add_argument(
        "-g",
        "--agg_window",
        help="Aggregation method to use for the window range data. Possible values: mean, median, max, min, std, var, sum.",
        default="mean",
    )
    # "diff" is the only comparison analyse() implements.
    parser.add_argument(
        "-c",
        "--comparison",
        help="Comparison method to use for the window range data. Possible values: diff.",
        default="diff",
    )
    parser.add_argument(
        "-o",
        "--output",
        help="Output directory to save the output files. Default is the input directory.",
    )
    parser.add_argument(
        "-m",
        "--match_raster",
        help="Raster to match the output to. Default is the input raster.",
    )

    args = parser.parse_args()

    try:
        analyse(
            input=args.input,
            window_start=args.window_start,
            window_end=args.window_end,
            historic_years=args.historic_years,
            agg_window=args.agg_window,
            agg_history=args.agg_history,
            comparison=args.comparison,
            layer=args.layer,
            output=args.output,
            match_raster=args.match_raster,
        )

    except Exception as e:
        vprint(
            0,
            V,
            V_IGNORE,
            Error="Failed to execute the main function:",
            ErrorMessage=e,
        )
        traceback.print_exc()
        # Bare raise re-raises the active exception with its original traceback.
        raise