File size: 5,300 Bytes
af358f2
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
import os
import xarray as xr
import pandas as pd


# Mapping of descriptive variable names to (display title, unit, NetCDF key).
# - The descriptive name (dict key) is also used as the key of the datasets
#   dict built by get_forecast_datasets().
# - The NetCDF key ('rsds', 'pr', ...) names the variable inside the .nc files
#   and is matched against each filename.
# - Temperature units are listed as °C because load_data() converts those
#   variables from Kelvin before they are tabulated.
VARIABLE_MAPPING = {
    'surface_downwelling_shortwave_radiation': ('Surface Downwelling Shortwave Radiation', 'W/m²', 'rsds'),
    'moisture_in_upper_portion_of_soil_column': ('Moisture in Upper Portion of Soil Column', 'kg m-2', 'mrsos'),
    'precipitation': ('Precipitation', 'kg m-2 s-1', 'pr'),
    'near_surface_relative_humidity': ('Relative Humidity', '%', 'hurs'),
    'evaporation_including_sublimation_and_transpiration': ('Evaporation (including sublimation and transpiration)', 'kg m-2 s-1', 'evspsbl'),
    'total_runoff': ('Total Runoff', 'kg m-2 s-1', 'mrro'),
    'daily_minimum_near_surface_air_temperature': ('Daily Minimum Near Surface Air Temperature', '°C', 'tasmin'),
    'daily_maximum_near_surface_air_temperature': ('Daily Maximum Near Surface Air Temperature', '°C', 'tasmax'),
    'near_surface_wind_speed': ('Near Surface Wind Speed', 'm/s', 'sfcWind'),
    'near_surface_air_temperature': ('Near Surface Air Temperature', '°C', 'tas'),
}


def load_data(variable: str, ds: "xr.Dataset", lat: float, lon: float) -> "xr.DataArray | None":
    """
    Load data for a given variable from the dataset at the nearest latitude and longitude.

    Args:
        variable (str): NetCDF variable key to extract (e.g. 'tas', 'pr').
        ds (xr.Dataset): The xarray dataset containing climate data.
        lat (float): Latitude for nearest data point.
        lon (float): Longitude for nearest data point.

    Returns:
        xr.DataArray | None: Variable values at the nearest grid cell, or
        None when extraction fails (callers are expected to check for None).
    """
    # Keep the try body minimal: only the selection can legitimately fail
    # (missing variable/coordinates). Previously the unit conversion was
    # inside the try as well, so any bug there was silently swallowed and
    # misreported as a load error.
    try:
        data = ds[variable].sel(lat=lat, lon=lon, method="nearest")
    except Exception as e:
        # Best-effort contract: report and return None rather than raising,
        # so one bad variable does not abort the whole extraction.
        print(f"Error loading {variable}: {e}")
        return None

    # Kelvin -> Celsius for temperature variables, matching the °C units
    # advertised in VARIABLE_MAPPING.
    if variable in ("tas", "tasmin", "tasmax"):
        data = data - 273.15

    return data


def get_forecast_datasets(climate_sub_files: list) -> dict:
    """
    Get the forecast datasets by loading NetCDF files for each variable.

    Args:
        climate_sub_files (list): List of file paths to the NetCDF files.

    Returns:
        dict: Dictionary mapping VARIABLE_MAPPING long names to opened
        xarray datasets. Variables with no matching file are absent.
    """
    datasets = {}

    # Match each file to a variable by looking for the NetCDF key in the
    # filename.
    for file_path in climate_sub_files:
        filename = os.path.basename(file_path)

        for long_name, (title, unit, var_key) in VARIABLE_MAPPING.items():
            if var_key in filename:  # Cheap pre-filter on the raw substring
                if var_key in ("tas", "tasmax", "tasmin"):
                    # 'tas' is a substring of 'tasmin'/'tasmax', so for the
                    # temperature keys require a whole '_'-delimited token.
                    # The filename is padded with underscores so a leading
                    # 'tas_...' also matches. (Bug fix: this previously
                    # compared against the literal string "_(unknown)_",
                    # which never matched.)
                    if f"_{var_key}_" in f"_{filename}_" or filename.endswith(f"_{var_key}.nc"):
                        datasets[long_name] = xr.open_dataset(file_path, engine="netcdf4")
                else:
                    datasets[long_name] = xr.open_dataset(file_path, engine="netcdf4")

    return datasets


def get_forecast_data(datasets: dict, lat: float, lon: float) -> pd.DataFrame:
    """
    Extract climate data from the forecast datasets for a given location and convert to a DataFrame.

    Args:
        datasets (dict): Dictionary of datasets, one for each variable
            (keys are VARIABLE_MAPPING long names; some may be missing).
        lat (float): Latitude of the location to extract data for.
        lon (float): Longitude of the location to extract data for.

    Returns:
        pd.DataFrame: A DataFrame containing time series data for each variable.
            NOTE(review): all variables are assumed to share the same time
            axis — the 'time' column is overwritten by each successfully
            loaded variable.
    """
    time_series_data = {'time': []}

    # Iterate over the variable mapping to load and process data for each variable
    for long_name, (title, unit, variable) in VARIABLE_MAPPING.items():
        # Robustness fix: a variable may have no matching file, in which case
        # it is absent from `datasets`; skip it instead of raising KeyError.
        ds = datasets.get(long_name)
        if ds is None:
            print(f"No dataset found for {long_name}; skipping.")
            continue

        print(f"Processing {long_name} ({title}, {unit}, {variable})...")

        # Load the data for the current variable (None on failure).
        data = load_data(variable, ds, lat, lon)

        if data is not None:
            print(f"Time values: {data.time.values[:5]}")  # Preview first few time values
            print(f"Data values: {data.values[:5]}")  # Preview first few data values

            # Add the time values to the 'time' list
            time_series_data['time'] = data.time.values

            # Format the column name with unit (e.g., "Precipitation (kg m-2 s-1)")
            column_name = f"{title} ({unit})"
            time_series_data[column_name] = data.values

    # Convert the time series data into a pandas DataFrame
    return pd.DataFrame(time_series_data)


# --- Script entry point ---------------------------------------------------

# Root directory holding one subfolder per climate-model output.
folder_to_parse = "climate_data_pessimist/"

# Collect the immediate subdirectories of the root directory...
climate_sub_folder = []
for entry in os.listdir(folder_to_parse):
    full_path = os.path.join(folder_to_parse, entry)
    if os.path.isdir(full_path):
        climate_sub_folder.append(full_path)

# ...and every NetCDF file they contain.
climate_sub_files = []
for sub_folder in climate_sub_folder:
    for file_name in os.listdir(sub_folder):
        if file_name.endswith('.nc'):
            climate_sub_files.append(os.path.join(sub_folder, file_name))

# Open one dataset per mapped climate variable.
datasets = get_forecast_datasets(climate_sub_files)

# Extract the time series at the point of interest.
lat, lon = 47.0, 5.0  # Example coordinates
final_df = get_forecast_data(datasets, lat, lon)

# Show a preview of the assembled table.
print(final_df.head())