File size: 7,469 Bytes
a0e4229 b4c50fc a0e4229 b4c50fc a0e4229 b4c50fc a0e4229 b4c50fc |
1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 21 22 23 24 25 26 27 28 29 30 31 32 33 34 35 36 37 38 39 40 41 42 43 44 45 46 47 48 49 50 51 52 53 54 55 56 57 58 59 60 61 62 63 64 65 66 67 68 69 70 71 72 73 74 75 76 77 78 79 80 81 82 83 84 85 86 87 88 89 90 91 92 93 94 95 96 97 98 99 100 101 102 103 104 105 106 107 108 109 110 111 112 113 114 115 116 117 118 119 120 121 122 123 124 125 126 127 128 129 130 131 132 133 134 135 136 137 138 139 140 141 142 143 144 145 146 147 148 149 150 151 152 153 154 155 156 157 158 159 160 161 162 163 164 165 166 167 168 169 170 171 172 173 174 175 176 177 178 179 180 181 182 183 184 185 186 187 188 189 190 191 192 193 194 195 196 197 198 199 200 201 202 203 204 |
import xarray as xr
from siphon.catalog import TDSCatalog
import numpy as np
import datetime
import re
import os
# %%
def compute_thermal_temp_difference(subset, lapse_rate=0.0098):
    """
    Compute how much warmer a parcel lifted from the ground is than the ambient air.

    The parcel starts at ``air_temperature_0m`` and is cooled by ``lapse_rate``
    for every unit of height above the terrain (``altitude - elevation``).
    Levels that lie below the terrain get no cooling. The parcel temperature
    is then compared with ``air_temperature_ml`` and negative differences are
    clipped to zero.

    Parameters:
    - subset: dataset-like object exposing ``air_temperature_0m``,
      ``air_temperature_ml``, ``altitude`` and ``elevation`` (presumably
      temperatures in Kelvin and heights in metres -- confirm against caller).
    - lapse_rate: temperature decrease per unit of height above ground.
      The default 0.0098 corresponds to 9.8 degrees per 1000 height units.
    Returns:
    - the non-negative temperature excess of the ground parcel over the
      ambient air, broadcast over the inputs' dimensions.
    """
    # Kelvin -> Celsius. The offset cancels in the final difference, so this
    # only matters for the readability of the intermediate values.
    kelvin_offset = 273.15
    ground_temp = subset.air_temperature_0m - kelvin_offset
    air_temp = subset["air_temperature_ml"] - kelvin_offset

    # Example dimensions noted by the original author:
    #   'air_temperature_ml'  altitude: 4, y: 3, x: 3
    #   'elevation'           y: 3, x: 3
    #   'altitude'            altitude: 4
    # Broadcasting altitude against elevation yields the height above ground
    # for every (altitude, y, x) cell; negative heights are set to zero.
    height_above_ground = subset.altitude - subset.elevation
    height_above_ground = height_above_ground.where(height_above_ground >= 0, 0)

    # Ground temperature carried upward, decreasing with the lapse rate.
    ground_parcel_temp = ground_temp - lapse_rate * height_above_ground
    return (ground_parcel_temp - air_temp).clip(min=0)
def extract_timestamp(filename):
    """
    Pull the first ``YYYYMMDDTHHZ`` stamp out of a file name.

    Parameters:
    - filename: str, text to search (e.g. the last component of a dataset URL).
    Returns:
    - str of the form ``YYYY-MM-DDTHH:00Z`` built from the first match,
      or None when the name contains no such stamp.
    """
    found = re.search(r"(\d{4})(\d{2})(\d{2})T(\d{2})Z", filename)
    if found is None:
        return None
    yyyy, mm, dd, hh = found.groups()
    return f"{yyyy}-{mm}-{dd}T{hh}:00Z"
def find_latest_meps_file():
    """
    Return the OPeNDAP URL of the newest ``meps_det_ml`` file in today's catalog.

    The day folder is chosen from the current UTC date: the file names carry
    a ``...T<HH>Z`` stamp, so a local date could point at the wrong folder on
    machines that are not running in UTC.

    Returns:
    - str, ``<dodsC base for today>/<dataset name>`` for the dataset whose
      name sorts last among those containing ``meps_det_ml``.
    Raises:
    - IndexError: when today's catalog lists no ``meps_det_ml`` dataset.
      The original code raised the same exception type, without a message.
    - whatever ``TDSCatalog`` raises when the catalog cannot be fetched.
    """
    # The MEPS dataset: https://github.com/metno/NWPdocs/wiki/MEPS-dataset
    today = datetime.datetime.now(datetime.timezone.utc)
    day_folder = f"{today.year}/{today.month:02d}/{today.day:02d}"
    catalog_url = f"https://thredds.met.no/thredds/catalog/meps25epsarchive/{day_folder}/catalog.xml"
    file_url_base = f"https://thredds.met.no/thredds/dodsC/meps25epsarchive/{day_folder}"

    # Get the datasets from the catalog (iterating yields the dataset names).
    catalog = TDSCatalog(catalog_url)
    datasets = [name for name in catalog.datasets if "meps_det_ml" in name]
    if not datasets:
        raise IndexError(f"No 'meps_det_ml' dataset listed in {catalog_url}")

    # The names embed the run time as YYYYMMDDTHHZ, so the lexicographic
    # maximum is the most recent run.
    return f"{file_url_base}/{max(datasets)}"
# Standard gravity, used both for geopotential -> elevation and in the
# hypsometric equation.
_STANDARD_GRAVITY = 9.80665


def _hybrid_to_height(ds):
    """Estimate the height of each hybrid level above the surface with the hypsometric equation."""
    gas_constant = 287.05  # Gas constant for dry air
    surface_pressure = ds["surface_air_pressure"]
    # Pressure at each hybrid level.
    pressure = ds["ap"] + ds["b"] * surface_pressure
    temperature = ds["air_temperature_ml"]
    # Mean of each level's temperature and that of the last hybrid level
    # (the original comments treat the last level as the surface).
    mean_temperature = 0.5 * (temperature + temperature.isel(hybrid=-1))
    return (gas_constant * mean_temperature / _STANDARD_GRAVITY) * np.log(
        surface_pressure / pressure
    )


def load_meps_for_location(file_path=None, altitude_min=0, altitude_max=3000):
    """
    Load a fixed horizontal window of a MEPS model-level file and derive thermal fields.

    The function requests x indices 220..300, y indices 420..500, time steps
    0..66 and hybrid levels 25..64 over OPeNDAP. It adds ``elevation`` and
    ``air_temperature_0m`` from the matching ``meps_det_sfc`` file, replaces
    the ``hybrid`` dimension with an ``altitude`` coordinate, and then adds
    ``wind_speed``, ``thermal_temp_diff`` and ``thermal_top``.

    Parameters:
    - file_path: str or None, OPeNDAP URL of a ``meps_det_ml`` file. When
      None, the result of ``find_latest_meps_file()`` is used.
    - altitude_min: lower bound (inclusive) on the derived altitude
      coordinate (presumably metres -- confirm).
    - altitude_max: upper bound (inclusive) on the derived altitude coordinate.
    Returns:
    - xarray.Dataset restricted to the altitude range, with ``latitude`` and
      ``longitude`` set as coordinates.
    """
    if file_path is None:
        file_path = find_latest_meps_file()

    x_range = "[220:1:300]"
    y_range = "[420:1:500]"
    time_range = "[0:1:66]"
    hybrid_range = "[25:1:64]"
    height_range = "[0:1:0]"

    # Variable name -> index ranges, joined into the query string below.
    params = {
        "x": x_range,
        "y": y_range,
        "time": time_range,
        "hybrid": hybrid_range,
        "height": height_range,
        "longitude": f"{y_range}{x_range}",
        "latitude": f"{y_range}{x_range}",
        "air_temperature_ml": f"{time_range}{hybrid_range}{y_range}{x_range}",
        "ap": f"{hybrid_range}",
        "b": f"{hybrid_range}",
        "surface_air_pressure": f"{time_range}{height_range}{y_range}{x_range}",
        "x_wind_ml": f"{time_range}{hybrid_range}{y_range}{x_range}",
        "y_wind_ml": f"{time_range}{hybrid_range}{y_range}{x_range}",
    }
    query = ",".join(f"{k}{v}" for k, v in params.items())
    path = f"{file_path}?{query}"
    subset = xr.open_dataset(path, cache=True)
    subset.load()

    # Surface fields come from the sibling 'meps_det_sfc' file. The surface
    # geopotential is requested for the first time step only.
    time_range_sfc = "[0:1:0]"
    surf_params = {
        "x": x_range,
        "y": y_range,
        "time": f"{time_range}",
        "surface_geopotential": f"{time_range_sfc}[0:1:0]{y_range}{x_range}",
        "air_temperature_0m": f"{time_range}[0:1:0]{y_range}{x_range}",
    }
    surf_query = ",".join(f"{k}{v}" for k, v in surf_params.items())
    file_path_surf = f"{file_path.replace('meps_det_ml', 'meps_det_sfc')}?{surf_query}"

    # Load surface parameters and merge them into the main dataset.
    surf = xr.open_dataset(file_path_surf, cache=True)
    # Convert the surface geopotential to elevation.
    subset["elevation"] = (surf.surface_geopotential / _STANDARD_GRAVITY).squeeze()
    subset["air_temperature_0m"] = surf.air_temperature_0m.squeeze()

    # One representative altitude per hybrid level: the level heights are
    # averaged over time and over the whole x/y window.
    altitude = _hybrid_to_height(subset).mean("time").squeeze().mean("x").mean("y")
    subset = subset.assign_coords(altitude=("hybrid", altitude.data))
    subset = subset.swap_dims({"hybrid": "altitude"})

    # Keep only the levels inside the requested altitude range.
    subset = subset.where(
        (subset.altitude >= altitude_min) & (subset.altitude <= altitude_max), drop=True
    ).squeeze()

    wind_speed = np.sqrt(subset["x_wind_ml"] ** 2 + subset["y_wind_ml"] ** 2)
    subset = subset.assign(wind_speed=(("time", "altitude", "y", "x"), wind_speed.data))

    subset["thermal_temp_diff"] = compute_thermal_temp_difference(subset)

    # Where a column's thermal_temp_diff sums to nothing positive, add a tiny
    # value at the lowest altitude. The argmax below then selects that lowest
    # level for such columns, instead of index 0 (what argmax returns when no
    # entry is positive).
    thermal_temp_diff = subset["thermal_temp_diff"]
    thermal_temp_diff = thermal_temp_diff.where(
        (thermal_temp_diff.sum("altitude") > 0)
        | (subset["altitude"] != subset.altitude.min()),
        thermal_temp_diff + 1e-6,
    )
    # Index of the first level along 'altitude' with a positive difference.
    indices = (thermal_temp_diff > 0).argmax(dim="altitude")
    # Altitudes corresponding to these indices.
    thermal_top = subset.altitude[indices]
    subset = subset.assign(thermal_top=(("time", "y", "x"), thermal_top.data))
    subset = subset.set_coords(["latitude", "longitude"])
    return subset
def subsample_lat_lon(dataset, lat_stride=2, lon_stride=2):
    """
    Thin out the horizontal grid by keeping every n-th point along 'y' and 'x'.

    Parameters:
    - dataset: xarray.Dataset, must have both a 'y' and an 'x' dimension.
    - lat_stride: int, step between kept points along 'y'.
    - lon_stride: int, step between kept points along 'x'.
    Returns:
    - xarray.Dataset, the result of ``dataset.isel`` with the strided slices.
    Raises:
    - ValueError: when 'y' or 'x' is missing from ``dataset.dims``.
    """
    # Both grid dimensions are required before slicing.
    has_grid = "y" in dataset.dims and "x" in dataset.dims
    if not has_grid:
        raise ValueError(
            "Dataset does not contain 'y' and 'x' dimensions for latitude and longitude."
        )

    every_nth_row = slice(None, None, lat_stride)
    every_nth_col = slice(None, None, lon_stride)
    return dataset.isel(y=every_nth_row, x=every_nth_col)
if __name__ == "__main__":
    # Fetch the latest MEPS file, derive the thermal fields, thin the grid
    # and store the result as forecasts/<run timestamp>.nc.
    dataset_file_path = find_latest_meps_file()

    # Parse the run timestamp before the download, so an unexpected file name
    # fails fast instead of producing "forecasts/None.nc".
    timestamp = extract_timestamp(dataset_file_path.split("/")[-1])
    if timestamp is None:
        raise ValueError(
            f"Could not parse a run timestamp from {dataset_file_path!r}"
        )

    subset = load_meps_for_location(dataset_file_path)
    subsampled_subset = subsample_lat_lon(subset, lat_stride=2, lon_stride=2)

    os.makedirs("forecasts", exist_ok=True)
    output_path = f"forecasts/{timestamp}.nc"
    subsampled_subset.to_netcdf(output_path)
    print(f"Subsampled dataset saved to {output_path}")
|