simen committed on
Commit
b4c50fc
·
1 Parent(s): fbbb329

add subsampling

Browse files
Files changed (1) hide show
  1. preprocess_forecast.py +32 -2
preprocess_forecast.py CHANGED
@@ -3,6 +3,7 @@ from siphon.catalog import TDSCatalog
3
  import numpy as np
4
  import datetime
5
  import re
 
6
 
7
 
8
  # %%
@@ -162,12 +163,41 @@ def load_meps_for_location(file_path=None, altitude_min=0, altitude_max=3000):
162
  return subset
163
 
164
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
165
  if __name__ == "__main__":
166
  dataset_file_path = find_latest_meps_file()
167
 
168
  subset = load_meps_for_location(dataset_file_path)
169
 
 
 
170
  os.makedirs("forecasts", exist_ok=True)
171
 
172
- timestmap = extract_timestamp(dataset_file_path.split("/")[-1])
173
- subset.to_netcdf(f"forecasts/{timestmap}.nc")
 
 
3
  import numpy as np
4
  import datetime
5
  import re
6
+ import os
7
 
8
 
9
  # %%
 
163
  return subset
164
 
165
 
166
def subsample_lat_lon(dataset, lat_stride=2, lon_stride=2):
    """
    Subsample the dataset by keeping every n-th point along 'y' and 'x'.

    The dataset is indexed positionally along its 'y' and 'x' dimensions,
    which this module treats as the latitude and longitude axes.

    Parameters:
    - dataset: xarray.Dataset, the dataset to subsample.
    - lat_stride: int, keep every lat_stride-th point along 'y' (must be >= 1).
    - lon_stride: int, keep every lon_stride-th point along 'x' (must be >= 1).

    Returns:
    - xarray.Dataset, the subsampled dataset.

    Raises:
    - ValueError: if the dataset lacks a 'y' or 'x' dimension, or if a
      stride is zero or negative.
    """
    # Check if latitude and longitude dimensions are present
    if "y" not in dataset.dims or "x" not in dataset.dims:
        raise ValueError(
            "Dataset does not contain 'y' and 'x' dimensions for latitude and longitude."
        )

    # Reject strides that cannot express "every n-th point": zero is an
    # invalid slice step and a negative step would reverse the axis instead
    # of thinning it. None is passed through (slice treats it as a step of 1).
    for name, stride in (("lat_stride", lat_stride), ("lon_stride", lon_stride)):
        if stride is not None and stride < 1:
            raise ValueError(f"{name} must be a positive integer, got {stride!r}")

    # Subsample latitude and longitude
    subsampled_dataset = dataset.isel(
        y=slice(None, None, lat_stride), x=slice(None, None, lon_stride)
    )

    return subsampled_dataset
190
+
191
+
if __name__ == "__main__":
    # Resolve the forecast file to process and load the subset from it.
    dataset_file_path = find_latest_meps_file()
    subset = load_meps_for_location(dataset_file_path)

    # Thin the grid: keep every second point along both 'y' and 'x'.
    coarse_subset = subsample_lat_lon(subset, lat_stride=2, lon_stride=2)

    # Make sure the output directory exists before writing into it.
    os.makedirs("forecasts", exist_ok=True)

    # The output name is derived from the last path component of the source.
    file_name = dataset_file_path.rsplit("/", 1)[-1]
    timestamp = extract_timestamp(file_name)
    coarse_subset.to_netcdf(f"forecasts/{timestamp}.nc")
    print(f"Subsampled dataset saved to forecasts/{timestamp}.nc")