# Climate-ML-Foundation-Models / fengwu_utils.py
# qq1990's picture
# init
# 100edb4
# raw
# history blame
# 12.8 kB
import streamlit as st
import torch
# from Pangu-Weather import *
import numpy as np
from datetime import datetime
import numpy as np
import onnx
import onnxruntime as ort
import matplotlib.pyplot as plt
import cartopy.crs as ccrs
import io
def fengwu_config_data():
    """Render the FengWu data-input section and return the two uploads.

    Shows a description of the expected input arrays, two ``.npy`` file
    uploaders (initial time and six hours later) and a list of references.

    Returns:
    - (input1_file, input2_file): the values returned by the two
      ``st.file_uploader`` widgets, in that order.
    """
    st.subheader("FengWu Model Data Input")

    # Detailed data description section.
    # The text is kept flush-left so that Markdown never sees leading
    # indentation, and sections are separated by blank lines so headings are
    # not absorbed into the preceding list item.
    st.markdown("""
**Input Data Requirements (FengWu):**

FengWu takes **two consecutive six-hour atmospheric states** as input:

1. **First Input (input1.npy)**: Atmospheric data at the initial time.
2. **Second Input (input2.npy)**: Atmospheric data 6 hours later.

**Shape & Variables:**

Each input is a NumPy array with shape `(69, 721, 1440)`:

- **Dimension 0 (69 features):**

The first 4 features are surface variables:

1. U10 (10-meter Eastward Wind)
2. V10 (10-meter Northward Wind)
3. T2M (2-meter Temperature)
4. MSL (Mean Sea Level Pressure)

These are followed by non-surface variables, each with 13 pressure levels:

- Z (Geopotential)
- Q (Specific Humidity)
- U (Eastward Wind)
- V (Northward Wind)
- T (Temperature)

The 13 vertical levels are: [50, 100, 150, 200, 250, 300, 400, 500, 600, 700, 850, 925, 1000] hPa

The total count is:

- Surface vars: 4
- For each non-surface var (Z, Q, U, V, T): 13 levels = 65 vars

4 (surface) + 65 (5 vars * 13 levels) = 69 total features.

**Spatial & Coordinate Details:**

- Latitude dimension (721 points) ranges from 90°N to 90°S with ~0.25° spacing.
- Longitude dimension (1440 points) ranges from 0° to 359.75°E with ~0.25° spacing.
- Ensure data is single precision floats (`.astype(np.float32)`).

**Data Frequency & Forecasting Scheme:**

- `input1.npy` corresponds to a given time (e.g., 06:00 UTC Jan 1, 2018).
- `input2.npy` corresponds to 6 hours later (e.g., 12:00 UTC Jan 1, 2018).
- The model predicts future states at subsequent 6-hour intervals.

**Converting Your Data:**

- ERA5 `.nc` files or ECMWF `.grib` files can be converted to `.npy` using appropriate Python packages (`netCDF4` or `pygrib`).
- Ensure you follow the exact variable and level ordering as described.
""")

    # File uploaders for FengWu input data (two consecutive time steps)
    st.markdown("### Upload Your FengWu Input Data Files")
    input1_file = st.file_uploader(
        "Upload input1.npy (Initial Time)",
        type=["npy"],
        key="fengwu_input1",
    )
    input2_file = st.file_uploader(
        "Upload input2.npy (6 Hours Later)",
        type=["npy"],
        key="fengwu_input2",
    )

    st.markdown("---")
    st.markdown("### References & Resources")
    st.markdown("""
- **Research Paper:** [FengWu: Pushing the Skillful Global Medium-range Weather Forecast beyond 10 Days Lead](https://arxiv.org/abs/2304.02948)
- **GitHub Source Code:** [Fengwu on GitHub](https://github.com/OpenEarthLab/FengWu?tab=readme-ov-file)
""")
    return input1_file, input2_file
@st.cache_resource
def inference_6hrs_fengwu(input1, input2):
    """Run a single FengWu step and return the de-normalized forecast.

    Parameters:
    - input1: array for the first time step. It is normalized against the
      per-channel statistics in ``FengWu/data_mean.npy`` / ``data_std.npy``,
      so its leading axis must match them (69 channels, as implied by the
      ``[:69]`` slicing below).
    - input2: array for the second time step, same layout as ``input1``.

    Returns:
    - np.ndarray holding the first 69 output channels of the model,
      converted back to physical units with the same mean/std.
    """
    # Set the behavior of onnxruntime: disable its memory arenas/reuse.
    session_options = ort.SessionOptions()
    session_options.enable_cpu_mem_arena = False
    session_options.enable_mem_pattern = False
    session_options.enable_mem_reuse = False
    # Increase the number for faster inference and more memory consumption
    session_options.intra_op_num_threads = 1

    # Set the behavior of the CUDA provider
    cuda_provider_options = {'arena_extend_strategy': 'kSameAsRequested'}

    # Initialize the onnxruntime session for the FengWu model. The session
    # reads the file itself, so the model is not parsed separately with
    # onnx.load (that result was never used).
    session = ort.InferenceSession(
        'FengWu/fengwu_v2.onnx',
        sess_options=session_options,
        providers=[('CUDAExecutionProvider', cuda_provider_options)],
    )

    # Per-channel statistics, reshaped to broadcast over the two grid axes.
    data_mean = np.load("FengWu/data_mean.npy")[:, np.newaxis, np.newaxis]
    data_std = np.load("FengWu/data_std.npy")[:, np.newaxis, np.newaxis]

    input1_after_norm = (input1 - data_mean) / data_std
    input2_after_norm = (input2 - data_mean) / data_std

    # Stack both time steps along the channel axis and add a batch axis.
    model_input = np.concatenate(
        (input1_after_norm, input2_after_norm), axis=0
    )[np.newaxis, :, :, :]
    model_input = model_input.astype(np.float32)

    output = session.run(None, {'input': model_input})[0]
    # Only the first 69 channels of the first batch item are de-normalized.
    return (output[0, :69] * data_std) + data_mean
@st.cache_resource
def inference_12hrs_fengwu(input1, input2):
    """Run two chained FengWu steps and return the de-normalized forecast.

    Parameters:
    - input1: array for the first time step. It is normalized against the
      per-channel statistics in ``FengWu/data_mean.npy`` / ``data_std.npy``,
      so its leading axis must match them (69 channels, as implied by the
      ``[:69]`` slicing below).
    - input2: array for the second time step, same layout as ``input1``.

    Returns:
    - np.ndarray holding the first 69 output channels of the second model
      run, converted back to physical units with the same mean/std.
    """
    # Set the behavior of onnxruntime: disable its memory arenas/reuse.
    session_options = ort.SessionOptions()
    session_options.enable_cpu_mem_arena = False
    session_options.enable_mem_pattern = False
    session_options.enable_mem_reuse = False
    # Increase the number for faster inference and more memory consumption
    session_options.intra_op_num_threads = 1

    # Set the behavior of the CUDA provider
    cuda_provider_options = {'arena_extend_strategy': 'kSameAsRequested'}

    # Initialize the onnxruntime session for the FengWu model. The session
    # reads the file itself, so the model is not parsed separately with
    # onnx.load (that result was never used).
    session = ort.InferenceSession(
        'FengWu/fengwu_v2.onnx',
        sess_options=session_options,
        providers=[('CUDAExecutionProvider', cuda_provider_options)],
    )

    # Per-channel statistics, reshaped to broadcast over the two grid axes.
    data_mean = np.load("FengWu/data_mean.npy")[:, np.newaxis, np.newaxis]
    data_std = np.load("FengWu/data_std.npy")[:, np.newaxis, np.newaxis]

    input1_after_norm = (input1 - data_mean) / data_std
    input2_after_norm = (input2 - data_mean) / data_std

    # Stack both time steps along the channel axis and add a batch axis.
    model_input = np.concatenate(
        (input1_after_norm, input2_after_norm), axis=0
    )[np.newaxis, :, :, :]
    model_input = model_input.astype(np.float32)

    output = None
    for step in range(2):
        if step:
            # Slide the window: drop the older state and append the newest
            # prediction. Done before each later run rather than after every
            # run, so no unused tensor is built after the final step.
            model_input = np.concatenate(
                (model_input[:, 69:], output[:, :69]), axis=1
            )
        output = session.run(None, {'input': model_input})[0]

    # Only the first 69 channels of the first batch item are de-normalized.
    return (output[0, :69] * data_std) + data_mean
@st.cache_resource
def inference_custom_hrs_fengwu(input1, input2, forecast_hours):
    """Chain FengWu steps up to ``forecast_hours`` and return the forecast.

    Each model run advances the state by one step, which this function
    counts as 6 hours; ``forecast_hours // 6`` runs are chained.

    Parameters:
    - input1: array for the first time step. It is normalized against the
      per-channel statistics in ``FengWu/data_mean.npy`` / ``data_std.npy``,
      so its leading axis must match them (69 channels, as implied by the
      ``[:69]`` slicing below).
    - input2: array for the second time step, same layout as ``input1``.
    - forecast_hours: forecast horizon in hours; must be a positive
      multiple of 6.

    Returns:
    - np.ndarray holding the first 69 output channels of the last model
      run, converted back to physical units with the same mean/std.

    Raises:
    - ValueError: if ``forecast_hours`` is not a positive multiple of 6.
    """
    # range() needs an integer step count; true division would hand it a
    # float. Validate up front so a bad horizon fails before the model loads.
    num_steps, leftover = divmod(forecast_hours, 6)
    if num_steps < 1 or leftover:
        raise ValueError(
            f"forecast_hours must be a positive multiple of 6, got {forecast_hours!r}"
        )
    num_steps = int(num_steps)

    # Set the behavior of onnxruntime: disable its memory arenas/reuse.
    session_options = ort.SessionOptions()
    session_options.enable_cpu_mem_arena = False
    session_options.enable_mem_pattern = False
    session_options.enable_mem_reuse = False
    # Increase the number for faster inference and more memory consumption
    session_options.intra_op_num_threads = 1

    # Set the behavior of the CUDA provider
    cuda_provider_options = {'arena_extend_strategy': 'kSameAsRequested'}

    # Initialize the onnxruntime session for the FengWu model. The session
    # reads the file itself, so the model is not parsed separately with
    # onnx.load (that result was never used).
    session = ort.InferenceSession(
        'FengWu/fengwu_v2.onnx',
        sess_options=session_options,
        providers=[('CUDAExecutionProvider', cuda_provider_options)],
    )

    # Per-channel statistics, reshaped to broadcast over the two grid axes.
    data_mean = np.load("FengWu/data_mean.npy")[:, np.newaxis, np.newaxis]
    data_std = np.load("FengWu/data_std.npy")[:, np.newaxis, np.newaxis]

    input1_after_norm = (input1 - data_mean) / data_std
    input2_after_norm = (input2 - data_mean) / data_std

    # Stack both time steps along the channel axis and add a batch axis.
    model_input = np.concatenate(
        (input1_after_norm, input2_after_norm), axis=0
    )[np.newaxis, :, :, :]
    model_input = model_input.astype(np.float32)

    output = None
    for step in range(num_steps):
        if step:
            # Slide the window: drop the older state and append the newest
            # prediction. Done before each later run rather than after every
            # run, so no unused tensor is built after the final step.
            model_input = np.concatenate(
                (model_input[:, 69:], output[:, :69]), axis=1
            )
        output = session.run(None, {'input': model_input})[0]

    # Only the first 69 channels of the first batch item are de-normalized.
    return (output[0, :69] * data_std) + data_mean
def _render_fengwu_panel(data, label, key_prefix):
    """Render the variable selectors and the map for one FengWu state.

    Parameters:
    - data: np.ndarray indexed as (feature, latitude, longitude).
    - label: word used in the subheader and plot title ("Initial"/"Predicted").
    - key_prefix: fragment inserted into the Streamlit widget keys so the
      two panels keep independent widget state.
    """
    # Surface and upper-level variable definitions
    surface_vars = ["U10", "V10", "T2M", "MSL"]
    upper_vars = [
        "Z (Geopotential)",
        "Q (Specific Humidity)",
        "U (Eastward Wind)",
        "V (Northward Wind)",
        "T (Temperature)",
    ]
    upper_levels = [50, 100, 150, 200, 250, 300, 400, 500, 600, 700, 850, 925, 1000]
    # Upper-air groups follow the surface variables and each spans one index
    # per pressure level: Z starts at 4, Q at 17, U at 30, V at 43, T at 56.
    var_group_start = {
        name: len(surface_vars) + group * len(upper_levels)
        for group, name in enumerate(upper_vars)
    }

    st.subheader(f"{label} Data Visualization (Fengwu)")
    source_col, var_col = st.columns([1, 1])
    with source_col:
        data_choice = st.selectbox(
            "Data Source",
            ["Upper-Air Data", "Surface Data"],
            key=f"fengwu_{key_prefix}_data_choice",
        )
    is_upper_air = data_choice == "Upper-Air Data"
    with var_col:
        if is_upper_air:
            variable = st.selectbox(
                "Variable", upper_vars, key=f"fengwu_{key_prefix}_upper_var"
            )
        else:
            variable = st.selectbox(
                "Variable", surface_vars, key=f"fengwu_{key_prefix}_surface_var"
            )

    # Select the 2-D slice to draw
    if is_upper_air:
        level_hpa = st.select_slider(
            "Select Pressure Level (hPa)",
            options=upper_levels,
            value=850,  # Default to 850hPa
            help="Select the pressure level in hPa.",
            key=f"fengwu_{key_prefix}_level_hpa_slider",
        )
        feature_index = var_group_start[variable] + upper_levels.index(level_hpa)
        title = f"{label} Upper-Air: {variable} at {level_hpa}hPa"
    else:
        feature_index = surface_vars.index(variable)
        title = f"{label} Surface: {variable}"
    field = data[feature_index, :, :]

    fig, ax = plt.subplots(
        figsize=(10, 5), subplot_kw={'projection': ccrs.PlateCarree()}
    )
    ax.set_title(title)
    # Row 0 of the grid is 90N, so it has to be drawn at the TOP of the
    # extent (origin='upper'); origin='lower' would flip the field
    # north-south relative to the coastlines.
    image = ax.imshow(
        field,
        extent=[0, 360, -90, 90],  # lon 0E..360E, lat 90S..90N
        origin='upper',
        cmap='coolwarm',
        transform=ccrs.PlateCarree(),
    )
    ax.coastlines()
    fig.colorbar(image, ax=ax, orientation='horizontal', pad=0.05)
    st.pyplot(fig)
    # pyplot keeps every figure it creates alive until it is closed; release
    # it so figures do not pile up each time the function is called again.
    plt.close(fig)


def plot_fengwu_output(initial_data, predicted_data):
    """
    Plot initial and predicted Fengwu model outputs.

    Draws one interactive map panel for each array and offers the predicted
    array for download as a ``.npy`` file.

    Parameters:
    - initial_data: np.ndarray of shape (69, 721, 1440) representing the initial or input state.
    - predicted_data: np.ndarray of shape (69, 721, 1440) representing the predicted state by Fengwu.
    """
    # --- Initial Data Visualization ---
    _render_fengwu_panel(initial_data, "Initial", "init")

    # --- Predicted Data Visualization ---
    _render_fengwu_panel(predicted_data, "Predicted", "pred")

    # --- Download Buttons ---
    st.subheader("Download Predicted Fengwu Data")
    # Convert predicted_data to binary format for download
    buffer_pred = io.BytesIO()
    np.save(buffer_pred, predicted_data)
    buffer_pred.seek(0)
    st.download_button(
        label="Download Predicted Fengwu Data",
        data=buffer_pred,
        file_name="predicted_fengwu.npy",
        mime="application/octet-stream",
    )