import os
import argparse
import logging
import pickle
import threading
import time
import warnings
from datetime import datetime, timedelta
from collections import defaultdict
import csv
import json

# Suppress warnings for cleaner output
warnings.filterwarnings('ignore', category=FutureWarning)
warnings.filterwarnings('ignore', category=UserWarning, module='umap')
warnings.filterwarnings('ignore', category=UserWarning, module='sklearn')
import gradio as gr
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.animation as animation
from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas
import cartopy.crs as ccrs
import cartopy.feature as cfeature
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
from sklearn.manifold import TSNE
from sklearn.cluster import DBSCAN, KMeans
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, r2_score
from scipy.interpolate import interp1d, RBFInterpolator, griddata
from scipy.ndimage import gaussian_filter
import statsmodels.api as sm
import requests
import tempfile
import shutil
import xarray as xr
import urllib.request
from urllib.error import URLError
import ssl
# NEW: Advanced ML imports
try:
    import umap.umap_ as umap
    UMAP_AVAILABLE = True
except ImportError:
    UMAP_AVAILABLE = False
    print("UMAP not available - clustering features limited")
# Optional CNN imports with robust error handling
CNN_AVAILABLE = False
try:
    # Set environment variables before importing TensorFlow
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'  # Suppress TensorFlow warnings
    import tensorflow as tf
    from tensorflow.keras import layers, models
    # Test whether TensorFlow actually works
    tf.config.set_visible_devices([], 'GPU')  # Disable GPU to avoid conflicts
    CNN_AVAILABLE = True
    print("TensorFlow successfully loaded - CNN features enabled")
except Exception as e:
    CNN_AVAILABLE = False
    print(f"TensorFlow not available - CNN features disabled: {str(e)[:100]}...")
try:
    import cdsapi
    CDSAPI_AVAILABLE = True
except ImportError:
    CDSAPI_AVAILABLE = False

import tropycal.tracks as tracks

# Disable SSL certificate verification for oceanic data downloads
ssl._create_default_https_context = ssl._create_unverified_context
# -----------------------------
# Configuration and Setup
# -----------------------------
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)

# Argument parser removed to simplify startup
DATA_PATH = '/tmp/typhoon_data' if 'SPACE_ID' in os.environ else tempfile.gettempdir()

# Ensure the data directory exists and is writable
try:
    os.makedirs(DATA_PATH, exist_ok=True)
    # Test write permissions
    test_file = os.path.join(DATA_PATH, 'test_write.txt')
    with open(test_file, 'w') as f:
        f.write('test')
    os.remove(test_file)
    logging.info(f"Data directory is writable: {DATA_PATH}")
except Exception as e:
    logging.warning(f"Data directory not writable, using temp dir: {e}")
    DATA_PATH = tempfile.mkdtemp()
    logging.info(f"Using temporary directory: {DATA_PATH}")

# File paths
ONI_DATA_PATH = os.path.join(DATA_PATH, 'oni_data.csv')
TYPHOON_DATA_PATH = os.path.join(DATA_PATH, 'processed_typhoon_data.csv')
MERGED_DATA_CSV = os.path.join(DATA_PATH, 'merged_typhoon_era5_data.csv')

# IBTrACS settings
BASIN_FILES = {
    'EP': 'ibtracs.EP.list.v04r01.csv',
    'NA': 'ibtracs.NA.list.v04r01.csv',
    'WP': 'ibtracs.WP.list.v04r01.csv'
}
IBTRACS_BASE_URL = 'https://www.ncei.noaa.gov/data/international-best-track-archive-for-climate-stewardship-ibtracs/v04r01/access/csv/'
LOCAL_IBTRACS_PATH = os.path.join(DATA_PATH, 'ibtracs.WP.list.v04r01.csv')
CACHE_FILE = os.path.join(DATA_PATH, 'ibtracs_cache.pkl')
CACHE_EXPIRY_DAYS = 1
# -----------------------------
# ENHANCED: Color Maps and Standards with TD Support - FIXED TAIWAN CLASSIFICATION
# -----------------------------

# Enhanced color mapping with TD support (for Plotly)
enhanced_color_map = {
    'Unknown': 'rgb(200, 200, 200)',
    'Tropical Depression': 'rgb(128, 128, 128)',  # Gray for TD
    'Tropical Storm': 'rgb(0, 0, 255)',
    'C1 Typhoon': 'rgb(0, 255, 255)',
    'C2 Typhoon': 'rgb(0, 255, 0)',
    'C3 Strong Typhoon': 'rgb(255, 255, 0)',
    'C4 Very Strong Typhoon': 'rgb(255, 165, 0)',
    'C5 Super Typhoon': 'rgb(255, 0, 0)'
}

# Matplotlib-compatible color mapping (hex colors)
matplotlib_color_map = {
    'Unknown': '#C8C8C8',
    'Tropical Depression': '#808080',     # Gray for TD
    'Tropical Storm': '#0000FF',          # Blue
    'C1 Typhoon': '#00FFFF',              # Cyan
    'C2 Typhoon': '#00FF00',              # Green
    'C3 Strong Typhoon': '#FFFF00',       # Yellow
    'C4 Very Strong Typhoon': '#FFA500',  # Orange
    'C5 Super Typhoon': '#FF0000'         # Red
}

# FIXED: Taiwan color mapping with correct CMA 2006 standards
taiwan_color_map_fixed = {
    'Tropical Depression': '#808080',    # Gray
    'Tropical Storm': '#0000FF',         # Blue
    'Severe Tropical Storm': '#00FFFF',  # Cyan
    'Typhoon': '#FFFF00',                # Yellow
    'Severe Typhoon': '#FFA500',         # Orange
    'Super Typhoon': '#FF0000'           # Red
}
def rgb_string_to_hex(rgb_string):
    """Convert an 'rgb(r, g, b)' string to a hex color for matplotlib."""
    try:
        # Extract the three numbers from the 'rgb(r, g, b)' format
        import re
        numbers = re.findall(r'\d+', rgb_string)
        if len(numbers) == 3:
            r, g, b = map(int, numbers)
            return f'#{r:02x}{g:02x}{b:02x}'
        else:
            return '#808080'  # Default gray
    except Exception:
        return '#808080'  # Default gray

def get_matplotlib_color(category):
    """Get a matplotlib-compatible color for a storm category."""
    return matplotlib_color_map.get(category, '#808080')

def get_taiwan_color_fixed(category):
    """Get the corrected Taiwan-standard color for a category."""
    return taiwan_color_map_fixed.get(category, '#808080')
# Cluster colors for route visualization
CLUSTER_COLORS = [
    '#FF6B6B', '#4ECDC4', '#45B7D1', '#96CEB4', '#FFEAA7',
    '#DDA0DD', '#98D8C8', '#F7DC6F', '#BB8FCE', '#85C1E9',
    '#F8C471', '#82E0AA', '#F1948A', '#85C1E9', '#D2B4DE'
]

# Route prediction colors
ROUTE_COLORS = [
    '#FF0066', '#00FF66', '#6600FF', '#FF6600', '#0066FF',
    '#FF00CC', '#00FFCC', '#CC00FF', '#CCFF00', '#00CCFF'
]

# Original color map for backward compatibility
color_map = {
    'C5 Super Typhoon': 'rgb(255, 0, 0)',
    'C4 Very Strong Typhoon': 'rgb(255, 165, 0)',
    'C3 Strong Typhoon': 'rgb(255, 255, 0)',
    'C2 Typhoon': 'rgb(0, 255, 0)',
    'C1 Typhoon': 'rgb(0, 255, 255)',
    'Tropical Storm': 'rgb(0, 0, 255)',
    'Tropical Depression': 'rgb(128, 128, 128)'
}

atlantic_standard = {
    'C5 Super Typhoon': {'wind_speed': 137, 'color': 'Red', 'hex': '#FF0000'},
    'C4 Very Strong Typhoon': {'wind_speed': 113, 'color': 'Orange', 'hex': '#FFA500'},
    'C3 Strong Typhoon': {'wind_speed': 96, 'color': 'Yellow', 'hex': '#FFFF00'},
    'C2 Typhoon': {'wind_speed': 83, 'color': 'Green', 'hex': '#00FF00'},
    'C1 Typhoon': {'wind_speed': 64, 'color': 'Cyan', 'hex': '#00FFFF'},
    'Tropical Storm': {'wind_speed': 34, 'color': 'Blue', 'hex': '#0000FF'},
    'Tropical Depression': {'wind_speed': 0, 'color': 'Gray', 'hex': '#808080'}
}

# FIXED: Taiwan standard with correct CMA 2006 thresholds
taiwan_standard_fixed = {
    'Super Typhoon': {'wind_speed_ms': 51.0, 'wind_speed_kt': 99.2, 'color': 'Red', 'hex': '#FF0000'},
    'Severe Typhoon': {'wind_speed_ms': 41.5, 'wind_speed_kt': 80.7, 'color': 'Orange', 'hex': '#FFA500'},
    'Typhoon': {'wind_speed_ms': 32.7, 'wind_speed_kt': 63.6, 'color': 'Yellow', 'hex': '#FFFF00'},
    'Severe Tropical Storm': {'wind_speed_ms': 24.5, 'wind_speed_kt': 47.6, 'color': 'Cyan', 'hex': '#00FFFF'},
    'Tropical Storm': {'wind_speed_ms': 17.2, 'wind_speed_kt': 33.4, 'color': 'Blue', 'hex': '#0000FF'},
    'Tropical Depression': {'wind_speed_ms': 0, 'wind_speed_kt': 0, 'color': 'Gray', 'hex': '#808080'}
}
# -----------------------------
# ENHANCED: Oceanic Data Integration
# -----------------------------
class OceanicDataManager:
    """Manages real-time oceanic data for enhanced typhoon prediction."""

    def __init__(self):
        self.sst_base_url = "https://www.ncei.noaa.gov/erddap/griddap/NOAA_OISST_V2.nc"
        self.slp_base_url = "https://psl.noaa.gov/thredds/dodsC/Datasets/ncep.reanalysis.dailyavgs/surface/slp.nc"
        self.cache_dir = os.path.join(DATA_PATH, 'oceanic_cache')
        self.create_cache_directory()

    def create_cache_directory(self):
        """Create the cache directory for oceanic data."""
        try:
            os.makedirs(self.cache_dir, exist_ok=True)
        except Exception as e:
            logging.warning(f"Could not create cache directory: {e}")
            self.cache_dir = tempfile.mkdtemp()
    def get_sst_data(self, lat_min, lat_max, lon_min, lon_max, date_start, date_end=None):
        """
        Fetch Sea Surface Temperature data from NOAA OISST v2.

        Parameters:
            lat_min, lat_max: Latitude bounds
            lon_min, lon_max: Longitude bounds
            date_start: Start date (datetime or string)
            date_end: End date (datetime or string, optional)
        """
        try:
            if date_end is None:
                date_end = date_start

            # Convert dates to strings if needed
            if isinstance(date_start, datetime):
                date_start_str = date_start.strftime('%Y-%m-%d')
            else:
                date_start_str = str(date_start)
            if isinstance(date_end, datetime):
                date_end_str = date_end.strftime('%Y-%m-%d')
            else:
                date_end_str = str(date_end)

            # Construct the ERDDAP URL with subset parameters
            url_params = (
                f"?sst[({date_start_str}):1:({date_end_str})]"
                f"[({lat_min}):1:({lat_max})]"
                f"[({lon_min}):1:({lon_max})]"
            )
            full_url = self.sst_base_url + url_params
            logging.info(f"Fetching SST data from: {full_url}")

            # Use xarray to open the remote dataset
            with warnings.catch_warnings():
                warnings.simplefilter("ignore")
                ds = xr.open_dataset(full_url)

            # Extract SST data
            sst_data = ds['sst'].values
            lats = ds['latitude'].values
            lons = ds['longitude'].values
            times = ds['time'].values
            ds.close()

            return {
                'sst': sst_data,
                'latitude': lats,
                'longitude': lons,
                'time': times,
                'success': True
            }
        except Exception as e:
            logging.error(f"Error fetching SST data: {e}")
            return self._get_fallback_sst_data(lat_min, lat_max, lon_min, lon_max)
    def get_slp_data(self, lat_min, lat_max, lon_min, lon_max, date_start, date_end=None):
        """
        Fetch Sea Level Pressure data from the NCEP/NCAR Reanalysis.

        Parameters are the same as for get_sst_data.
        """
        try:
            if date_end is None:
                date_end = date_start

            # Convert dates for OPeNDAP access
            if isinstance(date_start, datetime):
                # NCEP uses different time indexing; may need adjustment
                date_start_str = date_start.strftime('%Y-%m-%d')
            else:
                date_start_str = str(date_start)
            if isinstance(date_end, datetime):
                date_end_str = date_end.strftime('%Y-%m-%d')
            else:
                date_end_str = str(date_end)

            logging.info(f"Fetching SLP data for {date_start_str} to {date_end_str}")

            # Use xarray to open the OPeNDAP dataset
            with warnings.catch_warnings():
                warnings.simplefilter("ignore")
                # Open the full dataset (it can be large, so subset immediately)
                ds = xr.open_dataset(self.slp_base_url)

            # Subset by time and location
            # Note: coordinate names vary between datasets, so detect them
            lat_coord = 'lat' if 'lat' in ds.dims else 'latitude'
            lon_coord = 'lon' if 'lon' in ds.dims else 'longitude'

            # Subset the data
            subset = ds.sel(
                time=slice(date_start_str, date_end_str),
                **{lat_coord: slice(lat_min, lat_max),
                   lon_coord: slice(lon_min, lon_max)}
            )

            # Extract SLP data
            slp_data = subset['slp'].values
            lats = subset[lat_coord].values
            lons = subset[lon_coord].values
            times = subset['time'].values
            ds.close()

            return {
                'slp': slp_data,
                'latitude': lats,
                'longitude': lons,
                'time': times,
                'success': True
            }
        except Exception as e:
            logging.error(f"Error fetching SLP data: {e}")
            return self._get_fallback_slp_data(lat_min, lat_max, lon_min, lon_max)
    def _get_fallback_sst_data(self, lat_min, lat_max, lon_min, lon_max):
        """Generate realistic fallback SST data based on climatology."""
        # Create a reasonable grid
        lats = np.linspace(lat_min, lat_max, 20)
        lons = np.linspace(lon_min, lon_max, 20)

        # Generate climatological SST values for the Western Pacific
        sst_values = np.zeros((1, len(lats), len(lons)))
        for i, lat in enumerate(lats):
            for j, lon in enumerate(lons):
                # Climatological SST estimate for the Western Pacific
                if lat < 10:    # Tropical
                    base_sst = 29.0
                elif lat < 20:  # Subtropical
                    base_sst = 28.0 - (lat - 10) * 0.3
                elif lat < 30:  # Temperate
                    base_sst = 25.0 - (lat - 20) * 0.5
                else:           # Cool waters
                    base_sst = 20.0 - (lat - 30) * 0.3
                # Add some realistic variation
                sst_values[0, i, j] = base_sst + np.random.normal(0, 0.5)

        return {
            'sst': sst_values,
            'latitude': lats,
            'longitude': lons,
            'time': [datetime.now()],
            'success': False,
            'note': 'Using climatological fallback data'
        }
    def _get_fallback_slp_data(self, lat_min, lat_max, lon_min, lon_max):
        """Generate realistic fallback SLP data."""
        lats = np.linspace(lat_min, lat_max, 20)
        lons = np.linspace(lon_min, lon_max, 20)
        slp_values = np.zeros((1, len(lats), len(lons)))
        for i, lat in enumerate(lats):
            for j, lon in enumerate(lons):
                # Climatological SLP estimate
                if lat < 30:  # Subtropical high influence
                    base_slp = 1013 + 3 * np.cos(np.radians(lat * 6))
                else:         # Mid-latitude
                    base_slp = 1010 - (lat - 30) * 0.2
                slp_values[0, i, j] = base_slp + np.random.normal(0, 2)

        return {
            'slp': slp_values,
            'latitude': lats,
            'longitude': lons,
            'time': [datetime.now()],
            'success': False,
            'note': 'Using climatological fallback data'
        }
    def interpolate_data_to_point(self, data_dict, target_lat, target_lon, variable='sst'):
        """Interpolate gridded data to a specific point."""
        try:
            data = data_dict[variable]
            lats = data_dict['latitude']
            lons = data_dict['longitude']

            # Take the most recent time step if multiple are available
            if len(data.shape) == 3:  # (time, lat, lon)
                data_2d = data[-1, :, :]
            else:                     # (lat, lon)
                data_2d = data

            # Create coordinate grids
            lon_grid, lat_grid = np.meshgrid(lons, lats)

            # Flatten for interpolation
            points = np.column_stack((lat_grid.flatten(), lon_grid.flatten()))
            values = data_2d.flatten()

            # Remove NaN values
            valid_mask = ~np.isnan(values)
            points = points[valid_mask]
            values = values[valid_mask]
            if len(values) == 0:
                return np.nan

            # Interpolate to the target point
            interpolated_value = griddata(
                points, values, (target_lat, target_lon),
                method='linear', fill_value=np.nan
            )
            # If linear interpolation fails, fall back to nearest neighbor
            if np.isnan(interpolated_value):
                interpolated_value = griddata(
                    points, values, (target_lat, target_lon),
                    method='nearest'
                )
            return interpolated_value
        except Exception as e:
            logging.error(f"Error interpolating {variable} data: {e}")
            return np.nan
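
# Illustrative usage sketch for OceanicDataManager (defined but never called at
# import time). The bounds, date, and target point below are example values
# chosen to cover the Western Pacific main development region; they are
# assumptions for this demo, not values used elsewhere in the module. A real
# fetch needs network access to the NOAA ERDDAP/OPeNDAP endpoints; otherwise
# the climatological fallback grids are returned with 'success': False.
def _demo_oceanic_manager():
    manager = OceanicDataManager()
    sst = manager.get_sst_data(5, 25, 120, 160, datetime(2024, 9, 1))
    point_sst = manager.interpolate_data_to_point(sst, 15.0, 140.0, variable='sst')
    logging.info(f"SST at 15N, 140E: {point_sst} (real data: {sst['success']})")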
# Global oceanic data manager
oceanic_manager = None

# -----------------------------
# Utility Functions for HF Spaces
# -----------------------------
def safe_file_write(file_path, data_frame, backup_dir=None):
    """Safely write a DataFrame to CSV with backup and error handling."""
    try:
        # Create the directory if it doesn't exist
        os.makedirs(os.path.dirname(file_path), exist_ok=True)

        # Write to a temporary file first
        temp_path = file_path + '.tmp'
        data_frame.to_csv(temp_path, index=False)

        # If successful, rename to the final file (atomic on most filesystems)
        os.rename(temp_path, file_path)
        logging.info(f"Successfully saved {len(data_frame)} records to {file_path}")
        return True
    except PermissionError as e:
        logging.warning(f"Permission denied writing to {file_path}: {e}")
        if backup_dir:
            try:
                backup_path = os.path.join(backup_dir, os.path.basename(file_path))
                data_frame.to_csv(backup_path, index=False)
                logging.info(f"Saved to backup location: {backup_path}")
                return True
            except Exception as backup_e:
                logging.error(f"Failed to save to backup location: {backup_e}")
        return False
    except Exception as e:
        logging.error(f"Error saving file {file_path}: {e}")
        # Clean up the temp file if it exists
        temp_path = file_path + '.tmp'
        if os.path.exists(temp_path):
            try:
                os.remove(temp_path)
            except OSError:
                pass
        return False
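
# Illustrative usage sketch (the file name and DataFrame contents are made-up
# example values): the write-then-rename pattern above means readers never see
# a half-written CSV.
def _demo_safe_file_write():
    df = pd.DataFrame({'SID': ['WP012024'], 'USA_WIND': [65]})
    ok = safe_file_write(os.path.join(DATA_PATH, 'demo_storms.csv'), df)
    logging.info(f"Demo write succeeded: {ok}")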
def get_fallback_data_dir():
    """Get a fallback data directory that's guaranteed to be writable."""
    fallback_dirs = [
        tempfile.gettempdir(),
        '/tmp',
        os.path.expanduser('~'),
        os.getcwd()
    ]
    for directory in fallback_dirs:
        try:
            test_dir = os.path.join(directory, 'typhoon_fallback')
            os.makedirs(test_dir, exist_ok=True)
            test_file = os.path.join(test_dir, 'test.txt')
            with open(test_file, 'w') as f:
                f.write('test')
            os.remove(test_file)
            return test_dir
        except Exception:
            continue
    # If all else fails, use the current directory
    return os.getcwd()
# -----------------------------
# ONI and Typhoon Data Functions
# -----------------------------
def download_oni_file(url, filename):
    """Download the ONI file with retry logic."""
    max_retries = 3
    for attempt in range(max_retries):
        try:
            response = requests.get(url, timeout=30)
            response.raise_for_status()
            with open(filename, 'wb') as f:
                f.write(response.content)
            return True
        except Exception as e:
            logging.warning(f"Attempt {attempt + 1} failed to download ONI: {e}")
            if attempt < max_retries - 1:
                time.sleep(2 ** attempt)  # Exponential backoff
            else:
                logging.error(f"Failed to download ONI after {max_retries} attempts")
    return False
def convert_oni_ascii_to_csv(input_file, output_file):
    """Convert the ONI ASCII format to CSV."""
    data = defaultdict(lambda: [''] * 12)
    season_to_month = {'DJF': 12, 'JFM': 1, 'FMA': 2, 'MAM': 3, 'AMJ': 4, 'MJJ': 5,
                       'JJA': 6, 'JAS': 7, 'ASO': 8, 'SON': 9, 'OND': 10, 'NDJ': 11}
    try:
        with open(input_file, 'r') as f:
            lines = f.readlines()[1:]  # Skip header
        for line in lines:
            parts = line.split()
            if len(parts) >= 4:
                season, year, anom = parts[0], parts[1], parts[-1]
                if season in season_to_month:
                    month = season_to_month[season]
                    if season == 'DJF':
                        year = str(int(year) - 1)
                    data[year][month - 1] = anom

        # Write to CSV with the safe writer
        df = pd.DataFrame(data).T.reset_index()
        df.columns = ['Year', 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                      'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
        df = df.sort_values('Year').reset_index(drop=True)
        return safe_file_write(output_file, df, get_fallback_data_dir())
    except Exception as e:
        logging.error(f"Error converting ONI file: {e}")
        return False
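
# For reference, the CPC oni.ascii.txt file is a whitespace-separated table
# that looks roughly like the lines below (season code, year, total SST,
# anomaly); the parser above keys on the season code and keeps only the last
# column:
#
#     SEAS  YR    TOTAL  ANOM
#     DJF   1950  24.72  -1.53
#     JFM   1950  25.17  -1.34
#
# A DJF (Dec-Jan-Feb) season labeled with year Y is stored under December of
# year Y-1, the month in which that season begins.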
def update_oni_data():
    """Update ONI data with error handling."""
    url = "https://www.cpc.ncep.noaa.gov/data/indices/oni.ascii.txt"
    temp_file = os.path.join(DATA_PATH, "temp_oni.ascii.txt")
    input_file = os.path.join(DATA_PATH, "oni.ascii.txt")
    output_file = ONI_DATA_PATH
    try:
        if download_oni_file(url, temp_file):
            if not os.path.exists(input_file) or not os.path.exists(output_file):
                os.rename(temp_file, input_file)
                convert_oni_ascii_to_csv(input_file, output_file)
            else:
                os.remove(temp_file)
        else:
            # Create fallback ONI data if the download fails
            logging.warning("Creating fallback ONI data")
            create_fallback_oni_data(output_file)
    except Exception as e:
        logging.error(f"Error updating ONI data: {e}")
        create_fallback_oni_data(output_file)
def create_fallback_oni_data(output_file):
    """Create minimal synthetic ONI data for testing."""
    years = range(2000, 2026)  # Extended to include 2025
    months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
              'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']

    # Create synthetic ONI data
    data = []
    for year in years:
        row = [year]
        for month in months:
            # Generate some realistic ONI values
            value = np.random.normal(0, 1) * 0.5
            row.append(f"{value:.2f}")
        data.append(row)

    df = pd.DataFrame(data, columns=['Year'] + months)
    safe_file_write(output_file, df, get_fallback_data_dir())
# -----------------------------
# FIXED: IBTrACS Data Loading
# -----------------------------
def download_ibtracs_file(basin, force_download=False):
    """Download a specific basin file from IBTrACS."""
    filename = BASIN_FILES[basin]
    local_path = os.path.join(DATA_PATH, filename)
    url = IBTRACS_BASE_URL + filename

    # Reuse the cached file if it is recent (less than 7 days old)
    if os.path.exists(local_path) and not force_download:
        file_age = time.time() - os.path.getmtime(local_path)
        if file_age < 7 * 24 * 3600:  # 7 days
            logging.info(f"Using cached {basin} basin file")
            return local_path

    try:
        logging.info(f"Downloading {basin} basin file from {url}")
        response = requests.get(url, timeout=60)
        response.raise_for_status()

        # Ensure the directory exists
        os.makedirs(os.path.dirname(local_path), exist_ok=True)
        with open(local_path, 'wb') as f:
            f.write(response.content)
        logging.info(f"Successfully downloaded {basin} basin file")
        return local_path
    except Exception as e:
        logging.error(f"Failed to download {basin} basin file: {e}")
        return None
def examine_ibtracs_structure(file_path):
    """Examine the actual structure of an IBTrACS CSV file."""
    try:
        with open(file_path, 'r') as f:
            lines = f.readlines()

        # Show the first 5 lines
        logging.info("First 5 lines of IBTrACS file:")
        for i, line in enumerate(lines[:5]):
            logging.info(f"Line {i}: {line.strip()}")

        # The first line contains the actual column headers;
        # no rows need to be skipped for IBTrACS v04r01
        df = pd.read_csv(file_path, nrows=5)
        logging.info(f"Columns from first row: {list(df.columns)}")
        return list(df.columns)
    except Exception as e:
        logging.error(f"Error examining IBTrACS structure: {e}")
        return None
def load_ibtracs_csv_directly(basin='WP'):
    """Load IBTrACS data directly from CSV - FIXED VERSION."""
    filename = BASIN_FILES[basin]
    local_path = os.path.join(DATA_PATH, filename)

    # Download the file if it doesn't exist
    if not os.path.exists(local_path):
        downloaded_path = download_ibtracs_file(basin)
        if not downloaded_path:
            return None

    try:
        # First, examine the structure
        actual_columns = examine_ibtracs_structure(local_path)
        if not actual_columns:
            logging.error("Could not examine IBTrACS file structure")
            return None

        # Read the IBTrACS CSV - do NOT skip any rows for v04r01;
        # the first row contains the proper column headers
        logging.info(f"Reading IBTrACS CSV file: {local_path}")
        df = pd.read_csv(local_path, low_memory=False)

        logging.info(f"Original columns: {list(df.columns)}")
        logging.info(f"Data shape before cleaning: {df.shape}")

        # Check which essential columns exist
        required_cols = ['SID', 'ISO_TIME', 'LAT', 'LON']
        available_required = [col for col in required_cols if col in df.columns]
        if len(available_required) < 2:
            logging.error(f"Missing critical columns. Available: {list(df.columns)}")
            return None

        # Clean and standardize the data with a format specification
        if 'ISO_TIME' in df.columns:
            df['ISO_TIME'] = pd.to_datetime(df['ISO_TIME'], format='%Y-%m-%d %H:%M:%S', errors='coerce')

        # Clean numeric columns
        numeric_columns = ['LAT', 'LON', 'WMO_WIND', 'WMO_PRES', 'USA_WIND', 'USA_PRES']
        for col in numeric_columns:
            if col in df.columns:
                df[col] = pd.to_numeric(df[col], errors='coerce')

        # Filter out invalid/missing critical data
        valid_rows = df['LAT'].notna() & df['LON'].notna()
        df = df[valid_rows]

        # Ensure LAT/LON are in reasonable ranges
        df = df[(df['LAT'] >= -90) & (df['LAT'] <= 90)]
        df = df[(df['LON'] >= -180) & (df['LON'] <= 180)]

        # Add basin info if missing
        if 'BASIN' not in df.columns:
            df['BASIN'] = basin

        # Add default columns if missing
        if 'NAME' not in df.columns:
            df['NAME'] = 'UNNAMED'
        if 'SEASON' not in df.columns and 'ISO_TIME' in df.columns:
            df['SEASON'] = df['ISO_TIME'].dt.year

        logging.info(f"Successfully loaded {len(df)} records from {basin} basin")
        return df
    except Exception as e:
        logging.error(f"Error reading IBTrACS CSV file: {e}")
        return None
def load_ibtracs_data_fixed():
    """Fixed version of the IBTrACS data loading."""
    ibtracs_data = {}

    # Try to load each basin, prioritizing WP for this application
    load_order = ['WP', 'EP', 'NA']
    for basin in load_order:
        try:
            logging.info(f"Loading {basin} basin data...")
            df = load_ibtracs_csv_directly(basin)
            if df is not None and not df.empty:
                ibtracs_data[basin] = df
                logging.info(f"Successfully loaded {basin} basin with {len(df)} records")
            else:
                logging.warning(f"No data loaded for basin {basin}")
                ibtracs_data[basin] = None
        except Exception as e:
            logging.error(f"Failed to load basin {basin}: {e}")
            ibtracs_data[basin] = None

    return ibtracs_data
def load_data_fixed(oni_path, typhoon_path):
    """Fixed version of the load_data function."""
    # Load ONI data
    oni_data = pd.DataFrame({'Year': [], 'Jan': [], 'Feb': [], 'Mar': [], 'Apr': [],
                             'May': [], 'Jun': [], 'Jul': [], 'Aug': [], 'Sep': [],
                             'Oct': [], 'Nov': [], 'Dec': []})
    if not os.path.exists(oni_path):
        logging.warning(f"ONI data file not found: {oni_path}")
        update_oni_data()
    try:
        oni_data = pd.read_csv(oni_path)
        logging.info(f"Successfully loaded ONI data with {len(oni_data)} years")
    except Exception as e:
        logging.error(f"Error loading ONI data: {e}")
        update_oni_data()
        try:
            oni_data = pd.read_csv(oni_path)
        except Exception as e:
            logging.error(f"Still can't load ONI data: {e}")

    # Load typhoon data - NEW APPROACH
    typhoon_data = None

    # First, try to load from an existing processed file
    if os.path.exists(typhoon_path):
        try:
            typhoon_data = pd.read_csv(typhoon_path, low_memory=False)
            # Ensure basic columns exist and are valid
            required_cols = ['LAT', 'LON']
            if all(col in typhoon_data.columns for col in required_cols):
                if 'ISO_TIME' in typhoon_data.columns:
                    typhoon_data['ISO_TIME'] = pd.to_datetime(typhoon_data['ISO_TIME'], errors='coerce')
                logging.info(f"Loaded processed typhoon data with {len(typhoon_data)} records")
            else:
                logging.warning("Processed typhoon data missing required columns, will reload from IBTrACS")
                typhoon_data = None
        except Exception as e:
            logging.error(f"Error loading processed typhoon data: {e}")
            typhoon_data = None

    # If there is no valid processed data, load from IBTrACS
    if typhoon_data is None or typhoon_data.empty:
        logging.info("Loading typhoon data from IBTrACS...")
        ibtracs_data = load_ibtracs_data_fixed()

        # Combine all available basin data, prioritizing WP
        combined_dfs = []
        for basin in ['WP', 'EP', 'NA']:
            if basin in ibtracs_data and ibtracs_data[basin] is not None:
                df = ibtracs_data[basin].copy()
                df['BASIN'] = basin
                combined_dfs.append(df)

        if combined_dfs:
            typhoon_data = pd.concat(combined_dfs, ignore_index=True)
            # Ensure SID has a proper format
            if 'SID' not in typhoon_data.columns and 'BASIN' in typhoon_data.columns:
                # Create SID from the basin and other identifiers if missing
                if 'SEASON' in typhoon_data.columns:
                    typhoon_data['SID'] = (typhoon_data['BASIN'].astype(str) +
                                           typhoon_data.index.astype(str).str.zfill(2) +
                                           typhoon_data['SEASON'].astype(str))
                else:
                    typhoon_data['SID'] = (typhoon_data['BASIN'].astype(str) +
                                           typhoon_data.index.astype(str).str.zfill(2) +
                                           '2000')
            # Save the processed data for future use
            safe_file_write(typhoon_path, typhoon_data, get_fallback_data_dir())
            logging.info(f"Combined IBTrACS data: {len(typhoon_data)} total records")
        else:
            logging.error("Failed to load any IBTrACS basin data")
            # Create minimal fallback data
            typhoon_data = create_fallback_typhoon_data()

    # Final validation of the typhoon data
    if typhoon_data is not None:
        # Ensure required columns exist with fallback values
        required_columns = {
            'SID': 'UNKNOWN',
            'ISO_TIME': pd.Timestamp('2000-01-01'),
            'LAT': 0.0,
            'LON': 0.0,
            'USA_WIND': np.nan,
            'USA_PRES': np.nan,
            'NAME': 'UNNAMED',
            'SEASON': 2000
        }
        for col, default_val in required_columns.items():
            if col not in typhoon_data.columns:
                typhoon_data[col] = default_val
                logging.warning(f"Added missing column {col} with default value")

        # Ensure data types
        if 'ISO_TIME' in typhoon_data.columns:
            typhoon_data['ISO_TIME'] = pd.to_datetime(typhoon_data['ISO_TIME'], errors='coerce')
        typhoon_data['LAT'] = pd.to_numeric(typhoon_data['LAT'], errors='coerce')
        typhoon_data['LON'] = pd.to_numeric(typhoon_data['LON'], errors='coerce')
        typhoon_data['USA_WIND'] = pd.to_numeric(typhoon_data['USA_WIND'], errors='coerce')
        typhoon_data['USA_PRES'] = pd.to_numeric(typhoon_data['USA_PRES'], errors='coerce')

        # Remove rows with invalid coordinates
        typhoon_data = typhoon_data.dropna(subset=['LAT', 'LON'])
        logging.info(f"Final typhoon data: {len(typhoon_data)} records after validation")

    return oni_data, typhoon_data
def create_fallback_typhoon_data():
    """Create minimal fallback typhoon data - FIXED VERSION."""
    # Use proper pandas date_range instead of numpy
    dates = pd.date_range(start='2000-01-01', end='2025-12-31', freq='D')  # Extended to 2025
    storm_dates = dates[np.random.choice(len(dates), size=100, replace=False)]

    data = []
    for i, date in enumerate(storm_dates):
        # Create realistic WP storm tracks
        base_lat = np.random.uniform(10, 30)
        base_lon = np.random.uniform(130, 160)

        # Generate 20-50 data points per storm
        track_length = np.random.randint(20, 51)
        sid = f"WP{i+1:02d}{date.year}"
        for j in range(track_length):
            lat = base_lat + j * 0.2 + np.random.normal(0, 0.1)
            lon = base_lon + j * 0.3 + np.random.normal(0, 0.1)
            wind = max(25, 70 + np.random.normal(0, 20))
            pres = max(950, 1000 - wind + np.random.normal(0, 5))
            data.append({
                'SID': sid,
                'ISO_TIME': date + pd.Timedelta(hours=j * 6),  # Use pd.Timedelta instead
                'NAME': f'FALLBACK_{i+1}',
                'SEASON': date.year,
                'LAT': lat,
                'LON': lon,
                'USA_WIND': wind,
                'USA_PRES': pres,
                'BASIN': 'WP'
            })

    df = pd.DataFrame(data)
    logging.info(f"Created fallback typhoon data with {len(df)} records")
    return df
def process_oni_data(oni_data):
    """Process ONI data into long format."""
    oni_long = oni_data.melt(id_vars=['Year'], var_name='Month', value_name='ONI')
    month_map = {'Jan': '01', 'Feb': '02', 'Mar': '03', 'Apr': '04', 'May': '05', 'Jun': '06',
                 'Jul': '07', 'Aug': '08', 'Sep': '09', 'Oct': '10', 'Nov': '11', 'Dec': '12'}
    oni_long['Month'] = oni_long['Month'].map(month_map)
    oni_long['Date'] = pd.to_datetime(oni_long['Year'].astype(str) + '-' + oni_long['Month'] + '-01')
    oni_long['ONI'] = pd.to_numeric(oni_long['ONI'], errors='coerce')
    return oni_long
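
# Illustrative example of the wide-to-long reshape above (the values are made
# up): a row {Year: 2023, Jan: '-0.7', Feb: '-0.4', ...} becomes the rows
# (Year=2023, Month='01', ONI=-0.7, Date=2023-01-01),
# (Year=2023, Month='02', ONI=-0.4, Date=2023-02-01), ...
# which lets per-storm records be joined on (Year, Month) in merge_data below.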
def process_typhoon_data(typhoon_data):
    """Process typhoon data into one row per storm."""
    if 'ISO_TIME' in typhoon_data.columns:
        typhoon_data['ISO_TIME'] = pd.to_datetime(typhoon_data['ISO_TIME'], errors='coerce')
    typhoon_data['USA_WIND'] = pd.to_numeric(typhoon_data['USA_WIND'], errors='coerce')
    typhoon_data['USA_PRES'] = pd.to_numeric(typhoon_data['USA_PRES'], errors='coerce')
    typhoon_data['LAT'] = pd.to_numeric(typhoon_data['LAT'], errors='coerce')
    typhoon_data['LON'] = pd.to_numeric(typhoon_data['LON'], errors='coerce')

    logging.info(f"Unique basins in typhoon_data: {typhoon_data['SID'].str[:2].unique()}")

    typhoon_max = typhoon_data.groupby('SID').agg({
        'USA_WIND': 'max', 'USA_PRES': 'min', 'ISO_TIME': 'first', 'SEASON': 'first', 'NAME': 'first',
        'LAT': 'first', 'LON': 'first'
    }).reset_index()

    if 'ISO_TIME' in typhoon_max.columns:
        typhoon_max['Month'] = typhoon_max['ISO_TIME'].dt.strftime('%m')
        typhoon_max['Year'] = typhoon_max['ISO_TIME'].dt.year
    else:
        # Fallback if there is no ISO_TIME
        typhoon_max['Month'] = '01'
        typhoon_max['Year'] = typhoon_max['SEASON']

    typhoon_max['Category'] = typhoon_max['USA_WIND'].apply(categorize_typhoon_enhanced)
    return typhoon_max
def merge_data(oni_long, typhoon_max):
    """Merge ONI and typhoon data on (Year, Month)."""
    return pd.merge(typhoon_max, oni_long, on=['Year', 'Month'])
# -----------------------------
# ENHANCED: Categorization Functions - FIXED TAIWAN CLASSIFICATION
# -----------------------------
def categorize_typhoon_enhanced(wind_speed):
    """Enhanced categorization that properly includes Tropical Depressions."""
    if pd.isna(wind_speed):
        return 'Unknown'

    # Convert to knots if the value appears to be in m/s (some datasets use m/s)
    if wind_speed < 10:
        wind_speed = wind_speed * 1.94384

    # FIXED thresholds to include TD
    if wind_speed < 34:     # Below 34 knots = Tropical Depression
        return 'Tropical Depression'
    elif wind_speed < 64:   # 34-63 knots = Tropical Storm
        return 'Tropical Storm'
    elif wind_speed < 83:   # 64-82 knots = Category 1 Typhoon
        return 'C1 Typhoon'
    elif wind_speed < 96:   # 83-95 knots = Category 2 Typhoon
        return 'C2 Typhoon'
    elif wind_speed < 113:  # 96-112 knots = Category 3 Strong Typhoon
        return 'C3 Strong Typhoon'
    elif wind_speed < 137:  # 113-136 knots = Category 4 Very Strong Typhoon
        return 'C4 Very Strong Typhoon'
    else:                   # 137+ knots = Category 5 Super Typhoon
        return 'C5 Super Typhoon'
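
# Worked examples, following directly from the thresholds above:
#   categorize_typhoon_enhanced(8)   -> 'Tropical Depression' (8 < 10 is taken
#                                        as m/s and converted to ~15.6 kt)
#   categorize_typhoon_enhanced(75)  -> 'C1 Typhoon'       (64 <= 75 < 83 kt)
#   categorize_typhoon_enhanced(140) -> 'C5 Super Typhoon' (>= 137 kt)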
def categorize_typhoon_taiwan_fixed(wind_speed):
    """
    FIXED Taiwan categorization system based on CMA 2006 standards.
    Reference: CMA Tropical Cyclone Data Center official classification.
    """
    if pd.isna(wind_speed):
        return 'Tropical Depression'

    # Convert from knots to m/s if the input appears to be in knots
    if wind_speed > 50:
        wind_speed_ms = wind_speed * 0.514444
    else:
        wind_speed_ms = wind_speed

    # CMA 2006 Classification Standards (used by Taiwan CWA)
    if wind_speed_ms >= 51.0:
        return 'Super Typhoon'          # ≥51.0 m/s (≥99.2 kt)
    elif wind_speed_ms >= 41.5:
        return 'Severe Typhoon'         # 41.5–50.9 m/s (80.7–99.1 kt)
    elif wind_speed_ms >= 32.7:
        return 'Typhoon'                # 32.7–41.4 m/s (63.6–80.6 kt)
    elif wind_speed_ms >= 24.5:
        return 'Severe Tropical Storm'  # 24.5–32.6 m/s (47.6–63.5 kt)
    elif wind_speed_ms >= 17.2:
        return 'Tropical Storm'         # 17.2–24.4 m/s (33.4–47.5 kt)
    else:
        return 'Tropical Depression'    # < 17.2 m/s (< 33.4 kt)
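
# Worked examples for the Taiwan classifier (note the unit heuristic above:
# values over 50 are assumed to be knots and converted to m/s):
#   categorize_typhoon_taiwan_fixed(100) -> 'Super Typhoon' (100 kt ~ 51.4 m/s)
#   categorize_typhoon_taiwan_fixed(70)  -> 'Typhoon'       (70 kt ~ 36.0 m/s)
#   categorize_typhoon_taiwan_fixed(25)  -> 'Severe Tropical Storm' (25 m/s)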
# Original function for backward compatibility
def categorize_typhoon(wind_speed):
    """Original categorize_typhoon function, kept for backward compatibility."""
    return categorize_typhoon_enhanced(wind_speed)

def classify_enso_phases(oni_value):
    """Classify ENSO phases based on the ONI value."""
    if isinstance(oni_value, pd.Series):
        oni_value = oni_value.iloc[0]
    if pd.isna(oni_value):
        return 'Neutral'
    if oni_value >= 0.5:
        return 'El Nino'
    elif oni_value <= -0.5:
        return 'La Nina'
    else:
        return 'Neutral'
# FIXED: Combined categorization function
def categorize_typhoon_by_standard_fixed(wind_speed, standard='atlantic'):
    """FIXED categorization supporting both standards, with correct Taiwan thresholds."""
    if pd.isna(wind_speed):
        return 'Tropical Depression', '#808080'

    if standard == 'taiwan':
        category = categorize_typhoon_taiwan_fixed(wind_speed)
        color = taiwan_color_map_fixed.get(category, '#808080')
        return category, color
    else:
        # Atlantic/International standard (unchanged)
        if wind_speed >= 137:
            return 'C5 Super Typhoon', '#FF0000'
        elif wind_speed >= 113:
            return 'C4 Very Strong Typhoon', '#FFA500'
        elif wind_speed >= 96:
            return 'C3 Strong Typhoon', '#FFFF00'
        elif wind_speed >= 83:
            return 'C2 Typhoon', '#00FF00'
        elif wind_speed >= 64:
            return 'C1 Typhoon', '#00FFFF'
        elif wind_speed >= 34:
            return 'Tropical Storm', '#0000FF'
        else:
            return 'Tropical Depression', '#808080'
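
# The same wind speed can land in different classes under the two standards:
# 70 kt is ('C1 Typhoon', '#00FFFF') on the Atlantic scale but, converted to
# ~36.0 m/s, ('Typhoon', '#FFFF00') on the fixed Taiwan scale. A small demo
# (defined but never called at import time):
def _demo_standard_comparison():
    for std in ('atlantic', 'taiwan'):
        cat, color = categorize_typhoon_by_standard_fixed(70, standard=std)
        logging.info(f"70 kt under {std}: {cat} ({color})")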
# -----------------------------
# ENHANCED: Historical Environmental Analysis
# -----------------------------
def analyze_historical_environment(typhoon_data, oni_data):
    """Analyze historical environmental conditions for better predictions."""
    try:
        logging.info("Analyzing historical environmental patterns...")

        # Get historical storm data with environmental conditions
        historical_analysis = {
            'sst_patterns': {},
            'slp_patterns': {},
            'oni_relationships': {},
            'seasonal_variations': {},
            'intensity_predictors': {}
        }

        # Analyze by storm intensity category
        for category in ['Tropical Depression', 'Tropical Storm', 'C1 Typhoon',
                         'C2 Typhoon', 'C3 Strong Typhoon', 'C4 Very Strong Typhoon',
                         'C5 Super Typhoon']:
            # Filter storms by category
            if 'USA_WIND' in typhoon_data.columns:
                category_storms = typhoon_data[
                    typhoon_data['USA_WIND'].apply(categorize_typhoon_enhanced) == category
                ]
                if len(category_storms) > 0:
                    historical_analysis['intensity_predictors'][category] = {
                        'avg_genesis_lat': category_storms['LAT'].mean(),
                        'avg_genesis_lon': category_storms['LON'].mean(),
                        'count': len(category_storms['SID'].unique()),
                        'seasonal_distribution': category_storms['ISO_TIME'].dt.month.value_counts().to_dict() if 'ISO_TIME' in category_storms.columns else {}
                    }

        # Analyze ENSO relationships
        if len(oni_data) > 0:
            for phase in ['El Nino', 'La Nina', 'Neutral']:
                # This would be enhanced with actual storm-ENSO matching
                historical_analysis['oni_relationships'][phase] = {
                    'storm_frequency_modifier': 1.0,  # Will be calculated from real data
                    'intensity_modifier': 0.0,
                    'track_shift': {'lat': 0.0, 'lon': 0.0}
                }

        logging.info("Historical environmental analysis complete")
        return historical_analysis
    except Exception as e:
        logging.error(f"Error in historical environmental analysis: {e}")
        return {}
# -----------------------------
# ENHANCED: Environmental Intensity Prediction
# -----------------------------
def calculate_environmental_intensity_potential(lat, lon, month, oni_value, sst_data=None, slp_data=None):
    """
    Calculate environmental intensity potential based on oceanic conditions.

    This function integrates multiple environmental factors to estimate
    the maximum potential intensity a storm could achieve in the given conditions.
    """
    try:
        # Base intensity potential from climatology
        base_potential = 45  # kt - baseline for tropical storm formation
        sst_contribution = 0.0
        slp_contribution = 0.0

        # SST contribution (the most important factor)
        if sst_data and sst_data['success']:
            try:
                sst_value = oceanic_manager.interpolate_data_to_point(
                    sst_data, lat, lon, 'sst'
                )
                if not np.isnan(sst_value):
                    # Convert to Celsius if needed (OISST is in Celsius)
                    sst_celsius = sst_value if sst_value < 50 else sst_value - 273.15
                    # Enhanced SST-intensity relationship based on research
                    if sst_celsius >= 30.0:    # Very warm - super typhoon potential
                        sst_contribution = 80 + (sst_celsius - 30) * 10
                    elif sst_celsius >= 28.5:  # Warm - typhoon potential
                        sst_contribution = 40 + (sst_celsius - 28.5) * 26.7
                    elif sst_celsius >= 26.5:  # Marginal - tropical storm potential
                        sst_contribution = 0 + (sst_celsius - 26.5) * 20
                    else:                      # Too cool for significant development
                        sst_contribution = -30
                    base_potential += sst_contribution
                    logging.debug(f"SST: {sst_celsius:.1f}°C, contribution: {sst_contribution:.1f}kt")
                else:
                    # Use climatological SST
                    clim_sst = get_climatological_sst(lat, lon, month)
                    base_potential += max(0, (clim_sst - 26.5) * 15)
            except Exception as e:
                logging.warning(f"Error processing SST data: {e}")
                clim_sst = get_climatological_sst(lat, lon, month)
                base_potential += max(0, (clim_sst - 26.5) * 15)
        else:
            # Use climatological SST if real data is unavailable
            clim_sst = get_climatological_sst(lat, lon, month)
            base_potential += max(0, (clim_sst - 26.5) * 15)

        # SLP contribution (atmospheric environment)
        if slp_data and slp_data['success']:
            try:
                slp_value = oceanic_manager.interpolate_data_to_point(
                    slp_data, lat, lon, 'slp'
                )
                if not np.isnan(slp_value):
                    # Convert from Pa to hPa if needed (Pa values are ~100000)
                    slp_hpa = slp_value / 100.0 if slp_value > 10000 else slp_value
                    # Lower pressure = better environment for intensification
                    if slp_hpa < 1008:    # Low pressure environment
                        slp_contribution = (1008 - slp_hpa) * 3
                    elif slp_hpa > 1015:  # High pressure - suppressed development
                        slp_contribution = (1015 - slp_hpa) * 2
                    else:                 # Neutral
                        slp_contribution = 0
                    base_potential += slp_contribution
                    logging.debug(f"SLP: {slp_hpa:.1f}hPa, contribution: {slp_contribution:.1f}kt")
            except Exception as e:
                logging.warning(f"Error processing SLP data: {e}")

        # ENSO modulation
        if oni_value > 1.0:      # Strong El Nino
            enso_modifier = -15  # Suppressed development
        elif oni_value > 0.5:    # Moderate El Nino
            enso_modifier = -8
        elif oni_value < -1.0:   # Strong La Nina
            enso_modifier = +12  # Enhanced development
        elif oni_value < -0.5:   # Moderate La Nina
            enso_modifier = +6
        else:                    # Neutral
            enso_modifier = oni_value * 2
        base_potential += enso_modifier

        # Seasonal modulation
        seasonal_factors = {
            1: -12, 2: -10, 3: -8, 4: -5, 5: 0, 6: 5,
            7: 12, 8: 15, 9: 18, 10: 12, 11: 5, 12: -8
        }
        seasonal_modifier = seasonal_factors.get(month, 0)
        base_potential += seasonal_modifier

        # Latitude effects
        if lat < 8:     # Too close to the equator - weak Coriolis
            lat_modifier = -20
        elif lat < 12:  # Good for development
            lat_modifier = 5
        elif lat < 25:  # Prime development zone
            lat_modifier = 10
        elif lat < 35:  # Marginal
            lat_modifier = -5
        else:           # Too far north
            lat_modifier = -25
        base_potential += lat_modifier

        # Wind shear estimation (simplified)
        shear_factor = estimate_wind_shear(lat, lon, month, oni_value)
        base_potential -= shear_factor

        # Apply realistic bounds
        environmental_potential = max(25, min(185, base_potential))

        return {
            'potential_intensity': environmental_potential,
            'sst_contribution': sst_contribution,
            'slp_contribution': slp_contribution,
            'enso_modifier': enso_modifier,
            'seasonal_modifier': seasonal_modifier,
            'latitude_modifier': lat_modifier,
            'shear_factor': shear_factor
        }
    except Exception as e:
        logging.error(f"Error calculating environmental potential: {e}")
        return {
            'potential_intensity': 50,
            'error': str(e)
        }
def get_climatological_sst(lat, lon, month):
    """Get the climatological SST for a location and month."""
    # Simplified climatological SST model for the Western Pacific
    base_sst = 28.0  # Base warm pool temperature

    # Latitude effect
    if lat < 5:
        lat_effect = 0.5  # Warm near the equator
    elif lat < 15:
        lat_effect = 1.0  # Peak warm pool
    elif lat < 25:
        lat_effect = 0.0 - (lat - 15) * 0.3   # Cooling northward
    else:
        lat_effect = -3.0 - (lat - 25) * 0.2  # Much cooler

    # Seasonal effect
    seasonal_cycle = {
        1: -1.0, 2: -1.2, 3: -0.8, 4: 0.0, 5: 0.5, 6: 0.8,
        7: 1.0, 8: 1.2, 9: 1.0, 10: 0.5, 11: 0.0, 12: -0.5
    }
    seasonal_effect = seasonal_cycle.get(month, 0)

    return base_sst + lat_effect + seasonal_effect
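
# Worked example: get_climatological_sst(10, 140, 7)
#   = base 28.0 + lat_effect 1.0 (10 < 15, peak warm pool) + seasonal 1.0 (July)
#   = 30.0 degC, squarely in the warm-pool range used by the intensity model.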
def estimate_wind_shear(lat, lon, month, oni_value):
    """Estimate wind shear based on location, season, and ENSO state."""
    # Base shear climatology
    if 5 <= lat <= 20 and 120 <= lon <= 160:  # Low-shear region
        base_shear = 5  # kt-equivalent intensity reduction
    elif lat > 25:  # Higher latitude - more shear
        base_shear = 15 + (lat - 25) * 2
    else:           # Marginal regions
        base_shear = 10

    # Seasonal modulation
    if month in [12, 1, 2, 3]:   # Winter - high shear
        seasonal_shear = 8
    elif month in [6, 7, 8, 9]:  # Summer - low shear
        seasonal_shear = -3
    else:                        # Transition seasons
        seasonal_shear = 2

    # ENSO modulation
    if oni_value > 0.5:     # El Nino - increased shear
        enso_shear = 5 + oni_value * 3
    elif oni_value < -0.5:  # La Nina - decreased shear
        enso_shear = oni_value * 2
    else:
        enso_shear = 0

    total_shear = base_shear + seasonal_shear + enso_shear
    return max(0, total_shear)
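
# Worked example: estimate_wind_shear(15, 140, 8, -1.0)
#   = base 5 (low-shear box 5-20N, 120-160E) + seasonal -3 (August)
#   + ENSO -2 (La Nina: -1.0 * 2) = 0 kt of shear penalty, i.e. a
#   near-ideal peak-season La Nina environment.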
# -----------------------------
# ENHANCED: Realistic Storm Prediction with Oceanic Data
# -----------------------------
def get_realistic_genesis_locations():
    """Get realistic typhoon genesis regions based on climatology."""
    return {
        "Western Pacific Main Development Region": {"lat": 12.5, "lon": 145.0, "description": "Peak activity zone (Guam area)"},
        "South China Sea": {"lat": 15.0, "lon": 115.0, "description": "Secondary development region"},
        "Philippine Sea": {"lat": 18.0, "lon": 135.0, "description": "Recurving storm region"},
        "Marshall Islands": {"lat": 8.0, "lon": 165.0, "description": "Eastern development zone"},
        "Monsoon Trough": {"lat": 10.0, "lon": 130.0, "description": "Monsoon-driven genesis"},
        "ITCZ Region": {"lat": 6.0, "lon": 140.0, "description": "Near-equatorial development"},
        "Subtropical Region": {"lat": 22.0, "lon": 125.0, "description": "Late season development"},
        "Bay of Bengal": {"lat": 15.0, "lon": 88.0, "description": "Indian Ocean cyclones"},
        "Eastern Pacific": {"lat": 12.0, "lon": -105.0, "description": "Hurricane development zone"},
        "Atlantic MDR": {"lat": 12.0, "lon": -45.0, "description": "Main Development Region"}
    }
def predict_storm_route_and_intensity_with_oceanic_data(
    genesis_region, month, oni_value,
    forecast_hours=72, use_real_data=True,
    models=None, enable_animation=True
):
    """
    Enhanced prediction system integrating real-time oceanic data.

    This function provides the most realistic storm development prediction
    by incorporating current SST and SLP conditions from global datasets.
    """
    try:
        genesis_locations = get_realistic_genesis_locations()
        if genesis_region not in genesis_locations:
            genesis_region = "Western Pacific Main Development Region"
        genesis_info = genesis_locations[genesis_region]
        start_lat = genesis_info["lat"]
        start_lon = genesis_info["lon"]

        logging.info(f"Starting enhanced prediction for {genesis_region}")

        # Determine data bounds for the oceanic data fetch
        lat_buffer = 10  # degrees
        lon_buffer = 15  # degrees
        lat_min = start_lat - lat_buffer
        lat_max = start_lat + lat_buffer
        lon_min = start_lon - lon_buffer
        lon_max = start_lon + lon_buffer

        # Fetch current oceanic conditions
        current_date = datetime.now()
        sst_data = None
        slp_data = None
        if use_real_data:
            try:
                logging.info("Fetching real-time oceanic data...")
                # Fetch SST data
                sst_data = oceanic_manager.get_sst_data(
                    lat_min, lat_max, lon_min, lon_max,
                    current_date - timedelta(days=1),  # Yesterday's data (most recent available)
                    current_date
                )
                # Fetch SLP data
                slp_data = oceanic_manager.get_slp_data(
                    lat_min, lat_max, lon_min, lon_max,
                    current_date - timedelta(days=1),
                    current_date
                )
                logging.info(f"SST fetch: {'Success' if sst_data['success'] else 'Failed'}")
                logging.info(f"SLP fetch: {'Success' if slp_data['success'] else 'Failed'}")
            except Exception as e:
                logging.warning(f"Error fetching real-time data, using climatology: {e}")
                use_real_data = False

        # Initialize the results structure
        results = {
            'current_prediction': {},
            'route_forecast': [],
            'confidence_scores': {},
            'environmental_data': {
                'sst_source': 'Real-time NOAA OISST' if (sst_data and sst_data['success']) else 'Climatological',
                'slp_source': 'Real-time NCEP/NCAR' if (slp_data and slp_data['success']) else 'Climatological',
                'use_real_data': use_real_data
            },
            'model_info': 'Enhanced Oceanic-Coupled Model',
            'genesis_info': genesis_info
        }

        # Calculate the initial environmental potential
        env_potential = calculate_environmental_intensity_potential(
            start_lat, start_lon, month, oni_value, sst_data, slp_data
        )

        # Realistic starting intensity (TD level) with environmental modulation
        base_intensity = 30  # Base TD intensity
        environmental_boost = min(8, max(-5, env_potential['potential_intensity'] - 50) * 0.15)
        predicted_intensity = base_intensity + environmental_boost
        predicted_intensity = max(25, min(45, predicted_intensity))  # Keep in the TD-to-weak-TS range

        # Enhanced genesis conditions
        results['current_prediction'] = {
            'intensity_kt': predicted_intensity,
            'pressure_hpa': 1008 - (predicted_intensity - 25) * 0.8,
            'category': categorize_typhoon_enhanced(predicted_intensity),
            'genesis_region': genesis_region,
            'environmental_potential': env_potential['potential_intensity'],
            'sst_contribution': env_potential.get('sst_contribution', 0),
            'environmental_favorability': 'High' if env_potential['potential_intensity'] > 80 else
                                          ('Moderate' if env_potential['potential_intensity'] > 50 else 'Low')
        }

        # Enhanced route prediction with environmental coupling
        current_lat = start_lat
        current_lon = start_lon
        current_intensity = predicted_intensity
        route_points = []

        # Historical environmental analysis for better predictions
        # (assumes the module-level typhoon_data and oni_data globals are loaded elsewhere)
        historical_patterns = analyze_historical_environment(typhoon_data, oni_data)

        # Track storm development with oceanic data integration
        for hour in range(0, forecast_hours + 6, 6):
            # Dynamic oceanic conditions along the track
            if use_real_data and sst_data and slp_data:
                # Current environmental conditions from real data
                current_env = calculate_environmental_intensity_potential(
                    current_lat, current_lon, month, oni_value, sst_data, slp_data
                )
            else:
                # Climatological estimates
                current_env = calculate_environmental_intensity_potential(
                    current_lat, current_lon, month, oni_value, None, None
                )
            environmental_limit = current_env['potential_intensity']

            # Enhanced storm motion with environmental steering
            base_speed = calculate_environmental_steering_speed(
                current_lat, current_lon, month, oni_value, slp_data
            )

            # Motion vectors with environmental influences
            lat_tendency, lon_tendency = calculate_motion_tendency(
                current_lat, current_lon, month, oni_value, hour, slp_data
            )

            # Update the position
            current_lat += lat_tendency
            current_lon += lon_tendency

            # Enhanced intensity evolution with environmental limits
            intensity_tendency = calculate_environmental_intensity_change(
                current_intensity, environmental_limit, hour, current_lat, current_lon,
                month, oni_value, sst_data
            )

            # Update the intensity within environmental constraints
            current_intensity += intensity_tendency
            current_intensity = max(20, min(environmental_limit, current_intensity))

            # Enhanced confidence calculation
            confidence = calculate_dynamic_confidence(
                hour, current_lat, current_lon, use_real_data,
                sst_data['success'] if sst_data else False,
                slp_data['success'] if slp_data else False
            )

            # Determine the development stage with environmental context
            stage = get_environmental_development_stage(hour, current_intensity, environmental_limit)

            # Environmental metadata
            if sst_data and sst_data['success']:
                current_sst = oceanic_manager.interpolate_data_to_point(
                    sst_data, current_lat, current_lon, 'sst'
                )
            else:
                current_sst = get_climatological_sst(current_lat, current_lon, month)
            if slp_data and slp_data['success']:
                current_slp = oceanic_manager.interpolate_data_to_point(
                    slp_data, current_lat, current_lon, 'slp'
                )
                current_slp = current_slp / 100.0 if current_slp > 10000 else current_slp  # Pa -> hPa
            else:
                current_slp = 1013  # Standard atmosphere

            route_points.append({
                'hour': hour,
                'lat': current_lat,
                'lon': current_lon,
                'intensity_kt': current_intensity,
                'category': categorize_typhoon_enhanced(current_intensity),
                'confidence': confidence,
                'development_stage': stage,
                'forward_speed_kmh': base_speed * 111,  # Convert to km/h
                'pressure_hpa': max(900, 1013 - (current_intensity - 25) * 0.9)
            })

        results['route_forecast'] = route_points

        # Realistic confidence scores
        results['confidence_scores'] = {
            'genesis': 0.88,
            'early_development': 0.82,
            'position_24h': 0.85,
            'position_48h': 0.78,
            'position_72h': 0.68,
            'intensity_24h': 0.75,
            'intensity_48h': 0.65,
            'intensity_72h': 0.55,
            'long_term': max(0.3, 0.8 - (forecast_hours / 240) * 0.5)
        }

        # Model information
        results['model_info'] = f"Enhanced Realistic Model - {genesis_region}"
        return results
    except Exception as e:
        logging.error(f"Realistic prediction error: {str(e)}")
        return {
            'error': f"Prediction error: {str(e)}",
            'current_prediction': {'intensity_kt': 30, 'category': 'Tropical Depression'},
            'route_forecast': [],
            'confidence_scores': {},
            'model_info': 'Error in prediction'
        }
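
# Illustrative call (commented out because it requires the module-level
# oceanic_manager, typhoon_data, and oni_data globals to be initialized first,
# and may trigger network fetches; the arguments are example values):
# forecast = predict_storm_route_and_intensity_with_oceanic_data(
#     "Western Pacific Main Development Region", month=9, oni_value=-0.8,
#     forecast_hours=72, use_real_data=True)
# print(forecast['current_prediction'], len(forecast['route_forecast']))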
# ----------------------------- | |
# FIXED: ADVANCED ML FEATURES WITH ROBUST ERROR HANDLING | |
# ----------------------------- | |
def extract_storm_features(typhoon_data): | |
"""Extract comprehensive features for clustering analysis - FIXED VERSION""" | |
try: | |
if typhoon_data is None or typhoon_data.empty: | |
logging.error("No typhoon data provided for feature extraction") | |
return None | |
# Basic features - ensure columns exist | |
basic_features = [] | |
for sid in typhoon_data['SID'].unique(): | |
storm_data = typhoon_data[typhoon_data['SID'] == sid].copy() | |
if len(storm_data) == 0: | |
continue | |
# Initialize feature dict with safe defaults | |
features = {'SID': sid} | |
# Wind statistics | |
if 'USA_WIND' in storm_data.columns: | |
wind_values = pd.to_numeric(storm_data['USA_WIND'], errors='coerce').dropna() | |
if len(wind_values) > 0: | |
features['USA_WIND_max'] = wind_values.max() | |
features['USA_WIND_mean'] = wind_values.mean() | |
features['USA_WIND_std'] = wind_values.std() if len(wind_values) > 1 else 0 | |
else: | |
features['USA_WIND_max'] = 30 | |
features['USA_WIND_mean'] = 30 | |
features['USA_WIND_std'] = 0 | |
else: | |
features['USA_WIND_max'] = 30 | |
features['USA_WIND_mean'] = 30 | |
features['USA_WIND_std'] = 0 | |
# Pressure statistics | |
if 'USA_PRES' in storm_data.columns: | |
pres_values = pd.to_numeric(storm_data['USA_PRES'], errors='coerce').dropna() | |
if len(pres_values) > 0: | |
features['USA_PRES_min'] = pres_values.min() | |
features['USA_PRES_mean'] = pres_values.mean() | |
features['USA_PRES_std'] = pres_values.std() if len(pres_values) > 1 else 0 | |
else: | |
features['USA_PRES_min'] = 1000 | |
features['USA_PRES_mean'] = 1000 | |
features['USA_PRES_std'] = 0 | |
else: | |
features['USA_PRES_min'] = 1000 | |
features['USA_PRES_mean'] = 1000 | |
features['USA_PRES_std'] = 0 | |
# Location statistics | |
if 'LAT' in storm_data.columns and 'LON' in storm_data.columns: | |
lat_values = pd.to_numeric(storm_data['LAT'], errors='coerce').dropna() | |
lon_values = pd.to_numeric(storm_data['LON'], errors='coerce').dropna() | |
if len(lat_values) > 0 and len(lon_values) > 0: | |
features['LAT_mean'] = lat_values.mean() | |
features['LAT_std'] = lat_values.std() if len(lat_values) > 1 else 0 | |
features['LAT_max'] = lat_values.max() | |
features['LAT_min'] = lat_values.min() | |
features['LON_mean'] = lon_values.mean() | |
features['LON_std'] = lon_values.std() if len(lon_values) > 1 else 0 | |
features['LON_max'] = lon_values.max() | |
features['LON_min'] = lon_values.min() | |
# Genesis location (first valid position) | |
features['genesis_lat'] = lat_values.iloc[0] | |
features['genesis_lon'] = lon_values.iloc[0] | |
                    features['genesis_intensity'] = features['USA_WIND_mean']  # proxy: mean wind stands in for first-fix intensity
# Track characteristics | |
features['lat_range'] = lat_values.max() - lat_values.min() | |
features['lon_range'] = lon_values.max() - lon_values.min() | |
                    # Calculate total track distance (in degrees of lat/lon; ~111 km per degree)
if len(lat_values) > 1: | |
distances = [] | |
for i in range(1, len(lat_values)): | |
dlat = lat_values.iloc[i] - lat_values.iloc[i-1] | |
dlon = lon_values.iloc[i] - lon_values.iloc[i-1] | |
distances.append(np.sqrt(dlat**2 + dlon**2)) | |
features['total_distance'] = sum(distances) | |
features['avg_speed'] = np.mean(distances) if distances else 0 | |
else: | |
features['total_distance'] = 0 | |
features['avg_speed'] = 0 | |
# Track curvature | |
if len(lat_values) > 2: | |
bearing_changes = [] | |
for i in range(1, len(lat_values)-1): | |
dlat1 = lat_values.iloc[i] - lat_values.iloc[i-1] | |
dlon1 = lon_values.iloc[i] - lon_values.iloc[i-1] | |
dlat2 = lat_values.iloc[i+1] - lat_values.iloc[i] | |
dlon2 = lon_values.iloc[i+1] - lon_values.iloc[i] | |
angle1 = np.arctan2(dlat1, dlon1) | |
angle2 = np.arctan2(dlat2, dlon2) | |
                            # Wrap to [0, pi] so a small turn across the +/-pi seam is not overcounted
                            change = abs(angle2 - angle1)
                            change = min(change, 2 * np.pi - change)
                            bearing_changes.append(change)
features['avg_curvature'] = np.mean(bearing_changes) if bearing_changes else 0 | |
else: | |
features['avg_curvature'] = 0 | |
else: | |
# Default location values | |
features.update({ | |
'LAT_mean': 20, 'LAT_std': 0, 'LAT_max': 20, 'LAT_min': 20, | |
'LON_mean': 140, 'LON_std': 0, 'LON_max': 140, 'LON_min': 140, | |
'genesis_lat': 20, 'genesis_lon': 140, 'genesis_intensity': 30, | |
'lat_range': 0, 'lon_range': 0, 'total_distance': 0, | |
'avg_speed': 0, 'avg_curvature': 0 | |
}) | |
else: | |
# Default location values if columns missing | |
features.update({ | |
'LAT_mean': 20, 'LAT_std': 0, 'LAT_max': 20, 'LAT_min': 20, | |
'LON_mean': 140, 'LON_std': 0, 'LON_max': 140, 'LON_min': 140, | |
'genesis_lat': 20, 'genesis_lon': 140, 'genesis_intensity': 30, | |
'lat_range': 0, 'lon_range': 0, 'total_distance': 0, | |
'avg_speed': 0, 'avg_curvature': 0 | |
}) | |
# Track length | |
features['track_length'] = len(storm_data) | |
# Add seasonal information | |
if 'SEASON' in storm_data.columns: | |
features['season'] = storm_data['SEASON'].iloc[0] | |
else: | |
features['season'] = 2000 | |
# Add basin information | |
if 'BASIN' in storm_data.columns: | |
features['basin'] = storm_data['BASIN'].iloc[0] | |
elif 'SID' in storm_data.columns: | |
features['basin'] = sid[:2] if len(sid) >= 2 else 'WP' | |
else: | |
features['basin'] = 'WP' | |
basic_features.append(features) | |
if not basic_features: | |
logging.error("No valid storm features could be extracted") | |
return None | |
# Convert to DataFrame | |
storm_features = pd.DataFrame(basic_features) | |
# Ensure all numeric columns are properly typed | |
numeric_columns = [col for col in storm_features.columns if col not in ['SID', 'basin']] | |
for col in numeric_columns: | |
storm_features[col] = pd.to_numeric(storm_features[col], errors='coerce').fillna(0) | |
logging.info(f"Successfully extracted features for {len(storm_features)} storms") | |
logging.info(f"Feature columns: {list(storm_features.columns)}") | |
return storm_features | |
except Exception as e: | |
logging.error(f"Error in extract_storm_features: {e}") | |
import traceback | |
traceback.print_exc() | |
return None | |
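# Illustrative sketch (not called by the app): the minimal IBTrACS-style frame
# extract_storm_features() expects; the values below are hypothetical.
def _demo_extract_storm_features():
    demo = pd.DataFrame({
        'SID': ['WP012000'] * 3,
        'USA_WIND': [35, 65, 90],
        'USA_PRES': [1000, 980, 950],
        'LAT': [12.0, 15.0, 18.5],
        'LON': [140.0, 138.0, 135.0],
        'SEASON': [2000] * 3,
    })
    return extract_storm_features(demo)  # -> one feature row for storm WP012000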
def perform_dimensionality_reduction(storm_features, method='umap', n_components=2): | |
"""Perform UMAP or t-SNE dimensionality reduction - FIXED VERSION""" | |
try: | |
if storm_features is None or storm_features.empty: | |
raise ValueError("No storm features provided") | |
# Select numeric features for clustering - FIXED | |
feature_cols = [] | |
for col in storm_features.columns: | |
if col not in ['SID', 'basin'] and storm_features[col].dtype in ['float64', 'int64']: | |
# Check if column has valid data | |
valid_data = storm_features[col].dropna() | |
if len(valid_data) > 0 and valid_data.std() > 0: # Only include columns with variance | |
feature_cols.append(col) | |
if len(feature_cols) == 0: | |
raise ValueError("No valid numeric features found for clustering") | |
logging.info(f"Using {len(feature_cols)} features for clustering: {feature_cols}") | |
X = storm_features[feature_cols].fillna(0) | |
# Check if we have enough samples | |
if len(X) < 2: | |
raise ValueError("Need at least 2 storms for clustering") | |
# Standardize features | |
scaler = StandardScaler() | |
X_scaled = scaler.fit_transform(X) | |
# Perform dimensionality reduction | |
if method.lower() == 'umap' and UMAP_AVAILABLE and len(X_scaled) >= 4: | |
# UMAP parameters optimized for typhoon data - fixed warnings | |
n_neighbors = min(15, len(X_scaled) - 1) | |
reducer = umap.UMAP( | |
n_components=n_components, | |
n_neighbors=n_neighbors, | |
min_dist=0.1, | |
metric='euclidean', | |
random_state=42, | |
n_jobs=1 # Explicitly set to avoid warning | |
) | |
elif method.lower() == 'tsne' and len(X_scaled) >= 4: | |
# t-SNE parameters | |
perplexity = min(30, len(X_scaled) // 4) | |
perplexity = max(1, perplexity) # Ensure perplexity is at least 1 | |
reducer = TSNE( | |
n_components=n_components, | |
perplexity=perplexity, | |
learning_rate=200, | |
n_iter=1000, | |
random_state=42 | |
) | |
else: | |
# Fallback to PCA | |
reducer = PCA(n_components=n_components, random_state=42) | |
# Fit and transform | |
embedding = reducer.fit_transform(X_scaled) | |
logging.info(f"Dimensionality reduction successful: {X_scaled.shape} -> {embedding.shape}") | |
return embedding, feature_cols, scaler | |
except Exception as e: | |
logging.error(f"Error in perform_dimensionality_reduction: {e}") | |
raise | |
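# Usage sketch: with >=4 storms the call below uses UMAP (when installed),
# otherwise it falls back to t-SNE or PCA as coded above.
# embedding, feature_cols, scaler = perform_dimensionality_reduction(storm_features, method='umap')
# embedding.shape  # -> (n_storms, 2)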
def cluster_storms_data(embedding, method='dbscan', eps=0.5, min_samples=3): | |
"""Cluster storms based on their embedding - FIXED NAME VERSION""" | |
try: | |
if len(embedding) < 2: | |
return np.array([0] * len(embedding)) # Single cluster for insufficient data | |
if method.lower() == 'dbscan': | |
# Adjust min_samples based on data size | |
min_samples = min(min_samples, max(2, len(embedding) // 5)) | |
clusterer = DBSCAN(eps=eps, min_samples=min_samples) | |
elif method.lower() == 'kmeans': | |
# Adjust n_clusters based on data size | |
n_clusters = min(5, max(2, len(embedding) // 3)) | |
clusterer = KMeans(n_clusters=n_clusters, random_state=42) | |
else: | |
raise ValueError("Method must be 'dbscan' or 'kmeans'") | |
clusters = clusterer.fit_predict(embedding) | |
logging.info(f"Clustering complete: {len(np.unique(clusters))} clusters found") | |
return clusters | |
except Exception as e: | |
logging.error(f"Error in cluster_storms_data: {e}") | |
# Return single cluster as fallback | |
return np.array([0] * len(embedding)) | |
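# Illustrative sketch (not called by the app): two well-separated synthetic
# blobs should come back as two DBSCAN clusters under the defaults above.
def _demo_cluster_storms():
    rng = np.random.default_rng(42)
    blob_a = rng.normal(loc=(0.0, 0.0), scale=0.1, size=(10, 2))
    blob_b = rng.normal(loc=(5.0, 5.0), scale=0.1, size=(10, 2))
    return cluster_storms_data(np.vstack([blob_a, blob_b]), method='dbscan')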
def create_separate_clustering_plots(storm_features, typhoon_data, method='umap'): | |
"""Create separate plots for clustering analysis - ENHANCED CLARITY VERSION""" | |
try: | |
# Validate inputs | |
if storm_features is None or storm_features.empty: | |
raise ValueError("No storm features available for clustering") | |
if typhoon_data is None or typhoon_data.empty: | |
raise ValueError("No typhoon data available for route visualization") | |
logging.info(f"Starting clustering visualization with {len(storm_features)} storms") | |
# Perform dimensionality reduction | |
embedding, feature_cols, scaler = perform_dimensionality_reduction(storm_features, method) | |
# Perform clustering | |
cluster_labels = cluster_storms_data(embedding, 'dbscan') | |
# Add clustering results to storm features | |
storm_features_viz = storm_features.copy() | |
storm_features_viz['cluster'] = cluster_labels | |
storm_features_viz['dim1'] = embedding[:, 0] | |
storm_features_viz['dim2'] = embedding[:, 1] | |
# Merge with typhoon data for additional info - SAFE MERGE | |
try: | |
storm_info = typhoon_data.groupby('SID').first()[['NAME', 'SEASON']].reset_index() | |
storm_features_viz = storm_features_viz.merge(storm_info, on='SID', how='left') | |
# Fill missing values | |
storm_features_viz['NAME'] = storm_features_viz['NAME'].fillna('UNNAMED') | |
storm_features_viz['SEASON'] = storm_features_viz['SEASON'].fillna(2000) | |
except Exception as merge_error: | |
logging.warning(f"Could not merge storm info: {merge_error}") | |
storm_features_viz['NAME'] = 'UNNAMED' | |
storm_features_viz['SEASON'] = 2000 | |
# Get unique clusters and assign distinct colors | |
unique_clusters = sorted([c for c in storm_features_viz['cluster'].unique() if c != -1]) | |
noise_count = len(storm_features_viz[storm_features_viz['cluster'] == -1]) | |
# 1. Enhanced clustering scatter plot with clear cluster identification | |
fig_cluster = go.Figure() | |
# Add noise points first | |
if noise_count > 0: | |
noise_data = storm_features_viz[storm_features_viz['cluster'] == -1] | |
fig_cluster.add_trace( | |
go.Scatter( | |
x=noise_data['dim1'], | |
y=noise_data['dim2'], | |
mode='markers', | |
marker=dict(color='lightgray', size=8, opacity=0.5, symbol='x'), | |
name=f'Noise ({noise_count} storms)', | |
hovertemplate=( | |
'<b>%{customdata[0]}</b><br>' | |
'Season: %{customdata[1]}<br>' | |
'Cluster: Noise<br>' | |
f'{method.upper()} Dim 1: %{{x:.2f}}<br>' | |
f'{method.upper()} Dim 2: %{{y:.2f}}<br>' | |
'<extra></extra>' | |
), | |
customdata=np.column_stack(( | |
noise_data['NAME'].fillna('UNNAMED'), | |
noise_data['SEASON'].fillna(2000) | |
)) | |
) | |
) | |
# Add clusters with distinct colors and shapes | |
cluster_symbols = ['circle', 'square', 'diamond', 'triangle-up', 'triangle-down', | |
'pentagon', 'hexagon', 'star', 'cross', 'circle-open'] | |
for i, cluster in enumerate(unique_clusters): | |
cluster_data = storm_features_viz[storm_features_viz['cluster'] == cluster] | |
color = CLUSTER_COLORS[i % len(CLUSTER_COLORS)] | |
symbol = cluster_symbols[i % len(cluster_symbols)] | |
fig_cluster.add_trace( | |
go.Scatter( | |
x=cluster_data['dim1'], | |
y=cluster_data['dim2'], | |
mode='markers', | |
marker=dict(color=color, size=10, symbol=symbol, line=dict(width=1, color='white')), | |
name=f'Cluster {cluster} ({len(cluster_data)} storms)', | |
hovertemplate=( | |
'<b>%{customdata[0]}</b><br>' | |
'Season: %{customdata[1]}<br>' | |
f'Cluster: {cluster}<br>' | |
f'{method.upper()} Dim 1: %{{x:.2f}}<br>' | |
f'{method.upper()} Dim 2: %{{y:.2f}}<br>' | |
'Intensity: %{customdata[2]:.0f} kt<br>' | |
'<extra></extra>' | |
), | |
customdata=np.column_stack(( | |
cluster_data['NAME'].fillna('UNNAMED'), | |
cluster_data['SEASON'].fillna(2000), | |
cluster_data['USA_WIND_max'].fillna(0) | |
)) | |
) | |
) | |
fig_cluster.update_layout( | |
title=f'Storm Clustering Analysis using {method.upper()}<br><sub>Each symbol/color represents a distinct storm pattern group</sub>', | |
xaxis_title=f'{method.upper()} Dimension 1', | |
yaxis_title=f'{method.upper()} Dimension 2', | |
height=600, | |
showlegend=True | |
) | |
# 2. ENHANCED route map with cluster legends and clearer representation | |
fig_routes = go.Figure() | |
# Create a comprehensive legend showing cluster characteristics | |
cluster_info_text = [] | |
for i, cluster in enumerate(unique_clusters): | |
cluster_storm_ids = storm_features_viz[storm_features_viz['cluster'] == cluster]['SID'].tolist() | |
color = CLUSTER_COLORS[i % len(CLUSTER_COLORS)] | |
# Get cluster statistics for legend | |
cluster_data = storm_features_viz[storm_features_viz['cluster'] == cluster] | |
avg_intensity = cluster_data['USA_WIND_max'].mean() if 'USA_WIND_max' in cluster_data.columns else 0 | |
avg_pressure = cluster_data['USA_PRES_min'].mean() if 'USA_PRES_min' in cluster_data.columns else 1000 | |
cluster_info_text.append( | |
f"Cluster {cluster}: {len(cluster_storm_ids)} storms, " | |
f"Avg: {avg_intensity:.0f}kt/{avg_pressure:.0f}hPa" | |
) | |
# Add multiple storms per cluster with clear identification | |
storms_added = 0 | |
for j, sid in enumerate(cluster_storm_ids[:8]): # Show up to 8 storms per cluster | |
try: | |
storm_track = typhoon_data[typhoon_data['SID'] == sid].sort_values('ISO_TIME') | |
if len(storm_track) > 1: | |
# Ensure valid coordinates | |
valid_coords = storm_track['LAT'].notna() & storm_track['LON'].notna() | |
storm_track = storm_track[valid_coords] | |
if len(storm_track) > 1: | |
storm_name = storm_track['NAME'].iloc[0] if pd.notna(storm_track['NAME'].iloc[0]) else 'UNNAMED' | |
storm_season = storm_track['SEASON'].iloc[0] if 'SEASON' in storm_track.columns else 'Unknown' | |
# Vary line style for different storms in same cluster | |
line_styles = ['solid', 'dash', 'dot', 'dashdot'] | |
line_style = line_styles[j % len(line_styles)] | |
line_width = 3 if j == 0 else 2 # First storm thicker | |
fig_routes.add_trace( | |
go.Scattergeo( | |
lon=storm_track['LON'], | |
lat=storm_track['LAT'], | |
mode='lines+markers', | |
line=dict(color=color, width=line_width, dash=line_style), | |
marker=dict(color=color, size=3), | |
name=f'C{cluster}: {storm_name} ({storm_season})', | |
showlegend=True, | |
legendgroup=f'cluster_{cluster}', | |
hovertemplate=( | |
f'<b>Cluster {cluster}: {storm_name}</b><br>' | |
'Lat: %{lat:.1f}°<br>' | |
'Lon: %{lon:.1f}°<br>' | |
f'Season: {storm_season}<br>' | |
f'Pattern Group: {cluster}<br>' | |
'<extra></extra>' | |
) | |
) | |
) | |
storms_added += 1 | |
except Exception as track_error: | |
logging.warning(f"Error adding track for storm {sid}: {track_error}") | |
continue | |
# Add cluster centroid marker | |
if len(cluster_storm_ids) > 0: | |
# Calculate average genesis location for cluster | |
cluster_storm_data = storm_features_viz[storm_features_viz['cluster'] == cluster] | |
if 'genesis_lat' in cluster_storm_data.columns and 'genesis_lon' in cluster_storm_data.columns: | |
avg_lat = cluster_storm_data['genesis_lat'].mean() | |
avg_lon = cluster_storm_data['genesis_lon'].mean() | |
fig_routes.add_trace( | |
go.Scattergeo( | |
lon=[avg_lon], | |
lat=[avg_lat], | |
mode='markers', | |
marker=dict( | |
color=color, | |
size=20, | |
symbol='star', | |
line=dict(width=2, color='white') | |
), | |
name=f'C{cluster} Center', | |
showlegend=True, | |
legendgroup=f'cluster_{cluster}', | |
hovertemplate=( | |
f'<b>Cluster {cluster} Genesis Center</b><br>' | |
f'Avg Position: {avg_lat:.1f}°N, {avg_lon:.1f}°E<br>' | |
f'Storms: {len(cluster_storm_ids)}<br>' | |
f'Avg Intensity: {avg_intensity:.0f} kt<br>' | |
'<extra></extra>' | |
) | |
) | |
) | |
# Update route map layout with enhanced information and LARGER SIZE | |
fig_routes.update_layout( | |
title=f"Storm Routes by {method.upper()} Clusters<br><sub>Different line styles = different storms in same cluster | Stars = cluster centers</sub>", | |
geo=dict( | |
projection_type="natural earth", | |
showland=True, | |
landcolor="LightGray", | |
showocean=True, | |
oceancolor="LightBlue", | |
showcoastlines=True, | |
coastlinecolor="Gray", | |
center=dict(lat=20, lon=140), | |
projection_scale=2.5 # Larger map | |
), | |
height=800, # Much larger height | |
width=1200, # Wider map | |
showlegend=True | |
) | |
# Add cluster info annotation | |
cluster_summary = "<br>".join(cluster_info_text) | |
fig_routes.add_annotation( | |
text=f"<b>Cluster Summary:</b><br>{cluster_summary}", | |
xref="paper", yref="paper", | |
x=0.02, y=0.98, | |
showarrow=False, | |
align="left", | |
bgcolor="rgba(255,255,255,0.8)", | |
bordercolor="gray", | |
borderwidth=1 | |
) | |
# 3. Enhanced pressure evolution plot with cluster identification | |
fig_pressure = go.Figure() | |
for i, cluster in enumerate(unique_clusters): | |
cluster_storm_ids = storm_features_viz[storm_features_viz['cluster'] == cluster]['SID'].tolist() | |
color = CLUSTER_COLORS[i % len(CLUSTER_COLORS)] | |
cluster_pressures = [] | |
for j, sid in enumerate(cluster_storm_ids[:5]): # Limit to 5 storms per cluster | |
try: | |
storm_track = typhoon_data[typhoon_data['SID'] == sid].sort_values('ISO_TIME') | |
if len(storm_track) > 1 and 'USA_PRES' in storm_track.columns: | |
pressure_values = pd.to_numeric(storm_track['USA_PRES'], errors='coerce').dropna() | |
if len(pressure_values) > 0: | |
storm_name = storm_track['NAME'].iloc[0] if pd.notna(storm_track['NAME'].iloc[0]) else 'UNNAMED' | |
# Normalize time to show relative progression | |
normalized_time = np.linspace(0, 100, len(pressure_values)) | |
fig_pressure.add_trace( | |
go.Scatter( | |
x=normalized_time, | |
y=pressure_values, | |
mode='lines', | |
line=dict(color=color, width=2, dash='solid' if j == 0 else 'dash'), | |
name=f'C{cluster}: {storm_name}' if j == 0 else None, | |
showlegend=(j == 0), | |
legendgroup=f'pressure_cluster_{cluster}', | |
hovertemplate=( | |
f'<b>Cluster {cluster}: {storm_name}</b><br>' | |
'Progress: %{x:.0f}%<br>' | |
'Pressure: %{y:.0f} hPa<br>' | |
'<extra></extra>' | |
), | |
opacity=0.8 if j == 0 else 0.5 | |
) | |
) | |
cluster_pressures.extend(pressure_values) | |
except Exception as e: | |
continue | |
# Add cluster average line | |
if cluster_pressures: | |
avg_pressure = np.mean(cluster_pressures) | |
fig_pressure.add_hline( | |
y=avg_pressure, | |
line_dash="dot", | |
line_color=color, | |
annotation_text=f"C{cluster} Avg: {avg_pressure:.0f}", | |
annotation_position="right" | |
) | |
fig_pressure.update_layout( | |
title=f"Pressure Evolution by {method.upper()} Clusters<br><sub>Normalized timeline (0-100%) | Dotted lines = cluster averages</sub>", | |
xaxis_title="Storm Progress (%)", | |
yaxis_title="Pressure (hPa)", | |
height=500 | |
) | |
# 4. Enhanced wind evolution plot | |
fig_wind = go.Figure() | |
for i, cluster in enumerate(unique_clusters): | |
cluster_storm_ids = storm_features_viz[storm_features_viz['cluster'] == cluster]['SID'].tolist() | |
color = CLUSTER_COLORS[i % len(CLUSTER_COLORS)] | |
cluster_winds = [] | |
for j, sid in enumerate(cluster_storm_ids[:5]): # Limit to 5 storms per cluster | |
try: | |
storm_track = typhoon_data[typhoon_data['SID'] == sid].sort_values('ISO_TIME') | |
if len(storm_track) > 1 and 'USA_WIND' in storm_track.columns: | |
wind_values = pd.to_numeric(storm_track['USA_WIND'], errors='coerce').dropna() | |
if len(wind_values) > 0: | |
storm_name = storm_track['NAME'].iloc[0] if pd.notna(storm_track['NAME'].iloc[0]) else 'UNNAMED' | |
# Normalize time to show relative progression | |
normalized_time = np.linspace(0, 100, len(wind_values)) | |
fig_wind.add_trace( | |
go.Scatter( | |
x=normalized_time, | |
y=wind_values, | |
mode='lines', | |
line=dict(color=color, width=2, dash='solid' if j == 0 else 'dash'), | |
name=f'C{cluster}: {storm_name}' if j == 0 else None, | |
showlegend=(j == 0), | |
legendgroup=f'wind_cluster_{cluster}', | |
hovertemplate=( | |
f'<b>Cluster {cluster}: {storm_name}</b><br>' | |
'Progress: %{x:.0f}%<br>' | |
'Wind: %{y:.0f} kt<br>' | |
'<extra></extra>' | |
), | |
opacity=0.8 if j == 0 else 0.5 | |
) | |
) | |
cluster_winds.extend(wind_values) | |
except Exception as e: | |
continue | |
# Add cluster average line | |
if cluster_winds: | |
avg_wind = np.mean(cluster_winds) | |
fig_wind.add_hline( | |
y=avg_wind, | |
line_dash="dot", | |
line_color=color, | |
annotation_text=f"C{cluster} Avg: {avg_wind:.0f}", | |
annotation_position="right" | |
) | |
fig_wind.update_layout( | |
title=f"Wind Speed Evolution by {method.upper()} Clusters<br><sub>Normalized timeline (0-100%) | Dotted lines = cluster averages</sub>", | |
xaxis_title="Storm Progress (%)", | |
yaxis_title="Wind Speed (kt)", | |
height=500 | |
) | |
# Generate enhanced cluster statistics with clear explanations | |
try: | |
stats_text = f"ENHANCED {method.upper()} CLUSTER ANALYSIS RESULTS\n" + "="*60 + "\n\n" | |
stats_text += f"🔍 DIMENSIONALITY REDUCTION: {method.upper()}\n" | |
stats_text += f"🎯 CLUSTERING ALGORITHM: DBSCAN (automatic pattern discovery)\n" | |
stats_text += f"📊 TOTAL STORMS ANALYZED: {len(storm_features_viz)}\n" | |
stats_text += f"🎨 CLUSTERS DISCOVERED: {len(unique_clusters)}\n" | |
if noise_count > 0: | |
stats_text += f"❌ NOISE POINTS: {noise_count} storms (don't fit clear patterns)\n" | |
stats_text += "\n" | |
for cluster in sorted(storm_features_viz['cluster'].unique()): | |
cluster_data = storm_features_viz[storm_features_viz['cluster'] == cluster] | |
storm_count = len(cluster_data) | |
if cluster == -1: | |
stats_text += f"❌ NOISE GROUP: {storm_count} storms\n" | |
stats_text += " → These storms don't follow the main patterns\n" | |
stats_text += " → May represent unique or rare storm behaviors\n\n" | |
continue | |
stats_text += f"🎯 CLUSTER {cluster}: {storm_count} storms\n" | |
stats_text += f" 🎨 Color: {CLUSTER_COLORS[cluster % len(CLUSTER_COLORS)]}\n" | |
# Add detailed statistics if available | |
if 'USA_WIND_max' in cluster_data.columns: | |
wind_mean = cluster_data['USA_WIND_max'].mean() | |
wind_std = cluster_data['USA_WIND_max'].std() | |
stats_text += f" 💨 Intensity: {wind_mean:.1f} ± {wind_std:.1f} kt\n" | |
if 'USA_PRES_min' in cluster_data.columns: | |
pres_mean = cluster_data['USA_PRES_min'].mean() | |
pres_std = cluster_data['USA_PRES_min'].std() | |
stats_text += f" 🌡️ Pressure: {pres_mean:.1f} ± {pres_std:.1f} hPa\n" | |
if 'track_length' in cluster_data.columns: | |
track_mean = cluster_data['track_length'].mean() | |
stats_text += f" 📏 Avg Track Length: {track_mean:.1f} points\n" | |
if 'genesis_lat' in cluster_data.columns and 'genesis_lon' in cluster_data.columns: | |
lat_mean = cluster_data['genesis_lat'].mean() | |
lon_mean = cluster_data['genesis_lon'].mean() | |
stats_text += f" 🎯 Genesis Region: {lat_mean:.1f}°N, {lon_mean:.1f}°E\n" | |
                # Add interpretation (guarded: wind_mean only exists when intensity stats were computed)
                if 'USA_WIND_max' in cluster_data.columns:
                    if wind_mean < 50:
                        stats_text += " 💡 Pattern: Weaker storm group\n"
                    elif wind_mean > 100:
                        stats_text += " 💡 Pattern: Intense storm group\n"
                    else:
                        stats_text += " 💡 Pattern: Moderate intensity group\n"
stats_text += "\n" | |
# Add explanation of the analysis | |
stats_text += "📖 INTERPRETATION GUIDE:\n" | |
stats_text += f"• {method.upper()} reduces storm characteristics to 2D for visualization\n" | |
stats_text += "• DBSCAN finds natural groupings without preset number of clusters\n" | |
stats_text += "• Each cluster represents storms with similar behavior patterns\n" | |
stats_text += "• Route colors match cluster colors from the similarity plot\n" | |
stats_text += "• Stars on map show average genesis locations for each cluster\n" | |
stats_text += "• Temporal plots show how each cluster behaves over time\n\n" | |
stats_text += f"🔧 FEATURES USED FOR CLUSTERING:\n" | |
stats_text += f" Total: {len(feature_cols)} storm characteristics\n" | |
stats_text += f" Including: intensity, pressure, track shape, genesis location\n" | |
except Exception as stats_error: | |
stats_text = f"Error generating enhanced statistics: {str(stats_error)}" | |
return fig_cluster, fig_routes, fig_pressure, fig_wind, stats_text | |
except Exception as e: | |
logging.error(f"Error in enhanced clustering analysis: {e}") | |
import traceback | |
traceback.print_exc() | |
error_fig = go.Figure() | |
error_fig.add_annotation( | |
text=f"Error in clustering analysis: {str(e)}", | |
xref="paper", yref="paper", | |
x=0.5, y=0.5, xanchor='center', yanchor='middle', | |
showarrow=False, font_size=16 | |
) | |
return error_fig, error_fig, error_fig, error_fig, f"Error in clustering: {str(e)}" | |
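# Usage sketch (assumption: the five outputs are wired to four gr.Plot
# components and a gr.Textbox in the UI):
# fig_cluster, fig_routes, fig_pressure, fig_wind, stats = \
#     create_separate_clustering_plots(storm_features, typhoon_data, method='umap')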
# ----------------------------- | |
# ENHANCED: Advanced Prediction System with Route Forecasting | |
# ----------------------------- | |
def create_advanced_prediction_model(typhoon_data): | |
"""Create advanced ML model for intensity and route prediction""" | |
try: | |
if typhoon_data is None or typhoon_data.empty: | |
return None, "No data available for model training" | |
# Prepare training data | |
features = [] | |
targets = [] | |
for sid in typhoon_data['SID'].unique(): | |
storm_data = typhoon_data[typhoon_data['SID'] == sid].sort_values('ISO_TIME') | |
if len(storm_data) < 3: # Need at least 3 points for prediction | |
continue | |
for i in range(len(storm_data) - 1): | |
current = storm_data.iloc[i] | |
next_point = storm_data.iloc[i + 1] | |
# Extract features (current state) | |
feature_row = [] | |
# Current position | |
feature_row.extend([ | |
current.get('LAT', 20), | |
current.get('LON', 140) | |
]) | |
# Current intensity | |
feature_row.extend([ | |
current.get('USA_WIND', 30), | |
current.get('USA_PRES', 1000) | |
]) | |
# Time features | |
if 'ISO_TIME' in current and pd.notna(current['ISO_TIME']): | |
month = current['ISO_TIME'].month | |
day_of_year = current['ISO_TIME'].dayofyear | |
else: | |
month = 9 # Peak season default | |
day_of_year = 250 | |
feature_row.extend([month, day_of_year]) | |
# Motion features (if previous point exists) | |
if i > 0: | |
prev = storm_data.iloc[i - 1] | |
dlat = current.get('LAT', 20) - prev.get('LAT', 20) | |
dlon = current.get('LON', 140) - prev.get('LON', 140) | |
speed = np.sqrt(dlat**2 + dlon**2) | |
bearing = np.arctan2(dlat, dlon) | |
else: | |
speed = 0 | |
bearing = 0 | |
feature_row.extend([speed, bearing]) | |
features.append(feature_row) | |
# Target: next position and intensity | |
targets.append([ | |
next_point.get('LAT', 20), | |
next_point.get('LON', 140), | |
next_point.get('USA_WIND', 30) | |
]) | |
if len(features) < 10: # Need sufficient training data | |
return None, "Insufficient data for model training" | |
# Train model | |
X = np.array(features) | |
y = np.array(targets) | |
# Split data | |
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) | |
# Create separate models for position and intensity | |
models = {} | |
# Position model (lat, lon) | |
pos_model = RandomForestRegressor(n_estimators=100, random_state=42) | |
pos_model.fit(X_train, y_train[:, :2]) | |
models['position'] = pos_model | |
# Intensity model (wind speed) | |
int_model = RandomForestRegressor(n_estimators=100, random_state=42) | |
int_model.fit(X_train, y_train[:, 2]) | |
models['intensity'] = int_model | |
# Calculate model performance | |
pos_pred = pos_model.predict(X_test) | |
int_pred = int_model.predict(X_test) | |
pos_mae = mean_absolute_error(y_test[:, :2], pos_pred) | |
int_mae = mean_absolute_error(y_test[:, 2], int_pred) | |
model_info = f"Position MAE: {pos_mae:.2f}°, Intensity MAE: {int_mae:.2f} kt" | |
return models, model_info | |
except Exception as e: | |
return None, f"Error creating prediction model: {str(e)}" | |
def create_animated_route_visualization(prediction_results, show_uncertainty=True, enable_animation=True): | |
"""Create comprehensive animated route visualization with intensity plots""" | |
try: | |
if 'route_forecast' not in prediction_results or not prediction_results['route_forecast']: | |
return None, "No route forecast data available" | |
route_data = prediction_results['route_forecast'] | |
# Extract data for plotting | |
hours = [point['hour'] for point in route_data] | |
lats = [point['lat'] for point in route_data] | |
lons = [point['lon'] for point in route_data] | |
intensities = [point['intensity_kt'] for point in route_data] | |
categories = [point['category'] for point in route_data] | |
confidences = [point.get('confidence', 0.8) for point in route_data] | |
stages = [point.get('development_stage', 'Unknown') for point in route_data] | |
speeds = [point.get('forward_speed_kmh', 15) for point in route_data] | |
pressures = [point.get('pressure_hpa', 1013) for point in route_data] | |
# Create subplot layout with map and intensity plot | |
fig = make_subplots( | |
rows=2, cols=2, | |
            subplot_titles=('Storm Track Animation', 'Wind Speed vs Time', 'Forward Speed vs Time'),
specs=[[{"type": "geo", "colspan": 2}, None], | |
[{"type": "xy"}, {"type": "xy"}]], | |
vertical_spacing=0.15, | |
row_heights=[0.7, 0.3] | |
) | |
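        # Grid note: row 1 is a single geo map spanning both columns; row 2
        # holds the two cartesian panels, so the figure has three axes in total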
if enable_animation: | |
# Add frames for animation | |
frames = [] | |
# Static background elements first | |
# Add complete track as background | |
fig.add_trace( | |
go.Scattergeo( | |
lon=lons, | |
lat=lats, | |
mode='lines', | |
line=dict(color='lightgray', width=2, dash='dot'), | |
name='Complete Track', | |
showlegend=True, | |
opacity=0.4 | |
), | |
row=1, col=1 | |
) | |
# Genesis marker (always visible) | |
fig.add_trace( | |
go.Scattergeo( | |
lon=[lons[0]], | |
lat=[lats[0]], | |
mode='markers', | |
marker=dict( | |
size=25, | |
color='gold', | |
symbol='star', | |
line=dict(width=3, color='black') | |
), | |
name='Genesis', | |
showlegend=True, | |
hovertemplate=( | |
f"<b>GENESIS</b><br>" | |
f"Position: {lats[0]:.1f}°N, {lons[0]:.1f}°E<br>" | |
f"Initial: {intensities[0]:.0f} kt<br>" | |
f"Region: {prediction_results['genesis_info']['description']}<br>" | |
"<extra></extra>" | |
) | |
), | |
row=1, col=1 | |
) | |
# Create animation frames | |
for i in range(len(route_data)): | |
frame_lons = lons[:i+1] | |
frame_lats = lats[:i+1] | |
frame_intensities = intensities[:i+1] | |
frame_categories = categories[:i+1] | |
frame_hours = hours[:i+1] | |
# Current position marker | |
current_color = enhanced_color_map.get(frame_categories[-1], 'rgb(128,128,128)') | |
current_size = 15 + (frame_intensities[-1] / 10) | |
frame_data = [ | |
# Animated track up to current point | |
go.Scattergeo( | |
lon=frame_lons, | |
lat=frame_lats, | |
mode='lines+markers', | |
line=dict(color='blue', width=4), | |
marker=dict( | |
size=[8 + (intensity/15) for intensity in frame_intensities], | |
color=[enhanced_color_map.get(cat, 'rgb(128,128,128)') for cat in frame_categories], | |
opacity=0.8, | |
line=dict(width=1, color='white') | |
), | |
name='Current Track', | |
showlegend=False | |
), | |
# Current position highlight | |
go.Scattergeo( | |
lon=[frame_lons[-1]], | |
lat=[frame_lats[-1]], | |
mode='markers', | |
marker=dict( | |
size=current_size, | |
color=current_color, | |
symbol='circle', | |
line=dict(width=3, color='white') | |
), | |
name='Current Position', | |
showlegend=False, | |
hovertemplate=( | |
f"<b>Hour {route_data[i]['hour']}</b><br>" | |
f"Position: {lats[i]:.1f}°N, {lons[i]:.1f}°E<br>" | |
f"Intensity: {intensities[i]:.0f} kt<br>" | |
f"Category: {categories[i]}<br>" | |
f"Stage: {stages[i]}<br>" | |
f"Speed: {speeds[i]:.1f} km/h<br>" | |
f"Confidence: {confidences[i]*100:.0f}%<br>" | |
"<extra></extra>" | |
) | |
), | |
# Animated wind plot | |
go.Scatter( | |
x=frame_hours, | |
y=frame_intensities, | |
mode='lines+markers', | |
line=dict(color='red', width=3), | |
marker=dict(size=6, color='red'), | |
name='Wind Speed', | |
showlegend=False, | |
yaxis='y2' | |
), | |
# Animated speed plot | |
go.Scatter( | |
x=frame_hours, | |
y=speeds[:i+1], | |
mode='lines+markers', | |
line=dict(color='green', width=2), | |
marker=dict(size=4, color='green'), | |
name='Forward Speed', | |
showlegend=False, | |
yaxis='y3' | |
), | |
# Animated pressure plot | |
go.Scatter( | |
x=frame_hours, | |
y=pressures[:i+1], | |
mode='lines+markers', | |
line=dict(color='purple', width=2), | |
marker=dict(size=4, color='purple'), | |
name='Pressure', | |
showlegend=False, | |
yaxis='y4' | |
) | |
] | |
frames.append(go.Frame( | |
data=frame_data, | |
name=str(i), | |
layout=go.Layout( | |
title=f"Storm Development Animation - Hour {route_data[i]['hour']}<br>" | |
f"Intensity: {intensities[i]:.0f} kt | Category: {categories[i]} | Stage: {stages[i]} | Speed: {speeds[i]:.1f} km/h" | |
) | |
)) | |
fig.frames = frames | |
# Add play/pause controls | |
fig.update_layout( | |
updatemenus=[ | |
{ | |
"buttons": [ | |
{ | |
"args": [None, {"frame": {"duration": 1000, "redraw": True}, | |
"fromcurrent": True, "transition": {"duration": 300}}], | |
"label": "▶️ Play", | |
"method": "animate" | |
}, | |
{ | |
"args": [[None], {"frame": {"duration": 0, "redraw": True}, | |
"mode": "immediate", "transition": {"duration": 0}}], | |
"label": "⏸️ Pause", | |
"method": "animate" | |
}, | |
{ | |
"args": [None, {"frame": {"duration": 500, "redraw": True}, | |
"fromcurrent": True, "transition": {"duration": 300}}], | |
"label": "⏩ Fast", | |
"method": "animate" | |
} | |
], | |
"direction": "left", | |
"pad": {"r": 10, "t": 87}, | |
"showactive": False, | |
"type": "buttons", | |
"x": 0.1, | |
"xanchor": "right", | |
"y": 0, | |
"yanchor": "top" | |
} | |
], | |
sliders=[{ | |
"active": 0, | |
"yanchor": "top", | |
"xanchor": "left", | |
"currentvalue": { | |
"font": {"size": 16}, | |
"prefix": "Hour: ", | |
"visible": True, | |
"xanchor": "right" | |
}, | |
"transition": {"duration": 300, "easing": "cubic-in-out"}, | |
"pad": {"b": 10, "t": 50}, | |
"len": 0.9, | |
"x": 0.1, | |
"y": 0, | |
"steps": [ | |
{ | |
"args": [[str(i)], {"frame": {"duration": 300, "redraw": True}, | |
"mode": "immediate", "transition": {"duration": 300}}], | |
"label": f"H{route_data[i]['hour']}", | |
"method": "animate" | |
} | |
for i in range(0, len(route_data), max(1, len(route_data)//20)) # Limit slider steps | |
] | |
}] | |
) | |
else: | |
# Static view with all points | |
# Add genesis marker | |
fig.add_trace( | |
go.Scattergeo( | |
lon=[lons[0]], | |
lat=[lats[0]], | |
mode='markers', | |
marker=dict( | |
size=25, | |
color='gold', | |
symbol='star', | |
line=dict(width=3, color='black') | |
), | |
name='Genesis', | |
showlegend=True, | |
hovertemplate=( | |
f"<b>GENESIS</b><br>" | |
f"Position: {lats[0]:.1f}°N, {lons[0]:.1f}°E<br>" | |
f"Initial: {intensities[0]:.0f} kt<br>" | |
"<extra></extra>" | |
) | |
), | |
row=1, col=1 | |
) | |
# Add full track with intensity coloring | |
for i in range(0, len(route_data), max(1, len(route_data)//50)): # Sample points for performance | |
point = route_data[i] | |
color = enhanced_color_map.get(point['category'], 'rgb(128,128,128)') | |
size = 8 + (point['intensity_kt'] / 12) | |
fig.add_trace( | |
go.Scattergeo( | |
lon=[point['lon']], | |
lat=[point['lat']], | |
mode='markers', | |
marker=dict( | |
size=size, | |
color=color, | |
opacity=point.get('confidence', 0.8), | |
line=dict(width=1, color='white') | |
), | |
name=f"Hour {point['hour']}" if i % 10 == 0 else None, | |
showlegend=(i % 10 == 0), | |
hovertemplate=( | |
f"<b>Hour {point['hour']}</b><br>" | |
f"Position: {point['lat']:.1f}°N, {point['lon']:.1f}°E<br>" | |
f"Intensity: {point['intensity_kt']:.0f} kt<br>" | |
f"Category: {point['category']}<br>" | |
f"Stage: {point.get('development_stage', 'Unknown')}<br>" | |
f"Speed: {point.get('forward_speed_kmh', 15):.1f} km/h<br>" | |
"<extra></extra>" | |
) | |
), | |
row=1, col=1 | |
) | |
# Connect points with track line | |
fig.add_trace( | |
go.Scattergeo( | |
lon=lons, | |
lat=lats, | |
mode='lines', | |
line=dict(color='black', width=3), | |
name='Forecast Track', | |
showlegend=True | |
), | |
row=1, col=1 | |
) | |
# Add static intensity, speed, and pressure plots | |
# Wind speed plot | |
fig.add_trace( | |
go.Scatter( | |
x=hours, | |
y=intensities, | |
mode='lines+markers', | |
line=dict(color='red', width=3), | |
marker=dict(size=6, color='red'), | |
name='Wind Speed', | |
showlegend=False | |
), | |
row=2, col=1 | |
) | |
# Add category threshold lines | |
thresholds = [34, 64, 83, 96, 113, 137] | |
threshold_names = ['TS', 'C1', 'C2', 'C3', 'C4', 'C5'] | |
for thresh, name in zip(thresholds, threshold_names): | |
fig.add_trace( | |
go.Scatter( | |
x=[min(hours), max(hours)], | |
y=[thresh, thresh], | |
mode='lines', | |
line=dict(color='gray', width=1, dash='dash'), | |
name=name, | |
showlegend=False, | |
hovertemplate=f"{name} Threshold: {thresh} kt<extra></extra>" | |
), | |
row=2, col=1 | |
) | |
# Forward speed plot | |
fig.add_trace( | |
go.Scatter( | |
x=hours, | |
y=speeds, | |
mode='lines+markers', | |
line=dict(color='green', width=2), | |
marker=dict(size=4, color='green'), | |
name='Forward Speed', | |
showlegend=False | |
), | |
row=2, col=2 | |
) | |
# Add uncertainty cone if requested | |
if show_uncertainty and len(route_data) > 1: | |
uncertainty_lats_upper = [] | |
uncertainty_lats_lower = [] | |
uncertainty_lons_upper = [] | |
uncertainty_lons_lower = [] | |
for i, point in enumerate(route_data): | |
# Uncertainty grows with time and decreases with confidence | |
base_uncertainty = 0.4 + (i / len(route_data)) * 1.8 | |
confidence_factor = point.get('confidence', 0.8) | |
uncertainty = base_uncertainty / confidence_factor | |
uncertainty_lats_upper.append(point['lat'] + uncertainty) | |
uncertainty_lats_lower.append(point['lat'] - uncertainty) | |
uncertainty_lons_upper.append(point['lon'] + uncertainty) | |
uncertainty_lons_lower.append(point['lon'] - uncertainty) | |
uncertainty_lats = uncertainty_lats_upper + uncertainty_lats_lower[::-1] | |
uncertainty_lons = uncertainty_lons_upper + uncertainty_lons_lower[::-1] | |
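            # Closed polygon: upper boundary forward, then lower boundary reversed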
fig.add_trace( | |
go.Scattergeo( | |
lon=uncertainty_lons, | |
lat=uncertainty_lats, | |
mode='lines', | |
fill='toself', | |
fillcolor='rgba(128,128,128,0.15)', | |
line=dict(color='rgba(128,128,128,0.4)', width=1), | |
name='Uncertainty Cone', | |
showlegend=True | |
), | |
row=1, col=1 | |
) | |
# Enhanced layout | |
fig.update_layout( | |
title=f"Comprehensive Storm Development Analysis<br><sub>Starting from {prediction_results['genesis_info']['description']}</sub>", | |
height=1000, # Taller for better subplot visibility | |
width=1400, # Wider | |
showlegend=True | |
) | |
# Update geo layout | |
fig.update_geos( | |
projection_type="natural earth", | |
showland=True, | |
landcolor="LightGray", | |
showocean=True, | |
oceancolor="LightBlue", | |
showcoastlines=True, | |
coastlinecolor="DarkGray", | |
showlakes=True, | |
lakecolor="LightBlue", | |
center=dict(lat=np.mean(lats), lon=np.mean(lons)), | |
projection_scale=2.0, | |
row=1, col=1 | |
) | |
# Update subplot axes | |
fig.update_xaxes(title_text="Forecast Hour", row=2, col=1) | |
fig.update_yaxes(title_text="Wind Speed (kt)", row=2, col=1) | |
fig.update_xaxes(title_text="Forecast Hour", row=2, col=2) | |
fig.update_yaxes(title_text="Forward Speed (km/h)", row=2, col=2) | |
# Generate enhanced forecast text | |
current = prediction_results['current_prediction'] | |
genesis_info = prediction_results['genesis_info'] | |
# Calculate some statistics | |
max_intensity = max(intensities) | |
max_intensity_time = hours[intensities.index(max_intensity)] | |
avg_speed = np.mean(speeds) | |
forecast_text = f""" | |
COMPREHENSIVE STORM DEVELOPMENT FORECAST | |
{'='*65} | |
GENESIS CONDITIONS: | |
• Region: {current.get('genesis_region', 'Unknown')} | |
• Description: {genesis_info['description']} | |
• Starting Position: {lats[0]:.1f}°N, {lons[0]:.1f}°E | |
• Initial Intensity: {current['intensity_kt']:.0f} kt (Tropical Depression) | |
• Genesis Pressure: {current.get('pressure_hpa', 1008):.0f} hPa | |
STORM CHARACTERISTICS: | |
• Peak Intensity: {max_intensity:.0f} kt at Hour {max_intensity_time} | |
• Average Forward Speed: {avg_speed:.1f} km/h | |
• Total Distance: {sum(speeds[i] * (hours[i] - hours[i-1]) for i in range(1, len(speeds))):.0f} km
• Final Position: {lats[-1]:.1f}°N, {lons[-1]:.1f}°E | |
• Forecast Duration: {hours[-1]} hours ({hours[-1]/24:.1f} days) | |
DEVELOPMENT TIMELINE: | |
• Hour 0 (Genesis): {intensities[0]:.0f} kt - {categories[0]} | |
• Hour 24: {intensities[min(4, len(intensities)-1)]:.0f} kt - {categories[min(4, len(categories)-1)]} | |
• Hour 48: {intensities[min(8, len(intensities)-1)]:.0f} kt - {categories[min(8, len(categories)-1)]} | |
• Hour 72: {intensities[min(12, len(intensities)-1)]:.0f} kt - {categories[min(12, len(categories)-1)]} | |
• Final: {intensities[-1]:.0f} kt - {categories[-1]} | |
MOTION ANALYSIS: | |
• Initial Motion: {speeds[0]:.1f} km/h | |
• Peak Speed: {max(speeds):.1f} km/h at Hour {hours[speeds.index(max(speeds))]} | |
• Final Motion: {speeds[-1]:.1f} km/h | |
CONFIDENCE ASSESSMENT: | |
• Genesis Likelihood: {prediction_results['confidence_scores'].get('genesis', 0.85)*100:.0f}% | |
• 24-hour Track: {prediction_results['confidence_scores'].get('position_24h', 0.85)*100:.0f}% | |
• 48-hour Track: {prediction_results['confidence_scores'].get('position_48h', 0.75)*100:.0f}% | |
• 72-hour Track: {prediction_results['confidence_scores'].get('position_72h', 0.65)*100:.0f}% | |
• Long-term: {prediction_results['confidence_scores'].get('long_term', 0.50)*100:.0f}% | |
FEATURES: | |
{"✅ Animation Enabled - Use controls to watch development" if enable_animation else "📊 Static Analysis - All time steps displayed"} | |
✅ Realistic Forward Speeds (15-25 km/h typical) | |
✅ Environmental Coupling (ENSO, SST, Shear) | |
✅ Multi-stage Development Cycle | |
✅ Uncertainty Quantification | |
MODEL: {prediction_results['model_info']} | |
""" | |
return fig, forecast_text.strip() | |
except Exception as e: | |
error_msg = f"Error creating comprehensive visualization: {str(e)}" | |
logging.error(error_msg) | |
import traceback | |
traceback.print_exc() | |
return None, error_msg | |
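# Each point in prediction_results['route_forecast'] is a dict shaped like
# (hypothetical values):
# {'hour': 6, 'lat': 15.2, 'lon': 138.4, 'intensity_kt': 45,
#  'category': 'Tropical Storm', 'confidence': 0.85,
#  'development_stage': 'Development', 'forward_speed_kmh': 18.0,
#  'pressure_hpa': 995.0}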
# ----------------------------- | |
# Regression Functions (Original) | |
# ----------------------------- | |
def perform_wind_regression(start_year, start_month, end_year, end_month): | |
"""Perform wind regression analysis""" | |
start_date = datetime(start_year, start_month, 1) | |
    end_date = datetime(end_year, end_month, 28)  # day 28 sidesteps month-length edge cases (drops the last 1-3 days)
data = merged_data[(merged_data['ISO_TIME']>=start_date) & (merged_data['ISO_TIME']<=end_date)].dropna(subset=['USA_WIND','ONI']) | |
data['severe_typhoon'] = (data['USA_WIND']>=64).astype(int) | |
X = sm.add_constant(data['ONI']) | |
y = data['severe_typhoon'] | |
try: | |
model = sm.Logit(y, X).fit(disp=0) | |
beta_1 = model.params['ONI'] | |
exp_beta_1 = np.exp(beta_1) | |
p_value = model.pvalues['ONI'] | |
return f"Wind Regression: β1={beta_1:.4f}, Odds Ratio={exp_beta_1:.4f}, P-value={p_value:.4f}" | |
except Exception as e: | |
return f"Wind Regression Error: {e}" | |
def perform_pressure_regression(start_year, start_month, end_year, end_month): | |
"""Perform pressure regression analysis""" | |
start_date = datetime(start_year, start_month, 1) | |
end_date = datetime(end_year, end_month, 28) | |
data = merged_data[(merged_data['ISO_TIME']>=start_date) & (merged_data['ISO_TIME']<=end_date)].dropna(subset=['USA_PRES','ONI']) | |
data['intense_typhoon'] = (data['USA_PRES']<=950).astype(int) | |
X = sm.add_constant(data['ONI']) | |
y = data['intense_typhoon'] | |
try: | |
model = sm.Logit(y, X).fit(disp=0) | |
beta_1 = model.params['ONI'] | |
exp_beta_1 = np.exp(beta_1) | |
p_value = model.pvalues['ONI'] | |
return f"Pressure Regression: β1={beta_1:.4f}, Odds Ratio={exp_beta_1:.4f}, P-value={p_value:.4f}" | |
except Exception as e: | |
return f"Pressure Regression Error: {e}" | |
def perform_longitude_regression(start_year, start_month, end_year, end_month): | |
"""Perform longitude regression analysis""" | |
start_date = datetime(start_year, start_month, 1) | |
end_date = datetime(end_year, end_month, 28) | |
data = merged_data[(merged_data['ISO_TIME']>=start_date) & (merged_data['ISO_TIME']<=end_date)].dropna(subset=['LON','ONI']) | |
data['western_typhoon'] = (data['LON']<=140).astype(int) | |
X = sm.add_constant(data['ONI']) | |
y = data['western_typhoon'] | |
try: | |
        model = sm.Logit(y, X).fit(disp=0)  # logistic fit (binary target), matching the wind/pressure analyses
        beta_1 = model.params['ONI']
        exp_beta_1 = np.exp(beta_1)
        p_value = model.pvalues['ONI']
        return f"Longitude Regression: β1={beta_1:.4f}, Odds Ratio={exp_beta_1:.4f}, P-value={p_value:.4f}"
except Exception as e: | |
return f"Longitude Regression Error: {e}" | |
# ----------------------------- | |
# Visualization Functions (Enhanced) | |
# ----------------------------- | |
def get_full_tracks(start_year, start_month, end_year, end_month, enso_phase, typhoon_search): | |
"""Get full typhoon tracks""" | |
start_date = datetime(start_year, start_month, 1) | |
end_date = datetime(end_year, end_month, 28) | |
filtered_data = merged_data[(merged_data['ISO_TIME']>=start_date) & (merged_data['ISO_TIME']<=end_date)].copy() | |
filtered_data['ENSO_Phase'] = filtered_data['ONI'].apply(classify_enso_phases) | |
if enso_phase != 'all': | |
filtered_data = filtered_data[filtered_data['ENSO_Phase'] == enso_phase.capitalize()] | |
unique_storms = filtered_data['SID'].unique() | |
count = len(unique_storms) | |
fig = go.Figure() | |
for sid in unique_storms: | |
storm_data = typhoon_data[typhoon_data['SID']==sid] | |
if storm_data.empty: | |
continue | |
name = storm_data['NAME'].iloc[0] if pd.notnull(storm_data['NAME'].iloc[0]) else "Unnamed" | |
basin = storm_data['SID'].iloc[0][:2] | |
storm_oni = filtered_data[filtered_data['SID']==sid]['ONI'].iloc[0] | |
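        # Standard ONI thresholds: >= +0.5 El Niño (red), <= -0.5 La Niña (blue), else neutral (green)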
color = 'red' if storm_oni>=0.5 else ('blue' if storm_oni<=-0.5 else 'green') | |
fig.add_trace(go.Scattergeo( | |
lon=storm_data['LON'], lat=storm_data['LAT'], mode='lines', | |
name=f"{name} ({basin})", | |
line=dict(width=1.5, color=color), hoverinfo="name" | |
)) | |
if typhoon_search: | |
search_mask = typhoon_data['NAME'].str.contains(typhoon_search, case=False, na=False) | |
if search_mask.any(): | |
for sid in typhoon_data[search_mask]['SID'].unique(): | |
storm_data = typhoon_data[typhoon_data['SID']==sid] | |
fig.add_trace(go.Scattergeo( | |
lon=storm_data['LON'], lat=storm_data['LAT'], mode='lines+markers', | |
name=f"MATCHED: {storm_data['NAME'].iloc[0]}", | |
line=dict(width=3, color='yellow'), | |
marker=dict(size=5), hoverinfo="name" | |
)) | |
fig.update_layout( | |
title=f"Typhoon Tracks ({start_year}-{start_month} to {end_year}-{end_month})", | |
geo=dict( | |
projection_type='natural earth', | |
showland=True, | |
showcoastlines=True, | |
landcolor='rgb(243,243,243)', | |
countrycolor='rgb(204,204,204)', | |
coastlinecolor='rgb(204,204,204)', | |
center=dict(lon=140, lat=20), | |
projection_scale=3 | |
), | |
legend_title="Typhoons by ENSO Phase", | |
showlegend=True, | |
height=700 | |
) | |
fig.add_annotation( | |
x=0.02, y=0.98, xref="paper", yref="paper", | |
text="Red: El Niño, Blue: La Nina, Green: Neutral", | |
showarrow=False, align="left", | |
bgcolor="rgba(255,255,255,0.8)" | |
) | |
return fig, f"Total typhoons displayed: {count}" | |
def get_wind_analysis(start_year, start_month, end_year, end_month, enso_phase, typhoon_search): | |
"""Get wind analysis with enhanced categorization""" | |
start_date = datetime(start_year, start_month, 1) | |
end_date = datetime(end_year, end_month, 28) | |
filtered_data = merged_data[(merged_data['ISO_TIME']>=start_date) & (merged_data['ISO_TIME']<=end_date)].copy() | |
filtered_data['ENSO_Phase'] = filtered_data['ONI'].apply(classify_enso_phases) | |
if enso_phase != 'all': | |
filtered_data = filtered_data[filtered_data['ENSO_Phase'] == enso_phase.capitalize()] | |
fig = px.scatter(filtered_data, x='ONI', y='USA_WIND', color='Category', | |
hover_data=['NAME','Year','Category'], | |
title='Wind Speed vs ONI', | |
labels={'ONI':'ONI Value','USA_WIND':'Max Wind Speed (knots)'}, | |
color_discrete_map=enhanced_color_map) | |
if typhoon_search: | |
mask = filtered_data['NAME'].str.contains(typhoon_search, case=False, na=False) | |
if mask.any(): | |
fig.add_trace(go.Scatter( | |
x=filtered_data.loc[mask,'ONI'], y=filtered_data.loc[mask,'USA_WIND'], | |
mode='markers', marker=dict(size=10, color='red', symbol='star'), | |
name=f'Matched: {typhoon_search}', | |
text=filtered_data.loc[mask,'NAME']+' ('+filtered_data.loc[mask,'Year'].astype(str)+')' | |
)) | |
regression = perform_wind_regression(start_year, start_month, end_year, end_month) | |
return fig, regression | |
def get_pressure_analysis(start_year, start_month, end_year, end_month, enso_phase, typhoon_search): | |
"""Get pressure analysis with enhanced categorization""" | |
start_date = datetime(start_year, start_month, 1) | |
end_date = datetime(end_year, end_month, 28) | |
filtered_data = merged_data[(merged_data['ISO_TIME']>=start_date) & (merged_data['ISO_TIME']<=end_date)].copy() | |
filtered_data['ENSO_Phase'] = filtered_data['ONI'].apply(classify_enso_phases) | |
if enso_phase != 'all': | |
filtered_data = filtered_data[filtered_data['ENSO_Phase'] == enso_phase.capitalize()] | |
fig = px.scatter(filtered_data, x='ONI', y='USA_PRES', color='Category', | |
hover_data=['NAME','Year','Category'], | |
title='Pressure vs ONI', | |
labels={'ONI':'ONI Value','USA_PRES':'Min Pressure (hPa)'}, | |
color_discrete_map=enhanced_color_map) | |
if typhoon_search: | |
mask = filtered_data['NAME'].str.contains(typhoon_search, case=False, na=False) | |
if mask.any(): | |
fig.add_trace(go.Scatter( | |
x=filtered_data.loc[mask,'ONI'], y=filtered_data.loc[mask,'USA_PRES'], | |
mode='markers', marker=dict(size=10, color='red', symbol='star'), | |
name=f'Matched: {typhoon_search}', | |
text=filtered_data.loc[mask,'NAME']+' ('+filtered_data.loc[mask,'Year'].astype(str)+')' | |
)) | |
regression = perform_pressure_regression(start_year, start_month, end_year, end_month) | |
return fig, regression | |
def get_longitude_analysis(start_year, start_month, end_year, end_month, enso_phase, typhoon_search): | |
"""Get longitude analysis""" | |
start_date = datetime(start_year, start_month, 1) | |
end_date = datetime(end_year, end_month, 28) | |
filtered_data = merged_data[(merged_data['ISO_TIME']>=start_date) & (merged_data['ISO_TIME']<=end_date)].copy() | |
filtered_data['ENSO_Phase'] = filtered_data['ONI'].apply(classify_enso_phases) | |
if enso_phase != 'all': | |
filtered_data = filtered_data[filtered_data['ENSO_Phase'] == enso_phase.capitalize()] | |
fig = px.scatter(filtered_data, x='LON', y='ONI', hover_data=['NAME'], | |
title='Typhoon Generation Longitude vs ONI (All Years)') | |
if len(filtered_data) > 1: | |
X = np.array(filtered_data['LON']).reshape(-1,1) | |
y = filtered_data['ONI'] | |
try: | |
model = sm.OLS(y, sm.add_constant(X)).fit() | |
y_pred = model.predict(sm.add_constant(X)) | |
fig.add_trace(go.Scatter(x=filtered_data['LON'], y=y_pred, mode='lines', name='Regression Line')) | |
slope = model.params[1] | |
slopes_text = f"All Years Slope: {slope:.4f}" | |
except Exception as e: | |
slopes_text = f"Regression Error: {e}" | |
else: | |
slopes_text = "Insufficient data for regression" | |
regression = perform_longitude_regression(start_year, start_month, end_year, end_month) | |
return fig, slopes_text, regression | |
# ----------------------------- | |
# ENHANCED: Animation Functions with Taiwan Standard Support - FIXED VERSION | |
# ----------------------------- | |
def get_available_years(typhoon_data): | |
"""Get all available years including 2025 - with error handling""" | |
try: | |
if typhoon_data is None or typhoon_data.empty: | |
return [str(year) for year in range(2000, 2026)] | |
if 'ISO_TIME' in typhoon_data.columns: | |
years = typhoon_data['ISO_TIME'].dt.year.dropna().unique() | |
elif 'SEASON' in typhoon_data.columns: | |
years = typhoon_data['SEASON'].dropna().unique() | |
else: | |
years = range(2000, 2026) # Default range including 2025 | |
# Convert to strings and sort | |
year_strings = sorted([str(int(year)) for year in years if not pd.isna(year)]) | |
# Ensure we have at least some years | |
if not year_strings: | |
return [str(year) for year in range(2000, 2026)] | |
return year_strings | |
except Exception as e: | |
print(f"Error in get_available_years: {e}") | |
return [str(year) for year in range(2000, 2026)] | |
def update_typhoon_options_enhanced(year, basin): | |
"""Enhanced typhoon options with TD support and 2025 data""" | |
try: | |
year = int(year) | |
# Filter by year - handle both ISO_TIME and SEASON columns | |
if 'ISO_TIME' in typhoon_data.columns: | |
year_mask = typhoon_data['ISO_TIME'].dt.year == year | |
elif 'SEASON' in typhoon_data.columns: | |
year_mask = typhoon_data['SEASON'] == year | |
else: | |
# Fallback - try to extract year from SID or other fields | |
year_mask = typhoon_data.index >= 0 # Include all data as fallback | |
year_data = typhoon_data[year_mask].copy() | |
# Filter by basin if specified | |
if basin != "All Basins": | |
basin_code = basin.split(' - ')[0] if ' - ' in basin else basin[:2] | |
if 'SID' in year_data.columns: | |
year_data = year_data[year_data['SID'].str.startswith(basin_code, na=False)] | |
elif 'BASIN' in year_data.columns: | |
year_data = year_data[year_data['BASIN'] == basin_code] | |
if year_data.empty: | |
return gr.update(choices=["No storms found"], value=None) | |
# Get unique storms - include ALL intensities (including TD) | |
storms = year_data.groupby('SID').agg({ | |
'NAME': 'first', | |
'USA_WIND': 'max' | |
}).reset_index() | |
# Enhanced categorization including TD | |
storms['category'] = storms['USA_WIND'].apply(categorize_typhoon_enhanced) | |
# Create options with category information | |
options = [] | |
for _, storm in storms.iterrows(): | |
name = storm['NAME'] if pd.notna(storm['NAME']) and storm['NAME'] != '' else 'UNNAMED' | |
sid = storm['SID'] | |
category = storm['category'] | |
max_wind = storm['USA_WIND'] if pd.notna(storm['USA_WIND']) else 0 | |
option = f"{name} ({sid}) - {category} ({max_wind:.0f}kt)" | |
options.append(option) | |
if not options: | |
return gr.update(choices=["No storms found"], value=None) | |
return gr.update(choices=sorted(options), value=options[0]) | |
except Exception as e: | |
print(f"Error in update_typhoon_options_enhanced: {e}") | |
return gr.update(choices=["Error loading storms"], value=None) | |
def generate_enhanced_track_video_fixed(year, typhoon_selection, standard): | |
"""FIXED: Enhanced track video generation with working animation display""" | |
if not typhoon_selection or typhoon_selection == "No storms found": | |
return None | |
try: | |
# Extract SID from selection | |
sid = typhoon_selection.split('(')[1].split(')')[0] | |
# Get storm data | |
storm_df = typhoon_data[typhoon_data['SID'] == sid].copy() | |
if storm_df.empty: | |
print(f"No data found for storm {sid}") | |
return None | |
# Sort by time | |
if 'ISO_TIME' in storm_df.columns: | |
storm_df = storm_df.sort_values('ISO_TIME') | |
# Extract data for animation | |
lats = storm_df['LAT'].astype(float).values | |
lons = storm_df['LON'].astype(float).values | |
if 'USA_WIND' in storm_df.columns: | |
winds = pd.to_numeric(storm_df['USA_WIND'], errors='coerce').fillna(0).values | |
else: | |
winds = np.full(len(lats), 30) | |
# Enhanced metadata | |
storm_name = storm_df['NAME'].iloc[0] if pd.notna(storm_df['NAME'].iloc[0]) else "UNNAMED" | |
season = storm_df['SEASON'].iloc[0] if 'SEASON' in storm_df.columns else year | |
print(f"Generating FIXED video for {storm_name} ({sid}) with {len(lats)} track points using {standard} standard") | |
# FIXED: Create figure with proper cartopy setup | |
fig = plt.figure(figsize=(16, 10)) | |
ax = plt.axes(projection=ccrs.PlateCarree()) | |
# Enhanced map features | |
ax.stock_img() | |
ax.add_feature(cfeature.COASTLINE, linewidth=0.8) | |
ax.add_feature(cfeature.BORDERS, linewidth=0.5) | |
ax.add_feature(cfeature.OCEAN, color='lightblue', alpha=0.5) | |
ax.add_feature(cfeature.LAND, color='lightgray', alpha=0.5) | |
# Set extent based on track | |
padding = 5 | |
ax.set_extent([ | |
min(lons) - padding, max(lons) + padding, | |
min(lats) - padding, max(lats) + padding | |
]) | |
# Add gridlines | |
gl = ax.gridlines(draw_labels=True, alpha=0.3) | |
gl.top_labels = gl.right_labels = False | |
# Title | |
ax.set_title(f"{season} {storm_name} ({sid}) Track Animation - {standard.upper()} Standard", | |
fontsize=18, fontweight='bold') | |
# FIXED: Animation elements - proper initialization with cartopy transforms | |
# Initialize empty line for track with correct transform | |
track_line, = ax.plot([], [], 'b-', linewidth=3, alpha=0.7, | |
label='Track', transform=ccrs.PlateCarree()) | |
# Initialize current position marker | |
current_point, = ax.plot([], [], 'o', markersize=15, | |
transform=ccrs.PlateCarree()) | |
# Historical track points (to show path traversed) | |
history_points, = ax.plot([], [], 'o', markersize=6, alpha=0.4, color='blue', | |
transform=ccrs.PlateCarree()) | |
# Info text box | |
info_box = ax.text(0.02, 0.98, '', transform=ax.transAxes, | |
fontsize=12, verticalalignment='top', | |
bbox=dict(boxstyle="round,pad=0.5", facecolor='white', alpha=0.9)) | |
# FIXED: Color legend with proper categories for both standards | |
legend_elements = [] | |
if standard == 'taiwan': | |
categories = ['Tropical Depression', 'Tropical Storm', 'Severe Tropical Storm', | |
'Typhoon', 'Severe Typhoon', 'Super Typhoon'] | |
for category in categories: | |
color = get_taiwan_color_fixed(category) | |
legend_elements.append(plt.Line2D([0], [0], marker='o', color='w', | |
markerfacecolor=color, markersize=10, label=category)) | |
else: | |
categories = ['Tropical Depression', 'Tropical Storm', 'C1 Typhoon', 'C2 Typhoon', | |
'C3 Strong Typhoon', 'C4 Very Strong Typhoon', 'C5 Super Typhoon'] | |
for category in categories: | |
color = get_matplotlib_color(category) | |
legend_elements.append(plt.Line2D([0], [0], marker='o', color='w', | |
markerfacecolor=color, markersize=10, label=category)) | |
ax.legend(handles=legend_elements, loc='upper right', fontsize=10) | |
# FIXED: Animation function with proper artist updates and cartopy compatibility | |
def animate_fixed(frame): | |
"""Fixed animation function that properly updates tracks with cartopy""" | |
try: | |
if frame >= len(lats): | |
return track_line, current_point, history_points, info_box | |
# FIXED: Update track line up to current frame | |
current_lons = lons[:frame+1] | |
current_lats = lats[:frame+1] | |
# Update the track line data (this is the key fix!) | |
track_line.set_data(current_lons, current_lats) | |
# FIXED: Update historical points (smaller markers showing traversed path) | |
if frame > 0: | |
history_points.set_data(current_lons[:-1], current_lats[:-1]) | |
# FIXED: Update current position with correct categorization | |
current_wind = winds[frame] | |
if standard == 'taiwan': | |
category, color = categorize_typhoon_by_standard_fixed(current_wind, 'taiwan') | |
else: | |
category, color = categorize_typhoon_by_standard_fixed(current_wind, 'atlantic') | |
# Debug for first few frames | |
if frame < 3: | |
print(f"FIXED Frame {frame}: Wind={current_wind:.1f}kt, Category={category}, Color={color}") | |
# Update current position marker | |
current_point.set_data([lons[frame]], [lats[frame]]) | |
current_point.set_color(color) | |
current_point.set_markersize(12 + current_wind/8) | |
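# Marker scales with intensity: 12 px base plus 1 px per 8 kt of wind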
# FIXED: Enhanced info display with correct Taiwan wind speed conversion | |
if 'ISO_TIME' in storm_df.columns and frame < len(storm_df): | |
current_time = storm_df.iloc[frame]['ISO_TIME'] | |
time_str = current_time.strftime('%Y-%m-%d %H:%M UTC') if pd.notna(current_time) else 'Unknown' | |
else: | |
time_str = f"Step {frame+1}" | |
# Corrected wind speed display for Taiwan standard | |
if standard == 'taiwan': | |
wind_ms = current_wind * 0.514444 | |
wind_display = f"{current_wind:.0f} kt ({wind_ms:.1f} m/s)" | |
else: | |
wind_display = f"{current_wind:.0f} kt" | |
info_text = ( | |
f"Storm: {storm_name}\n" | |
f"Time: {time_str}\n" | |
f"Position: {lats[frame]:.1f}°N, {lons[frame]:.1f}°E\n" | |
f"Max Wind: {wind_display}\n" | |
f"Category: {category}\n" | |
f"Standard: {standard.upper()}\n" | |
f"Frame: {frame+1}/{len(lats)}" | |
) | |
info_box.set_text(info_text) | |
# FIXED: Return all modified artists (crucial for proper display) | |
return track_line, current_point, history_points, info_box | |
except Exception as e: | |
print(f"Error in animate frame {frame}: {e}") | |
return track_line, current_point, history_points, info_box | |
# FIXED: Create animation with cartopy-compatible settings | |
# Key fixes: blit=False (crucial for cartopy), proper interval | |
anim = animation.FuncAnimation( | |
fig, animate_fixed, frames=len(lats), | |
interval=600, blit=False, repeat=True # blit=False is essential for cartopy! | |
) | |
# Save animation with optimized settings | |
temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.mp4', | |
dir=tempfile.gettempdir()) | |
# FIXED: Writer settings optimized for track visibility | |
writer = animation.FFMpegWriter( | |
fps=2, bitrate=3000, codec='libx264', # Slower FPS for better track visibility | |
extra_args=['-pix_fmt', 'yuv420p'] | |
) | |
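# Note: FFMpegWriter requires the ffmpeg binary on PATH (or matplotlib's
# 'animation.ffmpeg_path' rcParam); if it is missing, anim.save raises and
# the outer except returns None.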
print(f"Saving FIXED animation to {temp_file.name}") | |
anim.save(temp_file.name, writer=writer, dpi=120) | |
plt.close(fig) | |
print(f"FIXED video generated successfully: {temp_file.name}") | |
return temp_file.name | |
except Exception as e: | |
print(f"Error generating FIXED video: {e}") | |
import traceback | |
traceback.print_exc() | |
return None | |
# FIXED: Update the simplified wrapper function | |
def simplified_track_video_fixed(year, basin, typhoon, standard): | |
"""Simplified track video function with FIXED animation and Taiwan classification""" | |
if not typhoon: | |
return None | |
return generate_enhanced_track_video_fixed(year, typhoon, standard) | |
# ----------------------------- | |
# Enhanced Gradio Interface with Oceanic Data Integration | |
# ----------------------------- | |
def generate_enhanced_environmental_forecast_text(results, base_forecast_text): | |
"""Generate enhanced forecast text with environmental details""" | |
try: | |
current = results['current_prediction'] | |
env_data = results['environmental_data'] | |
route_forecast = results['route_forecast'] | |
# Environmental analysis | |
env_analysis_text = f""" | |
ENHANCED ENVIRONMENTAL ANALYSIS | |
{'='*65} | |
REAL-TIME OCEANIC CONDITIONS: | |
• SST Data Source: {env_data.get('sst_source', 'Unknown')} | |
• SLP Data Source: {env_data.get('slp_source', 'Unknown')} | |
• Real-time Integration: {'✅ Active' if env_data.get('use_real_data', False) else '❌ Climatological Fallback'} | |
ENVIRONMENTAL POTENTIAL ANALYSIS: | |
• Genesis Potential: {current.get('environmental_potential', 'Unknown')} kt | |
• Environmental Favorability: {current.get('environmental_favorability', 'Unknown')} | |
• SST Contribution: {current.get('sst_contribution', 0):+.1f} kt | |
• Current Environmental Limit: {current.get('environmental_potential', 50):.0f} kt | |
TRACK-POINT ENVIRONMENTAL CONDITIONS: | |
""" | |
# Add sample of environmental conditions along track | |
if route_forecast and len(route_forecast) > 0: | |
# Use a set so short forecasts don't produce duplicate sample indices
sample_points = sorted({0, len(route_forecast)//4, len(route_forecast)//2,
3*len(route_forecast)//4, len(route_forecast)-1})
for i in sample_points:
if i < len(route_forecast):
point = route_forecast[i]
# Guard missing values: a format spec like :.1f raises on the 'N/A' string
sst_val = point.get('sst_celsius')
slp_val = point.get('slp_hpa')
sst_str = f"{sst_val:.1f}°C" if isinstance(sst_val, (int, float)) else "N/A"
slp_str = f"{slp_val:.0f} hPa" if isinstance(slp_val, (int, float)) else "N/A"
env_analysis_text += f"""
• Hour {point['hour']}:
- Position: {point['lat']:.1f}°N, {point['lon']:.1f}°E
- Intensity: {point['intensity_kt']:.0f} kt (Limit: {point.get('environmental_limit', 'N/A')} kt)
- SST: {sst_str} | SLP: {slp_str}
- Development Stage: {point['development_stage']}
- Tendency: {point.get('intensity_tendency', 0):+.1f} kt/6hr"""
env_analysis_text += f""" | |
OCEANIC DATA QUALITY ASSESSMENT: | |
• Position Confidence: {results['confidence_scores'].get('position_72h', 0.5)*100:.0f}% (72hr) | |
• Intensity Confidence: {results['confidence_scores'].get('intensity_72h', 0.5)*100:.0f}% (72hr) | |
• Environmental Coupling: {results['confidence_scores'].get('environmental_coupling', 0.5)*100:.0f}% | |
TECHNICAL IMPLEMENTATION: | |
• Model: {results['model_info']} | |
• Data Protocols: ERDDAP (SST) + OPeNDAP (SLP) | |
• Spatial Interpolation: Linear with nearest-neighbor fallback | |
• Physics: Emanuel potential intensity + environmental coupling | |
""" | |
return base_forecast_text + env_analysis_text | |
except Exception as e: | |
logging.error(f"Error generating enhanced forecast text: {e}") | |
return base_forecast_text + f"\n\nError in environmental analysis: {str(e)}" | |
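# Expected `results` schema, as consumed above:
# results['current_prediction'] -> dict (intensity_kt, environmental_* keys)
# results['environmental_data'] -> dict ('sst_source', 'slp_source', 'use_real_data')
# results['route_forecast'] -> list of per-6hr point dicts (hour, lat, lon, ...)
# results['confidence_scores'] -> dict of 0-1 floats
# results['model_info'] -> str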
# ----------------------------- | |
# Load & Process Data | |
# ----------------------------- | |
# Global variables initialization | |
oni_data = None | |
typhoon_data = None | |
merged_data = None | |
def initialize_data(): | |
"""Initialize all data safely""" | |
global oni_data, typhoon_data, merged_data, oceanic_manager | |
try: | |
logging.info("Starting data loading process...") | |
# Initialize oceanic manager | |
oceanic_manager = OceanicDataManager() | |
update_oni_data() | |
oni_data, typhoon_data = load_data_fixed(ONI_DATA_PATH, TYPHOON_DATA_PATH) | |
if oni_data is not None and typhoon_data is not None: | |
oni_long = process_oni_data(oni_data) | |
typhoon_max = process_typhoon_data(typhoon_data) | |
merged_data = merge_data(oni_long, typhoon_max) | |
logging.info("Data loading complete.") | |
else: | |
logging.error("Failed to load required data") | |
# Create minimal fallback data | |
oni_data = pd.DataFrame({'Year': [2000], 'Jan': [0], 'Feb': [0], 'Mar': [0], 'Apr': [0], | |
'May': [0], 'Jun': [0], 'Jul': [0], 'Aug': [0], 'Sep': [0], | |
'Oct': [0], 'Nov': [0], 'Dec': [0]}) | |
typhoon_data = create_fallback_typhoon_data() | |
oni_long = process_oni_data(oni_data) | |
typhoon_max = process_typhoon_data(typhoon_data) | |
merged_data = merge_data(oni_long, typhoon_max) | |
except Exception as e: | |
logging.error(f"Error during data initialization: {e}") | |
# Create minimal fallback data | |
oni_data = pd.DataFrame({'Year': [2000], 'Jan': [0], 'Feb': [0], 'Mar': [0], 'Apr': [0], | |
'May': [0], 'Jun': [0], 'Jul': [0], 'Aug': [0], 'Sep': [0], | |
'Oct': [0], 'Nov': [0], 'Dec': [0]}) | |
typhoon_data = create_fallback_typhoon_data() | |
oni_long = process_oni_data(oni_data) | |
typhoon_max = process_typhoon_data(typhoon_data) | |
merged_data = merge_data(oni_long, typhoon_max) | |
def create_interface(): | |
"""Create the enhanced Gradio interface with oceanic data integration""" | |
try: | |
# Ensure data is available | |
if oni_data is None or typhoon_data is None or merged_data is None: | |
logging.warning("Data not properly loaded, creating minimal interface") | |
return create_minimal_fallback_interface() | |
# Get safe data statistics | |
try: | |
total_storms = len(typhoon_data['SID'].unique()) if 'SID' in typhoon_data.columns else 0 | |
total_records = len(typhoon_data) | |
available_years = get_available_years(typhoon_data) | |
year_range_display = f"{available_years[0]} - {available_years[-1]}" if available_years else "Unknown" | |
except Exception as e: | |
logging.error(f"Error getting data statistics: {e}") | |
total_storms = 0 | |
total_records = 0 | |
year_range_display = "Unknown" | |
available_years = [str(year) for year in range(2000, 2026)] | |
with gr.Blocks(title="Enhanced Typhoon Analysis Platform with Oceanic Data", theme=gr.themes.Soft()) as demo: | |
gr.Markdown("# 🌊 Enhanced Typhoon Analysis Platform with Real-time Oceanic Data") | |
gr.Markdown("**Advanced ML clustering, real-time SST/SLP integration, route predictions, and comprehensive tropical cyclone analysis**") | |
with gr.Tab("🏠 Overview"): | |
overview_text = f""" | |
## 🌊 Welcome to the Enhanced Typhoon Analysis Dashboard with Oceanic Coupling | |
This dashboard provides comprehensive analysis of typhoon data with **real-time oceanic data integration** for unprecedented forecast accuracy. | |
### 🚀 NEW Oceanic Data Features: | |
- **🌊 Real-time SST Data**: NOAA OISST v2 Sea Surface Temperature via ERDDAP | |
- **🌡️ Real-time SLP Data**: NCEP/NCAR Sea Level Pressure via OPeNDAP | |
- **🔄 Dynamic Environmental Coupling**: Live oceanic conditions drive intensity predictions | |
- **📊 Historical Environmental Analysis**: Past storm-environment relationships inform predictions | |
- **🎯 Environmental Potential Index**: Real-time calculation of maximum possible intensity | |
- **🌍 Global Data Coverage**: Automatic fallback to climatology when real-time data unavailable | |
### 📊 Enhanced Capabilities: | |
- **Environmental Intensity Modeling**: SST-driven maximum potential intensity calculations | |
- **Dynamic Steering**: SLP-based atmospheric steering patterns | |
- **ENSO-Environment Coupling**: Combined ENSO and oceanic state influences | |
- **Uncertainty Quantification**: Data quality-based confidence scoring | |
- **Multi-source Integration**: Seamless blending of real-time and climatological data | |
### 📊 Data Status: | |
- **ONI Data**: {len(oni_data)} years loaded | |
- **Typhoon Data**: {total_records:,} records loaded | |
- **Oceanic Data Sources**: NOAA OISST v2 + NCEP/NCAR Reanalysis | |
- **Available Years**: {year_range_display} | |
### 🔧 Technical Infrastructure: | |
- **Real-time Data Access**: xarray + OPeNDAP + ERDDAP protocols | |
- **Environmental Interpolation**: Spatial interpolation to storm locations | |
- **Physics-based Modeling**: Emanuel potential intensity theory implementation | |
- **Fallback Systems**: Robust climatological backup when real-time data unavailable | |
### 🔬 Scientific Accuracy: | |
- **SST-Intensity Relationship**: Based on latest tropical cyclone research | |
- **Shear Parameterization**: ENSO and seasonal wind shear modeling | |
- **Genesis Climatology**: Realistic development regions and frequencies | |
- **Track Forecasting**: Environmental steering with oceanic state dependencies | |
""" | |
gr.Markdown(overview_text) | |
with gr.Tab("🌊 Real-time Oceanic Storm Prediction"): | |
gr.Markdown("## 🌊 Advanced Storm Development with Live Oceanic Data") | |
gr.Markdown(""" | |
### 🔥 Revolutionary Features: | |
- **🌊 Live SST Integration**: Current sea surface temperatures from NOAA satellites | |
- **🌡️ Real-time SLP Data**: Current atmospheric pressure from global reanalysis | |
- **🎯 Environmental Potential**: Real-time calculation of maximum storm intensity | |
- **📈 Historical Learning**: Past storm-environment relationships guide predictions | |
- **🌍 Global Coverage**: Automatic data fetching with intelligent fallbacks | |
""") | |
with gr.Row(): | |
with gr.Column(scale=2): | |
gr.Markdown("### 🌊 Genesis & Environmental Configuration") | |
genesis_options = list(get_realistic_genesis_locations().keys()) | |
genesis_region = gr.Dropdown( | |
choices=genesis_options, | |
value="Western Pacific Main Development Region", | |
label="🌊 Typhoon Genesis Region", | |
info="Climatologically realistic development regions" | |
) | |
# Enhanced environmental controls | |
with gr.Row(): | |
use_real_oceanic = gr.Checkbox( | |
label="🌊 Use Real-time Oceanic Data", | |
value=True, | |
info="Fetch live SST/SLP data (may take 10-30 seconds)" | |
) | |
show_environmental_details = gr.Checkbox( | |
label="📊 Show Environmental Analysis", | |
value=True, | |
info="Display detailed environmental breakdown" | |
) | |
# Display selected region info with real-time data status | |
def update_genesis_info_enhanced(region): | |
locations = get_realistic_genesis_locations() | |
if region in locations: | |
info = locations[region] | |
base_info = f"📍 Location: {info['lat']:.1f}°N, {info['lon']:.1f}°E\n📝 {info['description']}" | |
# Add climatological information | |
clim_sst = get_climatological_sst(info['lat'], info['lon'], 9) # September | |
env_potential = calculate_environmental_intensity_potential( | |
info['lat'], info['lon'], 9, 0.0, None, None | |
) | |
enhanced_info = ( | |
f"{base_info}\n" | |
f"🌡️ Climatological SST: {clim_sst:.1f}°C\n" | |
f"⚡ Environmental Potential: {env_potential['potential_intensity']:.0f} kt" | |
) | |
return enhanced_info | |
return "Select a genesis region" | |
genesis_info_display = gr.Textbox( | |
label="Selected Region Analysis", | |
lines=4, | |
interactive=False, | |
value=update_genesis_info_enhanced("Western Pacific Main Development Region") | |
) | |
genesis_region.change( | |
fn=update_genesis_info_enhanced, | |
inputs=[genesis_region], | |
outputs=[genesis_info_display] | |
) | |
with gr.Row(): | |
pred_month = gr.Slider( | |
1, 12, label="Month", value=9, | |
info="Peak season: Jul-Oct (affects SST/shear patterns)" | |
) | |
pred_oni = gr.Number( | |
label="ONI Value", value=0.0, | |
info="Current ENSO state (-3 to 3, affects oceanic patterns)" | |
) | |
with gr.Row(): | |
forecast_hours = gr.Number( | |
label="Forecast Length (hours)", | |
value=72, | |
minimum=24, | |
maximum=240, | |
step=6, | |
info="Extended forecasting with environmental evolution" | |
) | |
advanced_physics = gr.Checkbox( | |
label="Advanced Environmental Physics", | |
value=True, | |
info="Full SST-intensity coupling and wind shear modeling" | |
) | |
with gr.Row(): | |
show_uncertainty = gr.Checkbox( | |
label="Environmental Uncertainty Cone", | |
value=True, | |
info="Uncertainty based on data quality and environmental variability" | |
) | |
enable_animation = gr.Checkbox( | |
label="Animated Development", | |
value=True, | |
info="Watch storm-environment interaction evolve" | |
) | |
with gr.Column(scale=1): | |
gr.Markdown("### ⚙️ Oceanic Prediction Controls") | |
predict_oceanic_btn = gr.Button( | |
"🌊 Generate Enhanced Oceanic Forecast", | |
variant="primary", | |
size="lg" | |
) | |
gr.Markdown("### 📊 Environmental Conditions") | |
current_intensity = gr.Number(label="Genesis Intensity (kt)", interactive=False) | |
current_category = gr.Textbox(label="Initial Category", interactive=False) | |
environmental_potential = gr.Number(label="Environmental Potential (kt)", interactive=False) | |
environmental_favorability = gr.Textbox(label="Environmental Favorability", interactive=False) | |
gr.Markdown("### 🔧 Data Sources") | |
sst_data_source = gr.Textbox(label="SST Data Source", interactive=False) | |
slp_data_source = gr.Textbox(label="SLP Data Source", interactive=False) | |
model_confidence = gr.Textbox(label="Model Info", interactive=False) | |
with gr.Row(): | |
route_plot = gr.Plot(label="🗺️ Advanced Oceanic-Coupled Forecast") | |
with gr.Row(): | |
forecast_details = gr.Textbox( | |
label="📋 Comprehensive Environmental Forecast", | |
lines=25, | |
max_lines=30 | |
) | |
def run_oceanic_prediction( | |
region, month, oni, hours, advanced_phys, uncertainty, | |
animation, use_real_data, show_env_details | |
): | |
try: | |
# Run enhanced oceanic prediction | |
results = predict_storm_route_and_intensity_with_oceanic_data( | |
region, month, oni, hours, | |
use_real_data=use_real_data, | |
models=None, | |
enable_animation=animation | |
) | |
# Extract enhanced conditions | |
current = results['current_prediction'] | |
env_data = results['environmental_data'] | |
intensity = current['intensity_kt'] | |
category = current['category'] | |
env_potential = current.get('environmental_potential', 50) | |
env_favorability = current.get('environmental_favorability', 'Unknown') | |
# Data source information | |
sst_source = env_data.get('sst_source', 'Unknown') | |
slp_source = env_data.get('slp_source', 'Unknown') | |
# Create enhanced visualization | |
fig, forecast_text = create_animated_route_visualization( | |
results, uncertainty, animation | |
) | |
# Enhanced forecast text with environmental details | |
if show_env_details: | |
enhanced_forecast_text = generate_enhanced_environmental_forecast_text( | |
results, forecast_text | |
) | |
else: | |
enhanced_forecast_text = forecast_text | |
model_info = f"{results['model_info']}\nReal-time Data: {'Yes' if use_real_data else 'No'}" | |
return ( | |
intensity, | |
category, | |
env_potential, | |
env_favorability, | |
sst_source, | |
slp_source, | |
model_info, | |
fig, | |
enhanced_forecast_text | |
) | |
except Exception as e: | |
error_msg = f"Enhanced oceanic prediction failed: {str(e)}" | |
logging.error(error_msg) | |
import traceback | |
traceback.print_exc() | |
return ( | |
30, "Tropical Depression", 50, "Unknown", | |
"Error", "Error", f"Prediction failed: {str(e)}", | |
None, f"Error generating enhanced forecast: {str(e)}" | |
) | |
predict_oceanic_btn.click( | |
fn=run_oceanic_prediction, | |
inputs=[ | |
genesis_region, pred_month, pred_oni, forecast_hours, | |
advanced_physics, show_uncertainty, enable_animation, | |
use_real_oceanic, show_environmental_details | |
], | |
outputs=[ | |
current_intensity, current_category, environmental_potential, | |
environmental_favorability, sst_data_source, slp_data_source, | |
model_confidence, route_plot, forecast_details | |
] | |
) | |
# Enhanced information section | |
oceanic_info_text = """ | |
### 🌊 Oceanic Data Integration Features: | |
#### 🔥 Real-time Data Sources: | |
- **SST**: NOAA OISST v2 - Daily 0.25° resolution satellite-based sea surface temperatures | |
- **SLP**: NCEP/NCAR Reanalysis - 6-hourly 2.5° resolution atmospheric pressure fields | |
- **Coverage**: Global oceans with 1-2 day latency for most recent conditions | |
- **Protocols**: ERDDAP and OPeNDAP for standardized data access | |
#### 🧠 Environmental Physics: | |
- **Emanuel Potential Intensity**: Theoretical maximum intensity based on thermodynamics | |
- **SST-Intensity Coupling**: Non-linear relationship between sea surface temperature and storm intensity | |
- **Atmospheric Steering**: Sea level pressure gradients drive storm motion patterns | |
- **Wind Shear Modeling**: Vertical wind shear estimation from pressure patterns and ENSO state | |
#### 🎯 Enhanced Accuracy: | |
- **Real-time Environmental Limits**: Current oceanic conditions set maximum achievable intensity | |
- **Dynamic Development**: Storm intensification rate depends on real SST and atmospheric conditions | |
- **Track Steering**: Motion influenced by current pressure patterns rather than climatology alone | |
- **Confidence Scoring**: Higher confidence when real-time data successfully integrated | |
#### 🔄 Fallback Systems: | |
- **Automatic Degradation**: Seamlessly switches to climatology if real-time data unavailable | |
- **Quality Assessment**: Evaluates data completeness and provides appropriate confidence levels | |
- **Hybrid Approach**: Combines real-time data with climatological patterns for optimal accuracy | |
- **Error Handling**: Robust system continues operation even with partial data failures | |
#### 📊 Output Enhancements: | |
- **Environmental Metadata**: Track-point SST, SLP, and environmental limits | |
- **Data Source Tracking**: Clear indication of real-time vs climatological data usage | |
- **Uncertainty Quantification**: Confidence intervals based on data availability and environmental complexity | |
- **Detailed Analysis**: Comprehensive breakdown of environmental factors affecting development | |
""" | |
gr.Markdown(oceanic_info_text) | |
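# Illustrative sketch only (assumed constants, not the platform's actual physics):
# a simplified SST-driven potential-intensity curve in the spirit of Emanuel's
# theory, with a ~26.5 °C genesis threshold and saturation near 185 kt:
# def _sketch_potential_intensity(sst_c: float) -> float:
#     if sst_c < 26.5: # below the classical genesis threshold
#         return 35.0 # weak systems only
#     return min(185.0, 35.0 + (sst_c - 26.5) * 20.0) # ~20 kt per extra °C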
with gr.Tab("🔬 Advanced ML Clustering"): | |
gr.Markdown("## 🎯 Storm Pattern Analysis with Separate Visualizations") | |
gr.Markdown("**Four separate plots: Clustering, Routes, Pressure Evolution, and Wind Evolution**") | |
with gr.Row(): | |
with gr.Column(scale=2): | |
reduction_method = gr.Dropdown( | |
choices=['UMAP', 't-SNE', 'PCA'], | |
value='UMAP' if UMAP_AVAILABLE else 't-SNE', | |
label="🔍 Dimensionality Reduction Method", | |
info="UMAP provides better global structure preservation" | |
) | |
with gr.Column(scale=1): | |
analyze_clusters_btn = gr.Button("🚀 Generate All Cluster Analyses", variant="primary", size="lg") | |
with gr.Row(): | |
with gr.Column(): | |
cluster_plot = gr.Plot(label="📊 Storm Clustering Analysis") | |
with gr.Column(): | |
routes_plot = gr.Plot(label="🗺️ Clustered Storm Routes") | |
with gr.Row(): | |
with gr.Column(): | |
pressure_plot = gr.Plot(label="🌡️ Pressure Evolution by Cluster") | |
with gr.Column(): | |
wind_plot = gr.Plot(label="💨 Wind Speed Evolution by Cluster") | |
with gr.Row(): | |
cluster_stats = gr.Textbox(label="📈 Detailed Cluster Statistics", lines=15, max_lines=20) | |
def run_separate_clustering_analysis(method): | |
try: | |
# Extract features for clustering | |
storm_features = extract_storm_features(typhoon_data) | |
if storm_features is None: | |
return None, None, None, None, "Error: Could not extract storm features" | |
fig_cluster, fig_routes, fig_pressure, fig_wind, stats = create_separate_clustering_plots( | |
storm_features, typhoon_data, method.lower() | |
) | |
return fig_cluster, fig_routes, fig_pressure, fig_wind, stats | |
except Exception as e: | |
import traceback | |
error_details = traceback.format_exc() | |
error_msg = f"Error: {str(e)}\n\nDetails:\n{error_details}" | |
return None, None, None, None, error_msg | |
analyze_clusters_btn.click( | |
fn=run_separate_clustering_analysis, | |
inputs=[reduction_method], | |
outputs=[cluster_plot, routes_plot, pressure_plot, wind_plot, cluster_stats] | |
) | |
with gr.Tab("🗺️ Track Visualization"): | |
with gr.Row(): | |
start_year = gr.Number(label="Start Year", value=2020) | |
start_month = gr.Dropdown(label="Start Month", choices=list(range(1, 13)), value=1) | |
end_year = gr.Number(label="End Year", value=2025) | |
end_month = gr.Dropdown(label="End Month", choices=list(range(1, 13)), value=6) | |
enso_phase = gr.Dropdown(label="ENSO Phase", choices=['all', 'El Nino', 'La Nina', 'Neutral'], value='all') | |
typhoon_search = gr.Textbox(label="Typhoon Search") | |
analyze_btn = gr.Button("Generate Tracks") | |
tracks_plot = gr.Plot() | |
typhoon_count = gr.Textbox(label="Number of Typhoons Displayed") | |
analyze_btn.click( | |
fn=get_full_tracks, | |
inputs=[start_year, start_month, end_year, end_month, enso_phase, typhoon_search], | |
outputs=[tracks_plot, typhoon_count] | |
) | |
with gr.Tab("💨 Wind Analysis"): | |
with gr.Row(): | |
wind_start_year = gr.Number(label="Start Year", value=2020) | |
wind_start_month = gr.Dropdown(label="Start Month", choices=list(range(1, 13)), value=1) | |
wind_end_year = gr.Number(label="End Year", value=2024) | |
wind_end_month = gr.Dropdown(label="End Month", choices=list(range(1, 13)), value=6) | |
wind_enso_phase = gr.Dropdown(label="ENSO Phase", choices=['all', 'El Nino', 'La Nina', 'Neutral'], value='all') | |
wind_typhoon_search = gr.Textbox(label="Typhoon Search") | |
wind_analyze_btn = gr.Button("Generate Wind Analysis") | |
wind_scatter = gr.Plot() | |
wind_regression_results = gr.Textbox(label="Wind Regression Results") | |
wind_analyze_btn.click( | |
fn=get_wind_analysis, | |
inputs=[wind_start_year, wind_start_month, wind_end_year, wind_end_month, wind_enso_phase, wind_typhoon_search], | |
outputs=[wind_scatter, wind_regression_results] | |
) | |
with gr.Tab("🌡️ Pressure Analysis"): | |
with gr.Row(): | |
pressure_start_year = gr.Number(label="Start Year", value=2020) | |
pressure_start_month = gr.Dropdown(label="Start Month", choices=list(range(1, 13)), value=1) | |
pressure_end_year = gr.Number(label="End Year", value=2024) | |
pressure_end_month = gr.Dropdown(label="End Month", choices=list(range(1, 13)), value=6) | |
pressure_enso_phase = gr.Dropdown(label="ENSO Phase", choices=['all', 'El Nino', 'La Nina', 'Neutral'], value='all') | |
pressure_typhoon_search = gr.Textbox(label="Typhoon Search") | |
pressure_analyze_btn = gr.Button("Generate Pressure Analysis") | |
pressure_scatter = gr.Plot() | |
pressure_regression_results = gr.Textbox(label="Pressure Regression Results") | |
pressure_analyze_btn.click( | |
fn=get_pressure_analysis, | |
inputs=[pressure_start_year, pressure_start_month, pressure_end_year, pressure_end_month, pressure_enso_phase, pressure_typhoon_search], | |
outputs=[pressure_scatter, pressure_regression_results] | |
) | |
with gr.Tab("🌏 Longitude Analysis"): | |
with gr.Row(): | |
lon_start_year = gr.Number(label="Start Year", value=2020) | |
lon_start_month = gr.Dropdown(label="Start Month", choices=list(range(1, 13)), value=1) | |
lon_end_year = gr.Number(label="End Year", value=2020) | |
lon_end_month = gr.Dropdown(label="End Month", choices=list(range(1, 13)), value=6) | |
lon_enso_phase = gr.Dropdown(label="ENSO Phase", choices=['all', 'El Nino', 'La Nina', 'Neutral'], value='all') | |
lon_typhoon_search = gr.Textbox(label="Typhoon Search (Optional)") | |
lon_analyze_btn = gr.Button("Generate Longitude Analysis") | |
regression_plot = gr.Plot() | |
slopes_text = gr.Textbox(label="Regression Slopes") | |
lon_regression_results = gr.Textbox(label="Longitude Regression Results") | |
lon_analyze_btn.click( | |
fn=get_longitude_analysis, | |
inputs=[lon_start_year, lon_start_month, lon_end_year, lon_end_month, lon_enso_phase, lon_typhoon_search], | |
outputs=[regression_plot, slopes_text, lon_regression_results] | |
) | |
with gr.Tab("🎬 Enhanced Track Animation"): | |
gr.Markdown("## 🎥 High-Quality Storm Track Visualization (Atlantic & Taiwan Standards)") | |
with gr.Row(): | |
year_dropdown = gr.Dropdown( | |
label="Year", | |
choices=available_years, | |
value=available_years[-1] if available_years else "2024" | |
) | |
basin_dropdown = gr.Dropdown( | |
label="Basin", | |
choices=["All Basins", "WP - Western Pacific", "EP - Eastern Pacific", "NA - North Atlantic"], | |
value="All Basins" | |
) | |
with gr.Row(): | |
typhoon_dropdown = gr.Dropdown(label="Storm Selection (All Categories Including TD)") | |
standard_dropdown = gr.Dropdown( | |
label="🎌 Classification Standard", | |
choices=['atlantic', 'taiwan'], | |
value='atlantic', | |
info="Atlantic: International standard | Taiwan: Local meteorological standard" | |
) | |
generate_video_btn = gr.Button("🎬 Generate Enhanced Animation", variant="primary") | |
video_output = gr.Video(label="Storm Track Animation") | |
# Update storm options when year or basin changes | |
for input_comp in [year_dropdown, basin_dropdown]: | |
input_comp.change( | |
fn=update_typhoon_options_enhanced, | |
inputs=[year_dropdown, basin_dropdown], | |
outputs=[typhoon_dropdown] | |
) | |
# Generate video with fixed function | |
generate_video_btn.click( | |
fn=generate_enhanced_track_video_fixed, | |
inputs=[year_dropdown, typhoon_dropdown, standard_dropdown], | |
outputs=[video_output] | |
) | |
with gr.Tab("📊 Data Statistics & Insights"): | |
gr.Markdown("## 📈 Comprehensive Dataset Analysis") | |
# Create enhanced data summary | |
try: | |
if len(typhoon_data) > 0: | |
# Storm category distribution | |
storm_cats = typhoon_data.groupby('SID')['USA_WIND'].max().apply(categorize_typhoon_enhanced) | |
cat_counts = storm_cats.value_counts() | |
# Create distribution chart with enhanced colors | |
fig_dist = px.bar( | |
x=cat_counts.index, | |
y=cat_counts.values, | |
title="Storm Intensity Distribution (Including Tropical Depressions)", | |
labels={'x': 'Category', 'y': 'Number of Storms'}, | |
color=cat_counts.index, | |
color_discrete_map=enhanced_color_map | |
) | |
# Seasonal distribution | |
if 'ISO_TIME' in typhoon_data.columns: | |
seasonal_data = typhoon_data.copy() | |
seasonal_data['Month'] = seasonal_data['ISO_TIME'].dt.month | |
monthly_counts = seasonal_data.groupby(['Month', 'SID']).size().groupby('Month').size() | |
fig_seasonal = px.bar( | |
x=monthly_counts.index, | |
y=monthly_counts.values, | |
title="Seasonal Storm Distribution", | |
labels={'x': 'Month', 'y': 'Number of Storms'}, | |
color=monthly_counts.values, | |
color_continuous_scale='Viridis' | |
) | |
else: | |
fig_seasonal = None | |
# Basin distribution | |
if 'SID' in typhoon_data.columns: | |
basin_data = typhoon_data['SID'].str[:2].value_counts() | |
fig_basin = px.pie( | |
values=basin_data.values, | |
names=basin_data.index, | |
title="Distribution by Basin" | |
) | |
else: | |
fig_basin = None | |
with gr.Row(): | |
gr.Plot(value=fig_dist) | |
if fig_seasonal: | |
with gr.Row(): | |
gr.Plot(value=fig_seasonal) | |
if fig_basin: | |
with gr.Row(): | |
gr.Plot(value=fig_basin) | |
except Exception as e: | |
gr.Markdown(f"Visualization error: {str(e)}") | |
# Enhanced statistics | |
total_storms = len(typhoon_data['SID'].unique()) if 'SID' in typhoon_data.columns else 0 | |
total_records = len(typhoon_data) | |
if 'SEASON' in typhoon_data.columns: | |
try: | |
min_year = int(typhoon_data['SEASON'].min()) | |
max_year = int(typhoon_data['SEASON'].max()) | |
year_range = f"{min_year}-{max_year}" | |
years_covered = typhoon_data['SEASON'].nunique() | |
except (ValueError, TypeError): | |
year_range = "Unknown" | |
years_covered = 0 | |
else: | |
year_range = "Unknown" | |
years_covered = 0 | |
if 'SID' in typhoon_data.columns: | |
try: | |
basins_available = ', '.join(sorted(typhoon_data['SID'].str[:2].unique())) | |
avg_storms_per_year = total_storms / max(years_covered, 1) | |
except Exception: | |
basins_available = "Unknown" | |
avg_storms_per_year = 0 | |
else: | |
basins_available = "Unknown" | |
avg_storms_per_year = 0 | |
# TD specific statistics | |
try: | |
if 'USA_WIND' in typhoon_data.columns:
# Classify each storm by its peak wind; filtering raw records would count
# a typhoon in every category it passed through while intensifying
max_winds = pd.to_numeric(typhoon_data['USA_WIND'], errors='coerce').groupby(typhoon_data['SID']).max()
td_storms = int((max_winds < 34).sum())
ts_storms = int(((max_winds >= 34) & (max_winds < 64)).sum())
typhoon_storms = int((max_winds >= 64).sum())
td_percentage = (td_storms / max(total_storms, 1)) * 100
else: | |
td_storms = ts_storms = typhoon_storms = 0 | |
td_percentage = 0 | |
except Exception as e: | |
print(f"Error calculating TD statistics: {e}") | |
td_storms = ts_storms = typhoon_storms = 0 | |
td_percentage = 0 | |
# Create statistics text safely | |
stats_text = f""" | |
### 📊 Enhanced Dataset Summary: | |
- **Total Unique Storms**: {total_storms:,} | |
- **Total Track Records**: {total_records:,} | |
- **Year Range**: {year_range} ({years_covered} years) | |
- **Basins Available**: {basins_available} | |
- **Average Storms/Year**: {avg_storms_per_year:.1f} | |
### 🌪️ Storm Category Breakdown: | |
- **Tropical Depressions**: {td_storms:,} storms ({td_percentage:.1f}%) | |
- **Tropical Storms**: {ts_storms:,} storms | |
- **Typhoons (C1-C5)**: {typhoon_storms:,} storms | |
### 🚀 Platform Capabilities: | |
- **Complete TD Analysis** - First platform to include comprehensive TD tracking | |
- **Dual Classification Systems** - Both Atlantic and Taiwan standards supported | |
- **Advanced ML Clustering** - DBSCAN pattern recognition with separate visualizations | |
- **Real-time Oceanic Predictions** - Physics-based with SST/SLP integration | |
- **2025 Data Ready** - Full compatibility with current season data | |
- **Enhanced Animations** - Professional-quality storm track videos | |
- **Multi-basin Analysis** - Comprehensive Pacific and Atlantic coverage | |
### 🔬 Research Applications: | |
- Climate change impact studies | |
- Seasonal forecasting research | |
- Storm pattern classification | |
- ENSO-typhoon relationship analysis | |
- Oceanic-atmospheric coupling research | |
- Cross-regional classification comparisons | |
""" | |
gr.Markdown(stats_text) | |
return demo | |
except Exception as e: | |
logging.error(f"Error creating Gradio interface: {e}") | |
import traceback | |
traceback.print_exc() | |
# Create a minimal fallback interface | |
return create_minimal_fallback_interface() | |
def create_minimal_fallback_interface(): | |
"""Create a minimal fallback interface when main interface fails""" | |
with gr.Blocks() as demo: | |
gr.Markdown("# Enhanced Typhoon Analysis Platform") | |
gr.Markdown("**Notice**: Loading with minimal interface due to data issues.") | |
with gr.Tab("Status"): | |
gr.Markdown(""" | |
## Platform Status | |
The application is running but encountered issues loading the full interface. | |
This could be due to: | |
- Data loading problems | |
- Missing dependencies | |
- Configuration issues | |
### Available Features: | |
- Basic interface is functional | |
- Error logs are being generated | |
- System is ready for debugging | |
### Next Steps: | |
1. Check the console logs for detailed error information | |
2. Verify all required data files are accessible | |
3. Ensure all dependencies are properly installed | |
4. Try restarting the application | |
""") | |
with gr.Tab("Debug"): | |
gr.Markdown("## Debug Information") | |
def get_debug_info(): | |
debug_text = f""" | |
Python Environment: | |
- Working Directory: {os.getcwd()} | |
- Data Path: {DATA_PATH} | |
- UMAP Available: {UMAP_AVAILABLE} | |
- CNN Available: {CNN_AVAILABLE} | |
Data Status: | |
- ONI Data: {'Loaded' if oni_data is not None else 'Failed'} | |
- Typhoon Data: {'Loaded' if typhoon_data is not None else 'Failed'} | |
- Merged Data: {'Loaded' if merged_data is not None else 'Failed'} | |
File Checks: | |
- ONI Path Exists: {os.path.exists(ONI_DATA_PATH)} | |
- Typhoon Path Exists: {os.path.exists(TYPHOON_DATA_PATH)} | |
""" | |
return debug_text | |
debug_btn = gr.Button("Get Debug Info") | |
debug_output = gr.Textbox(label="Debug Information", lines=15) | |
debug_btn.click(fn=get_debug_info, outputs=debug_output) | |
return demo | |
# Record the per-6-hour forecast point with environmental metadata
route_points.append({
'hour': hour,
'lat': current_lat,
'lon': current_lon, | |
'intensity_kt': current_intensity, | |
'category': categorize_typhoon_enhanced(current_intensity), | |
'confidence': confidence, | |
'development_stage': stage, | |
'forward_speed_kmh': base_speed * 111, # deg/hr × ~111 km/deg ≈ km/h
'pressure_hpa': max(900, 1013 - (current_intensity - 25) * 0.9), | |
'environmental_limit': environmental_limit, | |
'sst_celsius': current_sst, | |
'slp_hpa': current_slp, | |
'intensity_tendency': intensity_tendency | |
}) | |
results['route_forecast'] = route_points | |
# Enhanced confidence scores with environmental factors | |
base_confidence = 0.90 if use_real_data else 0.75 | |
results['confidence_scores'] = { | |
'genesis': base_confidence, | |
'early_development': base_confidence - 0.05, | |
'position_24h': base_confidence - 0.08, | |
'position_48h': base_confidence - 0.15, | |
'position_72h': base_confidence - 0.25, | |
'intensity_24h': (base_confidence - 0.10) if use_real_data else 0.65, | |
'intensity_48h': (base_confidence - 0.20) if use_real_data else 0.55, | |
'intensity_72h': (base_confidence - 0.30) if use_real_data else 0.45, | |
'environmental_coupling': 0.85 if use_real_data else 0.60 | |
} | |
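# Example: with real-time data (base 0.90), position_72h = 0.90 - 0.25 = 0.65;
# without it (base 0.75), position_72h = 0.50.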
# Enhanced model information | |
data_sources = [] | |
if sst_data and sst_data['success']: | |
data_sources.append("NOAA OISST v2") | |
if slp_data and slp_data['success']: | |
data_sources.append("NCEP/NCAR Reanalysis") | |
if data_sources: | |
results['model_info'] = f"Enhanced Oceanic Model using {', '.join(data_sources)}" | |
else: | |
results['model_info'] = "Enhanced Climatological Model" | |
logging.info(f"Enhanced prediction complete: {len(route_points)} forecast points") | |
return results | |
except Exception as e: | |
logging.error(f"Error in enhanced oceanic prediction: {e}") | |
import traceback | |
traceback.print_exc() | |
# Fallback to basic prediction | |
return predict_storm_route_and_intensity_realistic( | |
genesis_region, month, oni_value, models, forecast_hours, True | |
) | |
def calculate_environmental_steering_speed(lat, lon, month, oni_value, slp_data): | |
"""Calculate storm forward speed based on environmental steering""" | |
base_speed = 0.15 # Default speed in degrees/hour | |
# Latitude effects | |
if lat < 20: | |
speed_factor = 0.8 # Slower in tropics | |
elif lat < 30: | |
speed_factor = 1.2 # Faster in subtropics | |
else: | |
speed_factor = 1.5 # Fast in mid-latitudes | |
# Pressure gradient effects (if SLP data available) | |
if slp_data and slp_data['success']: | |
try: | |
# Calculate approximate pressure gradient (simplified) | |
slp_value = oceanic_manager.interpolate_data_to_point(slp_data, lat, lon, 'slp') | |
if not np.isnan(slp_value): | |
slp_hpa = slp_value / 100 if slp_value > 10000 else slp_value # convert Pa to hPa when needed
if slp_hpa < 1008: # Low pressure - faster motion | |
speed_factor *= 1.2 | |
elif slp_hpa > 1015: # High pressure - slower motion | |
speed_factor *= 0.8 | |
except Exception:
pass # Non-fatal: fall back to the latitude-based speed factor
return base_speed * speed_factor | |
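# Worked example (no SLP data): lat=25 gives speed_factor=1.2, so the return
# value is 0.15 * 1.2 = 0.18 deg/hr, i.e. roughly 0.18 * 111 ≈ 20 km/h.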
def calculate_motion_tendency(lat, lon, month, oni_value, hour, slp_data): | |
"""Calculate motion tendency with environmental steering""" | |
# Base climatological motion | |
ridge_position = 32 + 4 * np.sin(2 * np.pi * (month - 6) / 4) | |
if lat < ridge_position - 10: | |
base_lat_tendency = 0.05 # Poleward | |
base_lon_tendency = -0.12 # Westward | |
elif lat > ridge_position - 3: | |
base_lat_tendency = 0.15 # Strong poleward (recurvature) | |
base_lon_tendency = 0.08 # Eastward | |
else: | |
base_lat_tendency = 0.08 # Moderate poleward | |
base_lon_tendency = -0.06 # Moderate westward | |
# ENSO steering effects | |
if oni_value > 0.5: # El Niño | |
base_lon_tendency += 0.03 # More eastward | |
base_lat_tendency += 0.01 # Slightly more poleward | |
elif oni_value < -0.5: # La Niña | |
base_lon_tendency -= 0.04 # More westward | |
# Add realistic motion uncertainty | |
motion_uncertainty = 0.02 + (hour / 120) * 0.03 | |
lat_noise = np.random.normal(0, motion_uncertainty) | |
lon_noise = np.random.normal(0, motion_uncertainty) | |
return base_lat_tendency + lat_noise, base_lon_tendency + lon_noise | |
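# Worked example (noise aside): month=9 gives ridge_position = 32 + 4*sin(3π/2) = 28;
# a storm at lat=15 sits below 28-10=18, so the westward regime applies:
# lat_tendency ≈ +0.05 and lon_tendency ≈ -0.12 deg/hr before ENSO adjustments.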
def calculate_environmental_intensity_change( | |
current_intensity, environmental_limit, hour, lat, lon, month, oni_value, sst_data | |
): | |
"""Calculate intensity change based on environmental conditions""" | |
# Base intensity tendency based on development stage | |
if hour <= 48: # Development phase | |
if current_intensity < environmental_limit * 0.6: | |
base_tendency = 3.5 # Rapid development possible | |
elif current_intensity < environmental_limit * 0.8: | |
base_tendency = 2.0 # Moderate development | |
else: | |
base_tendency = 0.5 # Near limit | |
elif hour <= 120: # Mature phase | |
if current_intensity < environmental_limit: | |
base_tendency = 1.0 # Slow intensification | |
else: | |
base_tendency = -0.5 # Slight weakening | |
else: # Extended phase | |
base_tendency = -2.0 # General weakening trend | |
# Environmental limit constraint | |
if current_intensity >= environmental_limit: | |
base_tendency = min(base_tendency, -1.0) # Force weakening if over limit | |
# SST effects on development rate | |
if sst_data and sst_data['success']: | |
try: | |
sst_value = oceanic_manager.interpolate_data_to_point(sst_data, lat, lon, 'sst') | |
if not np.isnan(sst_value): | |
sst_celsius = sst_value if sst_value < 50 else sst_value - 273.15 | |
if sst_celsius >= 29.5: # Very warm - enhanced development | |
base_tendency += 1.5 | |
elif sst_celsius >= 28.0: # Warm - normal development | |
base_tendency += 0.5 | |
elif sst_celsius < 26.5: # Cool - inhibited development | |
base_tendency -= 2.0 | |
except Exception:
pass # Non-fatal: skip the SST adjustment if interpolation fails
# Land interaction | |
if lon < 110 or (120 < lon < 125 and lat > 20): # Near land masses | |
base_tendency -= 8.0 | |
# High latitude weakening | |
if lat > 35: | |
base_tendency -= 10.0 | |
elif lat > 30: | |
base_tendency -= 4.0 | |
# Add realistic intensity uncertainty | |
intensity_noise = np.random.normal(0, 1.0) | |
return base_tendency + intensity_noise | |
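# Worked example (noise aside): hour=24 with intensity=45 kt and limit=120 kt
# gives base_tendency=+3.5 kt/6hr (45 < 0.6*120); SST of 29.8 °C adds +1.5,
# for +5.0 kt/6hr before any land or high-latitude penalties.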
def calculate_dynamic_confidence(hour, lat, lon, use_real_data, sst_success, slp_success): | |
"""Calculate dynamic confidence based on data availability and conditions""" | |
base_confidence = 0.92 | |
# Time penalty | |
time_penalty = (hour / 120) * 0.35 | |
# Data quality bonus | |
data_bonus = 0.0 | |
if use_real_data: | |
if sst_success: | |
data_bonus += 0.08 | |
if slp_success: | |
data_bonus += 0.05 | |
# Environmental uncertainty | |
environment_penalty = 0.0 | |
if lat > 30 or lon < 115: # Challenging forecast regions | |
environment_penalty = 0.12 | |
elif lat > 25: | |
environment_penalty = 0.06 | |
final_confidence = base_confidence + data_bonus - time_penalty - environment_penalty | |
return max(0.25, min(0.95, final_confidence)) | |
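# Worked example: hour=48, lat=20, lon=130 with real SST and SLP ->
# 0.92 + 0.08 + 0.05 - (48/120)*0.35 - 0 = 0.91, inside the [0.25, 0.95] clamp.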
def get_environmental_development_stage(hour, intensity, environmental_limit): | |
"""Determine development stage based on time and environmental context""" | |
intensity_fraction = intensity / max(environmental_limit, 50) | |
if hour <= 24: | |
return 'Genesis' | |
elif hour <= 72: | |
if intensity_fraction < 0.3: | |
return 'Early Development' | |
elif intensity_fraction < 0.6: | |
return 'Active Development' | |
else: | |
return 'Rapid Development' | |
elif hour <= 120: | |
if intensity_fraction > 0.8: | |
return 'Peak Intensity' | |
else: | |
return 'Mature Stage' | |
else: | |
return 'Extended Forecast' | |
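# Example: hour=60, intensity=70 kt, environmental_limit=130 kt ->
# intensity_fraction ≈ 0.54, so the stage is 'Active Development'.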
def predict_storm_route_and_intensity_realistic(genesis_region, month, oni_value, models=None, forecast_hours=72, use_advanced_physics=True): | |
"""Realistic prediction with proper typhoon speeds and development""" | |
try: | |
genesis_locations = get_realistic_genesis_locations() | |
if genesis_region not in genesis_locations: | |
genesis_region = "Western Pacific Main Development Region" # Default | |
genesis_info = genesis_locations[genesis_region] | |
lat = genesis_info["lat"] | |
lon = genesis_info["lon"] | |
results = { | |
'current_prediction': {}, | |
'route_forecast': [], | |
'confidence_scores': {}, | |
'model_info': 'Realistic Genesis Model', | |
'genesis_info': genesis_info | |
} | |
# REALISTIC starting intensity - Tropical Depression level | |
base_intensity = 30 # Start at TD level (25-35 kt) | |
# Environmental factors for genesis | |
if oni_value > 1.0: # Strong El Niño - suppressed development | |
intensity_modifier = -6 | |
elif oni_value > 0.5: # Moderate El Niño | |
intensity_modifier = -3 | |
elif oni_value < -1.0: # Strong La Niña - enhanced development | |
intensity_modifier = +8 | |
elif oni_value < -0.5: # Moderate La Niña | |
intensity_modifier = +5 | |
else: # Neutral | |
intensity_modifier = oni_value * 2 | |
# Seasonal genesis effects | |
seasonal_factors = { | |
1: -8, 2: -6, 3: -4, 4: -2, 5: 2, 6: 6, | |
7: 10, 8: 12, 9: 15, 10: 10, 11: 4, 12: -5 | |
} | |
seasonal_modifier = seasonal_factors.get(month, 0) | |
# Genesis region favorability | |
region_factors = { | |
"Western Pacific Main Development Region": 8, | |
"South China Sea": 4, | |
"Philippine Sea": 5, | |
"Marshall Islands": 7, | |
"Monsoon Trough": 6, | |
"ITCZ Region": 3, | |
"Subtropical Region": 2, | |
"Bay of Bengal": 4, | |
"Eastern Pacific": 6, | |
"Atlantic MDR": 5 | |
} | |
region_modifier = region_factors.get(genesis_region, 0) | |
# Calculate realistic starting intensity (TD level) | |
predicted_intensity = base_intensity + intensity_modifier + seasonal_modifier + region_modifier | |
predicted_intensity = max(25, min(40, predicted_intensity)) # Keep in TD-weak TS range | |
# Add realistic uncertainty for genesis | |
intensity_uncertainty = np.random.normal(0, 2) | |
predicted_intensity += intensity_uncertainty | |
predicted_intensity = max(25, min(38, predicted_intensity)) # TD range | |
results['current_prediction'] = { | |
'intensity_kt': predicted_intensity, | |
'pressure_hpa': 1008 - (predicted_intensity - 25) * 0.6, # Realistic TD pressure | |
'category': categorize_typhoon_enhanced(predicted_intensity), | |
'genesis_region': genesis_region | |
} | |
# REALISTIC route prediction with proper typhoon speeds | |
current_lat = lat | |
current_lon = lon | |
current_intensity = predicted_intensity | |
route_points = [] | |
# Track storm development over time with REALISTIC SPEEDS | |
for hour in range(0, forecast_hours + 6, 6): | |
# REALISTIC typhoon motion - much faster speeds | |
# Typical typhoon forward speed: 15-25 km/h (0.14-0.23°/hour) | |
# Base forward speed depends on latitude and storm intensity | |
if current_lat < 20: # Low latitude - slower | |
base_speed = 0.12 # ~13 km/h | |
elif current_lat < 30: # Mid latitude - moderate | |
base_speed = 0.18 # ~20 km/h | |
else: # High latitude - faster | |
base_speed = 0.25 # ~28 km/h | |
# Intensity affects speed (stronger storms can move faster) | |
intensity_speed_factor = 1.0 + (current_intensity - 50) / 200 | |
base_speed *= max(0.8, min(1.4, intensity_speed_factor)) | |
# Beta drift - poleward/westward bias from the latitudinal gradient of the Coriolis parameter
beta_drift_lat = 0.02 * np.sin(np.radians(current_lat)) | |
beta_drift_lon = -0.05 * np.cos(np.radians(current_lat)) | |
# Seasonal steering patterns with realistic speeds | |
if month in [6, 7, 8, 9]: # Peak season | |
ridge_strength = 1.2 | |
ridge_position = 32 + 4 * np.sin(2 * np.pi * (month - 6) / 4) | |
else: # Off season | |
ridge_strength = 0.9 | |
ridge_position = 28 | |
# REALISTIC motion based on position relative to subtropical ridge | |
if current_lat < ridge_position - 10: # Well south of ridge - westward movement | |
lat_tendency = base_speed * 0.3 + beta_drift_lat # Slight poleward | |
lon_tendency = -base_speed * 0.9 + beta_drift_lon # Strong westward | |
elif current_lat > ridge_position - 3: # Near ridge - recurvature | |
lat_tendency = base_speed * 0.8 + beta_drift_lat # Strong poleward | |
lon_tendency = base_speed * 0.4 + beta_drift_lon # Eastward | |
else: # In between - normal WNW motion | |
lat_tendency = base_speed * 0.4 + beta_drift_lat # Moderate poleward | |
lon_tendency = -base_speed * 0.7 + beta_drift_lon # Moderate westward | |
# ENSO steering modulation (realistic effects) | |
if oni_value > 0.5: # El Niño - more eastward/poleward motion | |
lon_tendency += 0.05 | |
lat_tendency += 0.02 | |
elif oni_value < -0.5: # La Niña - more westward motion | |
lon_tendency -= 0.08 | |
lat_tendency -= 0.01 | |
# Add motion uncertainty that grows with time (realistic error growth) | |
motion_uncertainty = 0.02 + (hour / 120) * 0.04 | |
lat_noise = np.random.normal(0, motion_uncertainty) | |
lon_noise = np.random.normal(0, motion_uncertainty) | |
# Update position with realistic speeds | |
current_lat += lat_tendency + lat_noise | |
current_lon += lon_tendency + lon_noise | |
# REALISTIC intensity evolution with proper development cycles | |
# Development phase (first 48-72 hours) - realistic intensification | |
if hour <= 48: | |
if current_intensity < 50: # Still weak - rapid development possible | |
if 10 <= current_lat <= 25 and 115 <= current_lon <= 165: # Favorable environment | |
intensity_tendency = 4.5 if current_intensity < 35 else 3.0 | |
elif 120 <= current_lon <= 155 and 15 <= current_lat <= 20: # Best environment | |
intensity_tendency = 6.0 if current_intensity < 40 else 4.0 | |
else: | |
intensity_tendency = 2.0 | |
elif current_intensity < 80: # Moderate intensity | |
intensity_tendency = 2.5 if (120 <= current_lon <= 155 and 10 <= current_lat <= 25) else 1.0 | |
else: # Already strong | |
intensity_tendency = 1.0 | |
# Mature phase (48-120 hours) - peak intensity maintenance | |
elif hour <= 120: | |
if current_lat < 25 and current_lon > 120: # Still in favorable waters | |
if current_intensity < 120: | |
intensity_tendency = 1.5 | |
else: | |
intensity_tendency = 0.0 # Maintain intensity | |
else: | |
intensity_tendency = -1.5 | |
# Extended phase (120+ hours) - gradual weakening | |
else: | |
if current_lat < 30 and current_lon > 115: | |
intensity_tendency = -2.0 # Slow weakening | |
else: | |
intensity_tendency = -3.5 # Faster weakening | |
# Environmental modulation (realistic effects) | |
if current_lat > 35: # High latitude - rapid weakening | |
intensity_tendency -= 12 | |
elif current_lat > 30: # Moderate latitude | |
intensity_tendency -= 5 | |
elif current_lon < 110: # Land interaction | |
intensity_tendency -= 15 | |
elif 125 <= current_lon <= 155 and 10 <= current_lat <= 25: # Warm pool | |
intensity_tendency += 2 | |
elif 160 <= current_lon <= 180 and 15 <= current_lat <= 30: # Still warm | |
intensity_tendency += 1 | |
# SST effects (realistic temperature impact) | |
if current_lat < 8: # Very warm but weak Coriolis | |
intensity_tendency += 0.5 | |
elif 8 <= current_lat <= 20: # Sweet spot for development | |
intensity_tendency += 2.0 | |
elif 20 < current_lat <= 30: # Marginal | |
intensity_tendency -= 1.0 | |
elif current_lat > 30: # Cool waters | |
intensity_tendency -= 4.0 | |
# Shear effects (simplified but realistic) | |
if month in [12, 1, 2, 3]: # High shear season | |
intensity_tendency -= 2.0 | |
elif month in [7, 8, 9]: # Low shear season | |
intensity_tendency += 1.0 | |
# Update intensity with realistic bounds and variability | |
intensity_noise = np.random.normal(0, 1.5) # Small random fluctuations | |
current_intensity += intensity_tendency + intensity_noise | |
current_intensity = max(20, min(185, current_intensity)) # Realistic range | |
# Calculate confidence based on forecast time and environment | |
base_confidence = 0.92 | |
time_penalty = (hour / 120) * 0.45 | |
environment_penalty = 0.15 if current_lat > 30 or current_lon < 115 else 0 | |
confidence = max(0.25, base_confidence - time_penalty - environment_penalty) | |
# Determine development stage | |
if hour <= 24: | |
stage = 'Genesis' | |
elif hour <= 72: | |
stage = 'Development' | |
elif hour <= 120: | |
stage = 'Mature' | |
elif hour <= 240: | |
stage = 'Extended' | |
else: | |
stage = 'Long-term' | |
route_points.append({ | |
'hour': hour, | |
'lat': current_lat, | |
'lon': current_lon, | |
'intensity_kt': current_intensity, | |
'category': categorize_typhoon_enhanced(current_intensity), | |
'confidence': confidence, | |
'development_stage': stage, | |
'forward_speed_kmh': base_speed * 111, # deg/hr × ~111 km/deg ≈ km/h
'pressure_hpa': max(900, 1013 - (current_intensity - 25) * 0.9) | |
}) | |
results['route_forecast'] = route_points | |
# Realistic confidence scores | |
results['confidence_scores'] = { | |
'genesis': 0.88, | |
'early_development': 0.82, | |
'position_24h': 0.85, | |
'position_48h': 0.78, | |
'position_72h': 0.68, | |
'intensity_24h': 0.75, | |
'intensity_48h': 0.65, | |
'intensity_72h': 0.55, | |
'long_term': max(0.3, 0.8 - (forecast_hours / 240) * 0.5) | |
} | |
# Model information | |
results['model_info'] = f"Enhanced Realistic Model - {genesis_region}" | |
return results | |
except Exception as e: | |
logging.error(f"Realistic prediction error: {str(e)}") | |
return { | |
'error': f"Prediction error: {str(e)}", | |
'current_prediction': {'intensity_kt': 30, 'category': 'Tropical Depression'}, | |
'route_forecast': [], | |
'confidence_scores': {}, | |
'model_info': 'Error in prediction' | |
} | |
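# Minimal usage sketch (illustrative values, for reference only):
# results = predict_storm_route_and_intensity_realistic(
#     "Western Pacific Main Development Region", month=9, oni_value=-0.8,
#     forecast_hours=96)
# print(results['current_prediction']['category'], len(results['route_forecast']))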
# Wrapper that routes predict_storm_route_and_intensity_realistic callers through the oceanic-data model
def predict_storm_route_and_intensity_realistic_enhanced( | |
genesis_region, month, oni_value, models=None, | |
forecast_hours=72, use_advanced_physics=True | |
): | |
"""Enhanced wrapper that uses oceanic data when available""" | |
return predict_storm_route_and_intensity_with_oceanic_data( | |
genesis_region, month, oni_value, forecast_hours, | |
use_real_data=True, models=models, enable_animation=True | |
) | |
# Initialize data | |
initialize_data() | |
# Create and launch the interface | |
demo = create_interface() | |
if __name__ == "__main__": | |
demo.launch(share=True) # Enable sharing with public link |