import os
import argparse
import logging
import pickle
import threading
import time
import warnings
from datetime import datetime, timedelta
from collections import defaultdict
import csv
import json
# Suppress warnings for cleaner output
warnings.filterwarnings('ignore', category=FutureWarning)
warnings.filterwarnings('ignore', category=UserWarning, module='umap')
warnings.filterwarnings('ignore', category=UserWarning, module='sklearn')
import gradio as gr
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.animation as animation
from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas
import cartopy.crs as ccrs
import cartopy.feature as cfeature
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
from sklearn.manifold import TSNE
from sklearn.cluster import DBSCAN, KMeans
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, r2_score
from scipy.interpolate import interp1d, RBFInterpolator
import statsmodels.api as sm
import requests
import tempfile
import shutil
import xarray as xr
# Advanced ML imports
try:
import umap.umap_ as umap
UMAP_AVAILABLE = True
except ImportError:
UMAP_AVAILABLE = False
print("UMAP not available - clustering features limited")
# Optional CNN imports with robust error handling
CNN_AVAILABLE = False
try:
os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
import tensorflow as tf
from tensorflow.keras import layers, models
tf.config.set_visible_devices([], 'GPU')
CNN_AVAILABLE = True
print("TensorFlow successfully loaded - CNN features enabled")
except Exception as e:
CNN_AVAILABLE = False
print(f"TensorFlow not available - CNN features disabled: {str(e)[:100]}...")
try:
import cdsapi
CDSAPI_AVAILABLE = True
except ImportError:
CDSAPI_AVAILABLE = False
import tropycal.tracks as tracks
# -----------------------------
# Configuration and Setup
# -----------------------------
logging.basicConfig(
level=logging.INFO,
format='%(asctime)s - %(levelname)s - %(message)s'
)
# FIXED: Data path setup
DATA_PATH = '/tmp/typhoon_data' if 'SPACE_ID' in os.environ else tempfile.gettempdir()
try:
os.makedirs(DATA_PATH, exist_ok=True)
test_file = os.path.join(DATA_PATH, 'test_write.txt')
with open(test_file, 'w') as f:
f.write('test')
os.remove(test_file)
logging.info(f"Data directory is writable: {DATA_PATH}")
except Exception as e:
logging.warning(f"Data directory not writable, using temp dir: {e}")
DATA_PATH = tempfile.mkdtemp()
logging.info(f"Using temporary directory: {DATA_PATH}")
# Update file paths
ONI_DATA_PATH = os.path.join(DATA_PATH, 'oni_data.csv')
TYPHOON_DATA_PATH = os.path.join(DATA_PATH, 'processed_typhoon_data.csv')
MERGED_DATA_CSV = os.path.join(DATA_PATH, 'merged_typhoon_era5_data.csv')
# IBTrACS settings
BASIN_FILES = {
'EP': 'ibtracs.EP.list.v04r01.csv',
'NA': 'ibtracs.NA.list.v04r01.csv',
'WP': 'ibtracs.WP.list.v04r01.csv',
'ALL': 'ibtracs.ALL.list.v04r01.csv' # Added ALL basin option
}
IBTRACS_BASE_URL = 'https://www.ncei.noaa.gov/data/international-best-track-archive-for-climate-stewardship-ibtracs/v04r01/access/csv/'
# -----------------------------
# FIXED: Color Maps and Standards with TD Support
# -----------------------------
enhanced_color_map = {
'Unknown': 'rgb(200, 200, 200)',
'Tropical Depression': 'rgb(128, 128, 128)',
'Tropical Storm': 'rgb(0, 0, 255)',
'C1 Typhoon': 'rgb(0, 255, 255)',
'C2 Typhoon': 'rgb(0, 255, 0)',
'C3 Strong Typhoon': 'rgb(255, 255, 0)',
'C4 Very Strong Typhoon': 'rgb(255, 165, 0)',
'C5 Super Typhoon': 'rgb(255, 0, 0)'
}
matplotlib_color_map = {
'Unknown': '#C8C8C8',
'Tropical Depression': '#808080',
'Tropical Storm': '#0000FF',
'C1 Typhoon': '#00FFFF',
'C2 Typhoon': '#00FF00',
'C3 Strong Typhoon': '#FFFF00',
'C4 Very Strong Typhoon': '#FFA500',
'C5 Super Typhoon': '#FF0000'
}
taiwan_color_map_fixed = {
'Tropical Depression': '#808080',
'Tropical Storm': '#0000FF',
'Severe Tropical Storm': '#00FFFF',
'Typhoon': '#FFFF00',
'Severe Typhoon': '#FFA500',
'Super Typhoon': '#FF0000'
}
def get_matplotlib_color(category):
"""Get matplotlib-compatible color for a storm category"""
return matplotlib_color_map.get(category, '#808080')
def get_taiwan_color_fixed(category):
"""Get corrected Taiwan standard color"""
return taiwan_color_map_fixed.get(category, '#808080')
# Cluster colors for route visualization
CLUSTER_COLORS = [
'#FF6B6B', '#4ECDC4', '#45B7D1', '#96CEB4', '#FFEAA7',
'#DDA0DD', '#98D8C8', '#F7DC6F', '#BB8FCE', '#85C1E9',
'#F8C471', '#82E0AA', '#F1948A', '#85C1E9', '#D2B4DE'
]
# Route prediction colors
ROUTE_COLORS = [
'#FF0066', '#00FF66', '#6600FF', '#FF6600', '#0066FF',
'#FF00CC', '#00FFCC', '#CC00FF', '#CCFF00', '#00CCFF'
]
# Classification standards
atlantic_standard = {
'C5 Super Typhoon': {'wind_speed': 137, 'color': 'Red', 'hex': '#FF0000'},
'C4 Very Strong Typhoon': {'wind_speed': 113, 'color': 'Orange', 'hex': '#FFA500'},
'C3 Strong Typhoon': {'wind_speed': 96, 'color': 'Yellow', 'hex': '#FFFF00'},
'C2 Typhoon': {'wind_speed': 83, 'color': 'Green', 'hex': '#00FF00'},
'C1 Typhoon': {'wind_speed': 64, 'color': 'Cyan', 'hex': '#00FFFF'},
'Tropical Storm': {'wind_speed': 34, 'color': 'Blue', 'hex': '#0000FF'},
'Tropical Depression': {'wind_speed': 0, 'color': 'Gray', 'hex': '#808080'}
}
taiwan_standard_fixed = {
'Super Typhoon': {'wind_speed_ms': 51.0, 'wind_speed_kt': 99.2, 'color': 'Red', 'hex': '#FF0000'},
'Severe Typhoon': {'wind_speed_ms': 41.5, 'wind_speed_kt': 80.7, 'color': 'Orange', 'hex': '#FFA500'},
'Typhoon': {'wind_speed_ms': 32.7, 'wind_speed_kt': 63.6, 'color': 'Yellow', 'hex': '#FFFF00'},
'Severe Tropical Storm': {'wind_speed_ms': 24.5, 'wind_speed_kt': 47.6, 'color': 'Cyan', 'hex': '#00FFFF'},
'Tropical Storm': {'wind_speed_ms': 17.2, 'wind_speed_kt': 33.4, 'color': 'Blue', 'hex': '#0000FF'},
'Tropical Depression': {'wind_speed_ms': 0, 'wind_speed_kt': 0, 'color': 'Gray', 'hex': '#808080'}
}
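# Note: the wind_speed_kt values above follow from the m/s thresholds via
# 1 kt = 0.514444 m/s, e.g. 51.0 / 0.514444 ≈ 99.1 kt (the table stores
# slightly rounded values).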
# -----------------------------
# FIXED: Utility Functions
# -----------------------------
def safe_file_write(file_path, data_frame, backup_dir=None):
"""Safely write DataFrame to CSV with backup and error handling"""
try:
os.makedirs(os.path.dirname(file_path), exist_ok=True)
temp_path = file_path + '.tmp'
data_frame.to_csv(temp_path, index=False)
os.rename(temp_path, file_path)
logging.info(f"Successfully saved {len(data_frame)} records to {file_path}")
return True
except Exception as e:
logging.error(f"Error saving file {file_path}: {e}")
if backup_dir:
try:
backup_path = os.path.join(backup_dir, os.path.basename(file_path))
data_frame.to_csv(backup_path, index=False)
logging.info(f"Saved to backup location: {backup_path}")
return True
except Exception as backup_e:
logging.error(f"Failed to save to backup location: {backup_e}")
return False
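# Usage sketch (illustrative filename and data, not used elsewhere in the app):
#     demo_df = pd.DataFrame({'SID': ['TEST'], 'LAT': [10.0], 'LON': [140.0]})
#     safe_file_write(os.path.join(DATA_PATH, 'demo.csv'), demo_df,
#                     backup_dir=tempfile.gettempdir())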
# -----------------------------
# FIXED: ONI Data Functions
# -----------------------------
def download_oni_file(url, filename):
"""Download ONI file with retry logic"""
max_retries = 3
for attempt in range(max_retries):
try:
response = requests.get(url, timeout=30)
response.raise_for_status()
with open(filename, 'wb') as f:
f.write(response.content)
return True
except Exception as e:
logging.warning(f"Attempt {attempt + 1} failed to download ONI: {e}")
if attempt < max_retries - 1:
time.sleep(2 ** attempt)
return False
def convert_oni_ascii_to_csv(input_file, output_file):
"""Convert ONI ASCII format to CSV"""
data = defaultdict(lambda: [''] * 12)
season_to_month = {'DJF':12, 'JFM':1, 'FMA':2, 'MAM':3, 'AMJ':4, 'MJJ':5,
'JJA':6, 'JAS':7, 'ASO':8, 'SON':9, 'OND':10, 'NDJ':11}
try:
with open(input_file, 'r') as f:
lines = f.readlines()[1:] # Skip header
for line in lines:
parts = line.split()
if len(parts) >= 4:
season, year, anom = parts[0], parts[1], parts[-1]
if season in season_to_month:
month = season_to_month[season]
if season == 'DJF':
year = str(int(year)-1)
data[year][month-1] = anom
df = pd.DataFrame(data).T.reset_index()
df.columns = ['Year','Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec']
df = df.sort_values('Year').reset_index(drop=True)
return safe_file_write(output_file, df)
except Exception as e:
logging.error(f"Error converting ONI file: {e}")
return False
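# Expected input shape (CPC ONI ASCII is whitespace-delimited with a header row;
# values below are illustrative):
#     SEAS  YR    TOTAL  ANOM
#     DJF   2000  25.51  -1.7
# Each (season, year, anomaly) row fills one monthly cell of the wide CSV;
# DJF is shifted back one year so it lands in the previous December column.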
def update_oni_data():
"""Update ONI data with error handling"""
url = "https://www.cpc.ncep.noaa.gov/data/indices/oni.ascii.txt"
temp_file = os.path.join(DATA_PATH, "temp_oni.ascii.txt")
input_file = os.path.join(DATA_PATH, "oni.ascii.txt")
output_file = ONI_DATA_PATH
try:
if download_oni_file(url, temp_file):
if not os.path.exists(input_file) or not os.path.exists(output_file):
os.rename(temp_file, input_file)
convert_oni_ascii_to_csv(input_file, output_file)
else:
os.remove(temp_file)
else:
logging.warning("ONI download failed - will create minimal ONI data")
create_minimal_oni_data(output_file)
except Exception as e:
logging.error(f"Error updating ONI data: {e}")
create_minimal_oni_data(output_file)
def create_minimal_oni_data(output_file):
"""Create minimal ONI data for years without dropping typhoon data"""
years = range(1950, 2026) # Wide range to ensure coverage
months = ['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec']
data = []
for year in years:
row = [year]
for month in months:
# Generate neutral ONI values (small variations around 0)
value = np.random.normal(0, 0.3)
row.append(f"{value:.2f}")
data.append(row)
df = pd.DataFrame(data, columns=['Year'] + months)
safe_file_write(output_file, df)
# -----------------------------
# FIXED: IBTrACS Data Loading - No Fallback, All Data
# -----------------------------
def download_ibtracs_file(basin, force_download=False):
"""Download specific basin file from IBTrACS"""
filename = BASIN_FILES[basin]
local_path = os.path.join(DATA_PATH, filename)
url = IBTRACS_BASE_URL + filename
if os.path.exists(local_path) and not force_download:
file_age = time.time() - os.path.getmtime(local_path)
if file_age < 7 * 24 * 3600: # 7 days
logging.info(f"Using cached {basin} basin file")
return local_path
try:
logging.info(f"Downloading {basin} basin file from {url}")
response = requests.get(url, timeout=120) # Increased timeout
response.raise_for_status()
os.makedirs(os.path.dirname(local_path), exist_ok=True)
with open(local_path, 'wb') as f:
f.write(response.content)
logging.info(f"Successfully downloaded {basin} basin file")
return local_path
except Exception as e:
logging.error(f"Failed to download {basin} basin file: {e}")
return None
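# Cache behavior sketch: a local copy younger than 7 days is reused; pass
# force_download=True to bypass the cache:
#     path = download_ibtracs_file('WP')                       # cached if fresh
#     path = download_ibtracs_file('WP', force_download=True)  # always re-fetch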
def load_ibtracs_csv_directly(basin='ALL'):
"""Load IBTrACS data directly from CSV - FIXED to load ALL data"""
filename = BASIN_FILES[basin]
local_path = os.path.join(DATA_PATH, filename)
# Download if not exists
if not os.path.exists(local_path):
downloaded_path = download_ibtracs_file(basin)
if not downloaded_path:
logging.error(f"Could not download {basin} basin data")
return None
try:
logging.info(f"Reading IBTrACS CSV file: {local_path}")
# Read with low_memory=False to ensure proper data types
df = pd.read_csv(local_path, low_memory=False)
logging.info(f"Original data shape: {df.shape}")
logging.info(f"Available columns: {list(df.columns)}")
# Essential columns check
required_cols = ['SID', 'LAT', 'LON']
missing_cols = [col for col in required_cols if col not in df.columns]
if missing_cols:
logging.error(f"Missing critical columns: {missing_cols}")
return None
# FIXED: Data cleaning without dropping data unnecessarily
# Clean numeric columns carefully
numeric_columns = ['LAT', 'LON', 'WMO_WIND', 'WMO_PRES', 'USA_WIND', 'USA_PRES']
for col in numeric_columns:
if col in df.columns:
df[col] = pd.to_numeric(df[col], errors='coerce')
# Time handling
if 'ISO_TIME' in df.columns:
df['ISO_TIME'] = pd.to_datetime(df['ISO_TIME'], errors='coerce')
# FIXED: Only filter out clearly invalid coordinates
valid_coords = (
df['LAT'].notna() &
df['LON'].notna() &
(df['LAT'].between(-90, 90)) &
(df['LON'].between(-180, 180))
)
df = df[valid_coords]
# Add missing columns with defaults
if 'BASIN' not in df.columns:
if 'SID' in df.columns:
df['BASIN'] = df['SID'].str[:2]
else:
df['BASIN'] = basin
if 'NAME' not in df.columns:
df['NAME'] = 'UNNAMED'
if 'SEASON' not in df.columns and 'ISO_TIME' in df.columns:
df['SEASON'] = df['ISO_TIME'].dt.year
elif 'SEASON' not in df.columns:
# Extract year from SID if possible
if 'SID' in df.columns:
                try:
                    df['SEASON'] = df['SID'].str.extract(r'(\d{4})', expand=False).astype(float)
                except Exception:
                    df['SEASON'] = 2000  # Default year
logging.info(f"Successfully loaded {len(df)} records from {basin} basin")
logging.info(f"Final data shape: {df.shape}")
return df
except Exception as e:
logging.error(f"Error reading IBTrACS CSV file: {e}")
import traceback
traceback.print_exc()
return None
def load_all_ibtracs_data():
"""Load ALL available IBTrACS data - FIXED to never use fallback"""
all_data = []
# Try to load the ALL basin file first (contains all basins)
try:
logging.info("Attempting to load ALL basin data...")
all_basin_data = load_ibtracs_csv_directly('ALL')
if all_basin_data is not None and not all_basin_data.empty:
logging.info(f"Successfully loaded ALL basin data: {len(all_basin_data)} records")
return all_basin_data
except Exception as e:
logging.warning(f"Failed to load ALL basin data: {e}")
# If ALL basin fails, load individual basins
basins_to_load = ['WP', 'EP', 'NA']
for basin in basins_to_load:
try:
logging.info(f"Loading {basin} basin data...")
basin_data = load_ibtracs_csv_directly(basin)
if basin_data is not None and not basin_data.empty:
basin_data['BASIN'] = basin
all_data.append(basin_data)
logging.info(f"Successfully loaded {basin} basin: {len(basin_data)} records")
else:
logging.warning(f"No data loaded for basin {basin}")
except Exception as e:
logging.error(f"Failed to load basin {basin}: {e}")
if all_data:
combined_data = pd.concat(all_data, ignore_index=True)
logging.info(f"Combined all basins: {len(combined_data)} total records")
return combined_data
else:
logging.error("No IBTrACS data could be loaded from any basin")
return None
def load_data_fixed(oni_path, typhoon_path):
"""FIXED data loading - loads all available typhoon data regardless of ONI"""
# Load ONI data (optional - typhoon analysis can work without it)
oni_data = None
if os.path.exists(oni_path):
try:
oni_data = pd.read_csv(oni_path)
logging.info(f"Successfully loaded ONI data with {len(oni_data)} years")
except Exception as e:
logging.error(f"Error loading ONI data: {e}")
if oni_data is None:
logging.warning("ONI data not available - creating minimal ONI data")
update_oni_data()
try:
oni_data = pd.read_csv(oni_path)
except Exception as e:
logging.error(f"Still can't load ONI data: {e}")
# Create minimal fallback
create_minimal_oni_data(oni_path)
oni_data = pd.read_csv(oni_path)
# FIXED: Load typhoon data - ALWAYS from IBTrACS, never use fallback
typhoon_data = None
# Try to load from existing processed file first
if os.path.exists(typhoon_path):
try:
typhoon_data = pd.read_csv(typhoon_path, low_memory=False)
required_cols = ['LAT', 'LON', 'SID']
if all(col in typhoon_data.columns for col in required_cols):
if 'ISO_TIME' in typhoon_data.columns:
typhoon_data['ISO_TIME'] = pd.to_datetime(typhoon_data['ISO_TIME'], errors='coerce')
logging.info(f"Loaded processed typhoon data with {len(typhoon_data)} records")
# Validate the data quality
valid_records = typhoon_data['LAT'].notna() & typhoon_data['LON'].notna()
if valid_records.sum() / len(typhoon_data) > 0.8: # If >80% valid, use it
typhoon_data = typhoon_data[valid_records]
else:
logging.warning("Processed data quality poor, reloading from IBTrACS")
typhoon_data = None
else:
logging.warning("Processed typhoon data missing required columns, reloading from IBTrACS")
typhoon_data = None
except Exception as e:
logging.error(f"Error loading processed typhoon data: {e}")
typhoon_data = None
# FIXED: Load from IBTrACS if needed - NO FALLBACK ALLOWED
if typhoon_data is None or typhoon_data.empty:
logging.info("Loading typhoon data from IBTrACS...")
typhoon_data = load_all_ibtracs_data()
if typhoon_data is None or typhoon_data.empty:
raise Exception("CRITICAL ERROR: No typhoon data could be loaded from IBTrACS. Check internet connection and IBTrACS availability.")
# Process and save the loaded data
# Ensure SID exists and is properly formatted
if 'SID' not in typhoon_data.columns:
logging.error("CRITICAL: No SID column in typhoon data")
raise Exception("Typhoon data missing SID column")
# Save the processed data for future use
try:
safe_file_write(typhoon_path, typhoon_data)
logging.info(f"Saved processed typhoon data: {len(typhoon_data)} records")
except Exception as e:
logging.warning(f"Could not save processed data: {e}")
# FIXED: Final validation and enhancement
if typhoon_data is not None and not typhoon_data.empty:
# Ensure required columns exist with proper defaults
required_columns = {
'SID': lambda: f"UNKNOWN_{typhoon_data.index}",
'ISO_TIME': pd.Timestamp('2000-01-01'),
'LAT': 20.0,
'LON': 140.0,
'USA_WIND': 30.0,
'USA_PRES': 1013.0,
'NAME': 'UNNAMED',
'SEASON': 2000,
'BASIN': 'WP'
}
for col, default_val in required_columns.items():
if col not in typhoon_data.columns:
if callable(default_val):
typhoon_data[col] = default_val()
else:
typhoon_data[col] = default_val
logging.warning(f"Added missing column {col}")
# Ensure proper data types
numeric_cols = ['LAT', 'LON', 'USA_WIND', 'USA_PRES', 'SEASON']
for col in numeric_cols:
if col in typhoon_data.columns:
typhoon_data[col] = pd.to_numeric(typhoon_data[col], errors='coerce')
if 'ISO_TIME' in typhoon_data.columns:
typhoon_data['ISO_TIME'] = pd.to_datetime(typhoon_data['ISO_TIME'], errors='coerce')
# Remove only clearly invalid records
valid_mask = (
typhoon_data['LAT'].notna() &
typhoon_data['LON'].notna() &
typhoon_data['LAT'].between(-90, 90) &
typhoon_data['LON'].between(-180, 180)
)
original_count = len(typhoon_data)
typhoon_data = typhoon_data[valid_mask]
logging.info(f"Final typhoon data: {len(typhoon_data)} records (removed {original_count - len(typhoon_data)} invalid)")
if len(typhoon_data) == 0:
raise Exception("CRITICAL ERROR: All typhoon data was filtered out - check data quality")
else:
raise Exception("CRITICAL ERROR: No typhoon data available after all loading attempts")
return oni_data, typhoon_data
def process_oni_data(oni_data):
"""Process ONI data into long format"""
if oni_data is None or oni_data.empty:
# Return minimal ONI data that won't break merging
return pd.DataFrame({
'Year': [2000], 'Month': ['01'], 'ONI': [0.0],
'Date': [pd.Timestamp('2000-01-01')]
})
oni_long = oni_data.melt(id_vars=['Year'], var_name='Month', value_name='ONI')
month_map = {'Jan':'01','Feb':'02','Mar':'03','Apr':'04','May':'05','Jun':'06',
'Jul':'07','Aug':'08','Sep':'09','Oct':'10','Nov':'11','Dec':'12'}
oni_long['Month'] = oni_long['Month'].map(month_map)
oni_long['Date'] = pd.to_datetime(oni_long['Year'].astype(str)+'-'+oni_long['Month']+'-01')
oni_long['ONI'] = pd.to_numeric(oni_long['ONI'], errors='coerce').fillna(0)
return oni_long
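# Reshape sketch (illustrative values): one wide row per year becomes twelve
# long rows, one per month:
#     Year  Jan   Feb  ...        ->    Year Month   ONI       Date
#     2000  -1.7  -1.4 ...              2000    01  -1.7 2000-01-01
#                                       2000    02  -1.4 2000-02-01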
def process_typhoon_data(typhoon_data):
"""Process typhoon data - FIXED to preserve all data"""
if typhoon_data is None or typhoon_data.empty:
raise Exception("No typhoon data to process")
# Ensure proper data types
if 'ISO_TIME' in typhoon_data.columns:
typhoon_data['ISO_TIME'] = pd.to_datetime(typhoon_data['ISO_TIME'], errors='coerce')
numeric_cols = ['USA_WIND', 'USA_PRES', 'LON', 'LAT']
for col in numeric_cols:
if col in typhoon_data.columns:
typhoon_data[col] = pd.to_numeric(typhoon_data[col], errors='coerce')
logging.info(f"Processing {len(typhoon_data)} typhoon records")
# Get maximum values per storm
agg_dict = {}
if 'USA_WIND' in typhoon_data.columns:
agg_dict['USA_WIND'] = 'max'
if 'USA_PRES' in typhoon_data.columns:
agg_dict['USA_PRES'] = 'min'
if 'ISO_TIME' in typhoon_data.columns:
agg_dict['ISO_TIME'] = 'first'
if 'SEASON' in typhoon_data.columns:
agg_dict['SEASON'] = 'first'
if 'NAME' in typhoon_data.columns:
agg_dict['NAME'] = 'first'
if 'LAT' in typhoon_data.columns:
agg_dict['LAT'] = 'first'
if 'LON' in typhoon_data.columns:
agg_dict['LON'] = 'first'
typhoon_max = typhoon_data.groupby('SID').agg(agg_dict).reset_index()
# Add time-based columns for merging
if 'ISO_TIME' in typhoon_max.columns:
typhoon_max['Month'] = typhoon_max['ISO_TIME'].dt.strftime('%m')
typhoon_max['Year'] = typhoon_max['ISO_TIME'].dt.year
else:
# Use SEASON if available, otherwise default
if 'SEASON' in typhoon_max.columns:
typhoon_max['Year'] = typhoon_max['SEASON']
else:
typhoon_max['Year'] = 2000
typhoon_max['Month'] = '01' # Default month
# Add category
if 'USA_WIND' in typhoon_max.columns:
typhoon_max['Category'] = typhoon_max['USA_WIND'].apply(categorize_typhoon_enhanced)
else:
typhoon_max['Category'] = 'Unknown'
logging.info(f"Processed {len(typhoon_max)} unique storms")
return typhoon_max
def merge_data(oni_long, typhoon_max):
"""Merge ONI and typhoon data - FIXED to preserve typhoon data even without ONI"""
if typhoon_max is None or typhoon_max.empty:
raise Exception("No typhoon data to merge")
if oni_long is None or oni_long.empty:
# If no ONI data, add default ONI values
logging.warning("No ONI data available - using neutral values")
typhoon_max['ONI'] = 0.0
return typhoon_max
# Merge with ONI data
merged = pd.merge(typhoon_max, oni_long, on=['Year', 'Month'], how='left')
# Fill missing ONI values with neutral
merged['ONI'] = merged['ONI'].fillna(0.0)
logging.info(f"Merged data: {len(merged)} storms with ONI values")
return merged
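# End-to-end pipeline sketch (assumes the loaders above succeeded):
#     oni_raw, typhoon_raw = load_data_fixed(ONI_DATA_PATH, TYPHOON_DATA_PATH)
#     merged = merge_data(process_oni_data(oni_raw), process_typhoon_data(typhoon_raw))
#     # 'merged' has one row per storm: peak intensity plus the matching ONI value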
# -----------------------------
# Enhanced Categorization Functions
# -----------------------------
def categorize_typhoon_enhanced(wind_speed):
"""Enhanced categorization that properly includes Tropical Depressions"""
if pd.isna(wind_speed):
return 'Unknown'
if wind_speed < 10: # Likely in m/s, convert to knots
wind_speed = wind_speed * 1.94384
if wind_speed < 34:
return 'Tropical Depression'
elif wind_speed < 64:
return 'Tropical Storm'
elif wind_speed < 83:
return 'C1 Typhoon'
elif wind_speed < 96:
return 'C2 Typhoon'
elif wind_speed < 113:
return 'C3 Strong Typhoon'
elif wind_speed < 137:
return 'C4 Very Strong Typhoon'
else:
return 'C5 Super Typhoon'
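# Examples (input in knots; values < 10 are assumed to be m/s and converted):
#     categorize_typhoon_enhanced(25)   # -> 'Tropical Depression'
#     categorize_typhoon_enhanced(70)   # -> 'C1 Typhoon'
#     categorize_typhoon_enhanced(140)  # -> 'C5 Super Typhoon'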
def categorize_typhoon_taiwan_fixed(wind_speed):
"""FIXED Taiwan categorization system based on CMA 2006 standards"""
if pd.isna(wind_speed):
return 'Tropical Depression'
if wind_speed > 50: # Likely in knots, convert to m/s
wind_speed_ms = wind_speed * 0.514444
else:
wind_speed_ms = wind_speed
if wind_speed_ms >= 51.0:
return 'Super Typhoon'
elif wind_speed_ms >= 41.5:
return 'Severe Typhoon'
elif wind_speed_ms >= 32.7:
return 'Typhoon'
elif wind_speed_ms >= 24.5:
return 'Severe Tropical Storm'
elif wind_speed_ms >= 17.2:
return 'Tropical Storm'
else:
return 'Tropical Depression'
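# Examples (values > 50 are assumed to be knots and converted to m/s):
#     categorize_typhoon_taiwan_fixed(100)  # ~51.4 m/s -> 'Super Typhoon'
#     categorize_typhoon_taiwan_fixed(35)   # treated as 35 m/s -> 'Typhoon'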
def categorize_typhoon_by_standard_fixed(wind_speed, standard='atlantic'):
"""FIXED categorization function supporting both standards"""
if pd.isna(wind_speed):
return 'Tropical Depression', '#808080'
if standard == 'taiwan':
category = categorize_typhoon_taiwan_fixed(wind_speed)
color = taiwan_color_map_fixed.get(category, '#808080')
return category, color
else:
if wind_speed >= 137:
return 'C5 Super Typhoon', '#FF0000'
elif wind_speed >= 113:
return 'C4 Very Strong Typhoon', '#FFA500'
elif wind_speed >= 96:
return 'C3 Strong Typhoon', '#FFFF00'
elif wind_speed >= 83:
return 'C2 Typhoon', '#00FF00'
elif wind_speed >= 64:
return 'C1 Typhoon', '#00FFFF'
elif wind_speed >= 34:
return 'Tropical Storm', '#0000FF'
else:
return 'Tropical Depression', '#808080'
def classify_enso_phases(oni_value):
"""Classify ENSO phases based on ONI value"""
if isinstance(oni_value, pd.Series):
oni_value = oni_value.iloc[0]
if pd.isna(oni_value):
return 'Neutral'
if oni_value >= 0.5:
return 'El Nino'
elif oni_value <= -0.5:
return 'La Nina'
else:
return 'Neutral'
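# Examples using the standard ±0.5 ONI thresholds:
#     classify_enso_phases(0.7)   # -> 'El Nino'
#     classify_enso_phases(-0.8)  # -> 'La Nina'
#     classify_enso_phases(0.2)   # -> 'Neutral'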
# -----------------------------
# FIXED: Advanced ML Features
# -----------------------------
def extract_storm_features(typhoon_data):
"""Extract comprehensive features for clustering analysis - FIXED VERSION"""
try:
if typhoon_data is None or typhoon_data.empty:
logging.error("No typhoon data provided for feature extraction")
return None
basic_features = []
for sid in typhoon_data['SID'].unique():
storm_data = typhoon_data[typhoon_data['SID'] == sid].copy()
if len(storm_data) == 0:
continue
features = {'SID': sid}
# Wind statistics
if 'USA_WIND' in storm_data.columns:
wind_values = pd.to_numeric(storm_data['USA_WIND'], errors='coerce').dropna()
if len(wind_values) > 0:
features['USA_WIND_max'] = wind_values.max()
features['USA_WIND_mean'] = wind_values.mean()
features['USA_WIND_std'] = wind_values.std() if len(wind_values) > 1 else 0
else:
features['USA_WIND_max'] = 30
features['USA_WIND_mean'] = 30
features['USA_WIND_std'] = 0
else:
features['USA_WIND_max'] = 30
features['USA_WIND_mean'] = 30
features['USA_WIND_std'] = 0
# Pressure statistics
if 'USA_PRES' in storm_data.columns:
pres_values = pd.to_numeric(storm_data['USA_PRES'], errors='coerce').dropna()
if len(pres_values) > 0:
features['USA_PRES_min'] = pres_values.min()
features['USA_PRES_mean'] = pres_values.mean()
features['USA_PRES_std'] = pres_values.std() if len(pres_values) > 1 else 0
else:
features['USA_PRES_min'] = 1000
features['USA_PRES_mean'] = 1000
features['USA_PRES_std'] = 0
else:
features['USA_PRES_min'] = 1000
features['USA_PRES_mean'] = 1000
features['USA_PRES_std'] = 0
# Location statistics
if 'LAT' in storm_data.columns and 'LON' in storm_data.columns:
lat_values = pd.to_numeric(storm_data['LAT'], errors='coerce').dropna()
lon_values = pd.to_numeric(storm_data['LON'], errors='coerce').dropna()
if len(lat_values) > 0 and len(lon_values) > 0:
features['LAT_mean'] = lat_values.mean()
features['LAT_std'] = lat_values.std() if len(lat_values) > 1 else 0
features['LAT_max'] = lat_values.max()
features['LAT_min'] = lat_values.min()
features['LON_mean'] = lon_values.mean()
features['LON_std'] = lon_values.std() if len(lon_values) > 1 else 0
features['LON_max'] = lon_values.max()
features['LON_min'] = lon_values.min()
features['genesis_lat'] = lat_values.iloc[0]
features['genesis_lon'] = lon_values.iloc[0]
features['genesis_intensity'] = features['USA_WIND_mean']
features['lat_range'] = lat_values.max() - lat_values.min()
features['lon_range'] = lon_values.max() - lon_values.min()
if len(lat_values) > 1:
distances = []
for i in range(1, len(lat_values)):
dlat = lat_values.iloc[i] - lat_values.iloc[i-1]
dlon = lon_values.iloc[i] - lon_values.iloc[i-1]
distances.append(np.sqrt(dlat**2 + dlon**2))
features['total_distance'] = sum(distances)
features['avg_speed'] = np.mean(distances) if distances else 0
else:
features['total_distance'] = 0
features['avg_speed'] = 0
if len(lat_values) > 2:
bearing_changes = []
for i in range(1, len(lat_values)-1):
dlat1 = lat_values.iloc[i] - lat_values.iloc[i-1]
dlon1 = lon_values.iloc[i] - lon_values.iloc[i-1]
dlat2 = lat_values.iloc[i+1] - lat_values.iloc[i]
dlon2 = lon_values.iloc[i+1] - lon_values.iloc[i]
angle1 = np.arctan2(dlat1, dlon1)
angle2 = np.arctan2(dlat2, dlon2)
change = abs(angle2 - angle1)
bearing_changes.append(change)
features['avg_curvature'] = np.mean(bearing_changes) if bearing_changes else 0
else:
features['avg_curvature'] = 0
else:
features.update({
'LAT_mean': 20, 'LAT_std': 0, 'LAT_max': 20, 'LAT_min': 20,
'LON_mean': 140, 'LON_std': 0, 'LON_max': 140, 'LON_min': 140,
'genesis_lat': 20, 'genesis_lon': 140, 'genesis_intensity': 30,
'lat_range': 0, 'lon_range': 0, 'total_distance': 0,
'avg_speed': 0, 'avg_curvature': 0
})
features['track_length'] = len(storm_data)
if 'SEASON' in storm_data.columns:
features['season'] = storm_data['SEASON'].iloc[0]
else:
features['season'] = 2000
if 'BASIN' in storm_data.columns:
features['basin'] = storm_data['BASIN'].iloc[0]
elif 'SID' in storm_data.columns:
features['basin'] = sid[:2] if len(sid) >= 2 else 'WP'
else:
features['basin'] = 'WP'
basic_features.append(features)
if not basic_features:
logging.error("No valid storm features could be extracted")
return None
storm_features = pd.DataFrame(basic_features)
numeric_columns = [col for col in storm_features.columns if col not in ['SID', 'basin']]
for col in numeric_columns:
storm_features[col] = pd.to_numeric(storm_features[col], errors='coerce').fillna(0)
logging.info(f"Successfully extracted features for {len(storm_features)} storms")
return storm_features
except Exception as e:
logging.error(f"Error in extract_storm_features: {e}")
import traceback
traceback.print_exc()
return None
def perform_dimensionality_reduction(storm_features, method='umap', n_components=2):
"""Perform UMAP or t-SNE dimensionality reduction"""
try:
if storm_features is None or storm_features.empty:
raise ValueError("No storm features provided")
feature_cols = []
for col in storm_features.columns:
if col not in ['SID', 'basin'] and storm_features[col].dtype in ['float64', 'int64']:
valid_data = storm_features[col].dropna()
if len(valid_data) > 0 and valid_data.std() > 0:
feature_cols.append(col)
if len(feature_cols) == 0:
raise ValueError("No valid numeric features found for clustering")
X = storm_features[feature_cols].fillna(0)
if len(X) < 2:
raise ValueError("Need at least 2 storms for clustering")
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
if method.lower() == 'umap' and UMAP_AVAILABLE and len(X_scaled) >= 4:
n_neighbors = min(15, len(X_scaled) - 1)
reducer = umap.UMAP(
n_components=n_components,
n_neighbors=n_neighbors,
min_dist=0.1,
metric='euclidean',
random_state=42,
n_jobs=1
)
elif method.lower() == 'tsne' and len(X_scaled) >= 4:
perplexity = min(30, len(X_scaled) // 4)
perplexity = max(1, perplexity)
reducer = TSNE(
n_components=n_components,
perplexity=perplexity,
learning_rate=200,
n_iter=1000,
random_state=42
)
else:
reducer = PCA(n_components=n_components, random_state=42)
embedding = reducer.fit_transform(X_scaled)
logging.info(f"Dimensionality reduction successful: {X_scaled.shape} -> {embedding.shape}")
return embedding, feature_cols, scaler
except Exception as e:
logging.error(f"Error in perform_dimensionality_reduction: {e}")
raise
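# Typical call sketch (requires the feature frame from extract_storm_features):
#     features = extract_storm_features(typhoon_data)
#     embedding, used_cols, scaler = perform_dimensionality_reduction(features, method='umap')
#     # embedding has shape (n_storms, 2); PCA is the silent fallback when
#     # UMAP/t-SNE are unavailable or there are too few storms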
def cluster_storms_data(embedding, method='dbscan', eps=0.5, min_samples=3):
"""Cluster storms based on their embedding"""
try:
if len(embedding) < 2:
return np.array([0] * len(embedding))
if method.lower() == 'dbscan':
min_samples = min(min_samples, max(2, len(embedding) // 5))
clusterer = DBSCAN(eps=eps, min_samples=min_samples)
elif method.lower() == 'kmeans':
n_clusters = min(5, max(2, len(embedding) // 3))
clusterer = KMeans(n_clusters=n_clusters, random_state=42)
else:
raise ValueError("Method must be 'dbscan' or 'kmeans'")
clusters = clusterer.fit_predict(embedding)
logging.info(f"Clustering complete: {len(np.unique(clusters))} clusters found")
return clusters
except Exception as e:
logging.error(f"Error in cluster_storms_data: {e}")
return np.array([0] * len(embedding))
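# Sketch: DBSCAN labels outliers as -1, so exclude that label when counting clusters:
#     labels = cluster_storms_data(embedding, method='dbscan', eps=0.5, min_samples=3)
#     n_clusters = len(set(labels)) - (1 if -1 in labels else 0)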
def create_separate_clustering_plots(storm_features, typhoon_data, method='umap'):
"""Create separate plots for clustering analysis"""
try:
if storm_features is None or storm_features.empty:
raise ValueError("No storm features available for clustering")
if typhoon_data is None or typhoon_data.empty:
raise ValueError("No typhoon data available for route visualization")
logging.info(f"Starting clustering visualization with {len(storm_features)} storms")
embedding, feature_cols, scaler = perform_dimensionality_reduction(storm_features, method)
cluster_labels = cluster_storms_data(embedding, 'dbscan')
storm_features_viz = storm_features.copy()
storm_features_viz['cluster'] = cluster_labels
storm_features_viz['dim1'] = embedding[:, 0]
storm_features_viz['dim2'] = embedding[:, 1]
try:
storm_info = typhoon_data.groupby('SID').first()[['NAME', 'SEASON']].reset_index()
storm_features_viz = storm_features_viz.merge(storm_info, on='SID', how='left')
storm_features_viz['NAME'] = storm_features_viz['NAME'].fillna('UNNAMED')
storm_features_viz['SEASON'] = storm_features_viz['SEASON'].fillna(2000)
except Exception as merge_error:
logging.warning(f"Could not merge storm info: {merge_error}")
storm_features_viz['NAME'] = 'UNNAMED'
storm_features_viz['SEASON'] = 2000
unique_clusters = sorted([c for c in storm_features_viz['cluster'].unique() if c != -1])
noise_count = len(storm_features_viz[storm_features_viz['cluster'] == -1])
# 1. Clustering scatter plot
fig_cluster = go.Figure()
if noise_count > 0:
noise_data = storm_features_viz[storm_features_viz['cluster'] == -1]
fig_cluster.add_trace(
go.Scatter(
x=noise_data['dim1'],
y=noise_data['dim2'],
mode='markers',
marker=dict(color='lightgray', size=8, opacity=0.5, symbol='x'),
name=f'Noise ({noise_count} storms)',
                    hovertemplate=(
                        '<b>%{customdata[0]}</b><br>'
                        'Season: %{customdata[1]}<br>'
                        'Cluster: Noise<br>'
                        f'{method.upper()} Dim 1: %{{x:.2f}}<br>'
                        f'{method.upper()} Dim 2: %{{y:.2f}}'
                        '<extra></extra>'
                    ),
customdata=np.column_stack((
noise_data['NAME'].fillna('UNNAMED'),
noise_data['SEASON'].fillna(2000)
))
)
)
cluster_symbols = ['circle', 'square', 'diamond', 'triangle-up', 'triangle-down',
'pentagon', 'hexagon', 'star', 'cross', 'circle-open']
for i, cluster in enumerate(unique_clusters):
cluster_data = storm_features_viz[storm_features_viz['cluster'] == cluster]
color = CLUSTER_COLORS[i % len(CLUSTER_COLORS)]
symbol = cluster_symbols[i % len(cluster_symbols)]
fig_cluster.add_trace(
go.Scatter(
x=cluster_data['dim1'],
y=cluster_data['dim2'],
mode='markers',
marker=dict(color=color, size=10, symbol=symbol, line=dict(width=1, color='white')),
name=f'Cluster {cluster} ({len(cluster_data)} storms)',
                    hovertemplate=(
                        '<b>%{customdata[0]}</b><br>'
                        'Season: %{customdata[1]}<br>'
                        f'Cluster: {cluster}<br>'
                        f'{method.upper()} Dim 1: %{{x:.2f}}<br>'
                        f'{method.upper()} Dim 2: %{{y:.2f}}<br>'
                        'Intensity: %{customdata[2]:.0f} kt'
                        '<extra></extra>'
                    ),
customdata=np.column_stack((
cluster_data['NAME'].fillna('UNNAMED'),
cluster_data['SEASON'].fillna(2000),
cluster_data['USA_WIND_max'].fillna(0)
))
)
)
fig_cluster.update_layout(
            title=f'Storm Clustering Analysis using {method.upper()}<br><sub>Each symbol/color represents a distinct storm pattern group</sub>',
xaxis_title=f'{method.upper()} Dimension 1',
yaxis_title=f'{method.upper()} Dimension 2',
height=600,
showlegend=True
)
# 2. Route map
fig_routes = go.Figure()
cluster_info_text = []
for i, cluster in enumerate(unique_clusters):
cluster_storm_ids = storm_features_viz[storm_features_viz['cluster'] == cluster]['SID'].tolist()
color = CLUSTER_COLORS[i % len(CLUSTER_COLORS)]
cluster_data = storm_features_viz[storm_features_viz['cluster'] == cluster]
avg_intensity = cluster_data['USA_WIND_max'].mean() if 'USA_WIND_max' in cluster_data.columns else 0
avg_pressure = cluster_data['USA_PRES_min'].mean() if 'USA_PRES_min' in cluster_data.columns else 1000
cluster_info_text.append(
f"Cluster {cluster}: {len(cluster_storm_ids)} storms, "
f"Avg: {avg_intensity:.0f}kt/{avg_pressure:.0f}hPa"
)
storms_added = 0
for j, sid in enumerate(cluster_storm_ids[:8]):
try:
storm_track = typhoon_data[typhoon_data['SID'] == sid].sort_values('ISO_TIME')
if len(storm_track) > 1:
valid_coords = storm_track['LAT'].notna() & storm_track['LON'].notna()
storm_track = storm_track[valid_coords]
if len(storm_track) > 1:
storm_name = storm_track['NAME'].iloc[0] if pd.notna(storm_track['NAME'].iloc[0]) else 'UNNAMED'
storm_season = storm_track['SEASON'].iloc[0] if 'SEASON' in storm_track.columns else 'Unknown'
line_styles = ['solid', 'dash', 'dot', 'dashdot']
line_style = line_styles[j % len(line_styles)]
line_width = 3 if j == 0 else 2
fig_routes.add_trace(
go.Scattergeo(
lon=storm_track['LON'],
lat=storm_track['LAT'],
mode='lines+markers',
line=dict(color=color, width=line_width, dash=line_style),
marker=dict(color=color, size=3),
name=f'C{cluster}: {storm_name} ({storm_season})',
showlegend=True,
legendgroup=f'cluster_{cluster}',
                                    hovertemplate=(
                                        f'<b>Cluster {cluster}: {storm_name}</b><br>'
                                        'Lat: %{lat:.1f}°<br>'
                                        'Lon: %{lon:.1f}°<br>'
                                        f'Season: {storm_season}<br>'
                                        f'Pattern Group: {cluster}'
                                        '<extra></extra>'
                                    )
)
)
storms_added += 1
except Exception as track_error:
logging.warning(f"Error adding track for storm {sid}: {track_error}")
continue
if len(cluster_storm_ids) > 0:
cluster_storm_data = storm_features_viz[storm_features_viz['cluster'] == cluster]
if 'genesis_lat' in cluster_storm_data.columns and 'genesis_lon' in cluster_storm_data.columns:
avg_lat = cluster_storm_data['genesis_lat'].mean()
avg_lon = cluster_storm_data['genesis_lon'].mean()
fig_routes.add_trace(
go.Scattergeo(
lon=[avg_lon],
lat=[avg_lat],
mode='markers',
marker=dict(
color=color,
size=20,
symbol='star',
line=dict(width=2, color='white')
),
name=f'C{cluster} Center',
showlegend=True,
legendgroup=f'cluster_{cluster}',
                            hovertemplate=(
                                f'<b>Cluster {cluster} Genesis Center</b><br>'
                                f'Avg Position: {avg_lat:.1f}°N, {avg_lon:.1f}°E<br>'
                                f'Storms: {len(cluster_storm_ids)}<br>'
                                f'Avg Intensity: {avg_intensity:.0f} kt'
                                '<extra></extra>'
                            )
)
)
fig_routes.update_layout(
title=f"Storm Routes by {method.upper()} Clusters
Different line styles = different storms in same cluster | Stars = cluster centers",
geo=dict(
projection_type="natural earth",
showland=True,
landcolor="LightGray",
showocean=True,
oceancolor="LightBlue",
showcoastlines=True,
coastlinecolor="Gray",
center=dict(lat=20, lon=140),
projection_scale=2.5
),
height=800,
width=1200,
showlegend=True
)
cluster_summary = "
".join(cluster_info_text)
fig_routes.add_annotation(
text=f"Cluster Summary:
{cluster_summary}",
xref="paper", yref="paper",
x=0.02, y=0.98,
showarrow=False,
align="left",
bgcolor="rgba(255,255,255,0.8)",
bordercolor="gray",
borderwidth=1
)
# 3. Pressure evolution plot
fig_pressure = go.Figure()
for i, cluster in enumerate(unique_clusters):
cluster_storm_ids = storm_features_viz[storm_features_viz['cluster'] == cluster]['SID'].tolist()
color = CLUSTER_COLORS[i % len(CLUSTER_COLORS)]
cluster_pressures = []
for j, sid in enumerate(cluster_storm_ids[:5]):
try:
storm_track = typhoon_data[typhoon_data['SID'] == sid].sort_values('ISO_TIME')
if len(storm_track) > 1 and 'USA_PRES' in storm_track.columns:
pressure_values = pd.to_numeric(storm_track['USA_PRES'], errors='coerce').dropna()
if len(pressure_values) > 0:
storm_name = storm_track['NAME'].iloc[0] if pd.notna(storm_track['NAME'].iloc[0]) else 'UNNAMED'
normalized_time = np.linspace(0, 100, len(pressure_values))
fig_pressure.add_trace(
go.Scatter(
x=normalized_time,
y=pressure_values,
mode='lines',
line=dict(color=color, width=2, dash='solid' if j == 0 else 'dash'),
name=f'C{cluster}: {storm_name}' if j == 0 else None,
showlegend=(j == 0),
legendgroup=f'pressure_cluster_{cluster}',
                                    hovertemplate=(
                                        f'<b>Cluster {cluster}: {storm_name}</b><br>'
                                        'Progress: %{x:.0f}%<br>'
                                        'Pressure: %{y:.0f} hPa'
                                        '<extra></extra>'
                                    ),
opacity=0.8 if j == 0 else 0.5
)
)
cluster_pressures.extend(pressure_values)
except Exception as e:
continue
if cluster_pressures:
avg_pressure = np.mean(cluster_pressures)
fig_pressure.add_hline(
y=avg_pressure,
line_dash="dot",
line_color=color,
annotation_text=f"C{cluster} Avg: {avg_pressure:.0f}",
annotation_position="right"
)
fig_pressure.update_layout(
title=f"Pressure Evolution by {method.upper()} Clusters
Normalized timeline (0-100%) | Dotted lines = cluster averages",
xaxis_title="Storm Progress (%)",
yaxis_title="Pressure (hPa)",
height=500
)
# 4. Wind evolution plot
fig_wind = go.Figure()
for i, cluster in enumerate(unique_clusters):
cluster_storm_ids = storm_features_viz[storm_features_viz['cluster'] == cluster]['SID'].tolist()
color = CLUSTER_COLORS[i % len(CLUSTER_COLORS)]
cluster_winds = []
for j, sid in enumerate(cluster_storm_ids[:5]):
try:
storm_track = typhoon_data[typhoon_data['SID'] == sid].sort_values('ISO_TIME')
if len(storm_track) > 1 and 'USA_WIND' in storm_track.columns:
wind_values = pd.to_numeric(storm_track['USA_WIND'], errors='coerce').dropna()
if len(wind_values) > 0:
storm_name = storm_track['NAME'].iloc[0] if pd.notna(storm_track['NAME'].iloc[0]) else 'UNNAMED'
normalized_time = np.linspace(0, 100, len(wind_values))
fig_wind.add_trace(
go.Scatter(
x=normalized_time,
y=wind_values,
mode='lines',
line=dict(color=color, width=2, dash='solid' if j == 0 else 'dash'),
name=f'C{cluster}: {storm_name}' if j == 0 else None,
showlegend=(j == 0),
legendgroup=f'wind_cluster_{cluster}',
                                    hovertemplate=(
                                        f'<b>Cluster {cluster}: {storm_name}</b><br>'
                                        'Progress: %{x:.0f}%<br>'
                                        'Wind: %{y:.0f} kt'
                                        '<extra></extra>'
                                    ),
opacity=0.8 if j == 0 else 0.5
)
)
cluster_winds.extend(wind_values)
except Exception as e:
continue
if cluster_winds:
avg_wind = np.mean(cluster_winds)
fig_wind.add_hline(
y=avg_wind,
line_dash="dot",
line_color=color,
annotation_text=f"C{cluster} Avg: {avg_wind:.0f}",
annotation_position="right"
)
fig_wind.update_layout(
title=f"Wind Speed Evolution by {method.upper()} Clusters
Normalized timeline (0-100%) | Dotted lines = cluster averages",
xaxis_title="Storm Progress (%)",
yaxis_title="Wind Speed (kt)",
height=500
)
# Generate statistics
try:
stats_text = f"ENHANCED {method.upper()} CLUSTER ANALYSIS RESULTS\n" + "="*60 + "\n\n"
stats_text += f"π DIMENSIONALITY REDUCTION: {method.upper()}\n"
stats_text += f"π― CLUSTERING ALGORITHM: DBSCAN (automatic pattern discovery)\n"
stats_text += f"π TOTAL STORMS ANALYZED: {len(storm_features_viz)}\n"
stats_text += f"π¨ CLUSTERS DISCOVERED: {len(unique_clusters)}\n"
if noise_count > 0:
stats_text += f"β NOISE POINTS: {noise_count} storms (don't fit clear patterns)\n"
stats_text += "\n"
for cluster in sorted(storm_features_viz['cluster'].unique()):
cluster_data = storm_features_viz[storm_features_viz['cluster'] == cluster]
storm_count = len(cluster_data)
if cluster == -1:
stats_text += f"β NOISE GROUP: {storm_count} storms\n"
stats_text += " β These storms don't follow the main patterns\n"
stats_text += " β May represent unique or rare storm behaviors\n\n"
continue
stats_text += f"π― CLUSTER {cluster}: {storm_count} storms\n"
stats_text += f" π¨ Color: {CLUSTER_COLORS[cluster % len(CLUSTER_COLORS)]}\n"
if 'USA_WIND_max' in cluster_data.columns:
wind_mean = cluster_data['USA_WIND_max'].mean()
wind_std = cluster_data['USA_WIND_max'].std()
stats_text += f" π¨ Intensity: {wind_mean:.1f} Β± {wind_std:.1f} kt\n"
if 'USA_PRES_min' in cluster_data.columns:
pres_mean = cluster_data['USA_PRES_min'].mean()
pres_std = cluster_data['USA_PRES_min'].std()
stats_text += f" π‘οΈ Pressure: {pres_mean:.1f} Β± {pres_std:.1f} hPa\n"
if 'track_length' in cluster_data.columns:
track_mean = cluster_data['track_length'].mean()
stats_text += f" π Avg Track Length: {track_mean:.1f} points\n"
if 'genesis_lat' in cluster_data.columns and 'genesis_lon' in cluster_data.columns:
lat_mean = cluster_data['genesis_lat'].mean()
lon_mean = cluster_data['genesis_lon'].mean()
stats_text += f" π― Genesis Region: {lat_mean:.1f}Β°N, {lon_mean:.1f}Β°E\n"
if wind_mean < 50:
stats_text += " π‘ Pattern: Weaker storm group\n"
elif wind_mean > 100:
stats_text += " π‘ Pattern: Intense storm group\n"
else:
stats_text += " π‘ Pattern: Moderate intensity group\n"
stats_text += "\n"
stats_text += "π INTERPRETATION GUIDE:\n"
stats_text += f"β’ {method.upper()} reduces storm characteristics to 2D for visualization\n"
stats_text += "β’ DBSCAN finds natural groupings without preset number of clusters\n"
stats_text += "β’ Each cluster represents storms with similar behavior patterns\n"
stats_text += "β’ Route colors match cluster colors from the similarity plot\n"
stats_text += "β’ Stars on map show average genesis locations for each cluster\n"
stats_text += "β’ Temporal plots show how each cluster behaves over time\n\n"
stats_text += f"π§ FEATURES USED FOR CLUSTERING:\n"
stats_text += f" Total: {len(feature_cols)} storm characteristics\n"
stats_text += f" Including: intensity, pressure, track shape, genesis location\n"
except Exception as stats_error:
stats_text = f"Error generating enhanced statistics: {str(stats_error)}"
return fig_cluster, fig_routes, fig_pressure, fig_wind, stats_text
except Exception as e:
logging.error(f"Error in enhanced clustering analysis: {e}")
import traceback
traceback.print_exc()
error_fig = go.Figure()
error_fig.add_annotation(
text=f"Error in clustering analysis: {str(e)}",
xref="paper", yref="paper",
x=0.5, y=0.5, xanchor='center', yanchor='middle',
showarrow=False, font_size=16
)
return error_fig, error_fig, error_fig, error_fig, f"Error in clustering: {str(e)}"
# -----------------------------
# FIXED: Prediction System
# -----------------------------
def get_realistic_genesis_locations():
"""Get realistic typhoon genesis regions based on climatology"""
return {
"Western Pacific Main Development Region": {"lat": 12.5, "lon": 145.0, "description": "Peak activity zone (Guam area)"},
"South China Sea": {"lat": 15.0, "lon": 115.0, "description": "Secondary development region"},
"Philippine Sea": {"lat": 18.0, "lon": 135.0, "description": "Recurving storm region"},
"Marshall Islands": {"lat": 8.0, "lon": 165.0, "description": "Eastern development zone"},
"Monsoon Trough": {"lat": 10.0, "lon": 130.0, "description": "Monsoon-driven genesis"},
"ITCZ Region": {"lat": 6.0, "lon": 140.0, "description": "Near-equatorial development"},
"Subtropical Region": {"lat": 22.0, "lon": 125.0, "description": "Late season development"},
"Bay of Bengal": {"lat": 15.0, "lon": 88.0, "description": "Indian Ocean cyclones"},
"Eastern Pacific": {"lat": 12.0, "lon": -105.0, "description": "Hurricane development zone"},
"Atlantic MDR": {"lat": 12.0, "lon": -45.0, "description": "Main Development Region"}
}
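# Lookup sketch (keys must match the region names above exactly):
#     genesis = get_realistic_genesis_locations()["South China Sea"]
#     lat, lon = genesis["lat"], genesis["lon"]  # 15.0, 115.0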
def predict_storm_route_and_intensity_realistic(genesis_region, month, oni_value, models=None, forecast_hours=72, use_advanced_physics=True):
"""Realistic prediction with proper typhoon speeds and development"""
try:
genesis_locations = get_realistic_genesis_locations()
if genesis_region not in genesis_locations:
genesis_region = "Western Pacific Main Development Region"
genesis_info = genesis_locations[genesis_region]
lat = genesis_info["lat"]
lon = genesis_info["lon"]
results = {
'current_prediction': {},
'route_forecast': [],
'confidence_scores': {},
'model_info': 'Realistic Genesis Model',
'genesis_info': genesis_info
}
# Realistic starting intensity
base_intensity = 30
# Environmental factors
if oni_value > 1.0:
intensity_modifier = -6
elif oni_value > 0.5:
intensity_modifier = -3
elif oni_value < -1.0:
intensity_modifier = +8
elif oni_value < -0.5:
intensity_modifier = +5
else:
intensity_modifier = oni_value * 2
seasonal_factors = {
1: -8, 2: -6, 3: -4, 4: -2, 5: 2, 6: 6,
7: 10, 8: 12, 9: 15, 10: 10, 11: 4, 12: -5
}
seasonal_modifier = seasonal_factors.get(month, 0)
region_factors = {
"Western Pacific Main Development Region": 8,
"South China Sea": 4,
"Philippine Sea": 5,
"Marshall Islands": 7,
"Monsoon Trough": 6,
"ITCZ Region": 3,
"Subtropical Region": 2,
"Bay of Bengal": 4,
"Eastern Pacific": 6,
"Atlantic MDR": 5
}
region_modifier = region_factors.get(genesis_region, 0)
predicted_intensity = base_intensity + intensity_modifier + seasonal_modifier + region_modifier
predicted_intensity = max(25, min(40, predicted_intensity))
intensity_uncertainty = np.random.normal(0, 2)
predicted_intensity += intensity_uncertainty
predicted_intensity = max(25, min(38, predicted_intensity))
results['current_prediction'] = {
'intensity_kt': predicted_intensity,
'pressure_hpa': 1008 - (predicted_intensity - 25) * 0.6,
'category': categorize_typhoon_enhanced(predicted_intensity),
'genesis_region': genesis_region
}
# Route prediction
current_lat = lat
current_lon = lon
current_intensity = predicted_intensity
route_points = []
for hour in range(0, forecast_hours + 6, 6):
# Realistic motion
if current_lat < 20:
base_speed = 0.12
elif current_lat < 30:
base_speed = 0.18
else:
base_speed = 0.25
intensity_speed_factor = 1.0 + (current_intensity - 50) / 200
base_speed *= max(0.8, min(1.4, intensity_speed_factor))
beta_drift_lat = 0.02 * np.sin(np.radians(current_lat))
beta_drift_lon = -0.05 * np.cos(np.radians(current_lat))
if month in [6, 7, 8, 9]:
ridge_strength = 1.2
ridge_position = 32 + 4 * np.sin(2 * np.pi * (month - 6) / 4)
else:
ridge_strength = 0.9
ridge_position = 28
if current_lat < ridge_position - 10:
lat_tendency = base_speed * 0.3 + beta_drift_lat
lon_tendency = -base_speed * 0.9 + beta_drift_lon
elif current_lat > ridge_position - 3:
lat_tendency = base_speed * 0.8 + beta_drift_lat
lon_tendency = base_speed * 0.4 + beta_drift_lon
else:
lat_tendency = base_speed * 0.4 + beta_drift_lat
lon_tendency = -base_speed * 0.7 + beta_drift_lon
if oni_value > 0.5:
lon_tendency += 0.05
lat_tendency += 0.02
elif oni_value < -0.5:
lon_tendency -= 0.08
lat_tendency -= 0.01
motion_uncertainty = 0.02 + (hour / 120) * 0.04
lat_noise = np.random.normal(0, motion_uncertainty)
lon_noise = np.random.normal(0, motion_uncertainty)
current_lat += lat_tendency + lat_noise
current_lon += lon_tendency + lon_noise
# Intensity evolution
if hour <= 48:
if current_intensity < 50:
if 10 <= current_lat <= 25 and 115 <= current_lon <= 165:
intensity_tendency = 4.5 if current_intensity < 35 else 3.0
elif 120 <= current_lon <= 155 and 15 <= current_lat <= 20:
intensity_tendency = 6.0 if current_intensity < 40 else 4.0
else:
intensity_tendency = 2.0
elif current_intensity < 80:
intensity_tendency = 2.5 if (120 <= current_lon <= 155 and 10 <= current_lat <= 25) else 1.0
else:
intensity_tendency = 1.0
elif hour <= 120:
if current_lat < 25 and current_lon > 120:
if current_intensity < 120:
intensity_tendency = 1.5
else:
intensity_tendency = 0.0
else:
intensity_tendency = -1.5
else:
if current_lat < 30 and current_lon > 115:
intensity_tendency = -2.0
else:
intensity_tendency = -3.5
# Environmental modulation
if current_lat > 35:
intensity_tendency -= 12
elif current_lat > 30:
intensity_tendency -= 5
elif current_lon < 110:
intensity_tendency -= 15
elif 125 <= current_lon <= 155 and 10 <= current_lat <= 25:
intensity_tendency += 2
elif 160 <= current_lon <= 180 and 15 <= current_lat <= 30:
intensity_tendency += 1
if current_lat < 8:
intensity_tendency += 0.5
elif 8 <= current_lat <= 20:
intensity_tendency += 2.0
elif 20 < current_lat <= 30:
intensity_tendency -= 1.0
elif current_lat > 30:
intensity_tendency -= 4.0
if month in [12, 1, 2, 3]:
intensity_tendency -= 2.0
elif month in [7, 8, 9]:
intensity_tendency += 1.0
intensity_noise = np.random.normal(0, 1.5)
current_intensity += intensity_tendency + intensity_noise
current_intensity = max(20, min(185, current_intensity))
base_confidence = 0.92
time_penalty = (hour / 120) * 0.45
environment_penalty = 0.15 if current_lat > 30 or current_lon < 115 else 0
confidence = max(0.25, base_confidence - time_penalty - environment_penalty)
if hour <= 24:
stage = 'Genesis'
elif hour <= 72:
stage = 'Development'
elif hour <= 120:
stage = 'Mature'
elif hour <= 240:
stage = 'Extended'
else:
stage = 'Long-term'
route_points.append({
'hour': hour,
'lat': current_lat,
'lon': current_lon,
'intensity_kt': current_intensity,
'category': categorize_typhoon_enhanced(current_intensity),
'confidence': confidence,
'development_stage': stage,
'forward_speed_kmh': base_speed * 111,
'pressure_hpa': max(900, 1013 - (current_intensity - 25) * 0.9)
})
results['route_forecast'] = route_points
results['confidence_scores'] = {
'genesis': 0.88,
'early_development': 0.82,
'position_24h': 0.85,
'position_48h': 0.78,
'position_72h': 0.68,
'intensity_24h': 0.75,
'intensity_48h': 0.65,
'intensity_72h': 0.55,
'long_term': max(0.3, 0.8 - (forecast_hours / 240) * 0.5)
}
results['model_info'] = f"Enhanced Realistic Model - {genesis_region}"
return results
except Exception as e:
logging.error(f"Realistic prediction error: {str(e)}")
return {
'error': f"Prediction error: {str(e)}",
'current_prediction': {'intensity_kt': 30, 'category': 'Tropical Depression'},
'route_forecast': [],
'confidence_scores': {},
'model_info': 'Error in prediction'
}
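# Forecast sketch (hypothetical La Nina September case, 72-hour horizon):
#     result = predict_storm_route_and_intensity_realistic(
#         "Western Pacific Main Development Region",
#         month=9, oni_value=-1.2, forecast_hours=72)
#     track = result['route_forecast']  # one dict per 6-hour step: lat/lon/intensity/stage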
def create_animated_route_visualization(prediction_results, show_uncertainty=True, enable_animation=True):
"""Create comprehensive animated route visualization with intensity plots"""
try:
if 'route_forecast' not in prediction_results or not prediction_results['route_forecast']:
return None, "No route forecast data available"
route_data = prediction_results['route_forecast']
hours = [point['hour'] for point in route_data]
lats = [point['lat'] for point in route_data]
lons = [point['lon'] for point in route_data]
intensities = [point['intensity_kt'] for point in route_data]
categories = [point['category'] for point in route_data]
confidences = [point.get('confidence', 0.8) for point in route_data]
stages = [point.get('development_stage', 'Unknown') for point in route_data]
speeds = [point.get('forward_speed_kmh', 15) for point in route_data]
pressures = [point.get('pressure_hpa', 1013) for point in route_data]
fig = make_subplots(
rows=2, cols=2,
subplot_titles=('Storm Track Animation', 'Wind Speed vs Time', 'Forward Speed vs Time', 'Pressure vs Time'),
specs=[[{"type": "geo", "colspan": 2}, None],
[{"type": "xy"}, {"type": "xy"}]],
vertical_spacing=0.15,
row_heights=[0.7, 0.3]
)
if enable_animation:
frames = []
fig.add_trace(
go.Scattergeo(
lon=lons,
lat=lats,
mode='lines',
line=dict(color='lightgray', width=2, dash='dot'),
name='Complete Track',
showlegend=True,
opacity=0.4
),
row=1, col=1
)
fig.add_trace(
go.Scattergeo(
lon=[lons[0]],
lat=[lats[0]],
mode='markers',
marker=dict(
size=25,
color='gold',
symbol='star',
line=dict(width=3, color='black')
),
name='Genesis',
showlegend=True,
                    hovertemplate=(
                        f"<b>GENESIS</b><br>"
                        f"Position: {lats[0]:.1f}°N, {lons[0]:.1f}°E<br>"
                        f"Initial: {intensities[0]:.0f} kt<br>"
                        f"Region: {prediction_results['genesis_info']['description']}"
                        "<extra></extra>"
                    )
),
row=1, col=1
)
for i in range(len(route_data)):
frame_lons = lons[:i+1]
frame_lats = lats[:i+1]
frame_intensities = intensities[:i+1]
frame_categories = categories[:i+1]
frame_hours = hours[:i+1]
current_color = enhanced_color_map.get(frame_categories[-1], 'rgb(128,128,128)')
current_size = 15 + (frame_intensities[-1] / 10)
frame_data = [
go.Scattergeo(
lon=frame_lons,
lat=frame_lats,
mode='lines+markers',
line=dict(color='blue', width=4),
marker=dict(
size=[8 + (intensity/15) for intensity in frame_intensities],
color=[enhanced_color_map.get(cat, 'rgb(128,128,128)') for cat in frame_categories],
opacity=0.8,
line=dict(width=1, color='white')
),
name='Current Track',
showlegend=False
),
go.Scattergeo(
lon=[frame_lons[-1]],
lat=[frame_lats[-1]],
mode='markers',
marker=dict(
size=current_size,
color=current_color,
symbol='circle',
line=dict(width=3, color='white')
),
name='Current Position',
showlegend=False,
                        hovertemplate=(
                            f"<b>Hour {route_data[i]['hour']}</b><br>"
                            f"Position: {lats[i]:.1f}°N, {lons[i]:.1f}°E<br>"
                            f"Intensity: {intensities[i]:.0f} kt<br>"
                            f"Category: {categories[i]}<br>"
                            f"Stage: {stages[i]}<br>"
                            f"Speed: {speeds[i]:.1f} km/h<br>"
                            f"Confidence: {confidences[i]*100:.0f}%"
                            "<extra></extra>"
                        )
),
go.Scatter(
x=frame_hours,
y=frame_intensities,
mode='lines+markers',
line=dict(color='red', width=3),
marker=dict(size=6, color='red'),
name='Wind Speed',
showlegend=False,
yaxis='y2'
),
go.Scatter(
x=frame_hours,
y=speeds[:i+1],
mode='lines+markers',
line=dict(color='green', width=2),
marker=dict(size=4, color='green'),
name='Forward Speed',
showlegend=False,
yaxis='y3'
),
go.Scatter(
x=frame_hours,
y=pressures[:i+1],
mode='lines+markers',
line=dict(color='purple', width=2),
marker=dict(size=4, color='purple'),
name='Pressure',
showlegend=False,
yaxis='y4'
)
]
frames.append(go.Frame(
data=frame_data,
name=str(i),
layout=go.Layout(
title=f"Storm Development Animation - Hour {route_data[i]['hour']}
"
f"Intensity: {intensities[i]:.0f} kt | Category: {categories[i]} | Stage: {stages[i]} | Speed: {speeds[i]:.1f} km/h"
)
))
fig.frames = frames
fig.update_layout(
updatemenus=[
{
"buttons": [
{
"args": [None, {"frame": {"duration": 1000, "redraw": True},
"fromcurrent": True, "transition": {"duration": 300}}],
"label": "βΆοΈ Play",
"method": "animate"
},
{
"args": [[None], {"frame": {"duration": 0, "redraw": True},
"mode": "immediate", "transition": {"duration": 0}}],
"label": "βΈοΈ Pause",
"method": "animate"
},
{
"args": [None, {"frame": {"duration": 500, "redraw": True},
"fromcurrent": True, "transition": {"duration": 300}}],
"label": "β© Fast",
"method": "animate"
}
],
"direction": "left",
"pad": {"r": 10, "t": 87},
"showactive": False,
"type": "buttons",
"x": 0.1,
"xanchor": "right",
"y": 0,
"yanchor": "top"
}
],
sliders=[{
"active": 0,
"yanchor": "top",
"xanchor": "left",
"currentvalue": {
"font": {"size": 16},
"prefix": "Hour: ",
"visible": True,
"xanchor": "right"
},
"transition": {"duration": 300, "easing": "cubic-in-out"},
"pad": {"b": 10, "t": 50},
"len": 0.9,
"x": 0.1,
"y": 0,
"steps": [
{
"args": [[str(i)], {"frame": {"duration": 300, "redraw": True},
"mode": "immediate", "transition": {"duration": 300}}],
"label": f"H{route_data[i]['hour']}",
"method": "animate"
}
for i in range(0, len(route_data), max(1, len(route_data)//20))
]
}]
)
else:
# Static view
fig.add_trace(
go.Scattergeo(
lon=[lons[0]],
lat=[lats[0]],
mode='markers',
marker=dict(
size=25,
color='gold',
symbol='star',
line=dict(width=3, color='black')
),
name='Genesis',
showlegend=True,
hovertemplate=(
f"GENESIS
"
f"Position: {lats[0]:.1f}Β°N, {lons[0]:.1f}Β°E
"
f"Initial: {intensities[0]:.0f} kt
"
""
)
),
row=1, col=1
)
for i in range(0, len(route_data), max(1, len(route_data)//50)):
point = route_data[i]
color = enhanced_color_map.get(point['category'], 'rgb(128,128,128)')
size = 8 + (point['intensity_kt'] / 12)
fig.add_trace(
go.Scattergeo(
lon=[point['lon']],
lat=[point['lat']],
mode='markers',
marker=dict(
size=size,
color=color,
opacity=point.get('confidence', 0.8),
line=dict(width=1, color='white')
),
name=f"Hour {point['hour']}" if i % 10 == 0 else None,
showlegend=(i % 10 == 0),
hovertemplate=(
f"Hour {point['hour']}
"
f"Position: {point['lat']:.1f}Β°N, {point['lon']:.1f}Β°E
"
f"Intensity: {point['intensity_kt']:.0f} kt
"
f"Category: {point['category']}
"
f"Stage: {point.get('development_stage', 'Unknown')}
"
f"Speed: {point.get('forward_speed_kmh', 15):.1f} km/h
"
""
)
),
row=1, col=1
)
fig.add_trace(
go.Scattergeo(
lon=lons,
lat=lats,
mode='lines',
line=dict(color='black', width=3),
name='Forecast Track',
showlegend=True
),
row=1, col=1
)
# Add static intensity, speed, and pressure plots
fig.add_trace(
go.Scatter(
x=hours,
y=intensities,
mode='lines+markers',
line=dict(color='red', width=3),
marker=dict(size=6, color='red'),
name='Wind Speed',
showlegend=False
),
row=2, col=1
)
# Add category threshold lines
thresholds = [34, 64, 83, 96, 113, 137]
threshold_names = ['TS', 'C1', 'C2', 'C3', 'C4', 'C5']
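# Saffir-Simpson category floors in knots: TS 34, C1 64, C2 83, C3 96, C4 113, C5 137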
for thresh, name in zip(thresholds, threshold_names):
fig.add_trace(
go.Scatter(
x=[min(hours), max(hours)],
y=[thresh, thresh],
mode='lines',
line=dict(color='gray', width=1, dash='dash'),
name=name,
showlegend=False,
hovertemplate=f"{name} Threshold: {thresh} kt"
),
row=2, col=1
)
# Forward speed plot
fig.add_trace(
go.Scatter(
x=hours,
y=speeds,
mode='lines+markers',
line=dict(color='green', width=2),
marker=dict(size=4, color='green'),
name='Forward Speed',
showlegend=False
),
row=2, col=2
)
# Add uncertainty cone if requested
if show_uncertainty and len(route_data) > 1:
uncertainty_lats_upper = []
uncertainty_lats_lower = []
uncertainty_lons_upper = []
uncertainty_lons_lower = []
for i, point in enumerate(route_data):
base_uncertainty = 0.4 + (i / len(route_data)) * 1.8
confidence_factor = point.get('confidence', 0.8)
uncertainty = base_uncertainty / confidence_factor
uncertainty_lats_upper.append(point['lat'] + uncertainty)
uncertainty_lats_lower.append(point['lat'] - uncertainty)
uncertainty_lons_upper.append(point['lon'] + uncertainty)
uncertainty_lons_lower.append(point['lon'] - uncertainty)
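# Build a closed polygon for the cone: upper edge traversed forward, lower edge appended
# in reverse, so fill='toself' on the trace below shades the enclosed region.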
uncertainty_lats = uncertainty_lats_upper + uncertainty_lats_lower[::-1]
uncertainty_lons = uncertainty_lons_upper + uncertainty_lons_lower[::-1]
fig.add_trace(
go.Scattergeo(
lon=uncertainty_lons,
lat=uncertainty_lats,
mode='lines',
fill='toself',
fillcolor='rgba(128,128,128,0.15)',
line=dict(color='rgba(128,128,128,0.4)', width=1),
name='Uncertainty Cone',
showlegend=True
),
row=1, col=1
)
# Enhanced layout
fig.update_layout(
title=f"Comprehensive Storm Development Analysis
Starting from {prediction_results['genesis_info']['description']}",
height=1000,
width=1400,
showlegend=True
)
# Update geo layout
fig.update_geos(
projection_type="natural earth",
showland=True,
landcolor="LightGray",
showocean=True,
oceancolor="LightBlue",
showcoastlines=True,
coastlinecolor="DarkGray",
showlakes=True,
lakecolor="LightBlue",
center=dict(lat=np.mean(lats), lon=np.mean(lons)),
projection_scale=2.0,
row=1, col=1
)
# Update subplot axes
fig.update_xaxes(title_text="Forecast Hour", row=2, col=1)
fig.update_yaxes(title_text="Wind Speed (kt)", row=2, col=1)
fig.update_xaxes(title_text="Forecast Hour", row=2, col=2)
fig.update_yaxes(title_text="Forward Speed (km/h)", row=2, col=2)
# Generate enhanced forecast text
current = prediction_results['current_prediction']
genesis_info = prediction_results['genesis_info']
max_intensity = max(intensities)
max_intensity_time = hours[intensities.index(max_intensity)]
avg_speed = np.mean(speeds)
forecast_text = f"""
COMPREHENSIVE STORM DEVELOPMENT FORECAST
{'='*65}
GENESIS CONDITIONS:
• Region: {current.get('genesis_region', 'Unknown')}
• Description: {genesis_info['description']}
• Starting Position: {lats[0]:.1f}°N, {lons[0]:.1f}°E
• Initial Intensity: {current['intensity_kt']:.0f} kt (Tropical Depression)
• Genesis Pressure: {current.get('pressure_hpa', 1008):.0f} hPa
STORM CHARACTERISTICS:
• Peak Intensity: {max_intensity:.0f} kt at Hour {max_intensity_time}
• Average Forward Speed: {avg_speed:.1f} km/h
• Total Distance: {sum(speed * 6 for speed in speeds):.0f} km
• Final Position: {lats[-1]:.1f}°N, {lons[-1]:.1f}°E
• Forecast Duration: {hours[-1]} hours ({hours[-1]/24:.1f} days)
DEVELOPMENT TIMELINE:
• Hour 0 (Genesis): {intensities[0]:.0f} kt - {categories[0]}
• Hour 24: {intensities[min(4, len(intensities)-1)]:.0f} kt - {categories[min(4, len(categories)-1)]}
• Hour 48: {intensities[min(8, len(intensities)-1)]:.0f} kt - {categories[min(8, len(categories)-1)]}
• Hour 72: {intensities[min(12, len(intensities)-1)]:.0f} kt - {categories[min(12, len(categories)-1)]}
• Final: {intensities[-1]:.0f} kt - {categories[-1]}
MOTION ANALYSIS:
• Initial Motion: {speeds[0]:.1f} km/h
• Peak Speed: {max(speeds):.1f} km/h at Hour {hours[speeds.index(max(speeds))]}
• Final Motion: {speeds[-1]:.1f} km/h
CONFIDENCE ASSESSMENT:
• Genesis Likelihood: {prediction_results['confidence_scores'].get('genesis', 0.85)*100:.0f}%
• 24-hour Track: {prediction_results['confidence_scores'].get('position_24h', 0.85)*100:.0f}%
• 48-hour Track: {prediction_results['confidence_scores'].get('position_48h', 0.75)*100:.0f}%
• 72-hour Track: {prediction_results['confidence_scores'].get('position_72h', 0.65)*100:.0f}%
• Long-term: {prediction_results['confidence_scores'].get('long_term', 0.50)*100:.0f}%
FEATURES:
{"✅ Animation Enabled - Use controls to watch development" if enable_animation else "📊 Static Analysis - All time steps displayed"}
✅ Realistic Forward Speeds (15-25 km/h typical)
✅ Environmental Coupling (ENSO, SST, Shear)
✅ Multi-stage Development Cycle
✅ Uncertainty Quantification
MODEL: {prediction_results['model_info']}
"""
return fig, forecast_text.strip()
except Exception as e:
error_msg = f"Error creating comprehensive visualization: {str(e)}"
logging.error(error_msg)
import traceback
traceback.print_exc()
return None, error_msg
# -----------------------------
# Regression Functions
# -----------------------------
def perform_wind_regression(start_year, start_month, end_year, end_month):
"""Perform wind regression analysis"""
start_date = datetime(start_year, start_month, 1)
end_date = datetime(end_year, end_month, 28)
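# NOTE: day=28 sidesteps month-length arithmetic at the cost of excluding days 29-31
# of the end month (the same convention is used by the other period filters below).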
data = merged_data[(merged_data['ISO_TIME']>=start_date) & (merged_data['ISO_TIME']<=end_date)].dropna(subset=['USA_WIND','ONI'])
data['severe_typhoon'] = (data['USA_WIND']>=64).astype(int)
X = sm.add_constant(data['ONI'])
y = data['severe_typhoon']
try:
model = sm.Logit(y, X).fit(disp=0)
beta_1 = model.params['ONI']
exp_beta_1 = np.exp(beta_1)
p_value = model.pvalues['ONI']
return f"Wind Regression: Ξ²1={beta_1:.4f}, Odds Ratio={exp_beta_1:.4f}, P-value={p_value:.4f}"
except Exception as e:
return f"Wind Regression Error: {e}"
def perform_pressure_regression(start_year, start_month, end_year, end_month):
"""Perform pressure regression analysis"""
start_date = datetime(start_year, start_month, 1)
end_date = datetime(end_year, end_month, 28)
data = merged_data[(merged_data['ISO_TIME']>=start_date) & (merged_data['ISO_TIME']<=end_date)].dropna(subset=['USA_PRES','ONI'])
data['intense_typhoon'] = (data['USA_PRES']<=950).astype(int)
X = sm.add_constant(data['ONI'])
y = data['intense_typhoon']
try:
model = sm.Logit(y, X).fit(disp=0)
beta_1 = model.params['ONI']
exp_beta_1 = np.exp(beta_1)
p_value = model.pvalues['ONI']
return f"Pressure Regression: Ξ²1={beta_1:.4f}, Odds Ratio={exp_beta_1:.4f}, P-value={p_value:.4f}"
except Exception as e:
return f"Pressure Regression Error: {e}"
def perform_longitude_regression(start_year, start_month, end_year, end_month):
"""Perform longitude regression analysis"""
start_date = datetime(start_year, start_month, 1)
end_date = datetime(end_year, end_month, 28)
data = merged_data[(merged_data['ISO_TIME']>=start_date) & (merged_data['ISO_TIME']<=end_date)].dropna(subset=['LON','ONI'])
data['western_typhoon'] = (data['LON']<=140).astype(int)
X = sm.add_constant(data['ONI'])
y = data['western_typhoon']
try:
# Logistic fit to match the binary outcome and the odds-ratio output (consistent with
# the wind/pressure regressions above); X already includes the constant term
model = sm.Logit(y, X).fit(disp=0)
beta_1 = model.params['ONI']
exp_beta_1 = np.exp(beta_1)
p_value = model.pvalues['ONI']
return f"Longitude Regression: β1={beta_1:.4f}, Odds Ratio={exp_beta_1:.4f}, P-value={p_value:.4f}"
except Exception as e:
return f"Longitude Regression Error: {e}"
# -----------------------------
# FIXED: Visualization Functions
# -----------------------------
def get_full_tracks(start_year, start_month, end_year, end_month, enso_phase, typhoon_search):
"""Get full typhoon tracks"""
start_date = datetime(start_year, start_month, 1)
end_date = datetime(end_year, end_month, 28)
filtered_data = merged_data[(merged_data['ISO_TIME']>=start_date) & (merged_data['ISO_TIME']<=end_date)].copy()
filtered_data['ENSO_Phase'] = filtered_data['ONI'].apply(classify_enso_phases)
if enso_phase != 'all':
filtered_data = filtered_data[filtered_data['ENSO_Phase'] == enso_phase.capitalize()]
unique_storms = filtered_data['SID'].unique()
count = len(unique_storms)
fig = go.Figure()
for sid in unique_storms:
storm_data = typhoon_data[typhoon_data['SID']==sid]
if storm_data.empty:
continue
name = storm_data['NAME'].iloc[0] if pd.notnull(storm_data['NAME'].iloc[0]) else "Unnamed"
basin = storm_data['SID'].iloc[0][:2]
storm_oni = filtered_data[filtered_data['SID']==sid]['ONI'].iloc[0]
color = 'red' if storm_oni>=0.5 else ('blue' if storm_oni<=-0.5 else 'green')
fig.add_trace(go.Scattergeo(
lon=storm_data['LON'], lat=storm_data['LAT'], mode='lines',
name=f"{name} ({basin})",
line=dict(width=1.5, color=color), hoverinfo="name"
))
if typhoon_search:
search_mask = typhoon_data['NAME'].str.contains(typhoon_search, case=False, na=False)
if search_mask.any():
for sid in typhoon_data[search_mask]['SID'].unique():
storm_data = typhoon_data[typhoon_data['SID']==sid]
fig.add_trace(go.Scattergeo(
lon=storm_data['LON'], lat=storm_data['LAT'], mode='lines+markers',
name=f"MATCHED: {storm_data['NAME'].iloc[0]}",
line=dict(width=3, color='yellow'),
marker=dict(size=5), hoverinfo="name"
))
fig.update_layout(
title=f"Typhoon Tracks ({start_year}-{start_month} to {end_year}-{end_month})",
geo=dict(
projection_type='natural earth',
showland=True,
showcoastlines=True,
landcolor='rgb(243,243,243)',
countrycolor='rgb(204,204,204)',
coastlinecolor='rgb(204,204,204)',
center=dict(lon=140, lat=20),
projection_scale=3
),
legend_title="Typhoons by ENSO Phase",
showlegend=True,
height=700
)
fig.add_annotation(
x=0.02, y=0.98, xref="paper", yref="paper",
text="Red: El NiΓ±o, Blue: La Nina, Green: Neutral",
showarrow=False, align="left",
bgcolor="rgba(255,255,255,0.8)"
)
return fig, f"Total typhoons displayed: {count}"
def get_wind_analysis(start_year, start_month, end_year, end_month, enso_phase, typhoon_search):
"""Get wind analysis with enhanced categorization"""
start_date = datetime(start_year, start_month, 1)
end_date = datetime(end_year, end_month, 28)
filtered_data = merged_data[(merged_data['ISO_TIME']>=start_date) & (merged_data['ISO_TIME']<=end_date)].copy()
filtered_data['ENSO_Phase'] = filtered_data['ONI'].apply(classify_enso_phases)
if enso_phase != 'all':
filtered_data = filtered_data[filtered_data['ENSO_Phase'] == enso_phase.capitalize()]
fig = px.scatter(filtered_data, x='ONI', y='USA_WIND', color='Category',
hover_data=['NAME','Year','Category'],
title='Wind Speed vs ONI',
labels={'ONI':'ONI Value','USA_WIND':'Max Wind Speed (knots)'},
color_discrete_map=enhanced_color_map)
if typhoon_search:
mask = filtered_data['NAME'].str.contains(typhoon_search, case=False, na=False)
if mask.any():
fig.add_trace(go.Scatter(
x=filtered_data.loc[mask,'ONI'], y=filtered_data.loc[mask,'USA_WIND'],
mode='markers', marker=dict(size=10, color='red', symbol='star'),
name=f'Matched: {typhoon_search}',
text=filtered_data.loc[mask,'NAME']+' ('+filtered_data.loc[mask,'Year'].astype(str)+')'
))
regression = perform_wind_regression(start_year, start_month, end_year, end_month)
return fig, regression
def get_pressure_analysis(start_year, start_month, end_year, end_month, enso_phase, typhoon_search):
"""Get pressure analysis with enhanced categorization"""
start_date = datetime(start_year, start_month, 1)
end_date = datetime(end_year, end_month, 28)
filtered_data = merged_data[(merged_data['ISO_TIME']>=start_date) & (merged_data['ISO_TIME']<=end_date)].copy()
filtered_data['ENSO_Phase'] = filtered_data['ONI'].apply(classify_enso_phases)
if enso_phase != 'all':
filtered_data = filtered_data[filtered_data['ENSO_Phase'] == enso_phase.capitalize()]
fig = px.scatter(filtered_data, x='ONI', y='USA_PRES', color='Category',
hover_data=['NAME','Year','Category'],
title='Pressure vs ONI',
labels={'ONI':'ONI Value','USA_PRES':'Min Pressure (hPa)'},
color_discrete_map=enhanced_color_map)
if typhoon_search:
mask = filtered_data['NAME'].str.contains(typhoon_search, case=False, na=False)
if mask.any():
fig.add_trace(go.Scatter(
x=filtered_data.loc[mask,'ONI'], y=filtered_data.loc[mask,'USA_PRES'],
mode='markers', marker=dict(size=10, color='red', symbol='star'),
name=f'Matched: {typhoon_search}',
text=filtered_data.loc[mask,'NAME']+' ('+filtered_data.loc[mask,'Year'].astype(str)+')'
))
regression = perform_pressure_regression(start_year, start_month, end_year, end_month)
return fig, regression
def get_longitude_analysis(start_year, start_month, end_year, end_month, enso_phase, typhoon_search):
"""Get longitude analysis"""
start_date = datetime(start_year, start_month, 1)
end_date = datetime(end_year, end_month, 28)
filtered_data = merged_data[(merged_data['ISO_TIME']>=start_date) & (merged_data['ISO_TIME']<=end_date)].copy()
filtered_data['ENSO_Phase'] = filtered_data['ONI'].apply(classify_enso_phases)
if enso_phase != 'all':
filtered_data = filtered_data[filtered_data['ENSO_Phase'] == enso_phase.capitalize()]
fig = px.scatter(filtered_data, x='LON', y='ONI', hover_data=['NAME'],
title='Typhoon Generation Longitude vs ONI (All Years)')
if len(filtered_data) > 1:
X = np.array(filtered_data['LON']).reshape(-1,1)
y = filtered_data['ONI']
try:
model = sm.OLS(y, sm.add_constant(X)).fit()
y_pred = model.predict(sm.add_constant(X))
fig.add_trace(go.Scatter(x=filtered_data['LON'], y=y_pred, mode='lines', name='Regression Line'))
slope = model.params[1]
slopes_text = f"All Years Slope: {slope:.4f}"
except Exception as e:
slopes_text = f"Regression Error: {e}"
else:
slopes_text = "Insufficient data for regression"
regression = perform_longitude_regression(start_year, start_month, end_year, end_month)
return fig, slopes_text, regression
# -----------------------------
# FIXED: Animation Functions - NO FALLBACK
# -----------------------------
def get_available_years(typhoon_data):
"""Get all available years from actual data - NO FALLBACK"""
try:
if typhoon_data is None or typhoon_data.empty:
raise Exception("No typhoon data available for year extraction")
years = set()
# Try multiple methods to extract years
if 'ISO_TIME' in typhoon_data.columns:
valid_times = typhoon_data['ISO_TIME'].dropna()
if len(valid_times) > 0:
years.update(valid_times.dt.year.unique())
if 'SEASON' in typhoon_data.columns:
valid_seasons = typhoon_data['SEASON'].dropna()
if len(valid_seasons) > 0:
years.update(valid_seasons.unique())
# Extract from SID if available (format: BASIN + NUMBER + YEAR)
if 'SID' in typhoon_data.columns and len(years) == 0:
for sid in typhoon_data['SID'].dropna().unique():
try:
# Try to extract 4-digit year from SID
year_match = pd.Series([sid]).str.extract(r'(\d{4})')[0].iloc[0]
if year_match and 1950 <= int(year_match) <= 2030:
years.add(int(year_match))
except:
continue
if len(years) == 0:
raise Exception("Could not extract any valid years from typhoon data")
# Convert to sorted list of strings
year_strings = sorted([str(int(year)) for year in years if 1950 <= year <= 2030])
if len(year_strings) == 0:
raise Exception("No valid years found in reasonable range (1950-2030)")
logging.info(f"Extracted {len(year_strings)} years from data: {year_strings[0]} to {year_strings[-1]}")
return year_strings
except Exception as e:
logging.error(f"CRITICAL ERROR in get_available_years: {e}")
raise Exception(f"Cannot extract years from typhoon data: {e}")
def update_typhoon_options_enhanced(year, basin):
"""Enhanced typhoon options - NEVER returns empty or fallback"""
try:
year = int(year)
# Filter by year
if 'ISO_TIME' in typhoon_data.columns:
year_mask = typhoon_data['ISO_TIME'].dt.year == year
elif 'SEASON' in typhoon_data.columns:
year_mask = typhoon_data['SEASON'] == year
else:
# Try to extract from SID
sid_year_mask = typhoon_data['SID'].str.contains(str(year), na=False)
year_mask = sid_year_mask
year_data = typhoon_data[year_mask].copy()
# Filter by basin if specified
if basin != "All Basins":
basin_code = basin.split(' - ')[0] if ' - ' in basin else basin[:2]
if 'SID' in year_data.columns:
year_data = year_data[year_data['SID'].str.startswith(basin_code, na=False)]
elif 'BASIN' in year_data.columns:
year_data = year_data[year_data['BASIN'] == basin_code]
if year_data.empty:
raise Exception(f"No storms found for year {year} and basin {basin}")
# Get unique storms
storms = year_data.groupby('SID').agg({
'NAME': 'first',
'USA_WIND': 'max'
}).reset_index()
# Enhanced categorization including TD
storms['category'] = storms['USA_WIND'].apply(categorize_typhoon_enhanced)
# Create options with category information
options = []
for _, storm in storms.iterrows():
name = storm['NAME'] if pd.notna(storm['NAME']) and storm['NAME'] != '' else 'UNNAMED'
sid = storm['SID']
category = storm['category']
max_wind = storm['USA_WIND'] if pd.notna(storm['USA_WIND']) else 0
option = f"{name} ({sid}) - {category} ({max_wind:.0f}kt)"
options.append(option)
if not options:
raise Exception(f"No valid storm options generated for year {year}")
logging.info(f"Generated {len(options)} storm options for {year}")
return gr.update(choices=sorted(options), value=options[0])
except Exception as e:
error_msg = f"Error loading storms for {year}: {str(e)}"
logging.error(error_msg)
raise Exception(error_msg)
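# The option labels follow "NAME (SID) - Category (NNkt)"; generate_enhanced_track_video_fixed
# below recovers the SID from the first parenthesis pair, so this format is load-bearing.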
def generate_enhanced_track_video_fixed(year, typhoon_selection, standard):
"""FIXED: Enhanced track video generation - NO FALLBACK ALLOWED"""
try:
if not typhoon_selection or "No storms found" in typhoon_selection or "Error" in typhoon_selection:
raise Exception("Invalid typhoon selection provided")
# Extract SID from selection
try:
sid = typhoon_selection.split('(')[1].split(')')[0]
except:
raise Exception(f"Could not extract SID from selection: {typhoon_selection}")
# Get storm data
storm_df = typhoon_data[typhoon_data['SID'] == sid].copy()
if storm_df.empty:
raise Exception(f"No track data found for storm {sid}")
# Sort by time
if 'ISO_TIME' in storm_df.columns:
storm_df = storm_df.sort_values('ISO_TIME')
# Validate essential data
if 'LAT' not in storm_df.columns or 'LON' not in storm_df.columns:
raise Exception(f"Missing coordinate data for storm {sid}")
# Extract data for animation
# Drop rows where either coordinate is missing so lats, lons, and winds stay index-aligned
coords = storm_df[['LAT', 'LON']].apply(pd.to_numeric, errors='coerce').dropna()
lats = coords['LAT'].values
lons = coords['LON'].values
if len(lats) < 2:
raise Exception(f"Insufficient track points for storm {sid}: {len(lats)} points")
if 'USA_WIND' in storm_df.columns:
winds = pd.to_numeric(storm_df['USA_WIND'], errors='coerce').reindex(coords.index).fillna(30).values
else:
winds = np.full(len(lats), 30)
# Enhanced metadata
storm_name = storm_df['NAME'].iloc[0] if pd.notna(storm_df['NAME'].iloc[0]) else "UNNAMED"
season = storm_df['SEASON'].iloc[0] if 'SEASON' in storm_df.columns else year
logging.info(f"Generating FIXED video for {storm_name} ({sid}) with {len(lats)} track points using {standard} standard")
# FIXED: Create figure with proper cartopy setup
fig = plt.figure(figsize=(16, 10))
ax = plt.axes(projection=ccrs.PlateCarree())
# Enhanced map features
ax.stock_img()
ax.add_feature(cfeature.COASTLINE, linewidth=0.8)
ax.add_feature(cfeature.BORDERS, linewidth=0.5)
ax.add_feature(cfeature.OCEAN, color='lightblue', alpha=0.5)
ax.add_feature(cfeature.LAND, color='lightgray', alpha=0.5)
# Set extent based on track
padding = 5
ax.set_extent([
min(lons) - padding, max(lons) + padding,
min(lats) - padding, max(lats) + padding
])
# Add gridlines
gl = ax.gridlines(draw_labels=True, alpha=0.3)
gl.top_labels = gl.right_labels = False
# Title
ax.set_title(f"{season} {storm_name} ({sid}) Track Animation - {standard.upper()} Standard",
fontsize=18, fontweight='bold')
# FIXED: Animation elements - proper initialization with cartopy transforms
track_line, = ax.plot([], [], 'b-', linewidth=3, alpha=0.7,
label='Track', transform=ccrs.PlateCarree())
current_point, = ax.plot([], [], 'o', markersize=15,
transform=ccrs.PlateCarree())
history_points, = ax.plot([], [], 'o', markersize=6, alpha=0.4, color='blue',
transform=ccrs.PlateCarree())
info_box = ax.text(0.02, 0.98, '', transform=ax.transAxes,
fontsize=12, verticalalignment='top',
bbox=dict(boxstyle="round,pad=0.5", facecolor='white', alpha=0.9))
# FIXED: Color legend with proper categories
legend_elements = []
if standard == 'taiwan':
categories = ['Tropical Depression', 'Tropical Storm', 'Severe Tropical Storm',
'Typhoon', 'Severe Typhoon', 'Super Typhoon']
for category in categories:
color = get_taiwan_color_fixed(category)
legend_elements.append(plt.Line2D([0], [0], marker='o', color='w',
markerfacecolor=color, markersize=10, label=category))
else:
categories = ['Tropical Depression', 'Tropical Storm', 'C1 Typhoon', 'C2 Typhoon',
'C3 Strong Typhoon', 'C4 Very Strong Typhoon', 'C5 Super Typhoon']
for category in categories:
color = get_matplotlib_color(category)
legend_elements.append(plt.Line2D([0], [0], marker='o', color='w',
markerfacecolor=color, markersize=10, label=category))
ax.legend(handles=legend_elements, loc='upper right', fontsize=10)
# FIXED: Animation function
def animate_fixed(frame):
"""Fixed animation function that properly updates tracks with cartopy"""
try:
if frame >= len(lats):
return track_line, current_point, history_points, info_box
# Update track line up to current frame
current_lons = lons[:frame+1]
current_lats = lats[:frame+1]
track_line.set_data(current_lons, current_lats)
# Update historical points
if frame > 0:
history_points.set_data(current_lons[:-1], current_lats[:-1])
# Update current position with correct categorization
current_wind = winds[frame]
if standard == 'taiwan':
category, color = categorize_typhoon_by_standard_fixed(current_wind, 'taiwan')
else:
category, color = categorize_typhoon_by_standard_fixed(current_wind, 'atlantic')
# Update current position marker
current_point.set_data([lons[frame]], [lats[frame]])
current_point.set_color(color)
current_point.set_markersize(12 + current_wind/8)
# Enhanced info display
if 'ISO_TIME' in storm_df.columns and frame < len(storm_df):
current_time = storm_df.iloc[frame]['ISO_TIME']
time_str = current_time.strftime('%Y-%m-%d %H:%M UTC') if pd.notna(current_time) else 'Unknown'
else:
time_str = f"Step {frame+1}"
# Wind speed display
if standard == 'taiwan':
wind_ms = current_wind * 0.514444
wind_display = f"{current_wind:.0f} kt ({wind_ms:.1f} m/s)"
else:
wind_display = f"{current_wind:.0f} kt"
info_text = (
f"Storm: {storm_name}\n"
f"Time: {time_str}\n"
f"Position: {lats[frame]:.1f}Β°N, {lons[frame]:.1f}Β°E\n"
f"Max Wind: {wind_display}\n"
f"Category: {category}\n"
f"Standard: {standard.upper()}\n"
f"Frame: {frame+1}/{len(lats)}"
)
info_box.set_text(info_text)
return track_line, current_point, history_points, info_box
except Exception as e:
logging.error(f"Error in animate frame {frame}: {e}")
return track_line, current_point, history_points, info_box
# FIXED: Create animation with cartopy-compatible settings
anim = animation.FuncAnimation(
fig, animate_fixed, frames=len(lats),
interval=600, blit=False, repeat=True
)
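# blit=False is intentional: blitting is unreliable with cartopy GeoAxes backgrounds,
# so every frame is fully redrawn.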
# Save animation
temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.mp4',
dir=tempfile.gettempdir())
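# FFMpegWriter shells out to an ffmpeg binary, which must be on PATH; the yuv420p
# pixel format keeps the resulting MP4 playable in browsers and most embedded players.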
writer = animation.FFMpegWriter(
fps=2, bitrate=3000, codec='libx264',
extra_args=['-pix_fmt', 'yuv420p']
)
logging.info(f"Saving FIXED animation to {temp_file.name}")
anim.save(temp_file.name, writer=writer, dpi=120)
plt.close(fig)
logging.info(f"FIXED video generated successfully: {temp_file.name}")
return temp_file.name
except Exception as e:
error_msg = f"CRITICAL ERROR generating video: {str(e)}"
logging.error(error_msg)
import traceback
traceback.print_exc()
raise Exception(error_msg)
# -----------------------------
# FIXED: Data Loading and Processing
# -----------------------------
# Global variables initialization
oni_data = None
typhoon_data = None
merged_data = None
def initialize_data():
"""Initialize all data safely - CRITICAL: NO FALLBACKS"""
global oni_data, typhoon_data, merged_data
try:
logging.info("Starting FIXED data loading process...")
# Update ONI data (optional)
update_oni_data()
# Load data with FIXED functions
oni_data, typhoon_data = load_data_fixed(ONI_DATA_PATH, TYPHOON_DATA_PATH)
# Verify critical data loaded
if typhoon_data is None or typhoon_data.empty:
raise Exception("CRITICAL: No typhoon data loaded")
if oni_data is None or oni_data.empty:
logging.warning("ONI data failed to load - using neutral values")
# Process data
oni_long = process_oni_data(oni_data)
typhoon_max = process_typhoon_data(typhoon_data)
merged_data = merge_data(oni_long, typhoon_max)
# Final validation
if merged_data is None or merged_data.empty:
raise Exception("CRITICAL: Merged data is empty")
logging.info(f"FIXED data loading complete:")
logging.info(f" - ONI data: {len(oni_data) if oni_data is not None else 0} years")
logging.info(f" - Typhoon data: {len(typhoon_data)} records")
logging.info(f" - Merged data: {len(merged_data)} storms")
except Exception as e:
logging.error(f"CRITICAL ERROR during FIXED data initialization: {e}")
import traceback
traceback.print_exc()
raise Exception(f"Data initialization failed: {e}")
# -----------------------------
# FIXED: Gradio Interface
# -----------------------------
def create_interface():
"""Create the enhanced Gradio interface - NO FALLBACKS"""
try:
# Ensure data is available
if oni_data is None or typhoon_data is None or merged_data is None:
raise Exception("Data not properly loaded for interface creation")
# Get safe data statistics
total_storms = len(typhoon_data['SID'].unique()) if 'SID' in typhoon_data.columns else 0
total_records = len(typhoon_data)
available_years = get_available_years(typhoon_data)
year_range_display = f"{available_years[0]} - {available_years[-1]}" if available_years else "Unknown"
with gr.Blocks(title="Enhanced Typhoon Analysis Platform", theme=gr.themes.Soft()) as demo:
gr.Markdown("# πͺοΈ Enhanced Typhoon Analysis Platform")
gr.Markdown("**Advanced ML clustering, route predictions, and comprehensive tropical cyclone analysis including Tropical Depressions**")
with gr.Tab("π Overview"):
overview_text = f"""
## Welcome to the Enhanced Typhoon Analysis Dashboard
This dashboard provides comprehensive analysis of typhoon data in relation to ENSO phases with advanced machine learning capabilities.
### 🚀 Enhanced Features:
- **Advanced ML Clustering**: UMAP/t-SNE storm pattern analysis with separate visualizations
- **Predictive Routing**: Advanced storm track and intensity forecasting with uncertainty quantification
- **Complete TD Support**: Now includes Tropical Depressions (< 34 kt)
- **Taiwan Standard**: Full support for Taiwan meteorological classification system
- **2025 Data Ready**: Real-time compatibility with current year data
- **Enhanced Animations**: High-quality storm track visualizations with both standards
- **NO FALLBACK DATA**: All data comes from real IBTrACS sources
### 📊 Data Status:
- **ONI Data**: {len(oni_data) if oni_data is not None else 0} years loaded
- **Typhoon Data**: {total_records:,} records loaded
- **Merged Data**: {len(merged_data):,} typhoons with analysis data
- **Available Years**: {year_range_display}
- **Unique Storms**: {total_storms:,}
### 🔧 Technical Capabilities:
- **UMAP Clustering**: {"✅ Available" if UMAP_AVAILABLE else "⚠️ Limited to t-SNE/PCA"}
- **AI Predictions**: {"🧠 Deep Learning" if CNN_AVAILABLE else "🔬 Physics-based"}
- **Enhanced Categorization**: Tropical Depression to Super Typhoon
- **Platform**: Optimized for real-time analysis
- **Data Source**: Live IBTrACS database (no synthetic data)
### 🔬 Research Applications:
- Climate change impact studies
- Seasonal forecasting research
- Storm pattern classification
- ENSO-typhoon relationship analysis
- Intensity prediction model development
"""
gr.Markdown(overview_text)
with gr.Tab("π¬ Advanced ML Clustering"):
gr.Markdown("## π― Storm Pattern Analysis with Separate Visualizations")
gr.Markdown("**Four separate plots: Clustering, Routes, Pressure Evolution, and Wind Evolution**")
with gr.Row():
with gr.Column(scale=2):
reduction_method = gr.Dropdown(
choices=['UMAP', 't-SNE', 'PCA'],
value='UMAP' if UMAP_AVAILABLE else 't-SNE',
label="🔍 Dimensionality Reduction Method",
info="UMAP provides better global structure preservation"
)
with gr.Column(scale=1):
analyze_clusters_btn = gr.Button("🚀 Generate All Cluster Analyses", variant="primary", size="lg")
with gr.Row():
with gr.Column():
cluster_plot = gr.Plot(label="🎯 Storm Clustering Analysis")
with gr.Column():
routes_plot = gr.Plot(label="🗺️ Clustered Storm Routes")
with gr.Row():
with gr.Column():
pressure_plot = gr.Plot(label="🌡️ Pressure Evolution by Cluster")
with gr.Column():
wind_plot = gr.Plot(label="💨 Wind Speed Evolution by Cluster")
with gr.Row():
cluster_stats = gr.Textbox(label="📊 Detailed Cluster Statistics", lines=15, max_lines=20)
def run_separate_clustering_analysis(method):
try:
storm_features = extract_storm_features(typhoon_data)
if storm_features is None:
raise Exception("Could not extract storm features from data")
fig_cluster, fig_routes, fig_pressure, fig_wind, stats = create_separate_clustering_plots(
storm_features, typhoon_data, method.lower()
)
return fig_cluster, fig_routes, fig_pressure, fig_wind, stats
except Exception as e:
import traceback
error_details = traceback.format_exc()
error_msg = f"Clustering analysis failed: {str(e)}\n\nDetails:\n{error_details}"
logging.error(error_msg)
return None, None, None, None, error_msg
analyze_clusters_btn.click(
fn=run_separate_clustering_analysis,
inputs=[reduction_method],
outputs=[cluster_plot, routes_plot, pressure_plot, wind_plot, cluster_stats]
)
with gr.Tab("π Realistic Storm Genesis & Prediction"):
gr.Markdown("## π Realistic Typhoon Development from Genesis")
if CNN_AVAILABLE:
gr.Markdown("π§ **Deep Learning models available** - TensorFlow loaded successfully")
method_description = "Hybrid CNN-Physics genesis modeling with realistic development cycles"
else:
gr.Markdown("π¬ **Physics-based models available** - Using climatological relationships")
method_description = "Advanced physics-based genesis modeling with environmental coupling"
gr.Markdown(f"**Current Method**: {method_description}")
gr.Markdown("**π Realistic Genesis**: Select from climatologically accurate development regions")
gr.Markdown("**π TD Starting Point**: Storms begin at realistic Tropical Depression intensities (25-35 kt)")
gr.Markdown("**π¬ Animation Support**: Watch storm development unfold over time")
with gr.Row():
with gr.Column(scale=2):
gr.Markdown("### π Genesis Configuration")
genesis_options = list(get_realistic_genesis_locations().keys())
genesis_region = gr.Dropdown(
choices=genesis_options,
value="Western Pacific Main Development Region",
label="Typhoon Genesis Region",
info="Select realistic development region based on climatology"
)
def update_genesis_info(region):
locations = get_realistic_genesis_locations()
if region in locations:
info = locations[region]
return f"π Location: {info['lat']:.1f}Β°N, {info['lon']:.1f}Β°E\nπ {info['description']}"
return "Select a genesis region"
genesis_info_display = gr.Textbox(
label="Selected Region Info",
lines=2,
interactive=False,
value=update_genesis_info("Western Pacific Main Development Region")
)
genesis_region.change(
fn=update_genesis_info,
inputs=[genesis_region],
outputs=[genesis_info_display]
)
with gr.Row():
pred_month = gr.Slider(1, 12, label="Month", value=9, info="Peak season: Jul-Oct")
pred_oni = gr.Number(label="ONI Value", value=0.0, info="ENSO index (-3 to 3)")
with gr.Row():
forecast_hours = gr.Number(
label="Forecast Length (hours)",
value=72,
minimum=20,
maximum=1000,
step=6,
info="Extended forecasting: 20-1000 hours"
)
advanced_physics = gr.Checkbox(
label="Advanced Physics",
value=True,
info="Enhanced environmental modeling"
)
with gr.Row():
show_uncertainty = gr.Checkbox(label="Show Uncertainty Cone", value=True)
enable_animation = gr.Checkbox(
label="Enable Animation",
value=True,
info="Animated storm development vs static view"
)
with gr.Column(scale=1):
gr.Markdown("### βοΈ Prediction Controls")
predict_btn = gr.Button("π Generate Realistic Storm Forecast", variant="primary", size="lg")
gr.Markdown("### π Genesis Conditions")
current_intensity = gr.Number(label="Genesis Intensity (kt)", interactive=False)
current_category = gr.Textbox(label="Initial Category", interactive=False)
model_confidence = gr.Textbox(label="Model Info", interactive=False)
with gr.Row():
route_plot = gr.Plot(label="πΊοΈ Advanced Route & Intensity Forecast")
with gr.Row():
forecast_details = gr.Textbox(label="π Detailed Forecast Summary", lines=20, max_lines=25)
def run_realistic_prediction(region, month, oni, hours, advanced_phys, uncertainty, animation):
try:
results = predict_storm_route_and_intensity_realistic(
region, month, oni,
forecast_hours=hours,
use_advanced_physics=advanced_phys
)
current = results['current_prediction']
intensity = current['intensity_kt']
category = current['category']
genesis_info = results.get('genesis_info', {})
fig, forecast_text = create_animated_route_visualization(
results, uncertainty, animation
)
model_info = f"{results['model_info']}\nGenesis: {genesis_info.get('description', 'Unknown')}"
return (
intensity,
category,
model_info,
fig,
forecast_text
)
except Exception as e:
error_msg = f"Realistic prediction failed: {str(e)}"
logging.error(error_msg)
import traceback
traceback.print_exc()
raise gr.Error(error_msg)
predict_btn.click(
fn=run_realistic_prediction,
inputs=[genesis_region, pred_month, pred_oni, forecast_hours, advanced_physics, show_uncertainty, enable_animation],
outputs=[current_intensity, current_category, model_confidence, route_plot, forecast_details]
)
with gr.Tab("πΊοΈ Track Visualization"):
with gr.Row():
start_year = gr.Number(label="Start Year", value=2020)
start_month = gr.Dropdown(label="Start Month", choices=list(range(1, 13)), value=1)
end_year = gr.Number(label="End Year", value=2025)
end_month = gr.Dropdown(label="End Month", choices=list(range(1, 13)), value=6)
enso_phase = gr.Dropdown(label="ENSO Phase", choices=['all', 'El Nino', 'La Nina', 'Neutral'], value='all')
typhoon_search = gr.Textbox(label="Typhoon Search")
analyze_btn = gr.Button("Generate Tracks")
tracks_plot = gr.Plot()
typhoon_count = gr.Textbox(label="Number of Typhoons Displayed")
analyze_btn.click(
fn=get_full_tracks,
inputs=[start_year, start_month, end_year, end_month, enso_phase, typhoon_search],
outputs=[tracks_plot, typhoon_count]
)
with gr.Tab("π¨ Wind Analysis"):
with gr.Row():
wind_start_year = gr.Number(label="Start Year", value=2020)
wind_start_month = gr.Dropdown(label="Start Month", choices=list(range(1, 13)), value=1)
wind_end_year = gr.Number(label="End Year", value=2024)
wind_end_month = gr.Dropdown(label="End Month", choices=list(range(1, 13)), value=6)
wind_enso_phase = gr.Dropdown(label="ENSO Phase", choices=['all', 'El Nino', 'La Nina', 'Neutral'], value='all')
wind_typhoon_search = gr.Textbox(label="Typhoon Search")
wind_analyze_btn = gr.Button("Generate Wind Analysis")
wind_scatter = gr.Plot()
wind_regression_results = gr.Textbox(label="Wind Regression Results")
wind_analyze_btn.click(
fn=get_wind_analysis,
inputs=[wind_start_year, wind_start_month, wind_end_year, wind_end_month, wind_enso_phase, wind_typhoon_search],
outputs=[wind_scatter, wind_regression_results]
)
with gr.Tab("π‘οΈ Pressure Analysis"):
with gr.Row():
pressure_start_year = gr.Number(label="Start Year", value=2020)
pressure_start_month = gr.Dropdown(label="Start Month", choices=list(range(1, 13)), value=1)
pressure_end_year = gr.Number(label="End Year", value=2024)
pressure_end_month = gr.Dropdown(label="End Month", choices=list(range(1, 13)), value=6)
pressure_enso_phase = gr.Dropdown(label="ENSO Phase", choices=['all', 'El Nino', 'La Nina', 'Neutral'], value='all')
pressure_typhoon_search = gr.Textbox(label="Typhoon Search")
pressure_analyze_btn = gr.Button("Generate Pressure Analysis")
pressure_scatter = gr.Plot()
pressure_regression_results = gr.Textbox(label="Pressure Regression Results")
pressure_analyze_btn.click(
fn=get_pressure_analysis,
inputs=[pressure_start_year, pressure_start_month, pressure_end_year, pressure_end_month, pressure_enso_phase, pressure_typhoon_search],
outputs=[pressure_scatter, pressure_regression_results]
)
with gr.Tab("π Longitude Analysis"):
with gr.Row():
lon_start_year = gr.Number(label="Start Year", value=2020)
lon_start_month = gr.Dropdown(label="Start Month", choices=list(range(1, 13)), value=1)
lon_end_year = gr.Number(label="End Year", value=2020)
lon_end_month = gr.Dropdown(label="End Month", choices=list(range(1, 13)), value=6)
lon_enso_phase = gr.Dropdown(label="ENSO Phase", choices=['all', 'El Nino', 'La Nina', 'Neutral'], value='all')
lon_typhoon_search = gr.Textbox(label="Typhoon Search (Optional)")
lon_analyze_btn = gr.Button("Generate Longitude Analysis")
regression_plot = gr.Plot()
slopes_text = gr.Textbox(label="Regression Slopes")
lon_regression_results = gr.Textbox(label="Longitude Regression Results")
lon_analyze_btn.click(
fn=get_longitude_analysis,
inputs=[lon_start_year, lon_start_month, lon_end_year, lon_end_month, lon_enso_phase, lon_typhoon_search],
outputs=[regression_plot, slopes_text, lon_regression_results]
)
with gr.Tab("π¬ Enhanced Track Animation"):
gr.Markdown("## π₯ High-Quality Storm Track Visualization - NO FALLBACK DATA")
gr.Markdown("**ALL animations use real IBTrACS data - never synthetic or fallback data**")
with gr.Row():
year_dropdown = gr.Dropdown(
label="Year",
choices=available_years,
value=available_years[-1] if available_years else None
)
basin_dropdown = gr.Dropdown(
label="Basin",
choices=["All Basins", "WP - Western Pacific", "EP - Eastern Pacific", "NA - North Atlantic"],
value="All Basins"
)
with gr.Row():
typhoon_dropdown = gr.Dropdown(label="Storm Selection (All Categories Including TD)")
standard_dropdown = gr.Dropdown(
label="📏 Classification Standard",
choices=['atlantic', 'taiwan'],
value='atlantic',
info="Atlantic: International standard | Taiwan: Local meteorological standard"
)
generate_video_btn = gr.Button("🎬 Generate Enhanced Animation", variant="primary")
video_output = gr.Video(label="Storm Track Animation")
# Update storm options when year or basin changes
def safe_update_typhoon_options(year, basin):
try:
return update_typhoon_options_enhanced(year, basin)
except Exception as e:
error_msg = f"Failed to load storms: {str(e)}"
logging.error(error_msg)
return gr.update(choices=[error_msg], value=None)
for input_comp in [year_dropdown, basin_dropdown]:
input_comp.change(
fn=safe_update_typhoon_options,
inputs=[year_dropdown, basin_dropdown],
outputs=[typhoon_dropdown]
)
def safe_generate_video(year, typhoon_selection, standard):
try:
if not typhoon_selection:
raise gr.Error("Please select a typhoon first")
return generate_enhanced_track_video_fixed(year, typhoon_selection, standard)
except Exception as e:
error_msg = f"Video generation failed: {str(e)}"
logging.error(error_msg)
raise gr.Error(error_msg)
generate_video_btn.click(
fn=safe_generate_video,
inputs=[year_dropdown, typhoon_dropdown, standard_dropdown],
outputs=[video_output]
)
animation_info_text = """
### 🎬 FIXED Animation Features - NO FALLBACK DATA:
- **Real Data Only**: All animations use actual IBTrACS typhoon track data
- **Dual Standards**: Full support for both Atlantic and Taiwan classification systems
- **Full TD Support**: Now displays Tropical Depressions (< 34 kt) in gray
- **2025 Compatibility**: Complete support for current year data
- **Enhanced Maps**: Better cartographic projections with terrain features
- **Smart Scaling**: Storm symbols scale dynamically with intensity
- **Real-time Info**: Live position, time, and meteorological data display
- **Professional Styling**: Publication-quality animations with proper legends
- **FIXED Animation**: Tracks now display properly with cartopy integration
- **Error Handling**: Robust error handling prevents fallback to synthetic data
### 🇹🇼 Taiwan Standard Features (CORRECTED):
- **CMA 2006 Standards**: Uses official China Meteorological Administration classification
- **Six Categories**: TD → TS → STS → TY → STY → Super TY
- **Correct Thresholds**: Based on official meteorological standards
- **m/s Display**: Shows both knots and meters per second
- **CWB Compatible**: Matches Central Weather Bureau classifications
"""
gr.Markdown(animation_info_text)
with gr.Tab("π Data Statistics & Insights"):
gr.Markdown("## π Comprehensive Dataset Analysis - REAL DATA ONLY")
try:
if len(typhoon_data) > 0:
storm_cats = typhoon_data.groupby('SID')['USA_WIND'].max().apply(categorize_typhoon_enhanced)
cat_counts = storm_cats.value_counts()
fig_dist = px.bar(
x=cat_counts.index,
y=cat_counts.values,
title="Storm Intensity Distribution (Including Tropical Depressions)",
labels={'x': 'Category', 'y': 'Number of Storms'},
color=cat_counts.index,
color_discrete_map=enhanced_color_map
)
if 'ISO_TIME' in typhoon_data.columns:
seasonal_data = typhoon_data.copy()
seasonal_data['Month'] = seasonal_data['ISO_TIME'].dt.month
monthly_counts = seasonal_data.groupby(['Month', 'SID']).size().groupby('Month').size()
fig_seasonal = px.bar(
x=monthly_counts.index,
y=monthly_counts.values,
title="Seasonal Storm Distribution",
labels={'x': 'Month', 'y': 'Number of Storms'},
color=monthly_counts.values,
color_continuous_scale='Viridis'
)
else:
fig_seasonal = None
if 'SID' in typhoon_data.columns:
basin_data = typhoon_data['SID'].str[:2].value_counts()
fig_basin = px.pie(
values=basin_data.values,
names=basin_data.index,
title="Distribution by Basin"
)
else:
fig_basin = None
with gr.Row():
gr.Plot(value=fig_dist)
if fig_seasonal:
with gr.Row():
gr.Plot(value=fig_seasonal)
if fig_basin:
with gr.Row():
gr.Plot(value=fig_basin)
except Exception as e:
gr.Markdown(f"Visualization error: {str(e)}")
# Enhanced statistics
if 'SEASON' in typhoon_data.columns:
try:
min_year = int(typhoon_data['SEASON'].min())
max_year = int(typhoon_data['SEASON'].max())
year_range = f"{min_year}-{max_year}"
years_covered = typhoon_data['SEASON'].nunique()
except (ValueError, TypeError):
year_range = "Unknown"
years_covered = 0
else:
year_range = "Unknown"
years_covered = 0
if 'SID' in typhoon_data.columns:
try:
basins_available = ', '.join(sorted(typhoon_data['SID'].str[:2].unique()))
avg_storms_per_year = total_storms / max(years_covered, 1)
except Exception:
basins_available = "Unknown"
avg_storms_per_year = 0
else:
basins_available = "Unknown"
avg_storms_per_year = 0
try:
if 'USA_WIND' in typhoon_data.columns:
# Classify each storm by its lifetime-maximum wind so the three buckets are mutually exclusive
max_winds = typhoon_data.groupby('SID')['USA_WIND'].max()
td_storms = int((max_winds < 34).sum())
ts_storms = int(((max_winds >= 34) & (max_winds < 64)).sum())
typhoon_storms = int((max_winds >= 64).sum())
td_percentage = (td_storms / max(total_storms, 1)) * 100
else:
td_storms = ts_storms = typhoon_storms = 0
td_percentage = 0
except Exception as e:
td_storms = ts_storms = typhoon_storms = 0
td_percentage = 0
stats_text = f"""
### 📊 REAL Dataset Summary - NO SYNTHETIC DATA:
- **Total Unique Storms**: {total_storms:,}
- **Total Track Records**: {total_records:,}
- **Year Range**: {year_range} ({years_covered} years)
- **Basins Available**: {basins_available}
- **Average Storms/Year**: {avg_storms_per_year:.1f}
- **Data Source**: IBTrACS v04r01 (Real observations only)
### 🌪️ Storm Category Breakdown:
- **Tropical Depressions**: {td_storms:,} storms ({td_percentage:.1f}%)
- **Tropical Storms**: {ts_storms:,} storms
- **Typhoons (C1-C5)**: {typhoon_storms:,} storms
### 🚀 Platform Capabilities:
- **Complete TD Analysis** - First platform to include comprehensive TD tracking
- **Dual Classification Systems** - Both Atlantic and Taiwan standards supported
- **Advanced ML Clustering** - DBSCAN pattern recognition with separate visualizations
- **Real-time Predictions** - Physics-based and optional CNN intensity forecasting
- **2025 Data Ready** - Full compatibility with current season data
- **Enhanced Animations** - Professional-quality storm track videos
- **Multi-basin Analysis** - Comprehensive Pacific and Atlantic coverage
- **NO FALLBACK DATA** - All analysis uses real meteorological observations
### 🔬 Research Applications:
- Climate change impact studies
- Seasonal forecasting research
- Storm pattern classification
- ENSO-typhoon relationship analysis
- Intensity prediction model development
- Cross-regional classification comparisons
"""
gr.Markdown(stats_text)
return demo
except Exception as e:
logging.error(f"CRITICAL ERROR creating Gradio interface: {e}")
import traceback
traceback.print_exc()
raise Exception(f"Interface creation failed: {e}")
# -----------------------------
# MAIN EXECUTION
# -----------------------------
if __name__ == "__main__":
try:
# Initialize data first - CRITICAL
logging.info("Initializing data...")
initialize_data()
# Verify data loaded correctly
if typhoon_data is None or typhoon_data.empty:
raise Exception("CRITICAL: No typhoon data available for interface")
logging.info("Creating interface...")
demo = create_interface()
logging.info("Launching application...")
demo.launch(share=True)
except Exception as e:
logging.error(f"CRITICAL APPLICATION ERROR: {e}")
import traceback
traceback.print_exc()
print(f"\n{'='*60}")
print("CRITICAL ERROR: Application failed to start")
print(f"Error: {e}")
print("Check logs for detailed error information")
print(f"{'='*60}")
raise