diff --git "a/app.py" "b/app.py"
--- "a/app.py"
+++ "b/app.py"
@@ -34,12 +34,16 @@ from sklearn.decomposition import PCA
from sklearn.ensemble import RandomForestRegressor
from sklearn.model_selection import train_test_split
from sklearn.metrics import mean_absolute_error, r2_score
-from scipy.interpolate import interp1d, RBFInterpolator
+from scipy.interpolate import interp1d, RBFInterpolator, griddata
+from scipy.ndimage import gaussian_filter
import statsmodels.api as sm
import requests
import tempfile
import shutil
import xarray as xr
+import urllib.request
+from urllib.error import URLError
+import ssl
# NEW: Advanced ML imports
try:
@@ -72,6 +76,9 @@ except ImportError:
import tropycal.tracks as tracks
+# SECURITY NOTE: this does more than suppress warnings. It replaces the ssl
+# module's default HTTPS context factory with the *unverified* one, so any
+# code in this process that relies on that default (e.g. urllib) stops
+# validating TLS certificates and hostnames.
+# NOTE(review): presumably added to work around certificate errors when
+# downloading oceanic data -- confirm, and prefer a proper CA bundle instead.
+ssl._create_default_https_context = ssl._create_unverified_context
+
# -----------------------------
# Configuration and Setup
# -----------------------------
@@ -216,6 +223,259 @@ taiwan_standard_fixed = {
'Tropical Depression': {'wind_speed_ms': 0, 'wind_speed_kt': 0, 'color': 'Gray', 'hex': '#808080'}
}
+# -----------------------------
+# ENHANCED: Oceanic Data Integration
+# -----------------------------
+
+class OceanicDataManager:
+    """Fetch gridded oceanic fields (SST, SLP) and sample them at a point.
+
+    Remote datasets are opened with xarray. Whenever a fetch fails (or yields
+    no data) a synthetic, climatology-like grid is returned instead; such
+    results carry ``success=False`` and a ``note`` entry so callers can tell
+    real data from fallback data.
+    """
+
+    def __init__(self):
+        self.sst_base_url = "https://www.ncei.noaa.gov/erddap/griddap/NOAA_OISST_V2.nc"
+        self.slp_base_url = "https://psl.noaa.gov/thredds/dodsC/Datasets/ncep.reanalysis.dailyavgs/surface/slp.nc"
+        self.cache_dir = os.path.join(DATA_PATH, 'oceanic_cache')
+        self.create_cache_directory()
+
+    def create_cache_directory(self):
+        """Create the cache directory, switching to a temp dir if that fails."""
+        try:
+            os.makedirs(self.cache_dir, exist_ok=True)
+        except Exception as e:
+            logging.warning(f"Could not create cache directory: {e}")
+            self.cache_dir = tempfile.mkdtemp()
+
+    @staticmethod
+    def _format_date(value):
+        """Return 'YYYY-MM-DD' for a datetime, otherwise ``str(value)``."""
+        if isinstance(value, datetime):
+            return value.strftime('%Y-%m-%d')
+        return str(value)
+
+    @staticmethod
+    def _ordered_slice(coord_values, lower, upper):
+        """Build a label slice that follows the direction of ``coord_values``.
+
+        Label-based slicing on a descending axis only selects data when the
+        bounds are given in descending order too, so the bounds are swapped
+        when the first coordinate value is greater than the last.
+        """
+        if len(coord_values) > 1 and coord_values[0] > coord_values[-1]:
+            return slice(upper, lower)
+        return slice(lower, upper)
+
+    def get_sst_data(self, lat_min, lat_max, lon_min, lon_max, date_start, date_end=None):
+        """
+        Fetch Sea Surface Temperature data from NOAA OISST v2
+
+        Parameters:
+        lat_min, lat_max: Latitude bounds
+        lon_min, lon_max: Longitude bounds
+        date_start: Start date (datetime or string)
+        date_end: End date (datetime or string, optional; defaults to date_start)
+
+        Returns:
+        dict with keys 'sst', 'latitude', 'longitude', 'time' and 'success'.
+        On any error the result of _get_fallback_sst_data is returned instead
+        (success=False); this method does not raise.
+        """
+        try:
+            if date_end is None:
+                date_end = date_start
+
+            date_start_str = self._format_date(date_start)
+            date_end_str = self._format_date(date_end)
+
+            # Construct ERDDAP URL with parameters
+            url_params = (
+                f"?sst[({date_start_str}):1:({date_end_str})]"
+                f"[({lat_min}):1:({lat_max})]"
+                f"[({lon_min}):1:({lon_max})]"
+            )
+            full_url = self.sst_base_url + url_params
+
+            logging.info(f"Fetching SST data from: {full_url}")
+
+            # Use xarray to open the remote dataset
+            with warnings.catch_warnings():
+                warnings.simplefilter("ignore")
+                ds = xr.open_dataset(full_url)
+
+            try:
+                # .values loads the arrays into memory before the dataset is closed
+                sst_data = ds['sst'].values
+                lats = ds['latitude'].values
+                lons = ds['longitude'].values
+                times = ds['time'].values
+            finally:
+                # Close even when a variable is missing or the read fails
+                ds.close()
+
+            if sst_data.size == 0:
+                # An empty grid is useless downstream; use the fallback path
+                raise ValueError("SST subset is empty for the requested bounds/dates")
+
+            return {
+                'sst': sst_data,
+                'latitude': lats,
+                'longitude': lons,
+                'time': times,
+                'success': True
+            }
+
+        except Exception as e:
+            logging.error(f"Error fetching SST data: {e}")
+            return self._get_fallback_sst_data(lat_min, lat_max, lon_min, lon_max)
+
+    def get_slp_data(self, lat_min, lat_max, lon_min, lon_max, date_start, date_end=None):
+        """
+        Fetch Sea Level Pressure data from NCEP/NCAR Reanalysis
+
+        Parameters and return value mirror get_sst_data, with the data stored
+        under the 'slp' key. On any error the result of _get_fallback_slp_data
+        is returned instead (success=False); this method does not raise.
+        """
+        try:
+            if date_end is None:
+                date_end = date_start
+
+            date_start_str = self._format_date(date_start)
+            date_end_str = self._format_date(date_end)
+
+            logging.info(f"Fetching SLP data for {date_start_str} to {date_end_str}")
+
+            # Use xarray to open OPeNDAP dataset
+            with warnings.catch_warnings():
+                warnings.simplefilter("ignore")
+
+                # Open the full dataset lazily; only the subset below is read
+                ds = xr.open_dataset(self.slp_base_url)
+                try:
+                    # Coordinate names might vary between datasets
+                    lat_coord = 'lat' if 'lat' in ds.dims else 'latitude'
+                    lon_coord = 'lon' if 'lon' in ds.dims else 'longitude'
+
+                    # Slice bounds must follow the axis direction, otherwise a
+                    # descending axis silently yields an empty selection.
+                    # NOTE(review): presumably this dataset stores longitude as
+                    # 0-360; negative lon bounds would then select nothing and
+                    # trigger the fallback below -- confirm.
+                    subset = ds.sel(
+                        time=slice(date_start_str, date_end_str),
+                        **{
+                            lat_coord: self._ordered_slice(ds[lat_coord].values, lat_min, lat_max),
+                            lon_coord: self._ordered_slice(ds[lon_coord].values, lon_min, lon_max),
+                        }
+                    )
+
+                    # Extract SLP data
+                    slp_data = subset['slp'].values
+                    lats = subset[lat_coord].values
+                    lons = subset[lon_coord].values
+                    times = subset['time'].values
+                finally:
+                    # Close even when selection or the read fails
+                    ds.close()
+
+            if slp_data.size == 0:
+                # Do not report success for a selection that holds no data
+                raise ValueError("SLP subset is empty for the requested bounds/dates")
+
+            return {
+                'slp': slp_data,
+                'latitude': lats,
+                'longitude': lons,
+                'time': times,
+                'success': True
+            }
+
+        except Exception as e:
+            logging.error(f"Error fetching SLP data: {e}")
+            return self._get_fallback_slp_data(lat_min, lat_max, lon_min, lon_max)
+
+    def _get_fallback_sst_data(self, lat_min, lat_max, lon_min, lon_max):
+        """Generate a synthetic 20x20 SST grid from a latitude-band rule plus noise.
+
+        The values are random (np.random.normal), so two calls differ.
+        """
+        lats = np.linspace(lat_min, lat_max, 20)
+        lons = np.linspace(lon_min, lon_max, 20)
+
+        # Latitude-only base value; conditions are evaluated in order
+        base_by_lat = np.select(
+            [lats < 10, lats < 20, lats < 30],
+            [
+                29.0,                        # Tropical
+                28.0 - (lats - 10) * 0.3,    # Subtropical
+                25.0 - (lats - 20) * 0.5,    # Temperate
+            ],
+            default=20.0 - (lats - 30) * 0.3,  # Cool waters
+        )
+
+        # Broadcast over (time, lat, lon) and add per-cell variation
+        noise = np.random.normal(0, 0.5, size=(1, len(lats), len(lons)))
+        sst_values = base_by_lat[np.newaxis, :, np.newaxis] + noise
+
+        return {
+            'sst': sst_values,
+            'latitude': lats,
+            'longitude': lons,
+            'time': [datetime.now()],
+            'success': False,
+            'note': 'Using climatological fallback data'
+        }
+
+    def _get_fallback_slp_data(self, lat_min, lat_max, lon_min, lon_max):
+        """Generate a synthetic 20x20 SLP grid from a latitude rule plus noise.
+
+        The values are random (np.random.normal), so two calls differ.
+        """
+        lats = np.linspace(lat_min, lat_max, 20)
+        lons = np.linspace(lon_min, lon_max, 20)
+
+        base_by_lat = np.where(
+            lats < 30,
+            1013 + 3 * np.cos(np.radians(lats * 6)),  # Subtropical high influence
+            1010 - (lats - 30) * 0.2,                 # Mid-latitude
+        )
+
+        noise = np.random.normal(0, 2, size=(1, len(lats), len(lons)))
+        slp_values = base_by_lat[np.newaxis, :, np.newaxis] + noise
+
+        return {
+            'slp': slp_values,
+            'latitude': lats,
+            'longitude': lons,
+            'time': [datetime.now()],
+            'success': False,
+            'note': 'Using climatological fallback data'
+        }
+
+    def interpolate_data_to_point(self, data_dict, target_lat, target_lon, variable='sst'):
+        """Interpolate gridded data to a specific point.
+
+        Parameters:
+        data_dict: dict holding the grid under ``variable`` plus 1-D
+                   'latitude' and 'longitude' arrays
+        target_lat, target_lon: location to sample
+        variable: key of the field inside data_dict
+
+        Returns:
+        The interpolated value as a Python scalar. Linear interpolation is
+        tried first, nearest-neighbour second; NaN is returned when no valid
+        grid cell exists or when any error occurs (errors are logged).
+        """
+        try:
+            data = data_dict[variable]
+            lats = data_dict['latitude']
+            lons = data_dict['longitude']
+
+            # Take most recent time if multiple times available
+            if len(data.shape) == 3:  # time, lat, lon
+                data_2d = data[-1, :, :]
+            else:  # lat, lon
+                data_2d = data
+
+            # Create coordinate grids
+            lon_grid, lat_grid = np.meshgrid(lons, lats)
+
+            # Flatten for interpolation
+            points = np.column_stack((lat_grid.flatten(), lon_grid.flatten()))
+            values = data_2d.flatten()
+
+            # Remove NaN values (e.g. land cells)
+            valid_mask = ~np.isnan(values)
+            points = points[valid_mask]
+            values = values[valid_mask]
+
+            if len(values) == 0:
+                return np.nan
+
+            target = (target_lat, target_lon)
+
+            # .item() unwraps the single-element array returned by griddata
+            interpolated_value = np.asarray(griddata(
+                points, values, target,
+                method='linear', fill_value=np.nan
+            )).item()
+
+            # Outside the convex hull linear interpolation yields NaN;
+            # fall back to the nearest valid cell
+            if np.isnan(interpolated_value):
+                interpolated_value = np.asarray(griddata(
+                    points, values, target,
+                    method='nearest'
+                )).item()
+
+            return interpolated_value
+
+        except Exception as e:
+            logging.error(f"Error interpolating {variable} data: {e}")
+            return np.nan
+
+# Global oceanic data manager. It starts as None, while functions in this
+# module call methods on it directly (e.g. interpolate_data_to_point,
+# get_sst_data), which raises AttributeError until it is assigned.
+# NOTE(review): presumably set to an OceanicDataManager instance during
+# application start-up, outside the code visible here -- confirm.
+oceanic_manager = None
+
# -----------------------------
# Utility Functions for HF Spaces
# -----------------------------
@@ -816,2212 +1076,2734 @@ def categorize_typhoon_by_standard_fixed(wind_speed, standard='atlantic'):
return 'Tropical Depression', '#808080'
# -----------------------------
-# FIXED: ADVANCED ML FEATURES WITH ROBUST ERROR HANDLING
+# ENHANCED: Historical Environmental Analysis
# -----------------------------
-def extract_storm_features(typhoon_data):
- """Extract comprehensive features for clustering analysis - FIXED VERSION"""
+def analyze_historical_environment(typhoon_data, oni_data):
+ """Analyze historical environmental conditions for better predictions"""
try:
- if typhoon_data is None or typhoon_data.empty:
- logging.error("No typhoon data provided for feature extraction")
- return None
+ logging.info("Analyzing historical environmental patterns...")
+
+ # Get historical storm data with environmental conditions
+ historical_analysis = {
+ 'sst_patterns': {},
+ 'slp_patterns': {},
+ 'oni_relationships': {},
+ 'seasonal_variations': {},
+ 'intensity_predictors': {}
+ }
- # Basic features - ensure columns exist
- basic_features = []
- for sid in typhoon_data['SID'].unique():
- storm_data = typhoon_data[typhoon_data['SID'] == sid].copy()
-
- if len(storm_data) == 0:
- continue
-
- # Initialize feature dict with safe defaults
- features = {'SID': sid}
+ # Analyze by storm intensity categories
+ for category in ['Tropical Depression', 'Tropical Storm', 'C1 Typhoon',
+ 'C2 Typhoon', 'C3 Strong Typhoon', 'C4 Very Strong Typhoon', 'C5 Super Typhoon']:
- # Wind statistics
- if 'USA_WIND' in storm_data.columns:
- wind_values = pd.to_numeric(storm_data['USA_WIND'], errors='coerce').dropna()
- if len(wind_values) > 0:
- features['USA_WIND_max'] = wind_values.max()
- features['USA_WIND_mean'] = wind_values.mean()
- features['USA_WIND_std'] = wind_values.std() if len(wind_values) > 1 else 0
- else:
- features['USA_WIND_max'] = 30
- features['USA_WIND_mean'] = 30
- features['USA_WIND_std'] = 0
- else:
- features['USA_WIND_max'] = 30
- features['USA_WIND_mean'] = 30
- features['USA_WIND_std'] = 0
+ # Filter storms by category
+ if 'USA_WIND' in typhoon_data.columns:
+ category_storms = typhoon_data[
+ typhoon_data['USA_WIND'].apply(categorize_typhoon_enhanced) == category
+ ]
- # Pressure statistics
- if 'USA_PRES' in storm_data.columns:
- pres_values = pd.to_numeric(storm_data['USA_PRES'], errors='coerce').dropna()
- if len(pres_values) > 0:
- features['USA_PRES_min'] = pres_values.min()
- features['USA_PRES_mean'] = pres_values.mean()
- features['USA_PRES_std'] = pres_values.std() if len(pres_values) > 1 else 0
- else:
- features['USA_PRES_min'] = 1000
- features['USA_PRES_mean'] = 1000
- features['USA_PRES_std'] = 0
- else:
- features['USA_PRES_min'] = 1000
- features['USA_PRES_mean'] = 1000
- features['USA_PRES_std'] = 0
-
- # Location statistics
- if 'LAT' in storm_data.columns and 'LON' in storm_data.columns:
- lat_values = pd.to_numeric(storm_data['LAT'], errors='coerce').dropna()
- lon_values = pd.to_numeric(storm_data['LON'], errors='coerce').dropna()
+ if len(category_storms) > 0:
+ historical_analysis['intensity_predictors'][category] = {
+ 'avg_genesis_lat': category_storms['LAT'].mean(),
+ 'avg_genesis_lon': category_storms['LON'].mean(),
+ 'count': len(category_storms['SID'].unique()),
+ 'seasonal_distribution': category_storms['ISO_TIME'].dt.month.value_counts().to_dict() if 'ISO_TIME' in category_storms.columns else {}
+ }
+
+ # Analyze ENSO relationships
+ if len(oni_data) > 0:
+ for phase in ['El Nino', 'La Nina', 'Neutral']:
+ # This would be enhanced with actual storm-ENSO matching
+ historical_analysis['oni_relationships'][phase] = {
+ 'storm_frequency_modifier': 1.0, # Will be calculated from real data
+ 'intensity_modifier': 0.0,
+ 'track_shift': {'lat': 0.0, 'lon': 0.0}
+ }
+
+ logging.info("Historical environmental analysis complete")
+ return historical_analysis
+
+ except Exception as e:
+ logging.error(f"Error in historical environmental analysis: {e}")
+ return {}
+
+# -----------------------------
+# ENHANCED: Environmental Intensity Prediction
+# -----------------------------
+
+def calculate_environmental_intensity_potential(lat, lon, month, oni_value, sst_data=None, slp_data=None):
+ """
+ Calculate environmental intensity potential based on oceanic conditions
+
+ This function integrates multiple environmental factors to estimate
+ the maximum potential intensity a storm could achieve in given conditions.
+ """
+ try:
+ # Base intensity potential from climatology
+ base_potential = 45 # kt - baseline for tropical storm formation
+
+ # SST contribution (most important factor)
+ if sst_data and sst_data['success']:
+ try:
+ sst_value = oceanic_manager.interpolate_data_to_point(
+ sst_data, lat, lon, 'sst'
+ )
- if len(lat_values) > 0 and len(lon_values) > 0:
- features['LAT_mean'] = lat_values.mean()
- features['LAT_std'] = lat_values.std() if len(lat_values) > 1 else 0
- features['LAT_max'] = lat_values.max()
- features['LAT_min'] = lat_values.min()
- features['LON_mean'] = lon_values.mean()
- features['LON_std'] = lon_values.std() if len(lon_values) > 1 else 0
- features['LON_max'] = lon_values.max()
- features['LON_min'] = lon_values.min()
-
- # Genesis location (first valid position)
- features['genesis_lat'] = lat_values.iloc[0]
- features['genesis_lon'] = lon_values.iloc[0]
- features['genesis_intensity'] = features['USA_WIND_mean'] # Use mean as fallback
+ if not np.isnan(sst_value):
+ # Convert to Celsius if needed (OISST is in Celsius)
+ sst_celsius = sst_value if sst_value < 50 else sst_value - 273.15
- # Track characteristics
- features['lat_range'] = lat_values.max() - lat_values.min()
- features['lon_range'] = lon_values.max() - lon_values.min()
+ # Enhanced SST-intensity relationship based on research
+ if sst_celsius >= 30.0: # Very warm - super typhoon potential
+ sst_contribution = 80 + (sst_celsius - 30) * 10
+ elif sst_celsius >= 28.5: # Warm - typhoon potential
+ sst_contribution = 40 + (sst_celsius - 28.5) * 26.7
+ elif sst_celsius >= 26.5: # Marginal - tropical storm potential
+ sst_contribution = 0 + (sst_celsius - 26.5) * 20
+ else: # Too cool for significant development
+ sst_contribution = -30
- # Calculate track distance
- if len(lat_values) > 1:
- distances = []
- for i in range(1, len(lat_values)):
- dlat = lat_values.iloc[i] - lat_values.iloc[i-1]
- dlon = lon_values.iloc[i] - lon_values.iloc[i-1]
- distances.append(np.sqrt(dlat**2 + dlon**2))
- features['total_distance'] = sum(distances)
- features['avg_speed'] = np.mean(distances) if distances else 0
- else:
- features['total_distance'] = 0
- features['avg_speed'] = 0
-
- # Track curvature
- if len(lat_values) > 2:
- bearing_changes = []
- for i in range(1, len(lat_values)-1):
- dlat1 = lat_values.iloc[i] - lat_values.iloc[i-1]
- dlon1 = lon_values.iloc[i] - lon_values.iloc[i-1]
- dlat2 = lat_values.iloc[i+1] - lat_values.iloc[i]
- dlon2 = lon_values.iloc[i+1] - lon_values.iloc[i]
-
- angle1 = np.arctan2(dlat1, dlon1)
- angle2 = np.arctan2(dlat2, dlon2)
- change = abs(angle2 - angle1)
- bearing_changes.append(change)
-
- features['avg_curvature'] = np.mean(bearing_changes) if bearing_changes else 0
- else:
- features['avg_curvature'] = 0
+ base_potential += sst_contribution
+ logging.debug(f"SST: {sst_celsius:.1f}°C, contribution: {sst_contribution:.1f}kt")
else:
- # Default location values
- features.update({
- 'LAT_mean': 20, 'LAT_std': 0, 'LAT_max': 20, 'LAT_min': 20,
- 'LON_mean': 140, 'LON_std': 0, 'LON_max': 140, 'LON_min': 140,
- 'genesis_lat': 20, 'genesis_lon': 140, 'genesis_intensity': 30,
- 'lat_range': 0, 'lon_range': 0, 'total_distance': 0,
- 'avg_speed': 0, 'avg_curvature': 0
- })
- else:
- # Default location values if columns missing
- features.update({
- 'LAT_mean': 20, 'LAT_std': 0, 'LAT_max': 20, 'LAT_min': 20,
- 'LON_mean': 140, 'LON_std': 0, 'LON_max': 140, 'LON_min': 140,
- 'genesis_lat': 20, 'genesis_lon': 140, 'genesis_intensity': 30,
- 'lat_range': 0, 'lon_range': 0, 'total_distance': 0,
- 'avg_speed': 0, 'avg_curvature': 0
- })
-
- # Track length
- features['track_length'] = len(storm_data)
-
- # Add seasonal information
- if 'SEASON' in storm_data.columns:
- features['season'] = storm_data['SEASON'].iloc[0]
- else:
- features['season'] = 2000
+ # Use climatological SST
+ clim_sst = get_climatological_sst(lat, lon, month)
+ base_potential += max(0, (clim_sst - 26.5) * 15)
+
+ except Exception as e:
+ logging.warning(f"Error processing SST data: {e}")
+ clim_sst = get_climatological_sst(lat, lon, month)
+ base_potential += max(0, (clim_sst - 26.5) * 15)
+ else:
+ # Use climatological SST if real data unavailable
+ clim_sst = get_climatological_sst(lat, lon, month)
+ base_potential += max(0, (clim_sst - 26.5) * 15)
+
+ # SLP contribution (atmospheric environment)
+ if slp_data and slp_data['success']:
+ try:
+ slp_value = oceanic_manager.interpolate_data_to_point(
+ slp_data, lat, lon, 'slp'
+ )
- # Add basin information
- if 'BASIN' in storm_data.columns:
- features['basin'] = storm_data['BASIN'].iloc[0]
- elif 'SID' in storm_data.columns:
- features['basin'] = sid[:2] if len(sid) >= 2 else 'WP'
- else:
- features['basin'] = 'WP'
-
- basic_features.append(features)
+ if not np.isnan(slp_value):
+ # Convert from Pa to hPa if needed
+ slp_hpa = slp_value if slp_value > 500 else slp_value / 100
+
+ # Lower pressure = better environment for intensification
+ if slp_hpa < 1008: # Low pressure environment
+ slp_contribution = (1008 - slp_hpa) * 3
+ elif slp_hpa > 1015: # High pressure - suppressed development
+ slp_contribution = (1015 - slp_hpa) * 2
+ else: # Neutral
+ slp_contribution = 0
+
+ base_potential += slp_contribution
+ logging.debug(f"SLP: {slp_hpa:.1f}hPa, contribution: {slp_contribution:.1f}kt")
+
+ except Exception as e:
+ logging.warning(f"Error processing SLP data: {e}")
- if not basic_features:
- logging.error("No valid storm features could be extracted")
- return None
-
- # Convert to DataFrame
- storm_features = pd.DataFrame(basic_features)
+ # ENSO modulation
+ if oni_value > 1.0: # Strong El Niño
+ enso_modifier = -15 # Suppressed development
+ elif oni_value > 0.5: # Moderate El Niño
+ enso_modifier = -8
+ elif oni_value < -1.0: # Strong La Niña
+ enso_modifier = +12 # Enhanced development
+ elif oni_value < -0.5: # Moderate La Niña
+ enso_modifier = +6
+ else: # Neutral
+ enso_modifier = oni_value * 2
- # Ensure all numeric columns are properly typed
- numeric_columns = [col for col in storm_features.columns if col not in ['SID', 'basin']]
- for col in numeric_columns:
- storm_features[col] = pd.to_numeric(storm_features[col], errors='coerce').fillna(0)
+ base_potential += enso_modifier
- logging.info(f"Successfully extracted features for {len(storm_features)} storms")
- logging.info(f"Feature columns: {list(storm_features.columns)}")
+ # Seasonal modulation
+ seasonal_factors = {
+ 1: -12, 2: -10, 3: -8, 4: -5, 5: 0, 6: 5,
+ 7: 12, 8: 15, 9: 18, 10: 12, 11: 5, 12: -8
+ }
+ seasonal_modifier = seasonal_factors.get(month, 0)
+ base_potential += seasonal_modifier
- return storm_features
+ # Latitude effects
+ if lat < 8: # Too close to equator - weak Coriolis
+ lat_modifier = -20
+ elif lat < 12: # Good for development
+ lat_modifier = 5
+ elif lat < 25: # Prime development zone
+ lat_modifier = 10
+ elif lat < 35: # Marginal
+ lat_modifier = -5
+ else: # Too far north
+ lat_modifier = -25
- except Exception as e:
- logging.error(f"Error in extract_storm_features: {e}")
- import traceback
- traceback.print_exc()
- return None
-
-def perform_dimensionality_reduction(storm_features, method='umap', n_components=2):
- """Perform UMAP or t-SNE dimensionality reduction - FIXED VERSION"""
- try:
- if storm_features is None or storm_features.empty:
- raise ValueError("No storm features provided")
-
- # Select numeric features for clustering - FIXED
- feature_cols = []
- for col in storm_features.columns:
- if col not in ['SID', 'basin'] and storm_features[col].dtype in ['float64', 'int64']:
- # Check if column has valid data
- valid_data = storm_features[col].dropna()
- if len(valid_data) > 0 and valid_data.std() > 0: # Only include columns with variance
- feature_cols.append(col)
-
- if len(feature_cols) == 0:
- raise ValueError("No valid numeric features found for clustering")
-
- logging.info(f"Using {len(feature_cols)} features for clustering: {feature_cols}")
-
- X = storm_features[feature_cols].fillna(0)
-
- # Check if we have enough samples
- if len(X) < 2:
- raise ValueError("Need at least 2 storms for clustering")
-
- # Standardize features
- scaler = StandardScaler()
- X_scaled = scaler.fit_transform(X)
-
- # Perform dimensionality reduction
- if method.lower() == 'umap' and UMAP_AVAILABLE and len(X_scaled) >= 4:
- # UMAP parameters optimized for typhoon data - fixed warnings
- n_neighbors = min(15, len(X_scaled) - 1)
- reducer = umap.UMAP(
- n_components=n_components,
- n_neighbors=n_neighbors,
- min_dist=0.1,
- metric='euclidean',
- random_state=42,
- n_jobs=1 # Explicitly set to avoid warning
- )
- elif method.lower() == 'tsne' and len(X_scaled) >= 4:
- # t-SNE parameters
- perplexity = min(30, len(X_scaled) // 4)
- perplexity = max(1, perplexity) # Ensure perplexity is at least 1
- reducer = TSNE(
- n_components=n_components,
- perplexity=perplexity,
- learning_rate=200,
- n_iter=1000,
- random_state=42
- )
- else:
- # Fallback to PCA
- reducer = PCA(n_components=n_components, random_state=42)
+ base_potential += lat_modifier
- # Fit and transform
- embedding = reducer.fit_transform(X_scaled)
+ # Wind shear estimation (simplified)
+ shear_factor = estimate_wind_shear(lat, lon, month, oni_value)
+ base_potential -= shear_factor
- logging.info(f"Dimensionality reduction successful: {X_scaled.shape} -> {embedding.shape}")
+ # Apply realistic bounds
+ environmental_potential = max(25, min(185, base_potential))
- return embedding, feature_cols, scaler
+ return {
+ 'potential_intensity': environmental_potential,
+ 'sst_contribution': sst_contribution if 'sst_contribution' in locals() else 0,
+ 'slp_contribution': slp_contribution if 'slp_contribution' in locals() else 0,
+ 'enso_modifier': enso_modifier,
+ 'seasonal_modifier': seasonal_modifier,
+ 'latitude_modifier': lat_modifier,
+ 'shear_factor': shear_factor
+ }
except Exception as e:
- logging.error(f"Error in perform_dimensionality_reduction: {e}")
- raise
+ logging.error(f"Error calculating environmental potential: {e}")
+ return {
+ 'potential_intensity': 50,
+ 'error': str(e)
+ }
-def cluster_storms_data(embedding, method='dbscan', eps=0.5, min_samples=3):
- """Cluster storms based on their embedding - FIXED NAME VERSION"""
+def get_climatological_sst(lat, lon, month):
+    """Estimate a climatological SST from latitude band and calendar month.
+
+    A fixed base of 28.0 is shifted by a latitude-band offset and by a
+    per-month offset. ``lon`` is accepted for interface symmetry but is not
+    used in the calculation. Months absent from the table contribute 0.
+    """
+    warm_pool_base = 28.0  # Base warm pool temperature
+
+    # Offset for each calendar month (1-12)
+    monthly_offset = {
+        1: -1.0, 2: -1.2, 3: -0.8, 4: 0.0,
+        5: 0.5, 6: 0.8, 7: 1.0, 8: 1.2,
+        9: 1.0, 10: 0.5, 11: 0.0, 12: -0.5,
+    }
+
+    # Latitude bands are tested in ascending order of their upper bound
+    latitude_offset = (
+        0.5 if lat < 5                                  # Warm near equator
+        else 1.0 if lat < 15                            # Peak warm pool
+        else 0.0 - (lat - 15) * 0.3 if lat < 25         # Cooling northward
+        else -3.0 - (lat - 25) * 0.2                    # Much cooler
+    )
+
+    return warm_pool_base + latitude_offset + monthly_offset.get(month, 0)
+
+def estimate_wind_shear(lat, lon, month, oni_value):
+    """Estimate a shear penalty from location, month and ONI value.
+
+    Three additive terms (location, season, ENSO) are summed and the total
+    is floored at 0. The caller subtracts the result from an intensity
+    potential, so larger values mean a less favourable environment.
+    """
+    # Location term: a lat/lon box gets the lowest value, higher latitudes
+    # grow linearly, everything else sits in between
+    inside_low_shear_box = 5 <= lat <= 20 and 120 <= lon <= 160
+    if inside_low_shear_box:
+        location_term = 5  # kt equivalent intensity reduction
+    elif lat > 25:
+        location_term = 15 + (lat - 25) * 2
+    else:
+        location_term = 10
+
+    # Season term: winter months highest, summer months lowest
+    winter_months = (12, 1, 2, 3)
+    summer_months = (6, 7, 8, 9)
+    season_term = 8 if month in winter_months else (-3 if month in summer_months else 2)
+
+    # ENSO term: positive ONI beyond 0.5 adds shear, negative beyond -0.5 removes it
+    enso_term = (
+        5 + oni_value * 3 if oni_value > 0.5
+        else oni_value * 2 if oni_value < -0.5
+        else 0
+    )
+
+    return max(0, location_term + season_term + enso_term)
+
+# -----------------------------
+# ENHANCED: Realistic Storm Prediction with Oceanic Data
+# -----------------------------
+
+def get_realistic_genesis_locations():
+    """Return the named genesis regions with their reference coordinates.
+
+    The result maps each region name to a dict with 'lat', 'lon' and
+    'description' entries. A new mapping is built on every call.
+    """
+    # (region name, latitude, longitude, description)
+    region_table = (
+        ("Western Pacific Main Development Region", 12.5, 145.0, "Peak activity zone (Guam area)"),
+        ("South China Sea", 15.0, 115.0, "Secondary development region"),
+        ("Philippine Sea", 18.0, 135.0, "Recurving storm region"),
+        ("Marshall Islands", 8.0, 165.0, "Eastern development zone"),
+        ("Monsoon Trough", 10.0, 130.0, "Monsoon-driven genesis"),
+        ("ITCZ Region", 6.0, 140.0, "Near-equatorial development"),
+        ("Subtropical Region", 22.0, 125.0, "Late season development"),
+        ("Bay of Bengal", 15.0, 88.0, "Indian Ocean cyclones"),
+        ("Eastern Pacific", 12.0, -105.0, "Hurricane development zone"),
+        ("Atlantic MDR", 12.0, -45.0, "Main Development Region"),
+    )
+    return {
+        region: {"lat": latitude, "lon": longitude, "description": summary}
+        for region, latitude, longitude, summary in region_table
+    }
+
+def predict_storm_route_and_intensity_with_oceanic_data(
+ genesis_region, month, oni_value,
+ forecast_hours=72, use_real_data=True,
+ models=None, enable_animation=True
+):
+ """
+ Enhanced prediction system integrating real-time oceanic data
+
+ This function provides the most realistic storm development prediction
+ by incorporating current SST and SLP conditions from global datasets.
+ """
try:
- if len(embedding) < 2:
- return np.array([0] * len(embedding)) # Single cluster for insufficient data
-
- if method.lower() == 'dbscan':
- # Adjust min_samples based on data size
- min_samples = min(min_samples, max(2, len(embedding) // 5))
- clusterer = DBSCAN(eps=eps, min_samples=min_samples)
- elif method.lower() == 'kmeans':
- # Adjust n_clusters based on data size
- n_clusters = min(5, max(2, len(embedding) // 3))
- clusterer = KMeans(n_clusters=n_clusters, random_state=42)
- else:
- raise ValueError("Method must be 'dbscan' or 'kmeans'")
+ genesis_locations = get_realistic_genesis_locations()
- clusters = clusterer.fit_predict(embedding)
+ if genesis_region not in genesis_locations:
+ genesis_region = "Western Pacific Main Development Region"
- logging.info(f"Clustering complete: {len(np.unique(clusters))} clusters found")
+ genesis_info = genesis_locations[genesis_region]
+ start_lat = genesis_info["lat"]
+ start_lon = genesis_info["lon"]
- return clusters
+ logging.info(f"Starting enhanced prediction for {genesis_region}")
- except Exception as e:
- logging.error(f"Error in cluster_storms_data: {e}")
- # Return single cluster as fallback
- return np.array([0] * len(embedding))
-
-def create_separate_clustering_plots(storm_features, typhoon_data, method='umap'):
- """Create separate plots for clustering analysis - ENHANCED CLARITY VERSION"""
- try:
- # Validate inputs
- if storm_features is None or storm_features.empty:
- raise ValueError("No storm features available for clustering")
-
- if typhoon_data is None or typhoon_data.empty:
- raise ValueError("No typhoon data available for route visualization")
+ # Determine data bounds for oceanic data fetch
+ lat_buffer = 10 # degrees
+ lon_buffer = 15 # degrees
+ lat_min = start_lat - lat_buffer
+ lat_max = start_lat + lat_buffer
+ lon_min = start_lon - lon_buffer
+ lon_max = start_lon + lon_buffer
- logging.info(f"Starting clustering visualization with {len(storm_features)} storms")
+ # Fetch current oceanic conditions
+ current_date = datetime.now()
+ sst_data = None
+ slp_data = None
- # Perform dimensionality reduction
- embedding, feature_cols, scaler = perform_dimensionality_reduction(storm_features, method)
+ if use_real_data:
+ try:
+ logging.info("Fetching real-time oceanic data...")
+
+ # Fetch SST data
+ sst_data = oceanic_manager.get_sst_data(
+ lat_min, lat_max, lon_min, lon_max,
+ current_date - timedelta(days=1), # Yesterday's data (most recent available)
+ current_date
+ )
+
+ # Fetch SLP data
+ slp_data = oceanic_manager.get_slp_data(
+ lat_min, lat_max, lon_min, lon_max,
+ current_date - timedelta(days=1),
+ current_date
+ )
+
+ logging.info(f"SST fetch: {'Success' if sst_data['success'] else 'Failed'}")
+ logging.info(f"SLP fetch: {'Success' if slp_data['success'] else 'Failed'}")
+
+ except Exception as e:
+ logging.warning(f"Error fetching real-time data, using climatology: {e}")
+ use_real_data = False
- # Perform clustering
- cluster_labels = cluster_storms_data(embedding, 'dbscan')
+ # Initialize results structure
+ results = {
+ 'current_prediction': {},
+ 'route_forecast': [],
+ 'confidence_scores': {},
+ 'environmental_data': {
+ 'sst_source': 'Real-time NOAA OISST' if (sst_data and sst_data['success']) else 'Climatological',
+ 'slp_source': 'Real-time NCEP/NCAR' if (slp_data and slp_data['success']) else 'Climatological',
+ 'use_real_data': use_real_data
+ },
+ 'model_info': 'Enhanced Oceanic-Coupled Model',
+ 'genesis_info': genesis_info
+ }
- # Add clustering results to storm features
- storm_features_viz = storm_features.copy()
- storm_features_viz['cluster'] = cluster_labels
- storm_features_viz['dim1'] = embedding[:, 0]
- storm_features_viz['dim2'] = embedding[:, 1]
+ # Calculate initial environmental potential
+ env_potential = calculate_environmental_intensity_potential(
+ start_lat, start_lon, month, oni_value, sst_data, slp_data
+ )
- # Merge with typhoon data for additional info - SAFE MERGE
- try:
- storm_info = typhoon_data.groupby('SID').first()[['NAME', 'SEASON']].reset_index()
- storm_features_viz = storm_features_viz.merge(storm_info, on='SID', how='left')
- # Fill missing values
- storm_features_viz['NAME'] = storm_features_viz['NAME'].fillna('UNNAMED')
- storm_features_viz['SEASON'] = storm_features_viz['SEASON'].fillna(2000)
- except Exception as merge_error:
- logging.warning(f"Could not merge storm info: {merge_error}")
- storm_features_viz['NAME'] = 'UNNAMED'
- storm_features_viz['SEASON'] = 2000
+ # Realistic starting intensity (TD level) with environmental modulation
+ base_intensity = 30 # Base TD intensity
+ environmental_boost = min(8, max(-5, env_potential['potential_intensity'] - 50) * 0.15)
+ predicted_intensity = base_intensity + environmental_boost
+ predicted_intensity = max(25, min(45, predicted_intensity)) # Keep in TD-weak TS range
- # Get unique clusters and assign distinct colors
- unique_clusters = sorted([c for c in storm_features_viz['cluster'].unique() if c != -1])
- noise_count = len(storm_features_viz[storm_features_viz['cluster'] == -1])
+ # Enhanced genesis conditions
+ results['current_prediction'] = {
+ 'intensity_kt': predicted_intensity,
+ 'pressure_hpa': 1008 - (predicted_intensity - 25) * 0.8,
+ 'category': categorize_typhoon_enhanced(predicted_intensity),
+ 'genesis_region': genesis_region,
+ 'environmental_potential': env_potential['potential_intensity'],
+ 'sst_contribution': env_potential.get('sst_contribution', 0),
+ 'environmental_favorability': 'High' if env_potential['potential_intensity'] > 80 else
+ ('Moderate' if env_potential['potential_intensity'] > 50 else 'Low')
+ }
- # 1. Enhanced clustering scatter plot with clear cluster identification
- fig_cluster = go.Figure()
+ # Enhanced route prediction with environmental coupling
+ current_lat = start_lat
+ current_lon = start_lon
+ current_intensity = predicted_intensity
- # Add noise points first
- if noise_count > 0:
- noise_data = storm_features_viz[storm_features_viz['cluster'] == -1]
- fig_cluster.add_trace(
- go.Scatter(
- x=noise_data['dim1'],
- y=noise_data['dim2'],
- mode='markers',
- marker=dict(color='lightgray', size=8, opacity=0.5, symbol='x'),
- name=f'Noise ({noise_count} storms)',
- hovertemplate=(
- '%{customdata[0]}
'
- 'Season: %{customdata[1]}
'
- 'Cluster: Noise
'
- f'{method.upper()} Dim 1: %{{x:.2f}}
'
- f'{method.upper()} Dim 2: %{{y:.2f}}
'
- ''
- ),
- customdata=np.column_stack((
- noise_data['NAME'].fillna('UNNAMED'),
- noise_data['SEASON'].fillna(2000)
- ))
- )
- )
+ route_points = []
- # Add clusters with distinct colors and shapes
- cluster_symbols = ['circle', 'square', 'diamond', 'triangle-up', 'triangle-down',
- 'pentagon', 'hexagon', 'star', 'cross', 'circle-open']
+ # Historical environmental analysis for better predictions
+ historical_patterns = analyze_historical_environment(typhoon_data, oni_data)
- for i, cluster in enumerate(unique_clusters):
- cluster_data = storm_features_viz[storm_features_viz['cluster'] == cluster]
- color = CLUSTER_COLORS[i % len(CLUSTER_COLORS)]
- symbol = cluster_symbols[i % len(cluster_symbols)]
+ # Track storm development with oceanic data integration
+ for hour in range(0, forecast_hours + 6, 6):
- fig_cluster.add_trace(
- go.Scatter(
- x=cluster_data['dim1'],
- y=cluster_data['dim2'],
- mode='markers',
- marker=dict(color=color, size=10, symbol=symbol, line=dict(width=1, color='white')),
- name=f'Cluster {cluster} ({len(cluster_data)} storms)',
- hovertemplate=(
- '%{customdata[0]}
'
- 'Season: %{customdata[1]}
'
- f'Cluster: {cluster}
'
- f'{method.upper()} Dim 1: %{{x:.2f}}
'
- f'{method.upper()} Dim 2: %{{y:.2f}}
'
- 'Intensity: %{customdata[2]:.0f} kt
'
- ''
- ),
- customdata=np.column_stack((
- cluster_data['NAME'].fillna('UNNAMED'),
- cluster_data['SEASON'].fillna(2000),
- cluster_data['USA_WIND_max'].fillna(0)
- ))
+ # Dynamic oceanic conditions along track
+ if use_real_data and sst_data and slp_data:
+ # Get current environmental conditions
+ current_env = calculate_environmental_intensity_potential(
+ current_lat, current_lon, month, oni_value, sst_data, slp_data
+ )
+ environmental_limit = current_env['potential_intensity']
+ else:
+ # Use climatological estimates
+ current_env = calculate_environmental_intensity_potential(
+ current_lat, current_lon, month, oni_value, None, None
)
+ environmental_limit = current_env['potential_intensity']
+
+ # Enhanced storm motion with environmental steering
+ base_speed = calculate_environmental_steering_speed(
+ current_lat, current_lon, month, oni_value, slp_data
)
-
- fig_cluster.update_layout(
- title=f'Storm Clustering Analysis using {method.upper()}
Each symbol/color represents a distinct storm pattern group',
- xaxis_title=f'{method.upper()} Dimension 1',
- yaxis_title=f'{method.upper()} Dimension 2',
- height=600,
- showlegend=True
- )
-
- # 2. ENHANCED route map with cluster legends and clearer representation
- fig_routes = go.Figure()
-
- # Create a comprehensive legend showing cluster characteristics
- cluster_info_text = []
-
- for i, cluster in enumerate(unique_clusters):
- cluster_storm_ids = storm_features_viz[storm_features_viz['cluster'] == cluster]['SID'].tolist()
- color = CLUSTER_COLORS[i % len(CLUSTER_COLORS)]
- # Get cluster statistics for legend
- cluster_data = storm_features_viz[storm_features_viz['cluster'] == cluster]
- avg_intensity = cluster_data['USA_WIND_max'].mean() if 'USA_WIND_max' in cluster_data.columns else 0
- avg_pressure = cluster_data['USA_PRES_min'].mean() if 'USA_PRES_min' in cluster_data.columns else 1000
+ # Motion vectors with environmental influences
+ lat_tendency, lon_tendency = calculate_motion_tendency(
+ current_lat, current_lon, month, oni_value, hour, slp_data
+ )
- cluster_info_text.append(
- f"Cluster {cluster}: {len(cluster_storm_ids)} storms, "
- f"Avg: {avg_intensity:.0f}kt/{avg_pressure:.0f}hPa"
+ # Update position
+ current_lat += lat_tendency
+ current_lon += lon_tendency
+
+ # Enhanced intensity evolution with environmental limits
+ intensity_tendency = calculate_environmental_intensity_change(
+ current_intensity, environmental_limit, hour, current_lat, current_lon,
+ month, oni_value, sst_data
)
- # Add multiple storms per cluster with clear identification
- storms_added = 0
- for j, sid in enumerate(cluster_storm_ids[:8]): # Show up to 8 storms per cluster
- try:
- storm_track = typhoon_data[typhoon_data['SID'] == sid].sort_values('ISO_TIME')
- if len(storm_track) > 1:
- # Ensure valid coordinates
- valid_coords = storm_track['LAT'].notna() & storm_track['LON'].notna()
- storm_track = storm_track[valid_coords]
-
- if len(storm_track) > 1:
- storm_name = storm_track['NAME'].iloc[0] if pd.notna(storm_track['NAME'].iloc[0]) else 'UNNAMED'
- storm_season = storm_track['SEASON'].iloc[0] if 'SEASON' in storm_track.columns else 'Unknown'
-
- # Vary line style for different storms in same cluster
- line_styles = ['solid', 'dash', 'dot', 'dashdot']
- line_style = line_styles[j % len(line_styles)]
- line_width = 3 if j == 0 else 2 # First storm thicker
-
- fig_routes.add_trace(
- go.Scattergeo(
- lon=storm_track['LON'],
- lat=storm_track['LAT'],
- mode='lines+markers',
- line=dict(color=color, width=line_width, dash=line_style),
- marker=dict(color=color, size=3),
- name=f'C{cluster}: {storm_name} ({storm_season})',
- showlegend=True,
- legendgroup=f'cluster_{cluster}',
- hovertemplate=(
- f'Cluster {cluster}: {storm_name}
'
- 'Lat: %{lat:.1f}°
'
- 'Lon: %{lon:.1f}°
'
- f'Season: {storm_season}
'
- f'Pattern Group: {cluster}
'
- ''
- )
- )
- )
- storms_added += 1
- except Exception as track_error:
- logging.warning(f"Error adding track for storm {sid}: {track_error}")
- continue
+ # Update intensity with environmental constraints
+ current_intensity += intensity_tendency
+ current_intensity = max(20, min(environmental_limit, current_intensity))
- # Add cluster centroid marker
- if len(cluster_storm_ids) > 0:
- # Calculate average genesis location for cluster
- cluster_storm_data = storm_features_viz[storm_features_viz['cluster'] == cluster]
- if 'genesis_lat' in cluster_storm_data.columns and 'genesis_lon' in cluster_storm_data.columns:
- avg_lat = cluster_storm_data['genesis_lat'].mean()
- avg_lon = cluster_storm_data['genesis_lon'].mean()
-
- fig_routes.add_trace(
- go.Scattergeo(
- lon=[avg_lon],
- lat=[avg_lat],
- mode='markers',
- marker=dict(
- color=color,
- size=20,
- symbol='star',
- line=dict(width=2, color='white')
- ),
- name=f'C{cluster} Center',
- showlegend=True,
- legendgroup=f'cluster_{cluster}',
- hovertemplate=(
- f'Cluster {cluster} Genesis Center
'
- f'Avg Position: {avg_lat:.1f}°N, {avg_lon:.1f}°E
'
- f'Storms: {len(cluster_storm_ids)}
'
- f'Avg Intensity: {avg_intensity:.0f} kt
'
- ''
- )
- )
- )
-
- # Update route map layout with enhanced information and LARGER SIZE
- fig_routes.update_layout(
- title=f"Storm Routes by {method.upper()} Clusters
Different line styles = different storms in same cluster | Stars = cluster centers",
- geo=dict(
- projection_type="natural earth",
- showland=True,
- landcolor="LightGray",
- showocean=True,
- oceancolor="LightBlue",
- showcoastlines=True,
- coastlinecolor="Gray",
- center=dict(lat=20, lon=140),
- projection_scale=2.5 # Larger map
- ),
- height=800, # Much larger height
- width=1200, # Wider map
- showlegend=True
- )
-
- # Add cluster info annotation
- cluster_summary = "
".join(cluster_info_text)
- fig_routes.add_annotation(
- text=f"Cluster Summary:
{cluster_summary}",
- xref="paper", yref="paper",
- x=0.02, y=0.98,
- showarrow=False,
- align="left",
- bgcolor="rgba(255,255,255,0.8)",
- bordercolor="gray",
- borderwidth=1
- )
-
- # 3. Enhanced pressure evolution plot with cluster identification
- fig_pressure = go.Figure()
-
- for i, cluster in enumerate(unique_clusters):
- cluster_storm_ids = storm_features_viz[storm_features_viz['cluster'] == cluster]['SID'].tolist()
- color = CLUSTER_COLORS[i % len(CLUSTER_COLORS)]
+ # Enhanced confidence calculation
+ confidence = calculate_dynamic_confidence(
+ hour, current_lat, current_lon, use_real_data,
+ sst_data['success'] if sst_data else False,
+ slp_data['success'] if slp_data else False
+ )
- cluster_pressures = []
- for j, sid in enumerate(cluster_storm_ids[:5]): # Limit to 5 storms per cluster
- try:
- storm_track = typhoon_data[typhoon_data['SID'] == sid].sort_values('ISO_TIME')
- if len(storm_track) > 1 and 'USA_PRES' in storm_track.columns:
- pressure_values = pd.to_numeric(storm_track['USA_PRES'], errors='coerce').dropna()
- if len(pressure_values) > 0:
- storm_name = storm_track['NAME'].iloc[0] if pd.notna(storm_track['NAME'].iloc[0]) else 'UNNAMED'
- time_hours = range(len(pressure_values))
-
- # Normalize time to show relative progression
- normalized_time = np.linspace(0, 100, len(pressure_values))
-
- fig_pressure.add_trace(
- go.Scatter(
- x=normalized_time,
- y=pressure_values,
- mode='lines',
- line=dict(color=color, width=2, dash='solid' if j == 0 else 'dash'),
- name=f'C{cluster}: {storm_name}' if j == 0 else None,
- showlegend=(j == 0),
- legendgroup=f'pressure_cluster_{cluster}',
- hovertemplate=(
- f'Cluster {cluster}: {storm_name}
'
- 'Progress: %{x:.0f}%
'
- 'Pressure: %{y:.0f} hPa
'
- ''
- ),
- opacity=0.8 if j == 0 else 0.5
- )
- )
- cluster_pressures.extend(pressure_values)
- except Exception as e:
- continue
+ # Determine development stage with environmental context
+ stage = get_environmental_development_stage(hour, current_intensity, environmental_limit)
- # Add cluster average line
- if cluster_pressures:
- avg_pressure = np.mean(cluster_pressures)
- fig_pressure.add_hline(
- y=avg_pressure,
- line_dash="dot",
- line_color=color,
- annotation_text=f"C{cluster} Avg: {avg_pressure:.0f}",
- annotation_position="right"
+ # Environmental metadata
+ if sst_data and sst_data['success']:
+ current_sst = oceanic_manager.interpolate_data_to_point(
+ sst_data, current_lat, current_lon, 'sst'
)
-
- fig_pressure.update_layout(
- title=f"Pressure Evolution by {method.upper()} Clusters
Normalized timeline (0-100%) | Dotted lines = cluster averages",
- xaxis_title="Storm Progress (%)",
- yaxis_title="Pressure (hPa)",
- height=500
- )
-
- # 4. Enhanced wind evolution plot
- fig_wind = go.Figure()
-
- for i, cluster in enumerate(unique_clusters):
- cluster_storm_ids = storm_features_viz[storm_features_viz['cluster'] == cluster]['SID'].tolist()
- color = CLUSTER_COLORS[i % len(CLUSTER_COLORS)]
-
- cluster_winds = []
- for j, sid in enumerate(cluster_storm_ids[:5]): # Limit to 5 storms per cluster
- try:
- storm_track = typhoon_data[typhoon_data['SID'] == sid].sort_values('ISO_TIME')
- if len(storm_track) > 1 and 'USA_WIND' in storm_track.columns:
- wind_values = pd.to_numeric(storm_track['USA_WIND'], errors='coerce').dropna()
- if len(wind_values) > 0:
- storm_name = storm_track['NAME'].iloc[0] if pd.notna(storm_track['NAME'].iloc[0]) else 'UNNAMED'
-
- # Normalize time to show relative progression
- normalized_time = np.linspace(0, 100, len(wind_values))
-
- fig_wind.add_trace(
- go.Scatter(
- x=normalized_time,
- y=wind_values,
- mode='lines',
- line=dict(color=color, width=2, dash='solid' if j == 0 else 'dash'),
- name=f'C{cluster}: {storm_name}' if j == 0 else None,
- showlegend=(j == 0),
- legendgroup=f'wind_cluster_{cluster}',
- hovertemplate=(
- f'Cluster {cluster}: {storm_name}
'
- 'Progress: %{x:.0f}%
'
- 'Wind: %{y:.0f} kt
'
- ''
- ),
- opacity=0.8 if j == 0 else 0.5
- )
- )
- cluster_winds.extend(wind_values)
- except Exception as e:
- continue
+ else:
+ current_sst = get_climatological_sst(current_lat, current_lon, month)
- # Add cluster average line
- if cluster_winds:
- avg_wind = np.mean(cluster_winds)
- fig_wind.add_hline(
- y=avg_wind,
- line_dash="dot",
- line_color=color,
- annotation_text=f"C{cluster} Avg: {avg_wind:.0f}",
- annotation_position="right"
+ if slp_data and slp_data['success']:
+ current_slp = oceanic_manager.interpolate_data_to_point(
+ slp_data, current_lat, current_lon, 'slp'
)
+ current_slp = current_slp if current_slp > 500 else current_slp / 100 # Convert to hPa
+ else:
+ current_slp = 1013 # Standard atmosphere
+
+ route_points.append({
+ 'hour': hour,
+ 'lat': current_lat,
+ 'lon': current_lon,
+ 'intensity_kt': current_intensity,
+ 'category': categorize_typhoon_enhanced(current_intensity),
+ 'confidence': confidence,
+ 'development_stage': stage,
+ 'forward_speed_kmh': base_speed * 111, # Convert to km/h
+ 'pressure_hpa': max(900, 1013 - (current_intensity - 25) * 0.9)
+ })
- fig_wind.update_layout(
- title=f"Wind Speed Evolution by {method.upper()} Clusters
Normalized timeline (0-100%) | Dotted lines = cluster averages",
- xaxis_title="Storm Progress (%)",
- yaxis_title="Wind Speed (kt)",
- height=500
- )
+ results['route_forecast'] = route_points
- # Generate enhanced cluster statistics with clear explanations
- try:
- stats_text = f"ENHANCED {method.upper()} CLUSTER ANALYSIS RESULTS\n" + "="*60 + "\n\n"
- stats_text += f"🔍 DIMENSIONALITY REDUCTION: {method.upper()}\n"
- stats_text += f"🎯 CLUSTERING ALGORITHM: DBSCAN (automatic pattern discovery)\n"
- stats_text += f"📊 TOTAL STORMS ANALYZED: {len(storm_features_viz)}\n"
- stats_text += f"🎨 CLUSTERS DISCOVERED: {len(unique_clusters)}\n"
- if noise_count > 0:
- stats_text += f"❌ NOISE POINTS: {noise_count} storms (don't fit clear patterns)\n"
- stats_text += "\n"
-
- for cluster in sorted(storm_features_viz['cluster'].unique()):
- cluster_data = storm_features_viz[storm_features_viz['cluster'] == cluster]
- storm_count = len(cluster_data)
-
- if cluster == -1:
- stats_text += f"❌ NOISE GROUP: {storm_count} storms\n"
- stats_text += " → These storms don't follow the main patterns\n"
- stats_text += " → May represent unique or rare storm behaviors\n\n"
- continue
-
- stats_text += f"🎯 CLUSTER {cluster}: {storm_count} storms\n"
- stats_text += f" 🎨 Color: {CLUSTER_COLORS[cluster % len(CLUSTER_COLORS)]}\n"
-
- # Add detailed statistics if available
- if 'USA_WIND_max' in cluster_data.columns:
- wind_mean = cluster_data['USA_WIND_max'].mean()
- wind_std = cluster_data['USA_WIND_max'].std()
- stats_text += f" 💨 Intensity: {wind_mean:.1f} ± {wind_std:.1f} kt\n"
-
- if 'USA_PRES_min' in cluster_data.columns:
- pres_mean = cluster_data['USA_PRES_min'].mean()
- pres_std = cluster_data['USA_PRES_min'].std()
- stats_text += f" 🌡️ Pressure: {pres_mean:.1f} ± {pres_std:.1f} hPa\n"
-
- if 'track_length' in cluster_data.columns:
- track_mean = cluster_data['track_length'].mean()
- stats_text += f" 📏 Avg Track Length: {track_mean:.1f} points\n"
-
- if 'genesis_lat' in cluster_data.columns and 'genesis_lon' in cluster_data.columns:
- lat_mean = cluster_data['genesis_lat'].mean()
- lon_mean = cluster_data['genesis_lon'].mean()
- stats_text += f" 🎯 Genesis Region: {lat_mean:.1f}°N, {lon_mean:.1f}°E\n"
-
- # Add interpretation
- if wind_mean < 50:
- stats_text += " 💡 Pattern: Weaker storm group\n"
- elif wind_mean > 100:
- stats_text += " 💡 Pattern: Intense storm group\n"
- else:
- stats_text += " 💡 Pattern: Moderate intensity group\n"
-
- stats_text += "\n"
-
- # Add explanation of the analysis
- stats_text += "📖 INTERPRETATION GUIDE:\n"
- stats_text += f"• {method.upper()} reduces storm characteristics to 2D for visualization\n"
- stats_text += "• DBSCAN finds natural groupings without preset number of clusters\n"
- stats_text += "• Each cluster represents storms with similar behavior patterns\n"
- stats_text += "• Route colors match cluster colors from the similarity plot\n"
- stats_text += "• Stars on map show average genesis locations for each cluster\n"
- stats_text += "• Temporal plots show how each cluster behaves over time\n\n"
-
- stats_text += f"🔧 FEATURES USED FOR CLUSTERING:\n"
- stats_text += f" Total: {len(feature_cols)} storm characteristics\n"
- stats_text += f" Including: intensity, pressure, track shape, genesis location\n"
-
- except Exception as stats_error:
- stats_text = f"Error generating enhanced statistics: {str(stats_error)}"
+ # Realistic confidence scores
+ results['confidence_scores'] = {
+ 'genesis': 0.88,
+ 'early_development': 0.82,
+ 'position_24h': 0.85,
+ 'position_48h': 0.78,
+ 'position_72h': 0.68,
+ 'intensity_24h': 0.75,
+ 'intensity_48h': 0.65,
+ 'intensity_72h': 0.55,
+ 'long_term': max(0.3, 0.8 - (forecast_hours / 240) * 0.5)
+ }
- return fig_cluster, fig_routes, fig_pressure, fig_wind, stats_text
+ # Model information
+ results['model_info'] = f"Enhanced Realistic Model - {genesis_region}"
- except Exception as e:
- logging.error(f"Error in enhanced clustering analysis: {e}")
- import traceback
- traceback.print_exc()
+ return results
- error_fig = go.Figure()
- error_fig.add_annotation(
- text=f"Error in clustering analysis: {str(e)}",
- xref="paper", yref="paper",
- x=0.5, y=0.5, xanchor='center', yanchor='middle',
- showarrow=False, font_size=16
- )
- return error_fig, error_fig, error_fig, error_fig, f"Error in clustering: {str(e)}"
+ except Exception as e:
+ logging.error(f"Realistic prediction error: {str(e)}")
+ return {
+ 'error': f"Prediction error: {str(e)}",
+ 'current_prediction': {'intensity_kt': 30, 'category': 'Tropical Depression'},
+ 'route_forecast': [],
+ 'confidence_scores': {},
+ 'model_info': 'Error in prediction'
+ }
# -----------------------------
-# ENHANCED: Advanced Prediction System with Route Forecasting
+# FIXED: ADVANCED ML FEATURES WITH ROBUST ERROR HANDLING
# -----------------------------
-def create_advanced_prediction_model(typhoon_data):
- """Create advanced ML model for intensity and route prediction"""
+def extract_storm_features(typhoon_data):
+ """Build one row of summary features per storm (SID) for clustering: wind/pressure/location statistics, genesis point, track distance and curvature, season, basin. Returns a DataFrame, or None when typhoon_data is None/empty, no features could be built, or an error occurs (logged)."""
try:
if typhoon_data is None or typhoon_data.empty:
- return None, "No data available for model training"
-
- # Prepare training data
- features = []
- targets = []
+ logging.error("No typhoon data provided for feature extraction")
+ return None
+ # One feature dict per storm; every key below gets a default so all rows share the same columns
+ basic_features = []
for sid in typhoon_data['SID'].unique():
- storm_data = typhoon_data[typhoon_data['SID'] == sid].sort_values('ISO_TIME')
+ storm_data = typhoon_data[typhoon_data['SID'] == sid].copy()
- if len(storm_data) < 3: # Need at least 3 points for prediction
+ if len(storm_data) == 0:
continue
- for i in range(len(storm_data) - 1):
- current = storm_data.iloc[i]
- next_point = storm_data.iloc[i + 1]
-
- # Extract features (current state)
- feature_row = []
-
- # Current position
- feature_row.extend([
- current.get('LAT', 20),
- current.get('LON', 140)
- ])
-
- # Current intensity
- feature_row.extend([
- current.get('USA_WIND', 30),
- current.get('USA_PRES', 1000)
- ])
-
- # Time features
- if 'ISO_TIME' in current and pd.notna(current['ISO_TIME']):
- month = current['ISO_TIME'].month
- day_of_year = current['ISO_TIME'].dayofyear
+ # Initialize feature dict with safe defaults
+ features = {'SID': sid}
+
+ # Wind statistics
+ if 'USA_WIND' in storm_data.columns:
+ wind_values = pd.to_numeric(storm_data['USA_WIND'], errors='coerce').dropna()
+ if len(wind_values) > 0:
+ features['USA_WIND_max'] = wind_values.max()
+ features['USA_WIND_mean'] = wind_values.mean()
+ features['USA_WIND_std'] = wind_values.std() if len(wind_values) > 1 else 0
else:
- month = 9 # Peak season default
- day_of_year = 250
-
- feature_row.extend([month, day_of_year])
+ features['USA_WIND_max'] = 30
+ features['USA_WIND_mean'] = 30
+ features['USA_WIND_std'] = 0
+ else:
+ features['USA_WIND_max'] = 30
+ features['USA_WIND_mean'] = 30
+ features['USA_WIND_std'] = 0
- # Motion features (if previous point exists)
- if i > 0:
- prev = storm_data.iloc[i - 1]
- dlat = current.get('LAT', 20) - prev.get('LAT', 20)
- dlon = current.get('LON', 140) - prev.get('LON', 140)
- speed = np.sqrt(dlat**2 + dlon**2)
- bearing = np.arctan2(dlat, dlon)
+ # Pressure statistics
+ if 'USA_PRES' in storm_data.columns:
+ pres_values = pd.to_numeric(storm_data['USA_PRES'], errors='coerce').dropna()
+ if len(pres_values) > 0:
+ features['USA_PRES_min'] = pres_values.min()
+ features['USA_PRES_mean'] = pres_values.mean()
+ features['USA_PRES_std'] = pres_values.std() if len(pres_values) > 1 else 0
else:
- speed = 0
- bearing = 0
-
- feature_row.extend([speed, bearing])
-
- features.append(feature_row)
+ features['USA_PRES_min'] = 1000
+ features['USA_PRES_mean'] = 1000
+ features['USA_PRES_std'] = 0
+ else:
+ features['USA_PRES_min'] = 1000
+ features['USA_PRES_mean'] = 1000
+ features['USA_PRES_std'] = 0
+
+ # Location statistics
+ if 'LAT' in storm_data.columns and 'LON' in storm_data.columns:
+ coords = storm_data[['LAT', 'LON']].apply(pd.to_numeric, errors='coerce').dropna() # keep only fixes where BOTH coordinates are valid, so LAT/LON stay paired
+ lat_values, lon_values = coords['LAT'], coords['LON']
- # Target: next position and intensity
- targets.append([
- next_point.get('LAT', 20),
- next_point.get('LON', 140),
- next_point.get('USA_WIND', 30)
- ])
-
- if len(features) < 10: # Need sufficient training data
- return None, "Insufficient data for model training"
-
- # Train model
- X = np.array(features)
- y = np.array(targets)
-
- # Split data
- X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
-
- # Create separate models for position and intensity
- models = {}
-
- # Position model (lat, lon)
- pos_model = RandomForestRegressor(n_estimators=100, random_state=42)
- pos_model.fit(X_train, y_train[:, :2])
- models['position'] = pos_model
-
- # Intensity model (wind speed)
- int_model = RandomForestRegressor(n_estimators=100, random_state=42)
- int_model.fit(X_train, y_train[:, 2])
- models['intensity'] = int_model
-
- # Calculate model performance
- pos_pred = pos_model.predict(X_test)
- int_pred = int_model.predict(X_test)
-
- pos_mae = mean_absolute_error(y_test[:, :2], pos_pred)
- int_mae = mean_absolute_error(y_test[:, 2], int_pred)
-
- model_info = f"Position MAE: {pos_mae:.2f}°, Intensity MAE: {int_mae:.2f} kt"
-
- return models, model_info
-
- except Exception as e:
- return None, f"Error creating prediction model: {str(e)}"
-
-def get_realistic_genesis_locations():
- """Get realistic typhoon genesis regions based on climatology"""
- return {
- "Western Pacific Main Development Region": {"lat": 12.5, "lon": 145.0, "description": "Peak activity zone (Guam area)"},
- "South China Sea": {"lat": 15.0, "lon": 115.0, "description": "Secondary development region"},
- "Philippine Sea": {"lat": 18.0, "lon": 135.0, "description": "Recurving storm region"},
- "Marshall Islands": {"lat": 8.0, "lon": 165.0, "description": "Eastern development zone"},
- "Monsoon Trough": {"lat": 10.0, "lon": 130.0, "description": "Monsoon-driven genesis"},
- "ITCZ Region": {"lat": 6.0, "lon": 140.0, "description": "Near-equatorial development"},
- "Subtropical Region": {"lat": 22.0, "lon": 125.0, "description": "Late season development"},
- "Bay of Bengal": {"lat": 15.0, "lon": 88.0, "description": "Indian Ocean cyclones"},
- "Eastern Pacific": {"lat": 12.0, "lon": -105.0, "description": "Hurricane development zone"},
- "Atlantic MDR": {"lat": 12.0, "lon": -45.0, "description": "Main Development Region"}
- }
-
-def predict_storm_route_and_intensity_realistic(genesis_region, month, oni_value, models=None, forecast_hours=72, use_advanced_physics=True):
- """Realistic prediction with proper typhoon speeds and development"""
- try:
- genesis_locations = get_realistic_genesis_locations()
-
- if genesis_region not in genesis_locations:
- genesis_region = "Western Pacific Main Development Region" # Default
-
- genesis_info = genesis_locations[genesis_region]
- lat = genesis_info["lat"]
- lon = genesis_info["lon"]
+ if len(lat_values) > 0 and len(lon_values) > 0:
+ features['LAT_mean'] = lat_values.mean()
+ features['LAT_std'] = lat_values.std() if len(lat_values) > 1 else 0
+ features['LAT_max'] = lat_values.max()
+ features['LAT_min'] = lat_values.min()
+ features['LON_mean'] = lon_values.mean()
+ features['LON_std'] = lon_values.std() if len(lon_values) > 1 else 0
+ features['LON_max'] = lon_values.max()
+ features['LON_min'] = lon_values.min()
+
+ # Genesis location (first valid position)
+ features['genesis_lat'] = lat_values.iloc[0]
+ features['genesis_lon'] = lon_values.iloc[0]
+ features['genesis_intensity'] = features['USA_WIND_mean'] # Use mean as fallback
+
+ # Track characteristics
+ features['lat_range'] = lat_values.max() - lat_values.min()
+ features['lon_range'] = lon_values.max() - lon_values.min()
+
+ # Track distance: sum of planar step lengths in raw LAT/LON units (not great-circle)
+ if len(lat_values) > 1:
+ distances = []
+ for i in range(1, len(lat_values)):
+ dlat = lat_values.iloc[i] - lat_values.iloc[i-1]
+ dlon = lon_values.iloc[i] - lon_values.iloc[i-1]
+ distances.append(np.sqrt(dlat**2 + dlon**2))
+ features['total_distance'] = sum(distances)
+ features['avg_speed'] = np.mean(distances) if distances else 0
+ else:
+ features['total_distance'] = 0
+ features['avg_speed'] = 0
+
+ # Track curvature: mean absolute heading change between consecutive steps, in radians
+ if len(lat_values) > 2:
+ bearing_changes = []
+ for i in range(1, len(lat_values)-1):
+ dlat1 = lat_values.iloc[i] - lat_values.iloc[i-1]
+ dlon1 = lon_values.iloc[i] - lon_values.iloc[i-1]
+ dlat2 = lat_values.iloc[i+1] - lat_values.iloc[i]
+ dlon2 = lon_values.iloc[i+1] - lon_values.iloc[i]
+
+ angle1 = np.arctan2(dlat1, dlon1)
+ angle2 = np.arctan2(dlat2, dlon2)
+ change = abs((angle2 - angle1 + np.pi) % (2 * np.pi) - np.pi) # wrap to [0, pi] so a heading flip across due west is not counted as ~2*pi
+ bearing_changes.append(change)
+
+ features['avg_curvature'] = np.mean(bearing_changes) if bearing_changes else 0
+ else:
+ features['avg_curvature'] = 0
+ else:
+ # Default location values
+ features.update({
+ 'LAT_mean': 20, 'LAT_std': 0, 'LAT_max': 20, 'LAT_min': 20,
+ 'LON_mean': 140, 'LON_std': 0, 'LON_max': 140, 'LON_min': 140,
+ 'genesis_lat': 20, 'genesis_lon': 140, 'genesis_intensity': 30,
+ 'lat_range': 0, 'lon_range': 0, 'total_distance': 0,
+ 'avg_speed': 0, 'avg_curvature': 0
+ })
+ else:
+ # Default location values if columns missing
+ features.update({
+ 'LAT_mean': 20, 'LAT_std': 0, 'LAT_max': 20, 'LAT_min': 20,
+ 'LON_mean': 140, 'LON_std': 0, 'LON_max': 140, 'LON_min': 140,
+ 'genesis_lat': 20, 'genesis_lon': 140, 'genesis_intensity': 30,
+ 'lat_range': 0, 'lon_range': 0, 'total_distance': 0,
+ 'avg_speed': 0, 'avg_curvature': 0
+ })
+
+ # Track length
+ features['track_length'] = len(storm_data)
+
+ # Add seasonal information
+ if 'SEASON' in storm_data.columns:
+ features['season'] = storm_data['SEASON'].iloc[0]
+ else:
+ features['season'] = 2000
+
+ # Add basin information
+ if 'BASIN' in storm_data.columns:
+ features['basin'] = storm_data['BASIN'].iloc[0]
+ elif 'SID' in storm_data.columns:
+ features['basin'] = sid[:2] if len(sid) >= 2 else 'WP'
+ else:
+ features['basin'] = 'WP'
+
+ basic_features.append(features)
- results = {
- 'current_prediction': {},
- 'route_forecast': [],
- 'confidence_scores': {},
- 'model_info': 'Realistic Genesis Model',
- 'genesis_info': genesis_info
- }
+ if not basic_features:
+ logging.error("No valid storm features could be extracted")
+ return None
+
+ # Convert to DataFrame
+ storm_features = pd.DataFrame(basic_features)
- # REALISTIC starting intensity - Tropical Depression level
- base_intensity = 30 # Start at TD level (25-35 kt)
+ # Ensure all numeric columns are properly typed
+ numeric_columns = [col for col in storm_features.columns if col not in ['SID', 'basin']]
+ for col in numeric_columns:
+ storm_features[col] = pd.to_numeric(storm_features[col], errors='coerce').fillna(0)
- # Environmental factors for genesis
- if oni_value > 1.0: # Strong El Niño - suppressed development
- intensity_modifier = -6
- elif oni_value > 0.5: # Moderate El Niño
- intensity_modifier = -3
- elif oni_value < -1.0: # Strong La Niña - enhanced development
- intensity_modifier = +8
- elif oni_value < -0.5: # Moderate La Niña
- intensity_modifier = +5
- else: # Neutral
- intensity_modifier = oni_value * 2
+ logging.info(f"Successfully extracted features for {len(storm_features)} storms")
+ logging.info(f"Feature columns: {list(storm_features.columns)}")
- # Seasonal genesis effects
- seasonal_factors = {
- 1: -8, 2: -6, 3: -4, 4: -2, 5: 2, 6: 6,
- 7: 10, 8: 12, 9: 15, 10: 10, 11: 4, 12: -5
- }
- seasonal_modifier = seasonal_factors.get(month, 0)
+ return storm_features
- # Genesis region favorability
- region_factors = {
- "Western Pacific Main Development Region": 8,
- "South China Sea": 4,
- "Philippine Sea": 5,
- "Marshall Islands": 7,
- "Monsoon Trough": 6,
- "ITCZ Region": 3,
- "Subtropical Region": 2,
- "Bay of Bengal": 4,
- "Eastern Pacific": 6,
- "Atlantic MDR": 5
- }
- region_modifier = region_factors.get(genesis_region, 0)
+ except Exception as e:
+ logging.error(f"Error in extract_storm_features: {e}")
+ import traceback
+ traceback.print_exc()
+ return None
+
+def perform_dimensionality_reduction(storm_features, method='umap', n_components=2):
+ """Perform UMAP or t-SNE dimensionality reduction - FIXED VERSION"""
+ try:
+ if storm_features is None or storm_features.empty:
+ raise ValueError("No storm features provided")
- # Calculate realistic starting intensity (TD level)
- predicted_intensity = base_intensity + intensity_modifier + seasonal_modifier + region_modifier
- predicted_intensity = max(25, min(40, predicted_intensity)) # Keep in TD-weak TS range
+ # Select numeric features for clustering - FIXED
+ feature_cols = []
+ for col in storm_features.columns:
+ if col not in ['SID', 'basin'] and storm_features[col].dtype in ['float64', 'int64']:
+ # Check if column has valid data
+ valid_data = storm_features[col].dropna()
+ if len(valid_data) > 0 and valid_data.std() > 0: # Only include columns with variance
+ feature_cols.append(col)
- # Add realistic uncertainty for genesis
- intensity_uncertainty = np.random.normal(0, 2)
- predicted_intensity += intensity_uncertainty
- predicted_intensity = max(25, min(38, predicted_intensity)) # TD range
+ if len(feature_cols) == 0:
+ raise ValueError("No valid numeric features found for clustering")
- results['current_prediction'] = {
- 'intensity_kt': predicted_intensity,
- 'pressure_hpa': 1008 - (predicted_intensity - 25) * 0.6, # Realistic TD pressure
- 'category': categorize_typhoon_enhanced(predicted_intensity),
- 'genesis_region': genesis_region
- }
+ logging.info(f"Using {len(feature_cols)} features for clustering: {feature_cols}")
- # REALISTIC route prediction with proper typhoon speeds
- current_lat = lat
- current_lon = lon
- current_intensity = predicted_intensity
+ X = storm_features[feature_cols].fillna(0)
- route_points = []
+ # Check if we have enough samples
+ if len(X) < 2:
+ raise ValueError("Need at least 2 storms for clustering")
- # Track storm development over time with REALISTIC SPEEDS
- for hour in range(0, forecast_hours + 6, 6):
-
- # REALISTIC typhoon motion - much faster speeds
- # Typical typhoon forward speed: 15-25 km/h (0.14-0.23°/hour)
-
- # Base forward speed depends on latitude and storm intensity
- if current_lat < 20: # Low latitude - slower
- base_speed = 0.12 # ~13 km/h
- elif current_lat < 30: # Mid latitude - moderate
- base_speed = 0.18 # ~20 km/h
- else: # High latitude - faster
- base_speed = 0.25 # ~28 km/h
-
- # Intensity affects speed (stronger storms can move faster)
- intensity_speed_factor = 1.0 + (current_intensity - 50) / 200
- base_speed *= max(0.8, min(1.4, intensity_speed_factor))
-
- # Beta drift (Coriolis effect) - realistic values
- beta_drift_lat = 0.02 * np.sin(np.radians(current_lat))
- beta_drift_lon = -0.05 * np.cos(np.radians(current_lat))
-
- # Seasonal steering patterns with realistic speeds
- if month in [6, 7, 8, 9]: # Peak season
- ridge_strength = 1.2
- ridge_position = 32 + 4 * np.sin(2 * np.pi * (month - 6) / 4)
- else: # Off season
- ridge_strength = 0.9
- ridge_position = 28
-
- # REALISTIC motion based on position relative to subtropical ridge
- if current_lat < ridge_position - 10: # Well south of ridge - westward movement
- lat_tendency = base_speed * 0.3 + beta_drift_lat # Slight poleward
- lon_tendency = -base_speed * 0.9 + beta_drift_lon # Strong westward
- elif current_lat > ridge_position - 3: # Near ridge - recurvature
- lat_tendency = base_speed * 0.8 + beta_drift_lat # Strong poleward
- lon_tendency = base_speed * 0.4 + beta_drift_lon # Eastward
- else: # In between - normal WNW motion
- lat_tendency = base_speed * 0.4 + beta_drift_lat # Moderate poleward
- lon_tendency = -base_speed * 0.7 + beta_drift_lon # Moderate westward
-
- # ENSO steering modulation (realistic effects)
- if oni_value > 0.5: # El Niño - more eastward/poleward motion
- lon_tendency += 0.05
- lat_tendency += 0.02
- elif oni_value < -0.5: # La Niña - more westward motion
- lon_tendency -= 0.08
- lat_tendency -= 0.01
-
- # Add motion uncertainty that grows with time (realistic error growth)
- motion_uncertainty = 0.02 + (hour / 120) * 0.04
- lat_noise = np.random.normal(0, motion_uncertainty)
- lon_noise = np.random.normal(0, motion_uncertainty)
-
- # Update position with realistic speeds
- current_lat += lat_tendency + lat_noise
- current_lon += lon_tendency + lon_noise
-
- # REALISTIC intensity evolution with proper development cycles
-
- # Development phase (first 48-72 hours) - realistic intensification
- if hour <= 48:
- if current_intensity < 50: # Still weak - rapid development possible
- if 10 <= current_lat <= 25 and 115 <= current_lon <= 165: # Favorable environment
- intensity_tendency = 4.5 if current_intensity < 35 else 3.0
- elif 120 <= current_lon <= 155 and 15 <= current_lat <= 20: # Best environment
- intensity_tendency = 6.0 if current_intensity < 40 else 4.0
- else:
- intensity_tendency = 2.0
- elif current_intensity < 80: # Moderate intensity
- intensity_tendency = 2.5 if (120 <= current_lon <= 155 and 10 <= current_lat <= 25) else 1.0
- else: # Already strong
- intensity_tendency = 1.0
-
- # Mature phase (48-120 hours) - peak intensity maintenance
- elif hour <= 120:
- if current_lat < 25 and current_lon > 120: # Still in favorable waters
- if current_intensity < 120:
- intensity_tendency = 1.5
- else:
- intensity_tendency = 0.0 # Maintain intensity
- else:
- intensity_tendency = -1.5
-
- # Extended phase (120+ hours) - gradual weakening
- else:
- if current_lat < 30 and current_lon > 115:
- intensity_tendency = -2.0 # Slow weakening
- else:
- intensity_tendency = -3.5 # Faster weakening
-
- # Environmental modulation (realistic effects)
- if current_lat > 35: # High latitude - rapid weakening
- intensity_tendency -= 12
- elif current_lat > 30: # Moderate latitude
- intensity_tendency -= 5
- elif current_lon < 110: # Land interaction
- intensity_tendency -= 15
- elif 125 <= current_lon <= 155 and 10 <= current_lat <= 25: # Warm pool
- intensity_tendency += 2
- elif 160 <= current_lon <= 180 and 15 <= current_lat <= 30: # Still warm
- intensity_tendency += 1
-
- # SST effects (realistic temperature impact)
- if current_lat < 8: # Very warm but weak Coriolis
- intensity_tendency += 0.5
- elif 8 <= current_lat <= 20: # Sweet spot for development
- intensity_tendency += 2.0
- elif 20 < current_lat <= 30: # Marginal
- intensity_tendency -= 1.0
- elif current_lat > 30: # Cool waters
- intensity_tendency -= 4.0
-
- # Shear effects (simplified but realistic)
- if month in [12, 1, 2, 3]: # High shear season
- intensity_tendency -= 2.0
- elif month in [7, 8, 9]: # Low shear season
- intensity_tendency += 1.0
-
- # Update intensity with realistic bounds and variability
- intensity_noise = np.random.normal(0, 1.5) # Small random fluctuations
- current_intensity += intensity_tendency + intensity_noise
- current_intensity = max(20, min(185, current_intensity)) # Realistic range
-
- # Calculate confidence based on forecast time and environment
- base_confidence = 0.92
- time_penalty = (hour / 120) * 0.45
- environment_penalty = 0.15 if current_lat > 30 or current_lon < 115 else 0
- confidence = max(0.25, base_confidence - time_penalty - environment_penalty)
-
- # Determine development stage
- if hour <= 24:
- stage = 'Genesis'
- elif hour <= 72:
- stage = 'Development'
- elif hour <= 120:
- stage = 'Mature'
- elif hour <= 240:
- stage = 'Extended'
- else:
- stage = 'Long-term'
-
- route_points.append({
- 'hour': hour,
- 'lat': current_lat,
- 'lon': current_lon,
- 'intensity_kt': current_intensity,
- 'category': categorize_typhoon_enhanced(current_intensity),
- 'confidence': confidence,
- 'development_stage': stage,
- 'forward_speed_kmh': base_speed * 111, # Convert to km/h
- 'pressure_hpa': max(900, 1013 - (current_intensity - 25) * 0.9)
- })
+ # Standardize features
+ scaler = StandardScaler()
+ X_scaled = scaler.fit_transform(X)
- results['route_forecast'] = route_points
+ # Perform dimensionality reduction
+ if method.lower() == 'umap' and UMAP_AVAILABLE and len(X_scaled) >= 4:
+ # UMAP parameters optimized for typhoon data - fixed warnings
+ n_neighbors = min(15, len(X_scaled) - 1)
+ reducer = umap.UMAP(
+ n_components=n_components,
+ n_neighbors=n_neighbors,
+ min_dist=0.1,
+ metric='euclidean',
+ random_state=42,
+ n_jobs=1 # Explicitly set to avoid warning
+ )
+ elif method.lower() == 'tsne' and len(X_scaled) >= 4:
+ # t-SNE parameters
+ perplexity = min(30, len(X_scaled) // 4)
+ perplexity = max(1, perplexity) # Ensure perplexity is at least 1
+ reducer = TSNE(
+ n_components=n_components,
+ perplexity=perplexity,
+ learning_rate=200,
+ n_iter=1000,
+ random_state=42
+ )
+ else:
+ # Fallback to PCA
+ reducer = PCA(n_components=n_components, random_state=42)
- # Realistic confidence scores
- results['confidence_scores'] = {
- 'genesis': 0.88,
- 'early_development': 0.82,
- 'position_24h': 0.85,
- 'position_48h': 0.78,
- 'position_72h': 0.68,
- 'intensity_24h': 0.75,
- 'intensity_48h': 0.65,
- 'intensity_72h': 0.55,
- 'long_term': max(0.3, 0.8 - (forecast_hours / 240) * 0.5)
- }
+ # Fit and transform
+ embedding = reducer.fit_transform(X_scaled)
- # Model information
- results['model_info'] = f"Enhanced Realistic Model - {genesis_region}"
+ logging.info(f"Dimensionality reduction successful: {X_scaled.shape} -> {embedding.shape}")
- return results
+ return embedding, feature_cols, scaler
except Exception as e:
- logging.error(f"Realistic prediction error: {str(e)}")
- return {
- 'error': f"Prediction error: {str(e)}",
- 'current_prediction': {'intensity_kt': 30, 'category': 'Tropical Depression'},
- 'route_forecast': [],
- 'confidence_scores': {},
- 'model_info': 'Error in prediction'
- }
+ logging.error(f"Error in perform_dimensionality_reduction: {e}")
+ raise
-def create_animated_route_visualization(prediction_results, show_uncertainty=True, enable_animation=True):
- """Create comprehensive animated route visualization with intensity plots"""
+def cluster_storms_data(embedding, method='dbscan', eps=0.5, min_samples=3):
+ """Cluster storms based on their embedding - FIXED NAME VERSION"""
try:
- if 'route_forecast' not in prediction_results or not prediction_results['route_forecast']:
- return None, "No route forecast data available"
+ if len(embedding) < 2:
+ return np.array([0] * len(embedding)) # Single cluster for insufficient data
- route_data = prediction_results['route_forecast']
+ if method.lower() == 'dbscan':
+ # Adjust min_samples based on data size
+ min_samples = min(min_samples, max(2, len(embedding) // 5))
+ clusterer = DBSCAN(eps=eps, min_samples=min_samples)
+ elif method.lower() == 'kmeans':
+ # Adjust n_clusters based on data size
+ n_clusters = min(5, max(2, len(embedding) // 3))
+ clusterer = KMeans(n_clusters=n_clusters, random_state=42)
+ else:
+ raise ValueError("Method must be 'dbscan' or 'kmeans'")
- # Extract data for plotting
- hours = [point['hour'] for point in route_data]
- lats = [point['lat'] for point in route_data]
- lons = [point['lon'] for point in route_data]
- intensities = [point['intensity_kt'] for point in route_data]
- categories = [point['category'] for point in route_data]
- confidences = [point.get('confidence', 0.8) for point in route_data]
- stages = [point.get('development_stage', 'Unknown') for point in route_data]
- speeds = [point.get('forward_speed_kmh', 15) for point in route_data]
- pressures = [point.get('pressure_hpa', 1013) for point in route_data]
+ clusters = clusterer.fit_predict(embedding)
- # Create subplot layout with map and intensity plot
- fig = make_subplots(
- rows=2, cols=2,
- subplot_titles=('Storm Track Animation', 'Wind Speed vs Time', 'Forward Speed vs Time', 'Pressure vs Time'),
- specs=[[{"type": "geo", "colspan": 2}, None],
- [{"type": "xy"}, {"type": "xy"}]],
- vertical_spacing=0.15,
- row_heights=[0.7, 0.3]
- )
+ logging.info(f"Clustering complete: {len(np.unique(clusters))} clusters found")
- if enable_animation:
- # Add frames for animation
- frames = []
-
- # Static background elements first
- # Add complete track as background
- fig.add_trace(
- go.Scattergeo(
- lon=lons,
- lat=lats,
- mode='lines',
- line=dict(color='lightgray', width=2, dash='dot'),
- name='Complete Track',
- showlegend=True,
- opacity=0.4
- ),
- row=1, col=1
- )
-
- # Genesis marker (always visible)
- fig.add_trace(
- go.Scattergeo(
- lon=[lons[0]],
- lat=[lats[0]],
- mode='markers',
- marker=dict(
- size=25,
- color='gold',
- symbol='star',
- line=dict(width=3, color='black')
- ),
- name='Genesis',
- showlegend=True,
- hovertemplate=(
- f"GENESIS
"
- f"Position: {lats[0]:.1f}°N, {lons[0]:.1f}°E
"
- f"Initial: {intensities[0]:.0f} kt
"
- f"Region: {prediction_results['genesis_info']['description']}
"
- ""
- )
- ),
- row=1, col=1
- )
+ return clusters
+
+ except Exception as e:
+ logging.error(f"Error in cluster_storms_data: {e}")
+ # Return single cluster as fallback
+ return np.array([0] * len(embedding))
+
+def create_separate_clustering_plots(storm_features, typhoon_data, method='umap'):
+ """Create separate plots for clustering analysis - ENHANCED CLARITY VERSION"""
+ try:
+ # Validate inputs
+ if storm_features is None or storm_features.empty:
+ raise ValueError("No storm features available for clustering")
- # Create animation frames
- for i in range(len(route_data)):
- frame_lons = lons[:i+1]
- frame_lats = lats[:i+1]
- frame_intensities = intensities[:i+1]
- frame_categories = categories[:i+1]
- frame_hours = hours[:i+1]
-
- # Current position marker
- current_color = enhanced_color_map.get(frame_categories[-1], 'rgb(128,128,128)')
- current_size = 15 + (frame_intensities[-1] / 10)
-
- frame_data = [
- # Animated track up to current point
- go.Scattergeo(
- lon=frame_lons,
- lat=frame_lats,
- mode='lines+markers',
- line=dict(color='blue', width=4),
- marker=dict(
- size=[8 + (intensity/15) for intensity in frame_intensities],
- color=[enhanced_color_map.get(cat, 'rgb(128,128,128)') for cat in frame_categories],
- opacity=0.8,
- line=dict(width=1, color='white')
- ),
- name='Current Track',
- showlegend=False
- ),
- # Current position highlight
- go.Scattergeo(
- lon=[frame_lons[-1]],
- lat=[frame_lats[-1]],
- mode='markers',
- marker=dict(
- size=current_size,
- color=current_color,
- symbol='circle',
- line=dict(width=3, color='white')
- ),
- name='Current Position',
- showlegend=False,
- hovertemplate=(
- f"Hour {route_data[i]['hour']}
"
- f"Position: {lats[i]:.1f}°N, {lons[i]:.1f}°E
"
- f"Intensity: {intensities[i]:.0f} kt
"
- f"Category: {categories[i]}
"
- f"Stage: {stages[i]}
"
- f"Speed: {speeds[i]:.1f} km/h
"
- f"Confidence: {confidences[i]*100:.0f}%
"
- ""
- )
- ),
- # Animated wind plot
- go.Scatter(
- x=frame_hours,
- y=frame_intensities,
- mode='lines+markers',
- line=dict(color='red', width=3),
- marker=dict(size=6, color='red'),
- name='Wind Speed',
- showlegend=False,
- yaxis='y2'
- ),
- # Animated speed plot
- go.Scatter(
- x=frame_hours,
- y=speeds[:i+1],
- mode='lines+markers',
- line=dict(color='green', width=2),
- marker=dict(size=4, color='green'),
- name='Forward Speed',
- showlegend=False,
- yaxis='y3'
+ if typhoon_data is None or typhoon_data.empty:
+ raise ValueError("No typhoon data available for route visualization")
+
+ logging.info(f"Starting clustering visualization with {len(storm_features)} storms")
+
+ # Perform dimensionality reduction
+ embedding, feature_cols, scaler = perform_dimensionality_reduction(storm_features, method)
+
+ # Perform clustering
+ cluster_labels = cluster_storms_data(embedding, 'dbscan')
+
+ # Add clustering results to storm features
+ storm_features_viz = storm_features.copy()
+ storm_features_viz['cluster'] = cluster_labels
+ storm_features_viz['dim1'] = embedding[:, 0]
+ storm_features_viz['dim2'] = embedding[:, 1]
+
+ # Merge with typhoon data for additional info - SAFE MERGE
+ try:
+ storm_info = typhoon_data.groupby('SID').first()[['NAME', 'SEASON']].reset_index()
+ storm_features_viz = storm_features_viz.merge(storm_info, on='SID', how='left')
+ # Fill missing values
+ storm_features_viz['NAME'] = storm_features_viz['NAME'].fillna('UNNAMED')
+ storm_features_viz['SEASON'] = storm_features_viz['SEASON'].fillna(2000)
+ except Exception as merge_error:
+ logging.warning(f"Could not merge storm info: {merge_error}")
+ storm_features_viz['NAME'] = 'UNNAMED'
+ storm_features_viz['SEASON'] = 2000
+
+ # Get unique clusters and assign distinct colors
+ unique_clusters = sorted([c for c in storm_features_viz['cluster'].unique() if c != -1])
+ noise_count = len(storm_features_viz[storm_features_viz['cluster'] == -1])
+
+ # 1. Enhanced clustering scatter plot with clear cluster identification
+ fig_cluster = go.Figure()
+
+ # Add noise points first
+ if noise_count > 0:
+ noise_data = storm_features_viz[storm_features_viz['cluster'] == -1]
+ fig_cluster.add_trace(
+ go.Scatter(
+ x=noise_data['dim1'],
+ y=noise_data['dim2'],
+ mode='markers',
+ marker=dict(color='lightgray', size=8, opacity=0.5, symbol='x'),
+ name=f'Noise ({noise_count} storms)',
+ hovertemplate=(
+ '%{customdata[0]}
'
+ 'Season: %{customdata[1]}
'
+ 'Cluster: Noise
'
+ f'{method.upper()} Dim 1: %{{x:.2f}}
'
+ f'{method.upper()} Dim 2: %{{y:.2f}}
'
+ ''
),
- # Animated pressure plot
- go.Scatter(
- x=frame_hours,
- y=pressures[:i+1],
- mode='lines+markers',
- line=dict(color='purple', width=2),
- marker=dict(size=4, color='purple'),
- name='Pressure',
- showlegend=False,
- yaxis='y4'
- )
- ]
-
- frames.append(go.Frame(
- data=frame_data,
- name=str(i),
- layout=go.Layout(
- title=f"Storm Development Animation - Hour {route_data[i]['hour']}
"
- f"Intensity: {intensities[i]:.0f} kt | Category: {categories[i]} | Stage: {stages[i]} | Speed: {speeds[i]:.1f} km/h"
- )
- ))
-
- fig.frames = frames
-
- # Add play/pause controls
- fig.update_layout(
- updatemenus=[
- {
- "buttons": [
- {
- "args": [None, {"frame": {"duration": 1000, "redraw": True},
- "fromcurrent": True, "transition": {"duration": 300}}],
- "label": "▶️ Play",
- "method": "animate"
- },
- {
- "args": [[None], {"frame": {"duration": 0, "redraw": True},
- "mode": "immediate", "transition": {"duration": 0}}],
- "label": "⏸️ Pause",
- "method": "animate"
- },
- {
- "args": [None, {"frame": {"duration": 500, "redraw": True},
- "fromcurrent": True, "transition": {"duration": 300}}],
- "label": "⏩ Fast",
- "method": "animate"
- }
- ],
- "direction": "left",
- "pad": {"r": 10, "t": 87},
- "showactive": False,
- "type": "buttons",
- "x": 0.1,
- "xanchor": "right",
- "y": 0,
- "yanchor": "top"
- }
- ],
- sliders=[{
- "active": 0,
- "yanchor": "top",
- "xanchor": "left",
- "currentvalue": {
- "font": {"size": 16},
- "prefix": "Hour: ",
- "visible": True,
- "xanchor": "right"
- },
- "transition": {"duration": 300, "easing": "cubic-in-out"},
- "pad": {"b": 10, "t": 50},
- "len": 0.9,
- "x": 0.1,
- "y": 0,
- "steps": [
- {
- "args": [[str(i)], {"frame": {"duration": 300, "redraw": True},
- "mode": "immediate", "transition": {"duration": 300}}],
- "label": f"H{route_data[i]['hour']}",
- "method": "animate"
- }
- for i in range(0, len(route_data), max(1, len(route_data)//20)) # Limit slider steps
- ]
- }]
+ customdata=np.column_stack((
+ noise_data['NAME'].fillna('UNNAMED'),
+ noise_data['SEASON'].fillna(2000)
+ ))
+ )
)
+
+ # Add clusters with distinct colors and shapes
+ cluster_symbols = ['circle', 'square', 'diamond', 'triangle-up', 'triangle-down',
+ 'pentagon', 'hexagon', 'star', 'cross', 'circle-open']
+
+ for i, cluster in enumerate(unique_clusters):
+ cluster_data = storm_features_viz[storm_features_viz['cluster'] == cluster]
+ color = CLUSTER_COLORS[i % len(CLUSTER_COLORS)]
+ symbol = cluster_symbols[i % len(cluster_symbols)]
- else:
- # Static view with all points
- # Add genesis marker
- fig.add_trace(
- go.Scattergeo(
- lon=[lons[0]],
- lat=[lats[0]],
+ fig_cluster.add_trace(
+ go.Scatter(
+ x=cluster_data['dim1'],
+ y=cluster_data['dim2'],
mode='markers',
- marker=dict(
- size=25,
- color='gold',
- symbol='star',
- line=dict(width=3, color='black')
- ),
- name='Genesis',
- showlegend=True,
+ marker=dict(color=color, size=10, symbol=symbol, line=dict(width=1, color='white')),
+ name=f'Cluster {cluster} ({len(cluster_data)} storms)',
hovertemplate=(
- f"GENESIS
"
- f"Position: {lats[0]:.1f}°N, {lons[0]:.1f}°E
"
- f"Initial: {intensities[0]:.0f} kt
"
- ""
- )
- ),
- row=1, col=1
- )
-
- # Add full track with intensity coloring
- for i in range(0, len(route_data), max(1, len(route_data)//50)): # Sample points for performance
- point = route_data[i]
- color = enhanced_color_map.get(point['category'], 'rgb(128,128,128)')
- size = 8 + (point['intensity_kt'] / 12)
-
- fig.add_trace(
- go.Scattergeo(
- lon=[point['lon']],
- lat=[point['lat']],
- mode='markers',
- marker=dict(
- size=size,
- color=color,
- opacity=point.get('confidence', 0.8),
- line=dict(width=1, color='white')
- ),
- name=f"Hour {point['hour']}" if i % 10 == 0 else None,
- showlegend=(i % 10 == 0),
- hovertemplate=(
- f"Hour {point['hour']}
"
- f"Position: {point['lat']:.1f}°N, {point['lon']:.1f}°E
"
- f"Intensity: {point['intensity_kt']:.0f} kt
"
- f"Category: {point['category']}
"
- f"Stage: {point.get('development_stage', 'Unknown')}
"
- f"Speed: {point.get('forward_speed_kmh', 15):.1f} km/h
"
- ""
- )
- ),
- row=1, col=1
- )
-
- # Connect points with track line
- fig.add_trace(
- go.Scattergeo(
- lon=lons,
- lat=lats,
- mode='lines',
- line=dict(color='black', width=3),
- name='Forecast Track',
- showlegend=True
- ),
- row=1, col=1
+ '%{customdata[0]}
'
+ 'Season: %{customdata[1]}
'
+ f'Cluster: {cluster}
'
+ f'{method.upper()} Dim 1: %{{x:.2f}}
'
+ f'{method.upper()} Dim 2: %{{y:.2f}}
'
+ 'Intensity: %{customdata[2]:.0f} kt
'
+ ''
+ ),
+ customdata=np.column_stack((
+ cluster_data['NAME'].fillna('UNNAMED'),
+ cluster_data['SEASON'].fillna(2000),
+ cluster_data['USA_WIND_max'].fillna(0)
+ ))
+ )
)
- # Add static intensity, speed, and pressure plots
- # Wind speed plot
- fig.add_trace(
- go.Scatter(
- x=hours,
- y=intensities,
- mode='lines+markers',
- line=dict(color='red', width=3),
- marker=dict(size=6, color='red'),
- name='Wind Speed',
- showlegend=False
- ),
- row=2, col=1
+ fig_cluster.update_layout(
+ title=f'Storm Clustering Analysis using {method.upper()}
Each symbol/color represents a distinct storm pattern group',
+ xaxis_title=f'{method.upper()} Dimension 1',
+ yaxis_title=f'{method.upper()} Dimension 2',
+ height=600,
+ showlegend=True
)
- # Add category threshold lines
- thresholds = [34, 64, 83, 96, 113, 137]
- threshold_names = ['TS', 'C1', 'C2', 'C3', 'C4', 'C5']
-
- for thresh, name in zip(thresholds, threshold_names):
- fig.add_trace(
- go.Scatter(
- x=[min(hours), max(hours)],
- y=[thresh, thresh],
- mode='lines',
- line=dict(color='gray', width=1, dash='dash'),
- name=name,
- showlegend=False,
- hovertemplate=f"{name} Threshold: {thresh} kt"
- ),
- row=2, col=1
- )
+ # 2. ENHANCED route map with cluster legends and clearer representation
+ fig_routes = go.Figure()
- # Forward speed plot
- fig.add_trace(
- go.Scatter(
- x=hours,
- y=speeds,
- mode='lines+markers',
- line=dict(color='green', width=2),
- marker=dict(size=4, color='green'),
- name='Forward Speed',
- showlegend=False
- ),
- row=2, col=2
- )
+ # Create a comprehensive legend showing cluster characteristics
+ cluster_info_text = []
- # Add uncertainty cone if requested
- if show_uncertainty and len(route_data) > 1:
- uncertainty_lats_upper = []
- uncertainty_lats_lower = []
- uncertainty_lons_upper = []
- uncertainty_lons_lower = []
-
- for i, point in enumerate(route_data):
- # Uncertainty grows with time and decreases with confidence
- base_uncertainty = 0.4 + (i / len(route_data)) * 1.8
- confidence_factor = point.get('confidence', 0.8)
- uncertainty = base_uncertainty / confidence_factor
-
- uncertainty_lats_upper.append(point['lat'] + uncertainty)
- uncertainty_lats_lower.append(point['lat'] - uncertainty)
- uncertainty_lons_upper.append(point['lon'] + uncertainty)
- uncertainty_lons_lower.append(point['lon'] - uncertainty)
+ for i, cluster in enumerate(unique_clusters):
+ cluster_storm_ids = storm_features_viz[storm_features_viz['cluster'] == cluster]['SID'].tolist()
+ color = CLUSTER_COLORS[i % len(CLUSTER_COLORS)]
- uncertainty_lats = uncertainty_lats_upper + uncertainty_lats_lower[::-1]
- uncertainty_lons = uncertainty_lons_upper + uncertainty_lons_lower[::-1]
+ # Get cluster statistics for legend
+ cluster_data = storm_features_viz[storm_features_viz['cluster'] == cluster]
+ avg_intensity = cluster_data['USA_WIND_max'].mean() if 'USA_WIND_max' in cluster_data.columns else 0
+ avg_pressure = cluster_data['USA_PRES_min'].mean() if 'USA_PRES_min' in cluster_data.columns else 1000
- fig.add_trace(
- go.Scattergeo(
- lon=uncertainty_lons,
- lat=uncertainty_lats,
- mode='lines',
- fill='toself',
- fillcolor='rgba(128,128,128,0.15)',
- line=dict(color='rgba(128,128,128,0.4)', width=1),
- name='Uncertainty Cone',
- showlegend=True
- ),
- row=1, col=1
+ cluster_info_text.append(
+ f"Cluster {cluster}: {len(cluster_storm_ids)} storms, "
+ f"Avg: {avg_intensity:.0f}kt/{avg_pressure:.0f}hPa"
)
+
+ # Add multiple storms per cluster with clear identification
+ storms_added = 0
+ for j, sid in enumerate(cluster_storm_ids[:8]): # Show up to 8 storms per cluster
+ try:
+ storm_track = typhoon_data[typhoon_data['SID'] == sid].sort_values('ISO_TIME')
+ if len(storm_track) > 1:
+ # Ensure valid coordinates
+ valid_coords = storm_track['LAT'].notna() & storm_track['LON'].notna()
+ storm_track = storm_track[valid_coords]
+
+ if len(storm_track) > 1:
+ storm_name = storm_track['NAME'].iloc[0] if pd.notna(storm_track['NAME'].iloc[0]) else 'UNNAMED'
+ storm_season = storm_track['SEASON'].iloc[0] if 'SEASON' in storm_track.columns else 'Unknown'
+
+ # Vary line style for different storms in same cluster
+ line_styles = ['solid', 'dash', 'dot', 'dashdot']
+ line_style = line_styles[j % len(line_styles)]
+ line_width = 3 if j == 0 else 2 # First storm thicker
+
+ fig_routes.add_trace(
+ go.Scattergeo(
+ lon=storm_track['LON'],
+ lat=storm_track['LAT'],
+ mode='lines+markers',
+ line=dict(color=color, width=line_width, dash=line_style),
+ marker=dict(color=color, size=3),
+ name=f'C{cluster}: {storm_name} ({storm_season})',
+ showlegend=True,
+ legendgroup=f'cluster_{cluster}',
+ hovertemplate=(
+ f'Cluster {cluster}: {storm_name}
'
+ 'Lat: %{lat:.1f}°
'
+ 'Lon: %{lon:.1f}°
'
+ f'Season: {storm_season}
'
+ f'Pattern Group: {cluster}
'
+ ''
+ )
+ )
+ )
+ storms_added += 1
+ except Exception as track_error:
+ logging.warning(f"Error adding track for storm {sid}: {track_error}")
+ continue
+
+ # Add cluster centroid marker
+ if len(cluster_storm_ids) > 0:
+ # Calculate average genesis location for cluster
+ cluster_storm_data = storm_features_viz[storm_features_viz['cluster'] == cluster]
+ if 'genesis_lat' in cluster_storm_data.columns and 'genesis_lon' in cluster_storm_data.columns:
+ avg_lat = cluster_storm_data['genesis_lat'].mean()
+ avg_lon = cluster_storm_data['genesis_lon'].mean()
+
+ fig_routes.add_trace(
+ go.Scattergeo(
+ lon=[avg_lon],
+ lat=[avg_lat],
+ mode='markers',
+ marker=dict(
+ color=color,
+ size=20,
+ symbol='star',
+ line=dict(width=2, color='white')
+ ),
+ name=f'C{cluster} Center',
+ showlegend=True,
+ legendgroup=f'cluster_{cluster}',
+ hovertemplate=(
+ f'Cluster {cluster} Genesis Center
'
+ f'Avg Position: {avg_lat:.1f}°N, {avg_lon:.1f}°E
'
+ f'Storms: {len(cluster_storm_ids)}
'
+ f'Avg Intensity: {avg_intensity:.0f} kt
'
+ ''
+ )
+ )
+ )
- # Enhanced layout
- fig.update_layout(
- title=f"Comprehensive Storm Development Analysis
Starting from {prediction_results['genesis_info']['description']}",
- height=1000, # Taller for better subplot visibility
- width=1400, # Wider
+ # Update route map layout with enhanced information and LARGER SIZE
+ fig_routes.update_layout(
+ title=f"Storm Routes by {method.upper()} Clusters
Different line styles = different storms in same cluster | Stars = cluster centers",
+ geo=dict(
+ projection_type="natural earth",
+ showland=True,
+ landcolor="LightGray",
+ showocean=True,
+ oceancolor="LightBlue",
+ showcoastlines=True,
+ coastlinecolor="Gray",
+ center=dict(lat=20, lon=140),
+ projection_scale=2.5 # Larger map
+ ),
+ height=800, # Much larger height
+ width=1200, # Wider map
showlegend=True
)
- # Update geo layout
- fig.update_geos(
- projection_type="natural earth",
- showland=True,
- landcolor="LightGray",
- showocean=True,
- oceancolor="LightBlue",
- showcoastlines=True,
- coastlinecolor="DarkGray",
- showlakes=True,
- lakecolor="LightBlue",
- center=dict(lat=np.mean(lats), lon=np.mean(lons)),
- projection_scale=2.0,
- row=1, col=1
+ # Add cluster info annotation
+ cluster_summary = "
".join(cluster_info_text)
+ fig_routes.add_annotation(
+ text=f"Cluster Summary:
{cluster_summary}",
+ xref="paper", yref="paper",
+ x=0.02, y=0.98,
+ showarrow=False,
+ align="left",
+ bgcolor="rgba(255,255,255,0.8)",
+ bordercolor="gray",
+ borderwidth=1
)
- # Update subplot axes
- fig.update_xaxes(title_text="Forecast Hour", row=2, col=1)
- fig.update_yaxes(title_text="Wind Speed (kt)", row=2, col=1)
- fig.update_xaxes(title_text="Forecast Hour", row=2, col=2)
- fig.update_yaxes(title_text="Forward Speed (km/h)", row=2, col=2)
+ # 3. Enhanced pressure evolution plot with cluster identification
+ fig_pressure = go.Figure()
- # Generate enhanced forecast text
- current = prediction_results['current_prediction']
- genesis_info = prediction_results['genesis_info']
+ for i, cluster in enumerate(unique_clusters):
+ cluster_storm_ids = storm_features_viz[storm_features_viz['cluster'] == cluster]['SID'].tolist()
+ color = CLUSTER_COLORS[i % len(CLUSTER_COLORS)]
+
+ cluster_pressures = []
+ for j, sid in enumerate(cluster_storm_ids[:5]): # Limit to 5 storms per cluster
+ try:
+ storm_track = typhoon_data[typhoon_data['SID'] == sid].sort_values('ISO_TIME')
+ if len(storm_track) > 1 and 'USA_PRES' in storm_track.columns:
+ pressure_values = pd.to_numeric(storm_track['USA_PRES'], errors='coerce').dropna()
+ if len(pressure_values) > 0:
+ storm_name = storm_track['NAME'].iloc[0] if pd.notna(storm_track['NAME'].iloc[0]) else 'UNNAMED'
+ time_hours = range(len(pressure_values))
+
+ # Normalize time to show relative progression
+ normalized_time = np.linspace(0, 100, len(pressure_values))
+
+ fig_pressure.add_trace(
+ go.Scatter(
+ x=normalized_time,
+ y=pressure_values,
+ mode='lines',
+ line=dict(color=color, width=2, dash='solid' if j == 0 else 'dash'),
+ name=f'C{cluster}: {storm_name}' if j == 0 else None,
+ showlegend=(j == 0),
+ legendgroup=f'pressure_cluster_{cluster}',
+ hovertemplate=(
+ f'Cluster {cluster}: {storm_name}
'
+ 'Progress: %{x:.0f}%
'
+ 'Pressure: %{y:.0f} hPa
'
+ ''
+ ),
+ opacity=0.8 if j == 0 else 0.5
+ )
+ )
+ cluster_pressures.extend(pressure_values)
+ except Exception as e:
+ continue
+
+ # Add cluster average line
+ if cluster_pressures:
+ avg_pressure = np.mean(cluster_pressures)
+ fig_pressure.add_hline(
+ y=avg_pressure,
+ line_dash="dot",
+ line_color=color,
+ annotation_text=f"C{cluster} Avg: {avg_pressure:.0f}",
+ annotation_position="right"
+ )
- # Calculate some statistics
- max_intensity = max(intensities)
- max_intensity_time = hours[intensities.index(max_intensity)]
- avg_speed = np.mean(speeds)
+ fig_pressure.update_layout(
+ title=f"Pressure Evolution by {method.upper()} Clusters
Normalized timeline (0-100%) | Dotted lines = cluster averages",
+ xaxis_title="Storm Progress (%)",
+ yaxis_title="Pressure (hPa)",
+ height=500
+ )
- forecast_text = f"""
-COMPREHENSIVE STORM DEVELOPMENT FORECAST
-{'='*65}
-
-GENESIS CONDITIONS:
-• Region: {current.get('genesis_region', 'Unknown')}
-• Description: {genesis_info['description']}
-• Starting Position: {lats[0]:.1f}°N, {lons[0]:.1f}°E
-• Initial Intensity: {current['intensity_kt']:.0f} kt (Tropical Depression)
-• Genesis Pressure: {current.get('pressure_hpa', 1008):.0f} hPa
-
-STORM CHARACTERISTICS:
-• Peak Intensity: {max_intensity:.0f} kt at Hour {max_intensity_time}
-• Average Forward Speed: {avg_speed:.1f} km/h
-• Total Distance: {sum([speeds[i]/6 for i in range(len(speeds))]):.0f} km
-• Final Position: {lats[-1]:.1f}°N, {lons[-1]:.1f}°E
-• Forecast Duration: {hours[-1]} hours ({hours[-1]/24:.1f} days)
-
-DEVELOPMENT TIMELINE:
-• Hour 0 (Genesis): {intensities[0]:.0f} kt - {categories[0]}
-• Hour 24: {intensities[min(4, len(intensities)-1)]:.0f} kt - {categories[min(4, len(categories)-1)]}
-• Hour 48: {intensities[min(8, len(intensities)-1)]:.0f} kt - {categories[min(8, len(categories)-1)]}
-• Hour 72: {intensities[min(12, len(intensities)-1)]:.0f} kt - {categories[min(12, len(categories)-1)]}
-• Final: {intensities[-1]:.0f} kt - {categories[-1]}
-
-MOTION ANALYSIS:
-• Initial Motion: {speeds[0]:.1f} km/h
-• Peak Speed: {max(speeds):.1f} km/h at Hour {hours[speeds.index(max(speeds))]}
-• Final Motion: {speeds[-1]:.1f} km/h
-
-CONFIDENCE ASSESSMENT:
-• Genesis Likelihood: {prediction_results['confidence_scores'].get('genesis', 0.85)*100:.0f}%
-• 24-hour Track: {prediction_results['confidence_scores'].get('position_24h', 0.85)*100:.0f}%
-• 48-hour Track: {prediction_results['confidence_scores'].get('position_48h', 0.75)*100:.0f}%
-• 72-hour Track: {prediction_results['confidence_scores'].get('position_72h', 0.65)*100:.0f}%
-• Long-term: {prediction_results['confidence_scores'].get('long_term', 0.50)*100:.0f}%
-
-FEATURES:
-{"✅ Animation Enabled - Use controls to watch development" if enable_animation else "📊 Static Analysis - All time steps displayed"}
-✅ Realistic Forward Speeds (15-25 km/h typical)
-✅ Environmental Coupling (ENSO, SST, Shear)
-✅ Multi-stage Development Cycle
-✅ Uncertainty Quantification
-
-MODEL: {prediction_results['model_info']}
- """
+ # 4. Enhanced wind evolution plot
+ fig_wind = go.Figure()
- return fig, forecast_text.strip()
+ for i, cluster in enumerate(unique_clusters):
+ cluster_storm_ids = storm_features_viz[storm_features_viz['cluster'] == cluster]['SID'].tolist()
+ color = CLUSTER_COLORS[i % len(CLUSTER_COLORS)]
+
+ cluster_winds = []
+ for j, sid in enumerate(cluster_storm_ids[:5]): # Limit to 5 storms per cluster
+ try:
+ storm_track = typhoon_data[typhoon_data['SID'] == sid].sort_values('ISO_TIME')
+ if len(storm_track) > 1 and 'USA_WIND' in storm_track.columns:
+ wind_values = pd.to_numeric(storm_track['USA_WIND'], errors='coerce').dropna()
+ if len(wind_values) > 0:
+ storm_name = storm_track['NAME'].iloc[0] if pd.notna(storm_track['NAME'].iloc[0]) else 'UNNAMED'
+
+ # Normalize time to show relative progression
+ normalized_time = np.linspace(0, 100, len(wind_values))
+
+ fig_wind.add_trace(
+ go.Scatter(
+ x=normalized_time,
+ y=wind_values,
+ mode='lines',
+ line=dict(color=color, width=2, dash='solid' if j == 0 else 'dash'),
+ name=f'C{cluster}: {storm_name}' if j == 0 else None,
+ showlegend=(j == 0),
+ legendgroup=f'wind_cluster_{cluster}',
+ hovertemplate=(
+ f'Cluster {cluster}: {storm_name}
'
+ 'Progress: %{x:.0f}%
'
+ 'Wind: %{y:.0f} kt
'
+ ''
+ ),
+ opacity=0.8 if j == 0 else 0.5
+ )
+ )
+ cluster_winds.extend(wind_values)
+ except Exception as e:
+ continue
+
+ # Add cluster average line
+ if cluster_winds:
+ avg_wind = np.mean(cluster_winds)
+ fig_wind.add_hline(
+ y=avg_wind,
+ line_dash="dot",
+ line_color=color,
+ annotation_text=f"C{cluster} Avg: {avg_wind:.0f}",
+ annotation_position="right"
+ )
+
+ fig_wind.update_layout(
+ title=f"Wind Speed Evolution by {method.upper()} Clusters
Normalized timeline (0-100%) | Dotted lines = cluster averages",
+ xaxis_title="Storm Progress (%)",
+ yaxis_title="Wind Speed (kt)",
+ height=500
+ )
+
+ # Generate enhanced cluster statistics with clear explanations
+ try:
+ stats_text = f"ENHANCED {method.upper()} CLUSTER ANALYSIS RESULTS\n" + "="*60 + "\n\n"
+ stats_text += f"🔍 DIMENSIONALITY REDUCTION: {method.upper()}\n"
+ stats_text += f"🎯 CLUSTERING ALGORITHM: DBSCAN (automatic pattern discovery)\n"
+ stats_text += f"📊 TOTAL STORMS ANALYZED: {len(storm_features_viz)}\n"
+ stats_text += f"🎨 CLUSTERS DISCOVERED: {len(unique_clusters)}\n"
+ if noise_count > 0:
+ stats_text += f"❌ NOISE POINTS: {noise_count} storms (don't fit clear patterns)\n"
+ stats_text += "\n"
+
+ for cluster in sorted(storm_features_viz['cluster'].unique()):
+ cluster_data = storm_features_viz[storm_features_viz['cluster'] == cluster]
+ storm_count = len(cluster_data)
+
+ if cluster == -1:
+ stats_text += f"❌ NOISE GROUP: {storm_count} storms\n"
+ stats_text += " → These storms don't follow the main patterns\n"
+ stats_text += " → May represent unique or rare storm behaviors\n\n"
+ continue
+
+ stats_text += f"🎯 CLUSTER {cluster}: {storm_count} storms\n"
+ stats_text += f" 🎨 Color: {CLUSTER_COLORS[cluster % len(CLUSTER_COLORS)]}\n"
+
+ # Add detailed statistics if available
+ if 'USA_WIND_max' in cluster_data.columns:
+ wind_mean = cluster_data['USA_WIND_max'].mean()
+ wind_std = cluster_data['USA_WIND_max'].std()
+ stats_text += f" 💨 Intensity: {wind_mean:.1f} ± {wind_std:.1f} kt\n"
+
+ if 'USA_PRES_min' in cluster_data.columns:
+ pres_mean = cluster_data['USA_PRES_min'].mean()
+ pres_std = cluster_data['USA_PRES_min'].std()
+ stats_text += f" 🌡️ Pressure: {pres_mean:.1f} ± {pres_std:.1f} hPa\n"
+
+ if 'track_length' in cluster_data.columns:
+ track_mean = cluster_data['track_length'].mean()
+ stats_text += f" 📏 Avg Track Length: {track_mean:.1f} points\n"
+
+ if 'genesis_lat' in cluster_data.columns and 'genesis_lon' in cluster_data.columns:
+ lat_mean = cluster_data['genesis_lat'].mean()
+ lon_mean = cluster_data['genesis_lon'].mean()
+ stats_text += f" 🎯 Genesis Region: {lat_mean:.1f}°N, {lon_mean:.1f}°E\n"
+
+ # Add interpretation
+ if wind_mean < 50:
+ stats_text += " 💡 Pattern: Weaker storm group\n"
+ elif wind_mean > 100:
+ stats_text += " 💡 Pattern: Intense storm group\n"
+ else:
+ stats_text += " 💡 Pattern: Moderate intensity group\n"
+
+ stats_text += "\n"
+
+ # Add explanation of the analysis
+ stats_text += "📖 INTERPRETATION GUIDE:\n"
+ stats_text += f"• {method.upper()} reduces storm characteristics to 2D for visualization\n"
+ stats_text += "• DBSCAN finds natural groupings without preset number of clusters\n"
+ stats_text += "• Each cluster represents storms with similar behavior patterns\n"
+ stats_text += "• Route colors match cluster colors from the similarity plot\n"
+ stats_text += "• Stars on map show average genesis locations for each cluster\n"
+ stats_text += "• Temporal plots show how each cluster behaves over time\n\n"
+
+ stats_text += f"🔧 FEATURES USED FOR CLUSTERING:\n"
+ stats_text += f" Total: {len(feature_cols)} storm characteristics\n"
+ stats_text += f" Including: intensity, pressure, track shape, genesis location\n"
+
+ except Exception as stats_error:
+ stats_text = f"Error generating enhanced statistics: {str(stats_error)}"
+
+ return fig_cluster, fig_routes, fig_pressure, fig_wind, stats_text
except Exception as e:
- error_msg = f"Error creating comprehensive visualization: {str(e)}"
- logging.error(error_msg)
+ logging.error(f"Error in enhanced clustering analysis: {e}")
import traceback
traceback.print_exc()
- return None, error_msg
+
+ error_fig = go.Figure()
+ error_fig.add_annotation(
+ text=f"Error in clustering analysis: {str(e)}",
+ xref="paper", yref="paper",
+ x=0.5, y=0.5, xanchor='center', yanchor='middle',
+ showarrow=False, font_size=16
+ )
+ return error_fig, error_fig, error_fig, error_fig, f"Error in clustering: {str(e)}"
# -----------------------------
-# Regression Functions (Original)
+# ENHANCED: Advanced Prediction System with Route Forecasting
# -----------------------------
-def perform_wind_regression(start_year, start_month, end_year, end_month):
- """Perform wind regression analysis"""
- start_date = datetime(start_year, start_month, 1)
- end_date = datetime(end_year, end_month, 28)
- data = merged_data[(merged_data['ISO_TIME']>=start_date) & (merged_data['ISO_TIME']<=end_date)].dropna(subset=['USA_WIND','ONI'])
- data['severe_typhoon'] = (data['USA_WIND']>=64).astype(int)
- X = sm.add_constant(data['ONI'])
- y = data['severe_typhoon']
- try:
- model = sm.Logit(y, X).fit(disp=0)
- beta_1 = model.params['ONI']
- exp_beta_1 = np.exp(beta_1)
- p_value = model.pvalues['ONI']
- return f"Wind Regression: β1={beta_1:.4f}, Odds Ratio={exp_beta_1:.4f}, P-value={p_value:.4f}"
- except Exception as e:
- return f"Wind Regression Error: {e}"
-
-def perform_pressure_regression(start_year, start_month, end_year, end_month):
- """Perform pressure regression analysis"""
- start_date = datetime(start_year, start_month, 1)
- end_date = datetime(end_year, end_month, 28)
- data = merged_data[(merged_data['ISO_TIME']>=start_date) & (merged_data['ISO_TIME']<=end_date)].dropna(subset=['USA_PRES','ONI'])
- data['intense_typhoon'] = (data['USA_PRES']<=950).astype(int)
- X = sm.add_constant(data['ONI'])
- y = data['intense_typhoon']
- try:
- model = sm.Logit(y, X).fit(disp=0)
- beta_1 = model.params['ONI']
- exp_beta_1 = np.exp(beta_1)
- p_value = model.pvalues['ONI']
- return f"Pressure Regression: β1={beta_1:.4f}, Odds Ratio={exp_beta_1:.4f}, P-value={p_value:.4f}"
- except Exception as e:
- return f"Pressure Regression Error: {e}"
-
-def perform_longitude_regression(start_year, start_month, end_year, end_month):
- """Perform longitude regression analysis"""
- start_date = datetime(start_year, start_month, 1)
- end_date = datetime(end_year, end_month, 28)
- data = merged_data[(merged_data['ISO_TIME']>=start_date) & (merged_data['ISO_TIME']<=end_date)].dropna(subset=['LON','ONI'])
- data['western_typhoon'] = (data['LON']<=140).astype(int)
- X = sm.add_constant(data['ONI'])
- y = data['western_typhoon']
- try:
- model = sm.OLS(y, sm.add_constant(X)).fit()
- beta_1 = model.params['ONI']
- exp_beta_1 = np.exp(beta_1)
- p_value = model.pvalues['ONI']
- return f"Longitude Regression: β1={beta_1:.4f}, Odds Ratio={exp_beta_1:.4f}, P-value={p_value:.4f}"
- except Exception as e:
- return f"Longitude Regression Error: {e}"
-
-# -----------------------------
-# Visualization Functions (Enhanced)
-# -----------------------------
-
-def get_full_tracks(start_year, start_month, end_year, end_month, enso_phase, typhoon_search):
- """Get full typhoon tracks"""
- start_date = datetime(start_year, start_month, 1)
- end_date = datetime(end_year, end_month, 28)
- filtered_data = merged_data[(merged_data['ISO_TIME']>=start_date) & (merged_data['ISO_TIME']<=end_date)].copy()
- filtered_data['ENSO_Phase'] = filtered_data['ONI'].apply(classify_enso_phases)
- if enso_phase != 'all':
- filtered_data = filtered_data[filtered_data['ENSO_Phase'] == enso_phase.capitalize()]
- unique_storms = filtered_data['SID'].unique()
- count = len(unique_storms)
- fig = go.Figure()
- for sid in unique_storms:
- storm_data = typhoon_data[typhoon_data['SID']==sid]
- if storm_data.empty:
- continue
- name = storm_data['NAME'].iloc[0] if pd.notnull(storm_data['NAME'].iloc[0]) else "Unnamed"
- basin = storm_data['SID'].iloc[0][:2]
- storm_oni = filtered_data[filtered_data['SID']==sid]['ONI'].iloc[0]
- color = 'red' if storm_oni>=0.5 else ('blue' if storm_oni<=-0.5 else 'green')
- fig.add_trace(go.Scattergeo(
- lon=storm_data['LON'], lat=storm_data['LAT'], mode='lines',
- name=f"{name} ({basin})",
- line=dict(width=1.5, color=color), hoverinfo="name"
- ))
- if typhoon_search:
- search_mask = typhoon_data['NAME'].str.contains(typhoon_search, case=False, na=False)
- if search_mask.any():
- for sid in typhoon_data[search_mask]['SID'].unique():
- storm_data = typhoon_data[typhoon_data['SID']==sid]
- fig.add_trace(go.Scattergeo(
- lon=storm_data['LON'], lat=storm_data['LAT'], mode='lines+markers',
- name=f"MATCHED: {storm_data['NAME'].iloc[0]}",
- line=dict(width=3, color='yellow'),
- marker=dict(size=5), hoverinfo="name"
- ))
- fig.update_layout(
- title=f"Typhoon Tracks ({start_year}-{start_month} to {end_year}-{end_month})",
- geo=dict(
- projection_type='natural earth',
- showland=True,
- showcoastlines=True,
- landcolor='rgb(243,243,243)',
- countrycolor='rgb(204,204,204)',
- coastlinecolor='rgb(204,204,204)',
- center=dict(lon=140, lat=20),
- projection_scale=3
- ),
- legend_title="Typhoons by ENSO Phase",
- showlegend=True,
- height=700
- )
- fig.add_annotation(
- x=0.02, y=0.98, xref="paper", yref="paper",
- text="Red: El Niño, Blue: La Nina, Green: Neutral",
- showarrow=False, align="left",
- bgcolor="rgba(255,255,255,0.8)"
- )
- return fig, f"Total typhoons displayed: {count}"
-
-def get_wind_analysis(start_year, start_month, end_year, end_month, enso_phase, typhoon_search):
- """Get wind analysis with enhanced categorization"""
- start_date = datetime(start_year, start_month, 1)
- end_date = datetime(end_year, end_month, 28)
- filtered_data = merged_data[(merged_data['ISO_TIME']>=start_date) & (merged_data['ISO_TIME']<=end_date)].copy()
- filtered_data['ENSO_Phase'] = filtered_data['ONI'].apply(classify_enso_phases)
- if enso_phase != 'all':
- filtered_data = filtered_data[filtered_data['ENSO_Phase'] == enso_phase.capitalize()]
-
- fig = px.scatter(filtered_data, x='ONI', y='USA_WIND', color='Category',
- hover_data=['NAME','Year','Category'],
- title='Wind Speed vs ONI',
- labels={'ONI':'ONI Value','USA_WIND':'Max Wind Speed (knots)'},
- color_discrete_map=enhanced_color_map)
-
- if typhoon_search:
- mask = filtered_data['NAME'].str.contains(typhoon_search, case=False, na=False)
- if mask.any():
- fig.add_trace(go.Scatter(
- x=filtered_data.loc[mask,'ONI'], y=filtered_data.loc[mask,'USA_WIND'],
- mode='markers', marker=dict(size=10, color='red', symbol='star'),
- name=f'Matched: {typhoon_search}',
- text=filtered_data.loc[mask,'NAME']+' ('+filtered_data.loc[mask,'Year'].astype(str)+')'
- ))
-
- regression = perform_wind_regression(start_year, start_month, end_year, end_month)
- return fig, regression
-
-def get_pressure_analysis(start_year, start_month, end_year, end_month, enso_phase, typhoon_search):
- """Get pressure analysis with enhanced categorization"""
- start_date = datetime(start_year, start_month, 1)
- end_date = datetime(end_year, end_month, 28)
- filtered_data = merged_data[(merged_data['ISO_TIME']>=start_date) & (merged_data['ISO_TIME']<=end_date)].copy()
- filtered_data['ENSO_Phase'] = filtered_data['ONI'].apply(classify_enso_phases)
- if enso_phase != 'all':
- filtered_data = filtered_data[filtered_data['ENSO_Phase'] == enso_phase.capitalize()]
-
- fig = px.scatter(filtered_data, x='ONI', y='USA_PRES', color='Category',
- hover_data=['NAME','Year','Category'],
- title='Pressure vs ONI',
- labels={'ONI':'ONI Value','USA_PRES':'Min Pressure (hPa)'},
- color_discrete_map=enhanced_color_map)
-
- if typhoon_search:
- mask = filtered_data['NAME'].str.contains(typhoon_search, case=False, na=False)
- if mask.any():
- fig.add_trace(go.Scatter(
- x=filtered_data.loc[mask,'ONI'], y=filtered_data.loc[mask,'USA_PRES'],
- mode='markers', marker=dict(size=10, color='red', symbol='star'),
- name=f'Matched: {typhoon_search}',
- text=filtered_data.loc[mask,'NAME']+' ('+filtered_data.loc[mask,'Year'].astype(str)+')'
- ))
-
- regression = perform_pressure_regression(start_year, start_month, end_year, end_month)
- return fig, regression
-
-def get_longitude_analysis(start_year, start_month, end_year, end_month, enso_phase, typhoon_search):
- """Get longitude analysis"""
- start_date = datetime(start_year, start_month, 1)
- end_date = datetime(end_year, end_month, 28)
- filtered_data = merged_data[(merged_data['ISO_TIME']>=start_date) & (merged_data['ISO_TIME']<=end_date)].copy()
- filtered_data['ENSO_Phase'] = filtered_data['ONI'].apply(classify_enso_phases)
- if enso_phase != 'all':
- filtered_data = filtered_data[filtered_data['ENSO_Phase'] == enso_phase.capitalize()]
-
- fig = px.scatter(filtered_data, x='LON', y='ONI', hover_data=['NAME'],
- title='Typhoon Generation Longitude vs ONI (All Years)')
-
- if len(filtered_data) > 1:
- X = np.array(filtered_data['LON']).reshape(-1,1)
- y = filtered_data['ONI']
- try:
- model = sm.OLS(y, sm.add_constant(X)).fit()
- y_pred = model.predict(sm.add_constant(X))
- fig.add_trace(go.Scatter(x=filtered_data['LON'], y=y_pred, mode='lines', name='Regression Line'))
- slope = model.params[1]
- slopes_text = f"All Years Slope: {slope:.4f}"
- except Exception as e:
- slopes_text = f"Regression Error: {e}"
- else:
- slopes_text = "Insufficient data for regression"
-
- regression = perform_longitude_regression(start_year, start_month, end_year, end_month)
- return fig, slopes_text, regression
-
-# -----------------------------
-# ENHANCED: Animation Functions with Taiwan Standard Support - FIXED VERSION
-# -----------------------------
-
-def get_available_years(typhoon_data):
- """Get all available years including 2025 - with error handling"""
+def create_advanced_prediction_model(typhoon_data):
+    """Train random-forest models that predict a storm's next track point.
+
+    Each pair of consecutive observations of one storm (ordered by
+    ``ISO_TIME``) yields one sample: the features describe the current
+    observation and the target is the observation that follows it. Storms
+    with fewer than 3 observations are skipped.
+
+    Features, in column order: LAT, LON, USA_WIND, USA_PRES, month, day of
+    year, step length and step bearing. Step length and bearing come from
+    the raw LAT/LON difference to the previous observation (both are 0 for
+    a storm's first observation). Samples containing a NaN feature or
+    target are discarded before training.
+
+    Args:
+        typhoon_data: DataFrame of track observations with at least ``SID``
+            and ``ISO_TIME`` columns. ``LAT``, ``LON``, ``USA_WIND`` and
+            ``USA_PRES`` fall back to 20, 140, 30 and 1000 when the column
+            is absent.
+
+    Returns:
+        ``(models, info)``. On success ``models`` is a dict holding fitted
+        ``RandomForestRegressor`` instances under ``'position'`` (next
+        LAT/LON) and ``'intensity'`` (next USA_WIND), and ``info`` reports
+        their mean absolute errors on a 20% hold-out split. Otherwise
+        ``models`` is ``None`` and ``info`` says why (no data, fewer than 10
+        usable samples, or the text of the exception that was raised).
+    """
     try:
         if typhoon_data is None or typhoon_data.empty:
- return [str(year) for year in range(2000, 2026)]
-
- if 'ISO_TIME' in typhoon_data.columns:
- years = typhoon_data['ISO_TIME'].dt.year.dropna().unique()
- elif 'SEASON' in typhoon_data.columns:
- years = typhoon_data['SEASON'].dropna().unique()
- else:
- years = range(2000, 2026) # Default range including 2025
+            return None, "No data available for model training"
- # Convert to strings and sort
- year_strings = sorted([str(int(year)) for year in years if not pd.isna(year)])
+        # Prepare training data
+        features = []
+        targets = []
- # Ensure we have at least some years
- if not year_strings:
- return [str(year) for year in range(2000, 2026)]
+        # One groupby pass instead of re-filtering the whole frame per storm ID;
+        # sort=False keeps storms in first-appearance order.
+        for _, storm_rows in typhoon_data.groupby('SID', sort=False):
+            storm_data = storm_rows.sort_values('ISO_TIME')
- return year_strings
-
- except Exception as e:
- print(f"Error in get_available_years: {e}")
- return [str(year) for year in range(2000, 2026)]
-
-def update_typhoon_options_enhanced(year, basin):
- """Enhanced typhoon options with TD support and 2025 data"""
- try:
- year = int(year)
-
- # Filter by year - handle both ISO_TIME and SEASON columns
- if 'ISO_TIME' in typhoon_data.columns:
- year_mask = typhoon_data['ISO_TIME'].dt.year == year
- elif 'SEASON' in typhoon_data.columns:
- year_mask = typhoon_data['SEASON'] == year
- else:
- # Fallback - try to extract year from SID or other fields
- year_mask = typhoon_data.index >= 0 # Include all data as fallback
-
- year_data = typhoon_data[year_mask].copy()
-
- # Filter by basin if specified
- if basin != "All Basins":
- basin_code = basin.split(' - ')[0] if ' - ' in basin else basin[:2]
- if 'SID' in year_data.columns:
- year_data = year_data[year_data['SID'].str.startswith(basin_code, na=False)]
- elif 'BASIN' in year_data.columns:
- year_data = year_data[year_data['BASIN'] == basin_code]
-
- if year_data.empty:
- return gr.update(choices=["No storms found"], value=None)
-
- # Get unique storms - include ALL intensities (including TD)
- storms = year_data.groupby('SID').agg({
- 'NAME': 'first',
- 'USA_WIND': 'max'
- }).reset_index()
-
- # Enhanced categorization including TD
- storms['category'] = storms['USA_WIND'].apply(categorize_typhoon_enhanced)
-
- # Create options with category information
- options = []
- for _, storm in storms.iterrows():
- name = storm['NAME'] if pd.notna(storm['NAME']) and storm['NAME'] != '' else 'UNNAMED'
- sid = storm['SID']
- category = storm['category']
- max_wind = storm['USA_WIND'] if pd.notna(storm['USA_WIND']) else 0
+            if len(storm_data) < 3:  # Need at least 3 points for prediction
+                continue
- option = f"{name} ({sid}) - {category} ({max_wind:.0f}kt)"
- options.append(option)
-
- if not options:
- return gr.update(choices=["No storms found"], value=None)
-
- return gr.update(choices=sorted(options), value=options[0])
-
- except Exception as e:
- print(f"Error in update_typhoon_options_enhanced: {e}")
- return gr.update(choices=["Error loading storms"], value=None)
-
-def generate_enhanced_track_video_fixed(year, typhoon_selection, standard):
- """FIXED: Enhanced track video generation with working animation display"""
- if not typhoon_selection or typhoon_selection == "No storms found":
- return None
-
- try:
- # Extract SID from selection
- sid = typhoon_selection.split('(')[1].split(')')[0]
-
- # Get storm data
- storm_df = typhoon_data[typhoon_data['SID'] == sid].copy()
- if storm_df.empty:
- print(f"No data found for storm {sid}")
- return None
-
- # Sort by time
- if 'ISO_TIME' in storm_df.columns:
- storm_df = storm_df.sort_values('ISO_TIME')
-
- # Extract data for animation
- lats = storm_df['LAT'].astype(float).values
- lons = storm_df['LON'].astype(float).values
-
- if 'USA_WIND' in storm_df.columns:
- winds = pd.to_numeric(storm_df['USA_WIND'], errors='coerce').fillna(0).values
- else:
- winds = np.full(len(lats), 30)
-
- # Enhanced metadata
- storm_name = storm_df['NAME'].iloc[0] if pd.notna(storm_df['NAME'].iloc[0]) else "UNNAMED"
- season = storm_df['SEASON'].iloc[0] if 'SEASON' in storm_df.columns else year
-
- print(f"Generating FIXED video for {storm_name} ({sid}) with {len(lats)} track points using {standard} standard")
-
- # FIXED: Create figure with proper cartopy setup
- fig = plt.figure(figsize=(16, 10))
- ax = plt.axes(projection=ccrs.PlateCarree())
-
- # Enhanced map features
- ax.stock_img()
- ax.add_feature(cfeature.COASTLINE, linewidth=0.8)
- ax.add_feature(cfeature.BORDERS, linewidth=0.5)
- ax.add_feature(cfeature.OCEAN, color='lightblue', alpha=0.5)
- ax.add_feature(cfeature.LAND, color='lightgray', alpha=0.5)
-
- # Set extent based on track
- padding = 5
- ax.set_extent([
- min(lons) - padding, max(lons) + padding,
- min(lats) - padding, max(lats) + padding
- ])
-
- # Add gridlines
- gl = ax.gridlines(draw_labels=True, alpha=0.3)
- gl.top_labels = gl.right_labels = False
-
- # Title
- ax.set_title(f"{season} {storm_name} ({sid}) Track Animation - {standard.upper()} Standard",
- fontsize=18, fontweight='bold')
-
- # FIXED: Animation elements - proper initialization with cartopy transforms
- # Initialize empty line for track with correct transform
- track_line, = ax.plot([], [], 'b-', linewidth=3, alpha=0.7,
- label='Track', transform=ccrs.PlateCarree())
-
- # Initialize current position marker
- current_point, = ax.plot([], [], 'o', markersize=15,
- transform=ccrs.PlateCarree())
-
- # Historical track points (to show path traversed)
- history_points, = ax.plot([], [], 'o', markersize=6, alpha=0.4, color='blue',
- transform=ccrs.PlateCarree())
-
- # Info text box
- info_box = ax.text(0.02, 0.98, '', transform=ax.transAxes,
- fontsize=12, verticalalignment='top',
- bbox=dict(boxstyle="round,pad=0.5", facecolor='white', alpha=0.9))
-
- # FIXED: Color legend with proper categories for both standards
- legend_elements = []
- if standard == 'taiwan':
- categories = ['Tropical Depression', 'Tropical Storm', 'Severe Tropical Storm',
- 'Typhoon', 'Severe Typhoon', 'Super Typhoon']
- for category in categories:
- color = get_taiwan_color_fixed(category)
- legend_elements.append(plt.Line2D([0], [0], marker='o', color='w',
- markerfacecolor=color, markersize=10, label=category))
- else:
- categories = ['Tropical Depression', 'Tropical Storm', 'C1 Typhoon', 'C2 Typhoon',
- 'C3 Strong Typhoon', 'C4 Very Strong Typhoon', 'C5 Super Typhoon']
- for category in categories:
- color = get_matplotlib_color(category)
- legend_elements.append(plt.Line2D([0], [0], marker='o', color='w',
- markerfacecolor=color, markersize=10, label=category))
-
- ax.legend(handles=legend_elements, loc='upper right', fontsize=10)
-
- # FIXED: Animation function with proper artist updates and cartopy compatibility
- def animate_fixed(frame):
- """Fixed animation function that properly updates tracks with cartopy"""
- try:
- if frame >= len(lats):
- return track_line, current_point, history_points, info_box
-
- # FIXED: Update track line up to current frame
- current_lons = lons[:frame+1]
- current_lats = lats[:frame+1]
+            for i in range(len(storm_data) - 1):
+                current = storm_data.iloc[i]
+                next_point = storm_data.iloc[i + 1]
- # Update the track line data (this is the key fix!)
- track_line.set_data(current_lons, current_lats)
+                # Extract features (current state)
+                feature_row = []
- # FIXED: Update historical points (smaller markers showing traversed path)
- if frame > 0:
- history_points.set_data(current_lons[:-1], current_lats[:-1])
+                # Current position
+                feature_row.extend([
+                    current.get('LAT', 20),
+                    current.get('LON', 140)
+                ])
- # FIXED: Update current position with correct categorization
- current_wind = winds[frame]
+                # Current intensity
+                feature_row.extend([
+                    current.get('USA_WIND', 30),
+                    current.get('USA_PRES', 1000)
+                ])
- if standard == 'taiwan':
- category, color = categorize_typhoon_by_standard_fixed(current_wind, 'taiwan')
+                # Time features
+                if 'ISO_TIME' in current and pd.notna(current['ISO_TIME']):
+                    month = current['ISO_TIME'].month
+                    day_of_year = current['ISO_TIME'].dayofyear
                 else:
- category, color = categorize_typhoon_by_standard_fixed(current_wind, 'atlantic')
-
- # Debug for first few frames
- if frame < 3:
- print(f"FIXED Frame {frame}: Wind={current_wind:.1f}kt, Category={category}, Color={color}")
-
- # Update current position marker
- current_point.set_data([lons[frame]], [lats[frame]])
- current_point.set_color(color)
- current_point.set_markersize(12 + current_wind/8)
+                    month = 9  # Peak season default
+                    day_of_year = 250
- # FIXED: Enhanced info display with correct Taiwan wind speed conversion
- if 'ISO_TIME' in storm_df.columns and frame < len(storm_df):
- current_time = storm_df.iloc[frame]['ISO_TIME']
- time_str = current_time.strftime('%Y-%m-%d %H:%M UTC') if pd.notna(current_time) else 'Unknown'
- else:
- time_str = f"Step {frame+1}"
+                feature_row.extend([month, day_of_year])
- # Corrected wind speed display for Taiwan standard
- if standard == 'taiwan':
- wind_ms = current_wind * 0.514444
- wind_display = f"{current_wind:.0f} kt ({wind_ms:.1f} m/s)"
+                # Motion features (if previous point exists)
+                if i > 0:
+                    prev = storm_data.iloc[i - 1]
+                    dlat = current.get('LAT', 20) - prev.get('LAT', 20)
+                    dlon = current.get('LON', 140) - prev.get('LON', 140)
+                    speed = np.sqrt(dlat**2 + dlon**2)
+                    bearing = np.arctan2(dlat, dlon)
                 else:
- wind_display = f"{current_wind:.0f} kt"
+                    speed = 0
+                    bearing = 0
- info_text = (
- f"Storm: {storm_name}\n"
- f"Time: {time_str}\n"
- f"Position: {lats[frame]:.1f}°N, {lons[frame]:.1f}°E\n"
- f"Max Wind: {wind_display}\n"
- f"Category: {category}\n"
- f"Standard: {standard.upper()}\n"
- f"Frame: {frame+1}/{len(lats)}"
- )
- info_box.set_text(info_text)
+                feature_row.extend([speed, bearing])
- # FIXED: Return all modified artists (crucial for proper display)
- return track_line, current_point, history_points, info_box
+                features.append(feature_row)
- except Exception as e:
- print(f"Error in animate frame {frame}: {e}")
- return track_line, current_point, history_points, info_box
+                # Target: next position and intensity
+                targets.append([
+                    next_point.get('LAT', 20),
+                    next_point.get('LON', 140),
+                    next_point.get('USA_WIND', 30)
+                ])
- # FIXED: Create animation with cartopy-compatible settings
- # Key fixes: blit=False (crucial for cartopy), proper interval
- anim = animation.FuncAnimation(
- fig, animate_fixed, frames=len(lats),
- interval=600, blit=False, repeat=True # blit=False is essential for cartopy!
- )
+        # Series.get() only falls back to its default when the column is absent;
+        # a present-but-NaN value (e.g. a missing wind report) passes straight
+        # through, and the regressors cannot be fit on NaN targets. Drop every
+        # incomplete sample before counting and training.
+        X = np.array(features, dtype=float)
+        y = np.array(targets, dtype=float)
+        if len(X):
+            complete = np.isfinite(X).all(axis=1) & np.isfinite(y).all(axis=1)
+            X, y = X[complete], y[complete]
- # Save animation with optimized settings
- temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.mp4',
- dir=tempfile.gettempdir())
+        if len(X) < 10:  # Need sufficient training data
+            return None, "Insufficient data for model training"
- # FIXED: Writer settings optimized for track visibility
- writer = animation.FFMpegWriter(
- fps=2, bitrate=3000, codec='libx264', # Slower FPS for better track visibility
- extra_args=['-pix_fmt', 'yuv420p']
- )
+        # Split data
+        X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
- print(f"Saving FIXED animation to {temp_file.name}")
- anim.save(temp_file.name, writer=writer, dpi=120)
- plt.close(fig)
+        # Create separate models for position and intensity
+        models = {}
- print(f"FIXED video generated successfully: {temp_file.name}")
- return temp_file.name
+        # Position model (lat, lon)
+        pos_model = RandomForestRegressor(n_estimators=100, random_state=42)
+        pos_model.fit(X_train, y_train[:, :2])
+        models['position'] = pos_model
- except Exception as e:
- print(f"Error generating FIXED video: {e}")
- import traceback
- traceback.print_exc()
- return None
-
-# FIXED: Update the simplified wrapper function
-def simplified_track_video_fixed(year, basin, typhoon, standard):
- """Simplified track video function with FIXED animation and Taiwan classification"""
- if not typhoon:
- return None
- return generate_enhanced_track_video_fixed(year, typhoon, standard)
-
-# -----------------------------
-# Load & Process Data
-# -----------------------------
-
-# Global variables initialization
-oni_data = None
-typhoon_data = None
-merged_data = None
-
-def initialize_data():
- """Initialize all data safely"""
- global oni_data, typhoon_data, merged_data
- try:
- logging.info("Starting data loading process...")
- update_oni_data()
- oni_data, typhoon_data = load_data_fixed(ONI_DATA_PATH, TYPHOON_DATA_PATH)
+        # Intensity model (wind speed)
+        int_model = RandomForestRegressor(n_estimators=100, random_state=42)
+        int_model.fit(X_train, y_train[:, 2])
+        models['intensity'] = int_model
+
+        # Calculate model performance on the hold-out split
+        pos_pred = pos_model.predict(X_test)
+        int_pred = int_model.predict(X_test)
+
+        pos_mae = mean_absolute_error(y_test[:, :2], pos_pred)
+        int_mae = mean_absolute_error(y_test[:, 2], int_pred)
+
+        model_info = f"Position MAE: {pos_mae:.2f}°, Intensity MAE: {int_mae:.2f} kt"
+
+        return models, model_info
- if oni_data is not None and typhoon_data is not None:
- oni_long = process_oni_data(oni_data)
- typhoon_max = process_typhoon_data(typhoon_data)
- merged_data = merge_data(oni_long, typhoon_max)
- logging.info("Data loading complete.")
- else:
- logging.error("Failed to load required data")
- # Create minimal fallback data
- oni_data = pd.DataFrame({'Year': [2000], 'Jan': [0], 'Feb': [0], 'Mar': [0], 'Apr': [0],
- 'May': [0], 'Jun': [0], 'Jul': [0], 'Aug': [0], 'Sep': [0],
- 'Oct': [0], 'Nov': [0], 'Dec': [0]})
- typhoon_data = create_fallback_typhoon_data()
- oni_long = process_oni_data(oni_data)
- typhoon_max = process_typhoon_data(typhoon_data)
- merged_data = merge_data(oni_long, typhoon_max)
     except Exception as e:
- logging.error(f"Error during data initialization: {e}")
- # Create minimal fallback data
- oni_data = pd.DataFrame({'Year': [2000], 'Jan': [0], 'Feb': [0], 'Mar': [0], 'Apr': [0],
- 'May': [0], 'Jun': [0], 'Jul': [0], 'Aug': [0], 'Sep': [0],
- 'Oct': [0], 'Nov': [0], 'Dec': [0]})
- typhoon_data = create_fallback_typhoon_data()
- oni_long = process_oni_data(oni_data)
- typhoon_max = process_typhoon_data(typhoon_data)
- merged_data = merge_data(oni_long, typhoon_max)
-
-# Initialize data
-initialize_data()
-
-# -----------------------------
-# ENHANCED: Gradio Interface with Fixed Route Visualization and Enhanced Features
-# -----------------------------
+        return None, f"Error creating prediction model: {str(e)}"
-def create_interface():
- """Create the enhanced Gradio interface with robust error handling"""
+def create_animated_route_visualization(prediction_results, show_uncertainty=True, enable_animation=True):
+ """Create comprehensive animated route visualization with intensity plots"""
try:
- # Ensure data is available
- if oni_data is None or typhoon_data is None or merged_data is None:
- logging.warning("Data not properly loaded, creating minimal interface")
- return create_minimal_fallback_interface()
+ if 'route_forecast' not in prediction_results or not prediction_results['route_forecast']:
+ return None, "No route forecast data available"
+
+ route_data = prediction_results['route_forecast']
+
+ # Extract data for plotting
+ hours = [point['hour'] for point in route_data]
+ lats = [point['lat'] for point in route_data]
+ lons = [point['lon'] for point in route_data]
+ intensities = [point['intensity_kt'] for point in route_data]
+ categories = [point['category'] for point in route_data]
+ confidences = [point.get('confidence', 0.8) for point in route_data]
+ stages = [point.get('development_stage', 'Unknown') for point in route_data]
+ speeds = [point.get('forward_speed_kmh', 15) for point in route_data]
+ pressures = [point.get('pressure_hpa', 1013) for point in route_data]
+
+ # Create subplot layout with map and intensity plot
+ fig = make_subplots(
+ rows=2, cols=2,
+ subplot_titles=('Storm Track Animation', 'Wind Speed vs Time', 'Forward Speed vs Time', 'Pressure vs Time'),
+ specs=[[{"type": "geo", "colspan": 2}, None],
+ [{"type": "xy"}, {"type": "xy"}]],
+ vertical_spacing=0.15,
+ row_heights=[0.7, 0.3]
+ )
+
+ if enable_animation:
+ # Add frames for animation
+ frames = []
- # Get safe data statistics
- try:
- total_storms = len(typhoon_data['SID'].unique()) if 'SID' in typhoon_data.columns else 0
- total_records = len(typhoon_data)
- available_years = get_available_years(typhoon_data)
- year_range_display = f"{available_years[0]} - {available_years[-1]}" if available_years else "Unknown"
- except Exception as e:
- logging.error(f"Error getting data statistics: {e}")
- total_storms = 0
- total_records = 0
- year_range_display = "Unknown"
- available_years = [str(year) for year in range(2000, 2026)]
-
- with gr.Blocks(title="Enhanced Typhoon Analysis Platform", theme=gr.themes.Soft()) as demo:
- gr.Markdown("# 🌪️ Enhanced Typhoon Analysis Platform")
- gr.Markdown("**Advanced ML clustering, route predictions, and comprehensive tropical cyclone analysis including Tropical Depressions**")
+ # Static background elements first
+ # Add complete track as background
+ fig.add_trace(
+ go.Scattergeo(
+ lon=lons,
+ lat=lats,
+ mode='lines',
+ line=dict(color='lightgray', width=2, dash='dot'),
+ name='Complete Track',
+ showlegend=True,
+ opacity=0.4
+ ),
+ row=1, col=1
+ )
- with gr.Tab("🏠 Overview"):
- overview_text = f"""
- ## Welcome to the Enhanced Typhoon Analysis Dashboard
-
- This dashboard provides comprehensive analysis of typhoon data in relation to ENSO phases with advanced machine learning capabilities.
-
- ### 🚀 Enhanced Features:
- - **Advanced ML Clustering**: UMAP/t-SNE storm pattern analysis with separate visualizations
- - **Predictive Routing**: Advanced storm track and intensity forecasting with uncertainty quantification
- - **Complete TD Support**: Now includes Tropical Depressions (< 34 kt)
- - **Taiwan Standard**: Full support for Taiwan meteorological classification system
- - **2025 Data Ready**: Real-time compatibility with current year data
- - **Enhanced Animations**: High-quality storm track visualizations with both standards
-
- ### 📊 Data Status:
- - **ONI Data**: {len(oni_data)} years loaded
- - **Typhoon Data**: {total_records:,} records loaded
- - **Merged Data**: {len(merged_data):,} typhoons with ONI values
- - **Available Years**: {year_range_display}
-
- ### 🔧 Technical Capabilities:
- - **UMAP Clustering**: {"✅ Available" if UMAP_AVAILABLE else "⚠️ Limited to t-SNE/PCA"}
- - **AI Predictions**: {"🧠 Deep Learning" if CNN_AVAILABLE else "🔬 Physics-based"}
- - **Enhanced Categorization**: Tropical Depression to Super Typhoon
- - **Platform**: Optimized for Hugging Face Spaces
+ # Genesis marker (always visible)
+ fig.add_trace(
+ go.Scattergeo(
+ lon=[lons[0]],
+ lat=[lats[0]],
+ mode='markers',
+ marker=dict(
+ size=25,
+ color='gold',
+ symbol='star',
+ line=dict(width=3, color='black')
+ ),
+ name='Genesis',
+ showlegend=True,
+ hovertemplate=(
+ f"GENESIS<br>"
+ f"Position: {lats[0]:.1f}°N, {lons[0]:.1f}°E<br>"
+ f"Initial: {intensities[0]:.0f} kt<br>"
+ f"Region: {prediction_results['genesis_info']['description']}<br>"
+ "<extra></extra>"
+ )
+ ),
+ row=1, col=1
+ )
+
+ # Create animation frames
+ for i in range(len(route_data)):
+ frame_lons = lons[:i+1]
+ frame_lats = lats[:i+1]
+ frame_intensities = intensities[:i+1]
+ frame_categories = categories[:i+1]
+ frame_hours = hours[:i+1]
- ### 📈 Research Applications:
- - Climate change impact studies
- - Seasonal forecasting research
- - Storm pattern classification
- - ENSO-typhoon relationship analysis
- - Intensity prediction model development
- """
- gr.Markdown(overview_text)
-
- with gr.Tab("🔬 Advanced ML Clustering"):
- gr.Markdown("## 🎯 Storm Pattern Analysis with Separate Visualizations")
- gr.Markdown("**Four separate plots: Clustering, Routes, Pressure Evolution, and Wind Evolution**")
+ # Current position marker
+ current_color = enhanced_color_map.get(frame_categories[-1], 'rgb(128,128,128)')
+ current_size = 15 + (frame_intensities[-1] / 10)
- with gr.Row():
- with gr.Column(scale=2):
- reduction_method = gr.Dropdown(
- choices=['UMAP', 't-SNE', 'PCA'],
- value='UMAP' if UMAP_AVAILABLE else 't-SNE',
- label="🔍 Dimensionality Reduction Method",
- info="UMAP provides better global structure preservation"
+ frame_data = [
+ # Animated track up to current point
+ go.Scattergeo(
+ lon=frame_lons,
+ lat=frame_lats,
+ mode='lines+markers',
+ line=dict(color='blue', width=4),
+ marker=dict(
+ size=[8 + (intensity/15) for intensity in frame_intensities],
+ color=[enhanced_color_map.get(cat, 'rgb(128,128,128)') for cat in frame_categories],
+ opacity=0.8,
+ line=dict(width=1, color='white')
+ ),
+ name='Current Track',
+ showlegend=False
+ ),
+ # Current position highlight
+ go.Scattergeo(
+ lon=[frame_lons[-1]],
+ lat=[frame_lats[-1]],
+ mode='markers',
+ marker=dict(
+ size=current_size,
+ color=current_color,
+ symbol='circle',
+ line=dict(width=3, color='white')
+ ),
+ name='Current Position',
+ showlegend=False,
+ hovertemplate=(
+ f"Hour {route_data[i]['hour']}<br>"
+ f"Position: {lats[i]:.1f}°N, {lons[i]:.1f}°E<br>"
+ f"Intensity: {intensities[i]:.0f} kt<br>"
+ f"Category: {categories[i]}<br>"
+ f"Stage: {stages[i]}<br>"
+ f"Speed: {speeds[i]:.1f} km/h<br>"
+ f"Confidence: {confidences[i]*100:.0f}%<br>"
+ "<extra></extra>"
)
- with gr.Column(scale=1):
- analyze_clusters_btn = gr.Button("🚀 Generate All Cluster Analyses", variant="primary", size="lg")
-
- with gr.Row():
- with gr.Column():
- cluster_plot = gr.Plot(label="📊 Storm Clustering Analysis")
- with gr.Column():
- routes_plot = gr.Plot(label="🗺️ Clustered Storm Routes")
-
- with gr.Row():
- with gr.Column():
- pressure_plot = gr.Plot(label="🌡️ Pressure Evolution by Cluster")
- with gr.Column():
- wind_plot = gr.Plot(label="💨 Wind Speed Evolution by Cluster")
-
- with gr.Row():
- cluster_stats = gr.Textbox(label="📈 Detailed Cluster Statistics", lines=15, max_lines=20)
-
- def run_separate_clustering_analysis(method):
- try:
- # Extract features for clustering
+ ),
+ # Animated wind plot
+ go.Scatter(
+ x=frame_hours,
+ y=frame_intensities,
+ mode='lines+markers',
+ line=dict(color='red', width=3),
+ marker=dict(size=6, color='red'),
+ name='Wind Speed',
+ showlegend=False,
+ yaxis='y2'
+ ),
+ # Animated speed plot
+ go.Scatter(
+ x=frame_hours,
+ y=speeds[:i+1],
+ mode='lines+markers',
+ line=dict(color='green', width=2),
+ marker=dict(size=4, color='green'),
+ name='Forward Speed',
+ showlegend=False,
+ yaxis='y3'
+ ),
+ # Animated pressure plot
+ go.Scatter(
+ x=frame_hours,
+ y=pressures[:i+1],
+ mode='lines+markers',
+ line=dict(color='purple', width=2),
+ marker=dict(size=4, color='purple'),
+ name='Pressure',
+ showlegend=False,
+ yaxis='y4'
+ )
+ ]
+
+ frames.append(go.Frame(
+ data=frame_data,
+ name=str(i),
+ layout=go.Layout(
+ title=f"Storm Development Animation - Hour {route_data[i]['hour']}
"
+ f"Intensity: {intensities[i]:.0f} kt | Category: {categories[i]} | Stage: {stages[i]} | Speed: {speeds[i]:.1f} km/h"
+ )
+ ))
+
+ fig.frames = frames
+
+ # Add play/pause controls
+ fig.update_layout(
+ updatemenus=[
+ {
+ "buttons": [
+ {
+ "args": [None, {"frame": {"duration": 1000, "redraw": True},
+ "fromcurrent": True, "transition": {"duration": 300}}],
+ "label": "▶️ Play",
+ "method": "animate"
+ },
+ {
+ "args": [[None], {"frame": {"duration": 0, "redraw": True},
+ "mode": "immediate", "transition": {"duration": 0}}],
+ "label": "⏸️ Pause",
+ "method": "animate"
+ },
+ {
+ "args": [None, {"frame": {"duration": 500, "redraw": True},
+ "fromcurrent": True, "transition": {"duration": 300}}],
+ "label": "⏩ Fast",
+ "method": "animate"
+ }
+ ],
+ "direction": "left",
+ "pad": {"r": 10, "t": 87},
+ "showactive": False,
+ "type": "buttons",
+ "x": 0.1,
+ "xanchor": "right",
+ "y": 0,
+ "yanchor": "top"
+ }
+ ],
+ sliders=[{
+ "active": 0,
+ "yanchor": "top",
+ "xanchor": "left",
+ "currentvalue": {
+ "font": {"size": 16},
+ "prefix": "Hour: ",
+ "visible": True,
+ "xanchor": "right"
+ },
+ "transition": {"duration": 300, "easing": "cubic-in-out"},
+ "pad": {"b": 10, "t": 50},
+ "len": 0.9,
+ "x": 0.1,
+ "y": 0,
+ "steps": [
+ {
+ "args": [[str(i)], {"frame": {"duration": 300, "redraw": True},
+ "mode": "immediate", "transition": {"duration": 300}}],
+ "label": f"H{route_data[i]['hour']}",
+ "method": "animate"
+ }
+ for i in range(0, len(route_data), max(1, len(route_data)//20)) # Limit slider steps
+ ]
+ }]
+ )
+
+ else:
+ # Static view with all points
+ # Add genesis marker
+ fig.add_trace(
+ go.Scattergeo(
+ lon=[lons[0]],
+ lat=[lats[0]],
+ mode='markers',
+ marker=dict(
+ size=25,
+ color='gold',
+ symbol='star',
+ line=dict(width=3, color='black')
+ ),
+ name='Genesis',
+ showlegend=True,
+ hovertemplate=(
+ f"GENESIS
"
+ f"Position: {lats[0]:.1f}°N, {lons[0]:.1f}°E
"
+ f"Initial: {intensities[0]:.0f} kt
"
+ ""
+ )
+ ),
+ row=1, col=1
+ )
+
+ # Add full track with intensity coloring
+ for i in range(0, len(route_data), max(1, len(route_data)//50)): # Sample points for performance
+ point = route_data[i]
+ color = enhanced_color_map.get(point['category'], 'rgb(128,128,128)')
+ size = 8 + (point['intensity_kt'] / 12)
+
+ fig.add_trace(
+ go.Scattergeo(
+ lon=[point['lon']],
+ lat=[point['lat']],
+ mode='markers',
+ marker=dict(
+ size=size,
+ color=color,
+ opacity=point.get('confidence', 0.8),
+ line=dict(width=1, color='white')
+ ),
+ name=f"Hour {point['hour']}" if i % 10 == 0 else None,
+ showlegend=(i % 10 == 0),
+ hovertemplate=(
+ f"Hour {point['hour']}
"
+ f"Position: {point['lat']:.1f}°N, {point['lon']:.1f}°E
"
+ f"Intensity: {point['intensity_kt']:.0f} kt
"
+ f"Category: {point['category']}
"
+ f"Stage: {point.get('development_stage', 'Unknown')}
"
+ f"Speed: {point.get('forward_speed_kmh', 15):.1f} km/h
"
+ ""
+ )
+ ),
+ row=1, col=1
+ )
+
+ # Connect points with track line
+ fig.add_trace(
+ go.Scattergeo(
+ lon=lons,
+ lat=lats,
+ mode='lines',
+ line=dict(color='black', width=3),
+ name='Forecast Track',
+ showlegend=True
+ ),
+ row=1, col=1
+ )
+
+ # Add static intensity, speed, and pressure plots
+ # Wind speed plot
+ fig.add_trace(
+ go.Scatter(
+ x=hours,
+ y=intensities,
+ mode='lines+markers',
+ line=dict(color='red', width=3),
+ marker=dict(size=6, color='red'),
+ name='Wind Speed',
+ showlegend=False
+ ),
+ row=2, col=1
+ )
+
+ # Add category threshold lines
+ thresholds = [34, 64, 83, 96, 113, 137]
+ threshold_names = ['TS', 'C1', 'C2', 'C3', 'C4', 'C5']
+
+ for thresh, name in zip(thresholds, threshold_names):
+ fig.add_trace(
+ go.Scatter(
+ x=[min(hours), max(hours)],
+ y=[thresh, thresh],
+ mode='lines',
+ line=dict(color='gray', width=1, dash='dash'),
+ name=name,
+ showlegend=False,
+ hovertemplate=f"{name} Threshold: {thresh} kt"
+ ),
+ row=2, col=1
+ )
+
+ # Forward speed plot
+ fig.add_trace(
+ go.Scatter(
+ x=hours,
+ y=speeds,
+ mode='lines+markers',
+ line=dict(color='green', width=2),
+ marker=dict(size=4, color='green'),
+ name='Forward Speed',
+ showlegend=False
+ ),
+ row=2, col=2
+ )
+
+ # Add uncertainty cone if requested
+ if show_uncertainty and len(route_data) > 1:
+ uncertainty_lats_upper = []
+ uncertainty_lats_lower = []
+ uncertainty_lons_upper = []
+ uncertainty_lons_lower = []
+
+ for i, point in enumerate(route_data):
+ # Uncertainty grows with time and decreases with confidence
+ base_uncertainty = 0.4 + (i / len(route_data)) * 1.8
+ confidence_factor = point.get('confidence', 0.8)
+ uncertainty = base_uncertainty / confidence_factor
+
+ uncertainty_lats_upper.append(point['lat'] + uncertainty)
+ uncertainty_lats_lower.append(point['lat'] - uncertainty)
+ uncertainty_lons_upper.append(point['lon'] + uncertainty)
+ uncertainty_lons_lower.append(point['lon'] - uncertainty)
+
+ uncertainty_lats = uncertainty_lats_upper + uncertainty_lats_lower[::-1]
+ uncertainty_lons = uncertainty_lons_upper + uncertainty_lons_lower[::-1]
+
+ fig.add_trace(
+ go.Scattergeo(
+ lon=uncertainty_lons,
+ lat=uncertainty_lats,
+ mode='lines',
+ fill='toself',
+ fillcolor='rgba(128,128,128,0.15)',
+ line=dict(color='rgba(128,128,128,0.4)', width=1),
+ name='Uncertainty Cone',
+ showlegend=True
+ ),
+ row=1, col=1
+ )
+
+ # Enhanced layout
+ fig.update_layout(
+ title=f"Comprehensive Storm Development Analysis
Starting from {prediction_results['genesis_info']['description']}",
+ height=1000, # Taller for better subplot visibility
+ width=1400, # Wider
+ showlegend=True
+ )
+
+ # Update geo layout
+ fig.update_geos(
+ projection_type="natural earth",
+ showland=True,
+ landcolor="LightGray",
+ showocean=True,
+ oceancolor="LightBlue",
+ showcoastlines=True,
+ coastlinecolor="DarkGray",
+ showlakes=True,
+ lakecolor="LightBlue",
+ center=dict(lat=np.mean(lats), lon=np.mean(lons)),
+ projection_scale=2.0,
+ row=1, col=1
+ )
+
+ # Update subplot axes
+ fig.update_xaxes(title_text="Forecast Hour", row=2, col=1)
+ fig.update_yaxes(title_text="Wind Speed (kt)", row=2, col=1)
+ fig.update_xaxes(title_text="Forecast Hour", row=2, col=2)
+ fig.update_yaxes(title_text="Forward Speed (km/h)", row=2, col=2)
+
+ # Generate enhanced forecast text
+ current = prediction_results['current_prediction']
+ genesis_info = prediction_results['genesis_info']
+
+ # Calculate some statistics
+ max_intensity = max(intensities)
+ max_intensity_time = hours[intensities.index(max_intensity)]
+ avg_speed = np.mean(speeds)
+
+ forecast_text = f"""
+COMPREHENSIVE STORM DEVELOPMENT FORECAST
+{'='*65}
+
+GENESIS CONDITIONS:
+• Region: {current.get('genesis_region', 'Unknown')}
+• Description: {genesis_info['description']}
+• Starting Position: {lats[0]:.1f}°N, {lons[0]:.1f}°E
+• Initial Intensity: {current['intensity_kt']:.0f} kt (Tropical Depression)
+• Genesis Pressure: {current.get('pressure_hpa', 1008):.0f} hPa
+
+STORM CHARACTERISTICS:
+• Peak Intensity: {max_intensity:.0f} kt at Hour {max_intensity_time}
+• Average Forward Speed: {avg_speed:.1f} km/h
+• Total Distance: {sum([speeds[i]/6 for i in range(len(speeds))]):.0f} km
+• Final Position: {lats[-1]:.1f}°N, {lons[-1]:.1f}°E
+• Forecast Duration: {hours[-1]} hours ({hours[-1]/24:.1f} days)
+
+DEVELOPMENT TIMELINE:
+• Hour 0 (Genesis): {intensities[0]:.0f} kt - {categories[0]}
+• Hour 24: {intensities[min(4, len(intensities)-1)]:.0f} kt - {categories[min(4, len(categories)-1)]}
+• Hour 48: {intensities[min(8, len(intensities)-1)]:.0f} kt - {categories[min(8, len(categories)-1)]}
+• Hour 72: {intensities[min(12, len(intensities)-1)]:.0f} kt - {categories[min(12, len(categories)-1)]}
+• Final: {intensities[-1]:.0f} kt - {categories[-1]}
+
+MOTION ANALYSIS:
+• Initial Motion: {speeds[0]:.1f} km/h
+• Peak Speed: {max(speeds):.1f} km/h at Hour {hours[speeds.index(max(speeds))]}
+• Final Motion: {speeds[-1]:.1f} km/h
+
+CONFIDENCE ASSESSMENT:
+• Genesis Likelihood: {prediction_results['confidence_scores'].get('genesis', 0.85)*100:.0f}%
+• 24-hour Track: {prediction_results['confidence_scores'].get('position_24h', 0.85)*100:.0f}%
+• 48-hour Track: {prediction_results['confidence_scores'].get('position_48h', 0.75)*100:.0f}%
+• 72-hour Track: {prediction_results['confidence_scores'].get('position_72h', 0.65)*100:.0f}%
+• Long-term: {prediction_results['confidence_scores'].get('long_term', 0.50)*100:.0f}%
+
+FEATURES:
+{"✅ Animation Enabled - Use controls to watch development" if enable_animation else "📊 Static Analysis - All time steps displayed"}
+✅ Realistic Forward Speeds (15-25 km/h typical)
+✅ Environmental Coupling (ENSO, SST, Shear)
+✅ Multi-stage Development Cycle
+✅ Uncertainty Quantification
+
+MODEL: {prediction_results['model_info']}
+ """
+
+ return fig, forecast_text.strip()
+
+ except Exception as e:
+ error_msg = f"Error creating comprehensive visualization: {str(e)}"
+ logging.error(error_msg)
+ import traceback
+ traceback.print_exc()
+ return None, error_msg
+
+# -----------------------------
+# Regression Functions (Original)
+# -----------------------------
+
+def perform_wind_regression(start_year, start_month, end_year, end_month):
+    """Fit a logistic regression of severe-typhoon occurrence on ONI.
+
+    Uses the rows of ``merged_data`` whose ``ISO_TIME`` lies between the first
+    day of the start month and the end of the end month, dropping rows with a
+    missing ``USA_WIND`` or ``ONI``. A row is flagged severe when
+    ``USA_WIND >= 64``.
+
+    Returns:
+        str: the ONI coefficient, its odds ratio and p-value, or a
+        "Wind Regression Error: ..." message when the fit fails.
+    """
+    start_date = datetime(start_year, start_month, 1)
+    # Exclusive upper bound at the first day of the following month, so that
+    # days 29-31 of the end month are kept (a fixed day-28 cutoff lost them).
+    if end_month == 12:
+        end_exclusive = datetime(end_year + 1, 1, 1)
+    else:
+        end_exclusive = datetime(end_year, end_month + 1, 1)
+    in_range = (merged_data['ISO_TIME'] >= start_date) & (merged_data['ISO_TIME'] < end_exclusive)
+    # Explicit copy: the indicator column below must not be written onto a
+    # slice of the shared merged_data frame.
+    data = merged_data[in_range].dropna(subset=['USA_WIND', 'ONI']).copy()
+    data['severe_typhoon'] = (data['USA_WIND'] >= 64).astype(int)
+    X = sm.add_constant(data['ONI'])
+    y = data['severe_typhoon']
+    try:
+        model = sm.Logit(y, X).fit(disp=0)
+        beta_1 = model.params['ONI']
+        exp_beta_1 = np.exp(beta_1)
+        p_value = model.pvalues['ONI']
+        return f"Wind Regression: β1={beta_1:.4f}, Odds Ratio={exp_beta_1:.4f}, P-value={p_value:.4f}"
+    except Exception as e:
+        return f"Wind Regression Error: {e}"
+
+def perform_pressure_regression(start_year, start_month, end_year, end_month):
+    """Fit a logistic regression of intense-typhoon occurrence on ONI.
+
+    Uses the rows of ``merged_data`` whose ``ISO_TIME`` lies between the first
+    day of the start month and the end of the end month, dropping rows with a
+    missing ``USA_PRES`` or ``ONI``. A row is flagged intense when
+    ``USA_PRES <= 950``.
+
+    Returns:
+        str: the ONI coefficient, its odds ratio and p-value, or a
+        "Pressure Regression Error: ..." message when the fit fails.
+    """
+    start_date = datetime(start_year, start_month, 1)
+    # Exclusive upper bound at the first day of the following month, so that
+    # days 29-31 of the end month are kept (a fixed day-28 cutoff lost them).
+    if end_month == 12:
+        end_exclusive = datetime(end_year + 1, 1, 1)
+    else:
+        end_exclusive = datetime(end_year, end_month + 1, 1)
+    in_range = (merged_data['ISO_TIME'] >= start_date) & (merged_data['ISO_TIME'] < end_exclusive)
+    # Explicit copy: the indicator column below must not be written onto a
+    # slice of the shared merged_data frame.
+    data = merged_data[in_range].dropna(subset=['USA_PRES', 'ONI']).copy()
+    data['intense_typhoon'] = (data['USA_PRES'] <= 950).astype(int)
+    X = sm.add_constant(data['ONI'])
+    y = data['intense_typhoon']
+    try:
+        model = sm.Logit(y, X).fit(disp=0)
+        beta_1 = model.params['ONI']
+        exp_beta_1 = np.exp(beta_1)
+        p_value = model.pvalues['ONI']
+        return f"Pressure Regression: β1={beta_1:.4f}, Odds Ratio={exp_beta_1:.4f}, P-value={p_value:.4f}"
+    except Exception as e:
+        return f"Pressure Regression Error: {e}"
+
+def perform_longitude_regression(start_year, start_month, end_year, end_month):
+    """Fit a logistic regression of western-typhoon occurrence on ONI.
+
+    Uses the rows of ``merged_data`` whose ``ISO_TIME`` lies between the first
+    day of the start month and the end of the end month, dropping rows with a
+    missing ``LON`` or ``ONI``. A row is flagged western when ``LON <= 140``.
+
+    The outcome is binary and the summary reports an odds ratio, so the model
+    is a logit fit, matching the wind and pressure regressions (an OLS slope
+    exponentiated is not an odds ratio).
+
+    Returns:
+        str: the ONI coefficient, its odds ratio and p-value, or a
+        "Longitude Regression Error: ..." message when the fit fails.
+    """
+    start_date = datetime(start_year, start_month, 1)
+    # Exclusive upper bound at the first day of the following month, so that
+    # days 29-31 of the end month are kept (a fixed day-28 cutoff lost them).
+    if end_month == 12:
+        end_exclusive = datetime(end_year + 1, 1, 1)
+    else:
+        end_exclusive = datetime(end_year, end_month + 1, 1)
+    in_range = (merged_data['ISO_TIME'] >= start_date) & (merged_data['ISO_TIME'] < end_exclusive)
+    # Explicit copy: the indicator column below must not be written onto a
+    # slice of the shared merged_data frame.
+    data = merged_data[in_range].dropna(subset=['LON', 'ONI']).copy()
+    data['western_typhoon'] = (data['LON'] <= 140).astype(int)
+    X = sm.add_constant(data['ONI'])
+    y = data['western_typhoon']
+    try:
+        # X already carries the constant column; no second add_constant needed.
+        model = sm.Logit(y, X).fit(disp=0)
+        beta_1 = model.params['ONI']
+        exp_beta_1 = np.exp(beta_1)
+        p_value = model.pvalues['ONI']
+        return f"Longitude Regression: β1={beta_1:.4f}, Odds Ratio={exp_beta_1:.4f}, P-value={p_value:.4f}"
+    except Exception as e:
+        return f"Longitude Regression Error: {e}"
+
+# -----------------------------
+# Visualization Functions (Enhanced)
+# -----------------------------
+
+def get_full_tracks(start_year, start_month, end_year, end_month, enso_phase, typhoon_search):
+    """Plot the full track of every storm in the period, coloured by ENSO phase.
+
+    Storms are selected from ``merged_data`` by ``ISO_TIME`` (first day of the
+    start month through the end of the end month) and, unless ``enso_phase``
+    is ``'all'``, by their classified ENSO phase. Track points come from
+    ``typhoon_data``. When ``typhoon_search`` is given, every storm in
+    ``typhoon_data`` whose ``NAME`` matches it (case-insensitive) is drawn
+    again as a highlighted yellow track.
+
+    Returns:
+        tuple: ``(figure, summary_text)`` where the summary states how many
+        storms were selected.
+    """
+    start_date = datetime(start_year, start_month, 1)
+    # Exclusive upper bound at the first day of the following month, so that
+    # days 29-31 of the end month are kept (a fixed day-28 cutoff lost them).
+    if end_month == 12:
+        end_exclusive = datetime(end_year + 1, 1, 1)
+    else:
+        end_exclusive = datetime(end_year, end_month + 1, 1)
+    in_range = (merged_data['ISO_TIME'] >= start_date) & (merged_data['ISO_TIME'] < end_exclusive)
+    filtered_data = merged_data[in_range].copy()
+    filtered_data['ENSO_Phase'] = filtered_data['ONI'].apply(classify_enso_phases)
+    if enso_phase != 'all':
+        filtered_data = filtered_data[filtered_data['ENSO_Phase'] == enso_phase.capitalize()]
+    unique_storms = filtered_data['SID'].unique()
+    count = len(unique_storms)
+    fig = go.Figure()
+    for sid in unique_storms:
+        storm_data = typhoon_data[typhoon_data['SID'] == sid]
+        if storm_data.empty:
+            continue
+        first_name = storm_data['NAME'].iloc[0]
+        name = first_name if pd.notnull(first_name) else "Unnamed"
+        # The first two characters of the storm ID are shown as the basin tag.
+        basin = storm_data['SID'].iloc[0][:2]
+        storm_oni = filtered_data[filtered_data['SID'] == sid]['ONI'].iloc[0]
+        color = 'red' if storm_oni >= 0.5 else ('blue' if storm_oni <= -0.5 else 'green')
+        fig.add_trace(go.Scattergeo(
+            lon=storm_data['LON'], lat=storm_data['LAT'], mode='lines',
+            name=f"{name} ({basin})",
+            line=dict(width=1.5, color=color), hoverinfo="name"
+        ))
+    # Highlight pass runs once, after all period tracks have been drawn.
+    if typhoon_search:
+        search_mask = typhoon_data['NAME'].str.contains(typhoon_search, case=False, na=False)
+        if search_mask.any():
+            for sid in typhoon_data[search_mask]['SID'].unique():
+                storm_data = typhoon_data[typhoon_data['SID'] == sid]
+                fig.add_trace(go.Scattergeo(
+                    lon=storm_data['LON'], lat=storm_data['LAT'], mode='lines+markers',
+                    name=f"MATCHED: {storm_data['NAME'].iloc[0]}",
+                    line=dict(width=3, color='yellow'),
+                    marker=dict(size=5), hoverinfo="name"
+                ))
+    fig.update_layout(
+        title=f"Typhoon Tracks ({start_year}-{start_month} to {end_year}-{end_month})",
+        geo=dict(
+            projection_type='natural earth',
+            showland=True,
+            showcoastlines=True,
+            landcolor='rgb(243,243,243)',
+            countrycolor='rgb(204,204,204)',
+            coastlinecolor='rgb(204,204,204)',
+            center=dict(lon=140, lat=20),
+            projection_scale=3
+        ),
+        legend_title="Typhoons by ENSO Phase",
+        showlegend=True,
+        height=700
+    )
+    fig.add_annotation(
+        x=0.02, y=0.98, xref="paper", yref="paper",
+        text="Red: El Niño, Blue: La Nina, Green: Neutral",
+        showarrow=False, align="left",
+        bgcolor="rgba(255,255,255,0.8)"
+    )
+    return fig, f"Total typhoons displayed: {count}"
+
+def get_wind_analysis(start_year, start_month, end_year, end_month, enso_phase, typhoon_search):
+    """Scatter maximum wind speed against ONI and run the wind regression.
+
+    Rows are selected from ``merged_data`` by ``ISO_TIME`` (first day of the
+    start month through the end of the end month) and, unless ``enso_phase``
+    is ``'all'``, by their classified ENSO phase. Points are coloured by the
+    ``Category`` column; rows whose ``NAME`` matches ``typhoon_search``
+    (case-insensitive) are overlaid as red stars.
+
+    Returns:
+        tuple: ``(figure, regression_text)`` where the text comes from
+        ``perform_wind_regression`` for the same period.
+    """
+    start_date = datetime(start_year, start_month, 1)
+    # Exclusive upper bound at the first day of the following month, so that
+    # days 29-31 of the end month are kept (a fixed day-28 cutoff lost them).
+    if end_month == 12:
+        end_exclusive = datetime(end_year + 1, 1, 1)
+    else:
+        end_exclusive = datetime(end_year, end_month + 1, 1)
+    in_range = (merged_data['ISO_TIME'] >= start_date) & (merged_data['ISO_TIME'] < end_exclusive)
+    filtered_data = merged_data[in_range].copy()
+    filtered_data['ENSO_Phase'] = filtered_data['ONI'].apply(classify_enso_phases)
+    if enso_phase != 'all':
+        filtered_data = filtered_data[filtered_data['ENSO_Phase'] == enso_phase.capitalize()]
+
+    fig = px.scatter(filtered_data, x='ONI', y='USA_WIND', color='Category',
+                     hover_data=['NAME', 'Year', 'Category'],
+                     title='Wind Speed vs ONI',
+                     labels={'ONI': 'ONI Value', 'USA_WIND': 'Max Wind Speed (knots)'},
+                     color_discrete_map=enhanced_color_map)
+
+    if typhoon_search:
+        mask = filtered_data['NAME'].str.contains(typhoon_search, case=False, na=False)
+        if mask.any():
+            matched = filtered_data.loc[mask]
+            fig.add_trace(go.Scatter(
+                x=matched['ONI'], y=matched['USA_WIND'],
+                mode='markers', marker=dict(size=10, color='red', symbol='star'),
+                name=f'Matched: {typhoon_search}',
+                text=matched['NAME'] + ' (' + matched['Year'].astype(str) + ')'
+            ))
+
+    regression = perform_wind_regression(start_year, start_month, end_year, end_month)
+    return fig, regression
+
+def get_pressure_analysis(start_year, start_month, end_year, end_month, enso_phase, typhoon_search):
+    """Scatter minimum pressure against ONI and run the pressure regression.
+
+    Rows are selected from ``merged_data`` by ``ISO_TIME`` (first day of the
+    start month through the end of the end month) and, unless ``enso_phase``
+    is ``'all'``, by their classified ENSO phase. Points are coloured by the
+    ``Category`` column; rows whose ``NAME`` matches ``typhoon_search``
+    (case-insensitive) are overlaid as red stars.
+
+    Returns:
+        tuple: ``(figure, regression_text)`` where the text comes from
+        ``perform_pressure_regression`` for the same period.
+    """
+    start_date = datetime(start_year, start_month, 1)
+    # Exclusive upper bound at the first day of the following month, so that
+    # days 29-31 of the end month are kept (a fixed day-28 cutoff lost them).
+    if end_month == 12:
+        end_exclusive = datetime(end_year + 1, 1, 1)
+    else:
+        end_exclusive = datetime(end_year, end_month + 1, 1)
+    in_range = (merged_data['ISO_TIME'] >= start_date) & (merged_data['ISO_TIME'] < end_exclusive)
+    filtered_data = merged_data[in_range].copy()
+    filtered_data['ENSO_Phase'] = filtered_data['ONI'].apply(classify_enso_phases)
+    if enso_phase != 'all':
+        filtered_data = filtered_data[filtered_data['ENSO_Phase'] == enso_phase.capitalize()]
+
+    fig = px.scatter(filtered_data, x='ONI', y='USA_PRES', color='Category',
+                     hover_data=['NAME', 'Year', 'Category'],
+                     title='Pressure vs ONI',
+                     labels={'ONI': 'ONI Value', 'USA_PRES': 'Min Pressure (hPa)'},
+                     color_discrete_map=enhanced_color_map)
+
+    if typhoon_search:
+        mask = filtered_data['NAME'].str.contains(typhoon_search, case=False, na=False)
+        if mask.any():
+            matched = filtered_data.loc[mask]
+            fig.add_trace(go.Scatter(
+                x=matched['ONI'], y=matched['USA_PRES'],
+                mode='markers', marker=dict(size=10, color='red', symbol='star'),
+                name=f'Matched: {typhoon_search}',
+                text=matched['NAME'] + ' (' + matched['Year'].astype(str) + ')'
+            ))
+
+    regression = perform_pressure_regression(start_year, start_month, end_year, end_month)
+    return fig, regression
+
+def get_longitude_analysis(start_year, start_month, end_year, end_month, enso_phase, typhoon_search):
+    """Scatter storm longitude against ONI with an OLS trend line.
+
+    Rows are selected from ``merged_data`` by ``ISO_TIME`` (first day of the
+    start month through the end of the end month) and, unless ``enso_phase``
+    is ``'all'``, by their classified ENSO phase. ``typhoon_search`` is
+    accepted for signature parity with the other analysis functions and is
+    not used here.
+
+    Returns:
+        tuple: ``(figure, slope_text, regression_text)``; the last item comes
+        from ``perform_longitude_regression`` for the same period.
+    """
+    start_date = datetime(start_year, start_month, 1)
+    # Exclusive upper bound at the first day of the following month, so that
+    # days 29-31 of the end month are kept (a fixed day-28 cutoff lost them).
+    if end_month == 12:
+        end_exclusive = datetime(end_year + 1, 1, 1)
+    else:
+        end_exclusive = datetime(end_year, end_month + 1, 1)
+    in_range = (merged_data['ISO_TIME'] >= start_date) & (merged_data['ISO_TIME'] < end_exclusive)
+    filtered_data = merged_data[in_range].copy()
+    filtered_data['ENSO_Phase'] = filtered_data['ONI'].apply(classify_enso_phases)
+    if enso_phase != 'all':
+        filtered_data = filtered_data[filtered_data['ENSO_Phase'] == enso_phase.capitalize()]
+
+    fig = px.scatter(filtered_data, x='LON', y='ONI', hover_data=['NAME'],
+                     title='Typhoon Generation Longitude vs ONI (All Years)')
+
+    # OLS does not drop missing values by default, so fit on complete rows only.
+    reg_data = filtered_data.dropna(subset=['LON', 'ONI'])
+    if len(reg_data) > 1:
+        X = sm.add_constant(np.array(reg_data['LON']).reshape(-1, 1))
+        y = reg_data['ONI']
+        try:
+            model = sm.OLS(y, X).fit()
+            y_pred = model.predict(X)
+            fig.add_trace(go.Scatter(x=reg_data['LON'], y=y_pred, mode='lines', name='Regression Line'))
+            # Positional access via an array: integer keys on a labelled
+            # Series are deprecated in recent pandas releases.
+            slope = np.asarray(model.params)[1]
+            slopes_text = f"All Years Slope: {slope:.4f}"
+        except Exception as e:
+            slopes_text = f"Regression Error: {e}"
+    else:
+        slopes_text = "Insufficient data for regression"
+
+    regression = perform_longitude_regression(start_year, start_month, end_year, end_month)
+    return fig, slopes_text, regression
+
+# -----------------------------
+# ENHANCED: Animation Functions with Taiwan Standard Support - FIXED VERSION
+# -----------------------------
+
+def get_available_years(typhoon_data):
+    """List the years covered by the storm table as sorted strings.
+
+    Years are read from ``ISO_TIME`` when that column exists, otherwise from
+    ``SEASON``. The range 2000-2025 is returned when the table is missing or
+    empty, when neither column is present, when no valid year is found, or
+    when any error occurs while reading the table.
+    """
+    default_years = [str(year) for year in range(2000, 2026)]
+    try:
+        if typhoon_data is None or typhoon_data.empty:
+            return default_years
+
+        columns = typhoon_data.columns
+        if 'ISO_TIME' in columns:
+            raw_years = typhoon_data['ISO_TIME'].dt.year.dropna().unique()
+        elif 'SEASON' in columns:
+            raw_years = typhoon_data['SEASON'].dropna().unique()
+        else:
+            # No usable column: default range including 2025
+            raw_years = range(2000, 2026)
+
+        # Normalise to integer-valued strings, then sort
+        labels = [str(int(value)) for value in raw_years if not pd.isna(value)]
+        labels.sort()
+
+        # An empty result falls back to the default range
+        return labels or default_years
+
+    except Exception as e:
+        print(f"Error in get_available_years: {e}")
+        return default_years
+
+def update_typhoon_options_enhanced(year, basin):
+    """Build the storm dropdown update for one year and basin.
+
+    Storms of every intensity are listed. Each label has the form
+    ``"NAME (SID) - category (NNkt)"``, where the category is derived from the
+    storm's maximum ``USA_WIND``. A single placeholder choice is returned when
+    no storm matches or when an error occurs.
+    """
+    no_storms = ["No storms found"]
+    try:
+        year = int(year)
+        columns = typhoon_data.columns
+
+        # Year filter: prefer ISO_TIME, then SEASON, otherwise keep every row
+        if 'ISO_TIME' in columns:
+            year_mask = typhoon_data['ISO_TIME'].dt.year == year
+        elif 'SEASON' in columns:
+            year_mask = typhoon_data['SEASON'] == year
+        else:
+            year_mask = typhoon_data.index >= 0
+
+        year_data = typhoon_data[year_mask].copy()
+
+        # Basin filter: the code is the text before ' - ', else the first two characters
+        if basin != "All Basins":
+            head, separator, _ = basin.partition(' - ')
+            basin_code = head if separator else basin[:2]
+            if 'SID' in year_data.columns:
+                year_data = year_data[year_data['SID'].str.startswith(basin_code, na=False)]
+            elif 'BASIN' in year_data.columns:
+                year_data = year_data[year_data['BASIN'] == basin_code]
+
+        if year_data.empty:
+            return gr.update(choices=no_storms, value=None)
+
+        # One row per storm: first recorded name and peak wind
+        storms = year_data.groupby('SID').agg({
+            'NAME': 'first',
+            'USA_WIND': 'max'
+        }).reset_index()
+        storms['category'] = storms['USA_WIND'].apply(categorize_typhoon_enhanced)
+
+        def _label(raw_name, sid, category, peak_wind):
+            name = raw_name if pd.notna(raw_name) and raw_name != '' else 'UNNAMED'
+            max_wind = peak_wind if pd.notna(peak_wind) else 0
+            return f"{name} ({sid}) - {category} ({max_wind:.0f}kt)"
+
+        options = [
+            _label(raw_name, sid, category, peak_wind)
+            for raw_name, sid, category, peak_wind in zip(
+                storms['NAME'], storms['SID'], storms['category'], storms['USA_WIND']
+            )
+        ]
+
+        if not options:
+            return gr.update(choices=no_storms, value=None)
+
+        return gr.update(choices=sorted(options), value=options[0])
+
+    except Exception as e:
+        print(f"Error in update_typhoon_options_enhanced: {e}")
+        return gr.update(choices=["Error loading storms"], value=None)
+
+def generate_enhanced_track_video_fixed(year, typhoon_selection, standard):
+    """Render an MP4 animation of one storm's track and return its file path.
+
+    Args:
+        year: Label used in the title when the storm data has no ``SEASON``
+            column.
+        typhoon_selection: Dropdown label carrying the storm ID in its first
+            pair of parentheses, e.g. ``"NAME (SID) - ..."``.
+        standard: ``'taiwan'`` selects the Taiwan classification; any other
+            value uses the ``'atlantic'`` classification.
+
+    Returns:
+        Path of the generated ``.mp4`` file, or ``None`` when nothing is
+        selected, the storm has no data, or rendering fails.
+    """
+    if not typhoon_selection or typhoon_selection == "No storms found":
+        return None
+
+    fig = None
+    video_path = None
+    try:
+        # Extract SID from selection
+        sid = typhoon_selection.split('(')[1].split(')')[0]
+
+        # Get storm data
+        storm_df = typhoon_data[typhoon_data['SID'] == sid].copy()
+        if storm_df.empty:
+            print(f"No data found for storm {sid}")
+            return None
+
+        # Sort by time
+        if 'ISO_TIME' in storm_df.columns:
+            storm_df = storm_df.sort_values('ISO_TIME')
+
+        # Extract data for animation
+        lats = storm_df['LAT'].astype(float).values
+        lons = storm_df['LON'].astype(float).values
+
+        if 'USA_WIND' in storm_df.columns:
+            winds = pd.to_numeric(storm_df['USA_WIND'], errors='coerce').fillna(0).values
+        else:
+            winds = np.full(len(lats), 30)
+
+        # Metadata for the title and info box
+        storm_name = storm_df['NAME'].iloc[0] if pd.notna(storm_df['NAME'].iloc[0]) else "UNNAMED"
+        season = storm_df['SEASON'].iloc[0] if 'SEASON' in storm_df.columns else year
+
+        print(f"Generating FIXED video for {storm_name} ({sid}) with {len(lats)} track points using {standard} standard")
+
+        # Figure with a cartopy PlateCarree axes
+        fig = plt.figure(figsize=(16, 10))
+        ax = plt.axes(projection=ccrs.PlateCarree())
+
+        # Map features
+        ax.stock_img()
+        ax.add_feature(cfeature.COASTLINE, linewidth=0.8)
+        ax.add_feature(cfeature.BORDERS, linewidth=0.5)
+        ax.add_feature(cfeature.OCEAN, color='lightblue', alpha=0.5)
+        ax.add_feature(cfeature.LAND, color='lightgray', alpha=0.5)
+
+        # Set extent based on track
+        padding = 5
+        ax.set_extent([
+            min(lons) - padding, max(lons) + padding,
+            min(lats) - padding, max(lats) + padding
+        ])
+
+        # Add gridlines
+        gl = ax.gridlines(draw_labels=True, alpha=0.3)
+        gl.top_labels = gl.right_labels = False
+
+        # Title
+        ax.set_title(f"{season} {storm_name} ({sid}) Track Animation - {standard.upper()} Standard",
+                     fontsize=18, fontweight='bold')
+
+        # Animated artists, all created empty with the map transform
+        track_line, = ax.plot([], [], 'b-', linewidth=3, alpha=0.7,
+                              label='Track', transform=ccrs.PlateCarree())
+
+        # Current position marker
+        current_point, = ax.plot([], [], 'o', markersize=15,
+                                 transform=ccrs.PlateCarree())
+
+        # Historical track points (to show path traversed)
+        history_points, = ax.plot([], [], 'o', markersize=6, alpha=0.4, color='blue',
+                                  transform=ccrs.PlateCarree())
+
+        # Info text box
+        info_box = ax.text(0.02, 0.98, '', transform=ax.transAxes,
+                           fontsize=12, verticalalignment='top',
+                           bbox=dict(boxstyle="round,pad=0.5", facecolor='white', alpha=0.9))
+
+        # Colour legend: category names and colour lookup depend on the standard
+        if standard == 'taiwan':
+            categories = ['Tropical Depression', 'Tropical Storm', 'Severe Tropical Storm',
+                          'Typhoon', 'Severe Typhoon', 'Super Typhoon']
+            color_for = get_taiwan_color_fixed
+        else:
+            categories = ['Tropical Depression', 'Tropical Storm', 'C1 Typhoon', 'C2 Typhoon',
+                          'C3 Strong Typhoon', 'C4 Very Strong Typhoon', 'C5 Super Typhoon']
+            color_for = get_matplotlib_color
+        legend_elements = [
+            plt.Line2D([0], [0], marker='o', color='w',
+                       markerfacecolor=color_for(category), markersize=10, label=category)
+            for category in categories
+        ]
+
+        ax.legend(handles=legend_elements, loc='upper right', fontsize=10)
+
+        def animate_fixed(frame):
+            """Update the track, markers and info box for one frame."""
+            try:
+                if frame >= len(lats):
+                    return track_line, current_point, history_points, info_box
+
+                # Track line up to and including the current frame
+                current_lons = lons[:frame+1]
+                current_lats = lats[:frame+1]
+                track_line.set_data(current_lons, current_lats)
+
+                # Smaller markers for the positions already traversed
+                if frame > 0:
+                    history_points.set_data(current_lons[:-1], current_lats[:-1])
+
+                # Categorise the current wind under the selected standard
+                current_wind = winds[frame]
+
+                if standard == 'taiwan':
+                    category, color = categorize_typhoon_by_standard_fixed(current_wind, 'taiwan')
+                else:
+                    category, color = categorize_typhoon_by_standard_fixed(current_wind, 'atlantic')
+
+                # Debug for first few frames
+                if frame < 3:
+                    print(f"FIXED Frame {frame}: Wind={current_wind:.1f}kt, Category={category}, Color={color}")
+
+                # Update current position marker
+                current_point.set_data([lons[frame]], [lats[frame]])
+                current_point.set_color(color)
+                current_point.set_markersize(12 + current_wind/8)
+
+                # Timestamp for the info box, falling back to the step number
+                if 'ISO_TIME' in storm_df.columns and frame < len(storm_df):
+                    current_time = storm_df.iloc[frame]['ISO_TIME']
+                    time_str = current_time.strftime('%Y-%m-%d %H:%M UTC') if pd.notna(current_time) else 'Unknown'
+                else:
+                    time_str = f"Step {frame+1}"
+
+                # The Taiwan standard additionally shows the wind in m/s
+                if standard == 'taiwan':
+                    wind_ms = current_wind * 0.514444
+                    wind_display = f"{current_wind:.0f} kt ({wind_ms:.1f} m/s)"
+                else:
+                    wind_display = f"{current_wind:.0f} kt"
+
+                info_text = (
+                    f"Storm: {storm_name}\n"
+                    f"Time: {time_str}\n"
+                    f"Position: {lats[frame]:.1f}°N, {lons[frame]:.1f}°E\n"
+                    f"Max Wind: {wind_display}\n"
+                    f"Category: {category}\n"
+                    f"Standard: {standard.upper()}\n"
+                    f"Frame: {frame+1}/{len(lats)}"
+                )
+                info_box.set_text(info_text)
+
+                # Return all modified artists
+                return track_line, current_point, history_points, info_box
+
+            except Exception as e:
+                print(f"Error in animate frame {frame}: {e}")
+                return track_line, current_point, history_points, info_box
+
+        # blit=False is kept from the original: it is noted there as essential for cartopy
+        anim = animation.FuncAnimation(
+            fig, animate_fixed, frames=len(lats),
+            interval=600, blit=False, repeat=True
+        )
+
+        # Reserve a persistent output path, then close the handle right away:
+        # the writer reopens the path itself, so keeping our descriptor open
+        # would only leak it.
+        with tempfile.NamedTemporaryFile(delete=False, suffix='.mp4',
+                                         dir=tempfile.gettempdir()) as temp_file:
+            video_path = temp_file.name
+
+        # Slower FPS for better track visibility
+        writer = animation.FFMpegWriter(
+            fps=2, bitrate=3000, codec='libx264',
+            extra_args=['-pix_fmt', 'yuv420p']
+        )
+
+        print(f"Saving FIXED animation to {video_path}")
+        anim.save(video_path, writer=writer, dpi=120)
+
+        print(f"FIXED video generated successfully: {video_path}")
+        return video_path
+
+    except Exception as e:
+        print(f"Error generating FIXED video: {e}")
+        import os
+        import traceback
+        traceback.print_exc()
+        # Best-effort removal of a partially written output file.
+        if video_path and os.path.exists(video_path):
+            try:
+                os.remove(video_path)
+            except OSError as cleanup_error:
+                print(f"Could not remove incomplete video {video_path}: {cleanup_error}")
+        return None
+    finally:
+        # Release the figure on every path, not only after a successful save.
+        if fig is not None:
+            plt.close(fig)
+
+# FIXED: Update the simplified wrapper function
+def simplified_track_video_fixed(year, basin, typhoon, standard):
+    """Generate the track video for the selected storm, or None when no storm is chosen.
+
+    ``basin`` is accepted but not forwarded; the storm label alone identifies the track.
+    """
+    return generate_enhanced_track_video_fixed(year, typhoon, standard) if typhoon else None
+
+# -----------------------------
+# Enhanced Gradio Interface with Oceanic Data Integration
+# -----------------------------
+
+def _format_optional_number(value, spec):
+    """Format *value* with *spec*, or return 'N/A' when it is missing or non-numeric."""
+    try:
+        return format(value, spec)
+    except (TypeError, ValueError):
+        return 'N/A'
+
+
+def generate_enhanced_environmental_forecast_text(results, base_forecast_text):
+    """Append an environmental-analysis section to the base forecast text.
+
+    Args:
+        results: Prediction dictionary; reads ``current_prediction``,
+            ``environmental_data``, ``route_forecast``, ``confidence_scores``
+            and ``model_info``.
+        base_forecast_text: Forecast text the section is appended to.
+
+    Returns:
+        ``base_forecast_text`` followed by the environmental section, or by a
+        short error note when the section cannot be built.
+    """
+    try:
+        current = results['current_prediction']
+        env_data = results['environmental_data']
+        route_forecast = results['route_forecast']
+
+        # Environmental analysis
+        env_analysis_text = f"""
+
+ENHANCED ENVIRONMENTAL ANALYSIS
+{'='*65}
+
+REAL-TIME OCEANIC CONDITIONS:
+• SST Data Source: {env_data.get('sst_source', 'Unknown')}
+• SLP Data Source: {env_data.get('slp_source', 'Unknown')}
+• Real-time Integration: {'✅ Active' if env_data.get('use_real_data', False) else '❌ Climatological Fallback'}
+
+ENVIRONMENTAL POTENTIAL ANALYSIS:
+• Genesis Potential: {current.get('environmental_potential', 'Unknown')} kt
+• Environmental Favorability: {current.get('environmental_favorability', 'Unknown')}
+• SST Contribution: {current.get('sst_contribution', 0):+.1f} kt
+• Current Environmental Limit: {current.get('environmental_potential', 50):.0f} kt
+
+TRACK-POINT ENVIRONMENTAL CONDITIONS:
+"""
+
+        # Add sample of environmental conditions along track
+        if route_forecast and len(route_forecast) > 0:
+            total = len(route_forecast)
+            # A set removes the repeated indices that short routes produce, so
+            # no track point is listed twice; every index is already < total.
+            sample_points = sorted({0, total//4, total//2, 3*total//4, total-1})
+
+            for i in sample_points:
+                point = route_forecast[i]
+                # 'N/A' cannot take a numeric format spec, so missing SST/SLP
+                # values are rendered through the helper instead of raising.
+                sst_text = _format_optional_number(point.get('sst_celsius'), '.1f')
+                slp_text = _format_optional_number(point.get('slp_hpa'), '.0f')
+                env_analysis_text += f"""
+• Hour {point['hour']}:
+ - Position: {point['lat']:.1f}°N, {point['lon']:.1f}°E
+ - Intensity: {point['intensity_kt']:.0f} kt (Limit: {point.get('environmental_limit', 'N/A')} kt)
+ - SST: {sst_text}°C | SLP: {slp_text} hPa
+ - Development Stage: {point.get('development_stage', 'Unknown')}
+ - Tendency: {point.get('intensity_tendency', 0):+.1f} kt/6hr"""
+
+        env_analysis_text += f"""
+
+OCEANIC DATA QUALITY ASSESSMENT:
+• Position Confidence: {results['confidence_scores'].get('position_72h', 0.5)*100:.0f}% (72hr)
+• Intensity Confidence: {results['confidence_scores'].get('intensity_72h', 0.5)*100:.0f}% (72hr)
+• Environmental Coupling: {results['confidence_scores'].get('environmental_coupling', 0.5)*100:.0f}%
+
+TECHNICAL IMPLEMENTATION:
+• Model: {results['model_info']}
+• Data Protocols: ERDDAP (SST) + OPeNDAP (SLP)
+• Spatial Interpolation: Linear with nearest-neighbor fallback
+• Physics: Emanuel potential intensity + environmental coupling
+ """
+
+        return base_forecast_text + env_analysis_text
+
+    except Exception as e:
+        logging.error(f"Error generating enhanced forecast text: {e}")
+        return base_forecast_text + f"\n\nError in environmental analysis: {str(e)}"
+
+# -----------------------------
+# Load & Process Data
+# -----------------------------
+
+# Module-level dataset handles; they stay None until initialize_data() assigns them
+oni_data = typhoon_data = merged_data = None
+
+def _load_fallback_datasets():
+    """Build the minimal placeholder datasets used when real data cannot be loaded.
+
+    Returns:
+        tuple: ``(oni_data, typhoon_data, merged_data)`` built from a single
+        all-zero ONI year (2000) and ``create_fallback_typhoon_data()``.
+    """
+    months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
+              'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
+    fallback_oni = pd.DataFrame({'Year': [2000], **{month: [0] for month in months}})
+    fallback_typhoon = create_fallback_typhoon_data()
+    oni_long = process_oni_data(fallback_oni)
+    typhoon_max = process_typhoon_data(fallback_typhoon)
+    return fallback_oni, fallback_typhoon, merge_data(oni_long, typhoon_max)
+
+
+def initialize_data():
+    """Load the ONI and typhoon datasets into the module globals.
+
+    Creates the oceanic data manager, refreshes the ONI file, loads both
+    datasets and merges them. When loading returns nothing, or any step
+    raises, the globals are filled with minimal fallback data instead.
+    """
+    global oni_data, typhoon_data, merged_data, oceanic_manager
+    try:
+        logging.info("Starting data loading process...")
+
+        # Initialize oceanic manager
+        oceanic_manager = OceanicDataManager()
+
+        update_oni_data()
+        oni_data, typhoon_data = load_data_fixed(ONI_DATA_PATH, TYPHOON_DATA_PATH)
+
+        if oni_data is not None and typhoon_data is not None:
+            oni_long = process_oni_data(oni_data)
+            typhoon_max = process_typhoon_data(typhoon_data)
+            merged_data = merge_data(oni_long, typhoon_max)
+            logging.info("Data loading complete.")
+        else:
+            logging.error("Failed to load required data")
+            oni_data, typhoon_data, merged_data = _load_fallback_datasets()
+    except Exception as e:
+        # logging.exception keeps the traceback alongside the original message.
+        logging.exception(f"Error during data initialization: {e}")
+        oni_data, typhoon_data, merged_data = _load_fallback_datasets()
+
+def create_interface():
+ """Create the enhanced Gradio interface with oceanic data integration"""
+ try:
+ # Ensure data is available
+ if oni_data is None or typhoon_data is None or merged_data is None:
+ logging.warning("Data not properly loaded, creating minimal interface")
+ return create_minimal_fallback_interface()
+
+ # Get safe data statistics
+ try:
+ total_storms = len(typhoon_data['SID'].unique()) if 'SID' in typhoon_data.columns else 0
+ total_records = len(typhoon_data)
+ available_years = get_available_years(typhoon_data)
+ year_range_display = f"{available_years[0]} - {available_years[-1]}" if available_years else "Unknown"
+ except Exception as e:
+ logging.error(f"Error getting data statistics: {e}")
+ total_storms = 0
+ total_records = 0
+ year_range_display = "Unknown"
+ available_years = [str(year) for year in range(2000, 2026)]
+
+ with gr.Blocks(title="Enhanced Typhoon Analysis Platform with Oceanic Data", theme=gr.themes.Soft()) as demo:
+ gr.Markdown("# 🌊 Enhanced Typhoon Analysis Platform with Real-time Oceanic Data")
+ gr.Markdown("**Advanced ML clustering, real-time SST/SLP integration, route predictions, and comprehensive tropical cyclone analysis**")
+
+ with gr.Tab("🏠 Overview"):
+ overview_text = f"""
+ ## 🌊 Welcome to the Enhanced Typhoon Analysis Dashboard with Oceanic Coupling
+
+ This dashboard provides comprehensive analysis of typhoon data with **real-time oceanic data integration** for unprecedented forecast accuracy.
+
+ ### 🚀 NEW Oceanic Data Features:
+ - **🌊 Real-time SST Data**: NOAA OISST v2 Sea Surface Temperature via ERDDAP
+ - **🌡️ Real-time SLP Data**: NCEP/NCAR Sea Level Pressure via OPeNDAP
+ - **🔄 Dynamic Environmental Coupling**: Live oceanic conditions drive intensity predictions
+ - **📊 Historical Environmental Analysis**: Past storm-environment relationships inform predictions
+ - **🎯 Environmental Potential Index**: Real-time calculation of maximum possible intensity
+ - **🌍 Global Data Coverage**: Automatic fallback to climatology when real-time data unavailable
+
+ ### 📊 Enhanced Capabilities:
+ - **Environmental Intensity Modeling**: SST-driven maximum potential intensity calculations
+ - **Dynamic Steering**: SLP-based atmospheric steering patterns
+ - **ENSO-Environment Coupling**: Combined ENSO and oceanic state influences
+ - **Uncertainty Quantification**: Data quality-based confidence scoring
+ - **Multi-source Integration**: Seamless blending of real-time and climatological data
+
+ ### 📊 Data Status:
+ - **ONI Data**: {len(oni_data)} years loaded
+ - **Typhoon Data**: {total_records:,} records loaded
+ - **Oceanic Data Sources**: NOAA OISST v2 + NCEP/NCAR Reanalysis
+ - **Available Years**: {year_range_display}
+
+ ### 🔧 Technical Infrastructure:
+ - **Real-time Data Access**: xarray + OPeNDAP + ERDDAP protocols
+ - **Environmental Interpolation**: Spatial interpolation to storm locations
+ - **Physics-based Modeling**: Emanuel potential intensity theory implementation
+ - **Fallback Systems**: Robust climatological backup when real-time data unavailable
+
+ ### 🔬 Scientific Accuracy:
+ - **SST-Intensity Relationship**: Based on latest tropical cyclone research
+ - **Shear Parameterization**: ENSO and seasonal wind shear modeling
+ - **Genesis Climatology**: Realistic development regions and frequencies
+ - **Track Forecasting**: Environmental steering with oceanic state dependencies
+ """
+ gr.Markdown(overview_text)
+
+ with gr.Tab("🌊 Real-time Oceanic Storm Prediction"):
+ gr.Markdown("## 🌊 Advanced Storm Development with Live Oceanic Data")
+
+ gr.Markdown("""
+ ### 🔥 Revolutionary Features:
+ - **🌊 Live SST Integration**: Current sea surface temperatures from NOAA satellites
+ - **🌡️ Real-time SLP Data**: Current atmospheric pressure from global reanalysis
+ - **🎯 Environmental Potential**: Real-time calculation of maximum storm intensity
+ - **📈 Historical Learning**: Past storm-environment relationships guide predictions
+ - **🌍 Global Coverage**: Automatic data fetching with intelligent fallbacks
+ """)
+
+ with gr.Row():
+ with gr.Column(scale=2):
+ gr.Markdown("### 🌊 Genesis & Environmental Configuration")
+
+ genesis_options = list(get_realistic_genesis_locations().keys())
+ genesis_region = gr.Dropdown(
+ choices=genesis_options,
+ value="Western Pacific Main Development Region",
+ label="🌊 Typhoon Genesis Region",
+ info="Climatologically realistic development regions"
+ )
+
+ # Enhanced environmental controls
+ with gr.Row():
+ use_real_oceanic = gr.Checkbox(
+ label="🌊 Use Real-time Oceanic Data",
+ value=True,
+ info="Fetch live SST/SLP data (may take 10-30 seconds)"
+ )
+ show_environmental_details = gr.Checkbox(
+ label="📊 Show Environmental Analysis",
+ value=True,
+ info="Display detailed environmental breakdown"
+ )
+
+ # Display selected region info with real-time data status
+ def update_genesis_info_enhanced(region):
+     """Describe the chosen genesis region for the read-only info textbox.
+
+     Returns a four-line summary (location, description, climatological SST
+     and environmental potential, both evaluated for month 9 with ONI 0.0),
+     or a prompt string when ``region`` is not a known genesis location.
+     """
+     known_regions = get_realistic_genesis_locations()
+     if region not in known_regions:
+         return "Select a genesis region"
+
+     spot = known_regions[region]
+     header = f"📍 Location: {spot['lat']:.1f}°N, {spot['lon']:.1f}°E\n📝 {spot['description']}"
+
+     # Climatology is always sampled for September (month 9).
+     september_sst = get_climatological_sst(spot['lat'], spot['lon'], 9)
+     potential = calculate_environmental_intensity_potential(
+         spot['lat'], spot['lon'], 9, 0.0, None, None
+     )
+
+     return "\n".join([
+         header,
+         f"🌡️ Climatological SST: {september_sst:.1f}°C",
+         f"⚡ Environmental Potential: {potential['potential_intensity']:.0f} kt",
+     ])
+
+ genesis_info_display = gr.Textbox(
+ label="Selected Region Analysis",
+ lines=4,
+ interactive=False,
+ value=update_genesis_info_enhanced("Western Pacific Main Development Region")
+ )
+
+ genesis_region.change(
+ fn=update_genesis_info_enhanced,
+ inputs=[genesis_region],
+ outputs=[genesis_info_display]
+ )
+
+ with gr.Row():
+ pred_month = gr.Slider(
+ 1, 12, label="Month", value=9,
+ info="Peak season: Jul-Oct (affects SST/shear patterns)"
+ )
+ pred_oni = gr.Number(
+ label="ONI Value", value=0.0,
+ info="Current ENSO state (-3 to 3, affects oceanic patterns)"
+ )
+
+ with gr.Row():
+ forecast_hours = gr.Number(
+ label="Forecast Length (hours)",
+ value=72,
+ minimum=24,
+ maximum=240,
+ step=6,
+ info="Extended forecasting with environmental evolution"
+ )
+ advanced_physics = gr.Checkbox(
+ label="Advanced Environmental Physics",
+ value=True,
+ info="Full SST-intensity coupling and wind shear modeling"
+ )
+
+ with gr.Row():
+ show_uncertainty = gr.Checkbox(
+ label="Environmental Uncertainty Cone",
+ value=True,
+ info="Uncertainty based on data quality and environmental variability"
+ )
+ enable_animation = gr.Checkbox(
+ label="Animated Development",
+ value=True,
+ info="Watch storm-environment interaction evolve"
+ )
+
+ with gr.Column(scale=1):
+ gr.Markdown("### ⚙️ Oceanic Prediction Controls")
+ predict_oceanic_btn = gr.Button(
+ "🌊 Generate Enhanced Oceanic Forecast",
+ variant="primary",
+ size="lg"
+ )
+
+ gr.Markdown("### 📊 Environmental Conditions")
+ current_intensity = gr.Number(label="Genesis Intensity (kt)", interactive=False)
+ current_category = gr.Textbox(label="Initial Category", interactive=False)
+ environmental_potential = gr.Number(label="Environmental Potential (kt)", interactive=False)
+ environmental_favorability = gr.Textbox(label="Environmental Favorability", interactive=False)
+
+ gr.Markdown("### 🔧 Data Sources")
+ sst_data_source = gr.Textbox(label="SST Data Source", interactive=False)
+ slp_data_source = gr.Textbox(label="SLP Data Source", interactive=False)
+ model_confidence = gr.Textbox(label="Model Info", interactive=False)
+
+ with gr.Row():
+ route_plot = gr.Plot(label="🗺️ Advanced Oceanic-Coupled Forecast")
+
+ with gr.Row():
+ forecast_details = gr.Textbox(
+ label="📋 Comprehensive Environmental Forecast",
+ lines=25,
+ max_lines=30
+ )
+
+ def run_oceanic_prediction(
+         region, month, oni, hours, advanced_phys, uncertainty,
+         animation, use_real_data, show_env_details
+ ):
+     """Button callback: run the oceanic-coupled forecast and unpack it for the UI.
+
+     Returns a 9-tuple in the order of the click handler's outputs: genesis
+     intensity, category, environmental potential, environmental
+     favorability, SST source, SLP source, model info text, figure and
+     forecast text. On any exception the error is logged, the traceback is
+     printed, and a fixed placeholder tuple carrying the error message is
+     returned instead.
+
+     ``advanced_phys`` is accepted to match the click inputs but is not
+     used by this function.
+     """
+     try:
+         forecast = predict_storm_route_and_intensity_with_oceanic_data(
+             region, month, oni, hours,
+             use_real_data=use_real_data,
+             models=None,
+             enable_animation=animation
+         )
+
+         genesis = forecast['current_prediction']
+         environment = forecast['environmental_data']
+
+         # Scalar summary fields, gathered before any plotting work starts.
+         summary_fields = (
+             genesis['intensity_kt'],
+             genesis['category'],
+             genesis.get('environmental_potential', 50),
+             genesis.get('environmental_favorability', 'Unknown'),
+             environment.get('sst_source', 'Unknown'),
+             environment.get('slp_source', 'Unknown'),
+         )
+
+         figure, base_text = create_animated_route_visualization(
+             forecast, uncertainty, animation
+         )
+
+         # Append the environmental breakdown only when the checkbox asks for it.
+         report_text = (
+             generate_enhanced_environmental_forecast_text(forecast, base_text)
+             if show_env_details
+             else base_text
+         )
+
+         realtime_flag = 'Yes' if use_real_data else 'No'
+         model_summary = f"{forecast['model_info']}\nReal-time Data: {realtime_flag}"
+
+         return (*summary_fields, model_summary, figure, report_text)
+
+     except Exception as exc:
+         reason = str(exc)
+         logging.error(f"Enhanced oceanic prediction failed: {reason}")
+         import traceback
+         traceback.print_exc()
+
+         # Placeholder values so every output component still receives something.
+         return (
+             30, "Tropical Depression", 50, "Unknown",
+             "Error", "Error", f"Prediction failed: {reason}",
+             None, f"Error generating enhanced forecast: {reason}"
+         )
+
+ predict_oceanic_btn.click(
+ fn=run_oceanic_prediction,
+ inputs=[
+ genesis_region, pred_month, pred_oni, forecast_hours,
+ advanced_physics, show_uncertainty, enable_animation,
+ use_real_oceanic, show_environmental_details
+ ],
+ outputs=[
+ current_intensity, current_category, environmental_potential,
+ environmental_favorability, sst_data_source, slp_data_source,
+ model_confidence, route_plot, forecast_details
+ ]
+ )
+
+ # Enhanced information section
+ oceanic_info_text = """
+ ### 🌊 Oceanic Data Integration Features:
+
+ #### 🔥 Real-time Data Sources:
+ - **SST**: NOAA OISST v2 - Daily 0.25° resolution satellite-based sea surface temperatures
+ - **SLP**: NCEP/NCAR Reanalysis - 6-hourly 2.5° resolution atmospheric pressure fields
+ - **Coverage**: Global oceans with 1-2 day latency for most recent conditions
+ - **Protocols**: ERDDAP and OPeNDAP for standardized data access
+
+ #### 🧠 Environmental Physics:
+ - **Emanuel Potential Intensity**: Theoretical maximum intensity based on thermodynamics
+ - **SST-Intensity Coupling**: Non-linear relationship between sea surface temperature and storm intensity
+ - **Atmospheric Steering**: Sea level pressure gradients drive storm motion patterns
+ - **Wind Shear Modeling**: Vertical wind shear estimation from pressure patterns and ENSO state
+
+ #### 🎯 Enhanced Accuracy:
+ - **Real-time Environmental Limits**: Current oceanic conditions set maximum achievable intensity
+ - **Dynamic Development**: Storm intensification rate depends on real SST and atmospheric conditions
+ - **Track Steering**: Motion influenced by current pressure patterns rather than climatology alone
+ - **Confidence Scoring**: Higher confidence when real-time data successfully integrated
+
+ #### 🔄 Fallback Systems:
+ - **Automatic Degradation**: Seamlessly switches to climatology if real-time data unavailable
+ - **Quality Assessment**: Evaluates data completeness and provides appropriate confidence levels
+ - **Hybrid Approach**: Combines real-time data with climatological patterns for optimal accuracy
+ - **Error Handling**: Robust system continues operation even with partial data failures
+
+ #### 📊 Output Enhancements:
+ - **Environmental Metadata**: Track-point SST, SLP, and environmental limits
+ - **Data Source Tracking**: Clear indication of real-time vs climatological data usage
+ - **Uncertainty Quantification**: Confidence intervals based on data availability and environmental complexity
+ - **Detailed Analysis**: Comprehensive breakdown of environmental factors affecting development
+ """
+ gr.Markdown(oceanic_info_text)
+
+ with gr.Tab("🔬 Advanced ML Clustering"):
+ gr.Markdown("## 🎯 Storm Pattern Analysis with Separate Visualizations")
+ gr.Markdown("**Four separate plots: Clustering, Routes, Pressure Evolution, and Wind Evolution**")
+
+ with gr.Row():
+ with gr.Column(scale=2):
+ reduction_method = gr.Dropdown(
+ choices=['UMAP', 't-SNE', 'PCA'],
+ value='UMAP' if UMAP_AVAILABLE else 't-SNE',
+ label="🔍 Dimensionality Reduction Method",
+ info="UMAP provides better global structure preservation"
+ )
+ with gr.Column(scale=1):
+ analyze_clusters_btn = gr.Button("🚀 Generate All Cluster Analyses", variant="primary", size="lg")
+
+ with gr.Row():
+ with gr.Column():
+ cluster_plot = gr.Plot(label="📊 Storm Clustering Analysis")
+ with gr.Column():
+ routes_plot = gr.Plot(label="🗺️ Clustered Storm Routes")
+
+ with gr.Row():
+ with gr.Column():
+ pressure_plot = gr.Plot(label="🌡️ Pressure Evolution by Cluster")
+ with gr.Column():
+ wind_plot = gr.Plot(label="💨 Wind Speed Evolution by Cluster")
+
+ with gr.Row():
+ cluster_stats = gr.Textbox(label="📈 Detailed Cluster Statistics", lines=15, max_lines=20)
+
+ def run_separate_clustering_analysis(method):
+ try:
+ # Extract features for clustering
storm_features = extract_storm_features(typhoon_data)
if storm_features is None:
return None, None, None, None, "Error: Could not extract storm features"
@@ -3041,498 +3823,819 @@ def create_interface():
inputs=[reduction_method],
outputs=[cluster_plot, routes_plot, pressure_plot, wind_plot, cluster_stats]
)
-
- cluster_info_text = """
- ### 📊 Enhanced Clustering Features:
- - **Separate Visualizations**: Four distinct plots for comprehensive analysis
- - **Multi-dimensional Analysis**: Uses 15+ storm characteristics including intensity, track shape, genesis location
- - **Route Visualization**: Geographic storm tracks colored by cluster membership
- - **Temporal Analysis**: Pressure and wind evolution patterns by cluster
- - **DBSCAN Clustering**: Automatic pattern discovery without predefined cluster count
- - **Interactive**: Hover over points to see storm details, zoom and pan all plots
-
- ### 🎯 How to Interpret:
- - **Clustering Plot**: Each dot is a storm positioned by similarity (close = similar characteristics)
- - **Routes Plot**: Actual geographic storm tracks, colored by which cluster they belong to
- - **Pressure Plot**: Shows how pressure changes over time for storms in each cluster
- - **Wind Plot**: Shows wind speed evolution patterns for each cluster
- - **Cluster Colors**: Each cluster gets a unique color across all four visualizations
- """
- gr.Markdown(cluster_info_text)
- with gr.Tab("🌊 Realistic Storm Genesis & Prediction"):
- gr.Markdown("## 🌊 Realistic Typhoon Development from Genesis")
-
- if CNN_AVAILABLE:
- gr.Markdown("🧠 **Deep Learning models available** - TensorFlow loaded successfully")
- method_description = "Hybrid CNN-Physics genesis modeling with realistic development cycles"
- else:
- gr.Markdown("🔬 **Physics-based models available** - Using climatological relationships")
- method_description = "Advanced physics-based genesis modeling with environmental coupling"
+ with gr.Tab("🗺️ Track Visualization"):
+ with gr.Row():
+ start_year = gr.Number(label="Start Year", value=2020)
+ start_month = gr.Dropdown(label="Start Month", choices=list(range(1, 13)), value=1)
+ end_year = gr.Number(label="End Year", value=2025)
+ end_month = gr.Dropdown(label="End Month", choices=list(range(1, 13)), value=6)
+ enso_phase = gr.Dropdown(label="ENSO Phase", choices=['all', 'El Nino', 'La Nina', 'Neutral'], value='all')
+ typhoon_search = gr.Textbox(label="Typhoon Search")
+ analyze_btn = gr.Button("Generate Tracks")
+ tracks_plot = gr.Plot()
+ typhoon_count = gr.Textbox(label="Number of Typhoons Displayed")
+ analyze_btn.click(
+ fn=get_full_tracks,
+ inputs=[start_year, start_month, end_year, end_month, enso_phase, typhoon_search],
+ outputs=[tracks_plot, typhoon_count]
+ )
+
+ with gr.Tab("💨 Wind Analysis"):
+ with gr.Row():
+ wind_start_year = gr.Number(label="Start Year", value=2020)
+ wind_start_month = gr.Dropdown(label="Start Month", choices=list(range(1, 13)), value=1)
+ wind_end_year = gr.Number(label="End Year", value=2024)
+ wind_end_month = gr.Dropdown(label="End Month", choices=list(range(1, 13)), value=6)
+ wind_enso_phase = gr.Dropdown(label="ENSO Phase", choices=['all', 'El Nino', 'La Nina', 'Neutral'], value='all')
+ wind_typhoon_search = gr.Textbox(label="Typhoon Search")
+ wind_analyze_btn = gr.Button("Generate Wind Analysis")
+ wind_scatter = gr.Plot()
+ wind_regression_results = gr.Textbox(label="Wind Regression Results")
+ wind_analyze_btn.click(
+ fn=get_wind_analysis,
+ inputs=[wind_start_year, wind_start_month, wind_end_year, wind_end_month, wind_enso_phase, wind_typhoon_search],
+ outputs=[wind_scatter, wind_regression_results]
+ )
+
+ with gr.Tab("🌡️ Pressure Analysis"):
+ with gr.Row():
+ pressure_start_year = gr.Number(label="Start Year", value=2020)
+ pressure_start_month = gr.Dropdown(label="Start Month", choices=list(range(1, 13)), value=1)
+ pressure_end_year = gr.Number(label="End Year", value=2024)
+ pressure_end_month = gr.Dropdown(label="End Month", choices=list(range(1, 13)), value=6)
+ pressure_enso_phase = gr.Dropdown(label="ENSO Phase", choices=['all', 'El Nino', 'La Nina', 'Neutral'], value='all')
+ pressure_typhoon_search = gr.Textbox(label="Typhoon Search")
+ pressure_analyze_btn = gr.Button("Generate Pressure Analysis")
+ pressure_scatter = gr.Plot()
+ pressure_regression_results = gr.Textbox(label="Pressure Regression Results")
+ pressure_analyze_btn.click(
+ fn=get_pressure_analysis,
+ inputs=[pressure_start_year, pressure_start_month, pressure_end_year, pressure_end_month, pressure_enso_phase, pressure_typhoon_search],
+ outputs=[pressure_scatter, pressure_regression_results]
+ )
+
+ with gr.Tab("🌏 Longitude Analysis"):
+ with gr.Row():
+ lon_start_year = gr.Number(label="Start Year", value=2020)
+ lon_start_month = gr.Dropdown(label="Start Month", choices=list(range(1, 13)), value=1)
+ lon_end_year = gr.Number(label="End Year", value=2020)
+ lon_end_month = gr.Dropdown(label="End Month", choices=list(range(1, 13)), value=6)
+ lon_enso_phase = gr.Dropdown(label="ENSO Phase", choices=['all', 'El Nino', 'La Nina', 'Neutral'], value='all')
+ lon_typhoon_search = gr.Textbox(label="Typhoon Search (Optional)")
+ lon_analyze_btn = gr.Button("Generate Longitude Analysis")
+ regression_plot = gr.Plot()
+ slopes_text = gr.Textbox(label="Regression Slopes")
+ lon_regression_results = gr.Textbox(label="Longitude Regression Results")
+ lon_analyze_btn.click(
+ fn=get_longitude_analysis,
+ inputs=[lon_start_year, lon_start_month, lon_end_year, lon_end_month, lon_enso_phase, lon_typhoon_search],
+ outputs=[regression_plot, slopes_text, lon_regression_results]
+ )
+
+ with gr.Tab("🎬 Enhanced Track Animation"):
+ gr.Markdown("## 🎥 High-Quality Storm Track Visualization (Atlantic & Taiwan Standards)")
- gr.Markdown(f"**Current Method**: {method_description}")
- gr.Markdown("**🌊 Realistic Genesis**: Select from climatologically accurate development regions")
- gr.Markdown("**📈 TD Starting Point**: Storms begin at realistic Tropical Depression intensities (25-35 kt)")
- gr.Markdown("**🎬 Animation Support**: Watch storm development unfold over time")
+ with gr.Row():
+ year_dropdown = gr.Dropdown(
+ label="Year",
+ choices=available_years,
+ value=available_years[-1] if available_years else "2024"
+ )
+ basin_dropdown = gr.Dropdown(
+ label="Basin",
+ choices=["All Basins", "WP - Western Pacific", "EP - Eastern Pacific", "NA - North Atlantic"],
+ value="All Basins"
+ )
with gr.Row():
- with gr.Column(scale=2):
- gr.Markdown("### 🌊 Genesis Configuration")
- genesis_options = list(get_realistic_genesis_locations().keys())
- genesis_region = gr.Dropdown(
- choices=genesis_options,
- value="Western Pacific Main Development Region",
- label="Typhoon Genesis Region",
- info="Select realistic development region based on climatology"
- )
-
- # Display selected region info
- def update_genesis_info(region):
- locations = get_realistic_genesis_locations()
- if region in locations:
- info = locations[region]
- return f"📍 Location: {info['lat']:.1f}°N, {info['lon']:.1f}°E\n📝 {info['description']}"
- return "Select a genesis region"
-
- genesis_info_display = gr.Textbox(
- label="Selected Region Info",
- lines=2,
- interactive=False,
- value=update_genesis_info("Western Pacific Main Development Region")
- )
+ typhoon_dropdown = gr.Dropdown(label="Storm Selection (All Categories Including TD)")
+ standard_dropdown = gr.Dropdown(
+ label="🎌 Classification Standard",
+ choices=['atlantic', 'taiwan'],
+ value='atlantic',
+ info="Atlantic: International standard | Taiwan: Local meteorological standard"
+ )
+
+ generate_video_btn = gr.Button("🎬 Generate Enhanced Animation", variant="primary")
+ video_output = gr.Video(label="Storm Track Animation")
+
+ # Update storm options when year or basin changes
+ for input_comp in [year_dropdown, basin_dropdown]:
+ input_comp.change(
+ fn=update_typhoon_options_enhanced,
+ inputs=[year_dropdown, basin_dropdown],
+ outputs=[typhoon_dropdown]
+ )
+
+ # Generate video with fixed function
+ generate_video_btn.click(
+ fn=generate_enhanced_track_video_fixed,
+ inputs=[year_dropdown, typhoon_dropdown, standard_dropdown],
+ outputs=[video_output]
+ )
+
+ with gr.Tab("📊 Data Statistics & Insights"):
+ gr.Markdown("## 📈 Comprehensive Dataset Analysis")
+
+ # Create enhanced data summary
+ try:
+ if len(typhoon_data) > 0:
+ # Storm category distribution
+ storm_cats = typhoon_data.groupby('SID')['USA_WIND'].max().apply(categorize_typhoon_enhanced)
+ cat_counts = storm_cats.value_counts()
- genesis_region.change(
- fn=update_genesis_info,
- inputs=[genesis_region],
- outputs=[genesis_info_display]
+ # Create distribution chart with enhanced colors
+ fig_dist = px.bar(
+ x=cat_counts.index,
+ y=cat_counts.values,
+ title="Storm Intensity Distribution (Including Tropical Depressions)",
+ labels={'x': 'Category', 'y': 'Number of Storms'},
+ color=cat_counts.index,
+ color_discrete_map=enhanced_color_map
)
- with gr.Row():
- pred_month = gr.Slider(1, 12, label="Month", value=9, info="Peak season: Jul-Oct")
- pred_oni = gr.Number(label="ONI Value", value=0.0, info="ENSO index (-3 to 3)")
- with gr.Row():
- forecast_hours = gr.Number(
- label="Forecast Length (hours)",
- value=72,
- minimum=20,
- maximum=100000,
- step=6,
- info="Extended forecasting: 20-1000hours (42 days max)"
+ # Seasonal distribution
+ if 'ISO_TIME' in typhoon_data.columns:
+ seasonal_data = typhoon_data.copy()
+ seasonal_data['Month'] = seasonal_data['ISO_TIME'].dt.month
+ monthly_counts = seasonal_data.groupby(['Month', 'SID']).size().groupby('Month').size()
+
+ fig_seasonal = px.bar(
+ x=monthly_counts.index,
+ y=monthly_counts.values,
+ title="Seasonal Storm Distribution",
+ labels={'x': 'Month', 'y': 'Number of Storms'},
+ color=monthly_counts.values,
+ color_continuous_scale='Viridis'
)
- advanced_physics = gr.Checkbox(
- label="Advanced Physics",
- value=True,
- info="Enhanced environmental modeling"
+ else:
+ fig_seasonal = None
+
+ # Basin distribution
+ if 'SID' in typhoon_data.columns:
+ basin_data = typhoon_data['SID'].str[:2].value_counts()
+ fig_basin = px.pie(
+ values=basin_data.values,
+ names=basin_data.index,
+ title="Distribution by Basin"
)
+ else:
+ fig_basin = None
+
with gr.Row():
- show_uncertainty = gr.Checkbox(label="Show Uncertainty Cone", value=True)
- enable_animation = gr.Checkbox(
- label="Enable Animation",
- value=True,
- info="Animated storm development vs static view"
- )
+ gr.Plot(value=fig_dist)
+
+ if fig_seasonal:
+ with gr.Row():
+ gr.Plot(value=fig_seasonal)
+
+ if fig_basin:
+ with gr.Row():
+ gr.Plot(value=fig_basin)
+
+ except Exception as e:
+ gr.Markdown(f"Visualization error: {str(e)}")
+
+ # Enhanced statistics
+ total_storms = len(typhoon_data['SID'].unique()) if 'SID' in typhoon_data.columns else 0
+ total_records = len(typhoon_data)
+
+ if 'SEASON' in typhoon_data.columns:
+ try:
+ min_year = int(typhoon_data['SEASON'].min())
+ max_year = int(typhoon_data['SEASON'].max())
+ year_range = f"{min_year}-{max_year}"
+ years_covered = typhoon_data['SEASON'].nunique()
+ except (ValueError, TypeError):
+ year_range = "Unknown"
+ years_covered = 0
+ else:
+ year_range = "Unknown"
+ years_covered = 0
+
+ if 'SID' in typhoon_data.columns:
+ try:
+ basins_available = ', '.join(sorted(typhoon_data['SID'].str[:2].unique()))
+ avg_storms_per_year = total_storms / max(years_covered, 1)
+ except Exception:
+ basins_available = "Unknown"
+ avg_storms_per_year = 0
+ else:
+ basins_available = "Unknown"
+ avg_storms_per_year = 0
- with gr.Column(scale=1):
- gr.Markdown("### ⚙️ Prediction Controls")
- predict_btn = gr.Button("🌊 Generate Realistic Storm Forecast", variant="primary", size="lg")
-
- gr.Markdown("### 📊 Genesis Conditions")
- current_intensity = gr.Number(label="Genesis Intensity (kt)", interactive=False)
- current_category = gr.Textbox(label="Initial Category", interactive=False)
- model_confidence = gr.Textbox(label="Model Info", interactive=False)
+ # TD specific statistics
+ try:
+ if 'USA_WIND' in typhoon_data.columns:
+ td_storms = len(typhoon_data[typhoon_data['USA_WIND'] < 34]['SID'].unique())
+ ts_storms = len(typhoon_data[(typhoon_data['USA_WIND'] >= 34) & (typhoon_data['USA_WIND'] < 64)]['SID'].unique())
+ typhoon_storms = len(typhoon_data[typhoon_data['USA_WIND'] >= 64]['SID'].unique())
+ td_percentage = (td_storms / max(total_storms, 1)) * 100
+ else:
+ td_storms = ts_storms = typhoon_storms = 0
+ td_percentage = 0
+ except Exception as e:
+ print(f"Error calculating TD statistics: {e}")
+ td_storms = ts_storms = typhoon_storms = 0
+ td_percentage = 0
- with gr.Row():
- route_plot = gr.Plot(label="🗺️ Advanced Route & Intensity Forecast")
+ # Create statistics text safely
+ stats_text = f"""
+ ### 📊 Enhanced Dataset Summary:
+ - **Total Unique Storms**: {total_storms:,}
+ - **Total Track Records**: {total_records:,}
+ - **Year Range**: {year_range} ({years_covered} years)
+ - **Basins Available**: {basins_available}
+ - **Average Storms/Year**: {avg_storms_per_year:.1f}
- with gr.Row():
- forecast_details = gr.Textbox(label="📋 Detailed Forecast Summary", lines=20, max_lines=25)
+ ### 🌪️ Storm Category Breakdown:
+ - **Tropical Depressions**: {td_storms:,} storms ({td_percentage:.1f}%)
+ - **Tropical Storms**: {ts_storms:,} storms
+ - **Typhoons (C1-C5)**: {typhoon_storms:,} storms
- def run_realistic_prediction(region, month, oni, hours, advanced_phys, uncertainty, animation):
- try:
- # Run realistic prediction with genesis region
- results = predict_storm_route_and_intensity_realistic(
- region, month, oni,
- forecast_hours=hours,
- use_advanced_physics=advanced_phys
- )
-
- # Extract genesis conditions
- current = results['current_prediction']
- intensity = current['intensity_kt']
- category = current['category']
- genesis_info = results.get('genesis_info', {})
-
- # Create enhanced visualization
- fig, forecast_text = create_animated_route_visualization(
- results, uncertainty, animation
- )
-
- model_info = f"{results['model_info']}\nGenesis: {genesis_info.get('description', 'Unknown')}"
-
- return (
- intensity,
- category,
- model_info,
- fig,
- forecast_text
- )
- except Exception as e:
- error_msg = f"Realistic prediction failed: {str(e)}"
- logging.error(error_msg)
- import traceback
- traceback.print_exc()
- return (
- 30, "Tropical Depression", f"Prediction failed: {str(e)}",
- None, f"Error generating realistic forecast: {str(e)}"
- )
+ ### 🚀 Platform Capabilities:
+ - **Complete TD Analysis** - First platform to include comprehensive TD tracking
+ - **Dual Classification Systems** - Both Atlantic and Taiwan standards supported
+ - **Advanced ML Clustering** - DBSCAN pattern recognition with separate visualizations
+ - **Real-time Oceanic Predictions** - Physics-based with SST/SLP integration
+ - **2025 Data Ready** - Full compatibility with current season data
+ - **Enhanced Animations** - Professional-quality storm track videos
+ - **Multi-basin Analysis** - Comprehensive Pacific and Atlantic coverage
- predict_btn.click(
- fn=run_realistic_prediction,
- inputs=[genesis_region, pred_month, pred_oni, forecast_hours, advanced_physics, show_uncertainty, enable_animation],
- outputs=[current_intensity, current_category, model_confidence, route_plot, forecast_details]
- )
+ ### 🔬 Research Applications:
+ - Climate change impact studies
+ - Seasonal forecasting research
+ - Storm pattern classification
+ - ENSO-typhoon relationship analysis
+ - Oceanic-atmospheric coupling research
+ - Cross-regional classification comparisons
+ """
+ gr.Markdown(stats_text)
- with gr.Tab("🗺️ Track Visualization"):
- with gr.Row():
- start_year = gr.Number(label="Start Year", value=2020)
- start_month = gr.Dropdown(label="Start Month", choices=list(range(1, 13)), value=1)
- end_year = gr.Number(label="End Year", value=2025)
- end_month = gr.Dropdown(label="End Month", choices=list(range(1, 13)), value=6)
- enso_phase = gr.Dropdown(label="ENSO Phase", choices=['all', 'El Nino', 'La Nina', 'Neutral'], value='all')
- typhoon_search = gr.Textbox(label="Typhoon Search")
- analyze_btn = gr.Button("Generate Tracks")
- tracks_plot = gr.Plot()
- typhoon_count = gr.Textbox(label="Number of Typhoons Displayed")
- analyze_btn.click(
- fn=get_full_tracks,
- inputs=[start_year, start_month, end_year, end_month, enso_phase, typhoon_search],
- outputs=[tracks_plot, typhoon_count]
- )
+ return demo
+
+ except Exception as e:
+ logging.error(f"Error creating Gradio interface: {e}")
+ import traceback
+ traceback.print_exc()
+ # Create a minimal fallback interface
+ return create_minimal_fallback_interface()
+
+def create_minimal_fallback_interface():
+    """Build the reduced two-tab Gradio app used when the full interface is unavailable.
+
+    Returns:
+        The ``gr.Blocks`` object: a static "Status" tab plus a "Debug" tab
+        whose button fills a textbox with environment and data-load details.
+    """
+    status_notes = """
+ ## Platform Status
+
+ The application is running but encountered issues loading the full interface.
+ This could be due to:
+ - Data loading problems
+ - Missing dependencies
+ - Configuration issues
+
+ ### Available Features:
+ - Basic interface is functional
+ - Error logs are being generated
+ - System is ready for debugging
+
+ ### Next Steps:
+ 1. Check the console logs for detailed error information
+ 2. Verify all required data files are accessible
+ 3. Ensure all dependencies are properly installed
+ 4. Try restarting the application
+ """
+
+    def get_debug_info():
+        """Report working directory, optional-feature flags, dataset load state and data-file presence."""
+        return f"""
+ Python Environment:
+ - Working Directory: {os.getcwd()}
+ - Data Path: {DATA_PATH}
+ - UMAP Available: {UMAP_AVAILABLE}
+ - CNN Available: {CNN_AVAILABLE}
+
+ Data Status:
+ - ONI Data: {'Loaded' if oni_data is not None else 'Failed'}
+ - Typhoon Data: {'Loaded' if typhoon_data is not None else 'Failed'}
+ - Merged Data: {'Loaded' if merged_data is not None else 'Failed'}
+
+ File Checks:
+ - ONI Path Exists: {os.path.exists(ONI_DATA_PATH)}
+ - Typhoon Path Exists: {os.path.exists(TYPHOON_DATA_PATH)}
+ """
+
+    with gr.Blocks() as fallback_app:
+        gr.Markdown("# Enhanced Typhoon Analysis Platform")
+        gr.Markdown("**Notice**: Loading with minimal interface due to data issues.")
+
+        with gr.Tab("Status"):
+            gr.Markdown(status_notes)
+
+        with gr.Tab("Debug"):
+            gr.Markdown("## Debug Information")
+
+            refresh_button = gr.Button("Get Debug Info")
+            debug_box = gr.Textbox(label="Debug Information", lines=15)
+            # The report is built on demand, each time the button is pressed.
+            refresh_button.click(fn=get_debug_info, outputs=debug_box)
+
+    return fallback_app
+
+# Initialize data
+initialize_data()
+# NOTE(review): initialize_data() and create_interface() are called again further down.
+# Create and launch the interface
+demo = create_interface()
+# NOTE(review): the indented lines after this guard look like an orphaned function tail.
+if __name__ == "__main__":
+    demo.launch(share=True)  # Enable sharing with public link
+ 'lon': current_lon,
+ 'intensity_kt': current_intensity,
+ 'category': categorize_typhoon_enhanced(current_intensity),
+ 'confidence': confidence,
+ 'development_stage': stage,
+ 'forward_speed_kmh': base_speed * 111, # Convert to km/h
+ 'pressure_hpa': max(900, 1013 - (current_intensity - 25) * 0.9),
+ 'environmental_limit': environmental_limit,
+ 'sst_celsius': current_sst,
+ 'slp_hpa': current_slp,
+ 'intensity_tendency': intensity_tendency
+ })
+
+ results['route_forecast'] = route_points
+
+ # Enhanced confidence scores with environmental factors
+ base_confidence = 0.90 if use_real_data else 0.75
+
+ results['confidence_scores'] = {
+ 'genesis': base_confidence,
+ 'early_development': base_confidence - 0.05,
+ 'position_24h': base_confidence - 0.08,
+ 'position_48h': base_confidence - 0.15,
+ 'position_72h': base_confidence - 0.25,
+ 'intensity_24h': (base_confidence - 0.10) if use_real_data else 0.65,
+ 'intensity_48h': (base_confidence - 0.20) if use_real_data else 0.55,
+ 'intensity_72h': (base_confidence - 0.30) if use_real_data else 0.45,
+ 'environmental_coupling': 0.85 if use_real_data else 0.60
+ }
+
+ # Enhanced model information
+ data_sources = []
+ if sst_data and sst_data['success']:
+ data_sources.append("NOAA OISST v2")
+ if slp_data and slp_data['success']:
+ data_sources.append("NCEP/NCAR Reanalysis")
+
+ if data_sources:
+ results['model_info'] = f"Enhanced Oceanic Model using {', '.join(data_sources)}"
+ else:
+ results['model_info'] = "Enhanced Climatological Model"
+
+ logging.info(f"Enhanced prediction complete: {len(route_points)} forecast points")
+ return results
+
+ except Exception as e:
+ logging.error(f"Error in enhanced oceanic prediction: {e}")
+ import traceback
+ traceback.print_exc()
+
+ # Fallback to basic prediction
+ return predict_storm_route_and_intensity_realistic(
+ genesis_region, month, oni_value, models, forecast_hours, True
+ )
+
+def calculate_environmental_steering_speed(lat, lon, month, oni_value, slp_data):
+    """Return the storm forward speed in degrees/hour at (lat, lon).
+
+    The 0.15 deg/h base is scaled by latitude band and, when slp_data['success']
+    is truthy, by the interpolated sea-level pressure (x1.2 below 1008 hPa, x0.8
+    above 1015 hPa). month and oni_value are accepted but unused.
+    """
+    base_speed = 0.15  # Default speed in degrees/hour
+    if lat < 20:
+        speed_factor = 0.8  # Slower in tropics
+    elif lat < 30:
+        speed_factor = 1.2  # Faster in subtropics
+    else:
+        speed_factor = 1.5  # Fast in mid-latitudes
+    if slp_data and slp_data['success']:
+        try:
+            slp_value = oceanic_manager.interpolate_data_to_point(slp_data, lat, lon, 'slp')
+            if not np.isnan(slp_value):
+                # Readings above 2000 cannot be hPa, so treat them as Pa and convert
+                slp_hpa = slp_value / 100 if slp_value > 2000 else slp_value
+                if slp_hpa < 1008:  # Low pressure - faster motion
+                    speed_factor *= 1.2
+                elif slp_hpa > 1015:  # High pressure - slower motion
+                    speed_factor *= 0.8
+        except Exception as e:  # Best-effort: keep the latitude-only factor on failure
+            logging.debug(f"SLP steering adjustment skipped: {e}")
+    return base_speed * speed_factor
+
+def calculate_motion_tendency(lat, lon, month, oni_value, hour, slp_data):
+    """Return (lat, lon) motion tendencies for one step, with random jitter.
+
+    The regime is picked from latitude relative to a month-dependent ridge
+    latitude, then shifted by ENSO phase; lon and slp_data are accepted but unused.
+    """
+    ridge_lat = 32 + 4 * np.sin(2 * np.pi * (month - 6) / 4)
+    south_edge, recurve_edge = ridge_lat - 10, ridge_lat - 3
+
+    # (poleward, zonal) pair per regime; negative zonal values are westward
+    if lat < south_edge:
+        dlat, dlon = 0.05, -0.12
+    elif lat > recurve_edge:
+        dlat, dlon = 0.15, 0.08  # recurvature: strong poleward, eastward
+    else:
+        dlat, dlon = 0.08, -0.06
+
+    el_nino, la_nina = oni_value > 0.5, oni_value < -0.5
+    if el_nino:
+        dlon += 0.03
+        dlat += 0.01
+    elif la_nina:
+        dlon -= 0.04
+
+    # Jitter sigma widens linearly with forecast hour; the latitude draw comes first
+    sigma = 0.02 + (hour / 120) * 0.03
+    noise = np.random.normal(0, sigma), np.random.normal(0, sigma)
+    return dlat + noise[0], dlon + noise[1]
+
+def calculate_environmental_intensity_change(
+    current_intensity, environmental_limit, hour, lat, lon, month, oni_value, sst_data
+):
+    """Return the intensity tendency for one forecast step, including random noise.
+
+    Combines a stage-based tendency (forced to -1.0 or lower once the environmental
+    limit is reached), a best-effort SST adjustment, and land / high-latitude
+    penalties. month and oni_value are accepted but unused.
+    """
+    # Base intensity tendency based on development stage
+    if hour <= 48:  # Development phase
+        if current_intensity < environmental_limit * 0.6:
+            base_tendency = 3.5  # Rapid development possible
+        elif current_intensity < environmental_limit * 0.8:
+            base_tendency = 2.0  # Moderate development
+        else:
+            base_tendency = 0.5  # Near limit
+    elif hour <= 120:  # Mature phase
+        if current_intensity < environmental_limit:
+            base_tendency = 1.0  # Slow intensification
+        else:
+            base_tendency = -0.5  # Slight weakening
+    else:  # Extended phase
+        base_tendency = -2.0  # General weakening trend
+
+    # Environmental limit constraint
+    if current_intensity >= environmental_limit:
+        base_tendency = min(base_tendency, -1.0)  # Force weakening if over limit
+
+    # SST effects on development rate
+    if sst_data and sst_data['success']:
+        try:
+            sst_value = oceanic_manager.interpolate_data_to_point(sst_data, lat, lon, 'sst')
+            if not np.isnan(sst_value):
+                sst_celsius = sst_value if sst_value < 50 else sst_value - 273.15
+                if sst_celsius >= 29.5:  # Very warm - enhanced development
+                    base_tendency += 1.5
+                elif sst_celsius >= 28.0:  # Warm - normal development
+                    base_tendency += 0.5
+                elif sst_celsius < 26.5:  # Cool - inhibited development
+                    base_tendency -= 2.0
+        except Exception as e:
+            # Best-effort: keep the tendency computed so far if the SST lookup fails
+            logging.debug(f"SST intensity adjustment skipped: {e}")
+    # Land interaction
+    if lon < 110 or (120 < lon < 125 and lat > 20):  # Near land masses
+        base_tendency -= 8.0
+    # High latitude weakening
+    if lat > 35:
+        base_tendency -= 10.0
+    elif lat > 30:
+        base_tendency -= 4.0
+    # Add realistic intensity uncertainty
+    return base_tendency + np.random.normal(0, 1.0)
+
+def calculate_dynamic_confidence(hour, lat, lon, use_real_data, sst_success, slp_success):
+    """Score forecast confidence for one point, clamped to the range [0.25, 0.95].
+
+    Starts from 0.92, adds a bonus per successfully loaded field when real data
+    is in use, and subtracts lead-time and location penalties.
+    """
+    # Each live field earns its own bonus, but only when real data is enabled
+    data_bonus = 0.0
+    if use_real_data and sst_success:
+        data_bonus += 0.08
+    if use_real_data and slp_success:
+        data_bonus += 0.05
+
+    # Larger penalty for lat above 30 or lon below 115, a milder one for lat above 25
+    if lat > 30 or lon < 115:
+        environment_penalty = 0.12
+    elif lat > 25:
+        environment_penalty = 0.06
+    else:
+        environment_penalty = 0.0
+
+    time_penalty = (hour / 120) * 0.35  # grows linearly with forecast hour
+    score = 0.92 + data_bonus - time_penalty - environment_penalty
+    return max(0.25, min(0.95, score))
+
+def get_environmental_development_stage(hour, intensity, environmental_limit):
+    """Name the storm's development stage from forecast hour and relative intensity.
+
+    The intensity fraction is intensity / max(environmental_limit, 50); it only
+    matters for 24 < hour <= 120. Earlier hours are always 'Genesis' and later
+    ones always 'Extended Forecast'.
+    """
+    ratio = intensity / max(environmental_limit, 50)
+
+    if hour <= 24:
+        return 'Genesis'
+    if hour <= 72:
+        # Development window: three tiers by intensity fraction
+        if ratio < 0.3:
+            return 'Early Development'
+        return 'Active Development' if ratio < 0.6 else 'Rapid Development'
+    if hour <= 120:
+        # Mature window: a single split at 0.8
+        return 'Peak Intensity' if ratio > 0.8 else 'Mature Stage'
+    return 'Extended Forecast'
+
+def predict_storm_route_and_intensity_realistic(genesis_region, month, oni_value, models=None, forecast_hours=72, use_advanced_physics=True):
+ """Realistic prediction with proper typhoon speeds and development"""
+ try:
+ genesis_locations = get_realistic_genesis_locations()
+
+ if genesis_region not in genesis_locations:
+ genesis_region = "Western Pacific Main Development Region" # Default
+
+ genesis_info = genesis_locations[genesis_region]
+ lat = genesis_info["lat"]
+ lon = genesis_info["lon"]
+
+ results = {
+ 'current_prediction': {},
+ 'route_forecast': [],
+ 'confidence_scores': {},
+ 'model_info': 'Realistic Genesis Model',
+ 'genesis_info': genesis_info
+ }
+
+ # REALISTIC starting intensity - Tropical Depression level
+ base_intensity = 30 # Start at TD level (25-35 kt)
+
+ # Environmental factors for genesis
+ if oni_value > 1.0: # Strong El Niño - suppressed development
+ intensity_modifier = -6
+ elif oni_value > 0.5: # Moderate El Niño
+ intensity_modifier = -3
+ elif oni_value < -1.0: # Strong La Niña - enhanced development
+ intensity_modifier = +8
+ elif oni_value < -0.5: # Moderate La Niña
+ intensity_modifier = +5
+ else: # Neutral
+ intensity_modifier = oni_value * 2
+
+ # Seasonal genesis effects
+ seasonal_factors = {
+ 1: -8, 2: -6, 3: -4, 4: -2, 5: 2, 6: 6,
+ 7: 10, 8: 12, 9: 15, 10: 10, 11: 4, 12: -5
+ }
+ seasonal_modifier = seasonal_factors.get(month, 0)
+
+ # Genesis region favorability
+ region_factors = {
+ "Western Pacific Main Development Region": 8,
+ "South China Sea": 4,
+ "Philippine Sea": 5,
+ "Marshall Islands": 7,
+ "Monsoon Trough": 6,
+ "ITCZ Region": 3,
+ "Subtropical Region": 2,
+ "Bay of Bengal": 4,
+ "Eastern Pacific": 6,
+ "Atlantic MDR": 5
+ }
+ region_modifier = region_factors.get(genesis_region, 0)
+
+ # Calculate realistic starting intensity (TD level)
+ predicted_intensity = base_intensity + intensity_modifier + seasonal_modifier + region_modifier
+ predicted_intensity = max(25, min(40, predicted_intensity)) # Keep in TD-weak TS range
+
+ # Add realistic uncertainty for genesis
+ intensity_uncertainty = np.random.normal(0, 2)
+ predicted_intensity += intensity_uncertainty
+ predicted_intensity = max(25, min(38, predicted_intensity)) # TD range
+
+ results['current_prediction'] = {
+ 'intensity_kt': predicted_intensity,
+ 'pressure_hpa': 1008 - (predicted_intensity - 25) * 0.6, # Realistic TD pressure
+ 'category': categorize_typhoon_enhanced(predicted_intensity),
+ 'genesis_region': genesis_region
+ }
+
+ # REALISTIC route prediction with proper typhoon speeds
+ current_lat = lat
+ current_lon = lon
+ current_intensity = predicted_intensity
+
+ route_points = []
+
+ # Track storm development over time with REALISTIC SPEEDS
+ for hour in range(0, forecast_hours + 6, 6):
- with gr.Tab("💨 Wind Analysis"):
- with gr.Row():
- wind_start_year = gr.Number(label="Start Year", value=2020)
- wind_start_month = gr.Dropdown(label="Start Month", choices=list(range(1, 13)), value=1)
- wind_end_year = gr.Number(label="End Year", value=2024)
- wind_end_month = gr.Dropdown(label="End Month", choices=list(range(1, 13)), value=6)
- wind_enso_phase = gr.Dropdown(label="ENSO Phase", choices=['all', 'El Nino', 'La Nina', 'Neutral'], value='all')
- wind_typhoon_search = gr.Textbox(label="Typhoon Search")
- wind_analyze_btn = gr.Button("Generate Wind Analysis")
- wind_scatter = gr.Plot()
- wind_regression_results = gr.Textbox(label="Wind Regression Results")
- wind_analyze_btn.click(
- fn=get_wind_analysis,
- inputs=[wind_start_year, wind_start_month, wind_end_year, wind_end_month, wind_enso_phase, wind_typhoon_search],
- outputs=[wind_scatter, wind_regression_results]
- )
+ # REALISTIC typhoon motion - much faster speeds
+ # Typical typhoon forward speed: 15-25 km/h (0.14-0.23°/hour)
- with gr.Tab("🌡️ Pressure Analysis"):
- with gr.Row():
- pressure_start_year = gr.Number(label="Start Year", value=2020)
- pressure_start_month = gr.Dropdown(label="Start Month", choices=list(range(1, 13)), value=1)
- pressure_end_year = gr.Number(label="End Year", value=2024)
- pressure_end_month = gr.Dropdown(label="End Month", choices=list(range(1, 13)), value=6)
- pressure_enso_phase = gr.Dropdown(label="ENSO Phase", choices=['all', 'El Nino', 'La Nina', 'Neutral'], value='all')
- pressure_typhoon_search = gr.Textbox(label="Typhoon Search")
- pressure_analyze_btn = gr.Button("Generate Pressure Analysis")
- pressure_scatter = gr.Plot()
- pressure_regression_results = gr.Textbox(label="Pressure Regression Results")
- pressure_analyze_btn.click(
- fn=get_pressure_analysis,
- inputs=[pressure_start_year, pressure_start_month, pressure_end_year, pressure_end_month, pressure_enso_phase, pressure_typhoon_search],
- outputs=[pressure_scatter, pressure_regression_results]
- )
+ # Base forward speed depends on latitude and storm intensity
+ if current_lat < 20: # Low latitude - slower
+ base_speed = 0.12 # ~13 km/h
+ elif current_lat < 30: # Mid latitude - moderate
+ base_speed = 0.18 # ~20 km/h
+ else: # High latitude - faster
+ base_speed = 0.25 # ~28 km/h
- with gr.Tab("🌏 Longitude Analysis"):
- with gr.Row():
- lon_start_year = gr.Number(label="Start Year", value=2020)
- lon_start_month = gr.Dropdown(label="Start Month", choices=list(range(1, 13)), value=1)
- lon_end_year = gr.Number(label="End Year", value=2020)
- lon_end_month = gr.Dropdown(label="End Month", choices=list(range(1, 13)), value=6)
- lon_enso_phase = gr.Dropdown(label="ENSO Phase", choices=['all', 'El Nino', 'La Nina', 'Neutral'], value='all')
- lon_typhoon_search = gr.Textbox(label="Typhoon Search (Optional)")
- lon_analyze_btn = gr.Button("Generate Longitude Analysis")
- regression_plot = gr.Plot()
- slopes_text = gr.Textbox(label="Regression Slopes")
- lon_regression_results = gr.Textbox(label="Longitude Regression Results")
- lon_analyze_btn.click(
- fn=get_longitude_analysis,
- inputs=[lon_start_year, lon_start_month, lon_end_year, lon_end_month, lon_enso_phase, lon_typhoon_search],
- outputs=[regression_plot, slopes_text, lon_regression_results]
- )
+ # Intensity affects speed (stronger storms can move faster)
+ intensity_speed_factor = 1.0 + (current_intensity - 50) / 200
+ base_speed *= max(0.8, min(1.4, intensity_speed_factor))
- with gr.Tab("🎬 Enhanced Track Animation"):
- gr.Markdown("## 🎥 High-Quality Storm Track Visualization (Atlantic & Taiwan Standards)")
-
- with gr.Row():
- year_dropdown = gr.Dropdown(
- label="Year",
- choices=available_years,
- value=available_years[-1] if available_years else "2024"
- )
- basin_dropdown = gr.Dropdown(
- label="Basin",
- choices=["All Basins", "WP - Western Pacific", "EP - Eastern Pacific", "NA - North Atlantic"],
- value="All Basins"
- )
-
- with gr.Row():
- typhoon_dropdown = gr.Dropdown(label="Storm Selection (All Categories Including TD)")
- standard_dropdown = gr.Dropdown(
- label="🎌 Classification Standard",
- choices=['atlantic', 'taiwan'],
- value='atlantic',
- info="Atlantic: International standard | Taiwan: Local meteorological standard"
- )
-
- generate_video_btn = gr.Button("🎬 Generate Enhanced Animation", variant="primary")
- video_output = gr.Video(label="Storm Track Animation")
-
- # Update storm options when year or basin changes
- for input_comp in [year_dropdown, basin_dropdown]:
- input_comp.change(
- fn=update_typhoon_options_enhanced,
- inputs=[year_dropdown, basin_dropdown],
- outputs=[typhoon_dropdown]
- )
-
- # FIXED: Generate video with fixed function
- generate_video_btn.click(
- fn=generate_enhanced_track_video_fixed,
- inputs=[year_dropdown, typhoon_dropdown, standard_dropdown],
- outputs=[video_output]
- )
-
- # FIXED animation info text with corrected Taiwan standards
- animation_info_text = """
- ### 🎬 Enhanced Animation Features:
- - **Dual Standards**: Full support for both Atlantic and Taiwan classification systems
- - **Full TD Support**: Now displays Tropical Depressions (< 34 kt) in gray
- - **2025 Compatibility**: Complete support for current year data
- - **Enhanced Maps**: Better cartographic projections with terrain features
- - **Smart Scaling**: Storm symbols scale dynamically with intensity
- - **Real-time Info**: Live position, time, and meteorological data display
- - **Professional Styling**: Publication-quality animations with proper legends
- - **Optimized Export**: Fast rendering with web-compatible video formats
- - **FIXED Animation**: Tracks now display properly with cartopy integration
-
- ### 🎌 Taiwan Standard Features (CORRECTED):
- - **CMA 2006 Standards**: Uses official China Meteorological Administration classification
- - **Six Categories**: TD → TS → STS → TY → STY → Super TY
- - **Correct Thresholds**:
- * Tropical Depression: < 17.2 m/s (< 33.4 kt)
- * Tropical Storm: 17.2-24.4 m/s (33.4-47.5 kt)
- * Severe Tropical Storm: 24.5-32.6 m/s (47.6-63.5 kt)
- * Typhoon: 32.7-41.4 m/s (63.6-80.6 kt)
- * Severe Typhoon: 41.5-50.9 m/s (80.7-99.1 kt)
- * Super Typhoon: ≥51.0 m/s (≥99.2 kt)
- - **m/s Display**: Shows both knots and meters per second
- - **CWB Compatible**: Matches Central Weather Bureau classifications
- - **Fixed Color Coding**: Gray → Blue → Cyan → Yellow → Orange → Red
- """
- gr.Markdown(animation_info_text)
-
- with gr.Tab("📊 Data Statistics & Insights"):
- gr.Markdown("## 📈 Comprehensive Dataset Analysis")
-
- # Create enhanced data summary
- try:
- if len(typhoon_data) > 0:
- # Storm category distribution
- storm_cats = typhoon_data.groupby('SID')['USA_WIND'].max().apply(categorize_typhoon_enhanced)
- cat_counts = storm_cats.value_counts()
-
- # Create distribution chart with enhanced colors
- fig_dist = px.bar(
- x=cat_counts.index,
- y=cat_counts.values,
- title="Storm Intensity Distribution (Including Tropical Depressions)",
- labels={'x': 'Category', 'y': 'Number of Storms'},
- color=cat_counts.index,
- color_discrete_map=enhanced_color_map
- )
-
- # Seasonal distribution
- if 'ISO_TIME' in typhoon_data.columns:
- seasonal_data = typhoon_data.copy()
- seasonal_data['Month'] = seasonal_data['ISO_TIME'].dt.month
- monthly_counts = seasonal_data.groupby(['Month', 'SID']).size().groupby('Month').size()
-
- fig_seasonal = px.bar(
- x=monthly_counts.index,
- y=monthly_counts.values,
- title="Seasonal Storm Distribution",
- labels={'x': 'Month', 'y': 'Number of Storms'},
- color=monthly_counts.values,
- color_continuous_scale='Viridis'
- )
- else:
- fig_seasonal = None
-
- # Basin distribution
- if 'SID' in typhoon_data.columns:
- basin_data = typhoon_data['SID'].str[:2].value_counts()
- fig_basin = px.pie(
- values=basin_data.values,
- names=basin_data.index,
- title="Distribution by Basin"
- )
- else:
- fig_basin = None
-
- with gr.Row():
- gr.Plot(value=fig_dist)
-
- if fig_seasonal:
- with gr.Row():
- gr.Plot(value=fig_seasonal)
-
- if fig_basin:
- with gr.Row():
- gr.Plot(value=fig_basin)
-
- except Exception as e:
- gr.Markdown(f"Visualization error: {str(e)}")
-
- # Enhanced statistics - FIXED formatting
- total_storms = len(typhoon_data['SID'].unique()) if 'SID' in typhoon_data.columns else 0
- total_records = len(typhoon_data)
-
- if 'SEASON' in typhoon_data.columns:
- try:
- min_year = int(typhoon_data['SEASON'].min())
- max_year = int(typhoon_data['SEASON'].max())
- year_range = f"{min_year}-{max_year}"
- years_covered = typhoon_data['SEASON'].nunique()
- except (ValueError, TypeError):
- year_range = "Unknown"
- years_covered = 0
+ # Beta drift (Coriolis effect) - realistic values
+ beta_drift_lat = 0.02 * np.sin(np.radians(current_lat))
+ beta_drift_lon = -0.05 * np.cos(np.radians(current_lat))
+
+ # Seasonal steering patterns with realistic speeds
+ if month in [6, 7, 8, 9]: # Peak season
+ ridge_strength = 1.2
+ ridge_position = 32 + 4 * np.sin(2 * np.pi * (month - 6) / 4)
+ else: # Off season
+ ridge_strength = 0.9
+ ridge_position = 28
+
+ # REALISTIC motion based on position relative to subtropical ridge
+ if current_lat < ridge_position - 10: # Well south of ridge - westward movement
+ lat_tendency = base_speed * 0.3 + beta_drift_lat # Slight poleward
+ lon_tendency = -base_speed * 0.9 + beta_drift_lon # Strong westward
+ elif current_lat > ridge_position - 3: # Near ridge - recurvature
+ lat_tendency = base_speed * 0.8 + beta_drift_lat # Strong poleward
+ lon_tendency = base_speed * 0.4 + beta_drift_lon # Eastward
+ else: # In between - normal WNW motion
+ lat_tendency = base_speed * 0.4 + beta_drift_lat # Moderate poleward
+ lon_tendency = -base_speed * 0.7 + beta_drift_lon # Moderate westward
+
+ # ENSO steering modulation (realistic effects)
+ if oni_value > 0.5: # El Niño - more eastward/poleward motion
+ lon_tendency += 0.05
+ lat_tendency += 0.02
+ elif oni_value < -0.5: # La Niña - more westward motion
+ lon_tendency -= 0.08
+ lat_tendency -= 0.01
+
+ # Add motion uncertainty that grows with time (realistic error growth)
+ motion_uncertainty = 0.02 + (hour / 120) * 0.04
+ lat_noise = np.random.normal(0, motion_uncertainty)
+ lon_noise = np.random.normal(0, motion_uncertainty)
+
+ # Update position with realistic speeds
+ current_lat += lat_tendency + lat_noise
+ current_lon += lon_tendency + lon_noise
+
+ # REALISTIC intensity evolution with proper development cycles
+
+            # Development phase (first 48 hours) - realistic intensification
+            if hour <= 48:
+                if current_intensity < 50:  # Still weak - rapid development possible
+                    # The best box lies inside the favorable box, so it must be tested first
+                    if 120 <= current_lon <= 155 and 15 <= current_lat <= 20:  # Best environment
+                        intensity_tendency = 6.0 if current_intensity < 40 else 4.0
+                    elif 10 <= current_lat <= 25 and 115 <= current_lon <= 165:  # Favorable environment
+                        intensity_tendency = 4.5 if current_intensity < 35 else 3.0
+                    else:
+                        intensity_tendency = 2.0
+                elif current_intensity < 80:  # Moderate intensity
+                    intensity_tendency = 2.5 if (120 <= current_lon <= 155 and 10 <= current_lat <= 25) else 1.0
+                else: intensity_tendency = 1.0  # Already strong
+
+ # Mature phase (48-120 hours) - peak intensity maintenance
+ elif hour <= 120:
+ if current_lat < 25 and current_lon > 120: # Still in favorable waters
+ if current_intensity < 120:
+ intensity_tendency = 1.5
+ else:
+ intensity_tendency = 0.0 # Maintain intensity
else:
- year_range = "Unknown"
- years_covered = 0
-
- if 'SID' in typhoon_data.columns:
- try:
- basins_available = ', '.join(sorted(typhoon_data['SID'].str[:2].unique()))
- avg_storms_per_year = total_storms / max(years_covered, 1)
- except Exception:
- basins_available = "Unknown"
- avg_storms_per_year = 0
+ intensity_tendency = -1.5
+
+ # Extended phase (120+ hours) - gradual weakening
+ else:
+ if current_lat < 30 and current_lon > 115:
+ intensity_tendency = -2.0 # Slow weakening
else:
- basins_available = "Unknown"
- avg_storms_per_year = 0
-
- # TD specific statistics
- try:
- if 'USA_WIND' in typhoon_data.columns:
- td_storms = len(typhoon_data[typhoon_data['USA_WIND'] < 34]['SID'].unique())
- ts_storms = len(typhoon_data[(typhoon_data['USA_WIND'] >= 34) & (typhoon_data['USA_WIND'] < 64)]['SID'].unique())
- typhoon_storms = len(typhoon_data[typhoon_data['USA_WIND'] >= 64]['SID'].unique())
- td_percentage = (td_storms / max(total_storms, 1)) * 100
- else:
- td_storms = ts_storms = typhoon_storms = 0
- td_percentage = 0
- except Exception as e:
- print(f"Error calculating TD statistics: {e}")
- td_storms = ts_storms = typhoon_storms = 0
- td_percentage = 0
-
- # Create statistics text safely
- stats_text = f"""
- ### 📊 Enhanced Dataset Summary:
- - **Total Unique Storms**: {total_storms:,}
- - **Total Track Records**: {total_records:,}
- - **Year Range**: {year_range} ({years_covered} years)
- - **Basins Available**: {basins_available}
- - **Average Storms/Year**: {avg_storms_per_year:.1f}
-
- ### 🌪️ Storm Category Breakdown:
- - **Tropical Depressions**: {td_storms:,} storms ({td_percentage:.1f}%)
- - **Tropical Storms**: {ts_storms:,} storms
- - **Typhoons (C1-C5)**: {typhoon_storms:,} storms
-
- ### 🚀 Platform Capabilities:
- - **Complete TD Analysis** - First platform to include comprehensive TD tracking
- - **Dual Classification Systems** - Both Atlantic and Taiwan standards supported
- - **Advanced ML Clustering** - DBSCAN pattern recognition with separate visualizations
- - **Real-time Predictions** - Physics-based and optional CNN intensity forecasting
- - **2025 Data Ready** - Full compatibility with current season data
- - **Enhanced Animations** - Professional-quality storm track videos
- - **Multi-basin Analysis** - Comprehensive Pacific and Atlantic coverage
-
- ### 🔬 Research Applications:
- - Climate change impact studies
- - Seasonal forecasting research
- - Storm pattern classification
- - ENSO-typhoon relationship analysis
- - Intensity prediction model development
- - Cross-regional classification comparisons
- """
- gr.Markdown(stats_text)
-
- return demo
- except Exception as e:
- logging.error(f"Error creating Gradio interface: {e}")
- import traceback
- traceback.print_exc()
- # Create a minimal fallback interface
- return create_minimal_fallback_interface()
-
-def create_minimal_fallback_interface():
- """Create a minimal fallback interface when main interface fails"""
- with gr.Blocks() as demo:
- gr.Markdown("# Enhanced Typhoon Analysis Platform")
- gr.Markdown("**Notice**: Loading with minimal interface due to data issues.")
-
- with gr.Tab("Status"):
- gr.Markdown("""
- ## Platform Status
+ intensity_tendency = -3.5 # Faster weakening
- The application is running but encountered issues loading the full interface.
- This could be due to:
- - Data loading problems
- - Missing dependencies
- - Configuration issues
+ # Environmental modulation (realistic effects)
+ if current_lat > 35: # High latitude - rapid weakening
+ intensity_tendency -= 12
+ elif current_lat > 30: # Moderate latitude
+ intensity_tendency -= 5
+ elif current_lon < 110: # Land interaction
+ intensity_tendency -= 15
+ elif 125 <= current_lon <= 155 and 10 <= current_lat <= 25: # Warm pool
+ intensity_tendency += 2
+ elif 160 <= current_lon <= 180 and 15 <= current_lat <= 30: # Still warm
+ intensity_tendency += 1
- ### Available Features:
- - Basic interface is functional
- - Error logs are being generated
- - System is ready for debugging
+ # SST effects (realistic temperature impact)
+ if current_lat < 8: # Very warm but weak Coriolis
+ intensity_tendency += 0.5
+ elif 8 <= current_lat <= 20: # Sweet spot for development
+ intensity_tendency += 2.0
+ elif 20 < current_lat <= 30: # Marginal
+ intensity_tendency -= 1.0
+ elif current_lat > 30: # Cool waters
+ intensity_tendency -= 4.0
- ### Next Steps:
- 1. Check the console logs for detailed error information
- 2. Verify all required data files are accessible
- 3. Ensure all dependencies are properly installed
- 4. Try restarting the application
- """)
-
- with gr.Tab("Debug"):
- gr.Markdown("## Debug Information")
+ # Shear effects (simplified but realistic)
+ if month in [12, 1, 2, 3]: # High shear season
+ intensity_tendency -= 2.0
+ elif month in [7, 8, 9]: # Low shear season
+ intensity_tendency += 1.0
- def get_debug_info():
- debug_text = f"""
- Python Environment:
- - Working Directory: {os.getcwd()}
- - Data Path: {DATA_PATH}
- - UMAP Available: {UMAP_AVAILABLE}
- - CNN Available: {CNN_AVAILABLE}
-
- Data Status:
- - ONI Data: {'Loaded' if oni_data is not None else 'Failed'}
- - Typhoon Data: {'Loaded' if typhoon_data is not None else 'Failed'}
- - Merged Data: {'Loaded' if merged_data is not None else 'Failed'}
-
- File Checks:
- - ONI Path Exists: {os.path.exists(ONI_DATA_PATH)}
- - Typhoon Path Exists: {os.path.exists(TYPHOON_DATA_PATH)}
- """
- return debug_text
+ # Update intensity with realistic bounds and variability
+ intensity_noise = np.random.normal(0, 1.5) # Small random fluctuations
+ current_intensity += intensity_tendency + intensity_noise
+ current_intensity = max(20, min(185, current_intensity)) # Realistic range
- debug_btn = gr.Button("Get Debug Info")
- debug_output = gr.Textbox(label="Debug Information", lines=15)
- debug_btn.click(fn=get_debug_info, outputs=debug_output)
-
- return demo
+ # Calculate confidence based on forecast time and environment
+ base_confidence = 0.92
+ time_penalty = (hour / 120) * 0.45
+ environment_penalty = 0.15 if current_lat > 30 or current_lon < 115 else 0
+ confidence = max(0.25, base_confidence - time_penalty - environment_penalty)
+
+ # Determine development stage
+ if hour <= 24:
+ stage = 'Genesis'
+ elif hour <= 72:
+ stage = 'Development'
+ elif hour <= 120:
+ stage = 'Mature'
+ elif hour <= 240:
+ stage = 'Extended'
+ else:
+ stage = 'Long-term'
+
+ route_points.append({
+ 'hour': hour,
+ 'lat': current_lat,
+ 'lon': current_lon,
+ 'intensity_kt': current_intensity,
+ 'category': categorize_typhoon_enhanced(current_intensity),
+ 'confidence': confidence,
+ 'development_stage': stage,
+ 'forward_speed_kmh': base_speed * 111, # Convert to km/h
+ 'pressure_hpa': max(900, 1013 - (current_intensity - 25) * 0.9)
+ })
+
+ results['route_forecast'] = route_points
+
+ # Realistic confidence scores
+ results['confidence_scores'] = {
+ 'genesis': 0.88,
+ 'early_development': 0.82,
+ 'position_24h': 0.85,
+ 'position_48h': 0.78,
+ 'position_72h': 0.68,
+ 'intensity_24h': 0.75,
+ 'intensity_48h': 0.65,
+ 'intensity_72h': 0.55,
+ 'long_term': max(0.3, 0.8 - (forecast_hours / 240) * 0.5)
+ }
+
+ # Model information
+ results['model_info'] = f"Enhanced Realistic Model - {genesis_region}"
+
+ return results
+
+ except Exception as e:
+ logging.error(f"Realistic prediction error: {str(e)}")
+ return {
+ 'error': f"Prediction error: {str(e)}",
+ 'current_prediction': {'intensity_kt': 30, 'category': 'Tropical Depression'},
+ 'route_forecast': [],
+ 'confidence_scores': {},
+ 'model_info': 'Error in prediction'
+ }
+
+# Thin wrapper with the same signature as predict_storm_route_and_intensity_realistic, routed through the oceanic-data predictor
+def predict_storm_route_and_intensity_realistic_enhanced(
+    genesis_region, month, oni_value, models=None,
+    forecast_hours=72, use_advanced_physics=True
+):
+    """Delegate to the oceanic-data predictor with real data and animation on (use_advanced_physics is not forwarded)."""
+    oceanic_options = {'use_real_data': True, 'models': models, 'enable_animation': True}
+    return predict_storm_route_and_intensity_with_oceanic_data(
+        genesis_region, month, oni_value, forecast_hours, **oceanic_options
+    )
+
+# Initialize data
+initialize_data()
# Create and launch the interface
demo = create_interface()