diff --git "a/app.py" "b/app.py"
--- "a/app.py"
+++ "b/app.py"
@@ -41,7 +41,7 @@ import tempfile
import shutil
import xarray as xr
-# Advanced ML imports
+# NEW: Advanced ML imports
try:
import umap.umap_ as umap
UMAP_AVAILABLE = True
@@ -52,10 +52,12 @@ except ImportError:
# Optional CNN imports with robust error handling
CNN_AVAILABLE = False
try:
- os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
+ # Set environment variables before importing TensorFlow
+ os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' # Suppress TensorFlow warnings
import tensorflow as tf
from tensorflow.keras import layers, models
- tf.config.set_visible_devices([], 'GPU')
+    # Configure TensorFlow for CPU-only use before running any ops
+ tf.config.set_visible_devices([], 'GPU') # Disable GPU to avoid conflicts
CNN_AVAILABLE = True
print("TensorFlow successfully loaded - CNN features enabled")
except Exception as e:
@@ -78,11 +80,13 @@ logging.basicConfig(
format='%(asctime)s - %(levelname)s - %(message)s'
)
-# FIXED: Data path setup
+# Data path setup (argument parser removed to simplify startup)
DATA_PATH = '/tmp/typhoon_data' if 'SPACE_ID' in os.environ else tempfile.gettempdir()
+# Ensure directory exists and is writable
try:
os.makedirs(DATA_PATH, exist_ok=True)
+ # Test write permissions
test_file = os.path.join(DATA_PATH, 'test_write.txt')
with open(test_file, 'w') as f:
f.write('test')
@@ -102,17 +106,20 @@ MERGED_DATA_CSV = os.path.join(DATA_PATH, 'merged_typhoon_era5_data.csv')
BASIN_FILES = {
'EP': 'ibtracs.EP.list.v04r01.csv',
'NA': 'ibtracs.NA.list.v04r01.csv',
- 'WP': 'ibtracs.WP.list.v04r01.csv',
- 'ALL': 'ibtracs.ALL.list.v04r01.csv' # Added ALL basin option
+ 'WP': 'ibtracs.WP.list.v04r01.csv'
}
IBTRACS_BASE_URL = 'https://www.ncei.noaa.gov/data/international-best-track-archive-for-climate-stewardship-ibtracs/v04r01/access/csv/'
+LOCAL_IBTRACS_PATH = os.path.join(DATA_PATH, 'ibtracs.WP.list.v04r01.csv')
+CACHE_FILE = os.path.join(DATA_PATH, 'ibtracs_cache.pkl')
+CACHE_EXPIRY_DAYS = 1
# -----------------------------
-# FIXED: Color Maps and Standards with TD Support
+# ENHANCED: Color Maps and Standards with TD Support - FIXED TAIWAN CLASSIFICATION
# -----------------------------
+# Enhanced color mapping with TD support (for Plotly)
enhanced_color_map = {
'Unknown': 'rgb(200, 200, 200)',
- 'Tropical Depression': 'rgb(128, 128, 128)',
+ 'Tropical Depression': 'rgb(128, 128, 128)', # Gray for TD
'Tropical Storm': 'rgb(0, 0, 255)',
'C1 Typhoon': 'rgb(0, 255, 255)',
'C2 Typhoon': 'rgb(0, 255, 0)',
@@ -121,26 +128,42 @@ enhanced_color_map = {
'C5 Super Typhoon': 'rgb(255, 0, 0)'
}
+# Matplotlib-compatible color mapping (hex colors)
matplotlib_color_map = {
'Unknown': '#C8C8C8',
- 'Tropical Depression': '#808080',
- 'Tropical Storm': '#0000FF',
- 'C1 Typhoon': '#00FFFF',
- 'C2 Typhoon': '#00FF00',
- 'C3 Strong Typhoon': '#FFFF00',
- 'C4 Very Strong Typhoon': '#FFA500',
- 'C5 Super Typhoon': '#FF0000'
+ 'Tropical Depression': '#808080', # Gray for TD
+ 'Tropical Storm': '#0000FF', # Blue
+ 'C1 Typhoon': '#00FFFF', # Cyan
+ 'C2 Typhoon': '#00FF00', # Green
+ 'C3 Strong Typhoon': '#FFFF00', # Yellow
+ 'C4 Very Strong Typhoon': '#FFA500', # Orange
+ 'C5 Super Typhoon': '#FF0000' # Red
}
+# FIXED: Taiwan color mapping with correct CMA 2006 standards
taiwan_color_map_fixed = {
- 'Tropical Depression': '#808080',
- 'Tropical Storm': '#0000FF',
- 'Severe Tropical Storm': '#00FFFF',
- 'Typhoon': '#FFFF00',
- 'Severe Typhoon': '#FFA500',
- 'Super Typhoon': '#FF0000'
+ 'Tropical Depression': '#808080', # Gray
+ 'Tropical Storm': '#0000FF', # Blue
+ 'Severe Tropical Storm': '#00FFFF', # Cyan
+ 'Typhoon': '#FFFF00', # Yellow
+ 'Severe Typhoon': '#FFA500', # Orange
+ 'Super Typhoon': '#FF0000' # Red
}
+def rgb_string_to_hex(rgb_string):
+ """Convert 'rgb(r,g,b)' string to hex color for matplotlib"""
+ try:
+ # Extract numbers from 'rgb(r,g,b)' format
+ import re
+ numbers = re.findall(r'\d+', rgb_string)
+ if len(numbers) == 3:
+ r, g, b = map(int, numbers)
+ return f'#{r:02x}{g:02x}{b:02x}'
+ else:
+ return '#808080' # Default gray
+    except Exception:
+ return '#808080' # Default gray
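+# Editor's sketch - example conversions (inputs chosen for illustration):
+#   rgb_string_to_hex('rgb(255, 165, 0)')  -> '#ffa500'
+#   rgb_string_to_hex('not a color')       -> '#808080' (fallback gray)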
+
def get_matplotlib_color(category):
"""Get matplotlib-compatible color for a storm category"""
return matplotlib_color_map.get(category, '#808080')
@@ -162,7 +185,17 @@ ROUTE_COLORS = [
'#FF00CC', '#00FFCC', '#CC00FF', '#CCFF00', '#00CCFF'
]
-# Classification standards
+# Original color map for backward compatibility
+color_map = {
+ 'C5 Super Typhoon': 'rgb(255, 0, 0)',
+ 'C4 Very Strong Typhoon': 'rgb(255, 165, 0)',
+ 'C3 Strong Typhoon': 'rgb(255, 255, 0)',
+ 'C2 Typhoon': 'rgb(0, 255, 0)',
+ 'C1 Typhoon': 'rgb(0, 255, 255)',
+ 'Tropical Storm': 'rgb(0, 0, 255)',
+ 'Tropical Depression': 'rgb(128, 128, 128)'
+}
+
atlantic_standard = {
'C5 Super Typhoon': {'wind_speed': 137, 'color': 'Red', 'hex': '#FF0000'},
'C4 Very Strong Typhoon': {'wind_speed': 113, 'color': 'Orange', 'hex': '#FFA500'},
@@ -173,6 +206,7 @@ atlantic_standard = {
'Tropical Depression': {'wind_speed': 0, 'color': 'Gray', 'hex': '#808080'}
}
+# FIXED: Taiwan standard with correct CMA 2006 thresholds
taiwan_standard_fixed = {
'Super Typhoon': {'wind_speed_ms': 51.0, 'wind_speed_kt': 99.2, 'color': 'Red', 'hex': '#FF0000'},
'Severe Typhoon': {'wind_speed_ms': 41.5, 'wind_speed_kt': 80.7, 'color': 'Orange', 'hex': '#FFA500'},
@@ -183,20 +217,26 @@ taiwan_standard_fixed = {
}
# -----------------------------
-# FIXED: Utility Functions
+# Utility Functions for HF Spaces
# -----------------------------
def safe_file_write(file_path, data_frame, backup_dir=None):
"""Safely write DataFrame to CSV with backup and error handling"""
try:
+ # Create directory if it doesn't exist
os.makedirs(os.path.dirname(file_path), exist_ok=True)
+
+ # Try to write to a temporary file first
temp_path = file_path + '.tmp'
data_frame.to_csv(temp_path, index=False)
+
+        # If successful, atomically replace the final file
+        os.replace(temp_path, file_path)
logging.info(f"Successfully saved {len(data_frame)} records to {file_path}")
return True
- except Exception as e:
- logging.error(f"Error saving file {file_path}: {e}")
+
+ except PermissionError as e:
+ logging.warning(f"Permission denied writing to {file_path}: {e}")
if backup_dir:
try:
backup_path = os.path.join(backup_dir, os.path.basename(file_path))
@@ -206,9 +246,44 @@ def safe_file_write(file_path, data_frame, backup_dir=None):
except Exception as backup_e:
logging.error(f"Failed to save to backup location: {backup_e}")
return False
+
+ except Exception as e:
+ logging.error(f"Error saving file {file_path}: {e}")
+ # Clean up temp file if it exists
+ temp_path = file_path + '.tmp'
+ if os.path.exists(temp_path):
+ try:
+ os.remove(temp_path)
+            except Exception:
+ pass
+ return False
+
+def get_fallback_data_dir():
+ """Get a fallback data directory that's guaranteed to be writable"""
+ fallback_dirs = [
+ tempfile.gettempdir(),
+ '/tmp',
+ os.path.expanduser('~'),
+ os.getcwd()
+ ]
+
+ for directory in fallback_dirs:
+ try:
+ test_dir = os.path.join(directory, 'typhoon_fallback')
+ os.makedirs(test_dir, exist_ok=True)
+ test_file = os.path.join(test_dir, 'test.txt')
+ with open(test_file, 'w') as f:
+ f.write('test')
+ os.remove(test_file)
+ return test_dir
+        except Exception:
+ continue
+
+ # If all else fails, use current directory
+ return os.getcwd()
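+
+# Usage sketch (editor's note; the path and DataFrame are illustrative only):
+#   df = pd.DataFrame({'a': [1, 2]})
+#   safe_file_write(os.path.join(DATA_PATH, 'out.csv'), df,
+#                   backup_dir=get_fallback_data_dir())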
# -----------------------------
-# FIXED: ONI Data Functions
+# ONI and Typhoon Data Functions
# -----------------------------
def download_oni_file(url, filename):
@@ -224,8 +299,10 @@ def download_oni_file(url, filename):
except Exception as e:
logging.warning(f"Attempt {attempt + 1} failed to download ONI: {e}")
if attempt < max_retries - 1:
- time.sleep(2 ** attempt)
- return False
+ time.sleep(2 ** attempt) # Exponential backoff
+ else:
+ logging.error(f"Failed to download ONI after {max_retries} attempts")
+ return False
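+
+# Editor's note (worked example): if max_retries is 3 (value not shown in this
+# hunk), the backoff sleeps are 2**0 = 1 s after the first failure and
+# 2**1 = 2 s after the second; the third failure logs the error, and the
+# function falls through to return False.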
def convert_oni_ascii_to_csv(input_file, output_file):
"""Convert ONI ASCII format to CSV"""
@@ -246,11 +323,12 @@ def convert_oni_ascii_to_csv(input_file, output_file):
year = str(int(year)-1)
data[year][month-1] = anom
+ # Write to CSV with safe write
df = pd.DataFrame(data).T.reset_index()
df.columns = ['Year','Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec']
df = df.sort_values('Year').reset_index(drop=True)
- return safe_file_write(output_file, df)
+ return safe_file_write(output_file, df, get_fallback_data_dir())
except Exception as e:
logging.error(f"Error converting ONI file: {e}")
@@ -271,31 +349,33 @@ def update_oni_data():
else:
os.remove(temp_file)
else:
- logging.warning("ONI download failed - will create minimal ONI data")
- create_minimal_oni_data(output_file)
+ # Create fallback ONI data if download fails
+ logging.warning("Creating fallback ONI data")
+ create_fallback_oni_data(output_file)
except Exception as e:
logging.error(f"Error updating ONI data: {e}")
- create_minimal_oni_data(output_file)
+ create_fallback_oni_data(output_file)
-def create_minimal_oni_data(output_file):
- """Create minimal ONI data for years without dropping typhoon data"""
- years = range(1950, 2026) # Wide range to ensure coverage
+def create_fallback_oni_data(output_file):
+ """Create minimal ONI data for testing"""
+ years = range(2000, 2026) # Extended to include 2025
months = ['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec']
+ # Create synthetic ONI data
data = []
for year in years:
row = [year]
for month in months:
- # Generate neutral ONI values (small variations around 0)
- value = np.random.normal(0, 0.3)
+            # Synthetic anomalies drawn from N(0, 0.5) - neutral-leaning placeholders
+            value = np.random.normal(0, 0.5)
row.append(f"{value:.2f}")
data.append(row)
df = pd.DataFrame(data, columns=['Year'] + months)
- safe_file_write(output_file, df)
+ safe_file_write(output_file, df, get_fallback_data_dir())
# -----------------------------
-# FIXED: IBTrACS Data Loading - No Fallback, All Data
+# FIXED: IBTrACS Data Loading
# -----------------------------
def download_ibtracs_file(basin, force_download=False):
@@ -304,6 +384,7 @@ def download_ibtracs_file(basin, force_download=False):
local_path = os.path.join(DATA_PATH, filename)
url = IBTRACS_BASE_URL + filename
+ # Check if file exists and is recent (less than 7 days old)
if os.path.exists(local_path) and not force_download:
file_age = time.time() - os.path.getmtime(local_path)
if file_age < 7 * 24 * 3600: # 7 days
@@ -312,9 +393,10 @@ def download_ibtracs_file(basin, force_download=False):
try:
logging.info(f"Downloading {basin} basin file from {url}")
- response = requests.get(url, timeout=120) # Increased timeout
+ response = requests.get(url, timeout=60)
response.raise_for_status()
+ # Ensure directory exists
os.makedirs(os.path.dirname(local_path), exist_ok=True)
with open(local_path, 'wb') as f:
@@ -325,8 +407,29 @@ def download_ibtracs_file(basin, force_download=False):
logging.error(f"Failed to download {basin} basin file: {e}")
return None
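+# Usage sketch (editor's note): the 7-day freshness check above means app
+# restarts reuse the cached CSV; pass force_download=True to refresh sooner:
+#   path = download_ibtracs_file('WP')                       # reuse if < 7 days old
+#   path = download_ibtracs_file('WP', force_download=True)  # always re-download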
-def load_ibtracs_csv_directly(basin='ALL'):
- """Load IBTrACS data directly from CSV - FIXED to load ALL data"""
+def examine_ibtracs_structure(file_path):
+ """Examine the actual structure of an IBTrACS CSV file"""
+ try:
+        # Read only the first few lines; full IBTrACS CSVs can be hundreds of MB
+        with open(file_path, 'r') as f:
+            lines = [f.readline() for _ in range(5)]
+
+        logging.info("First 5 lines of IBTrACS file:")
+        for i, line in enumerate(lines):
+            logging.info(f"Line {i}: {line.strip()}")
+
+ # The first line contains the actual column headers
+ # No need to skip rows for IBTrACS v04r01
+ df = pd.read_csv(file_path, nrows=5)
+ logging.info(f"Columns from first row: {list(df.columns)}")
+
+ return list(df.columns)
+ except Exception as e:
+ logging.error(f"Error examining IBTrACS structure: {e}")
+ return None
+
+def load_ibtracs_csv_directly(basin='WP'):
+ """Load IBTrACS data directly from CSV - FIXED VERSION"""
filename = BASIN_FILES[basin]
local_path = os.path.join(DATA_PATH, filename)
@@ -334,328 +437,283 @@ def load_ibtracs_csv_directly(basin='ALL'):
if not os.path.exists(local_path):
downloaded_path = download_ibtracs_file(basin)
if not downloaded_path:
- logging.error(f"Could not download {basin} basin data")
return None
try:
+ # First, examine the structure
+ actual_columns = examine_ibtracs_structure(local_path)
+ if not actual_columns:
+ logging.error("Could not examine IBTrACS file structure")
+ return None
+
+ # Read IBTrACS CSV - DON'T skip any rows for v04r01
+ # The first row contains proper column headers
logging.info(f"Reading IBTrACS CSV file: {local_path}")
- # Read with low_memory=False to ensure proper data types
- df = pd.read_csv(local_path, low_memory=False)
+ df = pd.read_csv(local_path, low_memory=False) # Don't skip any rows
- logging.info(f"Original data shape: {df.shape}")
- logging.info(f"Available columns: {list(df.columns)}")
+ logging.info(f"Original columns: {list(df.columns)}")
+ logging.info(f"Data shape before cleaning: {df.shape}")
- # Essential columns check
- required_cols = ['SID', 'LAT', 'LON']
- missing_cols = [col for col in required_cols if col not in df.columns]
- if missing_cols:
- logging.error(f"Missing critical columns: {missing_cols}")
+ # Check which essential columns exist
+ required_cols = ['SID', 'ISO_TIME', 'LAT', 'LON']
+ available_required = [col for col in required_cols if col in df.columns]
+
+ if len(available_required) < 2:
+ logging.error(f"Missing critical columns. Available: {list(df.columns)}")
return None
- # FIXED: Data cleaning without dropping data unnecessarily
- # Clean numeric columns carefully
+ # Clean and standardize the data with format specification
+ if 'ISO_TIME' in df.columns:
+ df['ISO_TIME'] = pd.to_datetime(df['ISO_TIME'], format='%Y-%m-%d %H:%M:%S', errors='coerce')
+
+ # Clean numeric columns
numeric_columns = ['LAT', 'LON', 'WMO_WIND', 'WMO_PRES', 'USA_WIND', 'USA_PRES']
for col in numeric_columns:
if col in df.columns:
df[col] = pd.to_numeric(df[col], errors='coerce')
- # Time handling
- if 'ISO_TIME' in df.columns:
- df['ISO_TIME'] = pd.to_datetime(df['ISO_TIME'], errors='coerce')
-
- # FIXED: Only filter out clearly invalid coordinates
- valid_coords = (
- df['LAT'].notna() &
- df['LON'].notna() &
- (df['LAT'].between(-90, 90)) &
- (df['LON'].between(-180, 180))
- )
- df = df[valid_coords]
+ # Filter out invalid/missing critical data
+ valid_rows = df['LAT'].notna() & df['LON'].notna()
+ df = df[valid_rows]
+
+ # Ensure LAT/LON are in reasonable ranges
+ df = df[(df['LAT'] >= -90) & (df['LAT'] <= 90)]
+ df = df[(df['LON'] >= -180) & (df['LON'] <= 180)]
- # Add missing columns with defaults
+ # Add basin info if missing
if 'BASIN' not in df.columns:
- if 'SID' in df.columns:
- df['BASIN'] = df['SID'].str[:2]
- else:
- df['BASIN'] = basin
+ df['BASIN'] = basin
+ # Add default columns if missing
if 'NAME' not in df.columns:
df['NAME'] = 'UNNAMED'
if 'SEASON' not in df.columns and 'ISO_TIME' in df.columns:
df['SEASON'] = df['ISO_TIME'].dt.year
- elif 'SEASON' not in df.columns:
- # Extract year from SID if possible
- if 'SID' in df.columns:
- try:
- df['SEASON'] = df['SID'].str.extract(r'(\d{4})').astype(float)
- except:
- df['SEASON'] = 2000 # Default year
logging.info(f"Successfully loaded {len(df)} records from {basin} basin")
- logging.info(f"Final data shape: {df.shape}")
return df
except Exception as e:
logging.error(f"Error reading IBTrACS CSV file: {e}")
- import traceback
- traceback.print_exc()
return None
-def load_all_ibtracs_data():
- """Load ALL available IBTrACS data - FIXED to never use fallback"""
- all_data = []
+def load_ibtracs_data_fixed():
+ """Fixed version of IBTrACS data loading"""
+ ibtracs_data = {}
- # Try to load the ALL basin file first (contains all basins)
- try:
- logging.info("Attempting to load ALL basin data...")
- all_basin_data = load_ibtracs_csv_directly('ALL')
- if all_basin_data is not None and not all_basin_data.empty:
- logging.info(f"Successfully loaded ALL basin data: {len(all_basin_data)} records")
- return all_basin_data
- except Exception as e:
- logging.warning(f"Failed to load ALL basin data: {e}")
+ # Try to load each basin, but prioritize WP for this application
+ load_order = ['WP', 'EP', 'NA']
- # If ALL basin fails, load individual basins
- basins_to_load = ['WP', 'EP', 'NA']
- for basin in basins_to_load:
+ for basin in load_order:
try:
logging.info(f"Loading {basin} basin data...")
- basin_data = load_ibtracs_csv_directly(basin)
- if basin_data is not None and not basin_data.empty:
- basin_data['BASIN'] = basin
- all_data.append(basin_data)
- logging.info(f"Successfully loaded {basin} basin: {len(basin_data)} records")
+ df = load_ibtracs_csv_directly(basin)
+
+ if df is not None and not df.empty:
+ ibtracs_data[basin] = df
+ logging.info(f"Successfully loaded {basin} basin with {len(df)} records")
else:
logging.warning(f"No data loaded for basin {basin}")
+ ibtracs_data[basin] = None
+
except Exception as e:
logging.error(f"Failed to load basin {basin}: {e}")
+ ibtracs_data[basin] = None
- if all_data:
- combined_data = pd.concat(all_data, ignore_index=True)
- logging.info(f"Combined all basins: {len(combined_data)} total records")
- return combined_data
- else:
- logging.error("No IBTrACS data could be loaded from any basin")
- return None
+ return ibtracs_data
def load_data_fixed(oni_path, typhoon_path):
- """FIXED data loading - loads all available typhoon data regardless of ONI"""
+ """Fixed version of load_data function"""
+ # Load ONI data
+ oni_data = pd.DataFrame({'Year': [], 'Jan': [], 'Feb': [], 'Mar': [], 'Apr': [],
+ 'May': [], 'Jun': [], 'Jul': [], 'Aug': [], 'Sep': [],
+ 'Oct': [], 'Nov': [], 'Dec': []})
- # Load ONI data (optional - typhoon analysis can work without it)
- oni_data = None
- if os.path.exists(oni_path):
- try:
- oni_data = pd.read_csv(oni_path)
- logging.info(f"Successfully loaded ONI data with {len(oni_data)} years")
- except Exception as e:
- logging.error(f"Error loading ONI data: {e}")
+ if not os.path.exists(oni_path):
+ logging.warning(f"ONI data file not found: {oni_path}")
+ update_oni_data()
- if oni_data is None:
- logging.warning("ONI data not available - creating minimal ONI data")
+ try:
+ oni_data = pd.read_csv(oni_path)
+ logging.info(f"Successfully loaded ONI data with {len(oni_data)} years")
+ except Exception as e:
+ logging.error(f"Error loading ONI data: {e}")
update_oni_data()
try:
oni_data = pd.read_csv(oni_path)
except Exception as e:
logging.error(f"Still can't load ONI data: {e}")
- # Create minimal fallback
- create_minimal_oni_data(oni_path)
- oni_data = pd.read_csv(oni_path)
- # FIXED: Load typhoon data - ALWAYS from IBTrACS, never use fallback
+ # Load typhoon data - NEW APPROACH
typhoon_data = None
- # Try to load from existing processed file first
+ # First, try to load from existing processed file
if os.path.exists(typhoon_path):
try:
typhoon_data = pd.read_csv(typhoon_path, low_memory=False)
- required_cols = ['LAT', 'LON', 'SID']
+ # Ensure basic columns exist and are valid
+ required_cols = ['LAT', 'LON']
if all(col in typhoon_data.columns for col in required_cols):
if 'ISO_TIME' in typhoon_data.columns:
typhoon_data['ISO_TIME'] = pd.to_datetime(typhoon_data['ISO_TIME'], errors='coerce')
logging.info(f"Loaded processed typhoon data with {len(typhoon_data)} records")
- # Validate the data quality
- valid_records = typhoon_data['LAT'].notna() & typhoon_data['LON'].notna()
- if valid_records.sum() / len(typhoon_data) > 0.8: # If >80% valid, use it
- typhoon_data = typhoon_data[valid_records]
- else:
- logging.warning("Processed data quality poor, reloading from IBTrACS")
- typhoon_data = None
else:
- logging.warning("Processed typhoon data missing required columns, reloading from IBTrACS")
+ logging.warning("Processed typhoon data missing required columns, will reload from IBTrACS")
typhoon_data = None
except Exception as e:
logging.error(f"Error loading processed typhoon data: {e}")
typhoon_data = None
- # FIXED: Load from IBTrACS if needed - NO FALLBACK ALLOWED
+ # If no valid processed data, load from IBTrACS
if typhoon_data is None or typhoon_data.empty:
logging.info("Loading typhoon data from IBTrACS...")
- typhoon_data = load_all_ibtracs_data()
+ ibtracs_data = load_ibtracs_data_fixed()
- if typhoon_data is None or typhoon_data.empty:
- raise Exception("CRITICAL ERROR: No typhoon data could be loaded from IBTrACS. Check internet connection and IBTrACS availability.")
-
- # Process and save the loaded data
- # Ensure SID exists and is properly formatted
- if 'SID' not in typhoon_data.columns:
- logging.error("CRITICAL: No SID column in typhoon data")
- raise Exception("Typhoon data missing SID column")
+ # Combine all available basin data, prioritizing WP
+ combined_dfs = []
+ for basin in ['WP', 'EP', 'NA']:
+ if basin in ibtracs_data and ibtracs_data[basin] is not None:
+ df = ibtracs_data[basin].copy()
+ df['BASIN'] = basin
+ combined_dfs.append(df)
- # Save the processed data for future use
- try:
- safe_file_write(typhoon_path, typhoon_data)
- logging.info(f"Saved processed typhoon data: {len(typhoon_data)} records")
- except Exception as e:
- logging.warning(f"Could not save processed data: {e}")
+ if combined_dfs:
+ typhoon_data = pd.concat(combined_dfs, ignore_index=True)
+ # Ensure SID has proper format
+ if 'SID' not in typhoon_data.columns and 'BASIN' in typhoon_data.columns:
+ # Create SID from basin and other identifiers if missing
+ if 'SEASON' in typhoon_data.columns:
+ typhoon_data['SID'] = (typhoon_data['BASIN'].astype(str) +
+ typhoon_data.index.astype(str).str.zfill(2) +
+ typhoon_data['SEASON'].astype(str))
+ else:
+ typhoon_data['SID'] = (typhoon_data['BASIN'].astype(str) +
+ typhoon_data.index.astype(str).str.zfill(2) +
+ '2000')
+
+ # Save the processed data for future use
+ safe_file_write(typhoon_path, typhoon_data, get_fallback_data_dir())
+ logging.info(f"Combined IBTrACS data: {len(typhoon_data)} total records")
+ else:
+ logging.error("Failed to load any IBTrACS basin data")
+ # Create minimal fallback data
+ typhoon_data = create_fallback_typhoon_data()
- # FIXED: Final validation and enhancement
- if typhoon_data is not None and not typhoon_data.empty:
- # Ensure required columns exist with proper defaults
+ # Final validation of typhoon data
+ if typhoon_data is not None:
+ # Ensure required columns exist with fallback values
required_columns = {
- 'SID': lambda: f"UNKNOWN_{typhoon_data.index}",
+ 'SID': 'UNKNOWN',
'ISO_TIME': pd.Timestamp('2000-01-01'),
- 'LAT': 20.0,
- 'LON': 140.0,
- 'USA_WIND': 30.0,
- 'USA_PRES': 1013.0,
+ 'LAT': 0.0,
+ 'LON': 0.0,
+ 'USA_WIND': np.nan,
+ 'USA_PRES': np.nan,
'NAME': 'UNNAMED',
- 'SEASON': 2000,
- 'BASIN': 'WP'
+ 'SEASON': 2000
}
for col, default_val in required_columns.items():
if col not in typhoon_data.columns:
- if callable(default_val):
- typhoon_data[col] = default_val()
- else:
- typhoon_data[col] = default_val
- logging.warning(f"Added missing column {col}")
-
- # Ensure proper data types
- numeric_cols = ['LAT', 'LON', 'USA_WIND', 'USA_PRES', 'SEASON']
- for col in numeric_cols:
- if col in typhoon_data.columns:
- typhoon_data[col] = pd.to_numeric(typhoon_data[col], errors='coerce')
+ typhoon_data[col] = default_val
+ logging.warning(f"Added missing column {col} with default value")
+ # Ensure data types
if 'ISO_TIME' in typhoon_data.columns:
typhoon_data['ISO_TIME'] = pd.to_datetime(typhoon_data['ISO_TIME'], errors='coerce')
+ typhoon_data['LAT'] = pd.to_numeric(typhoon_data['LAT'], errors='coerce')
+ typhoon_data['LON'] = pd.to_numeric(typhoon_data['LON'], errors='coerce')
+ typhoon_data['USA_WIND'] = pd.to_numeric(typhoon_data['USA_WIND'], errors='coerce')
+ typhoon_data['USA_PRES'] = pd.to_numeric(typhoon_data['USA_PRES'], errors='coerce')
- # Remove only clearly invalid records
- valid_mask = (
- typhoon_data['LAT'].notna() &
- typhoon_data['LON'].notna() &
- typhoon_data['LAT'].between(-90, 90) &
- typhoon_data['LON'].between(-180, 180)
- )
-
- original_count = len(typhoon_data)
- typhoon_data = typhoon_data[valid_mask]
- logging.info(f"Final typhoon data: {len(typhoon_data)} records (removed {original_count - len(typhoon_data)} invalid)")
+ # Remove rows with invalid coordinates
+ typhoon_data = typhoon_data.dropna(subset=['LAT', 'LON'])
- if len(typhoon_data) == 0:
- raise Exception("CRITICAL ERROR: All typhoon data was filtered out - check data quality")
-
- else:
- raise Exception("CRITICAL ERROR: No typhoon data available after all loading attempts")
+ logging.info(f"Final typhoon data: {len(typhoon_data)} records after validation")
return oni_data, typhoon_data
+def create_fallback_typhoon_data():
+ """Create minimal fallback typhoon data - FIXED VERSION"""
+ # Use proper pandas date_range instead of numpy
+ dates = pd.date_range(start='2000-01-01', end='2025-12-31', freq='D') # Extended to 2025
+ storm_dates = dates[np.random.choice(len(dates), size=100, replace=False)]
+
+ data = []
+ for i, date in enumerate(storm_dates):
+ # Create realistic WP storm tracks
+ base_lat = np.random.uniform(10, 30)
+ base_lon = np.random.uniform(130, 160)
+
+ # Generate 20-50 data points per storm
+ track_length = np.random.randint(20, 51)
+ sid = f"WP{i+1:02d}{date.year}"
+
+ for j in range(track_length):
+ lat = base_lat + j * 0.2 + np.random.normal(0, 0.1)
+ lon = base_lon + j * 0.3 + np.random.normal(0, 0.1)
+ wind = max(25, 70 + np.random.normal(0, 20))
+ pres = max(950, 1000 - wind + np.random.normal(0, 5))
+
+ data.append({
+ 'SID': sid,
+ 'ISO_TIME': date + pd.Timedelta(hours=j*6), # Use pd.Timedelta instead
+ 'NAME': f'FALLBACK_{i+1}',
+ 'SEASON': date.year,
+ 'LAT': lat,
+ 'LON': lon,
+ 'USA_WIND': wind,
+ 'USA_PRES': pres,
+ 'BASIN': 'WP'
+ })
+
+ df = pd.DataFrame(data)
+ logging.info(f"Created fallback typhoon data with {len(df)} records")
+ return df
+
def process_oni_data(oni_data):
"""Process ONI data into long format"""
- if oni_data is None or oni_data.empty:
- # Return minimal ONI data that won't break merging
- return pd.DataFrame({
- 'Year': [2000], 'Month': ['01'], 'ONI': [0.0],
- 'Date': [pd.Timestamp('2000-01-01')]
- })
-
oni_long = oni_data.melt(id_vars=['Year'], var_name='Month', value_name='ONI')
month_map = {'Jan':'01','Feb':'02','Mar':'03','Apr':'04','May':'05','Jun':'06',
'Jul':'07','Aug':'08','Sep':'09','Oct':'10','Nov':'11','Dec':'12'}
oni_long['Month'] = oni_long['Month'].map(month_map)
oni_long['Date'] = pd.to_datetime(oni_long['Year'].astype(str)+'-'+oni_long['Month']+'-01')
- oni_long['ONI'] = pd.to_numeric(oni_long['ONI'], errors='coerce').fillna(0)
+ oni_long['ONI'] = pd.to_numeric(oni_long['ONI'], errors='coerce')
return oni_long
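+# Example (editor's sketch, with an illustrative anomaly of -0.5): a wide row
+# (Year=2000, Jan=-0.5, ...) melts into long rows such as
+# {'Year': 2000, 'Month': '01', 'ONI': -0.5, 'Date': Timestamp('2000-01-01')}.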
def process_typhoon_data(typhoon_data):
- """Process typhoon data - FIXED to preserve all data"""
- if typhoon_data is None or typhoon_data.empty:
- raise Exception("No typhoon data to process")
-
- # Ensure proper data types
+ """Process typhoon data"""
if 'ISO_TIME' in typhoon_data.columns:
typhoon_data['ISO_TIME'] = pd.to_datetime(typhoon_data['ISO_TIME'], errors='coerce')
+ typhoon_data['USA_WIND'] = pd.to_numeric(typhoon_data['USA_WIND'], errors='coerce')
+ typhoon_data['USA_PRES'] = pd.to_numeric(typhoon_data['USA_PRES'], errors='coerce')
+ typhoon_data['LON'] = pd.to_numeric(typhoon_data['LON'], errors='coerce')
- numeric_cols = ['USA_WIND', 'USA_PRES', 'LON', 'LAT']
- for col in numeric_cols:
- if col in typhoon_data.columns:
- typhoon_data[col] = pd.to_numeric(typhoon_data[col], errors='coerce')
+ logging.info(f"Unique basins in typhoon_data: {typhoon_data['SID'].str[:2].unique()}")
- logging.info(f"Processing {len(typhoon_data)} typhoon records")
+ typhoon_max = typhoon_data.groupby('SID').agg({
+ 'USA_WIND':'max','USA_PRES':'min','ISO_TIME':'first','SEASON':'first','NAME':'first',
+ 'LAT':'first','LON':'first'
+ }).reset_index()
- # Get maximum values per storm
- agg_dict = {}
- if 'USA_WIND' in typhoon_data.columns:
- agg_dict['USA_WIND'] = 'max'
- if 'USA_PRES' in typhoon_data.columns:
- agg_dict['USA_PRES'] = 'min'
- if 'ISO_TIME' in typhoon_data.columns:
- agg_dict['ISO_TIME'] = 'first'
- if 'SEASON' in typhoon_data.columns:
- agg_dict['SEASON'] = 'first'
- if 'NAME' in typhoon_data.columns:
- agg_dict['NAME'] = 'first'
- if 'LAT' in typhoon_data.columns:
- agg_dict['LAT'] = 'first'
- if 'LON' in typhoon_data.columns:
- agg_dict['LON'] = 'first'
-
- typhoon_max = typhoon_data.groupby('SID').agg(agg_dict).reset_index()
-
- # Add time-based columns for merging
if 'ISO_TIME' in typhoon_max.columns:
typhoon_max['Month'] = typhoon_max['ISO_TIME'].dt.strftime('%m')
typhoon_max['Year'] = typhoon_max['ISO_TIME'].dt.year
else:
- # Use SEASON if available, otherwise default
- if 'SEASON' in typhoon_max.columns:
- typhoon_max['Year'] = typhoon_max['SEASON']
- else:
- typhoon_max['Year'] = 2000
- typhoon_max['Month'] = '01' # Default month
+ # Fallback if no ISO_TIME
+ typhoon_max['Month'] = '01'
+ typhoon_max['Year'] = typhoon_max['SEASON']
- # Add category
- if 'USA_WIND' in typhoon_max.columns:
- typhoon_max['Category'] = typhoon_max['USA_WIND'].apply(categorize_typhoon_enhanced)
- else:
- typhoon_max['Category'] = 'Unknown'
-
- logging.info(f"Processed {len(typhoon_max)} unique storms")
+ typhoon_max['Category'] = typhoon_max['USA_WIND'].apply(categorize_typhoon_enhanced)
return typhoon_max
def merge_data(oni_long, typhoon_max):
- """Merge ONI and typhoon data - FIXED to preserve typhoon data even without ONI"""
- if typhoon_max is None or typhoon_max.empty:
- raise Exception("No typhoon data to merge")
-
- if oni_long is None or oni_long.empty:
- # If no ONI data, add default ONI values
- logging.warning("No ONI data available - using neutral values")
- typhoon_max['ONI'] = 0.0
- return typhoon_max
-
- # Merge with ONI data
- merged = pd.merge(typhoon_max, oni_long, on=['Year', 'Month'], how='left')
-
- # Fill missing ONI values with neutral
- merged['ONI'] = merged['ONI'].fillna(0.0)
-
- logging.info(f"Merged data: {len(merged)} storms with ONI values")
- return merged
+ """Merge ONI and typhoon data"""
+ return pd.merge(typhoon_max, oni_long, on=['Year','Month'])
# -----------------------------
-# Enhanced Categorization Functions
+# ENHANCED: Categorization Functions - FIXED TAIWAN CLASSIFICATION
# -----------------------------
def categorize_typhoon_enhanced(wind_speed):
@@ -663,49 +721,75 @@ def categorize_typhoon_enhanced(wind_speed):
if pd.isna(wind_speed):
return 'Unknown'
+ # Convert to knots if in m/s (some datasets use m/s)
if wind_speed < 10: # Likely in m/s, convert to knots
wind_speed = wind_speed * 1.94384
- if wind_speed < 34:
+ # FIXED thresholds to include TD
+ if wind_speed < 34: # Below 34 knots = Tropical Depression
return 'Tropical Depression'
- elif wind_speed < 64:
+ elif wind_speed < 64: # 34-63 knots = Tropical Storm
return 'Tropical Storm'
- elif wind_speed < 83:
+ elif wind_speed < 83: # 64-82 knots = Category 1 Typhoon
return 'C1 Typhoon'
- elif wind_speed < 96:
+ elif wind_speed < 96: # 83-95 knots = Category 2 Typhoon
return 'C2 Typhoon'
- elif wind_speed < 113:
+ elif wind_speed < 113: # 96-112 knots = Category 3 Strong Typhoon
return 'C3 Strong Typhoon'
- elif wind_speed < 137:
+ elif wind_speed < 137: # 113-136 knots = Category 4 Very Strong Typhoon
return 'C4 Very Strong Typhoon'
- else:
+ else: # 137+ knots = Category 5 Super Typhoon
return 'C5 Super Typhoon'
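+# Worked examples (editor's sketch):
+#   categorize_typhoon_enhanced(30)  -> 'Tropical Depression' (30 kt < 34 kt)
+#   categorize_typhoon_enhanced(9.0) -> treated as m/s: 9.0 * 1.94384 ≈ 17.5 kt -> 'Tropical Depression'
+#   categorize_typhoon_enhanced(140) -> 'C5 Super Typhoon' (≥ 137 kt)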
def categorize_typhoon_taiwan_fixed(wind_speed):
- """FIXED Taiwan categorization system based on CMA 2006 standards"""
+ """
+ FIXED Taiwan categorization system based on CMA 2006 standards
+ Reference: CMA Tropical Cyclone Data Center official classification
+ """
if pd.isna(wind_speed):
return 'Tropical Depression'
+ # Convert from knots to m/s if input appears to be in knots
if wind_speed > 50: # Likely in knots, convert to m/s
wind_speed_ms = wind_speed * 0.514444
else:
wind_speed_ms = wind_speed
+ # CMA 2006 Classification Standards (used by Taiwan CWA)
if wind_speed_ms >= 51.0:
- return 'Super Typhoon'
+ return 'Super Typhoon' # ≥51.0 m/s (≥99.2 kt)
elif wind_speed_ms >= 41.5:
- return 'Severe Typhoon'
+ return 'Severe Typhoon' # 41.5–50.9 m/s (80.7–99.1 kt)
elif wind_speed_ms >= 32.7:
- return 'Typhoon'
+ return 'Typhoon' # 32.7–41.4 m/s (63.6–80.6 kt)
elif wind_speed_ms >= 24.5:
- return 'Severe Tropical Storm'
+ return 'Severe Tropical Storm' # 24.5–32.6 m/s (47.6–63.5 kt)
elif wind_speed_ms >= 17.2:
- return 'Tropical Storm'
+ return 'Tropical Storm' # 17.2–24.4 m/s (33.4–47.5 kt)
else:
- return 'Tropical Depression'
+ return 'Tropical Depression' # < 17.2 m/s (< 33.4 kt)
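+# Worked examples (editor's sketch):
+#   categorize_typhoon_taiwan_fixed(85)   -> > 50, treated as knots: 85 * 0.514444 ≈ 43.7 m/s -> 'Severe Typhoon'
+#   categorize_typhoon_taiwan_fixed(35.0) -> ≤ 50, treated as m/s -> 'Typhoon' (32.7-41.4 m/s band)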
+
+# Original function for backward compatibility
+def categorize_typhoon(wind_speed):
+ """Original categorize typhoon function for backward compatibility"""
+ return categorize_typhoon_enhanced(wind_speed)
+
+def classify_enso_phases(oni_value):
+ """Classify ENSO phases based on ONI value"""
+ if isinstance(oni_value, pd.Series):
+ oni_value = oni_value.iloc[0]
+ if pd.isna(oni_value):
+ return 'Neutral'
+ if oni_value >= 0.5:
+ return 'El Nino'
+ elif oni_value <= -0.5:
+ return 'La Nina'
+ else:
+ return 'Neutral'
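+
+# Example (editor's sketch): classify_enso_phases(1.2) -> 'El Nino';
+# classify_enso_phases(-0.7) -> 'La Nina'; classify_enso_phases(0.1) -> 'Neutral'.
+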
+# FIXED: Combined categorization function
def categorize_typhoon_by_standard_fixed(wind_speed, standard='atlantic'):
- """FIXED categorization function supporting both standards"""
+ """FIXED categorization function supporting both standards with correct Taiwan thresholds"""
if pd.isna(wind_speed):
return 'Tropical Depression', '#808080'
@@ -713,7 +797,9 @@ def categorize_typhoon_by_standard_fixed(wind_speed, standard='atlantic'):
category = categorize_typhoon_taiwan_fixed(wind_speed)
color = taiwan_color_map_fixed.get(category, '#808080')
return category, color
+
else:
+ # Atlantic/International standard (unchanged)
if wind_speed >= 137:
return 'C5 Super Typhoon', '#FF0000'
elif wind_speed >= 113:
@@ -729,21 +815,8 @@ def categorize_typhoon_by_standard_fixed(wind_speed, standard='atlantic'):
else:
return 'Tropical Depression', '#808080'
-def classify_enso_phases(oni_value):
- """Classify ENSO phases based on ONI value"""
- if isinstance(oni_value, pd.Series):
- oni_value = oni_value.iloc[0]
- if pd.isna(oni_value):
- return 'Neutral'
- if oni_value >= 0.5:
- return 'El Nino'
- elif oni_value <= -0.5:
- return 'La Nina'
- else:
- return 'Neutral'
-
# -----------------------------
-# FIXED: Advanced ML Features
+# FIXED: ADVANCED ML FEATURES WITH ROBUST ERROR HANDLING
# -----------------------------
def extract_storm_features(typhoon_data):
@@ -753,6 +826,7 @@ def extract_storm_features(typhoon_data):
logging.error("No typhoon data provided for feature extraction")
return None
+ # Basic features - ensure columns exist
basic_features = []
for sid in typhoon_data['SID'].unique():
storm_data = typhoon_data[typhoon_data['SID'] == sid].copy()
@@ -760,6 +834,7 @@ def extract_storm_features(typhoon_data):
if len(storm_data) == 0:
continue
+ # Initialize feature dict with safe defaults
features = {'SID': sid}
# Wind statistics
@@ -809,13 +884,16 @@ def extract_storm_features(typhoon_data):
features['LON_max'] = lon_values.max()
features['LON_min'] = lon_values.min()
+ # Genesis location (first valid position)
features['genesis_lat'] = lat_values.iloc[0]
features['genesis_lon'] = lon_values.iloc[0]
- features['genesis_intensity'] = features['USA_WIND_mean']
+ features['genesis_intensity'] = features['USA_WIND_mean'] # Use mean as fallback
+ # Track characteristics
features['lat_range'] = lat_values.max() - lat_values.min()
features['lon_range'] = lon_values.max() - lon_values.min()
+ # Calculate track distance
if len(lat_values) > 1:
distances = []
for i in range(1, len(lat_values)):
@@ -828,6 +906,7 @@ def extract_storm_features(typhoon_data):
features['total_distance'] = 0
features['avg_speed'] = 0
+ # Track curvature
if len(lat_values) > 2:
bearing_changes = []
for i in range(1, len(lat_values)-1):
@@ -845,6 +924,7 @@ def extract_storm_features(typhoon_data):
else:
features['avg_curvature'] = 0
else:
+ # Default location values
features.update({
'LAT_mean': 20, 'LAT_std': 0, 'LAT_max': 20, 'LAT_min': 20,
'LON_mean': 140, 'LON_std': 0, 'LON_max': 140, 'LON_min': 140,
@@ -852,14 +932,26 @@ def extract_storm_features(typhoon_data):
'lat_range': 0, 'lon_range': 0, 'total_distance': 0,
'avg_speed': 0, 'avg_curvature': 0
})
+ else:
+ # Default location values if columns missing
+ features.update({
+ 'LAT_mean': 20, 'LAT_std': 0, 'LAT_max': 20, 'LAT_min': 20,
+ 'LON_mean': 140, 'LON_std': 0, 'LON_max': 140, 'LON_min': 140,
+ 'genesis_lat': 20, 'genesis_lon': 140, 'genesis_intensity': 30,
+ 'lat_range': 0, 'lon_range': 0, 'total_distance': 0,
+ 'avg_speed': 0, 'avg_curvature': 0
+ })
+ # Track length
features['track_length'] = len(storm_data)
+ # Add seasonal information
if 'SEASON' in storm_data.columns:
features['season'] = storm_data['SEASON'].iloc[0]
else:
features['season'] = 2000
+ # Add basin information
if 'BASIN' in storm_data.columns:
features['basin'] = storm_data['BASIN'].iloc[0]
elif 'SID' in storm_data.columns:
@@ -873,13 +965,17 @@ def extract_storm_features(typhoon_data):
logging.error("No valid storm features could be extracted")
return None
+ # Convert to DataFrame
storm_features = pd.DataFrame(basic_features)
+ # Ensure all numeric columns are properly typed
numeric_columns = [col for col in storm_features.columns if col not in ['SID', 'basin']]
for col in numeric_columns:
storm_features[col] = pd.to_numeric(storm_features[col], errors='coerce').fillna(0)
logging.info(f"Successfully extracted features for {len(storm_features)} storms")
+ logging.info(f"Feature columns: {list(storm_features.columns)}")
+
return storm_features
except Exception as e:
@@ -889,30 +985,38 @@ def extract_storm_features(typhoon_data):
return None
def perform_dimensionality_reduction(storm_features, method='umap', n_components=2):
- """Perform UMAP or t-SNE dimensionality reduction"""
+ """Perform UMAP or t-SNE dimensionality reduction - FIXED VERSION"""
try:
if storm_features is None or storm_features.empty:
raise ValueError("No storm features provided")
+ # Select numeric features for clustering - FIXED
feature_cols = []
for col in storm_features.columns:
if col not in ['SID', 'basin'] and storm_features[col].dtype in ['float64', 'int64']:
+ # Check if column has valid data
valid_data = storm_features[col].dropna()
- if len(valid_data) > 0 and valid_data.std() > 0:
+ if len(valid_data) > 0 and valid_data.std() > 0: # Only include columns with variance
feature_cols.append(col)
if len(feature_cols) == 0:
raise ValueError("No valid numeric features found for clustering")
+ logging.info(f"Using {len(feature_cols)} features for clustering: {feature_cols}")
+
X = storm_features[feature_cols].fillna(0)
+ # Check if we have enough samples
if len(X) < 2:
raise ValueError("Need at least 2 storms for clustering")
+ # Standardize features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
+ # Perform dimensionality reduction
if method.lower() == 'umap' and UMAP_AVAILABLE and len(X_scaled) >= 4:
+ # UMAP parameters optimized for typhoon data - fixed warnings
n_neighbors = min(15, len(X_scaled) - 1)
reducer = umap.UMAP(
n_components=n_components,
@@ -920,11 +1024,12 @@ def perform_dimensionality_reduction(storm_features, method='umap', n_components
min_dist=0.1,
metric='euclidean',
random_state=42,
- n_jobs=1
+ n_jobs=1 # Explicitly set to avoid warning
)
elif method.lower() == 'tsne' and len(X_scaled) >= 4:
+ # t-SNE parameters
perplexity = min(30, len(X_scaled) // 4)
- perplexity = max(1, perplexity)
+ perplexity = max(1, perplexity) # Ensure perplexity is at least 1
reducer = TSNE(
n_components=n_components,
perplexity=perplexity,
@@ -933,11 +1038,14 @@ def perform_dimensionality_reduction(storm_features, method='umap', n_components
random_state=42
)
else:
+ # Fallback to PCA
reducer = PCA(n_components=n_components, random_state=42)
+ # Fit and transform
embedding = reducer.fit_transform(X_scaled)
logging.info(f"Dimensionality reduction successful: {X_scaled.shape} -> {embedding.shape}")
+
return embedding, feature_cols, scaler
except Exception as e:
@@ -945,15 +1053,17 @@ def perform_dimensionality_reduction(storm_features, method='umap', n_components
raise
def cluster_storms_data(embedding, method='dbscan', eps=0.5, min_samples=3):
- """Cluster storms based on their embedding"""
+ """Cluster storms based on their embedding - FIXED NAME VERSION"""
try:
if len(embedding) < 2:
- return np.array([0] * len(embedding))
+ return np.array([0] * len(embedding)) # Single cluster for insufficient data
if method.lower() == 'dbscan':
+ # Adjust min_samples based on data size
min_samples = min(min_samples, max(2, len(embedding) // 5))
clusterer = DBSCAN(eps=eps, min_samples=min_samples)
elif method.lower() == 'kmeans':
+ # Adjust n_clusters based on data size
n_clusters = min(5, max(2, len(embedding) // 3))
clusterer = KMeans(n_clusters=n_clusters, random_state=42)
else:
@@ -962,15 +1072,18 @@ def cluster_storms_data(embedding, method='dbscan', eps=0.5, min_samples=3):
clusters = clusterer.fit_predict(embedding)
logging.info(f"Clustering complete: {len(np.unique(clusters))} clusters found")
+
return clusters
except Exception as e:
logging.error(f"Error in cluster_storms_data: {e}")
+ # Return single cluster as fallback
return np.array([0] * len(embedding))
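+
+# Pipeline sketch (editor's note; names match the functions defined above):
+#   features = extract_storm_features(typhoon_data)
+#   embedding, cols, scaler = perform_dimensionality_reduction(features, method='umap')
+#   labels = cluster_storms_data(embedding, method='dbscan', eps=0.5, min_samples=3)
+# A label of -1 marks DBSCAN noise; all other integers are cluster IDs.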
def create_separate_clustering_plots(storm_features, typhoon_data, method='umap'):
- """Create separate plots for clustering analysis"""
+ """Create separate plots for clustering analysis - ENHANCED CLARITY VERSION"""
try:
+ # Validate inputs
if storm_features is None or storm_features.empty:
raise ValueError("No storm features available for clustering")
@@ -979,17 +1092,23 @@ def create_separate_clustering_plots(storm_features, typhoon_data, method='umap'
logging.info(f"Starting clustering visualization with {len(storm_features)} storms")
+ # Perform dimensionality reduction
embedding, feature_cols, scaler = perform_dimensionality_reduction(storm_features, method)
+
+ # Perform clustering
cluster_labels = cluster_storms_data(embedding, 'dbscan')
+ # Add clustering results to storm features
storm_features_viz = storm_features.copy()
storm_features_viz['cluster'] = cluster_labels
storm_features_viz['dim1'] = embedding[:, 0]
storm_features_viz['dim2'] = embedding[:, 1]
+ # Merge with typhoon data for additional info - SAFE MERGE
try:
storm_info = typhoon_data.groupby('SID').first()[['NAME', 'SEASON']].reset_index()
storm_features_viz = storm_features_viz.merge(storm_info, on='SID', how='left')
+ # Fill missing values
storm_features_viz['NAME'] = storm_features_viz['NAME'].fillna('UNNAMED')
storm_features_viz['SEASON'] = storm_features_viz['SEASON'].fillna(2000)
except Exception as merge_error:
@@ -997,12 +1116,14 @@ def create_separate_clustering_plots(storm_features, typhoon_data, method='umap'
storm_features_viz['NAME'] = 'UNNAMED'
storm_features_viz['SEASON'] = 2000
+ # Get unique clusters and assign distinct colors
unique_clusters = sorted([c for c in storm_features_viz['cluster'].unique() if c != -1])
noise_count = len(storm_features_viz[storm_features_viz['cluster'] == -1])
- # 1. Clustering scatter plot
+ # 1. Enhanced clustering scatter plot with clear cluster identification
fig_cluster = go.Figure()
+ # Add noise points first
if noise_count > 0:
noise_data = storm_features_viz[storm_features_viz['cluster'] == -1]
fig_cluster.add_trace(
@@ -1027,6 +1148,7 @@ def create_separate_clustering_plots(storm_features, typhoon_data, method='umap'
)
)
+ # Add clusters with distinct colors and shapes
cluster_symbols = ['circle', 'square', 'diamond', 'triangle-up', 'triangle-down',
'pentagon', 'hexagon', 'star', 'cross', 'circle-open']
@@ -1067,15 +1189,17 @@ def create_separate_clustering_plots(storm_features, typhoon_data, method='umap'
showlegend=True
)
- # 2. Route map
+ # 2. ENHANCED route map with cluster legends and clearer representation
fig_routes = go.Figure()
+ # Create a comprehensive legend showing cluster characteristics
cluster_info_text = []
for i, cluster in enumerate(unique_clusters):
cluster_storm_ids = storm_features_viz[storm_features_viz['cluster'] == cluster]['SID'].tolist()
color = CLUSTER_COLORS[i % len(CLUSTER_COLORS)]
+ # Get cluster statistics for legend
cluster_data = storm_features_viz[storm_features_viz['cluster'] == cluster]
avg_intensity = cluster_data['USA_WIND_max'].mean() if 'USA_WIND_max' in cluster_data.columns else 0
avg_pressure = cluster_data['USA_PRES_min'].mean() if 'USA_PRES_min' in cluster_data.columns else 1000
@@ -1085,11 +1209,13 @@ def create_separate_clustering_plots(storm_features, typhoon_data, method='umap'
f"Avg: {avg_intensity:.0f}kt/{avg_pressure:.0f}hPa"
)
+ # Add multiple storms per cluster with clear identification
storms_added = 0
- for j, sid in enumerate(cluster_storm_ids[:8]):
+ for j, sid in enumerate(cluster_storm_ids[:8]): # Show up to 8 storms per cluster
try:
storm_track = typhoon_data[typhoon_data['SID'] == sid].sort_values('ISO_TIME')
if len(storm_track) > 1:
+ # Ensure valid coordinates
valid_coords = storm_track['LAT'].notna() & storm_track['LON'].notna()
storm_track = storm_track[valid_coords]
@@ -1097,9 +1223,10 @@ def create_separate_clustering_plots(storm_features, typhoon_data, method='umap'
storm_name = storm_track['NAME'].iloc[0] if pd.notna(storm_track['NAME'].iloc[0]) else 'UNNAMED'
storm_season = storm_track['SEASON'].iloc[0] if 'SEASON' in storm_track.columns else 'Unknown'
+ # Vary line style for different storms in same cluster
line_styles = ['solid', 'dash', 'dot', 'dashdot']
line_style = line_styles[j % len(line_styles)]
- line_width = 3 if j == 0 else 2
+ line_width = 3 if j == 0 else 2 # First storm thicker
fig_routes.add_trace(
go.Scattergeo(
@@ -1126,7 +1253,9 @@ def create_separate_clustering_plots(storm_features, typhoon_data, method='umap'
logging.warning(f"Error adding track for storm {sid}: {track_error}")
continue
+ # Add cluster centroid marker
if len(cluster_storm_ids) > 0:
+ # Calculate average genesis location for cluster
cluster_storm_data = storm_features_viz[storm_features_viz['cluster'] == cluster]
if 'genesis_lat' in cluster_storm_data.columns and 'genesis_lon' in cluster_storm_data.columns:
avg_lat = cluster_storm_data['genesis_lat'].mean()
@@ -1156,6 +1285,7 @@ def create_separate_clustering_plots(storm_features, typhoon_data, method='umap'
)
)
+ # Update route map layout with enhanced information and LARGER SIZE
fig_routes.update_layout(
title=f"Storm Routes by {method.upper()} Clusters
Different line styles = different storms in same cluster | Stars = cluster centers",
geo=dict(
@@ -1167,13 +1297,14 @@ def create_separate_clustering_plots(storm_features, typhoon_data, method='umap'
showcoastlines=True,
coastlinecolor="Gray",
center=dict(lat=20, lon=140),
- projection_scale=2.5
+ projection_scale=2.5 # Larger map
),
- height=800,
- width=1200,
+ height=800, # Much larger height
+ width=1200, # Wider map
showlegend=True
)
+ # Add cluster info annotation
cluster_summary = "
".join(cluster_info_text)
fig_routes.add_annotation(
text=f"Cluster Summary:
{cluster_summary}",
@@ -1186,7 +1317,7 @@ def create_separate_clustering_plots(storm_features, typhoon_data, method='umap'
borderwidth=1
)
- # 3. Pressure evolution plot
+ # 3. Enhanced pressure evolution plot with cluster identification
fig_pressure = go.Figure()
for i, cluster in enumerate(unique_clusters):
@@ -1194,13 +1325,16 @@ def create_separate_clustering_plots(storm_features, typhoon_data, method='umap'
color = CLUSTER_COLORS[i % len(CLUSTER_COLORS)]
cluster_pressures = []
- for j, sid in enumerate(cluster_storm_ids[:5]):
+ for j, sid in enumerate(cluster_storm_ids[:5]): # Limit to 5 storms per cluster
try:
storm_track = typhoon_data[typhoon_data['SID'] == sid].sort_values('ISO_TIME')
if len(storm_track) > 1 and 'USA_PRES' in storm_track.columns:
pressure_values = pd.to_numeric(storm_track['USA_PRES'], errors='coerce').dropna()
if len(pressure_values) > 0:
storm_name = storm_track['NAME'].iloc[0] if pd.notna(storm_track['NAME'].iloc[0]) else 'UNNAMED'
+ # Normalize time to show relative progression
normalized_time = np.linspace(0, 100, len(pressure_values))
fig_pressure.add_trace(
@@ -1225,6 +1359,7 @@ def create_separate_clustering_plots(storm_features, typhoon_data, method='umap'
except Exception as e:
continue
+ # Add cluster average line
if cluster_pressures:
avg_pressure = np.mean(cluster_pressures)
fig_pressure.add_hline(
@@ -1242,7 +1377,7 @@ def create_separate_clustering_plots(storm_features, typhoon_data, method='umap'
height=500
)
- # 4. Wind evolution plot
+ # 4. Enhanced wind evolution plot
fig_wind = go.Figure()
for i, cluster in enumerate(unique_clusters):
@@ -1250,13 +1385,15 @@ def create_separate_clustering_plots(storm_features, typhoon_data, method='umap'
color = CLUSTER_COLORS[i % len(CLUSTER_COLORS)]
cluster_winds = []
- for j, sid in enumerate(cluster_storm_ids[:5]):
+ for j, sid in enumerate(cluster_storm_ids[:5]): # Limit to 5 storms per cluster
try:
storm_track = typhoon_data[typhoon_data['SID'] == sid].sort_values('ISO_TIME')
if len(storm_track) > 1 and 'USA_WIND' in storm_track.columns:
wind_values = pd.to_numeric(storm_track['USA_WIND'], errors='coerce').dropna()
if len(wind_values) > 0:
storm_name = storm_track['NAME'].iloc[0] if pd.notna(storm_track['NAME'].iloc[0]) else 'UNNAMED'
+
+ # Normalize time to show relative progression
normalized_time = np.linspace(0, 100, len(wind_values))
fig_wind.add_trace(
@@ -1281,6 +1418,7 @@ def create_separate_clustering_plots(storm_features, typhoon_data, method='umap'
except Exception as e:
continue
+ # Add cluster average line
if cluster_winds:
avg_wind = np.mean(cluster_winds)
fig_wind.add_hline(
@@ -1298,7 +1436,7 @@ def create_separate_clustering_plots(storm_features, typhoon_data, method='umap'
height=500
)
- # Generate statistics
+ # Generate enhanced cluster statistics with clear explanations
try:
stats_text = f"ENHANCED {method.upper()} CLUSTER ANALYSIS RESULTS\n" + "="*60 + "\n\n"
stats_text += f"🔍 DIMENSIONALITY REDUCTION: {method.upper()}\n"
@@ -1322,6 +1460,7 @@ def create_separate_clustering_plots(storm_features, typhoon_data, method='umap'
stats_text += f"🎯 CLUSTER {cluster}: {storm_count} storms\n"
stats_text += f" 🎨 Color: {CLUSTER_COLORS[cluster % len(CLUSTER_COLORS)]}\n"
+ # Add detailed statistics if available
if 'USA_WIND_max' in cluster_data.columns:
wind_mean = cluster_data['USA_WIND_max'].mean()
wind_std = cluster_data['USA_WIND_max'].std()
@@ -1341,6 +1480,7 @@ def create_separate_clustering_plots(storm_features, typhoon_data, method='umap'
lon_mean = cluster_data['genesis_lon'].mean()
stats_text += f" 🎯 Genesis Region: {lat_mean:.1f}°N, {lon_mean:.1f}°E\n"
+ # Add interpretation
if wind_mean < 50:
stats_text += " 💡 Pattern: Weaker storm group\n"
elif wind_mean > 100:
@@ -1350,6 +1490,7 @@ def create_separate_clustering_plots(storm_features, typhoon_data, method='umap'
stats_text += "\n"
+ # Add explanation of the analysis
stats_text += "📖 INTERPRETATION GUIDE:\n"
stats_text += f"• {method.upper()} reduces storm characteristics to 2D for visualization\n"
stats_text += "• DBSCAN finds natural groupings without preset number of clusters\n"
@@ -1382,9 +1523,113 @@ def create_separate_clustering_plots(storm_features, typhoon_data, method='umap'
return error_fig, error_fig, error_fig, error_fig, f"Error in clustering: {str(e)}"
# -----------------------------
-# FIXED: Prediction System
+# ENHANCED: Advanced Prediction System with Route Forecasting
# -----------------------------
+def create_advanced_prediction_model(typhoon_data):
+ """Create advanced ML model for intensity and route prediction"""
+ try:
+ if typhoon_data is None or typhoon_data.empty:
+ return None, "No data available for model training"
+
+ # Prepare training data
+ features = []
+ targets = []
+
+ for sid in typhoon_data['SID'].unique():
+ storm_data = typhoon_data[typhoon_data['SID'] == sid].sort_values('ISO_TIME')
+
+ if len(storm_data) < 3: # Need at least 3 points for prediction
+ continue
+
+ for i in range(len(storm_data) - 1):
+ current = storm_data.iloc[i]
+ next_point = storm_data.iloc[i + 1]
+
+ # Extract features (current state)
+ feature_row = []
+
+ # Current position
+ feature_row.extend([
+ current.get('LAT', 20),
+ current.get('LON', 140)
+ ])
+
+ # Current intensity
+ feature_row.extend([
+ current.get('USA_WIND', 30),
+ current.get('USA_PRES', 1000)
+ ])
+
+ # Time features
+ if 'ISO_TIME' in current and pd.notna(current['ISO_TIME']):
+ month = current['ISO_TIME'].month
+ day_of_year = current['ISO_TIME'].dayofyear
+ else:
+ month = 9 # Peak season default
+ day_of_year = 250
+
+ feature_row.extend([month, day_of_year])
+
+ # Motion features (if previous point exists)
+ if i > 0:
+ prev = storm_data.iloc[i - 1]
+ dlat = current.get('LAT', 20) - prev.get('LAT', 20)
+ dlon = current.get('LON', 140) - prev.get('LON', 140)
+ speed = np.sqrt(dlat**2 + dlon**2)
+ bearing = np.arctan2(dlat, dlon)
+ else:
+ speed = 0
+ bearing = 0
+
+ feature_row.extend([speed, bearing])
+
+ features.append(feature_row)
+
+ # Target: next position and intensity
+ targets.append([
+ next_point.get('LAT', 20),
+ next_point.get('LON', 140),
+ next_point.get('USA_WIND', 30)
+ ])
+
+ if len(features) < 10: # Need sufficient training data
+ return None, "Insufficient data for model training"
+
+ # Train model
+ X = np.array(features)
+ y = np.array(targets)
+
+ # Split data
+ X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
+
+ # Create separate models for position and intensity
+ models = {}
+
+ # Position model (lat, lon)
+ pos_model = RandomForestRegressor(n_estimators=100, random_state=42)
+ pos_model.fit(X_train, y_train[:, :2])
+ models['position'] = pos_model
+
+ # Intensity model (wind speed)
+ int_model = RandomForestRegressor(n_estimators=100, random_state=42)
+ int_model.fit(X_train, y_train[:, 2])
+ models['intensity'] = int_model
+
+ # Calculate model performance
+ pos_pred = pos_model.predict(X_test)
+ int_pred = int_model.predict(X_test)
+
+ pos_mae = mean_absolute_error(y_test[:, :2], pos_pred)
+ int_mae = mean_absolute_error(y_test[:, 2], int_pred)
+
+ model_info = f"Position MAE: {pos_mae:.2f}°, Intensity MAE: {int_mae:.2f} kt"
+
+ return models, model_info
+
+ except Exception as e:
+ return None, f"Error creating prediction model: {str(e)}"
+
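+# One-step forecast sketch (editor's note; hypothetical helper, not part of the
+# app). The feature order mirrors the training rows built above:
+# [lat, lon, wind, pres, month, day_of_year, speed, bearing].
+#   def predict_next_state(models, lat, lon, wind, pres, month, doy, speed=0.0, bearing=0.0):
+#       X = np.array([[lat, lon, wind, pres, month, doy, speed, bearing]])
+#       next_lat, next_lon = models['position'].predict(X)[0]
+#       next_wind = float(models['intensity'].predict(X)[0])
+#       return next_lat, next_lon, next_wind
+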
def get_realistic_genesis_locations():
"""Get realistic typhoon genesis regions based on climatology"""
return {
@@ -1406,7 +1651,7 @@ def predict_storm_route_and_intensity_realistic(genesis_region, month, oni_value
genesis_locations = get_realistic_genesis_locations()
if genesis_region not in genesis_locations:
- genesis_region = "Western Pacific Main Development Region"
+ genesis_region = "Western Pacific Main Development Region" # Default
genesis_info = genesis_locations[genesis_region]
lat = genesis_info["lat"]
@@ -1420,27 +1665,29 @@ def predict_storm_route_and_intensity_realistic(genesis_region, month, oni_value
'genesis_info': genesis_info
}
- # Realistic starting intensity
- base_intensity = 30
+ # REALISTIC starting intensity - Tropical Depression level
+ base_intensity = 30 # Start at TD level (25-35 kt)
- # Environmental factors
- if oni_value > 1.0:
+ # Environmental factors for genesis
+ if oni_value > 1.0: # Strong El Niño - suppressed development
intensity_modifier = -6
- elif oni_value > 0.5:
+ elif oni_value > 0.5: # Moderate El Niño
intensity_modifier = -3
- elif oni_value < -1.0:
+ elif oni_value < -1.0: # Strong La Niña - enhanced development
intensity_modifier = +8
- elif oni_value < -0.5:
+ elif oni_value < -0.5: # Moderate La Niña
intensity_modifier = +5
- else:
+ else: # Neutral
intensity_modifier = oni_value * 2
+ # Seasonal genesis effects
seasonal_factors = {
1: -8, 2: -6, 3: -4, 4: -2, 5: 2, 6: 6,
7: 10, 8: 12, 9: 15, 10: 10, 11: 4, 12: -5
}
seasonal_modifier = seasonal_factors.get(month, 0)
+ # Genesis region favorability
region_factors = {
"Western Pacific Main Development Region": 8,
"South China Sea": 4,
@@ -1455,137 +1702,160 @@ def predict_storm_route_and_intensity_realistic(genesis_region, month, oni_value
}
region_modifier = region_factors.get(genesis_region, 0)
+ # Calculate realistic starting intensity (TD level)
predicted_intensity = base_intensity + intensity_modifier + seasonal_modifier + region_modifier
- predicted_intensity = max(25, min(40, predicted_intensity))
+ predicted_intensity = max(25, min(40, predicted_intensity)) # Keep in TD-weak TS range
+ # Add realistic uncertainty for genesis
intensity_uncertainty = np.random.normal(0, 2)
predicted_intensity += intensity_uncertainty
- predicted_intensity = max(25, min(38, predicted_intensity))
+ predicted_intensity = max(25, min(38, predicted_intensity)) # TD range
results['current_prediction'] = {
'intensity_kt': predicted_intensity,
- 'pressure_hpa': 1008 - (predicted_intensity - 25) * 0.6,
+ 'pressure_hpa': 1008 - (predicted_intensity - 25) * 0.6, # Realistic TD pressure
'category': categorize_typhoon_enhanced(predicted_intensity),
'genesis_region': genesis_region
}
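+    # Editor's note (worked example): predicted_intensity = 35 kt gives
+    # 1008 - (35 - 25) * 0.6 = 1002 hPa, consistent with a weak TD/TS.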
- # Route prediction
+ # REALISTIC route prediction with proper typhoon speeds
current_lat = lat
current_lon = lon
current_intensity = predicted_intensity
route_points = []
+ # Track storm development over time with REALISTIC SPEEDS
for hour in range(0, forecast_hours + 6, 6):
- # Realistic motion
- if current_lat < 20:
- base_speed = 0.12
- elif current_lat < 30:
- base_speed = 0.18
- else:
- base_speed = 0.25
+ # REALISTIC typhoon motion - much faster speeds
+ # Typical typhoon forward speed: 15-25 km/h (0.14-0.23°/hour)
+
+ # Base forward speed depends on latitude and storm intensity
+ if current_lat < 20: # Low latitude - slower
+ base_speed = 0.12 # ~13 km/h
+ elif current_lat < 30: # Mid latitude - moderate
+ base_speed = 0.18 # ~20 km/h
+ else: # High latitude - faster
+ base_speed = 0.25 # ~28 km/h
+
+ # Intensity affects speed (stronger storms can move faster)
intensity_speed_factor = 1.0 + (current_intensity - 50) / 200
base_speed *= max(0.8, min(1.4, intensity_speed_factor))
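+ # e.g. 100 kt -> factor 1.25, 30 kt -> 0.9, bounded to [0.8, 1.4]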
+ # Beta drift (Coriolis effect) - realistic values
beta_drift_lat = 0.02 * np.sin(np.radians(current_lat))
beta_drift_lon = -0.05 * np.cos(np.radians(current_lat))
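+ # at 20°N this works out to ≈ +0.007°/h poleward and -0.047°/h westward drift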
- if month in [6, 7, 8, 9]:
+ # Seasonal steering patterns with realistic speeds
+ if month in [6, 7, 8, 9]: # Peak season
ridge_strength = 1.2
ridge_position = 32 + 4 * np.sin(2 * np.pi * (month - 6) / 4)
- else:
+ else: # Off season
ridge_strength = 0.9
ridge_position = 28
- if current_lat < ridge_position - 10:
- lat_tendency = base_speed * 0.3 + beta_drift_lat
- lon_tendency = -base_speed * 0.9 + beta_drift_lon
- elif current_lat > ridge_position - 3:
- lat_tendency = base_speed * 0.8 + beta_drift_lat
- lon_tendency = base_speed * 0.4 + beta_drift_lon
- else:
- lat_tendency = base_speed * 0.4 + beta_drift_lat
- lon_tendency = -base_speed * 0.7 + beta_drift_lon
+ # REALISTIC motion based on position relative to subtropical ridge
+ if current_lat < ridge_position - 10: # Well south of ridge - westward movement
+ lat_tendency = base_speed * 0.3 + beta_drift_lat # Slight poleward
+ lon_tendency = -base_speed * 0.9 + beta_drift_lon # Strong westward
+ elif current_lat > ridge_position - 3: # Near ridge - recurvature
+ lat_tendency = base_speed * 0.8 + beta_drift_lat # Strong poleward
+ lon_tendency = base_speed * 0.4 + beta_drift_lon # Eastward
+ else: # In between - normal WNW motion
+ lat_tendency = base_speed * 0.4 + beta_drift_lat # Moderate poleward
+ lon_tendency = -base_speed * 0.7 + beta_drift_lon # Moderate westward
- if oni_value > 0.5:
+ # ENSO steering modulation (realistic effects)
+ if oni_value > 0.5: # El Niño - more eastward/poleward motion
lon_tendency += 0.05
lat_tendency += 0.02
- elif oni_value < -0.5:
+ elif oni_value < -0.5: # La Niña - more westward motion
lon_tendency -= 0.08
lat_tendency -= 0.01
+ # Add motion uncertainty that grows with time (realistic error growth)
motion_uncertainty = 0.02 + (hour / 120) * 0.04
lat_noise = np.random.normal(0, motion_uncertainty)
lon_noise = np.random.normal(0, motion_uncertainty)
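+ # position noise σ grows from 0.02° at genesis to 0.06° by hour 120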
+ # Update position with realistic speeds
current_lat += lat_tendency + lat_noise
current_lon += lon_tendency + lon_noise
- # Intensity evolution
+ # REALISTIC intensity evolution with proper development cycles
+
+ # Development phase (first 48-72 hours) - realistic intensification
if hour <= 48:
- if current_intensity < 50:
- if 10 <= current_lat <= 25 and 115 <= current_lon <= 165:
+ if current_intensity < 50: # Still weak - rapid development possible
+ if 10 <= current_lat <= 25 and 115 <= current_lon <= 165: # Favorable environment
intensity_tendency = 4.5 if current_intensity < 35 else 3.0
- elif 120 <= current_lon <= 155 and 15 <= current_lat <= 20:
+ elif 120 <= current_lon <= 155 and 15 <= current_lat <= 20: # Best environment
intensity_tendency = 6.0 if current_intensity < 40 else 4.0
else:
intensity_tendency = 2.0
- elif current_intensity < 80:
+ elif current_intensity < 80: # Moderate intensity
intensity_tendency = 2.5 if (120 <= current_lon <= 155 and 10 <= current_lat <= 25) else 1.0
- else:
+ else: # Already strong
intensity_tendency = 1.0
+ # Mature phase (48-120 hours) - peak intensity maintenance
elif hour <= 120:
- if current_lat < 25 and current_lon > 120:
+ if current_lat < 25 and current_lon > 120: # Still in favorable waters
if current_intensity < 120:
intensity_tendency = 1.5
else:
- intensity_tendency = 0.0
+ intensity_tendency = 0.0 # Maintain intensity
else:
intensity_tendency = -1.5
+ # Extended phase (120+ hours) - gradual weakening
else:
if current_lat < 30 and current_lon > 115:
- intensity_tendency = -2.0
+ intensity_tendency = -2.0 # Slow weakening
else:
- intensity_tendency = -3.5
+ intensity_tendency = -3.5 # Faster weakening
- # Environmental modulation
- if current_lat > 35:
+ # Environmental modulation (realistic effects)
+ if current_lat > 35: # High latitude - rapid weakening
intensity_tendency -= 12
- elif current_lat > 30:
+ elif current_lat > 30: # Moderate latitude
intensity_tendency -= 5
- elif current_lon < 110:
+ elif current_lon < 110: # Land interaction
intensity_tendency -= 15
- elif 125 <= current_lon <= 155 and 10 <= current_lat <= 25:
+ elif 125 <= current_lon <= 155 and 10 <= current_lat <= 25: # Warm pool
intensity_tendency += 2
- elif 160 <= current_lon <= 180 and 15 <= current_lat <= 30:
+ elif 160 <= current_lon <= 180 and 15 <= current_lat <= 30: # Still warm
intensity_tendency += 1
- if current_lat < 8:
+ # SST effects (realistic temperature impact)
+ if current_lat < 8: # Very warm but weak Coriolis
intensity_tendency += 0.5
- elif 8 <= current_lat <= 20:
+ elif 8 <= current_lat <= 20: # Sweet spot for development
intensity_tendency += 2.0
- elif 20 < current_lat <= 30:
+ elif 20 < current_lat <= 30: # Marginal
intensity_tendency -= 1.0
- elif current_lat > 30:
+ elif current_lat > 30: # Cool waters
intensity_tendency -= 4.0
- if month in [12, 1, 2, 3]:
+ # Shear effects (simplified but realistic)
+ if month in [12, 1, 2, 3]: # High shear season
intensity_tendency -= 2.0
- elif month in [7, 8, 9]:
+ elif month in [7, 8, 9]: # Low shear season
intensity_tendency += 1.0
- intensity_noise = np.random.normal(0, 1.5)
+ # Update intensity with realistic bounds and variability
+ intensity_noise = np.random.normal(0, 1.5) # Small random fluctuations
current_intensity += intensity_tendency + intensity_noise
- current_intensity = max(20, min(185, current_intensity))
+ current_intensity = max(20, min(185, current_intensity)) # Realistic range
+ # Calculate confidence based on forecast time and environment
base_confidence = 0.92
time_penalty = (hour / 120) * 0.45
environment_penalty = 0.15 if current_lat > 30 or current_lon < 115 else 0
confidence = max(0.25, base_confidence - time_penalty - environment_penalty)
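+ # e.g. hour 120 over cool water: 0.92 - 0.45 - 0.15 = 0.32 (floor is 0.25)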
+ # Determine development stage
if hour <= 24:
stage = 'Genesis'
elif hour <= 72:
@@ -1605,12 +1875,13 @@ def predict_storm_route_and_intensity_realistic(genesis_region, month, oni_value
'category': categorize_typhoon_enhanced(current_intensity),
'confidence': confidence,
'development_stage': stage,
- 'forward_speed_kmh': base_speed * 111,
+ 'forward_speed_kmh': base_speed * 111, # deg/hour × ~111 km/deg -> km/h
'pressure_hpa': max(900, 1013 - (current_intensity - 25) * 0.9)
})
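+ # e.g. 125 kt -> 1013 - 100*0.9 = 923 hPa; the 900 hPa floor caps extreme cases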
results['route_forecast'] = route_points
+ # Realistic confidence scores
results['confidence_scores'] = {
'genesis': 0.88,
'early_development': 0.82,
@@ -1623,6 +1894,7 @@ def predict_storm_route_and_intensity_realistic(genesis_region, month, oni_value
'long_term': max(0.3, 0.8 - (forecast_hours / 240) * 0.5)
}
+ # Model information
results['model_info'] = f"Enhanced Realistic Model - {genesis_region}"
return results
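+# Typical call (sketch of the API above):
+#   results = predict_storm_route_and_intensity_realistic(
+#       "Western Pacific Main Development Region", month=9, oni_value=-0.8,
+#       forecast_hours=72)
+#   results['route_forecast'] is the list of 6-hourly dicts consumed by
+#   create_animated_route_visualization() below.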
@@ -1645,6 +1917,7 @@ def create_animated_route_visualization(prediction_results, show_uncertainty=Tru
route_data = prediction_results['route_forecast']
+ # Extract data for plotting
hours = [point['hour'] for point in route_data]
lats = [point['lat'] for point in route_data]
lons = [point['lon'] for point in route_data]
@@ -1655,6 +1928,7 @@ def create_animated_route_visualization(prediction_results, show_uncertainty=Tru
speeds = [point.get('forward_speed_kmh', 15) for point in route_data]
pressures = [point.get('pressure_hpa', 1013) for point in route_data]
+ # Create subplot layout with map and intensity plot
fig = make_subplots(
rows=2, cols=2,
subplot_titles=('Storm Track Animation', 'Wind Speed vs Time', 'Forward Speed vs Time', 'Pressure vs Time'),
@@ -1665,8 +1939,11 @@ def create_animated_route_visualization(prediction_results, show_uncertainty=Tru
)
if enable_animation:
+ # Add frames for animation
frames = []
+ # Static background elements first
+ # Add complete track as background
fig.add_trace(
go.Scattergeo(
lon=lons,
@@ -1680,6 +1957,7 @@ def create_animated_route_visualization(prediction_results, show_uncertainty=Tru
row=1, col=1
)
+ # Genesis marker (always visible)
fig.add_trace(
go.Scattergeo(
lon=[lons[0]],
@@ -1704,6 +1982,7 @@ def create_animated_route_visualization(prediction_results, show_uncertainty=Tru
row=1, col=1
)
+ # Create animation frames
for i in range(len(route_data)):
frame_lons = lons[:i+1]
frame_lats = lats[:i+1]
@@ -1711,10 +1990,12 @@ def create_animated_route_visualization(prediction_results, show_uncertainty=Tru
frame_categories = categories[:i+1]
frame_hours = hours[:i+1]
+ # Current position marker
current_color = enhanced_color_map.get(frame_categories[-1], 'rgb(128,128,128)')
current_size = 15 + (frame_intensities[-1] / 10)
frame_data = [
+ # Animated track up to current point
go.Scattergeo(
lon=frame_lons,
lat=frame_lats,
@@ -1729,6 +2010,7 @@ def create_animated_route_visualization(prediction_results, show_uncertainty=Tru
name='Current Track',
showlegend=False
),
+ # Current position highlight
go.Scattergeo(
lon=[frame_lons[-1]],
lat=[frame_lats[-1]],
@@ -1752,6 +2034,7 @@ def create_animated_route_visualization(prediction_results, show_uncertainty=Tru
""
)
),
+ # Animated wind plot
go.Scatter(
x=frame_hours,
y=frame_intensities,
@@ -1762,6 +2045,7 @@ def create_animated_route_visualization(prediction_results, show_uncertainty=Tru
showlegend=False,
yaxis='y2'
),
+ # Animated speed plot
go.Scatter(
x=frame_hours,
y=speeds[:i+1],
@@ -1772,6 +2056,7 @@ def create_animated_route_visualization(prediction_results, show_uncertainty=Tru
showlegend=False,
yaxis='y3'
),
+ # Animated pressure plot
go.Scatter(
x=frame_hours,
y=pressures[:i+1],
@@ -1795,6 +2080,7 @@ def create_animated_route_visualization(prediction_results, show_uncertainty=Tru
fig.frames = frames
+ # Add play/pause controls
fig.update_layout(
updatemenus=[
{
@@ -1850,13 +2136,14 @@ def create_animated_route_visualization(prediction_results, show_uncertainty=Tru
"label": f"H{route_data[i]['hour']}",
"method": "animate"
}
- for i in range(0, len(route_data), max(1, len(route_data)//20))
+ for i in range(0, len(route_data), max(1, len(route_data)//20)) # Limit slider steps
]
}]
)
else:
- # Static view
+ # Static view with all points
+ # Add genesis marker
fig.add_trace(
go.Scattergeo(
lon=[lons[0]],
@@ -1880,7 +2167,8 @@ def create_animated_route_visualization(prediction_results, show_uncertainty=Tru
row=1, col=1
)
- for i in range(0, len(route_data), max(1, len(route_data)//50)):
+ # Add full track with intensity coloring
+ for i in range(0, len(route_data), max(1, len(route_data)//50)): # Sample points for performance
point = route_data[i]
color = enhanced_color_map.get(point['category'], 'rgb(128,128,128)')
size = 8 + (point['intensity_kt'] / 12)
@@ -1911,6 +2199,7 @@ def create_animated_route_visualization(prediction_results, show_uncertainty=Tru
row=1, col=1
)
+ # Connect points with track line
fig.add_trace(
go.Scattergeo(
lon=lons,
@@ -1924,6 +2213,7 @@ def create_animated_route_visualization(prediction_results, show_uncertainty=Tru
)
# Add static intensity, speed, and pressure plots
+ # Wind speed plot
fig.add_trace(
go.Scatter(
x=hours,
@@ -1977,6 +2267,7 @@ def create_animated_route_visualization(prediction_results, show_uncertainty=Tru
uncertainty_lons_lower = []
for i, point in enumerate(route_data):
+ # Uncertainty grows with time and decreases with confidence
base_uncertainty = 0.4 + (i / len(route_data)) * 1.8
confidence_factor = point.get('confidence', 0.8)
uncertainty = base_uncertainty / confidence_factor
@@ -2006,8 +2297,8 @@ def create_animated_route_visualization(prediction_results, show_uncertainty=Tru
# Enhanced layout
fig.update_layout(
title=f"Comprehensive Storm Development Analysis
Starting from {prediction_results['genesis_info']['description']}",
- height=1000,
- width=1400,
+ height=1000, # Taller for better subplot visibility
+ width=1400, # Wider
showlegend=True
)
@@ -2037,6 +2328,7 @@ def create_animated_route_visualization(prediction_results, show_uncertainty=Tru
current = prediction_results['current_prediction']
genesis_info = prediction_results['genesis_info']
+ # Calculate some statistics
max_intensity = max(intensities)
max_intensity_time = hours[intensities.index(max_intensity)]
avg_speed = np.mean(speeds)
@@ -2098,7 +2390,7 @@ MODEL: {prediction_results['model_info']}
return None, error_msg
# -----------------------------
-# Regression Functions
+# Regression Functions (Original)
# -----------------------------
def perform_wind_regression(start_year, start_month, end_year, end_month):
@@ -2153,7 +2445,7 @@ def perform_longitude_regression(start_year, start_month, end_year, end_month):
return f"Longitude Regression Error: {e}"
# -----------------------------
-# FIXED: Visualization Functions
+# Visualization Functions (Enhanced)
# -----------------------------
def get_full_tracks(start_year, start_month, end_year, end_month, enso_phase, typhoon_search):
@@ -2301,69 +2593,48 @@ def get_longitude_analysis(start_year, start_month, end_year, end_month, enso_ph
return fig, slopes_text, regression
# -----------------------------
-# FIXED: Animation Functions - NO FALLBACK
+# ENHANCED: Animation Functions with Taiwan Standard Support - FIXED VERSION
# -----------------------------
def get_available_years(typhoon_data):
- """Get all available years from actual data - NO FALLBACK"""
+ """Get all available years including 2025 - with error handling"""
try:
if typhoon_data is None or typhoon_data.empty:
- raise Exception("No typhoon data available for year extraction")
+ return [str(year) for year in range(2000, 2026)]
- years = set()
-
- # Try multiple methods to extract years
if 'ISO_TIME' in typhoon_data.columns:
- valid_times = typhoon_data['ISO_TIME'].dropna()
- if len(valid_times) > 0:
- years.update(valid_times.dt.year.unique())
-
- if 'SEASON' in typhoon_data.columns:
- valid_seasons = typhoon_data['SEASON'].dropna()
- if len(valid_seasons) > 0:
- years.update(valid_seasons.unique())
-
- # Extract from SID if available (format: BASIN + NUMBER + YEAR)
- if 'SID' in typhoon_data.columns and len(years) == 0:
- for sid in typhoon_data['SID'].dropna().unique():
- try:
- # Try to extract 4-digit year from SID
- year_match = pd.Series([sid]).str.extract(r'(\d{4})')[0].iloc[0]
- if year_match and 1950 <= int(year_match) <= 2030:
- years.add(int(year_match))
- except:
- continue
-
- if len(years) == 0:
- raise Exception("Could not extract any valid years from typhoon data")
+ years = typhoon_data['ISO_TIME'].dt.year.dropna().unique()
+ elif 'SEASON' in typhoon_data.columns:
+ years = typhoon_data['SEASON'].dropna().unique()
+ else:
+ years = range(2000, 2026) # Default range including 2025
- # Convert to sorted list of strings
- year_strings = sorted([str(int(year)) for year in years if 1950 <= year <= 2030])
+ # Convert to strings and sort
+ year_strings = sorted([str(int(year)) for year in years if not pd.isna(year)])
- if len(year_strings) == 0:
- raise Exception("No valid years found in reasonable range (1950-2030)")
+ # Ensure we have at least some years
+ if not year_strings:
+ return [str(year) for year in range(2000, 2026)]
- logging.info(f"Extracted {len(year_strings)} years from data: {year_strings[0]} to {year_strings[-1]}")
return year_strings
except Exception as e:
- logging.error(f"CRITICAL ERROR in get_available_years: {e}")
- raise Exception(f"Cannot extract years from typhoon data: {e}")
+ print(f"Error in get_available_years: {e}")
+ return [str(year) for year in range(2000, 2026)]
def update_typhoon_options_enhanced(year, basin):
- """Enhanced typhoon options - NEVER returns empty or fallback"""
+ """Enhanced typhoon options with TD support and 2025 data"""
try:
year = int(year)
- # Filter by year
+ # Filter by year - handle both ISO_TIME and SEASON columns
if 'ISO_TIME' in typhoon_data.columns:
year_mask = typhoon_data['ISO_TIME'].dt.year == year
elif 'SEASON' in typhoon_data.columns:
year_mask = typhoon_data['SEASON'] == year
else:
- # Try to extract from SID
- sid_year_mask = typhoon_data['SID'].str.contains(str(year), na=False)
- year_mask = sid_year_mask
+ # Fallback - try to extract year from SID or other fields
+ year_mask = typhoon_data.index >= 0 # Include all data as fallback
year_data = typhoon_data[year_mask].copy()
@@ -2376,9 +2647,9 @@ def update_typhoon_options_enhanced(year, basin):
year_data = year_data[year_data['BASIN'] == basin_code]
if year_data.empty:
- raise Exception(f"No storms found for year {year} and basin {basin}")
+ return gr.update(choices=["No storms found"], value=None)
- # Get unique storms
+ # Get unique storms - include ALL intensities (including TD)
storms = year_data.groupby('SID').agg({
'NAME': 'first',
'USA_WIND': 'max'
@@ -2399,50 +2670,39 @@ def update_typhoon_options_enhanced(year, basin):
options.append(option)
if not options:
- raise Exception(f"No valid storm options generated for year {year}")
+ return gr.update(choices=["No storms found"], value=None)
- logging.info(f"Generated {len(options)} storm options for {year}")
return gr.update(choices=sorted(options), value=options[0])
except Exception as e:
- error_msg = f"Error loading storms for {year}: {str(e)}"
- logging.error(error_msg)
- raise Exception(error_msg)
+ print(f"Error in update_typhoon_options_enhanced: {e}")
+ return gr.update(choices=["Error loading storms"], value=None)
def generate_enhanced_track_video_fixed(year, typhoon_selection, standard):
- """FIXED: Enhanced track video generation - NO FALLBACK ALLOWED"""
+ """FIXED: Enhanced track video generation with working animation display"""
+ if not typhoon_selection or "No storms found" in typhoon_selection or "Error" in typhoon_selection:
+ return None
+
try:
- if not typhoon_selection or "No storms found" in typhoon_selection or "Error" in typhoon_selection:
- raise Exception("Invalid typhoon selection provided")
-
# Extract SID from selection
- try:
- sid = typhoon_selection.split('(')[1].split(')')[0]
- except:
- raise Exception(f"Could not extract SID from selection: {typhoon_selection}")
+ sid = typhoon_selection.split('(')[1].split(')')[0]
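+ # e.g. "MEGI (WP152010)" -> SID "WP152010"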
# Get storm data
storm_df = typhoon_data[typhoon_data['SID'] == sid].copy()
if storm_df.empty:
- raise Exception(f"No track data found for storm {sid}")
+ print(f"No data found for storm {sid}")
+ return None
# Sort by time
if 'ISO_TIME' in storm_df.columns:
storm_df = storm_df.sort_values('ISO_TIME')
- # Validate essential data
- if 'LAT' not in storm_df.columns or 'LON' not in storm_df.columns:
- raise Exception(f"Missing coordinate data for storm {sid}")
-
# Extract data for animation
- lats = pd.to_numeric(storm_df['LAT'], errors='coerce').dropna().values
- lons = pd.to_numeric(storm_df['LON'], errors='coerce').dropna().values
-
- if len(lats) < 2 or len(lons) < 2:
- raise Exception(f"Insufficient track points for storm {sid}: {len(lats)} points")
+ lats = pd.to_numeric(storm_df['LAT'], errors='coerce').values # coerce: blank IBTrACS fields would crash astype(float)
+ lons = pd.to_numeric(storm_df['LON'], errors='coerce').values
if 'USA_WIND' in storm_df.columns:
- winds = pd.to_numeric(storm_df['USA_WIND'], errors='coerce').fillna(30).values[:len(lats)]
+ winds = pd.to_numeric(storm_df['USA_WIND'], errors='coerce').fillna(30).values # 30 kt default matches the no-wind-column fallback below
else:
winds = np.full(len(lats), 30)
@@ -2450,7 +2710,7 @@ def generate_enhanced_track_video_fixed(year, typhoon_selection, standard):
storm_name = storm_df['NAME'].iloc[0] if pd.notna(storm_df['NAME'].iloc[0]) else "UNNAMED"
season = storm_df['SEASON'].iloc[0] if 'SEASON' in storm_df.columns else year
- logging.info(f"Generating FIXED video for {storm_name} ({sid}) with {len(lats)} track points using {standard} standard")
+ print(f"Generating FIXED video for {storm_name} ({sid}) with {len(lats)} track points using {standard} standard")
# FIXED: Create figure with proper cartopy setup
fig = plt.figure(figsize=(16, 10))
@@ -2479,20 +2739,24 @@ def generate_enhanced_track_video_fixed(year, typhoon_selection, standard):
fontsize=18, fontweight='bold')
# FIXED: Animation elements - proper initialization with cartopy transforms
+ # Initialize empty line for track with correct transform
track_line, = ax.plot([], [], 'b-', linewidth=3, alpha=0.7,
label='Track', transform=ccrs.PlateCarree())
+ # Initialize current position marker
current_point, = ax.plot([], [], 'o', markersize=15,
transform=ccrs.PlateCarree())
+ # Historical track points (to show path traversed)
history_points, = ax.plot([], [], 'o', markersize=6, alpha=0.4, color='blue',
transform=ccrs.PlateCarree())
+ # Info text box
info_box = ax.text(0.02, 0.98, '', transform=ax.transAxes,
fontsize=12, verticalalignment='top',
bbox=dict(boxstyle="round,pad=0.5", facecolor='white', alpha=0.9))
- # FIXED: Color legend with proper categories
+ # FIXED: Color legend with proper categories for both standards
legend_elements = []
if standard == 'taiwan':
categories = ['Tropical Depression', 'Tropical Storm', 'Severe Tropical Storm',
@@ -2511,24 +2775,25 @@ def generate_enhanced_track_video_fixed(year, typhoon_selection, standard):
ax.legend(handles=legend_elements, loc='upper right', fontsize=10)
- # FIXED: Animation function
+ # FIXED: Animation function with proper artist updates and cartopy compatibility
def animate_fixed(frame):
"""Fixed animation function that properly updates tracks with cartopy"""
try:
if frame >= len(lats):
return track_line, current_point, history_points, info_box
- # Update track line up to current frame
+ # FIXED: Update track line up to current frame
current_lons = lons[:frame+1]
current_lats = lats[:frame+1]
+ # Update the track line data (this is the key fix!)
track_line.set_data(current_lons, current_lats)
- # Update historical points
+ # FIXED: Update historical points (smaller markers showing traversed path)
if frame > 0:
history_points.set_data(current_lons[:-1], current_lats[:-1])
- # Update current position with correct categorization
+ # FIXED: Update current position with correct categorization
current_wind = winds[frame]
if standard == 'taiwan':
@@ -2536,19 +2801,23 @@ def generate_enhanced_track_video_fixed(year, typhoon_selection, standard):
else:
category, color = categorize_typhoon_by_standard_fixed(current_wind, 'atlantic')
+ # Debug for first few frames
+ if frame < 3:
+ print(f"FIXED Frame {frame}: Wind={current_wind:.1f}kt, Category={category}, Color={color}")
+
# Update current position marker
current_point.set_data([lons[frame]], [lats[frame]])
current_point.set_color(color)
current_point.set_markersize(12 + current_wind/8)
- # Enhanced info display
+ # FIXED: Enhanced info display with correct Taiwan wind speed conversion
if 'ISO_TIME' in storm_df.columns and frame < len(storm_df):
current_time = storm_df.iloc[frame]['ISO_TIME']
time_str = current_time.strftime('%Y-%m-%d %H:%M UTC') if pd.notna(current_time) else 'Unknown'
else:
time_str = f"Step {frame+1}"
- # Wind speed display
+ # Corrected wind speed display for Taiwan standard
if standard == 'taiwan':
wind_ms = current_wind * 0.514444
wind_display = f"{current_wind:.0f} kt ({wind_ms:.1f} m/s)"
@@ -2566,43 +2835,52 @@ def generate_enhanced_track_video_fixed(year, typhoon_selection, standard):
)
info_box.set_text(info_text)
+ # FIXED: Return all modified artists (crucial for proper display)
return track_line, current_point, history_points, info_box
except Exception as e:
- logging.error(f"Error in animate frame {frame}: {e}")
+ print(f"Error in animate frame {frame}: {e}")
return track_line, current_point, history_points, info_box
# FIXED: Create animation with cartopy-compatible settings
+ # Key fixes: blit=False (crucial for cartopy), proper interval
anim = animation.FuncAnimation(
fig, animate_fixed, frames=len(lats),
- interval=600, blit=False, repeat=True
+ interval=600, blit=False, repeat=True # blit=False is essential for cartopy!
)
- # Save animation
+ # Save animation with optimized settings
temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.mp4',
dir=tempfile.gettempdir())
+ # FIXED: Writer settings optimized for track visibility
writer = animation.FFMpegWriter(
- fps=2, bitrate=3000, codec='libx264',
+ fps=2, bitrate=3000, codec='libx264', # Slower FPS for better track visibility
extra_args=['-pix_fmt', 'yuv420p']
)
- logging.info(f"Saving FIXED animation to {temp_file.name}")
+ print(f"Saving FIXED animation to {temp_file.name}")
anim.save(temp_file.name, writer=writer, dpi=120)
plt.close(fig)
- logging.info(f"FIXED video generated successfully: {temp_file.name}")
+ print(f"FIXED video generated successfully: {temp_file.name}")
return temp_file.name
except Exception as e:
- error_msg = f"CRITICAL ERROR generating video: {str(e)}"
- logging.error(error_msg)
+ print(f"Error generating FIXED video: {e}")
import traceback
traceback.print_exc()
- raise Exception(error_msg)
+ return None
+
+# FIXED: Update the simplified wrapper function
+def simplified_track_video_fixed(year, basin, typhoon, standard):
+ """Simplified track video function with FIXED animation and Taiwan classification"""
+ if not typhoon:
+ return None
+ return generate_enhanced_track_video_fixed(year, typhoon, standard)
# -----------------------------
-# FIXED: Data Loading and Processing
+# Load & Process Data
# -----------------------------
# Global variables initialization
@@ -2611,60 +2889,66 @@ typhoon_data = None
merged_data = None
def initialize_data():
- """Initialize all data safely - CRITICAL: NO FALLBACKS"""
+ """Initialize all data safely"""
global oni_data, typhoon_data, merged_data
try:
- logging.info("Starting FIXED data loading process...")
-
- # Update ONI data (optional)
+ logging.info("Starting data loading process...")
update_oni_data()
-
- # Load data with FIXED functions
oni_data, typhoon_data = load_data_fixed(ONI_DATA_PATH, TYPHOON_DATA_PATH)
- # Verify critical data loaded
- if typhoon_data is None or typhoon_data.empty:
- raise Exception("CRITICAL: No typhoon data loaded")
-
- if oni_data is None or oni_data.empty:
- logging.warning("ONI data failed to load - using neutral values")
-
- # Process data
+ if oni_data is not None and typhoon_data is not None:
+ oni_long = process_oni_data(oni_data)
+ typhoon_max = process_typhoon_data(typhoon_data)
+ merged_data = merge_data(oni_long, typhoon_max)
+ logging.info("Data loading complete.")
+ else:
+ logging.error("Failed to load required data")
+ # Create minimal fallback data
+ oni_data = pd.DataFrame({'Year': [2000], 'Jan': [0], 'Feb': [0], 'Mar': [0], 'Apr': [0],
+ 'May': [0], 'Jun': [0], 'Jul': [0], 'Aug': [0], 'Sep': [0],
+ 'Oct': [0], 'Nov': [0], 'Dec': [0]})
+ typhoon_data = create_fallback_typhoon_data()
+ oni_long = process_oni_data(oni_data)
+ typhoon_max = process_typhoon_data(typhoon_data)
+ merged_data = merge_data(oni_long, typhoon_max)
+ except Exception as e:
+ logging.error(f"Error during data initialization: {e}")
+ # Create minimal fallback data
+ oni_data = pd.DataFrame({'Year': [2000], 'Jan': [0], 'Feb': [0], 'Mar': [0], 'Apr': [0],
+ 'May': [0], 'Jun': [0], 'Jul': [0], 'Aug': [0], 'Sep': [0],
+ 'Oct': [0], 'Nov': [0], 'Dec': [0]})
+ typhoon_data = create_fallback_typhoon_data()
oni_long = process_oni_data(oni_data)
typhoon_max = process_typhoon_data(typhoon_data)
merged_data = merge_data(oni_long, typhoon_max)
-
- # Final validation
- if merged_data is None or merged_data.empty:
- raise Exception("CRITICAL: Merged data is empty")
-
- logging.info(f"FIXED data loading complete:")
- logging.info(f" - ONI data: {len(oni_data) if oni_data is not None else 0} years")
- logging.info(f" - Typhoon data: {len(typhoon_data)} records")
- logging.info(f" - Merged data: {len(merged_data)} storms")
-
- except Exception as e:
- logging.error(f"CRITICAL ERROR during FIXED data initialization: {e}")
- import traceback
- traceback.print_exc()
- raise Exception(f"Data initialization failed: {e}")
+
+# Initialize data
+initialize_data()
# -----------------------------
-# FIXED: Gradio Interface
+# ENHANCED: Gradio Interface with Fixed Route Visualization and Enhanced Features
# -----------------------------
def create_interface():
- """Create the enhanced Gradio interface - NO FALLBACKS"""
+ """Create the enhanced Gradio interface with robust error handling"""
try:
# Ensure data is available
if oni_data is None or typhoon_data is None or merged_data is None:
- raise Exception("Data not properly loaded for interface creation")
+ logging.warning("Data not properly loaded, creating minimal interface")
+ return create_minimal_fallback_interface()
# Get safe data statistics
- total_storms = len(typhoon_data['SID'].unique()) if 'SID' in typhoon_data.columns else 0
- total_records = len(typhoon_data)
- available_years = get_available_years(typhoon_data)
- year_range_display = f"{available_years[0]} - {available_years[-1]}" if available_years else "Unknown"
+ try:
+ total_storms = len(typhoon_data['SID'].unique()) if 'SID' in typhoon_data.columns else 0
+ total_records = len(typhoon_data)
+ available_years = get_available_years(typhoon_data)
+ year_range_display = f"{available_years[0]} - {available_years[-1]}" if available_years else "Unknown"
+ except Exception as e:
+ logging.error(f"Error getting data statistics: {e}")
+ total_storms = 0
+ total_records = 0
+ year_range_display = "Unknown"
+ available_years = [str(year) for year in range(2000, 2026)]
with gr.Blocks(title="Enhanced Typhoon Analysis Platform", theme=gr.themes.Soft()) as demo:
gr.Markdown("# 🌪️ Enhanced Typhoon Analysis Platform")
@@ -2683,21 +2967,18 @@ def create_interface():
- **Taiwan Standard**: Full support for Taiwan meteorological classification system
- **2025 Data Ready**: Real-time compatibility with current year data
- **Enhanced Animations**: High-quality storm track visualizations with both standards
- - **NO FALLBACK DATA**: All data comes from real IBTrACS sources
### 📊 Data Status:
- - **ONI Data**: {len(oni_data) if oni_data is not None else 0} years loaded
+ - **ONI Data**: {len(oni_data)} years loaded
- **Typhoon Data**: {total_records:,} records loaded
- - **Merged Data**: {len(merged_data):,} typhoons with analysis data
+ - **Merged Data**: {len(merged_data):,} typhoons with ONI values
- **Available Years**: {year_range_display}
- - **Unique Storms**: {total_storms:,}
### 🔧 Technical Capabilities:
- **UMAP Clustering**: {"✅ Available" if UMAP_AVAILABLE else "⚠️ Limited to t-SNE/PCA"}
- **AI Predictions**: {"🧠 Deep Learning" if CNN_AVAILABLE else "🔬 Physics-based"}
- **Enhanced Categorization**: Tropical Depression to Super Typhoon
- - **Platform**: Optimized for real-time analysis
- - **Data Source**: Live IBTrACS database (no synthetic data)
+ - **Platform**: Optimized for Hugging Face Spaces
### 📈 Research Applications:
- Climate change impact studies
@@ -2740,9 +3021,10 @@ def create_interface():
def run_separate_clustering_analysis(method):
try:
+ # Extract features for clustering
storm_features = extract_storm_features(typhoon_data)
if storm_features is None:
- raise Exception("Could not extract storm features from data")
+ return None, None, None, None, "Error: Could not extract storm features"
fig_cluster, fig_routes, fig_pressure, fig_wind, stats = create_separate_clustering_plots(
storm_features, typhoon_data, method.lower()
@@ -2751,8 +3033,7 @@ def create_interface():
except Exception as e:
import traceback
error_details = traceback.format_exc()
- error_msg = f"Clustering analysis failed: {str(e)}\n\nDetails:\n{error_details}"
- logging.error(error_msg)
+ error_msg = f"Error: {str(e)}\n\nDetails:\n{error_details}"
return None, None, None, None, error_msg
analyze_clusters_btn.click(
@@ -2760,6 +3041,24 @@ def create_interface():
inputs=[reduction_method],
outputs=[cluster_plot, routes_plot, pressure_plot, wind_plot, cluster_stats]
)
+
+ cluster_info_text = """
+ ### 📊 Enhanced Clustering Features:
+ - **Separate Visualizations**: Four distinct plots for comprehensive analysis
+ - **Multi-dimensional Analysis**: Uses 15+ storm characteristics including intensity, track shape, genesis location
+ - **Route Visualization**: Geographic storm tracks colored by cluster membership
+ - **Temporal Analysis**: Pressure and wind evolution patterns by cluster
+ - **DBSCAN Clustering**: Automatic pattern discovery without predefined cluster count
+ - **Interactive**: Hover over points to see storm details, zoom and pan all plots
+
+ ### 🎯 How to Interpret:
+ - **Clustering Plot**: Each dot is a storm positioned by similarity (close = similar characteristics)
+ - **Routes Plot**: Actual geographic storm tracks, colored by which cluster they belong to
+ - **Pressure Plot**: Shows how pressure changes over time for storms in each cluster
+ - **Wind Plot**: Shows wind speed evolution patterns for each cluster
+ - **Cluster Colors**: Each cluster gets a unique color across all four visualizations
+ """
+ gr.Markdown(cluster_info_text)
with gr.Tab("🌊 Realistic Storm Genesis & Prediction"):
gr.Markdown("## 🌊 Realistic Typhoon Development from Genesis")
@@ -2787,6 +3086,7 @@ def create_interface():
info="Select realistic development region based on climatology"
)
+ # Display selected region info
def update_genesis_info(region):
locations = get_realistic_genesis_locations()
if region in locations:
@@ -2815,9 +3115,9 @@ def create_interface():
label="Forecast Length (hours)",
value=72,
minimum=20,
maximum=1000,
step=6,
- info="Extended forecasting: 20-1000 hours"
+ info="Extended forecasting: 20-1000 hours (~42 days max)"
)
advanced_physics = gr.Checkbox(
label="Advanced Physics",
@@ -2849,17 +3149,20 @@ def create_interface():
def run_realistic_prediction(region, month, oni, hours, advanced_phys, uncertainty, animation):
try:
+ # Run realistic prediction with genesis region
results = predict_storm_route_and_intensity_realistic(
region, month, oni,
forecast_hours=hours,
use_advanced_physics=advanced_phys
)
+ # Extract genesis conditions
current = results['current_prediction']
intensity = current['intensity_kt']
category = current['category']
genesis_info = results.get('genesis_info', {})
+ # Create enhanced visualization
fig, forecast_text = create_animated_route_visualization(
results, uncertainty, animation
)
@@ -2878,7 +3181,10 @@ def create_interface():
logging.error(error_msg)
import traceback
traceback.print_exc()
- raise gr.Error(error_msg)
+ return (
+ 30, "Tropical Depression", f"Prediction failed: {str(e)}",
+ None, f"Error generating realistic forecast: {str(e)}"
+ )
predict_btn.click(
fn=run_realistic_prediction,
@@ -2956,14 +3262,13 @@ def create_interface():
)
with gr.Tab("🎬 Enhanced Track Animation"):
- gr.Markdown("## 🎥 High-Quality Storm Track Visualization - NO FALLBACK DATA")
- gr.Markdown("**ALL animations use real IBTrACS data - never synthetic or fallback data**")
+ gr.Markdown("## 🎥 High-Quality Storm Track Visualization (Atlantic & Taiwan Standards)")
with gr.Row():
year_dropdown = gr.Dropdown(
label="Year",
choices=available_years,
- value=available_years[-1] if available_years else None
+ value=available_years[-1] if available_years else "2024"
)
basin_dropdown = gr.Dropdown(
label="Basin",
@@ -2984,40 +3289,23 @@ def create_interface():
video_output = gr.Video(label="Storm Track Animation")
# Update storm options when year or basin changes
- def safe_update_typhoon_options(year, basin):
- try:
- return update_typhoon_options_enhanced(year, basin)
- except Exception as e:
- error_msg = f"Failed to load storms: {str(e)}"
- logging.error(error_msg)
- return gr.update(choices=[error_msg], value=None)
-
for input_comp in [year_dropdown, basin_dropdown]:
input_comp.change(
- fn=safe_update_typhoon_options,
+ fn=update_typhoon_options_enhanced,
inputs=[year_dropdown, basin_dropdown],
outputs=[typhoon_dropdown]
)
- def safe_generate_video(year, typhoon_selection, standard):
- try:
- if not typhoon_selection:
- raise gr.Error("Please select a typhoon first")
- return generate_enhanced_track_video_fixed(year, typhoon_selection, standard)
- except Exception as e:
- error_msg = f"Video generation failed: {str(e)}"
- logging.error(error_msg)
- raise gr.Error(error_msg)
-
+ # FIXED: Generate video with fixed function
generate_video_btn.click(
- fn=safe_generate_video,
+ fn=generate_enhanced_track_video_fixed,
inputs=[year_dropdown, typhoon_dropdown, standard_dropdown],
outputs=[video_output]
)
+ # FIXED animation info text with corrected Taiwan standards
animation_info_text = """
- ### 🎬 FIXED Animation Features - NO FALLBACK DATA:
- - **Real Data Only**: All animations use actual IBTrACS typhoon track data
+ ### 🎬 Enhanced Animation Features:
- **Dual Standards**: Full support for both Atlantic and Taiwan classification systems
- **Full TD Support**: Now displays Tropical Depressions (< 34 kt) in gray
- **2025 Compatibility**: Complete support for current year data
@@ -3025,26 +3313,36 @@ def create_interface():
- **Smart Scaling**: Storm symbols scale dynamically with intensity
- **Real-time Info**: Live position, time, and meteorological data display
- **Professional Styling**: Publication-quality animations with proper legends
+ - **Optimized Export**: Fast rendering with web-compatible video formats
- **FIXED Animation**: Tracks now display properly with cartopy integration
- - **Error Handling**: Robust error handling prevents fallback to synthetic data
### 🎌 Taiwan Standard Features (CORRECTED):
- **CMA 2006 Standards**: Uses official China Meteorological Administration classification
- **Six Categories**: TD → TS → STS → TY → STY → Super TY
- - **Correct Thresholds**: Based on official meteorological standards
+ - **Correct Thresholds**:
+ * Tropical Depression: < 17.2 m/s (< 33.4 kt)
+ * Tropical Storm: 17.2-24.4 m/s (33.4-47.4 kt)
+ * Severe Tropical Storm: 24.5-32.6 m/s (47.6-63.4 kt)
+ * Typhoon: 32.7-41.4 m/s (63.6-80.5 kt)
+ * Severe Typhoon: 41.5-50.9 m/s (80.7-98.9 kt)
+ * Super Typhoon: ≥51.0 m/s (≥99.1 kt)
- **m/s Display**: Shows both knots and meters per second
- **CWB Compatible**: Matches Central Weather Bureau classifications
+ - **Fixed Color Coding**: Gray → Blue → Cyan → Yellow → Orange → Red
"""
gr.Markdown(animation_info_text)
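+ # Compact sketch of the thresholds listed above (the app's real logic lives in
+ # categorize_typhoon_by_standard_fixed; the m/s bounds are the authoritative ones):
+ #   def taiwan_category(wind_kt):
+ #       ms = wind_kt * 0.514444  # knots -> m/s
+ #       if ms >= 51.0: return 'Super Typhoon'
+ #       if ms >= 41.5: return 'Severe Typhoon'
+ #       if ms >= 32.7: return 'Typhoon'
+ #       if ms >= 24.5: return 'Severe Tropical Storm'
+ #       if ms >= 17.2: return 'Tropical Storm'
+ #       return 'Tropical Depression'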
with gr.Tab("📊 Data Statistics & Insights"):
- gr.Markdown("## 📈 Comprehensive Dataset Analysis - REAL DATA ONLY")
+ gr.Markdown("## 📈 Comprehensive Dataset Analysis")
+ # Create enhanced data summary
try:
if len(typhoon_data) > 0:
+ # Storm category distribution
storm_cats = typhoon_data.groupby('SID')['USA_WIND'].max().apply(categorize_typhoon_enhanced)
cat_counts = storm_cats.value_counts()
+ # Create distribution chart with enhanced colors
fig_dist = px.bar(
x=cat_counts.index,
y=cat_counts.values,
@@ -3054,6 +3352,7 @@ def create_interface():
color_discrete_map=enhanced_color_map
)
+ # Seasonal distribution
if 'ISO_TIME' in typhoon_data.columns:
seasonal_data = typhoon_data.copy()
seasonal_data['Month'] = seasonal_data['ISO_TIME'].dt.month
@@ -3070,6 +3369,7 @@ def create_interface():
else:
fig_seasonal = None
+ # Basin distribution
if 'SID' in typhoon_data.columns:
basin_data = typhoon_data['SID'].str[:2].value_counts()
fig_basin = px.pie(
@@ -3094,7 +3394,10 @@ def create_interface():
except Exception as e:
gr.Markdown(f"Visualization error: {str(e)}")
- # Enhanced statistics
+ # Enhanced statistics - FIXED formatting
+ total_storms = len(typhoon_data['SID'].unique()) if 'SID' in typhoon_data.columns else 0
+ total_records = len(typhoon_data)
+
if 'SEASON' in typhoon_data.columns:
try:
min_year = int(typhoon_data['SEASON'].min())
@@ -3119,6 +3422,7 @@ def create_interface():
basins_available = "Unknown"
avg_storms_per_year = 0
+ # TD specific statistics
try:
if 'USA_WIND' in typhoon_data.columns:
td_storms = len(typhoon_data[typhoon_data['USA_WIND'] < 34]['SID'].unique())
@@ -3129,17 +3433,18 @@ def create_interface():
td_storms = ts_storms = typhoon_storms = 0
td_percentage = 0
except Exception as e:
+ print(f"Error calculating TD statistics: {e}")
td_storms = ts_storms = typhoon_storms = 0
td_percentage = 0
+ # Create statistics text safely
stats_text = f"""
- ### 📊 REAL Dataset Summary - NO SYNTHETIC DATA:
+ ### 📊 Enhanced Dataset Summary:
- **Total Unique Storms**: {total_storms:,}
- **Total Track Records**: {total_records:,}
- **Year Range**: {year_range} ({years_covered} years)
- **Basins Available**: {basins_available}
- **Average Storms/Year**: {avg_storms_per_year:.1f}
- - **Data Source**: IBTrACS v04r01 (Real observations only)
### 🌪️ Storm Category Breakdown:
- **Tropical Depressions**: {td_storms:,} storms ({td_percentage:.1f}%)
@@ -3154,7 +3459,6 @@ def create_interface():
- **2025 Data Ready** - Full compatibility with current season data
- **Enhanced Animations** - Professional-quality storm track videos
- **Multi-basin Analysis** - Comprehensive Pacific and Atlantic coverage
- - **NO FALLBACK DATA** - All analysis uses real meteorological observations
### 🔬 Research Applications:
- Climate change impact studies
@@ -3168,38 +3472,70 @@ def create_interface():
return demo
except Exception as e:
- logging.error(f"CRITICAL ERROR creating Gradio interface: {e}")
+ logging.error(f"Error creating Gradio interface: {e}")
import traceback
traceback.print_exc()
- raise Exception(f"Interface creation failed: {e}")
+ # Create a minimal fallback interface
+ return create_minimal_fallback_interface()
+
+def create_minimal_fallback_interface():
+ """Create a minimal fallback interface when main interface fails"""
+ with gr.Blocks() as demo:
+ gr.Markdown("# Enhanced Typhoon Analysis Platform")
+ gr.Markdown("**Notice**: Loading with minimal interface due to data issues.")
+
+ with gr.Tab("Status"):
+ gr.Markdown("""
+ ## Platform Status
+
+ The application is running but encountered issues loading the full interface.
+ This could be due to:
+ - Data loading problems
+ - Missing dependencies
+ - Configuration issues
+
+ ### Available Features:
+ - Basic interface is functional
+ - Error logs are being generated
+ - System is ready for debugging
+
+ ### Next Steps:
+ 1. Check the console logs for detailed error information
+ 2. Verify all required data files are accessible
+ 3. Ensure all dependencies are properly installed
+ 4. Try restarting the application
+ """)
+
+ with gr.Tab("Debug"):
+ gr.Markdown("## Debug Information")
+
+ def get_debug_info():
+ debug_text = f"""
+ Python Environment:
+ - Working Directory: {os.getcwd()}
+ - Data Path: {DATA_PATH}
+ - UMAP Available: {UMAP_AVAILABLE}
+ - CNN Available: {CNN_AVAILABLE}
+
+ Data Status:
+ - ONI Data: {'Loaded' if oni_data is not None else 'Failed'}
+ - Typhoon Data: {'Loaded' if typhoon_data is not None else 'Failed'}
+ - Merged Data: {'Loaded' if merged_data is not None else 'Failed'}
+
+ File Checks:
+ - ONI Path Exists: {os.path.exists(ONI_DATA_PATH)}
+ - Typhoon Path Exists: {os.path.exists(TYPHOON_DATA_PATH)}
+ """
+ return debug_text
+
+ debug_btn = gr.Button("Get Debug Info")
+ debug_output = gr.Textbox(label="Debug Information", lines=15)
+ debug_btn.click(fn=get_debug_info, outputs=debug_output)
+
+ return demo
-# -----------------------------
-# MAIN EXECUTION
-# -----------------------------
+# Create and launch the interface
+demo = create_interface()
if __name__ == "__main__":
- try:
- # Initialize data first - CRITICAL
- logging.info("Initializing data...")
- initialize_data()
-
- # Verify data loaded correctly
- if typhoon_data is None or typhoon_data.empty:
- raise Exception("CRITICAL: No typhoon data available for interface")
-
- logging.info("Creating interface...")
- demo = create_interface()
-
- logging.info("Launching application...")
- demo.launch(share=True)
-
- except Exception as e:
- logging.error(f"CRITICAL APPLICATION ERROR: {e}")
- import traceback
- traceback.print_exc()
- print(f"\n{'='*60}")
- print("CRITICAL ERROR: Application failed to start")
- print(f"Error: {e}")
- print("Check logs for detailed error information")
- print(f"{'='*60}")
- raise
\ No newline at end of file
+ demo.launch(share=True) # Enable sharing with public link
\ No newline at end of file