diff --git "a/app.py" "b/app.py" --- "a/app.py" +++ "b/app.py" @@ -41,7 +41,7 @@ import tempfile import shutil import xarray as xr -# Advanced ML imports +# NEW: Advanced ML imports try: import umap.umap_ as umap UMAP_AVAILABLE = True @@ -52,10 +52,12 @@ except ImportError: # Optional CNN imports with robust error handling CNN_AVAILABLE = False try: - os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' + # Set environment variables before importing TensorFlow + os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' # Suppress TensorFlow warnings import tensorflow as tf from tensorflow.keras import layers, models - tf.config.set_visible_devices([], 'GPU') + # Test if TensorFlow actually works + tf.config.set_visible_devices([], 'GPU') # Disable GPU to avoid conflicts CNN_AVAILABLE = True print("TensorFlow successfully loaded - CNN features enabled") except Exception as e: @@ -78,11 +80,13 @@ logging.basicConfig( format='%(asctime)s - %(levelname)s - %(message)s' ) -# FIXED: Data path setup +# Remove argument parser to simplify startup DATA_PATH = '/tmp/typhoon_data' if 'SPACE_ID' in os.environ else tempfile.gettempdir() +# Ensure directory exists and is writable try: os.makedirs(DATA_PATH, exist_ok=True) + # Test write permissions test_file = os.path.join(DATA_PATH, 'test_write.txt') with open(test_file, 'w') as f: f.write('test') @@ -102,17 +106,20 @@ MERGED_DATA_CSV = os.path.join(DATA_PATH, 'merged_typhoon_era5_data.csv') BASIN_FILES = { 'EP': 'ibtracs.EP.list.v04r01.csv', 'NA': 'ibtracs.NA.list.v04r01.csv', - 'WP': 'ibtracs.WP.list.v04r01.csv', - 'ALL': 'ibtracs.ALL.list.v04r01.csv' # Added ALL basin option + 'WP': 'ibtracs.WP.list.v04r01.csv' } IBTRACS_BASE_URL = 'https://www.ncei.noaa.gov/data/international-best-track-archive-for-climate-stewardship-ibtracs/v04r01/access/csv/' +LOCAL_IBTRACS_PATH = os.path.join(DATA_PATH, 'ibtracs.WP.list.v04r01.csv') +CACHE_FILE = os.path.join(DATA_PATH, 'ibtracs_cache.pkl') +CACHE_EXPIRY_DAYS = 1 # ----------------------------- -# FIXED: Color Maps and Standards with TD Support +# ENHANCED: Color Maps and Standards with TD Support - FIXED TAIWAN CLASSIFICATION # ----------------------------- +# Enhanced color mapping with TD support (for Plotly) enhanced_color_map = { 'Unknown': 'rgb(200, 200, 200)', - 'Tropical Depression': 'rgb(128, 128, 128)', + 'Tropical Depression': 'rgb(128, 128, 128)', # Gray for TD 'Tropical Storm': 'rgb(0, 0, 255)', 'C1 Typhoon': 'rgb(0, 255, 255)', 'C2 Typhoon': 'rgb(0, 255, 0)', @@ -121,26 +128,42 @@ enhanced_color_map = { 'C5 Super Typhoon': 'rgb(255, 0, 0)' } +# Matplotlib-compatible color mapping (hex colors) matplotlib_color_map = { 'Unknown': '#C8C8C8', - 'Tropical Depression': '#808080', - 'Tropical Storm': '#0000FF', - 'C1 Typhoon': '#00FFFF', - 'C2 Typhoon': '#00FF00', - 'C3 Strong Typhoon': '#FFFF00', - 'C4 Very Strong Typhoon': '#FFA500', - 'C5 Super Typhoon': '#FF0000' + 'Tropical Depression': '#808080', # Gray for TD + 'Tropical Storm': '#0000FF', # Blue + 'C1 Typhoon': '#00FFFF', # Cyan + 'C2 Typhoon': '#00FF00', # Green + 'C3 Strong Typhoon': '#FFFF00', # Yellow + 'C4 Very Strong Typhoon': '#FFA500', # Orange + 'C5 Super Typhoon': '#FF0000' # Red } +# FIXED: Taiwan color mapping with correct CMA 2006 standards taiwan_color_map_fixed = { - 'Tropical Depression': '#808080', - 'Tropical Storm': '#0000FF', - 'Severe Tropical Storm': '#00FFFF', - 'Typhoon': '#FFFF00', - 'Severe Typhoon': '#FFA500', - 'Super Typhoon': '#FF0000' + 'Tropical Depression': '#808080', # Gray + 'Tropical Storm': '#0000FF', # Blue + 'Severe Tropical Storm': 
'#00FFFF', # Cyan + 'Typhoon': '#FFFF00', # Yellow + 'Severe Typhoon': '#FFA500', # Orange + 'Super Typhoon': '#FF0000' # Red } +def rgb_string_to_hex(rgb_string): + """Convert 'rgb(r,g,b)' string to hex color for matplotlib""" + try: + # Extract numbers from 'rgb(r,g,b)' format + import re + numbers = re.findall(r'\d+', rgb_string) + if len(numbers) == 3: + r, g, b = map(int, numbers) + return f'#{r:02x}{g:02x}{b:02x}' + else: + return '#808080' # Default gray + except: + return '#808080' # Default gray + def get_matplotlib_color(category): """Get matplotlib-compatible color for a storm category""" return matplotlib_color_map.get(category, '#808080') @@ -162,7 +185,17 @@ ROUTE_COLORS = [ '#FF00CC', '#00FFCC', '#CC00FF', '#CCFF00', '#00CCFF' ] -# Classification standards +# Original color map for backward compatibility +color_map = { + 'C5 Super Typhoon': 'rgb(255, 0, 0)', + 'C4 Very Strong Typhoon': 'rgb(255, 165, 0)', + 'C3 Strong Typhoon': 'rgb(255, 255, 0)', + 'C2 Typhoon': 'rgb(0, 255, 0)', + 'C1 Typhoon': 'rgb(0, 255, 255)', + 'Tropical Storm': 'rgb(0, 0, 255)', + 'Tropical Depression': 'rgb(128, 128, 128)' +} + atlantic_standard = { 'C5 Super Typhoon': {'wind_speed': 137, 'color': 'Red', 'hex': '#FF0000'}, 'C4 Very Strong Typhoon': {'wind_speed': 113, 'color': 'Orange', 'hex': '#FFA500'}, @@ -173,6 +206,7 @@ atlantic_standard = { 'Tropical Depression': {'wind_speed': 0, 'color': 'Gray', 'hex': '#808080'} } +# FIXED: Taiwan standard with correct CMA 2006 thresholds taiwan_standard_fixed = { 'Super Typhoon': {'wind_speed_ms': 51.0, 'wind_speed_kt': 99.2, 'color': 'Red', 'hex': '#FF0000'}, 'Severe Typhoon': {'wind_speed_ms': 41.5, 'wind_speed_kt': 80.7, 'color': 'Orange', 'hex': '#FFA500'}, @@ -183,20 +217,26 @@ taiwan_standard_fixed = { } # ----------------------------- -# FIXED: Utility Functions +# Utility Functions for HF Spaces # ----------------------------- def safe_file_write(file_path, data_frame, backup_dir=None): """Safely write DataFrame to CSV with backup and error handling""" try: + # Create directory if it doesn't exist os.makedirs(os.path.dirname(file_path), exist_ok=True) + + # Try to write to a temporary file first temp_path = file_path + '.tmp' data_frame.to_csv(temp_path, index=False) + + # If successful, rename to final file os.rename(temp_path, file_path) logging.info(f"Successfully saved {len(data_frame)} records to {file_path}") return True - except Exception as e: - logging.error(f"Error saving file {file_path}: {e}") + + except PermissionError as e: + logging.warning(f"Permission denied writing to {file_path}: {e}") if backup_dir: try: backup_path = os.path.join(backup_dir, os.path.basename(file_path)) @@ -206,9 +246,44 @@ def safe_file_write(file_path, data_frame, backup_dir=None): except Exception as backup_e: logging.error(f"Failed to save to backup location: {backup_e}") return False + + except Exception as e: + logging.error(f"Error saving file {file_path}: {e}") + # Clean up temp file if it exists + temp_path = file_path + '.tmp' + if os.path.exists(temp_path): + try: + os.remove(temp_path) + except: + pass + return False + +def get_fallback_data_dir(): + """Get a fallback data directory that's guaranteed to be writable""" + fallback_dirs = [ + tempfile.gettempdir(), + '/tmp', + os.path.expanduser('~'), + os.getcwd() + ] + + for directory in fallback_dirs: + try: + test_dir = os.path.join(directory, 'typhoon_fallback') + os.makedirs(test_dir, exist_ok=True) + test_file = os.path.join(test_dir, 'test.txt') + with open(test_file, 'w') as f: + 
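# --- NOTE: illustrative sketch only; not part of this patch -------------------
# safe_file_write() above uses the write-to-temp-then-rename pattern so a crash
# mid-write never leaves a half-written CSV behind. The core idea in isolation;
# names and paths below are examples, not the app's API:
import os
import pandas as pd

def atomic_to_csv(df: pd.DataFrame, path: str) -> None:
    tmp = path + '.tmp'
    df.to_csv(tmp, index=False)   # write the full file under a temp name first
    os.replace(tmp, path)         # atomic swap on POSIX; os.replace also overwrites on Windows

atomic_to_csv(pd.DataFrame({'a': [1, 2]}), '/tmp/example.csv')
# ------------------------------------------------------------------------------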
f.write('test') + os.remove(test_file) + return test_dir + except: + continue + + # If all else fails, use current directory + return os.getcwd() # ----------------------------- -# FIXED: ONI Data Functions +# ONI and Typhoon Data Functions # ----------------------------- def download_oni_file(url, filename): @@ -224,8 +299,10 @@ def download_oni_file(url, filename): except Exception as e: logging.warning(f"Attempt {attempt + 1} failed to download ONI: {e}") if attempt < max_retries - 1: - time.sleep(2 ** attempt) - return False + time.sleep(2 ** attempt) # Exponential backoff + else: + logging.error(f"Failed to download ONI after {max_retries} attempts") + return False def convert_oni_ascii_to_csv(input_file, output_file): """Convert ONI ASCII format to CSV""" @@ -246,11 +323,12 @@ def convert_oni_ascii_to_csv(input_file, output_file): year = str(int(year)-1) data[year][month-1] = anom + # Write to CSV with safe write df = pd.DataFrame(data).T.reset_index() df.columns = ['Year','Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec'] df = df.sort_values('Year').reset_index(drop=True) - return safe_file_write(output_file, df) + return safe_file_write(output_file, df, get_fallback_data_dir()) except Exception as e: logging.error(f"Error converting ONI file: {e}") @@ -271,31 +349,33 @@ def update_oni_data(): else: os.remove(temp_file) else: - logging.warning("ONI download failed - will create minimal ONI data") - create_minimal_oni_data(output_file) + # Create fallback ONI data if download fails + logging.warning("Creating fallback ONI data") + create_fallback_oni_data(output_file) except Exception as e: logging.error(f"Error updating ONI data: {e}") - create_minimal_oni_data(output_file) + create_fallback_oni_data(output_file) -def create_minimal_oni_data(output_file): - """Create minimal ONI data for years without dropping typhoon data""" - years = range(1950, 2026) # Wide range to ensure coverage +def create_fallback_oni_data(output_file): + """Create minimal ONI data for testing""" + years = range(2000, 2026) # Extended to include 2025 months = ['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec'] + # Create synthetic ONI data data = [] for year in years: row = [year] for month in months: - # Generate neutral ONI values (small variations around 0) - value = np.random.normal(0, 0.3) + # Generate some realistic ONI values + value = np.random.normal(0, 1) * 0.5 row.append(f"{value:.2f}") data.append(row) df = pd.DataFrame(data, columns=['Year'] + months) - safe_file_write(output_file, df) + safe_file_write(output_file, df, get_fallback_data_dir()) # ----------------------------- -# FIXED: IBTrACS Data Loading - No Fallback, All Data +# FIXED: IBTrACS Data Loading # ----------------------------- def download_ibtracs_file(basin, force_download=False): @@ -304,6 +384,7 @@ def download_ibtracs_file(basin, force_download=False): local_path = os.path.join(DATA_PATH, filename) url = IBTRACS_BASE_URL + filename + # Check if file exists and is recent (less than 7 days old) if os.path.exists(local_path) and not force_download: file_age = time.time() - os.path.getmtime(local_path) if file_age < 7 * 24 * 3600: # 7 days @@ -312,9 +393,10 @@ def download_ibtracs_file(basin, force_download=False): try: logging.info(f"Downloading {basin} basin file from {url}") - response = requests.get(url, timeout=120) # Increased timeout + response = requests.get(url, timeout=60) response.raise_for_status() + # Ensure directory exists os.makedirs(os.path.dirname(local_path), 
exist_ok=True) with open(local_path, 'wb') as f: @@ -325,8 +407,29 @@ def download_ibtracs_file(basin, force_download=False): logging.error(f"Failed to download {basin} basin file: {e}") return None -def load_ibtracs_csv_directly(basin='ALL'): - """Load IBTrACS data directly from CSV - FIXED to load ALL data""" +def examine_ibtracs_structure(file_path): + """Examine the actual structure of an IBTrACS CSV file""" + try: + with open(file_path, 'r') as f: + lines = f.readlines() + + # Show first 5 lines + logging.info("First 5 lines of IBTrACS file:") + for i, line in enumerate(lines[:5]): + logging.info(f"Line {i}: {line.strip()}") + + # The first line contains the actual column headers + # No need to skip rows for IBTrACS v04r01 + df = pd.read_csv(file_path, nrows=5) + logging.info(f"Columns from first row: {list(df.columns)}") + + return list(df.columns) + except Exception as e: + logging.error(f"Error examining IBTrACS structure: {e}") + return None + +def load_ibtracs_csv_directly(basin='WP'): + """Load IBTrACS data directly from CSV - FIXED VERSION""" filename = BASIN_FILES[basin] local_path = os.path.join(DATA_PATH, filename) @@ -334,328 +437,283 @@ def load_ibtracs_csv_directly(basin='ALL'): if not os.path.exists(local_path): downloaded_path = download_ibtracs_file(basin) if not downloaded_path: - logging.error(f"Could not download {basin} basin data") return None try: + # First, examine the structure + actual_columns = examine_ibtracs_structure(local_path) + if not actual_columns: + logging.error("Could not examine IBTrACS file structure") + return None + + # Read IBTrACS CSV - DON'T skip any rows for v04r01 + # The first row contains proper column headers logging.info(f"Reading IBTrACS CSV file: {local_path}") - # Read with low_memory=False to ensure proper data types - df = pd.read_csv(local_path, low_memory=False) + df = pd.read_csv(local_path, low_memory=False) # Don't skip any rows - logging.info(f"Original data shape: {df.shape}") - logging.info(f"Available columns: {list(df.columns)}") + logging.info(f"Original columns: {list(df.columns)}") + logging.info(f"Data shape before cleaning: {df.shape}") - # Essential columns check - required_cols = ['SID', 'LAT', 'LON'] - missing_cols = [col for col in required_cols if col not in df.columns] - if missing_cols: - logging.error(f"Missing critical columns: {missing_cols}") + # Check which essential columns exist + required_cols = ['SID', 'ISO_TIME', 'LAT', 'LON'] + available_required = [col for col in required_cols if col in df.columns] + + if len(available_required) < 2: + logging.error(f"Missing critical columns. 
Available: {list(df.columns)}") return None - # FIXED: Data cleaning without dropping data unnecessarily - # Clean numeric columns carefully + # Clean and standardize the data with format specification + if 'ISO_TIME' in df.columns: + df['ISO_TIME'] = pd.to_datetime(df['ISO_TIME'], format='%Y-%m-%d %H:%M:%S', errors='coerce') + + # Clean numeric columns numeric_columns = ['LAT', 'LON', 'WMO_WIND', 'WMO_PRES', 'USA_WIND', 'USA_PRES'] for col in numeric_columns: if col in df.columns: df[col] = pd.to_numeric(df[col], errors='coerce') - # Time handling - if 'ISO_TIME' in df.columns: - df['ISO_TIME'] = pd.to_datetime(df['ISO_TIME'], errors='coerce') - - # FIXED: Only filter out clearly invalid coordinates - valid_coords = ( - df['LAT'].notna() & - df['LON'].notna() & - (df['LAT'].between(-90, 90)) & - (df['LON'].between(-180, 180)) - ) - df = df[valid_coords] + # Filter out invalid/missing critical data + valid_rows = df['LAT'].notna() & df['LON'].notna() + df = df[valid_rows] + + # Ensure LAT/LON are in reasonable ranges + df = df[(df['LAT'] >= -90) & (df['LAT'] <= 90)] + df = df[(df['LON'] >= -180) & (df['LON'] <= 180)] - # Add missing columns with defaults + # Add basin info if missing if 'BASIN' not in df.columns: - if 'SID' in df.columns: - df['BASIN'] = df['SID'].str[:2] - else: - df['BASIN'] = basin + df['BASIN'] = basin + # Add default columns if missing if 'NAME' not in df.columns: df['NAME'] = 'UNNAMED' if 'SEASON' not in df.columns and 'ISO_TIME' in df.columns: df['SEASON'] = df['ISO_TIME'].dt.year - elif 'SEASON' not in df.columns: - # Extract year from SID if possible - if 'SID' in df.columns: - try: - df['SEASON'] = df['SID'].str.extract(r'(\d{4})').astype(float) - except: - df['SEASON'] = 2000 # Default year logging.info(f"Successfully loaded {len(df)} records from {basin} basin") - logging.info(f"Final data shape: {df.shape}") return df except Exception as e: logging.error(f"Error reading IBTrACS CSV file: {e}") - import traceback - traceback.print_exc() return None -def load_all_ibtracs_data(): - """Load ALL available IBTrACS data - FIXED to never use fallback""" - all_data = [] +def load_ibtracs_data_fixed(): + """Fixed version of IBTrACS data loading""" + ibtracs_data = {} - # Try to load the ALL basin file first (contains all basins) - try: - logging.info("Attempting to load ALL basin data...") - all_basin_data = load_ibtracs_csv_directly('ALL') - if all_basin_data is not None and not all_basin_data.empty: - logging.info(f"Successfully loaded ALL basin data: {len(all_basin_data)} records") - return all_basin_data - except Exception as e: - logging.warning(f"Failed to load ALL basin data: {e}") + # Try to load each basin, but prioritize WP for this application + load_order = ['WP', 'EP', 'NA'] - # If ALL basin fails, load individual basins - basins_to_load = ['WP', 'EP', 'NA'] - for basin in basins_to_load: + for basin in load_order: try: logging.info(f"Loading {basin} basin data...") - basin_data = load_ibtracs_csv_directly(basin) - if basin_data is not None and not basin_data.empty: - basin_data['BASIN'] = basin - all_data.append(basin_data) - logging.info(f"Successfully loaded {basin} basin: {len(basin_data)} records") + df = load_ibtracs_csv_directly(basin) + + if df is not None and not df.empty: + ibtracs_data[basin] = df + logging.info(f"Successfully loaded {basin} basin with {len(df)} records") else: logging.warning(f"No data loaded for basin {basin}") + ibtracs_data[basin] = None + except Exception as e: logging.error(f"Failed to load basin {basin}: {e}") + 
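# --- NOTE: illustrative sketch only; not part of this patch -------------------
# The configuration hunk above adds CACHE_FILE and CACHE_EXPIRY_DAYS, but their
# use is not shown in this excerpt. One plausible reading is a pickle cache
# wrapped around the basin loader; everything below is an assumption about the
# intended design, with load_basins standing in for load_ibtracs_data_fixed.
import os
import time
import pickle

CACHE_FILE = '/tmp/typhoon_data/ibtracs_cache.pkl'   # example path
CACHE_EXPIRY_DAYS = 1

def load_basins_cached(load_basins):
    """Return the basin dict from a fresh pickle cache, else rebuild it."""
    if os.path.exists(CACHE_FILE):
        age_days = (time.time() - os.path.getmtime(CACHE_FILE)) / 86400
        if age_days < CACHE_EXPIRY_DAYS:
            with open(CACHE_FILE, 'rb') as f:
                return pickle.load(f)
    data = load_basins()                 # e.g. load_ibtracs_data_fixed()
    try:
        with open(CACHE_FILE, 'wb') as f:
            pickle.dump(data, f)
    except OSError:
        pass                             # caching is best-effort only
    return data
# ------------------------------------------------------------------------------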
ibtracs_data[basin] = None - if all_data: - combined_data = pd.concat(all_data, ignore_index=True) - logging.info(f"Combined all basins: {len(combined_data)} total records") - return combined_data - else: - logging.error("No IBTrACS data could be loaded from any basin") - return None + return ibtracs_data def load_data_fixed(oni_path, typhoon_path): - """FIXED data loading - loads all available typhoon data regardless of ONI""" + """Fixed version of load_data function""" + # Load ONI data + oni_data = pd.DataFrame({'Year': [], 'Jan': [], 'Feb': [], 'Mar': [], 'Apr': [], + 'May': [], 'Jun': [], 'Jul': [], 'Aug': [], 'Sep': [], + 'Oct': [], 'Nov': [], 'Dec': []}) - # Load ONI data (optional - typhoon analysis can work without it) - oni_data = None - if os.path.exists(oni_path): - try: - oni_data = pd.read_csv(oni_path) - logging.info(f"Successfully loaded ONI data with {len(oni_data)} years") - except Exception as e: - logging.error(f"Error loading ONI data: {e}") + if not os.path.exists(oni_path): + logging.warning(f"ONI data file not found: {oni_path}") + update_oni_data() - if oni_data is None: - logging.warning("ONI data not available - creating minimal ONI data") + try: + oni_data = pd.read_csv(oni_path) + logging.info(f"Successfully loaded ONI data with {len(oni_data)} years") + except Exception as e: + logging.error(f"Error loading ONI data: {e}") update_oni_data() try: oni_data = pd.read_csv(oni_path) except Exception as e: logging.error(f"Still can't load ONI data: {e}") - # Create minimal fallback - create_minimal_oni_data(oni_path) - oni_data = pd.read_csv(oni_path) - # FIXED: Load typhoon data - ALWAYS from IBTrACS, never use fallback + # Load typhoon data - NEW APPROACH typhoon_data = None - # Try to load from existing processed file first + # First, try to load from existing processed file if os.path.exists(typhoon_path): try: typhoon_data = pd.read_csv(typhoon_path, low_memory=False) - required_cols = ['LAT', 'LON', 'SID'] + # Ensure basic columns exist and are valid + required_cols = ['LAT', 'LON'] if all(col in typhoon_data.columns for col in required_cols): if 'ISO_TIME' in typhoon_data.columns: typhoon_data['ISO_TIME'] = pd.to_datetime(typhoon_data['ISO_TIME'], errors='coerce') logging.info(f"Loaded processed typhoon data with {len(typhoon_data)} records") - # Validate the data quality - valid_records = typhoon_data['LAT'].notna() & typhoon_data['LON'].notna() - if valid_records.sum() / len(typhoon_data) > 0.8: # If >80% valid, use it - typhoon_data = typhoon_data[valid_records] - else: - logging.warning("Processed data quality poor, reloading from IBTrACS") - typhoon_data = None else: - logging.warning("Processed typhoon data missing required columns, reloading from IBTrACS") + logging.warning("Processed typhoon data missing required columns, will reload from IBTrACS") typhoon_data = None except Exception as e: logging.error(f"Error loading processed typhoon data: {e}") typhoon_data = None - # FIXED: Load from IBTrACS if needed - NO FALLBACK ALLOWED + # If no valid processed data, load from IBTrACS if typhoon_data is None or typhoon_data.empty: logging.info("Loading typhoon data from IBTrACS...") - typhoon_data = load_all_ibtracs_data() + ibtracs_data = load_ibtracs_data_fixed() - if typhoon_data is None or typhoon_data.empty: - raise Exception("CRITICAL ERROR: No typhoon data could be loaded from IBTrACS. 
Check internet connection and IBTrACS availability.") - - # Process and save the loaded data - # Ensure SID exists and is properly formatted - if 'SID' not in typhoon_data.columns: - logging.error("CRITICAL: No SID column in typhoon data") - raise Exception("Typhoon data missing SID column") + # Combine all available basin data, prioritizing WP + combined_dfs = [] + for basin in ['WP', 'EP', 'NA']: + if basin in ibtracs_data and ibtracs_data[basin] is not None: + df = ibtracs_data[basin].copy() + df['BASIN'] = basin + combined_dfs.append(df) - # Save the processed data for future use - try: - safe_file_write(typhoon_path, typhoon_data) - logging.info(f"Saved processed typhoon data: {len(typhoon_data)} records") - except Exception as e: - logging.warning(f"Could not save processed data: {e}") + if combined_dfs: + typhoon_data = pd.concat(combined_dfs, ignore_index=True) + # Ensure SID has proper format + if 'SID' not in typhoon_data.columns and 'BASIN' in typhoon_data.columns: + # Create SID from basin and other identifiers if missing + if 'SEASON' in typhoon_data.columns: + typhoon_data['SID'] = (typhoon_data['BASIN'].astype(str) + + typhoon_data.index.astype(str).str.zfill(2) + + typhoon_data['SEASON'].astype(str)) + else: + typhoon_data['SID'] = (typhoon_data['BASIN'].astype(str) + + typhoon_data.index.astype(str).str.zfill(2) + + '2000') + + # Save the processed data for future use + safe_file_write(typhoon_path, typhoon_data, get_fallback_data_dir()) + logging.info(f"Combined IBTrACS data: {len(typhoon_data)} total records") + else: + logging.error("Failed to load any IBTrACS basin data") + # Create minimal fallback data + typhoon_data = create_fallback_typhoon_data() - # FIXED: Final validation and enhancement - if typhoon_data is not None and not typhoon_data.empty: - # Ensure required columns exist with proper defaults + # Final validation of typhoon data + if typhoon_data is not None: + # Ensure required columns exist with fallback values required_columns = { - 'SID': lambda: f"UNKNOWN_{typhoon_data.index}", + 'SID': 'UNKNOWN', 'ISO_TIME': pd.Timestamp('2000-01-01'), - 'LAT': 20.0, - 'LON': 140.0, - 'USA_WIND': 30.0, - 'USA_PRES': 1013.0, + 'LAT': 0.0, + 'LON': 0.0, + 'USA_WIND': np.nan, + 'USA_PRES': np.nan, 'NAME': 'UNNAMED', - 'SEASON': 2000, - 'BASIN': 'WP' + 'SEASON': 2000 } for col, default_val in required_columns.items(): if col not in typhoon_data.columns: - if callable(default_val): - typhoon_data[col] = default_val() - else: - typhoon_data[col] = default_val - logging.warning(f"Added missing column {col}") - - # Ensure proper data types - numeric_cols = ['LAT', 'LON', 'USA_WIND', 'USA_PRES', 'SEASON'] - for col in numeric_cols: - if col in typhoon_data.columns: - typhoon_data[col] = pd.to_numeric(typhoon_data[col], errors='coerce') + typhoon_data[col] = default_val + logging.warning(f"Added missing column {col} with default value") + # Ensure data types if 'ISO_TIME' in typhoon_data.columns: typhoon_data['ISO_TIME'] = pd.to_datetime(typhoon_data['ISO_TIME'], errors='coerce') + typhoon_data['LAT'] = pd.to_numeric(typhoon_data['LAT'], errors='coerce') + typhoon_data['LON'] = pd.to_numeric(typhoon_data['LON'], errors='coerce') + typhoon_data['USA_WIND'] = pd.to_numeric(typhoon_data['USA_WIND'], errors='coerce') + typhoon_data['USA_PRES'] = pd.to_numeric(typhoon_data['USA_PRES'], errors='coerce') - # Remove only clearly invalid records - valid_mask = ( - typhoon_data['LAT'].notna() & - typhoon_data['LON'].notna() & - typhoon_data['LAT'].between(-90, 90) & - 
typhoon_data['LON'].between(-180, 180) - ) - - original_count = len(typhoon_data) - typhoon_data = typhoon_data[valid_mask] - logging.info(f"Final typhoon data: {len(typhoon_data)} records (removed {original_count - len(typhoon_data)} invalid)") + # Remove rows with invalid coordinates + typhoon_data = typhoon_data.dropna(subset=['LAT', 'LON']) - if len(typhoon_data) == 0: - raise Exception("CRITICAL ERROR: All typhoon data was filtered out - check data quality") - - else: - raise Exception("CRITICAL ERROR: No typhoon data available after all loading attempts") + logging.info(f"Final typhoon data: {len(typhoon_data)} records after validation") return oni_data, typhoon_data +def create_fallback_typhoon_data(): + """Create minimal fallback typhoon data - FIXED VERSION""" + # Use proper pandas date_range instead of numpy + dates = pd.date_range(start='2000-01-01', end='2025-12-31', freq='D') # Extended to 2025 + storm_dates = dates[np.random.choice(len(dates), size=100, replace=False)] + + data = [] + for i, date in enumerate(storm_dates): + # Create realistic WP storm tracks + base_lat = np.random.uniform(10, 30) + base_lon = np.random.uniform(130, 160) + + # Generate 20-50 data points per storm + track_length = np.random.randint(20, 51) + sid = f"WP{i+1:02d}{date.year}" + + for j in range(track_length): + lat = base_lat + j * 0.2 + np.random.normal(0, 0.1) + lon = base_lon + j * 0.3 + np.random.normal(0, 0.1) + wind = max(25, 70 + np.random.normal(0, 20)) + pres = max(950, 1000 - wind + np.random.normal(0, 5)) + + data.append({ + 'SID': sid, + 'ISO_TIME': date + pd.Timedelta(hours=j*6), # Use pd.Timedelta instead + 'NAME': f'FALLBACK_{i+1}', + 'SEASON': date.year, + 'LAT': lat, + 'LON': lon, + 'USA_WIND': wind, + 'USA_PRES': pres, + 'BASIN': 'WP' + }) + + df = pd.DataFrame(data) + logging.info(f"Created fallback typhoon data with {len(df)} records") + return df + def process_oni_data(oni_data): """Process ONI data into long format""" - if oni_data is None or oni_data.empty: - # Return minimal ONI data that won't break merging - return pd.DataFrame({ - 'Year': [2000], 'Month': ['01'], 'ONI': [0.0], - 'Date': [pd.Timestamp('2000-01-01')] - }) - oni_long = oni_data.melt(id_vars=['Year'], var_name='Month', value_name='ONI') month_map = {'Jan':'01','Feb':'02','Mar':'03','Apr':'04','May':'05','Jun':'06', 'Jul':'07','Aug':'08','Sep':'09','Oct':'10','Nov':'11','Dec':'12'} oni_long['Month'] = oni_long['Month'].map(month_map) oni_long['Date'] = pd.to_datetime(oni_long['Year'].astype(str)+'-'+oni_long['Month']+'-01') - oni_long['ONI'] = pd.to_numeric(oni_long['ONI'], errors='coerce').fillna(0) + oni_long['ONI'] = pd.to_numeric(oni_long['ONI'], errors='coerce') return oni_long def process_typhoon_data(typhoon_data): - """Process typhoon data - FIXED to preserve all data""" - if typhoon_data is None or typhoon_data.empty: - raise Exception("No typhoon data to process") - - # Ensure proper data types + """Process typhoon data""" if 'ISO_TIME' in typhoon_data.columns: typhoon_data['ISO_TIME'] = pd.to_datetime(typhoon_data['ISO_TIME'], errors='coerce') + typhoon_data['USA_WIND'] = pd.to_numeric(typhoon_data['USA_WIND'], errors='coerce') + typhoon_data['USA_PRES'] = pd.to_numeric(typhoon_data['USA_PRES'], errors='coerce') + typhoon_data['LON'] = pd.to_numeric(typhoon_data['LON'], errors='coerce') - numeric_cols = ['USA_WIND', 'USA_PRES', 'LON', 'LAT'] - for col in numeric_cols: - if col in typhoon_data.columns: - typhoon_data[col] = pd.to_numeric(typhoon_data[col], errors='coerce') + logging.info(f"Unique 
basins in typhoon_data: {typhoon_data['SID'].str[:2].unique()}") - logging.info(f"Processing {len(typhoon_data)} typhoon records") + typhoon_max = typhoon_data.groupby('SID').agg({ + 'USA_WIND':'max','USA_PRES':'min','ISO_TIME':'first','SEASON':'first','NAME':'first', + 'LAT':'first','LON':'first' + }).reset_index() - # Get maximum values per storm - agg_dict = {} - if 'USA_WIND' in typhoon_data.columns: - agg_dict['USA_WIND'] = 'max' - if 'USA_PRES' in typhoon_data.columns: - agg_dict['USA_PRES'] = 'min' - if 'ISO_TIME' in typhoon_data.columns: - agg_dict['ISO_TIME'] = 'first' - if 'SEASON' in typhoon_data.columns: - agg_dict['SEASON'] = 'first' - if 'NAME' in typhoon_data.columns: - agg_dict['NAME'] = 'first' - if 'LAT' in typhoon_data.columns: - agg_dict['LAT'] = 'first' - if 'LON' in typhoon_data.columns: - agg_dict['LON'] = 'first' - - typhoon_max = typhoon_data.groupby('SID').agg(agg_dict).reset_index() - - # Add time-based columns for merging if 'ISO_TIME' in typhoon_max.columns: typhoon_max['Month'] = typhoon_max['ISO_TIME'].dt.strftime('%m') typhoon_max['Year'] = typhoon_max['ISO_TIME'].dt.year else: - # Use SEASON if available, otherwise default - if 'SEASON' in typhoon_max.columns: - typhoon_max['Year'] = typhoon_max['SEASON'] - else: - typhoon_max['Year'] = 2000 - typhoon_max['Month'] = '01' # Default month + # Fallback if no ISO_TIME + typhoon_max['Month'] = '01' + typhoon_max['Year'] = typhoon_max['SEASON'] - # Add category - if 'USA_WIND' in typhoon_max.columns: - typhoon_max['Category'] = typhoon_max['USA_WIND'].apply(categorize_typhoon_enhanced) - else: - typhoon_max['Category'] = 'Unknown' - - logging.info(f"Processed {len(typhoon_max)} unique storms") + typhoon_max['Category'] = typhoon_max['USA_WIND'].apply(categorize_typhoon_enhanced) return typhoon_max def merge_data(oni_long, typhoon_max): - """Merge ONI and typhoon data - FIXED to preserve typhoon data even without ONI""" - if typhoon_max is None or typhoon_max.empty: - raise Exception("No typhoon data to merge") - - if oni_long is None or oni_long.empty: - # If no ONI data, add default ONI values - logging.warning("No ONI data available - using neutral values") - typhoon_max['ONI'] = 0.0 - return typhoon_max - - # Merge with ONI data - merged = pd.merge(typhoon_max, oni_long, on=['Year', 'Month'], how='left') - - # Fill missing ONI values with neutral - merged['ONI'] = merged['ONI'].fillna(0.0) - - logging.info(f"Merged data: {len(merged)} storms with ONI values") - return merged + """Merge ONI and typhoon data""" + return pd.merge(typhoon_max, oni_long, on=['Year','Month']) # ----------------------------- -# Enhanced Categorization Functions +# ENHANCED: Categorization Functions - FIXED TAIWAN CLASSIFICATION # ----------------------------- def categorize_typhoon_enhanced(wind_speed): @@ -663,49 +721,75 @@ def categorize_typhoon_enhanced(wind_speed): if pd.isna(wind_speed): return 'Unknown' + # Convert to knots if in m/s (some datasets use m/s) if wind_speed < 10: # Likely in m/s, convert to knots wind_speed = wind_speed * 1.94384 - if wind_speed < 34: + # FIXED thresholds to include TD + if wind_speed < 34: # Below 34 knots = Tropical Depression return 'Tropical Depression' - elif wind_speed < 64: + elif wind_speed < 64: # 34-63 knots = Tropical Storm return 'Tropical Storm' - elif wind_speed < 83: + elif wind_speed < 83: # 64-82 knots = Category 1 Typhoon return 'C1 Typhoon' - elif wind_speed < 96: + elif wind_speed < 96: # 83-95 knots = Category 2 Typhoon return 'C2 Typhoon' - elif wind_speed < 113: + elif 
wind_speed < 113: # 96-112 knots = Category 3 Strong Typhoon return 'C3 Strong Typhoon' - elif wind_speed < 137: + elif wind_speed < 137: # 113-136 knots = Category 4 Very Strong Typhoon return 'C4 Very Strong Typhoon' - else: + else: # 137+ knots = Category 5 Super Typhoon return 'C5 Super Typhoon' def categorize_typhoon_taiwan_fixed(wind_speed): - """FIXED Taiwan categorization system based on CMA 2006 standards""" + """ + FIXED Taiwan categorization system based on CMA 2006 standards + Reference: CMA Tropical Cyclone Data Center official classification + """ if pd.isna(wind_speed): return 'Tropical Depression' + # Convert from knots to m/s if input appears to be in knots if wind_speed > 50: # Likely in knots, convert to m/s wind_speed_ms = wind_speed * 0.514444 else: wind_speed_ms = wind_speed + # CMA 2006 Classification Standards (used by Taiwan CWA) if wind_speed_ms >= 51.0: - return 'Super Typhoon' + return 'Super Typhoon' # ≥51.0 m/s (≥99.2 kt) elif wind_speed_ms >= 41.5: - return 'Severe Typhoon' + return 'Severe Typhoon' # 41.5–50.9 m/s (80.7–99.1 kt) elif wind_speed_ms >= 32.7: - return 'Typhoon' + return 'Typhoon' # 32.7–41.4 m/s (63.6–80.6 kt) elif wind_speed_ms >= 24.5: - return 'Severe Tropical Storm' + return 'Severe Tropical Storm' # 24.5–32.6 m/s (47.6–63.5 kt) elif wind_speed_ms >= 17.2: - return 'Tropical Storm' + return 'Tropical Storm' # 17.2–24.4 m/s (33.4–47.5 kt) else: - return 'Tropical Depression' + return 'Tropical Depression' # < 17.2 m/s (< 33.4 kt) + +# Original function for backward compatibility +def categorize_typhoon(wind_speed): + """Original categorize typhoon function for backward compatibility""" + return categorize_typhoon_enhanced(wind_speed) + +def classify_enso_phases(oni_value): + """Classify ENSO phases based on ONI value""" + if isinstance(oni_value, pd.Series): + oni_value = oni_value.iloc[0] + if pd.isna(oni_value): + return 'Neutral' + if oni_value >= 0.5: + return 'El Nino' + elif oni_value <= -0.5: + return 'La Nina' + else: + return 'Neutral' +# FIXED: Combined categorization function def categorize_typhoon_by_standard_fixed(wind_speed, standard='atlantic'): - """FIXED categorization function supporting both standards""" + """FIXED categorization function supporting both standards with correct Taiwan thresholds""" if pd.isna(wind_speed): return 'Tropical Depression', '#808080' @@ -713,7 +797,9 @@ def categorize_typhoon_by_standard_fixed(wind_speed, standard='atlantic'): category = categorize_typhoon_taiwan_fixed(wind_speed) color = taiwan_color_map_fixed.get(category, '#808080') return category, color + else: + # Atlantic/International standard (unchanged) if wind_speed >= 137: return 'C5 Super Typhoon', '#FF0000' elif wind_speed >= 113: @@ -729,21 +815,8 @@ def categorize_typhoon_by_standard_fixed(wind_speed, standard='atlantic'): else: return 'Tropical Depression', '#808080' -def classify_enso_phases(oni_value): - """Classify ENSO phases based on ONI value""" - if isinstance(oni_value, pd.Series): - oni_value = oni_value.iloc[0] - if pd.isna(oni_value): - return 'Neutral' - if oni_value >= 0.5: - return 'El Nino' - elif oni_value <= -0.5: - return 'La Nina' - else: - return 'Neutral' - # ----------------------------- -# FIXED: Advanced ML Features +# FIXED: ADVANCED ML FEATURES WITH ROBUST ERROR HANDLING # ----------------------------- def extract_storm_features(typhoon_data): @@ -753,6 +826,7 @@ def extract_storm_features(typhoon_data): logging.error("No typhoon data provided for feature extraction") return None + # Basic features - 
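# --- NOTE: illustrative usage only; not part of this patch --------------------
# Quick sanity check of the two classification standards defined above: the
# same 85 kt storm lands in different categories under each system. Expected
# values are worked by hand from the thresholds in this hunk and assume the
# functions above are in scope.
for standard in ('atlantic', 'taiwan'):
    category, color = categorize_typhoon_by_standard_fixed(85, standard)
    print(standard, category, color)
# Expected:
#   atlantic -> 'C2 Typhoon'      (85 kt sits in the 83-95 kt band)
#   taiwan   -> 'Severe Typhoon'  (85 kt ~ 43.7 m/s, in the 41.5-50.9 m/s band)
# ------------------------------------------------------------------------------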
ensure columns exist basic_features = [] for sid in typhoon_data['SID'].unique(): storm_data = typhoon_data[typhoon_data['SID'] == sid].copy() @@ -760,6 +834,7 @@ def extract_storm_features(typhoon_data): if len(storm_data) == 0: continue + # Initialize feature dict with safe defaults features = {'SID': sid} # Wind statistics @@ -809,13 +884,16 @@ def extract_storm_features(typhoon_data): features['LON_max'] = lon_values.max() features['LON_min'] = lon_values.min() + # Genesis location (first valid position) features['genesis_lat'] = lat_values.iloc[0] features['genesis_lon'] = lon_values.iloc[0] - features['genesis_intensity'] = features['USA_WIND_mean'] + features['genesis_intensity'] = features['USA_WIND_mean'] # Use mean as fallback + # Track characteristics features['lat_range'] = lat_values.max() - lat_values.min() features['lon_range'] = lon_values.max() - lon_values.min() + # Calculate track distance if len(lat_values) > 1: distances = [] for i in range(1, len(lat_values)): @@ -828,6 +906,7 @@ def extract_storm_features(typhoon_data): features['total_distance'] = 0 features['avg_speed'] = 0 + # Track curvature if len(lat_values) > 2: bearing_changes = [] for i in range(1, len(lat_values)-1): @@ -845,6 +924,7 @@ def extract_storm_features(typhoon_data): else: features['avg_curvature'] = 0 else: + # Default location values features.update({ 'LAT_mean': 20, 'LAT_std': 0, 'LAT_max': 20, 'LAT_min': 20, 'LON_mean': 140, 'LON_std': 0, 'LON_max': 140, 'LON_min': 140, @@ -852,14 +932,26 @@ def extract_storm_features(typhoon_data): 'lat_range': 0, 'lon_range': 0, 'total_distance': 0, 'avg_speed': 0, 'avg_curvature': 0 }) + else: + # Default location values if columns missing + features.update({ + 'LAT_mean': 20, 'LAT_std': 0, 'LAT_max': 20, 'LAT_min': 20, + 'LON_mean': 140, 'LON_std': 0, 'LON_max': 140, 'LON_min': 140, + 'genesis_lat': 20, 'genesis_lon': 140, 'genesis_intensity': 30, + 'lat_range': 0, 'lon_range': 0, 'total_distance': 0, + 'avg_speed': 0, 'avg_curvature': 0 + }) + # Track length features['track_length'] = len(storm_data) + # Add seasonal information if 'SEASON' in storm_data.columns: features['season'] = storm_data['SEASON'].iloc[0] else: features['season'] = 2000 + # Add basin information if 'BASIN' in storm_data.columns: features['basin'] = storm_data['BASIN'].iloc[0] elif 'SID' in storm_data.columns: @@ -873,13 +965,17 @@ def extract_storm_features(typhoon_data): logging.error("No valid storm features could be extracted") return None + # Convert to DataFrame storm_features = pd.DataFrame(basic_features) + # Ensure all numeric columns are properly typed numeric_columns = [col for col in storm_features.columns if col not in ['SID', 'basin']] for col in numeric_columns: storm_features[col] = pd.to_numeric(storm_features[col], errors='coerce').fillna(0) logging.info(f"Successfully extracted features for {len(storm_features)} storms") + logging.info(f"Feature columns: {list(storm_features.columns)}") + return storm_features except Exception as e: @@ -889,30 +985,38 @@ def extract_storm_features(typhoon_data): return None def perform_dimensionality_reduction(storm_features, method='umap', n_components=2): - """Perform UMAP or t-SNE dimensionality reduction""" + """Perform UMAP or t-SNE dimensionality reduction - FIXED VERSION""" try: if storm_features is None or storm_features.empty: raise ValueError("No storm features provided") + # Select numeric features for clustering - FIXED feature_cols = [] for col in storm_features.columns: if col not in ['SID', 'basin'] and 
storm_features[col].dtype in ['float64', 'int64']: + # Check if column has valid data valid_data = storm_features[col].dropna() - if len(valid_data) > 0 and valid_data.std() > 0: + if len(valid_data) > 0 and valid_data.std() > 0: # Only include columns with variance feature_cols.append(col) if len(feature_cols) == 0: raise ValueError("No valid numeric features found for clustering") + logging.info(f"Using {len(feature_cols)} features for clustering: {feature_cols}") + X = storm_features[feature_cols].fillna(0) + # Check if we have enough samples if len(X) < 2: raise ValueError("Need at least 2 storms for clustering") + # Standardize features scaler = StandardScaler() X_scaled = scaler.fit_transform(X) + # Perform dimensionality reduction if method.lower() == 'umap' and UMAP_AVAILABLE and len(X_scaled) >= 4: + # UMAP parameters optimized for typhoon data - fixed warnings n_neighbors = min(15, len(X_scaled) - 1) reducer = umap.UMAP( n_components=n_components, @@ -920,11 +1024,12 @@ def perform_dimensionality_reduction(storm_features, method='umap', n_components min_dist=0.1, metric='euclidean', random_state=42, - n_jobs=1 + n_jobs=1 # Explicitly set to avoid warning ) elif method.lower() == 'tsne' and len(X_scaled) >= 4: + # t-SNE parameters perplexity = min(30, len(X_scaled) // 4) - perplexity = max(1, perplexity) + perplexity = max(1, perplexity) # Ensure perplexity is at least 1 reducer = TSNE( n_components=n_components, perplexity=perplexity, @@ -933,11 +1038,14 @@ def perform_dimensionality_reduction(storm_features, method='umap', n_components random_state=42 ) else: + # Fallback to PCA reducer = PCA(n_components=n_components, random_state=42) + # Fit and transform embedding = reducer.fit_transform(X_scaled) logging.info(f"Dimensionality reduction successful: {X_scaled.shape} -> {embedding.shape}") + return embedding, feature_cols, scaler except Exception as e: @@ -945,15 +1053,17 @@ def perform_dimensionality_reduction(storm_features, method='umap', n_components raise def cluster_storms_data(embedding, method='dbscan', eps=0.5, min_samples=3): - """Cluster storms based on their embedding""" + """Cluster storms based on their embedding - FIXED NAME VERSION""" try: if len(embedding) < 2: - return np.array([0] * len(embedding)) + return np.array([0] * len(embedding)) # Single cluster for insufficient data if method.lower() == 'dbscan': + # Adjust min_samples based on data size min_samples = min(min_samples, max(2, len(embedding) // 5)) clusterer = DBSCAN(eps=eps, min_samples=min_samples) elif method.lower() == 'kmeans': + # Adjust n_clusters based on data size n_clusters = min(5, max(2, len(embedding) // 3)) clusterer = KMeans(n_clusters=n_clusters, random_state=42) else: @@ -962,15 +1072,18 @@ def cluster_storms_data(embedding, method='dbscan', eps=0.5, min_samples=3): clusters = clusterer.fit_predict(embedding) logging.info(f"Clustering complete: {len(np.unique(clusters))} clusters found") + return clusters except Exception as e: logging.error(f"Error in cluster_storms_data: {e}") + # Return single cluster as fallback return np.array([0] * len(embedding)) def create_separate_clustering_plots(storm_features, typhoon_data, method='umap'): - """Create separate plots for clustering analysis""" + """Create separate plots for clustering analysis - ENHANCED CLARITY VERSION""" try: + # Validate inputs if storm_features is None or storm_features.empty: raise ValueError("No storm features available for clustering") @@ -979,17 +1092,23 @@ def create_separate_clustering_plots(storm_features, 
typhoon_data, method='umap' logging.info(f"Starting clustering visualization with {len(storm_features)} storms") + # Perform dimensionality reduction embedding, feature_cols, scaler = perform_dimensionality_reduction(storm_features, method) + + # Perform clustering cluster_labels = cluster_storms_data(embedding, 'dbscan') + # Add clustering results to storm features storm_features_viz = storm_features.copy() storm_features_viz['cluster'] = cluster_labels storm_features_viz['dim1'] = embedding[:, 0] storm_features_viz['dim2'] = embedding[:, 1] + # Merge with typhoon data for additional info - SAFE MERGE try: storm_info = typhoon_data.groupby('SID').first()[['NAME', 'SEASON']].reset_index() storm_features_viz = storm_features_viz.merge(storm_info, on='SID', how='left') + # Fill missing values storm_features_viz['NAME'] = storm_features_viz['NAME'].fillna('UNNAMED') storm_features_viz['SEASON'] = storm_features_viz['SEASON'].fillna(2000) except Exception as merge_error: @@ -997,12 +1116,14 @@ def create_separate_clustering_plots(storm_features, typhoon_data, method='umap' storm_features_viz['NAME'] = 'UNNAMED' storm_features_viz['SEASON'] = 2000 + # Get unique clusters and assign distinct colors unique_clusters = sorted([c for c in storm_features_viz['cluster'].unique() if c != -1]) noise_count = len(storm_features_viz[storm_features_viz['cluster'] == -1]) - # 1. Clustering scatter plot + # 1. Enhanced clustering scatter plot with clear cluster identification fig_cluster = go.Figure() + # Add noise points first if noise_count > 0: noise_data = storm_features_viz[storm_features_viz['cluster'] == -1] fig_cluster.add_trace( @@ -1027,6 +1148,7 @@ def create_separate_clustering_plots(storm_features, typhoon_data, method='umap' ) ) + # Add clusters with distinct colors and shapes cluster_symbols = ['circle', 'square', 'diamond', 'triangle-up', 'triangle-down', 'pentagon', 'hexagon', 'star', 'cross', 'circle-open'] @@ -1067,15 +1189,17 @@ def create_separate_clustering_plots(storm_features, typhoon_data, method='umap' showlegend=True ) - # 2. Route map + # 2. 
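# --- NOTE: illustrative usage only; not part of this patch --------------------
# The helpers above compose into a small pipeline: per-storm features -> 2-D
# embedding -> cluster labels. A minimal driver, assuming typhoon_data is the
# track DataFrame loaded earlier in this file:
storm_features = extract_storm_features(typhoon_data)
if storm_features is not None:
    embedding, feature_cols, scaler = perform_dimensionality_reduction(
        storm_features, method='umap')   # falls back to PCA when UMAP is unavailable
    labels = cluster_storms_data(embedding, method='dbscan', eps=0.5, min_samples=3)
    storm_features['cluster'] = labels   # -1 marks DBSCAN noise points
    print(storm_features['cluster'].value_counts())
# ------------------------------------------------------------------------------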
ENHANCED route map with cluster legends and clearer representation fig_routes = go.Figure() + # Create a comprehensive legend showing cluster characteristics cluster_info_text = [] for i, cluster in enumerate(unique_clusters): cluster_storm_ids = storm_features_viz[storm_features_viz['cluster'] == cluster]['SID'].tolist() color = CLUSTER_COLORS[i % len(CLUSTER_COLORS)] + # Get cluster statistics for legend cluster_data = storm_features_viz[storm_features_viz['cluster'] == cluster] avg_intensity = cluster_data['USA_WIND_max'].mean() if 'USA_WIND_max' in cluster_data.columns else 0 avg_pressure = cluster_data['USA_PRES_min'].mean() if 'USA_PRES_min' in cluster_data.columns else 1000 @@ -1085,11 +1209,13 @@ def create_separate_clustering_plots(storm_features, typhoon_data, method='umap' f"Avg: {avg_intensity:.0f}kt/{avg_pressure:.0f}hPa" ) + # Add multiple storms per cluster with clear identification storms_added = 0 - for j, sid in enumerate(cluster_storm_ids[:8]): + for j, sid in enumerate(cluster_storm_ids[:8]): # Show up to 8 storms per cluster try: storm_track = typhoon_data[typhoon_data['SID'] == sid].sort_values('ISO_TIME') if len(storm_track) > 1: + # Ensure valid coordinates valid_coords = storm_track['LAT'].notna() & storm_track['LON'].notna() storm_track = storm_track[valid_coords] @@ -1097,9 +1223,10 @@ def create_separate_clustering_plots(storm_features, typhoon_data, method='umap' storm_name = storm_track['NAME'].iloc[0] if pd.notna(storm_track['NAME'].iloc[0]) else 'UNNAMED' storm_season = storm_track['SEASON'].iloc[0] if 'SEASON' in storm_track.columns else 'Unknown' + # Vary line style for different storms in same cluster line_styles = ['solid', 'dash', 'dot', 'dashdot'] line_style = line_styles[j % len(line_styles)] - line_width = 3 if j == 0 else 2 + line_width = 3 if j == 0 else 2 # First storm thicker fig_routes.add_trace( go.Scattergeo( @@ -1126,7 +1253,9 @@ def create_separate_clustering_plots(storm_features, typhoon_data, method='umap' logging.warning(f"Error adding track for storm {sid}: {track_error}") continue + # Add cluster centroid marker if len(cluster_storm_ids) > 0: + # Calculate average genesis location for cluster cluster_storm_data = storm_features_viz[storm_features_viz['cluster'] == cluster] if 'genesis_lat' in cluster_storm_data.columns and 'genesis_lon' in cluster_storm_data.columns: avg_lat = cluster_storm_data['genesis_lat'].mean() @@ -1156,6 +1285,7 @@ def create_separate_clustering_plots(storm_features, typhoon_data, method='umap' ) ) + # Update route map layout with enhanced information and LARGER SIZE fig_routes.update_layout( title=f"Storm Routes by {method.upper()} Clusters
<br>Different line styles = different storms in same cluster | Stars = cluster centers",
        geo=dict(
@@ -1167,13 +1297,14 @@
            showcoastlines=True,
            coastlinecolor="Gray",
            center=dict(lat=20, lon=140),
-            projection_scale=2.5
+            projection_scale=2.5  # Larger map
        ),
-        height=800,
-        width=1200,
+        height=800,  # Much larger height
+        width=1200,  # Wider map
        showlegend=True
    )

+    # Add cluster info annotation
    cluster_summary = "<br>".join(cluster_info_text)
    fig_routes.add_annotation(
        text=f"Cluster Summary:<br>
{cluster_summary}", @@ -1186,7 +1317,7 @@ def create_separate_clustering_plots(storm_features, typhoon_data, method='umap' borderwidth=1 ) - # 3. Pressure evolution plot + # 3. Enhanced pressure evolution plot with cluster identification fig_pressure = go.Figure() for i, cluster in enumerate(unique_clusters): @@ -1194,13 +1325,16 @@ def create_separate_clustering_plots(storm_features, typhoon_data, method='umap' color = CLUSTER_COLORS[i % len(CLUSTER_COLORS)] cluster_pressures = [] - for j, sid in enumerate(cluster_storm_ids[:5]): + for j, sid in enumerate(cluster_storm_ids[:5]): # Limit to 5 storms per cluster try: storm_track = typhoon_data[typhoon_data['SID'] == sid].sort_values('ISO_TIME') if len(storm_track) > 1 and 'USA_PRES' in storm_track.columns: pressure_values = pd.to_numeric(storm_track['USA_PRES'], errors='coerce').dropna() if len(pressure_values) > 0: storm_name = storm_track['NAME'].iloc[0] if pd.notna(storm_track['NAME'].iloc[0]) else 'UNNAMED' + time_hours = range(len(pressure_values)) + + # Normalize time to show relative progression normalized_time = np.linspace(0, 100, len(pressure_values)) fig_pressure.add_trace( @@ -1225,6 +1359,7 @@ def create_separate_clustering_plots(storm_features, typhoon_data, method='umap' except Exception as e: continue + # Add cluster average line if cluster_pressures: avg_pressure = np.mean(cluster_pressures) fig_pressure.add_hline( @@ -1242,7 +1377,7 @@ def create_separate_clustering_plots(storm_features, typhoon_data, method='umap' height=500 ) - # 4. Wind evolution plot + # 4. Enhanced wind evolution plot fig_wind = go.Figure() for i, cluster in enumerate(unique_clusters): @@ -1250,13 +1385,15 @@ def create_separate_clustering_plots(storm_features, typhoon_data, method='umap' color = CLUSTER_COLORS[i % len(CLUSTER_COLORS)] cluster_winds = [] - for j, sid in enumerate(cluster_storm_ids[:5]): + for j, sid in enumerate(cluster_storm_ids[:5]): # Limit to 5 storms per cluster try: storm_track = typhoon_data[typhoon_data['SID'] == sid].sort_values('ISO_TIME') if len(storm_track) > 1 and 'USA_WIND' in storm_track.columns: wind_values = pd.to_numeric(storm_track['USA_WIND'], errors='coerce').dropna() if len(wind_values) > 0: storm_name = storm_track['NAME'].iloc[0] if pd.notna(storm_track['NAME'].iloc[0]) else 'UNNAMED' + + # Normalize time to show relative progression normalized_time = np.linspace(0, 100, len(wind_values)) fig_wind.add_trace( @@ -1281,6 +1418,7 @@ def create_separate_clustering_plots(storm_features, typhoon_data, method='umap' except Exception as e: continue + # Add cluster average line if cluster_winds: avg_wind = np.mean(cluster_winds) fig_wind.add_hline( @@ -1298,7 +1436,7 @@ def create_separate_clustering_plots(storm_features, typhoon_data, method='umap' height=500 ) - # Generate statistics + # Generate enhanced cluster statistics with clear explanations try: stats_text = f"ENHANCED {method.upper()} CLUSTER ANALYSIS RESULTS\n" + "="*60 + "\n\n" stats_text += f"🔍 DIMENSIONALITY REDUCTION: {method.upper()}\n" @@ -1322,6 +1460,7 @@ def create_separate_clustering_plots(storm_features, typhoon_data, method='umap' stats_text += f"🎯 CLUSTER {cluster}: {storm_count} storms\n" stats_text += f" 🎨 Color: {CLUSTER_COLORS[cluster % len(CLUSTER_COLORS)]}\n" + # Add detailed statistics if available if 'USA_WIND_max' in cluster_data.columns: wind_mean = cluster_data['USA_WIND_max'].mean() wind_std = cluster_data['USA_WIND_max'].std() @@ -1341,6 +1480,7 @@ def create_separate_clustering_plots(storm_features, typhoon_data, method='umap' 
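# --- NOTE: illustrative sketch only; not part of this patch -------------------
# The pressure/wind evolution plots above compare storms of different lifetimes
# by resampling each track onto a common 0-100% axis with np.linspace. The same
# idea in isolation, on synthetic data (nothing below comes from the app):
import numpy as np

tracks = [
    np.array([1002, 990, 975, 985]),                 # short track, 4 fixes
    np.array([1005, 998, 985, 970, 960, 972, 990]),  # long track, 7 fixes
]
grid = np.linspace(0, 100, 21)                       # shared percent-of-lifetime grid
for pressures in tracks:
    pct = np.linspace(0, 100, len(pressures))
    resampled = np.interp(grid, pct, pressures)      # now directly comparable/averageable
    print(f"peak intensity (min pressure): {resampled.min():.0f} hPa")
# ------------------------------------------------------------------------------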
lon_mean = cluster_data['genesis_lon'].mean() stats_text += f" 🎯 Genesis Region: {lat_mean:.1f}°N, {lon_mean:.1f}°E\n" + # Add interpretation if wind_mean < 50: stats_text += " 💡 Pattern: Weaker storm group\n" elif wind_mean > 100: @@ -1350,6 +1490,7 @@ def create_separate_clustering_plots(storm_features, typhoon_data, method='umap' stats_text += "\n" + # Add explanation of the analysis stats_text += "📖 INTERPRETATION GUIDE:\n" stats_text += f"• {method.upper()} reduces storm characteristics to 2D for visualization\n" stats_text += "• DBSCAN finds natural groupings without preset number of clusters\n" @@ -1382,9 +1523,113 @@ def create_separate_clustering_plots(storm_features, typhoon_data, method='umap' return error_fig, error_fig, error_fig, error_fig, f"Error in clustering: {str(e)}" # ----------------------------- -# FIXED: Prediction System +# ENHANCED: Advanced Prediction System with Route Forecasting # ----------------------------- +def create_advanced_prediction_model(typhoon_data): + """Create advanced ML model for intensity and route prediction""" + try: + if typhoon_data is None or typhoon_data.empty: + return None, "No data available for model training" + + # Prepare training data + features = [] + targets = [] + + for sid in typhoon_data['SID'].unique(): + storm_data = typhoon_data[typhoon_data['SID'] == sid].sort_values('ISO_TIME') + + if len(storm_data) < 3: # Need at least 3 points for prediction + continue + + for i in range(len(storm_data) - 1): + current = storm_data.iloc[i] + next_point = storm_data.iloc[i + 1] + + # Extract features (current state) + feature_row = [] + + # Current position + feature_row.extend([ + current.get('LAT', 20), + current.get('LON', 140) + ]) + + # Current intensity + feature_row.extend([ + current.get('USA_WIND', 30), + current.get('USA_PRES', 1000) + ]) + + # Time features + if 'ISO_TIME' in current and pd.notna(current['ISO_TIME']): + month = current['ISO_TIME'].month + day_of_year = current['ISO_TIME'].dayofyear + else: + month = 9 # Peak season default + day_of_year = 250 + + feature_row.extend([month, day_of_year]) + + # Motion features (if previous point exists) + if i > 0: + prev = storm_data.iloc[i - 1] + dlat = current.get('LAT', 20) - prev.get('LAT', 20) + dlon = current.get('LON', 140) - prev.get('LON', 140) + speed = np.sqrt(dlat**2 + dlon**2) + bearing = np.arctan2(dlat, dlon) + else: + speed = 0 + bearing = 0 + + feature_row.extend([speed, bearing]) + + features.append(feature_row) + + # Target: next position and intensity + targets.append([ + next_point.get('LAT', 20), + next_point.get('LON', 140), + next_point.get('USA_WIND', 30) + ]) + + if len(features) < 10: # Need sufficient training data + return None, "Insufficient data for model training" + + # Train model + X = np.array(features) + y = np.array(targets) + + # Split data + X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) + + # Create separate models for position and intensity + models = {} + + # Position model (lat, lon) + pos_model = RandomForestRegressor(n_estimators=100, random_state=42) + pos_model.fit(X_train, y_train[:, :2]) + models['position'] = pos_model + + # Intensity model (wind speed) + int_model = RandomForestRegressor(n_estimators=100, random_state=42) + int_model.fit(X_train, y_train[:, 2]) + models['intensity'] = int_model + + # Calculate model performance + pos_pred = pos_model.predict(X_test) + int_pred = int_model.predict(X_test) + + pos_mae = mean_absolute_error(y_test[:, :2], pos_pred) + int_mae = 
mean_absolute_error(y_test[:, 2], int_pred) + + model_info = f"Position MAE: {pos_mae:.2f}°, Intensity MAE: {int_mae:.2f} kt" + + return models, model_info + + except Exception as e: + return None, f"Error creating prediction model: {str(e)}" + def get_realistic_genesis_locations(): """Get realistic typhoon genesis regions based on climatology""" return { @@ -1406,7 +1651,7 @@ def predict_storm_route_and_intensity_realistic(genesis_region, month, oni_value genesis_locations = get_realistic_genesis_locations() if genesis_region not in genesis_locations: - genesis_region = "Western Pacific Main Development Region" + genesis_region = "Western Pacific Main Development Region" # Default genesis_info = genesis_locations[genesis_region] lat = genesis_info["lat"] @@ -1420,27 +1665,29 @@ def predict_storm_route_and_intensity_realistic(genesis_region, month, oni_value 'genesis_info': genesis_info } - # Realistic starting intensity - base_intensity = 30 + # REALISTIC starting intensity - Tropical Depression level + base_intensity = 30 # Start at TD level (25-35 kt) - # Environmental factors - if oni_value > 1.0: + # Environmental factors for genesis + if oni_value > 1.0: # Strong El Niño - suppressed development intensity_modifier = -6 - elif oni_value > 0.5: + elif oni_value > 0.5: # Moderate El Niño intensity_modifier = -3 - elif oni_value < -1.0: + elif oni_value < -1.0: # Strong La Niña - enhanced development intensity_modifier = +8 - elif oni_value < -0.5: + elif oni_value < -0.5: # Moderate La Niña intensity_modifier = +5 - else: + else: # Neutral intensity_modifier = oni_value * 2 + # Seasonal genesis effects seasonal_factors = { 1: -8, 2: -6, 3: -4, 4: -2, 5: 2, 6: 6, 7: 10, 8: 12, 9: 15, 10: 10, 11: 4, 12: -5 } seasonal_modifier = seasonal_factors.get(month, 0) + # Genesis region favorability region_factors = { "Western Pacific Main Development Region": 8, "South China Sea": 4, @@ -1455,137 +1702,160 @@ def predict_storm_route_and_intensity_realistic(genesis_region, month, oni_value } region_modifier = region_factors.get(genesis_region, 0) + # Calculate realistic starting intensity (TD level) predicted_intensity = base_intensity + intensity_modifier + seasonal_modifier + region_modifier - predicted_intensity = max(25, min(40, predicted_intensity)) + predicted_intensity = max(25, min(40, predicted_intensity)) # Keep in TD-weak TS range + # Add realistic uncertainty for genesis intensity_uncertainty = np.random.normal(0, 2) predicted_intensity += intensity_uncertainty - predicted_intensity = max(25, min(38, predicted_intensity)) + predicted_intensity = max(25, min(38, predicted_intensity)) # TD range results['current_prediction'] = { 'intensity_kt': predicted_intensity, - 'pressure_hpa': 1008 - (predicted_intensity - 25) * 0.6, + 'pressure_hpa': 1008 - (predicted_intensity - 25) * 0.6, # Realistic TD pressure 'category': categorize_typhoon_enhanced(predicted_intensity), 'genesis_region': genesis_region } - # Route prediction + # REALISTIC route prediction with proper typhoon speeds current_lat = lat current_lon = lon current_intensity = predicted_intensity route_points = [] + # Track storm development over time with REALISTIC SPEEDS for hour in range(0, forecast_hours + 6, 6): - # Realistic motion - if current_lat < 20: - base_speed = 0.12 - elif current_lat < 30: - base_speed = 0.18 - else: - base_speed = 0.25 + # REALISTIC typhoon motion - much faster speeds + # Typical typhoon forward speed: 15-25 km/h (0.14-0.23°/hour) + + # Base forward speed depends on latitude and storm intensity + if 
+def get_realistic_genesis_locations():
+    """Get realistic typhoon genesis regions based on climatology"""
+    return {
@@ -1406,7 +1651,7 @@ def predict_storm_route_and_intensity_realistic(genesis_region, month, oni_value
    genesis_locations = get_realistic_genesis_locations()

    if genesis_region not in genesis_locations:
-        genesis_region = "Western Pacific Main Development Region"
+        genesis_region = "Western Pacific Main Development Region"  # Default

    genesis_info = genesis_locations[genesis_region]
    lat = genesis_info["lat"]
@@ -1420,27 +1665,29 @@
        'genesis_info': genesis_info
    }

-    # Realistic starting intensity
-    base_intensity = 30
+    # REALISTIC starting intensity - Tropical Depression level
+    base_intensity = 30  # Start at TD level (25-35 kt)

-    # Environmental factors
-    if oni_value > 1.0:
+    # Environmental factors for genesis
+    if oni_value > 1.0:  # Strong El Niño - suppressed development
        intensity_modifier = -6
-    elif oni_value > 0.5:
+    elif oni_value > 0.5:  # Moderate El Niño
        intensity_modifier = -3
-    elif oni_value < -1.0:
+    elif oni_value < -1.0:  # Strong La Niña - enhanced development
        intensity_modifier = +8
-    elif oni_value < -0.5:
+    elif oni_value < -0.5:  # Moderate La Niña
        intensity_modifier = +5
-    else:
+    else:  # Neutral
        intensity_modifier = oni_value * 2

+    # Seasonal genesis effects
    seasonal_factors = {
        1: -8, 2: -6, 3: -4, 4: -2, 5: 2, 6: 6,
        7: 10, 8: 12, 9: 15, 10: 10, 11: 4, 12: -5
    }
    seasonal_modifier = seasonal_factors.get(month, 0)

+    # Genesis region favorability
    region_factors = {
        "Western Pacific Main Development Region": 8,
        "South China Sea": 4,
@@ -1455,137 +1702,160 @@
    }
    region_modifier = region_factors.get(genesis_region, 0)

+    # Calculate realistic starting intensity (TD level)
    predicted_intensity = base_intensity + intensity_modifier + seasonal_modifier + region_modifier
-    predicted_intensity = max(25, min(40, predicted_intensity))
+    predicted_intensity = max(25, min(40, predicted_intensity))  # Keep in TD-weak TS range

+    # Add realistic uncertainty for genesis
    intensity_uncertainty = np.random.normal(0, 2)
    predicted_intensity += intensity_uncertainty
-    predicted_intensity = max(25, min(38, predicted_intensity))
+    predicted_intensity = max(25, min(38, predicted_intensity))  # TD range

    results['current_prediction'] = {
        'intensity_kt': predicted_intensity,
-        'pressure_hpa': 1008 - (predicted_intensity - 25) * 0.6,
+        'pressure_hpa': 1008 - (predicted_intensity - 25) * 0.6,  # Realistic TD pressure
        'category': categorize_typhoon_enhanced(predicted_intensity),
        'genesis_region': genesis_region
    }

-    # Route prediction
+    # REALISTIC route prediction with proper typhoon speeds
    current_lat = lat
    current_lon = lon
    current_intensity = predicted_intensity

    route_points = []

+    # Track storm development over time with REALISTIC SPEEDS
    for hour in range(0, forecast_hours + 6, 6):
-        # Realistic motion
-        if current_lat < 20:
-            base_speed = 0.12
-        elif current_lat < 30:
-            base_speed = 0.18
-        else:
-            base_speed = 0.25
+        # REALISTIC typhoon motion - much faster speeds
+        # Typical typhoon forward speed: 15-25 km/h (0.14-0.23°/hour)
+
+        # Base forward speed depends on latitude and storm intensity
+        if current_lat < 20:  # Low latitude - slower
+            base_speed = 0.12  # ~13 km/h
+        elif current_lat < 30:  # Mid latitude - moderate
+            base_speed = 0.18  # ~20 km/h
+        else:  # High latitude - faster
+            base_speed = 0.25  # ~28 km/h
+
+        # Intensity affects speed (stronger storms can move faster)
        intensity_speed_factor = 1.0 + (current_intensity - 50) / 200
        base_speed *= max(0.8, min(1.4, intensity_speed_factor))

+        # Beta drift (Coriolis effect) - realistic values
        beta_drift_lat = 0.02 * np.sin(np.radians(current_lat))
        beta_drift_lon = -0.05 * np.cos(np.radians(current_lat))

-        if month in [6, 7, 8, 9]:
+        # Seasonal steering patterns with realistic speeds
+        if month in [6, 7, 8, 9]:  # Peak season
            ridge_strength = 1.2
            ridge_position = 32 + 4 * np.sin(2 * np.pi * (month - 6) / 4)
-        else:
+        else:  # Off season
            ridge_strength = 0.9
            ridge_position = 28

-        if current_lat < ridge_position - 10:
-            lat_tendency = base_speed * 0.3 + beta_drift_lat
-            lon_tendency = -base_speed * 0.9 + beta_drift_lon
-        elif current_lat > ridge_position - 3:
-            lat_tendency = base_speed * 0.8 + beta_drift_lat
-            lon_tendency = base_speed * 0.4 + beta_drift_lon
-        else:
-            lat_tendency = base_speed * 0.4 + beta_drift_lat
-            lon_tendency = -base_speed * 0.7 + beta_drift_lon
+        # REALISTIC motion based on position relative to subtropical ridge
+        if current_lat < ridge_position - 10:  # Well south of ridge - westward movement
+            lat_tendency = base_speed * 0.3 + beta_drift_lat  # Slight poleward
+            lon_tendency = -base_speed * 0.9 + beta_drift_lon  # Strong westward
+        elif current_lat > ridge_position - 3:  # Near ridge - recurvature
+            lat_tendency = base_speed * 0.8 + beta_drift_lat  # Strong poleward
+            lon_tendency = base_speed * 0.4 + beta_drift_lon  # Eastward
+        else:  # In between - normal WNW motion
+            lat_tendency = base_speed * 0.4 + beta_drift_lat  # Moderate poleward
+            lon_tendency = -base_speed * 0.7 + beta_drift_lon  # Moderate westward

-        if oni_value > 0.5:
+        # ENSO steering modulation (realistic effects)
+        if oni_value > 0.5:  # El Niño - more eastward/poleward motion
            lon_tendency += 0.05
            lat_tendency += 0.02
-        elif oni_value < -0.5:
+        elif oni_value < -0.5:  # La Niña - more westward motion
            lon_tendency -= 0.08
            lat_tendency -= 0.01

+        # Add motion uncertainty that grows with time (realistic error growth)
        motion_uncertainty = 0.02 + (hour / 120) * 0.04
        lat_noise = np.random.normal(0, motion_uncertainty)
        lon_noise = np.random.normal(0, motion_uncertainty)

+        # Update position with realistic speeds
        current_lat += lat_tendency + lat_noise
        current_lon += lon_tendency + lon_noise

-        # Intensity evolution
+        # REALISTIC intensity evolution with proper development cycles
+
+        # Development phase (first 48-72 hours) - realistic intensification
        if hour <= 48:
-            if current_intensity < 50:
-                if 10 <= current_lat <= 25 and 115 <= current_lon <= 165:
+            if current_intensity < 50:  # Still weak - rapid development possible
+                if 10 <= current_lat <= 25 and 115 <= current_lon <= 165:  # Favorable environment
                    intensity_tendency = 4.5 if current_intensity < 35 else 3.0
-                elif 120 <= current_lon <= 155 and 15 <= current_lat <= 20:
+                elif 120 <= current_lon <= 155 and 15 <= current_lat <= 20:  # Best environment
                    intensity_tendency = 6.0 if current_intensity < 40 else 4.0
                else:
                    intensity_tendency = 2.0
-            elif current_intensity < 80:
+            elif current_intensity < 80:  # Moderate intensity
                intensity_tendency = 2.5 if (120 <= current_lon <= 155 and 10 <= current_lat <= 25) else 1.0
-            else:
+            else:  # Already strong
                intensity_tendency = 1.0

+        # Mature phase (48-120 hours) - peak intensity maintenance
        elif hour <= 120:
-            if current_lat < 25 and current_lon > 120:
+            if current_lat < 25 and current_lon > 120:  # Still in favorable waters
                if current_intensity < 120:
                    intensity_tendency = 1.5
                else:
-                    intensity_tendency = 0.0
+                    intensity_tendency = 0.0  # Maintain intensity
            else:
                intensity_tendency = -1.5

+        # Extended phase (120+ hours) - gradual weakening
        else:
            if current_lat < 30 and current_lon > 115:
-                intensity_tendency = -2.0
+                intensity_tendency = -2.0  # Slow weakening
            else:
-                intensity_tendency = -3.5
+                intensity_tendency = -3.5  # Faster weakening

-        # Environmental modulation
-        if current_lat > 35:
+        # Environmental modulation (realistic effects)
+        if current_lat > 35:  # High latitude - rapid weakening
            intensity_tendency -= 12
-        elif current_lat > 30:
+        elif current_lat > 30:  # Moderate latitude
            intensity_tendency -= 5
-        elif current_lon < 110:
+        elif current_lon < 110:  # Land interaction
            intensity_tendency -= 15
-        elif 125 <= current_lon <= 155 and 10 <= current_lat <= 25:
+        elif 125 <= current_lon <= 155 and 10 <= current_lat <= 25:  # Warm pool
            intensity_tendency += 2
-        elif 160 <= current_lon <= 180 and 15 <= current_lat <= 30:
+        elif 160 <= current_lon <= 180 and 15 <= current_lat <= 30:  # Still warm
            intensity_tendency += 1

-        if current_lat < 8:
+        # SST effects (realistic temperature impact)
+        if current_lat < 8:  # Very warm but weak Coriolis
            intensity_tendency += 0.5
-        elif 8 <= current_lat <= 20:
+        elif 8 <= current_lat <= 20:  # Sweet spot for development
            intensity_tendency += 2.0
-        elif 20 < current_lat <= 30:
+        elif 20 < current_lat <= 30:  # Marginal
            intensity_tendency -= 1.0
-        elif current_lat > 30:
+        elif current_lat > 30:  # Cool waters
            intensity_tendency -= 4.0

-        if month in [12, 1, 2, 3]:
+        # Shear effects (simplified but realistic)
+        if month in [12, 1, 2, 3]:  # High shear season
            intensity_tendency -= 2.0
-        elif month in [7, 8, 9]:
+        elif month in [7, 8, 9]:  # Low shear season
            intensity_tendency += 1.0

-        intensity_noise = np.random.normal(0, 1.5)
+        # Update intensity with realistic bounds and variability
+        intensity_noise = np.random.normal(0, 1.5)  # Small random fluctuations
        current_intensity += intensity_tendency + intensity_noise
-        current_intensity = max(20, min(185, current_intensity))
+        current_intensity = max(20, min(185, current_intensity))  # Realistic range

+        # Calculate confidence based on forecast time and environment
        base_confidence = 0.92
        time_penalty = (hour / 120) * 0.45
        environment_penalty = 0.15 if current_lat > 30 or current_lon < 115 else 0
        confidence = max(0.25, base_confidence - time_penalty - environment_penalty)

+        # Determine development stage
        if hour <= 24:
            stage = 'Genesis'
        elif hour <= 72:
@@ -1605,12 +1875,13 @@
            'category': categorize_typhoon_enhanced(current_intensity),
            'confidence': confidence,
            'development_stage': stage,
-            'forward_speed_kmh': base_speed * 111,
+            'forward_speed_kmh': base_speed * 111,  # Convert to km/h
            'pressure_hpa': max(900, 1013 - (current_intensity - 25) * 0.9)
        })

    results['route_forecast'] = route_points

+    # Realistic confidence scores
    results['confidence_scores'] = {
        'genesis': 0.88,
        'early_development': 0.82,
@@ -1623,6 +1894,7 @@
        'long_term': max(0.3, 0.8 - (forecast_hours / 240) * 0.5)
    }

+    # Model information
    results['model_info'] = f"Enhanced Realistic Model - {genesis_region}"

    return results
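# --- Editor's note (illustrative, not in the patch): the loop above draws genesis
# and motion noise from np.random, so repeated calls naturally form a simple
# Monte Carlo ensemble. A minimal sketch of summarizing intensity spread:
def ensemble_spread_sketch(region, month, oni_value, n_members=20, forecast_hours=72):
    """Run the realistic predictor repeatedly and report mean/std of intensity per hour."""
    by_hour = {}
    for _ in range(n_members):
        result = predict_storm_route_and_intensity_realistic(
            region, month, oni_value, forecast_hours=forecast_hours
        )
        for point in result['route_forecast']:
            by_hour.setdefault(point['hour'], []).append(point['intensity_kt'])
    # {forecast hour: (ensemble mean, ensemble std) in kt}
    return {h: (float(np.mean(v)), float(np.std(v))) for h, v in sorted(by_hour.items())}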
@@ -1645,6 +1917,7 @@ def create_animated_route_visualization(prediction_results, show_uncertainty=Tru
    route_data = prediction_results['route_forecast']

+    # Extract data for plotting
    hours = [point['hour'] for point in route_data]
    lats = [point['lat'] for point in route_data]
    lons = [point['lon'] for point in route_data]
@@ -1655,6 +1928,7 @@ def create_animated_route_visualization(prediction_results, show_uncertainty=Tru
    speeds = [point.get('forward_speed_kmh', 15) for point in route_data]
    pressures = [point.get('pressure_hpa', 1013) for point in route_data]

+    # Create subplot layout with map and intensity plot
    fig = make_subplots(
        rows=2, cols=2,
        subplot_titles=('Storm Track Animation', 'Wind Speed vs Time', 'Forward Speed vs Time', 'Pressure vs Time'),
@@ -1665,8 +1939,11 @@
    )

    if enable_animation:
+        # Add frames for animation
        frames = []

+        # Static background elements first
+        # Add complete track as background
        fig.add_trace(
            go.Scattergeo(
                lon=lons,
@@ -1680,6 +1957,7 @@
            row=1, col=1
        )

+        # Genesis marker (always visible)
        fig.add_trace(
            go.Scattergeo(
                lon=[lons[0]],
@@ -1704,6 +1982,7 @@
            row=1, col=1
        )

+        # Create animation frames
        for i in range(len(route_data)):
            frame_lons = lons[:i+1]
            frame_lats = lats[:i+1]
@@ -1711,10 +1990,12 @@
            frame_categories = categories[:i+1]
            frame_hours = hours[:i+1]

+            # Current position marker
            current_color = enhanced_color_map.get(frame_categories[-1], 'rgb(128,128,128)')
            current_size = 15 + (frame_intensities[-1] / 10)

            frame_data = [
+                # Animated track up to current point
                go.Scattergeo(
                    lon=frame_lons,
                    lat=frame_lats,
@@ -1729,6 +2010,7 @@
                    name='Current Track',
                    showlegend=False
                ),
+                # Current position highlight
                go.Scattergeo(
                    lon=[frame_lons[-1]],
                    lat=[frame_lats[-1]],
@@ -1752,6 +2034,7 @@
                        ""
                    )
                ),
+                # Animated wind plot
                go.Scatter(
                    x=frame_hours,
                    y=frame_intensities,
@@ -1762,6 +2045,7 @@
                    showlegend=False,
                    yaxis='y2'
                ),
+                # Animated speed plot
                go.Scatter(
                    x=frame_hours,
                    y=speeds[:i+1],
@@ -1772,6 +2056,7 @@
                    showlegend=False,
                    yaxis='y3'
                ),
+                # Animated pressure plot
                go.Scatter(
                    x=frame_hours,
                    y=pressures[:i+1],
@@ -1795,6 +2080,7 @@
        fig.frames = frames

+        # Add play/pause controls
        fig.update_layout(
            updatemenus=[
                {
@@ -1850,13 +2136,14 @@
                        "label": f"H{route_data[i]['hour']}",
                        "method": "animate"
                    }
-                    for i in range(0, len(route_data), max(1, len(route_data)//20))
+                    for i in range(0, len(route_data), max(1, len(route_data)//20))  # Limit slider steps
                ]
            }]
        )

    else:
-        # Static view
+        # Static view with all points
+        # Add genesis marker
        fig.add_trace(
            go.Scattergeo(
                lon=[lons[0]],
@@ -1880,7 +2167,8 @@
            row=1, col=1
        )

-        for i in range(0, len(route_data), max(1, len(route_data)//50)):
+        # Add full track with intensity coloring
+        for i in range(0, len(route_data), max(1, len(route_data)//50)):  # Sample points for performance
            point = route_data[i]
            color = enhanced_color_map.get(point['category'], 'rgb(128,128,128)')
            size = 8 + (point['intensity_kt'] / 12)
@@ -1911,6 +2199,7 @@
            row=1, col=1
        )

+        # Connect points with track line
        fig.add_trace(
            go.Scattergeo(
                lon=lons,
@@ -1924,6 +2213,7 @@
        )

    # Add static intensity, speed, and pressure plots
+    # Wind speed plot
    fig.add_trace(
        go.Scatter(
            x=hours,
@@ -1977,6 +2267,7 @@
        uncertainty_lons_lower = []

        for i, point in enumerate(route_data):
+            # Uncertainty grows with time and decreases with confidence
            base_uncertainty = 0.4 + (i / len(route_data)) * 1.8
            confidence_factor = point.get('confidence', 0.8)
            uncertainty = base_uncertainty / confidence_factor
@@ -2006,8 +2297,8 @@
    # Enhanced layout
    fig.update_layout(
        title=f"Comprehensive Storm Development Analysis Starting from {prediction_results['genesis_info']['description']}",
-        height=1000,
-        width=1400,
+        height=1000,  # Taller for better subplot visibility
+        width=1400,  # Wider
        showlegend=True
    )
@@ -2037,6 +2328,7 @@
    current = prediction_results['current_prediction']
    genesis_info = prediction_results['genesis_info']

+    # Calculate some statistics
    max_intensity = max(intensities)
    max_intensity_time = hours[intensities.index(max_intensity)]
    avg_speed = np.mean(speeds)
@@ -2098,7 +2390,7 @@
MODEL: {prediction_results['model_info']}
        return None, error_msg

# -----------------------------
-# Regression Functions
+# Regression Functions (Original)
# -----------------------------

def perform_wind_regression(start_year, start_month, end_year, end_month):
@@ -2153,7 +2445,7 @@ def perform_longitude_regression(start_year, start_month, end_year, end_month):
        return f"Longitude Regression Error: {e}"

# -----------------------------
-# FIXED: Visualization Functions
+# Visualization Functions (Enhanced)
# -----------------------------

def get_full_tracks(start_year, start_month, end_year, end_month, enso_phase, typhoon_search):
@@ -2301,69 +2593,48 @@ def get_longitude_analysis(start_year, start_month, end_year, end_month, enso_ph
    return fig, slopes_text, regression
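# --- Editor's usage sketch (hypothetical wrapper, not in the patch): the analysis
# helpers above share a (start_year, start_month, end_year, end_month) window, so
# an offline summary can chain them. The 'all' ENSO phase and empty search string
# passed to get_longitude_analysis() are assumed defaults, not confirmed values.
def quick_report_sketch(start_year=2000, end_year=2024):
    """Print the regression summaries and return the longitude-analysis figure."""
    print(perform_wind_regression(start_year, 1, end_year, 12))
    print(perform_longitude_regression(start_year, 1, end_year, 12))
    fig, slopes_text, regression = get_longitude_analysis(
        start_year, 1, end_year, 12, 'all', ''
    )
    print(slopes_text)
    return fig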
# -----------------------------
-# FIXED: Animation Functions - NO FALLBACK
+# ENHANCED: Animation Functions with Taiwan Standard Support - FIXED VERSION
# -----------------------------

def get_available_years(typhoon_data):
-    """Get all available years from actual data - NO FALLBACK"""
+    """Get all available years including 2025 - with error handling"""
    try:
        if typhoon_data is None or typhoon_data.empty:
-            raise Exception("No typhoon data available for year extraction")
+            return [str(year) for year in range(2000, 2026)]

-        years = set()
-
-        # Try multiple methods to extract years
        if 'ISO_TIME' in typhoon_data.columns:
-            valid_times = typhoon_data['ISO_TIME'].dropna()
-            if len(valid_times) > 0:
-                years.update(valid_times.dt.year.unique())
-
-        if 'SEASON' in typhoon_data.columns:
-            valid_seasons = typhoon_data['SEASON'].dropna()
-            if len(valid_seasons) > 0:
-                years.update(valid_seasons.unique())
-
-        # Extract from SID if available (format: BASIN + NUMBER + YEAR)
-        if 'SID' in typhoon_data.columns and len(years) == 0:
-            for sid in typhoon_data['SID'].dropna().unique():
-                try:
-                    # Try to extract 4-digit year from SID
-                    year_match = pd.Series([sid]).str.extract(r'(\d{4})')[0].iloc[0]
-                    if year_match and 1950 <= int(year_match) <= 2030:
-                        years.add(int(year_match))
-                except:
-                    continue
-
-        if len(years) == 0:
-            raise Exception("Could not extract any valid years from typhoon data")
+            years = typhoon_data['ISO_TIME'].dt.year.dropna().unique()
+        elif 'SEASON' in typhoon_data.columns:
+            years = typhoon_data['SEASON'].dropna().unique()
+        else:
+            years = range(2000, 2026)  # Default range including 2025

-        # Convert to sorted list of strings
-        year_strings = sorted([str(int(year)) for year in years if 1950 <= year <= 2030])
+        # Convert to strings and sort
+        year_strings = sorted([str(int(year)) for year in years if not pd.isna(year)])

-        if len(year_strings) == 0:
-            raise Exception("No valid years found in reasonable range (1950-2030)")
+        # Ensure we have at least some years
+        if not year_strings:
+            return [str(year) for year in range(2000, 2026)]

-        logging.info(f"Extracted {len(year_strings)} years from data: {year_strings[0]} to {year_strings[-1]}")
        return year_strings

    except Exception as e:
-        logging.error(f"CRITICAL ERROR in get_available_years: {e}")
-        raise Exception(f"Cannot extract years from typhoon data: {e}")
+        print(f"Error in get_available_years: {e}")
+        return [str(year) for year in range(2000, 2026)]

def update_typhoon_options_enhanced(year, basin):
-    """Enhanced typhoon options - NEVER returns empty or fallback"""
+    """Enhanced typhoon options with TD support and 2025 data"""
    try:
        year = int(year)

-        # Filter by year
+        # Filter by year - handle both ISO_TIME and SEASON columns
        if 'ISO_TIME' in typhoon_data.columns:
            year_mask = typhoon_data['ISO_TIME'].dt.year == year
        elif 'SEASON' in typhoon_data.columns:
            year_mask = typhoon_data['SEASON'] == year
        else:
-            # Try to extract from SID
-            sid_year_mask = typhoon_data['SID'].str.contains(str(year), na=False)
-            year_mask = sid_year_mask
+            # Fallback - try to extract year from SID or other fields
+            year_mask = typhoon_data.index >= 0  # Include all data as fallback

        year_data = typhoon_data[year_mask].copy()
@@ -2376,9 +2647,9 @@
            year_data = year_data[year_data['BASIN'] == basin_code]

        if year_data.empty:
-            raise Exception(f"No storms found for year {year} and basin {basin}")
+            return gr.update(choices=["No storms found"], value=None)

-        # Get unique storms
+        # Get unique storms - include ALL intensities (including TD)
        storms = year_data.groupby('SID').agg({
            'NAME': 'first',
            'USA_WIND': 'max'
@@ -2399,50 +2670,39 @@
                options.append(option)

        if not options:
-            raise Exception(f"No valid storm options generated for year {year}")
+            return gr.update(choices=["No storms found"], value=None)

-        logging.info(f"Generated {len(options)} storm options for {year}")
        return gr.update(choices=sorted(options), value=options[0])

    except Exception as e:
-        error_msg = f"Error loading storms for {year}: {str(e)}"
-        logging.error(error_msg)
-        raise Exception(error_msg)
+        print(f"Error in update_typhoon_options_enhanced: {e}")
+        return gr.update(choices=["Error loading storms"], value=None)

def generate_enhanced_track_video_fixed(year, typhoon_selection, standard):
-    """FIXED: Enhanced track video generation - NO FALLBACK ALLOWED"""
+    """FIXED: Enhanced track video generation with working animation display"""
+    if not typhoon_selection or typhoon_selection == "No storms found":
+        return None
+
    try:
-        if not typhoon_selection or "No storms found" in typhoon_selection or "Error" in typhoon_selection:
-            raise Exception("Invalid typhoon selection provided")
-
        # Extract SID from selection
-        try:
-            sid = typhoon_selection.split('(')[1].split(')')[0]
-        except:
-            raise Exception(f"Could not extract SID from selection: {typhoon_selection}")
+        sid = typhoon_selection.split('(')[1].split(')')[0]

        # Get storm data
        storm_df = typhoon_data[typhoon_data['SID'] == sid].copy()

        if storm_df.empty:
-            raise Exception(f"No track data found for storm {sid}")
+            print(f"No data found for storm {sid}")
+            return None

        # Sort by time
        if 'ISO_TIME' in storm_df.columns:
            storm_df = storm_df.sort_values('ISO_TIME')

-        # Validate essential data
-        if 'LAT' not in storm_df.columns or 'LON' not in storm_df.columns:
-            raise Exception(f"Missing coordinate data for storm {sid}")
-
        # Extract data for animation
-        lats = pd.to_numeric(storm_df['LAT'], errors='coerce').dropna().values
-        lons = pd.to_numeric(storm_df['LON'], errors='coerce').dropna().values
-
-        if len(lats) < 2 or len(lons) < 2:
-            raise Exception(f"Insufficient track points for storm {sid}: {len(lats)} points")
+        lats = storm_df['LAT'].astype(float).values
+        lons = storm_df['LON'].astype(float).values

        if 'USA_WIND' in storm_df.columns:
-            winds = pd.to_numeric(storm_df['USA_WIND'], errors='coerce').fillna(30).values[:len(lats)]
+            winds = pd.to_numeric(storm_df['USA_WIND'], errors='coerce').fillna(0).values
        else:
            winds = np.full(len(lats), 30)
@@ -2450,7 +2710,7 @@
        storm_name = storm_df['NAME'].iloc[0] if pd.notna(storm_df['NAME'].iloc[0]) else "UNNAMED"
        season = storm_df['SEASON'].iloc[0] if 'SEASON' in storm_df.columns else year

-        logging.info(f"Generating FIXED video for {storm_name} ({sid}) with {len(lats)} track points using {standard} standard")
+        print(f"Generating FIXED video for {storm_name} ({sid}) with {len(lats)} track points using {standard} standard")

        # FIXED: Create figure with proper cartopy setup
        fig = plt.figure(figsize=(16, 10))
@@ -2479,20 +2739,24 @@
                    fontsize=18, fontweight='bold')

        # FIXED: Animation elements - proper initialization with cartopy transforms
+        # Initialize empty line for track with correct transform
        track_line, = ax.plot([], [], 'b-', linewidth=3, alpha=0.7,
                              label='Track', transform=ccrs.PlateCarree())

+        # Initialize current position marker
        current_point, = ax.plot([], [], 'o', markersize=15,
                                 transform=ccrs.PlateCarree())

+        # Historical track points (to show path traversed)
        history_points, = ax.plot([], [], 'o', markersize=6, alpha=0.4,
                                  color='blue', transform=ccrs.PlateCarree())

+        # Info text box
        info_box = ax.text(0.02, 0.98, '', transform=ax.transAxes,
                           fontsize=12, verticalalignment='top',
                           bbox=dict(boxstyle="round,pad=0.5", facecolor='white', alpha=0.9))

-        # FIXED: Color legend with proper categories
+        # FIXED: Color legend with proper categories for both standards
        legend_elements = []

        if standard == 'taiwan':
            categories = ['Tropical Depression', 'Tropical Storm', 'Severe Tropical Storm',
@@ -2511,24 +2775,25 @@
            ax.legend(handles=legend_elements, loc='upper right', fontsize=10)

-        # FIXED: Animation function
+        # FIXED: Animation function with proper artist updates and cartopy compatibility
        def animate_fixed(frame):
            """Fixed animation function that properly updates tracks with cartopy"""
            try:
                if frame >= len(lats):
                    return track_line, current_point, history_points, info_box

-                # Update track line up to current frame
+                # FIXED: Update track line up to current frame
                current_lons = lons[:frame+1]
                current_lats = lats[:frame+1]

+                # Update the track line data (this is the key fix!)
                track_line.set_data(current_lons, current_lats)

-                # Update historical points
+                # FIXED: Update historical points (smaller markers showing traversed path)
                if frame > 0:
                    history_points.set_data(current_lons[:-1], current_lats[:-1])

-                # Update current position with correct categorization
+                # FIXED: Update current position with correct categorization
                current_wind = winds[frame]

                if standard == 'taiwan':
@@ -2536,19 +2801,23 @@
                else:
                    category, color = categorize_typhoon_by_standard_fixed(current_wind, 'atlantic')

+                # Debug for first few frames
+                if frame < 3:
+                    print(f"FIXED Frame {frame}: Wind={current_wind:.1f}kt, Category={category}, Color={color}")
+
                # Update current position marker
                current_point.set_data([lons[frame]], [lats[frame]])
                current_point.set_color(color)
                current_point.set_markersize(12 + current_wind/8)

-                # Enhanced info display
+                # FIXED: Enhanced info display with correct Taiwan wind speed conversion
                if 'ISO_TIME' in storm_df.columns and frame < len(storm_df):
                    current_time = storm_df.iloc[frame]['ISO_TIME']
                    time_str = current_time.strftime('%Y-%m-%d %H:%M UTC') if pd.notna(current_time) else 'Unknown'
                else:
                    time_str = f"Step {frame+1}"

-                # Wind speed display
+                # Corrected wind speed display for Taiwan standard
                if standard == 'taiwan':
                    wind_ms = current_wind * 0.514444
                    wind_display = f"{current_wind:.0f} kt ({wind_ms:.1f} m/s)"
@@ -2566,43 +2835,52 @@
                )
                info_box.set_text(info_text)

+                # FIXED: Return all modified artists (crucial for proper display)
                return track_line, current_point, history_points, info_box

            except Exception as e:
-                logging.error(f"Error in animate frame {frame}: {e}")
+                print(f"Error in animate frame {frame}: {e}")
                return track_line, current_point, history_points, info_box

        # FIXED: Create animation with cartopy-compatible settings
+        # Key fixes: blit=False (crucial for cartopy), proper interval
        anim = animation.FuncAnimation(
            fig, animate_fixed, frames=len(lats),
-            interval=600, blit=False, repeat=True
+            interval=600, blit=False, repeat=True  # blit=False is essential for cartopy!
        )

-        # Save animation
+        # Save animation with optimized settings
        temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.mp4', dir=tempfile.gettempdir())

+        # FIXED: Writer settings optimized for track visibility
        writer = animation.FFMpegWriter(
-            fps=2, bitrate=3000, codec='libx264',
+            fps=2, bitrate=3000, codec='libx264',  # Slower FPS for better track visibility
            extra_args=['-pix_fmt', 'yuv420p']
        )

-        logging.info(f"Saving FIXED animation to {temp_file.name}")
+        print(f"Saving FIXED animation to {temp_file.name}")
        anim.save(temp_file.name, writer=writer, dpi=120)
        plt.close(fig)

-        logging.info(f"FIXED video generated successfully: {temp_file.name}")
+        print(f"FIXED video generated successfully: {temp_file.name}")
        return temp_file.name

    except Exception as e:
-        error_msg = f"CRITICAL ERROR generating video: {str(e)}"
-        logging.error(error_msg)
+        print(f"Error generating FIXED video: {e}")
        import traceback
        traceback.print_exc()
-        raise Exception(error_msg)
+        return None
+
+# FIXED: Update the simplified wrapper function
+def simplified_track_video_fixed(year, basin, typhoon, standard):
+    """Simplified track video function with FIXED animation and Taiwan classification"""
+    if not typhoon:
+        return None
+    return generate_enhanced_track_video_fixed(year, typhoon, standard)
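# --- Editor's note (sketch, not in the patch): FFMpegWriter needs an ffmpeg
# binary on PATH; on hosts without it, matplotlib's built-in PillowWriter can
# emit an animated GIF instead. A hedged fallback wrapper around the save step:
def save_animation_with_fallback(anim, base_path):
    """Try MP4 via ffmpeg first, fall back to an animated GIF via Pillow."""
    try:
        writer = animation.FFMpegWriter(fps=2, bitrate=3000, codec='libx264',
                                        extra_args=['-pix_fmt', 'yuv420p'])
        out_path = base_path + '.mp4'
        anim.save(out_path, writer=writer, dpi=120)
    except (FileNotFoundError, RuntimeError) as e:
        print(f"ffmpeg unavailable ({e}); writing GIF instead")
        writer = animation.PillowWriter(fps=2)
        out_path = base_path + '.gif'
        anim.save(out_path, writer=writer)
    return out_path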
logging.info(f"FIXED data loading complete:") - logging.info(f" - ONI data: {len(oni_data) if oni_data is not None else 0} years") - logging.info(f" - Typhoon data: {len(typhoon_data)} records") - logging.info(f" - Merged data: {len(merged_data)} storms") - - except Exception as e: - logging.error(f"CRITICAL ERROR during FIXED data initialization: {e}") - import traceback - traceback.print_exc() - raise Exception(f"Data initialization failed: {e}") + +# Initialize data +initialize_data() # ----------------------------- -# FIXED: Gradio Interface +# ENHANCED: Gradio Interface with Fixed Route Visualization and Enhanced Features # ----------------------------- def create_interface(): - """Create the enhanced Gradio interface - NO FALLBACKS""" + """Create the enhanced Gradio interface with robust error handling""" try: # Ensure data is available if oni_data is None or typhoon_data is None or merged_data is None: - raise Exception("Data not properly loaded for interface creation") + logging.warning("Data not properly loaded, creating minimal interface") + return create_minimal_fallback_interface() # Get safe data statistics - total_storms = len(typhoon_data['SID'].unique()) if 'SID' in typhoon_data.columns else 0 - total_records = len(typhoon_data) - available_years = get_available_years(typhoon_data) - year_range_display = f"{available_years[0]} - {available_years[-1]}" if available_years else "Unknown" + try: + total_storms = len(typhoon_data['SID'].unique()) if 'SID' in typhoon_data.columns else 0 + total_records = len(typhoon_data) + available_years = get_available_years(typhoon_data) + year_range_display = f"{available_years[0]} - {available_years[-1]}" if available_years else "Unknown" + except Exception as e: + logging.error(f"Error getting data statistics: {e}") + total_storms = 0 + total_records = 0 + year_range_display = "Unknown" + available_years = [str(year) for year in range(2000, 2026)] with gr.Blocks(title="Enhanced Typhoon Analysis Platform", theme=gr.themes.Soft()) as demo: gr.Markdown("# 🌪️ Enhanced Typhoon Analysis Platform") @@ -2683,21 +2967,18 @@ def create_interface(): - **Taiwan Standard**: Full support for Taiwan meteorological classification system - **2025 Data Ready**: Real-time compatibility with current year data - **Enhanced Animations**: High-quality storm track visualizations with both standards - - **NO FALLBACK DATA**: All data comes from real IBTrACS sources ### 📊 Data Status: - - **ONI Data**: {len(oni_data) if oni_data is not None else 0} years loaded + - **ONI Data**: {len(oni_data)} years loaded - **Typhoon Data**: {total_records:,} records loaded - - **Merged Data**: {len(merged_data):,} typhoons with analysis data + - **Merged Data**: {len(merged_data):,} typhoons with ONI values - **Available Years**: {year_range_display} - - **Unique Storms**: {total_storms:,} ### 🔧 Technical Capabilities: - **UMAP Clustering**: {"✅ Available" if UMAP_AVAILABLE else "⚠️ Limited to t-SNE/PCA"} - **AI Predictions**: {"🧠 Deep Learning" if CNN_AVAILABLE else "🔬 Physics-based"} - **Enhanced Categorization**: Tropical Depression to Super Typhoon - - **Platform**: Optimized for real-time analysis - - **Data Source**: Live IBTrACS database (no synthetic data) + - **Platform**: Optimized for Hugging Face Spaces ### 📈 Research Applications: - Climate change impact studies @@ -2740,9 +3021,10 @@ def create_interface(): def run_separate_clustering_analysis(method): try: + # Extract features for clustering storm_features = extract_storm_features(typhoon_data) if storm_features 
is None: - raise Exception("Could not extract storm features from data") + return None, None, None, None, "Error: Could not extract storm features" fig_cluster, fig_routes, fig_pressure, fig_wind, stats = create_separate_clustering_plots( storm_features, typhoon_data, method.lower() @@ -2751,8 +3033,7 @@ def create_interface(): except Exception as e: import traceback error_details = traceback.format_exc() - error_msg = f"Clustering analysis failed: {str(e)}\n\nDetails:\n{error_details}" - logging.error(error_msg) + error_msg = f"Error: {str(e)}\n\nDetails:\n{error_details}" return None, None, None, None, error_msg analyze_clusters_btn.click( @@ -2760,6 +3041,24 @@ def create_interface(): inputs=[reduction_method], outputs=[cluster_plot, routes_plot, pressure_plot, wind_plot, cluster_stats] ) + + cluster_info_text = """ + ### 📊 Enhanced Clustering Features: + - **Separate Visualizations**: Four distinct plots for comprehensive analysis + - **Multi-dimensional Analysis**: Uses 15+ storm characteristics including intensity, track shape, genesis location + - **Route Visualization**: Geographic storm tracks colored by cluster membership + - **Temporal Analysis**: Pressure and wind evolution patterns by cluster + - **DBSCAN Clustering**: Automatic pattern discovery without predefined cluster count + - **Interactive**: Hover over points to see storm details, zoom and pan all plots + + ### 🎯 How to Interpret: + - **Clustering Plot**: Each dot is a storm positioned by similarity (close = similar characteristics) + - **Routes Plot**: Actual geographic storm tracks, colored by which cluster they belong to + - **Pressure Plot**: Shows how pressure changes over time for storms in each cluster + - **Wind Plot**: Shows wind speed evolution patterns for each cluster + - **Cluster Colors**: Each cluster gets a unique color across all four visualizations + """ + gr.Markdown(cluster_info_text) with gr.Tab("🌊 Realistic Storm Genesis & Prediction"): gr.Markdown("## 🌊 Realistic Typhoon Development from Genesis") @@ -2787,6 +3086,7 @@ def create_interface(): info="Select realistic development region based on climatology" ) + # Display selected region info def update_genesis_info(region): locations = get_realistic_genesis_locations() if region in locations: @@ -2815,9 +3115,9 @@ def create_interface(): label="Forecast Length (hours)", value=72, minimum=20, - maximum=1000, + maximum=100000, step=6, - info="Extended forecasting: 20-1000 hours" + info="Extended forecasting: 20-1000hours (42 days max)" ) advanced_physics = gr.Checkbox( label="Advanced Physics", @@ -2849,17 +3149,20 @@ def create_interface(): def run_realistic_prediction(region, month, oni, hours, advanced_phys, uncertainty, animation): try: + # Run realistic prediction with genesis region results = predict_storm_route_and_intensity_realistic( region, month, oni, forecast_hours=hours, use_advanced_physics=advanced_phys ) + # Extract genesis conditions current = results['current_prediction'] intensity = current['intensity_kt'] category = current['category'] genesis_info = results.get('genesis_info', {}) + # Create enhanced visualization fig, forecast_text = create_animated_route_visualization( results, uncertainty, animation ) @@ -2878,7 +3181,10 @@ def create_interface(): logging.error(error_msg) import traceback traceback.print_exc() - raise gr.Error(error_msg) + return ( + 30, "Tropical Depression", f"Prediction failed: {str(e)}", + None, f"Error generating realistic forecast: {str(e)}" + ) predict_btn.click( fn=run_realistic_prediction, @@ -2956,14 
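                # --- Editor's sketch (hypothetical helper, not in the patch): the same
                # pipeline can be smoke-tested without the UI; write_html is standard
                # Plotly I/O, and the output filename is illustrative.
                def _smoke_test_prediction_sketch():
                    results = predict_storm_route_and_intensity_realistic(
                        "Western Pacific Main Development Region", month=9,
                        oni_value=-0.8, forecast_hours=72
                    )
                    fig, forecast_text = create_animated_route_visualization(results, True, False)
                    if fig is not None:
                        fig.write_html("demo_forecast.html")  # illustrative filename
                    return forecast_text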
            predict_btn.click(
                fn=run_realistic_prediction,
@@ -2956,14 +3262,13 @@ def create_interface():
            )

            with gr.Tab("🎬 Enhanced Track Animation"):
-                gr.Markdown("## 🎥 High-Quality Storm Track Visualization - NO FALLBACK DATA")
-                gr.Markdown("**ALL animations use real IBTrACS data - never synthetic or fallback data**")
+                gr.Markdown("## 🎥 High-Quality Storm Track Visualization (Atlantic & Taiwan Standards)")

                with gr.Row():
                    year_dropdown = gr.Dropdown(
                        label="Year",
                        choices=available_years,
-                        value=available_years[-1] if available_years else None
+                        value=available_years[-1] if available_years else "2024"
                    )
                    basin_dropdown = gr.Dropdown(
                        label="Basin",
@@ -2984,40 +3289,23 @@
                video_output = gr.Video(label="Storm Track Animation")

                # Update storm options when year or basin changes
-                def safe_update_typhoon_options(year, basin):
-                    try:
-                        return update_typhoon_options_enhanced(year, basin)
-                    except Exception as e:
-                        error_msg = f"Failed to load storms: {str(e)}"
-                        logging.error(error_msg)
-                        return gr.update(choices=[error_msg], value=None)
-
                for input_comp in [year_dropdown, basin_dropdown]:
                    input_comp.change(
-                        fn=safe_update_typhoon_options,
+                        fn=update_typhoon_options_enhanced,
                        inputs=[year_dropdown, basin_dropdown],
                        outputs=[typhoon_dropdown]
                    )

-                def safe_generate_video(year, typhoon_selection, standard):
-                    try:
-                        if not typhoon_selection:
-                            raise gr.Error("Please select a typhoon first")
-                        return generate_enhanced_track_video_fixed(year, typhoon_selection, standard)
-                    except Exception as e:
-                        error_msg = f"Video generation failed: {str(e)}"
-                        logging.error(error_msg)
-                        raise gr.Error(error_msg)
-
+                # FIXED: Generate video with fixed function
                generate_video_btn.click(
-                    fn=safe_generate_video,
+                    fn=generate_enhanced_track_video_fixed,
                    inputs=[year_dropdown, typhoon_dropdown, standard_dropdown],
                    outputs=[video_output]
                )

+                # FIXED animation info text with corrected Taiwan standards
                animation_info_text = """
-                ### 🎬 FIXED Animation Features - NO FALLBACK DATA:
-                - **Real Data Only**: All animations use actual IBTrACS typhoon track data
+                ### 🎬 Enhanced Animation Features:
                - **Dual Standards**: Full support for both Atlantic and Taiwan classification systems
                - **Full TD Support**: Now displays Tropical Depressions (< 34 kt) in gray
                - **2025 Compatibility**: Complete support for current year data
@@ -3025,26 +3313,36 @@
                - **Smart Scaling**: Storm symbols scale dynamically with intensity
                - **Real-time Info**: Live position, time, and meteorological data display
                - **Professional Styling**: Publication-quality animations with proper legends
+                - **Optimized Export**: Fast rendering with web-compatible video formats
                - **FIXED Animation**: Tracks now display properly with cartopy integration
-                - **Error Handling**: Robust error handling prevents fallback to synthetic data

                ### 🎌 Taiwan Standard Features (CORRECTED):
                - **CMA 2006 Standards**: Uses official China Meteorological Administration classification
                - **Six Categories**: TD → TS → STS → TY → STY → Super TY
-                - **Correct Thresholds**: Based on official meteorological standards
+                - **Correct Thresholds**:
+                  * Tropical Depression: < 17.2 m/s (< 33.4 kt)
+                  * Tropical Storm: 17.2-24.4 m/s (33.4-47.5 kt)
+                  * Severe Tropical Storm: 24.5-32.6 m/s (47.6-63.5 kt)
+                  * Typhoon: 32.7-41.4 m/s (63.6-80.6 kt)
+                  * Severe Typhoon: 41.5-50.9 m/s (80.7-99.1 kt)
+                  * Super Typhoon: ≥51.0 m/s (≥99.2 kt)
                - **m/s Display**: Shows both knots and meters per second
                - **CWB Compatible**: Matches Central Weather Bureau classifications
+                - **Fixed Color Coding**: Gray → Blue → Cyan → Yellow → Orange → Red
                """
                gr.Markdown(animation_info_text)
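                # --- Editor's sketch: an illustrative restatement of the CMA 2006
                # thresholds listed above. The real helper used by the video code is
                # categorize_typhoon_by_standard_fixed(); this standalone version may
                # differ from it in detail and uses matplotlib color names.
                def classify_taiwan_sketch(wind_kt):
                    """Map 1-min wind (kt) to Taiwan-standard category and display color."""
                    wind_ms = wind_kt * 0.514444
                    if wind_ms >= 51.0:
                        return 'Super Typhoon', 'red'
                    elif wind_ms >= 41.5:
                        return 'Severe Typhoon', 'orange'
                    elif wind_ms >= 32.7:
                        return 'Typhoon', 'yellow'
                    elif wind_ms >= 24.5:
                        return 'Severe Tropical Storm', 'cyan'
                    elif wind_ms >= 17.2:
                        return 'Tropical Storm', 'blue'
                    else:
                        return 'Tropical Depression', 'gray'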
            with gr.Tab("📊 Data Statistics & Insights"):
-                gr.Markdown("## 📈 Comprehensive Dataset Analysis - REAL DATA ONLY")
+                gr.Markdown("## 📈 Comprehensive Dataset Analysis")

+                # Create enhanced data summary
                try:
                    if len(typhoon_data) > 0:
+                        # Storm category distribution
                        storm_cats = typhoon_data.groupby('SID')['USA_WIND'].max().apply(categorize_typhoon_enhanced)
                        cat_counts = storm_cats.value_counts()

+                        # Create distribution chart with enhanced colors
                        fig_dist = px.bar(
                            x=cat_counts.index,
                            y=cat_counts.values,
@@ -3054,6 +3352,7 @@
                            color_discrete_map=enhanced_color_map
                        )

+                        # Seasonal distribution
                        if 'ISO_TIME' in typhoon_data.columns:
                            seasonal_data = typhoon_data.copy()
                            seasonal_data['Month'] = seasonal_data['ISO_TIME'].dt.month
@@ -3070,6 +3369,7 @@
                        else:
                            fig_seasonal = None

+                        # Basin distribution
                        if 'SID' in typhoon_data.columns:
                            basin_data = typhoon_data['SID'].str[:2].value_counts()
                            fig_basin = px.pie(
@@ -3094,7 +3394,10 @@
                except Exception as e:
                    gr.Markdown(f"Visualization error: {str(e)}")

-                # Enhanced statistics
+                # Enhanced statistics - FIXED formatting
+                total_storms = len(typhoon_data['SID'].unique()) if 'SID' in typhoon_data.columns else 0
+                total_records = len(typhoon_data)
+
                if 'SEASON' in typhoon_data.columns:
                    try:
                        min_year = int(typhoon_data['SEASON'].min())
@@ -3119,6 +3422,7 @@
                    basins_available = "Unknown"
                    avg_storms_per_year = 0

+                # TD specific statistics
                try:
                    if 'USA_WIND' in typhoon_data.columns:
                        td_storms = len(typhoon_data[typhoon_data['USA_WIND'] < 34]['SID'].unique())
@@ -3129,17 +3433,18 @@
                        td_storms = ts_storms = typhoon_storms = 0
                        td_percentage = 0
                except Exception as e:
+                    print(f"Error calculating TD statistics: {e}")
                    td_storms = ts_storms = typhoon_storms = 0
                    td_percentage = 0

+                # Create statistics text safely
                stats_text = f"""
-                ### 📊 REAL Dataset Summary - NO SYNTHETIC DATA:
+                ### 📊 Enhanced Dataset Summary:
                - **Total Unique Storms**: {total_storms:,}
                - **Total Track Records**: {total_records:,}
                - **Year Range**: {year_range} ({years_covered} years)
                - **Basins Available**: {basins_available}
                - **Average Storms/Year**: {avg_storms_per_year:.1f}
-                - **Data Source**: IBTrACS v04r01 (Real observations only)

                ### 🌪️ Storm Category Breakdown:
                - **Tropical Depressions**: {td_storms:,} storms ({td_percentage:.1f}%)
@@ -3154,7 +3459,6 @@
                - **2025 Data Ready** - Full compatibility with current season data
                - **Enhanced Animations** - Professional-quality storm track videos
                - **Multi-basin Analysis** - Comprehensive Pacific and Atlantic coverage
-                - **NO FALLBACK DATA** - All analysis uses real meteorological observations

                ### 🔬 Research Applications:
                - Climate change impact studies
@@ -3168,38 +3472,70 @@
        return demo

    except Exception as e:
-        logging.error(f"CRITICAL ERROR creating Gradio interface: {e}")
+        logging.error(f"Error creating Gradio interface: {e}")
        import traceback
        traceback.print_exc()
-        raise Exception(f"Interface creation failed: {e}")
+        # Create a minimal fallback interface
+        return create_minimal_fallback_interface()
+
+def create_minimal_fallback_interface():
+    """Create a minimal fallback interface when main interface fails"""
+    with gr.Blocks() as demo:
+        gr.Markdown("# Enhanced Typhoon Analysis Platform")
+        gr.Markdown("**Notice**: Loading with minimal interface due to data issues.")
+
+        with gr.Tab("Status"):
+            gr.Markdown("""
+            ## Platform Status
+
+            The application is running but encountered issues loading the full interface.
+            This could be due to:
+            - Data loading problems
+            - Missing dependencies
+            - Configuration issues
+
+            ### Available Features:
+            - Basic interface is functional
+            - Error logs are being generated
+            - System is ready for debugging
+
+            ### Next Steps:
+            1. Check the console logs for detailed error information
+            2. Verify all required data files are accessible
+            3. Ensure all dependencies are properly installed
+            4. Try restarting the application
+            """)
+
+        with gr.Tab("Debug"):
+            gr.Markdown("## Debug Information")
+
+            def get_debug_info():
+                debug_text = f"""
+                Python Environment:
+                - Working Directory: {os.getcwd()}
+                - Data Path: {DATA_PATH}
+                - UMAP Available: {UMAP_AVAILABLE}
+                - CNN Available: {CNN_AVAILABLE}
+
+                Data Status:
+                - ONI Data: {'Loaded' if oni_data is not None else 'Failed'}
+                - Typhoon Data: {'Loaded' if typhoon_data is not None else 'Failed'}
+                - Merged Data: {'Loaded' if merged_data is not None else 'Failed'}
+
+                File Checks:
+                - ONI Path Exists: {os.path.exists(ONI_DATA_PATH)}
+                - Typhoon Path Exists: {os.path.exists(TYPHOON_DATA_PATH)}
+                """
+                return debug_text
+
+            debug_btn = gr.Button("Get Debug Info")
+            debug_output = gr.Textbox(label="Debug Information", lines=15)
+            debug_btn.click(fn=get_debug_info, outputs=debug_output)
+
+    return demo

-# -----------------------------
-# MAIN EXECUTION
-# -----------------------------
+# Create and launch the interface
+demo = create_interface()

if __name__ == "__main__":
-    try:
-        # Initialize data first - CRITICAL
-        logging.info("Initializing data...")
-        initialize_data()
-
-        # Verify data loaded correctly
-        if typhoon_data is None or typhoon_data.empty:
-            raise Exception("CRITICAL: No typhoon data available for interface")
-
-        logging.info("Creating interface...")
-        demo = create_interface()
-
-        logging.info("Launching application...")
-        demo.launch(share=True)
-
-    except Exception as e:
-        logging.error(f"CRITICAL APPLICATION ERROR: {e}")
-        import traceback
-        traceback.print_exc()
-        print(f"\n{'='*60}")
-        print("CRITICAL ERROR: Application failed to start")
-        print(f"Error: {e}")
-        print("Check logs for detailed error information")
-        print(f"{'='*60}")
-        raise
\ No newline at end of file
+    demo.launch(share=True)  # Enable sharing with public link
\ No newline at end of file