diff --git "a/app.py" "b/app.py" --- "a/app.py" +++ "b/app.py" @@ -41,7 +41,7 @@ import tempfile import shutil import xarray as xr -# NEW: Advanced ML imports +# Advanced ML imports try: import umap.umap_ as umap UMAP_AVAILABLE = True @@ -52,12 +52,10 @@ except ImportError: # Optional CNN imports with robust error handling CNN_AVAILABLE = False try: - # Set environment variables before importing TensorFlow - os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' # Suppress TensorFlow warnings + os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' import tensorflow as tf from tensorflow.keras import layers, models - # Test if TensorFlow actually works - tf.config.set_visible_devices([], 'GPU') # Disable GPU to avoid conflicts + tf.config.set_visible_devices([], 'GPU') CNN_AVAILABLE = True print("TensorFlow successfully loaded - CNN features enabled") except Exception as e: @@ -80,13 +78,11 @@ logging.basicConfig( format='%(asctime)s - %(levelname)s - %(message)s' ) -# Remove argument parser to simplify startup +# FIXED: Data path setup DATA_PATH = '/tmp/typhoon_data' if 'SPACE_ID' in os.environ else tempfile.gettempdir() -# Ensure directory exists and is writable try: os.makedirs(DATA_PATH, exist_ok=True) - # Test write permissions test_file = os.path.join(DATA_PATH, 'test_write.txt') with open(test_file, 'w') as f: f.write('test') @@ -102,27 +98,21 @@ ONI_DATA_PATH = os.path.join(DATA_PATH, 'oni_data.csv') TYPHOON_DATA_PATH = os.path.join(DATA_PATH, 'processed_typhoon_data.csv') MERGED_DATA_CSV = os.path.join(DATA_PATH, 'merged_typhoon_era5_data.csv') -# IBTrACS settings - NOW INCLUDES ALL BASINS +# IBTrACS settings BASIN_FILES = { 'EP': 'ibtracs.EP.list.v04r01.csv', 'NA': 'ibtracs.NA.list.v04r01.csv', 'WP': 'ibtracs.WP.list.v04r01.csv', - 'SP': 'ibtracs.SP.list.v04r01.csv', # Added South Pacific - 'SI': 'ibtracs.SI.list.v04r01.csv', # Added South Indian - 'NI': 'ibtracs.NI.list.v04r01.csv' # Added North Indian + 'ALL': 'ibtracs.ALL.list.v04r01.csv' # Added ALL basin option } IBTRACS_BASE_URL = 'https://www.ncei.noaa.gov/data/international-best-track-archive-for-climate-stewardship-ibtracs/v04r01/access/csv/' -LOCAL_IBTRACS_PATH = os.path.join(DATA_PATH, 'ibtracs.WP.list.v04r01.csv') -CACHE_FILE = os.path.join(DATA_PATH, 'ibtracs_cache.pkl') -CACHE_EXPIRY_DAYS = 1 # ----------------------------- -# ENHANCED: Color Maps and Standards with TD Support - FIXED TAIWAN CLASSIFICATION +# FIXED: Color Maps and Standards with TD Support # ----------------------------- -# Enhanced color mapping with TD support (for Plotly) enhanced_color_map = { 'Unknown': 'rgb(200, 200, 200)', - 'Tropical Depression': 'rgb(128, 128, 128)', # Gray for TD + 'Tropical Depression': 'rgb(128, 128, 128)', 'Tropical Storm': 'rgb(0, 0, 255)', 'C1 Typhoon': 'rgb(0, 255, 255)', 'C2 Typhoon': 'rgb(0, 255, 0)', @@ -131,42 +121,26 @@ enhanced_color_map = { 'C5 Super Typhoon': 'rgb(255, 0, 0)' } -# Matplotlib-compatible color mapping (hex colors) matplotlib_color_map = { 'Unknown': '#C8C8C8', - 'Tropical Depression': '#808080', # Gray for TD - 'Tropical Storm': '#0000FF', # Blue - 'C1 Typhoon': '#00FFFF', # Cyan - 'C2 Typhoon': '#00FF00', # Green - 'C3 Strong Typhoon': '#FFFF00', # Yellow - 'C4 Very Strong Typhoon': '#FFA500', # Orange - 'C5 Super Typhoon': '#FF0000' # Red + 'Tropical Depression': '#808080', + 'Tropical Storm': '#0000FF', + 'C1 Typhoon': '#00FFFF', + 'C2 Typhoon': '#00FF00', + 'C3 Strong Typhoon': '#FFFF00', + 'C4 Very Strong Typhoon': '#FFA500', + 'C5 Super Typhoon': '#FF0000' } -# FIXED: Taiwan color mapping with correct CMA 2006 
standards taiwan_color_map_fixed = { - 'Tropical Depression': '#808080', # Gray - 'Tropical Storm': '#0000FF', # Blue - 'Severe Tropical Storm': '#00FFFF', # Cyan - 'Typhoon': '#FFFF00', # Yellow - 'Severe Typhoon': '#FFA500', # Orange - 'Super Typhoon': '#FF0000' # Red + 'Tropical Depression': '#808080', + 'Tropical Storm': '#0000FF', + 'Severe Tropical Storm': '#00FFFF', + 'Typhoon': '#FFFF00', + 'Severe Typhoon': '#FFA500', + 'Super Typhoon': '#FF0000' } -def rgb_string_to_hex(rgb_string): - """Convert 'rgb(r,g,b)' string to hex color for matplotlib""" - try: - # Extract numbers from 'rgb(r,g,b)' format - import re - numbers = re.findall(r'\d+', rgb_string) - if len(numbers) == 3: - r, g, b = map(int, numbers) - return f'#{r:02x}{g:02x}{b:02x}' - else: - return '#808080' # Default gray - except: - return '#808080' # Default gray - def get_matplotlib_color(category): """Get matplotlib-compatible color for a storm category""" return matplotlib_color_map.get(category, '#808080') @@ -188,17 +162,7 @@ ROUTE_COLORS = [ '#FF00CC', '#00FFCC', '#CC00FF', '#CCFF00', '#00CCFF' ] -# Original color map for backward compatibility -color_map = { - 'C5 Super Typhoon': 'rgb(255, 0, 0)', - 'C4 Very Strong Typhoon': 'rgb(255, 165, 0)', - 'C3 Strong Typhoon': 'rgb(255, 255, 0)', - 'C2 Typhoon': 'rgb(0, 255, 0)', - 'C1 Typhoon': 'rgb(0, 255, 255)', - 'Tropical Storm': 'rgb(0, 0, 255)', - 'Tropical Depression': 'rgb(128, 128, 128)' -} - +# Classification standards atlantic_standard = { 'C5 Super Typhoon': {'wind_speed': 137, 'color': 'Red', 'hex': '#FF0000'}, 'C4 Very Strong Typhoon': {'wind_speed': 113, 'color': 'Orange', 'hex': '#FFA500'}, @@ -209,7 +173,6 @@ atlantic_standard = { 'Tropical Depression': {'wind_speed': 0, 'color': 'Gray', 'hex': '#808080'} } -# FIXED: Taiwan standard with correct CMA 2006 thresholds taiwan_standard_fixed = { 'Super Typhoon': {'wind_speed_ms': 51.0, 'wind_speed_kt': 99.2, 'color': 'Red', 'hex': '#FF0000'}, 'Severe Typhoon': {'wind_speed_ms': 41.5, 'wind_speed_kt': 80.7, 'color': 'Orange', 'hex': '#FFA500'}, @@ -220,26 +183,20 @@ taiwan_standard_fixed = { } # ----------------------------- -# Utility Functions for HF Spaces +# FIXED: Utility Functions # ----------------------------- def safe_file_write(file_path, data_frame, backup_dir=None): """Safely write DataFrame to CSV with backup and error handling""" try: - # Create directory if it doesn't exist os.makedirs(os.path.dirname(file_path), exist_ok=True) - - # Try to write to a temporary file first temp_path = file_path + '.tmp' data_frame.to_csv(temp_path, index=False) - - # If successful, rename to final file os.rename(temp_path, file_path) logging.info(f"Successfully saved {len(data_frame)} records to {file_path}") return True - - except PermissionError as e: - logging.warning(f"Permission denied writing to {file_path}: {e}") + except Exception as e: + logging.error(f"Error saving file {file_path}: {e}") if backup_dir: try: backup_path = os.path.join(backup_dir, os.path.basename(file_path)) @@ -249,44 +206,9 @@ def safe_file_write(file_path, data_frame, backup_dir=None): except Exception as backup_e: logging.error(f"Failed to save to backup location: {backup_e}") return False - - except Exception as e: - logging.error(f"Error saving file {file_path}: {e}") - # Clean up temp file if it exists - temp_path = file_path + '.tmp' - if os.path.exists(temp_path): - try: - os.remove(temp_path) - except: - pass - return False - -def get_fallback_data_dir(): - """Get a fallback data directory that's guaranteed to be writable""" - 
fallback_dirs = [ - tempfile.gettempdir(), - '/tmp', - os.path.expanduser('~'), - os.getcwd() - ] - - for directory in fallback_dirs: - try: - test_dir = os.path.join(directory, 'typhoon_fallback') - os.makedirs(test_dir, exist_ok=True) - test_file = os.path.join(test_dir, 'test.txt') - with open(test_file, 'w') as f: - f.write('test') - os.remove(test_file) - return test_dir - except: - continue - - # If all else fails, use current directory - return os.getcwd() # ----------------------------- -# ONI and Typhoon Data Functions - FIXED TO LOAD ALL DATA +# FIXED: ONI Data Functions # ----------------------------- def download_oni_file(url, filename): @@ -302,10 +224,8 @@ def download_oni_file(url, filename): except Exception as e: logging.warning(f"Attempt {attempt + 1} failed to download ONI: {e}") if attempt < max_retries - 1: - time.sleep(2 ** attempt) # Exponential backoff - else: - logging.error(f"Failed to download ONI after {max_retries} attempts") - return False + time.sleep(2 ** attempt) + return False def convert_oni_ascii_to_csv(input_file, output_file): """Convert ONI ASCII format to CSV""" @@ -326,19 +246,18 @@ def convert_oni_ascii_to_csv(input_file, output_file): year = str(int(year)-1) data[year][month-1] = anom - # Write to CSV with safe write df = pd.DataFrame(data).T.reset_index() df.columns = ['Year','Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec'] df = df.sort_values('Year').reset_index(drop=True) - return safe_file_write(output_file, df, get_fallback_data_dir()) + return safe_file_write(output_file, df) except Exception as e: logging.error(f"Error converting ONI file: {e}") return False def update_oni_data(): - """Update ONI data with error handling - OPTIONAL now""" + """Update ONI data with error handling""" url = "https://www.cpc.ncep.noaa.gov/data/indices/oni.ascii.txt" temp_file = os.path.join(DATA_PATH, "temp_oni.ascii.txt") input_file = os.path.join(DATA_PATH, "oni.ascii.txt") @@ -352,33 +271,31 @@ def update_oni_data(): else: os.remove(temp_file) else: - # Create fallback ONI data if download fails - logging.warning("Creating fallback ONI data") - create_fallback_oni_data(output_file) + logging.warning("ONI download failed - will create minimal ONI data") + create_minimal_oni_data(output_file) except Exception as e: logging.error(f"Error updating ONI data: {e}") - create_fallback_oni_data(output_file) + create_minimal_oni_data(output_file) -def create_fallback_oni_data(output_file): - """Create minimal ONI data for testing - EXTENDED RANGE""" - years = range(1851, 2026) # Extended to full historical range +def create_minimal_oni_data(output_file): + """Create minimal ONI data for years without dropping typhoon data""" + years = range(1950, 2026) # Wide range to ensure coverage months = ['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec'] - # Create synthetic ONI data data = [] for year in years: row = [year] for month in months: - # Generate some realistic ONI values - value = np.random.normal(0, 1) * 0.5 + # Generate neutral ONI values (small variations around 0) + value = np.random.normal(0, 0.3) row.append(f"{value:.2f}") data.append(row) df = pd.DataFrame(data, columns=['Year'] + months) - safe_file_write(output_file, df, get_fallback_data_dir()) + safe_file_write(output_file, df) # ----------------------------- -# FIXED: IBTrACS Data Loading - LOAD ALL BASINS, ALL YEARS +# FIXED: IBTrACS Data Loading - No Fallback, All Data # ----------------------------- def download_ibtracs_file(basin, force_download=False): 
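# Editor's note: download_oni_file above and download_ibtracs_file below share the
# same pattern: reuse a recent local copy when one exists, otherwise download with
# exponential backoff. A minimal self-contained sketch of that pattern follows; the
# name fetch_with_cache_and_retry and its defaults are illustrative, not part of app.py.
import os
import time
import logging
import requests

def fetch_with_cache_and_retry(url, local_path, max_age_days=7, max_retries=3):
    """Return local_path if a fresh copy exists; otherwise download with retries."""
    # Skip the network entirely when the cached file is newer than max_age_days
    if os.path.exists(local_path):
        age_seconds = time.time() - os.path.getmtime(local_path)
        if age_seconds < max_age_days * 24 * 3600:
            return local_path
    for attempt in range(max_retries):
        try:
            response = requests.get(url, timeout=120)
            response.raise_for_status()
            with open(local_path, 'wb') as f:
                f.write(response.content)
            return local_path
        except Exception as e:
            logging.warning(f"Attempt {attempt + 1} failed for {url}: {e}")
            if attempt < max_retries - 1:
                time.sleep(2 ** attempt)  # Back off 1s, 2s, 4s between attempts
    return None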
@@ -387,7 +304,6 @@ def download_ibtracs_file(basin, force_download=False): local_path = os.path.join(DATA_PATH, filename) url = IBTRACS_BASE_URL + filename - # Check if file exists and is recent (less than 7 days old) if os.path.exists(local_path) and not force_download: file_age = time.time() - os.path.getmtime(local_path) if file_age < 7 * 24 * 3600: # 7 days @@ -396,10 +312,9 @@ def download_ibtracs_file(basin, force_download=False): try: logging.info(f"Downloading {basin} basin file from {url}") - response = requests.get(url, timeout=60) + response = requests.get(url, timeout=120) # Increased timeout response.raise_for_status() - # Ensure directory exists os.makedirs(os.path.dirname(local_path), exist_ok=True) with open(local_path, 'wb') as f: @@ -410,29 +325,8 @@ def download_ibtracs_file(basin, force_download=False): logging.error(f"Failed to download {basin} basin file: {e}") return None -def examine_ibtracs_structure(file_path): - """Examine the actual structure of an IBTrACS CSV file""" - try: - with open(file_path, 'r') as f: - lines = f.readlines() - - # Show first 5 lines - logging.info("First 5 lines of IBTrACS file:") - for i, line in enumerate(lines[:5]): - logging.info(f"Line {i}: {line.strip()}") - - # The first line contains the actual column headers - # No need to skip rows for IBTrACS v04r01 - df = pd.read_csv(file_path, nrows=5) - logging.info(f"Columns from first row: {list(df.columns)}") - - return list(df.columns) - except Exception as e: - logging.error(f"Error examining IBTrACS structure: {e}") - return None - -def load_ibtracs_csv_directly(basin='WP'): - """Load IBTrACS data directly from CSV - FIXED VERSION""" +def load_ibtracs_csv_directly(basin='ALL'): + """Load IBTrACS data directly from CSV - FIXED to load ALL data""" filename = BASIN_FILES[basin] local_path = os.path.join(DATA_PATH, filename) @@ -440,368 +334,328 @@ def load_ibtracs_csv_directly(basin='WP'): if not os.path.exists(local_path): downloaded_path = download_ibtracs_file(basin) if not downloaded_path: + logging.error(f"Could not download {basin} basin data") return None try: - # First, examine the structure - actual_columns = examine_ibtracs_structure(local_path) - if not actual_columns: - logging.error("Could not examine IBTrACS file structure") - return None - - # Read IBTrACS CSV - DON'T skip any rows for v04r01 - # The first row contains proper column headers logging.info(f"Reading IBTrACS CSV file: {local_path}") - df = pd.read_csv(local_path, low_memory=False) # Don't skip any rows + # Read with low_memory=False to ensure proper data types + df = pd.read_csv(local_path, low_memory=False) - logging.info(f"Original columns: {list(df.columns)}") - logging.info(f"Data shape before cleaning: {df.shape}") + logging.info(f"Original data shape: {df.shape}") + logging.info(f"Available columns: {list(df.columns)}") - # Check which essential columns exist - required_cols = ['SID', 'ISO_TIME', 'LAT', 'LON'] - available_required = [col for col in required_cols if col in df.columns] - - if len(available_required) < 2: - logging.error(f"Missing critical columns. 
Available: {list(df.columns)}") + # Essential columns check + required_cols = ['SID', 'LAT', 'LON'] + missing_cols = [col for col in required_cols if col not in df.columns] + if missing_cols: + logging.error(f"Missing critical columns: {missing_cols}") return None - # Clean and standardize the data with format specification - if 'ISO_TIME' in df.columns: - df['ISO_TIME'] = pd.to_datetime(df['ISO_TIME'], format='%Y-%m-%d %H:%M:%S', errors='coerce') - - # Clean numeric columns + # FIXED: Data cleaning without dropping data unnecessarily + # Clean numeric columns carefully numeric_columns = ['LAT', 'LON', 'WMO_WIND', 'WMO_PRES', 'USA_WIND', 'USA_PRES'] for col in numeric_columns: if col in df.columns: df[col] = pd.to_numeric(df[col], errors='coerce') - # Filter out invalid/missing critical data - BUT KEEP ALL YEARS - valid_rows = df['LAT'].notna() & df['LON'].notna() - df = df[valid_rows] - - # Ensure LAT/LON are in reasonable ranges - df = df[(df['LAT'] >= -90) & (df['LAT'] <= 90)] - df = df[(df['LON'] >= -180) & (df['LON'] <= 180)] + # Time handling + if 'ISO_TIME' in df.columns: + df['ISO_TIME'] = pd.to_datetime(df['ISO_TIME'], errors='coerce') + + # FIXED: Only filter out clearly invalid coordinates + valid_coords = ( + df['LAT'].notna() & + df['LON'].notna() & + (df['LAT'].between(-90, 90)) & + (df['LON'].between(-180, 180)) + ) + df = df[valid_coords] - # Add basin info if missing + # Add missing columns with defaults if 'BASIN' not in df.columns: - df['BASIN'] = basin + if 'SID' in df.columns: + df['BASIN'] = df['SID'].str[:2] + else: + df['BASIN'] = basin - # Add default columns if missing if 'NAME' not in df.columns: df['NAME'] = 'UNNAMED' if 'SEASON' not in df.columns and 'ISO_TIME' in df.columns: df['SEASON'] = df['ISO_TIME'].dt.year + elif 'SEASON' not in df.columns: + # Extract year from SID if possible + if 'SID' in df.columns: + try: + df['SEASON'] = df['SID'].str.extract(r'(\d{4})').astype(float) + except: + df['SEASON'] = 2000 # Default year logging.info(f"Successfully loaded {len(df)} records from {basin} basin") + logging.info(f"Final data shape: {df.shape}") return df except Exception as e: logging.error(f"Error reading IBTrACS CSV file: {e}") + import traceback + traceback.print_exc() return None -def load_ibtracs_data_fixed(): - """FIXED: Load ALL AVAILABLE BASIN DATA without restrictions""" - ibtracs_data = {} +def load_all_ibtracs_data(): + """Load ALL available IBTrACS data - FIXED to never use fallback""" + all_data = [] - # Load ALL basins available - all_basins = ['WP', 'EP', 'NA', 'SP', 'SI', 'NI'] # All available basins + # Try to load the ALL basin file first (contains all basins) + try: + logging.info("Attempting to load ALL basin data...") + all_basin_data = load_ibtracs_csv_directly('ALL') + if all_basin_data is not None and not all_basin_data.empty: + logging.info(f"Successfully loaded ALL basin data: {len(all_basin_data)} records") + return all_basin_data + except Exception as e: + logging.warning(f"Failed to load ALL basin data: {e}") - for basin in all_basins: + # If ALL basin fails, load individual basins + basins_to_load = ['WP', 'EP', 'NA'] + for basin in basins_to_load: try: logging.info(f"Loading {basin} basin data...") - df = load_ibtracs_csv_directly(basin) - - if df is not None and not df.empty: - ibtracs_data[basin] = df - logging.info(f"Successfully loaded {basin} basin with {len(df)} records") + basin_data = load_ibtracs_csv_directly(basin) + if basin_data is not None and not basin_data.empty: + basin_data['BASIN'] = basin + 
all_data.append(basin_data) + logging.info(f"Successfully loaded {basin} basin: {len(basin_data)} records") else: logging.warning(f"No data loaded for basin {basin}") - ibtracs_data[basin] = None - except Exception as e: logging.error(f"Failed to load basin {basin}: {e}") - ibtracs_data[basin] = None - return ibtracs_data + if all_data: + combined_data = pd.concat(all_data, ignore_index=True) + logging.info(f"Combined all basins: {len(combined_data)} total records") + return combined_data + else: + logging.error("No IBTrACS data could be loaded from any basin") + return None def load_data_fixed(oni_path, typhoon_path): - """FIXED: Load ALL typhoon data regardless of ONI availability""" - # Load ONI data - OPTIONAL now - oni_data = pd.DataFrame({'Year': [], 'Jan': [], 'Feb': [], 'Mar': [], 'Apr': [], - 'May': [], 'Jun': [], 'Jul': [], 'Aug': [], 'Sep': [], - 'Oct': [], 'Nov': [], 'Dec': []}) + """FIXED data loading - loads all available typhoon data regardless of ONI""" - oni_available = False + # Load ONI data (optional - typhoon analysis can work without it) + oni_data = None if os.path.exists(oni_path): try: oni_data = pd.read_csv(oni_path) logging.info(f"Successfully loaded ONI data with {len(oni_data)} years") - oni_available = True except Exception as e: logging.error(f"Error loading ONI data: {e}") - oni_available = False - else: - logging.warning(f"ONI data file not found: {oni_path} - proceeding without ONI") - oni_available = False - # Load typhoon data - ALWAYS LOAD ALL AVAILABLE DATA + if oni_data is None: + logging.warning("ONI data not available - creating minimal ONI data") + update_oni_data() + try: + oni_data = pd.read_csv(oni_path) + except Exception as e: + logging.error(f"Still can't load ONI data: {e}") + # Create minimal fallback + create_minimal_oni_data(oni_path) + oni_data = pd.read_csv(oni_path) + + # FIXED: Load typhoon data - ALWAYS from IBTrACS, never use fallback typhoon_data = None - # First, try to load from existing processed file + # Try to load from existing processed file first if os.path.exists(typhoon_path): try: typhoon_data = pd.read_csv(typhoon_path, low_memory=False) - # Ensure basic columns exist and are valid - required_cols = ['LAT', 'LON'] + required_cols = ['LAT', 'LON', 'SID'] if all(col in typhoon_data.columns for col in required_cols): if 'ISO_TIME' in typhoon_data.columns: typhoon_data['ISO_TIME'] = pd.to_datetime(typhoon_data['ISO_TIME'], errors='coerce') logging.info(f"Loaded processed typhoon data with {len(typhoon_data)} records") + # Validate the data quality + valid_records = typhoon_data['LAT'].notna() & typhoon_data['LON'].notna() + if valid_records.sum() / len(typhoon_data) > 0.8: # If >80% valid, use it + typhoon_data = typhoon_data[valid_records] + else: + logging.warning("Processed data quality poor, reloading from IBTrACS") + typhoon_data = None else: - logging.warning("Processed typhoon data missing required columns, will reload from IBTrACS") + logging.warning("Processed typhoon data missing required columns, reloading from IBTrACS") typhoon_data = None except Exception as e: logging.error(f"Error loading processed typhoon data: {e}") typhoon_data = None - # If no valid processed data, load from IBTrACS - LOAD ALL BASINS + # FIXED: Load from IBTrACS if needed - NO FALLBACK ALLOWED if typhoon_data is None or typhoon_data.empty: - logging.info("Loading ALL available typhoon data from IBTrACS...") - ibtracs_data = load_ibtracs_data_fixed() - - # Combine ALL available basin data - combined_dfs = [] - for basin in ['WP', 'EP', 
'NA', 'SP', 'SI', 'NI']: - if basin in ibtracs_data and ibtracs_data[basin] is not None: - df = ibtracs_data[basin].copy() - df['BASIN'] = basin - combined_dfs.append(df) - logging.info(f"Added {len(df)} records from {basin} basin") - - if combined_dfs: - typhoon_data = pd.concat(combined_dfs, ignore_index=True) - # Ensure SID has proper format - if 'SID' not in typhoon_data.columns and 'BASIN' in typhoon_data.columns: - # Create SID from basin and other identifiers if missing - if 'SEASON' in typhoon_data.columns: - typhoon_data['SID'] = (typhoon_data['BASIN'].astype(str) + - typhoon_data.index.astype(str).str.zfill(2) + - typhoon_data['SEASON'].astype(str)) - else: - typhoon_data['SID'] = (typhoon_data['BASIN'].astype(str) + - typhoon_data.index.astype(str).str.zfill(2) + - '2000') - - # Save the processed data for future use - safe_file_write(typhoon_path, typhoon_data, get_fallback_data_dir()) - logging.info(f"Combined IBTrACS data: {len(typhoon_data)} total records from all basins") - else: - logging.error("Failed to load any IBTrACS basin data") - # Create comprehensive fallback data - typhoon_data = create_comprehensive_fallback_typhoon_data() + logging.info("Loading typhoon data from IBTrACS...") + typhoon_data = load_all_ibtracs_data() + + if typhoon_data is None or typhoon_data.empty: + raise Exception("CRITICAL ERROR: No typhoon data could be loaded from IBTrACS. Check internet connection and IBTrACS availability.") + + # Process and save the loaded data + # Ensure SID exists and is properly formatted + if 'SID' not in typhoon_data.columns: + logging.error("CRITICAL: No SID column in typhoon data") + raise Exception("Typhoon data missing SID column") + + # Save the processed data for future use + try: + safe_file_write(typhoon_path, typhoon_data) + logging.info(f"Saved processed typhoon data: {len(typhoon_data)} records") + except Exception as e: + logging.warning(f"Could not save processed data: {e}") - # Final validation of typhoon data - if typhoon_data is not None: - # Ensure required columns exist with fallback values + # FIXED: Final validation and enhancement + if typhoon_data is not None and not typhoon_data.empty: + # Ensure required columns exist with proper defaults required_columns = { - 'SID': 'UNKNOWN', + 'SID': lambda: f"UNKNOWN_{typhoon_data.index}", 'ISO_TIME': pd.Timestamp('2000-01-01'), - 'LAT': 0.0, - 'LON': 0.0, - 'USA_WIND': np.nan, - 'USA_PRES': np.nan, + 'LAT': 20.0, + 'LON': 140.0, + 'USA_WIND': 30.0, + 'USA_PRES': 1013.0, 'NAME': 'UNNAMED', - 'SEASON': 2000 + 'SEASON': 2000, + 'BASIN': 'WP' } for col, default_val in required_columns.items(): if col not in typhoon_data.columns: - typhoon_data[col] = default_val - logging.warning(f"Added missing column {col} with default value") + if callable(default_val): + typhoon_data[col] = default_val() + else: + typhoon_data[col] = default_val + logging.warning(f"Added missing column {col}") + + # Ensure proper data types + numeric_cols = ['LAT', 'LON', 'USA_WIND', 'USA_PRES', 'SEASON'] + for col in numeric_cols: + if col in typhoon_data.columns: + typhoon_data[col] = pd.to_numeric(typhoon_data[col], errors='coerce') - # Ensure data types if 'ISO_TIME' in typhoon_data.columns: typhoon_data['ISO_TIME'] = pd.to_datetime(typhoon_data['ISO_TIME'], errors='coerce') - typhoon_data['LAT'] = pd.to_numeric(typhoon_data['LAT'], errors='coerce') - typhoon_data['LON'] = pd.to_numeric(typhoon_data['LON'], errors='coerce') - typhoon_data['USA_WIND'] = pd.to_numeric(typhoon_data['USA_WIND'], errors='coerce') - 
typhoon_data['USA_PRES'] = pd.to_numeric(typhoon_data['USA_PRES'], errors='coerce') - # Remove rows with invalid coordinates - valid_coords = typhoon_data['LAT'].notna() & typhoon_data['LON'].notna() - typhoon_data = typhoon_data[valid_coords] + # Remove only clearly invalid records + valid_mask = ( + typhoon_data['LAT'].notna() & + typhoon_data['LON'].notna() & + typhoon_data['LAT'].between(-90, 90) & + typhoon_data['LON'].between(-180, 180) + ) - logging.info(f"Final typhoon data: {len(typhoon_data)} records after validation") + original_count = len(typhoon_data) + typhoon_data = typhoon_data[valid_mask] + logging.info(f"Final typhoon data: {len(typhoon_data)} records (removed {original_count - len(typhoon_data)} invalid)") - # Log basin distribution - if 'BASIN' in typhoon_data.columns: - basin_counts = typhoon_data['BASIN'].value_counts() - logging.info(f"Basin distribution: {dict(basin_counts)}") - - return oni_data, typhoon_data - -def create_comprehensive_fallback_typhoon_data(): - """Create comprehensive fallback typhoon data - ALL BASINS, ALL YEARS""" - # Extended date range and multiple basins - dates = pd.date_range(start='1851-01-01', end='2025-12-31', freq='D') + if len(typhoon_data) == 0: + raise Exception("CRITICAL ERROR: All typhoon data was filtered out - check data quality") - # Define basin parameters - basin_configs = { - 'WP': {'lat_range': (5, 45), 'lon_range': (100, 180), 'count': 200}, - 'EP': {'lat_range': (5, 35), 'lon_range': (-180, -80), 'count': 150}, - 'NA': {'lat_range': (5, 45), 'lon_range': (-100, -10), 'count': 100}, - 'SP': {'lat_range': (-40, -5), 'lon_range': (135, 240), 'count': 80}, - 'SI': {'lat_range': (-40, -5), 'lon_range': (30, 135), 'count': 70}, - 'NI': {'lat_range': (5, 30), 'lon_range': (40, 100), 'count': 50} - } - - data = [] - - for basin, config in basin_configs.items(): - # Generate storms for this basin - storm_dates = dates[np.random.choice(len(dates), size=config['count'], replace=False)] - - for i, date in enumerate(storm_dates): - # Create realistic storm tracks for this basin - lat_min, lat_max = config['lat_range'] - lon_min, lon_max = config['lon_range'] - - base_lat = np.random.uniform(lat_min, lat_max) - base_lon = np.random.uniform(lon_min, lon_max) - - # Generate 10-100 data points per storm (variable track lengths) - track_length = np.random.randint(10, 101) - sid = f"{basin}{i+1:02d}{date.year}" - - for j in range(track_length): - # Realistic movement patterns - if basin in ['WP', 'EP', 'NA']: # Northern Hemisphere - lat = base_lat + j * 0.15 + np.random.normal(0, 0.1) - if basin == 'WP': - lon = base_lon + j * 0.2 + np.random.normal(0, 0.1) - else: - lon = base_lon - j * 0.2 + np.random.normal(0, 0.1) - else: # Southern Hemisphere - lat = base_lat - j * 0.15 + np.random.normal(0, 0.1) - lon = base_lon + j * 0.2 + np.random.normal(0, 0.1) - - # Realistic intensity progression - if j < track_length * 0.3: # Development phase - wind = max(20, 25 + j * 3 + np.random.normal(0, 5)) - elif j < track_length * 0.7: # Mature phase - wind = max(30, 60 + np.random.normal(0, 20)) - else: # Decay phase - wind = max(20, 80 - (j - track_length * 0.7) * 2 + np.random.normal(0, 10)) - - pres = max(900, 1020 - wind + np.random.normal(0, 8)) - - data.append({ - 'SID': sid, - 'ISO_TIME': date + pd.Timedelta(hours=j*6), - 'NAME': f'FALLBACK_{basin}_{i+1}', - 'SEASON': date.year, - 'LAT': lat, - 'LON': lon, - 'USA_WIND': wind, - 'USA_PRES': pres, - 'BASIN': basin - }) + else: + raise Exception("CRITICAL ERROR: No typhoon data available after all 
loading attempts") - df = pd.DataFrame(data) - logging.info(f"Created comprehensive fallback typhoon data with {len(df)} records across all basins") - return df + return oni_data, typhoon_data def process_oni_data(oni_data): - """Process ONI data into long format - HANDLE EMPTY DATA""" + """Process ONI data into long format""" if oni_data is None or oni_data.empty: - # Create minimal ONI data - logging.warning("No ONI data available, creating minimal dataset") - years = range(1950, 2026) - data = [] - for year in years: - for month_num, month_name in enumerate(['Jan','Feb','Mar','Apr','May','Jun', - 'Jul','Aug','Sep','Oct','Nov','Dec'], 1): - data.append({ - 'Year': year, - 'Month': f'{month_num:02d}', - 'ONI': 0.0, - 'Date': pd.to_datetime(f'{year}-{month_num:02d}-01') - }) - return pd.DataFrame(data) + # Return minimal ONI data that won't break merging + return pd.DataFrame({ + 'Year': [2000], 'Month': ['01'], 'ONI': [0.0], + 'Date': [pd.Timestamp('2000-01-01')] + }) oni_long = oni_data.melt(id_vars=['Year'], var_name='Month', value_name='ONI') month_map = {'Jan':'01','Feb':'02','Mar':'03','Apr':'04','May':'05','Jun':'06', 'Jul':'07','Aug':'08','Sep':'09','Oct':'10','Nov':'11','Dec':'12'} oni_long['Month'] = oni_long['Month'].map(month_map) oni_long['Date'] = pd.to_datetime(oni_long['Year'].astype(str)+'-'+oni_long['Month']+'-01') - oni_long['ONI'] = pd.to_numeric(oni_long['ONI'], errors='coerce').fillna(0.0) + oni_long['ONI'] = pd.to_numeric(oni_long['ONI'], errors='coerce').fillna(0) return oni_long def process_typhoon_data(typhoon_data): - """Process typhoon data - ALWAYS PRESERVE ALL DATA""" + """Process typhoon data - FIXED to preserve all data""" if typhoon_data is None or typhoon_data.empty: - return pd.DataFrame() + raise Exception("No typhoon data to process") - # Process without filtering based on ONI availability + # Ensure proper data types if 'ISO_TIME' in typhoon_data.columns: typhoon_data['ISO_TIME'] = pd.to_datetime(typhoon_data['ISO_TIME'], errors='coerce') - typhoon_data['USA_WIND'] = pd.to_numeric(typhoon_data['USA_WIND'], errors='coerce') - typhoon_data['USA_PRES'] = pd.to_numeric(typhoon_data['USA_PRES'], errors='coerce') - typhoon_data['LON'] = pd.to_numeric(typhoon_data['LON'], errors='coerce') - # Log basin information - if 'SID' in typhoon_data.columns: - basins = typhoon_data['SID'].str[:2].unique() - logging.info(f"Available basins in typhoon data: {sorted(basins)}") + numeric_cols = ['USA_WIND', 'USA_PRES', 'LON', 'LAT'] + for col in numeric_cols: + if col in typhoon_data.columns: + typhoon_data[col] = pd.to_numeric(typhoon_data[col], errors='coerce') - # Get maximum values per storm - typhoon_max = typhoon_data.groupby('SID').agg({ - 'USA_WIND':'max','USA_PRES':'min','ISO_TIME':'first','SEASON':'first','NAME':'first', - 'LAT':'first','LON':'first' - }).reset_index() + logging.info(f"Processing {len(typhoon_data)} typhoon records") + # Get maximum values per storm + agg_dict = {} + if 'USA_WIND' in typhoon_data.columns: + agg_dict['USA_WIND'] = 'max' + if 'USA_PRES' in typhoon_data.columns: + agg_dict['USA_PRES'] = 'min' + if 'ISO_TIME' in typhoon_data.columns: + agg_dict['ISO_TIME'] = 'first' + if 'SEASON' in typhoon_data.columns: + agg_dict['SEASON'] = 'first' + if 'NAME' in typhoon_data.columns: + agg_dict['NAME'] = 'first' + if 'LAT' in typhoon_data.columns: + agg_dict['LAT'] = 'first' + if 'LON' in typhoon_data.columns: + agg_dict['LON'] = 'first' + + typhoon_max = typhoon_data.groupby('SID').agg(agg_dict).reset_index() + + # Add time-based columns for 
merging if 'ISO_TIME' in typhoon_max.columns: typhoon_max['Month'] = typhoon_max['ISO_TIME'].dt.strftime('%m') typhoon_max['Year'] = typhoon_max['ISO_TIME'].dt.year else: - # Fallback if no ISO_TIME - typhoon_max['Month'] = '01' - typhoon_max['Year'] = typhoon_max['SEASON'] + # Use SEASON if available, otherwise default + if 'SEASON' in typhoon_max.columns: + typhoon_max['Year'] = typhoon_max['SEASON'] + else: + typhoon_max['Year'] = 2000 + typhoon_max['Month'] = '01' # Default month - # Categorize ALL storms (including very weak ones) - typhoon_max['Category'] = typhoon_max['USA_WIND'].apply(categorize_typhoon_enhanced) + # Add category + if 'USA_WIND' in typhoon_max.columns: + typhoon_max['Category'] = typhoon_max['USA_WIND'].apply(categorize_typhoon_enhanced) + else: + typhoon_max['Category'] = 'Unknown' logging.info(f"Processed {len(typhoon_max)} unique storms") return typhoon_max def merge_data(oni_long, typhoon_max): - """FIXED: Merge data but KEEP ALL TYPHOON DATA even without ONI""" + """Merge ONI and typhoon data - FIXED to preserve typhoon data even without ONI""" if typhoon_max is None or typhoon_max.empty: - return pd.DataFrame() + raise Exception("No typhoon data to merge") if oni_long is None or oni_long.empty: - # No ONI data available - add dummy ONI values - logging.warning("No ONI data available - adding neutral ONI values") + # If no ONI data, add default ONI values + logging.warning("No ONI data available - using neutral values") typhoon_max['ONI'] = 0.0 return typhoon_max - # Use LEFT JOIN to keep all typhoon data - merged = pd.merge(typhoon_max, oni_long, on=['Year','Month'], how='left') + # Merge with ONI data + merged = pd.merge(typhoon_max, oni_long, on=['Year', 'Month'], how='left') - # Fill missing ONI values with neutral (0.0) + # Fill missing ONI values with neutral merged['ONI'] = merged['ONI'].fillna(0.0) - logging.info(f"Merged data: {len(merged)} storms total") - missing_oni = merged['ONI'].isna().sum() - if missing_oni > 0: - logging.info(f"Filled {missing_oni} missing ONI values with neutral (0.0)") - + logging.info(f"Merged data: {len(merged)} storms with ONI values") return merged # ----------------------------- -# ENHANCED: Categorization Functions - FIXED TAIWAN CLASSIFICATION +# Enhanced Categorization Functions # ----------------------------- def categorize_typhoon_enhanced(wind_speed): @@ -809,75 +663,49 @@ def categorize_typhoon_enhanced(wind_speed): if pd.isna(wind_speed): return 'Unknown' - # Convert to knots if in m/s (some datasets use m/s) if wind_speed < 10: # Likely in m/s, convert to knots wind_speed = wind_speed * 1.94384 - # FIXED thresholds to include TD - if wind_speed < 34: # Below 34 knots = Tropical Depression + if wind_speed < 34: return 'Tropical Depression' - elif wind_speed < 64: # 34-63 knots = Tropical Storm + elif wind_speed < 64: return 'Tropical Storm' - elif wind_speed < 83: # 64-82 knots = Category 1 Typhoon + elif wind_speed < 83: return 'C1 Typhoon' - elif wind_speed < 96: # 83-95 knots = Category 2 Typhoon + elif wind_speed < 96: return 'C2 Typhoon' - elif wind_speed < 113: # 96-112 knots = Category 3 Strong Typhoon + elif wind_speed < 113: return 'C3 Strong Typhoon' - elif wind_speed < 137: # 113-136 knots = Category 4 Very Strong Typhoon + elif wind_speed < 137: return 'C4 Very Strong Typhoon' - else: # 137+ knots = Category 5 Super Typhoon + else: return 'C5 Super Typhoon' def categorize_typhoon_taiwan_fixed(wind_speed): - """ - FIXED Taiwan categorization system based on CMA 2006 standards - Reference: CMA Tropical 
Cyclone Data Center official classification - """ + """FIXED Taiwan categorization system based on CMA 2006 standards""" if pd.isna(wind_speed): return 'Tropical Depression' - # Convert from knots to m/s if input appears to be in knots if wind_speed > 50: # Likely in knots, convert to m/s wind_speed_ms = wind_speed * 0.514444 else: wind_speed_ms = wind_speed - # CMA 2006 Classification Standards (used by Taiwan CWA) if wind_speed_ms >= 51.0: - return 'Super Typhoon' # ≥51.0 m/s (≥99.2 kt) + return 'Super Typhoon' elif wind_speed_ms >= 41.5: - return 'Severe Typhoon' # 41.5–50.9 m/s (80.7–99.1 kt) + return 'Severe Typhoon' elif wind_speed_ms >= 32.7: - return 'Typhoon' # 32.7–41.4 m/s (63.6–80.6 kt) + return 'Typhoon' elif wind_speed_ms >= 24.5: - return 'Severe Tropical Storm' # 24.5–32.6 m/s (47.6–63.5 kt) + return 'Severe Tropical Storm' elif wind_speed_ms >= 17.2: - return 'Tropical Storm' # 17.2–24.4 m/s (33.4–47.5 kt) - else: - return 'Tropical Depression' # < 17.2 m/s (< 33.4 kt) - -# Original function for backward compatibility -def categorize_typhoon(wind_speed): - """Original categorize typhoon function for backward compatibility""" - return categorize_typhoon_enhanced(wind_speed) - -def classify_enso_phases(oni_value): - """Classify ENSO phases based on ONI value - HANDLE MISSING VALUES""" - if isinstance(oni_value, pd.Series): - oni_value = oni_value.iloc[0] - if pd.isna(oni_value): - return 'Neutral' # Default to neutral for missing ONI - if oni_value >= 0.5: - return 'El Nino' - elif oni_value <= -0.5: - return 'La Nina' + return 'Tropical Storm' else: - return 'Neutral' + return 'Tropical Depression' -# FIXED: Combined categorization function def categorize_typhoon_by_standard_fixed(wind_speed, standard='atlantic'): - """FIXED categorization function supporting both standards with correct Taiwan thresholds""" + """FIXED categorization function supporting both standards""" if pd.isna(wind_speed): return 'Tropical Depression', '#808080' @@ -885,9 +713,7 @@ def categorize_typhoon_by_standard_fixed(wind_speed, standard='atlantic'): category = categorize_typhoon_taiwan_fixed(wind_speed) color = taiwan_color_map_fixed.get(category, '#808080') return category, color - else: - # Atlantic/International standard (unchanged) if wind_speed >= 137: return 'C5 Super Typhoon', '#FF0000' elif wind_speed >= 113: @@ -903,8 +729,21 @@ def categorize_typhoon_by_standard_fixed(wind_speed, standard='atlantic'): else: return 'Tropical Depression', '#808080' +def classify_enso_phases(oni_value): + """Classify ENSO phases based on ONI value""" + if isinstance(oni_value, pd.Series): + oni_value = oni_value.iloc[0] + if pd.isna(oni_value): + return 'Neutral' + if oni_value >= 0.5: + return 'El Nino' + elif oni_value <= -0.5: + return 'La Nina' + else: + return 'Neutral' + # ----------------------------- -# FIXED: ADVANCED ML FEATURES WITH ROBUST ERROR HANDLING +# FIXED: Advanced ML Features # ----------------------------- def extract_storm_features(typhoon_data): @@ -914,7 +753,6 @@ def extract_storm_features(typhoon_data): logging.error("No typhoon data provided for feature extraction") return None - # Basic features - ensure columns exist basic_features = [] for sid in typhoon_data['SID'].unique(): storm_data = typhoon_data[typhoon_data['SID'] == sid].copy() @@ -922,7 +760,6 @@ def extract_storm_features(typhoon_data): if len(storm_data) == 0: continue - # Initialize feature dict with safe defaults features = {'SID': sid} # Wind statistics @@ -972,16 +809,13 @@ def 
extract_storm_features(typhoon_data): features['LON_max'] = lon_values.max() features['LON_min'] = lon_values.min() - # Genesis location (first valid position) features['genesis_lat'] = lat_values.iloc[0] features['genesis_lon'] = lon_values.iloc[0] - features['genesis_intensity'] = features['USA_WIND_mean'] # Use mean as fallback + features['genesis_intensity'] = features['USA_WIND_mean'] - # Track characteristics features['lat_range'] = lat_values.max() - lat_values.min() features['lon_range'] = lon_values.max() - lon_values.min() - # Calculate track distance if len(lat_values) > 1: distances = [] for i in range(1, len(lat_values)): @@ -994,7 +828,6 @@ def extract_storm_features(typhoon_data): features['total_distance'] = 0 features['avg_speed'] = 0 - # Track curvature if len(lat_values) > 2: bearing_changes = [] for i in range(1, len(lat_values)-1): @@ -1012,7 +845,6 @@ def extract_storm_features(typhoon_data): else: features['avg_curvature'] = 0 else: - # Default location values features.update({ 'LAT_mean': 20, 'LAT_std': 0, 'LAT_max': 20, 'LAT_min': 20, 'LON_mean': 140, 'LON_std': 0, 'LON_max': 140, 'LON_min': 140, @@ -1020,26 +852,14 @@ def extract_storm_features(typhoon_data): 'lat_range': 0, 'lon_range': 0, 'total_distance': 0, 'avg_speed': 0, 'avg_curvature': 0 }) - else: - # Default location values if columns missing - features.update({ - 'LAT_mean': 20, 'LAT_std': 0, 'LAT_max': 20, 'LAT_min': 20, - 'LON_mean': 140, 'LON_std': 0, 'LON_max': 140, 'LON_min': 140, - 'genesis_lat': 20, 'genesis_lon': 140, 'genesis_intensity': 30, - 'lat_range': 0, 'lon_range': 0, 'total_distance': 0, - 'avg_speed': 0, 'avg_curvature': 0 - }) - # Track length features['track_length'] = len(storm_data) - # Add seasonal information if 'SEASON' in storm_data.columns: features['season'] = storm_data['SEASON'].iloc[0] else: features['season'] = 2000 - # Add basin information if 'BASIN' in storm_data.columns: features['basin'] = storm_data['BASIN'].iloc[0] elif 'SID' in storm_data.columns: @@ -1053,17 +873,13 @@ def extract_storm_features(typhoon_data): logging.error("No valid storm features could be extracted") return None - # Convert to DataFrame storm_features = pd.DataFrame(basic_features) - # Ensure all numeric columns are properly typed numeric_columns = [col for col in storm_features.columns if col not in ['SID', 'basin']] for col in numeric_columns: storm_features[col] = pd.to_numeric(storm_features[col], errors='coerce').fillna(0) logging.info(f"Successfully extracted features for {len(storm_features)} storms") - logging.info(f"Feature columns: {list(storm_features.columns)}") - return storm_features except Exception as e: @@ -1073,38 +889,30 @@ def extract_storm_features(typhoon_data): return None def perform_dimensionality_reduction(storm_features, method='umap', n_components=2): - """Perform UMAP or t-SNE dimensionality reduction - FIXED VERSION""" + """Perform UMAP or t-SNE dimensionality reduction""" try: if storm_features is None or storm_features.empty: raise ValueError("No storm features provided") - # Select numeric features for clustering - FIXED feature_cols = [] for col in storm_features.columns: if col not in ['SID', 'basin'] and storm_features[col].dtype in ['float64', 'int64']: - # Check if column has valid data valid_data = storm_features[col].dropna() - if len(valid_data) > 0 and valid_data.std() > 0: # Only include columns with variance + if len(valid_data) > 0 and valid_data.std() > 0: feature_cols.append(col) if len(feature_cols) == 0: raise ValueError("No valid numeric 
features found for clustering") - logging.info(f"Using {len(feature_cols)} features for clustering: {feature_cols}") - X = storm_features[feature_cols].fillna(0) - # Check if we have enough samples if len(X) < 2: raise ValueError("Need at least 2 storms for clustering") - # Standardize features scaler = StandardScaler() X_scaled = scaler.fit_transform(X) - # Perform dimensionality reduction if method.lower() == 'umap' and UMAP_AVAILABLE and len(X_scaled) >= 4: - # UMAP parameters optimized for typhoon data - fixed warnings n_neighbors = min(15, len(X_scaled) - 1) reducer = umap.UMAP( n_components=n_components, @@ -1112,12 +920,11 @@ def perform_dimensionality_reduction(storm_features, method='umap', n_components min_dist=0.1, metric='euclidean', random_state=42, - n_jobs=1 # Explicitly set to avoid warning + n_jobs=1 ) elif method.lower() == 'tsne' and len(X_scaled) >= 4: - # t-SNE parameters perplexity = min(30, len(X_scaled) // 4) - perplexity = max(1, perplexity) # Ensure perplexity is at least 1 + perplexity = max(1, perplexity) reducer = TSNE( n_components=n_components, perplexity=perplexity, @@ -1126,14 +933,11 @@ def perform_dimensionality_reduction(storm_features, method='umap', n_components random_state=42 ) else: - # Fallback to PCA reducer = PCA(n_components=n_components, random_state=42) - # Fit and transform embedding = reducer.fit_transform(X_scaled) logging.info(f"Dimensionality reduction successful: {X_scaled.shape} -> {embedding.shape}") - return embedding, feature_cols, scaler except Exception as e: @@ -1141,17 +945,15 @@ def perform_dimensionality_reduction(storm_features, method='umap', n_components raise def cluster_storms_data(embedding, method='dbscan', eps=0.5, min_samples=3): - """Cluster storms based on their embedding - FIXED NAME VERSION""" + """Cluster storms based on their embedding""" try: if len(embedding) < 2: - return np.array([0] * len(embedding)) # Single cluster for insufficient data + return np.array([0] * len(embedding)) if method.lower() == 'dbscan': - # Adjust min_samples based on data size min_samples = min(min_samples, max(2, len(embedding) // 5)) clusterer = DBSCAN(eps=eps, min_samples=min_samples) elif method.lower() == 'kmeans': - # Adjust n_clusters based on data size n_clusters = min(5, max(2, len(embedding) // 3)) clusterer = KMeans(n_clusters=n_clusters, random_state=42) else: @@ -1160,18 +962,15 @@ def cluster_storms_data(embedding, method='dbscan', eps=0.5, min_samples=3): clusters = clusterer.fit_predict(embedding) logging.info(f"Clustering complete: {len(np.unique(clusters))} clusters found") - return clusters except Exception as e: logging.error(f"Error in cluster_storms_data: {e}") - # Return single cluster as fallback return np.array([0] * len(embedding)) def create_separate_clustering_plots(storm_features, typhoon_data, method='umap'): - """Create separate plots for clustering analysis - ENHANCED CLARITY VERSION""" + """Create separate plots for clustering analysis""" try: - # Validate inputs if storm_features is None or storm_features.empty: raise ValueError("No storm features available for clustering") @@ -1180,23 +979,17 @@ def create_separate_clustering_plots(storm_features, typhoon_data, method='umap' logging.info(f"Starting clustering visualization with {len(storm_features)} storms") - # Perform dimensionality reduction embedding, feature_cols, scaler = perform_dimensionality_reduction(storm_features, method) - - # Perform clustering cluster_labels = cluster_storms_data(embedding, 'dbscan') - # Add clustering results to storm 
features storm_features_viz = storm_features.copy() storm_features_viz['cluster'] = cluster_labels storm_features_viz['dim1'] = embedding[:, 0] storm_features_viz['dim2'] = embedding[:, 1] - # Merge with typhoon data for additional info - SAFE MERGE try: storm_info = typhoon_data.groupby('SID').first()[['NAME', 'SEASON']].reset_index() storm_features_viz = storm_features_viz.merge(storm_info, on='SID', how='left') - # Fill missing values storm_features_viz['NAME'] = storm_features_viz['NAME'].fillna('UNNAMED') storm_features_viz['SEASON'] = storm_features_viz['SEASON'].fillna(2000) except Exception as merge_error: @@ -1204,14 +997,12 @@ def create_separate_clustering_plots(storm_features, typhoon_data, method='umap' storm_features_viz['NAME'] = 'UNNAMED' storm_features_viz['SEASON'] = 2000 - # Get unique clusters and assign distinct colors unique_clusters = sorted([c for c in storm_features_viz['cluster'].unique() if c != -1]) noise_count = len(storm_features_viz[storm_features_viz['cluster'] == -1]) - # 1. Enhanced clustering scatter plot with clear cluster identification + # 1. Clustering scatter plot fig_cluster = go.Figure() - # Add noise points first if noise_count > 0: noise_data = storm_features_viz[storm_features_viz['cluster'] == -1] fig_cluster.add_trace( @@ -1236,7 +1027,6 @@ def create_separate_clustering_plots(storm_features, typhoon_data, method='umap' ) ) - # Add clusters with distinct colors and shapes cluster_symbols = ['circle', 'square', 'diamond', 'triangle-up', 'triangle-down', 'pentagon', 'hexagon', 'star', 'cross', 'circle-open'] @@ -1277,17 +1067,15 @@ def create_separate_clustering_plots(storm_features, typhoon_data, method='umap' showlegend=True ) - # 2. ENHANCED route map with cluster legends and clearer representation + # 2. 
Route map fig_routes = go.Figure() - # Create a comprehensive legend showing cluster characteristics cluster_info_text = [] for i, cluster in enumerate(unique_clusters): cluster_storm_ids = storm_features_viz[storm_features_viz['cluster'] == cluster]['SID'].tolist() color = CLUSTER_COLORS[i % len(CLUSTER_COLORS)] - # Get cluster statistics for legend cluster_data = storm_features_viz[storm_features_viz['cluster'] == cluster] avg_intensity = cluster_data['USA_WIND_max'].mean() if 'USA_WIND_max' in cluster_data.columns else 0 avg_pressure = cluster_data['USA_PRES_min'].mean() if 'USA_PRES_min' in cluster_data.columns else 1000 @@ -1297,13 +1085,11 @@ def create_separate_clustering_plots(storm_features, typhoon_data, method='umap' f"Avg: {avg_intensity:.0f}kt/{avg_pressure:.0f}hPa" ) - # Add multiple storms per cluster with clear identification storms_added = 0 - for j, sid in enumerate(cluster_storm_ids[:8]): # Show up to 8 storms per cluster + for j, sid in enumerate(cluster_storm_ids[:8]): try: storm_track = typhoon_data[typhoon_data['SID'] == sid].sort_values('ISO_TIME') if len(storm_track) > 1: - # Ensure valid coordinates valid_coords = storm_track['LAT'].notna() & storm_track['LON'].notna() storm_track = storm_track[valid_coords] @@ -1311,10 +1097,9 @@ def create_separate_clustering_plots(storm_features, typhoon_data, method='umap' storm_name = storm_track['NAME'].iloc[0] if pd.notna(storm_track['NAME'].iloc[0]) else 'UNNAMED' storm_season = storm_track['SEASON'].iloc[0] if 'SEASON' in storm_track.columns else 'Unknown' - # Vary line style for different storms in same cluster line_styles = ['solid', 'dash', 'dot', 'dashdot'] line_style = line_styles[j % len(line_styles)] - line_width = 3 if j == 0 else 2 # First storm thicker + line_width = 3 if j == 0 else 2 fig_routes.add_trace( go.Scattergeo( @@ -1341,9 +1126,7 @@ def create_separate_clustering_plots(storm_features, typhoon_data, method='umap' logging.warning(f"Error adding track for storm {sid}: {track_error}") continue - # Add cluster centroid marker if len(cluster_storm_ids) > 0: - # Calculate average genesis location for cluster cluster_storm_data = storm_features_viz[storm_features_viz['cluster'] == cluster] if 'genesis_lat' in cluster_storm_data.columns and 'genesis_lon' in cluster_storm_data.columns: avg_lat = cluster_storm_data['genesis_lat'].mean() @@ -1373,7 +1156,6 @@ def create_separate_clustering_plots(storm_features, typhoon_data, method='umap' ) ) - # Update route map layout with enhanced information and LARGER SIZE fig_routes.update_layout( title=f"Storm Routes by {method.upper()} Clusters
<br>Different line styles = different storms in same cluster | Stars = cluster centers",
            geo=dict(
@@ -1385,14 +1167,13 @@ def create_separate_clustering_plots(storm_features, typhoon_data, method='umap'
                showcoastlines=True,
                coastlinecolor="Gray",
                center=dict(lat=20, lon=140),
-                projection_scale=2.5  # Larger map
+                projection_scale=2.5
            ),
-            height=800,  # Much larger height
-            width=1200,  # Wider map
+            height=800,
+            width=1200,
            showlegend=True
        )

-        # Add cluster info annotation
        cluster_summary = "
".join(cluster_info_text) fig_routes.add_annotation( text=f"Cluster Summary:
{cluster_summary}", @@ -1405,7 +1186,7 @@ def create_separate_clustering_plots(storm_features, typhoon_data, method='umap' borderwidth=1 ) - # 3. Enhanced pressure evolution plot with cluster identification + # 3. Pressure evolution plot fig_pressure = go.Figure() for i, cluster in enumerate(unique_clusters): @@ -1413,16 +1194,13 @@ def create_separate_clustering_plots(storm_features, typhoon_data, method='umap' color = CLUSTER_COLORS[i % len(CLUSTER_COLORS)] cluster_pressures = [] - for j, sid in enumerate(cluster_storm_ids[:5]): # Limit to 5 storms per cluster + for j, sid in enumerate(cluster_storm_ids[:5]): try: storm_track = typhoon_data[typhoon_data['SID'] == sid].sort_values('ISO_TIME') if len(storm_track) > 1 and 'USA_PRES' in storm_track.columns: pressure_values = pd.to_numeric(storm_track['USA_PRES'], errors='coerce').dropna() if len(pressure_values) > 0: storm_name = storm_track['NAME'].iloc[0] if pd.notna(storm_track['NAME'].iloc[0]) else 'UNNAMED' - time_hours = range(len(pressure_values)) - - # Normalize time to show relative progression normalized_time = np.linspace(0, 100, len(pressure_values)) fig_pressure.add_trace( @@ -1447,7 +1225,6 @@ def create_separate_clustering_plots(storm_features, typhoon_data, method='umap' except Exception as e: continue - # Add cluster average line if cluster_pressures: avg_pressure = np.mean(cluster_pressures) fig_pressure.add_hline( @@ -1465,7 +1242,7 @@ def create_separate_clustering_plots(storm_features, typhoon_data, method='umap' height=500 ) - # 4. Enhanced wind evolution plot + # 4. Wind evolution plot fig_wind = go.Figure() for i, cluster in enumerate(unique_clusters): @@ -1473,15 +1250,13 @@ def create_separate_clustering_plots(storm_features, typhoon_data, method='umap' color = CLUSTER_COLORS[i % len(CLUSTER_COLORS)] cluster_winds = [] - for j, sid in enumerate(cluster_storm_ids[:5]): # Limit to 5 storms per cluster + for j, sid in enumerate(cluster_storm_ids[:5]): try: storm_track = typhoon_data[typhoon_data['SID'] == sid].sort_values('ISO_TIME') if len(storm_track) > 1 and 'USA_WIND' in storm_track.columns: wind_values = pd.to_numeric(storm_track['USA_WIND'], errors='coerce').dropna() if len(wind_values) > 0: storm_name = storm_track['NAME'].iloc[0] if pd.notna(storm_track['NAME'].iloc[0]) else 'UNNAMED' - - # Normalize time to show relative progression normalized_time = np.linspace(0, 100, len(wind_values)) fig_wind.add_trace( @@ -1506,7 +1281,6 @@ def create_separate_clustering_plots(storm_features, typhoon_data, method='umap' except Exception as e: continue - # Add cluster average line if cluster_winds: avg_wind = np.mean(cluster_winds) fig_wind.add_hline( @@ -1524,7 +1298,7 @@ def create_separate_clustering_plots(storm_features, typhoon_data, method='umap' height=500 ) - # Generate enhanced cluster statistics with clear explanations + # Generate statistics try: stats_text = f"ENHANCED {method.upper()} CLUSTER ANALYSIS RESULTS\n" + "="*60 + "\n\n" stats_text += f"🔍 DIMENSIONALITY REDUCTION: {method.upper()}\n" @@ -1548,7 +1322,6 @@ def create_separate_clustering_plots(storm_features, typhoon_data, method='umap' stats_text += f"🎯 CLUSTER {cluster}: {storm_count} storms\n" stats_text += f" 🎨 Color: {CLUSTER_COLORS[cluster % len(CLUSTER_COLORS)]}\n" - # Add detailed statistics if available if 'USA_WIND_max' in cluster_data.columns: wind_mean = cluster_data['USA_WIND_max'].mean() wind_std = cluster_data['USA_WIND_max'].std() @@ -1568,7 +1341,6 @@ def create_separate_clustering_plots(storm_features, typhoon_data, method='umap' 
lon_mean = cluster_data['genesis_lon'].mean() stats_text += f" 🎯 Genesis Region: {lat_mean:.1f}°N, {lon_mean:.1f}°E\n" - # Add interpretation if wind_mean < 50: stats_text += " 💡 Pattern: Weaker storm group\n" elif wind_mean > 100: @@ -1578,7 +1350,6 @@ def create_separate_clustering_plots(storm_features, typhoon_data, method='umap' stats_text += "\n" - # Add explanation of the analysis stats_text += "📖 INTERPRETATION GUIDE:\n" stats_text += f"• {method.upper()} reduces storm characteristics to 2D for visualization\n" stats_text += "• DBSCAN finds natural groupings without preset number of clusters\n" @@ -1611,113 +1382,9 @@ def create_separate_clustering_plots(storm_features, typhoon_data, method='umap' return error_fig, error_fig, error_fig, error_fig, f"Error in clustering: {str(e)}" # ----------------------------- -# ENHANCED: Advanced Prediction System with Route Forecasting +# FIXED: Prediction System # ----------------------------- -def create_advanced_prediction_model(typhoon_data): - """Create advanced ML model for intensity and route prediction""" - try: - if typhoon_data is None or typhoon_data.empty: - return None, "No data available for model training" - - # Prepare training data - features = [] - targets = [] - - for sid in typhoon_data['SID'].unique(): - storm_data = typhoon_data[typhoon_data['SID'] == sid].sort_values('ISO_TIME') - - if len(storm_data) < 3: # Need at least 3 points for prediction - continue - - for i in range(len(storm_data) - 1): - current = storm_data.iloc[i] - next_point = storm_data.iloc[i + 1] - - # Extract features (current state) - feature_row = [] - - # Current position - feature_row.extend([ - current.get('LAT', 20), - current.get('LON', 140) - ]) - - # Current intensity - feature_row.extend([ - current.get('USA_WIND', 30), - current.get('USA_PRES', 1000) - ]) - - # Time features - if 'ISO_TIME' in current and pd.notna(current['ISO_TIME']): - month = current['ISO_TIME'].month - day_of_year = current['ISO_TIME'].dayofyear - else: - month = 9 # Peak season default - day_of_year = 250 - - feature_row.extend([month, day_of_year]) - - # Motion features (if previous point exists) - if i > 0: - prev = storm_data.iloc[i - 1] - dlat = current.get('LAT', 20) - prev.get('LAT', 20) - dlon = current.get('LON', 140) - prev.get('LON', 140) - speed = np.sqrt(dlat**2 + dlon**2) - bearing = np.arctan2(dlat, dlon) - else: - speed = 0 - bearing = 0 - - feature_row.extend([speed, bearing]) - - features.append(feature_row) - - # Target: next position and intensity - targets.append([ - next_point.get('LAT', 20), - next_point.get('LON', 140), - next_point.get('USA_WIND', 30) - ]) - - if len(features) < 10: # Need sufficient training data - return None, "Insufficient data for model training" - - # Train model - X = np.array(features) - y = np.array(targets) - - # Split data - X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42) - - # Create separate models for position and intensity - models = {} - - # Position model (lat, lon) - pos_model = RandomForestRegressor(n_estimators=100, random_state=42) - pos_model.fit(X_train, y_train[:, :2]) - models['position'] = pos_model - - # Intensity model (wind speed) - int_model = RandomForestRegressor(n_estimators=100, random_state=42) - int_model.fit(X_train, y_train[:, 2]) - models['intensity'] = int_model - - # Calculate model performance - pos_pred = pos_model.predict(X_test) - int_pred = int_model.predict(X_test) - - pos_mae = mean_absolute_error(y_test[:, :2], pos_pred) - int_mae = 
mean_absolute_error(y_test[:, 2], int_pred) - - model_info = f"Position MAE: {pos_mae:.2f}°, Intensity MAE: {int_mae:.2f} kt" - - return models, model_info - - except Exception as e: - return None, f"Error creating prediction model: {str(e)}" - def get_realistic_genesis_locations(): """Get realistic typhoon genesis regions based on climatology""" return { @@ -1739,7 +1406,7 @@ def predict_storm_route_and_intensity_realistic(genesis_region, month, oni_value genesis_locations = get_realistic_genesis_locations() if genesis_region not in genesis_locations: - genesis_region = "Western Pacific Main Development Region" # Default + genesis_region = "Western Pacific Main Development Region" genesis_info = genesis_locations[genesis_region] lat = genesis_info["lat"] @@ -1753,29 +1420,27 @@ def predict_storm_route_and_intensity_realistic(genesis_region, month, oni_value 'genesis_info': genesis_info } - # REALISTIC starting intensity - Tropical Depression level - base_intensity = 30 # Start at TD level (25-35 kt) + # Realistic starting intensity + base_intensity = 30 - # Environmental factors for genesis - if oni_value > 1.0: # Strong El Niño - suppressed development + # Environmental factors + if oni_value > 1.0: intensity_modifier = -6 - elif oni_value > 0.5: # Moderate El Niño + elif oni_value > 0.5: intensity_modifier = -3 - elif oni_value < -1.0: # Strong La Niña - enhanced development + elif oni_value < -1.0: intensity_modifier = +8 - elif oni_value < -0.5: # Moderate La Niña + elif oni_value < -0.5: intensity_modifier = +5 - else: # Neutral + else: intensity_modifier = oni_value * 2 - # Seasonal genesis effects seasonal_factors = { 1: -8, 2: -6, 3: -4, 4: -2, 5: 2, 6: 6, 7: 10, 8: 12, 9: 15, 10: 10, 11: 4, 12: -5 } seasonal_modifier = seasonal_factors.get(month, 0) - # Genesis region favorability region_factors = { "Western Pacific Main Development Region": 8, "South China Sea": 4, @@ -1790,160 +1455,137 @@ def predict_storm_route_and_intensity_realistic(genesis_region, month, oni_value } region_modifier = region_factors.get(genesis_region, 0) - # Calculate realistic starting intensity (TD level) predicted_intensity = base_intensity + intensity_modifier + seasonal_modifier + region_modifier - predicted_intensity = max(25, min(40, predicted_intensity)) # Keep in TD-weak TS range + predicted_intensity = max(25, min(40, predicted_intensity)) - # Add realistic uncertainty for genesis intensity_uncertainty = np.random.normal(0, 2) predicted_intensity += intensity_uncertainty - predicted_intensity = max(25, min(38, predicted_intensity)) # TD range + predicted_intensity = max(25, min(38, predicted_intensity)) results['current_prediction'] = { 'intensity_kt': predicted_intensity, - 'pressure_hpa': 1008 - (predicted_intensity - 25) * 0.6, # Realistic TD pressure + 'pressure_hpa': 1008 - (predicted_intensity - 25) * 0.6, 'category': categorize_typhoon_enhanced(predicted_intensity), 'genesis_region': genesis_region } - # REALISTIC route prediction with proper typhoon speeds + # Route prediction current_lat = lat current_lon = lon current_intensity = predicted_intensity route_points = [] - # Track storm development over time with REALISTIC SPEEDS for hour in range(0, forecast_hours + 6, 6): + # Realistic motion + if current_lat < 20: + base_speed = 0.12 + elif current_lat < 30: + base_speed = 0.18 + else: + base_speed = 0.25 - # REALISTIC typhoon motion - much faster speeds - # Typical typhoon forward speed: 15-25 km/h (0.14-0.23°/hour) - - # Base forward speed depends on latitude and storm intensity - if 
current_lat < 20: # Low latitude - slower - base_speed = 0.12 # ~13 km/h - elif current_lat < 30: # Mid latitude - moderate - base_speed = 0.18 # ~20 km/h - else: # High latitude - faster - base_speed = 0.25 # ~28 km/h - - # Intensity affects speed (stronger storms can move faster) intensity_speed_factor = 1.0 + (current_intensity - 50) / 200 base_speed *= max(0.8, min(1.4, intensity_speed_factor)) - # Beta drift (Coriolis effect) - realistic values beta_drift_lat = 0.02 * np.sin(np.radians(current_lat)) beta_drift_lon = -0.05 * np.cos(np.radians(current_lat)) - # Seasonal steering patterns with realistic speeds - if month in [6, 7, 8, 9]: # Peak season + if month in [6, 7, 8, 9]: ridge_strength = 1.2 ridge_position = 32 + 4 * np.sin(2 * np.pi * (month - 6) / 4) - else: # Off season + else: ridge_strength = 0.9 ridge_position = 28 - # REALISTIC motion based on position relative to subtropical ridge - if current_lat < ridge_position - 10: # Well south of ridge - westward movement - lat_tendency = base_speed * 0.3 + beta_drift_lat # Slight poleward - lon_tendency = -base_speed * 0.9 + beta_drift_lon # Strong westward - elif current_lat > ridge_position - 3: # Near ridge - recurvature - lat_tendency = base_speed * 0.8 + beta_drift_lat # Strong poleward - lon_tendency = base_speed * 0.4 + beta_drift_lon # Eastward - else: # In between - normal WNW motion - lat_tendency = base_speed * 0.4 + beta_drift_lat # Moderate poleward - lon_tendency = -base_speed * 0.7 + beta_drift_lon # Moderate westward + if current_lat < ridge_position - 10: + lat_tendency = base_speed * 0.3 + beta_drift_lat + lon_tendency = -base_speed * 0.9 + beta_drift_lon + elif current_lat > ridge_position - 3: + lat_tendency = base_speed * 0.8 + beta_drift_lat + lon_tendency = base_speed * 0.4 + beta_drift_lon + else: + lat_tendency = base_speed * 0.4 + beta_drift_lat + lon_tendency = -base_speed * 0.7 + beta_drift_lon - # ENSO steering modulation (realistic effects) - if oni_value > 0.5: # El Niño - more eastward/poleward motion + if oni_value > 0.5: lon_tendency += 0.05 lat_tendency += 0.02 - elif oni_value < -0.5: # La Niña - more westward motion + elif oni_value < -0.5: lon_tendency -= 0.08 lat_tendency -= 0.01 - # Add motion uncertainty that grows with time (realistic error growth) motion_uncertainty = 0.02 + (hour / 120) * 0.04 lat_noise = np.random.normal(0, motion_uncertainty) lon_noise = np.random.normal(0, motion_uncertainty) - # Update position with realistic speeds current_lat += lat_tendency + lat_noise current_lon += lon_tendency + lon_noise - # REALISTIC intensity evolution with proper development cycles - - # Development phase (first 48-72 hours) - realistic intensification + # Intensity evolution if hour <= 48: - if current_intensity < 50: # Still weak - rapid development possible - if 10 <= current_lat <= 25 and 115 <= current_lon <= 165: # Favorable environment + if current_intensity < 50: + if 10 <= current_lat <= 25 and 115 <= current_lon <= 165: intensity_tendency = 4.5 if current_intensity < 35 else 3.0 - elif 120 <= current_lon <= 155 and 15 <= current_lat <= 20: # Best environment + elif 120 <= current_lon <= 155 and 15 <= current_lat <= 20: intensity_tendency = 6.0 if current_intensity < 40 else 4.0 else: intensity_tendency = 2.0 - elif current_intensity < 80: # Moderate intensity + elif current_intensity < 80: intensity_tendency = 2.5 if (120 <= current_lon <= 155 and 10 <= current_lat <= 25) else 1.0 - else: # Already strong + else: intensity_tendency = 1.0 - # Mature phase (48-120 hours) - peak 
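intensity maintenance.

# Aside: a compact, illustrative sketch of the steering logic above. The
# constants mirror the ones used in this function; the helper name and
# signature are assumptions for illustration only:
import numpy as np

def steering_tendencies(lat, month, base_speed):
    """Return (dlat, dlon) per step from ridge position and beta drift."""
    beta_lat = 0.02 * np.sin(np.radians(lat))
    beta_lon = -0.05 * np.cos(np.radians(lat))
    ridge = 32 + 4 * np.sin(2 * np.pi * (month - 6) / 4) if month in (6, 7, 8, 9) else 28
    if lat < ridge - 10:   # well south of the ridge: strong westward motion
        return base_speed * 0.3 + beta_lat, -base_speed * 0.9 + beta_lon
    if lat > ridge - 3:    # near the ridge: recurvature, poleward/eastward
        return base_speed * 0.8 + beta_lat, base_speed * 0.4 + beta_lon
    return base_speed * 0.4 + beta_lat, -base_speed * 0.7 + beta_lon

- # Mature phase (48-120 hours) - peak 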
intensity maintenance elif hour <= 120: - if current_lat < 25 and current_lon > 120: # Still in favorable waters + if current_lat < 25 and current_lon > 120: if current_intensity < 120: intensity_tendency = 1.5 else: - intensity_tendency = 0.0 # Maintain intensity + intensity_tendency = 0.0 else: intensity_tendency = -1.5 - # Extended phase (120+ hours) - gradual weakening else: if current_lat < 30 and current_lon > 115: - intensity_tendency = -2.0 # Slow weakening + intensity_tendency = -2.0 else: - intensity_tendency = -3.5 # Faster weakening + intensity_tendency = -3.5 - # Environmental modulation (realistic effects) - if current_lat > 35: # High latitude - rapid weakening + # Environmental modulation + if current_lat > 35: intensity_tendency -= 12 - elif current_lat > 30: # Moderate latitude + elif current_lat > 30: intensity_tendency -= 5 - elif current_lon < 110: # Land interaction + elif current_lon < 110: intensity_tendency -= 15 - elif 125 <= current_lon <= 155 and 10 <= current_lat <= 25: # Warm pool + elif 125 <= current_lon <= 155 and 10 <= current_lat <= 25: intensity_tendency += 2 - elif 160 <= current_lon <= 180 and 15 <= current_lat <= 30: # Still warm + elif 160 <= current_lon <= 180 and 15 <= current_lat <= 30: intensity_tendency += 1 - # SST effects (realistic temperature impact) - if current_lat < 8: # Very warm but weak Coriolis + if current_lat < 8: intensity_tendency += 0.5 - elif 8 <= current_lat <= 20: # Sweet spot for development + elif 8 <= current_lat <= 20: intensity_tendency += 2.0 - elif 20 < current_lat <= 30: # Marginal + elif 20 < current_lat <= 30: intensity_tendency -= 1.0 - elif current_lat > 30: # Cool waters + elif current_lat > 30: intensity_tendency -= 4.0 - # Shear effects (simplified but realistic) - if month in [12, 1, 2, 3]: # High shear season + if month in [12, 1, 2, 3]: intensity_tendency -= 2.0 - elif month in [7, 8, 9]: # Low shear season + elif month in [7, 8, 9]: intensity_tendency += 1.0 - # Update intensity with realistic bounds and variability - intensity_noise = np.random.normal(0, 1.5) # Small random fluctuations + intensity_noise = np.random.normal(0, 1.5) current_intensity += intensity_tendency + intensity_noise - current_intensity = max(20, min(185, current_intensity)) # Realistic range + current_intensity = max(20, min(185, current_intensity)) - # Calculate confidence based on forecast time and environment base_confidence = 0.92 time_penalty = (hour / 120) * 0.45 environment_penalty = 0.15 if current_lat > 30 or current_lon < 115 else 0 confidence = max(0.25, base_confidence - time_penalty - environment_penalty) - # Determine development stage if hour <= 24: stage = 'Genesis' elif hour <= 72: @@ -1963,13 +1605,12 @@ def predict_storm_route_and_intensity_realistic(genesis_region, month, oni_value 'category': categorize_typhoon_enhanced(current_intensity), 'confidence': confidence, 'development_stage': stage, - 'forward_speed_kmh': base_speed * 111, # Convert to km/h + 'forward_speed_kmh': base_speed * 111, 'pressure_hpa': max(900, 1013 - (current_intensity - 25) * 0.9) }) results['route_forecast'] = route_points - # Realistic confidence scores results['confidence_scores'] = { 'genesis': 0.88, 'early_development': 0.82, @@ -1982,7 +1623,6 @@ def predict_storm_route_and_intensity_realistic(genesis_region, month, oni_value 'long_term': max(0.3, 0.8 - (forecast_hours / 240) * 0.5) } - # Model information results['model_info'] = f"Enhanced Realistic Model - {genesis_region}" return results @@ -2005,7 +1645,6 @@ def 
create_animated_route_visualization(prediction_results, show_uncertainty=Tru route_data = prediction_results['route_forecast'] - # Extract data for plotting hours = [point['hour'] for point in route_data] lats = [point['lat'] for point in route_data] lons = [point['lon'] for point in route_data] @@ -2016,7 +1655,6 @@ def create_animated_route_visualization(prediction_results, show_uncertainty=Tru speeds = [point.get('forward_speed_kmh', 15) for point in route_data] pressures = [point.get('pressure_hpa', 1013) for point in route_data] - # Create subplot layout with map and intensity plot fig = make_subplots( rows=2, cols=2, subplot_titles=('Storm Track Animation', 'Wind Speed vs Time', 'Forward Speed vs Time', 'Pressure vs Time'), @@ -2027,11 +1665,8 @@ def create_animated_route_visualization(prediction_results, show_uncertainty=Tru ) if enable_animation: - # Add frames for animation frames = [] - # Static background elements first - # Add complete track as background fig.add_trace( go.Scattergeo( lon=lons, @@ -2045,7 +1680,6 @@ def create_animated_route_visualization(prediction_results, show_uncertainty=Tru row=1, col=1 ) - # Genesis marker (always visible) fig.add_trace( go.Scattergeo( lon=[lons[0]], @@ -2070,7 +1704,6 @@ def create_animated_route_visualization(prediction_results, show_uncertainty=Tru row=1, col=1 ) - # Create animation frames for i in range(len(route_data)): frame_lons = lons[:i+1] frame_lats = lats[:i+1] @@ -2078,12 +1711,10 @@ def create_animated_route_visualization(prediction_results, show_uncertainty=Tru frame_categories = categories[:i+1] frame_hours = hours[:i+1] - # Current position marker current_color = enhanced_color_map.get(frame_categories[-1], 'rgb(128,128,128)') current_size = 15 + (frame_intensities[-1] / 10) frame_data = [ - # Animated track up to current point go.Scattergeo( lon=frame_lons, lat=frame_lats, @@ -2098,7 +1729,6 @@ def create_animated_route_visualization(prediction_results, show_uncertainty=Tru name='Current Track', showlegend=False ), - # Current position highlight go.Scattergeo( lon=[frame_lons[-1]], lat=[frame_lats[-1]], @@ -2122,7 +1752,6 @@ def create_animated_route_visualization(prediction_results, show_uncertainty=Tru "" ) ), - # Animated wind plot go.Scatter( x=frame_hours, y=frame_intensities, @@ -2133,7 +1762,6 @@ def create_animated_route_visualization(prediction_results, show_uncertainty=Tru showlegend=False, yaxis='y2' ), - # Animated speed plot go.Scatter( x=frame_hours, y=speeds[:i+1], @@ -2144,7 +1772,6 @@ def create_animated_route_visualization(prediction_results, show_uncertainty=Tru showlegend=False, yaxis='y3' ), - # Animated pressure plot go.Scatter( x=frame_hours, y=pressures[:i+1], @@ -2168,7 +1795,6 @@ def create_animated_route_visualization(prediction_results, show_uncertainty=Tru fig.frames = frames - # Add play/pause controls fig.update_layout( updatemenus=[ { @@ -2224,14 +1850,13 @@ def create_animated_route_visualization(prediction_results, show_uncertainty=Tru "label": f"H{route_data[i]['hour']}", "method": "animate" } - for i in range(0, len(route_data), max(1, len(route_data)//20)) # Limit slider steps + for i in range(0, len(route_data), max(1, len(route_data)//20)) ] }] ) else: - # Static view with all points - # Add genesis marker + # Static view fig.add_trace( go.Scattergeo( lon=[lons[0]], @@ -2255,8 +1880,7 @@ def create_animated_route_visualization(prediction_results, show_uncertainty=Tru row=1, col=1 ) - # Add full track with intensity coloring - for i in range(0, len(route_data), max(1, 
len(route_data)//50)): # Sample points for performance
+        for i in range(0, len(route_data), max(1, len(route_data)//50)):
            point = route_data[i]
            color = enhanced_color_map.get(point['category'], 'rgb(128,128,128)')
            size = 8 + (point['intensity_kt'] / 12)
@@ -2287,7 +1911,6 @@ def create_animated_route_visualization(prediction_results, show_uncertainty=Tru
                row=1, col=1
            )
        
-        # Connect points with track line
        fig.add_trace(
            go.Scattergeo(
                lon=lons,
@@ -2301,7 +1924,6 @@ def create_animated_route_visualization(prediction_results, show_uncertainty=Tru
        )
    
    # Add static intensity, speed, and pressure plots
-    # Wind speed plot
    fig.add_trace(
        go.Scatter(
            x=hours,
@@ -2355,7 +1977,6 @@ def create_animated_route_visualization(prediction_results, show_uncertainty=Tru
        uncertainty_lons_lower = []
        
        for i, point in enumerate(route_data):
-            # Uncertainty grows with time and decreases with confidence
            base_uncertainty = 0.4 + (i / len(route_data)) * 1.8
            confidence_factor = point.get('confidence', 0.8)
            uncertainty = base_uncertainty / confidence_factor
@@ -2385,8 +2006,8 @@ def create_animated_route_visualization(prediction_results, show_uncertainty=Tru
    # Enhanced layout
    fig.update_layout(
        title=f"Comprehensive Storm Development Analysis<br>
Starting from {prediction_results['genesis_info']['description']}", - height=1000, # Taller for better subplot visibility - width=1400, # Wider + height=1000, + width=1400, showlegend=True ) @@ -2416,7 +2037,6 @@ def create_animated_route_visualization(prediction_results, show_uncertainty=Tru current = prediction_results['current_prediction'] genesis_info = prediction_results['genesis_info'] - # Calculate some statistics max_intensity = max(intensities) max_intensity_time = hours[intensities.index(max_intensity)] avg_speed = np.mean(speeds) @@ -2478,21 +2098,14 @@ MODEL: {prediction_results['model_info']} return None, error_msg # ----------------------------- -# Regression Functions (Original) +# Regression Functions # ----------------------------- def perform_wind_regression(start_year, start_month, end_year, end_month): """Perform wind regression analysis""" - if merged_data is None or merged_data.empty: - return "Wind Regression: No merged data available" - start_date = datetime(start_year, start_month, 1) end_date = datetime(end_year, end_month, 28) data = merged_data[(merged_data['ISO_TIME']>=start_date) & (merged_data['ISO_TIME']<=end_date)].dropna(subset=['USA_WIND','ONI']) - - if len(data) < 10: - return f"Wind Regression: Insufficient data ({len(data)} records)" - data['severe_typhoon'] = (data['USA_WIND']>=64).astype(int) X = sm.add_constant(data['ONI']) y = data['severe_typhoon'] @@ -2507,16 +2120,9 @@ def perform_wind_regression(start_year, start_month, end_year, end_month): def perform_pressure_regression(start_year, start_month, end_year, end_month): """Perform pressure regression analysis""" - if merged_data is None or merged_data.empty: - return "Pressure Regression: No merged data available" - start_date = datetime(start_year, start_month, 1) end_date = datetime(end_year, end_month, 28) data = merged_data[(merged_data['ISO_TIME']>=start_date) & (merged_data['ISO_TIME']<=end_date)].dropna(subset=['USA_PRES','ONI']) - - if len(data) < 10: - return f"Pressure Regression: Insufficient data ({len(data)} records)" - data['intense_typhoon'] = (data['USA_PRES']<=950).astype(int) X = sm.add_constant(data['ONI']) y = data['intense_typhoon'] @@ -2531,16 +2137,9 @@ def perform_pressure_regression(start_year, start_month, end_year, end_month): def perform_longitude_regression(start_year, start_month, end_year, end_month): """Perform longitude regression analysis""" - if merged_data is None or merged_data.empty: - return "Longitude Regression: No merged data available" - start_date = datetime(start_year, start_month, 1) end_date = datetime(end_year, end_month, 28) data = merged_data[(merged_data['ISO_TIME']>=start_date) & (merged_data['ISO_TIME']<=end_date)].dropna(subset=['LON','ONI']) - - if len(data) < 10: - return f"Longitude Regression: Insufficient data ({len(data)} records)" - data['western_typhoon'] = (data['LON']<=140).astype(int) X = sm.add_constant(data['ONI']) y = data['western_typhoon'] @@ -2554,88 +2153,33 @@ def perform_longitude_regression(start_year, start_month, end_year, end_month): return f"Longitude Regression Error: {e}" # ----------------------------- -# FIXED: Visualization Functions - WORK WITH ALL DATA +# FIXED: Visualization Functions # ----------------------------- -def get_available_years(typhoon_data): - """Get all available years - EXTENDED RANGE""" - try: - if typhoon_data is None or typhoon_data.empty: - return [str(year) for year in range(1851, 2026)] # Full historical range - - if 'ISO_TIME' in typhoon_data.columns: - years = 
typhoon_data['ISO_TIME'].dt.year.dropna().unique() - elif 'SEASON' in typhoon_data.columns: - years = typhoon_data['SEASON'].dropna().unique() - else: - years = range(1851, 2026) # Full historical range - - # Convert to strings and sort - year_strings = sorted([str(int(year)) for year in years if not pd.isna(year)]) - - # Ensure we have at least some years - if not year_strings: - return [str(year) for year in range(1851, 2026)] - - return year_strings - - except Exception as e: - print(f"Error in get_available_years: {e}") - return [str(year) for year in range(1851, 2026)] - def get_full_tracks(start_year, start_month, end_year, end_month, enso_phase, typhoon_search): - """FIXED: Get full typhoon tracks - WORKS WITHOUT ONI""" + """Get full typhoon tracks""" start_date = datetime(start_year, start_month, 1) end_date = datetime(end_year, end_month, 28) - - # Filter merged data by date - if merged_data is not None and not merged_data.empty: - filtered_data = merged_data[(merged_data['ISO_TIME']>=start_date) & (merged_data['ISO_TIME']<=end_date)].copy() - - # Add ENSO phase classification - handle missing ONI - filtered_data['ENSO_Phase'] = filtered_data['ONI'].apply(classify_enso_phases) - - if enso_phase != 'all': - filtered_data = filtered_data[filtered_data['ENSO_Phase'] == enso_phase.capitalize()] - - unique_storms = filtered_data['SID'].unique() - else: - # Work directly with typhoon_data if merged_data not available - if 'ISO_TIME' in typhoon_data.columns: - time_filter = (typhoon_data['ISO_TIME'] >= start_date) & (typhoon_data['ISO_TIME'] <= end_date) - filtered_typhoons = typhoon_data[time_filter]['SID'].unique() - else: - # Fallback - use all available storms - filtered_typhoons = typhoon_data['SID'].unique() - unique_storms = filtered_typhoons - filtered_data = pd.DataFrame({'SID': unique_storms, 'ONI': 0.0}) # Dummy for compatibility - + filtered_data = merged_data[(merged_data['ISO_TIME']>=start_date) & (merged_data['ISO_TIME']<=end_date)].copy() + filtered_data['ENSO_Phase'] = filtered_data['ONI'].apply(classify_enso_phases) + if enso_phase != 'all': + filtered_data = filtered_data[filtered_data['ENSO_Phase'] == enso_phase.capitalize()] + unique_storms = filtered_data['SID'].unique() count = len(unique_storms) fig = go.Figure() - for sid in unique_storms: storm_data = typhoon_data[typhoon_data['SID']==sid] if storm_data.empty: continue - name = storm_data['NAME'].iloc[0] if pd.notnull(storm_data['NAME'].iloc[0]) else "Unnamed" - basin = storm_data['SID'].iloc[0][:2] if 'SID' in storm_data.columns else "Unknown" - - # Get ONI value if available - if not filtered_data.empty and sid in filtered_data['SID'].values: - storm_oni = filtered_data[filtered_data['SID']==sid]['ONI'].iloc[0] - else: - storm_oni = 0.0 # Default neutral - + basin = storm_data['SID'].iloc[0][:2] + storm_oni = filtered_data[filtered_data['SID']==sid]['ONI'].iloc[0] color = 'red' if storm_oni>=0.5 else ('blue' if storm_oni<=-0.5 else 'green') - fig.add_trace(go.Scattergeo( lon=storm_data['LON'], lat=storm_data['LAT'], mode='lines', name=f"{name} ({basin})", line=dict(width=1.5, color=color), hoverinfo="name" )) - - # Handle typhoon search if typhoon_search: search_mask = typhoon_data['NAME'].str.contains(typhoon_search, case=False, na=False) if search_mask.any(): @@ -2647,9 +2191,8 @@ def get_full_tracks(start_year, start_month, end_year, end_month, enso_phase, ty line=dict(width=3, color='yellow'), marker=dict(size=5), hoverinfo="name" )) - fig.update_layout( - title=f"Typhoon Tracks 
({start_year}-{start_month:02d} to {end_year}-{end_month:02d}) - All Available Data",
+        title=f"Typhoon Tracks ({start_year}-{start_month:02d} to {end_year}-{end_month:02d})",
        geo=dict(
            projection_type='natural earth',
            showland=True,
@@ -2664,48 +2207,26 @@ def get_full_tracks(start_year, start_month, end_year, end_month, enso_phase, ty
        showlegend=True,
        height=700
    )
-    
    fig.add_annotation(
        x=0.02, y=0.98, xref="paper", yref="paper",
-        text="Red: El Niño, Blue: La Nina, Green: Neutral/Unknown ONI",
+        text="Red: El Niño, Blue: La Niña, Green: Neutral",
        showarrow=False, align="left",
        bgcolor="rgba(255,255,255,0.8)"
    )
-    
-    return fig, f"Total typhoons displayed: {count} (includes all available data)"
+    return fig, f"Total typhoons displayed: {count}"

def get_wind_analysis(start_year, start_month, end_year, end_month, enso_phase, typhoon_search):
-    """FIXED: Wind analysis that works with all data"""
+    """Get wind analysis with enhanced categorization"""
    start_date = datetime(start_year, start_month, 1)
    end_date = datetime(end_year, end_month, 28)
-    
-    if merged_data is not None and not merged_data.empty:
-        filtered_data = merged_data[(merged_data['ISO_TIME']>=start_date) & (merged_data['ISO_TIME']<=end_date)].copy()
-        filtered_data['ENSO_Phase'] = filtered_data['ONI'].apply(classify_enso_phases)
-        
-        if enso_phase != 'all':
-            filtered_data = filtered_data[filtered_data['ENSO_Phase'] == enso_phase.capitalize()]
-    else:
-        # Create filtered data from typhoon_data
-        if 'ISO_TIME' in typhoon_data.columns:
-            time_filter = (typhoon_data['ISO_TIME'] >= start_date) & (typhoon_data['ISO_TIME'] <= end_date)
-            temp_data = typhoon_data[time_filter].groupby('SID').agg({
-                'USA_WIND': 'max', 'NAME': 'first', 'SEASON': 'first', 'ISO_TIME': 'first'
-            }).reset_index()
-            temp_data['ONI'] = 0.0  # Default neutral
-            temp_data['Category'] = temp_data['USA_WIND'].apply(categorize_typhoon_enhanced)
-            temp_data['Year'] = temp_data['ISO_TIME'].dt.year
-            temp_data['ENSO_Phase'] = 'Neutral'
-            filtered_data = temp_data
-        else:
-            return go.Figure(), "No time data available for analysis"
-    
-    if filtered_data.empty:
-        return go.Figure(), "No data available for selected time period"
+    filtered_data = merged_data[(merged_data['ISO_TIME']>=start_date) & (merged_data['ISO_TIME']<=end_date)].copy()
+    filtered_data['ENSO_Phase'] = filtered_data['ONI'].apply(classify_enso_phases)
+    if enso_phase != 'all':
+        filtered_data = filtered_data[filtered_data['ENSO_Phase'] == enso_phase.capitalize()]
    
    fig = px.scatter(filtered_data, x='ONI', y='USA_WIND', color='Category',
                     hover_data=['NAME','Year','Category'],
-                     title='Wind Speed vs ONI (All Available Data)',
+                     title='Wind Speed vs ONI',
                     labels={'ONI':'ONI Value','USA_WIND':'Max Wind Speed (knots)'},
                     color_discrete_map=enhanced_color_map)
    
@@ -2719,49 +2240,21 @@ def get_wind_analysis(start_year, start_month, end_year, end_month, enso_phase,
            text=filtered_data.loc[mask,'NAME']+' ('+filtered_data.loc[mask,'Year'].astype(str)+')'
        ))
    
-    # Try regression analysis if we have sufficient data
-    try:
-        if len(filtered_data) > 10:
-            regression = perform_wind_regression(start_year, start_month, end_year, end_month)
-        else:
-            regression = f"Wind Analysis: {len(filtered_data)} storms analyzed (insufficient for regression)"
-    except:
-        regression = f"Wind Analysis: {len(filtered_data)} storms analyzed"
-    
+    regression = perform_wind_regression(start_year, start_month, end_year, end_month)
    return fig, regression

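# Aside: each regression helper builds a binary indicator column and relates
# it to ONI. A minimal hedged sketch of the fitting step, which is elided by
# the hunk boundaries above - assuming statsmodels Logit, which matches the
# add_constant pattern used here; the exact call in the app may differ:
import numpy as np
import statsmodels.api as sm

def fit_oni_logit(data, indicator_col):
    """Fit P(indicator=1 | ONI) and report the ONI odds ratio."""
    X = sm.add_constant(data['ONI'])
    model = sm.Logit(data[indicator_col], X).fit(disp=0)
    odds_ratio = float(np.exp(model.params['ONI']))
    return model, odds_ratio

# e.g. fit_oni_logit(data, 'severe_typhoon') would mirror perform_wind_regression.

def get_pressure_analysis(start_year, start_month, end_year, end_month, enso_phase, typhoon_search):
-    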
"""FIXED: Pressure analysis that works with all data""" + """Get pressure analysis with enhanced categorization""" start_date = datetime(start_year, start_month, 1) end_date = datetime(end_year, end_month, 28) - - if merged_data is not None and not merged_data.empty: - filtered_data = merged_data[(merged_data['ISO_TIME']>=start_date) & (merged_data['ISO_TIME']<=end_date)].copy() - filtered_data['ENSO_Phase'] = filtered_data['ONI'].apply(classify_enso_phases) - - if enso_phase != 'all': - filtered_data = filtered_data[filtered_data['ENSO_Phase'] == enso_phase.capitalize()] - else: - # Create filtered data from typhoon_data - if 'ISO_TIME' in typhoon_data.columns: - time_filter = (typhoon_data['ISO_TIME'] >= start_date) & (typhoon_data['ISO_TIME'] <= end_date) - temp_data = typhoon_data[time_filter].groupby('SID').agg({ - 'USA_PRES': 'min', 'NAME': 'first', 'SEASON': 'first', 'ISO_TIME': 'first', 'USA_WIND': 'max' - }).reset_index() - temp_data['ONI'] = 0.0 # Default neutral - temp_data['Category'] = temp_data['USA_WIND'].apply(categorize_typhoon_enhanced) - temp_data['Year'] = temp_data['ISO_TIME'].dt.year - temp_data['ENSO_Phase'] = 'Neutral' - filtered_data = temp_data - else: - return go.Figure(), "No time data available for analysis" - - if filtered_data.empty: - return go.Figure(), "No data available for selected time period" + filtered_data = merged_data[(merged_data['ISO_TIME']>=start_date) & (merged_data['ISO_TIME']<=end_date)].copy() + filtered_data['ENSO_Phase'] = filtered_data['ONI'].apply(classify_enso_phases) + if enso_phase != 'all': + filtered_data = filtered_data[filtered_data['ENSO_Phase'] == enso_phase.capitalize()] fig = px.scatter(filtered_data, x='ONI', y='USA_PRES', color='Category', hover_data=['NAME','Year','Category'], - title='Pressure vs ONI (All Available Data)', + title='Pressure vs ONI', labels={'ONI':'ONI Value','USA_PRES':'Min Pressure (hPa)'}, color_discrete_map=enhanced_color_map) @@ -2775,86 +2268,102 @@ def get_pressure_analysis(start_year, start_month, end_year, end_month, enso_pha text=filtered_data.loc[mask,'NAME']+' ('+filtered_data.loc[mask,'Year'].astype(str)+')' )) - # Try regression analysis if we have sufficient data - try: - if len(filtered_data) > 10: - regression = perform_pressure_regression(start_year, start_month, end_year, end_month) - else: - regression = f"Pressure Analysis: {len(filtered_data)} storms analyzed (insufficient for regression)" - except: - regression = f"Pressure Analysis: {len(filtered_data)} storms analyzed" - + regression = perform_pressure_regression(start_year, start_month, end_year, end_month) return fig, regression def get_longitude_analysis(start_year, start_month, end_year, end_month, enso_phase, typhoon_search): - """FIXED: Longitude analysis that works with all data""" + """Get longitude analysis""" start_date = datetime(start_year, start_month, 1) end_date = datetime(end_year, end_month, 28) - - if merged_data is not None and not merged_data.empty: - filtered_data = merged_data[(merged_data['ISO_TIME']>=start_date) & (merged_data['ISO_TIME']<=end_date)].copy() - filtered_data['ENSO_Phase'] = filtered_data['ONI'].apply(classify_enso_phases) - - if enso_phase != 'all': - filtered_data = filtered_data[filtered_data['ENSO_Phase'] == enso_phase.capitalize()] - else: - # Create filtered data from typhoon_data - if 'ISO_TIME' in typhoon_data.columns: - time_filter = (typhoon_data['ISO_TIME'] >= start_date) & (typhoon_data['ISO_TIME'] <= end_date) - temp_data = typhoon_data[time_filter].groupby('SID').agg({ - 'LON': 
'first', 'NAME': 'first', 'SEASON': 'first', 'ISO_TIME': 'first'
-            }).reset_index()
-            temp_data['ONI'] = 0.0  # Default neutral
-            temp_data['Year'] = temp_data['ISO_TIME'].dt.year
-            filtered_data = temp_data
-        else:
-            return go.Figure(), "No time data available", "No longitude analysis available"
-    
-    if filtered_data.empty:
-        return go.Figure(), "No data available", "No longitude analysis available"
+    filtered_data = merged_data[(merged_data['ISO_TIME']>=start_date) & (merged_data['ISO_TIME']<=end_date)].copy()
+    filtered_data['ENSO_Phase'] = filtered_data['ONI'].apply(classify_enso_phases)
+    if enso_phase != 'all':
+        filtered_data = filtered_data[filtered_data['ENSO_Phase'] == enso_phase.capitalize()]
    
    fig = px.scatter(filtered_data, x='LON', y='ONI', hover_data=['NAME'],
-                     title='Typhoon Generation Longitude vs ONI (All Available Data)')
-    
-    slopes_text = f"Longitude Analysis: {len(filtered_data)} storms analyzed"
-    regression = f"Data points: {len(filtered_data)}"
+                     title='Typhoon Generation Longitude vs ONI (All Years)')
    
-    if len(filtered_data) > 10:
+    if len(filtered_data) > 1:
+        X = np.array(filtered_data['LON']).reshape(-1,1)
+        y = filtered_data['ONI']
        try:
-            X = np.array(filtered_data['LON']).reshape(-1,1)
-            y = filtered_data['ONI']
            model = sm.OLS(y, sm.add_constant(X)).fit()
            y_pred = model.predict(sm.add_constant(X))
            fig.add_trace(go.Scatter(x=filtered_data['LON'], y=y_pred, mode='lines', name='Regression Line'))
            slope = model.params[1]
-            slopes_text = f"All Years Slope: {slope:.4f} (n={len(filtered_data)})"
+            slopes_text = f"All Years Slope: {slope:.4f}"
        except Exception as e:
            slopes_text = f"Regression Error: {e}"
-        
-        try:
-            regression = perform_longitude_regression(start_year, start_month, end_year, end_month)
-        except:
-            regression = f"Longitude Analysis: {len(filtered_data)} storms analyzed"
+    else:
+        slopes_text = "Insufficient data for regression"
    
+    regression = perform_longitude_regression(start_year, start_month, end_year, end_month)
    return fig, slopes_text, regression

# -----------------------------
-# ENHANCED: Animation Functions with Taiwan Standard Support - FIXED VERSION
+# FIXED: Animation Functions - NO FALLBACK
# -----------------------------
+def get_available_years(typhoon_data):
+    """Get all available years from actual data - NO FALLBACK"""
+    try:
+        if typhoon_data is None or typhoon_data.empty:
+            raise Exception("No typhoon data available for year extraction")
+        
+        years = set()
+        
+        # Try multiple methods to extract years
+        if 'ISO_TIME' in typhoon_data.columns:
+            valid_times = typhoon_data['ISO_TIME'].dropna()
+            if len(valid_times) > 0:
+                years.update(valid_times.dt.year.unique())
+        
+        if 'SEASON' in typhoon_data.columns:
+            valid_seasons = typhoon_data['SEASON'].dropna()
+            if len(valid_seasons) > 0:
+                years.update(valid_seasons.unique())
+        
+        # Extract from SID if available (IBTrACS SIDs begin with the 4-digit season year)
+        if 'SID' in typhoon_data.columns and len(years) == 0:
+            for sid in typhoon_data['SID'].dropna().unique():
+                try:
+                    # Try to extract 4-digit year from SID
+                    year_match = pd.Series([sid]).str.extract(r'(\d{4})')[0].iloc[0]
+                    if year_match and 1850 <= int(year_match) <= 2030:
+                        years.add(int(year_match))
+                except:
+                    continue
+        
+        if len(years) == 0:
+            raise Exception("Could not extract any valid years from typhoon data")
+        
+        # Convert to sorted list of strings; IBTrACS reaches back to 1851,
+        # so keep the full historical span rather than clipping at 1950
+        year_strings = sorted([str(int(year)) for year in years if 1850 <= year <= 2030])
+        
+        if len(year_strings) == 0:
+            raise Exception("No valid years found in reasonable range (1850-2030)")
+        
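# Aside: a plain-regex version of the SID year extraction above, shown only
# as an illustrative sketch. IBTrACS serial IDs start with the season year
# (e.g. "2023178N11265" is hypothetical but follows the documented shape):
import re

def year_from_sid(sid):
    """Return the leading 4-digit year embedded in an IBTrACS SID, or None."""
    match = re.match(r'(\d{4})', str(sid))
    if match and 1850 <= int(match.group(1)) <= 2030:
        return int(match.group(1))
    return None

+        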
logging.info(f"Extracted {len(year_strings)} years from data: {year_strings[0]} to {year_strings[-1]}") + return year_strings + + except Exception as e: + logging.error(f"CRITICAL ERROR in get_available_years: {e}") + raise Exception(f"Cannot extract years from typhoon data: {e}") + def update_typhoon_options_enhanced(year, basin): - """Enhanced typhoon options with TD support and 2025 data""" + """Enhanced typhoon options - NEVER returns empty or fallback""" try: year = int(year) - # Filter by year - handle both ISO_TIME and SEASON columns + # Filter by year if 'ISO_TIME' in typhoon_data.columns: year_mask = typhoon_data['ISO_TIME'].dt.year == year elif 'SEASON' in typhoon_data.columns: year_mask = typhoon_data['SEASON'] == year else: - # Fallback - try to extract year from SID or other fields - year_mask = typhoon_data.index >= 0 # Include all data as fallback + # Try to extract from SID + sid_year_mask = typhoon_data['SID'].str.contains(str(year), na=False) + year_mask = sid_year_mask year_data = typhoon_data[year_mask].copy() @@ -2867,9 +2376,9 @@ def update_typhoon_options_enhanced(year, basin): year_data = year_data[year_data['BASIN'] == basin_code] if year_data.empty: - return gr.update(choices=["No storms found"], value=None) + raise Exception(f"No storms found for year {year} and basin {basin}") - # Get unique storms - include ALL intensities (including TD) + # Get unique storms storms = year_data.groupby('SID').agg({ 'NAME': 'first', 'USA_WIND': 'max' @@ -2890,39 +2399,50 @@ def update_typhoon_options_enhanced(year, basin): options.append(option) if not options: - return gr.update(choices=["No storms found"], value=None) + raise Exception(f"No valid storm options generated for year {year}") + logging.info(f"Generated {len(options)} storm options for {year}") return gr.update(choices=sorted(options), value=options[0]) except Exception as e: - print(f"Error in update_typhoon_options_enhanced: {e}") - return gr.update(choices=["Error loading storms"], value=None) + error_msg = f"Error loading storms for {year}: {str(e)}" + logging.error(error_msg) + raise Exception(error_msg) def generate_enhanced_track_video_fixed(year, typhoon_selection, standard): - """FIXED: Enhanced track video generation with working animation display""" - if not typhoon_selection or typhoon_selection == "No storms found": - return None - + """FIXED: Enhanced track video generation - NO FALLBACK ALLOWED""" try: + if not typhoon_selection or "No storms found" in typhoon_selection or "Error" in typhoon_selection: + raise Exception("Invalid typhoon selection provided") + # Extract SID from selection - sid = typhoon_selection.split('(')[1].split(')')[0] + try: + sid = typhoon_selection.split('(')[1].split(')')[0] + except: + raise Exception(f"Could not extract SID from selection: {typhoon_selection}") # Get storm data storm_df = typhoon_data[typhoon_data['SID'] == sid].copy() if storm_df.empty: - print(f"No data found for storm {sid}") - return None + raise Exception(f"No track data found for storm {sid}") # Sort by time if 'ISO_TIME' in storm_df.columns: storm_df = storm_df.sort_values('ISO_TIME') + # Validate essential data + if 'LAT' not in storm_df.columns or 'LON' not in storm_df.columns: + raise Exception(f"Missing coordinate data for storm {sid}") + # Extract data for animation - lats = storm_df['LAT'].astype(float).values - lons = storm_df['LON'].astype(float).values + lats = pd.to_numeric(storm_df['LAT'], errors='coerce').dropna().values + lons = pd.to_numeric(storm_df['LON'], 
errors='coerce').dropna().values
+        
+        # Keep the coordinate arrays paired: dropping NaNs from LAT and LON
+        # independently can leave arrays of different lengths that no longer
+        # describe the same fixes, so rebuild both from rows where both are valid
+        coords = storm_df[['LAT', 'LON']].apply(pd.to_numeric, errors='coerce').dropna()
+        lats = coords['LAT'].values
+        lons = coords['LON'].values
+        
+        if len(lats) < 2 or len(lons) < 2:
+            raise Exception(f"Insufficient track points for storm {sid}: {len(lats)} points")
        
        if 'USA_WIND' in storm_df.columns:
-            winds = pd.to_numeric(storm_df['USA_WIND'], errors='coerce').fillna(0).values
+            # Index winds by the same valid rows so they stay aligned with lats/lons
+            winds = pd.to_numeric(storm_df.loc[coords.index, 'USA_WIND'], errors='coerce').fillna(30).values
        else:
            winds = np.full(len(lats), 30)
@@ -2930,7 +2450,7 @@ def generate_enhanced_track_video_fixed(year, typhoon_selection, standard):
        storm_name = storm_df['NAME'].iloc[0] if pd.notna(storm_df['NAME'].iloc[0]) else "UNNAMED"
        season = storm_df['SEASON'].iloc[0] if 'SEASON' in storm_df.columns else year
        
-        print(f"Generating FIXED video for {storm_name} ({sid}) with {len(lats)} track points using {standard} standard")
+        logging.info(f"Generating FIXED video for {storm_name} ({sid}) with {len(lats)} track points using {standard} standard")
        
        # FIXED: Create figure with proper cartopy setup
        fig = plt.figure(figsize=(16, 10))
@@ -2959,24 +2479,20 @@
                    fontsize=18, fontweight='bold')
        
        # FIXED: Animation elements - proper initialization with cartopy transforms
-        # Initialize empty line for track with correct transform
        track_line, = ax.plot([], [], 'b-', linewidth=3, alpha=0.7, label='Track', transform=ccrs.PlateCarree())
        
-        # Initialize current position marker
        current_point, = ax.plot([], [], 'o', markersize=15, transform=ccrs.PlateCarree())
        
-        # Historical track points (to show path traversed)
        history_points, = ax.plot([], [], 'o', markersize=6, alpha=0.4, color='blue', transform=ccrs.PlateCarree())
        
-        # Info text box
        info_box = ax.text(0.02, 0.98, '', transform=ax.transAxes,
                          fontsize=12, verticalalignment='top',
                          bbox=dict(boxstyle="round,pad=0.5", facecolor='white', alpha=0.9))
        
-        # FIXED: Color legend with proper categories for both standards
+        # FIXED: Color legend with proper categories
        legend_elements = []
        
        if standard == 'taiwan':
            categories = ['Tropical Depression', 'Tropical Storm', 'Severe Tropical Storm',
@@ -2995,25 +2511,24 @@ def generate_enhanced_track_video_fixed(year, typhoon_selection, standard):
        
        ax.legend(handles=legend_elements, loc='upper right', fontsize=10)
        
-        # FIXED: Animation function with proper artist updates and cartopy compatibility
+        # FIXED: Animation function
        def animate_fixed(frame):
            """Fixed animation function that properly updates tracks with cartopy"""
            try:
                if frame >= len(lats):
                    return track_line, current_point, history_points, info_box
                
-                # FIXED: Update track line up to current frame
+                # Update track line up to current frame
                current_lons = lons[:frame+1]
                current_lats = lats[:frame+1]
                
-                # Update the track line data (this is the key fix!)
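
# Aside: the pattern relied on here is to mutate existing artists with
# set_data() each frame instead of re-plotting. A self-contained minimal
# sketch (plain matplotlib, no cartopy; names are illustrative only):
import matplotlib.pyplot as plt
from matplotlib import animation

def demo_track_animation(lons, lats):
    fig, ax = plt.subplots()
    ax.set_xlim(min(lons) - 1, max(lons) + 1)
    ax.set_ylim(min(lats) - 1, max(lats) + 1)
    line, = ax.plot([], [], 'b-')  # create the artist once
    def update(frame):
        line.set_data(lons[:frame + 1], lats[:frame + 1])  # mutate, don't re-plot
        return (line,)
    return animation.FuncAnimation(fig, update, frames=len(lats), blit=False)

                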
track_line.set_data(current_lons, current_lats) - # FIXED: Update historical points (smaller markers showing traversed path) + # Update historical points if frame > 0: history_points.set_data(current_lons[:-1], current_lats[:-1]) - # FIXED: Update current position with correct categorization + # Update current position with correct categorization current_wind = winds[frame] if standard == 'taiwan': @@ -3021,23 +2536,19 @@ def generate_enhanced_track_video_fixed(year, typhoon_selection, standard): else: category, color = categorize_typhoon_by_standard_fixed(current_wind, 'atlantic') - # Debug for first few frames - if frame < 3: - print(f"FIXED Frame {frame}: Wind={current_wind:.1f}kt, Category={category}, Color={color}") - # Update current position marker current_point.set_data([lons[frame]], [lats[frame]]) current_point.set_color(color) current_point.set_markersize(12 + current_wind/8) - # FIXED: Enhanced info display with correct Taiwan wind speed conversion + # Enhanced info display if 'ISO_TIME' in storm_df.columns and frame < len(storm_df): current_time = storm_df.iloc[frame]['ISO_TIME'] time_str = current_time.strftime('%Y-%m-%d %H:%M UTC') if pd.notna(current_time) else 'Unknown' else: time_str = f"Step {frame+1}" - # Corrected wind speed display for Taiwan standard + # Wind speed display if standard == 'taiwan': wind_ms = current_wind * 0.514444 wind_display = f"{current_wind:.0f} kt ({wind_ms:.1f} m/s)" @@ -3055,52 +2566,43 @@ def generate_enhanced_track_video_fixed(year, typhoon_selection, standard): ) info_box.set_text(info_text) - # FIXED: Return all modified artists (crucial for proper display) return track_line, current_point, history_points, info_box except Exception as e: - print(f"Error in animate frame {frame}: {e}") + logging.error(f"Error in animate frame {frame}: {e}") return track_line, current_point, history_points, info_box # FIXED: Create animation with cartopy-compatible settings - # Key fixes: blit=False (crucial for cartopy), proper interval anim = animation.FuncAnimation( fig, animate_fixed, frames=len(lats), - interval=600, blit=False, repeat=True # blit=False is essential for cartopy! 
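
        # Note: blit=True lets matplotlib redraw only the changed artists, but
        # cartopy's GeoAxes cannot reliably cache the map background, so blitted
        # frames can come out blank or stale. blit=False forces a full-figure
        # redraw each frame (slower but correct), which is also why animate_fixed
        # returns every modified artist.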
+ interval=600, blit=False, repeat=True ) - # Save animation with optimized settings + # Save animation temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.mp4', dir=tempfile.gettempdir()) - # FIXED: Writer settings optimized for track visibility writer = animation.FFMpegWriter( - fps=2, bitrate=3000, codec='libx264', # Slower FPS for better track visibility + fps=2, bitrate=3000, codec='libx264', extra_args=['-pix_fmt', 'yuv420p'] ) - print(f"Saving FIXED animation to {temp_file.name}") + logging.info(f"Saving FIXED animation to {temp_file.name}") anim.save(temp_file.name, writer=writer, dpi=120) plt.close(fig) - print(f"FIXED video generated successfully: {temp_file.name}") + logging.info(f"FIXED video generated successfully: {temp_file.name}") return temp_file.name except Exception as e: - print(f"Error generating FIXED video: {e}") + error_msg = f"CRITICAL ERROR generating video: {str(e)}" + logging.error(error_msg) import traceback traceback.print_exc() - return None - -# FIXED: Update the simplified wrapper function -def simplified_track_video_fixed(year, basin, typhoon, standard): - """Simplified track video function with FIXED animation and Taiwan classification""" - if not typhoon: - return None - return generate_enhanced_track_video_fixed(year, typhoon, standard) + raise Exception(error_msg) # ----------------------------- -# Load & Process Data - FIXED INITIALIZATION +# FIXED: Data Loading and Processing # ----------------------------- # Global variables initialization @@ -3109,95 +2611,60 @@ typhoon_data = None merged_data = None def initialize_data(): - """FIXED: Initialize all data safely - LOAD ALL AVAILABLE DATA""" + """Initialize all data safely - CRITICAL: NO FALLBACKS""" global oni_data, typhoon_data, merged_data try: - logging.info("Starting comprehensive data loading process...") + logging.info("Starting FIXED data loading process...") - # Try to load ONI data (optional) - try: - update_oni_data() - if os.path.exists(ONI_DATA_PATH): - oni_data = pd.read_csv(ONI_DATA_PATH) - logging.info(f"ONI data loaded: {len(oni_data)} years") - else: - logging.warning("ONI data not available") - oni_data = None - except Exception as e: - logging.warning(f"ONI data loading failed: {e}") - oni_data = None + # Update ONI data (optional) + update_oni_data() - # Load typhoon data (required) - temp_oni = oni_data if oni_data is not None else pd.DataFrame() - temp_oni, typhoon_data = load_data_fixed(ONI_DATA_PATH, TYPHOON_DATA_PATH) + # Load data with FIXED functions + oni_data, typhoon_data = load_data_fixed(ONI_DATA_PATH, TYPHOON_DATA_PATH) - if oni_data is None: - oni_data = temp_oni + # Verify critical data loaded + if typhoon_data is None or typhoon_data.empty: + raise Exception("CRITICAL: No typhoon data loaded") - if typhoon_data is not None and not typhoon_data.empty: - oni_long = process_oni_data(oni_data) - typhoon_max = process_typhoon_data(typhoon_data) - merged_data = merge_data(oni_long, typhoon_max) - - logging.info(f"Data loading complete:") - logging.info(f" - ONI data: {len(oni_data) if oni_data is not None else 0} years") - logging.info(f" - Typhoon data: {len(typhoon_data)} records") - logging.info(f" - Merged data: {len(merged_data)} storms") - - # Log basin distribution - if 'BASIN' in typhoon_data.columns: - basin_counts = typhoon_data['BASIN'].value_counts() - logging.info(f" - Basin distribution: {dict(basin_counts)}") - - else: - logging.error("Failed to load typhoon data") - # Create comprehensive fallback data - oni_data = pd.DataFrame({'Year': 
range(1851, 2026), 'Jan': [0]*175, 'Feb': [0]*175, 'Mar': [0]*175, 'Apr': [0]*175, - 'May': [0]*175, 'Jun': [0]*175, 'Jul': [0]*175, 'Aug': [0]*175, 'Sep': [0]*175, - 'Oct': [0]*175, 'Nov': [0]*175, 'Dec': [0]*175}) - typhoon_data = create_comprehensive_fallback_typhoon_data() - oni_long = process_oni_data(oni_data) - typhoon_max = process_typhoon_data(typhoon_data) - merged_data = merge_data(oni_long, typhoon_max) - - except Exception as e: - logging.error(f"Error during data initialization: {e}") - # Create comprehensive fallback data - oni_data = pd.DataFrame({'Year': range(1851, 2026), 'Jan': [0]*175, 'Feb': [0]*175, 'Mar': [0]*175, 'Apr': [0]*175, - 'May': [0]*175, 'Jun': [0]*175, 'Jul': [0]*175, 'Aug': [0]*175, 'Sep': [0]*175, - 'Oct': [0]*175, 'Nov': [0]*175, 'Dec': [0]*175}) - typhoon_data = create_comprehensive_fallback_typhoon_data() + if oni_data is None or oni_data.empty: + logging.warning("ONI data failed to load - using neutral values") + + # Process data oni_long = process_oni_data(oni_data) typhoon_max = process_typhoon_data(typhoon_data) merged_data = merge_data(oni_long, typhoon_max) - -# Initialize data -initialize_data() + + # Final validation + if merged_data is None or merged_data.empty: + raise Exception("CRITICAL: Merged data is empty") + + logging.info(f"FIXED data loading complete:") + logging.info(f" - ONI data: {len(oni_data) if oni_data is not None else 0} years") + logging.info(f" - Typhoon data: {len(typhoon_data)} records") + logging.info(f" - Merged data: {len(merged_data)} storms") + + except Exception as e: + logging.error(f"CRITICAL ERROR during FIXED data initialization: {e}") + import traceback + traceback.print_exc() + raise Exception(f"Data initialization failed: {e}") # ----------------------------- -# ENHANCED: Gradio Interface with Fixed Route Visualization and Enhanced Features +# FIXED: Gradio Interface # ----------------------------- def create_interface(): - """Create the enhanced Gradio interface with robust error handling""" + """Create the enhanced Gradio interface - NO FALLBACKS""" try: # Ensure data is available if oni_data is None or typhoon_data is None or merged_data is None: - logging.warning("Data not properly loaded, creating minimal interface") - return create_minimal_fallback_interface() + raise Exception("Data not properly loaded for interface creation") # Get safe data statistics - try: - total_storms = len(typhoon_data['SID'].unique()) if 'SID' in typhoon_data.columns else 0 - total_records = len(typhoon_data) - available_years = get_available_years(typhoon_data) - year_range_display = f"{available_years[0]} - {available_years[-1]}" if available_years else "Unknown" - except Exception as e: - logging.error(f"Error getting data statistics: {e}") - total_storms = 0 - total_records = 0 - year_range_display = "Unknown" - available_years = [str(year) for year in range(2000, 2026)] + total_storms = len(typhoon_data['SID'].unique()) if 'SID' in typhoon_data.columns else 0 + total_records = len(typhoon_data) + available_years = get_available_years(typhoon_data) + year_range_display = f"{available_years[0]} - {available_years[-1]}" if available_years else "Unknown" with gr.Blocks(title="Enhanced Typhoon Analysis Platform", theme=gr.themes.Soft()) as demo: gr.Markdown("# 🌪️ Enhanced Typhoon Analysis Platform") @@ -3210,37 +2677,34 @@ def create_interface(): This dashboard provides comprehensive analysis of typhoon data in relation to ENSO phases with advanced machine learning capabilities. 
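
        Under the hood, startup follows a simple load, process, and merge pipeline.
        A sketch of the call sequence in `initialize_data` (argument handling elided):

        ```python
        update_oni_data()                                    # refresh ONI table
        oni, storms = load_data_fixed(ONI_DATA_PATH, TYPHOON_DATA_PATH)
        merged = merge_data(process_oni_data(oni), process_typhoon_data(storms))
        ```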
### 🚀 Enhanced Features: - - **All Basin Coverage**: Loads data from ALL IBTrACS basins (WP, EP, NA, SP, SI, NI) - - **Complete Historical Range**: Full coverage from 1851-2025 (175+ years) - - **ONI Independent**: Analysis works with or without ONI data - **Advanced ML Clustering**: UMAP/t-SNE storm pattern analysis with separate visualizations - **Predictive Routing**: Advanced storm track and intensity forecasting with uncertainty quantification - **Complete TD Support**: Now includes Tropical Depressions (< 34 kt) - **Taiwan Standard**: Full support for Taiwan meteorological classification system + - **2025 Data Ready**: Real-time compatibility with current year data - **Enhanced Animations**: High-quality storm track visualizations with both standards + - **NO FALLBACK DATA**: All data comes from real IBTrACS sources ### 📊 Data Status: - **ONI Data**: {len(oni_data) if oni_data is not None else 0} years loaded - **Typhoon Data**: {total_records:,} records loaded - - **Merged Data**: {len(merged_data):,} typhoons with ONI values + - **Merged Data**: {len(merged_data):,} typhoons with analysis data - **Available Years**: {year_range_display} - - **Basin Coverage**: All IBTrACS basins (WP, EP, NA, SP, SI, NI) + - **Unique Storms**: {total_storms:,} ### 🔧 Technical Capabilities: - **UMAP Clustering**: {"✅ Available" if UMAP_AVAILABLE else "⚠️ Limited to t-SNE/PCA"} - **AI Predictions**: {"🧠 Deep Learning" if CNN_AVAILABLE else "🔬 Physics-based"} - **Enhanced Categorization**: Tropical Depression to Super Typhoon - - **Platform**: Optimized for Hugging Face Spaces - - **Maximum Data Utilization**: All available storms loaded regardless of ONI + - **Platform**: Optimized for real-time analysis + - **Data Source**: Live IBTrACS database (no synthetic data) ### 📈 Research Applications: - - Climate change impact studies across all basins + - Climate change impact studies - Seasonal forecasting research - Storm pattern classification - ENSO-typhoon relationship analysis - Intensity prediction model development - - Cross-regional classification comparisons - - Historical trend analysis (1851-2025) """ gr.Markdown(overview_text) @@ -3276,10 +2740,9 @@ def create_interface(): def run_separate_clustering_analysis(method): try: - # Extract features for clustering storm_features = extract_storm_features(typhoon_data) if storm_features is None: - return None, None, None, None, "Error: Could not extract storm features" + raise Exception("Could not extract storm features from data") fig_cluster, fig_routes, fig_pressure, fig_wind, stats = create_separate_clustering_plots( storm_features, typhoon_data, method.lower() @@ -3288,7 +2751,8 @@ def create_interface(): except Exception as e: import traceback error_details = traceback.format_exc() - error_msg = f"Error: {str(e)}\n\nDetails:\n{error_details}" + error_msg = f"Clustering analysis failed: {str(e)}\n\nDetails:\n{error_details}" + logging.error(error_msg) return None, None, None, None, error_msg analyze_clusters_btn.click( @@ -3296,26 +2760,6 @@ def create_interface(): inputs=[reduction_method], outputs=[cluster_plot, routes_plot, pressure_plot, wind_plot, cluster_stats] ) - - cluster_info_text = """ - ### 📊 Enhanced Clustering Features: - - **All Basin Analysis**: Uses data from all global tropical cyclone basins - - **Complete Historical Coverage**: Analyzes patterns from 1851-2025 - - **Separate Visualizations**: Four distinct plots for comprehensive analysis - - **Multi-dimensional Analysis**: Uses 15+ storm characteristics including intensity, 
track shape, genesis location - - **Route Visualization**: Geographic storm tracks colored by cluster membership - - **Temporal Analysis**: Pressure and wind evolution patterns by cluster - - **DBSCAN Clustering**: Automatic pattern discovery without predefined cluster count - - **Interactive**: Hover over points to see storm details, zoom and pan all plots - - ### 🎯 How to Interpret: - - **Clustering Plot**: Each dot is a storm positioned by similarity (close = similar characteristics) - - **Routes Plot**: Actual geographic storm tracks, colored by which cluster they belong to - - **Pressure Plot**: Shows how pressure changes over time for storms in each cluster - - **Wind Plot**: Shows wind speed evolution patterns for each cluster - - **Cluster Colors**: Each cluster gets a unique color across all four visualizations - """ - gr.Markdown(cluster_info_text) with gr.Tab("🌊 Realistic Storm Genesis & Prediction"): gr.Markdown("## 🌊 Realistic Typhoon Development from Genesis") @@ -3343,7 +2787,6 @@ def create_interface(): info="Select realistic development region based on climatology" ) - # Display selected region info def update_genesis_info(region): locations = get_realistic_genesis_locations() if region in locations: @@ -3374,7 +2817,7 @@ def create_interface(): minimum=20, maximum=1000, step=6, - info="Extended forecasting: 20-1000 hours (42 days max)" + info="Extended forecasting: 20-1000 hours" ) advanced_physics = gr.Checkbox( label="Advanced Physics", @@ -3406,20 +2849,17 @@ def create_interface(): def run_realistic_prediction(region, month, oni, hours, advanced_phys, uncertainty, animation): try: - # Run realistic prediction with genesis region results = predict_storm_route_and_intensity_realistic( region, month, oni, forecast_hours=hours, use_advanced_physics=advanced_phys ) - # Extract genesis conditions current = results['current_prediction'] intensity = current['intensity_kt'] category = current['category'] genesis_info = results.get('genesis_info', {}) - # Create enhanced visualization fig, forecast_text = create_animated_route_visualization( results, uncertainty, animation ) @@ -3438,10 +2878,7 @@ def create_interface(): logging.error(error_msg) import traceback traceback.print_exc() - return ( - 30, "Tropical Depression", f"Prediction failed: {str(e)}", - None, f"Error generating realistic forecast: {str(e)}" - ) + raise gr.Error(error_msg) predict_btn.click( fn=run_realistic_prediction, @@ -3519,18 +2956,18 @@ def create_interface(): ) with gr.Tab("🎬 Enhanced Track Animation"): - gr.Markdown("## 🎥 High-Quality Storm Track Visualization (Atlantic & Taiwan Standards)") + gr.Markdown("## 🎥 High-Quality Storm Track Visualization - NO FALLBACK DATA") + gr.Markdown("**ALL animations use real IBTrACS data - never synthetic or fallback data**") with gr.Row(): year_dropdown = gr.Dropdown( label="Year", choices=available_years, - value=available_years[-1] if available_years else "2024" + value=available_years[-1] if available_years else None ) basin_dropdown = gr.Dropdown( label="Basin", - choices=["All Basins", "WP - Western Pacific", "EP - Eastern Pacific", "NA - North Atlantic", - "SP - South Pacific", "SI - South Indian", "NI - North Indian"], + choices=["All Basins", "WP - Western Pacific", "EP - Eastern Pacific", "NA - North Atlantic"], value="All Basins" ) @@ -3547,71 +2984,76 @@ def create_interface(): video_output = gr.Video(label="Storm Track Animation") # Update storm options when year or basin changes + def safe_update_typhoon_options(year, basin): + try: + return 
update_typhoon_options_enhanced(year, basin) + except Exception as e: + error_msg = f"Failed to load storms: {str(e)}" + logging.error(error_msg) + return gr.update(choices=[error_msg], value=None) + for input_comp in [year_dropdown, basin_dropdown]: input_comp.change( - fn=update_typhoon_options_enhanced, + fn=safe_update_typhoon_options, inputs=[year_dropdown, basin_dropdown], outputs=[typhoon_dropdown] ) - # FIXED: Generate video with fixed function + def safe_generate_video(year, typhoon_selection, standard): + try: + if not typhoon_selection: + raise gr.Error("Please select a typhoon first") + return generate_enhanced_track_video_fixed(year, typhoon_selection, standard) + except Exception as e: + error_msg = f"Video generation failed: {str(e)}" + logging.error(error_msg) + raise gr.Error(error_msg) + generate_video_btn.click( - fn=generate_enhanced_track_video_fixed, + fn=safe_generate_video, inputs=[year_dropdown, typhoon_dropdown, standard_dropdown], outputs=[video_output] ) - # FIXED animation info text with corrected Taiwan standards animation_info_text = """ - ### 🎬 Enhanced Animation Features: - - **All Basin Support**: Visualize storms from any global basin (WP, EP, NA, SP, SI, NI) - - **Complete Historical Range**: Animate storms from 1851-2025 + ### 🎬 FIXED Animation Features - NO FALLBACK DATA: + - **Real Data Only**: All animations use actual IBTrACS typhoon track data - **Dual Standards**: Full support for both Atlantic and Taiwan classification systems - **Full TD Support**: Now displays Tropical Depressions (< 34 kt) in gray + - **2025 Compatibility**: Complete support for current year data - **Enhanced Maps**: Better cartographic projections with terrain features - **Smart Scaling**: Storm symbols scale dynamically with intensity - **Real-time Info**: Live position, time, and meteorological data display - **Professional Styling**: Publication-quality animations with proper legends - - **Optimized Export**: Fast rendering with web-compatible video formats - **FIXED Animation**: Tracks now display properly with cartopy integration + - **Error Handling**: Robust error handling prevents fallback to synthetic data ### 🎌 Taiwan Standard Features (CORRECTED): - **CMA 2006 Standards**: Uses official China Meteorological Administration classification - **Six Categories**: TD → TS → STS → TY → STY → Super TY - - **Correct Thresholds**: - * Tropical Depression: < 17.2 m/s (< 33.4 kt) - * Tropical Storm: 17.2-24.4 m/s (33.4-47.5 kt) - * Severe Tropical Storm: 24.5-32.6 m/s (47.6-63.5 kt) - * Typhoon: 32.7-41.4 m/s (63.6-80.6 kt) - * Severe Typhoon: 41.5-50.9 m/s (80.7-99.1 kt) - * Super Typhoon: ≥51.0 m/s (≥99.2 kt) + - **Correct Thresholds**: Based on official meteorological standards - **m/s Display**: Shows both knots and meters per second - **CWB Compatible**: Matches Central Weather Bureau classifications - - **Fixed Color Coding**: Gray → Blue → Cyan → Yellow → Orange → Red """ gr.Markdown(animation_info_text) with gr.Tab("📊 Data Statistics & Insights"): - gr.Markdown("## 📈 Comprehensive Dataset Analysis") + gr.Markdown("## 📈 Comprehensive Dataset Analysis - REAL DATA ONLY") - # Create enhanced data summary try: if len(typhoon_data) > 0: - # Storm category distribution storm_cats = typhoon_data.groupby('SID')['USA_WIND'].max().apply(categorize_typhoon_enhanced) cat_counts = storm_cats.value_counts() - # Create distribution chart with enhanced colors fig_dist = px.bar( x=cat_counts.index, y=cat_counts.values, - title="Storm Intensity Distribution (All Basins - Including 
Tropical Depressions)", + title="Storm Intensity Distribution (Including Tropical Depressions)", labels={'x': 'Category', 'y': 'Number of Storms'}, color=cat_counts.index, color_discrete_map=enhanced_color_map ) - # Seasonal distribution if 'ISO_TIME' in typhoon_data.columns: seasonal_data = typhoon_data.copy() seasonal_data['Month'] = seasonal_data['ISO_TIME'].dt.month @@ -3620,7 +3062,7 @@ def create_interface(): fig_seasonal = px.bar( x=monthly_counts.index, y=monthly_counts.values, - title="Seasonal Storm Distribution (All Basins)", + title="Seasonal Storm Distribution", labels={'x': 'Month', 'y': 'Number of Storms'}, color=monthly_counts.values, color_continuous_scale='Viridis' @@ -3628,13 +3070,12 @@ def create_interface(): else: fig_seasonal = None - # Basin distribution - if 'BASIN' in typhoon_data.columns: - basin_data = typhoon_data['BASIN'].value_counts() + if 'SID' in typhoon_data.columns: + basin_data = typhoon_data['SID'].str[:2].value_counts() fig_basin = px.pie( values=basin_data.values, names=basin_data.index, - title="Distribution by Basin (All Global Basins)" + title="Distribution by Basin" ) else: fig_basin = None @@ -3653,10 +3094,7 @@ def create_interface(): except Exception as e: gr.Markdown(f"Visualization error: {str(e)}") - # Enhanced statistics - FIXED formatting with ALL DATA - total_storms = len(typhoon_data['SID'].unique()) if 'SID' in typhoon_data.columns else 0 - total_records = len(typhoon_data) - + # Enhanced statistics if 'SEASON' in typhoon_data.columns: try: min_year = int(typhoon_data['SEASON'].min()) @@ -3664,24 +3102,23 @@ def create_interface(): year_range = f"{min_year}-{max_year}" years_covered = typhoon_data['SEASON'].nunique() except (ValueError, TypeError): - year_range = "1851-2025" - years_covered = 175 + year_range = "Unknown" + years_covered = 0 else: - year_range = "1851-2025" - years_covered = 175 + year_range = "Unknown" + years_covered = 0 - if 'BASIN' in typhoon_data.columns: + if 'SID' in typhoon_data.columns: try: - basins_available = ', '.join(sorted(typhoon_data['BASIN'].unique())) + basins_available = ', '.join(sorted(typhoon_data['SID'].str[:2].unique())) avg_storms_per_year = total_storms / max(years_covered, 1) except Exception: - basins_available = "WP, EP, NA, SP, SI, NI" + basins_available = "Unknown" avg_storms_per_year = 0 else: - basins_available = "WP, EP, NA, SP, SI, NI" + basins_available = "Unknown" avg_storms_per_year = 0 - # TD specific statistics try: if 'USA_WIND' in typhoon_data.columns: td_storms = len(typhoon_data[typhoon_data['USA_WIND'] < 34]['SID'].unique()) @@ -3692,18 +3129,17 @@ def create_interface(): td_storms = ts_storms = typhoon_storms = 0 td_percentage = 0 except Exception as e: - print(f"Error calculating TD statistics: {e}") td_storms = ts_storms = typhoon_storms = 0 td_percentage = 0 - # Create statistics text safely stats_text = f""" - ### 📊 Enhanced Dataset Summary: + ### 📊 REAL Dataset Summary - NO SYNTHETIC DATA: - **Total Unique Storms**: {total_storms:,} - **Total Track Records**: {total_records:,} - **Year Range**: {year_range} ({years_covered} years) - **Basins Available**: {basins_available} - **Average Storms/Year**: {avg_storms_per_year:.1f} + - **Data Source**: IBTrACS v04r01 (Real observations only) ### 🌪️ Storm Category Breakdown: - **Tropical Depressions**: {td_storms:,} storms ({td_percentage:.1f}%) @@ -3711,97 +3147,59 @@ def create_interface(): - **Typhoons (C1-C5)**: {typhoon_storms:,} storms ### 🚀 Platform Capabilities: - - **Complete Global Coverage** - ALL IBTrACS basins 
            stats_text = f"""
-            ### 📊 Enhanced Dataset Summary:
+            ### 📊 REAL Dataset Summary - NO SYNTHETIC DATA:
            - **Total Unique Storms**: {total_storms:,}
            - **Total Track Records**: {total_records:,}
            - **Year Range**: {year_range} ({years_covered} years)
            - **Basins Available**: {basins_available}
            - **Average Storms/Year**: {avg_storms_per_year:.1f}
+            - **Data Source**: IBTrACS v04r01 (Real observations only)

            ### 🌪️ Storm Category Breakdown:
            - **Tropical Depressions**: {td_storms:,} storms ({td_percentage:.1f}%)
@@ -3711,97 +3147,59 @@ def create_interface():
            - **Typhoons (C1-C5)**: {typhoon_storms:,} storms

            ### 🚀 Platform Capabilities:
-            - **Complete Global Coverage** - ALL IBTrACS basins loaded (WP, EP, NA, SP, SI, NI)
-            - **Maximum Historical Range** - Full 175+ year coverage (1851-2025)
-            - **ONI Independence** - All storm data preserved regardless of ONI availability
-            - **Complete TD Analysis** - First platform to include comprehensive TD tracking
+            - **Complete TD Analysis** - Comprehensive tracking of Tropical Depressions (< 34 kt)
            - **Dual Classification Systems** - Both Atlantic and Taiwan standards supported
            - **Advanced ML Clustering** - DBSCAN pattern recognition with separate visualizations
            - **Real-time Predictions** - Physics-based and optional CNN intensity forecasting
+            - **2025 Data Ready** - Full compatibility with current season data
            - **Enhanced Animations** - Professional-quality storm track videos
-            - **Cross-basin Analysis** - Comprehensive global tropical cyclone coverage
+            - **Multi-basin Analysis** - Comprehensive Pacific and Atlantic coverage
+            - **NO FALLBACK DATA** - All analysis uses real meteorological observations

            ### 🔬 Research Applications:
-            - Global climate change impact studies
-            - Cross-basin seasonal forecasting research
-            - Storm pattern classification across all oceans
+            - Climate change impact studies
+            - Seasonal forecasting research
+            - Storm pattern classification
            - ENSO-typhoon relationship analysis
            - Intensity prediction model development
            - Cross-regional classification comparisons
-            - Historical trend analysis spanning 175+ years
-            - Basin interaction and teleconnection studies
            """

            gr.Markdown(stats_text)

            return demo

    except Exception as e:
-        logging.error(f"Error creating Gradio interface: {e}")
+        logging.error(f"CRITICAL ERROR creating Gradio interface: {e}")
        import traceback
        traceback.print_exc()
-        # Create a minimal fallback interface
-        return create_minimal_fallback_interface()
-
-def create_minimal_fallback_interface():
-    """Create a minimal fallback interface when main interface fails"""
-    with gr.Blocks() as demo:
-        gr.Markdown("# Enhanced Typhoon Analysis Platform")
-        gr.Markdown("**Notice**: Loading with minimal interface due to data issues.")
-
-        with gr.Tab("Status"):
-            gr.Markdown("""
-            ## Platform Status
-
-            The application is running but encountered issues loading the full interface.
-            This could be due to:
-            - Data loading problems
-            - Missing dependencies
-            - Configuration issues
-
-            ### Available Features:
-            - Basic interface is functional
-            - Error logs are being generated
-            - System is ready for debugging
-
-            ### Next Steps:
-            1. Check the console logs for detailed error information
-            2. Verify all required data files are accessible
-            3. Ensure all dependencies are properly installed
-            4. Try restarting the application
-            """)
-
-        with gr.Tab("Debug"):
-            gr.Markdown("## Debug Information")
-
-            def get_debug_info():
-                debug_text = f"""
-                Python Environment:
-                - Working Directory: {os.getcwd()}
-                - Data Path: {DATA_PATH}
-                - UMAP Available: {UMAP_AVAILABLE}
-                - CNN Available: {CNN_AVAILABLE}
-
-                Data Status:
-                - ONI Data: {'Loaded' if oni_data is not None else 'Failed'}
-                - Typhoon Data: {'Loaded' if typhoon_data is not None else 'Failed'}
-                - Merged Data: {'Loaded' if merged_data is not None else 'Failed'}
-
-                File Checks:
-                - ONI Path Exists: {os.path.exists(ONI_DATA_PATH)}
-                - Typhoon Path Exists: {os.path.exists(TYPHOON_DATA_PATH)}
-
-                Basin Files Available:
-                {[f"- {basin}: {BASIN_FILES[basin]}" for basin in BASIN_FILES.keys()]}
-                """
-                return debug_text
-
-            debug_btn = gr.Button("Get Debug Info")
-            debug_output = gr.Textbox(label="Debug Information", lines=15)
-            debug_btn.click(fn=get_debug_info, outputs=debug_output)
-
-    return demo
+        # Chain the original exception so the full traceback is preserved
+        raise RuntimeError(f"Interface creation failed: {e}") from e

-# Create and launch the interface
-demo = create_interface()
+# -----------------------------
+# MAIN EXECUTION
+# -----------------------------
 if __name__ == "__main__":
-    demo.launch(share=True)  # Enable sharing with public link
\ No newline at end of file
+    try:
+        # Initialize data first - CRITICAL
+        logging.info("Initializing data...")
+        initialize_data()

+        # Verify data loaded correctly
+        if typhoon_data is None or typhoon_data.empty:
+            raise RuntimeError("CRITICAL: No typhoon data available for interface")

+        logging.info("Creating interface...")
+        demo = create_interface()

+        logging.info("Launching application...")
+        demo.launch(share=True)

+    except Exception as e:
+        logging.error(f"CRITICAL APPLICATION ERROR: {e}")
+        import traceback
+        traceback.print_exc()
+        print(f"\n{'='*60}")
+        print("CRITICAL ERROR: Application failed to start")
+        print(f"Error: {e}")
+        print("Check logs for detailed error information")
+        print(f"{'='*60}")
+        raise
\ No newline at end of file