diff --git "a/app.py" "b/app.py"
--- "a/app.py"
+++ "b/app.py"
@@ -41,7 +41,7 @@ import tempfile
import shutil
import xarray as xr
-# NEW: Advanced ML imports
+# Advanced ML imports
try:
import umap.umap_ as umap
UMAP_AVAILABLE = True
@@ -52,12 +52,10 @@ except ImportError:
# Optional CNN imports with robust error handling
CNN_AVAILABLE = False
try:
- # Set environment variables before importing TensorFlow
- os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3' # Suppress TensorFlow warnings
+ os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'
import tensorflow as tf
from tensorflow.keras import layers, models
- # Test if TensorFlow actually works
- tf.config.set_visible_devices([], 'GPU') # Disable GPU to avoid conflicts
+ tf.config.set_visible_devices([], 'GPU')
CNN_AVAILABLE = True
print("TensorFlow successfully loaded - CNN features enabled")
except Exception as e:
@@ -80,13 +78,11 @@ logging.basicConfig(
format='%(asctime)s - %(levelname)s - %(message)s'
)
-# Remove argument parser to simplify startup
+# FIXED: Data path setup
DATA_PATH = '/tmp/typhoon_data' if 'SPACE_ID' in os.environ else tempfile.gettempdir()
-# Ensure directory exists and is writable
try:
os.makedirs(DATA_PATH, exist_ok=True)
- # Test write permissions
test_file = os.path.join(DATA_PATH, 'test_write.txt')
with open(test_file, 'w') as f:
f.write('test')
@@ -102,27 +98,21 @@ ONI_DATA_PATH = os.path.join(DATA_PATH, 'oni_data.csv')
TYPHOON_DATA_PATH = os.path.join(DATA_PATH, 'processed_typhoon_data.csv')
MERGED_DATA_CSV = os.path.join(DATA_PATH, 'merged_typhoon_era5_data.csv')
-# IBTrACS settings - NOW INCLUDES ALL BASINS
+# IBTrACS settings
BASIN_FILES = {
'EP': 'ibtracs.EP.list.v04r01.csv',
'NA': 'ibtracs.NA.list.v04r01.csv',
'WP': 'ibtracs.WP.list.v04r01.csv',
- 'SP': 'ibtracs.SP.list.v04r01.csv', # Added South Pacific
- 'SI': 'ibtracs.SI.list.v04r01.csv', # Added South Indian
- 'NI': 'ibtracs.NI.list.v04r01.csv' # Added North Indian
+ 'ALL': 'ibtracs.ALL.list.v04r01.csv' # Added ALL basin option
}
IBTRACS_BASE_URL = 'https://www.ncei.noaa.gov/data/international-best-track-archive-for-climate-stewardship-ibtracs/v04r01/access/csv/'
-LOCAL_IBTRACS_PATH = os.path.join(DATA_PATH, 'ibtracs.WP.list.v04r01.csv')
-CACHE_FILE = os.path.join(DATA_PATH, 'ibtracs_cache.pkl')
-CACHE_EXPIRY_DAYS = 1
# -----------------------------
-# ENHANCED: Color Maps and Standards with TD Support - FIXED TAIWAN CLASSIFICATION
+# FIXED: Color Maps and Standards with TD Support
# -----------------------------
-# Enhanced color mapping with TD support (for Plotly)
enhanced_color_map = {
'Unknown': 'rgb(200, 200, 200)',
- 'Tropical Depression': 'rgb(128, 128, 128)', # Gray for TD
+ 'Tropical Depression': 'rgb(128, 128, 128)',
'Tropical Storm': 'rgb(0, 0, 255)',
'C1 Typhoon': 'rgb(0, 255, 255)',
'C2 Typhoon': 'rgb(0, 255, 0)',
@@ -131,42 +121,26 @@ enhanced_color_map = {
'C5 Super Typhoon': 'rgb(255, 0, 0)'
}
-# Matplotlib-compatible color mapping (hex colors)
matplotlib_color_map = {
'Unknown': '#C8C8C8',
- 'Tropical Depression': '#808080', # Gray for TD
- 'Tropical Storm': '#0000FF', # Blue
- 'C1 Typhoon': '#00FFFF', # Cyan
- 'C2 Typhoon': '#00FF00', # Green
- 'C3 Strong Typhoon': '#FFFF00', # Yellow
- 'C4 Very Strong Typhoon': '#FFA500', # Orange
- 'C5 Super Typhoon': '#FF0000' # Red
+ 'Tropical Depression': '#808080',
+ 'Tropical Storm': '#0000FF',
+ 'C1 Typhoon': '#00FFFF',
+ 'C2 Typhoon': '#00FF00',
+ 'C3 Strong Typhoon': '#FFFF00',
+ 'C4 Very Strong Typhoon': '#FFA500',
+ 'C5 Super Typhoon': '#FF0000'
}
-# FIXED: Taiwan color mapping with correct CMA 2006 standards
taiwan_color_map_fixed = {
- 'Tropical Depression': '#808080', # Gray
- 'Tropical Storm': '#0000FF', # Blue
- 'Severe Tropical Storm': '#00FFFF', # Cyan
- 'Typhoon': '#FFFF00', # Yellow
- 'Severe Typhoon': '#FFA500', # Orange
- 'Super Typhoon': '#FF0000' # Red
+ 'Tropical Depression': '#808080',
+ 'Tropical Storm': '#0000FF',
+ 'Severe Tropical Storm': '#00FFFF',
+ 'Typhoon': '#FFFF00',
+ 'Severe Typhoon': '#FFA500',
+ 'Super Typhoon': '#FF0000'
}
-def rgb_string_to_hex(rgb_string):
- """Convert 'rgb(r,g,b)' string to hex color for matplotlib"""
- try:
- # Extract numbers from 'rgb(r,g,b)' format
- import re
- numbers = re.findall(r'\d+', rgb_string)
- if len(numbers) == 3:
- r, g, b = map(int, numbers)
- return f'#{r:02x}{g:02x}{b:02x}'
- else:
- return '#808080' # Default gray
- except:
- return '#808080' # Default gray
-
def get_matplotlib_color(category):
"""Get matplotlib-compatible color for a storm category"""
return matplotlib_color_map.get(category, '#808080')
@@ -188,17 +162,7 @@ ROUTE_COLORS = [
'#FF00CC', '#00FFCC', '#CC00FF', '#CCFF00', '#00CCFF'
]
-# Original color map for backward compatibility
-color_map = {
- 'C5 Super Typhoon': 'rgb(255, 0, 0)',
- 'C4 Very Strong Typhoon': 'rgb(255, 165, 0)',
- 'C3 Strong Typhoon': 'rgb(255, 255, 0)',
- 'C2 Typhoon': 'rgb(0, 255, 0)',
- 'C1 Typhoon': 'rgb(0, 255, 255)',
- 'Tropical Storm': 'rgb(0, 0, 255)',
- 'Tropical Depression': 'rgb(128, 128, 128)'
-}
-
+# Classification standards
atlantic_standard = {
'C5 Super Typhoon': {'wind_speed': 137, 'color': 'Red', 'hex': '#FF0000'},
'C4 Very Strong Typhoon': {'wind_speed': 113, 'color': 'Orange', 'hex': '#FFA500'},
@@ -209,7 +173,6 @@ atlantic_standard = {
'Tropical Depression': {'wind_speed': 0, 'color': 'Gray', 'hex': '#808080'}
}
-# FIXED: Taiwan standard with correct CMA 2006 thresholds
taiwan_standard_fixed = {
'Super Typhoon': {'wind_speed_ms': 51.0, 'wind_speed_kt': 99.2, 'color': 'Red', 'hex': '#FF0000'},
'Severe Typhoon': {'wind_speed_ms': 41.5, 'wind_speed_kt': 80.7, 'color': 'Orange', 'hex': '#FFA500'},
@@ -220,26 +183,20 @@ taiwan_standard_fixed = {
}
# -----------------------------
-# Utility Functions for HF Spaces
+# FIXED: Utility Functions
# -----------------------------
def safe_file_write(file_path, data_frame, backup_dir=None):
"""Safely write DataFrame to CSV with backup and error handling"""
try:
- # Create directory if it doesn't exist
os.makedirs(os.path.dirname(file_path), exist_ok=True)
-
- # Try to write to a temporary file first
temp_path = file_path + '.tmp'
data_frame.to_csv(temp_path, index=False)
-
- # If successful, rename to final file
os.rename(temp_path, file_path)
logging.info(f"Successfully saved {len(data_frame)} records to {file_path}")
return True
-
- except PermissionError as e:
- logging.warning(f"Permission denied writing to {file_path}: {e}")
+ except Exception as e:
+ logging.error(f"Error saving file {file_path}: {e}")
if backup_dir:
try:
backup_path = os.path.join(backup_dir, os.path.basename(file_path))
@@ -249,44 +206,9 @@ def safe_file_write(file_path, data_frame, backup_dir=None):
except Exception as backup_e:
logging.error(f"Failed to save to backup location: {backup_e}")
return False
-
- except Exception as e:
- logging.error(f"Error saving file {file_path}: {e}")
- # Clean up temp file if it exists
- temp_path = file_path + '.tmp'
- if os.path.exists(temp_path):
- try:
- os.remove(temp_path)
- except:
- pass
- return False
-
-def get_fallback_data_dir():
- """Get a fallback data directory that's guaranteed to be writable"""
- fallback_dirs = [
- tempfile.gettempdir(),
- '/tmp',
- os.path.expanduser('~'),
- os.getcwd()
- ]
-
- for directory in fallback_dirs:
- try:
- test_dir = os.path.join(directory, 'typhoon_fallback')
- os.makedirs(test_dir, exist_ok=True)
- test_file = os.path.join(test_dir, 'test.txt')
- with open(test_file, 'w') as f:
- f.write('test')
- os.remove(test_file)
- return test_dir
- except:
- continue
-
- # If all else fails, use current directory
- return os.getcwd()
# -----------------------------
-# ONI and Typhoon Data Functions - FIXED TO LOAD ALL DATA
+# FIXED: ONI Data Functions
# -----------------------------
def download_oni_file(url, filename):
@@ -302,10 +224,8 @@ def download_oni_file(url, filename):
except Exception as e:
logging.warning(f"Attempt {attempt + 1} failed to download ONI: {e}")
if attempt < max_retries - 1:
- time.sleep(2 ** attempt) # Exponential backoff
- else:
- logging.error(f"Failed to download ONI after {max_retries} attempts")
- return False
+ time.sleep(2 ** attempt)
+ return False
def convert_oni_ascii_to_csv(input_file, output_file):
"""Convert ONI ASCII format to CSV"""
@@ -326,19 +246,18 @@ def convert_oni_ascii_to_csv(input_file, output_file):
year = str(int(year)-1)
data[year][month-1] = anom
- # Write to CSV with safe write
df = pd.DataFrame(data).T.reset_index()
df.columns = ['Year','Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec']
df = df.sort_values('Year').reset_index(drop=True)
- return safe_file_write(output_file, df, get_fallback_data_dir())
+ return safe_file_write(output_file, df)
except Exception as e:
logging.error(f"Error converting ONI file: {e}")
return False
def update_oni_data():
- """Update ONI data with error handling - OPTIONAL now"""
+ """Update ONI data with error handling"""
url = "https://www.cpc.ncep.noaa.gov/data/indices/oni.ascii.txt"
temp_file = os.path.join(DATA_PATH, "temp_oni.ascii.txt")
input_file = os.path.join(DATA_PATH, "oni.ascii.txt")
@@ -352,33 +271,31 @@ def update_oni_data():
else:
os.remove(temp_file)
else:
- # Create fallback ONI data if download fails
- logging.warning("Creating fallback ONI data")
- create_fallback_oni_data(output_file)
+ logging.warning("ONI download failed - will create minimal ONI data")
+ create_minimal_oni_data(output_file)
except Exception as e:
logging.error(f"Error updating ONI data: {e}")
- create_fallback_oni_data(output_file)
+ create_minimal_oni_data(output_file)
-def create_fallback_oni_data(output_file):
- """Create minimal ONI data for testing - EXTENDED RANGE"""
- years = range(1851, 2026) # Extended to full historical range
+def create_minimal_oni_data(output_file):
+ """Create minimal ONI data for years without dropping typhoon data"""
+ years = range(1950, 2026) # Wide range to ensure coverage
months = ['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec']
- # Create synthetic ONI data
data = []
for year in years:
row = [year]
for month in months:
- # Generate some realistic ONI values
- value = np.random.normal(0, 1) * 0.5
+ # Generate neutral ONI values (small variations around 0)
+ value = np.random.normal(0, 0.3)
row.append(f"{value:.2f}")
data.append(row)
df = pd.DataFrame(data, columns=['Year'] + months)
- safe_file_write(output_file, df, get_fallback_data_dir())
+ safe_file_write(output_file, df)
# -----------------------------
-# FIXED: IBTrACS Data Loading - LOAD ALL BASINS, ALL YEARS
+# FIXED: IBTrACS Data Loading - No Fallback, All Data
# -----------------------------
def download_ibtracs_file(basin, force_download=False):
@@ -387,7 +304,6 @@ def download_ibtracs_file(basin, force_download=False):
local_path = os.path.join(DATA_PATH, filename)
url = IBTRACS_BASE_URL + filename
- # Check if file exists and is recent (less than 7 days old)
if os.path.exists(local_path) and not force_download:
file_age = time.time() - os.path.getmtime(local_path)
if file_age < 7 * 24 * 3600: # 7 days
@@ -396,10 +312,9 @@ def download_ibtracs_file(basin, force_download=False):
try:
logging.info(f"Downloading {basin} basin file from {url}")
- response = requests.get(url, timeout=60)
+ response = requests.get(url, timeout=120) # Increased timeout
response.raise_for_status()
- # Ensure directory exists
os.makedirs(os.path.dirname(local_path), exist_ok=True)
with open(local_path, 'wb') as f:
@@ -410,29 +325,8 @@ def download_ibtracs_file(basin, force_download=False):
logging.error(f"Failed to download {basin} basin file: {e}")
return None
-def examine_ibtracs_structure(file_path):
- """Examine the actual structure of an IBTrACS CSV file"""
- try:
- with open(file_path, 'r') as f:
- lines = f.readlines()
-
- # Show first 5 lines
- logging.info("First 5 lines of IBTrACS file:")
- for i, line in enumerate(lines[:5]):
- logging.info(f"Line {i}: {line.strip()}")
-
- # The first line contains the actual column headers
- # No need to skip rows for IBTrACS v04r01
- df = pd.read_csv(file_path, nrows=5)
- logging.info(f"Columns from first row: {list(df.columns)}")
-
- return list(df.columns)
- except Exception as e:
- logging.error(f"Error examining IBTrACS structure: {e}")
- return None
-
-def load_ibtracs_csv_directly(basin='WP'):
- """Load IBTrACS data directly from CSV - FIXED VERSION"""
+def load_ibtracs_csv_directly(basin='ALL'):
+ """Load IBTrACS data directly from CSV - FIXED to load ALL data"""
filename = BASIN_FILES[basin]
local_path = os.path.join(DATA_PATH, filename)
@@ -440,368 +334,328 @@ def load_ibtracs_csv_directly(basin='WP'):
if not os.path.exists(local_path):
downloaded_path = download_ibtracs_file(basin)
if not downloaded_path:
+ logging.error(f"Could not download {basin} basin data")
return None
try:
- # First, examine the structure
- actual_columns = examine_ibtracs_structure(local_path)
- if not actual_columns:
- logging.error("Could not examine IBTrACS file structure")
- return None
-
- # Read IBTrACS CSV - DON'T skip any rows for v04r01
- # The first row contains proper column headers
logging.info(f"Reading IBTrACS CSV file: {local_path}")
- df = pd.read_csv(local_path, low_memory=False) # Don't skip any rows
+ # Read with low_memory=False to ensure proper data types
+ df = pd.read_csv(local_path, low_memory=False)
- logging.info(f"Original columns: {list(df.columns)}")
- logging.info(f"Data shape before cleaning: {df.shape}")
+ logging.info(f"Original data shape: {df.shape}")
+ logging.info(f"Available columns: {list(df.columns)}")
- # Check which essential columns exist
- required_cols = ['SID', 'ISO_TIME', 'LAT', 'LON']
- available_required = [col for col in required_cols if col in df.columns]
-
- if len(available_required) < 2:
- logging.error(f"Missing critical columns. Available: {list(df.columns)}")
+ # Essential columns check
+ required_cols = ['SID', 'LAT', 'LON']
+ missing_cols = [col for col in required_cols if col not in df.columns]
+ if missing_cols:
+ logging.error(f"Missing critical columns: {missing_cols}")
return None
- # Clean and standardize the data with format specification
- if 'ISO_TIME' in df.columns:
- df['ISO_TIME'] = pd.to_datetime(df['ISO_TIME'], format='%Y-%m-%d %H:%M:%S', errors='coerce')
-
- # Clean numeric columns
+ # FIXED: Data cleaning without dropping data unnecessarily
+ # Clean numeric columns carefully
numeric_columns = ['LAT', 'LON', 'WMO_WIND', 'WMO_PRES', 'USA_WIND', 'USA_PRES']
for col in numeric_columns:
if col in df.columns:
df[col] = pd.to_numeric(df[col], errors='coerce')
- # Filter out invalid/missing critical data - BUT KEEP ALL YEARS
- valid_rows = df['LAT'].notna() & df['LON'].notna()
- df = df[valid_rows]
-
- # Ensure LAT/LON are in reasonable ranges
- df = df[(df['LAT'] >= -90) & (df['LAT'] <= 90)]
- df = df[(df['LON'] >= -180) & (df['LON'] <= 180)]
+ # Time handling
+ if 'ISO_TIME' in df.columns:
+ df['ISO_TIME'] = pd.to_datetime(df['ISO_TIME'], errors='coerce')
+
+ # FIXED: Only filter out clearly invalid coordinates
+ valid_coords = (
+ df['LAT'].notna() &
+ df['LON'].notna() &
+ (df['LAT'].between(-90, 90)) &
+ (df['LON'].between(-180, 180))
+ )
+ df = df[valid_coords]
- # Add basin info if missing
+ # Add missing columns with defaults
if 'BASIN' not in df.columns:
- df['BASIN'] = basin
+ if 'SID' in df.columns:
+ df['BASIN'] = df['SID'].str[:2]
+ else:
+ df['BASIN'] = basin
- # Add default columns if missing
if 'NAME' not in df.columns:
df['NAME'] = 'UNNAMED'
if 'SEASON' not in df.columns and 'ISO_TIME' in df.columns:
df['SEASON'] = df['ISO_TIME'].dt.year
+ elif 'SEASON' not in df.columns:
+ # Extract year from SID if possible
+ if 'SID' in df.columns:
+ try:
+ df['SEASON'] = df['SID'].str.extract(r'(\d{4})').astype(float)
+ except:
+ df['SEASON'] = 2000 # Default year
logging.info(f"Successfully loaded {len(df)} records from {basin} basin")
+ logging.info(f"Final data shape: {df.shape}")
return df
except Exception as e:
logging.error(f"Error reading IBTrACS CSV file: {e}")
+ import traceback
+ traceback.print_exc()
return None
-def load_ibtracs_data_fixed():
- """FIXED: Load ALL AVAILABLE BASIN DATA without restrictions"""
- ibtracs_data = {}
+def load_all_ibtracs_data():
+ """Load ALL available IBTrACS data - FIXED to never use fallback"""
+ all_data = []
- # Load ALL basins available
- all_basins = ['WP', 'EP', 'NA', 'SP', 'SI', 'NI'] # All available basins
+ # Try to load the ALL basin file first (contains all basins)
+ try:
+ logging.info("Attempting to load ALL basin data...")
+ all_basin_data = load_ibtracs_csv_directly('ALL')
+ if all_basin_data is not None and not all_basin_data.empty:
+ logging.info(f"Successfully loaded ALL basin data: {len(all_basin_data)} records")
+ return all_basin_data
+ except Exception as e:
+ logging.warning(f"Failed to load ALL basin data: {e}")
- for basin in all_basins:
+ # If ALL basin fails, load individual basins
+ basins_to_load = ['WP', 'EP', 'NA']
+ for basin in basins_to_load:
try:
logging.info(f"Loading {basin} basin data...")
- df = load_ibtracs_csv_directly(basin)
-
- if df is not None and not df.empty:
- ibtracs_data[basin] = df
- logging.info(f"Successfully loaded {basin} basin with {len(df)} records")
+ basin_data = load_ibtracs_csv_directly(basin)
+ if basin_data is not None and not basin_data.empty:
+ basin_data['BASIN'] = basin
+ all_data.append(basin_data)
+ logging.info(f"Successfully loaded {basin} basin: {len(basin_data)} records")
else:
logging.warning(f"No data loaded for basin {basin}")
- ibtracs_data[basin] = None
-
except Exception as e:
logging.error(f"Failed to load basin {basin}: {e}")
- ibtracs_data[basin] = None
- return ibtracs_data
+ if all_data:
+ combined_data = pd.concat(all_data, ignore_index=True)
+ logging.info(f"Combined all basins: {len(combined_data)} total records")
+ return combined_data
+ else:
+ logging.error("No IBTrACS data could be loaded from any basin")
+ return None
def load_data_fixed(oni_path, typhoon_path):
- """FIXED: Load ALL typhoon data regardless of ONI availability"""
- # Load ONI data - OPTIONAL now
- oni_data = pd.DataFrame({'Year': [], 'Jan': [], 'Feb': [], 'Mar': [], 'Apr': [],
- 'May': [], 'Jun': [], 'Jul': [], 'Aug': [], 'Sep': [],
- 'Oct': [], 'Nov': [], 'Dec': []})
+ """FIXED data loading - loads all available typhoon data regardless of ONI"""
- oni_available = False
+ # Load ONI data (optional - typhoon analysis can work without it)
+ oni_data = None
if os.path.exists(oni_path):
try:
oni_data = pd.read_csv(oni_path)
logging.info(f"Successfully loaded ONI data with {len(oni_data)} years")
- oni_available = True
except Exception as e:
logging.error(f"Error loading ONI data: {e}")
- oni_available = False
- else:
- logging.warning(f"ONI data file not found: {oni_path} - proceeding without ONI")
- oni_available = False
- # Load typhoon data - ALWAYS LOAD ALL AVAILABLE DATA
+ if oni_data is None:
+ logging.warning("ONI data not available - creating minimal ONI data")
+ update_oni_data()
+ try:
+ oni_data = pd.read_csv(oni_path)
+ except Exception as e:
+ logging.error(f"Still can't load ONI data: {e}")
+ # Create minimal fallback
+ create_minimal_oni_data(oni_path)
+ oni_data = pd.read_csv(oni_path)
+
+ # FIXED: Load typhoon data - ALWAYS from IBTrACS, never use fallback
typhoon_data = None
- # First, try to load from existing processed file
+ # Try to load from existing processed file first
if os.path.exists(typhoon_path):
try:
typhoon_data = pd.read_csv(typhoon_path, low_memory=False)
- # Ensure basic columns exist and are valid
- required_cols = ['LAT', 'LON']
+ required_cols = ['LAT', 'LON', 'SID']
if all(col in typhoon_data.columns for col in required_cols):
if 'ISO_TIME' in typhoon_data.columns:
typhoon_data['ISO_TIME'] = pd.to_datetime(typhoon_data['ISO_TIME'], errors='coerce')
logging.info(f"Loaded processed typhoon data with {len(typhoon_data)} records")
+ # Validate the data quality
+ valid_records = typhoon_data['LAT'].notna() & typhoon_data['LON'].notna()
+ if valid_records.sum() / len(typhoon_data) > 0.8: # If >80% valid, use it
+ typhoon_data = typhoon_data[valid_records]
+ else:
+ logging.warning("Processed data quality poor, reloading from IBTrACS")
+ typhoon_data = None
else:
- logging.warning("Processed typhoon data missing required columns, will reload from IBTrACS")
+ logging.warning("Processed typhoon data missing required columns, reloading from IBTrACS")
typhoon_data = None
except Exception as e:
logging.error(f"Error loading processed typhoon data: {e}")
typhoon_data = None
- # If no valid processed data, load from IBTrACS - LOAD ALL BASINS
+ # FIXED: Load from IBTrACS if needed - NO FALLBACK ALLOWED
if typhoon_data is None or typhoon_data.empty:
- logging.info("Loading ALL available typhoon data from IBTrACS...")
- ibtracs_data = load_ibtracs_data_fixed()
-
- # Combine ALL available basin data
- combined_dfs = []
- for basin in ['WP', 'EP', 'NA', 'SP', 'SI', 'NI']:
- if basin in ibtracs_data and ibtracs_data[basin] is not None:
- df = ibtracs_data[basin].copy()
- df['BASIN'] = basin
- combined_dfs.append(df)
- logging.info(f"Added {len(df)} records from {basin} basin")
-
- if combined_dfs:
- typhoon_data = pd.concat(combined_dfs, ignore_index=True)
- # Ensure SID has proper format
- if 'SID' not in typhoon_data.columns and 'BASIN' in typhoon_data.columns:
- # Create SID from basin and other identifiers if missing
- if 'SEASON' in typhoon_data.columns:
- typhoon_data['SID'] = (typhoon_data['BASIN'].astype(str) +
- typhoon_data.index.astype(str).str.zfill(2) +
- typhoon_data['SEASON'].astype(str))
- else:
- typhoon_data['SID'] = (typhoon_data['BASIN'].astype(str) +
- typhoon_data.index.astype(str).str.zfill(2) +
- '2000')
-
- # Save the processed data for future use
- safe_file_write(typhoon_path, typhoon_data, get_fallback_data_dir())
- logging.info(f"Combined IBTrACS data: {len(typhoon_data)} total records from all basins")
- else:
- logging.error("Failed to load any IBTrACS basin data")
- # Create comprehensive fallback data
- typhoon_data = create_comprehensive_fallback_typhoon_data()
+ logging.info("Loading typhoon data from IBTrACS...")
+ typhoon_data = load_all_ibtracs_data()
+
+ if typhoon_data is None or typhoon_data.empty:
+ raise Exception("CRITICAL ERROR: No typhoon data could be loaded from IBTrACS. Check internet connection and IBTrACS availability.")
+
+ # Process and save the loaded data
+ # Ensure SID exists and is properly formatted
+ if 'SID' not in typhoon_data.columns:
+ logging.error("CRITICAL: No SID column in typhoon data")
+ raise Exception("Typhoon data missing SID column")
+
+ # Save the processed data for future use
+ try:
+ safe_file_write(typhoon_path, typhoon_data)
+ logging.info(f"Saved processed typhoon data: {len(typhoon_data)} records")
+ except Exception as e:
+ logging.warning(f"Could not save processed data: {e}")
- # Final validation of typhoon data
- if typhoon_data is not None:
- # Ensure required columns exist with fallback values
+ # FIXED: Final validation and enhancement
+ if typhoon_data is not None and not typhoon_data.empty:
+ # Ensure required columns exist with proper defaults
required_columns = {
- 'SID': 'UNKNOWN',
+ 'SID': lambda: f"UNKNOWN_{typhoon_data.index}",
'ISO_TIME': pd.Timestamp('2000-01-01'),
- 'LAT': 0.0,
- 'LON': 0.0,
- 'USA_WIND': np.nan,
- 'USA_PRES': np.nan,
+ 'LAT': 20.0,
+ 'LON': 140.0,
+ 'USA_WIND': 30.0,
+ 'USA_PRES': 1013.0,
'NAME': 'UNNAMED',
- 'SEASON': 2000
+ 'SEASON': 2000,
+ 'BASIN': 'WP'
}
for col, default_val in required_columns.items():
if col not in typhoon_data.columns:
- typhoon_data[col] = default_val
- logging.warning(f"Added missing column {col} with default value")
+ if callable(default_val):
+ typhoon_data[col] = default_val()
+ else:
+ typhoon_data[col] = default_val
+ logging.warning(f"Added missing column {col}")
+
+ # Ensure proper data types
+ numeric_cols = ['LAT', 'LON', 'USA_WIND', 'USA_PRES', 'SEASON']
+ for col in numeric_cols:
+ if col in typhoon_data.columns:
+ typhoon_data[col] = pd.to_numeric(typhoon_data[col], errors='coerce')
- # Ensure data types
if 'ISO_TIME' in typhoon_data.columns:
typhoon_data['ISO_TIME'] = pd.to_datetime(typhoon_data['ISO_TIME'], errors='coerce')
- typhoon_data['LAT'] = pd.to_numeric(typhoon_data['LAT'], errors='coerce')
- typhoon_data['LON'] = pd.to_numeric(typhoon_data['LON'], errors='coerce')
- typhoon_data['USA_WIND'] = pd.to_numeric(typhoon_data['USA_WIND'], errors='coerce')
- typhoon_data['USA_PRES'] = pd.to_numeric(typhoon_data['USA_PRES'], errors='coerce')
- # Remove rows with invalid coordinates
- valid_coords = typhoon_data['LAT'].notna() & typhoon_data['LON'].notna()
- typhoon_data = typhoon_data[valid_coords]
+ # Remove only clearly invalid records
+ valid_mask = (
+ typhoon_data['LAT'].notna() &
+ typhoon_data['LON'].notna() &
+ typhoon_data['LAT'].between(-90, 90) &
+ typhoon_data['LON'].between(-180, 180)
+ )
- logging.info(f"Final typhoon data: {len(typhoon_data)} records after validation")
+ original_count = len(typhoon_data)
+ typhoon_data = typhoon_data[valid_mask]
+ logging.info(f"Final typhoon data: {len(typhoon_data)} records (removed {original_count - len(typhoon_data)} invalid)")
- # Log basin distribution
- if 'BASIN' in typhoon_data.columns:
- basin_counts = typhoon_data['BASIN'].value_counts()
- logging.info(f"Basin distribution: {dict(basin_counts)}")
-
- return oni_data, typhoon_data
-
-def create_comprehensive_fallback_typhoon_data():
- """Create comprehensive fallback typhoon data - ALL BASINS, ALL YEARS"""
- # Extended date range and multiple basins
- dates = pd.date_range(start='1851-01-01', end='2025-12-31', freq='D')
+ if len(typhoon_data) == 0:
+ raise Exception("CRITICAL ERROR: All typhoon data was filtered out - check data quality")
- # Define basin parameters
- basin_configs = {
- 'WP': {'lat_range': (5, 45), 'lon_range': (100, 180), 'count': 200},
- 'EP': {'lat_range': (5, 35), 'lon_range': (-180, -80), 'count': 150},
- 'NA': {'lat_range': (5, 45), 'lon_range': (-100, -10), 'count': 100},
- 'SP': {'lat_range': (-40, -5), 'lon_range': (135, 240), 'count': 80},
- 'SI': {'lat_range': (-40, -5), 'lon_range': (30, 135), 'count': 70},
- 'NI': {'lat_range': (5, 30), 'lon_range': (40, 100), 'count': 50}
- }
-
- data = []
-
- for basin, config in basin_configs.items():
- # Generate storms for this basin
- storm_dates = dates[np.random.choice(len(dates), size=config['count'], replace=False)]
-
- for i, date in enumerate(storm_dates):
- # Create realistic storm tracks for this basin
- lat_min, lat_max = config['lat_range']
- lon_min, lon_max = config['lon_range']
-
- base_lat = np.random.uniform(lat_min, lat_max)
- base_lon = np.random.uniform(lon_min, lon_max)
-
- # Generate 10-100 data points per storm (variable track lengths)
- track_length = np.random.randint(10, 101)
- sid = f"{basin}{i+1:02d}{date.year}"
-
- for j in range(track_length):
- # Realistic movement patterns
- if basin in ['WP', 'EP', 'NA']: # Northern Hemisphere
- lat = base_lat + j * 0.15 + np.random.normal(0, 0.1)
- if basin == 'WP':
- lon = base_lon + j * 0.2 + np.random.normal(0, 0.1)
- else:
- lon = base_lon - j * 0.2 + np.random.normal(0, 0.1)
- else: # Southern Hemisphere
- lat = base_lat - j * 0.15 + np.random.normal(0, 0.1)
- lon = base_lon + j * 0.2 + np.random.normal(0, 0.1)
-
- # Realistic intensity progression
- if j < track_length * 0.3: # Development phase
- wind = max(20, 25 + j * 3 + np.random.normal(0, 5))
- elif j < track_length * 0.7: # Mature phase
- wind = max(30, 60 + np.random.normal(0, 20))
- else: # Decay phase
- wind = max(20, 80 - (j - track_length * 0.7) * 2 + np.random.normal(0, 10))
-
- pres = max(900, 1020 - wind + np.random.normal(0, 8))
-
- data.append({
- 'SID': sid,
- 'ISO_TIME': date + pd.Timedelta(hours=j*6),
- 'NAME': f'FALLBACK_{basin}_{i+1}',
- 'SEASON': date.year,
- 'LAT': lat,
- 'LON': lon,
- 'USA_WIND': wind,
- 'USA_PRES': pres,
- 'BASIN': basin
- })
+ else:
+ raise Exception("CRITICAL ERROR: No typhoon data available after all loading attempts")
- df = pd.DataFrame(data)
- logging.info(f"Created comprehensive fallback typhoon data with {len(df)} records across all basins")
- return df
+ return oni_data, typhoon_data
def process_oni_data(oni_data):
- """Process ONI data into long format - HANDLE EMPTY DATA"""
+ """Process ONI data into long format"""
if oni_data is None or oni_data.empty:
- # Create minimal ONI data
- logging.warning("No ONI data available, creating minimal dataset")
- years = range(1950, 2026)
- data = []
- for year in years:
- for month_num, month_name in enumerate(['Jan','Feb','Mar','Apr','May','Jun',
- 'Jul','Aug','Sep','Oct','Nov','Dec'], 1):
- data.append({
- 'Year': year,
- 'Month': f'{month_num:02d}',
- 'ONI': 0.0,
- 'Date': pd.to_datetime(f'{year}-{month_num:02d}-01')
- })
- return pd.DataFrame(data)
+ # Return minimal ONI data that won't break merging
+ return pd.DataFrame({
+ 'Year': [2000], 'Month': ['01'], 'ONI': [0.0],
+ 'Date': [pd.Timestamp('2000-01-01')]
+ })
oni_long = oni_data.melt(id_vars=['Year'], var_name='Month', value_name='ONI')
month_map = {'Jan':'01','Feb':'02','Mar':'03','Apr':'04','May':'05','Jun':'06',
'Jul':'07','Aug':'08','Sep':'09','Oct':'10','Nov':'11','Dec':'12'}
oni_long['Month'] = oni_long['Month'].map(month_map)
oni_long['Date'] = pd.to_datetime(oni_long['Year'].astype(str)+'-'+oni_long['Month']+'-01')
- oni_long['ONI'] = pd.to_numeric(oni_long['ONI'], errors='coerce').fillna(0.0)
+ oni_long['ONI'] = pd.to_numeric(oni_long['ONI'], errors='coerce').fillna(0)
return oni_long
def process_typhoon_data(typhoon_data):
- """Process typhoon data - ALWAYS PRESERVE ALL DATA"""
+ """Process typhoon data - FIXED to preserve all data"""
if typhoon_data is None or typhoon_data.empty:
- return pd.DataFrame()
+ raise Exception("No typhoon data to process")
- # Process without filtering based on ONI availability
+ # Ensure proper data types
if 'ISO_TIME' in typhoon_data.columns:
typhoon_data['ISO_TIME'] = pd.to_datetime(typhoon_data['ISO_TIME'], errors='coerce')
- typhoon_data['USA_WIND'] = pd.to_numeric(typhoon_data['USA_WIND'], errors='coerce')
- typhoon_data['USA_PRES'] = pd.to_numeric(typhoon_data['USA_PRES'], errors='coerce')
- typhoon_data['LON'] = pd.to_numeric(typhoon_data['LON'], errors='coerce')
- # Log basin information
- if 'SID' in typhoon_data.columns:
- basins = typhoon_data['SID'].str[:2].unique()
- logging.info(f"Available basins in typhoon data: {sorted(basins)}")
+ numeric_cols = ['USA_WIND', 'USA_PRES', 'LON', 'LAT']
+ for col in numeric_cols:
+ if col in typhoon_data.columns:
+ typhoon_data[col] = pd.to_numeric(typhoon_data[col], errors='coerce')
- # Get maximum values per storm
- typhoon_max = typhoon_data.groupby('SID').agg({
- 'USA_WIND':'max','USA_PRES':'min','ISO_TIME':'first','SEASON':'first','NAME':'first',
- 'LAT':'first','LON':'first'
- }).reset_index()
+ logging.info(f"Processing {len(typhoon_data)} typhoon records")
+ # Get maximum values per storm
+ agg_dict = {}
+ if 'USA_WIND' in typhoon_data.columns:
+ agg_dict['USA_WIND'] = 'max'
+ if 'USA_PRES' in typhoon_data.columns:
+ agg_dict['USA_PRES'] = 'min'
+ if 'ISO_TIME' in typhoon_data.columns:
+ agg_dict['ISO_TIME'] = 'first'
+ if 'SEASON' in typhoon_data.columns:
+ agg_dict['SEASON'] = 'first'
+ if 'NAME' in typhoon_data.columns:
+ agg_dict['NAME'] = 'first'
+ if 'LAT' in typhoon_data.columns:
+ agg_dict['LAT'] = 'first'
+ if 'LON' in typhoon_data.columns:
+ agg_dict['LON'] = 'first'
+
+ typhoon_max = typhoon_data.groupby('SID').agg(agg_dict).reset_index()
+
+ # Add time-based columns for merging
if 'ISO_TIME' in typhoon_max.columns:
typhoon_max['Month'] = typhoon_max['ISO_TIME'].dt.strftime('%m')
typhoon_max['Year'] = typhoon_max['ISO_TIME'].dt.year
else:
- # Fallback if no ISO_TIME
- typhoon_max['Month'] = '01'
- typhoon_max['Year'] = typhoon_max['SEASON']
+ # Use SEASON if available, otherwise default
+ if 'SEASON' in typhoon_max.columns:
+ typhoon_max['Year'] = typhoon_max['SEASON']
+ else:
+ typhoon_max['Year'] = 2000
+ typhoon_max['Month'] = '01' # Default month
- # Categorize ALL storms (including very weak ones)
- typhoon_max['Category'] = typhoon_max['USA_WIND'].apply(categorize_typhoon_enhanced)
+ # Add category
+ if 'USA_WIND' in typhoon_max.columns:
+ typhoon_max['Category'] = typhoon_max['USA_WIND'].apply(categorize_typhoon_enhanced)
+ else:
+ typhoon_max['Category'] = 'Unknown'
logging.info(f"Processed {len(typhoon_max)} unique storms")
return typhoon_max
def merge_data(oni_long, typhoon_max):
- """FIXED: Merge data but KEEP ALL TYPHOON DATA even without ONI"""
+ """Merge ONI and typhoon data - FIXED to preserve typhoon data even without ONI"""
if typhoon_max is None or typhoon_max.empty:
- return pd.DataFrame()
+ raise Exception("No typhoon data to merge")
if oni_long is None or oni_long.empty:
- # No ONI data available - add dummy ONI values
- logging.warning("No ONI data available - adding neutral ONI values")
+ # If no ONI data, add default ONI values
+ logging.warning("No ONI data available - using neutral values")
typhoon_max['ONI'] = 0.0
return typhoon_max
- # Use LEFT JOIN to keep all typhoon data
- merged = pd.merge(typhoon_max, oni_long, on=['Year','Month'], how='left')
+ # Merge with ONI data
+ merged = pd.merge(typhoon_max, oni_long, on=['Year', 'Month'], how='left')
- # Fill missing ONI values with neutral (0.0)
+ # Fill missing ONI values with neutral
merged['ONI'] = merged['ONI'].fillna(0.0)
- logging.info(f"Merged data: {len(merged)} storms total")
- missing_oni = merged['ONI'].isna().sum()
- if missing_oni > 0:
- logging.info(f"Filled {missing_oni} missing ONI values with neutral (0.0)")
-
+ logging.info(f"Merged data: {len(merged)} storms with ONI values")
return merged
# -----------------------------
-# ENHANCED: Categorization Functions - FIXED TAIWAN CLASSIFICATION
+# Enhanced Categorization Functions
# -----------------------------
def categorize_typhoon_enhanced(wind_speed):
@@ -809,75 +663,49 @@ def categorize_typhoon_enhanced(wind_speed):
if pd.isna(wind_speed):
return 'Unknown'
- # Convert to knots if in m/s (some datasets use m/s)
if wind_speed < 10: # Likely in m/s, convert to knots
wind_speed = wind_speed * 1.94384
- # FIXED thresholds to include TD
- if wind_speed < 34: # Below 34 knots = Tropical Depression
+ if wind_speed < 34:
return 'Tropical Depression'
- elif wind_speed < 64: # 34-63 knots = Tropical Storm
+ elif wind_speed < 64:
return 'Tropical Storm'
- elif wind_speed < 83: # 64-82 knots = Category 1 Typhoon
+ elif wind_speed < 83:
return 'C1 Typhoon'
- elif wind_speed < 96: # 83-95 knots = Category 2 Typhoon
+ elif wind_speed < 96:
return 'C2 Typhoon'
- elif wind_speed < 113: # 96-112 knots = Category 3 Strong Typhoon
+ elif wind_speed < 113:
return 'C3 Strong Typhoon'
- elif wind_speed < 137: # 113-136 knots = Category 4 Very Strong Typhoon
+ elif wind_speed < 137:
return 'C4 Very Strong Typhoon'
- else: # 137+ knots = Category 5 Super Typhoon
+ else:
return 'C5 Super Typhoon'
def categorize_typhoon_taiwan_fixed(wind_speed):
- """
- FIXED Taiwan categorization system based on CMA 2006 standards
- Reference: CMA Tropical Cyclone Data Center official classification
- """
+ """FIXED Taiwan categorization system based on CMA 2006 standards"""
if pd.isna(wind_speed):
return 'Tropical Depression'
- # Convert from knots to m/s if input appears to be in knots
if wind_speed > 50: # Likely in knots, convert to m/s
wind_speed_ms = wind_speed * 0.514444
else:
wind_speed_ms = wind_speed
- # CMA 2006 Classification Standards (used by Taiwan CWA)
if wind_speed_ms >= 51.0:
- return 'Super Typhoon' # ≥51.0 m/s (≥99.2 kt)
+ return 'Super Typhoon'
elif wind_speed_ms >= 41.5:
- return 'Severe Typhoon' # 41.5–50.9 m/s (80.7–99.1 kt)
+ return 'Severe Typhoon'
elif wind_speed_ms >= 32.7:
- return 'Typhoon' # 32.7–41.4 m/s (63.6–80.6 kt)
+ return 'Typhoon'
elif wind_speed_ms >= 24.5:
- return 'Severe Tropical Storm' # 24.5–32.6 m/s (47.6–63.5 kt)
+ return 'Severe Tropical Storm'
elif wind_speed_ms >= 17.2:
- return 'Tropical Storm' # 17.2–24.4 m/s (33.4–47.5 kt)
- else:
- return 'Tropical Depression' # < 17.2 m/s (< 33.4 kt)
-
-# Original function for backward compatibility
-def categorize_typhoon(wind_speed):
- """Original categorize typhoon function for backward compatibility"""
- return categorize_typhoon_enhanced(wind_speed)
-
-def classify_enso_phases(oni_value):
- """Classify ENSO phases based on ONI value - HANDLE MISSING VALUES"""
- if isinstance(oni_value, pd.Series):
- oni_value = oni_value.iloc[0]
- if pd.isna(oni_value):
- return 'Neutral' # Default to neutral for missing ONI
- if oni_value >= 0.5:
- return 'El Nino'
- elif oni_value <= -0.5:
- return 'La Nina'
+ return 'Tropical Storm'
else:
- return 'Neutral'
+ return 'Tropical Depression'
-# FIXED: Combined categorization function
def categorize_typhoon_by_standard_fixed(wind_speed, standard='atlantic'):
- """FIXED categorization function supporting both standards with correct Taiwan thresholds"""
+ """FIXED categorization function supporting both standards"""
if pd.isna(wind_speed):
return 'Tropical Depression', '#808080'
@@ -885,9 +713,7 @@ def categorize_typhoon_by_standard_fixed(wind_speed, standard='atlantic'):
category = categorize_typhoon_taiwan_fixed(wind_speed)
color = taiwan_color_map_fixed.get(category, '#808080')
return category, color
-
else:
- # Atlantic/International standard (unchanged)
if wind_speed >= 137:
return 'C5 Super Typhoon', '#FF0000'
elif wind_speed >= 113:
@@ -903,8 +729,21 @@ def categorize_typhoon_by_standard_fixed(wind_speed, standard='atlantic'):
else:
return 'Tropical Depression', '#808080'
+def classify_enso_phases(oni_value):
+ """Classify ENSO phases based on ONI value"""
+ if isinstance(oni_value, pd.Series):
+ oni_value = oni_value.iloc[0]
+ if pd.isna(oni_value):
+ return 'Neutral'
+ if oni_value >= 0.5:
+ return 'El Nino'
+ elif oni_value <= -0.5:
+ return 'La Nina'
+ else:
+ return 'Neutral'
+
# -----------------------------
-# FIXED: ADVANCED ML FEATURES WITH ROBUST ERROR HANDLING
+# FIXED: Advanced ML Features
# -----------------------------
def extract_storm_features(typhoon_data):
@@ -914,7 +753,6 @@ def extract_storm_features(typhoon_data):
logging.error("No typhoon data provided for feature extraction")
return None
- # Basic features - ensure columns exist
basic_features = []
for sid in typhoon_data['SID'].unique():
storm_data = typhoon_data[typhoon_data['SID'] == sid].copy()
@@ -922,7 +760,6 @@ def extract_storm_features(typhoon_data):
if len(storm_data) == 0:
continue
- # Initialize feature dict with safe defaults
features = {'SID': sid}
# Wind statistics
@@ -972,16 +809,13 @@ def extract_storm_features(typhoon_data):
features['LON_max'] = lon_values.max()
features['LON_min'] = lon_values.min()
- # Genesis location (first valid position)
features['genesis_lat'] = lat_values.iloc[0]
features['genesis_lon'] = lon_values.iloc[0]
- features['genesis_intensity'] = features['USA_WIND_mean'] # Use mean as fallback
+ features['genesis_intensity'] = features['USA_WIND_mean']
- # Track characteristics
features['lat_range'] = lat_values.max() - lat_values.min()
features['lon_range'] = lon_values.max() - lon_values.min()
- # Calculate track distance
if len(lat_values) > 1:
distances = []
for i in range(1, len(lat_values)):
@@ -994,7 +828,6 @@ def extract_storm_features(typhoon_data):
features['total_distance'] = 0
features['avg_speed'] = 0
- # Track curvature
if len(lat_values) > 2:
bearing_changes = []
for i in range(1, len(lat_values)-1):
@@ -1012,7 +845,6 @@ def extract_storm_features(typhoon_data):
else:
features['avg_curvature'] = 0
else:
- # Default location values
features.update({
'LAT_mean': 20, 'LAT_std': 0, 'LAT_max': 20, 'LAT_min': 20,
'LON_mean': 140, 'LON_std': 0, 'LON_max': 140, 'LON_min': 140,
@@ -1020,26 +852,14 @@ def extract_storm_features(typhoon_data):
'lat_range': 0, 'lon_range': 0, 'total_distance': 0,
'avg_speed': 0, 'avg_curvature': 0
})
- else:
- # Default location values if columns missing
- features.update({
- 'LAT_mean': 20, 'LAT_std': 0, 'LAT_max': 20, 'LAT_min': 20,
- 'LON_mean': 140, 'LON_std': 0, 'LON_max': 140, 'LON_min': 140,
- 'genesis_lat': 20, 'genesis_lon': 140, 'genesis_intensity': 30,
- 'lat_range': 0, 'lon_range': 0, 'total_distance': 0,
- 'avg_speed': 0, 'avg_curvature': 0
- })
- # Track length
features['track_length'] = len(storm_data)
- # Add seasonal information
if 'SEASON' in storm_data.columns:
features['season'] = storm_data['SEASON'].iloc[0]
else:
features['season'] = 2000
- # Add basin information
if 'BASIN' in storm_data.columns:
features['basin'] = storm_data['BASIN'].iloc[0]
elif 'SID' in storm_data.columns:
@@ -1053,17 +873,13 @@ def extract_storm_features(typhoon_data):
logging.error("No valid storm features could be extracted")
return None
- # Convert to DataFrame
storm_features = pd.DataFrame(basic_features)
- # Ensure all numeric columns are properly typed
numeric_columns = [col for col in storm_features.columns if col not in ['SID', 'basin']]
for col in numeric_columns:
storm_features[col] = pd.to_numeric(storm_features[col], errors='coerce').fillna(0)
logging.info(f"Successfully extracted features for {len(storm_features)} storms")
- logging.info(f"Feature columns: {list(storm_features.columns)}")
-
return storm_features
except Exception as e:
@@ -1073,38 +889,30 @@ def extract_storm_features(typhoon_data):
return None
def perform_dimensionality_reduction(storm_features, method='umap', n_components=2):
- """Perform UMAP or t-SNE dimensionality reduction - FIXED VERSION"""
+ """Perform UMAP or t-SNE dimensionality reduction"""
try:
if storm_features is None or storm_features.empty:
raise ValueError("No storm features provided")
- # Select numeric features for clustering - FIXED
feature_cols = []
for col in storm_features.columns:
if col not in ['SID', 'basin'] and storm_features[col].dtype in ['float64', 'int64']:
- # Check if column has valid data
valid_data = storm_features[col].dropna()
- if len(valid_data) > 0 and valid_data.std() > 0: # Only include columns with variance
+ if len(valid_data) > 0 and valid_data.std() > 0:
feature_cols.append(col)
if len(feature_cols) == 0:
raise ValueError("No valid numeric features found for clustering")
- logging.info(f"Using {len(feature_cols)} features for clustering: {feature_cols}")
-
X = storm_features[feature_cols].fillna(0)
- # Check if we have enough samples
if len(X) < 2:
raise ValueError("Need at least 2 storms for clustering")
- # Standardize features
scaler = StandardScaler()
X_scaled = scaler.fit_transform(X)
- # Perform dimensionality reduction
if method.lower() == 'umap' and UMAP_AVAILABLE and len(X_scaled) >= 4:
- # UMAP parameters optimized for typhoon data - fixed warnings
n_neighbors = min(15, len(X_scaled) - 1)
reducer = umap.UMAP(
n_components=n_components,
@@ -1112,12 +920,11 @@ def perform_dimensionality_reduction(storm_features, method='umap', n_components
min_dist=0.1,
metric='euclidean',
random_state=42,
- n_jobs=1 # Explicitly set to avoid warning
+ n_jobs=1
)
elif method.lower() == 'tsne' and len(X_scaled) >= 4:
- # t-SNE parameters
perplexity = min(30, len(X_scaled) // 4)
- perplexity = max(1, perplexity) # Ensure perplexity is at least 1
+ perplexity = max(1, perplexity)
reducer = TSNE(
n_components=n_components,
perplexity=perplexity,
@@ -1126,14 +933,11 @@ def perform_dimensionality_reduction(storm_features, method='umap', n_components
random_state=42
)
else:
- # Fallback to PCA
reducer = PCA(n_components=n_components, random_state=42)
- # Fit and transform
embedding = reducer.fit_transform(X_scaled)
logging.info(f"Dimensionality reduction successful: {X_scaled.shape} -> {embedding.shape}")
-
return embedding, feature_cols, scaler
except Exception as e:
@@ -1141,17 +945,15 @@ def perform_dimensionality_reduction(storm_features, method='umap', n_components
raise
def cluster_storms_data(embedding, method='dbscan', eps=0.5, min_samples=3):
- """Cluster storms based on their embedding - FIXED NAME VERSION"""
+ """Cluster storms based on their embedding"""
try:
if len(embedding) < 2:
- return np.array([0] * len(embedding)) # Single cluster for insufficient data
+ return np.array([0] * len(embedding))
if method.lower() == 'dbscan':
- # Adjust min_samples based on data size
min_samples = min(min_samples, max(2, len(embedding) // 5))
clusterer = DBSCAN(eps=eps, min_samples=min_samples)
elif method.lower() == 'kmeans':
- # Adjust n_clusters based on data size
n_clusters = min(5, max(2, len(embedding) // 3))
clusterer = KMeans(n_clusters=n_clusters, random_state=42)
else:
@@ -1160,18 +962,15 @@ def cluster_storms_data(embedding, method='dbscan', eps=0.5, min_samples=3):
clusters = clusterer.fit_predict(embedding)
logging.info(f"Clustering complete: {len(np.unique(clusters))} clusters found")
-
return clusters
except Exception as e:
logging.error(f"Error in cluster_storms_data: {e}")
- # Return single cluster as fallback
return np.array([0] * len(embedding))
def create_separate_clustering_plots(storm_features, typhoon_data, method='umap'):
- """Create separate plots for clustering analysis - ENHANCED CLARITY VERSION"""
+ """Create separate plots for clustering analysis"""
try:
- # Validate inputs
if storm_features is None or storm_features.empty:
raise ValueError("No storm features available for clustering")
@@ -1180,23 +979,17 @@ def create_separate_clustering_plots(storm_features, typhoon_data, method='umap'
logging.info(f"Starting clustering visualization with {len(storm_features)} storms")
- # Perform dimensionality reduction
embedding, feature_cols, scaler = perform_dimensionality_reduction(storm_features, method)
-
- # Perform clustering
cluster_labels = cluster_storms_data(embedding, 'dbscan')
- # Add clustering results to storm features
storm_features_viz = storm_features.copy()
storm_features_viz['cluster'] = cluster_labels
storm_features_viz['dim1'] = embedding[:, 0]
storm_features_viz['dim2'] = embedding[:, 1]
- # Merge with typhoon data for additional info - SAFE MERGE
try:
storm_info = typhoon_data.groupby('SID').first()[['NAME', 'SEASON']].reset_index()
storm_features_viz = storm_features_viz.merge(storm_info, on='SID', how='left')
- # Fill missing values
storm_features_viz['NAME'] = storm_features_viz['NAME'].fillna('UNNAMED')
storm_features_viz['SEASON'] = storm_features_viz['SEASON'].fillna(2000)
except Exception as merge_error:
@@ -1204,14 +997,12 @@ def create_separate_clustering_plots(storm_features, typhoon_data, method='umap'
storm_features_viz['NAME'] = 'UNNAMED'
storm_features_viz['SEASON'] = 2000
- # Get unique clusters and assign distinct colors
unique_clusters = sorted([c for c in storm_features_viz['cluster'].unique() if c != -1])
noise_count = len(storm_features_viz[storm_features_viz['cluster'] == -1])
- # 1. Enhanced clustering scatter plot with clear cluster identification
+ # 1. Clustering scatter plot
fig_cluster = go.Figure()
- # Add noise points first
if noise_count > 0:
noise_data = storm_features_viz[storm_features_viz['cluster'] == -1]
fig_cluster.add_trace(
@@ -1236,7 +1027,6 @@ def create_separate_clustering_plots(storm_features, typhoon_data, method='umap'
)
)
- # Add clusters with distinct colors and shapes
cluster_symbols = ['circle', 'square', 'diamond', 'triangle-up', 'triangle-down',
'pentagon', 'hexagon', 'star', 'cross', 'circle-open']
@@ -1277,17 +1067,15 @@ def create_separate_clustering_plots(storm_features, typhoon_data, method='umap'
showlegend=True
)
- # 2. ENHANCED route map with cluster legends and clearer representation
+ # 2. Route map
fig_routes = go.Figure()
- # Create a comprehensive legend showing cluster characteristics
cluster_info_text = []
for i, cluster in enumerate(unique_clusters):
cluster_storm_ids = storm_features_viz[storm_features_viz['cluster'] == cluster]['SID'].tolist()
color = CLUSTER_COLORS[i % len(CLUSTER_COLORS)]
- # Get cluster statistics for legend
cluster_data = storm_features_viz[storm_features_viz['cluster'] == cluster]
avg_intensity = cluster_data['USA_WIND_max'].mean() if 'USA_WIND_max' in cluster_data.columns else 0
avg_pressure = cluster_data['USA_PRES_min'].mean() if 'USA_PRES_min' in cluster_data.columns else 1000
@@ -1297,13 +1085,11 @@ def create_separate_clustering_plots(storm_features, typhoon_data, method='umap'
f"Avg: {avg_intensity:.0f}kt/{avg_pressure:.0f}hPa"
)
- # Add multiple storms per cluster with clear identification
storms_added = 0
- for j, sid in enumerate(cluster_storm_ids[:8]): # Show up to 8 storms per cluster
+ for j, sid in enumerate(cluster_storm_ids[:8]):
try:
storm_track = typhoon_data[typhoon_data['SID'] == sid].sort_values('ISO_TIME')
if len(storm_track) > 1:
- # Ensure valid coordinates
valid_coords = storm_track['LAT'].notna() & storm_track['LON'].notna()
storm_track = storm_track[valid_coords]
@@ -1311,10 +1097,9 @@ def create_separate_clustering_plots(storm_features, typhoon_data, method='umap'
storm_name = storm_track['NAME'].iloc[0] if pd.notna(storm_track['NAME'].iloc[0]) else 'UNNAMED'
storm_season = storm_track['SEASON'].iloc[0] if 'SEASON' in storm_track.columns else 'Unknown'
- # Vary line style for different storms in same cluster
line_styles = ['solid', 'dash', 'dot', 'dashdot']
line_style = line_styles[j % len(line_styles)]
- line_width = 3 if j == 0 else 2 # First storm thicker
+ line_width = 3 if j == 0 else 2
fig_routes.add_trace(
go.Scattergeo(
@@ -1341,9 +1126,7 @@ def create_separate_clustering_plots(storm_features, typhoon_data, method='umap'
logging.warning(f"Error adding track for storm {sid}: {track_error}")
continue
- # Add cluster centroid marker
if len(cluster_storm_ids) > 0:
- # Calculate average genesis location for cluster
cluster_storm_data = storm_features_viz[storm_features_viz['cluster'] == cluster]
if 'genesis_lat' in cluster_storm_data.columns and 'genesis_lon' in cluster_storm_data.columns:
avg_lat = cluster_storm_data['genesis_lat'].mean()
@@ -1373,7 +1156,6 @@ def create_separate_clustering_plots(storm_features, typhoon_data, method='umap'
)
)
- # Update route map layout with enhanced information and LARGER SIZE
fig_routes.update_layout(
title=f"Storm Routes by {method.upper()} Clusters
Different line styles = different storms in same cluster | Stars = cluster centers",
geo=dict(
@@ -1385,14 +1167,13 @@ def create_separate_clustering_plots(storm_features, typhoon_data, method='umap'
showcoastlines=True,
coastlinecolor="Gray",
center=dict(lat=20, lon=140),
- projection_scale=2.5 # Larger map
+ projection_scale=2.5
),
- height=800, # Much larger height
- width=1200, # Wider map
+ height=800,
+ width=1200,
showlegend=True
)
- # Add cluster info annotation
cluster_summary = "
".join(cluster_info_text)
fig_routes.add_annotation(
text=f"Cluster Summary:
{cluster_summary}",
@@ -1405,7 +1186,7 @@ def create_separate_clustering_plots(storm_features, typhoon_data, method='umap'
borderwidth=1
)
- # 3. Enhanced pressure evolution plot with cluster identification
+ # 3. Pressure evolution plot
fig_pressure = go.Figure()
for i, cluster in enumerate(unique_clusters):
@@ -1413,16 +1194,13 @@ def create_separate_clustering_plots(storm_features, typhoon_data, method='umap'
color = CLUSTER_COLORS[i % len(CLUSTER_COLORS)]
cluster_pressures = []
- for j, sid in enumerate(cluster_storm_ids[:5]): # Limit to 5 storms per cluster
+ for j, sid in enumerate(cluster_storm_ids[:5]):
try:
storm_track = typhoon_data[typhoon_data['SID'] == sid].sort_values('ISO_TIME')
if len(storm_track) > 1 and 'USA_PRES' in storm_track.columns:
pressure_values = pd.to_numeric(storm_track['USA_PRES'], errors='coerce').dropna()
if len(pressure_values) > 0:
storm_name = storm_track['NAME'].iloc[0] if pd.notna(storm_track['NAME'].iloc[0]) else 'UNNAMED'
- time_hours = range(len(pressure_values))
-
- # Normalize time to show relative progression
normalized_time = np.linspace(0, 100, len(pressure_values))
fig_pressure.add_trace(
@@ -1447,7 +1225,6 @@ def create_separate_clustering_plots(storm_features, typhoon_data, method='umap'
except Exception as e:
continue
- # Add cluster average line
if cluster_pressures:
avg_pressure = np.mean(cluster_pressures)
fig_pressure.add_hline(
@@ -1465,7 +1242,7 @@ def create_separate_clustering_plots(storm_features, typhoon_data, method='umap'
height=500
)
- # 4. Enhanced wind evolution plot
+ # 4. Wind evolution plot
fig_wind = go.Figure()
for i, cluster in enumerate(unique_clusters):
@@ -1473,15 +1250,13 @@ def create_separate_clustering_plots(storm_features, typhoon_data, method='umap'
color = CLUSTER_COLORS[i % len(CLUSTER_COLORS)]
cluster_winds = []
- for j, sid in enumerate(cluster_storm_ids[:5]): # Limit to 5 storms per cluster
+ for j, sid in enumerate(cluster_storm_ids[:5]):
try:
storm_track = typhoon_data[typhoon_data['SID'] == sid].sort_values('ISO_TIME')
if len(storm_track) > 1 and 'USA_WIND' in storm_track.columns:
wind_values = pd.to_numeric(storm_track['USA_WIND'], errors='coerce').dropna()
if len(wind_values) > 0:
storm_name = storm_track['NAME'].iloc[0] if pd.notna(storm_track['NAME'].iloc[0]) else 'UNNAMED'
-
- # Normalize time to show relative progression
normalized_time = np.linspace(0, 100, len(wind_values))
fig_wind.add_trace(
@@ -1506,7 +1281,6 @@ def create_separate_clustering_plots(storm_features, typhoon_data, method='umap'
except Exception as e:
continue
- # Add cluster average line
if cluster_winds:
avg_wind = np.mean(cluster_winds)
fig_wind.add_hline(
@@ -1524,7 +1298,7 @@ def create_separate_clustering_plots(storm_features, typhoon_data, method='umap'
height=500
)
- # Generate enhanced cluster statistics with clear explanations
+ # Generate statistics
try:
stats_text = f"ENHANCED {method.upper()} CLUSTER ANALYSIS RESULTS\n" + "="*60 + "\n\n"
stats_text += f"🔍 DIMENSIONALITY REDUCTION: {method.upper()}\n"
@@ -1548,7 +1322,6 @@ def create_separate_clustering_plots(storm_features, typhoon_data, method='umap'
stats_text += f"🎯 CLUSTER {cluster}: {storm_count} storms\n"
stats_text += f" 🎨 Color: {CLUSTER_COLORS[cluster % len(CLUSTER_COLORS)]}\n"
- # Add detailed statistics if available
if 'USA_WIND_max' in cluster_data.columns:
wind_mean = cluster_data['USA_WIND_max'].mean()
wind_std = cluster_data['USA_WIND_max'].std()
@@ -1568,7 +1341,6 @@ def create_separate_clustering_plots(storm_features, typhoon_data, method='umap'
lon_mean = cluster_data['genesis_lon'].mean()
stats_text += f" 🎯 Genesis Region: {lat_mean:.1f}°N, {lon_mean:.1f}°E\n"
- # Add interpretation
if wind_mean < 50:
stats_text += " 💡 Pattern: Weaker storm group\n"
elif wind_mean > 100:
@@ -1578,7 +1350,6 @@ def create_separate_clustering_plots(storm_features, typhoon_data, method='umap'
stats_text += "\n"
- # Add explanation of the analysis
stats_text += "📖 INTERPRETATION GUIDE:\n"
stats_text += f"• {method.upper()} reduces storm characteristics to 2D for visualization\n"
stats_text += "• DBSCAN finds natural groupings without preset number of clusters\n"
@@ -1611,113 +1382,9 @@ def create_separate_clustering_plots(storm_features, typhoon_data, method='umap'
return error_fig, error_fig, error_fig, error_fig, f"Error in clustering: {str(e)}"
# -----------------------------
-# ENHANCED: Advanced Prediction System with Route Forecasting
+# FIXED: Prediction System
# -----------------------------
-def create_advanced_prediction_model(typhoon_data):
- """Create advanced ML model for intensity and route prediction"""
- try:
- if typhoon_data is None or typhoon_data.empty:
- return None, "No data available for model training"
-
- # Prepare training data
- features = []
- targets = []
-
- for sid in typhoon_data['SID'].unique():
- storm_data = typhoon_data[typhoon_data['SID'] == sid].sort_values('ISO_TIME')
-
- if len(storm_data) < 3: # Need at least 3 points for prediction
- continue
-
- for i in range(len(storm_data) - 1):
- current = storm_data.iloc[i]
- next_point = storm_data.iloc[i + 1]
-
- # Extract features (current state)
- feature_row = []
-
- # Current position
- feature_row.extend([
- current.get('LAT', 20),
- current.get('LON', 140)
- ])
-
- # Current intensity
- feature_row.extend([
- current.get('USA_WIND', 30),
- current.get('USA_PRES', 1000)
- ])
-
- # Time features
- if 'ISO_TIME' in current and pd.notna(current['ISO_TIME']):
- month = current['ISO_TIME'].month
- day_of_year = current['ISO_TIME'].dayofyear
- else:
- month = 9 # Peak season default
- day_of_year = 250
-
- feature_row.extend([month, day_of_year])
-
- # Motion features (if previous point exists)
- if i > 0:
- prev = storm_data.iloc[i - 1]
- dlat = current.get('LAT', 20) - prev.get('LAT', 20)
- dlon = current.get('LON', 140) - prev.get('LON', 140)
- speed = np.sqrt(dlat**2 + dlon**2)
- bearing = np.arctan2(dlat, dlon)
- else:
- speed = 0
- bearing = 0
-
- feature_row.extend([speed, bearing])
-
- features.append(feature_row)
-
- # Target: next position and intensity
- targets.append([
- next_point.get('LAT', 20),
- next_point.get('LON', 140),
- next_point.get('USA_WIND', 30)
- ])
-
- if len(features) < 10: # Need sufficient training data
- return None, "Insufficient data for model training"
-
- # Train model
- X = np.array(features)
- y = np.array(targets)
-
- # Split data
- X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)
-
- # Create separate models for position and intensity
- models = {}
-
- # Position model (lat, lon)
- pos_model = RandomForestRegressor(n_estimators=100, random_state=42)
- pos_model.fit(X_train, y_train[:, :2])
- models['position'] = pos_model
-
- # Intensity model (wind speed)
- int_model = RandomForestRegressor(n_estimators=100, random_state=42)
- int_model.fit(X_train, y_train[:, 2])
- models['intensity'] = int_model
-
- # Calculate model performance
- pos_pred = pos_model.predict(X_test)
- int_pred = int_model.predict(X_test)
-
- pos_mae = mean_absolute_error(y_test[:, :2], pos_pred)
- int_mae = mean_absolute_error(y_test[:, 2], int_pred)
-
- model_info = f"Position MAE: {pos_mae:.2f}°, Intensity MAE: {int_mae:.2f} kt"
-
- return models, model_info
-
- except Exception as e:
- return None, f"Error creating prediction model: {str(e)}"
-
def get_realistic_genesis_locations():
"""Get realistic typhoon genesis regions based on climatology"""
return {
@@ -1739,7 +1406,7 @@ def predict_storm_route_and_intensity_realistic(genesis_region, month, oni_value
genesis_locations = get_realistic_genesis_locations()
if genesis_region not in genesis_locations:
- genesis_region = "Western Pacific Main Development Region" # Default
+ genesis_region = "Western Pacific Main Development Region"
genesis_info = genesis_locations[genesis_region]
lat = genesis_info["lat"]
@@ -1753,29 +1420,27 @@ def predict_storm_route_and_intensity_realistic(genesis_region, month, oni_value
'genesis_info': genesis_info
}
- # REALISTIC starting intensity - Tropical Depression level
- base_intensity = 30 # Start at TD level (25-35 kt)
+ # Realistic starting intensity
+ base_intensity = 30
- # Environmental factors for genesis
- if oni_value > 1.0: # Strong El Niño - suppressed development
+ # Environmental factors
+ if oni_value > 1.0:
intensity_modifier = -6
- elif oni_value > 0.5: # Moderate El Niño
+ elif oni_value > 0.5:
intensity_modifier = -3
- elif oni_value < -1.0: # Strong La Niña - enhanced development
+ elif oni_value < -1.0:
intensity_modifier = +8
- elif oni_value < -0.5: # Moderate La Niña
+ elif oni_value < -0.5:
intensity_modifier = +5
- else: # Neutral
+ else:
intensity_modifier = oni_value * 2
- # Seasonal genesis effects
seasonal_factors = {
1: -8, 2: -6, 3: -4, 4: -2, 5: 2, 6: 6,
7: 10, 8: 12, 9: 15, 10: 10, 11: 4, 12: -5
}
seasonal_modifier = seasonal_factors.get(month, 0)
- # Genesis region favorability
region_factors = {
"Western Pacific Main Development Region": 8,
"South China Sea": 4,
@@ -1790,160 +1455,137 @@ def predict_storm_route_and_intensity_realistic(genesis_region, month, oni_value
}
region_modifier = region_factors.get(genesis_region, 0)
- # Calculate realistic starting intensity (TD level)
predicted_intensity = base_intensity + intensity_modifier + seasonal_modifier + region_modifier
- predicted_intensity = max(25, min(40, predicted_intensity)) # Keep in TD-weak TS range
+ predicted_intensity = max(25, min(40, predicted_intensity))
- # Add realistic uncertainty for genesis
intensity_uncertainty = np.random.normal(0, 2)
predicted_intensity += intensity_uncertainty
- predicted_intensity = max(25, min(38, predicted_intensity)) # TD range
+ predicted_intensity = max(25, min(38, predicted_intensity))
results['current_prediction'] = {
'intensity_kt': predicted_intensity,
- 'pressure_hpa': 1008 - (predicted_intensity - 25) * 0.6, # Realistic TD pressure
+ 'pressure_hpa': 1008 - (predicted_intensity - 25) * 0.6,
'category': categorize_typhoon_enhanced(predicted_intensity),
'genesis_region': genesis_region
}
- # REALISTIC route prediction with proper typhoon speeds
+ # Route prediction
current_lat = lat
current_lon = lon
current_intensity = predicted_intensity
route_points = []
- # Track storm development over time with REALISTIC SPEEDS
for hour in range(0, forecast_hours + 6, 6):
+ # Realistic motion
+ if current_lat < 20:
+ base_speed = 0.12
+ elif current_lat < 30:
+ base_speed = 0.18
+ else:
+ base_speed = 0.25
- # REALISTIC typhoon motion - much faster speeds
- # Typical typhoon forward speed: 15-25 km/h (0.14-0.23°/hour)
-
- # Base forward speed depends on latitude and storm intensity
- if current_lat < 20: # Low latitude - slower
- base_speed = 0.12 # ~13 km/h
- elif current_lat < 30: # Mid latitude - moderate
- base_speed = 0.18 # ~20 km/h
- else: # High latitude - faster
- base_speed = 0.25 # ~28 km/h
-
- # Intensity affects speed (stronger storms can move faster)
intensity_speed_factor = 1.0 + (current_intensity - 50) / 200
base_speed *= max(0.8, min(1.4, intensity_speed_factor))
- # Beta drift (Coriolis effect) - realistic values
beta_drift_lat = 0.02 * np.sin(np.radians(current_lat))
beta_drift_lon = -0.05 * np.cos(np.radians(current_lat))
- # Seasonal steering patterns with realistic speeds
- if month in [6, 7, 8, 9]: # Peak season
+ if month in [6, 7, 8, 9]:
ridge_strength = 1.2
ridge_position = 32 + 4 * np.sin(2 * np.pi * (month - 6) / 4)
- else: # Off season
+ else:
ridge_strength = 0.9
ridge_position = 28
- # REALISTIC motion based on position relative to subtropical ridge
- if current_lat < ridge_position - 10: # Well south of ridge - westward movement
- lat_tendency = base_speed * 0.3 + beta_drift_lat # Slight poleward
- lon_tendency = -base_speed * 0.9 + beta_drift_lon # Strong westward
- elif current_lat > ridge_position - 3: # Near ridge - recurvature
- lat_tendency = base_speed * 0.8 + beta_drift_lat # Strong poleward
- lon_tendency = base_speed * 0.4 + beta_drift_lon # Eastward
- else: # In between - normal WNW motion
- lat_tendency = base_speed * 0.4 + beta_drift_lat # Moderate poleward
- lon_tendency = -base_speed * 0.7 + beta_drift_lon # Moderate westward
+ if current_lat < ridge_position - 10:
+ lat_tendency = base_speed * 0.3 + beta_drift_lat
+ lon_tendency = -base_speed * 0.9 + beta_drift_lon
+ elif current_lat > ridge_position - 3:
+ lat_tendency = base_speed * 0.8 + beta_drift_lat
+ lon_tendency = base_speed * 0.4 + beta_drift_lon
+ else:
+ lat_tendency = base_speed * 0.4 + beta_drift_lat
+ lon_tendency = -base_speed * 0.7 + beta_drift_lon
- # ENSO steering modulation (realistic effects)
- if oni_value > 0.5: # El Niño - more eastward/poleward motion
+ if oni_value > 0.5:
lon_tendency += 0.05
lat_tendency += 0.02
- elif oni_value < -0.5: # La Niña - more westward motion
+ elif oni_value < -0.5:
lon_tendency -= 0.08
lat_tendency -= 0.01
- # Add motion uncertainty that grows with time (realistic error growth)
motion_uncertainty = 0.02 + (hour / 120) * 0.04
lat_noise = np.random.normal(0, motion_uncertainty)
lon_noise = np.random.normal(0, motion_uncertainty)
- # Update position with realistic speeds
current_lat += lat_tendency + lat_noise
current_lon += lon_tendency + lon_noise
- # REALISTIC intensity evolution with proper development cycles
-
- # Development phase (first 48-72 hours) - realistic intensification
+ # Intensity evolution
if hour <= 48:
- if current_intensity < 50: # Still weak - rapid development possible
- if 10 <= current_lat <= 25 and 115 <= current_lon <= 165: # Favorable environment
+ if current_intensity < 50:
+ if 10 <= current_lat <= 25 and 115 <= current_lon <= 165:
intensity_tendency = 4.5 if current_intensity < 35 else 3.0
- elif 120 <= current_lon <= 155 and 15 <= current_lat <= 20: # Best environment
+ elif 120 <= current_lon <= 155 and 15 <= current_lat <= 20:
intensity_tendency = 6.0 if current_intensity < 40 else 4.0
else:
intensity_tendency = 2.0
- elif current_intensity < 80: # Moderate intensity
+ elif current_intensity < 80:
intensity_tendency = 2.5 if (120 <= current_lon <= 155 and 10 <= current_lat <= 25) else 1.0
- else: # Already strong
+ else:
intensity_tendency = 1.0
- # Mature phase (48-120 hours) - peak intensity maintenance
elif hour <= 120:
- if current_lat < 25 and current_lon > 120: # Still in favorable waters
+ if current_lat < 25 and current_lon > 120:
if current_intensity < 120:
intensity_tendency = 1.5
else:
- intensity_tendency = 0.0 # Maintain intensity
+ intensity_tendency = 0.0
else:
intensity_tendency = -1.5
- # Extended phase (120+ hours) - gradual weakening
else:
if current_lat < 30 and current_lon > 115:
- intensity_tendency = -2.0 # Slow weakening
+ intensity_tendency = -2.0
else:
- intensity_tendency = -3.5 # Faster weakening
+ intensity_tendency = -3.5
- # Environmental modulation (realistic effects)
- if current_lat > 35: # High latitude - rapid weakening
+ # Environmental modulation
+ if current_lat > 35:
intensity_tendency -= 12
- elif current_lat > 30: # Moderate latitude
+ elif current_lat > 30:
intensity_tendency -= 5
- elif current_lon < 110: # Land interaction
+ elif current_lon < 110:
intensity_tendency -= 15
- elif 125 <= current_lon <= 155 and 10 <= current_lat <= 25: # Warm pool
+ elif 125 <= current_lon <= 155 and 10 <= current_lat <= 25:
intensity_tendency += 2
- elif 160 <= current_lon <= 180 and 15 <= current_lat <= 30: # Still warm
+ elif 160 <= current_lon <= 180 and 15 <= current_lat <= 30:
intensity_tendency += 1
- # SST effects (realistic temperature impact)
- if current_lat < 8: # Very warm but weak Coriolis
+ if current_lat < 8:
intensity_tendency += 0.5
- elif 8 <= current_lat <= 20: # Sweet spot for development
+ elif 8 <= current_lat <= 20:
intensity_tendency += 2.0
- elif 20 < current_lat <= 30: # Marginal
+ elif 20 < current_lat <= 30:
intensity_tendency -= 1.0
- elif current_lat > 30: # Cool waters
+ elif current_lat > 30:
intensity_tendency -= 4.0
- # Shear effects (simplified but realistic)
- if month in [12, 1, 2, 3]: # High shear season
+ if month in [12, 1, 2, 3]:
intensity_tendency -= 2.0
- elif month in [7, 8, 9]: # Low shear season
+ elif month in [7, 8, 9]:
intensity_tendency += 1.0
- # Update intensity with realistic bounds and variability
- intensity_noise = np.random.normal(0, 1.5) # Small random fluctuations
+ intensity_noise = np.random.normal(0, 1.5)
current_intensity += intensity_tendency + intensity_noise
- current_intensity = max(20, min(185, current_intensity)) # Realistic range
+ current_intensity = max(20, min(185, current_intensity))
- # Calculate confidence based on forecast time and environment
base_confidence = 0.92
time_penalty = (hour / 120) * 0.45
environment_penalty = 0.15 if current_lat > 30 or current_lon < 115 else 0
confidence = max(0.25, base_confidence - time_penalty - environment_penalty)
- # Determine development stage
if hour <= 24:
stage = 'Genesis'
elif hour <= 72:
@@ -1963,13 +1605,12 @@ def predict_storm_route_and_intensity_realistic(genesis_region, month, oni_value
'category': categorize_typhoon_enhanced(current_intensity),
'confidence': confidence,
'development_stage': stage,
- 'forward_speed_kmh': base_speed * 111, # Convert to km/h
+ 'forward_speed_kmh': base_speed * 111,
'pressure_hpa': max(900, 1013 - (current_intensity - 25) * 0.9)
})
results['route_forecast'] = route_points
- # Realistic confidence scores
results['confidence_scores'] = {
'genesis': 0.88,
'early_development': 0.82,
@@ -1982,7 +1623,6 @@ def predict_storm_route_and_intensity_realistic(genesis_region, month, oni_value
'long_term': max(0.3, 0.8 - (forecast_hours / 240) * 0.5)
}
- # Model information
results['model_info'] = f"Enhanced Realistic Model - {genesis_region}"
return results
@@ -2005,7 +1645,6 @@ def create_animated_route_visualization(prediction_results, show_uncertainty=Tru
route_data = prediction_results['route_forecast']
- # Extract data for plotting
hours = [point['hour'] for point in route_data]
lats = [point['lat'] for point in route_data]
lons = [point['lon'] for point in route_data]
@@ -2016,7 +1655,6 @@ def create_animated_route_visualization(prediction_results, show_uncertainty=Tru
speeds = [point.get('forward_speed_kmh', 15) for point in route_data]
pressures = [point.get('pressure_hpa', 1013) for point in route_data]
- # Create subplot layout with map and intensity plot
fig = make_subplots(
rows=2, cols=2,
subplot_titles=('Storm Track Animation', 'Wind Speed vs Time', 'Forward Speed vs Time', 'Pressure vs Time'),
@@ -2027,11 +1665,8 @@ def create_animated_route_visualization(prediction_results, show_uncertainty=Tru
)
if enable_animation:
- # Add frames for animation
frames = []
- # Static background elements first
- # Add complete track as background
fig.add_trace(
go.Scattergeo(
lon=lons,
@@ -2045,7 +1680,6 @@ def create_animated_route_visualization(prediction_results, show_uncertainty=Tru
row=1, col=1
)
- # Genesis marker (always visible)
fig.add_trace(
go.Scattergeo(
lon=[lons[0]],
@@ -2070,7 +1704,6 @@ def create_animated_route_visualization(prediction_results, show_uncertainty=Tru
row=1, col=1
)
- # Create animation frames
for i in range(len(route_data)):
frame_lons = lons[:i+1]
frame_lats = lats[:i+1]
@@ -2078,12 +1711,10 @@ def create_animated_route_visualization(prediction_results, show_uncertainty=Tru
frame_categories = categories[:i+1]
frame_hours = hours[:i+1]
- # Current position marker
current_color = enhanced_color_map.get(frame_categories[-1], 'rgb(128,128,128)')
current_size = 15 + (frame_intensities[-1] / 10)
frame_data = [
- # Animated track up to current point
go.Scattergeo(
lon=frame_lons,
lat=frame_lats,
@@ -2098,7 +1729,6 @@ def create_animated_route_visualization(prediction_results, show_uncertainty=Tru
name='Current Track',
showlegend=False
),
- # Current position highlight
go.Scattergeo(
lon=[frame_lons[-1]],
lat=[frame_lats[-1]],
@@ -2122,7 +1752,6 @@ def create_animated_route_visualization(prediction_results, show_uncertainty=Tru
""
)
),
- # Animated wind plot
go.Scatter(
x=frame_hours,
y=frame_intensities,
@@ -2133,7 +1762,6 @@ def create_animated_route_visualization(prediction_results, show_uncertainty=Tru
showlegend=False,
yaxis='y2'
),
- # Animated speed plot
go.Scatter(
x=frame_hours,
y=speeds[:i+1],
@@ -2144,7 +1772,6 @@ def create_animated_route_visualization(prediction_results, show_uncertainty=Tru
showlegend=False,
yaxis='y3'
),
- # Animated pressure plot
go.Scatter(
x=frame_hours,
y=pressures[:i+1],
@@ -2168,7 +1795,6 @@ def create_animated_route_visualization(prediction_results, show_uncertainty=Tru
fig.frames = frames
- # Add play/pause controls
fig.update_layout(
updatemenus=[
{
@@ -2224,14 +1850,13 @@ def create_animated_route_visualization(prediction_results, show_uncertainty=Tru
"label": f"H{route_data[i]['hour']}",
"method": "animate"
}
- for i in range(0, len(route_data), max(1, len(route_data)//20)) # Limit slider steps
+ for i in range(0, len(route_data), max(1, len(route_data)//20))
]
}]
)
else:
- # Static view with all points
- # Add genesis marker
+ # Static view
fig.add_trace(
go.Scattergeo(
lon=[lons[0]],
@@ -2255,8 +1880,7 @@ def create_animated_route_visualization(prediction_results, show_uncertainty=Tru
row=1, col=1
)
- # Add full track with intensity coloring
- for i in range(0, len(route_data), max(1, len(route_data)//50)): # Sample points for performance
+ for i in range(0, len(route_data), max(1, len(route_data)//50)):
point = route_data[i]
color = enhanced_color_map.get(point['category'], 'rgb(128,128,128)')
size = 8 + (point['intensity_kt'] / 12)
@@ -2287,7 +1911,6 @@ def create_animated_route_visualization(prediction_results, show_uncertainty=Tru
row=1, col=1
)
- # Connect points with track line
fig.add_trace(
go.Scattergeo(
lon=lons,
@@ -2301,7 +1924,6 @@ def create_animated_route_visualization(prediction_results, show_uncertainty=Tru
)
# Add static intensity, speed, and pressure plots
- # Wind speed plot
fig.add_trace(
go.Scatter(
x=hours,
@@ -2355,7 +1977,6 @@ def create_animated_route_visualization(prediction_results, show_uncertainty=Tru
uncertainty_lons_lower = []
for i, point in enumerate(route_data):
- # Uncertainty grows with time and decreases with confidence
base_uncertainty = 0.4 + (i / len(route_data)) * 1.8
confidence_factor = point.get('confidence', 0.8)
uncertainty = base_uncertainty / confidence_factor
@@ -2385,8 +2006,8 @@ def create_animated_route_visualization(prediction_results, show_uncertainty=Tru
# Enhanced layout
fig.update_layout(
title=f"Comprehensive Storm Development Analysis
Starting from {prediction_results['genesis_info']['description']}",
- height=1000, # Taller for better subplot visibility
- width=1400, # Wider
+ height=1000,
+ width=1400,
showlegend=True
)
@@ -2416,7 +2037,6 @@ def create_animated_route_visualization(prediction_results, show_uncertainty=Tru
current = prediction_results['current_prediction']
genesis_info = prediction_results['genesis_info']
- # Calculate some statistics
max_intensity = max(intensities)
max_intensity_time = hours[intensities.index(max_intensity)]
avg_speed = np.mean(speeds)
@@ -2478,21 +2098,14 @@ MODEL: {prediction_results['model_info']}
return None, error_msg
# -----------------------------
-# Regression Functions (Original)
+# Regression Functions
# -----------------------------
def perform_wind_regression(start_year, start_month, end_year, end_month):
"""Perform wind regression analysis"""
- if merged_data is None or merged_data.empty:
- return "Wind Regression: No merged data available"
-
start_date = datetime(start_year, start_month, 1)
end_date = datetime(end_year, end_month, 28)
data = merged_data[(merged_data['ISO_TIME']>=start_date) & (merged_data['ISO_TIME']<=end_date)].dropna(subset=['USA_WIND','ONI'])
-
- if len(data) < 10:
- return f"Wind Regression: Insufficient data ({len(data)} records)"
-
data['severe_typhoon'] = (data['USA_WIND']>=64).astype(int)
X = sm.add_constant(data['ONI'])
y = data['severe_typhoon']
@@ -2507,16 +2120,9 @@ def perform_wind_regression(start_year, start_month, end_year, end_month):
def perform_pressure_regression(start_year, start_month, end_year, end_month):
"""Perform pressure regression analysis"""
- if merged_data is None or merged_data.empty:
- return "Pressure Regression: No merged data available"
-
start_date = datetime(start_year, start_month, 1)
end_date = datetime(end_year, end_month, 28)
data = merged_data[(merged_data['ISO_TIME']>=start_date) & (merged_data['ISO_TIME']<=end_date)].dropna(subset=['USA_PRES','ONI'])
-
- if len(data) < 10:
- return f"Pressure Regression: Insufficient data ({len(data)} records)"
-
data['intense_typhoon'] = (data['USA_PRES']<=950).astype(int)
X = sm.add_constant(data['ONI'])
y = data['intense_typhoon']
@@ -2531,16 +2137,9 @@ def perform_pressure_regression(start_year, start_month, end_year, end_month):
def perform_longitude_regression(start_year, start_month, end_year, end_month):
"""Perform longitude regression analysis"""
- if merged_data is None or merged_data.empty:
- return "Longitude Regression: No merged data available"
-
start_date = datetime(start_year, start_month, 1)
end_date = datetime(end_year, end_month, 28)
data = merged_data[(merged_data['ISO_TIME']>=start_date) & (merged_data['ISO_TIME']<=end_date)].dropna(subset=['LON','ONI'])
-
- if len(data) < 10:
- return f"Longitude Regression: Insufficient data ({len(data)} records)"
-
data['western_typhoon'] = (data['LON']<=140).astype(int)
X = sm.add_constant(data['ONI'])
y = data['western_typhoon']
@@ -2554,88 +2153,33 @@ def perform_longitude_regression(start_year, start_month, end_year, end_month):
return f"Longitude Regression Error: {e}"
# -----------------------------
-# FIXED: Visualization Functions - WORK WITH ALL DATA
+# FIXED: Visualization Functions
# -----------------------------
-def get_available_years(typhoon_data):
- """Get all available years - EXTENDED RANGE"""
- try:
- if typhoon_data is None or typhoon_data.empty:
- return [str(year) for year in range(1851, 2026)] # Full historical range
-
- if 'ISO_TIME' in typhoon_data.columns:
- years = typhoon_data['ISO_TIME'].dt.year.dropna().unique()
- elif 'SEASON' in typhoon_data.columns:
- years = typhoon_data['SEASON'].dropna().unique()
- else:
- years = range(1851, 2026) # Full historical range
-
- # Convert to strings and sort
- year_strings = sorted([str(int(year)) for year in years if not pd.isna(year)])
-
- # Ensure we have at least some years
- if not year_strings:
- return [str(year) for year in range(1851, 2026)]
-
- return year_strings
-
- except Exception as e:
- print(f"Error in get_available_years: {e}")
- return [str(year) for year in range(1851, 2026)]
-
def get_full_tracks(start_year, start_month, end_year, end_month, enso_phase, typhoon_search):
- """FIXED: Get full typhoon tracks - WORKS WITHOUT ONI"""
+ """Get full typhoon tracks"""
start_date = datetime(start_year, start_month, 1)
end_date = datetime(end_year, end_month, 28)
-
- # Filter merged data by date
- if merged_data is not None and not merged_data.empty:
- filtered_data = merged_data[(merged_data['ISO_TIME']>=start_date) & (merged_data['ISO_TIME']<=end_date)].copy()
-
- # Add ENSO phase classification - handle missing ONI
- filtered_data['ENSO_Phase'] = filtered_data['ONI'].apply(classify_enso_phases)
-
- if enso_phase != 'all':
- filtered_data = filtered_data[filtered_data['ENSO_Phase'] == enso_phase.capitalize()]
-
- unique_storms = filtered_data['SID'].unique()
- else:
- # Work directly with typhoon_data if merged_data not available
- if 'ISO_TIME' in typhoon_data.columns:
- time_filter = (typhoon_data['ISO_TIME'] >= start_date) & (typhoon_data['ISO_TIME'] <= end_date)
- filtered_typhoons = typhoon_data[time_filter]['SID'].unique()
- else:
- # Fallback - use all available storms
- filtered_typhoons = typhoon_data['SID'].unique()
- unique_storms = filtered_typhoons
- filtered_data = pd.DataFrame({'SID': unique_storms, 'ONI': 0.0}) # Dummy for compatibility
-
+ filtered_data = merged_data[(merged_data['ISO_TIME']>=start_date) & (merged_data['ISO_TIME']<=end_date)].copy()
+ filtered_data['ENSO_Phase'] = filtered_data['ONI'].apply(classify_enso_phases)
+ if enso_phase != 'all':
+ filtered_data = filtered_data[filtered_data['ENSO_Phase'] == enso_phase.capitalize()]
+ unique_storms = filtered_data['SID'].unique()
count = len(unique_storms)
fig = go.Figure()
-
for sid in unique_storms:
storm_data = typhoon_data[typhoon_data['SID']==sid]
if storm_data.empty:
continue
-
name = storm_data['NAME'].iloc[0] if pd.notnull(storm_data['NAME'].iloc[0]) else "Unnamed"
- basin = storm_data['SID'].iloc[0][:2] if 'SID' in storm_data.columns else "Unknown"
-
- # Get ONI value if available
- if not filtered_data.empty and sid in filtered_data['SID'].values:
- storm_oni = filtered_data[filtered_data['SID']==sid]['ONI'].iloc[0]
- else:
- storm_oni = 0.0 # Default neutral
-
+ basin = storm_data['SID'].iloc[0][:2]
+ storm_oni = filtered_data[filtered_data['SID']==sid]['ONI'].iloc[0]
color = 'red' if storm_oni>=0.5 else ('blue' if storm_oni<=-0.5 else 'green')
-
fig.add_trace(go.Scattergeo(
lon=storm_data['LON'], lat=storm_data['LAT'], mode='lines',
name=f"{name} ({basin})",
line=dict(width=1.5, color=color), hoverinfo="name"
))
-
- # Handle typhoon search
if typhoon_search:
search_mask = typhoon_data['NAME'].str.contains(typhoon_search, case=False, na=False)
if search_mask.any():
@@ -2647,9 +2191,8 @@ def get_full_tracks(start_year, start_month, end_year, end_month, enso_phase, ty
line=dict(width=3, color='yellow'),
marker=dict(size=5), hoverinfo="name"
))
-
fig.update_layout(
- title=f"Typhoon Tracks ({start_year}-{start_month:02d} to {end_year}-{end_month:02d}) - All Available Data",
+ title=f"Typhoon Tracks ({start_year}-{start_month} to {end_year}-{end_month})",
geo=dict(
projection_type='natural earth',
showland=True,
@@ -2664,48 +2207,26 @@ def get_full_tracks(start_year, start_month, end_year, end_month, enso_phase, ty
showlegend=True,
height=700
)
-
fig.add_annotation(
x=0.02, y=0.98, xref="paper", yref="paper",
- text="Red: El Niño, Blue: La Nina, Green: Neutral/Unknown ONI",
+ text="Red: El Niño, Blue: La Nina, Green: Neutral",
showarrow=False, align="left",
bgcolor="rgba(255,255,255,0.8)"
)
-
- return fig, f"Total typhoons displayed: {count} (includes all available data)"
+ return fig, f"Total typhoons displayed: {count}"
def get_wind_analysis(start_year, start_month, end_year, end_month, enso_phase, typhoon_search):
- """FIXED: Wind analysis that works with all data"""
+ """Get wind analysis with enhanced categorization"""
start_date = datetime(start_year, start_month, 1)
end_date = datetime(end_year, end_month, 28)
-
- if merged_data is not None and not merged_data.empty:
- filtered_data = merged_data[(merged_data['ISO_TIME']>=start_date) & (merged_data['ISO_TIME']<=end_date)].copy()
- filtered_data['ENSO_Phase'] = filtered_data['ONI'].apply(classify_enso_phases)
-
- if enso_phase != 'all':
- filtered_data = filtered_data[filtered_data['ENSO_Phase'] == enso_phase.capitalize()]
- else:
- # Create filtered data from typhoon_data
- if 'ISO_TIME' in typhoon_data.columns:
- time_filter = (typhoon_data['ISO_TIME'] >= start_date) & (typhoon_data['ISO_TIME'] <= end_date)
- temp_data = typhoon_data[time_filter].groupby('SID').agg({
- 'USA_WIND': 'max', 'NAME': 'first', 'SEASON': 'first', 'ISO_TIME': 'first'
- }).reset_index()
- temp_data['ONI'] = 0.0 # Default neutral
- temp_data['Category'] = temp_data['USA_WIND'].apply(categorize_typhoon_enhanced)
- temp_data['Year'] = temp_data['ISO_TIME'].dt.year
- temp_data['ENSO_Phase'] = 'Neutral'
- filtered_data = temp_data
- else:
- return go.Figure(), "No time data available for analysis"
-
- if filtered_data.empty:
- return go.Figure(), "No data available for selected time period"
+ filtered_data = merged_data[(merged_data['ISO_TIME']>=start_date) & (merged_data['ISO_TIME']<=end_date)].copy()
+ filtered_data['ENSO_Phase'] = filtered_data['ONI'].apply(classify_enso_phases)
+ if enso_phase != 'all':
+ filtered_data = filtered_data[filtered_data['ENSO_Phase'] == enso_phase.capitalize()]
fig = px.scatter(filtered_data, x='ONI', y='USA_WIND', color='Category',
hover_data=['NAME','Year','Category'],
- title='Wind Speed vs ONI (All Available Data)',
+ title='Wind Speed vs ONI',
labels={'ONI':'ONI Value','USA_WIND':'Max Wind Speed (knots)'},
color_discrete_map=enhanced_color_map)
@@ -2719,49 +2240,21 @@ def get_wind_analysis(start_year, start_month, end_year, end_month, enso_phase,
text=filtered_data.loc[mask,'NAME']+' ('+filtered_data.loc[mask,'Year'].astype(str)+')'
))
- # Try regression analysis if we have sufficient data
- try:
- if len(filtered_data) > 10:
- regression = perform_wind_regression(start_year, start_month, end_year, end_month)
- else:
- regression = f"Wind Analysis: {len(filtered_data)} storms analyzed (insufficient for regression)"
- except:
- regression = f"Wind Analysis: {len(filtered_data)} storms analyzed"
-
+ regression = perform_wind_regression(start_year, start_month, end_year, end_month)
return fig, regression
def get_pressure_analysis(start_year, start_month, end_year, end_month, enso_phase, typhoon_search):
- """FIXED: Pressure analysis that works with all data"""
+ """Get pressure analysis with enhanced categorization"""
start_date = datetime(start_year, start_month, 1)
end_date = datetime(end_year, end_month, 28)
-
- if merged_data is not None and not merged_data.empty:
- filtered_data = merged_data[(merged_data['ISO_TIME']>=start_date) & (merged_data['ISO_TIME']<=end_date)].copy()
- filtered_data['ENSO_Phase'] = filtered_data['ONI'].apply(classify_enso_phases)
-
- if enso_phase != 'all':
- filtered_data = filtered_data[filtered_data['ENSO_Phase'] == enso_phase.capitalize()]
- else:
- # Create filtered data from typhoon_data
- if 'ISO_TIME' in typhoon_data.columns:
- time_filter = (typhoon_data['ISO_TIME'] >= start_date) & (typhoon_data['ISO_TIME'] <= end_date)
- temp_data = typhoon_data[time_filter].groupby('SID').agg({
- 'USA_PRES': 'min', 'NAME': 'first', 'SEASON': 'first', 'ISO_TIME': 'first', 'USA_WIND': 'max'
- }).reset_index()
- temp_data['ONI'] = 0.0 # Default neutral
- temp_data['Category'] = temp_data['USA_WIND'].apply(categorize_typhoon_enhanced)
- temp_data['Year'] = temp_data['ISO_TIME'].dt.year
- temp_data['ENSO_Phase'] = 'Neutral'
- filtered_data = temp_data
- else:
- return go.Figure(), "No time data available for analysis"
-
- if filtered_data.empty:
- return go.Figure(), "No data available for selected time period"
+ filtered_data = merged_data[(merged_data['ISO_TIME']>=start_date) & (merged_data['ISO_TIME']<=end_date)].copy()
+ filtered_data['ENSO_Phase'] = filtered_data['ONI'].apply(classify_enso_phases)
+ if enso_phase != 'all':
+ filtered_data = filtered_data[filtered_data['ENSO_Phase'] == enso_phase.capitalize()]
fig = px.scatter(filtered_data, x='ONI', y='USA_PRES', color='Category',
hover_data=['NAME','Year','Category'],
- title='Pressure vs ONI (All Available Data)',
+ title='Pressure vs ONI',
labels={'ONI':'ONI Value','USA_PRES':'Min Pressure (hPa)'},
color_discrete_map=enhanced_color_map)
@@ -2775,86 +2268,102 @@ def get_pressure_analysis(start_year, start_month, end_year, end_month, enso_pha
text=filtered_data.loc[mask,'NAME']+' ('+filtered_data.loc[mask,'Year'].astype(str)+')'
))
- # Try regression analysis if we have sufficient data
- try:
- if len(filtered_data) > 10:
- regression = perform_pressure_regression(start_year, start_month, end_year, end_month)
- else:
- regression = f"Pressure Analysis: {len(filtered_data)} storms analyzed (insufficient for regression)"
- except:
- regression = f"Pressure Analysis: {len(filtered_data)} storms analyzed"
-
+ regression = perform_pressure_regression(start_year, start_month, end_year, end_month)
return fig, regression
def get_longitude_analysis(start_year, start_month, end_year, end_month, enso_phase, typhoon_search):
- """FIXED: Longitude analysis that works with all data"""
+ """Get longitude analysis"""
start_date = datetime(start_year, start_month, 1)
end_date = datetime(end_year, end_month, 28)
-
- if merged_data is not None and not merged_data.empty:
- filtered_data = merged_data[(merged_data['ISO_TIME']>=start_date) & (merged_data['ISO_TIME']<=end_date)].copy()
- filtered_data['ENSO_Phase'] = filtered_data['ONI'].apply(classify_enso_phases)
-
- if enso_phase != 'all':
- filtered_data = filtered_data[filtered_data['ENSO_Phase'] == enso_phase.capitalize()]
- else:
- # Create filtered data from typhoon_data
- if 'ISO_TIME' in typhoon_data.columns:
- time_filter = (typhoon_data['ISO_TIME'] >= start_date) & (typhoon_data['ISO_TIME'] <= end_date)
- temp_data = typhoon_data[time_filter].groupby('SID').agg({
- 'LON': 'first', 'NAME': 'first', 'SEASON': 'first', 'ISO_TIME': 'first'
- }).reset_index()
- temp_data['ONI'] = 0.0 # Default neutral
- temp_data['Year'] = temp_data['ISO_TIME'].dt.year
- filtered_data = temp_data
- else:
- return go.Figure(), "No time data available", "No longitude analysis available"
-
- if filtered_data.empty:
- return go.Figure(), "No data available", "No longitude analysis available"
+ filtered_data = merged_data[(merged_data['ISO_TIME']>=start_date) & (merged_data['ISO_TIME']<=end_date)].copy()
+ filtered_data['ENSO_Phase'] = filtered_data['ONI'].apply(classify_enso_phases)
+ if enso_phase != 'all':
+ filtered_data = filtered_data[filtered_data['ENSO_Phase'] == enso_phase.capitalize()]
fig = px.scatter(filtered_data, x='LON', y='ONI', hover_data=['NAME'],
- title='Typhoon Generation Longitude vs ONI (All Available Data)')
-
- slopes_text = f"Longitude Analysis: {len(filtered_data)} storms analyzed"
- regression = f"Data points: {len(filtered_data)}"
+ title='Typhoon Generation Longitude vs ONI (All Years)')
- if len(filtered_data) > 10:
+ if len(filtered_data) > 1:
+ X = np.array(filtered_data['LON']).reshape(-1,1)
+ y = filtered_data['ONI']
try:
- X = np.array(filtered_data['LON']).reshape(-1,1)
- y = filtered_data['ONI']
model = sm.OLS(y, sm.add_constant(X)).fit()
y_pred = model.predict(sm.add_constant(X))
fig.add_trace(go.Scatter(x=filtered_data['LON'], y=y_pred, mode='lines', name='Regression Line'))
slope = model.params[1]
- slopes_text = f"All Years Slope: {slope:.4f} (n={len(filtered_data)})"
+ slopes_text = f"All Years Slope: {slope:.4f}"
except Exception as e:
slopes_text = f"Regression Error: {e}"
-
- try:
- regression = perform_longitude_regression(start_year, start_month, end_year, end_month)
- except:
- regression = f"Longitude Analysis: {len(filtered_data)} storms analyzed"
+ else:
+ slopes_text = "Insufficient data for regression"
+ regression = perform_longitude_regression(start_year, start_month, end_year, end_month)
return fig, slopes_text, regression
# -----------------------------
-# ENHANCED: Animation Functions with Taiwan Standard Support - FIXED VERSION
+# FIXED: Animation Functions - NO FALLBACK
# -----------------------------
+def get_available_years(typhoon_data):
+ """Get all available years from actual data - NO FALLBACK"""
+ try:
+ if typhoon_data is None or typhoon_data.empty:
+ raise Exception("No typhoon data available for year extraction")
+
+ years = set()
+
+ # Try multiple methods to extract years
+ if 'ISO_TIME' in typhoon_data.columns:
+ valid_times = typhoon_data['ISO_TIME'].dropna()
+ if len(valid_times) > 0:
+ years.update(valid_times.dt.year.unique())
+
+ if 'SEASON' in typhoon_data.columns:
+ valid_seasons = typhoon_data['SEASON'].dropna()
+ if len(valid_seasons) > 0:
+ years.update(valid_seasons.unique())
+
+ # Extract from SID if available (format: BASIN + NUMBER + YEAR)
+ if 'SID' in typhoon_data.columns and len(years) == 0:
+ for sid in typhoon_data['SID'].dropna().unique():
+ try:
+ # Try to extract 4-digit year from SID
+ year_match = pd.Series([sid]).str.extract(r'(\d{4})')[0].iloc[0]
+ if year_match and 1950 <= int(year_match) <= 2030:
+ years.add(int(year_match))
+ except:
+ continue
+
+ if len(years) == 0:
+ raise Exception("Could not extract any valid years from typhoon data")
+
+ # Convert to sorted list of strings
+ year_strings = sorted([str(int(year)) for year in years if 1950 <= year <= 2030])
+
+ if len(year_strings) == 0:
+ raise Exception("No valid years found in reasonable range (1950-2030)")
+
+ logging.info(f"Extracted {len(year_strings)} years from data: {year_strings[0]} to {year_strings[-1]}")
+ return year_strings
+
+ except Exception as e:
+ logging.error(f"CRITICAL ERROR in get_available_years: {e}")
+ raise Exception(f"Cannot extract years from typhoon data: {e}")
+
def update_typhoon_options_enhanced(year, basin):
- """Enhanced typhoon options with TD support and 2025 data"""
+ """Enhanced typhoon options - NEVER returns empty or fallback"""
try:
year = int(year)
- # Filter by year - handle both ISO_TIME and SEASON columns
+ # Filter by year
if 'ISO_TIME' in typhoon_data.columns:
year_mask = typhoon_data['ISO_TIME'].dt.year == year
elif 'SEASON' in typhoon_data.columns:
year_mask = typhoon_data['SEASON'] == year
else:
- # Fallback - try to extract year from SID or other fields
- year_mask = typhoon_data.index >= 0 # Include all data as fallback
+ # Try to extract from SID
+ sid_year_mask = typhoon_data['SID'].str.contains(str(year), na=False)
+ year_mask = sid_year_mask
year_data = typhoon_data[year_mask].copy()
@@ -2867,9 +2376,9 @@ def update_typhoon_options_enhanced(year, basin):
year_data = year_data[year_data['BASIN'] == basin_code]
if year_data.empty:
- return gr.update(choices=["No storms found"], value=None)
+ raise Exception(f"No storms found for year {year} and basin {basin}")
- # Get unique storms - include ALL intensities (including TD)
+ # Get unique storms
storms = year_data.groupby('SID').agg({
'NAME': 'first',
'USA_WIND': 'max'
@@ -2890,39 +2399,50 @@ def update_typhoon_options_enhanced(year, basin):
options.append(option)
if not options:
- return gr.update(choices=["No storms found"], value=None)
+ raise Exception(f"No valid storm options generated for year {year}")
+ logging.info(f"Generated {len(options)} storm options for {year}")
return gr.update(choices=sorted(options), value=options[0])
except Exception as e:
- print(f"Error in update_typhoon_options_enhanced: {e}")
- return gr.update(choices=["Error loading storms"], value=None)
+ error_msg = f"Error loading storms for {year}: {str(e)}"
+ logging.error(error_msg)
+ raise Exception(error_msg)
def generate_enhanced_track_video_fixed(year, typhoon_selection, standard):
- """FIXED: Enhanced track video generation with working animation display"""
- if not typhoon_selection or typhoon_selection == "No storms found":
- return None
-
+ """FIXED: Enhanced track video generation - NO FALLBACK ALLOWED"""
try:
+ if not typhoon_selection or "No storms found" in typhoon_selection or "Error" in typhoon_selection:
+ raise Exception("Invalid typhoon selection provided")
+
# Extract SID from selection
- sid = typhoon_selection.split('(')[1].split(')')[0]
+ try:
+ sid = typhoon_selection.split('(')[1].split(')')[0]
+ except:
+ raise Exception(f"Could not extract SID from selection: {typhoon_selection}")
# Get storm data
storm_df = typhoon_data[typhoon_data['SID'] == sid].copy()
if storm_df.empty:
- print(f"No data found for storm {sid}")
- return None
+ raise Exception(f"No track data found for storm {sid}")
# Sort by time
if 'ISO_TIME' in storm_df.columns:
storm_df = storm_df.sort_values('ISO_TIME')
+ # Validate essential data
+ if 'LAT' not in storm_df.columns or 'LON' not in storm_df.columns:
+ raise Exception(f"Missing coordinate data for storm {sid}")
+
# Extract data for animation
- lats = storm_df['LAT'].astype(float).values
- lons = storm_df['LON'].astype(float).values
+ lats = pd.to_numeric(storm_df['LAT'], errors='coerce').dropna().values
+ lons = pd.to_numeric(storm_df['LON'], errors='coerce').dropna().values
+
+ if len(lats) < 2 or len(lons) < 2:
+ raise Exception(f"Insufficient track points for storm {sid}: {len(lats)} points")
if 'USA_WIND' in storm_df.columns:
- winds = pd.to_numeric(storm_df['USA_WIND'], errors='coerce').fillna(0).values
+ winds = pd.to_numeric(storm_df['USA_WIND'], errors='coerce').fillna(30).values[:len(lats)]
else:
winds = np.full(len(lats), 30)
@@ -2930,7 +2450,7 @@ def generate_enhanced_track_video_fixed(year, typhoon_selection, standard):
storm_name = storm_df['NAME'].iloc[0] if pd.notna(storm_df['NAME'].iloc[0]) else "UNNAMED"
season = storm_df['SEASON'].iloc[0] if 'SEASON' in storm_df.columns else year
- print(f"Generating FIXED video for {storm_name} ({sid}) with {len(lats)} track points using {standard} standard")
+ logging.info(f"Generating FIXED video for {storm_name} ({sid}) with {len(lats)} track points using {standard} standard")
# FIXED: Create figure with proper cartopy setup
fig = plt.figure(figsize=(16, 10))
@@ -2959,24 +2479,20 @@ def generate_enhanced_track_video_fixed(year, typhoon_selection, standard):
fontsize=18, fontweight='bold')
# FIXED: Animation elements - proper initialization with cartopy transforms
- # Initialize empty line for track with correct transform
track_line, = ax.plot([], [], 'b-', linewidth=3, alpha=0.7,
label='Track', transform=ccrs.PlateCarree())
- # Initialize current position marker
current_point, = ax.plot([], [], 'o', markersize=15,
transform=ccrs.PlateCarree())
- # Historical track points (to show path traversed)
history_points, = ax.plot([], [], 'o', markersize=6, alpha=0.4, color='blue',
transform=ccrs.PlateCarree())
- # Info text box
info_box = ax.text(0.02, 0.98, '', transform=ax.transAxes,
fontsize=12, verticalalignment='top',
bbox=dict(boxstyle="round,pad=0.5", facecolor='white', alpha=0.9))
- # FIXED: Color legend with proper categories for both standards
+ # FIXED: Color legend with proper categories
legend_elements = []
if standard == 'taiwan':
categories = ['Tropical Depression', 'Tropical Storm', 'Severe Tropical Storm',
@@ -2995,25 +2511,24 @@ def generate_enhanced_track_video_fixed(year, typhoon_selection, standard):
ax.legend(handles=legend_elements, loc='upper right', fontsize=10)
- # FIXED: Animation function with proper artist updates and cartopy compatibility
+ # FIXED: Animation function
def animate_fixed(frame):
"""Fixed animation function that properly updates tracks with cartopy"""
try:
if frame >= len(lats):
return track_line, current_point, history_points, info_box
- # FIXED: Update track line up to current frame
+ # Update track line up to current frame
current_lons = lons[:frame+1]
current_lats = lats[:frame+1]
- # Update the track line data (this is the key fix!)
track_line.set_data(current_lons, current_lats)
- # FIXED: Update historical points (smaller markers showing traversed path)
+ # Update historical points
if frame > 0:
history_points.set_data(current_lons[:-1], current_lats[:-1])
- # FIXED: Update current position with correct categorization
+ # Update current position with correct categorization
current_wind = winds[frame]
if standard == 'taiwan':
@@ -3021,23 +2536,19 @@ def generate_enhanced_track_video_fixed(year, typhoon_selection, standard):
else:
category, color = categorize_typhoon_by_standard_fixed(current_wind, 'atlantic')
- # Debug for first few frames
- if frame < 3:
- print(f"FIXED Frame {frame}: Wind={current_wind:.1f}kt, Category={category}, Color={color}")
-
# Update current position marker
current_point.set_data([lons[frame]], [lats[frame]])
current_point.set_color(color)
current_point.set_markersize(12 + current_wind/8)
- # FIXED: Enhanced info display with correct Taiwan wind speed conversion
+ # Enhanced info display
if 'ISO_TIME' in storm_df.columns and frame < len(storm_df):
current_time = storm_df.iloc[frame]['ISO_TIME']
time_str = current_time.strftime('%Y-%m-%d %H:%M UTC') if pd.notna(current_time) else 'Unknown'
else:
time_str = f"Step {frame+1}"
- # Corrected wind speed display for Taiwan standard
+ # Wind speed display
if standard == 'taiwan':
wind_ms = current_wind * 0.514444
wind_display = f"{current_wind:.0f} kt ({wind_ms:.1f} m/s)"
@@ -3055,52 +2566,43 @@ def generate_enhanced_track_video_fixed(year, typhoon_selection, standard):
)
info_box.set_text(info_text)
- # FIXED: Return all modified artists (crucial for proper display)
return track_line, current_point, history_points, info_box
except Exception as e:
- print(f"Error in animate frame {frame}: {e}")
+ logging.error(f"Error in animate frame {frame}: {e}")
return track_line, current_point, history_points, info_box
# FIXED: Create animation with cartopy-compatible settings
- # Key fixes: blit=False (crucial for cartopy), proper interval
anim = animation.FuncAnimation(
fig, animate_fixed, frames=len(lats),
- interval=600, blit=False, repeat=True # blit=False is essential for cartopy!
+ interval=600, blit=False, repeat=True
)
- # Save animation with optimized settings
+ # Save animation
temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.mp4',
dir=tempfile.gettempdir())
- # FIXED: Writer settings optimized for track visibility
writer = animation.FFMpegWriter(
- fps=2, bitrate=3000, codec='libx264', # Slower FPS for better track visibility
+ fps=2, bitrate=3000, codec='libx264',
extra_args=['-pix_fmt', 'yuv420p']
)
- print(f"Saving FIXED animation to {temp_file.name}")
+ logging.info(f"Saving FIXED animation to {temp_file.name}")
anim.save(temp_file.name, writer=writer, dpi=120)
plt.close(fig)
- print(f"FIXED video generated successfully: {temp_file.name}")
+ logging.info(f"FIXED video generated successfully: {temp_file.name}")
return temp_file.name
except Exception as e:
- print(f"Error generating FIXED video: {e}")
+ error_msg = f"CRITICAL ERROR generating video: {str(e)}"
+ logging.error(error_msg)
import traceback
traceback.print_exc()
- return None
-
-# FIXED: Update the simplified wrapper function
-def simplified_track_video_fixed(year, basin, typhoon, standard):
- """Simplified track video function with FIXED animation and Taiwan classification"""
- if not typhoon:
- return None
- return generate_enhanced_track_video_fixed(year, typhoon, standard)
+ raise Exception(error_msg)
# -----------------------------
-# Load & Process Data - FIXED INITIALIZATION
+# FIXED: Data Loading and Processing
# -----------------------------
# Global variables initialization
@@ -3109,95 +2611,60 @@ typhoon_data = None
merged_data = None
def initialize_data():
- """FIXED: Initialize all data safely - LOAD ALL AVAILABLE DATA"""
+ """Initialize all data safely - CRITICAL: NO FALLBACKS"""
global oni_data, typhoon_data, merged_data
try:
- logging.info("Starting comprehensive data loading process...")
+ logging.info("Starting FIXED data loading process...")
- # Try to load ONI data (optional)
- try:
- update_oni_data()
- if os.path.exists(ONI_DATA_PATH):
- oni_data = pd.read_csv(ONI_DATA_PATH)
- logging.info(f"ONI data loaded: {len(oni_data)} years")
- else:
- logging.warning("ONI data not available")
- oni_data = None
- except Exception as e:
- logging.warning(f"ONI data loading failed: {e}")
- oni_data = None
+ # Update ONI data (optional)
+ update_oni_data()
- # Load typhoon data (required)
- temp_oni = oni_data if oni_data is not None else pd.DataFrame()
- temp_oni, typhoon_data = load_data_fixed(ONI_DATA_PATH, TYPHOON_DATA_PATH)
+ # Load data with FIXED functions
+ oni_data, typhoon_data = load_data_fixed(ONI_DATA_PATH, TYPHOON_DATA_PATH)
- if oni_data is None:
- oni_data = temp_oni
+ # Verify critical data loaded
+ if typhoon_data is None or typhoon_data.empty:
+ raise Exception("CRITICAL: No typhoon data loaded")
- if typhoon_data is not None and not typhoon_data.empty:
- oni_long = process_oni_data(oni_data)
- typhoon_max = process_typhoon_data(typhoon_data)
- merged_data = merge_data(oni_long, typhoon_max)
-
- logging.info(f"Data loading complete:")
- logging.info(f" - ONI data: {len(oni_data) if oni_data is not None else 0} years")
- logging.info(f" - Typhoon data: {len(typhoon_data)} records")
- logging.info(f" - Merged data: {len(merged_data)} storms")
-
- # Log basin distribution
- if 'BASIN' in typhoon_data.columns:
- basin_counts = typhoon_data['BASIN'].value_counts()
- logging.info(f" - Basin distribution: {dict(basin_counts)}")
-
- else:
- logging.error("Failed to load typhoon data")
- # Create comprehensive fallback data
- oni_data = pd.DataFrame({'Year': range(1851, 2026), 'Jan': [0]*175, 'Feb': [0]*175, 'Mar': [0]*175, 'Apr': [0]*175,
- 'May': [0]*175, 'Jun': [0]*175, 'Jul': [0]*175, 'Aug': [0]*175, 'Sep': [0]*175,
- 'Oct': [0]*175, 'Nov': [0]*175, 'Dec': [0]*175})
- typhoon_data = create_comprehensive_fallback_typhoon_data()
- oni_long = process_oni_data(oni_data)
- typhoon_max = process_typhoon_data(typhoon_data)
- merged_data = merge_data(oni_long, typhoon_max)
-
- except Exception as e:
- logging.error(f"Error during data initialization: {e}")
- # Create comprehensive fallback data
- oni_data = pd.DataFrame({'Year': range(1851, 2026), 'Jan': [0]*175, 'Feb': [0]*175, 'Mar': [0]*175, 'Apr': [0]*175,
- 'May': [0]*175, 'Jun': [0]*175, 'Jul': [0]*175, 'Aug': [0]*175, 'Sep': [0]*175,
- 'Oct': [0]*175, 'Nov': [0]*175, 'Dec': [0]*175})
- typhoon_data = create_comprehensive_fallback_typhoon_data()
+ if oni_data is None or oni_data.empty:
+ logging.warning("ONI data failed to load - using neutral values")
+
+ # Process data
oni_long = process_oni_data(oni_data)
typhoon_max = process_typhoon_data(typhoon_data)
merged_data = merge_data(oni_long, typhoon_max)
-
-# Initialize data
-initialize_data()
+
+ # Final validation
+ if merged_data is None or merged_data.empty:
+ raise Exception("CRITICAL: Merged data is empty")
+
+ logging.info(f"FIXED data loading complete:")
+ logging.info(f" - ONI data: {len(oni_data) if oni_data is not None else 0} years")
+ logging.info(f" - Typhoon data: {len(typhoon_data)} records")
+ logging.info(f" - Merged data: {len(merged_data)} storms")
+
+ except Exception as e:
+ logging.error(f"CRITICAL ERROR during FIXED data initialization: {e}")
+ import traceback
+ traceback.print_exc()
+ raise Exception(f"Data initialization failed: {e}")
# -----------------------------
-# ENHANCED: Gradio Interface with Fixed Route Visualization and Enhanced Features
+# FIXED: Gradio Interface
# -----------------------------
def create_interface():
- """Create the enhanced Gradio interface with robust error handling"""
+ """Create the enhanced Gradio interface - NO FALLBACKS"""
try:
# Ensure data is available
if oni_data is None or typhoon_data is None or merged_data is None:
- logging.warning("Data not properly loaded, creating minimal interface")
- return create_minimal_fallback_interface()
+ raise Exception("Data not properly loaded for interface creation")
# Get safe data statistics
- try:
- total_storms = len(typhoon_data['SID'].unique()) if 'SID' in typhoon_data.columns else 0
- total_records = len(typhoon_data)
- available_years = get_available_years(typhoon_data)
- year_range_display = f"{available_years[0]} - {available_years[-1]}" if available_years else "Unknown"
- except Exception as e:
- logging.error(f"Error getting data statistics: {e}")
- total_storms = 0
- total_records = 0
- year_range_display = "Unknown"
- available_years = [str(year) for year in range(2000, 2026)]
+ total_storms = len(typhoon_data['SID'].unique()) if 'SID' in typhoon_data.columns else 0
+ total_records = len(typhoon_data)
+ available_years = get_available_years(typhoon_data)
+ year_range_display = f"{available_years[0]} - {available_years[-1]}" if available_years else "Unknown"
with gr.Blocks(title="Enhanced Typhoon Analysis Platform", theme=gr.themes.Soft()) as demo:
gr.Markdown("# 🌪️ Enhanced Typhoon Analysis Platform")
@@ -3210,37 +2677,34 @@ def create_interface():
This dashboard provides comprehensive analysis of typhoon data in relation to ENSO phases with advanced machine learning capabilities.
### 🚀 Enhanced Features:
- - **All Basin Coverage**: Loads data from ALL IBTrACS basins (WP, EP, NA, SP, SI, NI)
- - **Complete Historical Range**: Full coverage from 1851-2025 (175+ years)
- - **ONI Independent**: Analysis works with or without ONI data
- **Advanced ML Clustering**: UMAP/t-SNE storm pattern analysis with separate visualizations
- **Predictive Routing**: Advanced storm track and intensity forecasting with uncertainty quantification
- **Complete TD Support**: Now includes Tropical Depressions (< 34 kt)
- **Taiwan Standard**: Full support for Taiwan meteorological classification system
+ - **2025 Data Ready**: Real-time compatibility with current year data
- **Enhanced Animations**: High-quality storm track visualizations with both standards
+ - **NO FALLBACK DATA**: All data comes from real IBTrACS sources
### 📊 Data Status:
- **ONI Data**: {len(oni_data) if oni_data is not None else 0} years loaded
- **Typhoon Data**: {total_records:,} records loaded
- - **Merged Data**: {len(merged_data):,} typhoons with ONI values
+ - **Merged Data**: {len(merged_data):,} typhoons with analysis data
- **Available Years**: {year_range_display}
- - **Basin Coverage**: All IBTrACS basins (WP, EP, NA, SP, SI, NI)
+ - **Unique Storms**: {total_storms:,}
### 🔧 Technical Capabilities:
- **UMAP Clustering**: {"✅ Available" if UMAP_AVAILABLE else "⚠️ Limited to t-SNE/PCA"}
- **AI Predictions**: {"🧠 Deep Learning" if CNN_AVAILABLE else "🔬 Physics-based"}
- **Enhanced Categorization**: Tropical Depression to Super Typhoon
- - **Platform**: Optimized for Hugging Face Spaces
- - **Maximum Data Utilization**: All available storms loaded regardless of ONI
+ - **Platform**: Optimized for real-time analysis
+ - **Data Source**: Live IBTrACS database (no synthetic data)
### 📈 Research Applications:
- - Climate change impact studies across all basins
+ - Climate change impact studies
- Seasonal forecasting research
- Storm pattern classification
- ENSO-typhoon relationship analysis
- Intensity prediction model development
- - Cross-regional classification comparisons
- - Historical trend analysis (1851-2025)
"""
gr.Markdown(overview_text)
@@ -3276,10 +2740,9 @@ def create_interface():
def run_separate_clustering_analysis(method):
try:
- # Extract features for clustering
storm_features = extract_storm_features(typhoon_data)
if storm_features is None:
- return None, None, None, None, "Error: Could not extract storm features"
+ raise Exception("Could not extract storm features from data")
fig_cluster, fig_routes, fig_pressure, fig_wind, stats = create_separate_clustering_plots(
storm_features, typhoon_data, method.lower()
@@ -3288,7 +2751,8 @@ def create_interface():
except Exception as e:
import traceback
error_details = traceback.format_exc()
- error_msg = f"Error: {str(e)}\n\nDetails:\n{error_details}"
+ error_msg = f"Clustering analysis failed: {str(e)}\n\nDetails:\n{error_details}"
+ logging.error(error_msg)
return None, None, None, None, error_msg
analyze_clusters_btn.click(
@@ -3296,26 +2760,6 @@ def create_interface():
inputs=[reduction_method],
outputs=[cluster_plot, routes_plot, pressure_plot, wind_plot, cluster_stats]
)
-
- cluster_info_text = """
- ### 📊 Enhanced Clustering Features:
- - **All Basin Analysis**: Uses data from all global tropical cyclone basins
- - **Complete Historical Coverage**: Analyzes patterns from 1851-2025
- - **Separate Visualizations**: Four distinct plots for comprehensive analysis
- - **Multi-dimensional Analysis**: Uses 15+ storm characteristics including intensity, track shape, genesis location
- - **Route Visualization**: Geographic storm tracks colored by cluster membership
- - **Temporal Analysis**: Pressure and wind evolution patterns by cluster
- - **DBSCAN Clustering**: Automatic pattern discovery without predefined cluster count
- - **Interactive**: Hover over points to see storm details, zoom and pan all plots
-
- ### 🎯 How to Interpret:
- - **Clustering Plot**: Each dot is a storm positioned by similarity (close = similar characteristics)
- - **Routes Plot**: Actual geographic storm tracks, colored by which cluster they belong to
- - **Pressure Plot**: Shows how pressure changes over time for storms in each cluster
- - **Wind Plot**: Shows wind speed evolution patterns for each cluster
- - **Cluster Colors**: Each cluster gets a unique color across all four visualizations
- """
- gr.Markdown(cluster_info_text)
with gr.Tab("🌊 Realistic Storm Genesis & Prediction"):
gr.Markdown("## 🌊 Realistic Typhoon Development from Genesis")
@@ -3343,7 +2787,6 @@ def create_interface():
info="Select realistic development region based on climatology"
)
- # Display selected region info
def update_genesis_info(region):
locations = get_realistic_genesis_locations()
if region in locations:
@@ -3374,7 +2817,7 @@ def create_interface():
minimum=20,
maximum=1000,
step=6,
- info="Extended forecasting: 20-1000 hours (42 days max)"
+ info="Extended forecasting: 20-1000 hours"
)
advanced_physics = gr.Checkbox(
label="Advanced Physics",
@@ -3406,20 +2849,17 @@ def create_interface():
def run_realistic_prediction(region, month, oni, hours, advanced_phys, uncertainty, animation):
try:
- # Run realistic prediction with genesis region
results = predict_storm_route_and_intensity_realistic(
region, month, oni,
forecast_hours=hours,
use_advanced_physics=advanced_phys
)
- # Extract genesis conditions
current = results['current_prediction']
intensity = current['intensity_kt']
category = current['category']
genesis_info = results.get('genesis_info', {})
- # Create enhanced visualization
fig, forecast_text = create_animated_route_visualization(
results, uncertainty, animation
)
@@ -3438,10 +2878,7 @@ def create_interface():
logging.error(error_msg)
import traceback
traceback.print_exc()
- return (
- 30, "Tropical Depression", f"Prediction failed: {str(e)}",
- None, f"Error generating realistic forecast: {str(e)}"
- )
+ raise gr.Error(error_msg)
predict_btn.click(
fn=run_realistic_prediction,
@@ -3519,18 +2956,18 @@ def create_interface():
)
with gr.Tab("🎬 Enhanced Track Animation"):
- gr.Markdown("## 🎥 High-Quality Storm Track Visualization (Atlantic & Taiwan Standards)")
+ gr.Markdown("## 🎥 High-Quality Storm Track Visualization - NO FALLBACK DATA")
+ gr.Markdown("**ALL animations use real IBTrACS data - never synthetic or fallback data**")
with gr.Row():
year_dropdown = gr.Dropdown(
label="Year",
choices=available_years,
- value=available_years[-1] if available_years else "2024"
+ value=available_years[-1] if available_years else None
)
basin_dropdown = gr.Dropdown(
label="Basin",
- choices=["All Basins", "WP - Western Pacific", "EP - Eastern Pacific", "NA - North Atlantic",
- "SP - South Pacific", "SI - South Indian", "NI - North Indian"],
+ choices=["All Basins", "WP - Western Pacific", "EP - Eastern Pacific", "NA - North Atlantic"],
value="All Basins"
)
@@ -3547,71 +2984,76 @@ def create_interface():
video_output = gr.Video(label="Storm Track Animation")
# Update storm options when year or basin changes
+ def safe_update_typhoon_options(year, basin):
+ try:
+ return update_typhoon_options_enhanced(year, basin)
+ except Exception as e:
+ error_msg = f"Failed to load storms: {str(e)}"
+ logging.error(error_msg)
+ return gr.update(choices=[error_msg], value=None)
+
for input_comp in [year_dropdown, basin_dropdown]:
input_comp.change(
- fn=update_typhoon_options_enhanced,
+ fn=safe_update_typhoon_options,
inputs=[year_dropdown, basin_dropdown],
outputs=[typhoon_dropdown]
)
- # FIXED: Generate video with fixed function
+ def safe_generate_video(year, typhoon_selection, standard):
+ try:
+ if not typhoon_selection:
+ raise gr.Error("Please select a typhoon first")
+ return generate_enhanced_track_video_fixed(year, typhoon_selection, standard)
+ except Exception as e:
+ error_msg = f"Video generation failed: {str(e)}"
+ logging.error(error_msg)
+ raise gr.Error(error_msg)
+
generate_video_btn.click(
- fn=generate_enhanced_track_video_fixed,
+ fn=safe_generate_video,
inputs=[year_dropdown, typhoon_dropdown, standard_dropdown],
outputs=[video_output]
)
- # FIXED animation info text with corrected Taiwan standards
animation_info_text = """
- ### 🎬 Enhanced Animation Features:
- - **All Basin Support**: Visualize storms from any global basin (WP, EP, NA, SP, SI, NI)
- - **Complete Historical Range**: Animate storms from 1851-2025
+ ### 🎬 FIXED Animation Features - NO FALLBACK DATA:
+ - **Real Data Only**: All animations use actual IBTrACS typhoon track data
- **Dual Standards**: Full support for both Atlantic and Taiwan classification systems
- **Full TD Support**: Now displays Tropical Depressions (< 34 kt) in gray
+ - **2025 Compatibility**: Complete support for current year data
- **Enhanced Maps**: Better cartographic projections with terrain features
- **Smart Scaling**: Storm symbols scale dynamically with intensity
- **Real-time Info**: Live position, time, and meteorological data display
- **Professional Styling**: Publication-quality animations with proper legends
- - **Optimized Export**: Fast rendering with web-compatible video formats
- **FIXED Animation**: Tracks now display properly with cartopy integration
+ - **Error Handling**: Robust error handling prevents fallback to synthetic data
### 🎌 Taiwan Standard Features (CORRECTED):
- **CMA 2006 Standards**: Uses official China Meteorological Administration classification
- **Six Categories**: TD → TS → STS → TY → STY → Super TY
- - **Correct Thresholds**:
- * Tropical Depression: < 17.2 m/s (< 33.4 kt)
- * Tropical Storm: 17.2-24.4 m/s (33.4-47.5 kt)
- * Severe Tropical Storm: 24.5-32.6 m/s (47.6-63.5 kt)
- * Typhoon: 32.7-41.4 m/s (63.6-80.6 kt)
- * Severe Typhoon: 41.5-50.9 m/s (80.7-99.1 kt)
- * Super Typhoon: ≥51.0 m/s (≥99.2 kt)
+ - **Correct Thresholds**: Based on official meteorological standards
- **m/s Display**: Shows both knots and meters per second
- **CWB Compatible**: Matches Central Weather Bureau classifications
- - **Fixed Color Coding**: Gray → Blue → Cyan → Yellow → Orange → Red
"""
gr.Markdown(animation_info_text)
with gr.Tab("📊 Data Statistics & Insights"):
- gr.Markdown("## 📈 Comprehensive Dataset Analysis")
+ gr.Markdown("## 📈 Comprehensive Dataset Analysis - REAL DATA ONLY")
- # Create enhanced data summary
try:
if len(typhoon_data) > 0:
- # Storm category distribution
storm_cats = typhoon_data.groupby('SID')['USA_WIND'].max().apply(categorize_typhoon_enhanced)
cat_counts = storm_cats.value_counts()
- # Create distribution chart with enhanced colors
fig_dist = px.bar(
x=cat_counts.index,
y=cat_counts.values,
- title="Storm Intensity Distribution (All Basins - Including Tropical Depressions)",
+ title="Storm Intensity Distribution (Including Tropical Depressions)",
labels={'x': 'Category', 'y': 'Number of Storms'},
color=cat_counts.index,
color_discrete_map=enhanced_color_map
)
- # Seasonal distribution
if 'ISO_TIME' in typhoon_data.columns:
seasonal_data = typhoon_data.copy()
seasonal_data['Month'] = seasonal_data['ISO_TIME'].dt.month
@@ -3620,7 +3062,7 @@ def create_interface():
fig_seasonal = px.bar(
x=monthly_counts.index,
y=monthly_counts.values,
- title="Seasonal Storm Distribution (All Basins)",
+ title="Seasonal Storm Distribution",
labels={'x': 'Month', 'y': 'Number of Storms'},
color=monthly_counts.values,
color_continuous_scale='Viridis'
@@ -3628,13 +3070,12 @@ def create_interface():
else:
fig_seasonal = None
- # Basin distribution
- if 'BASIN' in typhoon_data.columns:
- basin_data = typhoon_data['BASIN'].value_counts()
+ if 'SID' in typhoon_data.columns:
+ basin_data = typhoon_data['SID'].str[:2].value_counts()
fig_basin = px.pie(
values=basin_data.values,
names=basin_data.index,
- title="Distribution by Basin (All Global Basins)"
+ title="Distribution by Basin"
)
else:
fig_basin = None
@@ -3653,10 +3094,7 @@ def create_interface():
except Exception as e:
gr.Markdown(f"Visualization error: {str(e)}")
- # Enhanced statistics - FIXED formatting with ALL DATA
- total_storms = len(typhoon_data['SID'].unique()) if 'SID' in typhoon_data.columns else 0
- total_records = len(typhoon_data)
-
+ # Enhanced statistics
if 'SEASON' in typhoon_data.columns:
try:
min_year = int(typhoon_data['SEASON'].min())
@@ -3664,24 +3102,23 @@ def create_interface():
year_range = f"{min_year}-{max_year}"
years_covered = typhoon_data['SEASON'].nunique()
except (ValueError, TypeError):
- year_range = "1851-2025"
- years_covered = 175
+ year_range = "Unknown"
+ years_covered = 0
else:
- year_range = "1851-2025"
- years_covered = 175
+ year_range = "Unknown"
+ years_covered = 0
- if 'BASIN' in typhoon_data.columns:
+ if 'SID' in typhoon_data.columns:
try:
- basins_available = ', '.join(sorted(typhoon_data['BASIN'].unique()))
+ basins_available = ', '.join(sorted(typhoon_data['SID'].str[:2].unique()))
avg_storms_per_year = total_storms / max(years_covered, 1)
except Exception:
- basins_available = "WP, EP, NA, SP, SI, NI"
+ basins_available = "Unknown"
avg_storms_per_year = 0
else:
- basins_available = "WP, EP, NA, SP, SI, NI"
+ basins_available = "Unknown"
avg_storms_per_year = 0
- # TD specific statistics
try:
if 'USA_WIND' in typhoon_data.columns:
td_storms = len(typhoon_data[typhoon_data['USA_WIND'] < 34]['SID'].unique())
@@ -3692,18 +3129,17 @@ def create_interface():
td_storms = ts_storms = typhoon_storms = 0
td_percentage = 0
except Exception as e:
- print(f"Error calculating TD statistics: {e}")
td_storms = ts_storms = typhoon_storms = 0
td_percentage = 0
- # Create statistics text safely
stats_text = f"""
- ### 📊 Enhanced Dataset Summary:
+ ### 📊 REAL Dataset Summary - NO SYNTHETIC DATA:
- **Total Unique Storms**: {total_storms:,}
- **Total Track Records**: {total_records:,}
- **Year Range**: {year_range} ({years_covered} years)
- **Basins Available**: {basins_available}
- **Average Storms/Year**: {avg_storms_per_year:.1f}
+ - **Data Source**: IBTrACS v04r01 (Real observations only)
### 🌪️ Storm Category Breakdown:
- **Tropical Depressions**: {td_storms:,} storms ({td_percentage:.1f}%)
@@ -3711,97 +3147,59 @@ def create_interface():
- **Typhoons (C1-C5)**: {typhoon_storms:,} storms
### 🚀 Platform Capabilities:
- - **Complete Global Coverage** - ALL IBTrACS basins loaded (WP, EP, NA, SP, SI, NI)
- - **Maximum Historical Range** - Full 175+ year coverage (1851-2025)
- - **ONI Independence** - All storm data preserved regardless of ONI availability
- **Complete TD Analysis** - First platform to include comprehensive TD tracking
- **Dual Classification Systems** - Both Atlantic and Taiwan standards supported
- **Advanced ML Clustering** - DBSCAN pattern recognition with separate visualizations
- **Real-time Predictions** - Physics-based and optional CNN intensity forecasting
+ - **2025 Data Ready** - Full compatibility with current season data
- **Enhanced Animations** - Professional-quality storm track videos
- - **Cross-basin Analysis** - Comprehensive global tropical cyclone coverage
+ - **Multi-basin Analysis** - Comprehensive Pacific and Atlantic coverage
+ - **NO FALLBACK DATA** - All analysis uses real meteorological observations
### 🔬 Research Applications:
- - Global climate change impact studies
- - Cross-basin seasonal forecasting research
- - Storm pattern classification across all oceans
+ - Climate change impact studies
+ - Seasonal forecasting research
+ - Storm pattern classification
- ENSO-typhoon relationship analysis
- Intensity prediction model development
- Cross-regional classification comparisons
- - Historical trend analysis spanning 175+ years
- - Basin interaction and teleconnection studies
"""
gr.Markdown(stats_text)
return demo
except Exception as e:
- logging.error(f"Error creating Gradio interface: {e}")
+ logging.error(f"CRITICAL ERROR creating Gradio interface: {e}")
import traceback
traceback.print_exc()
- # Create a minimal fallback interface
- return create_minimal_fallback_interface()
-
-def create_minimal_fallback_interface():
- """Create a minimal fallback interface when main interface fails"""
- with gr.Blocks() as demo:
- gr.Markdown("# Enhanced Typhoon Analysis Platform")
- gr.Markdown("**Notice**: Loading with minimal interface due to data issues.")
-
- with gr.Tab("Status"):
- gr.Markdown("""
- ## Platform Status
-
- The application is running but encountered issues loading the full interface.
- This could be due to:
- - Data loading problems
- - Missing dependencies
- - Configuration issues
-
- ### Available Features:
- - Basic interface is functional
- - Error logs are being generated
- - System is ready for debugging
-
- ### Next Steps:
- 1. Check the console logs for detailed error information
- 2. Verify all required data files are accessible
- 3. Ensure all dependencies are properly installed
- 4. Try restarting the application
- """)
-
- with gr.Tab("Debug"):
- gr.Markdown("## Debug Information")
-
- def get_debug_info():
- debug_text = f"""
- Python Environment:
- - Working Directory: {os.getcwd()}
- - Data Path: {DATA_PATH}
- - UMAP Available: {UMAP_AVAILABLE}
- - CNN Available: {CNN_AVAILABLE}
-
- Data Status:
- - ONI Data: {'Loaded' if oni_data is not None else 'Failed'}
- - Typhoon Data: {'Loaded' if typhoon_data is not None else 'Failed'}
- - Merged Data: {'Loaded' if merged_data is not None else 'Failed'}
-
- File Checks:
- - ONI Path Exists: {os.path.exists(ONI_DATA_PATH)}
- - Typhoon Path Exists: {os.path.exists(TYPHOON_DATA_PATH)}
-
- Basin Files Available:
- {[f"- {basin}: {BASIN_FILES[basin]}" for basin in BASIN_FILES.keys()]}
- """
- return debug_text
-
- debug_btn = gr.Button("Get Debug Info")
- debug_output = gr.Textbox(label="Debug Information", lines=15)
- debug_btn.click(fn=get_debug_info, outputs=debug_output)
-
- return demo
+ raise Exception(f"Interface creation failed: {e}")
-# Create and launch the interface
-demo = create_interface()
+# -----------------------------
+# MAIN EXECUTION
+# -----------------------------
if __name__ == "__main__":
- demo.launch(share=True) # Enable sharing with public link
\ No newline at end of file
+ try:
+ # Initialize data first - CRITICAL
+ logging.info("Initializing data...")
+ initialize_data()
+
+ # Verify data loaded correctly
+ if typhoon_data is None or typhoon_data.empty:
+ raise Exception("CRITICAL: No typhoon data available for interface")
+
+ logging.info("Creating interface...")
+ demo = create_interface()
+
+ logging.info("Launching application...")
+ demo.launch(share=True)
+
+ except Exception as e:
+ logging.error(f"CRITICAL APPLICATION ERROR: {e}")
+ import traceback
+ traceback.print_exc()
+ print(f"\n{'='*60}")
+ print("CRITICAL ERROR: Application failed to start")
+ print(f"Error: {e}")
+ print("Check logs for detailed error information")
+ print(f"{'='*60}")
+ raise
\ No newline at end of file