# -*- coding: utf-8 -*-
import os
import argparse
import logging
import pickle
import threading
import time
import warnings
from datetime import datetime, timedelta
from collections import defaultdict
import csv

# Suppress warnings for cleaner output
warnings.filterwarnings('ignore', category=FutureWarning)
warnings.filterwarnings('ignore', category=UserWarning, module='umap')
warnings.filterwarnings('ignore', category=UserWarning, module='sklearn')

import gradio as gr
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
import matplotlib.animation as animation
from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas
import cartopy.crs as ccrs
import cartopy.feature as cfeature
import plotly.graph_objects as go
import plotly.express as px
from plotly.subplots import make_subplots
from sklearn.manifold import TSNE
from sklearn.cluster import DBSCAN, KMeans
from sklearn.preprocessing import StandardScaler
from sklearn.decomposition import PCA
from scipy.interpolate import interp1d
import statsmodels.api as sm
import requests
import tempfile
import shutil
import xarray as xr

# NEW: Advanced ML imports
try:
    import umap.umap_ as umap
    UMAP_AVAILABLE = True
except ImportError:
    UMAP_AVAILABLE = False
    print("UMAP not available - clustering features limited")
# Optional CNN imports with robust error handling
CNN_AVAILABLE = False
try:
    # Set environment variables before importing TensorFlow
    os.environ['TF_CPP_MIN_LOG_LEVEL'] = '3'  # Suppress TensorFlow warnings
    import tensorflow as tf
    from tensorflow.keras import layers, models
    # Test if TensorFlow actually works
    tf.config.set_visible_devices([], 'GPU')  # Disable GPU to avoid conflicts
    CNN_AVAILABLE = True
    print("TensorFlow successfully loaded - CNN features enabled")
except Exception as e:
    CNN_AVAILABLE = False
    print(f"TensorFlow not available - CNN features disabled: {str(e)[:100]}...")

try:
    import cdsapi
    CDSAPI_AVAILABLE = True
except ImportError:
    CDSAPI_AVAILABLE = False

import tropycal.tracks as tracks
# -----------------------------
# Configuration and Setup
# -----------------------------
logging.basicConfig(
    level=logging.INFO,
    format='%(asctime)s - %(levelname)s - %(message)s'
)

# Remove argument parser to simplify startup
DATA_PATH = '/tmp/typhoon_data' if 'SPACE_ID' in os.environ else tempfile.gettempdir()

# Ensure directory exists and is writable
try:
    os.makedirs(DATA_PATH, exist_ok=True)
    # Test write permissions
    test_file = os.path.join(DATA_PATH, 'test_write.txt')
    with open(test_file, 'w') as f:
        f.write('test')
    os.remove(test_file)
    logging.info(f"Data directory is writable: {DATA_PATH}")
except Exception as e:
    logging.warning(f"Data directory not writable, using temp dir: {e}")
    DATA_PATH = tempfile.mkdtemp()
    logging.info(f"Using temporary directory: {DATA_PATH}")

# Update file paths
ONI_DATA_PATH = os.path.join(DATA_PATH, 'oni_data.csv')
TYPHOON_DATA_PATH = os.path.join(DATA_PATH, 'processed_typhoon_data.csv')
MERGED_DATA_CSV = os.path.join(DATA_PATH, 'merged_typhoon_era5_data.csv')

# IBTrACS settings
BASIN_FILES = {
    'EP': 'ibtracs.EP.list.v04r01.csv',
    'NA': 'ibtracs.NA.list.v04r01.csv',
    'WP': 'ibtracs.WP.list.v04r01.csv'
}
IBTRACS_BASE_URL = 'https://www.ncei.noaa.gov/data/international-best-track-archive-for-climate-stewardship-ibtracs/v04r01/access/csv/'
LOCAL_IBTRACS_PATH = os.path.join(DATA_PATH, 'ibtracs.WP.list.v04r01.csv')
CACHE_FILE = os.path.join(DATA_PATH, 'ibtracs_cache.pkl')
CACHE_EXPIRY_DAYS = 1
# -----------------------------
# ENHANCED: Color Maps and Standards with TD Support
# -----------------------------
# Enhanced color mapping with TD support (for Plotly)
enhanced_color_map = {
    'Unknown': 'rgb(200, 200, 200)',
    'Tropical Depression': 'rgb(128, 128, 128)',  # NEW: Gray for TD
    'Tropical Storm': 'rgb(0, 0, 255)',
    'C1 Typhoon': 'rgb(0, 255, 255)',
    'C2 Typhoon': 'rgb(0, 255, 0)',
    'C3 Strong Typhoon': 'rgb(255, 255, 0)',
    'C4 Very Strong Typhoon': 'rgb(255, 165, 0)',
    'C5 Super Typhoon': 'rgb(255, 0, 0)'
}

# Matplotlib-compatible color mapping (hex colors)
matplotlib_color_map = {
    'Unknown': '#C8C8C8',
    'Tropical Depression': '#808080',        # Gray for TD
    'Tropical Storm': '#0000FF',             # Blue
    'C1 Typhoon': '#00FFFF',                 # Cyan
    'C2 Typhoon': '#00FF00',                 # Green
    'C3 Strong Typhoon': '#FFFF00',          # Yellow
    'C4 Very Strong Typhoon': '#FFA500',     # Orange
    'C5 Super Typhoon': '#FF0000'            # Red
}
def rgb_string_to_hex(rgb_string):
    """Convert 'rgb(r,g,b)' string to hex color for matplotlib"""
    try:
        # Extract numbers from 'rgb(r,g,b)' format
        import re
        numbers = re.findall(r'\d+', rgb_string)
        if len(numbers) == 3:
            r, g, b = map(int, numbers)
            return f'#{r:02x}{g:02x}{b:02x}'
        else:
            return '#808080'  # Default gray
    except (TypeError, ValueError):
        return '#808080'  # Default gray

def get_matplotlib_color(category):
    """Get matplotlib-compatible color for a storm category"""
    return matplotlib_color_map.get(category, '#808080')
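
# Illustrative sketch (not part of the original app): a quick check of the two
# color helpers above. `_demo_color_helpers` is a hypothetical name added here
# for demonstration; it is defined but never called at import time.
def _demo_color_helpers():
    """Show Plotly 'rgb()' strings converting to matplotlib hex colors."""
    for category, rgb in enhanced_color_map.items():
        # e.g. rgb_string_to_hex('rgb(255, 0, 0)') -> '#ff0000'
        print(f"{category}: {rgb} -> {rgb_string_to_hex(rgb)} "
              f"(lookup: {get_matplotlib_color(category)})")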

# Cluster colors for route visualization
CLUSTER_COLORS = [
    '#FF6B6B', '#4ECDC4', '#45B7D1', '#96CEB4', '#FFEAA7',
    '#DDA0DD', '#98D8C8', '#F7DC6F', '#BB8FCE', '#85C1E9',
    '#F8C471', '#82E0AA', '#F1948A', '#AED6F1', '#D2B4DE'
]
# Original color map for backward compatibility
color_map = {
    'C5 Super Typhoon': 'rgb(255, 0, 0)',
    'C4 Very Strong Typhoon': 'rgb(255, 165, 0)',
    'C3 Strong Typhoon': 'rgb(255, 255, 0)',
    'C2 Typhoon': 'rgb(0, 255, 0)',
    'C1 Typhoon': 'rgb(0, 255, 255)',
    'Tropical Storm': 'rgb(0, 0, 255)',
    'Tropical Depression': 'rgb(128, 128, 128)'
}

atlantic_standard = {
    'C5 Super Typhoon': {'wind_speed': 137, 'color': 'Red', 'hex': '#FF0000'},
    'C4 Very Strong Typhoon': {'wind_speed': 113, 'color': 'Orange', 'hex': '#FFA500'},
    'C3 Strong Typhoon': {'wind_speed': 96, 'color': 'Yellow', 'hex': '#FFFF00'},
    'C2 Typhoon': {'wind_speed': 83, 'color': 'Green', 'hex': '#00FF00'},
    'C1 Typhoon': {'wind_speed': 64, 'color': 'Cyan', 'hex': '#00FFFF'},
    'Tropical Storm': {'wind_speed': 34, 'color': 'Blue', 'hex': '#0000FF'},
    'Tropical Depression': {'wind_speed': 0, 'color': 'Gray', 'hex': '#808080'}
}

taiwan_standard = {
    'Strong Typhoon': {'wind_speed': 51.0, 'color': 'Red', 'hex': '#FF0000'},
    'Medium Typhoon': {'wind_speed': 33.7, 'color': 'Orange', 'hex': '#FFA500'},
    'Mild Typhoon': {'wind_speed': 17.2, 'color': 'Yellow', 'hex': '#FFFF00'},
    'Tropical Depression': {'wind_speed': 0, 'color': 'Gray', 'hex': '#808080'}
}

# -----------------------------
# Utility Functions for HF Spaces
# -----------------------------
def safe_file_write(file_path, data_frame, backup_dir=None):
    """Safely write DataFrame to CSV with backup and error handling"""
    try:
        # Create directory if it doesn't exist
        os.makedirs(os.path.dirname(file_path), exist_ok=True)
        # Write to a temporary file first
        temp_path = file_path + '.tmp'
        data_frame.to_csv(temp_path, index=False)
        # If successful, atomically replace the final file
        # (os.replace overwrites an existing target, unlike os.rename on Windows)
        os.replace(temp_path, file_path)
        logging.info(f"Successfully saved {len(data_frame)} records to {file_path}")
        return True
    except PermissionError as e:
        logging.warning(f"Permission denied writing to {file_path}: {e}")
        if backup_dir:
            try:
                backup_path = os.path.join(backup_dir, os.path.basename(file_path))
                data_frame.to_csv(backup_path, index=False)
                logging.info(f"Saved to backup location: {backup_path}")
                return True
            except Exception as backup_e:
                logging.error(f"Failed to save to backup location: {backup_e}")
        return False
    except Exception as e:
        logging.error(f"Error saving file {file_path}: {e}")
        # Clean up temp file if it exists
        temp_path = file_path + '.tmp'
        if os.path.exists(temp_path):
            try:
                os.remove(temp_path)
            except OSError:
                pass
        return False

def get_fallback_data_dir():
    """Get a fallback data directory that's guaranteed to be writable"""
    fallback_dirs = [
        tempfile.gettempdir(),
        '/tmp',
        os.path.expanduser('~'),
        os.getcwd()
    ]
    for directory in fallback_dirs:
        try:
            test_dir = os.path.join(directory, 'typhoon_fallback')
            os.makedirs(test_dir, exist_ok=True)
            test_file = os.path.join(test_dir, 'test.txt')
            with open(test_file, 'w') as f:
                f.write('test')
            os.remove(test_file)
            return test_dir
        except OSError:
            continue
    # If all else fails, use current directory
    return os.getcwd()
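
# Illustrative sketch (not part of the original app): typical use of
# safe_file_write together with the fallback directory. The frame contents
# below are made-up sample values; `_demo_safe_file_write` is a hypothetical
# helper that is never called at import time.
def _demo_safe_file_write():
    sample = pd.DataFrame({'SID': ['WP012020'], 'USA_WIND': [85]})
    target = os.path.join(DATA_PATH, 'demo_storms.csv')
    ok = safe_file_write(target, sample, backup_dir=get_fallback_data_dir())
    print('write succeeded:', ok)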

# -----------------------------
# ONI and Typhoon Data Functions
# -----------------------------
def download_oni_file(url, filename):
    """Download ONI file with retry logic"""
    max_retries = 3
    for attempt in range(max_retries):
        try:
            response = requests.get(url, timeout=30)
            response.raise_for_status()
            with open(filename, 'wb') as f:
                f.write(response.content)
            return True
        except Exception as e:
            logging.warning(f"Attempt {attempt + 1} failed to download ONI: {e}")
            if attempt < max_retries - 1:
                time.sleep(2 ** attempt)  # Exponential backoff
            else:
                logging.error(f"Failed to download ONI after {max_retries} attempts")
                return False

def convert_oni_ascii_to_csv(input_file, output_file):
    """Convert ONI ASCII format to CSV"""
    data = defaultdict(lambda: [''] * 12)
    season_to_month = {'DJF': 12, 'JFM': 1, 'FMA': 2, 'MAM': 3, 'AMJ': 4, 'MJJ': 5,
                       'JJA': 6, 'JAS': 7, 'ASO': 8, 'SON': 9, 'OND': 10, 'NDJ': 11}
    try:
        with open(input_file, 'r') as f:
            lines = f.readlines()[1:]  # Skip header
            for line in lines:
                parts = line.split()
                if len(parts) >= 4:
                    season, year, anom = parts[0], parts[1], parts[-1]
                    if season in season_to_month:
                        month = season_to_month[season]
                        if season == 'DJF':
                            year = str(int(year) - 1)
                        data[year][month - 1] = anom
        # Write to CSV with safe write
        df = pd.DataFrame(data).T.reset_index()
        df.columns = ['Year', 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                      'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
        df = df.sort_values('Year').reset_index(drop=True)
        return safe_file_write(output_file, df, get_fallback_data_dir())
    except Exception as e:
        logging.error(f"Error converting ONI file: {e}")
        return False
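
# Illustrative sketch (not part of the original app): how the three-month
# season codes map onto calendar months in the converter above. Note the DJF
# special case: the Dec-Jan-Feb window is filed under December of the
# *previous* year, which is why convert_oni_ascii_to_csv decrements the year.
def _demo_oni_season_mapping():
    # Mirrors the season_to_month table local to convert_oni_ascii_to_csv
    season_to_month = {'DJF': 12, 'JFM': 1, 'FMA': 2, 'MAM': 3, 'AMJ': 4, 'MJJ': 5,
                       'JJA': 6, 'JAS': 7, 'ASO': 8, 'SON': 9, 'OND': 10, 'NDJ': 11}
    season, year = 'DJF', '2024'
    month = season_to_month[season]
    if season == 'DJF':
        year = str(int(year) - 1)
    print(f"A 'DJF 2024' row lands in year {year}, month {month}")  # -> 2023, 12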

def update_oni_data():
    """Update ONI data with error handling"""
    url = "https://www.cpc.ncep.noaa.gov/data/indices/oni.ascii.txt"
    temp_file = os.path.join(DATA_PATH, "temp_oni.ascii.txt")
    input_file = os.path.join(DATA_PATH, "oni.ascii.txt")
    output_file = ONI_DATA_PATH
    try:
        if download_oni_file(url, temp_file):
            if not os.path.exists(input_file) or not os.path.exists(output_file):
                # os.replace safely overwrites an existing input file
                os.replace(temp_file, input_file)
                convert_oni_ascii_to_csv(input_file, output_file)
            else:
                os.remove(temp_file)
        else:
            # Create fallback ONI data if download fails
            logging.warning("Creating fallback ONI data")
            create_fallback_oni_data(output_file)
    except Exception as e:
        logging.error(f"Error updating ONI data: {e}")
        create_fallback_oni_data(output_file)

def create_fallback_oni_data(output_file):
    """Create minimal ONI data for testing"""
    years = range(2000, 2026)  # Extended to include 2025
    months = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec']
    # Create synthetic ONI data
    data = []
    for year in years:
        row = [year]
        for month in months:
            # Generate some realistic ONI values
            value = np.random.normal(0, 1) * 0.5
            row.append(f"{value:.2f}")
        data.append(row)
    df = pd.DataFrame(data, columns=['Year'] + months)
    safe_file_write(output_file, df, get_fallback_data_dir())

# -----------------------------
# FIXED: IBTrACS Data Loading
# -----------------------------
def download_ibtracs_file(basin, force_download=False):
    """Download specific basin file from IBTrACS"""
    filename = BASIN_FILES[basin]
    local_path = os.path.join(DATA_PATH, filename)
    url = IBTRACS_BASE_URL + filename
    # Check if file exists and is recent (less than 7 days old)
    if os.path.exists(local_path) and not force_download:
        file_age = time.time() - os.path.getmtime(local_path)
        if file_age < 7 * 24 * 3600:  # 7 days
            logging.info(f"Using cached {basin} basin file")
            return local_path
    try:
        logging.info(f"Downloading {basin} basin file from {url}")
        response = requests.get(url, timeout=60)
        response.raise_for_status()
        # Ensure directory exists
        os.makedirs(os.path.dirname(local_path), exist_ok=True)
        with open(local_path, 'wb') as f:
            f.write(response.content)
        logging.info(f"Successfully downloaded {basin} basin file")
        return local_path
    except Exception as e:
        logging.error(f"Failed to download {basin} basin file: {e}")
        return None

def examine_ibtracs_structure(file_path):
    """Examine the actual structure of an IBTrACS CSV file"""
    try:
        with open(file_path, 'r') as f:
            lines = f.readlines()
        # Show first 5 lines
        logging.info("First 5 lines of IBTrACS file:")
        for i, line in enumerate(lines[:5]):
            logging.info(f"Line {i}: {line.strip()}")
        # The first line contains the actual column headers
        # No need to skip rows for IBTrACS v04r01
        df = pd.read_csv(file_path, nrows=5)
        logging.info(f"Columns from first row: {list(df.columns)}")
        return list(df.columns)
    except Exception as e:
        logging.error(f"Error examining IBTrACS structure: {e}")
        return None

def load_ibtracs_csv_directly(basin='WP'):
    """Load IBTrACS data directly from CSV - FIXED VERSION"""
    filename = BASIN_FILES[basin]
    local_path = os.path.join(DATA_PATH, filename)
    # Download if not exists
    if not os.path.exists(local_path):
        downloaded_path = download_ibtracs_file(basin)
        if not downloaded_path:
            return None
    try:
        # First, examine the structure
        actual_columns = examine_ibtracs_structure(local_path)
        if not actual_columns:
            logging.error("Could not examine IBTrACS file structure")
            return None
        # Read IBTrACS CSV - DON'T skip any rows for v04r01
        # The first row contains proper column headers
        logging.info(f"Reading IBTrACS CSV file: {local_path}")
        df = pd.read_csv(local_path, low_memory=False)  # Don't skip any rows
        logging.info(f"Original columns: {list(df.columns)}")
        logging.info(f"Data shape before cleaning: {df.shape}")
        # Check which essential columns exist
        required_cols = ['SID', 'ISO_TIME', 'LAT', 'LON']
        available_required = [col for col in required_cols if col in df.columns]
        if len(available_required) < 2:
            logging.error(f"Missing critical columns. Available: {list(df.columns)}")
            return None
        # Clean and standardize the data with format specification
        if 'ISO_TIME' in df.columns:
            df['ISO_TIME'] = pd.to_datetime(df['ISO_TIME'], format='%Y-%m-%d %H:%M:%S', errors='coerce')
        # Clean numeric columns
        numeric_columns = ['LAT', 'LON', 'WMO_WIND', 'WMO_PRES', 'USA_WIND', 'USA_PRES']
        for col in numeric_columns:
            if col in df.columns:
                df[col] = pd.to_numeric(df[col], errors='coerce')
        # Filter out invalid/missing critical data
        valid_rows = df['LAT'].notna() & df['LON'].notna()
        df = df[valid_rows]
        # Ensure LAT/LON are in reasonable ranges
        df = df[(df['LAT'] >= -90) & (df['LAT'] <= 90)]
        df = df[(df['LON'] >= -180) & (df['LON'] <= 180)]
        # Add basin info if missing
        if 'BASIN' not in df.columns:
            df['BASIN'] = basin
        # Add default columns if missing
        if 'NAME' not in df.columns:
            df['NAME'] = 'UNNAMED'
        if 'SEASON' not in df.columns and 'ISO_TIME' in df.columns:
            df['SEASON'] = df['ISO_TIME'].dt.year
        logging.info(f"Successfully loaded {len(df)} records from {basin} basin")
        return df
    except Exception as e:
        logging.error(f"Error reading IBTrACS CSV file: {e}")
        return None

def load_ibtracs_data_fixed():
    """Fixed version of IBTrACS data loading"""
    ibtracs_data = {}
    # Try to load each basin, but prioritize WP for this application
    load_order = ['WP', 'EP', 'NA']
    for basin in load_order:
        try:
            logging.info(f"Loading {basin} basin data...")
            df = load_ibtracs_csv_directly(basin)
            if df is not None and not df.empty:
                ibtracs_data[basin] = df
                logging.info(f"Successfully loaded {basin} basin with {len(df)} records")
            else:
                logging.warning(f"No data loaded for basin {basin}")
                ibtracs_data[basin] = None
        except Exception as e:
            logging.error(f"Failed to load basin {basin}: {e}")
            ibtracs_data[basin] = None
    return ibtracs_data

def load_data_fixed(oni_path, typhoon_path):
    """Fixed version of load_data function"""
    # Load ONI data
    oni_data = pd.DataFrame({'Year': [], 'Jan': [], 'Feb': [], 'Mar': [], 'Apr': [],
                             'May': [], 'Jun': [], 'Jul': [], 'Aug': [], 'Sep': [],
                             'Oct': [], 'Nov': [], 'Dec': []})
    if not os.path.exists(oni_path):
        logging.warning(f"ONI data file not found: {oni_path}")
        update_oni_data()
    try:
        oni_data = pd.read_csv(oni_path)
        logging.info(f"Successfully loaded ONI data with {len(oni_data)} years")
    except Exception as e:
        logging.error(f"Error loading ONI data: {e}")
        update_oni_data()
        try:
            oni_data = pd.read_csv(oni_path)
        except Exception as e:
            logging.error(f"Still can't load ONI data: {e}")

    # Load typhoon data - NEW APPROACH
    typhoon_data = None
    # First, try to load from existing processed file
    if os.path.exists(typhoon_path):
        try:
            typhoon_data = pd.read_csv(typhoon_path, low_memory=False)
            # Ensure basic columns exist and are valid
            required_cols = ['LAT', 'LON']
            if all(col in typhoon_data.columns for col in required_cols):
                if 'ISO_TIME' in typhoon_data.columns:
                    typhoon_data['ISO_TIME'] = pd.to_datetime(typhoon_data['ISO_TIME'], errors='coerce')
                logging.info(f"Loaded processed typhoon data with {len(typhoon_data)} records")
            else:
                logging.warning("Processed typhoon data missing required columns, will reload from IBTrACS")
                typhoon_data = None
        except Exception as e:
            logging.error(f"Error loading processed typhoon data: {e}")
            typhoon_data = None

    # If no valid processed data, load from IBTrACS
    if typhoon_data is None or typhoon_data.empty:
        logging.info("Loading typhoon data from IBTrACS...")
        ibtracs_data = load_ibtracs_data_fixed()
        # Combine all available basin data, prioritizing WP
        combined_dfs = []
        for basin in ['WP', 'EP', 'NA']:
            if basin in ibtracs_data and ibtracs_data[basin] is not None:
                df = ibtracs_data[basin].copy()
                df['BASIN'] = basin
                combined_dfs.append(df)
        if combined_dfs:
            typhoon_data = pd.concat(combined_dfs, ignore_index=True)
            # Ensure SID has proper format
            if 'SID' not in typhoon_data.columns and 'BASIN' in typhoon_data.columns:
                # Create SID from basin and other identifiers if missing
                if 'SEASON' in typhoon_data.columns:
                    typhoon_data['SID'] = (typhoon_data['BASIN'].astype(str) +
                                           typhoon_data.index.astype(str).str.zfill(2) +
                                           typhoon_data['SEASON'].astype(str))
                else:
                    typhoon_data['SID'] = (typhoon_data['BASIN'].astype(str) +
                                           typhoon_data.index.astype(str).str.zfill(2) +
                                           '2000')
            # Save the processed data for future use
            safe_file_write(typhoon_path, typhoon_data, get_fallback_data_dir())
            logging.info(f"Combined IBTrACS data: {len(typhoon_data)} total records")
        else:
            logging.error("Failed to load any IBTrACS basin data")
            # Create minimal fallback data
            typhoon_data = create_fallback_typhoon_data()

    # Final validation of typhoon data
    if typhoon_data is not None:
        # Ensure required columns exist with fallback values
        required_columns = {
            'SID': 'UNKNOWN',
            'ISO_TIME': pd.Timestamp('2000-01-01'),
            'LAT': 0.0,
            'LON': 0.0,
            'USA_WIND': np.nan,
            'USA_PRES': np.nan,
            'NAME': 'UNNAMED',
            'SEASON': 2000
        }
        for col, default_val in required_columns.items():
            if col not in typhoon_data.columns:
                typhoon_data[col] = default_val
                logging.warning(f"Added missing column {col} with default value")
        # Ensure data types
        if 'ISO_TIME' in typhoon_data.columns:
            typhoon_data['ISO_TIME'] = pd.to_datetime(typhoon_data['ISO_TIME'], errors='coerce')
        typhoon_data['LAT'] = pd.to_numeric(typhoon_data['LAT'], errors='coerce')
        typhoon_data['LON'] = pd.to_numeric(typhoon_data['LON'], errors='coerce')
        typhoon_data['USA_WIND'] = pd.to_numeric(typhoon_data['USA_WIND'], errors='coerce')
        typhoon_data['USA_PRES'] = pd.to_numeric(typhoon_data['USA_PRES'], errors='coerce')
        # Remove rows with invalid coordinates
        typhoon_data = typhoon_data.dropna(subset=['LAT', 'LON'])
        logging.info(f"Final typhoon data: {len(typhoon_data)} records after validation")

    return oni_data, typhoon_data

def create_fallback_typhoon_data():
    """Create minimal fallback typhoon data - FIXED VERSION"""
    # Use proper pandas date_range instead of numpy
    dates = pd.date_range(start='2000-01-01', end='2025-12-31', freq='D')  # Extended to 2025
    storm_dates = dates[np.random.choice(len(dates), size=100, replace=False)]
    data = []
    for i, date in enumerate(storm_dates):
        # Create realistic WP storm tracks
        base_lat = np.random.uniform(10, 30)
        base_lon = np.random.uniform(130, 160)
        # Generate 20-50 data points per storm
        track_length = np.random.randint(20, 51)
        sid = f"WP{i+1:02d}{date.year}"
        for j in range(track_length):
            lat = base_lat + j * 0.2 + np.random.normal(0, 0.1)
            lon = base_lon + j * 0.3 + np.random.normal(0, 0.1)
            wind = max(25, 70 + np.random.normal(0, 20))
            pres = max(950, 1000 - wind + np.random.normal(0, 5))
            data.append({
                'SID': sid,
                'ISO_TIME': date + pd.Timedelta(hours=j * 6),  # Use pd.Timedelta instead
                'NAME': f'FALLBACK_{i+1}',
                'SEASON': date.year,
                'LAT': lat,
                'LON': lon,
                'USA_WIND': wind,
                'USA_PRES': pres,
                'BASIN': 'WP'
            })
    df = pd.DataFrame(data)
    logging.info(f"Created fallback typhoon data with {len(df)} records")
    return df

def process_oni_data(oni_data):
    """Process ONI data into long format"""
    oni_long = oni_data.melt(id_vars=['Year'], var_name='Month', value_name='ONI')
    month_map = {'Jan': '01', 'Feb': '02', 'Mar': '03', 'Apr': '04', 'May': '05', 'Jun': '06',
                 'Jul': '07', 'Aug': '08', 'Sep': '09', 'Oct': '10', 'Nov': '11', 'Dec': '12'}
    oni_long['Month'] = oni_long['Month'].map(month_map)
    oni_long['Date'] = pd.to_datetime(oni_long['Year'].astype(str) + '-' + oni_long['Month'] + '-01')
    oni_long['ONI'] = pd.to_numeric(oni_long['ONI'], errors='coerce')
    return oni_long
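
# Illustrative sketch (not part of the original app): what the melt step in
# process_oni_data does to the wide ONI table. The values below are made-up.
def _demo_process_oni_data():
    wide = pd.DataFrame({'Year': [2023], 'Jan': [0.7], 'Feb': [0.4]})
    long = wide.melt(id_vars=['Year'], var_name='Month', value_name='ONI')
    # -> two rows: (2023, Jan, 0.7) and (2023, Feb, 0.4); process_oni_data then
    #    turns these into first-of-month Date stamps and numeric ONI values
    print(long)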

def process_typhoon_data(typhoon_data):
    """Process typhoon data"""
    if 'ISO_TIME' in typhoon_data.columns:
        typhoon_data['ISO_TIME'] = pd.to_datetime(typhoon_data['ISO_TIME'], errors='coerce')
    typhoon_data['USA_WIND'] = pd.to_numeric(typhoon_data['USA_WIND'], errors='coerce')
    typhoon_data['USA_PRES'] = pd.to_numeric(typhoon_data['USA_PRES'], errors='coerce')
    typhoon_data['LON'] = pd.to_numeric(typhoon_data['LON'], errors='coerce')
    logging.info(f"Unique basins in typhoon_data: {typhoon_data['SID'].str[:2].unique()}")
    typhoon_max = typhoon_data.groupby('SID').agg({
        'USA_WIND': 'max', 'USA_PRES': 'min', 'ISO_TIME': 'first', 'SEASON': 'first', 'NAME': 'first',
        'LAT': 'first', 'LON': 'first'
    }).reset_index()
    if 'ISO_TIME' in typhoon_max.columns:
        typhoon_max['Month'] = typhoon_max['ISO_TIME'].dt.strftime('%m')
        typhoon_max['Year'] = typhoon_max['ISO_TIME'].dt.year
    else:
        # Fallback if no ISO_TIME
        typhoon_max['Month'] = '01'
        typhoon_max['Year'] = typhoon_max['SEASON']
    typhoon_max['Category'] = typhoon_max['USA_WIND'].apply(categorize_typhoon_enhanced)
    return typhoon_max

def merge_data(oni_long, typhoon_max):
    """Merge ONI and typhoon data"""
    return pd.merge(typhoon_max, oni_long, on=['Year', 'Month'])

# -----------------------------
# ENHANCED: Categorization Functions
# -----------------------------
def categorize_typhoon_enhanced(wind_speed):
    """Enhanced categorization that properly includes Tropical Depressions"""
    if pd.isna(wind_speed):
        return 'Unknown'
    # Convert to knots if in m/s (some datasets use m/s)
    if wind_speed < 10:  # Likely in m/s, convert to knots
        wind_speed = wind_speed * 1.94384
    # FIXED thresholds to include TD
    if wind_speed < 34:      # Below 34 knots = Tropical Depression
        return 'Tropical Depression'
    elif wind_speed < 64:    # 34-63 knots = Tropical Storm
        return 'Tropical Storm'
    elif wind_speed < 83:    # 64-82 knots = Category 1 Typhoon
        return 'C1 Typhoon'
    elif wind_speed < 96:    # 83-95 knots = Category 2 Typhoon
        return 'C2 Typhoon'
    elif wind_speed < 113:   # 96-112 knots = Category 3 Strong Typhoon
        return 'C3 Strong Typhoon'
    elif wind_speed < 137:   # 113-136 knots = Category 4 Very Strong Typhoon
        return 'C4 Very Strong Typhoon'
    else:                    # 137+ knots = Category 5 Super Typhoon
        return 'C5 Super Typhoon'

# Original function for backward compatibility
def categorize_typhoon(wind_speed):
    """Original categorize typhoon function for backward compatibility"""
    return categorize_typhoon_enhanced(wind_speed)

def classify_enso_phases(oni_value):
    """Classify ENSO phases based on ONI value"""
    if isinstance(oni_value, pd.Series):
        oni_value = oni_value.iloc[0]
    if pd.isna(oni_value):
        return 'Neutral'
    if oni_value >= 0.5:
        return 'El Nino'
    elif oni_value <= -0.5:
        return 'La Nina'
    else:
        return 'Neutral'
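
# Illustrative sketch (not part of the original app): the enhanced intensity
# thresholds (in knots) and the ONI-based ENSO phases side by side.
def _demo_categorization():
    for wind in [20, 40, 70, 90, 100, 120, 150]:
        print(f"{wind} kt -> {categorize_typhoon_enhanced(wind)}")
    for oni in [-1.2, 0.0, 0.8]:
        print(f"ONI {oni:+.1f} -> {classify_enso_phases(oni)}")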

# -----------------------------
# NEW: ADVANCED ML FEATURES WITH ROUTE VISUALIZATION
# -----------------------------
def extract_storm_features(typhoon_data):
    """Extract comprehensive features for clustering analysis"""
    # Group by storm ID to get storm-level features
    storm_features = typhoon_data.groupby('SID').agg({
        'USA_WIND': ['max', 'mean', 'std'],
        'USA_PRES': ['min', 'mean', 'std'],
        'LAT': ['mean', 'std', 'max', 'min'],
        'LON': ['mean', 'std', 'max', 'min'],
        'ISO_TIME': ['count']  # Track length
    }).reset_index()
    # Flatten column names
    storm_features.columns = ['SID'] + ['_'.join(col).strip() for col in storm_features.columns[1:]]
    # Add additional computed features
    storm_features['lat_range'] = storm_features['LAT_max'] - storm_features['LAT_min']
    storm_features['lon_range'] = storm_features['LON_max'] - storm_features['LON_min']
    storm_features['track_length'] = storm_features['ISO_TIME_count']
    # Add genesis location features
    genesis_data = typhoon_data.groupby('SID').first()[['LAT', 'LON', 'USA_WIND']]
    genesis_data.columns = ['genesis_lat', 'genesis_lon', 'genesis_intensity']
    storm_features = storm_features.merge(genesis_data, on='SID', how='left')
    # Add track shape features
    track_stats = []
    for sid in storm_features['SID']:
        storm_track = typhoon_data[typhoon_data['SID'] == sid].sort_values('ISO_TIME')
        if len(storm_track) > 2:
            # Calculate track curvature and direction changes
            lats = storm_track['LAT'].values
            lons = storm_track['LON'].values
            # Calculate bearing changes
            bearing_changes = []
            for i in range(1, len(lats) - 1):
                # Simple bearing calculation
                dlat1 = lats[i] - lats[i-1]
                dlon1 = lons[i] - lons[i-1]
                dlat2 = lats[i+1] - lats[i]
                dlon2 = lons[i+1] - lons[i]
                angle1 = np.arctan2(dlat1, dlon1)
                angle2 = np.arctan2(dlat2, dlon2)
                change = abs(angle2 - angle1)
                bearing_changes.append(change)
            avg_curvature = np.mean(bearing_changes) if bearing_changes else 0
            total_distance = np.sum(np.sqrt(np.diff(lats)**2 + np.diff(lons)**2))
            track_stats.append({
                'SID': sid,
                'avg_curvature': avg_curvature,
                'total_distance': total_distance
            })
        else:
            track_stats.append({
                'SID': sid,
                'avg_curvature': 0,
                'total_distance': 0
            })
    track_stats_df = pd.DataFrame(track_stats)
    storm_features = storm_features.merge(track_stats_df, on='SID', how='left')
    return storm_features

def perform_dimensionality_reduction(storm_features, method='umap', n_components=2):
    """Perform UMAP or t-SNE dimensionality reduction"""
    # Select numeric features for clustering
    feature_cols = [col for col in storm_features.columns
                    if col != 'SID' and storm_features[col].dtype in ['float64', 'int64']]
    X = storm_features[feature_cols].fillna(0)
    # Standardize features
    scaler = StandardScaler()
    X_scaled = scaler.fit_transform(X)
    if method.lower() == 'umap' and UMAP_AVAILABLE:
        # UMAP parameters optimized for typhoon data - fixed warnings
        reducer = umap.UMAP(
            n_components=n_components,
            n_neighbors=15,
            min_dist=0.1,
            metric='euclidean',
            random_state=42,
            n_jobs=1  # Explicitly set to avoid warning
        )
    elif method.lower() == 'tsne':
        # t-SNE parameters (perplexity must stay positive for very small datasets)
        reducer = TSNE(
            n_components=n_components,
            perplexity=max(5, min(30, len(X_scaled) // 4)),
            learning_rate=200,
            n_iter=1000,
            random_state=42
        )
    else:
        # Fallback to PCA if UMAP not available
        reducer = PCA(n_components=n_components, random_state=42)
    # Fit and transform
    embedding = reducer.fit_transform(X_scaled)
    return embedding, feature_cols, scaler

def cluster_storms(embedding, method='dbscan', eps=0.5, min_samples=3):
    """Cluster storms based on their embedding"""
    if method.lower() == 'dbscan':
        clusterer = DBSCAN(eps=eps, min_samples=min_samples)
    elif method.lower() == 'kmeans':
        clusterer = KMeans(n_clusters=5, random_state=42)
    else:
        raise ValueError("Method must be 'dbscan' or 'kmeans'")
    clusters = clusterer.fit_predict(embedding)
    return clusters
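
# Illustrative sketch (not part of the original app): the three clustering
# steps above chained together. `_demo_clustering_pipeline` is a hypothetical
# helper; pass it a loaded typhoon DataFrame.
def _demo_clustering_pipeline(typhoon_data):
    features = extract_storm_features(typhoon_data)
    embedding, feature_cols, _ = perform_dimensionality_reduction(features, method='umap')
    labels = cluster_storms(embedding, method='dbscan', eps=0.5, min_samples=3)
    n_clusters = len(set(labels)) - (1 if -1 in labels else 0)  # -1 marks DBSCAN noise
    print(f"{n_clusters} clusters from {len(feature_cols)} features")
    return labels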

def create_advanced_clustering_visualization(storm_features, typhoon_data, method='umap', show_routes=True):
    """Create comprehensive clustering visualization with route display"""
    try:
        # Validate inputs
        if storm_features is None or storm_features.empty:
            raise ValueError("No storm features available for clustering")
        if typhoon_data is None or typhoon_data.empty:
            raise ValueError("No typhoon data available for route visualization")
        # Perform dimensionality reduction
        embedding, feature_cols, scaler = perform_dimensionality_reduction(storm_features, method)
        # Perform clustering
        cluster_labels = cluster_storms(embedding, 'dbscan')
        # Add clustering results to storm features
        storm_features_viz = storm_features.copy()
        storm_features_viz['cluster'] = cluster_labels
        storm_features_viz['dim1'] = embedding[:, 0]
        storm_features_viz['dim2'] = embedding[:, 1]
        # Merge with typhoon data for additional info
        storm_info = typhoon_data.groupby('SID').first()[['NAME', 'SEASON']].reset_index()
        storm_features_viz = storm_features_viz.merge(storm_info, on='SID', how='left')
        if show_routes:
            # Create subplot with both scatter plot and route map
            fig = make_subplots(
                rows=1, cols=2,
                subplot_titles=(
                    f'Storm Clustering using {method.upper()}',
                    'Clustered Storm Routes'
                ),
                specs=[[{"type": "scatter"}, {"type": "geo"}]],
                column_widths=[0.5, 0.5]
            )
            # Add clustering scatter plot
            unique_clusters = sorted(storm_features_viz['cluster'].unique())
            for i, cluster in enumerate(unique_clusters):
                cluster_data = storm_features_viz[storm_features_viz['cluster'] == cluster]
                color = CLUSTER_COLORS[i % len(CLUSTER_COLORS)] if cluster != -1 else '#CCCCCC'
                cluster_name = f'Cluster {cluster}' if cluster != -1 else 'Noise'
                fig.add_trace(
                    go.Scatter(
                        x=cluster_data['dim1'],
                        y=cluster_data['dim2'],
                        mode='markers',
                        marker=dict(color=color, size=8),
                        name=cluster_name,
                        hovertemplate=(
                            '<b>%{customdata[0]}</b><br>'
                            'Season: %{customdata[1]}<br>'
                            'Max Wind: %{customdata[2]:.0f} kt<br>'
                            'Min Pressure: %{customdata[3]:.0f} hPa<br>'
                            'Track Length: %{customdata[4]:.0f} points<br>'
                            '<extra></extra>'
                        ),
                        customdata=np.column_stack((
                            cluster_data['NAME'].fillna('UNNAMED'),
                            cluster_data['SEASON'].fillna(2000),
                            cluster_data['USA_WIND_max'].fillna(0),
                            cluster_data['USA_PRES_min'].fillna(1000),
                            cluster_data['track_length'].fillna(0)
                        ))
                    ),
                    row=1, col=1
                )
            # Add route map
            for i, cluster in enumerate(unique_clusters):
                if cluster == -1:  # Skip noise for route visualization
                    continue
                cluster_storm_ids = storm_features_viz[storm_features_viz['cluster'] == cluster]['SID'].tolist()
                color = CLUSTER_COLORS[i % len(CLUSTER_COLORS)]
                for j, sid in enumerate(cluster_storm_ids[:10]):  # Limit to 10 storms per cluster for performance
                    try:
                        storm_track = typhoon_data[typhoon_data['SID'] == sid].sort_values('ISO_TIME')
                        if len(storm_track) > 1:
                            storm_name = storm_track['NAME'].iloc[0] if pd.notna(storm_track['NAME'].iloc[0]) else 'UNNAMED'
                            fig.add_trace(
                                go.Scattergeo(
                                    lon=storm_track['LON'],
                                    lat=storm_track['LAT'],
                                    mode='lines+markers',
                                    line=dict(color=color, width=2),
                                    marker=dict(color=color, size=4),
                                    name=f'C{cluster}: {storm_name}' if j == 0 else None,
                                    showlegend=(j == 0),
                                    hovertemplate=(
                                        f'<b>{storm_name}</b><br>'
                                        'Lat: %{lat:.1f}°<br>'
                                        'Lon: %{lon:.1f}°<br>'
                                        f'Cluster: {cluster}<br>'
                                        '<extra></extra>'
                                    )
                                ),
                                row=1, col=2
                            )
                    except Exception as track_error:
                        print(f"Error adding track for storm {sid}: {track_error}")
                        continue
            # Update layout
            fig.update_layout(
                title_text="Advanced Storm Clustering Analysis with Route Visualization",
                showlegend=True
            )
            # Update geo layout
            fig.update_geos(
                projection_type="natural earth",
                showland=True,
                landcolor="LightGray",
                showocean=True,
                oceancolor="LightBlue",
                showcoastlines=True,
                coastlinecolor="Gray",
                center=dict(lat=20, lon=140),
                row=1, col=2
            )
            # Update scatter plot axes
            fig.update_xaxes(title_text=f"{method.upper()} Dimension 1", row=1, col=1)
            fig.update_yaxes(title_text=f"{method.upper()} Dimension 2", row=1, col=1)
        else:
            # Simple scatter plot only
            fig = px.scatter(
                storm_features_viz,
                x='dim1',
                y='dim2',
                color='cluster',
                hover_data=['NAME', 'SEASON', 'USA_WIND_max', 'USA_PRES_min'],
                title=f'Storm Clustering using {method.upper()}',
                labels={
                    'dim1': f'{method.upper()} Dimension 1',
                    'dim2': f'{method.upper()} Dimension 2',
                    'cluster': 'Cluster'
                }
            )
        # Generate detailed cluster statistics
        try:
            cluster_stats = storm_features_viz.groupby('cluster').agg({
                'USA_WIND_max': ['mean', 'std', 'min', 'max'],
                'USA_PRES_min': ['mean', 'std', 'min', 'max'],
                'track_length': ['mean', 'std'],
                'genesis_lat': 'mean',
                'genesis_lon': 'mean',
                'total_distance': 'mean',
                'avg_curvature': 'mean',
                'SID': 'count'
            }).round(2)
            # Flatten column names for readability
            cluster_stats.columns = ['_'.join(col).strip() for col in cluster_stats.columns]
            stats_text = "ADVANCED CLUSTER ANALYSIS RESULTS\n" + "=" * 50 + "\n\n"
            for cluster in sorted(storm_features_viz['cluster'].unique()):
                if cluster == -1:
                    stats_text += f"NOISE POINTS: {cluster_stats.loc[-1, 'SID_count']} storms\n\n"
                    continue
                cluster_row = cluster_stats.loc[cluster]
                storm_count = int(cluster_row['SID_count'])
                stats_text += f"CLUSTER {cluster}: {storm_count} storms\n"
                stats_text += f"  Intensity: {cluster_row['USA_WIND_max_mean']:.1f} ± {cluster_row['USA_WIND_max_std']:.1f} kt\n"
                stats_text += f"  Pressure: {cluster_row['USA_PRES_min_mean']:.1f} ± {cluster_row['USA_PRES_min_std']:.1f} hPa\n"
                stats_text += f"  Track Length: {cluster_row['track_length_mean']:.1f} ± {cluster_row['track_length_std']:.1f} points\n"
                # Use the flattened '_mean' column names produced above
                stats_text += f"  Genesis Region: {cluster_row['genesis_lat_mean']:.1f}°N, {cluster_row['genesis_lon_mean']:.1f}°E\n"
                stats_text += f"  Avg Distance: {cluster_row['total_distance_mean']:.2f} degrees\n"
                stats_text += f"  Avg Curvature: {cluster_row['avg_curvature_mean']:.3f} radians\n\n"
            # Add feature importance summary
            stats_text += "CLUSTERING FEATURES USED:\n"
            stats_text += "  • Storm intensity (max/mean/std wind & pressure)\n"
            stats_text += "  • Track characteristics (length, curvature, distance)\n"
            stats_text += "  • Genesis location (lat/lon)\n"
            stats_text += "  • Geographic range (lat/lon span)\n"
            stats_text += f"  • Total features: {len(feature_cols)}\n\n"
            stats_text += f"ALGORITHM: {method.upper()} + DBSCAN clustering\n"
            stats_text += f"CLUSTERS FOUND: {len([c for c in storm_features_viz['cluster'].unique() if c != -1])}\n"
        except Exception as stats_error:
            stats_text = f"Error generating cluster statistics: {str(stats_error)}"
        return fig, stats_text, storm_features_viz
    except Exception as e:
        error_fig = go.Figure()
        error_fig.add_annotation(
            text=f"Error in clustering analysis: {str(e)}",
            xref="paper", yref="paper",
            x=0.5, y=0.5, xanchor='center', yanchor='middle',
            showarrow=False, font_size=16
        )
        return error_fig, f"Error in clustering: {str(e)}", None

# -----------------------------
# NEW: Optional CNN Implementation
# -----------------------------
def create_cnn_model(input_shape=(64, 64, 3)):
    """Create CNN model for typhoon intensity prediction from satellite images"""
    if not CNN_AVAILABLE:
        return None
    try:
        model = models.Sequential([
            # Convolutional layers
            layers.Conv2D(32, (3, 3), activation='relu', input_shape=input_shape),
            layers.MaxPooling2D((2, 2)),
            layers.Conv2D(64, (3, 3), activation='relu'),
            layers.MaxPooling2D((2, 2)),
            layers.Conv2D(64, (3, 3), activation='relu'),
            layers.MaxPooling2D((2, 2)),
            # Dense layers
            layers.Flatten(),
            layers.Dense(64, activation='relu'),
            layers.Dropout(0.5),
            layers.Dense(32, activation='relu'),
            # Output layer for intensity prediction
            layers.Dense(1, activation='linear')  # Regression for wind speed
        ])
        model.compile(
            optimizer='adam',
            loss='mean_squared_error',
            metrics=['mae']
        )
        return model
    except Exception as e:
        print(f"Error creating CNN model: {e}")
        return None
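
# Illustrative sketch (not part of the original app): building the CNN and
# inspecting its architecture, guarded so it only runs when TensorFlow
# imported cleanly above. `_demo_cnn_model` is a hypothetical helper.
def _demo_cnn_model():
    if not CNN_AVAILABLE:
        print("TensorFlow unavailable - skipping CNN demo")
        return
    model = create_cnn_model(input_shape=(64, 64, 3))
    if model is not None:
        model.summary()  # three conv blocks -> dense head -> single wind-speed output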

def simulate_cnn_prediction(lat, lon, month, oni_value):
    """Simulate CNN prediction with robust error handling"""
    try:
        if not CNN_AVAILABLE:
            # Provide a physics-based prediction when CNN is not available
            return simulate_physics_based_prediction(lat, lon, month, oni_value)
        # This would normally process satellite imagery
        # For demo purposes, we'll use a simple heuristic
        # Simulate environmental factors
        sst_anomaly = oni_value * 0.5  # Simplified SST relationship
        seasonal_factor = 1.2 if month in [7, 8, 9, 10] else 0.8
        latitude_factor = max(0.5, (30 - abs(lat)) / 30) if abs(lat) < 30 else 0.1
        # Simple intensity prediction
        base_intensity = 40
        intensity = base_intensity + sst_anomaly * 10 + seasonal_factor * 20 + latitude_factor * 30
        intensity = max(0, min(180, intensity))  # Clamp to reasonable range
        confidence = 0.75 + np.random.normal(0, 0.1)
        confidence = max(0.5, min(0.95, confidence))
        return intensity, f"CNN Prediction: {intensity:.1f} kt (Confidence: {confidence:.1%})"
    except Exception:
        # Fallback to physics-based prediction
        return simulate_physics_based_prediction(lat, lon, month, oni_value)

def simulate_physics_based_prediction(lat, lon, month, oni_value):
    """Physics-based intensity prediction as fallback"""
    try:
        # Simple climatological prediction based on known relationships
        base_intensity = 45
        # ENSO effects
        if oni_value > 0.5:  # El Niño
            intensity_modifier = -15  # Generally suppresses activity in WP
        elif oni_value < -0.5:  # La Niña
            intensity_modifier = +20  # Generally enhances activity
        else:
            intensity_modifier = 0
        # Seasonal effects
        if month in [8, 9, 10]:  # Peak season
            seasonal_modifier = 25
        elif month in [6, 7, 11]:  # Active season
            seasonal_modifier = 15
        else:  # Quiet season
            seasonal_modifier = -10
        # Latitude effects (closer to equator = less favorable)
        if abs(lat) < 10:
            lat_modifier = -20  # Too close to equator
        elif 10 <= abs(lat) <= 25:
            lat_modifier = 10  # Optimal range
        else:
            lat_modifier = -5  # Too far from equator
        # Longitude effects for Western Pacific
        if 120 <= lon <= 160:
            lon_modifier = 10  # Favorable WP region
        else:
            lon_modifier = -5
        predicted_intensity = base_intensity + intensity_modifier + seasonal_modifier + lat_modifier + lon_modifier
        predicted_intensity = max(25, min(180, predicted_intensity))
        confidence = 0.65  # Lower confidence for physics-based model
        return predicted_intensity, f"Physics-based Prediction: {predicted_intensity:.1f} kt (Confidence: {confidence:.1%})"
    except Exception as e:
        return 50, f"Error in prediction: {str(e)}"
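
# Illustrative sketch (not part of the original app): both predictors on a
# sample point in the Western Pacific during peak season under La Niña-like
# conditions. The inputs are made-up.
def _demo_intensity_prediction():
    lat, lon, month, oni = 18.0, 135.0, 9, -0.8
    _, message = simulate_cnn_prediction(lat, lon, month, oni)
    print(message)
    _, message = simulate_physics_based_prediction(lat, lon, month, oni)
    print(message)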

# -----------------------------
# Regression Functions (Original)
# -----------------------------
def perform_wind_regression(start_year, start_month, end_year, end_month):
    """Perform wind regression analysis"""
    start_date = datetime(start_year, start_month, 1)
    end_date = datetime(end_year, end_month, 28)
    data = merged_data[(merged_data['ISO_TIME'] >= start_date) & (merged_data['ISO_TIME'] <= end_date)].dropna(subset=['USA_WIND', 'ONI'])
    data['severe_typhoon'] = (data['USA_WIND'] >= 64).astype(int)
    X = sm.add_constant(data['ONI'])
    y = data['severe_typhoon']
    try:
        model = sm.Logit(y, X).fit(disp=0)
        beta_1 = model.params['ONI']
        exp_beta_1 = np.exp(beta_1)
        p_value = model.pvalues['ONI']
        return f"Wind Regression: β1={beta_1:.4f}, Odds Ratio={exp_beta_1:.4f}, P-value={p_value:.4f}"
    except Exception as e:
        return f"Wind Regression Error: {e}"

def perform_pressure_regression(start_year, start_month, end_year, end_month):
    """Perform pressure regression analysis"""
    start_date = datetime(start_year, start_month, 1)
    end_date = datetime(end_year, end_month, 28)
    data = merged_data[(merged_data['ISO_TIME'] >= start_date) & (merged_data['ISO_TIME'] <= end_date)].dropna(subset=['USA_PRES', 'ONI'])
    data['intense_typhoon'] = (data['USA_PRES'] <= 950).astype(int)
    X = sm.add_constant(data['ONI'])
    y = data['intense_typhoon']
    try:
        model = sm.Logit(y, X).fit(disp=0)
        beta_1 = model.params['ONI']
        exp_beta_1 = np.exp(beta_1)
        p_value = model.pvalues['ONI']
        return f"Pressure Regression: β1={beta_1:.4f}, Odds Ratio={exp_beta_1:.4f}, P-value={p_value:.4f}"
    except Exception as e:
        return f"Pressure Regression Error: {e}"

def perform_longitude_regression(start_year, start_month, end_year, end_month):
    """Perform longitude regression analysis"""
    start_date = datetime(start_year, start_month, 1)
    end_date = datetime(end_year, end_month, 28)
    data = merged_data[(merged_data['ISO_TIME'] >= start_date) & (merged_data['ISO_TIME'] <= end_date)].dropna(subset=['LON', 'ONI'])
    data['western_typhoon'] = (data['LON'] <= 140).astype(int)
    X = sm.add_constant(data['ONI'])
    y = data['western_typhoon']
    try:
        # Logistic regression on the binary outcome, matching the wind and
        # pressure analyses (an odds ratio is only meaningful for a logit model)
        model = sm.Logit(y, X).fit(disp=0)
        beta_1 = model.params['ONI']
        exp_beta_1 = np.exp(beta_1)
        p_value = model.pvalues['ONI']
        return f"Longitude Regression: β1={beta_1:.4f}, Odds Ratio={exp_beta_1:.4f}, P-value={p_value:.4f}"
    except Exception as e:
        return f"Longitude Regression Error: {e}"

# -----------------------------
# Visualization Functions (Enhanced)
# -----------------------------
def get_full_tracks(start_year, start_month, end_year, end_month, enso_phase, typhoon_search):
    """Get full typhoon tracks"""
    start_date = datetime(start_year, start_month, 1)
    end_date = datetime(end_year, end_month, 28)
    filtered_data = merged_data[(merged_data['ISO_TIME'] >= start_date) & (merged_data['ISO_TIME'] <= end_date)].copy()
    filtered_data['ENSO_Phase'] = filtered_data['ONI'].apply(classify_enso_phases)
    if enso_phase != 'all':
        filtered_data = filtered_data[filtered_data['ENSO_Phase'] == enso_phase.capitalize()]
    unique_storms = filtered_data['SID'].unique()
    count = len(unique_storms)
    fig = go.Figure()
    for sid in unique_storms:
        storm_data = typhoon_data[typhoon_data['SID'] == sid]
        if storm_data.empty:
            continue
        name = storm_data['NAME'].iloc[0] if pd.notnull(storm_data['NAME'].iloc[0]) else "Unnamed"
        basin = storm_data['SID'].iloc[0][:2]
        storm_oni = filtered_data[filtered_data['SID'] == sid]['ONI'].iloc[0]
        color = 'red' if storm_oni >= 0.5 else ('blue' if storm_oni <= -0.5 else 'green')
        fig.add_trace(go.Scattergeo(
            lon=storm_data['LON'], lat=storm_data['LAT'], mode='lines',
            name=f"{name} ({basin})",
            line=dict(width=1.5, color=color), hoverinfo="name"
        ))
    if typhoon_search:
        search_mask = typhoon_data['NAME'].str.contains(typhoon_search, case=False, na=False)
        if search_mask.any():
            for sid in typhoon_data[search_mask]['SID'].unique():
                storm_data = typhoon_data[typhoon_data['SID'] == sid]
                fig.add_trace(go.Scattergeo(
                    lon=storm_data['LON'], lat=storm_data['LAT'], mode='lines+markers',
                    name=f"MATCHED: {storm_data['NAME'].iloc[0]}",
                    line=dict(width=3, color='yellow'),
                    marker=dict(size=5), hoverinfo="name"
                ))
    fig.update_layout(
        title=f"Typhoon Tracks ({start_year}-{start_month} to {end_year}-{end_month})",
        geo=dict(
            projection_type='natural earth',
            showland=True,
            showcoastlines=True,
            landcolor='rgb(243,243,243)',
            countrycolor='rgb(204,204,204)',
            coastlinecolor='rgb(204,204,204)',
            center=dict(lon=140, lat=20),
            projection_scale=3
        ),
        legend_title="Typhoons by ENSO Phase",
        showlegend=True,
        height=700
    )
    fig.add_annotation(
        x=0.02, y=0.98, xref="paper", yref="paper",
        text="Red: El Niño, Blue: La Niña, Green: Neutral",
        showarrow=False, align="left",
        bgcolor="rgba(255,255,255,0.8)"
    )
    return fig, f"Total typhoons displayed: {count}"

def get_wind_analysis(start_year, start_month, end_year, end_month, enso_phase, typhoon_search):
    """Get wind analysis with enhanced categorization"""
    start_date = datetime(start_year, start_month, 1)
    end_date = datetime(end_year, end_month, 28)
    filtered_data = merged_data[(merged_data['ISO_TIME'] >= start_date) & (merged_data['ISO_TIME'] <= end_date)].copy()
    filtered_data['ENSO_Phase'] = filtered_data['ONI'].apply(classify_enso_phases)
    if enso_phase != 'all':
        filtered_data = filtered_data[filtered_data['ENSO_Phase'] == enso_phase.capitalize()]
    fig = px.scatter(filtered_data, x='ONI', y='USA_WIND', color='Category',
                     hover_data=['NAME', 'Year', 'Category'],
                     title='Wind Speed vs ONI',
                     labels={'ONI': 'ONI Value', 'USA_WIND': 'Max Wind Speed (knots)'},
                     color_discrete_map=enhanced_color_map)
    if typhoon_search:
        mask = filtered_data['NAME'].str.contains(typhoon_search, case=False, na=False)
        if mask.any():
            fig.add_trace(go.Scatter(
                x=filtered_data.loc[mask, 'ONI'], y=filtered_data.loc[mask, 'USA_WIND'],
                mode='markers', marker=dict(size=10, color='red', symbol='star'),
                name=f'Matched: {typhoon_search}',
                text=filtered_data.loc[mask, 'NAME'] + ' (' + filtered_data.loc[mask, 'Year'].astype(str) + ')'
            ))
    regression = perform_wind_regression(start_year, start_month, end_year, end_month)
    return fig, regression

def get_pressure_analysis(start_year, start_month, end_year, end_month, enso_phase, typhoon_search):
    """Get pressure analysis with enhanced categorization"""
    start_date = datetime(start_year, start_month, 1)
    end_date = datetime(end_year, end_month, 28)
    filtered_data = merged_data[(merged_data['ISO_TIME'] >= start_date) & (merged_data['ISO_TIME'] <= end_date)].copy()
    filtered_data['ENSO_Phase'] = filtered_data['ONI'].apply(classify_enso_phases)
    if enso_phase != 'all':
        filtered_data = filtered_data[filtered_data['ENSO_Phase'] == enso_phase.capitalize()]
    fig = px.scatter(filtered_data, x='ONI', y='USA_PRES', color='Category',
                     hover_data=['NAME', 'Year', 'Category'],
                     title='Pressure vs ONI',
                     labels={'ONI': 'ONI Value', 'USA_PRES': 'Min Pressure (hPa)'},
                     color_discrete_map=enhanced_color_map)
    if typhoon_search:
        mask = filtered_data['NAME'].str.contains(typhoon_search, case=False, na=False)
        if mask.any():
            fig.add_trace(go.Scatter(
                x=filtered_data.loc[mask, 'ONI'], y=filtered_data.loc[mask, 'USA_PRES'],
                mode='markers', marker=dict(size=10, color='red', symbol='star'),
                name=f'Matched: {typhoon_search}',
                text=filtered_data.loc[mask, 'NAME'] + ' (' + filtered_data.loc[mask, 'Year'].astype(str) + ')'
            ))
    regression = perform_pressure_regression(start_year, start_month, end_year, end_month)
    return fig, regression

def get_longitude_analysis(start_year, start_month, end_year, end_month, enso_phase, typhoon_search):
    """Get longitude analysis"""
    start_date = datetime(start_year, start_month, 1)
    end_date = datetime(end_year, end_month, 28)
    filtered_data = merged_data[(merged_data['ISO_TIME'] >= start_date) & (merged_data['ISO_TIME'] <= end_date)].copy()
    filtered_data['ENSO_Phase'] = filtered_data['ONI'].apply(classify_enso_phases)
    if enso_phase != 'all':
        filtered_data = filtered_data[filtered_data['ENSO_Phase'] == enso_phase.capitalize()]
    fig = px.scatter(filtered_data, x='LON', y='ONI', hover_data=['NAME'],
                     title='Typhoon Generation Longitude vs ONI (All Years)')
    if len(filtered_data) > 1:
        # Sort by longitude so the fitted line renders left-to-right
        sorted_data = filtered_data.sort_values('LON')
        X = np.array(sorted_data['LON']).reshape(-1, 1)
        y = sorted_data['ONI']
        try:
            model = sm.OLS(y, sm.add_constant(X)).fit()
            y_pred = model.predict(sm.add_constant(X))
            fig.add_trace(go.Scatter(x=sorted_data['LON'], y=y_pred, mode='lines', name='Regression Line'))
            slope = model.params[1]
            slopes_text = f"All Years Slope: {slope:.4f}"
        except Exception as e:
            slopes_text = f"Regression Error: {e}"
    else:
        slopes_text = "Insufficient data for regression"
    regression = perform_longitude_regression(start_year, start_month, end_year, end_month)
    return fig, slopes_text, regression

def categorize_typhoon_by_standard(wind_speed, standard='atlantic'):
    """Categorize typhoon by standard with enhanced TD support - FIXED for matplotlib"""
    if pd.isna(wind_speed):
        return 'Tropical Depression', '#808080'
    if standard == 'taiwan':
        wind_speed_ms = wind_speed * 0.514444
        if wind_speed_ms >= 51.0:
            return 'Strong Typhoon', '#FF0000'  # Red
        elif wind_speed_ms >= 33.7:
            return 'Medium Typhoon', '#FFA500'  # Orange
        elif wind_speed_ms >= 17.2:
            return 'Mild Typhoon', '#FFFF00'  # Yellow
        return 'Tropical Depression', '#808080'  # Gray
    else:
        if wind_speed >= 137:
            return 'C5 Super Typhoon', '#FF0000'  # Red
        elif wind_speed >= 113:
            return 'C4 Very Strong Typhoon', '#FFA500'  # Orange
        elif wind_speed >= 96:
            return 'C3 Strong Typhoon', '#FFFF00'  # Yellow
        elif wind_speed >= 83:
            return 'C2 Typhoon', '#00FF00'  # Green
        elif wind_speed >= 64:
            return 'C1 Typhoon', '#00FFFF'  # Cyan
        elif wind_speed >= 34:
            return 'Tropical Storm', '#0000FF'  # Blue
        return 'Tropical Depression', '#808080'  # Gray
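
# Illustrative sketch (not part of the original app): the same wind speed
# categorized under both supported standards (input in knots; the Taiwan
# standard converts to m/s internally).
def _demo_standards():
    for wind in [30, 50, 90, 140]:
        atl = categorize_typhoon_by_standard(wind, standard='atlantic')
        twn = categorize_typhoon_by_standard(wind, standard='taiwan')
        print(f"{wind} kt -> Atlantic: {atl[0]} | Taiwan: {twn[0]}")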

# -----------------------------
# ENHANCED: Animation Functions
# -----------------------------
def get_available_years(typhoon_data):
    """Get all available years including 2025 - with error handling"""
    try:
        if typhoon_data is None or typhoon_data.empty:
            return [str(year) for year in range(2000, 2026)]
        if 'ISO_TIME' in typhoon_data.columns:
            years = typhoon_data['ISO_TIME'].dt.year.dropna().unique()
        elif 'SEASON' in typhoon_data.columns:
            years = typhoon_data['SEASON'].dropna().unique()
        else:
            years = range(2000, 2026)  # Default range including 2025
        # Convert to strings and sort
        year_strings = sorted([str(int(year)) for year in years if not pd.isna(year)])
        # Ensure we have at least some years
        if not year_strings:
            return [str(year) for year in range(2000, 2026)]
        return year_strings
    except Exception as e:
        print(f"Error in get_available_years: {e}")
        return [str(year) for year in range(2000, 2026)]

def update_typhoon_options_enhanced(year, basin):
    """Enhanced typhoon options with TD support and 2025 data"""
    try:
        year = int(year)
        # Filter by year - handle both ISO_TIME and SEASON columns
        if 'ISO_TIME' in typhoon_data.columns:
            year_mask = typhoon_data['ISO_TIME'].dt.year == year
        elif 'SEASON' in typhoon_data.columns:
            year_mask = typhoon_data['SEASON'] == year
        else:
            # Fallback - try to extract year from SID or other fields
            year_mask = typhoon_data.index >= 0  # Include all data as fallback
        year_data = typhoon_data[year_mask].copy()
        # Filter by basin if specified
        if basin != "All Basins":
            basin_code = basin.split(' - ')[0] if ' - ' in basin else basin[:2]
            if 'SID' in year_data.columns:
                year_data = year_data[year_data['SID'].str.startswith(basin_code, na=False)]
            elif 'BASIN' in year_data.columns:
                year_data = year_data[year_data['BASIN'] == basin_code]
        if year_data.empty:
            return gr.update(choices=["No storms found"], value=None)
        # Get unique storms - include ALL intensities (including TD)
        storms = year_data.groupby('SID').agg({
            'NAME': 'first',
            'USA_WIND': 'max'
        }).reset_index()
        # Enhanced categorization including TD
        storms['category'] = storms['USA_WIND'].apply(categorize_typhoon_enhanced)
        # Create options with category information
        options = []
        for _, storm in storms.iterrows():
            name = storm['NAME'] if pd.notna(storm['NAME']) and storm['NAME'] != '' else 'UNNAMED'
            sid = storm['SID']
            category = storm['category']
            max_wind = storm['USA_WIND'] if pd.notna(storm['USA_WIND']) else 0
            option = f"{name} ({sid}) - {category} ({max_wind:.0f}kt)"
            options.append(option)
        if not options:
            return gr.update(choices=["No storms found"], value=None)
        return gr.update(choices=sorted(options), value=options[0])
    except Exception as e:
        print(f"Error in update_typhoon_options_enhanced: {e}")
        return gr.update(choices=["Error loading storms"], value=None)

def generate_enhanced_track_video(year, typhoon_selection, standard):
    """Enhanced track video generation with TD support and 2025 compatibility - FIXED color handling"""
    if not typhoon_selection or typhoon_selection == "No storms found":
        return None
    try:
        # Extract SID from selection
        sid = typhoon_selection.split('(')[1].split(')')[0]
        # Get storm data
        storm_df = typhoon_data[typhoon_data['SID'] == sid].copy()
        if storm_df.empty:
            print(f"No data found for storm {sid}")
            return None
        # Sort by time
        if 'ISO_TIME' in storm_df.columns:
            storm_df = storm_df.sort_values('ISO_TIME')
        # Extract data for animation
        lats = storm_df['LAT'].astype(float).values
        lons = storm_df['LON'].astype(float).values
        if 'USA_WIND' in storm_df.columns:
            winds = pd.to_numeric(storm_df['USA_WIND'], errors='coerce').fillna(0).values
        else:
            winds = np.full(len(lats), 30)  # Default TD strength
        # Enhanced metadata
        storm_name = storm_df['NAME'].iloc[0] if pd.notna(storm_df['NAME'].iloc[0]) else "UNNAMED"
        season = storm_df['SEASON'].iloc[0] if 'SEASON' in storm_df.columns else year
        print(f"Generating video for {storm_name} ({sid}) with {len(lats)} track points")
        # Create figure with enhanced map
        fig, ax = plt.subplots(figsize=(14, 8), subplot_kw={'projection': ccrs.PlateCarree()})
        # Enhanced map features
        ax.stock_img()
        ax.add_feature(cfeature.COASTLINE, linewidth=0.8)
        ax.add_feature(cfeature.BORDERS, linewidth=0.5)
        ax.add_feature(cfeature.OCEAN, color='lightblue', alpha=0.5)
        ax.add_feature(cfeature.LAND, color='lightgray', alpha=0.5)
        # Set extent based on track
        padding = 5
        ax.set_extent([
            min(lons) - padding, max(lons) + padding,
            min(lats) - padding, max(lats) + padding
        ])
        # Add gridlines
        gl = ax.gridlines(draw_labels=True, alpha=0.3)
        gl.top_labels = gl.right_labels = False
        # Title with enhanced info
        ax.set_title(f"{season} {storm_name} ({sid}) Track Animation", fontsize=16, fontweight='bold')
        # Animation elements
        line, = ax.plot([], [], 'b-', linewidth=3, alpha=0.7, label='Track')
        point, = ax.plot([], [], 'o', markersize=12)
        # Enhanced info display
        info_box = ax.text(0.02, 0.98, '', transform=ax.transAxes,
                           fontsize=11, verticalalignment='top',
                           bbox=dict(boxstyle="round,pad=0.5", facecolor='white', alpha=0.9))
        # Color legend with TD support - FIXED
        legend_elements = []
        for category in ['Tropical Depression', 'Tropical Storm', 'C1 Typhoon', 'C2 Typhoon',
                         'C3 Strong Typhoon', 'C4 Very Strong Typhoon', 'C5 Super Typhoon']:
            if category in matplotlib_color_map:
                color = get_matplotlib_color(category)
                legend_elements.append(plt.Line2D([0], [0], marker='o', color='w',
                                                  markerfacecolor=color, markersize=8, label=category))
        ax.legend(handles=legend_elements, loc='upper right', fontsize=9)
def animate(frame): | |
try: | |
if frame >= len(lats): | |
return line, point, info_box | |
# Update track line | |
line.set_data(lons[:frame+1], lats[:frame+1]) | |
# Update current position | |
current_wind = winds[frame] | |
category = categorize_typhoon_enhanced(current_wind) | |
color = get_matplotlib_color(category) # FIXED: Use matplotlib-compatible color | |
# Debug print for first few frames | |
if frame < 3: | |
print(f"Frame {frame}: Wind={current_wind:.1f}kt, Category={category}, Color={color}") | |
point.set_data([lons[frame]], [lats[frame]]) | |
point.set_color(color) | |
point.set_markersize(8 + current_wind/10) # Size based on intensity | |
# Enhanced info display | |
if 'ISO_TIME' in storm_df.columns and frame < len(storm_df): | |
current_time = storm_df.iloc[frame]['ISO_TIME'] | |
time_str = current_time.strftime('%Y-%m-%d %H:%M UTC') if pd.notna(current_time) else 'Unknown' | |
else: | |
time_str = f"Step {frame+1}" | |
info_text = ( | |
f"Storm: {storm_name}\n" | |
f"Time: {time_str}\n" | |
f"Position: {lats[frame]:.1f}Β°N, {lons[frame]:.1f}Β°E\n" | |
f"Max Wind: {current_wind:.0f} kt\n" | |
f"Category: {category}\n" | |
f"Frame: {frame+1}/{len(lats)}" | |
) | |
info_box.set_text(info_text) | |
return line, point, info_box | |
except Exception as e: | |
print(f"Error in animate frame {frame}: {e}") | |
return line, point, info_box | |
# Create animation | |
anim = animation.FuncAnimation( | |
fig, animate, frames=len(lats), | |
interval=300, blit=False, repeat=True | |
) | |
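# blit=False is a deliberate choice here: blitting only re-renders the | |
# returned artists, and with a Cartopy background, legend, and text box | |
# it can leave stale pixels on some backends. | |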
# Save animation | |
temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.mp4', | |
dir=tempfile.gettempdir()) | |
# Enhanced writer settings | |
writer = animation.FFMpegWriter( | |
fps=4, bitrate=2000, codec='libx264', | |
extra_args=['-pix_fmt', 'yuv420p'] # Better compatibility | |
) | |
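# If FFmpeg is unavailable on the host, a GIF fallback is possible with | |
# matplotlib's PillowWriter (a sketch, not wired into this code path): | |
#   gif_writer = animation.PillowWriter(fps=4) | |
#   anim.save(temp_file.name.replace('.mp4', '.gif'), writer=gif_writer, dpi=100) | |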
print(f"Saving animation to {temp_file.name}") | |
anim.save(temp_file.name, writer=writer, dpi=100) | |
plt.close(fig) | |
print(f"Video generated successfully: {temp_file.name}") | |
return temp_file.name | |
except Exception as e: | |
print(f"Error generating video: {e}") | |
import traceback | |
traceback.print_exc() | |
return None | |
# Simplified wrapper for backward compatibility - FIXED | |
def simplified_track_video(year, basin, typhoon, standard): | |
"""Simplified wrapper with fixed color handling; 'basin' is accepted for signature compatibility (basin filtering already happened when the storm list was built)""" | |
if not typhoon: | |
return None | |
return generate_enhanced_track_video(year, typhoon, standard) | |
# ----------------------------- | |
# Load & Process Data | |
# ----------------------------- | |
# Global variables initialization | |
oni_data = None | |
typhoon_data = None | |
merged_data = None | |
def initialize_data(): | |
"""Initialize all data safely""" | |
global oni_data, typhoon_data, merged_data | |
try: | |
logging.info("Starting data loading process...") | |
update_oni_data() | |
oni_data, typhoon_data = load_data_fixed(ONI_DATA_PATH, TYPHOON_DATA_PATH) | |
if oni_data is not None and typhoon_data is not None: | |
oni_long = process_oni_data(oni_data) | |
typhoon_max = process_typhoon_data(typhoon_data) | |
merged_data = merge_data(oni_long, typhoon_max) | |
logging.info("Data loading complete.") | |
else: | |
logging.error("Failed to load required data") | |
# Create minimal fallback data | |
oni_data = pd.DataFrame({'Year': [2000], 'Jan': [0], 'Feb': [0], 'Mar': [0], 'Apr': [0], | |
'May': [0], 'Jun': [0], 'Jul': [0], 'Aug': [0], 'Sep': [0], | |
'Oct': [0], 'Nov': [0], 'Dec': [0]}) | |
typhoon_data = create_fallback_typhoon_data() | |
oni_long = process_oni_data(oni_data) | |
typhoon_max = process_typhoon_data(typhoon_data) | |
merged_data = merge_data(oni_long, typhoon_max) | |
except Exception as e: | |
logging.error(f"Error during data initialization: {e}") | |
# Create minimal fallback data | |
oni_data = pd.DataFrame({'Year': [2000], 'Jan': [0], 'Feb': [0], 'Mar': [0], 'Apr': [0], | |
'May': [0], 'Jun': [0], 'Jul': [0], 'Aug': [0], 'Sep': [0], | |
'Oct': [0], 'Nov': [0], 'Dec': [0]}) | |
typhoon_data = create_fallback_typhoon_data() | |
oni_long = process_oni_data(oni_data) | |
typhoon_max = process_typhoon_data(typhoon_data) | |
merged_data = merge_data(oni_long, typhoon_max) | |
# Initialize data | |
initialize_data() | |
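# initialize_data() runs at import time so the module-level globals above | |
# (oni_data, typhoon_data, merged_data) are populated before | |
# create_interface() reads them below. | |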
# ----------------------------- | |
# ENHANCED: Gradio Interface | |
# ----------------------------- | |
def create_interface(): | |
"""Create the enhanced Gradio interface with robust error handling""" | |
try: | |
# Ensure data is available | |
if oni_data is None or typhoon_data is None or merged_data is None: | |
logging.warning("Data not properly loaded, creating minimal interface") | |
return create_minimal_fallback_interface() | |
# Get safe data statistics | |
try: | |
total_storms = len(typhoon_data['SID'].unique()) if 'SID' in typhoon_data.columns else 0 | |
total_records = len(typhoon_data) | |
available_years = get_available_years(typhoon_data) | |
year_range_display = f"{available_years[0]} - {available_years[-1]}" if available_years else "Unknown" | |
except Exception as e: | |
logging.error(f"Error getting data statistics: {e}") | |
total_storms = 0 | |
total_records = 0 | |
year_range_display = "Unknown" | |
available_years = [str(year) for year in range(2000, 2026)] | |
with gr.Blocks(title="Enhanced Typhoon Analysis Platform", theme=gr.themes.Soft()) as demo: | |
gr.Markdown("# Enhanced Typhoon Analysis Platform") | |
gr.Markdown("Advanced ML clustering, CNN predictions, and comprehensive tropical cyclone analysis including Tropical Depressions") | |
with gr.Tab("Overview"): | |
gr.Markdown(f""" | |
## Welcome to the Enhanced Typhoon Analysis Dashboard | |
This dashboard provides comprehensive analysis of typhoon data in relation to ENSO phases with advanced machine learning capabilities. | |
### Enhanced Features: | |
- **Advanced ML Clustering**: UMAP/t-SNE storm pattern analysis with route visualization | |
- **Optional CNN Predictions**: Deep learning intensity forecasting | |
- **Complete TD Support**: Now includes Tropical Depressions (< 34 kt) | |
- **2025 Data Ready**: Real-time compatibility with current year data | |
- **Enhanced Animations**: High-quality storm track visualizations | |
### Data Status: | |
- **ONI Data**: {len(oni_data)} years loaded | |
- **Typhoon Data**: {total_records} records loaded | |
- **Merged Data**: {len(merged_data)} typhoons with ONI values | |
- **Available Years**: {year_range_display} | |
### Technical Capabilities: | |
- **UMAP Clustering**: {"Available" if UMAP_AVAILABLE else "Limited to t-SNE/PCA"} | |
- **AI Predictions**: {"Deep Learning" if CNN_AVAILABLE else "Physics-based"} | |
- **Enhanced Categorization**: Tropical Depression to Super Typhoon | |
- **Platform Compatibility**: Optimized for Hugging Face Spaces | |
""") | |
with gr.Tab("Advanced ML Clustering with Routes"): | |
gr.Markdown("## Storm Pattern Analysis using UMAP/t-SNE with Route Visualization") | |
gr.Markdown("**This tab shows both the dimensional clustering analysis AND the actual storm tracks colored by cluster**") | |
with gr.Row(): | |
reduction_method = gr.Dropdown( | |
choices=['UMAP', 't-SNE', 'PCA'], | |
value='UMAP' if UMAP_AVAILABLE else 't-SNE', | |
label="Dimensionality Reduction Method" | |
) | |
show_routes = gr.Checkbox( | |
label="Show Storm Routes on Map", | |
value=True, | |
info="Display actual storm tracks colored by cluster" | |
) | |
analyze_clusters_btn = gr.Button("Analyze Storm Clusters & Routes", variant="primary") | |
with gr.Row(): | |
cluster_plot = gr.Plot(label="Storm Clustering with Route Visualization") | |
with gr.Row(): | |
cluster_stats = gr.Textbox(label="Detailed Cluster Statistics", lines=15, max_lines=20) | |
def run_advanced_clustering_analysis(method, show_routes): | |
try: | |
# Extract features for clustering | |
storm_features = extract_storm_features(typhoon_data) | |
fig, stats, _ = create_advanced_clustering_visualization(storm_features, typhoon_data, method.lower(), show_routes) | |
return fig, stats | |
except Exception as e: | |
import traceback | |
error_details = traceback.format_exc() | |
return None, f"Error: {str(e)}\n\nDetails:\n{error_details}" | |
analyze_clusters_btn.click( | |
fn=run_advanced_clustering_analysis, | |
inputs=[reduction_method, show_routes], | |
outputs=[cluster_plot, cluster_stats] | |
) | |
gr.Markdown(""" | |
### Advanced Clustering Features: | |
- **Multi-dimensional Analysis**: Uses 15+ storm characteristics including intensity, track shape, genesis location | |
- **Route Visualization**: Shows actual storm tracks colored by cluster membership | |
- **DBSCAN Clustering**: Automatically finds natural groupings without predefined cluster count | |
- **Comprehensive Stats**: Detailed cluster analysis including intensity, pressure, track length, curvature | |
- **Interactive**: Hover over points to see storm details, zoom and pan the route map | |
### How to Interpret: | |
- **Left Plot**: Each dot is a storm positioned by similarity (close = similar characteristics) | |
- **Right Plot**: Actual geographic storm tracks, colored by which cluster they belong to | |
- **Cluster Colors**: Each cluster gets a unique color to identify similar storm patterns | |
- **Noise Points**: Gray points represent storms that don't fit clear patterns | |
""") | |
with gr.Tab("π€ Intensity Prediction"): | |
gr.Markdown("## AI-Powered Storm Intensity Forecasting") | |
if CNN_AVAILABLE: | |
gr.Markdown("**Deep Learning models available** - TensorFlow loaded successfully") | |
method_description = "Using Convolutional Neural Networks for advanced intensity prediction" | |
else: | |
gr.Markdown("**Physics-based models available** - Using climatological relationships") | |
gr.Markdown("*Install TensorFlow for deep learning features: pip install tensorflow-cpu*") | |
method_description = "Using established meteorological relationships and climatology" | |
gr.Markdown(f"**Current Method**: {method_description}") | |
with gr.Row(): | |
cnn_lat = gr.Number(label="Latitude", value=20.0, info="Storm center latitude (-90 to 90)") | |
cnn_lon = gr.Number(label="Longitude", value=140.0, info="Storm center longitude (-180 to 180)") | |
cnn_month = gr.Slider(1, 12, label="Month", value=9, info="Month of year (1=Jan, 12=Dec)") | |
cnn_oni = gr.Number(label="ONI Value", value=0.0, info="Current ENSO index (-3 to 3)") | |
predict_btn = gr.Button("Predict Storm Intensity", variant="primary") | |
with gr.Row(): | |
intensity_output = gr.Number(label="Predicted Max Wind (kt)") | |
confidence_output = gr.Textbox(label="Model Output & Confidence") | |
predict_btn.click( | |
fn=simulate_cnn_prediction, | |
inputs=[cnn_lat, cnn_lon, cnn_month, cnn_oni], | |
outputs=[intensity_output, confidence_output] | |
) | |
gr.Markdown(""" | |
gr.Markdown(""" | |
### π§ Prediction Features: | |
- **Environmental Analysis**: Considers ENSO, latitude, seasonality | |
- **Real-time Capable**: Predictions in milliseconds | |
- **Confidence Scoring**: Uncertainty quantification included | |
- **Robust Fallbacks**: Works with or without deep learning libraries | |
### π Interpretation Guide: | |
- **25-33 kt**: Tropical Depression (TD) | |
- **34-63 kt**: Tropical Storm (TS) | |
- **64+ kt**: Typhoon categories (C1-C5) | |
- **100+ kt**: Major typhoon (C3+) | |
""") | |
with gr.Tab("π Track Visualization"): | |
with gr.Row(): | |
start_year = gr.Number(label="Start Year", value=2020) | |
start_month = gr.Dropdown(label="Start Month", choices=list(range(1, 13)), value=1) | |
end_year = gr.Number(label="End Year", value=2025) | |
end_month = gr.Dropdown(label="End Month", choices=list(range(1, 13)), value=6) | |
enso_phase = gr.Dropdown(label="ENSO Phase", choices=['all', 'El Nino', 'La Nina', 'Neutral'], value='all') | |
typhoon_search = gr.Textbox(label="Typhoon Search") | |
analyze_btn = gr.Button("Generate Tracks") | |
tracks_plot = gr.Plot() | |
typhoon_count = gr.Textbox(label="Number of Typhoons Displayed") | |
analyze_btn.click( | |
fn=get_full_tracks, | |
inputs=[start_year, start_month, end_year, end_month, enso_phase, typhoon_search], | |
outputs=[tracks_plot, typhoon_count] | |
) | |
with gr.Tab("π¨ Wind Analysis"): | |
with gr.Row(): | |
wind_start_year = gr.Number(label="Start Year", value=2020) | |
wind_start_month = gr.Dropdown(label="Start Month", choices=list(range(1, 13)), value=1) | |
wind_end_year = gr.Number(label="End Year", value=2024) | |
wind_end_month = gr.Dropdown(label="End Month", choices=list(range(1, 13)), value=6) | |
wind_enso_phase = gr.Dropdown(label="ENSO Phase", choices=['all', 'El Nino', 'La Nina', 'Neutral'], value='all') | |
wind_typhoon_search = gr.Textbox(label="Typhoon Search") | |
wind_analyze_btn = gr.Button("Generate Wind Analysis") | |
wind_scatter = gr.Plot() | |
wind_regression_results = gr.Textbox(label="Wind Regression Results") | |
wind_analyze_btn.click( | |
fn=get_wind_analysis, | |
inputs=[wind_start_year, wind_start_month, wind_end_year, wind_end_month, wind_enso_phase, wind_typhoon_search], | |
outputs=[wind_scatter, wind_regression_results] | |
) | |
with gr.Tab("π Pressure Analysis"): | |
with gr.Row(): | |
pressure_start_year = gr.Number(label="Start Year", value=2020) | |
pressure_start_month = gr.Dropdown(label="Start Month", choices=list(range(1, 13)), value=1) | |
pressure_end_year = gr.Number(label="End Year", value=2024) | |
pressure_end_month = gr.Dropdown(label="End Month", choices=list(range(1, 13)), value=6) | |
pressure_enso_phase = gr.Dropdown(label="ENSO Phase", choices=['all', 'El Nino', 'La Nina', 'Neutral'], value='all') | |
pressure_typhoon_search = gr.Textbox(label="Typhoon Search") | |
pressure_analyze_btn = gr.Button("Generate Pressure Analysis") | |
pressure_scatter = gr.Plot() | |
pressure_regression_results = gr.Textbox(label="Pressure Regression Results") | |
pressure_analyze_btn.click( | |
fn=get_pressure_analysis, | |
inputs=[pressure_start_year, pressure_start_month, pressure_end_year, pressure_end_month, pressure_enso_phase, pressure_typhoon_search], | |
outputs=[pressure_scatter, pressure_regression_results] | |
) | |
with gr.Tab("π Longitude Analysis"): | |
with gr.Row(): | |
lon_start_year = gr.Number(label="Start Year", value=2020) | |
lon_start_month = gr.Dropdown(label="Start Month", choices=list(range(1, 13)), value=1) | |
lon_end_year = gr.Number(label="End Year", value=2020) | |
lon_end_month = gr.Dropdown(label="End Month", choices=list(range(1, 13)), value=6) | |
lon_enso_phase = gr.Dropdown(label="ENSO Phase", choices=['all', 'El Nino', 'La Nina', 'Neutral'], value='all') | |
lon_typhoon_search = gr.Textbox(label="Typhoon Search (Optional)") | |
lon_analyze_btn = gr.Button("Generate Longitude Analysis") | |
regression_plot = gr.Plot() | |
slopes_text = gr.Textbox(label="Regression Slopes") | |
lon_regression_results = gr.Textbox(label="Longitude Regression Results") | |
lon_analyze_btn.click( | |
fn=get_longitude_analysis, | |
inputs=[lon_start_year, lon_start_month, lon_end_year, lon_end_month, lon_enso_phase, lon_typhoon_search], | |
outputs=[regression_plot, slopes_text, lon_regression_results] | |
) | |
with gr.Tab("π¬ Enhanced Track Animation"): | |
gr.Markdown("## High-Quality Storm Track Visualization (All Categories Including TD)") | |
with gr.Row(): | |
year_dropdown = gr.Dropdown( | |
label="Year", | |
choices=available_years, | |
value=available_years[-1] if available_years else "2024" | |
) | |
basin_dropdown = gr.Dropdown( | |
label="Basin", | |
choices=["All Basins", "WP - Western Pacific", "EP - Eastern Pacific", "NA - North Atlantic"], | |
value="All Basins" | |
) | |
with gr.Row(): | |
typhoon_dropdown = gr.Dropdown(label="Storm Selection (All Categories Including TD)") | |
standard_dropdown = gr.Dropdown( | |
label="Classification Standard", | |
choices=['atlantic', 'taiwan'], | |
value='atlantic' | |
) | |
generate_video_btn = gr.Button("Generate Enhanced Animation", variant="primary") | |
video_output = gr.Video(label="Storm Track Animation") | |
# Update storm options when year or basin changes | |
for input_comp in [year_dropdown, basin_dropdown]: | |
input_comp.change( | |
fn=update_typhoon_options_enhanced, | |
inputs=[year_dropdown, basin_dropdown], | |
outputs=[typhoon_dropdown] | |
) | |
# Generate video | |
generate_video_btn.click( | |
fn=generate_enhanced_track_video, | |
inputs=[year_dropdown, typhoon_dropdown, standard_dropdown], | |
outputs=[video_output] | |
) | |
gr.Markdown(""" | |
### Enhanced Animation Features: | |
- **Full TD Support**: Now displays Tropical Depressions (< 34 kt) in gray | |
- **2025 Compatibility**: Complete support for current year data | |
- **Enhanced Maps**: Better cartographic projections with terrain features | |
- **Smart Scaling**: Storm symbols scale dynamically with intensity | |
- **Real-time Info**: Live position, time, and meteorological data display | |
- **Professional Styling**: Publication-quality animations with proper legends | |
- **Optimized Export**: Fast rendering with web-compatible video formats | |
""") | |
with gr.Tab("Data Statistics & Insights"): | |
gr.Markdown("## Comprehensive Dataset Analysis") | |
# Create enhanced data summary | |
try: | |
if len(typhoon_data) > 0: | |
# Storm category distribution | |
storm_cats = typhoon_data.groupby('SID')['USA_WIND'].max().apply(categorize_typhoon_enhanced) | |
cat_counts = storm_cats.value_counts() | |
# Create distribution chart with enhanced colors | |
fig_dist = px.bar( | |
x=cat_counts.index, | |
y=cat_counts.values, | |
title="Storm Intensity Distribution (Including Tropical Depressions)", | |
labels={'x': 'Category', 'y': 'Number of Storms'}, | |
color=cat_counts.index, | |
color_discrete_map=enhanced_color_map | |
) | |
# Seasonal distribution | |
if 'ISO_TIME' in typhoon_data.columns: | |
seasonal_data = typhoon_data.copy() | |
seasonal_data['Month'] = seasonal_data['ISO_TIME'].dt.month | |
monthly_counts = seasonal_data.groupby(['Month', 'SID']).size().groupby('Month').size() | |
fig_seasonal = px.bar( | |
x=monthly_counts.index, | |
y=monthly_counts.values, | |
title="Seasonal Storm Distribution", | |
labels={'x': 'Month', 'y': 'Number of Storms'}, | |
color=monthly_counts.values, | |
color_continuous_scale='Viridis' | |
) | |
else: | |
fig_seasonal = None | |
# Basin distribution | |
if 'SID' in typhoon_data.columns: | |
basin_data = typhoon_data['SID'].str[:2].value_counts() | |
fig_basin = px.pie( | |
values=basin_data.values, | |
names=basin_data.index, | |
title="Distribution by Basin" | |
) | |
else: | |
fig_basin = None | |
with gr.Row(): | |
gr.Plot(value=fig_dist) | |
if fig_seasonal: | |
with gr.Row(): | |
gr.Plot(value=fig_seasonal) | |
if fig_basin: | |
with gr.Row(): | |
gr.Plot(value=fig_basin) | |
except Exception as e: | |
gr.Markdown(f"Visualization error: {str(e)}") | |
# Enhanced statistics | |
total_storms = len(typhoon_data['SID'].unique()) if 'SID' in typhoon_data.columns else 0 | |
total_records = len(typhoon_data) | |
if 'SEASON' in typhoon_data.columns: | |
try: | |
min_year = int(typhoon_data['SEASON'].min()) | |
max_year = int(typhoon_data['SEASON'].max()) | |
year_range = f"{min_year}-{max_year}" | |
years_covered = typhoon_data['SEASON'].nunique() | |
except (ValueError, TypeError): | |
year_range = "Unknown" | |
years_covered = 0 | |
else: | |
year_range = "Unknown" | |
years_covered = 0 | |
if 'SID' in typhoon_data.columns: | |
try: | |
basins_available = ', '.join(sorted(typhoon_data['SID'].str[:2].unique())) | |
avg_storms_per_year = total_storms / max(years_covered, 1) | |
except Exception: | |
basins_available = "Unknown" | |
avg_storms_per_year = 0 | |
else: | |
basins_available = "Unknown" | |
avg_storms_per_year = 0 | |
# TD specific statistics | |
try: | |
if 'USA_WIND' in typhoon_data.columns: | |
# Classify each storm once by its peak wind so the categories don't | |
# overlap (a typhoon also passes through TD/TS intensities on its track) | |
max_winds = typhoon_data.groupby('SID')['USA_WIND'].max() | |
td_storms = int((max_winds < 34).sum()) | |
ts_storms = int(((max_winds >= 34) & (max_winds < 64)).sum()) | |
typhoon_storms = int((max_winds >= 64).sum()) | |
td_percentage = (td_storms / max(total_storms, 1)) * 100 | |
else: | |
td_storms = ts_storms = typhoon_storms = 0 | |
td_percentage = 0 | |
except Exception as e: | |
print(f"Error calculating TD statistics: {e}") | |
td_storms = ts_storms = typhoon_storms = 0 | |
td_percentage = 0 | |
gr.Markdown(f""" | |
### Enhanced Dataset Summary: | |
- **Total Unique Storms**: {total_storms:,} | |
- **Total Track Records**: {total_records:,} | |
- **Year Range**: {year_range} ({years_covered} years) | |
- **Basins Available**: {basins_available} | |
- **Average Storms/Year**: {avg_storms_per_year:.1f} | |
### Storm Category Breakdown: | |
- **Tropical Depressions**: {td_storms:,} storms ({td_percentage:.1f}%) | |
- **Tropical Storms**: {ts_storms:,} storms | |
- **Typhoons (C1-C5)**: {typhoon_storms:,} storms | |
### New Platform Capabilities: | |
- **Complete TD Analysis** - First platform to include comprehensive TD tracking | |
- **Advanced ML Clustering** - DBSCAN pattern recognition with route visualization | |
- **Real-time Predictions** - Physics-based and optional CNN intensity forecasting | |
- **2025 Data Ready** - Full compatibility with current season data | |
- **Enhanced Animations** - Professional-quality storm track videos | |
- **Multi-basin Analysis** - Comprehensive Pacific and Atlantic coverage | |
### Research Applications: | |
- Climate change impact studies | |
- Seasonal forecasting research | |
- Storm pattern classification | |
- ENSO-typhoon relationship analysis | |
- Intensity prediction model development | |
""") | |
return demo | |
except Exception as e: | |
logging.error(f"Error creating Gradio interface: {e}") | |
import traceback | |
traceback.print_exc() | |
# Create a minimal fallback interface | |
return create_minimal_fallback_interface() | |
def create_minimal_fallback_interface(): | |
"""Create a minimal fallback interface when main interface fails""" | |
with gr.Blocks() as demo: | |
gr.Markdown("# Enhanced Typhoon Analysis Platform") | |
gr.Markdown("**Notice**: Loading with minimal interface due to data issues.") | |
with gr.Tab("Status"): | |
gr.Markdown(""" | |
## Platform Status | |
The application is running but encountered issues loading the full interface. | |
This could be due to: | |
- Data loading problems | |
- Missing dependencies | |
- Configuration issues | |
### Available Features: | |
- Basic interface is functional | |
- Error logs are being generated | |
- System is ready for debugging | |
### Next Steps: | |
1. Check the console logs for detailed error information | |
2. Verify all required data files are accessible | |
3. Ensure all dependencies are properly installed | |
4. Try restarting the application | |
""") | |
with gr.Tab("Debug"): | |
gr.Markdown("## Debug Information") | |
def get_debug_info(): | |
debug_text = f""" | |
Python Environment: | |
- Working Directory: {os.getcwd()} | |
- Data Path: {DATA_PATH} | |
- UMAP Available: {UMAP_AVAILABLE} | |
- CNN Available: {CNN_AVAILABLE} | |
Data Status: | |
- ONI Data: {'Loaded' if oni_data is not None else 'Failed'} | |
- Typhoon Data: {'Loaded' if typhoon_data is not None else 'Failed'} | |
- Merged Data: {'Loaded' if merged_data is not None else 'Failed'} | |
File Checks: | |
- ONI Path Exists: {os.path.exists(ONI_DATA_PATH)} | |
- Typhoon Path Exists: {os.path.exists(TYPHOON_DATA_PATH)} | |
""" | |
return debug_text | |
debug_btn = gr.Button("Get Debug Info") | |
debug_output = gr.Textbox(label="Debug Information", lines=15) | |
debug_btn.click(fn=get_debug_info, outputs=debug_output) | |
return demo | |
# ----------------------------- | |
# Color Test Functions (Optional) | |
# ----------------------------- | |
def test_color_conversion(): | |
"""Test color conversion functions""" | |
print("Testing color conversion...") | |
# Test all categories | |
test_winds = [25, 40, 70, 85, 100, 120, 150] # TD, TS, C1, C2, C3, C4, C5 | |
for wind in test_winds: | |
category = categorize_typhoon_enhanced(wind) | |
plotly_color = enhanced_color_map.get(category, 'rgb(128,128,128)') | |
matplotlib_color = get_matplotlib_color(category) | |
print(f"Wind: {wind:3d}kt β {category:20s} β Plotly: {plotly_color:15s} β Matplotlib: {matplotlib_color}") | |
print("Color conversion test complete!") | |
def test_rgb_conversion(): | |
"""Test RGB string to hex conversion""" | |
test_colors = [ | |
'rgb(128, 128, 128)', | |
'rgb(255, 0, 0)', | |
'rgb(0, 255, 0)', | |
'rgb(0, 0, 255)' | |
] | |
print("Testing RGB to hex conversion...") | |
for rgb_str in test_colors: | |
hex_color = rgb_string_to_hex(rgb_str) | |
print(f"{rgb_str:20s} β {hex_color}") | |
print("RGB conversion test complete!") | |
# Create and launch the interface | |
demo = create_interface() | |
if __name__ == "__main__": | |
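# launch() accepts further options if needed, e.g. share=True for a public | |
# link or server_port=7860 for a fixed port (standard Gradio arguments). | |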
demo.launch() |