Update app.py
app.py
CHANGED
@@ -324,8 +324,28 @@ def download_ibtracs_file(basin, force_download=False):
         logging.error(f"Failed to download {basin} basin file: {e}")
         return None
 
+def examine_ibtracs_structure(file_path):
+    """Examine the actual structure of an IBTrACS CSV file"""
+    try:
+        with open(file_path, 'r') as f:
+            lines = f.readlines()
+
+        # Show first 5 lines
+        logging.info("First 5 lines of IBTrACS file:")
+        for i, line in enumerate(lines[:5]):
+            logging.info(f"Line {i}: {line.strip()}")
+
+        # Try to read with proper skip
+        df = pd.read_csv(file_path, skiprows=3, nrows=5)
+        logging.info(f"Columns after skipping 3 rows: {list(df.columns)}")
+
+        return list(df.columns)
+    except Exception as e:
+        logging.error(f"Error examining IBTrACS structure: {e}")
+        return None
+
 def load_ibtracs_csv_directly(basin='WP'):
-    """Load IBTrACS data directly from CSV
+    """Load IBTrACS data directly from CSV - FIXED VERSION"""
     filename = BASIN_FILES[basin]
     local_path = os.path.join(DATA_PATH, filename)
 
@@ -336,47 +356,102 @@ def load_ibtracs_csv_directly(basin='WP'):
         return None
 
     try:
-        # Read with error handling for missing columns
+        # First, examine the structure
+        actual_columns = examine_ibtracs_structure(local_path)
+        if not actual_columns:
+            logging.error("Could not examine IBTrACS file structure")
+            return None
+
+        # Read IBTrACS CSV with proper number of header rows skipped
+        # IBTrACS v04r01 has 3 header rows that need to be skipped
         logging.info(f"Reading IBTrACS CSV file: {local_path}")
-        df = pd.read_csv(local_path, low_memory=False, skiprows=
+        df = pd.read_csv(local_path, low_memory=False, skiprows=3)  # Skip 3 metadata rows
+
+        logging.info(f"Original columns: {list(df.columns)}")
+        logging.info(f"Data shape before cleaning: {df.shape}")
+
+        # Map actual column names to our expected names
+        # Based on IBTrACS documentation, typical column names are:
+        column_mapping = {}
+
+        # Look for common variations of column names
+        for col in df.columns:
+            col_upper = col.upper()
+            if 'SID' in col_upper or col_upper == 'STORM_ID':
+                column_mapping[col] = 'SID'
+            elif 'SEASON' in col_upper and col_upper != 'SUB_SEASON':
+                column_mapping[col] = 'SEASON'
+            elif 'NAME' in col_upper and 'FILE' not in col_upper:
+                column_mapping[col] = 'NAME'
+            elif 'ISO_TIME' in col_upper or col_upper == 'TIME':
+                column_mapping[col] = 'ISO_TIME'
+            elif col_upper == 'LAT' or 'LATITUDE' in col_upper:
+                column_mapping[col] = 'LAT'
+            elif col_upper == 'LON' or 'LONGITUDE' in col_upper:
+                column_mapping[col] = 'LON'
+            elif 'USA_WIND' in col_upper or col_upper == 'WIND':
+                column_mapping[col] = 'USA_WIND'
+            elif 'USA_PRES' in col_upper or col_upper == 'PRESSURE':
+                column_mapping[col] = 'USA_PRES'
+            elif 'BASIN' in col_upper and 'SUB' not in col_upper:
+                column_mapping[col] = 'BASIN'
 
+        # Rename columns
+        df = df.rename(columns=column_mapping)
+        logging.info(f"Mapped columns: {list(df.columns)}")
 
+        # If we still don't have essential columns, try creating them
+        if 'SID' not in df.columns:
+            # Try to create SID from other columns
+            possible_sid_cols = [col for col in df.columns if 'id' in col.lower() or 'sid' in col.lower()]
+            if possible_sid_cols:
+                df['SID'] = df[possible_sid_cols[0]]
+                logging.info(f"Created SID from {possible_sid_cols[0]}")
 
+        if 'ISO_TIME' not in df.columns:
+            # Look for time-related columns
+            time_cols = [col for col in df.columns if 'time' in col.lower() or 'date' in col.lower()]
+            if time_cols:
+                df['ISO_TIME'] = df[time_cols[0]]
+                logging.info(f"Created ISO_TIME from {time_cols[0]}")
+
+        # Ensure we have minimum required columns
+        required_cols = ['LAT', 'LON']
+        available_required = [col for col in required_cols if col in df.columns]
+
+        if len(available_required) < 2:
+            logging.error(f"Missing critical columns. Available: {list(df.columns)}")
+            return None
 
         # Clean and standardize the data
+        if 'ISO_TIME' in df.columns:
+            df['ISO_TIME'] = pd.to_datetime(df['ISO_TIME'], errors='coerce')
 
         # Clean numeric columns
-        numeric_columns = ['LAT', 'LON', '
+        numeric_columns = ['LAT', 'LON', 'USA_WIND', 'USA_PRES']
         for col in numeric_columns:
             if col in df.columns:
                 df[col] = pd.to_numeric(df[col], errors='coerce')
 
         # Filter out invalid/missing critical data
+        valid_rows = df['LAT'].notna() & df['LON'].notna()
+        df = df[valid_rows]
 
         # Ensure LAT/LON are in reasonable ranges
         df = df[(df['LAT'] >= -90) & (df['LAT'] <= 90)]
         df = df[(df['LON'] >= -180) & (df['LON'] <= 180)]
 
+        # Add basin info if missing
+        if 'BASIN' not in df.columns:
+            df['BASIN'] = basin
+
+        # Add default columns if missing
+        if 'NAME' not in df.columns:
+            df['NAME'] = 'UNNAMED'
+
+        if 'SEASON' not in df.columns and 'ISO_TIME' in df.columns:
+            df['SEASON'] = df['ISO_TIME'].dt.year
+
         logging.info(f"Successfully loaded {len(df)} records from {basin} basin")
         return df
 
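The loader above follows an examine-then-read pattern: peek at the first raw lines, skip the metadata rows, then coerce the critical columns. Below is a minimal standalone sketch of that flow; the CSV path is a hypothetical local copy of a basin file, and the 3-row skip mirrors the diff's assumption about the v04r01 layout rather than a verified constant.

```python
import logging
import pandas as pd

logging.basicConfig(level=logging.INFO)

csv_path = "ibtracs.WP.list.csv"  # hypothetical local copy of a basin file

# Peek at the raw header rows, as examine_ibtracs_structure() does above
with open(csv_path, "r") as f:
    for i, line in enumerate(f):
        if i >= 5:
            break
        logging.info("Line %d: %s", i, line.strip())

# Read the data portion, skipping the metadata rows the peek revealed
# (the diff assumes 3 for v04r01), then coerce the critical columns
df = pd.read_csv(csv_path, low_memory=False, skiprows=3)
for col in ["LAT", "LON", "USA_WIND", "USA_PRES"]:
    if col in df.columns:
        df[col] = pd.to_numeric(df[col], errors="coerce")
if {"LAT", "LON"}.issubset(df.columns):
    df = df.dropna(subset=["LAT", "LON"])
logging.info("Kept %d rows; first columns: %s", len(df), list(df.columns)[:8])
```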
@@ -439,10 +514,10 @@ def load_data_fixed(oni_path, typhoon_path):
     try:
         typhoon_data = pd.read_csv(typhoon_path, low_memory=False)
         # Ensure basic columns exist and are valid
-        required_cols = ['
+        required_cols = ['LAT', 'LON']
         if all(col in typhoon_data.columns for col in required_cols):
+            if 'ISO_TIME' in typhoon_data.columns:
+                typhoon_data['ISO_TIME'] = pd.to_datetime(typhoon_data['ISO_TIME'], errors='coerce')
             logging.info(f"Loaded processed typhoon data with {len(typhoon_data)} records")
         else:
             logging.warning("Processed typhoon data missing required columns, will reload from IBTrACS")
@@ -469,10 +544,14 @@ def load_data_fixed(oni_path, typhoon_path):
         # Ensure SID has proper format
         if 'SID' not in typhoon_data.columns and 'BASIN' in typhoon_data.columns:
             # Create SID from basin and other identifiers if missing
-            if '
+            if 'SEASON' in typhoon_data.columns:
                 typhoon_data['SID'] = (typhoon_data['BASIN'].astype(str) +
-                                       typhoon_data
+                                       typhoon_data.index.astype(str).str.zfill(2) +
                                        typhoon_data['SEASON'].astype(str))
+            else:
+                typhoon_data['SID'] = (typhoon_data['BASIN'].astype(str) +
+                                       typhoon_data.index.astype(str).str.zfill(2) +
+                                       '2000')
 
         # Save the processed data for future use
         safe_file_write(typhoon_path, typhoon_data, get_fallback_data_dir())
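When `SID` has to be synthesized, the branch above concatenates the basin code, a zero-padded row index, and the season (or a literal '2000' when no season column exists). A tiny illustration with made-up rows:

```python
import pandas as pd

# Made-up records missing SID, mirroring the fallback above
t = pd.DataFrame({"BASIN": ["WP", "WP", "EP"], "SEASON": [2019, 2019, 2020]})
t["SID"] = (t["BASIN"].astype(str)
            + t.index.astype(str).str.zfill(2)
            + t["SEASON"].astype(str))
print(t["SID"].tolist())  # ['WP002019', 'WP012019', 'EP022020']
```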
@@ -502,23 +581,25 @@ def load_data_fixed(oni_path, typhoon_path):
                 logging.warning(f"Added missing column {col} with default value")
 
         # Ensure data types
+        if 'ISO_TIME' in typhoon_data.columns:
+            typhoon_data['ISO_TIME'] = pd.to_datetime(typhoon_data['ISO_TIME'], errors='coerce')
         typhoon_data['LAT'] = pd.to_numeric(typhoon_data['LAT'], errors='coerce')
         typhoon_data['LON'] = pd.to_numeric(typhoon_data['LON'], errors='coerce')
         typhoon_data['USA_WIND'] = pd.to_numeric(typhoon_data['USA_WIND'], errors='coerce')
         typhoon_data['USA_PRES'] = pd.to_numeric(typhoon_data['USA_PRES'], errors='coerce')
 
-        # Remove rows with invalid
-        typhoon_data = typhoon_data.dropna(subset=['
+        # Remove rows with invalid coordinates
+        typhoon_data = typhoon_data.dropna(subset=['LAT', 'LON'])
 
         logging.info(f"Final typhoon data: {len(typhoon_data)} records after validation")
 
         return oni_data, typhoon_data
 
 def create_fallback_typhoon_data():
-    """Create minimal fallback typhoon data"""
+    """Create minimal fallback typhoon data - FIXED VERSION"""
+    # Use proper pandas date_range instead of numpy
     dates = pd.date_range(start='2000-01-01', end='2023-12-31', freq='D')
-    storm_dates = np.random.choice(dates, size=100, replace=False)
+    storm_dates = dates[np.random.choice(len(dates), size=100, replace=False)]
 
     data = []
     for i, date in enumerate(storm_dates):
@@ -538,7 +619,7 @@ def create_fallback_typhoon_data():
 
             data.append({
                 'SID': sid,
-                'ISO_TIME': date +
+                'ISO_TIME': date + pd.Timedelta(hours=j*6),  # Use pd.Timedelta instead
                 'NAME': f'FALLBACK_{i+1}',
                 'SEASON': date.year,
                 'LAT': lat,
@@ -548,7 +629,9 @@ def create_fallback_typhoon_data():
                 'BASIN': 'WP'
             })
 
+    df = pd.DataFrame(data)
+    logging.info(f"Created fallback typhoon data with {len(df)} records")
+    return df
 
 def process_oni_data(oni_data):
     """Process ONI data into long format"""
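The fallback-data fix has two parts: sample start dates by position so they stay pandas Timestamps, and build the 6-hourly fixes with `pd.Timedelta`. A small sketch of both (the seed and sizes here are arbitrary):

```python
import numpy as np
import pandas as pd

np.random.seed(0)
dates = pd.date_range(start="2000-01-01", end="2023-12-31", freq="D")

# Choosing positions (not the dates themselves) keeps Timestamp objects
storm_starts = dates[np.random.choice(len(dates), size=3, replace=False)]

# Six-hourly track times for one synthetic storm
track_times = [storm_starts[0] + pd.Timedelta(hours=j * 6) for j in range(5)]
print(track_times[0], "->", track_times[-1])
```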
@@ -562,7 +645,8 @@ def process_oni_data(oni_data):
 
 def process_typhoon_data(typhoon_data):
     """Process typhoon data"""
+    if 'ISO_TIME' in typhoon_data.columns:
+        typhoon_data['ISO_TIME'] = pd.to_datetime(typhoon_data['ISO_TIME'], errors='coerce')
     typhoon_data['USA_WIND'] = pd.to_numeric(typhoon_data['USA_WIND'], errors='coerce')
     typhoon_data['USA_PRES'] = pd.to_numeric(typhoon_data['USA_PRES'], errors='coerce')
     typhoon_data['LON'] = pd.to_numeric(typhoon_data['LON'], errors='coerce')
@@ -574,8 +658,14 @@ def process_typhoon_data(typhoon_data):
         'LAT':'first','LON':'first'
     }).reset_index()
 
+    if 'ISO_TIME' in typhoon_max.columns:
+        typhoon_max['Month'] = typhoon_max['ISO_TIME'].dt.strftime('%m')
+        typhoon_max['Year'] = typhoon_max['ISO_TIME'].dt.year
+    else:
+        # Fallback if no ISO_TIME
+        typhoon_max['Month'] = '01'
+        typhoon_max['Year'] = typhoon_max['SEASON']
+
     typhoon_max['Category'] = typhoon_max['USA_WIND'].apply(categorize_typhoon)
     return typhoon_max
 
@@ -585,6 +675,8 @@ def merge_data(oni_long, typhoon_max):
 
 def categorize_typhoon(wind_speed):
     """Categorize typhoon based on wind speed"""
+    if pd.isna(wind_speed):
+        return 'Tropical Depression'
     if wind_speed >= 137:
         return 'C5 Super Typhoon'
     elif wind_speed >= 113:
@@ -604,6 +696,8 @@ def classify_enso_phases(oni_value):
     """Classify ENSO phases based on ONI value"""
     if isinstance(oni_value, pd.Series):
         oni_value = oni_value.iloc[0]
+    if pd.isna(oni_value):
+        return 'Neutral'
     if oni_value >= 0.5:
         return 'El Nino'
     elif oni_value <= -0.5:
@@ -778,6 +872,8 @@ def get_full_tracks(start_year, start_month, end_year, end_month, enso_phase, ty
     fig = go.Figure()
     for sid in unique_storms:
         storm_data = typhoon_data[typhoon_data['SID']==sid]
+        if storm_data.empty:
+            continue
         name = storm_data['NAME'].iloc[0] if pd.notnull(storm_data['NAME'].iloc[0]) else "Unnamed"
         basin = storm_data['SID'].iloc[0][:2]
         storm_oni = filtered_data[filtered_data['SID']==sid]['ONI'].iloc[0]
@@ -842,6 +938,9 @@ def get_longitude_analysis(start_year, start_month, end_year, end_month, enso_ph
 
 def categorize_typhoon_by_standard(wind_speed, standard='atlantic'):
     """Categorize typhoon by standard"""
+    if pd.isna(wind_speed):
+        return 'Tropical Depression', '#808080'
+
     if standard=='taiwan':
         wind_speed_ms = wind_speed * 0.514444
         if wind_speed_ms >= 51.0:
@@ -875,6 +974,10 @@ def update_route_clusters(start_year, start_month, end_year, end_month, enso_val
     try:
         # Merge raw typhoon data with ONI
         raw_data = typhoon_data.copy()
+        if 'ISO_TIME' not in raw_data.columns:
+            logging.error("ISO_TIME column not found in typhoon data")
+            return go.Figure(), go.Figure(), make_subplots(rows=2, cols=1), "Error: ISO_TIME column missing"
+
         raw_data['Year'] = raw_data['ISO_TIME'].dt.year
         raw_data['Month'] = raw_data['ISO_TIME'].dt.strftime('%m')
         merged_raw = pd.merge(raw_data, process_oni_data(oni_data), on=['Year','Month'], how='left')
@@ -918,7 +1021,7 @@ def update_route_clusters(start_year, start_month, end_year, end_month, enso_val
             return go.Figure(), go.Figure(), make_subplots(rows=2, cols=1), "No valid storms for clustering."
 
         # Interpolate each storm's route to a common length
-        max_length = max(len(item[1]) for item in all_storms_data)
+        max_length = min(50, max(len(item[1]) for item in all_storms_data))  # Cap at 50 points
         route_vectors = []
         wind_curves = []
         pres_curves = []
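Each storm track is then interpolated onto that common number of points (capped at 50 above) so every storm becomes one fixed-length row of `route_vectors`. The helper below sketches one way to do that with `np.interp`; the function name and layout are illustrative, not taken from app.py.

```python
import numpy as np

def resample_track(lats, lons, n_points=50):
    """Resample a variable-length track onto n_points evenly spaced samples."""
    t_old = np.linspace(0.0, 1.0, len(lats))
    t_new = np.linspace(0.0, 1.0, n_points)
    return np.concatenate([np.interp(t_new, t_old, lats),
                           np.interp(t_new, t_old, lons)])

track_lat = np.array([10.0, 12.5, 15.0, 18.0])
track_lon = np.array([135.0, 132.0, 128.0, 125.0])
vec = resample_track(track_lat, track_lon)
print(vec.shape)  # (100,) -> one fixed-length row for route_vectors
```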
@@ -972,31 +1075,34 @@ def update_route_clusters(start_year, start_month, end_year, end_month, enso_val
         pres_curves = np.array(pres_curves)
 
         # Run TSNE on route vectors
+        if len(route_vectors) < 5:
+            return go.Figure(), go.Figure(), make_subplots(rows=2, cols=1), "Need at least 5 storms for clustering."
+
+        tsne = TSNE(n_components=2, random_state=42, verbose=1, perplexity=min(30, len(route_vectors)-1))
         tsne_results = tsne.fit_transform(route_vectors)
 
         # Dynamic DBSCAN
         selected_labels = None
         selected_eps = None
         for eps in np.linspace(1.0, 10.0, 91):
-            dbscan = DBSCAN(eps=eps, min_samples=
+            dbscan = DBSCAN(eps=eps, min_samples=max(2, len(route_vectors)//10))
             labels = dbscan.fit_predict(tsne_results)
             clusters = set(labels) - {-1}
-            if
+            if 2 <= len(clusters) <= min(10, len(route_vectors)//2):
                 selected_labels = labels
                 selected_eps = eps
                 break
 
         if selected_labels is None:
             selected_eps = 5.0
-            dbscan = DBSCAN(eps=selected_eps, min_samples=
+            dbscan = DBSCAN(eps=selected_eps, min_samples=max(2, len(route_vectors)//10))
             selected_labels = dbscan.fit_predict(tsne_results)
 
         logging.info(f"Selected DBSCAN eps: {selected_eps:.2f} yielding {len(set(selected_labels)-{-1})} clusters.")
 
         # TSNE scatter plot
         fig_tsne = go.Figure()
-        colors = px.colors.qualitative.
+        colors = px.colors.qualitative.Set3
         unique_labels = sorted(set(selected_labels) - {-1})
 
         for i, label in enumerate(unique_labels):
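The eps sweep above is easy to exercise on its own: scan eps from 1 to 10, scale `min_samples` with the sample count, and stop at the first value that yields a sensible number of clusters. A sketch on synthetic 2-D points standing in for `tsne_results` (the blob layout and seed are assumptions made for the demo):

```python
import numpy as np
from sklearn.cluster import DBSCAN

rng = np.random.default_rng(42)
# Three synthetic blobs as a stand-in for the t-SNE embedding
pts = np.vstack([rng.normal(loc=c, scale=0.5, size=(20, 2))
                 for c in ((0, 0), (8, 8), (-8, 6))])

selected_labels, selected_eps = None, None
for eps in np.linspace(1.0, 10.0, 91):
    labels = DBSCAN(eps=eps, min_samples=max(2, len(pts) // 10)).fit_predict(pts)
    clusters = set(labels) - {-1}
    if 2 <= len(clusters) <= min(10, len(pts) // 2):
        selected_labels, selected_eps = labels, eps
        break

if selected_labels is None:  # same fallback idea as the diff
    selected_eps = 5.0
    selected_labels = DBSCAN(eps=selected_eps, min_samples=2).fit_predict(pts)

n_clusters = len(set(selected_labels) - {-1})
print(f"eps={selected_eps:.2f}, clusters={n_clusters}")
```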
@@ -1053,6 +1159,12 @@ def update_route_clusters(start_year, start_month, end_year, end_month, enso_val
             mean_pres_curve = np.nanmean(cluster_pres, axis=0)
             cluster_stats.append((label, mean_wind_curve, mean_pres_curve))
 
+        fig_routes.update_layout(
+            title="Cluster Mean Routes",
+            geo=dict(projection_type='natural earth', showland=True),
+            height=600
+        )
+
         # Create cluster stats plot
         x_axis = np.linspace(0, 1, max_length)
         fig_stats = make_subplots(rows=2, cols=1, shared_xaxes=True,
@@ -1064,7 +1176,8 @@ def update_route_clusters(start_year, start_month, end_year, end_month, enso_val
                 y=wind_curve,
                 mode='lines',
                 line=dict(width=2, color=colors[i % len(colors)]),
-                name=f"Cluster {label} Mean Wind"
+                name=f"Cluster {label} Mean Wind",
+                showlegend=True
             ), row=1, col=1)
 
             fig_stats.add_trace(go.Scatter(
@@ -1072,7 +1185,8 @@ def update_route_clusters(start_year, start_month, end_year, end_month, enso_val
                 y=pres_curve,
                 mode='lines',
                 line=dict(width=2, color=colors[i % len(colors)]),
-                name=f"Cluster {label} Mean MSLP"
+                name=f"Cluster {label} Mean MSLP",
+                showlegend=False
             ), row=2, col=1)
 
         fig_stats.update_layout(
@@ -1081,10 +1195,10 @@ def update_route_clusters(start_year, start_month, end_year, end_month, enso_val
             yaxis_title="Mean Wind Speed (knots)",
             xaxis2_title="Normalized Route Index",
             yaxis2_title="Mean MSLP (hPa)",
+            height=600
         )
 
-        info = f"TSNE clustering complete. Selected eps: {selected_eps:.2f}. Clusters: {len(unique_labels)}."
+        info = f"TSNE clustering complete. Selected eps: {selected_eps:.2f}. Clusters: {len(unique_labels)}. Total storms: {len(route_vectors)}."
         return fig_tsne, fig_routes, fig_stats, info
 
     except Exception as e:
@@ -1112,9 +1226,9 @@ def generate_track_video_from_csv(year, storm_id, standard):
     else:
         winds = np.full(len(lats), np.nan)
 
-    storm_name = storm_df['NAME'].iloc[0]
+    storm_name = storm_df['NAME'].iloc[0] if pd.notnull(storm_df['NAME'].iloc[0]) else "Unnamed"
     basin = storm_df['SID'].iloc[0][:2]
-    season = storm_df['SEASON'].iloc[0]
+    season = storm_df['SEASON'].iloc[0] if 'SEASON' in storm_df.columns else year
 
     min_lat, max_lat = np.min(lats), np.max(lats)
     min_lon, max_lon = np.min(lons), np.max(lons)
@@ -1157,7 +1271,7 @@ def generate_track_video_from_csv(year, storm_id, standard):
     def update(frame):
         line.set_data(lons[:frame+1], lats[:frame+1])
         point.set_data([lons[frame]], [lats[frame]])
-        wind_speed = winds[frame] if frame < len(winds) else
+        wind_speed = winds[frame] if frame < len(winds) and not pd.isna(winds[frame]) else 0
         category, color = categorize_typhoon_by_standard(wind_speed, standard)
         point.set_color(color)
         dt_str = pd.to_datetime(times[frame]).strftime('%Y-%m-%d %H:%M')
@@ -1201,7 +1315,13 @@ def update_typhoon_options_fixed(year, basin):
         return gr.update(choices=[], value=None)
 
     # Filter by year
+    if 'ISO_TIME' in typhoon_data.columns:
+        year_data = typhoon_data[typhoon_data['ISO_TIME'].dt.year == int(year)].copy()
+    elif 'SEASON' in typhoon_data.columns:
+        year_data = typhoon_data[typhoon_data['SEASON'] == int(year)].copy()
+    else:
+        # Fallback: use all data
+        year_data = typhoon_data.copy()
 
     if basin != "All Basins":
         # Extract basin code
|
1343 |
for _, storm in storms.iterrows():
|
1344 |
name = storm.get('NAME', 'UNNAMED')
|
1345 |
+
if pd.isna(name) or name == '' or name == 'UNNAMED':
|
1346 |
name = 'UNNAMED'
|
1347 |
sid = storm['SID']
|
1348 |
options.append(f"{name} ({sid})")
|