euler314 committed (verified)
Commit 66c3b07 · 1 Parent(s): 473c7a8

Update app.py

Files changed (1)
  1. app.py +174 -54
app.py CHANGED
@@ -324,8 +324,28 @@ def download_ibtracs_file(basin, force_download=False):
          logging.error(f"Failed to download {basin} basin file: {e}")
          return None
 
+ def examine_ibtracs_structure(file_path):
+     """Examine the actual structure of an IBTrACS CSV file"""
+     try:
+         with open(file_path, 'r') as f:
+             lines = f.readlines()
+
+         # Show first 5 lines
+         logging.info("First 5 lines of IBTrACS file:")
+         for i, line in enumerate(lines[:5]):
+             logging.info(f"Line {i}: {line.strip()}")
+
+         # Try to read with proper skip
+         df = pd.read_csv(file_path, skiprows=3, nrows=5)
+         logging.info(f"Columns after skipping 3 rows: {list(df.columns)}")
+
+         return list(df.columns)
+     except Exception as e:
+         logging.error(f"Error examining IBTrACS structure: {e}")
+         return None
+
  def load_ibtracs_csv_directly(basin='WP'):
-     """Load IBTrACS data directly from CSV without tropycal"""
+     """Load IBTrACS data directly from CSV - FIXED VERSION"""
      filename = BASIN_FILES[basin]
      local_path = os.path.join(DATA_PATH, filename)
 
@@ -336,47 +356,102 @@ def load_ibtracs_csv_directly(basin='WP'):
          return None
 
      try:
-         # Read IBTrACS CSV with specific parameters
-         essential_columns = [
-             'SID', 'SEASON', 'NUMBER', 'BASIN', 'SUBBASIN', 'NAME',
-             'ISO_TIME', 'NATURE', 'LAT', 'LON', 'WMO_WIND', 'WMO_PRES',
-             'USA_WIND', 'USA_PRES', 'USA_STATUS', 'USA_R34_NE', 'USA_R34_SE',
-             'USA_R34_SW', 'USA_R34_NW', 'USA_R50_NE', 'USA_R50_SE',
-             'USA_R50_SW', 'USA_R50_NW', 'USA_R64_NE', 'USA_R64_SE',
-             'USA_R64_SW', 'USA_R64_NW', 'USA_RMW', 'USA_EYE'
-         ]
-
-         # Read with error handling for missing columns
+         # First, examine the structure
+         actual_columns = examine_ibtracs_structure(local_path)
+         if not actual_columns:
+             logging.error("Could not examine IBTrACS file structure")
+             return None
+
+         # Read IBTrACS CSV with proper number of header rows skipped
+         # IBTrACS v04r01 has 3 header rows that need to be skipped
          logging.info(f"Reading IBTrACS CSV file: {local_path}")
-         df = pd.read_csv(local_path, low_memory=False, skiprows=1)  # Skip header row with units
+         df = pd.read_csv(local_path, low_memory=False, skiprows=3)  # Skip 3 metadata rows
+
+         logging.info(f"Original columns: {list(df.columns)}")
+         logging.info(f"Data shape before cleaning: {df.shape}")
+
+         # Map actual column names to our expected names
+         # Based on IBTrACS documentation, typical column names are:
+         column_mapping = {}
+
+         # Look for common variations of column names
+         for col in df.columns:
+             col_upper = col.upper()
+             if 'SID' in col_upper or col_upper == 'STORM_ID':
+                 column_mapping[col] = 'SID'
+             elif 'SEASON' in col_upper and col_upper != 'SUB_SEASON':
+                 column_mapping[col] = 'SEASON'
+             elif 'NAME' in col_upper and 'FILE' not in col_upper:
+                 column_mapping[col] = 'NAME'
+             elif 'ISO_TIME' in col_upper or col_upper == 'TIME':
+                 column_mapping[col] = 'ISO_TIME'
+             elif col_upper == 'LAT' or 'LATITUDE' in col_upper:
+                 column_mapping[col] = 'LAT'
+             elif col_upper == 'LON' or 'LONGITUDE' in col_upper:
+                 column_mapping[col] = 'LON'
+             elif 'USA_WIND' in col_upper or col_upper == 'WIND':
+                 column_mapping[col] = 'USA_WIND'
+             elif 'USA_PRES' in col_upper or col_upper == 'PRESSURE':
+                 column_mapping[col] = 'USA_PRES'
+             elif 'BASIN' in col_upper and 'SUB' not in col_upper:
+                 column_mapping[col] = 'BASIN'
 
-         # Check which essential columns exist
-         available_columns = [col for col in essential_columns if col in df.columns]
-         missing_columns = [col for col in essential_columns if col not in df.columns]
+         # Rename columns
+         df = df.rename(columns=column_mapping)
+         logging.info(f"Mapped columns: {list(df.columns)}")
 
-         if missing_columns:
-             logging.warning(f"Missing columns in IBTrACS data: {missing_columns}")
+         # If we still don't have essential columns, try creating them
+         if 'SID' not in df.columns:
+             # Try to create SID from other columns
+             possible_sid_cols = [col for col in df.columns if 'id' in col.lower() or 'sid' in col.lower()]
+             if possible_sid_cols:
+                 df['SID'] = df[possible_sid_cols[0]]
+                 logging.info(f"Created SID from {possible_sid_cols[0]}")
 
-         # Select only available columns
-         df = df[available_columns].copy()
+         if 'ISO_TIME' not in df.columns:
+             # Look for time-related columns
+             time_cols = [col for col in df.columns if 'time' in col.lower() or 'date' in col.lower()]
+             if time_cols:
+                 df['ISO_TIME'] = df[time_cols[0]]
+                 logging.info(f"Created ISO_TIME from {time_cols[0]}")
+
+         # Ensure we have minimum required columns
+         required_cols = ['LAT', 'LON']
+         available_required = [col for col in required_cols if col in df.columns]
+
+         if len(available_required) < 2:
+             logging.error(f"Missing critical columns. Available: {list(df.columns)}")
+             return None
 
          # Clean and standardize the data
-         # Convert ISO_TIME to datetime
-         df['ISO_TIME'] = pd.to_datetime(df['ISO_TIME'], format='%Y-%m-%d %H:%M:%S', errors='coerce')
+         if 'ISO_TIME' in df.columns:
+             df['ISO_TIME'] = pd.to_datetime(df['ISO_TIME'], errors='coerce')
 
          # Clean numeric columns
-         numeric_columns = ['LAT', 'LON', 'WMO_WIND', 'WMO_PRES', 'USA_WIND', 'USA_PRES']
+         numeric_columns = ['LAT', 'LON', 'USA_WIND', 'USA_PRES']
          for col in numeric_columns:
              if col in df.columns:
                  df[col] = pd.to_numeric(df[col], errors='coerce')
 
         # Filter out invalid/missing critical data
-         df = df.dropna(subset=['ISO_TIME', 'LAT', 'LON'])
+         valid_rows = df['LAT'].notna() & df['LON'].notna()
+         df = df[valid_rows]
 
          # Ensure LAT/LON are in reasonable ranges
          df = df[(df['LAT'] >= -90) & (df['LAT'] <= 90)]
         df = df[(df['LON'] >= -180) & (df['LON'] <= 180)]
 
+         # Add basin info if missing
+         if 'BASIN' not in df.columns:
+             df['BASIN'] = basin
+
+         # Add default columns if missing
+         if 'NAME' not in df.columns:
+             df['NAME'] = 'UNNAMED'
+
+         if 'SEASON' not in df.columns and 'ISO_TIME' in df.columns:
+             df['SEASON'] = df['ISO_TIME'].dt.year
+
          logging.info(f"Successfully loaded {len(df)} records from {basin} basin")
          return df
 
@@ -439,10 +514,10 @@ def load_data_fixed(oni_path, typhoon_path):
          try:
              typhoon_data = pd.read_csv(typhoon_path, low_memory=False)
              # Ensure basic columns exist and are valid
-             required_cols = ['SID', 'ISO_TIME', 'LAT', 'LON']
+             required_cols = ['LAT', 'LON']
              if all(col in typhoon_data.columns for col in required_cols):
-                 typhoon_data['ISO_TIME'] = pd.to_datetime(typhoon_data['ISO_TIME'], errors='coerce')
-                 typhoon_data = typhoon_data.dropna(subset=['ISO_TIME'])
+                 if 'ISO_TIME' in typhoon_data.columns:
+                     typhoon_data['ISO_TIME'] = pd.to_datetime(typhoon_data['ISO_TIME'], errors='coerce')
                  logging.info(f"Loaded processed typhoon data with {len(typhoon_data)} records")
              else:
                  logging.warning("Processed typhoon data missing required columns, will reload from IBTrACS")
@@ -469,10 +544,14 @@ def load_data_fixed(oni_path, typhoon_path):
          # Ensure SID has proper format
          if 'SID' not in typhoon_data.columns and 'BASIN' in typhoon_data.columns:
              # Create SID from basin and other identifiers if missing
-             if 'NUMBER' in typhoon_data.columns and 'SEASON' in typhoon_data.columns:
+             if 'SEASON' in typhoon_data.columns:
                  typhoon_data['SID'] = (typhoon_data['BASIN'].astype(str) +
-                                        typhoon_data['NUMBER'].astype(str).str.zfill(2) +
+                                        typhoon_data.index.astype(str).str.zfill(2) +
                                         typhoon_data['SEASON'].astype(str))
+             else:
+                 typhoon_data['SID'] = (typhoon_data['BASIN'].astype(str) +
+                                        typhoon_data.index.astype(str).str.zfill(2) +
+                                        '2000')
 
          # Save the processed data for future use
          safe_file_write(typhoon_path, typhoon_data, get_fallback_data_dir())
@@ -502,23 +581,25 @@ def load_data_fixed(oni_path, typhoon_path):
                  logging.warning(f"Added missing column {col} with default value")
 
      # Ensure data types
-     typhoon_data['ISO_TIME'] = pd.to_datetime(typhoon_data['ISO_TIME'], errors='coerce')
+     if 'ISO_TIME' in typhoon_data.columns:
+         typhoon_data['ISO_TIME'] = pd.to_datetime(typhoon_data['ISO_TIME'], errors='coerce')
      typhoon_data['LAT'] = pd.to_numeric(typhoon_data['LAT'], errors='coerce')
      typhoon_data['LON'] = pd.to_numeric(typhoon_data['LON'], errors='coerce')
      typhoon_data['USA_WIND'] = pd.to_numeric(typhoon_data['USA_WIND'], errors='coerce')
      typhoon_data['USA_PRES'] = pd.to_numeric(typhoon_data['USA_PRES'], errors='coerce')
 
-     # Remove rows with invalid times or coordinates
-     typhoon_data = typhoon_data.dropna(subset=['ISO_TIME', 'LAT', 'LON'])
+     # Remove rows with invalid coordinates
+     typhoon_data = typhoon_data.dropna(subset=['LAT', 'LON'])
 
      logging.info(f"Final typhoon data: {len(typhoon_data)} records after validation")
 
      return oni_data, typhoon_data
 
  def create_fallback_typhoon_data():
-     """Create minimal fallback typhoon data"""
+     """Create minimal fallback typhoon data - FIXED VERSION"""
+     # Use proper pandas date_range instead of numpy
      dates = pd.date_range(start='2000-01-01', end='2023-12-31', freq='D')
-     storm_dates = np.random.choice(dates, size=100, replace=False)
+     storm_dates = dates[np.random.choice(len(dates), size=100, replace=False)]
 
      data = []
      for i, date in enumerate(storm_dates):
@@ -538,7 +619,7 @@ def create_fallback_typhoon_data():
 
              data.append({
                  'SID': sid,
-                 'ISO_TIME': date + timedelta(hours=j*6),
+                 'ISO_TIME': date + pd.Timedelta(hours=j*6),  # Use pd.Timedelta instead
                  'NAME': f'FALLBACK_{i+1}',
                  'SEASON': date.year,
                  'LAT': lat,
@@ -548,7 +629,9 @@ def create_fallback_typhoon_data():
                  'BASIN': 'WP'
              })
 
-     return pd.DataFrame(data)
+     df = pd.DataFrame(data)
+     logging.info(f"Created fallback typhoon data with {len(df)} records")
+     return df
 
  def process_oni_data(oni_data):
      """Process ONI data into long format"""
@@ -562,7 +645,8 @@ def process_oni_data(oni_data):
 
  def process_typhoon_data(typhoon_data):
      """Process typhoon data"""
-     typhoon_data['ISO_TIME'] = pd.to_datetime(typhoon_data['ISO_TIME'], errors='coerce')
+     if 'ISO_TIME' in typhoon_data.columns:
+         typhoon_data['ISO_TIME'] = pd.to_datetime(typhoon_data['ISO_TIME'], errors='coerce')
      typhoon_data['USA_WIND'] = pd.to_numeric(typhoon_data['USA_WIND'], errors='coerce')
      typhoon_data['USA_PRES'] = pd.to_numeric(typhoon_data['USA_PRES'], errors='coerce')
      typhoon_data['LON'] = pd.to_numeric(typhoon_data['LON'], errors='coerce')
@@ -574,8 +658,14 @@ def process_typhoon_data(typhoon_data):
          'LAT':'first','LON':'first'
      }).reset_index()
 
-     typhoon_max['Month'] = typhoon_max['ISO_TIME'].dt.strftime('%m')
-     typhoon_max['Year'] = typhoon_max['ISO_TIME'].dt.year
+     if 'ISO_TIME' in typhoon_max.columns:
+         typhoon_max['Month'] = typhoon_max['ISO_TIME'].dt.strftime('%m')
+         typhoon_max['Year'] = typhoon_max['ISO_TIME'].dt.year
+     else:
+         # Fallback if no ISO_TIME
+         typhoon_max['Month'] = '01'
+         typhoon_max['Year'] = typhoon_max['SEASON']
+
      typhoon_max['Category'] = typhoon_max['USA_WIND'].apply(categorize_typhoon)
      return typhoon_max
 
@@ -585,6 +675,8 @@ def merge_data(oni_long, typhoon_max):
 
  def categorize_typhoon(wind_speed):
      """Categorize typhoon based on wind speed"""
+     if pd.isna(wind_speed):
+         return 'Tropical Depression'
      if wind_speed >= 137:
          return 'C5 Super Typhoon'
      elif wind_speed >= 113:
@@ -604,6 +696,8 @@ def classify_enso_phases(oni_value):
      """Classify ENSO phases based on ONI value"""
      if isinstance(oni_value, pd.Series):
          oni_value = oni_value.iloc[0]
+     if pd.isna(oni_value):
+         return 'Neutral'
      if oni_value >= 0.5:
          return 'El Nino'
      elif oni_value <= -0.5:
@@ -778,6 +872,8 @@ def get_full_tracks(start_year, start_month, end_year, end_month, enso_phase, ty
      fig = go.Figure()
      for sid in unique_storms:
          storm_data = typhoon_data[typhoon_data['SID']==sid]
+         if storm_data.empty:
+             continue
          name = storm_data['NAME'].iloc[0] if pd.notnull(storm_data['NAME'].iloc[0]) else "Unnamed"
          basin = storm_data['SID'].iloc[0][:2]
          storm_oni = filtered_data[filtered_data['SID']==sid]['ONI'].iloc[0]
@@ -842,6 +938,9 @@ def get_longitude_analysis(start_year, start_month, end_year, end_month, enso_ph
 
  def categorize_typhoon_by_standard(wind_speed, standard='atlantic'):
      """Categorize typhoon by standard"""
+     if pd.isna(wind_speed):
+         return 'Tropical Depression', '#808080'
+
      if standard=='taiwan':
          wind_speed_ms = wind_speed * 0.514444
          if wind_speed_ms >= 51.0:
@@ -875,6 +974,10 @@ def update_route_clusters(start_year, start_month, end_year, end_month, enso_val
      try:
          # Merge raw typhoon data with ONI
          raw_data = typhoon_data.copy()
+         if 'ISO_TIME' not in raw_data.columns:
+             logging.error("ISO_TIME column not found in typhoon data")
+             return go.Figure(), go.Figure(), make_subplots(rows=2, cols=1), "Error: ISO_TIME column missing"
+
          raw_data['Year'] = raw_data['ISO_TIME'].dt.year
          raw_data['Month'] = raw_data['ISO_TIME'].dt.strftime('%m')
          merged_raw = pd.merge(raw_data, process_oni_data(oni_data), on=['Year','Month'], how='left')
@@ -918,7 +1021,7 @@ def update_route_clusters(start_year, start_month, end_year, end_month, enso_val
              return go.Figure(), go.Figure(), make_subplots(rows=2, cols=1), "No valid storms for clustering."
 
          # Interpolate each storm's route to a common length
-         max_length = max(len(item[1]) for item in all_storms_data)
+         max_length = min(50, max(len(item[1]) for item in all_storms_data))  # Cap at 50 points
          route_vectors = []
          wind_curves = []
          pres_curves = []
@@ -972,31 +1075,34 @@ def update_route_clusters(start_year, start_month, end_year, end_month, enso_val
          pres_curves = np.array(pres_curves)
 
          # Run TSNE on route vectors
-         tsne = TSNE(n_components=2, random_state=42, verbose=1)
+         if len(route_vectors) < 5:
+             return go.Figure(), go.Figure(), make_subplots(rows=2, cols=1), "Need at least 5 storms for clustering."
+
+         tsne = TSNE(n_components=2, random_state=42, verbose=1, perplexity=min(30, len(route_vectors)-1))
          tsne_results = tsne.fit_transform(route_vectors)
 
          # Dynamic DBSCAN
          selected_labels = None
          selected_eps = None
          for eps in np.linspace(1.0, 10.0, 91):
-             dbscan = DBSCAN(eps=eps, min_samples=3)
+             dbscan = DBSCAN(eps=eps, min_samples=max(2, len(route_vectors)//10))
              labels = dbscan.fit_predict(tsne_results)
              clusters = set(labels) - {-1}
-             if 5 <= len(clusters) <= 20:
+             if 2 <= len(clusters) <= min(10, len(route_vectors)//2):
                  selected_labels = labels
                  selected_eps = eps
                  break
 
          if selected_labels is None:
              selected_eps = 5.0
-             dbscan = DBSCAN(eps=selected_eps, min_samples=3)
+             dbscan = DBSCAN(eps=selected_eps, min_samples=max(2, len(route_vectors)//10))
              selected_labels = dbscan.fit_predict(tsne_results)
 
          logging.info(f"Selected DBSCAN eps: {selected_eps:.2f} yielding {len(set(selected_labels)-{-1})} clusters.")
 
          # TSNE scatter plot
          fig_tsne = go.Figure()
-         colors = px.colors.qualitative.Safe
+         colors = px.colors.qualitative.Set3
          unique_labels = sorted(set(selected_labels) - {-1})
 
          for i, label in enumerate(unique_labels):
@@ -1053,6 +1159,12 @@ def update_route_clusters(start_year, start_month, end_year, end_month, enso_val
              mean_pres_curve = np.nanmean(cluster_pres, axis=0)
              cluster_stats.append((label, mean_wind_curve, mean_pres_curve))
 
+         fig_routes.update_layout(
+             title="Cluster Mean Routes",
+             geo=dict(projection_type='natural earth', showland=True),
+             height=600
+         )
+
          # Create cluster stats plot
          x_axis = np.linspace(0, 1, max_length)
          fig_stats = make_subplots(rows=2, cols=1, shared_xaxes=True,
@@ -1064,7 +1176,8 @@ def update_route_clusters(start_year, start_month, end_year, end_month, enso_val
                  y=wind_curve,
                  mode='lines',
                  line=dict(width=2, color=colors[i % len(colors)]),
-                 name=f"Cluster {label} Mean Wind"
+                 name=f"Cluster {label} Mean Wind",
+                 showlegend=True
              ), row=1, col=1)
 
              fig_stats.add_trace(go.Scatter(
@@ -1072,7 +1185,8 @@ def update_route_clusters(start_year, start_month, end_year, end_month, enso_val
                  y=pres_curve,
                  mode='lines',
                  line=dict(width=2, color=colors[i % len(colors)]),
-                 name=f"Cluster {label} Mean MSLP"
+                 name=f"Cluster {label} Mean MSLP",
+                 showlegend=False
              ), row=2, col=1)
 
          fig_stats.update_layout(
@@ -1081,10 +1195,10 @@ def update_route_clusters(start_year, start_month, end_year, end_month, enso_val
              yaxis_title="Mean Wind Speed (knots)",
              xaxis2_title="Normalized Route Index",
              yaxis2_title="Mean MSLP (hPa)",
-             showlegend=True
+             height=600
          )
 
-         info = f"TSNE clustering complete. Selected eps: {selected_eps:.2f}. Clusters: {len(unique_labels)}."
+         info = f"TSNE clustering complete. Selected eps: {selected_eps:.2f}. Clusters: {len(unique_labels)}. Total storms: {len(route_vectors)}."
          return fig_tsne, fig_routes, fig_stats, info
 
      except Exception as e:
@@ -1112,9 +1226,9 @@ def generate_track_video_from_csv(year, storm_id, standard):
      else:
          winds = np.full(len(lats), np.nan)
 
-     storm_name = storm_df['NAME'].iloc[0]
+     storm_name = storm_df['NAME'].iloc[0] if pd.notnull(storm_df['NAME'].iloc[0]) else "Unnamed"
      basin = storm_df['SID'].iloc[0][:2]
-     season = storm_df['SEASON'].iloc[0]
+     season = storm_df['SEASON'].iloc[0] if 'SEASON' in storm_df.columns else year
 
      min_lat, max_lat = np.min(lats), np.max(lats)
      min_lon, max_lon = np.min(lons), np.max(lons)
@@ -1157,7 +1271,7 @@ def generate_track_video_from_csv(year, storm_id, standard):
      def update(frame):
          line.set_data(lons[:frame+1], lats[:frame+1])
          point.set_data([lons[frame]], [lats[frame]])
-         wind_speed = winds[frame] if frame < len(winds) else np.nan
+         wind_speed = winds[frame] if frame < len(winds) and not pd.isna(winds[frame]) else 0
          category, color = categorize_typhoon_by_standard(wind_speed, standard)
          point.set_color(color)
          dt_str = pd.to_datetime(times[frame]).strftime('%Y-%m-%d %H:%M')
@@ -1201,7 +1315,13 @@ def update_typhoon_options_fixed(year, basin):
          return gr.update(choices=[], value=None)
 
      # Filter by year
-     year_data = typhoon_data[typhoon_data['ISO_TIME'].dt.year == int(year)].copy()
+     if 'ISO_TIME' in typhoon_data.columns:
+         year_data = typhoon_data[typhoon_data['ISO_TIME'].dt.year == int(year)].copy()
+     elif 'SEASON' in typhoon_data.columns:
+         year_data = typhoon_data[typhoon_data['SEASON'] == int(year)].copy()
+     else:
+         # Fallback: use all data
+         year_data = typhoon_data.copy()
 
      if basin != "All Basins":
          # Extract basin code
@@ -1222,7 +1342,7 @@ def update_typhoon_options_fixed(year, basin):
 
      for _, storm in storms.iterrows():
          name = storm.get('NAME', 'UNNAMED')
-         if pd.isna(name) or name == '':
+         if pd.isna(name) or name == '' or name == 'UNNAMED':
              name = 'UNNAMED'
          sid = storm['SID']
          options.append(f"{name} ({sid})")
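A minimal standalone sketch (not part of this commit) of the loading pattern the new load_ibtracs_csv_directly relies on: skip the IBTrACS metadata rows, coerce the key columns, and drop rows without usable coordinates. The path, the number of rows to skip, and the column names are assumptions that depend on the exact IBTrACS file variant.

import pandas as pd

def load_ibtracs_sketch(csv_path, skip_rows=3):
    # skip_rows drops the metadata/units rows that precede the data;
    # the commit above uses 3 for IBTrACS v04r01, the old code used 1
    df = pd.read_csv(csv_path, low_memory=False, skiprows=skip_rows)
    if 'ISO_TIME' in df.columns:
        df['ISO_TIME'] = pd.to_datetime(df['ISO_TIME'], errors='coerce')
    for col in ('LAT', 'LON', 'USA_WIND', 'USA_PRES'):
        if col in df.columns:
            df[col] = pd.to_numeric(df[col], errors='coerce')
    # keep only rows with coordinates in a physically valid range
    df = df.dropna(subset=['LAT', 'LON'])
    return df[df['LAT'].between(-90, 90) & df['LON'].between(-180, 180)]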
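The clustering change in update_route_clusters follows an embed-then-sweep pattern: project fixed-length route vectors to 2-D with t-SNE, then scan DBSCAN's eps until the cluster count lands in a target band. A self-contained sketch with synthetic data; the function name, thresholds, and array shapes here are illustrative assumptions, not the app's API.

import numpy as np
from sklearn.manifold import TSNE
from sklearn.cluster import DBSCAN

def cluster_routes_sketch(route_vectors, min_clusters=2, max_clusters=10):
    n = len(route_vectors)
    tsne = TSNE(n_components=2, random_state=42,
                perplexity=min(30, n - 1))  # perplexity must stay below n
    embedded = tsne.fit_transform(np.asarray(route_vectors))

    min_samples = max(2, n // 10)
    for eps in np.linspace(1.0, 10.0, 91):
        labels = DBSCAN(eps=eps, min_samples=min_samples).fit_predict(embedded)
        n_clusters = len(set(labels) - {-1})  # -1 is DBSCAN's noise label
        if min_clusters <= n_clusters <= max_clusters:
            return embedded, labels, eps

    # no eps in the sweep produced an acceptable count; fall back to a fixed value
    labels = DBSCAN(eps=5.0, min_samples=min_samples).fit_predict(embedded)
    return embedded, labels, 5.0

# example: 40 synthetic "routes", each flattened to a 100-dimensional vector
routes = np.random.rand(40, 100)
embedded, labels, eps = cluster_routes_sketch(routes)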
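The fallback-data fix swaps datetime.timedelta for pandas date arithmetic. A small sketch of the same idea, generating 6-hourly synthetic track points with pd.Timedelta; storm counts, coordinates, and the storm-id format are made up for illustration.

import numpy as np
import pandas as pd

def make_fallback_tracks_sketch(n_storms=5, points_per_storm=8, seed=42):
    rng = np.random.default_rng(seed)
    dates = pd.date_range('2000-01-01', '2023-12-31', freq='D')
    starts = dates[rng.choice(len(dates), size=n_storms, replace=False)]

    rows = []
    for i, start in enumerate(starts):
        lat, lon = 15.0 + rng.normal(0, 5), 130.0 + rng.normal(0, 10)
        for j in range(points_per_storm):
            rows.append({
                'SID': f'WP{i+1:02d}{start.year}',              # hypothetical storm id
                'ISO_TIME': start + pd.Timedelta(hours=6 * j),  # 6-hourly fixes
                'LAT': lat + 0.5 * j,                           # drift poleward
                'LON': lon + 0.3 * j,                           # drift eastward
                'USA_WIND': 30 + 5 * j,
                'BASIN': 'WP',
            })
    return pd.DataFrame(rows)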