euler314 committed on
Commit
a1ed77d
·
verified ·
1 Parent(s): e81fdea

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +100 -75
app.py CHANGED
@@ -4,9 +4,16 @@ import logging
4
  import pickle
5
  import threading
6
  import time
 
7
  from datetime import datetime, timedelta
8
  from collections import defaultdict
9
  import csv
 
 
 
 
 
 
10
  import gradio as gr
11
  import pandas as pd
12
  import numpy as np
@@ -807,13 +814,14 @@ def perform_dimensionality_reduction(storm_features, method='umap', n_components
807
  X_scaled = scaler.fit_transform(X)
808
 
809
  if method.lower() == 'umap' and UMAP_AVAILABLE:
810
- # UMAP parameters optimized for typhoon data
811
  reducer = umap.UMAP(
812
  n_components=n_components,
813
  n_neighbors=15,
814
  min_dist=0.1,
815
  metric='euclidean',
816
- random_state=42
 
817
  )
818
  elif method.lower() == 'tsne':
819
  # t-SNE parameters
@@ -848,15 +856,22 @@ def cluster_storms(embedding, method='dbscan', eps=0.5, min_samples=3):
848
  def create_advanced_clustering_visualization(storm_features, typhoon_data, method='umap', show_routes=True):
849
  """Create comprehensive clustering visualization with route display"""
850
  try:
 
 
 
 
 
 
 
851
  # Perform dimensionality reduction
852
  embedding, feature_cols, scaler = perform_dimensionality_reduction(storm_features, method)
853
 
854
  # Perform clustering
855
- clusters = cluster_storms(embedding, 'dbscan')
856
 
857
  # Add clustering results to storm features
858
  storm_features_viz = storm_features.copy()
859
- storm_features_viz['cluster'] = clusters
860
  storm_features_viz['dim1'] = embedding[:, 0]
861
  storm_features_viz['dim2'] = embedding[:, 1]
862
 
@@ -899,11 +914,11 @@ def create_advanced_clustering_visualization(storm_features, typhoon_data, metho
899
  '<extra></extra>'
900
  ),
901
  customdata=np.column_stack((
902
- cluster_data['NAME'],
903
- cluster_data['SEASON'],
904
- cluster_data['USA_WIND_max'],
905
- cluster_data['USA_PRES_min'],
906
- cluster_data['track_length']
907
  ))
908
  ),
909
  row=1, col=1
@@ -914,38 +929,41 @@ def create_advanced_clustering_visualization(storm_features, typhoon_data, metho
914
  if cluster == -1: # Skip noise for route visualization
915
  continue
916
 
917
- cluster_storms = storm_features_viz[storm_features_viz['cluster'] == cluster]['SID'].tolist()
918
  color = CLUSTER_COLORS[i % len(CLUSTER_COLORS)]
919
 
920
- for j, sid in enumerate(cluster_storms[:10]): # Limit to 10 storms per cluster for performance
921
- storm_track = typhoon_data[typhoon_data['SID'] == sid].sort_values('ISO_TIME')
922
- if len(storm_track) > 1:
923
- storm_name = storm_track['NAME'].iloc[0] if pd.notna(storm_track['NAME'].iloc[0]) else 'UNNAMED'
924
-
925
- fig.add_trace(
926
- go.Scattergeo(
927
- lon=storm_track['LON'],
928
- lat=storm_track['LAT'],
929
- mode='lines+markers',
930
- line=dict(color=color, width=2),
931
- marker=dict(color=color, size=4),
932
- name=f'C{cluster}: {storm_name}' if j == 0 else None,
933
- showlegend=(j == 0),
934
- hovertemplate=(
935
- f'<b>{storm_name}</b><br>'
936
- 'Lat: %{lat:.1f}°<br>'
937
- 'Lon: %{lon:.1f}°<br>'
938
- f'Cluster: {cluster}<br>'
939
- '<extra></extra>'
940
- )
941
- ),
942
- row=1, col=2
943
- )
 
 
 
 
944
 
945
  # Update layout
946
  fig.update_layout(
947
  title_text="Advanced Storm Clustering Analysis with Route Visualization",
948
- height=600,
949
  showlegend=True
950
  )
951
 
@@ -983,48 +1001,52 @@ def create_advanced_clustering_visualization(storm_features, typhoon_data, metho
983
  )
984
 
985
  # Generate detailed cluster statistics
986
- cluster_stats = storm_features_viz.groupby('cluster').agg({
987
- 'USA_WIND_max': ['mean', 'std', 'min', 'max'],
988
- 'USA_PRES_min': ['mean', 'std', 'min', 'max'],
989
- 'track_length': ['mean', 'std'],
990
- 'genesis_lat': 'mean',
991
- 'genesis_lon': 'mean',
992
- 'total_distance': 'mean',
993
- 'avg_curvature': 'mean',
994
- 'SID': 'count'
995
- }).round(2)
996
-
997
- # Flatten column names for readability
998
- cluster_stats.columns = ['_'.join(col).strip() for col in cluster_stats.columns]
999
-
1000
- stats_text = "🌀 ADVANCED CLUSTER ANALYSIS RESULTS\n" + "="*50 + "\n\n"
1001
-
1002
- for cluster in sorted(storm_features_viz['cluster'].unique()):
1003
- if cluster == -1:
1004
- stats_text += f"🔸 NOISE POINTS: {cluster_stats.loc[-1, 'SID_count']} storms\n\n"
1005
- continue
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1006
 
1007
- cluster_row = cluster_stats.loc[cluster]
1008
- storm_count = int(cluster_row['SID_count'])
1009
 
1010
- stats_text += f"🌪️ CLUSTER {cluster}: {storm_count} storms\n"
1011
- stats_text += f" Intensity: {cluster_row['USA_WIND_max_mean']:.1f} ± {cluster_row['USA_WIND_max_std']:.1f} kt\n"
1012
- stats_text += f" Pressure: {cluster_row['USA_PRES_min_mean']:.1f} ± {cluster_row['USA_PRES_min_std']:.1f} hPa\n"
1013
- stats_text += f" Track Length: {cluster_row['track_length_mean']:.1f} ± {cluster_row['track_length_std']:.1f} points\n"
1014
- stats_text += f" Genesis Region: {cluster_row['genesis_lat']:.1f}°N, {cluster_row['genesis_lon']:.1f}°E\n"
1015
- stats_text += f" Avg Distance: {cluster_row['total_distance_mean']:.2f} degrees\n"
1016
- stats_text += f" Avg Curvature: {cluster_row['avg_curvature_mean']:.3f} radians\n\n"
1017
-
1018
- # Add feature importance summary
1019
- stats_text += "📊 CLUSTERING FEATURES USED:\n"
1020
- stats_text += f" • Storm intensity (max/mean/std wind & pressure)\n"
1021
- stats_text += f" • Track characteristics (length, curvature, distance)\n"
1022
- stats_text += f" • Genesis location (lat/lon)\n"
1023
- stats_text += f" • Geographic range (lat/lon span)\n"
1024
- stats_text += f" • Total features: {len(feature_cols)}\n\n"
1025
-
1026
- stats_text += f"🎯 ALGORITHM: {method.upper()} + DBSCAN clustering\n"
1027
- stats_text += f"📈 CLUSTERS FOUND: {len([c for c in storm_features_viz['cluster'].unique() if c != -1])}\n"
1028
 
1029
  return fig, stats_text, storm_features_viz
1030
 
@@ -1696,6 +1718,8 @@ def create_interface():
1696
  total_storms = 0
1697
  total_records = 0
1698
  year_range_display = "Unknown"
 
 
1699
  with gr.Blocks(title="Enhanced Typhoon Analysis Platform", theme=gr.themes.Soft()) as demo:
1700
  gr.Markdown("# 🌪️ Enhanced Typhoon Analysis Platform")
1701
  gr.Markdown("Advanced ML clustering, CNN predictions, and comprehensive tropical cyclone analysis including Tropical Depressions")
@@ -1813,6 +1837,7 @@ def create_interface():
1813
  outputs=[intensity_output, confidence_output]
1814
  )
1815
 
 
1816
  gr.Markdown("""
1817
  ### 🧠 Prediction Features:
1818
  - **Environmental Analysis**: Considers ENSO, latitude, seasonality
 
4
  import pickle
5
  import threading
6
  import time
7
+ import warnings
8
  from datetime import datetime, timedelta
9
  from collections import defaultdict
10
  import csv
11
+
12
+ # Suppress warnings for cleaner output
13
+ warnings.filterwarnings('ignore', category=FutureWarning)
14
+ warnings.filterwarnings('ignore', category=UserWarning, module='umap')
15
+ warnings.filterwarnings('ignore', category=UserWarning, module='sklearn')
16
+
17
  import gradio as gr
18
  import pandas as pd
19
  import numpy as np
 
814
  X_scaled = scaler.fit_transform(X)
815
 
816
  if method.lower() == 'umap' and UMAP_AVAILABLE:
817
+ # UMAP parameters optimized for typhoon data - fixed warnings
818
  reducer = umap.UMAP(
819
  n_components=n_components,
820
  n_neighbors=15,
821
  min_dist=0.1,
822
  metric='euclidean',
823
+ random_state=42,
824
+ n_jobs=1 # Explicitly set to avoid warning
825
  )
826
  elif method.lower() == 'tsne':
827
  # t-SNE parameters
 
856
  def create_advanced_clustering_visualization(storm_features, typhoon_data, method='umap', show_routes=True):
857
  """Create comprehensive clustering visualization with route display"""
858
  try:
859
+ # Validate inputs
860
+ if storm_features is None or storm_features.empty:
861
+ raise ValueError("No storm features available for clustering")
862
+
863
+ if typhoon_data is None or typhoon_data.empty:
864
+ raise ValueError("No typhoon data available for route visualization")
865
+
866
  # Perform dimensionality reduction
867
  embedding, feature_cols, scaler = perform_dimensionality_reduction(storm_features, method)
868
 
869
  # Perform clustering
870
+ cluster_labels = cluster_storms(embedding, 'dbscan')
871
 
872
  # Add clustering results to storm features
873
  storm_features_viz = storm_features.copy()
874
+ storm_features_viz['cluster'] = cluster_labels
875
  storm_features_viz['dim1'] = embedding[:, 0]
876
  storm_features_viz['dim2'] = embedding[:, 1]
877
 
 
914
  '<extra></extra>'
915
  ),
916
  customdata=np.column_stack((
917
+ cluster_data['NAME'].fillna('UNNAMED'),
918
+ cluster_data['SEASON'].fillna(2000),
919
+ cluster_data['USA_WIND_max'].fillna(0),
920
+ cluster_data['USA_PRES_min'].fillna(1000),
921
+ cluster_data['track_length'].fillna(0)
922
  ))
923
  ),
924
  row=1, col=1
 
929
  if cluster == -1: # Skip noise for route visualization
930
  continue
931
 
932
+ cluster_storm_ids = storm_features_viz[storm_features_viz['cluster'] == cluster]['SID'].tolist()
933
  color = CLUSTER_COLORS[i % len(CLUSTER_COLORS)]
934
 
935
+ for j, sid in enumerate(cluster_storm_ids[:10]): # Limit to 10 storms per cluster for performance
936
+ try:
937
+ storm_track = typhoon_data[typhoon_data['SID'] == sid].sort_values('ISO_TIME')
938
+ if len(storm_track) > 1:
939
+ storm_name = storm_track['NAME'].iloc[0] if pd.notna(storm_track['NAME'].iloc[0]) else 'UNNAMED'
940
+
941
+ fig.add_trace(
942
+ go.Scattergeo(
943
+ lon=storm_track['LON'],
944
+ lat=storm_track['LAT'],
945
+ mode='lines+markers',
946
+ line=dict(color=color, width=2),
947
+ marker=dict(color=color, size=4),
948
+ name=f'C{cluster}: {storm_name}' if j == 0 else None,
949
+ showlegend=(j == 0),
950
+ hovertemplate=(
951
+ f'<b>{storm_name}</b><br>'
952
+ 'Lat: %{lat:.1f}°<br>'
953
+ 'Lon: %{lon:.1f}°<br>'
954
+ f'Cluster: {cluster}<br>'
955
+ '<extra></extra>'
956
+ )
957
+ ),
958
+ row=1, col=2
959
+ )
960
+ except Exception as track_error:
961
+ print(f"Error adding track for storm {sid}: {track_error}")
962
+ continue
963
 
964
  # Update layout
965
  fig.update_layout(
966
  title_text="Advanced Storm Clustering Analysis with Route Visualization",
 
967
  showlegend=True
968
  )
969
 
 
1001
  )
1002
 
1003
  # Generate detailed cluster statistics
1004
+ try:
1005
+ cluster_stats = storm_features_viz.groupby('cluster').agg({
1006
+ 'USA_WIND_max': ['mean', 'std', 'min', 'max'],
1007
+ 'USA_PRES_min': ['mean', 'std', 'min', 'max'],
1008
+ 'track_length': ['mean', 'std'],
1009
+ 'genesis_lat': 'mean',
1010
+ 'genesis_lon': 'mean',
1011
+ 'total_distance': 'mean',
1012
+ 'avg_curvature': 'mean',
1013
+ 'SID': 'count'
1014
+ }).round(2)
1015
+
1016
+ # Flatten column names for readability
1017
+ cluster_stats.columns = ['_'.join(col).strip() for col in cluster_stats.columns]
1018
+
1019
+ stats_text = "🌀 ADVANCED CLUSTER ANALYSIS RESULTS\n" + "="*50 + "\n\n"
1020
+
1021
+ for cluster in sorted(storm_features_viz['cluster'].unique()):
1022
+ if cluster == -1:
1023
+ stats_text += f"🔸 NOISE POINTS: {cluster_stats.loc[-1, 'SID_count']} storms\n\n"
1024
+ continue
1025
+
1026
+ cluster_row = cluster_stats.loc[cluster]
1027
+ storm_count = int(cluster_row['SID_count'])
1028
+
1029
+ stats_text += f"🌪️ CLUSTER {cluster}: {storm_count} storms\n"
1030
+ stats_text += f" Intensity: {cluster_row['USA_WIND_max_mean']:.1f} ± {cluster_row['USA_WIND_max_std']:.1f} kt\n"
1031
+ stats_text += f" Pressure: {cluster_row['USA_PRES_min_mean']:.1f} ± {cluster_row['USA_PRES_min_std']:.1f} hPa\n"
1032
+ stats_text += f" Track Length: {cluster_row['track_length_mean']:.1f} ± {cluster_row['track_length_std']:.1f} points\n"
1033
+ stats_text += f" Genesis Region: {cluster_row['genesis_lat']:.1f}°N, {cluster_row['genesis_lon']:.1f}°E\n"
1034
+ stats_text += f" Avg Distance: {cluster_row['total_distance_mean']:.2f} degrees\n"
1035
+ stats_text += f" Avg Curvature: {cluster_row['avg_curvature_mean']:.3f} radians\n\n"
1036
+
1037
+ # Add feature importance summary
1038
+ stats_text += "📊 CLUSTERING FEATURES USED:\n"
1039
+ stats_text += f" • Storm intensity (max/mean/std wind & pressure)\n"
1040
+ stats_text += f" • Track characteristics (length, curvature, distance)\n"
1041
+ stats_text += f" • Genesis location (lat/lon)\n"
1042
+ stats_text += f" • Geographic range (lat/lon span)\n"
1043
+ stats_text += f" • Total features: {len(feature_cols)}\n\n"
1044
 
1045
+ stats_text += f"🎯 ALGORITHM: {method.upper()} + DBSCAN clustering\n"
1046
+ stats_text += f"📈 CLUSTERS FOUND: {len([c for c in storm_features_viz['cluster'].unique() if c != -1])}\n"
1047
 
1048
+ except Exception as stats_error:
1049
+ stats_text = f"Error generating cluster statistics: {str(stats_error)}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1050
 
1051
  return fig, stats_text, storm_features_viz
1052
 
 
1718
  total_storms = 0
1719
  total_records = 0
1720
  year_range_display = "Unknown"
1721
+ available_years = [str(year) for year in range(2000, 2026)]
1722
+
1723
  with gr.Blocks(title="Enhanced Typhoon Analysis Platform", theme=gr.themes.Soft()) as demo:
1724
  gr.Markdown("# 🌪️ Enhanced Typhoon Analysis Platform")
1725
  gr.Markdown("Advanced ML clustering, CNN predictions, and comprehensive tropical cyclone analysis including Tropical Depressions")
 
1837
  outputs=[intensity_output, confidence_output]
1838
  )
1839
 
1840
+ gr.Markdown("""
1841
  gr.Markdown("""
1842
  ### 🧠 Prediction Features:
1843
  - **Environmental Analysis**: Considers ENSO, latitude, seasonality