Spaces:
Running
Running
Update app.py
Browse files
app.py
CHANGED
@@ -4,9 +4,16 @@ import logging
|
|
4 |
import pickle
|
5 |
import threading
|
6 |
import time
|
|
|
7 |
from datetime import datetime, timedelta
|
8 |
from collections import defaultdict
|
9 |
import csv
|
|
|
|
|
|
|
|
|
|
|
|
|
10 |
import gradio as gr
|
11 |
import pandas as pd
|
12 |
import numpy as np
|
@@ -807,13 +814,14 @@ def perform_dimensionality_reduction(storm_features, method='umap', n_components
|
|
807 |
X_scaled = scaler.fit_transform(X)
|
808 |
|
809 |
if method.lower() == 'umap' and UMAP_AVAILABLE:
|
810 |
-
# UMAP parameters optimized for typhoon data
|
811 |
reducer = umap.UMAP(
|
812 |
n_components=n_components,
|
813 |
n_neighbors=15,
|
814 |
min_dist=0.1,
|
815 |
metric='euclidean',
|
816 |
-
random_state=42
|
|
|
817 |
)
|
818 |
elif method.lower() == 'tsne':
|
819 |
# t-SNE parameters
|
@@ -848,15 +856,22 @@ def cluster_storms(embedding, method='dbscan', eps=0.5, min_samples=3):
|
|
848 |
def create_advanced_clustering_visualization(storm_features, typhoon_data, method='umap', show_routes=True):
|
849 |
"""Create comprehensive clustering visualization with route display"""
|
850 |
try:
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
851 |
# Perform dimensionality reduction
|
852 |
embedding, feature_cols, scaler = perform_dimensionality_reduction(storm_features, method)
|
853 |
|
854 |
# Perform clustering
|
855 |
-
|
856 |
|
857 |
# Add clustering results to storm features
|
858 |
storm_features_viz = storm_features.copy()
|
859 |
-
storm_features_viz['cluster'] =
|
860 |
storm_features_viz['dim1'] = embedding[:, 0]
|
861 |
storm_features_viz['dim2'] = embedding[:, 1]
|
862 |
|
@@ -899,11 +914,11 @@ def create_advanced_clustering_visualization(storm_features, typhoon_data, metho
|
|
899 |
'<extra></extra>'
|
900 |
),
|
901 |
customdata=np.column_stack((
|
902 |
-
cluster_data['NAME'],
|
903 |
-
cluster_data['SEASON'],
|
904 |
-
cluster_data['USA_WIND_max'],
|
905 |
-
cluster_data['USA_PRES_min'],
|
906 |
-
cluster_data['track_length']
|
907 |
))
|
908 |
),
|
909 |
row=1, col=1
|
@@ -914,38 +929,41 @@ def create_advanced_clustering_visualization(storm_features, typhoon_data, metho
|
|
914 |
if cluster == -1: # Skip noise for route visualization
|
915 |
continue
|
916 |
|
917 |
-
|
918 |
color = CLUSTER_COLORS[i % len(CLUSTER_COLORS)]
|
919 |
|
920 |
-
for j, sid in enumerate(
|
921 |
-
|
922 |
-
|
923 |
-
|
924 |
-
|
925 |
-
|
926 |
-
|
927 |
-
|
928 |
-
|
929 |
-
|
930 |
-
|
931 |
-
|
932 |
-
|
933 |
-
|
934 |
-
|
935 |
-
|
936 |
-
|
937 |
-
|
938 |
-
|
939 |
-
|
940 |
-
|
941 |
-
|
942 |
-
|
943 |
-
|
|
|
|
|
|
|
|
|
944 |
|
945 |
# Update layout
|
946 |
fig.update_layout(
|
947 |
title_text="Advanced Storm Clustering Analysis with Route Visualization",
|
948 |
-
height=600,
|
949 |
showlegend=True
|
950 |
)
|
951 |
|
@@ -983,48 +1001,52 @@ def create_advanced_clustering_visualization(storm_features, typhoon_data, metho
|
|
983 |
)
|
984 |
|
985 |
# Generate detailed cluster statistics
|
986 |
-
|
987 |
-
|
988 |
-
|
989 |
-
|
990 |
-
|
991 |
-
|
992 |
-
|
993 |
-
|
994 |
-
|
995 |
-
|
996 |
-
|
997 |
-
|
998 |
-
|
999 |
-
|
1000 |
-
|
1001 |
-
|
1002 |
-
|
1003 |
-
|
1004 |
-
|
1005 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1006 |
|
1007 |
-
|
1008 |
-
|
1009 |
|
1010 |
-
|
1011 |
-
stats_text
|
1012 |
-
stats_text += f" Pressure: {cluster_row['USA_PRES_min_mean']:.1f} ± {cluster_row['USA_PRES_min_std']:.1f} hPa\n"
|
1013 |
-
stats_text += f" Track Length: {cluster_row['track_length_mean']:.1f} ± {cluster_row['track_length_std']:.1f} points\n"
|
1014 |
-
stats_text += f" Genesis Region: {cluster_row['genesis_lat']:.1f}°N, {cluster_row['genesis_lon']:.1f}°E\n"
|
1015 |
-
stats_text += f" Avg Distance: {cluster_row['total_distance_mean']:.2f} degrees\n"
|
1016 |
-
stats_text += f" Avg Curvature: {cluster_row['avg_curvature_mean']:.3f} radians\n\n"
|
1017 |
-
|
1018 |
-
# Add feature importance summary
|
1019 |
-
stats_text += "📊 CLUSTERING FEATURES USED:\n"
|
1020 |
-
stats_text += f" • Storm intensity (max/mean/std wind & pressure)\n"
|
1021 |
-
stats_text += f" • Track characteristics (length, curvature, distance)\n"
|
1022 |
-
stats_text += f" • Genesis location (lat/lon)\n"
|
1023 |
-
stats_text += f" • Geographic range (lat/lon span)\n"
|
1024 |
-
stats_text += f" • Total features: {len(feature_cols)}\n\n"
|
1025 |
-
|
1026 |
-
stats_text += f"🎯 ALGORITHM: {method.upper()} + DBSCAN clustering\n"
|
1027 |
-
stats_text += f"📈 CLUSTERS FOUND: {len([c for c in storm_features_viz['cluster'].unique() if c != -1])}\n"
|
1028 |
|
1029 |
return fig, stats_text, storm_features_viz
|
1030 |
|
@@ -1696,6 +1718,8 @@ def create_interface():
|
|
1696 |
total_storms = 0
|
1697 |
total_records = 0
|
1698 |
year_range_display = "Unknown"
|
|
|
|
|
1699 |
with gr.Blocks(title="Enhanced Typhoon Analysis Platform", theme=gr.themes.Soft()) as demo:
|
1700 |
gr.Markdown("# 🌪️ Enhanced Typhoon Analysis Platform")
|
1701 |
gr.Markdown("Advanced ML clustering, CNN predictions, and comprehensive tropical cyclone analysis including Tropical Depressions")
|
@@ -1813,6 +1837,7 @@ def create_interface():
|
|
1813 |
outputs=[intensity_output, confidence_output]
|
1814 |
)
|
1815 |
|
|
|
1816 |
gr.Markdown("""
|
1817 |
### 🧠 Prediction Features:
|
1818 |
- **Environmental Analysis**: Considers ENSO, latitude, seasonality
|
|
|
4 |
import pickle
|
5 |
import threading
|
6 |
import time
|
7 |
+
import warnings
|
8 |
from datetime import datetime, timedelta
|
9 |
from collections import defaultdict
|
10 |
import csv
|
11 |
+
|
12 |
+
# Suppress warnings for cleaner output
|
13 |
+
warnings.filterwarnings('ignore', category=FutureWarning)
|
14 |
+
warnings.filterwarnings('ignore', category=UserWarning, module='umap')
|
15 |
+
warnings.filterwarnings('ignore', category=UserWarning, module='sklearn')
|
16 |
+
|
17 |
import gradio as gr
|
18 |
import pandas as pd
|
19 |
import numpy as np
|
|
|
814 |
X_scaled = scaler.fit_transform(X)
|
815 |
|
816 |
if method.lower() == 'umap' and UMAP_AVAILABLE:
|
817 |
+
# UMAP parameters optimized for typhoon data - fixed warnings
|
818 |
reducer = umap.UMAP(
|
819 |
n_components=n_components,
|
820 |
n_neighbors=15,
|
821 |
min_dist=0.1,
|
822 |
metric='euclidean',
|
823 |
+
random_state=42,
|
824 |
+
n_jobs=1 # Explicitly set to avoid warning
|
825 |
)
|
826 |
elif method.lower() == 'tsne':
|
827 |
# t-SNE parameters
|
|
|
856 |
def create_advanced_clustering_visualization(storm_features, typhoon_data, method='umap', show_routes=True):
|
857 |
"""Create comprehensive clustering visualization with route display"""
|
858 |
try:
|
859 |
+
# Validate inputs
|
860 |
+
if storm_features is None or storm_features.empty:
|
861 |
+
raise ValueError("No storm features available for clustering")
|
862 |
+
|
863 |
+
if typhoon_data is None or typhoon_data.empty:
|
864 |
+
raise ValueError("No typhoon data available for route visualization")
|
865 |
+
|
866 |
# Perform dimensionality reduction
|
867 |
embedding, feature_cols, scaler = perform_dimensionality_reduction(storm_features, method)
|
868 |
|
869 |
# Perform clustering
|
870 |
+
cluster_labels = cluster_storms(embedding, 'dbscan')
|
871 |
|
872 |
# Add clustering results to storm features
|
873 |
storm_features_viz = storm_features.copy()
|
874 |
+
storm_features_viz['cluster'] = cluster_labels
|
875 |
storm_features_viz['dim1'] = embedding[:, 0]
|
876 |
storm_features_viz['dim2'] = embedding[:, 1]
|
877 |
|
|
|
914 |
'<extra></extra>'
|
915 |
),
|
916 |
customdata=np.column_stack((
|
917 |
+
cluster_data['NAME'].fillna('UNNAMED'),
|
918 |
+
cluster_data['SEASON'].fillna(2000),
|
919 |
+
cluster_data['USA_WIND_max'].fillna(0),
|
920 |
+
cluster_data['USA_PRES_min'].fillna(1000),
|
921 |
+
cluster_data['track_length'].fillna(0)
|
922 |
))
|
923 |
),
|
924 |
row=1, col=1
|
|
|
929 |
if cluster == -1: # Skip noise for route visualization
|
930 |
continue
|
931 |
|
932 |
+
cluster_storm_ids = storm_features_viz[storm_features_viz['cluster'] == cluster]['SID'].tolist()
|
933 |
color = CLUSTER_COLORS[i % len(CLUSTER_COLORS)]
|
934 |
|
935 |
+
for j, sid in enumerate(cluster_storm_ids[:10]): # Limit to 10 storms per cluster for performance
|
936 |
+
try:
|
937 |
+
storm_track = typhoon_data[typhoon_data['SID'] == sid].sort_values('ISO_TIME')
|
938 |
+
if len(storm_track) > 1:
|
939 |
+
storm_name = storm_track['NAME'].iloc[0] if pd.notna(storm_track['NAME'].iloc[0]) else 'UNNAMED'
|
940 |
+
|
941 |
+
fig.add_trace(
|
942 |
+
go.Scattergeo(
|
943 |
+
lon=storm_track['LON'],
|
944 |
+
lat=storm_track['LAT'],
|
945 |
+
mode='lines+markers',
|
946 |
+
line=dict(color=color, width=2),
|
947 |
+
marker=dict(color=color, size=4),
|
948 |
+
name=f'C{cluster}: {storm_name}' if j == 0 else None,
|
949 |
+
showlegend=(j == 0),
|
950 |
+
hovertemplate=(
|
951 |
+
f'<b>{storm_name}</b><br>'
|
952 |
+
'Lat: %{lat:.1f}°<br>'
|
953 |
+
'Lon: %{lon:.1f}°<br>'
|
954 |
+
f'Cluster: {cluster}<br>'
|
955 |
+
'<extra></extra>'
|
956 |
+
)
|
957 |
+
),
|
958 |
+
row=1, col=2
|
959 |
+
)
|
960 |
+
except Exception as track_error:
|
961 |
+
print(f"Error adding track for storm {sid}: {track_error}")
|
962 |
+
continue
|
963 |
|
964 |
# Update layout
|
965 |
fig.update_layout(
|
966 |
title_text="Advanced Storm Clustering Analysis with Route Visualization",
|
|
|
967 |
showlegend=True
|
968 |
)
|
969 |
|
|
|
1001 |
)
|
1002 |
|
1003 |
# Generate detailed cluster statistics
|
1004 |
+
try:
|
1005 |
+
cluster_stats = storm_features_viz.groupby('cluster').agg({
|
1006 |
+
'USA_WIND_max': ['mean', 'std', 'min', 'max'],
|
1007 |
+
'USA_PRES_min': ['mean', 'std', 'min', 'max'],
|
1008 |
+
'track_length': ['mean', 'std'],
|
1009 |
+
'genesis_lat': 'mean',
|
1010 |
+
'genesis_lon': 'mean',
|
1011 |
+
'total_distance': 'mean',
|
1012 |
+
'avg_curvature': 'mean',
|
1013 |
+
'SID': 'count'
|
1014 |
+
}).round(2)
|
1015 |
+
|
1016 |
+
# Flatten column names for readability
|
1017 |
+
cluster_stats.columns = ['_'.join(col).strip() for col in cluster_stats.columns]
|
1018 |
+
|
1019 |
+
stats_text = "🌀 ADVANCED CLUSTER ANALYSIS RESULTS\n" + "="*50 + "\n\n"
|
1020 |
+
|
1021 |
+
for cluster in sorted(storm_features_viz['cluster'].unique()):
|
1022 |
+
if cluster == -1:
|
1023 |
+
stats_text += f"🔸 NOISE POINTS: {cluster_stats.loc[-1, 'SID_count']} storms\n\n"
|
1024 |
+
continue
|
1025 |
+
|
1026 |
+
cluster_row = cluster_stats.loc[cluster]
|
1027 |
+
storm_count = int(cluster_row['SID_count'])
|
1028 |
+
|
1029 |
+
stats_text += f"🌪️ CLUSTER {cluster}: {storm_count} storms\n"
|
1030 |
+
stats_text += f" Intensity: {cluster_row['USA_WIND_max_mean']:.1f} ± {cluster_row['USA_WIND_max_std']:.1f} kt\n"
|
1031 |
+
stats_text += f" Pressure: {cluster_row['USA_PRES_min_mean']:.1f} ± {cluster_row['USA_PRES_min_std']:.1f} hPa\n"
|
1032 |
+
stats_text += f" Track Length: {cluster_row['track_length_mean']:.1f} ± {cluster_row['track_length_std']:.1f} points\n"
|
1033 |
+
stats_text += f" Genesis Region: {cluster_row['genesis_lat']:.1f}°N, {cluster_row['genesis_lon']:.1f}°E\n"
|
1034 |
+
stats_text += f" Avg Distance: {cluster_row['total_distance_mean']:.2f} degrees\n"
|
1035 |
+
stats_text += f" Avg Curvature: {cluster_row['avg_curvature_mean']:.3f} radians\n\n"
|
1036 |
+
|
1037 |
+
# Add feature importance summary
|
1038 |
+
stats_text += "📊 CLUSTERING FEATURES USED:\n"
|
1039 |
+
stats_text += f" • Storm intensity (max/mean/std wind & pressure)\n"
|
1040 |
+
stats_text += f" • Track characteristics (length, curvature, distance)\n"
|
1041 |
+
stats_text += f" • Genesis location (lat/lon)\n"
|
1042 |
+
stats_text += f" • Geographic range (lat/lon span)\n"
|
1043 |
+
stats_text += f" • Total features: {len(feature_cols)}\n\n"
|
1044 |
|
1045 |
+
stats_text += f"🎯 ALGORITHM: {method.upper()} + DBSCAN clustering\n"
|
1046 |
+
stats_text += f"📈 CLUSTERS FOUND: {len([c for c in storm_features_viz['cluster'].unique() if c != -1])}\n"
|
1047 |
|
1048 |
+
except Exception as stats_error:
|
1049 |
+
stats_text = f"Error generating cluster statistics: {str(stats_error)}"
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1050 |
|
1051 |
return fig, stats_text, storm_features_viz
|
1052 |
|
|
|
1718 |
total_storms = 0
|
1719 |
total_records = 0
|
1720 |
year_range_display = "Unknown"
|
1721 |
+
available_years = [str(year) for year in range(2000, 2026)]
|
1722 |
+
|
1723 |
with gr.Blocks(title="Enhanced Typhoon Analysis Platform", theme=gr.themes.Soft()) as demo:
|
1724 |
gr.Markdown("# 🌪️ Enhanced Typhoon Analysis Platform")
|
1725 |
gr.Markdown("Advanced ML clustering, CNN predictions, and comprehensive tropical cyclone analysis including Tropical Depressions")
|
|
|
1837 |
outputs=[intensity_output, confidence_output]
|
1838 |
)
|
1839 |
|
1840 |
+
gr.Markdown("""
|
1841 |
gr.Markdown("""
|
1842 |
### 🧠 Prediction Features:
|
1843 |
- **Environmental Analysis**: Considers ENSO, latitude, seasonality
|