euler314 committed on
Commit
e2d999e
·
verified ·
1 Parent(s): 76ee46f

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +102 -0
app.py CHANGED
@@ -1,3 +1,77 @@
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1
  import os
2
  import argparse
3
  import logging
@@ -10,6 +84,13 @@ import csv
10
  import gradio as gr
11
  import pandas as pd
12
  import numpy as np
 
 
 
 
 
 
 
13
  import matplotlib.pyplot as plt
14
  import matplotlib.animation as animation
15
  from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas
@@ -1162,6 +1243,27 @@ def create_interface():
1162
  outputs=[regression_plot, slopes_text, lon_regression_results]
1163
  )
1164
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1165
  with gr.Tab("Tropical Cyclone Path Animation"):
1166
  with gr.Row():
1167
  year_dropdown = gr.Dropdown(label="Year", choices=[str(y) for y in range(1950, 2025)], value="2000")
 
1
+
2
+ # ------------------------------------------------------------
3
+ # NEW — Dimensional‑reduction / clustering feature
4
+ # ------------------------------------------------------------
5
def _count_clusters(labels):
    """Return the number of distinct cluster labels, excluding noise.

    DBSCAN marks noise points with the integer label ``-1``; that label is
    not a cluster and must not be counted.
    """
    distinct = {int(label) for label in labels}
    distinct.discard(-1)
    return len(distinct)


def dim_reduction(method='t-SNE', perplexity=30, n_neighbors=15, min_dist=0.1,
                  use_cnn=False, cluster_eps=0.5, cluster_min_samples=5):
    """
    Compute a 2-D embedding (t-SNE or UMAP) of per-storm features and
    cluster the embedded points with DBSCAN.

    Reads the module-level ``typhoon_max`` table (one row per storm) and,
    when ``use_cnn`` is set and TensorFlow is available, the module-level
    ``typhoon_data`` table (grouped by ``SID``).

    Args:
        method: ``'t-SNE'`` selects scikit-learn's TSNE; any other value
            selects UMAP.
        perplexity: t-SNE perplexity. Clamped to ``n_storms - 1`` because
            scikit-learn requires it to be smaller than the sample count.
        n_neighbors: UMAP ``n_neighbors`` (coerced to ``int``).
        min_dist: UMAP ``min_dist``.
        use_cnn: If true and ``TF_AVAILABLE``, embed each storm's
            ``USA_WIND`` series with a small 1-D CNN instead of using the
            tabular features.
        cluster_eps: DBSCAN ``eps``, applied to the 2-D coordinates.
        cluster_min_samples: DBSCAN ``min_samples`` (coerced to ``int``).

    Returns:
        ``(figure, info)`` where ``figure`` is a Plotly figure and ``info``
        is a one-line summary string. ``figure`` is ``None`` when no data is
        loaded or there are too few storms to embed.
    """
    global typhoon_max, typhoon_data
    if typhoon_max is None:
        return None, "No typhoon data loaded."

    df = typhoon_max.copy()
    n_storms = len(df)
    if n_storms < 3:
        # Neither reducer can produce a meaningful 2-D layout from fewer
        # than three points; bail out instead of raising inside the UI.
        return None, "Not enough storms to compute an embedding."

    # UI sliders may deliver whole numbers as floats; these two parameters
    # must be integers for UMAP / DBSCAN.
    n_neighbors = int(n_neighbors)
    cluster_min_samples = int(cluster_min_samples)

    # Select features
    if use_cnn and TF_AVAILABLE:
        MAX_LEN = 100
        grouped = typhoon_data.groupby('SID')
        sequences = []
        for sid in df['SID']:
            try:
                winds = grouped.get_group(sid)['USA_WIND'].fillna(0).values
            except KeyError:
                # Storm has no track rows: keep an all-zero sequence.
                winds = []
            winds = winds[:MAX_LEN]
            # Zero-pad (or truncate) every series to a fixed length.
            seq = np.zeros(MAX_LEN, dtype=np.float32)
            seq[:len(winds)] = winds
            sequences.append(seq)
        X = np.stack(sequences)[..., None]

        # NOTE(review): the network is never compiled or fitted, so the
        # "latent vectors" come from randomly initialised weights and are
        # not seeded here -- confirm this is intended.
        model = models.Sequential([
            layers.Conv1D(16, 5, activation='relu', input_shape=(MAX_LEN, 1)),
            layers.MaxPool1D(2),
            layers.Conv1D(32, 3, activation='relu'),
            layers.GlobalAveragePooling1D(),
            layers.Dense(32, activation='relu')
        ])
        features_for_dr = model.predict(X, verbose=0)
        feature_text = "CNN latent vectors"
    else:
        features = ['USA_WIND', 'USA_PRES', 'LAT', 'LON']
        features_for_dr = df[features].fillna(0).values
        feature_text = ", ".join(features)

    from sklearn.preprocessing import StandardScaler
    X_std = StandardScaler().fit_transform(features_for_dr)

    if method == 't-SNE':
        from sklearn.manifold import TSNE
        # scikit-learn rejects perplexity >= n_samples.
        perplexity = min(perplexity, n_storms - 1)
        reducer = TSNE(n_components=2, init='pca', perplexity=perplexity,
                       learning_rate='auto', random_state=0)
    else:
        reducer = umap.UMAP(n_neighbors=n_neighbors, min_dist=min_dist,
                            n_components=2, random_state=0)
    coords = reducer.fit_transform(X_std)

    df['DR_X'] = coords[:, 0]
    df['DR_Y'] = coords[:, 1]

    from sklearn.cluster import DBSCAN
    clusterer = DBSCAN(eps=cluster_eps, min_samples=cluster_min_samples)
    labels = clusterer.fit_predict(coords)
    df['Cluster'] = labels.astype(str)

    import plotly.express as px
    fig = px.scatter(
        df, x='DR_X', y='DR_Y',
        color='Category', symbol='Cluster',
        hover_data={'Name': df['NAME'], 'Year': df['Year'], 'Wind': df['USA_WIND'], 'Pressure': df['USA_PRES']},
        title=f"{method} embedding (features: {feature_text})"
    )
    fig.update_traces(marker=dict(size=6, line=dict(width=0.5, color='black')))
    # Labels are integers, so noise (-1) has to be excluded numerically.
    info = f"Method: {method} | Points: {len(df)} | Clusters: {_count_clusters(labels)}"
    return fig, info
74
+
75
  import os
76
  import argparse
77
  import logging
 
84
  import gradio as gr
85
  import pandas as pd
86
  import numpy as np
87
+ import umap # NEW
88
+ try:
89
+ import tensorflow as tf
90
+ from tensorflow.keras import layers, models
91
+ TF_AVAILABLE = True
92
+ except Exception:
93
+ TF_AVAILABLE = False
94
  import matplotlib.pyplot as plt
95
  import matplotlib.animation as animation
96
  from matplotlib.backends.backend_agg import FigureCanvasAgg as FigureCanvas
 
1243
  outputs=[regression_plot, slopes_text, lon_regression_results]
1244
  )
1245
 
1246
+
1247
+ # ---- Dimensional Reduction (t‑SNE / UMAP) ----
1248
+ with gr.Tab("Dimensional Reduction (t‑SNE / UMAP)"):
1249
+ dr_method = gr.Dropdown(['t-SNE','UMAP'], label="Method", value='t-SNE')
1250
+ tsne_perp = gr.Slider(5,100,step=5,value=30,label="t‑SNE Perplexity")
1251
+ umap_nn = gr.Slider(5,100,step=1,value=15,label="UMAP n_neighbors")
1252
+ umap_md = gr.Slider(0.0,1.0,step=0.05,value=0.1,label="UMAP min_dist")
1253
+ use_cnn_box = gr.Checkbox(label="Use CNN latent features", value=False, visible=TF_AVAILABLE)
1254
+ cluster_eps = gr.Slider(0.1,5.0,step=0.1,value=0.5,label="DBSCAN ε")
1255
+ cluster_min = gr.Slider(2,20,step=1,value=5,label="DBSCAN min_samples")
1256
+ dr_btn = gr.Button("Compute Embedding")
1257
+ dr_plot = gr.Plot()
1258
+ dr_info = gr.Textbox(label="Info")
1259
def _dr_wrapper(method, perp, nn, md, cnn, eps, mns):
    """Forward the tab's widget values to ``dim_reduction`` by keyword."""
    return dim_reduction(
        method=method,
        perplexity=perp,
        n_neighbors=nn,
        min_dist=md,
        use_cnn=cnn,
        cluster_eps=eps,
        cluster_min_samples=mns,
    )
1261
+ dr_btn.click(
1262
+ fn=_dr_wrapper,
1263
+ inputs=[dr_method, tsne_perp, umap_nn, umap_md, use_cnn_box, cluster_eps, cluster_min],
1264
+ outputs=[dr_plot, dr_info]
1265
+ )
1266
+
1267
  with gr.Tab("Tropical Cyclone Path Animation"):
1268
  with gr.Row():
1269
  year_dropdown = gr.Dropdown(label="Year", choices=[str(y) for y in range(1950, 2025)], value="2000")