euler314 commited on
Commit
473c7a8
·
verified ·
1 Parent(s): f77bb4b

Update app.py

Browse files
Files changed (1) hide show
  1. app.py +574 -221
app.py CHANGED
@@ -48,21 +48,37 @@ logging.basicConfig(
48
  parser = argparse.ArgumentParser(description='Typhoon Analysis Dashboard')
49
  parser.add_argument('--data_path', type=str, default=os.getcwd(), help='Path to the data directory')
50
  args = parser.parse_args()
51
- DATA_PATH = args.data_path
52
 
53
- # Update data paths for Huggingface Spaces
54
- TEMP_DIR = tempfile.gettempdir()
55
- DATA_PATH = os.environ.get('DATA_PATH', TEMP_DIR)
 
 
 
 
 
 
56
 
57
- # Ensure directory exists
58
- os.makedirs(DATA_PATH, exist_ok=True)
 
 
 
 
 
 
 
 
 
 
 
59
 
60
- # Update your file paths
61
  ONI_DATA_PATH = os.path.join(DATA_PATH, 'oni_data.csv')
62
  TYPHOON_DATA_PATH = os.path.join(DATA_PATH, 'processed_typhoon_data.csv')
63
  MERGED_DATA_CSV = os.path.join(DATA_PATH, 'merged_typhoon_era5_data.csv')
64
 
65
- # IBTrACS settings (for typhoon options)
66
  BASIN_FILES = {
67
  'EP': 'ibtracs.EP.list.v04r01.csv',
68
  'NA': 'ibtracs.NA.list.v04r01.csv',
@@ -118,62 +134,295 @@ regions = {
118
  "Philippines": {"lat_min": 5, "lat_max": 21, "lon_min": 115, "lon_max": 130}
119
  }
120
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
121
  # -----------------------------
122
  # ONI and Typhoon Data Functions
123
  # -----------------------------
 
124
  def download_oni_file(url, filename):
125
- response = requests.get(url)
126
- response.raise_for_status()
127
- with open(filename, 'wb') as f:
128
- f.write(response.content)
129
- return True
 
 
 
 
 
 
 
 
 
 
 
130
 
131
  def convert_oni_ascii_to_csv(input_file, output_file):
 
132
  data = defaultdict(lambda: [''] * 12)
133
  season_to_month = {'DJF':12, 'JFM':1, 'FMA':2, 'MAM':3, 'AMJ':4, 'MJJ':5,
134
  'JJA':6, 'JAS':7, 'ASO':8, 'SON':9, 'OND':10, 'NDJ':11}
135
- with open(input_file, 'r') as f:
136
- lines = f.readlines()[1:]
137
- for line in lines:
138
- parts = line.split()
139
- if len(parts) >= 4:
140
- season, year, anom = parts[0], parts[1], parts[-1]
141
- if season in season_to_month:
142
- month = season_to_month[season]
143
- if season == 'DJF':
144
- year = str(int(year)-1)
145
- data[year][month-1] = anom
146
- with open(output_file, 'w', newline='') as f:
147
- writer = csv.writer(f)
148
- writer.writerow(['Year','Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec'])
149
- for year in sorted(data.keys()):
150
- writer.writerow([year] + data[year])
 
 
 
 
 
 
 
 
151
 
152
  def update_oni_data():
 
153
  url = "https://www.cpc.ncep.noaa.gov/data/indices/oni.ascii.txt"
154
  temp_file = os.path.join(DATA_PATH, "temp_oni.ascii.txt")
155
  input_file = os.path.join(DATA_PATH, "oni.ascii.txt")
156
  output_file = ONI_DATA_PATH
157
- if download_oni_file(url, temp_file):
158
- if not os.path.exists(input_file) or not os.path.exists(output_file):
159
- os.replace(temp_file, input_file)
160
- convert_oni_ascii_to_csv(input_file, output_file)
 
 
 
 
161
  else:
162
- os.remove(temp_file)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
163
 
164
- def load_data(oni_path, typhoon_path):
165
- # Create default empty DataFrames with minimum structure
 
166
  oni_data = pd.DataFrame({'Year': [], 'Jan': [], 'Feb': [], 'Mar': [], 'Apr': [],
167
  'May': [], 'Jun': [], 'Jul': [], 'Aug': [], 'Sep': [],
168
  'Oct': [], 'Nov': [], 'Dec': []})
169
 
170
- # Try to load ONI data or create it
171
  if not os.path.exists(oni_path):
172
  logging.warning(f"ONI data file not found: {oni_path}")
173
  update_oni_data()
174
 
175
  try:
176
  oni_data = pd.read_csv(oni_path)
 
177
  except Exception as e:
178
  logging.error(f"Error loading ONI data: {e}")
179
  update_oni_data()
@@ -182,56 +431,127 @@ def load_data(oni_path, typhoon_path):
182
  except Exception as e:
183
  logging.error(f"Still can't load ONI data: {e}")
184
 
185
- # For typhoon data, focus on getting WP data
 
 
 
186
  if os.path.exists(typhoon_path):
187
  try:
188
  typhoon_data = pd.read_csv(typhoon_path, low_memory=False)
189
- typhoon_data['ISO_TIME'] = pd.to_datetime(typhoon_data['ISO_TIME'], errors='coerce')
190
- typhoon_data = typhoon_data.dropna(subset=['ISO_TIME'])
191
- # Log WP data count
192
- wp_count = len(typhoon_data[typhoon_data['SID'].str.startswith('WP')])
193
- logging.info(f"Loaded {wp_count} Western Pacific typhoon records")
194
- except Exception as e:
195
- logging.error(f"Error loading typhoon data: {e}")
196
- typhoon_data = pd.DataFrame()
197
- else:
198
- logging.error(f"Typhoon data file not found: {typhoon_path}")
199
- # Download WP typhoon data directly from IBTrACS if available
200
- try:
201
- if LOCAL_IBTRACS_PATH and os.path.exists(LOCAL_IBTRACS_PATH):
202
- logging.info("Loading WP data from local IBTrACS file")
203
- wp_data = pd.read_csv(LOCAL_IBTRACS_PATH, low_memory=False)
204
- typhoon_data = wp_data
205
- logging.info(f"Loaded {len(typhoon_data)} WP records from IBTrACS")
206
  else:
207
- # Try to download WP file if not exists
208
- logging.info("Downloading WP basin file...")
209
- response = requests.get(IBTRACS_BASE_URL + BASIN_FILES['WP'])
210
- if response.status_code == 200:
211
- os.makedirs(os.path.dirname(LOCAL_IBTRACS_PATH), exist_ok=True)
212
- with open(LOCAL_IBTRACS_PATH, 'wb') as f:
213
- f.write(response.content)
214
- wp_data = pd.read_csv(LOCAL_IBTRACS_PATH, low_memory=False)
215
- typhoon_data = wp_data
216
- logging.info(f"Downloaded and loaded {len(typhoon_data)} WP records")
217
  except Exception as e:
218
- logging.error(f"Failed to load or download WP data: {e}")
219
- # Create minimal WP sample data to prevent crashes
220
- typhoon_data = pd.DataFrame({
221
- 'SID': ['WP012000', 'WP022000', 'WP032000'],
222
- 'ISO_TIME': [pd.Timestamp('2000-01-01'), pd.Timestamp('2000-02-01'), pd.Timestamp('2000-03-01')],
223
- 'NAME': ['SAMPLE_WP1', 'SAMPLE_WP2', 'SAMPLE_WP3'],
224
- 'SEASON': [2000, 2000, 2000],
225
- 'LAT': [20.0, 21.0, 22.0],
226
- 'LON': [140.0, 141.0, 142.0],
227
- 'USA_WIND': [50.0, 60.0, 70.0],
228
- 'USA_PRES': [990.0, 980.0, 970.0]
229
- })
230
- logging.warning("Created minimal Western Pacific sample data to prevent crashes")
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
231
 
232
  return oni_data, typhoon_data
233
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
234
  def process_oni_data(oni_data):
 
235
  oni_long = oni_data.melt(id_vars=['Year'], var_name='Month', value_name='ONI')
236
  month_map = {'Jan':'01','Feb':'02','Mar':'03','Apr':'04','May':'05','Jun':'06',
237
  'Jul':'07','Aug':'08','Sep':'09','Oct':'10','Nov':'11','Dec':'12'}
@@ -241,24 +561,30 @@ def process_oni_data(oni_data):
241
  return oni_long
242
 
243
  def process_typhoon_data(typhoon_data):
 
244
  typhoon_data['ISO_TIME'] = pd.to_datetime(typhoon_data['ISO_TIME'], errors='coerce')
245
  typhoon_data['USA_WIND'] = pd.to_numeric(typhoon_data['USA_WIND'], errors='coerce')
246
  typhoon_data['USA_PRES'] = pd.to_numeric(typhoon_data['USA_PRES'], errors='coerce')
247
  typhoon_data['LON'] = pd.to_numeric(typhoon_data['LON'], errors='coerce')
 
248
  logging.info(f"Unique basins in typhoon_data: {typhoon_data['SID'].str[:2].unique()}")
 
249
  typhoon_max = typhoon_data.groupby('SID').agg({
250
  'USA_WIND':'max','USA_PRES':'min','ISO_TIME':'first','SEASON':'first','NAME':'first',
251
  'LAT':'first','LON':'first'
252
  }).reset_index()
 
253
  typhoon_max['Month'] = typhoon_max['ISO_TIME'].dt.strftime('%m')
254
  typhoon_max['Year'] = typhoon_max['ISO_TIME'].dt.year
255
  typhoon_max['Category'] = typhoon_max['USA_WIND'].apply(categorize_typhoon)
256
  return typhoon_max
257
 
258
  def merge_data(oni_long, typhoon_max):
 
259
  return pd.merge(typhoon_max, oni_long, on=['Year','Month'])
260
 
261
  def categorize_typhoon(wind_speed):
 
262
  if wind_speed >= 137:
263
  return 'C5 Super Typhoon'
264
  elif wind_speed >= 113:
@@ -275,6 +601,7 @@ def categorize_typhoon(wind_speed):
275
  return 'Tropical Depression'
276
 
277
  def classify_enso_phases(oni_value):
 
278
  if isinstance(oni_value, pd.Series):
279
  oni_value = oni_value.iloc[0]
280
  if oni_value >= 0.5:
@@ -287,84 +614,64 @@ def classify_enso_phases(oni_value):
287
  # -----------------------------
288
  # Regression Functions
289
  # -----------------------------
 
290
  def perform_wind_regression(start_year, start_month, end_year, end_month):
 
291
  start_date = datetime(start_year, start_month, 1)
292
  end_date = datetime(end_year, end_month, 28)
293
  data = merged_data[(merged_data['ISO_TIME']>=start_date) & (merged_data['ISO_TIME']<=end_date)].dropna(subset=['USA_WIND','ONI'])
294
  data['severe_typhoon'] = (data['USA_WIND']>=64).astype(int)
295
  X = sm.add_constant(data['ONI'])
296
  y = data['severe_typhoon']
297
- model = sm.Logit(y, X).fit(disp=0)
298
- beta_1 = model.params['ONI']
299
- exp_beta_1 = np.exp(beta_1)
300
- p_value = model.pvalues['ONI']
301
- return f"Wind Regression: β1={beta_1:.4f}, Odds Ratio={exp_beta_1:.4f}, P-value={p_value:.4f}"
 
 
 
302
 
303
  def perform_pressure_regression(start_year, start_month, end_year, end_month):
 
304
  start_date = datetime(start_year, start_month, 1)
305
  end_date = datetime(end_year, end_month, 28)
306
  data = merged_data[(merged_data['ISO_TIME']>=start_date) & (merged_data['ISO_TIME']<=end_date)].dropna(subset=['USA_PRES','ONI'])
307
  data['intense_typhoon'] = (data['USA_PRES']<=950).astype(int)
308
  X = sm.add_constant(data['ONI'])
309
  y = data['intense_typhoon']
310
- model = sm.Logit(y, X).fit(disp=0)
311
- beta_1 = model.params['ONI']
312
- exp_beta_1 = np.exp(beta_1)
313
- p_value = model.pvalues['ONI']
314
- return f"Pressure Regression: β1={beta_1:.4f}, Odds Ratio={exp_beta_1:.4f}, P-value={p_value:.4f}"
 
 
 
315
 
316
  def perform_longitude_regression(start_year, start_month, end_year, end_month):
 
317
  start_date = datetime(start_year, start_month, 1)
318
  end_date = datetime(end_year, end_month, 28)
319
  data = merged_data[(merged_data['ISO_TIME']>=start_date) & (merged_data['ISO_TIME']<=end_date)].dropna(subset=['LON','ONI'])
320
  data['western_typhoon'] = (data['LON']<=140).astype(int)
321
  X = sm.add_constant(data['ONI'])
322
  y = data['western_typhoon']
323
- model = sm.OLS(y, sm.add_constant(X)).fit()
324
- beta_1 = model.params['ONI']
325
- exp_beta_1 = np.exp(beta_1)
326
- p_value = model.pvalues['ONI']
327
- return f"Longitude Regression: β1={beta_1:.4f}, Odds Ratio={exp_beta_1:.4f}, P-value={p_value:.4f}"
328
-
329
- # -----------------------------
330
- # IBTrACS Data Loading
331
- # -----------------------------
332
- def load_ibtracs_data():
333
- ibtracs_data = {}
334
- for basin, filename in BASIN_FILES.items():
335
- local_path = os.path.join(DATA_PATH, filename)
336
- if not os.path.exists(local_path):
337
- logging.info(f"Downloading {basin} basin file...")
338
- response = requests.get(IBTRACS_BASE_URL+filename)
339
- response.raise_for_status()
340
- with open(local_path, 'wb') as f:
341
- f.write(response.content)
342
- logging.info(f"Downloaded {basin} basin file.")
343
- try:
344
- logging.info(f"--> Starting to read in IBTrACS data for basin {basin}")
345
- ds = tracks.TrackDataset(source='ibtracs', ibtracs_url=local_path)
346
- logging.info(f"--> Completed reading in IBTrACS data for basin {basin}")
347
- ibtracs_data[basin] = ds
348
- except ValueError as e:
349
- logging.warning(f"Skipping basin {basin} due to error: {e}")
350
- ibtracs_data[basin] = None
351
- return ibtracs_data
352
-
353
- ibtracs = load_ibtracs_data()
354
-
355
- # -----------------------------
356
- # Load & Process Data
357
- # -----------------------------
358
- update_oni_data()
359
- oni_data, typhoon_data = load_data(ONI_DATA_PATH, TYPHOON_DATA_PATH)
360
- oni_long = process_oni_data(oni_data)
361
- typhoon_max = process_typhoon_data(typhoon_data)
362
- merged_data = merge_data(oni_long, typhoon_max)
363
 
364
  # -----------------------------
365
  # Visualization Functions
366
  # -----------------------------
 
367
  def generate_typhoon_tracks(filtered_data, typhoon_search):
 
368
  fig = go.Figure()
369
  for sid in filtered_data['SID'].unique():
370
  storm_data = filtered_data[filtered_data['SID'] == sid]
@@ -390,6 +697,7 @@ def generate_typhoon_tracks(filtered_data, typhoon_search):
390
  return fig
391
 
392
  def generate_wind_oni_scatter(filtered_data, typhoon_search):
 
393
  fig = px.scatter(filtered_data, x='ONI', y='USA_WIND', color='Category',
394
  hover_data=['NAME','Year','Category'],
395
  title='Wind Speed vs ONI',
@@ -407,6 +715,7 @@ def generate_wind_oni_scatter(filtered_data, typhoon_search):
407
  return fig
408
 
409
  def generate_pressure_oni_scatter(filtered_data, typhoon_search):
 
410
  fig = px.scatter(filtered_data, x='ONI', y='USA_PRES', color='Category',
411
  hover_data=['NAME','Year','Category'],
412
  title='Pressure vs ONI',
@@ -424,21 +733,26 @@ def generate_pressure_oni_scatter(filtered_data, typhoon_search):
424
  return fig
425
 
426
  def generate_regression_analysis(filtered_data):
 
427
  fig = px.scatter(filtered_data, x='LON', y='ONI', hover_data=['NAME'],
428
  title='Typhoon Generation Longitude vs ONI (All Years)')
429
  if len(filtered_data) > 1:
430
  X = np.array(filtered_data['LON']).reshape(-1,1)
431
  y = filtered_data['ONI']
432
- model = sm.OLS(y, sm.add_constant(X)).fit()
433
- y_pred = model.predict(sm.add_constant(X))
434
- fig.add_trace(go.Scatter(x=filtered_data['LON'], y=y_pred, mode='lines', name='Regression Line'))
435
- slope = model.params[1]
436
- slopes_text = f"All Years Slope: {slope:.4f}"
 
 
 
437
  else:
438
  slopes_text = "Insufficient data for regression"
439
  return fig, slopes_text
440
 
441
  def generate_main_analysis(start_year, start_month, end_year, end_month, enso_phase, typhoon_search):
 
442
  start_date = datetime(start_year, start_month, 1)
443
  end_date = datetime(end_year, end_month, 28)
444
  filtered_data = merged_data[(merged_data['ISO_TIME']>=start_date) & (merged_data['ISO_TIME']<=end_date)].copy()
@@ -452,6 +766,7 @@ def generate_main_analysis(start_year, start_month, end_year, end_month, enso_ph
452
  return tracks_fig, wind_scatter, pressure_scatter, regression_fig, slopes_text
453
 
454
  def get_full_tracks(start_year, start_month, end_year, end_month, enso_phase, typhoon_search):
 
455
  start_date = datetime(start_year, start_month, 1)
456
  end_date = datetime(end_year, end_month, 28)
457
  filtered_data = merged_data[(merged_data['ISO_TIME']>=start_date) & (merged_data['ISO_TIME']<=end_date)].copy()
@@ -464,7 +779,7 @@ def get_full_tracks(start_year, start_month, end_year, end_month, enso_phase, ty
464
  for sid in unique_storms:
465
  storm_data = typhoon_data[typhoon_data['SID']==sid]
466
  name = storm_data['NAME'].iloc[0] if pd.notnull(storm_data['NAME'].iloc[0]) else "Unnamed"
467
- basin = storm_data['SID'].iloc[0][:2] # First 2 characters often denote basin
468
  storm_oni = filtered_data[filtered_data['SID']==sid]['ONI'].iloc[0]
469
  color = 'red' if storm_oni>=0.5 else ('blue' if storm_oni<=-0.5 else 'green')
470
  fig.add_trace(go.Scattergeo(
@@ -508,21 +823,25 @@ def get_full_tracks(start_year, start_month, end_year, end_month, enso_phase, ty
508
  return fig, f"Total typhoons displayed: {count}"
509
 
510
  def get_wind_analysis(start_year, start_month, end_year, end_month, enso_phase, typhoon_search):
 
511
  results = generate_main_analysis(start_year, start_month, end_year, end_month, enso_phase, typhoon_search)
512
  regression = perform_wind_regression(start_year, start_month, end_year, end_month)
513
  return results[1], regression
514
 
515
  def get_pressure_analysis(start_year, start_month, end_year, end_month, enso_phase, typhoon_search):
 
516
  results = generate_main_analysis(start_year, start_month, end_year, end_month, enso_phase, typhoon_search)
517
  regression = perform_pressure_regression(start_year, start_month, end_year, end_month)
518
  return results[2], regression
519
 
520
  def get_longitude_analysis(start_year, start_month, end_year, end_month, enso_phase, typhoon_search):
 
521
  results = generate_main_analysis(start_year, start_month, end_year, end_month, enso_phase, typhoon_search)
522
  regression = perform_longitude_regression(start_year, start_month, end_year, end_month)
523
  return results[3], results[4], regression
524
 
525
  def categorize_typhoon_by_standard(wind_speed, standard='atlantic'):
 
526
  if standard=='taiwan':
527
  wind_speed_ms = wind_speed * 0.514444
528
  if wind_speed_ms >= 51.0:
@@ -548,11 +867,13 @@ def categorize_typhoon_by_standard(wind_speed, standard='atlantic'):
548
  return 'Tropical Depression', atlantic_standard['Tropical Depression']['hex']
549
 
550
  # -----------------------------
551
- # Updated TSNE Cluster Function with Mean Curves
552
  # -----------------------------
 
553
  def update_route_clusters(start_year, start_month, end_year, end_month, enso_value, season):
 
554
  try:
555
- # Merge raw typhoon data with ONI so each storm has multiple observations.
556
  raw_data = typhoon_data.copy()
557
  raw_data['Year'] = raw_data['ISO_TIME'].dt.year
558
  raw_data['Month'] = raw_data['ISO_TIME'].dt.strftime('%m')
@@ -578,7 +899,7 @@ def update_route_clusters(start_year, start_month, end_year, end_month, enso_val
578
  logging.info("WP regional filter returned no data; using all filtered data.")
579
  wp_data = merged_raw
580
 
581
- # Group by storm ID so each storm has multiple observations
582
  all_storms_data = []
583
  for sid, group in wp_data.groupby('SID'):
584
  group = group.sort_values('ISO_TIME')
@@ -587,20 +908,22 @@ def update_route_clusters(start_year, start_month, end_year, end_month, enso_val
587
  lons = group['LON'].astype(float).values
588
  if len(lons) < 2:
589
  continue
590
- # Also extract wind and pressure curves
591
  wind = group['USA_WIND'].astype(float).values if 'USA_WIND' in group.columns else None
592
  pres = group['USA_PRES'].astype(float).values if 'USA_PRES' in group.columns else None
593
  all_storms_data.append((sid, lons, lats, times, wind, pres))
 
594
  logging.info(f"Storms available for TSNE after grouping: {len(all_storms_data)}")
595
  if not all_storms_data:
596
  return go.Figure(), go.Figure(), make_subplots(rows=2, cols=1), "No valid storms for clustering."
597
 
598
- # Interpolate each storm's route, wind, and pressure to a common length
599
  max_length = max(len(item[1]) for item in all_storms_data)
600
  route_vectors = []
601
  wind_curves = []
602
  pres_curves = []
603
  storm_ids = []
 
604
  for sid, lons, lats, times, wind, pres in all_storms_data:
605
  t = np.linspace(0, 1, len(lons))
606
  t_new = np.linspace(0, 1, max_length)
@@ -610,12 +933,15 @@ def update_route_clusters(start_year, start_month, end_year, end_month, enso_val
610
  except Exception as ex:
611
  logging.error(f"Interpolation error for storm {sid}: {ex}")
612
  continue
 
613
  route_vector = np.column_stack((lon_interp, lat_interp)).flatten()
614
  if np.isnan(route_vector).any():
615
  continue
 
616
  route_vectors.append(route_vector)
617
  storm_ids.append(sid)
618
- # Interpolate wind and pressure if available
 
619
  if wind is not None and len(wind) >= 2:
620
  try:
621
  wind_interp = interp1d(t, wind, kind='linear', fill_value='extrapolate')(t_new)
@@ -624,6 +950,7 @@ def update_route_clusters(start_year, start_month, end_year, end_month, enso_val
624
  wind_interp = np.full(max_length, np.nan)
625
  else:
626
  wind_interp = np.full(max_length, np.nan)
 
627
  if pres is not None and len(pres) >= 2:
628
  try:
629
  pres_interp = interp1d(t, pres, kind='linear', fill_value='extrapolate')(t_new)
@@ -632,8 +959,10 @@ def update_route_clusters(start_year, start_month, end_year, end_month, enso_val
632
  pres_interp = np.full(max_length, np.nan)
633
  else:
634
  pres_interp = np.full(max_length, np.nan)
 
635
  wind_curves.append(wind_interp)
636
  pres_curves.append(pres_interp)
 
637
  logging.info(f"Storms with valid route vectors: {len(route_vectors)}")
638
  if len(route_vectors) == 0:
639
  return go.Figure(), go.Figure(), make_subplots(rows=2, cols=1), "No valid storms after interpolation."
@@ -646,7 +975,7 @@ def update_route_clusters(start_year, start_month, end_year, end_month, enso_val
646
  tsne = TSNE(n_components=2, random_state=42, verbose=1)
647
  tsne_results = tsne.fit_transform(route_vectors)
648
 
649
- # Dynamic DBSCAN: choose eps to yield roughly 5 to 20 clusters
650
  selected_labels = None
651
  selected_eps = None
652
  for eps in np.linspace(1.0, 10.0, 91):
@@ -657,16 +986,19 @@ def update_route_clusters(start_year, start_month, end_year, end_month, enso_val
657
  selected_labels = labels
658
  selected_eps = eps
659
  break
 
660
  if selected_labels is None:
661
  selected_eps = 5.0
662
  dbscan = DBSCAN(eps=selected_eps, min_samples=3)
663
  selected_labels = dbscan.fit_predict(tsne_results)
 
664
  logging.info(f"Selected DBSCAN eps: {selected_eps:.2f} yielding {len(set(selected_labels)-{-1})} clusters.")
665
 
666
  # TSNE scatter plot
667
  fig_tsne = go.Figure()
668
  colors = px.colors.qualitative.Safe
669
  unique_labels = sorted(set(selected_labels) - {-1})
 
670
  for i, label in enumerate(unique_labels):
671
  indices = np.where(selected_labels == label)[0]
672
  fig_tsne.add_trace(go.Scatter(
@@ -676,6 +1008,7 @@ def update_route_clusters(start_year, start_month, end_year, end_month, enso_val
676
  marker=dict(color=colors[i % len(colors)]),
677
  name=f"Cluster {label}"
678
  ))
 
679
  noise_indices = np.where(selected_labels == -1)[0]
680
  if len(noise_indices) > 0:
681
  fig_tsne.add_trace(go.Scatter(
@@ -685,15 +1018,17 @@ def update_route_clusters(start_year, start_month, end_year, end_month, enso_val
685
  marker=dict(color='grey'),
686
  name='Noise'
687
  ))
 
688
  fig_tsne.update_layout(
689
  title="t-SNE of Storm Routes",
690
  xaxis_title="t-SNE Dim 1",
691
  yaxis_title="t-SNE Dim 2"
692
  )
693
 
694
- # For each cluster, compute mean route, and compute mean wind and pressure curves along normalized route index.
695
  fig_routes = go.Figure()
696
- cluster_stats = [] # To hold mean curves per cluster
 
697
  for i, label in enumerate(unique_labels):
698
  indices = np.where(selected_labels == label)[0]
699
  cluster_ids = [storm_ids[j] for j in indices]
@@ -702,6 +1037,7 @@ def update_route_clusters(start_year, start_month, end_year, end_month, enso_val
702
  mean_route = mean_vector.reshape((max_length, 2))
703
  mean_lon = mean_route[:, 0]
704
  mean_lat = mean_route[:, 1]
 
705
  fig_routes.add_trace(go.Scattergeo(
706
  lon=mean_lon,
707
  lat=mean_lat,
@@ -709,17 +1045,19 @@ def update_route_clusters(start_year, start_month, end_year, end_month, enso_val
709
  line=dict(width=4, color=colors[i % len(colors)]),
710
  name=f"Cluster {label} Mean Route"
711
  ))
712
- # Retrieve raw wind and pressure curves for storms in this cluster
 
713
  cluster_winds = wind_curves[indices, :]
714
  cluster_pres = pres_curves[indices, :]
715
  mean_wind_curve = np.nanmean(cluster_winds, axis=0)
716
  mean_pres_curve = np.nanmean(cluster_pres, axis=0)
717
  cluster_stats.append((label, mean_wind_curve, mean_pres_curve))
718
 
719
- # Create a cluster stats plot with curves vs normalized route index (0 to 1)
720
  x_axis = np.linspace(0, 1, max_length)
721
  fig_stats = make_subplots(rows=2, cols=1, shared_xaxes=True,
722
  subplot_titles=("Mean Wind Speed (knots)", "Mean MSLP (hPa)"))
 
723
  for i, (label, wind_curve, pres_curve) in enumerate(cluster_stats):
724
  fig_stats.add_trace(go.Scatter(
725
  x=x_axis,
@@ -728,6 +1066,7 @@ def update_route_clusters(start_year, start_month, end_year, end_month, enso_val
728
  line=dict(width=2, color=colors[i % len(colors)]),
729
  name=f"Cluster {label} Mean Wind"
730
  ), row=1, col=1)
 
731
  fig_stats.add_trace(go.Scatter(
732
  x=x_axis,
733
  y=pres_curve,
@@ -735,6 +1074,7 @@ def update_route_clusters(start_year, start_month, end_year, end_month, enso_val
735
  line=dict(width=2, color=colors[i % len(colors)]),
736
  name=f"Cluster {label} Mean MSLP"
737
  ), row=2, col=1)
 
738
  fig_stats.update_layout(
739
  title="Cluster Mean Curves",
740
  xaxis_title="Normalized Route Index",
@@ -746,28 +1086,34 @@ def update_route_clusters(start_year, start_month, end_year, end_month, enso_val
746
 
747
  info = f"TSNE clustering complete. Selected eps: {selected_eps:.2f}. Clusters: {len(unique_labels)}."
748
  return fig_tsne, fig_routes, fig_stats, info
 
749
  except Exception as e:
750
  logging.error(f"Error in TSNE clustering: {e}")
751
  return go.Figure(), go.Figure(), make_subplots(rows=2, cols=1), f"Error in TSNE clustering: {e}"
752
 
753
  # -----------------------------
754
- # Animation Functions Using Processed CSV & Stock Map
755
  # -----------------------------
 
756
  def generate_track_video_from_csv(year, storm_id, standard):
 
757
  storm_df = typhoon_data[typhoon_data['SID'] == storm_id].copy()
758
  if storm_df.empty:
759
  logging.error(f"No data found for storm: {storm_id}")
760
  return None
 
761
  storm_df = storm_df.sort_values('ISO_TIME')
762
  lats = storm_df['LAT'].astype(float).values
763
  lons = storm_df['LON'].astype(float).values
764
  times = pd.to_datetime(storm_df['ISO_TIME']).values
 
765
  if 'USA_WIND' in storm_df.columns:
766
  winds = pd.to_numeric(storm_df['USA_WIND'], errors='coerce').values
767
  else:
768
  winds = np.full(len(lats), np.nan)
 
769
  storm_name = storm_df['NAME'].iloc[0]
770
- basin = storm_df['SID'].iloc[0][:2] # Use first 2 characters as basin code
771
  season = storm_df['SEASON'].iloc[0]
772
 
773
  min_lat, max_lat = np.min(lats), np.max(lats)
@@ -790,7 +1136,6 @@ def generate_track_video_from_csv(year, storm_id, standard):
790
  point, = ax.plot([], [], 'o', markersize=8, transform=ccrs.PlateCarree())
791
  date_text = ax.text(0.02, 0.02, '', transform=ax.transAxes, fontsize=10,
792
  bbox=dict(facecolor='white', alpha=0.8))
793
- # Display storm name and basin in a dynamic sidebar
794
  storm_info_text = fig.text(0.70, 0.60, '', fontsize=10,
795
  bbox=dict(facecolor='white', alpha=0.8, boxstyle='round,pad=0.5'))
796
 
@@ -823,87 +1168,90 @@ def generate_track_video_from_csv(year, storm_id, standard):
823
 
824
  ani = animation.FuncAnimation(fig, update, init_func=init, frames=len(times),
825
  interval=200, blit=True, repeat=True)
826
- temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.mp4')
827
- writer = animation.FFMpegWriter(fps=5, bitrate=1800)
828
- ani.save(temp_file.name, writer=writer)
829
- plt.close(fig)
830
- return temp_file.name
 
 
 
 
 
 
 
831
 
832
  def simplified_track_video(year, basin, typhoon, standard):
 
833
  if not typhoon:
834
  return None
835
  storm_id = typhoon.split('(')[-1].strip(')')
836
  return generate_track_video_from_csv(year, storm_id, standard)
837
 
838
  # -----------------------------
839
- # Typhoon Options Update Functions
840
  # -----------------------------
841
- basin_to_prefix = {
842
- "All Basins": "all",
843
- "NA - North Atlantic": "NA",
844
- "EP - Eastern North Pacific": "EP",
845
- "WP - Western North Pacific": "WP"
846
- }
847
 
848
- def update_typhoon_options(year, basin):
 
849
  try:
850
- if basin == "All Basins":
851
- summaries = []
852
- for data in ibtracs.values():
853
- if data is not None:
854
- season_data = data.get_season(int(year))
855
- if season_data.summary().empty:
856
- continue
857
- summaries.append(season_data.summary())
858
- if len(summaries) == 0:
859
- logging.error("No storms found for given year and basin.")
860
- return gr.update(choices=[], value=None)
861
- combined_summary = pd.concat(summaries, ignore_index=True)
862
- else:
863
- prefix = basin_to_prefix.get(basin)
864
- ds = ibtracs.get(prefix)
865
- if ds is None:
866
- logging.error(f"Dataset not found for basin {basin}")
867
- return gr.update(choices=[], value=None)
868
- season_data = ds.get_season(int(year))
869
- if season_data.summary().empty:
870
- logging.error("No storms found for given year and basin.")
871
- return gr.update(choices=[], value=None)
872
- combined_summary = season_data.summary()
873
- options = []
874
- for i in range(len(combined_summary)):
875
- try:
876
- name = combined_summary['name'][i] if pd.notnull(combined_summary['name'][i]) else "Unnamed"
877
- storm_id = combined_summary['id'][i]
878
- options.append(f"{name} ({storm_id})")
879
- except Exception:
880
- continue
881
- return gr.update(choices=options, value=options[0] if options else None)
882
- except Exception as e:
883
- logging.error(f"Error in update_typhoon_options: {e}")
884
- return gr.update(choices=[], value=None)
885
-
886
- def update_typhoon_options_anim(year, basin):
887
- try:
888
- data = typhoon_data.copy()
889
- data['Year'] = data['ISO_TIME'].dt.year
890
- season_data = data[data['Year'] == int(year)]
891
- if season_data.empty:
892
- logging.error(f"No storms found for year {year} in animation update.")
893
  return gr.update(choices=[], value=None)
894
- summary = season_data.groupby('SID').first().reset_index()
 
 
895
  options = []
896
- for idx, row in summary.iterrows():
897
- name = row['NAME'] if pd.notnull(row['NAME']) else "Unnamed"
898
- options.append(f"{name} ({row['SID']})")
899
- return gr.update(choices=options, value=options[0] if options else None)
 
 
 
 
 
 
 
 
 
900
  except Exception as e:
901
- logging.error(f"Error in update_typhoon_options_anim: {e}")
902
  return gr.update(choices=[], value=None)
903
 
 
 
 
 
 
 
 
 
 
 
 
 
904
  # -----------------------------
905
  # Gradio Interface
906
  # -----------------------------
 
907
  with gr.Blocks(title="Typhoon Analysis Dashboard") as demo:
908
  gr.Markdown("# Typhoon Analysis Dashboard")
909
 
@@ -918,10 +1266,14 @@ with gr.Blocks(title="Typhoon Analysis Dashboard") as demo:
918
  - **Wind Analysis**: Examine wind speed vs ONI relationships.
919
  - **Pressure Analysis**: Analyze pressure vs ONI relationships.
920
  - **Longitude Analysis**: Study typhoon generation longitude vs ONI.
921
- - **Path Animation**: View animated storm tracks on a free stock world map (centered at 180°) with a dynamic sidebar that shows the typhoon name and basin.
922
- - **TSNE Cluster**: Perform t-SNE clustering on WP storm routes using raw merged typhoon+ONI data with detailed error management.
923
- Mean routes and evolving curves (wind and pressure vs. normalized route index) are computed.
924
- """)
 
 
 
 
925
 
926
  with gr.Tab("Track Visualization"):
927
  with gr.Row():
@@ -987,7 +1339,6 @@ with gr.Blocks(title="Typhoon Analysis Dashboard") as demo:
987
  with gr.Tab("Tropical Cyclone Path Animation"):
988
  with gr.Row():
989
  year_dropdown = gr.Dropdown(label="Year", choices=[str(y) for y in range(1950, 2025)], value="2000")
990
- # Create a hidden component for basin constant; always "All Basins"
991
  basin_constant = gr.Textbox(value="All Basins", visible=False)
992
  with gr.Row():
993
  typhoon_dropdown = gr.Dropdown(label="Tropical Cyclone")
@@ -996,15 +1347,16 @@ with gr.Blocks(title="Typhoon Analysis Dashboard") as demo:
996
  path_video = gr.Video(label="Tropical Cyclone Path Animation", format="mp4", interactive=False, elem_id="path_video")
997
  animation_info = gr.Markdown("""
998
  ### Animation Instructions
999
- 1. Select a year (data is from your processed CSV, using all basins).
1000
  2. Choose a tropical cyclone from the populated list.
1001
  3. Select a classification standard (Atlantic or Taiwan).
1002
  4. Click "Generate Animation".
1003
- 5. The animation displays the storm track on a free stock world map (centered at 180°) with a dynamic sidebar.
1004
- The sidebar shows the storm name and basin.
1005
  """)
1006
- # Update typhoon dropdown using only year (ignore basin since it's fixed)
1007
- year_dropdown.change(fn=update_typhoon_options_anim, inputs=[year_dropdown, gr.State("dummy")], outputs=typhoon_dropdown)
 
 
1008
  animate_btn.click(fn=simplified_track_video,
1009
  inputs=[year_dropdown, basin_constant, typhoon_dropdown, standard_dropdown],
1010
  outputs=path_video)
@@ -1026,4 +1378,5 @@ with gr.Blocks(title="Typhoon Analysis Dashboard") as demo:
1026
  inputs=[tsne_start_year, tsne_start_month, tsne_end_year, tsne_end_month, tsne_enso_phase, tsne_season],
1027
  outputs=[tsne_plot, routes_plot, stats_plot, cluster_info])
1028
 
1029
- demo.launch(share=True)
 
 
48
parser = argparse.ArgumentParser(description='Typhoon Analysis Dashboard')
parser.add_argument('--data_path', type=str, default=os.getcwd(), help='Path to the data directory')
# parse_known_args instead of parse_args: hosted launchers (HF Spaces, notebooks,
# pytest) pass extra argv that would make parse_args() abort with SystemExit.
args, _unknown_args = parser.parse_known_args()
 
51
 
52
# Resolve a writable data directory, with special handling for HuggingFace Spaces.
if 'SPACE_ID' in os.environ:
    # Detected a HuggingFace Space: only /tmp is reliably writable there.
    DATA_PATH = '/tmp/typhoon_data'
    os.makedirs(DATA_PATH, exist_ok=True)
    logging.info(f"Running on HuggingFace Spaces, using data path: {DATA_PATH}")
else:
    # Local development: honour DATA_PATH env var, else fall back to the temp dir.
    DATA_PATH = os.environ.get('DATA_PATH', tempfile.gettempdir())

# Verify the chosen directory with a real write/delete round-trip; switch to a
# fresh temporary directory if it is not writable.
try:
    os.makedirs(DATA_PATH, exist_ok=True)
    probe_file = os.path.join(DATA_PATH, 'test_write.txt')
    with open(probe_file, 'w') as fh:
        fh.write('test')
    os.remove(probe_file)
    logging.info(f"Data directory is writable: {DATA_PATH}")
except Exception as e:
    logging.warning(f"Data directory not writable, using temp dir: {e}")
    DATA_PATH = tempfile.mkdtemp()
    logging.info(f"Using temporary directory: {DATA_PATH}")
75
 
76
# Derived data-file locations; all live under DATA_PATH.
ONI_DATA_PATH = os.path.join(DATA_PATH, 'oni_data.csv')
TYPHOON_DATA_PATH = os.path.join(DATA_PATH, 'processed_typhoon_data.csv')
MERGED_DATA_CSV = os.path.join(DATA_PATH, 'merged_typhoon_era5_data.csv')
80
 
81
+ # IBTrACS settings
82
  BASIN_FILES = {
83
  'EP': 'ibtracs.EP.list.v04r01.csv',
84
  'NA': 'ibtracs.NA.list.v04r01.csv',
 
134
  "Philippines": {"lat_min": 5, "lat_max": 21, "lon_min": 115, "lon_max": 130}
135
  }
136
 
137
+ # -----------------------------
138
+ # Utility Functions for HF Spaces
139
+ # -----------------------------
140
+
141
def safe_file_write(file_path, data_frame, backup_dir=None):
    """Safely write *data_frame* to CSV at *file_path*.

    Writes to a temporary sibling file first and atomically replaces the
    target, so readers never observe a half-written CSV.  On PermissionError
    the frame is optionally written to *backup_dir* instead.

    Args:
        file_path: Destination CSV path.
        data_frame: pandas DataFrame to persist.
        backup_dir: Optional fallback directory used on PermissionError.

    Returns:
        True on success (including a successful backup write), False otherwise.
    """
    # Defined up-front so the generic error handler can always reference it;
    # previously this hit NameError when makedirs() failed before assignment.
    temp_path = file_path + '.tmp'
    try:
        parent = os.path.dirname(file_path)
        if parent:  # os.makedirs('') raises FileNotFoundError for bare filenames
            os.makedirs(parent, exist_ok=True)

        data_frame.to_csv(temp_path, index=False)

        # os.replace is atomic and, unlike os.rename, overwrites an existing
        # destination on every platform (os.rename fails on Windows).
        os.replace(temp_path, file_path)
        logging.info(f"Successfully saved {len(data_frame)} records to {file_path}")
        return True

    except PermissionError as e:
        logging.warning(f"Permission denied writing to {file_path}: {e}")
        if backup_dir:
            try:
                backup_path = os.path.join(backup_dir, os.path.basename(file_path))
                data_frame.to_csv(backup_path, index=False)
                logging.info(f"Saved to backup location: {backup_path}")
                return True
            except Exception as backup_e:
                logging.error(f"Failed to save to backup location: {backup_e}")
        return False

    except Exception as e:
        logging.error(f"Error saving file {file_path}: {e}")
        # Best-effort cleanup of the temp file; ignore secondary failures.
        if os.path.exists(temp_path):
            try:
                os.remove(temp_path)
            except OSError:
                pass
        return False
177
+
178
def get_fallback_data_dir():
    """Return a directory that is guaranteed writable.

    Probes candidates in order (system temp dir, /tmp, the user's home, the
    CWD), verifying each with a real write/delete round-trip inside a
    'typhoon_fallback' subdirectory.  Falls back to the current working
    directory if nothing else works.
    """
    fallback_dirs = [
        tempfile.gettempdir(),
        '/tmp',
        os.path.expanduser('~'),
        os.getcwd(),
    ]

    for directory in fallback_dirs:
        try:
            test_dir = os.path.join(directory, 'typhoon_fallback')
            os.makedirs(test_dir, exist_ok=True)
            test_file = os.path.join(test_dir, 'test.txt')
            with open(test_file, 'w') as f:
                f.write('test')
            os.remove(test_file)
            return test_dir
        except OSError:
            # Narrowed from a bare `except:`, which also swallowed
            # KeyboardInterrupt/SystemExit; only filesystem errors should
            # mean "try the next candidate".
            continue

    # If all else fails, use current directory
    return os.getcwd()
201
+
202
  # -----------------------------
203
  # ONI and Typhoon Data Functions
204
  # -----------------------------
205
+
206
def download_oni_file(url, filename):
    """Fetch the ONI ASCII file to *filename*, retrying with exponential backoff.

    Returns True once the file is written, False after three failed attempts.
    """
    max_retries = 3
    for attempt in range(max_retries):
        try:
            response = requests.get(url, timeout=30)
            response.raise_for_status()
            with open(filename, 'wb') as fh:
                fh.write(response.content)
            return True
        except Exception as e:
            logging.warning(f"Attempt {attempt + 1} failed to download ONI: {e}")
            if attempt >= max_retries - 1:
                logging.error(f"Failed to download ONI after {max_retries} attempts")
                return False
            time.sleep(2 ** attempt)  # exponential backoff: 1s, 2s
223
 
224
def convert_oni_ascii_to_csv(input_file, output_file):
    """Parse the NOAA ONI ASCII table into a Year-by-Month CSV.

    Returns True when the CSV was written successfully, False otherwise.
    """
    season_to_month = {'DJF':12, 'JFM':1, 'FMA':2, 'MAM':3, 'AMJ':4, 'MJJ':5,
                       'JJA':6, 'JAS':7, 'ASO':8, 'SON':9, 'OND':10, 'NDJ':11}
    data = defaultdict(lambda: [''] * 12)

    try:
        with open(input_file, 'r') as fh:
            for line in fh.readlines()[1:]:  # first line is the header
                parts = line.split()
                if len(parts) < 4:
                    continue
                season, year, anom = parts[0], parts[1], parts[-1]
                month = season_to_month.get(season)
                if month is None:
                    continue
                if season == 'DJF':
                    # DJF spans the year boundary; attribute it to the earlier year.
                    year = str(int(year) - 1)
                data[year][month - 1] = anom

        df = pd.DataFrame(data).T.reset_index()
        df.columns = ['Year','Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec']
        df = df.sort_values('Year').reset_index(drop=True)

        return safe_file_write(output_file, df, get_fallback_data_dir())

    except Exception as e:
        logging.error(f"Error converting ONI file: {e}")
        return False
253
 
254
def update_oni_data():
    """Refresh the local ONI dataset from NOAA, with a synthetic fallback.

    Downloads the latest ONI ASCII table, always adopts the fresh copy, and
    regenerates the CSV at ONI_DATA_PATH.  If the download (or any step)
    fails, synthetic fallback data is written instead so the app stays usable.
    """
    url = "https://www.cpc.ncep.noaa.gov/data/indices/oni.ascii.txt"
    temp_file = os.path.join(DATA_PATH, "temp_oni.ascii.txt")
    input_file = os.path.join(DATA_PATH, "oni.ascii.txt")
    output_file = ONI_DATA_PATH

    try:
        if download_oni_file(url, temp_file):
            # Always adopt the freshly downloaded file.  The previous logic
            # discarded the download whenever old files existed, so the ONI
            # data was never actually refreshed.  os.replace also overwrites
            # an existing input file safely (os.rename would fail on Windows).
            os.replace(temp_file, input_file)
            convert_oni_ascii_to_csv(input_file, output_file)
        else:
            # Download failed after retries: fall back to synthetic data.
            logging.warning("Creating fallback ONI data")
            create_fallback_oni_data(output_file)
    except Exception as e:
        logging.error(f"Error updating ONI data: {e}")
        create_fallback_oni_data(output_file)
275
+
276
def create_fallback_oni_data(output_file):
    """Write a synthetic ONI table (2000-2024) so the app can run offline."""
    month_names = ['Jan','Feb','Mar','Apr','May','Jun','Jul','Aug','Sep','Oct','Nov','Dec']

    # One row per year; each month gets a small random anomaly around zero,
    # roughly matching the scale of real ONI values.
    rows = []
    for year in range(2000, 2025):
        anomalies = [f"{np.random.normal(0, 1) * 0.5:.2f}" for _ in month_names]
        rows.append([year] + anomalies)

    df = pd.DataFrame(rows, columns=['Year'] + month_names)
    safe_file_write(output_file, df, get_fallback_data_dir())
293
+
294
+ # -----------------------------
295
+ # FIXED: IBTrACS Data Loading
296
+ # -----------------------------
297
+
298
def download_ibtracs_file(basin, force_download=False):
    """Fetch one IBTrACS basin CSV, reusing a cached copy younger than a week.

    Returns the local file path on success, or None when the download fails.
    """
    filename = BASIN_FILES[basin]
    local_path = os.path.join(DATA_PATH, filename)
    url = IBTRACS_BASE_URL + filename

    # Serve from cache when the file is fresh enough (under 7 days old).
    cache_ttl = 7 * 24 * 3600
    if os.path.exists(local_path) and not force_download:
        age_seconds = time.time() - os.path.getmtime(local_path)
        if age_seconds < cache_ttl:
            logging.info(f"Using cached {basin} basin file")
            return local_path

    try:
        logging.info(f"Downloading {basin} basin file from {url}")
        response = requests.get(url, timeout=60)
        response.raise_for_status()

        os.makedirs(os.path.dirname(local_path), exist_ok=True)
        with open(local_path, 'wb') as fh:
            fh.write(response.content)
        logging.info(f"Successfully downloaded {basin} basin file")
        return local_path
    except Exception as e:
        logging.error(f"Failed to download {basin} basin file: {e}")
        return None
326
+
327
def load_ibtracs_csv_directly(basin='WP'):
    """Load one IBTrACS basin CSV into a cleaned DataFrame (no tropycal needed).

    Downloads the file if it is missing, keeps only the columns this app
    uses, coerces types, and drops rows without a valid time or position.
    Returns None when the file cannot be obtained or parsed.
    """
    filename = BASIN_FILES[basin]
    local_path = os.path.join(DATA_PATH, filename)

    # Fetch the file first if we do not have a local copy.
    if not os.path.exists(local_path):
        if not download_ibtracs_file(basin):
            return None

    # Columns this application relies on, when present in the file.
    essential_columns = [
        'SID', 'SEASON', 'NUMBER', 'BASIN', 'SUBBASIN', 'NAME',
        'ISO_TIME', 'NATURE', 'LAT', 'LON', 'WMO_WIND', 'WMO_PRES',
        'USA_WIND', 'USA_PRES', 'USA_STATUS', 'USA_R34_NE', 'USA_R34_SE',
        'USA_R34_SW', 'USA_R34_NW', 'USA_R50_NE', 'USA_R50_SE',
        'USA_R50_SW', 'USA_R50_NW', 'USA_R64_NE', 'USA_R64_SE',
        'USA_R64_SW', 'USA_R64_NW', 'USA_RMW', 'USA_EYE'
    ]

    try:
        logging.info(f"Reading IBTrACS CSV file: {local_path}")
        # The second physical row of IBTrACS files holds units, not data.
        df = pd.read_csv(local_path, low_memory=False, skiprows=1)

        present = [col for col in essential_columns if col in df.columns]
        absent = [col for col in essential_columns if col not in df.columns]
        if absent:
            logging.warning(f"Missing columns in IBTrACS data: {absent}")
        df = df[present].copy()

        # Coerce the timestamp and numeric fields; invalid entries become NaT/NaN.
        df['ISO_TIME'] = pd.to_datetime(df['ISO_TIME'], format='%Y-%m-%d %H:%M:%S', errors='coerce')
        for col in ('LAT', 'LON', 'WMO_WIND', 'WMO_PRES', 'USA_WIND', 'USA_PRES'):
            if col in df.columns:
                df[col] = pd.to_numeric(df[col], errors='coerce')

        # Drop rows lacking the critical fields, then clamp to valid coordinates.
        df = df.dropna(subset=['ISO_TIME', 'LAT', 'LON'])
        df = df[(df['LAT'] >= -90) & (df['LAT'] <= 90)]
        df = df[(df['LON'] >= -180) & (df['LON'] <= 180)]

        logging.info(f"Successfully loaded {len(df)} records from {basin} basin")
        return df

    except Exception as e:
        logging.error(f"Error reading IBTrACS CSV file: {e}")
        return None
386
+
387
def load_ibtracs_data_fixed():
    """Load IBTrACS basins into a dict of {basin: DataFrame or None}.

    WP is loaded first since it is the primary basin for this application.
    """
    ibtracs_data = {}

    for basin in ('WP', 'EP', 'NA'):
        try:
            logging.info(f"Loading {basin} basin data...")
            frame = load_ibtracs_csv_directly(basin)

            if frame is None or frame.empty:
                logging.warning(f"No data loaded for basin {basin}")
                ibtracs_data[basin] = None
            else:
                ibtracs_data[basin] = frame
                logging.info(f"Successfully loaded {basin} basin with {len(frame)} records")

        except Exception as e:
            logging.error(f"Failed to load basin {basin}: {e}")
            ibtracs_data[basin] = None

    return ibtracs_data
411
 
412
+ def load_data_fixed(oni_path, typhoon_path):
413
+ """Fixed version of load_data function"""
414
+ # Load ONI data
415
  oni_data = pd.DataFrame({'Year': [], 'Jan': [], 'Feb': [], 'Mar': [], 'Apr': [],
416
  'May': [], 'Jun': [], 'Jul': [], 'Aug': [], 'Sep': [],
417
  'Oct': [], 'Nov': [], 'Dec': []})
418
 
 
419
  if not os.path.exists(oni_path):
420
  logging.warning(f"ONI data file not found: {oni_path}")
421
  update_oni_data()
422
 
423
  try:
424
  oni_data = pd.read_csv(oni_path)
425
+ logging.info(f"Successfully loaded ONI data with {len(oni_data)} years")
426
  except Exception as e:
427
  logging.error(f"Error loading ONI data: {e}")
428
  update_oni_data()
 
431
  except Exception as e:
432
  logging.error(f"Still can't load ONI data: {e}")
433
 
434
+ # Load typhoon data - NEW APPROACH
435
+ typhoon_data = None
436
+
437
+ # First, try to load from existing processed file
438
  if os.path.exists(typhoon_path):
439
  try:
440
  typhoon_data = pd.read_csv(typhoon_path, low_memory=False)
441
+ # Ensure basic columns exist and are valid
442
+ required_cols = ['SID', 'ISO_TIME', 'LAT', 'LON']
443
+ if all(col in typhoon_data.columns for col in required_cols):
444
+ typhoon_data['ISO_TIME'] = pd.to_datetime(typhoon_data['ISO_TIME'], errors='coerce')
445
+ typhoon_data = typhoon_data.dropna(subset=['ISO_TIME'])
446
+ logging.info(f"Loaded processed typhoon data with {len(typhoon_data)} records")
 
 
 
 
 
 
 
 
 
 
 
447
  else:
448
+ logging.warning("Processed typhoon data missing required columns, will reload from IBTrACS")
449
+ typhoon_data = None
 
 
 
 
 
 
 
 
450
  except Exception as e:
451
+ logging.error(f"Error loading processed typhoon data: {e}")
452
+ typhoon_data = None
453
+
454
+ # If no valid processed data, load from IBTrACS
455
+ if typhoon_data is None or typhoon_data.empty:
456
+ logging.info("Loading typhoon data from IBTrACS...")
457
+ ibtracs_data = load_ibtracs_data_fixed()
458
+
459
+ # Combine all available basin data, prioritizing WP
460
+ combined_dfs = []
461
+ for basin in ['WP', 'EP', 'NA']:
462
+ if basin in ibtracs_data and ibtracs_data[basin] is not None:
463
+ df = ibtracs_data[basin].copy()
464
+ df['BASIN'] = basin
465
+ combined_dfs.append(df)
466
+
467
+ if combined_dfs:
468
+ typhoon_data = pd.concat(combined_dfs, ignore_index=True)
469
+ # Ensure SID has proper format
470
+ if 'SID' not in typhoon_data.columns and 'BASIN' in typhoon_data.columns:
471
+ # Create SID from basin and other identifiers if missing
472
+ if 'NUMBER' in typhoon_data.columns and 'SEASON' in typhoon_data.columns:
473
+ typhoon_data['SID'] = (typhoon_data['BASIN'].astype(str) +
474
+ typhoon_data['NUMBER'].astype(str).str.zfill(2) +
475
+ typhoon_data['SEASON'].astype(str))
476
+
477
+ # Save the processed data for future use
478
+ safe_file_write(typhoon_path, typhoon_data, get_fallback_data_dir())
479
+ logging.info(f"Combined IBTrACS data: {len(typhoon_data)} total records")
480
+ else:
481
+ logging.error("Failed to load any IBTrACS basin data")
482
+ # Create minimal fallback data
483
+ typhoon_data = create_fallback_typhoon_data()
484
+
485
+ # Final validation of typhoon data
486
+ if typhoon_data is not None:
487
+ # Ensure required columns exist with fallback values
488
+ required_columns = {
489
+ 'SID': 'UNKNOWN',
490
+ 'ISO_TIME': pd.Timestamp('2000-01-01'),
491
+ 'LAT': 0.0,
492
+ 'LON': 0.0,
493
+ 'USA_WIND': np.nan,
494
+ 'USA_PRES': np.nan,
495
+ 'NAME': 'UNNAMED',
496
+ 'SEASON': 2000
497
+ }
498
+
499
+ for col, default_val in required_columns.items():
500
+ if col not in typhoon_data.columns:
501
+ typhoon_data[col] = default_val
502
+ logging.warning(f"Added missing column {col} with default value")
503
+
504
+ # Ensure data types
505
+ typhoon_data['ISO_TIME'] = pd.to_datetime(typhoon_data['ISO_TIME'], errors='coerce')
506
+ typhoon_data['LAT'] = pd.to_numeric(typhoon_data['LAT'], errors='coerce')
507
+ typhoon_data['LON'] = pd.to_numeric(typhoon_data['LON'], errors='coerce')
508
+ typhoon_data['USA_WIND'] = pd.to_numeric(typhoon_data['USA_WIND'], errors='coerce')
509
+ typhoon_data['USA_PRES'] = pd.to_numeric(typhoon_data['USA_PRES'], errors='coerce')
510
+
511
+ # Remove rows with invalid times or coordinates
512
+ typhoon_data = typhoon_data.dropna(subset=['ISO_TIME', 'LAT', 'LON'])
513
+
514
+ logging.info(f"Final typhoon data: {len(typhoon_data)} records after validation")
515
 
516
  return oni_data, typhoon_data
517
 
518
def create_fallback_typhoon_data(seed=None):
    """Create minimal synthetic WP typhoon tracks for when IBTrACS is unavailable.

    Args:
        seed: Optional integer RNG seed for reproducible output.  The default
            (None) keeps the original non-deterministic behaviour.

    Returns:
        pandas DataFrame with one row per 6-hourly track point for 100 storms,
        carrying the same columns the real IBTrACS pipeline produces.
    """
    rng = np.random.default_rng(seed)
    dates = pd.date_range(start='2000-01-01', end='2023-12-31', freq='D')

    # Pick start dates by index so we keep pd.Timestamp objects.  The old
    # np.random.choice(dates, ...) returned np.datetime64 scalars, which have
    # no .year attribute and crashed when building SID/SEASON below.
    start_indices = rng.choice(len(dates), size=100, replace=False)

    data = []
    for i, idx in enumerate(start_indices):
        start_time = dates[int(idx)]
        # Realistic western-Pacific genesis region.
        base_lat = rng.uniform(10, 30)
        base_lon = rng.uniform(130, 160)

        # Generate 20-50 six-hourly data points per storm.
        track_length = int(rng.integers(20, 51))
        sid = f"WP{i+1:02d}{start_time.year}"

        for j in range(track_length):
            # Drift north-west-ward with small jitter, as a crude track shape.
            lat = base_lat + j * 0.2 + rng.normal(0, 0.1)
            lon = base_lon + j * 0.3 + rng.normal(0, 0.1)
            wind = max(25, 70 + rng.normal(0, 20))
            pres = max(950, 1000 - wind + rng.normal(0, 5))

            data.append({
                'SID': sid,
                'ISO_TIME': start_time + timedelta(hours=j * 6),
                'NAME': f'FALLBACK_{i+1}',
                'SEASON': start_time.year,
                'LAT': lat,
                'LON': lon,
                'USA_WIND': wind,
                'USA_PRES': pres,
                'BASIN': 'WP'
            })

    return pd.DataFrame(data)
552
+
553
  def process_oni_data(oni_data):
554
+ """Process ONI data into long format"""
555
  oni_long = oni_data.melt(id_vars=['Year'], var_name='Month', value_name='ONI')
556
  month_map = {'Jan':'01','Feb':'02','Mar':'03','Apr':'04','May':'05','Jun':'06',
557
  'Jul':'07','Aug':'08','Sep':'09','Oct':'10','Nov':'11','Dec':'12'}
 
561
  return oni_long
562
 
563
def process_typhoon_data(typhoon_data):
    """Collapse track-level records into one row per storm at peak intensity."""
    # Coerce the columns used below in place; bad values become NaT/NaN.
    typhoon_data['ISO_TIME'] = pd.to_datetime(typhoon_data['ISO_TIME'], errors='coerce')
    for col in ('USA_WIND', 'USA_PRES', 'LON'):
        typhoon_data[col] = pd.to_numeric(typhoon_data[col], errors='coerce')

    logging.info(f"Unique basins in typhoon_data: {typhoon_data['SID'].str[:2].unique()}")

    # Peak wind / minimum pressure per storm; the first record supplies
    # genesis time, position, name and season.
    typhoon_max = typhoon_data.groupby('SID').agg({
        'USA_WIND':'max','USA_PRES':'min','ISO_TIME':'first','SEASON':'first','NAME':'first',
        'LAT':'first','LON':'first'
    }).reset_index()

    typhoon_max['Month'] = typhoon_max['ISO_TIME'].dt.strftime('%m')
    typhoon_max['Year'] = typhoon_max['ISO_TIME'].dt.year
    typhoon_max['Category'] = typhoon_max['USA_WIND'].apply(categorize_typhoon)
    return typhoon_max
581
 
582
def merge_data(oni_long, typhoon_max):
    """Inner-join per-storm records with monthly ONI values on Year/Month."""
    merged = typhoon_max.merge(oni_long, on=['Year', 'Month'])
    return merged
585
 
586
  def categorize_typhoon(wind_speed):
587
+ """Categorize typhoon based on wind speed"""
588
  if wind_speed >= 137:
589
  return 'C5 Super Typhoon'
590
  elif wind_speed >= 113:
 
601
  return 'Tropical Depression'
602
 
603
  def classify_enso_phases(oni_value):
604
+ """Classify ENSO phases based on ONI value"""
605
  if isinstance(oni_value, pd.Series):
606
  oni_value = oni_value.iloc[0]
607
  if oni_value >= 0.5:
 
614
  # -----------------------------
615
  # Regression Functions
616
  # -----------------------------
617
+
618
def perform_wind_regression(start_year, start_month, end_year, end_month):
    """Logistic regression of severe-typhoon occurrence (USA_WIND >= 64 kt) on ONI.

    Returns a one-line summary string with the ONI coefficient, its odds
    ratio, and the p-value, or an error message on failure.
    """
    import calendar  # local import: only needed for the month-length lookup
    start_date = datetime(start_year, start_month, 1)
    # Use the true last day of the month; the previous hard-coded day 28
    # silently dropped observations from the 29th-31st.
    last_day = calendar.monthrange(end_year, end_month)[1]
    end_date = datetime(end_year, end_month, last_day)
    data = merged_data[(merged_data['ISO_TIME']>=start_date) & (merged_data['ISO_TIME']<=end_date)].dropna(subset=['USA_WIND','ONI'])
    data['severe_typhoon'] = (data['USA_WIND']>=64).astype(int)  # 64 kt = typhoon/hurricane strength
    X = sm.add_constant(data['ONI'])
    y = data['severe_typhoon']
    try:
        model = sm.Logit(y, X).fit(disp=0)
        beta_1 = model.params['ONI']
        exp_beta_1 = np.exp(beta_1)
        p_value = model.pvalues['ONI']
        return f"Wind Regression: β1={beta_1:.4f}, Odds Ratio={exp_beta_1:.4f}, P-value={p_value:.4f}"
    except Exception as e:
        return f"Wind Regression Error: {e}"
634
 
635
def perform_pressure_regression(start_year, start_month, end_year, end_month):
    """Logistic regression of intense-typhoon occurrence (USA_PRES <= 950 hPa) on ONI.

    Returns a one-line summary string with the ONI coefficient, its odds
    ratio, and the p-value, or an error message on failure.
    """
    import calendar  # local import: only needed for the month-length lookup
    start_date = datetime(start_year, start_month, 1)
    # Use the true last day of the month; the previous hard-coded day 28
    # silently dropped observations from the 29th-31st.
    last_day = calendar.monthrange(end_year, end_month)[1]
    end_date = datetime(end_year, end_month, last_day)
    data = merged_data[(merged_data['ISO_TIME']>=start_date) & (merged_data['ISO_TIME']<=end_date)].dropna(subset=['USA_PRES','ONI'])
    data['intense_typhoon'] = (data['USA_PRES']<=950).astype(int)  # deep central pressure
    X = sm.add_constant(data['ONI'])
    y = data['intense_typhoon']
    try:
        model = sm.Logit(y, X).fit(disp=0)
        beta_1 = model.params['ONI']
        exp_beta_1 = np.exp(beta_1)
        p_value = model.pvalues['ONI']
        return f"Pressure Regression: β1={beta_1:.4f}, Odds Ratio={exp_beta_1:.4f}, P-value={p_value:.4f}"
    except Exception as e:
        return f"Pressure Regression Error: {e}"
651
 
652
def perform_longitude_regression(start_year, start_month, end_year, end_month):
    """OLS regression of western-genesis occurrence (LON <= 140°E) on ONI.

    Returns a one-line summary string, or an error message on failure.
    NOTE(review): this is a linear probability model, so exp(β1) is not a
    true odds ratio; the label is kept for output compatibility.
    """
    import calendar  # local import: only needed for the month-length lookup
    start_date = datetime(start_year, start_month, 1)
    # Use the true last day of the month; the previous hard-coded day 28
    # silently dropped observations from the 29th-31st.
    last_day = calendar.monthrange(end_year, end_month)[1]
    end_date = datetime(end_year, end_month, last_day)
    data = merged_data[(merged_data['ISO_TIME']>=start_date) & (merged_data['ISO_TIME']<=end_date)].dropna(subset=['LON','ONI'])
    data['western_typhoon'] = (data['LON']<=140).astype(int)
    X = sm.add_constant(data['ONI'])
    y = data['western_typhoon']
    try:
        # X already contains the constant column; the old code wrapped it in a
        # second (redundant) sm.add_constant call.
        model = sm.OLS(y, X).fit()
        beta_1 = model.params['ONI']
        exp_beta_1 = np.exp(beta_1)
        p_value = model.pvalues['ONI']
        return f"Longitude Regression: β1={beta_1:.4f}, Odds Ratio={exp_beta_1:.4f}, P-value={p_value:.4f}"
    except Exception as e:
        return f"Longitude Regression Error: {e}"
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
668
 
669
  # -----------------------------
670
  # Visualization Functions
671
  # -----------------------------
672
+
673
  def generate_typhoon_tracks(filtered_data, typhoon_search):
674
+ """Generate typhoon tracks visualization"""
675
  fig = go.Figure()
676
  for sid in filtered_data['SID'].unique():
677
  storm_data = filtered_data[filtered_data['SID'] == sid]
 
697
  return fig
698
 
699
  def generate_wind_oni_scatter(filtered_data, typhoon_search):
700
+ """Generate wind vs ONI scatter plot"""
701
  fig = px.scatter(filtered_data, x='ONI', y='USA_WIND', color='Category',
702
  hover_data=['NAME','Year','Category'],
703
  title='Wind Speed vs ONI',
 
715
  return fig
716
 
717
  def generate_pressure_oni_scatter(filtered_data, typhoon_search):
718
+ """Generate pressure vs ONI scatter plot"""
719
  fig = px.scatter(filtered_data, x='ONI', y='USA_PRES', color='Category',
720
  hover_data=['NAME','Year','Category'],
721
  title='Pressure vs ONI',
 
733
  return fig
734
 
735
def generate_regression_analysis(filtered_data):
    """Scatter of genesis longitude vs ONI with an OLS fit line.

    Args:
        filtered_data: DataFrame with at least LON, ONI and NAME columns.

    Returns:
        (plotly figure, summary text with the fitted slope or an error/notice).
    """
    fig = px.scatter(filtered_data, x='LON', y='ONI', hover_data=['NAME'],
                     title='Typhoon Generation Longitude vs ONI (All Years)')
    if len(filtered_data) > 1:
        X = np.array(filtered_data['LON']).reshape(-1,1)
        y = filtered_data['ONI']
        try:
            # Build the design matrix once and reuse it for fit and prediction
            # (the old code called sm.add_constant twice).
            exog = sm.add_constant(X)
            model = sm.OLS(y, exog).fit()
            y_pred = model.predict(exog)
            fig.add_trace(go.Scatter(x=filtered_data['LON'], y=y_pred, mode='lines', name='Regression Line'))
            # Positional params[1] on a labeled Series relies on deprecated
            # pandas positional-fallback indexing; use iloc when available.
            params = model.params
            slope = params.iloc[1] if hasattr(params, 'iloc') else params[1]
            slopes_text = f"All Years Slope: {slope:.4f}"
        except Exception as e:
            slopes_text = f"Regression Error: {e}"
    else:
        slopes_text = "Insufficient data for regression"
    return fig, slopes_text
753
 
754
  def generate_main_analysis(start_year, start_month, end_year, end_month, enso_phase, typhoon_search):
755
+ """Generate main analysis plots"""
756
  start_date = datetime(start_year, start_month, 1)
757
  end_date = datetime(end_year, end_month, 28)
758
  filtered_data = merged_data[(merged_data['ISO_TIME']>=start_date) & (merged_data['ISO_TIME']<=end_date)].copy()
 
766
  return tracks_fig, wind_scatter, pressure_scatter, regression_fig, slopes_text
767
 
768
  def get_full_tracks(start_year, start_month, end_year, end_month, enso_phase, typhoon_search):
769
+ """Get full typhoon tracks"""
770
  start_date = datetime(start_year, start_month, 1)
771
  end_date = datetime(end_year, end_month, 28)
772
  filtered_data = merged_data[(merged_data['ISO_TIME']>=start_date) & (merged_data['ISO_TIME']<=end_date)].copy()
 
779
  for sid in unique_storms:
780
  storm_data = typhoon_data[typhoon_data['SID']==sid]
781
  name = storm_data['NAME'].iloc[0] if pd.notnull(storm_data['NAME'].iloc[0]) else "Unnamed"
782
+ basin = storm_data['SID'].iloc[0][:2]
783
  storm_oni = filtered_data[filtered_data['SID']==sid]['ONI'].iloc[0]
784
  color = 'red' if storm_oni>=0.5 else ('blue' if storm_oni<=-0.5 else 'green')
785
  fig.add_trace(go.Scattergeo(
 
823
  return fig, f"Total typhoons displayed: {count}"
824
 
825
def get_wind_analysis(start_year, start_month, end_year, end_month, enso_phase, typhoon_search):
    """Return the wind-vs-ONI scatter figure and the wind regression summary."""
    analysis_outputs = generate_main_analysis(start_year, start_month, end_year, end_month, enso_phase, typhoon_search)
    wind_summary = perform_wind_regression(start_year, start_month, end_year, end_month)
    return analysis_outputs[1], wind_summary
830
 
831
def get_pressure_analysis(start_year, start_month, end_year, end_month, enso_phase, typhoon_search):
    """Return the pressure-vs-ONI scatter figure and the pressure regression summary."""
    analysis_outputs = generate_main_analysis(start_year, start_month, end_year, end_month, enso_phase, typhoon_search)
    pressure_summary = perform_pressure_regression(start_year, start_month, end_year, end_month)
    return analysis_outputs[2], pressure_summary
836
 
837
def get_longitude_analysis(start_year, start_month, end_year, end_month, enso_phase, typhoon_search):
    """Return the longitude regression figure, its slope text, and the regression summary."""
    analysis_outputs = generate_main_analysis(start_year, start_month, end_year, end_month, enso_phase, typhoon_search)
    longitude_summary = perform_longitude_regression(start_year, start_month, end_year, end_month)
    return analysis_outputs[3], analysis_outputs[4], longitude_summary
842
 
843
  def categorize_typhoon_by_standard(wind_speed, standard='atlantic'):
844
+ """Categorize typhoon by standard"""
845
  if standard=='taiwan':
846
  wind_speed_ms = wind_speed * 0.514444
847
  if wind_speed_ms >= 51.0:
 
867
  return 'Tropical Depression', atlantic_standard['Tropical Depression']['hex']
868
 
869
  # -----------------------------
870
+ # TSNE Cluster Function
871
  # -----------------------------
872
+
873
  def update_route_clusters(start_year, start_month, end_year, end_month, enso_value, season):
874
+ """Updated TSNE cluster function with mean curves"""
875
  try:
876
+ # Merge raw typhoon data with ONI
877
  raw_data = typhoon_data.copy()
878
  raw_data['Year'] = raw_data['ISO_TIME'].dt.year
879
  raw_data['Month'] = raw_data['ISO_TIME'].dt.strftime('%m')
 
899
  logging.info("WP regional filter returned no data; using all filtered data.")
900
  wp_data = merged_raw
901
 
902
+ # Group by storm ID
903
  all_storms_data = []
904
  for sid, group in wp_data.groupby('SID'):
905
  group = group.sort_values('ISO_TIME')
 
908
  lons = group['LON'].astype(float).values
909
  if len(lons) < 2:
910
  continue
911
+ # Extract wind and pressure curves
912
  wind = group['USA_WIND'].astype(float).values if 'USA_WIND' in group.columns else None
913
  pres = group['USA_PRES'].astype(float).values if 'USA_PRES' in group.columns else None
914
  all_storms_data.append((sid, lons, lats, times, wind, pres))
915
+
916
  logging.info(f"Storms available for TSNE after grouping: {len(all_storms_data)}")
917
  if not all_storms_data:
918
  return go.Figure(), go.Figure(), make_subplots(rows=2, cols=1), "No valid storms for clustering."
919
 
920
+ # Interpolate each storm's route to a common length
921
  max_length = max(len(item[1]) for item in all_storms_data)
922
  route_vectors = []
923
  wind_curves = []
924
  pres_curves = []
925
  storm_ids = []
926
+
927
  for sid, lons, lats, times, wind, pres in all_storms_data:
928
  t = np.linspace(0, 1, len(lons))
929
  t_new = np.linspace(0, 1, max_length)
 
933
  except Exception as ex:
934
  logging.error(f"Interpolation error for storm {sid}: {ex}")
935
  continue
936
+
937
  route_vector = np.column_stack((lon_interp, lat_interp)).flatten()
938
  if np.isnan(route_vector).any():
939
  continue
940
+
941
  route_vectors.append(route_vector)
942
  storm_ids.append(sid)
943
+
944
+ # Interpolate wind and pressure
945
  if wind is not None and len(wind) >= 2:
946
  try:
947
  wind_interp = interp1d(t, wind, kind='linear', fill_value='extrapolate')(t_new)
 
950
  wind_interp = np.full(max_length, np.nan)
951
  else:
952
  wind_interp = np.full(max_length, np.nan)
953
+
954
  if pres is not None and len(pres) >= 2:
955
  try:
956
  pres_interp = interp1d(t, pres, kind='linear', fill_value='extrapolate')(t_new)
 
959
  pres_interp = np.full(max_length, np.nan)
960
  else:
961
  pres_interp = np.full(max_length, np.nan)
962
+
963
  wind_curves.append(wind_interp)
964
  pres_curves.append(pres_interp)
965
+
966
  logging.info(f"Storms with valid route vectors: {len(route_vectors)}")
967
  if len(route_vectors) == 0:
968
  return go.Figure(), go.Figure(), make_subplots(rows=2, cols=1), "No valid storms after interpolation."
 
975
  tsne = TSNE(n_components=2, random_state=42, verbose=1)
976
  tsne_results = tsne.fit_transform(route_vectors)
977
 
978
+ # Dynamic DBSCAN
979
  selected_labels = None
980
  selected_eps = None
981
  for eps in np.linspace(1.0, 10.0, 91):
 
986
  selected_labels = labels
987
  selected_eps = eps
988
  break
989
+
990
  if selected_labels is None:
991
  selected_eps = 5.0
992
  dbscan = DBSCAN(eps=selected_eps, min_samples=3)
993
  selected_labels = dbscan.fit_predict(tsne_results)
994
+
995
  logging.info(f"Selected DBSCAN eps: {selected_eps:.2f} yielding {len(set(selected_labels)-{-1})} clusters.")
996
 
997
  # TSNE scatter plot
998
  fig_tsne = go.Figure()
999
  colors = px.colors.qualitative.Safe
1000
  unique_labels = sorted(set(selected_labels) - {-1})
1001
+
1002
  for i, label in enumerate(unique_labels):
1003
  indices = np.where(selected_labels == label)[0]
1004
  fig_tsne.add_trace(go.Scatter(
 
1008
  marker=dict(color=colors[i % len(colors)]),
1009
  name=f"Cluster {label}"
1010
  ))
1011
+
1012
  noise_indices = np.where(selected_labels == -1)[0]
1013
  if len(noise_indices) > 0:
1014
  fig_tsne.add_trace(go.Scatter(
 
1018
  marker=dict(color='grey'),
1019
  name='Noise'
1020
  ))
1021
+
1022
  fig_tsne.update_layout(
1023
  title="t-SNE of Storm Routes",
1024
  xaxis_title="t-SNE Dim 1",
1025
  yaxis_title="t-SNE Dim 2"
1026
  )
1027
 
1028
+ # Compute mean routes and curves for each cluster
1029
  fig_routes = go.Figure()
1030
+ cluster_stats = []
1031
+
1032
  for i, label in enumerate(unique_labels):
1033
  indices = np.where(selected_labels == label)[0]
1034
  cluster_ids = [storm_ids[j] for j in indices]
 
1037
  mean_route = mean_vector.reshape((max_length, 2))
1038
  mean_lon = mean_route[:, 0]
1039
  mean_lat = mean_route[:, 1]
1040
+
1041
  fig_routes.add_trace(go.Scattergeo(
1042
  lon=mean_lon,
1043
  lat=mean_lat,
 
1045
  line=dict(width=4, color=colors[i % len(colors)]),
1046
  name=f"Cluster {label} Mean Route"
1047
  ))
1048
+
1049
+ # Compute mean curves
1050
  cluster_winds = wind_curves[indices, :]
1051
  cluster_pres = pres_curves[indices, :]
1052
  mean_wind_curve = np.nanmean(cluster_winds, axis=0)
1053
  mean_pres_curve = np.nanmean(cluster_pres, axis=0)
1054
  cluster_stats.append((label, mean_wind_curve, mean_pres_curve))
1055
 
1056
+ # Create cluster stats plot
1057
  x_axis = np.linspace(0, 1, max_length)
1058
  fig_stats = make_subplots(rows=2, cols=1, shared_xaxes=True,
1059
  subplot_titles=("Mean Wind Speed (knots)", "Mean MSLP (hPa)"))
1060
+
1061
  for i, (label, wind_curve, pres_curve) in enumerate(cluster_stats):
1062
  fig_stats.add_trace(go.Scatter(
1063
  x=x_axis,
 
1066
  line=dict(width=2, color=colors[i % len(colors)]),
1067
  name=f"Cluster {label} Mean Wind"
1068
  ), row=1, col=1)
1069
+
1070
  fig_stats.add_trace(go.Scatter(
1071
  x=x_axis,
1072
  y=pres_curve,
 
1074
  line=dict(width=2, color=colors[i % len(colors)]),
1075
  name=f"Cluster {label} Mean MSLP"
1076
  ), row=2, col=1)
1077
+
1078
  fig_stats.update_layout(
1079
  title="Cluster Mean Curves",
1080
  xaxis_title="Normalized Route Index",
 
1086
 
1087
  info = f"TSNE clustering complete. Selected eps: {selected_eps:.2f}. Clusters: {len(unique_labels)}."
1088
  return fig_tsne, fig_routes, fig_stats, info
1089
+
1090
  except Exception as e:
1091
  logging.error(f"Error in TSNE clustering: {e}")
1092
  return go.Figure(), go.Figure(), make_subplots(rows=2, cols=1), f"Error in TSNE clustering: {e}"
1093
 
1094
  # -----------------------------
1095
+ # Animation Functions
1096
  # -----------------------------
1097
+
1098
  def generate_track_video_from_csv(year, storm_id, standard):
1099
+ """Generate track video from CSV data"""
1100
  storm_df = typhoon_data[typhoon_data['SID'] == storm_id].copy()
1101
  if storm_df.empty:
1102
  logging.error(f"No data found for storm: {storm_id}")
1103
  return None
1104
+
1105
  storm_df = storm_df.sort_values('ISO_TIME')
1106
  lats = storm_df['LAT'].astype(float).values
1107
  lons = storm_df['LON'].astype(float).values
1108
  times = pd.to_datetime(storm_df['ISO_TIME']).values
1109
+
1110
  if 'USA_WIND' in storm_df.columns:
1111
  winds = pd.to_numeric(storm_df['USA_WIND'], errors='coerce').values
1112
  else:
1113
  winds = np.full(len(lats), np.nan)
1114
+
1115
  storm_name = storm_df['NAME'].iloc[0]
1116
+ basin = storm_df['SID'].iloc[0][:2]
1117
  season = storm_df['SEASON'].iloc[0]
1118
 
1119
  min_lat, max_lat = np.min(lats), np.max(lats)
 
1136
  point, = ax.plot([], [], 'o', markersize=8, transform=ccrs.PlateCarree())
1137
  date_text = ax.text(0.02, 0.02, '', transform=ax.transAxes, fontsize=10,
1138
  bbox=dict(facecolor='white', alpha=0.8))
 
1139
  storm_info_text = fig.text(0.70, 0.60, '', fontsize=10,
1140
  bbox=dict(facecolor='white', alpha=0.8, boxstyle='round,pad=0.5'))
1141
 
 
1168
 
1169
  ani = animation.FuncAnimation(fig, update, init_func=init, frames=len(times),
1170
  interval=200, blit=True, repeat=True)
1171
+
1172
+ # Create animation file
1173
+ temp_file = tempfile.NamedTemporaryFile(delete=False, suffix='.mp4', dir=DATA_PATH)
1174
+ try:
1175
+ writer = animation.FFMpegWriter(fps=5, bitrate=1800)
1176
+ ani.save(temp_file.name, writer=writer)
1177
+ plt.close(fig)
1178
+ return temp_file.name
1179
+ except Exception as e:
1180
+ logging.error(f"Error creating animation: {e}")
1181
+ plt.close(fig)
1182
+ return None
1183
 
1184
def simplified_track_video(year, basin, typhoon, standard):
    """Resolve a dropdown label to a storm ID and render its track animation.

    Dropdown entries are formatted as "NAME (SID)"; the storm ID sits between
    the last '(' and the trailing ')'. Returns None when no cyclone has been
    selected, otherwise delegates to generate_track_video_from_csv.
    """
    if not typhoon:
        return None
    storm_id = typhoon.rsplit('(', 1)[-1].strip(')')
    return generate_track_video_from_csv(year, storm_id, standard)
1190
 
1191
  # -----------------------------
1192
+ # FIXED: Update Typhoon Options Function
1193
  # -----------------------------
 
 
 
 
 
 
1194
 
1195
def update_typhoon_options_fixed(year, basin):
    """Populate the tropical-cyclone dropdown for a given year and basin.

    Parameters
    ----------
    year : str or int
        Season year selected in the UI; matched against the year of each
        record's ISO_TIME timestamp.
    basin : str
        Either "All Basins" or a label whose basin code is the part before
        " - " (falling back to the label's first two characters).

    Returns
    -------
    gr.update
        Dropdown update whose choices are "NAME (SID)" strings sorted
        alphabetically, with the first sorted entry pre-selected; an empty
        choice list on error or when no storms match.
    """
    try:
        # typhoon_data is a module-level DataFrame loaded at startup.
        if typhoon_data is None or typhoon_data.empty:
            logging.error("No typhoon data available")
            return gr.update(choices=[], value=None)

        # Keep only storms whose observations fall in the selected year.
        year_data = typhoon_data[typhoon_data['ISO_TIME'].dt.year == int(year)].copy()

        if basin != "All Basins":
            # Extract the basin code from the dropdown label.
            basin_code = basin.split(' - ')[0] if ' - ' in basin else basin[:2]
            # IBTrACS SIDs are prefixed with the basin code; prefer that,
            # falling back to an explicit BASIN column when present.
            if 'SID' in year_data.columns:
                year_data = year_data[year_data['SID'].str.startswith(basin_code, na=False)]
            elif 'BASIN' in year_data.columns:
                year_data = year_data[year_data['BASIN'] == basin_code]

        if year_data.empty:
            logging.warning(f"No storms found for year {year} and basin {basin}")
            return gr.update(choices=[], value=None)

        # One row per storm; first() keeps the earliest record's metadata.
        storms = year_data.groupby('SID').first().reset_index()
        options = []
        for _, storm in storms.iterrows():
            name = storm.get('NAME', 'UNNAMED')
            # FIX: also treat whitespace-only names as unnamed (the old
            # check only caught the exact empty string).
            if pd.isna(name) or not str(name).strip():
                name = 'UNNAMED'
            options.append(f"{name} ({storm['SID']})")

        if not options:
            return gr.update(choices=[], value=None)

        # FIX: pre-select the first entry of the *sorted* list so the
        # default value matches the first choice actually displayed
        # (previously value=options[0] came from the unsorted list).
        sorted_options = sorted(options)
        return gr.update(choices=sorted_options, value=sorted_options[0])

    except Exception as e:
        logging.error(f"Error in update_typhoon_options_fixed: {e}")
        return gr.update(choices=[], value=None)
1238
 
1239
# -----------------------------
# Load & Process Data (using fixed functions)
# -----------------------------

# Module-level pipeline run once at import time: refresh the ONI dataset,
# load the ONI and typhoon CSVs, then derive the three tables the Gradio
# callbacks read as globals. Based on the helper names, oni_long is
# presumably a long-format reshape of the ONI table, typhoon_max a
# per-storm reduction, and merged_data their join — confirm against the
# helper definitions earlier in the file.
logging.info("Starting data loading process...")
update_oni_data()
oni_data, typhoon_data = load_data_fixed(ONI_DATA_PATH, TYPHOON_DATA_PATH)
oni_long = process_oni_data(oni_data)
typhoon_max = process_typhoon_data(typhoon_data)
merged_data = merge_data(oni_long, typhoon_max)
logging.info("Data loading complete.")
1250
+
1251
  # -----------------------------
1252
  # Gradio Interface
1253
  # -----------------------------
1254
+
1255
  with gr.Blocks(title="Typhoon Analysis Dashboard") as demo:
1256
  gr.Markdown("# Typhoon Analysis Dashboard")
1257
 
 
1266
  - **Wind Analysis**: Examine wind speed vs ONI relationships.
1267
  - **Pressure Analysis**: Analyze pressure vs ONI relationships.
1268
  - **Longitude Analysis**: Study typhoon generation longitude vs ONI.
1269
+ - **Path Animation**: View animated storm tracks on a world map.
1270
+ - **TSNE Cluster**: Perform t-SNE clustering on storm routes.
1271
+
1272
+ ### Data Status:
1273
+ - **ONI Data**: %d years loaded
1274
+ - **Typhoon Data**: %d records loaded
1275
+ - **Merged Data**: %d typhoons with ONI values
1276
+ """ % (len(oni_data), len(typhoon_data), len(merged_data)))
1277
 
1278
  with gr.Tab("Track Visualization"):
1279
  with gr.Row():
 
1339
  with gr.Tab("Tropical Cyclone Path Animation"):
1340
  with gr.Row():
1341
  year_dropdown = gr.Dropdown(label="Year", choices=[str(y) for y in range(1950, 2025)], value="2000")
 
1342
  basin_constant = gr.Textbox(value="All Basins", visible=False)
1343
  with gr.Row():
1344
  typhoon_dropdown = gr.Dropdown(label="Tropical Cyclone")
 
1347
  path_video = gr.Video(label="Tropical Cyclone Path Animation", format="mp4", interactive=False, elem_id="path_video")
1348
  animation_info = gr.Markdown("""
1349
  ### Animation Instructions
1350
+ 1. Select a year.
1351
  2. Choose a tropical cyclone from the populated list.
1352
  3. Select a classification standard (Atlantic or Taiwan).
1353
  4. Click "Generate Animation".
1354
+ 5. The animation displays the storm track on a world map with dynamic sidebar information.
 
1355
  """)
1356
+ # Update typhoon dropdown using fixed function
1357
+ year_dropdown.change(fn=update_typhoon_options_fixed,
1358
+ inputs=[year_dropdown, basin_constant],
1359
+ outputs=typhoon_dropdown)
1360
  animate_btn.click(fn=simplified_track_video,
1361
  inputs=[year_dropdown, basin_constant, typhoon_dropdown, standard_dropdown],
1362
  outputs=path_video)
 
1378
  inputs=[tsne_start_year, tsne_start_month, tsne_end_year, tsne_end_month, tsne_enso_phase, tsne_season],
1379
  outputs=[tsne_plot, routes_plot, stats_plot, cluster_info])
1380
 
1381
# Entry point: start the Gradio server; share=True requests a public URL.
if __name__ == "__main__":
    demo.launch(share=True)