import gradio as gr
import plotly.graph_objects as go
import plotly.express as px
import pandas as pd
import numpy as np
from datetime import datetime
from scipy import stats
from scipy.interpolate import interp1d
from scipy.optimize import curve_fit  # needed by the Fourier fit in cluster_analysis()
from sklearn.linear_model import LinearRegression
from sklearn.cluster import KMeans
from fractions import Fraction
import statsmodels.api as sm
import tropycal.tracks as tracks
import os
import pickle
import requests
import tempfile
import shutil
import filecmp
import csv
from collections import defaultdict
import argparse

# Command-line argument parsing
parser = argparse.ArgumentParser(description='Typhoon Analysis Dashboard')
parser.add_argument('--data_path', type=str, default=os.getcwd(), help='Path to the data directory')
args = parser.parse_args()
DATA_PATH = args.data_path

# File paths
ONI_DATA_PATH = os.path.join(DATA_PATH, 'oni_data.csv')
TYPHOON_DATA_PATH = os.path.join(DATA_PATH, 'processed_typhoon_data.csv')
LOCAL_iBtrace_PATH = os.path.join(DATA_PATH, 'ibtracs.WP.list.v04r01.csv')
iBtrace_uri = 'https://www.ncei.noaa.gov/data/international-best-track-archive-for-climate-stewardship-ibtracs/v04r01/access/csv/ibtracs.WP.list.v04r01.csv'
CACHE_FILE = 'ibtracs_cache.pkl'
CACHE_EXPIRY_DAYS = 1

# Color map for categories
color_map = {
    'C5 Super Typhoon': 'rgb(255, 0, 0)',
    'C4 Very Strong Typhoon': 'rgb(255, 63, 0)',
    'C3 Strong Typhoon': 'rgb(255, 127, 0)',
    'C2 Typhoon': 'rgb(255, 191, 0)',
    'C1 Typhoon': 'rgb(255, 255, 0)',
    'Tropical Storm': 'rgb(0, 255, 255)',
    'Tropical Depression': 'rgb(173, 216, 230)'
}

# Classification standards (wind thresholds in knots for the Atlantic standard,
# m/s for the Taiwan standard)
atlantic_standard = {
    'C5 Super Typhoon': {'wind_speed': 137, 'color': 'rgb(255, 0, 0)'},
    'C4 Very Strong Typhoon': {'wind_speed': 113, 'color': 'rgb(255, 63, 0)'},
    'C3 Strong Typhoon': {'wind_speed': 96, 'color': 'rgb(255, 127, 0)'},
    'C2 Typhoon': {'wind_speed': 83, 'color': 'rgb(255, 191, 0)'},
    'C1 Typhoon': {'wind_speed': 64, 'color': 'rgb(255, 255, 0)'},
    'Tropical Storm': {'wind_speed': 34, 'color': 'rgb(0, 255, 255)'},
    'Tropical Depression': {'wind_speed': 0, 'color': 'rgb(173, 216, 230)'}
}

taiwan_standard = {
    'Strong Typhoon': {'wind_speed': 51.0, 'color': 'rgb(255, 0, 0)'},
    'Medium Typhoon': {'wind_speed': 33.7, 'color': 'rgb(255, 127, 0)'},
    'Mild Typhoon': {'wind_speed': 17.2, 'color': 'rgb(255, 255, 0)'},
    'Tropical Depression': {'wind_speed': 0, 'color': 'rgb(173, 216, 230)'}
}

# Data loading and processing functions (unchanged from Dash)
def convert_typhoondata(input_file, output_file):
    with open(input_file, 'r') as infile:
        # Skip the two header rows (column names and units)
        next(infile)
        next(infile)
        reader = csv.reader(infile)
        sid_data = defaultdict(list)
        for row in reader:
            if not row:
                continue
            sid = row[0]
            iso_time = row[6]
            sid_data[sid].append((row, iso_time))
    with open(output_file, 'w', newline='') as outfile:
        fieldnames = ['SID', 'ISO_TIME', 'LAT', 'LON', 'SEASON', 'NAME', 'WMO_WIND',
                      'WMO_PRES', 'USA_WIND', 'USA_PRES', 'START_DATE', 'END_DATE']
        writer = csv.DictWriter(outfile, fieldnames=fieldnames)
        writer.writeheader()
        for sid, data in sid_data.items():
            start_date = min(data, key=lambda x: x[1])[1]
            end_date = max(data, key=lambda x: x[1])[1]
            for row, iso_time in data:
                writer.writerow({
                    'SID': row[0],
                    'ISO_TIME': iso_time,
                    'LAT': row[8],
                    'LON': row[9],
                    'SEASON': row[1],
                    'NAME': row[5],
                    'WMO_WIND': row[10].strip() or ' ',
                    'WMO_PRES': row[11].strip() or ' ',
                    'USA_WIND': row[23].strip() or ' ',
                    'USA_PRES': row[24].strip() or ' ',
                    'START_DATE': start_date,
                    'END_DATE': end_date
                })
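# The hard-coded indices above assume the IBTrACS v04 CSV column order:
# row[0]=SID, row[1]=SEASON, row[5]=NAME, row[6]=ISO_TIME, row[8]=LAT,
# row[9]=LON, row[10]=WMO_WIND, row[11]=WMO_PRES, row[23]=USA_WIND,
# row[24]=USA_PRES. If NOAA changes the column layout in a future release,
# these indices must be updated to match.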
def download_oni_file(url, filename):
    try:
        response = requests.get(url)
        response.raise_for_status()
        with open(filename, 'wb') as f:
            f.write(response.content)
        return True
    except requests.RequestException:
        return False

def convert_oni_ascii_to_csv(input_file, output_file):
    data = defaultdict(lambda: [''] * 12)
    season_to_month = {'DJF': 12, 'JFM': 1, 'FMA': 2, 'MAM': 3, 'AMJ': 4, 'MJJ': 5,
                       'JJA': 6, 'JAS': 7, 'ASO': 8, 'SON': 9, 'OND': 10, 'NDJ': 11}
    with open(input_file, 'r') as f:
        lines = f.readlines()[1:]
    for line in lines:
        parts = line.split()
        if len(parts) >= 4:
            season, year, anom = parts[0], parts[1], parts[-1]
            if season in season_to_month:
                month = season_to_month[season]
                if season == 'DJF':
                    # DJF is centered on December of the *previous* year
                    year = str(int(year) - 1)
                data[year][month - 1] = anom
    with open(output_file, 'w', newline='') as f:
        writer = csv.writer(f)
        writer.writerow(['Year', 'Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun',
                        'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'])
        for year in sorted(data.keys()):
            writer.writerow([year] + data[year])

def update_oni_data():
    url = "https://www.cpc.ncep.noaa.gov/data/indices/oni.ascii.txt"
    temp_file = os.path.join(DATA_PATH, "temp_oni.ascii.txt")
    input_file = os.path.join(DATA_PATH, "oni.ascii.txt")
    output_file = ONI_DATA_PATH
    if download_oni_file(url, temp_file):
        if not os.path.exists(input_file) or not filecmp.cmp(temp_file, input_file, shallow=False):
            os.replace(temp_file, input_file)
            convert_oni_ascii_to_csv(input_file, output_file)
        else:
            os.remove(temp_file)

def load_ibtracs_data():
    if os.path.exists(CACHE_FILE) and (datetime.now() - datetime.fromtimestamp(os.path.getmtime(CACHE_FILE))).days < CACHE_EXPIRY_DAYS:
        with open(CACHE_FILE, 'rb') as f:
            return pickle.load(f)
    if os.path.exists(LOCAL_iBtrace_PATH):
        ibtracs = tracks.TrackDataset(basin='west_pacific', source='ibtracs', ibtracs_url=LOCAL_iBtrace_PATH)
    else:
        response = requests.get(iBtrace_uri)
        response.raise_for_status()
        with tempfile.NamedTemporaryFile(mode='w', delete=False, suffix='.csv') as temp_file:
            temp_file.write(response.text)
            temp_file_path = temp_file.name
        shutil.move(temp_file_path, LOCAL_iBtrace_PATH)
        ibtracs = tracks.TrackDataset(basin='west_pacific', source='ibtracs', ibtracs_url=LOCAL_iBtrace_PATH)
    with open(CACHE_FILE, 'wb') as f:
        pickle.dump(ibtracs, f)
    return ibtracs

def process_oni_data(oni_data):
    oni_long = oni_data.melt(id_vars=['Year'], var_name='Month', value_name='ONI')
    month_map = {'Jan': '01', 'Feb': '02', 'Mar': '03', 'Apr': '04', 'May': '05', 'Jun': '06',
                 'Jul': '07', 'Aug': '08', 'Sep': '09', 'Oct': '10', 'Nov': '11', 'Dec': '12'}
    oni_long['Month'] = oni_long['Month'].map(month_map)
    oni_long['Date'] = pd.to_datetime(oni_long['Year'].astype(str) + '-' + oni_long['Month'] + '-01')
    oni_long['ONI'] = pd.to_numeric(oni_long['ONI'], errors='coerce')
    return oni_long

def process_typhoon_data(typhoon_data):
    typhoon_data['ISO_TIME'] = pd.to_datetime(typhoon_data['ISO_TIME'], errors='coerce')
    typhoon_data['USA_WIND'] = pd.to_numeric(typhoon_data['USA_WIND'], errors='coerce')
    typhoon_data['USA_PRES'] = pd.to_numeric(typhoon_data['USA_PRES'], errors='coerce')
    typhoon_data['LON'] = pd.to_numeric(typhoon_data['LON'], errors='coerce')
    typhoon_max = typhoon_data.groupby('SID').agg({
        'USA_WIND': 'max', 'USA_PRES': 'min', 'ISO_TIME': 'first', 'SEASON': 'first',
        'NAME': 'first', 'LAT': 'first', 'LON': 'first'
    }).reset_index()
    typhoon_max['Month'] = typhoon_max['ISO_TIME'].dt.strftime('%m')
    typhoon_max['Year'] = typhoon_max['ISO_TIME'].dt.year
    typhoon_max['Category'] = typhoon_max['USA_WIND'].apply(categorize_typhoon)
    return typhoon_max

def merge_data(oni_long, typhoon_max):
    return pd.merge(typhoon_max, oni_long, on=['Year', 'Month'])
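# Note on the merge keys: process_typhoon_data() builds 'Month' with
# strftime('%m') (zero-padded strings '01'-'12'), and process_oni_data() maps
# month names onto the same zero-padded strings, so the Year/Month join in
# merge_data() lines up without any extra casting.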
def categorize_typhoon(wind_speed):
    # USA_WIND is already in knots, so compare directly against the
    # Saffir-Simpson thresholds defined in atlantic_standard above.
    if wind_speed >= 137:
        return 'C5 Super Typhoon'
    elif wind_speed >= 113:
        return 'C4 Very Strong Typhoon'
    elif wind_speed >= 96:
        return 'C3 Strong Typhoon'
    elif wind_speed >= 83:
        return 'C2 Typhoon'
    elif wind_speed >= 64:
        return 'C1 Typhoon'
    elif wind_speed >= 34:
        return 'Tropical Storm'
    else:
        return 'Tropical Depression'

def classify_enso_phases(oni_value):
    if isinstance(oni_value, pd.Series):
        oni_value = oni_value.iloc[0]
    if oni_value >= 0.5:
        return 'El Nino'
    elif oni_value <= -0.5:
        return 'La Nina'
    else:
        return 'Neutral'

def filter_west_pacific_coordinates(lons, lats):
    mask = (100 <= lons) & (lons <= 180) & (0 <= lats) & (lats <= 40)
    return lons[mask], lats[mask]

def get_storm_data(storm_id):
    return ibtracs.get_storm(storm_id)

# Load data globally
update_oni_data()
ibtracs = load_ibtracs_data()
convert_typhoondata(LOCAL_iBtrace_PATH, TYPHOON_DATA_PATH)
oni_data = pd.read_csv(ONI_DATA_PATH)
typhoon_data = pd.read_csv(TYPHOON_DATA_PATH, low_memory=False)
oni_long = process_oni_data(oni_data)
typhoon_max = process_typhoon_data(typhoon_data)
merged_data = merge_data(oni_long, typhoon_max)
# Date-indexed ONI series for time-range lookups; built from the long format,
# since the wide oni_data.csv has no 'Date' column to parse.
oni_df = oni_long.set_index('Date')

# Main Analysis Function
def main_analysis(start_year, start_month, end_year, end_month, enso_phase, typhoon_search):
    start_date = datetime(start_year, start_month, 1)
    end_date = datetime(end_year, end_month, 28)  # day 28 avoids month-length edge cases
    filtered_oni_df = oni_df[(oni_df.index >= start_date) & (oni_df.index <= end_date)]
    filtered_data = merged_data[(merged_data['Year'] >= start_year) &
                                (merged_data['Year'] <= end_year) &
                                (merged_data['Month'].astype(int) >= start_month) &
                                (merged_data['Month'].astype(int) <= end_month)]

    # Typhoon Tracks
    fig_tracks = go.Figure()
    regression_data = {'El Nino': {'longitudes': [], 'oni_values': [], 'names': []},
                       'La Nina': {'longitudes': [], 'oni_values': [], 'names': []},
                       'Neutral': {'longitudes': [], 'oni_values': [], 'names': []},
                       'All': {'longitudes': [], 'oni_values': [], 'names': []}}
    for year in range(start_year, end_year + 1):
        season = ibtracs.get_season(year)
        for storm_id in season.summary()['id']:
            storm = get_storm_data(storm_id)
            storm_dates = storm.time
            if any(start_date <= date <= end_date for date in storm_dates):
                storm_oni = filtered_oni_df.loc[storm_dates[0].strftime('%Y-%b')]['ONI']
                if isinstance(storm_oni, pd.Series):
                    storm_oni = storm_oni.iloc[0]
                phase = classify_enso_phases(storm_oni)
                regression_data[phase]['longitudes'].append(storm.lon[0])
                regression_data[phase]['oni_values'].append(storm_oni)
                regression_data[phase]['names'].append(f'{storm.name} ({year})')
                regression_data['All']['longitudes'].append(storm.lon[0])
                regression_data['All']['oni_values'].append(storm_oni)
                regression_data['All']['names'].append(f'{storm.name} ({year})')
                if (enso_phase == 'All Years' or
                        (enso_phase == 'El Niño Years' and phase == 'El Nino') or
                        (enso_phase == 'La Niña Years' and phase == 'La Nina') or
                        (enso_phase == 'Neutral Years' and phase == 'Neutral')):
                    color = {'El Nino': 'red', 'La Nina': 'blue', 'Neutral': 'green'}[phase]
                    fig_tracks.add_trace(go.Scattergeo(lon=storm.lon, lat=storm.lat, mode='lines',
                                                       name=storm.name, text=f'{storm.name} ({year})',
                                                       hoverinfo='text', line=dict(width=2, color=color)))
    fig_tracks.update_layout(title=f'Typhoon Tracks from {start_year}-{start_month} to {end_year}-{end_month}',
                             geo=dict(projection_type='natural earth', showland=True))

    # All Years Regression
    all_years_fig = go.Figure()
    df_all = pd.DataFrame({'Longitude': regression_data['All']['longitudes'],
                           'ONI': regression_data['All']['oni_values'],
                           'Name': regression_data['All']['names']})
    if not df_all.empty and len(df_all) > 1:
        all_years_fig = px.scatter(df_all, x='Longitude', y='ONI', hover_data=['Name'],
                                   title='All Years Typhoon Generation vs. ONI')
        X = np.array(df_all['Longitude']).reshape(-1, 1)
        y = df_all['ONI']
        model = LinearRegression().fit(X, y)
        y_pred = model.predict(X)
        all_years_fig.add_trace(go.Scatter(x=df_all['Longitude'], y=y_pred, mode='lines', name='Regression Line'))

    # Regression Graphs by Phase
    regression_html = ""
    slopes_html = ""
    for phase in ['El Nino', 'La Nina', 'Neutral']:
        df = pd.DataFrame({'Longitude': regression_data[phase]['longitudes'],
                           'ONI': regression_data[phase]['oni_values'],
                           'Name': regression_data[phase]['names']})
        if not df.empty and len(df) > 1:
            fig = px.scatter(df, x='Longitude', y='ONI', hover_data=['Name'],
                             title=f'{phase} Typhoon Generation vs. ONI')
            X = np.array(df['Longitude']).reshape(-1, 1)
            y = df['ONI']
            model = LinearRegression().fit(X, y)
            y_pred = model.predict(X)
            slope = model.coef_[0]
            correlation_coef = np.corrcoef(df['Longitude'], df['ONI'])[0, 1]
            fig.add_trace(go.Scatter(x=df['Longitude'], y=y_pred, mode='lines', name='Regression Line'))
            regression_html += fig.to_html(include_plotlyjs=False)
            slopes_html += f"{phase} Regression Slope: {slope:.4f}, Correlation Coefficient: {correlation_coef:.4f}<br>"
" # Wind and Pressure Scatter Plots wind_oni_scatter = px.scatter(filtered_data, x='ONI', y='USA_WIND', color='Category', hover_data=['NAME', 'Year', 'Category'], title='Wind Speed vs ONI', labels={'USA_WIND': 'Maximum Wind Speed (knots)'}, color_discrete_map=color_map) pressure_oni_scatter = px.scatter(filtered_data, x='ONI', y='USA_PRES', color='Category', hover_data=['NAME', 'Year', 'Category'], title='Pressure vs ONI', labels={'USA_PRES': 'Minimum Pressure (hPa)'}, color_discrete_map=color_map) if typhoon_search: for fig in [wind_oni_scatter, pressure_oni_scatter]: mask = filtered_data['NAME'].str.contains(typhoon_search, case=False, na=False) fig.add_trace(go.Scatter(x=filtered_data.loc[mask, 'ONI'], y=filtered_data.loc[mask, 'USA_WIND' if 'Wind' in fig.layout.title.text else 'USA_PRES'], mode='markers', marker=dict(size=10, color='red', symbol='star'), name=f'Matched: {typhoon_search}')) # Additional Metrics max_wind_speed = filtered_data['USA_WIND'].max() min_pressure = filtered_data['USA_PRES'].min() typhoon_counts = filtered_data['ONI'].apply(classify_enso_phases).value_counts().to_dict() month_counts = filtered_data.groupby([filtered_data['ONI'].apply(classify_enso_phases), filtered_data['ISO_TIME'].dt.month]).size().unstack(fill_value=0) concentrated_months = month_counts.idxmax(axis=1).to_dict() month_names = ['Jan', 'Feb', 'Mar', 'Apr', 'May', 'Jun', 'Jul', 'Aug', 'Sep', 'Oct', 'Nov', 'Dec'] count_analysis_html = "".join([f"{phase}: {count} typhoons
" for phase, count in typhoon_counts.items()]) month_analysis_html = "".join([f"{phase}: Most concentrated in {month_names[month-1]}
" for phase, month in concentrated_months.items()]) return (fig_tracks, all_years_fig, regression_html, slopes_html, wind_oni_scatter, pressure_oni_scatter, "Logistic Regression Results: See Logistic Regression Tab", f"Maximum Wind Speed: {max_wind_speed:.2f} knots", f"Minimum Pressure: {min_pressure:.2f} hPa", "Wind-ONI correlation: See Logistic Regression Tab", "Pressure-ONI correlation: See Logistic Regression Tab", count_analysis_html, month_analysis_html) # Cluster Analysis Function def cluster_analysis(n_clusters, show_clusters, show_routes, fourier_series, start_year, start_month, end_year, end_month, enso_phase): start_date = datetime(start_year, start_month, 1) end_date = datetime(end_year, end_month, 28) filtered_oni_df = oni_df[(oni_df.index >= start_date) & (oni_df.index <= end_date)] fig_routes = go.Figure() west_pacific_storms = [] for year in range(start_year, end_year + 1): season = ibtracs.get_season(year) for storm_id in season.summary()['id']: storm = get_storm_data(storm_id) storm_date = storm.time[0] storm_oni = filtered_oni_df.loc[storm_date.strftime('%Y-%b')]['ONI'] if isinstance(storm_oni, pd.Series): storm_oni = storm_oni.iloc[0] storm_phase = classify_enso_phases(storm_oni) if (enso_phase == 'All Years' or (enso_phase == 'El Niño Years' and storm_phase == 'El Nino') or (enso_phase == 'La Niña Years' and storm_phase == 'La Nina') or (enso_phase == 'Neutral Years' and storm_phase == 'Neutral')): lons, lats = filter_west_pacific_coordinates(np.array(storm.lon), np.array(storm.lat)) if len(lons) > 1: west_pacific_storms.append((lons, lats)) max_length = max(len(storm[0]) for storm in west_pacific_storms) standardized_routes = [] for lons, lats in west_pacific_storms: if len(lons) < 2: continue t = np.linspace(0, 1, len(lons)) t_new = np.linspace(0, 1, max_length) lon_interp = interp1d(t, lons, kind='linear')(t_new) lat_interp = interp1d(t, lats, kind='linear')(t_new) route_vector = np.column_stack((lon_interp, lat_interp)).flatten() standardized_routes.append(route_vector) kmeans = KMeans(n_clusters=n_clusters, random_state=42, n_init=10) clusters = kmeans.fit_predict(standardized_routes) cluster_counts = np.bincount(clusters) equations_html = "" if show_routes: for lons, lats in west_pacific_storms: fig_routes.add_trace(go.Scattergeo(lon=lons, lat=lats, mode='lines', line=dict(width=1, color='lightgray'), showlegend=False, hoverinfo='none')) if show_clusters: for i in range(n_clusters): cluster_center = kmeans.cluster_centers_[i].reshape(-1, 2) fig_routes.add_trace(go.Scattergeo(lon=cluster_center[:, 0], lat=cluster_center[:, 1], mode='lines', name=f'Cluster {i+1} (n={cluster_counts[i]})', line=dict(width=3))) if fourier_series: X = cluster_center[:, 0] y = cluster_center[:, 1] x_min, x_max = X.min(), X.max() X_normalized = 2 * np.pi * (X - x_min) / (x_max - x_min) params, _ = curve_fit(lambda x, a0, a1, b1, a2, b2, a3, b3, a4, b4: a0 + a1*np.cos(x) + b1*np.sin(x) + a2*np.cos(2*x) + b2*np.sin(2*x) + a3*np.cos(3*x) + b3*np.sin(3*x) + a4*np.cos(4*x) + b4*np.sin(4*x), X_normalized, y) a0, a1, b1, a2, b2, a3, b3, a4, b4 = params equations_html += f"Fourier Series: y = {a0:.4f} + {a1:.4f}*cos(x) + {b1:.4f}*sin(x) + " \ f"{a2:.4f}*cos(2x) + {b2:.4f}*sin(2x) + {a3:.4f}*cos(3x) + {b3:.4f}*sin(3x) + {a4:.4f}*cos(4x) + {b4:.4f}*sin(4x)
" \ f"X Range: 0 to {2*np.pi:.4f}
Longitude Range: {x_min:.4f}°E to {x_max:.4f}°E
β1: {beta_1:.4f}
Odds Ratio: {exp_beta_1:.4f}
P-value: {p_value:.4f}
" \ f"El Niño: {el_nino_severe:.2%}
La Niña: {la_nina_severe:.2%}
Neutral: {neutral_severe:.2%}
" elif regression_type == 'Pressure': filtered_data['intense_typhoon'] = (filtered_data['USA_PRES'] <= 950).astype(int) X = sm.add_constant(filtered_data['ONI']) y = filtered_data['intense_typhoon'] model = sm.Logit(y, X).fit() beta_1, exp_beta_1, p_value = model.params['ONI'], np.exp(model.params['ONI']), model.pvalues['ONI'] el_nino_intense = filtered_data[filtered_data['ONI'] >= 0.5]['intense_typhoon'].mean() la_nina_intense = filtered_data[filtered_data['ONI'] <= -0.5]['intense_typhoon'].mean() neutral_intense = filtered_data[(filtered_data['ONI'] > -0.5) & (filtered_data['ONI'] < 0.5)]['intense_typhoon'].mean() return f"β1: {beta_1:.4f}
Odds Ratio: {exp_beta_1:.4f}
P-value: {p_value:.4f}
" \ f"El Niño: {el_nino_intense:.2%}
La Niña: {la_nina_intense:.2%}
Neutral: {neutral_intense:.2%}
" elif regression_type == 'Longitude': filtered_data = filtered_data.dropna(subset=['LON']) filtered_data['western_typhoon'] = (filtered_data['LON'] <= 140).astype(int) X = sm.add_constant(filtered_data['ONI']) y = filtered_data['western_typhoon'] model = sm.Logit(y, X).fit() beta_1, exp_beta_1, p_value = model.params['ONI'], np.exp(model.params['ONI']), model.pvalues['ONI'] el_nino_western = filtered_data[filtered_data['ONI'] >= 0.5]['western_typhoon'].mean() la_nina_western = filtered_data[filtered_data['ONI'] <= -0.5]['western_typhoon'].mean() neutral_western = filtered_data[(filtered_data['ONI'] > -0.5) & (filtered_data['ONI'] < 0.5)]['western_typhoon'].mean() return f"β1: {beta_1:.4f}
Odds Ratio: {exp_beta_1:.4f}
P-value: {p_value:.4f}
" \ f"El Niño: {el_nino_western:.2%}
La Niña: {la_nina_western:.2%}
Neutral: {neutral_western:.2%}
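# The animation below calls categorize_typhoon_by_standard(), which does not
# appear in this file. A minimal sketch is given here, assuming `standard`
# selects between the two dicts defined at the top ('taiwan' vs. the default
# Atlantic scale) and that tropycal's vmax is in knots, so the Taiwan branch
# (whose thresholds are in m/s) converts first (0.514444 m/s per knot).
def categorize_typhoon_by_standard(wind_speed, standard):
    """Return a (category, color) pair for a wind speed under the given standard."""
    if standard == 'taiwan':
        wind_speed_ms = wind_speed * 0.514444  # knots -> m/s (assumed input unit)
        for category, info in taiwan_standard.items():  # dicts are ordered strongest-first
            if wind_speed_ms >= info['wind_speed']:
                return category, info['color']
        return 'Tropical Depression', taiwan_standard['Tropical Depression']['color']
    for category, info in atlantic_standard.items():  # dicts are ordered strongest-first
        if wind_speed >= info['wind_speed']:
            return category, info['color']
    return 'Tropical Depression', atlantic_standard['Tropical Depression']['color']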
" # Typhoon Path Animation Function def typhoon_path_animation(year, typhoon, standard): storm = ibtracs.get_storm(typhoon) fig = go.Figure() fig.add_trace(go.Scattergeo(lon=storm.lon, lat=storm.lat, mode='lines', line=dict(width=2, color='gray'), name='Path', showlegend=False)) fig.add_trace(go.Scattergeo(lon=[storm.lon[0]], lat=[storm.lat[0]], mode='markers', marker=dict(size=10, color='green', symbol='star'), name='Starting Point', text=storm.time[0].strftime('%Y-%m-%d %H:%M'), hoverinfo='text+name')) frames = [] for i in range(len(storm.time)): category, color = categorize_typhoon_by_standard(storm.vmax[i], standard) frame_data = [ go.Scattergeo(lon=storm.lon[:i+1], lat=storm.lat[:i+1], mode='lines', line=dict(width=2, color='blue'), name='Path Traveled', showlegend=False), go.Scattergeo(lon=[storm.lon[i]], lat=[storm.lat[i]], mode='markers+text', marker=dict(size=10, color=color, symbol='star'), text=category, textposition="top center", name='Current Location', hovertext=f"{storm.time[i].strftime('%Y-%m-%d %H:%M')}