Spaces:
Sleeping
Sleeping
Update app.py
Browse files
app.py
CHANGED
@@ -335,9 +335,10 @@ def examine_ibtracs_structure(file_path):
|
|
335 |
for i, line in enumerate(lines[:5]):
|
336 |
logging.info(f"Line {i}: {line.strip()}")
|
337 |
|
338 |
-
#
|
339 |
-
|
340 |
-
|
|
|
341 |
|
342 |
return list(df.columns)
|
343 |
except Exception as e:
|
@@ -362,61 +363,16 @@ def load_ibtracs_csv_directly(basin='WP'):
|
|
362 |
logging.error("Could not examine IBTrACS file structure")
|
363 |
return None
|
364 |
|
365 |
-
# Read IBTrACS CSV
|
366 |
-
#
|
367 |
logging.info(f"Reading IBTrACS CSV file: {local_path}")
|
368 |
-
df = pd.read_csv(local_path, low_memory=False
|
369 |
|
370 |
logging.info(f"Original columns: {list(df.columns)}")
|
371 |
logging.info(f"Data shape before cleaning: {df.shape}")
|
372 |
|
373 |
-
#
|
374 |
-
|
375 |
-
column_mapping = {}
|
376 |
-
|
377 |
-
# Look for common variations of column names
|
378 |
-
for col in df.columns:
|
379 |
-
col_upper = col.upper()
|
380 |
-
if 'SID' in col_upper or col_upper == 'STORM_ID':
|
381 |
-
column_mapping[col] = 'SID'
|
382 |
-
elif 'SEASON' in col_upper and col_upper != 'SUB_SEASON':
|
383 |
-
column_mapping[col] = 'SEASON'
|
384 |
-
elif 'NAME' in col_upper and 'FILE' not in col_upper:
|
385 |
-
column_mapping[col] = 'NAME'
|
386 |
-
elif 'ISO_TIME' in col_upper or col_upper == 'TIME':
|
387 |
-
column_mapping[col] = 'ISO_TIME'
|
388 |
-
elif col_upper == 'LAT' or 'LATITUDE' in col_upper:
|
389 |
-
column_mapping[col] = 'LAT'
|
390 |
-
elif col_upper == 'LON' or 'LONGITUDE' in col_upper:
|
391 |
-
column_mapping[col] = 'LON'
|
392 |
-
elif 'USA_WIND' in col_upper or col_upper == 'WIND':
|
393 |
-
column_mapping[col] = 'USA_WIND'
|
394 |
-
elif 'USA_PRES' in col_upper or col_upper == 'PRESSURE':
|
395 |
-
column_mapping[col] = 'USA_PRES'
|
396 |
-
elif 'BASIN' in col_upper and 'SUB' not in col_upper:
|
397 |
-
column_mapping[col] = 'BASIN'
|
398 |
-
|
399 |
-
# Rename columns
|
400 |
-
df = df.rename(columns=column_mapping)
|
401 |
-
logging.info(f"Mapped columns: {list(df.columns)}")
|
402 |
-
|
403 |
-
# If we still don't have essential columns, try creating them
|
404 |
-
if 'SID' not in df.columns:
|
405 |
-
# Try to create SID from other columns
|
406 |
-
possible_sid_cols = [col for col in df.columns if 'id' in col.lower() or 'sid' in col.lower()]
|
407 |
-
if possible_sid_cols:
|
408 |
-
df['SID'] = df[possible_sid_cols[0]]
|
409 |
-
logging.info(f"Created SID from {possible_sid_cols[0]}")
|
410 |
-
|
411 |
-
if 'ISO_TIME' not in df.columns:
|
412 |
-
# Look for time-related columns
|
413 |
-
time_cols = [col for col in df.columns if 'time' in col.lower() or 'date' in col.lower()]
|
414 |
-
if time_cols:
|
415 |
-
df['ISO_TIME'] = df[time_cols[0]]
|
416 |
-
logging.info(f"Created ISO_TIME from {time_cols[0]}")
|
417 |
-
|
418 |
-
# Ensure we have minimum required columns
|
419 |
-
required_cols = ['LAT', 'LON']
|
420 |
available_required = [col for col in required_cols if col in df.columns]
|
421 |
|
422 |
if len(available_required) < 2:
|
@@ -428,7 +384,7 @@ def load_ibtracs_csv_directly(basin='WP'):
|
|
428 |
df['ISO_TIME'] = pd.to_datetime(df['ISO_TIME'], errors='coerce')
|
429 |
|
430 |
# Clean numeric columns
|
431 |
-
numeric_columns = ['LAT', 'LON', 'USA_WIND', 'USA_PRES']
|
432 |
for col in numeric_columns:
|
433 |
if col in df.columns:
|
434 |
df[col] = pd.to_numeric(df[col], errors='coerce')
|
@@ -1372,7 +1328,8 @@ logging.info("Data loading complete.")
|
|
1372 |
# Gradio Interface
|
1373 |
# -----------------------------
|
1374 |
|
1375 |
-
|
|
|
1376 |
gr.Markdown("# Typhoon Analysis Dashboard")
|
1377 |
|
1378 |
with gr.Tab("Overview"):
|
@@ -1499,4 +1456,5 @@ with gr.Blocks(title="Typhoon Analysis Dashboard") as demo:
|
|
1499 |
outputs=[tsne_plot, routes_plot, stats_plot, cluster_info])
|
1500 |
|
1501 |
if __name__ == "__main__":
|
1502 |
-
|
|
|
|
335 |
for i, line in enumerate(lines[:5]):
|
336 |
logging.info(f"Line {i}: {line.strip()}")
|
337 |
|
338 |
+
# The first line contains the actual column headers
|
339 |
+
# No need to skip rows for IBTrACS v04r01
|
340 |
+
df = pd.read_csv(file_path, nrows=5)
|
341 |
+
logging.info(f"Columns from first row: {list(df.columns)}")
|
342 |
|
343 |
return list(df.columns)
|
344 |
except Exception as e:
|
|
|
363 |
logging.error("Could not examine IBTrACS file structure")
|
364 |
return None
|
365 |
|
366 |
+
# Read IBTrACS CSV - DON'T skip any rows for v04r01
|
367 |
+
# The first row contains proper column headers
|
368 |
logging.info(f"Reading IBTrACS CSV file: {local_path}")
|
369 |
+
df = pd.read_csv(local_path, low_memory=False) # Don't skip any rows
|
370 |
|
371 |
logging.info(f"Original columns: {list(df.columns)}")
|
372 |
logging.info(f"Data shape before cleaning: {df.shape}")
|
373 |
|
374 |
+
# Check which essential columns exist
|
375 |
+
required_cols = ['SID', 'ISO_TIME', 'LAT', 'LON']
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
376 |
available_required = [col for col in required_cols if col in df.columns]
|
377 |
|
378 |
if len(available_required) < 2:
|
|
|
384 |
df['ISO_TIME'] = pd.to_datetime(df['ISO_TIME'], errors='coerce')
|
385 |
|
386 |
# Clean numeric columns
|
387 |
+
numeric_columns = ['LAT', 'LON', 'WMO_WIND', 'WMO_PRES', 'USA_WIND', 'USA_PRES']
|
388 |
for col in numeric_columns:
|
389 |
if col in df.columns:
|
390 |
df[col] = pd.to_numeric(df[col], errors='coerce')
|
|
|
1328 |
# Gradio Interface
|
1329 |
# -----------------------------
|
1330 |
|
1331 |
+
# Fix the Gradio interface creation with explicit configuration
|
1332 |
+
with gr.Blocks(title="Typhoon Analysis Dashboard", theme=gr.themes.Default()) as demo:
|
1333 |
gr.Markdown("# Typhoon Analysis Dashboard")
|
1334 |
|
1335 |
with gr.Tab("Overview"):
|
|
|
1456 |
outputs=[tsne_plot, routes_plot, stats_plot, cluster_info])
|
1457 |
|
1458 |
if __name__ == "__main__":
|
1459 |
+
# Remove the share parameter for HuggingFace Spaces
|
1460 |
+
demo.launch()
|