Spaces:
Running
Running
Adds step logging + parsing - whoops
Browse files
- src/monitoring.py +14 -0
- templates/spaces/trackio/app.py +20 -2
src/monitoring.py
CHANGED
@@ -449,6 +449,20 @@ class SmolLM3Monitor:
|
|
449 |
try:
|
450 |
# Add timestamp
|
451 |
metrics['timestamp'] = datetime.now().isoformat()
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
452 |
if step is not None:
|
453 |
metrics['step'] = step
|
454 |
|
|
|
449 |
try:
|
450 |
# Add timestamp
|
451 |
metrics['timestamp'] = datetime.now().isoformat()
|
452 |
+
# If caller didn't provide step, try to infer it from common keys emitted by HF/TRL
|
453 |
+
if step is None:
|
454 |
+
try:
|
455 |
+
for step_key in (
|
456 |
+
'global_step',
|
457 |
+
'train/global_step',
|
458 |
+
'step',
|
459 |
+
'train/step',
|
460 |
+
):
|
461 |
+
if step_key in metrics and metrics[step_key] is not None:
|
462 |
+
step = int(metrics[step_key])
|
463 |
+
break
|
464 |
+
except Exception:
|
465 |
+
step = step # keep None if parsing fails
|
466 |
if step is not None:
|
467 |
metrics['step'] = step
|
468 |
|
templates/spaces/trackio/app.py
CHANGED
@@ -1146,7 +1146,17 @@ def create_metrics_plot(experiment_id: str, metric_name: str = "loss") -> go.Fig
|
|
1146 |
# Ensure steps are numeric and monotonically increasing to avoid zig-zag lines
|
1147 |
try:
|
1148 |
df = df.copy()
|
1149 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1150 |
df.sort_values('step', inplace=True)
|
1151 |
except Exception:
|
1152 |
pass
|
@@ -1546,7 +1556,15 @@ def create_combined_metrics_plot(experiment_id: str) -> go.Figure:
|
|
1546 |
# Clean steps for each subplot too
|
1547 |
try:
|
1548 |
df_sub = df.copy()
|
1549 |
-
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
1550 |
df_sub.sort_values('step', inplace=True)
|
1551 |
except Exception:
|
1552 |
df_sub = df
|
|
|
1146 |
# Ensure steps are numeric and monotonically increasing to avoid zig-zag lines
|
1147 |
try:
|
1148 |
df = df.copy()
|
1149 |
+
# If step looks constant or missing, try to derive it from a common field
|
1150 |
+
if 'step' not in df or df['step'].nunique() <= 1:
|
1151 |
+
for alt in ['train/global_step', 'global_step', 'train/step']:
|
1152 |
+
if alt in df.columns and df[alt].notna().any():
|
1153 |
+
df['step'] = pd.to_numeric(df[alt], errors='coerce')
|
1154 |
+
break
|
1155 |
+
# If still missing or constant, fallback to an inferred counter by order of arrival
|
1156 |
+
if 'step' not in df.columns or df['step'].isna().all() or df['step'].nunique() <= 1:
|
1157 |
+
df['step'] = range(1, len(df) + 1)
|
1158 |
+
else:
|
1159 |
+
df['step'] = pd.to_numeric(df.get('step', -1), errors='coerce').fillna(-1)
|
1160 |
df.sort_values('step', inplace=True)
|
1161 |
except Exception:
|
1162 |
pass
|
|
|
1556 |
# Clean steps for each subplot too
|
1557 |
try:
|
1558 |
df_sub = df.copy()
|
1559 |
+
if 'step' not in df_sub or df_sub['step'].nunique() <= 1:
|
1560 |
+
for alt in ['train/global_step', 'global_step', 'train/step']:
|
1561 |
+
if alt in df_sub.columns and df_sub[alt].notna().any():
|
1562 |
+
df_sub['step'] = pd.to_numeric(df_sub[alt], errors='coerce')
|
1563 |
+
break
|
1564 |
+
if 'step' not in df_sub.columns or df_sub['step'].isna().all() or df_sub['step'].nunique() <= 1:
|
1565 |
+
df_sub['step'] = range(1, len(df_sub) + 1)
|
1566 |
+
else:
|
1567 |
+
df_sub['step'] = pd.to_numeric(df_sub.get('step', -1), errors='coerce').fillna(-1)
|
1568 |
df_sub.sort_values('step', inplace=True)
|
1569 |
except Exception:
|
1570 |
df_sub = df
|