Tonic committed on
Commit
a509b8b
·
1 Parent(s): c560f4f

adds steps logging + parsing - woops

Browse files
src/monitoring.py CHANGED
@@ -449,6 +449,20 @@ class SmolLM3Monitor:
449
  try:
450
  # Add timestamp
451
  metrics['timestamp'] = datetime.now().isoformat()
 
 
 
 
 
 
 
 
 
 
 
 
 
 
452
  if step is not None:
453
  metrics['step'] = step
454
 
 
449
  try:
450
  # Add timestamp
451
  metrics['timestamp'] = datetime.now().isoformat()
452
+ # If caller didn't provide step, try to infer it from common keys emitted by HF/TRL
453
+ if step is None:
454
+ try:
455
+ for step_key in (
456
+ 'global_step',
457
+ 'train/global_step',
458
+ 'step',
459
+ 'train/step',
460
+ ):
461
+ if step_key in metrics and metrics[step_key] is not None:
462
+ step = int(metrics[step_key])
463
+ break
464
+ except Exception:
465
+ step = step # keep None if parsing fails
466
  if step is not None:
467
  metrics['step'] = step
468
 
templates/spaces/trackio/app.py CHANGED
@@ -1146,7 +1146,17 @@ def create_metrics_plot(experiment_id: str, metric_name: str = "loss") -> go.Fig
1146
  # Ensure steps are numeric and monotonically increasing to avoid zig-zag lines
1147
  try:
1148
  df = df.copy()
1149
- df['step'] = pd.to_numeric(df['step'], errors='coerce').fillna(-1)
 
 
 
 
 
 
 
 
 
 
1150
  df.sort_values('step', inplace=True)
1151
  except Exception:
1152
  pass
@@ -1546,7 +1556,15 @@ def create_combined_metrics_plot(experiment_id: str) -> go.Figure:
1546
  # Clean steps for each subplot too
1547
  try:
1548
  df_sub = df.copy()
1549
- df_sub['step'] = pd.to_numeric(df_sub['step'], errors='coerce').fillna(-1)
 
 
 
 
 
 
 
 
1550
  df_sub.sort_values('step', inplace=True)
1551
  except Exception:
1552
  df_sub = df
 
1146
  # Ensure steps are numeric and monotonically increasing to avoid zig-zag lines
1147
  try:
1148
  df = df.copy()
1149
+ # If step looks constant or missing, try to derive it from a common field
1150
+ if 'step' not in df or df['step'].nunique() <= 1:
1151
+ for alt in ['train/global_step', 'global_step', 'train/step']:
1152
+ if alt in df.columns and df[alt].notna().any():
1153
+ df['step'] = pd.to_numeric(df[alt], errors='coerce')
1154
+ break
1155
+ # If still missing or constant, fallback to an inferred counter by order of arrival
1156
+ if 'step' not in df.columns or df['step'].isna().all() or df['step'].nunique() <= 1:
1157
+ df['step'] = range(1, len(df) + 1)
1158
+ else:
1159
+ df['step'] = pd.to_numeric(df.get('step', -1), errors='coerce').fillna(-1)
1160
  df.sort_values('step', inplace=True)
1161
  except Exception:
1162
  pass
 
1556
  # Clean steps for each subplot too
1557
  try:
1558
  df_sub = df.copy()
1559
+ if 'step' not in df_sub or df_sub['step'].nunique() <= 1:
1560
+ for alt in ['train/global_step', 'global_step', 'train/step']:
1561
+ if alt in df_sub.columns and df_sub[alt].notna().any():
1562
+ df_sub['step'] = pd.to_numeric(df_sub[alt], errors='coerce')
1563
+ break
1564
+ if 'step' not in df_sub.columns or df_sub['step'].isna().all() or df_sub['step'].nunique() <= 1:
1565
+ df_sub['step'] = range(1, len(df_sub) + 1)
1566
+ else:
1567
+ df_sub['step'] = pd.to_numeric(df_sub.get('step', -1), errors='coerce').fillna(-1)
1568
  df_sub.sort_values('step', inplace=True)
1569
  except Exception:
1570
  df_sub = df