Tonic committed on
Commit
0f12d91
·
1 Parent(s): 468cb10

adds normalized variables on update

Browse files
src/dataset_utils.py CHANGED
@@ -253,19 +253,58 @@ class TrackioDatasetManager:
253
  inc_params = _parse_json_field(incoming.get('parameters'), {})
254
  inc_artifacts = _parse_json_field(incoming.get('artifacts'), [])
255
  inc_logs = _parse_json_field(incoming.get('logs'), [])
256
- # Merge metrics with de-dup
257
  merged_metrics = []
258
  seen = set()
259
  for entry in base_metrics + inc_metrics:
260
  try:
261
- # Use the original entry so _metrics_key can properly
262
- # distinguish dict vs non-dict entries
263
  key = _metrics_key(entry)
264
  except Exception:
265
  key = (None, None)
266
  if key not in seen:
267
  seen.add(key)
268
  merged_metrics.append(entry)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
269
  # Merge params
270
  merged_params = {}
271
  if isinstance(base_params, dict):
 
253
  inc_params = _parse_json_field(incoming.get('parameters'), {})
254
  inc_artifacts = _parse_json_field(incoming.get('artifacts'), [])
255
  inc_logs = _parse_json_field(incoming.get('logs'), [])
256
+ # Merge metrics with de-dup (by step+timestamp) then collapse per step
257
  merged_metrics = []
258
  seen = set()
259
  for entry in base_metrics + inc_metrics:
260
  try:
 
 
261
  key = _metrics_key(entry)
262
  except Exception:
263
  key = (None, None)
264
  if key not in seen:
265
  seen.add(key)
266
  merged_metrics.append(entry)
267
+
268
+ # Collapse duplicate steps by merging their metric dicts and keeping the latest timestamp
269
+ try:
270
+ step_to_entry: Dict[Any, Dict[str, Any]] = {}
271
+ for e in merged_metrics:
272
+ if not isinstance(e, dict):
273
+ continue
274
+ # Ensure nested structure {timestamp, step, metrics}
275
+ if 'metrics' not in e:
276
+ e = {
277
+ 'timestamp': e.get('timestamp'),
278
+ 'step': e.get('step'),
279
+ 'metrics': {k: v for k, v in e.items() if k not in ('step', 'timestamp')}
280
+ }
281
+ step_val = e.get('step')
282
+ if step_val in step_to_entry:
283
+ existing_e = step_to_entry[step_val]
284
+ try:
285
+ existing_metrics_dict = existing_e.get('metrics', {})
286
+ if isinstance(existing_metrics_dict, dict):
287
+ existing_metrics_dict.update(e.get('metrics', {}))
288
+ else:
289
+ existing_e['metrics'] = e.get('metrics', {})
290
+ except Exception:
291
+ existing_e['metrics'] = e.get('metrics', {})
292
+ try:
293
+ if str(e.get('timestamp', '')) > str(existing_e.get('timestamp', '')):
294
+ existing_e['timestamp'] = e.get('timestamp')
295
+ except Exception:
296
+ pass
297
+ else:
298
+ step_to_entry[step_val] = dict(e)
299
+ def _step_key(x: Dict[str, Any]):
300
+ try:
301
+ return float(x.get('step'))
302
+ except Exception:
303
+ return -1.0
304
+ merged_metrics = sorted(step_to_entry.values(), key=_step_key)
305
+ except Exception:
306
+ # On any error, keep the de-duplicated list
307
+ pass
308
  # Merge params
309
  merged_params = {}
310
  if isinstance(base_params, dict):
src/monitoring.py CHANGED
@@ -310,6 +310,54 @@ class SmolLM3Monitor:
310
  except Exception:
311
  pass
312
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
313
  # Merge artifacts if provided
314
  if 'artifacts' in experiment_data and isinstance(experiment_data['artifacts'], list):
315
  # De-duplicate while preserving order
 
310
  except Exception:
311
  pass
312
 
313
+ # Collapse duplicate step entries by merging their metric dictionaries
314
+ try:
315
+ def _collapse_by_step(entries: list) -> list:
316
+ step_to_entry: dict = {}
317
+ for e in entries:
318
+ if not isinstance(e, dict):
319
+ continue
320
+ # Normalize to nested structure
321
+ if 'metrics' not in e:
322
+ e = {
323
+ 'timestamp': e.get('timestamp'),
324
+ 'step': e.get('step'),
325
+ 'metrics': {k: v for k, v in e.items() if k not in ('step', 'timestamp')}
326
+ }
327
+ step_val = e.get('step')
328
+ if step_val in step_to_entry:
329
+ # Merge metrics into existing entry for the same step
330
+ existing_e = step_to_entry[step_val]
331
+ try:
332
+ existing_e_metrics = existing_e.get('metrics', {})
333
+ if isinstance(existing_e_metrics, dict):
334
+ existing_e_metrics.update(e.get('metrics', {}))
335
+ else:
336
+ existing_e['metrics'] = e.get('metrics', {})
337
+ except Exception:
338
+ existing_e['metrics'] = e.get('metrics', {})
339
+ # Prefer the latest timestamp (ISO strings compare lexicographically)
340
+ try:
341
+ if str(e.get('timestamp', '')) > str(existing_e.get('timestamp', '')):
342
+ existing_e['timestamp'] = e.get('timestamp')
343
+ except Exception:
344
+ pass
345
+ else:
346
+ step_to_entry[step_val] = dict(e)
347
+ # Sort by step (fallback to 0 for None/non-numeric)
348
+ def _step_key(x):
349
+ val = x.get('step')
350
+ try:
351
+ return float(val)
352
+ except Exception:
353
+ return -1.0
354
+ return sorted(step_to_entry.values(), key=_step_key)
355
+
356
+ merged_metrics = _collapse_by_step(merged_metrics)
357
+ except Exception:
358
+ # If anything goes wrong, keep original list
359
+ pass
360
+
361
  # Merge artifacts if provided
362
  if 'artifacts' in experiment_data and isinstance(experiment_data['artifacts'], list):
363
  # De-duplicate while preserving order
templates/spaces/trackio/app.py CHANGED
@@ -661,18 +661,32 @@ class TrackioSpace:
661
  if not experiment['metrics']:
662
  return pd.DataFrame()
663
 
664
- # Convert metrics to DataFrame
665
  data = []
666
  for metric_entry in experiment['metrics']:
667
  step = metric_entry.get('step', 0)
668
  timestamp = metric_entry.get('timestamp', '')
669
  metrics = metric_entry.get('metrics', {})
670
-
671
  row = {'step': step, 'timestamp': timestamp}
672
  row.update(metrics)
673
  data.append(row)
674
-
675
- return pd.DataFrame(data)
 
 
 
 
 
 
 
 
 
 
 
 
 
 
676
 
677
  # Global instance
678
  trackio_space = TrackioSpace()
 
661
  if not experiment['metrics']:
662
  return pd.DataFrame()
663
 
664
+ # Convert metrics to DataFrame (merge duplicate steps)
665
  data = []
666
  for metric_entry in experiment['metrics']:
667
  step = metric_entry.get('step', 0)
668
  timestamp = metric_entry.get('timestamp', '')
669
  metrics = metric_entry.get('metrics', {})
670
+
671
  row = {'step': step, 'timestamp': timestamp}
672
  row.update(metrics)
673
  data.append(row)
674
+
675
+ if not data:
676
+ return pd.DataFrame()
677
+
678
+ df = pd.DataFrame(data)
679
+ # Ensure step exists even if None
680
+ if 'step' not in df.columns:
681
+ df['step'] = 0
682
+ # For duplicate steps, keep the latest timestamp and merge columns by last valid value
683
+ try:
684
+ df.sort_values(['step', 'timestamp'], inplace=True)
685
+ # Take the last non-null value of each column per step (rows are sorted by timestamp)
686
+ df = df.groupby('step', as_index=False).last()
687
+ except Exception:
688
+ pass
689
+ return df
690
 
691
  # Global instance
692
  trackio_space = TrackioSpace()