adds normalized variables on update
- src/dataset_utils.py +42 -3
- src/monitoring.py +48 -0
- templates/spaces/trackio/app.py +18 -4
src/dataset_utils.py
CHANGED
@@ -253,19 +253,58 @@ class TrackioDatasetManager:
         inc_params = _parse_json_field(incoming.get('parameters'), {})
         inc_artifacts = _parse_json_field(incoming.get('artifacts'), [])
         inc_logs = _parse_json_field(incoming.get('logs'), [])
-        # Merge metrics with de-dup
+        # Merge metrics with de-dup (by step+timestamp) then collapse per step
         merged_metrics = []
         seen = set()
         for entry in base_metrics + inc_metrics:
             try:
-                # Use the original entry so _metrics_key can properly
-                # distinguish dict vs non-dict entries
                 key = _metrics_key(entry)
             except Exception:
                 key = (None, None)
             if key not in seen:
                 seen.add(key)
                 merged_metrics.append(entry)
+
+        # Collapse duplicate steps by merging their metric dicts and keeping the latest timestamp
+        try:
+            step_to_entry: Dict[Any, Dict[str, Any]] = {}
+            for e in merged_metrics:
+                if not isinstance(e, dict):
+                    continue
+                # Ensure nested structure {timestamp, step, metrics}
+                if 'metrics' not in e:
+                    e = {
+                        'timestamp': e.get('timestamp'),
+                        'step': e.get('step'),
+                        'metrics': {k: v for k, v in e.items() if k not in ('step', 'timestamp')}
+                    }
+                step_val = e.get('step')
+                if step_val in step_to_entry:
+                    existing_e = step_to_entry[step_val]
+                    try:
+                        existing_metrics_dict = existing_e.get('metrics', {})
+                        if isinstance(existing_metrics_dict, dict):
+                            existing_metrics_dict.update(e.get('metrics', {}))
+                        else:
+                            existing_e['metrics'] = e.get('metrics', {})
+                    except Exception:
+                        existing_e['metrics'] = e.get('metrics', {})
+                    try:
+                        if str(e.get('timestamp', '')) > str(existing_e.get('timestamp', '')):
+                            existing_e['timestamp'] = e.get('timestamp')
+                    except Exception:
+                        pass
+                else:
+                    step_to_entry[step_val] = dict(e)
+            def _step_key(x: Dict[str, Any]):
+                try:
+                    return float(x.get('step'))
+                except Exception:
+                    return -1.0
+            merged_metrics = sorted(step_to_entry.values(), key=_step_key)
+        except Exception:
+            # On any error, keep the de-duplicated list
+            pass
         # Merge params
         merged_params = {}
         if isinstance(base_params, dict):
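To make the intent of the new collapse pass concrete, here is a minimal standalone sketch of the same logic. It is slightly simplified, and the function name, steps, timestamps, and metric names are illustrative, not part of the patch:

from typing import Any, Dict, List

def collapse_by_step(entries: List[Dict[str, Any]]) -> List[Dict[str, Any]]:
    # Mirror of the patch: normalize flat entries, merge metric dicts per step,
    # keep the latest timestamp, and sort by step.
    step_to_entry: Dict[Any, Dict[str, Any]] = {}
    for e in entries:
        if not isinstance(e, dict):
            continue
        if 'metrics' not in e:
            e = {
                'timestamp': e.get('timestamp'),
                'step': e.get('step'),
                'metrics': {k: v for k, v in e.items() if k not in ('step', 'timestamp')},
            }
        step = e.get('step')
        if step in step_to_entry:
            existing = step_to_entry[step]
            existing.setdefault('metrics', {}).update(e.get('metrics', {}))
            if str(e.get('timestamp', '')) > str(existing.get('timestamp', '')):
                existing['timestamp'] = e.get('timestamp')
        else:
            step_to_entry[step] = dict(e)
    return sorted(
        step_to_entry.values(),
        key=lambda x: float(x['step']) if x.get('step') is not None else -1.0,
    )

# Two entries logged for step 5 (one flat, one nested) collapse into a single
# entry carrying both metric keys and the later timestamp.
entries = [
    {'timestamp': '2024-01-01T10:00:00', 'step': 5, 'metrics': {'loss': 1.2}},
    {'timestamp': '2024-01-01T10:00:03', 'step': 5, 'grad_norm': 0.7},
    {'timestamp': '2024-01-01T10:00:05', 'step': 6, 'metrics': {'loss': 1.1}},
]
print(collapse_by_step(entries))
# [{'timestamp': '2024-01-01T10:00:03', 'step': 5, 'metrics': {'loss': 1.2, 'grad_norm': 0.7}},
#  {'timestamp': '2024-01-01T10:00:05', 'step': 6, 'metrics': {'loss': 1.1}}]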
src/monitoring.py
CHANGED
@@ -310,6 +310,54 @@ class SmolLM3Monitor:
         except Exception:
             pass

+        # Collapse duplicate step entries by merging their metric dictionaries
+        try:
+            def _collapse_by_step(entries: list) -> list:
+                step_to_entry: dict = {}
+                for e in entries:
+                    if not isinstance(e, dict):
+                        continue
+                    # Normalize to nested structure
+                    if 'metrics' not in e:
+                        e = {
+                            'timestamp': e.get('timestamp'),
+                            'step': e.get('step'),
+                            'metrics': {k: v for k, v in e.items() if k not in ('step', 'timestamp')}
+                        }
+                    step_val = e.get('step')
+                    if step_val in step_to_entry:
+                        # Merge metrics into existing entry for the same step
+                        existing_e = step_to_entry[step_val]
+                        try:
+                            existing_e_metrics = existing_e.get('metrics', {})
+                            if isinstance(existing_e_metrics, dict):
+                                existing_e_metrics.update(e.get('metrics', {}))
+                            else:
+                                existing_e['metrics'] = e.get('metrics', {})
+                        except Exception:
+                            existing_e['metrics'] = e.get('metrics', {})
+                        # Prefer the latest timestamp (ISO strings compare lexicographically)
+                        try:
+                            if str(e.get('timestamp', '')) > str(existing_e.get('timestamp', '')):
+                                existing_e['timestamp'] = e.get('timestamp')
+                        except Exception:
+                            pass
+                    else:
+                        step_to_entry[step_val] = dict(e)
+                # Sort by step (fallback to -1 for None/non-numeric)
+                def _step_key(x):
+                    val = x.get('step')
+                    try:
+                        return float(val)
+                    except Exception:
+                        return -1.0
+                return sorted(step_to_entry.values(), key=_step_key)
+
+            merged_metrics = _collapse_by_step(merged_metrics)
+        except Exception:
+            # If anything goes wrong, keep the original list
+            pass
+
         # Merge artifacts if provided
         if 'artifacts' in experiment_data and isinstance(experiment_data['artifacts'], list):
             # De-duplicate while preserving order
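A note on the timestamp comparison used in both files: comparing timestamps as strings only orders correctly when they are uniformly formatted, zero-padded ISO-8601 values. A small illustration; the later() helper below is a hypothetical alternative based on parsing, not something the patch adds:

from datetime import datetime

# Uniformly formatted ISO-8601 strings sort lexicographically in the same order
# as chronologically, which is what str(...) > str(...) above relies on.
a = '2024-01-01T09:59:59+00:00'
b = '2024-01-01T10:00:03+00:00'
assert (a < b) == (datetime.fromisoformat(a) < datetime.fromisoformat(b))

def later(ts1: str, ts2: str) -> str:
    # Hypothetical alternative: parse before comparing, so mixed precisions or
    # formats do not break the ordering; fall back to string order on failure.
    try:
        return ts1 if datetime.fromisoformat(ts1) >= datetime.fromisoformat(ts2) else ts2
    except (ValueError, TypeError):
        return max(ts1, ts2)

print(later(a, b))  # 2024-01-01T10:00:03+00:00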
templates/spaces/trackio/app.py
CHANGED
@@ -661,18 +661,32 @@ class TrackioSpace:
         if not experiment['metrics']:
             return pd.DataFrame()

-        # Convert metrics to DataFrame
+        # Convert metrics to DataFrame (merge duplicate steps)
         data = []
         for metric_entry in experiment['metrics']:
             step = metric_entry.get('step', 0)
             timestamp = metric_entry.get('timestamp', '')
             metrics = metric_entry.get('metrics', {})
-
+
             row = {'step': step, 'timestamp': timestamp}
             row.update(metrics)
             data.append(row)
-
-        return pd.DataFrame(data)
+
+        if not data:
+            return pd.DataFrame()
+
+        df = pd.DataFrame(data)
+        # Ensure step exists even if None
+        if 'step' not in df.columns:
+            df['step'] = 0
+        # For duplicate steps, keep the latest timestamp and merge columns by last valid value
+        try:
+            df.sort_values(['step', 'timestamp'], inplace=True)
+            # Take the last row per step (latest timestamp)
+            df = df.groupby('step', as_index=False).last()
+        except Exception:
+            pass
+        return df

 # Global instance
 trackio_space = TrackioSpace()
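For reference, this is how the new sort_values plus groupby(...).last() step behaves on duplicate steps; the rows below are hypothetical. pandas GroupBy.last() keeps the last non-null value per column, so metrics logged for the same step in separate entries end up merged into one row:

import pandas as pd

# Hypothetical rows, shaped like the ones the loop above builds.
df = pd.DataFrame([
    {'step': 5, 'timestamp': '2024-01-01T10:00:00', 'loss': 1.2},
    {'step': 5, 'timestamp': '2024-01-01T10:00:03', 'grad_norm': 0.7},
    {'step': 6, 'timestamp': '2024-01-01T10:00:05', 'loss': 1.1},
])

df.sort_values(['step', 'timestamp'], inplace=True)
deduped = df.groupby('step', as_index=False).last()
print(deduped)
# step 5 keeps the later timestamp and both metric columns (loss=1.2, grad_norm=0.7);
# step 6 keeps loss=1.1 with grad_norm left as NaN.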