{ "best_metric": 0.8528685149601977, "best_model_checkpoint": "ds3-img-classification/checkpoint-284", "epoch": 24.217054263565892, "eval_steps": 142, "global_step": 1562, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.015503875968992248, "grad_norm": 14.292332649230957, "learning_rate": 1.736111111111111e-06, "loss": 5.8979, "step": 1 }, { "epoch": 2.201550387596899, "grad_norm": 3.2709274291992188, "learning_rate": 0.0002465277777777778, "loss": 2.2063, "step": 142 }, { "epoch": 2.201550387596899, "eval_accuracy": 0.8300850947021685, "eval_loss": 0.6956288814544678, "eval_macro_f1": 0.8020115300522307, "eval_macro_precision": 0.8326277180719005, "eval_macro_recall": 0.8130180643302005, "eval_micro_f1": 0.8300850947021685, "eval_micro_precision": 0.8300850947021685, "eval_micro_recall": 0.8300850947021685, "eval_runtime": 8.1254, "eval_samples_per_second": 448.347, "eval_steps_per_second": 3.569, "eval_weighted_f1": 0.8217784359221563, "eval_weighted_precision": 0.8439879212180366, "eval_weighted_recall": 0.8300850947021685, "step": 142 }, { "epoch": 4.403100775193798, "grad_norm": 3.7781729698181152, "learning_rate": 0.0004930555555555556, "loss": 0.4906, "step": 284 }, { "epoch": 4.403100775193798, "eval_accuracy": 0.8528685149601977, "eval_loss": 0.5704612731933594, "eval_macro_f1": 0.8447646928874578, "eval_macro_precision": 0.8434580548053667, "eval_macro_recall": 0.8590562294756982, "eval_micro_f1": 0.8528685149601977, "eval_micro_precision": 0.8528685149601977, "eval_micro_recall": 0.8528685149601977, "eval_runtime": 8.3026, "eval_samples_per_second": 438.779, "eval_steps_per_second": 3.493, "eval_weighted_f1": 0.8542422373852568, "eval_weighted_precision": 0.8661150819486806, "eval_weighted_recall": 0.8528685149601977, "step": 284 }, { "epoch": 6.604651162790698, "grad_norm": 2.7355144023895264, "learning_rate": 0.0007395833333333334, "loss": 0.4125, "step": 426 }, { "epoch": 6.604651162790698, "eval_accuracy": 0.7400494098270656, "eval_loss": 1.1121388673782349, "eval_macro_f1": 0.7104399369512154, "eval_macro_precision": 0.7984066441843661, "eval_macro_recall": 0.7246907362358344, "eval_micro_f1": 0.7400494098270657, "eval_micro_precision": 0.7400494098270656, "eval_micro_recall": 0.7400494098270656, "eval_runtime": 8.3145, "eval_samples_per_second": 438.152, "eval_steps_per_second": 3.488, "eval_weighted_f1": 0.7343329270391191, "eval_weighted_precision": 0.8109139038669717, "eval_weighted_recall": 0.7400494098270656, "step": 426 }, { "epoch": 8.806201550387597, "grad_norm": 2.7910587787628174, "learning_rate": 0.0009861111111111112, "loss": 0.3948, "step": 568 }, { "epoch": 8.806201550387597, "eval_accuracy": 0.769146307987922, "eval_loss": 0.9053287506103516, "eval_macro_f1": 0.7380470144030622, "eval_macro_precision": 0.7929019396341611, "eval_macro_recall": 0.7416184656713186, "eval_micro_f1": 0.769146307987922, "eval_micro_precision": 0.769146307987922, "eval_micro_recall": 0.769146307987922, "eval_runtime": 8.4314, "eval_samples_per_second": 432.076, "eval_steps_per_second": 3.44, "eval_weighted_f1": 0.7647284160437106, "eval_weighted_precision": 0.804728448308173, "eval_weighted_recall": 0.769146307987922, "step": 568 }, { "epoch": 11.007751937984496, "grad_norm": 6.627900123596191, "learning_rate": 0.0009983522903302263, "loss": 0.3706, "step": 710 }, { "epoch": 11.007751937984496, "eval_accuracy": 0.7672248147131485, "eval_loss": 0.9641202688217163, "eval_macro_f1": 0.7528371715147637, "eval_macro_precision": 0.787293122179915, "eval_macro_recall": 0.7589348598973276, "eval_micro_f1": 0.7672248147131485, "eval_micro_precision": 0.7672248147131485, "eval_micro_recall": 0.7672248147131485, "eval_runtime": 8.3804, "eval_samples_per_second": 434.705, "eval_steps_per_second": 3.46, "eval_weighted_f1": 0.7623056450515923, "eval_weighted_precision": 0.7946085579995245, "eval_weighted_recall": 0.7672248147131485, "step": 710 }, { "epoch": 13.209302325581396, "grad_norm": 3.9615182876586914, "learning_rate": 0.0009930222532081596, "loss": 0.3302, "step": 852 }, { "epoch": 13.209302325581396, "eval_accuracy": 0.7669503156738952, "eval_loss": 0.9978815317153931, "eval_macro_f1": 0.731377351970227, "eval_macro_precision": 0.7752862718850362, "eval_macro_recall": 0.7408605973033737, "eval_micro_f1": 0.766950315673895, "eval_micro_precision": 0.7669503156738952, "eval_micro_recall": 0.7669503156738952, "eval_runtime": 8.3024, "eval_samples_per_second": 438.791, "eval_steps_per_second": 3.493, "eval_weighted_f1": 0.7591614272562255, "eval_weighted_precision": 0.7900342900911238, "eval_weighted_recall": 0.7669503156738952, "step": 852 }, { "epoch": 15.410852713178295, "grad_norm": 2.4717087745666504, "learning_rate": 0.0009840434590541415, "loss": 0.2924, "step": 994 }, { "epoch": 15.410852713178295, "eval_accuracy": 0.7233049684326105, "eval_loss": 1.1638933420181274, "eval_macro_f1": 0.7113593382146706, "eval_macro_precision": 0.759489526605669, "eval_macro_recall": 0.7371465474721075, "eval_micro_f1": 0.7233049684326105, "eval_micro_precision": 0.7233049684326105, "eval_micro_recall": 0.7233049684326105, "eval_runtime": 8.2739, "eval_samples_per_second": 440.3, "eval_steps_per_second": 3.505, "eval_weighted_f1": 0.7249947526290721, "eval_weighted_precision": 0.7928301153657854, "eval_weighted_recall": 0.7233049684326105, "step": 994 }, { "epoch": 17.612403100775193, "grad_norm": 1.886505126953125, "learning_rate": 0.0009714823580904379, "loss": 0.271, "step": 1136 }, { "epoch": 17.612403100775193, "eval_accuracy": 0.7631073291243481, "eval_loss": 0.9723167419433594, "eval_macro_f1": 0.7630758622320426, "eval_macro_precision": 0.8404983162678952, "eval_macro_recall": 0.7684491516518803, "eval_micro_f1": 0.7631073291243481, "eval_micro_precision": 0.7631073291243481, "eval_micro_recall": 0.7631073291243481, "eval_runtime": 8.2536, "eval_samples_per_second": 441.381, "eval_steps_per_second": 3.514, "eval_weighted_f1": 0.7641468306327522, "eval_weighted_precision": 0.8384853206385829, "eval_weighted_recall": 0.7631073291243481, "step": 1136 }, { "epoch": 19.813953488372093, "grad_norm": 1.751386284828186, "learning_rate": 0.000955431912460588, "loss": 0.2493, "step": 1278 }, { "epoch": 19.813953488372093, "eval_accuracy": 0.7556958550645073, "eval_loss": 0.9924145340919495, "eval_macro_f1": 0.7186954972930405, "eval_macro_precision": 0.7780354194554568, "eval_macro_recall": 0.7246815062488177, "eval_micro_f1": 0.7556958550645072, "eval_micro_precision": 0.7556958550645073, "eval_micro_recall": 0.7556958550645073, "eval_runtime": 8.2332, "eval_samples_per_second": 442.478, "eval_steps_per_second": 3.522, "eval_weighted_f1": 0.7424053023538434, "eval_weighted_precision": 0.7815999680084916, "eval_weighted_recall": 0.7556958550645073, "step": 1278 }, { "epoch": 22.015503875968992, "grad_norm": 1.887330174446106, "learning_rate": 0.0009360109082355581, "loss": 0.2276, "step": 1420 }, { "epoch": 22.015503875968992, "eval_accuracy": 0.7488333790831732, "eval_loss": 1.0906082391738892, "eval_macro_f1": 0.7232412414342934, "eval_macro_precision": 0.7888326922300277, "eval_macro_recall": 0.7295020066680354, "eval_micro_f1": 0.7488333790831732, "eval_micro_precision": 0.7488333790831732, "eval_micro_recall": 0.7488333790831732, "eval_runtime": 8.3573, "eval_samples_per_second": 435.905, "eval_steps_per_second": 3.47, "eval_weighted_f1": 0.7411123038164863, "eval_weighted_precision": 0.78924044171482, "eval_weighted_recall": 0.7488333790831732, "step": 1420 }, { "epoch": 24.217054263565892, "grad_norm": 1.4166276454925537, "learning_rate": 0.0009133630763022861, "loss": 0.2147, "step": 1562 }, { "epoch": 24.217054263565892, "eval_accuracy": 0.7595388416140544, "eval_loss": 1.046807050704956, "eval_macro_f1": 0.7218058945395823, "eval_macro_precision": 0.7806200306355751, "eval_macro_recall": 0.7363816501236903, "eval_micro_f1": 0.7595388416140544, "eval_micro_precision": 0.7595388416140544, "eval_micro_recall": 0.7595388416140544, "eval_runtime": 8.3705, "eval_samples_per_second": 435.218, "eval_steps_per_second": 3.465, "eval_weighted_f1": 0.7500356793933665, "eval_weighted_precision": 0.7963937500158152, "eval_weighted_recall": 0.7595388416140544, "step": 1562 } ], "logging_steps": 142, "max_steps": 5760, "num_input_tokens_seen": 0, "num_train_epochs": 90, "save_steps": 142, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 10, "early_stopping_threshold": 0.01 }, "attributes": { "early_stopping_patience_counter": 9 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 2.175595314432671e+20, "train_batch_size": 256, "trial_name": null, "trial_params": null }