|
{ |
|
"best_metric": 0.8528685149601977, |
|
"best_model_checkpoint": "ds3-img-classification/checkpoint-284", |
|
"epoch": 26.41860465116279, |
|
"eval_steps": 142, |
|
"global_step": 1704, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.015503875968992248, |
|
"grad_norm": 14.292332649230957, |
|
"learning_rate": 1.736111111111111e-06, |
|
"loss": 5.8979, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 2.201550387596899, |
|
"grad_norm": 3.2709274291992188, |
|
"learning_rate": 0.0002465277777777778, |
|
"loss": 2.2063, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 2.201550387596899, |
|
"eval_accuracy": 0.8300850947021685, |
|
"eval_loss": 0.6956288814544678, |
|
"eval_macro_f1": 0.8020115300522307, |
|
"eval_macro_precision": 0.8326277180719005, |
|
"eval_macro_recall": 0.8130180643302005, |
|
"eval_micro_f1": 0.8300850947021685, |
|
"eval_micro_precision": 0.8300850947021685, |
|
"eval_micro_recall": 0.8300850947021685, |
|
"eval_runtime": 8.1254, |
|
"eval_samples_per_second": 448.347, |
|
"eval_steps_per_second": 3.569, |
|
"eval_weighted_f1": 0.8217784359221563, |
|
"eval_weighted_precision": 0.8439879212180366, |
|
"eval_weighted_recall": 0.8300850947021685, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 4.403100775193798, |
|
"grad_norm": 3.7781729698181152, |
|
"learning_rate": 0.0004930555555555556, |
|
"loss": 0.4906, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 4.403100775193798, |
|
"eval_accuracy": 0.8528685149601977, |
|
"eval_loss": 0.5704612731933594, |
|
"eval_macro_f1": 0.8447646928874578, |
|
"eval_macro_precision": 0.8434580548053667, |
|
"eval_macro_recall": 0.8590562294756982, |
|
"eval_micro_f1": 0.8528685149601977, |
|
"eval_micro_precision": 0.8528685149601977, |
|
"eval_micro_recall": 0.8528685149601977, |
|
"eval_runtime": 8.3026, |
|
"eval_samples_per_second": 438.779, |
|
"eval_steps_per_second": 3.493, |
|
"eval_weighted_f1": 0.8542422373852568, |
|
"eval_weighted_precision": 0.8661150819486806, |
|
"eval_weighted_recall": 0.8528685149601977, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 6.604651162790698, |
|
"grad_norm": 2.7355144023895264, |
|
"learning_rate": 0.0007395833333333334, |
|
"loss": 0.4125, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 6.604651162790698, |
|
"eval_accuracy": 0.7400494098270656, |
|
"eval_loss": 1.1121388673782349, |
|
"eval_macro_f1": 0.7104399369512154, |
|
"eval_macro_precision": 0.7984066441843661, |
|
"eval_macro_recall": 0.7246907362358344, |
|
"eval_micro_f1": 0.7400494098270657, |
|
"eval_micro_precision": 0.7400494098270656, |
|
"eval_micro_recall": 0.7400494098270656, |
|
"eval_runtime": 8.3145, |
|
"eval_samples_per_second": 438.152, |
|
"eval_steps_per_second": 3.488, |
|
"eval_weighted_f1": 0.7343329270391191, |
|
"eval_weighted_precision": 0.8109139038669717, |
|
"eval_weighted_recall": 0.7400494098270656, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 8.806201550387597, |
|
"grad_norm": 2.7910587787628174, |
|
"learning_rate": 0.0009861111111111112, |
|
"loss": 0.3948, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 8.806201550387597, |
|
"eval_accuracy": 0.769146307987922, |
|
"eval_loss": 0.9053287506103516, |
|
"eval_macro_f1": 0.7380470144030622, |
|
"eval_macro_precision": 0.7929019396341611, |
|
"eval_macro_recall": 0.7416184656713186, |
|
"eval_micro_f1": 0.769146307987922, |
|
"eval_micro_precision": 0.769146307987922, |
|
"eval_micro_recall": 0.769146307987922, |
|
"eval_runtime": 8.4314, |
|
"eval_samples_per_second": 432.076, |
|
"eval_steps_per_second": 3.44, |
|
"eval_weighted_f1": 0.7647284160437106, |
|
"eval_weighted_precision": 0.804728448308173, |
|
"eval_weighted_recall": 0.769146307987922, |
|
"step": 568 |
|
}, |
|
{ |
|
"epoch": 11.007751937984496, |
|
"grad_norm": 6.627900123596191, |
|
"learning_rate": 0.0009983522903302263, |
|
"loss": 0.3706, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 11.007751937984496, |
|
"eval_accuracy": 0.7672248147131485, |
|
"eval_loss": 0.9641202688217163, |
|
"eval_macro_f1": 0.7528371715147637, |
|
"eval_macro_precision": 0.787293122179915, |
|
"eval_macro_recall": 0.7589348598973276, |
|
"eval_micro_f1": 0.7672248147131485, |
|
"eval_micro_precision": 0.7672248147131485, |
|
"eval_micro_recall": 0.7672248147131485, |
|
"eval_runtime": 8.3804, |
|
"eval_samples_per_second": 434.705, |
|
"eval_steps_per_second": 3.46, |
|
"eval_weighted_f1": 0.7623056450515923, |
|
"eval_weighted_precision": 0.7946085579995245, |
|
"eval_weighted_recall": 0.7672248147131485, |
|
"step": 710 |
|
}, |
|
{ |
|
"epoch": 13.209302325581396, |
|
"grad_norm": 3.9615182876586914, |
|
"learning_rate": 0.0009930222532081596, |
|
"loss": 0.3302, |
|
"step": 852 |
|
}, |
|
{ |
|
"epoch": 13.209302325581396, |
|
"eval_accuracy": 0.7669503156738952, |
|
"eval_loss": 0.9978815317153931, |
|
"eval_macro_f1": 0.731377351970227, |
|
"eval_macro_precision": 0.7752862718850362, |
|
"eval_macro_recall": 0.7408605973033737, |
|
"eval_micro_f1": 0.766950315673895, |
|
"eval_micro_precision": 0.7669503156738952, |
|
"eval_micro_recall": 0.7669503156738952, |
|
"eval_runtime": 8.3024, |
|
"eval_samples_per_second": 438.791, |
|
"eval_steps_per_second": 3.493, |
|
"eval_weighted_f1": 0.7591614272562255, |
|
"eval_weighted_precision": 0.7900342900911238, |
|
"eval_weighted_recall": 0.7669503156738952, |
|
"step": 852 |
|
}, |
|
{ |
|
"epoch": 15.410852713178295, |
|
"grad_norm": 2.4717087745666504, |
|
"learning_rate": 0.0009840434590541415, |
|
"loss": 0.2924, |
|
"step": 994 |
|
}, |
|
{ |
|
"epoch": 15.410852713178295, |
|
"eval_accuracy": 0.7233049684326105, |
|
"eval_loss": 1.1638933420181274, |
|
"eval_macro_f1": 0.7113593382146706, |
|
"eval_macro_precision": 0.759489526605669, |
|
"eval_macro_recall": 0.7371465474721075, |
|
"eval_micro_f1": 0.7233049684326105, |
|
"eval_micro_precision": 0.7233049684326105, |
|
"eval_micro_recall": 0.7233049684326105, |
|
"eval_runtime": 8.2739, |
|
"eval_samples_per_second": 440.3, |
|
"eval_steps_per_second": 3.505, |
|
"eval_weighted_f1": 0.7249947526290721, |
|
"eval_weighted_precision": 0.7928301153657854, |
|
"eval_weighted_recall": 0.7233049684326105, |
|
"step": 994 |
|
}, |
|
{ |
|
"epoch": 17.612403100775193, |
|
"grad_norm": 1.886505126953125, |
|
"learning_rate": 0.0009714823580904379, |
|
"loss": 0.271, |
|
"step": 1136 |
|
}, |
|
{ |
|
"epoch": 17.612403100775193, |
|
"eval_accuracy": 0.7631073291243481, |
|
"eval_loss": 0.9723167419433594, |
|
"eval_macro_f1": 0.7630758622320426, |
|
"eval_macro_precision": 0.8404983162678952, |
|
"eval_macro_recall": 0.7684491516518803, |
|
"eval_micro_f1": 0.7631073291243481, |
|
"eval_micro_precision": 0.7631073291243481, |
|
"eval_micro_recall": 0.7631073291243481, |
|
"eval_runtime": 8.2536, |
|
"eval_samples_per_second": 441.381, |
|
"eval_steps_per_second": 3.514, |
|
"eval_weighted_f1": 0.7641468306327522, |
|
"eval_weighted_precision": 0.8384853206385829, |
|
"eval_weighted_recall": 0.7631073291243481, |
|
"step": 1136 |
|
}, |
|
{ |
|
"epoch": 19.813953488372093, |
|
"grad_norm": 1.751386284828186, |
|
"learning_rate": 0.000955431912460588, |
|
"loss": 0.2493, |
|
"step": 1278 |
|
}, |
|
{ |
|
"epoch": 19.813953488372093, |
|
"eval_accuracy": 0.7556958550645073, |
|
"eval_loss": 0.9924145340919495, |
|
"eval_macro_f1": 0.7186954972930405, |
|
"eval_macro_precision": 0.7780354194554568, |
|
"eval_macro_recall": 0.7246815062488177, |
|
"eval_micro_f1": 0.7556958550645072, |
|
"eval_micro_precision": 0.7556958550645073, |
|
"eval_micro_recall": 0.7556958550645073, |
|
"eval_runtime": 8.2332, |
|
"eval_samples_per_second": 442.478, |
|
"eval_steps_per_second": 3.522, |
|
"eval_weighted_f1": 0.7424053023538434, |
|
"eval_weighted_precision": 0.7815999680084916, |
|
"eval_weighted_recall": 0.7556958550645073, |
|
"step": 1278 |
|
}, |
|
{ |
|
"epoch": 22.015503875968992, |
|
"grad_norm": 1.887330174446106, |
|
"learning_rate": 0.0009360109082355581, |
|
"loss": 0.2276, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 22.015503875968992, |
|
"eval_accuracy": 0.7488333790831732, |
|
"eval_loss": 1.0906082391738892, |
|
"eval_macro_f1": 0.7232412414342934, |
|
"eval_macro_precision": 0.7888326922300277, |
|
"eval_macro_recall": 0.7295020066680354, |
|
"eval_micro_f1": 0.7488333790831732, |
|
"eval_micro_precision": 0.7488333790831732, |
|
"eval_micro_recall": 0.7488333790831732, |
|
"eval_runtime": 8.3573, |
|
"eval_samples_per_second": 435.905, |
|
"eval_steps_per_second": 3.47, |
|
"eval_weighted_f1": 0.7411123038164863, |
|
"eval_weighted_precision": 0.78924044171482, |
|
"eval_weighted_recall": 0.7488333790831732, |
|
"step": 1420 |
|
}, |
|
{ |
|
"epoch": 24.217054263565892, |
|
"grad_norm": 1.4166276454925537, |
|
"learning_rate": 0.0009133630763022861, |
|
"loss": 0.2147, |
|
"step": 1562 |
|
}, |
|
{ |
|
"epoch": 24.217054263565892, |
|
"eval_accuracy": 0.7595388416140544, |
|
"eval_loss": 1.046807050704956, |
|
"eval_macro_f1": 0.7218058945395823, |
|
"eval_macro_precision": 0.7806200306355751, |
|
"eval_macro_recall": 0.7363816501236903, |
|
"eval_micro_f1": 0.7595388416140544, |
|
"eval_micro_precision": 0.7595388416140544, |
|
"eval_micro_recall": 0.7595388416140544, |
|
"eval_runtime": 8.3705, |
|
"eval_samples_per_second": 435.218, |
|
"eval_steps_per_second": 3.465, |
|
"eval_weighted_f1": 0.7500356793933665, |
|
"eval_weighted_precision": 0.7963937500158152, |
|
"eval_weighted_recall": 0.7595388416140544, |
|
"step": 1562 |
|
}, |
|
{ |
|
"epoch": 26.41860465116279, |
|
"grad_norm": 1.3001500368118286, |
|
"learning_rate": 0.0008876560286407329, |
|
"loss": 0.1915, |
|
"step": 1704 |
|
}, |
|
{ |
|
"epoch": 26.41860465116279, |
|
"eval_accuracy": 0.7757342849300027, |
|
"eval_loss": 0.9456538558006287, |
|
"eval_macro_f1": 0.7518200808982276, |
|
"eval_macro_precision": 0.7941242669868954, |
|
"eval_macro_recall": 0.7603347162166142, |
|
"eval_micro_f1": 0.7757342849300026, |
|
"eval_micro_precision": 0.7757342849300027, |
|
"eval_micro_recall": 0.7757342849300027, |
|
"eval_runtime": 8.4106, |
|
"eval_samples_per_second": 433.145, |
|
"eval_steps_per_second": 3.448, |
|
"eval_weighted_f1": 0.7688586665917762, |
|
"eval_weighted_precision": 0.8022618721710044, |
|
"eval_weighted_recall": 0.7757342849300027, |
|
"step": 1704 |
|
}, |
|
{ |
|
"epoch": 26.41860465116279, |
|
"step": 1704, |
|
"total_flos": 2.3734952710727598e+20, |
|
"train_loss": 0.4731367647368024, |
|
"train_runtime": 2312.991, |
|
"train_samples_per_second": 1275.725, |
|
"train_steps_per_second": 2.49 |
|
} |
|
], |
|
"logging_steps": 142, |
|
"max_steps": 5760, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 90, |
|
"save_steps": 142, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 10, |
|
"early_stopping_threshold": 0.01 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 10 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 2.3734952710727598e+20, |
|
"train_batch_size": 256, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|