{ "best_metric": 0.09644630551338196, "best_model_checkpoint": "finetuned-ai-real-beit/checkpoint-50", "epoch": 5.0, "eval_steps": 50, "global_step": 110, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.45454545454545453, "grad_norm": 4.455732345581055, "learning_rate": 0.00018363636363636366, "loss": 0.3675, "step": 10 }, { "epoch": 0.9090909090909091, "grad_norm": 6.04647159576416, "learning_rate": 0.00016545454545454545, "loss": 0.1004, "step": 20 }, { "epoch": 1.3636363636363638, "grad_norm": 7.7111616134643555, "learning_rate": 0.00014727272727272728, "loss": 0.1074, "step": 30 }, { "epoch": 1.8181818181818183, "grad_norm": 3.5139763355255127, "learning_rate": 0.0001290909090909091, "loss": 0.0934, "step": 40 }, { "epoch": 2.2727272727272725, "grad_norm": 1.9594430923461914, "learning_rate": 0.00011090909090909092, "loss": 0.0724, "step": 50 }, { "epoch": 2.2727272727272725, "eval_accuracy": 0.9669421487603306, "eval_loss": 0.09644630551338196, "eval_runtime": 1.506, "eval_samples_per_second": 80.347, "eval_steps_per_second": 10.624, "step": 50 }, { "epoch": 2.7272727272727275, "grad_norm": 1.1000548601150513, "learning_rate": 9.272727272727273e-05, "loss": 0.0213, "step": 60 }, { "epoch": 3.1818181818181817, "grad_norm": 1.2705051898956299, "learning_rate": 7.454545454545455e-05, "loss": 0.0563, "step": 70 }, { "epoch": 3.6363636363636362, "grad_norm": 0.07004624605178833, "learning_rate": 5.636363636363636e-05, "loss": 0.0588, "step": 80 }, { "epoch": 4.090909090909091, "grad_norm": 6.447866916656494, "learning_rate": 3.818181818181819e-05, "loss": 0.0296, "step": 90 }, { "epoch": 4.545454545454545, "grad_norm": 0.036224253475666046, "learning_rate": 2e-05, "loss": 0.0198, "step": 100 }, { "epoch": 4.545454545454545, "eval_accuracy": 0.9586776859504132, "eval_loss": 0.13395561277866364, "eval_runtime": 1.4421, "eval_samples_per_second": 83.904, "eval_steps_per_second": 11.095, "step": 100 }, { "epoch": 5.0, "grad_norm": 0.009324366226792336, "learning_rate": 1.818181818181818e-06, "loss": 0.0103, "step": 110 }, { "epoch": 5.0, "step": 110, "total_flos": 2.633601533681664e+17, "train_loss": 0.08520049764351412, "train_runtime": 91.5555, "train_samples_per_second": 37.136, "train_steps_per_second": 1.201 } ], "logging_steps": 10, "max_steps": 110, "num_input_tokens_seen": 0, "num_train_epochs": 5, "save_steps": 100, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.633601533681664e+17, "train_batch_size": 32, "trial_name": null, "trial_params": null }