{ "best_metric": 0.04927213117480278, "best_model_checkpoint": "deberta-v3-xsmall-zyda-2-sentiment/checkpoint-6286", "epoch": 3.0, "eval_steps": 500, "global_step": 9429, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.1590836780146357, "grad_norm": 1.8468247652053833, "learning_rate": 4.7348605366422736e-05, "loss": 0.2115, "step": 500 }, { "epoch": 0.3181673560292714, "grad_norm": 1.7370903491973877, "learning_rate": 4.4697210732845476e-05, "loss": 0.101, "step": 1000 }, { "epoch": 0.4772510340439071, "grad_norm": 1.7206146717071533, "learning_rate": 4.2045816099268216e-05, "loss": 0.0846, "step": 1500 }, { "epoch": 0.6363347120585428, "grad_norm": 1.1373802423477173, "learning_rate": 3.9394421465690956e-05, "loss": 0.0748, "step": 2000 }, { "epoch": 0.7954183900731785, "grad_norm": 0.9603880047798157, "learning_rate": 3.674302683211369e-05, "loss": 0.0691, "step": 2500 }, { "epoch": 0.9545020680878142, "grad_norm": 1.0165342092514038, "learning_rate": 3.409163219853643e-05, "loss": 0.0627, "step": 3000 }, { "epoch": 1.0, "eval_loss": 0.06652908027172089, "eval_mse": 0.06652908171153529, "eval_runtime": 10.5244, "eval_samples_per_second": 950.17, "eval_steps_per_second": 118.771, "step": 3143 }, { "epoch": 1.1135857461024499, "grad_norm": 0.9926055073738098, "learning_rate": 3.144023756495917e-05, "loss": 0.0522, "step": 3500 }, { "epoch": 1.2726694241170855, "grad_norm": 1.247205376625061, "learning_rate": 2.878884293138191e-05, "loss": 0.0485, "step": 4000 }, { "epoch": 1.4317531021317214, "grad_norm": 1.7589031457901, "learning_rate": 2.6137448297804644e-05, "loss": 0.0463, "step": 4500 }, { "epoch": 1.590836780146357, "grad_norm": 0.7484694719314575, "learning_rate": 2.3486053664227384e-05, "loss": 0.0443, "step": 5000 }, { "epoch": 1.7499204581609926, "grad_norm": 1.5068027973175049, "learning_rate": 2.083465903065012e-05, "loss": 0.0421, "step": 5500 }, { "epoch": 1.9090041361756285, "grad_norm": 0.832625150680542, "learning_rate": 1.818326439707286e-05, "loss": 0.0411, "step": 6000 }, { "epoch": 2.0, "eval_loss": 0.04927213117480278, "eval_mse": 0.049272132016595305, "eval_runtime": 11.3101, "eval_samples_per_second": 884.162, "eval_steps_per_second": 110.52, "step": 6286 }, { "epoch": 2.068087814190264, "grad_norm": 0.6708300709724426, "learning_rate": 1.5531869763495598e-05, "loss": 0.0387, "step": 6500 }, { "epoch": 2.2271714922048997, "grad_norm": 0.6490187644958496, "learning_rate": 1.2880475129918337e-05, "loss": 0.0337, "step": 7000 }, { "epoch": 2.3862551702195356, "grad_norm": 0.7127770185470581, "learning_rate": 1.0229080496341075e-05, "loss": 0.0324, "step": 7500 }, { "epoch": 2.545338848234171, "grad_norm": 0.6604452133178711, "learning_rate": 7.5776858627638146e-06, "loss": 0.0326, "step": 8000 }, { "epoch": 2.704422526248807, "grad_norm": 0.5042712092399597, "learning_rate": 4.926291229186552e-06, "loss": 0.0323, "step": 8500 }, { "epoch": 2.8635062042634427, "grad_norm": 0.573316752910614, "learning_rate": 2.2748965956092908e-06, "loss": 0.0321, "step": 9000 }, { "epoch": 3.0, "eval_loss": 0.05235280096530914, "eval_mse": 0.05235280389813637, "eval_runtime": 10.3984, "eval_samples_per_second": 961.689, "eval_steps_per_second": 120.211, "step": 9429 }, { "epoch": 3.0, "step": 9429, "total_flos": 9935679003367680.0, "train_loss": 0.05866297316179509, "train_runtime": 1207.603, "train_samples_per_second": 499.597, "train_steps_per_second": 7.808 } ], "logging_steps": 500, "max_steps": 9429, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 9935679003367680.0, "train_batch_size": 64, "trial_name": null, "trial_params": null }