{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.23573785950023574, "eval_steps": 100, "global_step": 1000, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.023573785950023574, "eval_accuracy": 0.6317716870477788, "eval_loss": 1.8646224737167358, "eval_runtime": 492.4929, "eval_samples_per_second": 15.507, "eval_steps_per_second": 0.485, "step": 100 }, { "epoch": 0.04714757190004715, "eval_accuracy": 0.7021049967171662, "eval_loss": 1.4454373121261597, "eval_runtime": 492.5577, "eval_samples_per_second": 15.505, "eval_steps_per_second": 0.485, "step": 200 }, { "epoch": 0.07072135785007072, "eval_accuracy": 0.7335578085807225, "eval_loss": 1.2683207988739014, "eval_runtime": 492.7056, "eval_samples_per_second": 15.5, "eval_steps_per_second": 0.485, "step": 300 }, { "epoch": 0.0942951438000943, "eval_accuracy": 0.7496352846832398, "eval_loss": 1.1724069118499756, "eval_runtime": 492.418, "eval_samples_per_second": 15.509, "eval_steps_per_second": 0.485, "step": 400 }, { "epoch": 0.11786892975011787, "grad_norm": 3.823148250579834, "learning_rate": 4.803551783749804e-05, "loss": 1.7013, "step": 500 }, { "epoch": 0.11786892975011787, "eval_accuracy": 0.7622726018519113, "eval_loss": 1.1071853637695312, "eval_runtime": 492.5647, "eval_samples_per_second": 15.505, "eval_steps_per_second": 0.485, "step": 500 }, { "epoch": 0.14144271570014144, "eval_accuracy": 0.7731863812800325, "eval_loss": 1.0427114963531494, "eval_runtime": 492.2995, "eval_samples_per_second": 15.513, "eval_steps_per_second": 0.485, "step": 600 }, { "epoch": 0.16501650165016502, "eval_accuracy": 0.7809848069202094, "eval_loss": 1.003616452217102, "eval_runtime": 492.5831, "eval_samples_per_second": 15.504, "eval_steps_per_second": 0.485, "step": 700 }, { "epoch": 0.1885902876001886, "eval_accuracy": 0.7868278015496704, "eval_loss": 0.9764024615287781, "eval_runtime": 492.4908, "eval_samples_per_second": 15.507, "eval_steps_per_second": 0.485, "step": 800 }, { "epoch": 0.21216407355021216, "eval_accuracy": 0.7921159020990164, "eval_loss": 0.94410640001297, "eval_runtime": 492.6567, "eval_samples_per_second": 15.502, "eval_steps_per_second": 0.485, "step": 900 }, { "epoch": 0.23573785950023574, "grad_norm": 2.0617034435272217, "learning_rate": 4.607103567499607e-05, "loss": 0.9942, "step": 1000 }, { "epoch": 0.23573785950023574, "eval_accuracy": 0.7963057274181528, "eval_loss": 0.9231188297271729, "eval_runtime": 492.777, "eval_samples_per_second": 15.498, "eval_steps_per_second": 0.485, "step": 1000 } ], "logging_steps": 500, "max_steps": 12726, "num_input_tokens_seen": 0, "num_train_epochs": 3, "save_steps": 200, "total_flos": 7.41887283560448e+17, "train_batch_size": 32, "trial_name": null, "trial_params": null }