{ "best_metric": 0.43726277351379395, "best_model_checkpoint": "miner_id_24/checkpoint-50", "epoch": 0.02185732630256004, "eval_steps": 25, "global_step": 50, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.0004371465260512008, "grad_norm": 0.7138184905052185, "learning_rate": 5e-05, "loss": 0.486, "step": 1 }, { "epoch": 0.0004371465260512008, "eval_loss": 0.5118860006332397, "eval_runtime": 1.0334, "eval_samples_per_second": 48.384, "eval_steps_per_second": 12.58, "step": 1 }, { "epoch": 0.0008742930521024015, "grad_norm": 0.6570831537246704, "learning_rate": 0.0001, "loss": 0.4875, "step": 2 }, { "epoch": 0.0013114395781536024, "grad_norm": 0.5541381239891052, "learning_rate": 9.990365154573717e-05, "loss": 0.4766, "step": 3 }, { "epoch": 0.001748586104204803, "grad_norm": 1.4234938621520996, "learning_rate": 9.961501876182148e-05, "loss": 0.4778, "step": 4 }, { "epoch": 0.0021857326302560038, "grad_norm": 0.37663623690605164, "learning_rate": 9.913533761814537e-05, "loss": 0.4877, "step": 5 }, { "epoch": 0.002622879156307205, "grad_norm": 0.21502217650413513, "learning_rate": 9.846666218300807e-05, "loss": 0.4557, "step": 6 }, { "epoch": 0.0030600256823584055, "grad_norm": 0.19036097824573517, "learning_rate": 9.761185582727977e-05, "loss": 0.428, "step": 7 }, { "epoch": 0.003497172208409606, "grad_norm": 0.29344573616981506, "learning_rate": 9.657457896300791e-05, "loss": 0.4509, "step": 8 }, { "epoch": 0.003934318734460807, "grad_norm": 0.22489379346370697, "learning_rate": 9.535927336897098e-05, "loss": 0.459, "step": 9 }, { "epoch": 0.0043714652605120075, "grad_norm": 0.16101472079753876, "learning_rate": 9.397114317029975e-05, "loss": 0.4505, "step": 10 }, { "epoch": 0.004808611786563209, "grad_norm": 0.12483734637498856, "learning_rate": 9.241613255361455e-05, "loss": 0.4075, "step": 11 }, { "epoch": 0.00524575831261441, "grad_norm": 0.13228893280029297, "learning_rate": 9.070090031310558e-05, "loss": 0.412, "step": 12 }, { "epoch": 0.00568290483866561, "grad_norm": 0.14945045113563538, "learning_rate": 8.883279133655399e-05, "loss": 0.4078, "step": 13 }, { "epoch": 0.006120051364716811, "grad_norm": 0.18098153173923492, "learning_rate": 8.681980515339464e-05, "loss": 0.4405, "step": 14 }, { "epoch": 0.006557197890768012, "grad_norm": 0.12866996228694916, "learning_rate": 8.467056167950311e-05, "loss": 0.414, "step": 15 }, { "epoch": 0.006994344416819212, "grad_norm": 0.1270732879638672, "learning_rate": 8.239426430539243e-05, "loss": 0.4155, "step": 16 }, { "epoch": 0.0074314909428704135, "grad_norm": 0.10422370582818985, "learning_rate": 8.000066048588211e-05, "loss": 0.4153, "step": 17 }, { "epoch": 0.007868637468921615, "grad_norm": 0.1436351239681244, "learning_rate": 7.75e-05, "loss": 0.3871, "step": 18 }, { "epoch": 0.008305783994972815, "grad_norm": 0.2538048326969147, "learning_rate": 7.490299105985507e-05, "loss": 0.431, "step": 19 }, { "epoch": 0.008742930521024015, "grad_norm": 0.1760319322347641, "learning_rate": 7.222075445642904e-05, "loss": 0.4419, "step": 20 }, { "epoch": 0.009180077047075217, "grad_norm": 0.19340398907661438, "learning_rate": 6.946477593864228e-05, "loss": 0.403, "step": 21 }, { "epoch": 0.009617223573126417, "grad_norm": 0.18303807079792023, "learning_rate": 6.664685702961344e-05, "loss": 0.4495, "step": 22 }, { "epoch": 0.010054370099177617, "grad_norm": 0.153054878115654, "learning_rate": 6.377906449072578e-05, "loss": 0.4351, "step": 23 }, { "epoch": 0.01049151662522882, "grad_norm": 0.17695312201976776, "learning_rate": 6.087367864990233e-05, "loss": 0.4215, "step": 24 }, { "epoch": 0.01092866315128002, "grad_norm": 0.14918991923332214, "learning_rate": 5.794314081535644e-05, "loss": 0.4419, "step": 25 }, { "epoch": 0.01092866315128002, "eval_loss": 0.4473043382167816, "eval_runtime": 0.5973, "eval_samples_per_second": 83.71, "eval_steps_per_second": 21.765, "step": 25 }, { "epoch": 0.01136580967733122, "grad_norm": 0.12351560592651367, "learning_rate": 5.500000000000001e-05, "loss": 0.4167, "step": 26 }, { "epoch": 0.011802956203382422, "grad_norm": 0.1452387571334839, "learning_rate": 5.205685918464356e-05, "loss": 0.4281, "step": 27 }, { "epoch": 0.012240102729433622, "grad_norm": 0.09778492152690887, "learning_rate": 4.912632135009769e-05, "loss": 0.4602, "step": 28 }, { "epoch": 0.012677249255484822, "grad_norm": 0.13965074717998505, "learning_rate": 4.6220935509274235e-05, "loss": 0.4197, "step": 29 }, { "epoch": 0.013114395781536024, "grad_norm": 0.11117494106292725, "learning_rate": 4.3353142970386564e-05, "loss": 0.4266, "step": 30 }, { "epoch": 0.013551542307587225, "grad_norm": 0.1902451366186142, "learning_rate": 4.053522406135775e-05, "loss": 0.4378, "step": 31 }, { "epoch": 0.013988688833638425, "grad_norm": 0.0919874906539917, "learning_rate": 3.777924554357096e-05, "loss": 0.403, "step": 32 }, { "epoch": 0.014425835359689627, "grad_norm": 0.13868221640586853, "learning_rate": 3.509700894014496e-05, "loss": 0.4227, "step": 33 }, { "epoch": 0.014862981885740827, "grad_norm": 0.0835568979382515, "learning_rate": 3.250000000000001e-05, "loss": 0.3739, "step": 34 }, { "epoch": 0.015300128411792027, "grad_norm": 0.08689092099666595, "learning_rate": 2.9999339514117912e-05, "loss": 0.3906, "step": 35 }, { "epoch": 0.01573727493784323, "grad_norm": 0.14874935150146484, "learning_rate": 2.760573569460757e-05, "loss": 0.4052, "step": 36 }, { "epoch": 0.016174421463894428, "grad_norm": 0.10629599541425705, "learning_rate": 2.53294383204969e-05, "loss": 0.4037, "step": 37 }, { "epoch": 0.01661156798994563, "grad_norm": 0.10195549577474594, "learning_rate": 2.3180194846605367e-05, "loss": 0.429, "step": 38 }, { "epoch": 0.01704871451599683, "grad_norm": 0.11101008206605911, "learning_rate": 2.1167208663446025e-05, "loss": 0.4121, "step": 39 }, { "epoch": 0.01748586104204803, "grad_norm": 0.086122527718544, "learning_rate": 1.9299099686894423e-05, "loss": 0.4265, "step": 40 }, { "epoch": 0.017923007568099232, "grad_norm": 0.09646829962730408, "learning_rate": 1.758386744638546e-05, "loss": 0.4043, "step": 41 }, { "epoch": 0.018360154094150434, "grad_norm": 0.0811895951628685, "learning_rate": 1.602885682970026e-05, "loss": 0.383, "step": 42 }, { "epoch": 0.018797300620201633, "grad_norm": 0.11058395355939865, "learning_rate": 1.464072663102903e-05, "loss": 0.4021, "step": 43 }, { "epoch": 0.019234447146252834, "grad_norm": 0.1073421910405159, "learning_rate": 1.3425421036992098e-05, "loss": 0.3988, "step": 44 }, { "epoch": 0.019671593672304036, "grad_norm": 0.10188556462526321, "learning_rate": 1.2388144172720251e-05, "loss": 0.4105, "step": 45 }, { "epoch": 0.020108740198355235, "grad_norm": 0.12634286284446716, "learning_rate": 1.1533337816991932e-05, "loss": 0.3868, "step": 46 }, { "epoch": 0.020545886724406437, "grad_norm": 0.10863292217254639, "learning_rate": 1.0864662381854632e-05, "loss": 0.4126, "step": 47 }, { "epoch": 0.02098303325045764, "grad_norm": 0.07743315398693085, "learning_rate": 1.0384981238178534e-05, "loss": 0.4153, "step": 48 }, { "epoch": 0.021420179776508837, "grad_norm": 0.08929809182882309, "learning_rate": 1.0096348454262845e-05, "loss": 0.423, "step": 49 }, { "epoch": 0.02185732630256004, "grad_norm": 0.1446986198425293, "learning_rate": 1e-05, "loss": 0.4138, "step": 50 }, { "epoch": 0.02185732630256004, "eval_loss": 0.43726277351379395, "eval_runtime": 0.5912, "eval_samples_per_second": 84.58, "eval_steps_per_second": 21.991, "step": 50 } ], "logging_steps": 1, "max_steps": 50, "num_input_tokens_seen": 0, "num_train_epochs": 1, "save_steps": 25, "stateful_callbacks": { "EarlyStoppingCallback": { "args": { "early_stopping_patience": 1, "early_stopping_threshold": 0.0 }, "attributes": { "early_stopping_patience_counter": 0 } }, "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": true }, "attributes": {} } }, "total_flos": 2.95300912644096e+16, "train_batch_size": 2, "trial_name": null, "trial_params": null }