{ "best_metric": null, "best_model_checkpoint": null, "epoch": 6.896551724137931, "eval_steps": 250, "global_step": 1500, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.004597701149425287, "grad_norm": 37.01336935001283, "learning_rate": 5e-06, "loss": 0.8637, "num_input_tokens_seen": 90336, "step": 1 }, { "epoch": 0.004597701149425287, "loss": 1.0519328117370605, "loss_ce": 0.5021281838417053, "loss_iou": 0.400390625, "loss_num": 0.10986328125, "loss_xval": 0.55078125, "num_input_tokens_seen": 90336, "step": 1 }, { "epoch": 0.009195402298850575, "grad_norm": 40.354572183137385, "learning_rate": 5e-06, "loss": 0.8717, "num_input_tokens_seen": 180800, "step": 2 }, { "epoch": 0.009195402298850575, "loss": 0.7949165105819702, "loss_ce": 0.3349556028842926, "loss_iou": 0.341796875, "loss_num": 0.091796875, "loss_xval": 0.4609375, "num_input_tokens_seen": 180800, "step": 2 }, { "epoch": 0.013793103448275862, "grad_norm": 39.06584845163415, "learning_rate": 5e-06, "loss": 0.606, "num_input_tokens_seen": 271252, "step": 3 }, { "epoch": 0.013793103448275862, "loss": 0.5930109620094299, "loss_ce": 0.20678049325942993, "loss_iou": 0.478515625, "loss_num": 0.0771484375, "loss_xval": 0.38671875, "num_input_tokens_seen": 271252, "step": 3 }, { "epoch": 0.01839080459770115, "grad_norm": 57.83141578006947, "learning_rate": 5e-06, "loss": 0.6455, "num_input_tokens_seen": 361480, "step": 4 }, { "epoch": 0.01839080459770115, "loss": 0.7589341402053833, "loss_ce": 0.3553696274757385, "loss_iou": 0.51953125, "loss_num": 0.08056640625, "loss_xval": 0.404296875, "num_input_tokens_seen": 361480, "step": 4 }, { "epoch": 0.022988505747126436, "grad_norm": 42.90588851722674, "learning_rate": 5e-06, "loss": 1.0614, "num_input_tokens_seen": 451876, "step": 5 }, { "epoch": 0.022988505747126436, "loss": 1.2513668537139893, "loss_ce": 0.703759491443634, "loss_iou": 0.421875, "loss_num": 0.109375, "loss_xval": 0.546875, "num_input_tokens_seen": 451876, "step": 5 }, { "epoch": 0.027586206896551724, "grad_norm": 22.460688537328316, "learning_rate": 5e-06, "loss": 0.6396, "num_input_tokens_seen": 542192, "step": 6 }, { "epoch": 0.027586206896551724, "loss": 0.504480242729187, "loss_ce": 0.05452907085418701, "loss_iou": 0.4140625, "loss_num": 0.08984375, "loss_xval": 0.44921875, "num_input_tokens_seen": 542192, "step": 6 }, { "epoch": 0.03218390804597701, "grad_norm": 23.415625594005842, "learning_rate": 5e-06, "loss": 0.874, "num_input_tokens_seen": 632568, "step": 7 }, { "epoch": 0.03218390804597701, "loss": 1.0449306964874268, "loss_ce": 0.5966885685920715, "loss_iou": 0.357421875, "loss_num": 0.08984375, "loss_xval": 0.44921875, "num_input_tokens_seen": 632568, "step": 7 }, { "epoch": 0.0367816091954023, "grad_norm": 31.16740618162066, "learning_rate": 5e-06, "loss": 0.7589, "num_input_tokens_seen": 723052, "step": 8 }, { "epoch": 0.0367816091954023, "loss": 0.6076769828796387, "loss_ce": 0.25794553756713867, "loss_iou": 0.494140625, "loss_num": 0.06982421875, "loss_xval": 0.349609375, "num_input_tokens_seen": 723052, "step": 8 }, { "epoch": 0.041379310344827586, "grad_norm": 79.07479774439705, "learning_rate": 5e-06, "loss": 0.687, "num_input_tokens_seen": 813440, "step": 9 }, { "epoch": 0.041379310344827586, "loss": 0.6652740240097046, "loss_ce": 0.1359771490097046, "loss_iou": 0.392578125, "loss_num": 0.10595703125, "loss_xval": 0.53125, "num_input_tokens_seen": 813440, "step": 9 }, { "epoch": 0.04597701149425287, "grad_norm": 44.77421252011234, "learning_rate": 5e-06, "loss": 0.6675, "num_input_tokens_seen": 903664, "step": 10 }, { "epoch": 0.04597701149425287, "loss": 0.6931918859481812, "loss_ce": 0.2824862599372864, "loss_iou": 0.388671875, "loss_num": 0.08203125, "loss_xval": 0.41015625, "num_input_tokens_seen": 903664, "step": 10 }, { "epoch": 0.05057471264367816, "grad_norm": 80.61281743018304, "learning_rate": 5e-06, "loss": 0.8424, "num_input_tokens_seen": 994196, "step": 11 }, { "epoch": 0.05057471264367816, "loss": 0.9065788984298706, "loss_ce": 0.4577263593673706, "loss_iou": 0.416015625, "loss_num": 0.08984375, "loss_xval": 0.44921875, "num_input_tokens_seen": 994196, "step": 11 }, { "epoch": 0.05517241379310345, "grad_norm": 21.38207584365465, "learning_rate": 5e-06, "loss": 0.6596, "num_input_tokens_seen": 1084712, "step": 12 }, { "epoch": 0.05517241379310345, "loss": 0.5851885080337524, "loss_ce": 0.21836715936660767, "loss_iou": 0.43359375, "loss_num": 0.0732421875, "loss_xval": 0.3671875, "num_input_tokens_seen": 1084712, "step": 12 }, { "epoch": 0.059770114942528735, "grad_norm": 27.964832638511165, "learning_rate": 5e-06, "loss": 0.5867, "num_input_tokens_seen": 1175040, "step": 13 }, { "epoch": 0.059770114942528735, "loss": 0.5128318071365356, "loss_ce": 0.13587866723537445, "loss_iou": 0.421875, "loss_num": 0.0751953125, "loss_xval": 0.376953125, "num_input_tokens_seen": 1175040, "step": 13 }, { "epoch": 0.06436781609195402, "grad_norm": 15.47526861039646, "learning_rate": 5e-06, "loss": 0.6604, "num_input_tokens_seen": 1265560, "step": 14 }, { "epoch": 0.06436781609195402, "loss": 0.6039849519729614, "loss_ce": 0.20707331597805023, "loss_iou": 0.466796875, "loss_num": 0.07958984375, "loss_xval": 0.396484375, "num_input_tokens_seen": 1265560, "step": 14 }, { "epoch": 0.06896551724137931, "grad_norm": 8.807051044677067, "learning_rate": 5e-06, "loss": 0.6509, "num_input_tokens_seen": 1355984, "step": 15 }, { "epoch": 0.06896551724137931, "loss": 0.6505950689315796, "loss_ce": 0.1728118658065796, "loss_iou": 0.376953125, "loss_num": 0.095703125, "loss_xval": 0.478515625, "num_input_tokens_seen": 1355984, "step": 15 }, { "epoch": 0.0735632183908046, "grad_norm": 16.39684002573081, "learning_rate": 5e-06, "loss": 0.5639, "num_input_tokens_seen": 1446400, "step": 16 }, { "epoch": 0.0735632183908046, "loss": 0.6162420511245728, "loss_ce": 0.24746768176555634, "loss_iou": 0.490234375, "loss_num": 0.07373046875, "loss_xval": 0.369140625, "num_input_tokens_seen": 1446400, "step": 16 }, { "epoch": 0.07816091954022988, "grad_norm": 19.846643198159168, "learning_rate": 5e-06, "loss": 0.6321, "num_input_tokens_seen": 1536776, "step": 17 }, { "epoch": 0.07816091954022988, "loss": 0.6417566537857056, "loss_ce": 0.18911993503570557, "loss_iou": 0.44140625, "loss_num": 0.0908203125, "loss_xval": 0.453125, "num_input_tokens_seen": 1536776, "step": 17 }, { "epoch": 0.08275862068965517, "grad_norm": 28.522543896242876, "learning_rate": 5e-06, "loss": 0.6082, "num_input_tokens_seen": 1627260, "step": 18 }, { "epoch": 0.08275862068965517, "loss": 0.5867501497268677, "loss_ce": 0.10921109467744827, "loss_iou": 0.35546875, "loss_num": 0.095703125, "loss_xval": 0.4765625, "num_input_tokens_seen": 1627260, "step": 18 }, { "epoch": 0.08735632183908046, "grad_norm": 37.06692415264521, "learning_rate": 5e-06, "loss": 0.6748, "num_input_tokens_seen": 1717724, "step": 19 }, { "epoch": 0.08735632183908046, "loss": 0.691688060760498, "loss_ce": 0.22208356857299805, "loss_iou": 0.400390625, "loss_num": 0.09375, "loss_xval": 0.46875, "num_input_tokens_seen": 1717724, "step": 19 }, { "epoch": 0.09195402298850575, "grad_norm": 113.58143371326585, "learning_rate": 5e-06, "loss": 0.5325, "num_input_tokens_seen": 1808100, "step": 20 }, { "epoch": 0.09195402298850575, "loss": 0.5246105194091797, "loss_ce": 0.1267833560705185, "loss_iou": 0.37890625, "loss_num": 0.07958984375, "loss_xval": 0.3984375, "num_input_tokens_seen": 1808100, "step": 20 }, { "epoch": 0.09655172413793103, "grad_norm": 45.45186371478247, "learning_rate": 5e-06, "loss": 0.672, "num_input_tokens_seen": 1898532, "step": 21 }, { "epoch": 0.09655172413793103, "loss": 0.6351579427719116, "loss_ce": 0.06411299854516983, "loss_iou": 0.50390625, "loss_num": 0.1142578125, "loss_xval": 0.5703125, "num_input_tokens_seen": 1898532, "step": 21 }, { "epoch": 0.10114942528735632, "grad_norm": 23.89941149750651, "learning_rate": 5e-06, "loss": 0.5832, "num_input_tokens_seen": 1988852, "step": 22 }, { "epoch": 0.10114942528735632, "loss": 0.5167936682701111, "loss_ce": 0.1544889509677887, "loss_iou": 0.51953125, "loss_num": 0.07275390625, "loss_xval": 0.36328125, "num_input_tokens_seen": 1988852, "step": 22 }, { "epoch": 0.10574712643678161, "grad_norm": 10.318774820543899, "learning_rate": 5e-06, "loss": 0.6361, "num_input_tokens_seen": 2079304, "step": 23 }, { "epoch": 0.10574712643678161, "loss": 0.5845872759819031, "loss_ce": 0.07311265170574188, "loss_iou": 0.423828125, "loss_num": 0.1025390625, "loss_xval": 0.51171875, "num_input_tokens_seen": 2079304, "step": 23 }, { "epoch": 0.1103448275862069, "grad_norm": 25.73676580542981, "learning_rate": 5e-06, "loss": 0.568, "num_input_tokens_seen": 2169628, "step": 24 }, { "epoch": 0.1103448275862069, "loss": 0.5678774118423462, "loss_ce": 0.06470361351966858, "loss_iou": 0.52734375, "loss_num": 0.1005859375, "loss_xval": 0.50390625, "num_input_tokens_seen": 2169628, "step": 24 }, { "epoch": 0.11494252873563218, "grad_norm": 81.51890029791515, "learning_rate": 5e-06, "loss": 0.5453, "num_input_tokens_seen": 2259984, "step": 25 }, { "epoch": 0.11494252873563218, "loss": 0.5036299824714661, "loss_ce": 0.05953817814588547, "loss_iou": 0.435546875, "loss_num": 0.0888671875, "loss_xval": 0.443359375, "num_input_tokens_seen": 2259984, "step": 25 }, { "epoch": 0.11954022988505747, "grad_norm": 27.04495599462074, "learning_rate": 5e-06, "loss": 0.5524, "num_input_tokens_seen": 2350292, "step": 26 }, { "epoch": 0.11954022988505747, "loss": 0.6328690052032471, "loss_ce": 0.08086705207824707, "loss_iou": 0.380859375, "loss_num": 0.1103515625, "loss_xval": 0.55078125, "num_input_tokens_seen": 2350292, "step": 26 }, { "epoch": 0.12413793103448276, "grad_norm": 10.193343220359463, "learning_rate": 5e-06, "loss": 0.5916, "num_input_tokens_seen": 2440640, "step": 27 }, { "epoch": 0.12413793103448276, "loss": 0.5718714594841003, "loss_ce": 0.14609020948410034, "loss_iou": 0.388671875, "loss_num": 0.08544921875, "loss_xval": 0.42578125, "num_input_tokens_seen": 2440640, "step": 27 }, { "epoch": 0.12873563218390804, "grad_norm": 19.9427592340637, "learning_rate": 5e-06, "loss": 0.581, "num_input_tokens_seen": 2530228, "step": 28 }, { "epoch": 0.12873563218390804, "loss": 0.6976370811462402, "loss_ce": 0.15210485458374023, "loss_iou": 0.267578125, "loss_num": 0.109375, "loss_xval": 0.546875, "num_input_tokens_seen": 2530228, "step": 28 }, { "epoch": 0.13333333333333333, "grad_norm": 22.33962406561534, "learning_rate": 5e-06, "loss": 0.5247, "num_input_tokens_seen": 2619848, "step": 29 }, { "epoch": 0.13333333333333333, "loss": 0.570274829864502, "loss_ce": 0.09835098683834076, "loss_iou": 0.44921875, "loss_num": 0.09423828125, "loss_xval": 0.47265625, "num_input_tokens_seen": 2619848, "step": 29 }, { "epoch": 0.13793103448275862, "grad_norm": 14.599136978515288, "learning_rate": 5e-06, "loss": 0.5767, "num_input_tokens_seen": 2710140, "step": 30 }, { "epoch": 0.13793103448275862, "loss": 0.603084146976471, "loss_ce": 0.09063299000263214, "loss_iou": 0.427734375, "loss_num": 0.1025390625, "loss_xval": 0.51171875, "num_input_tokens_seen": 2710140, "step": 30 }, { "epoch": 0.1425287356321839, "grad_norm": 18.74413636198144, "learning_rate": 5e-06, "loss": 0.5167, "num_input_tokens_seen": 2800536, "step": 31 }, { "epoch": 0.1425287356321839, "loss": 0.547565758228302, "loss_ce": 0.1607249677181244, "loss_iou": 0.36328125, "loss_num": 0.0771484375, "loss_xval": 0.38671875, "num_input_tokens_seen": 2800536, "step": 31 }, { "epoch": 0.1471264367816092, "grad_norm": 30.495448668876183, "learning_rate": 5e-06, "loss": 0.4526, "num_input_tokens_seen": 2890960, "step": 32 }, { "epoch": 0.1471264367816092, "loss": 0.42909184098243713, "loss_ce": 0.10731448233127594, "loss_iou": 0.45703125, "loss_num": 0.064453125, "loss_xval": 0.322265625, "num_input_tokens_seen": 2890960, "step": 32 }, { "epoch": 0.15172413793103448, "grad_norm": 82.31686845472512, "learning_rate": 5e-06, "loss": 0.5327, "num_input_tokens_seen": 2979808, "step": 33 }, { "epoch": 0.15172413793103448, "loss": 0.4499969482421875, "loss_ce": 0.008224454708397388, "loss_iou": 0.408203125, "loss_num": 0.08837890625, "loss_xval": 0.44140625, "num_input_tokens_seen": 2979808, "step": 33 }, { "epoch": 0.15632183908045977, "grad_norm": 10.971908242941907, "learning_rate": 5e-06, "loss": 0.4713, "num_input_tokens_seen": 3070312, "step": 34 }, { "epoch": 0.15632183908045977, "loss": 0.3486025929450989, "loss_ce": 0.06405669450759888, "loss_iou": 0.466796875, "loss_num": 0.056884765625, "loss_xval": 0.28515625, "num_input_tokens_seen": 3070312, "step": 34 }, { "epoch": 0.16091954022988506, "grad_norm": 59.48550166875302, "learning_rate": 5e-06, "loss": 0.4692, "num_input_tokens_seen": 3159280, "step": 35 }, { "epoch": 0.16091954022988506, "loss": 0.5126940011978149, "loss_ce": 0.08544795215129852, "loss_iou": 0.337890625, "loss_num": 0.08544921875, "loss_xval": 0.427734375, "num_input_tokens_seen": 3159280, "step": 35 }, { "epoch": 0.16551724137931034, "grad_norm": 60.760015794484936, "learning_rate": 5e-06, "loss": 0.4513, "num_input_tokens_seen": 3249672, "step": 36 }, { "epoch": 0.16551724137931034, "loss": 0.4749954044818878, "loss_ce": 0.11220243573188782, "loss_iou": 0.490234375, "loss_num": 0.072265625, "loss_xval": 0.36328125, "num_input_tokens_seen": 3249672, "step": 36 }, { "epoch": 0.17011494252873563, "grad_norm": 24.0106917483809, "learning_rate": 5e-06, "loss": 0.481, "num_input_tokens_seen": 3339988, "step": 37 }, { "epoch": 0.17011494252873563, "loss": 0.5111528635025024, "loss_ce": 0.13212455809116364, "loss_iou": 0.5078125, "loss_num": 0.07568359375, "loss_xval": 0.37890625, "num_input_tokens_seen": 3339988, "step": 37 }, { "epoch": 0.17471264367816092, "grad_norm": 30.5271253034538, "learning_rate": 5e-06, "loss": 0.5096, "num_input_tokens_seen": 3430444, "step": 38 }, { "epoch": 0.17471264367816092, "loss": 0.4569355845451355, "loss_ce": 0.03676954656839371, "loss_iou": 0.5234375, "loss_num": 0.083984375, "loss_xval": 0.419921875, "num_input_tokens_seen": 3430444, "step": 38 }, { "epoch": 0.1793103448275862, "grad_norm": 8.31708311536423, "learning_rate": 5e-06, "loss": 0.417, "num_input_tokens_seen": 3519396, "step": 39 }, { "epoch": 0.1793103448275862, "loss": 0.4267890453338623, "loss_ce": 0.0754707008600235, "loss_iou": 0.337890625, "loss_num": 0.0703125, "loss_xval": 0.3515625, "num_input_tokens_seen": 3519396, "step": 39 }, { "epoch": 0.1839080459770115, "grad_norm": 20.02750098475025, "learning_rate": 5e-06, "loss": 0.3488, "num_input_tokens_seen": 3609736, "step": 40 }, { "epoch": 0.1839080459770115, "loss": 0.3892139196395874, "loss_ce": 0.047050852328538895, "loss_iou": 0.46484375, "loss_num": 0.068359375, "loss_xval": 0.341796875, "num_input_tokens_seen": 3609736, "step": 40 }, { "epoch": 0.18850574712643678, "grad_norm": 32.78061699301825, "learning_rate": 5e-06, "loss": 0.4256, "num_input_tokens_seen": 3700148, "step": 41 }, { "epoch": 0.18850574712643678, "loss": 0.3672151267528534, "loss_ce": 0.013943652622401714, "loss_iou": 0.44921875, "loss_num": 0.07080078125, "loss_xval": 0.353515625, "num_input_tokens_seen": 3700148, "step": 41 }, { "epoch": 0.19310344827586207, "grad_norm": 9.890488159206065, "learning_rate": 5e-06, "loss": 0.4023, "num_input_tokens_seen": 3790612, "step": 42 }, { "epoch": 0.19310344827586207, "loss": 0.42205893993377686, "loss_ce": 0.034973956644535065, "loss_iou": 0.5625, "loss_num": 0.07763671875, "loss_xval": 0.38671875, "num_input_tokens_seen": 3790612, "step": 42 }, { "epoch": 0.19770114942528735, "grad_norm": 24.69833771237315, "learning_rate": 5e-06, "loss": 0.5508, "num_input_tokens_seen": 3880836, "step": 43 }, { "epoch": 0.19770114942528735, "loss": 0.5896004438400269, "loss_ce": 0.20044030249118805, "loss_iou": 0.431640625, "loss_num": 0.078125, "loss_xval": 0.388671875, "num_input_tokens_seen": 3880836, "step": 43 }, { "epoch": 0.20229885057471264, "grad_norm": 16.669851454492647, "learning_rate": 5e-06, "loss": 0.4632, "num_input_tokens_seen": 3971176, "step": 44 }, { "epoch": 0.20229885057471264, "loss": 0.4887722134590149, "loss_ce": 0.0513942651450634, "loss_iou": 0.474609375, "loss_num": 0.08740234375, "loss_xval": 0.4375, "num_input_tokens_seen": 3971176, "step": 44 }, { "epoch": 0.20689655172413793, "grad_norm": 18.6471842377231, "learning_rate": 5e-06, "loss": 0.4398, "num_input_tokens_seen": 4061524, "step": 45 }, { "epoch": 0.20689655172413793, "loss": 0.3761554956436157, "loss_ce": 0.028621304780244827, "loss_iou": 0.384765625, "loss_num": 0.0693359375, "loss_xval": 0.34765625, "num_input_tokens_seen": 4061524, "step": 45 }, { "epoch": 0.21149425287356322, "grad_norm": 28.00486282610174, "learning_rate": 5e-06, "loss": 0.3932, "num_input_tokens_seen": 4151828, "step": 46 }, { "epoch": 0.21149425287356322, "loss": 0.3925134539604187, "loss_ce": 0.04986211284995079, "loss_iou": 0.48828125, "loss_num": 0.068359375, "loss_xval": 0.341796875, "num_input_tokens_seen": 4151828, "step": 46 }, { "epoch": 0.2160919540229885, "grad_norm": 24.306371497861868, "learning_rate": 5e-06, "loss": 0.4599, "num_input_tokens_seen": 4241372, "step": 47 }, { "epoch": 0.2160919540229885, "loss": 0.49177491664886475, "loss_ce": 0.07002198696136475, "loss_iou": 0.3671875, "loss_num": 0.08447265625, "loss_xval": 0.421875, "num_input_tokens_seen": 4241372, "step": 47 }, { "epoch": 0.2206896551724138, "grad_norm": 6.774260279559146, "learning_rate": 5e-06, "loss": 0.414, "num_input_tokens_seen": 4331848, "step": 48 }, { "epoch": 0.2206896551724138, "loss": 0.3134571313858032, "loss_ce": 0.009929286316037178, "loss_iou": 0.439453125, "loss_num": 0.060791015625, "loss_xval": 0.302734375, "num_input_tokens_seen": 4331848, "step": 48 }, { "epoch": 0.22528735632183908, "grad_norm": 7.185186363794483, "learning_rate": 5e-06, "loss": 0.3401, "num_input_tokens_seen": 4422192, "step": 49 }, { "epoch": 0.22528735632183908, "loss": 0.34919464588165283, "loss_ce": 0.04316438362002373, "loss_iou": 0.4140625, "loss_num": 0.061279296875, "loss_xval": 0.306640625, "num_input_tokens_seen": 4422192, "step": 49 }, { "epoch": 0.22988505747126436, "grad_norm": 13.427495770074591, "learning_rate": 5e-06, "loss": 0.3785, "num_input_tokens_seen": 4512568, "step": 50 }, { "epoch": 0.22988505747126436, "loss": 0.3789505362510681, "loss_ce": 0.001997418701648712, "loss_iou": 0.3984375, "loss_num": 0.0751953125, "loss_xval": 0.376953125, "num_input_tokens_seen": 4512568, "step": 50 }, { "epoch": 0.23448275862068965, "grad_norm": 13.227669876358155, "learning_rate": 5e-06, "loss": 0.4474, "num_input_tokens_seen": 4602984, "step": 51 }, { "epoch": 0.23448275862068965, "loss": 0.5125839710235596, "loss_ce": 0.09559179842472076, "loss_iou": 0.314453125, "loss_num": 0.08349609375, "loss_xval": 0.41796875, "num_input_tokens_seen": 4602984, "step": 51 }, { "epoch": 0.23908045977011494, "grad_norm": 23.649689887557532, "learning_rate": 5e-06, "loss": 0.3993, "num_input_tokens_seen": 4693356, "step": 52 }, { "epoch": 0.23908045977011494, "loss": 0.3423450291156769, "loss_ce": 0.011290331371128559, "loss_iou": 0.4609375, "loss_num": 0.06640625, "loss_xval": 0.33203125, "num_input_tokens_seen": 4693356, "step": 52 }, { "epoch": 0.24367816091954023, "grad_norm": 29.218941891807887, "learning_rate": 5e-06, "loss": 0.4166, "num_input_tokens_seen": 4783796, "step": 53 }, { "epoch": 0.24367816091954023, "loss": 0.4327765107154846, "loss_ce": 0.06705383956432343, "loss_iou": 0.37890625, "loss_num": 0.0732421875, "loss_xval": 0.365234375, "num_input_tokens_seen": 4783796, "step": 53 }, { "epoch": 0.2482758620689655, "grad_norm": 12.83340723694157, "learning_rate": 5e-06, "loss": 0.4025, "num_input_tokens_seen": 4874232, "step": 54 }, { "epoch": 0.2482758620689655, "loss": 0.3716029226779938, "loss_ce": 0.03688611835241318, "loss_iou": 0.50390625, "loss_num": 0.06689453125, "loss_xval": 0.333984375, "num_input_tokens_seen": 4874232, "step": 54 }, { "epoch": 0.25287356321839083, "grad_norm": 8.220941129491107, "learning_rate": 5e-06, "loss": 0.4638, "num_input_tokens_seen": 4963396, "step": 55 }, { "epoch": 0.25287356321839083, "loss": 0.5566645860671997, "loss_ce": 0.040917523205280304, "loss_iou": 0.421875, "loss_num": 0.10302734375, "loss_xval": 0.515625, "num_input_tokens_seen": 4963396, "step": 55 }, { "epoch": 0.2574712643678161, "grad_norm": 13.288705604735872, "learning_rate": 5e-06, "loss": 0.3137, "num_input_tokens_seen": 5053628, "step": 56 }, { "epoch": 0.2574712643678161, "loss": 0.3751257061958313, "loss_ce": 0.020999712869524956, "loss_iou": 0.462890625, "loss_num": 0.07080078125, "loss_xval": 0.353515625, "num_input_tokens_seen": 5053628, "step": 56 }, { "epoch": 0.2620689655172414, "grad_norm": 39.13223161382933, "learning_rate": 5e-06, "loss": 0.4695, "num_input_tokens_seen": 5144036, "step": 57 }, { "epoch": 0.2620689655172414, "loss": 0.5425558686256409, "loss_ce": 0.023757044225931168, "loss_iou": 0.34765625, "loss_num": 0.103515625, "loss_xval": 0.51953125, "num_input_tokens_seen": 5144036, "step": 57 }, { "epoch": 0.26666666666666666, "grad_norm": 15.33841968913398, "learning_rate": 5e-06, "loss": 0.3458, "num_input_tokens_seen": 5234456, "step": 58 }, { "epoch": 0.26666666666666666, "loss": 0.38235118985176086, "loss_ce": 0.03310802951455116, "loss_iou": 0.412109375, "loss_num": 0.06982421875, "loss_xval": 0.349609375, "num_input_tokens_seen": 5234456, "step": 58 }, { "epoch": 0.271264367816092, "grad_norm": 26.180906675160497, "learning_rate": 5e-06, "loss": 0.3917, "num_input_tokens_seen": 5324744, "step": 59 }, { "epoch": 0.271264367816092, "loss": 0.43390488624572754, "loss_ce": 0.039861951023340225, "loss_iou": 0.47265625, "loss_num": 0.07861328125, "loss_xval": 0.39453125, "num_input_tokens_seen": 5324744, "step": 59 }, { "epoch": 0.27586206896551724, "grad_norm": 9.816205473208605, "learning_rate": 5e-06, "loss": 0.3208, "num_input_tokens_seen": 5415040, "step": 60 }, { "epoch": 0.27586206896551724, "loss": 0.244655042886734, "loss_ce": 0.01015797071158886, "loss_iou": 0.423828125, "loss_num": 0.046875, "loss_xval": 0.234375, "num_input_tokens_seen": 5415040, "step": 60 }, { "epoch": 0.28045977011494255, "grad_norm": 15.328830837517142, "learning_rate": 5e-06, "loss": 0.4713, "num_input_tokens_seen": 5505400, "step": 61 }, { "epoch": 0.28045977011494255, "loss": 0.3520286977291107, "loss_ce": 0.0024193418212234974, "loss_iou": 0.37109375, "loss_num": 0.06982421875, "loss_xval": 0.349609375, "num_input_tokens_seen": 5505400, "step": 61 }, { "epoch": 0.2850574712643678, "grad_norm": 25.32547400452274, "learning_rate": 5e-06, "loss": 0.367, "num_input_tokens_seen": 5595868, "step": 62 }, { "epoch": 0.2850574712643678, "loss": 0.39265066385269165, "loss_ce": 0.026805955916643143, "loss_iou": 0.322265625, "loss_num": 0.0732421875, "loss_xval": 0.365234375, "num_input_tokens_seen": 5595868, "step": 62 }, { "epoch": 0.2896551724137931, "grad_norm": 8.379792746028329, "learning_rate": 5e-06, "loss": 0.3734, "num_input_tokens_seen": 5686176, "step": 63 }, { "epoch": 0.2896551724137931, "loss": 0.413876473903656, "loss_ce": 0.0571870356798172, "loss_iou": 0.42578125, "loss_num": 0.0712890625, "loss_xval": 0.357421875, "num_input_tokens_seen": 5686176, "step": 63 }, { "epoch": 0.2942528735632184, "grad_norm": 9.587938754950969, "learning_rate": 5e-06, "loss": 0.431, "num_input_tokens_seen": 5776472, "step": 64 }, { "epoch": 0.2942528735632184, "loss": 0.45100903511047363, "loss_ce": 0.017171159386634827, "loss_iou": 0.408203125, "loss_num": 0.0869140625, "loss_xval": 0.43359375, "num_input_tokens_seen": 5776472, "step": 64 }, { "epoch": 0.2988505747126437, "grad_norm": 6.157907021347152, "learning_rate": 5e-06, "loss": 0.3847, "num_input_tokens_seen": 5866900, "step": 65 }, { "epoch": 0.2988505747126437, "loss": 0.3870934844017029, "loss_ce": 0.006722383201122284, "loss_iou": 0.3984375, "loss_num": 0.076171875, "loss_xval": 0.380859375, "num_input_tokens_seen": 5866900, "step": 65 }, { "epoch": 0.30344827586206896, "grad_norm": 9.170883238290122, "learning_rate": 5e-06, "loss": 0.3444, "num_input_tokens_seen": 5957224, "step": 66 }, { "epoch": 0.30344827586206896, "loss": 0.3455989360809326, "loss_ce": 0.0012385793961584568, "loss_iou": 0.48828125, "loss_num": 0.06884765625, "loss_xval": 0.34375, "num_input_tokens_seen": 5957224, "step": 66 }, { "epoch": 0.3080459770114943, "grad_norm": 27.234249102852328, "learning_rate": 5e-06, "loss": 0.3401, "num_input_tokens_seen": 6047500, "step": 67 }, { "epoch": 0.3080459770114943, "loss": 0.3533991575241089, "loss_ce": 0.007085674442350864, "loss_iou": 0.423828125, "loss_num": 0.0693359375, "loss_xval": 0.345703125, "num_input_tokens_seen": 6047500, "step": 67 }, { "epoch": 0.31264367816091954, "grad_norm": 47.75785601894446, "learning_rate": 5e-06, "loss": 0.3646, "num_input_tokens_seen": 6137912, "step": 68 }, { "epoch": 0.31264367816091954, "loss": 0.3522525429725647, "loss_ce": 0.019244728609919548, "loss_iou": 0.45703125, "loss_num": 0.06640625, "loss_xval": 0.33203125, "num_input_tokens_seen": 6137912, "step": 68 }, { "epoch": 0.31724137931034485, "grad_norm": 6.987555924094217, "learning_rate": 5e-06, "loss": 0.3722, "num_input_tokens_seen": 6228264, "step": 69 }, { "epoch": 0.31724137931034485, "loss": 0.3856116533279419, "loss_ce": 0.011588208377361298, "loss_iou": 0.53125, "loss_num": 0.07470703125, "loss_xval": 0.375, "num_input_tokens_seen": 6228264, "step": 69 }, { "epoch": 0.3218390804597701, "grad_norm": 5.498036338797732, "learning_rate": 5e-06, "loss": 0.3389, "num_input_tokens_seen": 6318588, "step": 70 }, { "epoch": 0.3218390804597701, "loss": 0.31558507680892944, "loss_ce": 0.0037564674858003855, "loss_iou": 0.33984375, "loss_num": 0.0625, "loss_xval": 0.3125, "num_input_tokens_seen": 6318588, "step": 70 }, { "epoch": 0.3264367816091954, "grad_norm": 8.031546032564327, "learning_rate": 5e-06, "loss": 0.3435, "num_input_tokens_seen": 6409052, "step": 71 }, { "epoch": 0.3264367816091954, "loss": 0.25542575120925903, "loss_ce": 0.016717255115509033, "loss_iou": 0.423828125, "loss_num": 0.0478515625, "loss_xval": 0.23828125, "num_input_tokens_seen": 6409052, "step": 71 }, { "epoch": 0.3310344827586207, "grad_norm": 6.547993935141339, "learning_rate": 5e-06, "loss": 0.4004, "num_input_tokens_seen": 6499328, "step": 72 }, { "epoch": 0.3310344827586207, "loss": 0.5037937164306641, "loss_ce": 0.02991676703095436, "loss_iou": 0.375, "loss_num": 0.0947265625, "loss_xval": 0.474609375, "num_input_tokens_seen": 6499328, "step": 72 }, { "epoch": 0.335632183908046, "grad_norm": 68.22686102549073, "learning_rate": 5e-06, "loss": 0.4303, "num_input_tokens_seen": 6589628, "step": 73 }, { "epoch": 0.335632183908046, "loss": 0.39546552300453186, "loss_ce": 0.03560223802924156, "loss_iou": 0.40625, "loss_num": 0.07177734375, "loss_xval": 0.359375, "num_input_tokens_seen": 6589628, "step": 73 }, { "epoch": 0.34022988505747126, "grad_norm": 13.503622709112387, "learning_rate": 5e-06, "loss": 0.345, "num_input_tokens_seen": 6680112, "step": 74 }, { "epoch": 0.34022988505747126, "loss": 0.3707137703895569, "loss_ce": 0.03233487531542778, "loss_iou": 0.37109375, "loss_num": 0.06787109375, "loss_xval": 0.337890625, "num_input_tokens_seen": 6680112, "step": 74 }, { "epoch": 0.3448275862068966, "grad_norm": 13.855537520348836, "learning_rate": 5e-06, "loss": 0.3428, "num_input_tokens_seen": 6770428, "step": 75 }, { "epoch": 0.3448275862068966, "loss": 0.34209129214286804, "loss_ce": 0.017384245991706848, "loss_iou": 0.4765625, "loss_num": 0.06494140625, "loss_xval": 0.32421875, "num_input_tokens_seen": 6770428, "step": 75 }, { "epoch": 0.34942528735632183, "grad_norm": 23.91656100675138, "learning_rate": 5e-06, "loss": 0.395, "num_input_tokens_seen": 6860876, "step": 76 }, { "epoch": 0.34942528735632183, "loss": 0.3706984221935272, "loss_ce": 0.04159684479236603, "loss_iou": 0.44140625, "loss_num": 0.06591796875, "loss_xval": 0.328125, "num_input_tokens_seen": 6860876, "step": 76 }, { "epoch": 0.35402298850574715, "grad_norm": 12.200721545193472, "learning_rate": 5e-06, "loss": 0.3466, "num_input_tokens_seen": 6950396, "step": 77 }, { "epoch": 0.35402298850574715, "loss": 0.4722903072834015, "loss_ce": 0.05505400151014328, "loss_iou": 0.384765625, "loss_num": 0.08349609375, "loss_xval": 0.41796875, "num_input_tokens_seen": 6950396, "step": 77 }, { "epoch": 0.3586206896551724, "grad_norm": 8.19258423406812, "learning_rate": 5e-06, "loss": 0.3716, "num_input_tokens_seen": 7040716, "step": 78 }, { "epoch": 0.3586206896551724, "loss": 0.37182319164276123, "loss_ce": 0.010861298069357872, "loss_iou": 0.3046875, "loss_num": 0.072265625, "loss_xval": 0.361328125, "num_input_tokens_seen": 7040716, "step": 78 }, { "epoch": 0.3632183908045977, "grad_norm": 18.895525833487042, "learning_rate": 5e-06, "loss": 0.3798, "num_input_tokens_seen": 7131028, "step": 79 }, { "epoch": 0.3632183908045977, "loss": 0.2786239981651306, "loss_ce": 0.0010361107997596264, "loss_iou": 0.46875, "loss_num": 0.055419921875, "loss_xval": 0.27734375, "num_input_tokens_seen": 7131028, "step": 79 }, { "epoch": 0.367816091954023, "grad_norm": 14.693361209406406, "learning_rate": 5e-06, "loss": 0.33, "num_input_tokens_seen": 7221608, "step": 80 }, { "epoch": 0.367816091954023, "loss": 0.33968040347099304, "loss_ce": 0.019367896020412445, "loss_iou": 0.44921875, "loss_num": 0.06396484375, "loss_xval": 0.3203125, "num_input_tokens_seen": 7221608, "step": 80 }, { "epoch": 0.3724137931034483, "grad_norm": 20.35099683349881, "learning_rate": 5e-06, "loss": 0.391, "num_input_tokens_seen": 7311916, "step": 81 }, { "epoch": 0.3724137931034483, "loss": 0.451404333114624, "loss_ce": 0.04014945402741432, "loss_iou": 0.4765625, "loss_num": 0.08203125, "loss_xval": 0.412109375, "num_input_tokens_seen": 7311916, "step": 81 }, { "epoch": 0.37701149425287356, "grad_norm": 9.233297779847412, "learning_rate": 5e-06, "loss": 0.3335, "num_input_tokens_seen": 7402256, "step": 82 }, { "epoch": 0.37701149425287356, "loss": 0.3448345959186554, "loss_ce": 0.0010846068616956472, "loss_iou": 0.46484375, "loss_num": 0.06884765625, "loss_xval": 0.34375, "num_input_tokens_seen": 7402256, "step": 82 }, { "epoch": 0.3816091954022989, "grad_norm": 2.170529749870518, "learning_rate": 5e-06, "loss": 0.206, "num_input_tokens_seen": 7492744, "step": 83 }, { "epoch": 0.3816091954022989, "loss": 0.19931316375732422, "loss_ce": 0.002306930720806122, "loss_iou": 0.44140625, "loss_num": 0.039306640625, "loss_xval": 0.197265625, "num_input_tokens_seen": 7492744, "step": 83 }, { "epoch": 0.38620689655172413, "grad_norm": 5.8890959300403, "learning_rate": 5e-06, "loss": 0.3027, "num_input_tokens_seen": 7583004, "step": 84 }, { "epoch": 0.38620689655172413, "loss": 0.3211175799369812, "loss_ce": 0.01050965953618288, "loss_iou": 0.4765625, "loss_num": 0.062255859375, "loss_xval": 0.310546875, "num_input_tokens_seen": 7583004, "step": 84 }, { "epoch": 0.39080459770114945, "grad_norm": 13.034484868845816, "learning_rate": 5e-06, "loss": 0.3487, "num_input_tokens_seen": 7673256, "step": 85 }, { "epoch": 0.39080459770114945, "loss": 0.36519187688827515, "loss_ce": 0.003131319535896182, "loss_iou": 0.404296875, "loss_num": 0.072265625, "loss_xval": 0.361328125, "num_input_tokens_seen": 7673256, "step": 85 }, { "epoch": 0.3954022988505747, "grad_norm": 13.756069652716036, "learning_rate": 5e-06, "loss": 0.335, "num_input_tokens_seen": 7763604, "step": 86 }, { "epoch": 0.3954022988505747, "loss": 0.37784039974212646, "loss_ce": 0.018831633031368256, "loss_iou": 0.39453125, "loss_num": 0.07177734375, "loss_xval": 0.359375, "num_input_tokens_seen": 7763604, "step": 86 }, { "epoch": 0.4, "grad_norm": 15.575683221755641, "learning_rate": 5e-06, "loss": 0.3935, "num_input_tokens_seen": 7853108, "step": 87 }, { "epoch": 0.4, "loss": 0.36000126600265503, "loss_ce": 0.04499881714582443, "loss_iou": 0.435546875, "loss_num": 0.06298828125, "loss_xval": 0.314453125, "num_input_tokens_seen": 7853108, "step": 87 }, { "epoch": 0.4045977011494253, "grad_norm": 25.9232530151757, "learning_rate": 5e-06, "loss": 0.3719, "num_input_tokens_seen": 7943488, "step": 88 }, { "epoch": 0.4045977011494253, "loss": 0.42258501052856445, "loss_ce": 0.03049515187740326, "loss_iou": 0.5078125, "loss_num": 0.07861328125, "loss_xval": 0.392578125, "num_input_tokens_seen": 7943488, "step": 88 }, { "epoch": 0.4091954022988506, "grad_norm": 13.656470567398008, "learning_rate": 5e-06, "loss": 0.3132, "num_input_tokens_seen": 8033868, "step": 89 }, { "epoch": 0.4091954022988506, "loss": 0.2531309425830841, "loss_ce": 0.0003233220777474344, "loss_iou": 0.484375, "loss_num": 0.050537109375, "loss_xval": 0.251953125, "num_input_tokens_seen": 8033868, "step": 89 }, { "epoch": 0.41379310344827586, "grad_norm": 16.547729090720072, "learning_rate": 5e-06, "loss": 0.318, "num_input_tokens_seen": 8124172, "step": 90 }, { "epoch": 0.41379310344827586, "loss": 0.30939018726348877, "loss_ce": 0.001162638422101736, "loss_iou": 0.515625, "loss_num": 0.0615234375, "loss_xval": 0.30859375, "num_input_tokens_seen": 8124172, "step": 90 }, { "epoch": 0.41839080459770117, "grad_norm": 15.99038307829354, "learning_rate": 5e-06, "loss": 0.3983, "num_input_tokens_seen": 8213772, "step": 91 }, { "epoch": 0.41839080459770117, "loss": 0.5176781415939331, "loss_ce": 0.1472862958908081, "loss_iou": 0.3046875, "loss_num": 0.07421875, "loss_xval": 0.37109375, "num_input_tokens_seen": 8213772, "step": 91 }, { "epoch": 0.42298850574712643, "grad_norm": 17.254415632547147, "learning_rate": 5e-06, "loss": 0.4036, "num_input_tokens_seen": 8304136, "step": 92 }, { "epoch": 0.42298850574712643, "loss": 0.44501999020576477, "loss_ce": 0.025098130106925964, "loss_iou": 0.39453125, "loss_num": 0.083984375, "loss_xval": 0.419921875, "num_input_tokens_seen": 8304136, "step": 92 }, { "epoch": 0.42758620689655175, "grad_norm": 22.369663390664186, "learning_rate": 5e-06, "loss": 0.3815, "num_input_tokens_seen": 8394520, "step": 93 }, { "epoch": 0.42758620689655175, "loss": 0.3709450364112854, "loss_ce": 0.0024147892836481333, "loss_iou": 0.470703125, "loss_num": 0.07373046875, "loss_xval": 0.369140625, "num_input_tokens_seen": 8394520, "step": 93 }, { "epoch": 0.432183908045977, "grad_norm": 7.546128838220428, "learning_rate": 5e-06, "loss": 0.3913, "num_input_tokens_seen": 8484876, "step": 94 }, { "epoch": 0.432183908045977, "loss": 0.4254246652126312, "loss_ce": 0.01648910902440548, "loss_iou": 0.48828125, "loss_num": 0.08154296875, "loss_xval": 0.408203125, "num_input_tokens_seen": 8484876, "step": 94 }, { "epoch": 0.4367816091954023, "grad_norm": 11.464537758049973, "learning_rate": 5e-06, "loss": 0.3445, "num_input_tokens_seen": 8575288, "step": 95 }, { "epoch": 0.4367816091954023, "loss": 0.4096546173095703, "loss_ce": 0.011278166435658932, "loss_iou": 0.396484375, "loss_num": 0.07958984375, "loss_xval": 0.3984375, "num_input_tokens_seen": 8575288, "step": 95 }, { "epoch": 0.4413793103448276, "grad_norm": 14.34926027066774, "learning_rate": 5e-06, "loss": 0.3164, "num_input_tokens_seen": 8665812, "step": 96 }, { "epoch": 0.4413793103448276, "loss": 0.3848586082458496, "loss_ce": 0.016572486609220505, "loss_iou": 0.359375, "loss_num": 0.07373046875, "loss_xval": 0.369140625, "num_input_tokens_seen": 8665812, "step": 96 }, { "epoch": 0.4459770114942529, "grad_norm": 9.735611757167003, "learning_rate": 5e-06, "loss": 0.3679, "num_input_tokens_seen": 8756136, "step": 97 }, { "epoch": 0.4459770114942529, "loss": 0.39260411262512207, "loss_ce": 0.046046510338783264, "loss_iou": 0.44921875, "loss_num": 0.0693359375, "loss_xval": 0.345703125, "num_input_tokens_seen": 8756136, "step": 97 }, { "epoch": 0.45057471264367815, "grad_norm": 8.954384165667761, "learning_rate": 5e-06, "loss": 0.2693, "num_input_tokens_seen": 8846560, "step": 98 }, { "epoch": 0.45057471264367815, "loss": 0.2520179748535156, "loss_ce": 0.0018959222361445427, "loss_iou": 0.5078125, "loss_num": 0.050048828125, "loss_xval": 0.25, "num_input_tokens_seen": 8846560, "step": 98 }, { "epoch": 0.45517241379310347, "grad_norm": 4.271408735001314, "learning_rate": 5e-06, "loss": 0.2796, "num_input_tokens_seen": 8936096, "step": 99 }, { "epoch": 0.45517241379310347, "loss": 0.2937536835670471, "loss_ce": 0.01689821295440197, "loss_iou": 0.376953125, "loss_num": 0.055419921875, "loss_xval": 0.27734375, "num_input_tokens_seen": 8936096, "step": 99 }, { "epoch": 0.45977011494252873, "grad_norm": 45.09395035520587, "learning_rate": 5e-06, "loss": 0.341, "num_input_tokens_seen": 9026468, "step": 100 }, { "epoch": 0.45977011494252873, "loss": 0.3114800751209259, "loss_ce": 0.009722266346216202, "loss_iou": 0.396484375, "loss_num": 0.060302734375, "loss_xval": 0.30078125, "num_input_tokens_seen": 9026468, "step": 100 }, { "epoch": 0.46436781609195404, "grad_norm": 19.359219772759506, "learning_rate": 5e-06, "loss": 0.341, "num_input_tokens_seen": 9116736, "step": 101 }, { "epoch": 0.46436781609195404, "loss": 0.3214173913002014, "loss_ce": 0.003912519197911024, "loss_iou": 0.474609375, "loss_num": 0.0634765625, "loss_xval": 0.318359375, "num_input_tokens_seen": 9116736, "step": 101 }, { "epoch": 0.4689655172413793, "grad_norm": 6.534059918097954, "learning_rate": 5e-06, "loss": 0.3168, "num_input_tokens_seen": 9207040, "step": 102 }, { "epoch": 0.4689655172413793, "loss": 0.3264657258987427, "loss_ce": 0.02580653503537178, "loss_iou": 0.451171875, "loss_num": 0.06005859375, "loss_xval": 0.30078125, "num_input_tokens_seen": 9207040, "step": 102 }, { "epoch": 0.4735632183908046, "grad_norm": 7.393393813109138, "learning_rate": 5e-06, "loss": 0.359, "num_input_tokens_seen": 9297316, "step": 103 }, { "epoch": 0.4735632183908046, "loss": 0.3772013187408447, "loss_ce": 0.01831459254026413, "loss_iou": 0.5078125, "loss_num": 0.07177734375, "loss_xval": 0.359375, "num_input_tokens_seen": 9297316, "step": 103 }, { "epoch": 0.4781609195402299, "grad_norm": 41.31290787104306, "learning_rate": 5e-06, "loss": 0.3541, "num_input_tokens_seen": 9387640, "step": 104 }, { "epoch": 0.4781609195402299, "loss": 0.32786232233047485, "loss_ce": 0.0027890922501683235, "loss_iou": 0.44140625, "loss_num": 0.06494140625, "loss_xval": 0.32421875, "num_input_tokens_seen": 9387640, "step": 104 }, { "epoch": 0.4827586206896552, "grad_norm": 5.6085929794757, "learning_rate": 5e-06, "loss": 0.3398, "num_input_tokens_seen": 9477976, "step": 105 }, { "epoch": 0.4827586206896552, "loss": 0.3234696090221405, "loss_ce": 0.02805946208536625, "loss_iou": 0.41796875, "loss_num": 0.05908203125, "loss_xval": 0.294921875, "num_input_tokens_seen": 9477976, "step": 105 }, { "epoch": 0.48735632183908045, "grad_norm": 20.069977336539267, "learning_rate": 5e-06, "loss": 0.4092, "num_input_tokens_seen": 9568252, "step": 106 }, { "epoch": 0.48735632183908045, "loss": 0.3579648733139038, "loss_ce": 0.017388703301548958, "loss_iou": 0.4296875, "loss_num": 0.068359375, "loss_xval": 0.33984375, "num_input_tokens_seen": 9568252, "step": 106 }, { "epoch": 0.49195402298850577, "grad_norm": 69.89835804812493, "learning_rate": 5e-06, "loss": 0.4106, "num_input_tokens_seen": 9657112, "step": 107 }, { "epoch": 0.49195402298850577, "loss": 0.38876211643218994, "loss_ce": 0.019133206456899643, "loss_iou": 0.451171875, "loss_num": 0.07421875, "loss_xval": 0.369140625, "num_input_tokens_seen": 9657112, "step": 107 }, { "epoch": 0.496551724137931, "grad_norm": 6.848175278978215, "learning_rate": 5e-06, "loss": 0.2565, "num_input_tokens_seen": 9747548, "step": 108 }, { "epoch": 0.496551724137931, "loss": 0.21308478713035583, "loss_ce": 0.00849493220448494, "loss_iou": 0.423828125, "loss_num": 0.041015625, "loss_xval": 0.205078125, "num_input_tokens_seen": 9747548, "step": 108 }, { "epoch": 0.5011494252873563, "grad_norm": 11.091512135924047, "learning_rate": 5e-06, "loss": 0.3075, "num_input_tokens_seen": 9837824, "step": 109 }, { "epoch": 0.5011494252873563, "loss": 0.3376855254173279, "loss_ce": 0.01969236694276333, "loss_iou": 0.455078125, "loss_num": 0.0634765625, "loss_xval": 0.318359375, "num_input_tokens_seen": 9837824, "step": 109 }, { "epoch": 0.5057471264367817, "grad_norm": 8.760189132358677, "learning_rate": 5e-06, "loss": 0.2375, "num_input_tokens_seen": 9928152, "step": 110 }, { "epoch": 0.5057471264367817, "loss": 0.26674026250839233, "loss_ce": 0.002091821748763323, "loss_iou": 0.443359375, "loss_num": 0.052978515625, "loss_xval": 0.265625, "num_input_tokens_seen": 9928152, "step": 110 }, { "epoch": 0.5103448275862069, "grad_norm": 8.592645738220657, "learning_rate": 5e-06, "loss": 0.3073, "num_input_tokens_seen": 10017708, "step": 111 }, { "epoch": 0.5103448275862069, "loss": 0.29403021931648254, "loss_ce": 0.005700131878256798, "loss_iou": 0.435546875, "loss_num": 0.0576171875, "loss_xval": 0.2890625, "num_input_tokens_seen": 10017708, "step": 111 }, { "epoch": 0.5149425287356322, "grad_norm": 13.883237322149405, "learning_rate": 5e-06, "loss": 0.2906, "num_input_tokens_seen": 10108008, "step": 112 }, { "epoch": 0.5149425287356322, "loss": 0.22113177180290222, "loss_ce": 0.011414967477321625, "loss_iou": 0.44921875, "loss_num": 0.0419921875, "loss_xval": 0.2099609375, "num_input_tokens_seen": 10108008, "step": 112 }, { "epoch": 0.5195402298850574, "grad_norm": 44.804966100254894, "learning_rate": 5e-06, "loss": 0.3082, "num_input_tokens_seen": 10198460, "step": 113 }, { "epoch": 0.5195402298850574, "loss": 0.26152580976486206, "loss_ce": 0.0007836385047994554, "loss_iou": 0.44140625, "loss_num": 0.052001953125, "loss_xval": 0.26171875, "num_input_tokens_seen": 10198460, "step": 113 }, { "epoch": 0.5241379310344828, "grad_norm": 11.857004672937288, "learning_rate": 5e-06, "loss": 0.3335, "num_input_tokens_seen": 10288856, "step": 114 }, { "epoch": 0.5241379310344828, "loss": 0.3588264584541321, "loss_ce": 0.039612580090761185, "loss_iou": 0.4609375, "loss_num": 0.06396484375, "loss_xval": 0.318359375, "num_input_tokens_seen": 10288856, "step": 114 }, { "epoch": 0.5287356321839081, "grad_norm": 12.190324307933235, "learning_rate": 5e-06, "loss": 0.2532, "num_input_tokens_seen": 10379180, "step": 115 }, { "epoch": 0.5287356321839081, "loss": 0.2677502930164337, "loss_ce": 0.008900204673409462, "loss_iou": 0.44140625, "loss_num": 0.0517578125, "loss_xval": 0.259765625, "num_input_tokens_seen": 10379180, "step": 115 }, { "epoch": 0.5333333333333333, "grad_norm": 5.20934661237588, "learning_rate": 5e-06, "loss": 0.3062, "num_input_tokens_seen": 10468788, "step": 116 }, { "epoch": 0.5333333333333333, "loss": 0.3348138928413391, "loss_ce": 0.0026605918537825346, "loss_iou": 0.40625, "loss_num": 0.06640625, "loss_xval": 0.33203125, "num_input_tokens_seen": 10468788, "step": 116 }, { "epoch": 0.5379310344827586, "grad_norm": 102.83862228180482, "learning_rate": 5e-06, "loss": 0.2783, "num_input_tokens_seen": 10559244, "step": 117 }, { "epoch": 0.5379310344827586, "loss": 0.2607054114341736, "loss_ce": 0.005578459706157446, "loss_iou": 0.453125, "loss_num": 0.051025390625, "loss_xval": 0.255859375, "num_input_tokens_seen": 10559244, "step": 117 }, { "epoch": 0.542528735632184, "grad_norm": 11.560087419415964, "learning_rate": 5e-06, "loss": 0.2611, "num_input_tokens_seen": 10649640, "step": 118 }, { "epoch": 0.542528735632184, "loss": 0.2891330122947693, "loss_ce": 0.017282411456108093, "loss_iou": 0.458984375, "loss_num": 0.054443359375, "loss_xval": 0.271484375, "num_input_tokens_seen": 10649640, "step": 118 }, { "epoch": 0.5471264367816092, "grad_norm": 6.8172804181644775, "learning_rate": 5e-06, "loss": 0.281, "num_input_tokens_seen": 10740080, "step": 119 }, { "epoch": 0.5471264367816092, "loss": 0.2734673321247101, "loss_ce": 0.002349165268242359, "loss_iou": 0.51171875, "loss_num": 0.05419921875, "loss_xval": 0.271484375, "num_input_tokens_seen": 10740080, "step": 119 }, { "epoch": 0.5517241379310345, "grad_norm": 15.572102183682414, "learning_rate": 5e-06, "loss": 0.3282, "num_input_tokens_seen": 10830516, "step": 120 }, { "epoch": 0.5517241379310345, "loss": 0.37165123224258423, "loss_ce": 0.0017782035283744335, "loss_iou": 0.478515625, "loss_num": 0.07373046875, "loss_xval": 0.369140625, "num_input_tokens_seen": 10830516, "step": 120 }, { "epoch": 0.5563218390804597, "grad_norm": 9.896381717450371, "learning_rate": 5e-06, "loss": 0.2514, "num_input_tokens_seen": 10920860, "step": 121 }, { "epoch": 0.5563218390804597, "loss": 0.24367234110832214, "loss_ce": 0.0051469625905156136, "loss_iou": 0.4765625, "loss_num": 0.047607421875, "loss_xval": 0.23828125, "num_input_tokens_seen": 10920860, "step": 121 }, { "epoch": 0.5609195402298851, "grad_norm": 8.57733919240062, "learning_rate": 5e-06, "loss": 0.2907, "num_input_tokens_seen": 11011300, "step": 122 }, { "epoch": 0.5609195402298851, "loss": 0.30107301473617554, "loss_ce": 0.005418718792498112, "loss_iou": 0.51953125, "loss_num": 0.05908203125, "loss_xval": 0.294921875, "num_input_tokens_seen": 11011300, "step": 122 }, { "epoch": 0.5655172413793104, "grad_norm": 19.20663757306324, "learning_rate": 5e-06, "loss": 0.208, "num_input_tokens_seen": 11101728, "step": 123 }, { "epoch": 0.5655172413793104, "loss": 0.1830858439207077, "loss_ce": 0.006968907080590725, "loss_iou": 0.515625, "loss_num": 0.03515625, "loss_xval": 0.17578125, "num_input_tokens_seen": 11101728, "step": 123 }, { "epoch": 0.5701149425287356, "grad_norm": 15.412744082634788, "learning_rate": 5e-06, "loss": 0.3385, "num_input_tokens_seen": 11192036, "step": 124 }, { "epoch": 0.5701149425287356, "loss": 0.36701270937919617, "loss_ce": 0.0038535220082849264, "loss_iou": 0.435546875, "loss_num": 0.07275390625, "loss_xval": 0.36328125, "num_input_tokens_seen": 11192036, "step": 124 }, { "epoch": 0.5747126436781609, "grad_norm": 56.494382183302136, "learning_rate": 5e-06, "loss": 0.3616, "num_input_tokens_seen": 11282432, "step": 125 }, { "epoch": 0.5747126436781609, "loss": 0.4529553949832916, "loss_ce": 0.010572599247097969, "loss_iou": 0.349609375, "loss_num": 0.08837890625, "loss_xval": 0.44140625, "num_input_tokens_seen": 11282432, "step": 125 }, { "epoch": 0.5793103448275863, "grad_norm": 10.883196608124821, "learning_rate": 5e-06, "loss": 0.2749, "num_input_tokens_seen": 11372852, "step": 126 }, { "epoch": 0.5793103448275863, "loss": 0.21984298527240753, "loss_ce": 0.0012150609400123358, "loss_iou": 0.421875, "loss_num": 0.043701171875, "loss_xval": 0.21875, "num_input_tokens_seen": 11372852, "step": 126 }, { "epoch": 0.5839080459770115, "grad_norm": 13.409455091165006, "learning_rate": 5e-06, "loss": 0.2527, "num_input_tokens_seen": 11463312, "step": 127 }, { "epoch": 0.5839080459770115, "loss": 0.21884769201278687, "loss_ce": 0.008154332637786865, "loss_iou": 0.453125, "loss_num": 0.042236328125, "loss_xval": 0.2109375, "num_input_tokens_seen": 11463312, "step": 127 }, { "epoch": 0.5885057471264368, "grad_norm": 66.84081956875737, "learning_rate": 5e-06, "loss": 0.3699, "num_input_tokens_seen": 11553556, "step": 128 }, { "epoch": 0.5885057471264368, "loss": 0.3832904100418091, "loss_ce": 0.004384158179163933, "loss_iou": 0.451171875, "loss_num": 0.07568359375, "loss_xval": 0.37890625, "num_input_tokens_seen": 11553556, "step": 128 }, { "epoch": 0.593103448275862, "grad_norm": 12.161700433268052, "learning_rate": 5e-06, "loss": 0.3168, "num_input_tokens_seen": 11643936, "step": 129 }, { "epoch": 0.593103448275862, "loss": 0.29100292921066284, "loss_ce": 0.004259749781340361, "loss_iou": 0.419921875, "loss_num": 0.057373046875, "loss_xval": 0.287109375, "num_input_tokens_seen": 11643936, "step": 129 }, { "epoch": 0.5977011494252874, "grad_norm": 29.82460546961223, "learning_rate": 5e-06, "loss": 0.4414, "num_input_tokens_seen": 11734172, "step": 130 }, { "epoch": 0.5977011494252874, "loss": 0.43849754333496094, "loss_ce": 0.007955562323331833, "loss_iou": 0.302734375, "loss_num": 0.0859375, "loss_xval": 0.4296875, "num_input_tokens_seen": 11734172, "step": 130 }, { "epoch": 0.6022988505747127, "grad_norm": 9.962930837362645, "learning_rate": 5e-06, "loss": 0.3169, "num_input_tokens_seen": 11824480, "step": 131 }, { "epoch": 0.6022988505747127, "loss": 0.28843504190444946, "loss_ce": 0.01182370726019144, "loss_iou": 0.384765625, "loss_num": 0.05517578125, "loss_xval": 0.27734375, "num_input_tokens_seen": 11824480, "step": 131 }, { "epoch": 0.6068965517241379, "grad_norm": 19.583672279804937, "learning_rate": 5e-06, "loss": 0.33, "num_input_tokens_seen": 11914836, "step": 132 }, { "epoch": 0.6068965517241379, "loss": 0.43384528160095215, "loss_ce": 0.0053784484043717384, "loss_iou": 0.43359375, "loss_num": 0.08544921875, "loss_xval": 0.427734375, "num_input_tokens_seen": 11914836, "step": 132 }, { "epoch": 0.6114942528735632, "grad_norm": 25.89247440808492, "learning_rate": 5e-06, "loss": 0.2403, "num_input_tokens_seen": 12005236, "step": 133 }, { "epoch": 0.6114942528735632, "loss": 0.23137599229812622, "loss_ce": 0.003348652273416519, "loss_iou": 0.427734375, "loss_num": 0.045654296875, "loss_xval": 0.228515625, "num_input_tokens_seen": 12005236, "step": 133 }, { "epoch": 0.6160919540229886, "grad_norm": 11.009266151622725, "learning_rate": 5e-06, "loss": 0.3161, "num_input_tokens_seen": 12095572, "step": 134 }, { "epoch": 0.6160919540229886, "loss": 0.32933273911476135, "loss_ce": 0.003038788214325905, "loss_iou": 0.392578125, "loss_num": 0.0654296875, "loss_xval": 0.326171875, "num_input_tokens_seen": 12095572, "step": 134 }, { "epoch": 0.6206896551724138, "grad_norm": 4.89493638128289, "learning_rate": 5e-06, "loss": 0.2481, "num_input_tokens_seen": 12185852, "step": 135 }, { "epoch": 0.6206896551724138, "loss": 0.17523905634880066, "loss_ce": 0.0022654307540506124, "loss_iou": 0.46484375, "loss_num": 0.03466796875, "loss_xval": 0.1728515625, "num_input_tokens_seen": 12185852, "step": 135 }, { "epoch": 0.6252873563218391, "grad_norm": 9.499104731899472, "learning_rate": 5e-06, "loss": 0.2763, "num_input_tokens_seen": 12276216, "step": 136 }, { "epoch": 0.6252873563218391, "loss": 0.24068671464920044, "loss_ce": 0.010278991423547268, "loss_iou": 0.45703125, "loss_num": 0.046142578125, "loss_xval": 0.23046875, "num_input_tokens_seen": 12276216, "step": 136 }, { "epoch": 0.6298850574712643, "grad_norm": 16.393863320752942, "learning_rate": 5e-06, "loss": 0.3345, "num_input_tokens_seen": 12366584, "step": 137 }, { "epoch": 0.6298850574712643, "loss": 0.3390456438064575, "loss_ce": 0.004023653920739889, "loss_iou": 0.4140625, "loss_num": 0.06689453125, "loss_xval": 0.3359375, "num_input_tokens_seen": 12366584, "step": 137 }, { "epoch": 0.6344827586206897, "grad_norm": 12.427779653501139, "learning_rate": 5e-06, "loss": 0.2663, "num_input_tokens_seen": 12457056, "step": 138 }, { "epoch": 0.6344827586206897, "loss": 0.2509799599647522, "loss_ce": 0.004947238601744175, "loss_iou": 0.53515625, "loss_num": 0.049072265625, "loss_xval": 0.24609375, "num_input_tokens_seen": 12457056, "step": 138 }, { "epoch": 0.639080459770115, "grad_norm": 4.879940316834736, "learning_rate": 5e-06, "loss": 0.291, "num_input_tokens_seen": 12547580, "step": 139 }, { "epoch": 0.639080459770115, "loss": 0.28111234307289124, "loss_ce": 0.0024258154444396496, "loss_iou": 0.44921875, "loss_num": 0.0556640625, "loss_xval": 0.279296875, "num_input_tokens_seen": 12547580, "step": 139 }, { "epoch": 0.6436781609195402, "grad_norm": 19.612477583005116, "learning_rate": 5e-06, "loss": 0.2142, "num_input_tokens_seen": 12637988, "step": 140 }, { "epoch": 0.6436781609195402, "loss": 0.21896348893642426, "loss_ce": 0.0008238445734605193, "loss_iou": 0.408203125, "loss_num": 0.043701171875, "loss_xval": 0.2177734375, "num_input_tokens_seen": 12637988, "step": 140 }, { "epoch": 0.6482758620689655, "grad_norm": 8.925670199142369, "learning_rate": 5e-06, "loss": 0.3375, "num_input_tokens_seen": 12727624, "step": 141 }, { "epoch": 0.6482758620689655, "loss": 0.3529576063156128, "loss_ce": 0.010306272655725479, "loss_iou": 0.32421875, "loss_num": 0.068359375, "loss_xval": 0.341796875, "num_input_tokens_seen": 12727624, "step": 141 }, { "epoch": 0.6528735632183909, "grad_norm": 12.46478278407722, "learning_rate": 5e-06, "loss": 0.2862, "num_input_tokens_seen": 12818080, "step": 142 }, { "epoch": 0.6528735632183909, "loss": 0.2979605793952942, "loss_ce": 0.011522573418915272, "loss_iou": 0.41015625, "loss_num": 0.05712890625, "loss_xval": 0.287109375, "num_input_tokens_seen": 12818080, "step": 142 }, { "epoch": 0.6574712643678161, "grad_norm": 6.157855338602633, "learning_rate": 5e-06, "loss": 0.2823, "num_input_tokens_seen": 12908420, "step": 143 }, { "epoch": 0.6574712643678161, "loss": 0.2000827193260193, "loss_ce": 0.015390344895422459, "loss_iou": 0.5, "loss_num": 0.036865234375, "loss_xval": 0.1845703125, "num_input_tokens_seen": 12908420, "step": 143 }, { "epoch": 0.6620689655172414, "grad_norm": 7.342547102611114, "learning_rate": 5e-06, "loss": 0.3044, "num_input_tokens_seen": 12998800, "step": 144 }, { "epoch": 0.6620689655172414, "loss": 0.3009682297706604, "loss_ce": 0.0017739273607730865, "loss_iou": 0.470703125, "loss_num": 0.059814453125, "loss_xval": 0.298828125, "num_input_tokens_seen": 12998800, "step": 144 }, { "epoch": 0.6666666666666666, "grad_norm": 4.580788750753373, "learning_rate": 5e-06, "loss": 0.251, "num_input_tokens_seen": 13088988, "step": 145 }, { "epoch": 0.6666666666666666, "loss": 0.24675819277763367, "loss_ce": 0.0006034071557223797, "loss_iou": 0.42578125, "loss_num": 0.04931640625, "loss_xval": 0.24609375, "num_input_tokens_seen": 13088988, "step": 145 }, { "epoch": 0.671264367816092, "grad_norm": 36.43993056681854, "learning_rate": 5e-06, "loss": 0.3002, "num_input_tokens_seen": 13179296, "step": 146 }, { "epoch": 0.671264367816092, "loss": 0.29325583577156067, "loss_ce": 0.003033676417544484, "loss_iou": 0.400390625, "loss_num": 0.057861328125, "loss_xval": 0.291015625, "num_input_tokens_seen": 13179296, "step": 146 }, { "epoch": 0.6758620689655173, "grad_norm": 19.476664971027393, "learning_rate": 5e-06, "loss": 0.301, "num_input_tokens_seen": 13269688, "step": 147 }, { "epoch": 0.6758620689655173, "loss": 0.27462151646614075, "loss_ce": 0.014977962709963322, "loss_iou": 0.390625, "loss_num": 0.052001953125, "loss_xval": 0.259765625, "num_input_tokens_seen": 13269688, "step": 147 }, { "epoch": 0.6804597701149425, "grad_norm": 6.680497018215309, "learning_rate": 5e-06, "loss": 0.2906, "num_input_tokens_seen": 13360096, "step": 148 }, { "epoch": 0.6804597701149425, "loss": 0.31177955865859985, "loss_ce": 0.007702387869358063, "loss_iou": 0.3515625, "loss_num": 0.060791015625, "loss_xval": 0.3046875, "num_input_tokens_seen": 13360096, "step": 148 }, { "epoch": 0.6850574712643678, "grad_norm": 42.907574919392374, "learning_rate": 5e-06, "loss": 0.2479, "num_input_tokens_seen": 13450548, "step": 149 }, { "epoch": 0.6850574712643678, "loss": 0.23805175721645355, "loss_ce": 0.0016015599248930812, "loss_iou": 0.4765625, "loss_num": 0.04736328125, "loss_xval": 0.236328125, "num_input_tokens_seen": 13450548, "step": 149 }, { "epoch": 0.6896551724137931, "grad_norm": 26.91726277344863, "learning_rate": 5e-06, "loss": 0.2976, "num_input_tokens_seen": 13540784, "step": 150 }, { "epoch": 0.6896551724137931, "loss": 0.315399706363678, "loss_ce": 0.0002141495351679623, "loss_iou": 0.416015625, "loss_num": 0.06298828125, "loss_xval": 0.314453125, "num_input_tokens_seen": 13540784, "step": 150 }, { "epoch": 0.6942528735632184, "grad_norm": 14.145343351845199, "learning_rate": 5e-06, "loss": 0.3234, "num_input_tokens_seen": 13630388, "step": 151 }, { "epoch": 0.6942528735632184, "loss": 0.29768356680870056, "loss_ce": 0.0025175553746521473, "loss_iou": 0.376953125, "loss_num": 0.05908203125, "loss_xval": 0.294921875, "num_input_tokens_seen": 13630388, "step": 151 }, { "epoch": 0.6988505747126437, "grad_norm": 6.305485293216934, "learning_rate": 5e-06, "loss": 0.34, "num_input_tokens_seen": 13720696, "step": 152 }, { "epoch": 0.6988505747126437, "loss": 0.23166516423225403, "loss_ce": 0.00021984206978231668, "loss_iou": 0.375, "loss_num": 0.04638671875, "loss_xval": 0.2314453125, "num_input_tokens_seen": 13720696, "step": 152 }, { "epoch": 0.7034482758620689, "grad_norm": 7.601656082559478, "learning_rate": 5e-06, "loss": 0.2815, "num_input_tokens_seen": 13811092, "step": 153 }, { "epoch": 0.7034482758620689, "loss": 0.32397621870040894, "loss_ce": 0.010408090427517891, "loss_iou": 0.5078125, "loss_num": 0.0625, "loss_xval": 0.314453125, "num_input_tokens_seen": 13811092, "step": 153 }, { "epoch": 0.7080459770114943, "grad_norm": 9.200935279622126, "learning_rate": 5e-06, "loss": 0.2512, "num_input_tokens_seen": 13900680, "step": 154 }, { "epoch": 0.7080459770114943, "loss": 0.1441739946603775, "loss_ce": 0.0004972339374944568, "loss_iou": 0.490234375, "loss_num": 0.0286865234375, "loss_xval": 0.1435546875, "num_input_tokens_seen": 13900680, "step": 154 }, { "epoch": 0.7126436781609196, "grad_norm": 15.640269655312782, "learning_rate": 5e-06, "loss": 0.2777, "num_input_tokens_seen": 13991164, "step": 155 }, { "epoch": 0.7126436781609196, "loss": 0.26036322116851807, "loss_ce": 0.004870051983743906, "loss_iou": 0.490234375, "loss_num": 0.051025390625, "loss_xval": 0.255859375, "num_input_tokens_seen": 13991164, "step": 155 }, { "epoch": 0.7172413793103448, "grad_norm": 8.162614855844033, "learning_rate": 5e-06, "loss": 0.2297, "num_input_tokens_seen": 14081500, "step": 156 }, { "epoch": 0.7172413793103448, "loss": 0.2218717634677887, "loss_ce": 0.000680366822052747, "loss_iou": 0.423828125, "loss_num": 0.044189453125, "loss_xval": 0.220703125, "num_input_tokens_seen": 14081500, "step": 156 }, { "epoch": 0.7218390804597701, "grad_norm": 3.320193610069822, "learning_rate": 5e-06, "loss": 0.2703, "num_input_tokens_seen": 14171952, "step": 157 }, { "epoch": 0.7218390804597701, "loss": 0.2780265808105469, "loss_ce": 0.00043868436478078365, "loss_iou": 0.412109375, "loss_num": 0.055419921875, "loss_xval": 0.27734375, "num_input_tokens_seen": 14171952, "step": 157 }, { "epoch": 0.7264367816091954, "grad_norm": 35.54312110471689, "learning_rate": 5e-06, "loss": 0.2552, "num_input_tokens_seen": 14262436, "step": 158 }, { "epoch": 0.7264367816091954, "loss": 0.2401730865240097, "loss_ce": 0.0017697698203846812, "loss_iou": 0.421875, "loss_num": 0.047607421875, "loss_xval": 0.23828125, "num_input_tokens_seen": 14262436, "step": 158 }, { "epoch": 0.7310344827586207, "grad_norm": 7.06971142407152, "learning_rate": 5e-06, "loss": 0.2596, "num_input_tokens_seen": 14352756, "step": 159 }, { "epoch": 0.7310344827586207, "loss": 0.20814064145088196, "loss_ce": 0.00019387324573472142, "loss_iou": 0.392578125, "loss_num": 0.04150390625, "loss_xval": 0.2080078125, "num_input_tokens_seen": 14352756, "step": 159 }, { "epoch": 0.735632183908046, "grad_norm": 3.0564053075210946, "learning_rate": 5e-06, "loss": 0.2989, "num_input_tokens_seen": 14443136, "step": 160 }, { "epoch": 0.735632183908046, "loss": 0.3017638325691223, "loss_ce": 0.007879569195210934, "loss_iou": 0.47265625, "loss_num": 0.058837890625, "loss_xval": 0.29296875, "num_input_tokens_seen": 14443136, "step": 160 }, { "epoch": 0.7402298850574712, "grad_norm": 12.412697167878472, "learning_rate": 5e-06, "loss": 0.2787, "num_input_tokens_seen": 14533508, "step": 161 }, { "epoch": 0.7402298850574712, "loss": 0.2811751663684845, "loss_ce": 0.006394892930984497, "loss_iou": 0.322265625, "loss_num": 0.054931640625, "loss_xval": 0.275390625, "num_input_tokens_seen": 14533508, "step": 161 }, { "epoch": 0.7448275862068966, "grad_norm": 7.776759221836541, "learning_rate": 5e-06, "loss": 0.2709, "num_input_tokens_seen": 14623944, "step": 162 }, { "epoch": 0.7448275862068966, "loss": 0.27529919147491455, "loss_ce": 0.004486183635890484, "loss_iou": 0.46484375, "loss_num": 0.05419921875, "loss_xval": 0.271484375, "num_input_tokens_seen": 14623944, "step": 162 }, { "epoch": 0.7494252873563219, "grad_norm": 4.690047668781669, "learning_rate": 5e-06, "loss": 0.2994, "num_input_tokens_seen": 14714176, "step": 163 }, { "epoch": 0.7494252873563219, "loss": 0.3100579082965851, "loss_ce": 0.01446463167667389, "loss_iou": 0.41796875, "loss_num": 0.05908203125, "loss_xval": 0.294921875, "num_input_tokens_seen": 14714176, "step": 163 }, { "epoch": 0.7540229885057471, "grad_norm": 14.854640525604244, "learning_rate": 5e-06, "loss": 0.3539, "num_input_tokens_seen": 14804540, "step": 164 }, { "epoch": 0.7540229885057471, "loss": 0.44931861758232117, "loss_ce": 0.01682349294424057, "loss_iou": 0.390625, "loss_num": 0.08642578125, "loss_xval": 0.431640625, "num_input_tokens_seen": 14804540, "step": 164 }, { "epoch": 0.7586206896551724, "grad_norm": 17.524154236618525, "learning_rate": 5e-06, "loss": 0.3198, "num_input_tokens_seen": 14895024, "step": 165 }, { "epoch": 0.7586206896551724, "loss": 0.27517974376678467, "loss_ce": 0.0019253486534580588, "loss_iou": 0.37109375, "loss_num": 0.0546875, "loss_xval": 0.2734375, "num_input_tokens_seen": 14895024, "step": 165 }, { "epoch": 0.7632183908045977, "grad_norm": 14.028282838728552, "learning_rate": 5e-06, "loss": 0.2588, "num_input_tokens_seen": 14985468, "step": 166 }, { "epoch": 0.7632183908045977, "loss": 0.26632797718048096, "loss_ce": 0.021088741719722748, "loss_iou": 0.37890625, "loss_num": 0.049072265625, "loss_xval": 0.2451171875, "num_input_tokens_seen": 14985468, "step": 166 }, { "epoch": 0.767816091954023, "grad_norm": 13.184310261988879, "learning_rate": 5e-06, "loss": 0.3099, "num_input_tokens_seen": 15075900, "step": 167 }, { "epoch": 0.767816091954023, "loss": 0.2652779519557953, "loss_ce": 0.001728140632621944, "loss_iou": 0.4375, "loss_num": 0.052734375, "loss_xval": 0.263671875, "num_input_tokens_seen": 15075900, "step": 167 }, { "epoch": 0.7724137931034483, "grad_norm": 14.801504481488628, "learning_rate": 5e-06, "loss": 0.2864, "num_input_tokens_seen": 15166300, "step": 168 }, { "epoch": 0.7724137931034483, "loss": 0.2605416774749756, "loss_ce": 0.012616850435733795, "loss_iou": 0.408203125, "loss_num": 0.049560546875, "loss_xval": 0.248046875, "num_input_tokens_seen": 15166300, "step": 168 }, { "epoch": 0.7770114942528735, "grad_norm": 7.804434978620636, "learning_rate": 5e-06, "loss": 0.2893, "num_input_tokens_seen": 15256556, "step": 169 }, { "epoch": 0.7770114942528735, "loss": 0.17921343445777893, "loss_ce": 0.003676328808069229, "loss_iou": 0.5390625, "loss_num": 0.03515625, "loss_xval": 0.17578125, "num_input_tokens_seen": 15256556, "step": 169 }, { "epoch": 0.7816091954022989, "grad_norm": 7.905968772421947, "learning_rate": 5e-06, "loss": 0.2628, "num_input_tokens_seen": 15346920, "step": 170 }, { "epoch": 0.7816091954022989, "loss": 0.26719164848327637, "loss_ce": 0.0010783508187159896, "loss_iou": 0.453125, "loss_num": 0.05322265625, "loss_xval": 0.265625, "num_input_tokens_seen": 15346920, "step": 170 }, { "epoch": 0.7862068965517242, "grad_norm": 5.672105961071837, "learning_rate": 5e-06, "loss": 0.2997, "num_input_tokens_seen": 15437236, "step": 171 }, { "epoch": 0.7862068965517242, "loss": 0.30997809767723083, "loss_ce": 0.0011402069358155131, "loss_iou": 0.462890625, "loss_num": 0.061767578125, "loss_xval": 0.30859375, "num_input_tokens_seen": 15437236, "step": 171 }, { "epoch": 0.7908045977011494, "grad_norm": 31.647759717380147, "learning_rate": 5e-06, "loss": 0.3455, "num_input_tokens_seen": 15527680, "step": 172 }, { "epoch": 0.7908045977011494, "loss": 0.4378126859664917, "loss_ce": 0.006202578078955412, "loss_iou": 0.408203125, "loss_num": 0.08642578125, "loss_xval": 0.431640625, "num_input_tokens_seen": 15527680, "step": 172 }, { "epoch": 0.7954022988505747, "grad_norm": 9.751745462208614, "learning_rate": 5e-06, "loss": 0.3265, "num_input_tokens_seen": 15618124, "step": 173 }, { "epoch": 0.7954022988505747, "loss": 0.4681752622127533, "loss_ce": 0.1417897641658783, "loss_iou": 0.482421875, "loss_num": 0.0654296875, "loss_xval": 0.326171875, "num_input_tokens_seen": 15618124, "step": 173 }, { "epoch": 0.8, "grad_norm": 11.98242525805576, "learning_rate": 5e-06, "loss": 0.303, "num_input_tokens_seen": 15708484, "step": 174 }, { "epoch": 0.8, "loss": 0.30590367317199707, "loss_ce": 0.0006058230064809322, "loss_iou": 0.427734375, "loss_num": 0.06103515625, "loss_xval": 0.3046875, "num_input_tokens_seen": 15708484, "step": 174 }, { "epoch": 0.8045977011494253, "grad_norm": 10.897026680920138, "learning_rate": 5e-06, "loss": 0.2461, "num_input_tokens_seen": 15798884, "step": 175 }, { "epoch": 0.8045977011494253, "loss": 0.20932422578334808, "loss_ce": 0.0005229614907875657, "loss_iou": 0.515625, "loss_num": 0.041748046875, "loss_xval": 0.208984375, "num_input_tokens_seen": 15798884, "step": 175 }, { "epoch": 0.8091954022988506, "grad_norm": 8.597962137572907, "learning_rate": 5e-06, "loss": 0.2656, "num_input_tokens_seen": 15889316, "step": 176 }, { "epoch": 0.8091954022988506, "loss": 0.2625455856323242, "loss_ce": 0.0009489042568020523, "loss_iou": 0.453125, "loss_num": 0.05224609375, "loss_xval": 0.26171875, "num_input_tokens_seen": 15889316, "step": 176 }, { "epoch": 0.8137931034482758, "grad_norm": 11.519278966897533, "learning_rate": 5e-06, "loss": 0.2704, "num_input_tokens_seen": 15979592, "step": 177 }, { "epoch": 0.8137931034482758, "loss": 0.2799646854400635, "loss_ce": 0.000789885874837637, "loss_iou": 0.50390625, "loss_num": 0.055908203125, "loss_xval": 0.279296875, "num_input_tokens_seen": 15979592, "step": 177 }, { "epoch": 0.8183908045977012, "grad_norm": 53.5897696229877, "learning_rate": 5e-06, "loss": 0.3061, "num_input_tokens_seen": 16066948, "step": 178 }, { "epoch": 0.8183908045977012, "loss": 0.3006019592285156, "loss_ce": 0.00037002595490776, "loss_iou": 0.447265625, "loss_num": 0.06005859375, "loss_xval": 0.30078125, "num_input_tokens_seen": 16066948, "step": 178 }, { "epoch": 0.8229885057471265, "grad_norm": 8.142699994347275, "learning_rate": 5e-06, "loss": 0.2444, "num_input_tokens_seen": 16157256, "step": 179 }, { "epoch": 0.8229885057471265, "loss": 0.21453739702701569, "loss_ce": 0.00024295142793562263, "loss_iou": 0.484375, "loss_num": 0.042724609375, "loss_xval": 0.2138671875, "num_input_tokens_seen": 16157256, "step": 179 }, { "epoch": 0.8275862068965517, "grad_norm": 8.723664846573774, "learning_rate": 5e-06, "loss": 0.2936, "num_input_tokens_seen": 16247552, "step": 180 }, { "epoch": 0.8275862068965517, "loss": 0.30998989939689636, "loss_ce": 0.008781395852565765, "loss_iou": 0.44140625, "loss_num": 0.060302734375, "loss_xval": 0.30078125, "num_input_tokens_seen": 16247552, "step": 180 }, { "epoch": 0.832183908045977, "grad_norm": 31.640092388654665, "learning_rate": 5e-06, "loss": 0.2839, "num_input_tokens_seen": 16337132, "step": 181 }, { "epoch": 0.832183908045977, "loss": 0.2720521092414856, "loss_ce": 0.006427087355405092, "loss_iou": 0.3671875, "loss_num": 0.05322265625, "loss_xval": 0.265625, "num_input_tokens_seen": 16337132, "step": 181 }, { "epoch": 0.8367816091954023, "grad_norm": 15.726684290210118, "learning_rate": 5e-06, "loss": 0.2782, "num_input_tokens_seen": 16427532, "step": 182 }, { "epoch": 0.8367816091954023, "loss": 0.22582070529460907, "loss_ce": 0.0006009868229739368, "loss_iou": 0.400390625, "loss_num": 0.044921875, "loss_xval": 0.2255859375, "num_input_tokens_seen": 16427532, "step": 182 }, { "epoch": 0.8413793103448276, "grad_norm": 9.413293029313524, "learning_rate": 5e-06, "loss": 0.2648, "num_input_tokens_seen": 16517996, "step": 183 }, { "epoch": 0.8413793103448276, "loss": 0.2550521492958069, "loss_ce": 0.0031600736547261477, "loss_iou": 0.48046875, "loss_num": 0.05029296875, "loss_xval": 0.251953125, "num_input_tokens_seen": 16517996, "step": 183 }, { "epoch": 0.8459770114942529, "grad_norm": 17.034560829091234, "learning_rate": 5e-06, "loss": 0.2503, "num_input_tokens_seen": 16608392, "step": 184 }, { "epoch": 0.8459770114942529, "loss": 0.24292418360710144, "loss_ce": 0.006596065126359463, "loss_iou": 0.46875, "loss_num": 0.047119140625, "loss_xval": 0.236328125, "num_input_tokens_seen": 16608392, "step": 184 }, { "epoch": 0.8505747126436781, "grad_norm": 24.671310704425558, "learning_rate": 5e-06, "loss": 0.2523, "num_input_tokens_seen": 16698852, "step": 185 }, { "epoch": 0.8505747126436781, "loss": 0.2351730614900589, "loss_ce": 0.002690147841349244, "loss_iou": 0.447265625, "loss_num": 0.04638671875, "loss_xval": 0.232421875, "num_input_tokens_seen": 16698852, "step": 185 }, { "epoch": 0.8551724137931035, "grad_norm": 8.258454597087393, "learning_rate": 5e-06, "loss": 0.2756, "num_input_tokens_seen": 16789340, "step": 186 }, { "epoch": 0.8551724137931035, "loss": 0.3019639849662781, "loss_ce": 0.01021592691540718, "loss_iou": 0.498046875, "loss_num": 0.058349609375, "loss_xval": 0.291015625, "num_input_tokens_seen": 16789340, "step": 186 }, { "epoch": 0.8597701149425288, "grad_norm": 4.472141198961523, "learning_rate": 5e-06, "loss": 0.1966, "num_input_tokens_seen": 16879736, "step": 187 }, { "epoch": 0.8597701149425288, "loss": 0.19520393013954163, "loss_ce": 0.003004225669428706, "loss_iou": 0.474609375, "loss_num": 0.038330078125, "loss_xval": 0.1923828125, "num_input_tokens_seen": 16879736, "step": 187 }, { "epoch": 0.864367816091954, "grad_norm": 4.244846244353357, "learning_rate": 5e-06, "loss": 0.2684, "num_input_tokens_seen": 16968620, "step": 188 }, { "epoch": 0.864367816091954, "loss": 0.29818519949913025, "loss_ce": 0.007596845738589764, "loss_iou": 0.365234375, "loss_num": 0.05810546875, "loss_xval": 0.291015625, "num_input_tokens_seen": 16968620, "step": 188 }, { "epoch": 0.8689655172413793, "grad_norm": 17.287226125768598, "learning_rate": 5e-06, "loss": 0.2103, "num_input_tokens_seen": 17058968, "step": 189 }, { "epoch": 0.8689655172413793, "loss": 0.23691387474536896, "loss_ce": 0.00015851360512897372, "loss_iou": 0.4765625, "loss_num": 0.04736328125, "loss_xval": 0.236328125, "num_input_tokens_seen": 17058968, "step": 189 }, { "epoch": 0.8735632183908046, "grad_norm": 11.788400904878815, "learning_rate": 5e-06, "loss": 0.2889, "num_input_tokens_seen": 17149248, "step": 190 }, { "epoch": 0.8735632183908046, "loss": 0.24897444248199463, "loss_ce": 0.00013410599785856903, "loss_iou": 0.4453125, "loss_num": 0.0498046875, "loss_xval": 0.2490234375, "num_input_tokens_seen": 17149248, "step": 190 }, { "epoch": 0.8781609195402299, "grad_norm": 13.617723461222461, "learning_rate": 5e-06, "loss": 0.2228, "num_input_tokens_seen": 17239628, "step": 191 }, { "epoch": 0.8781609195402299, "loss": 0.23315443098545074, "loss_ce": 0.005981584079563618, "loss_iou": 0.388671875, "loss_num": 0.04541015625, "loss_xval": 0.2275390625, "num_input_tokens_seen": 17239628, "step": 191 }, { "epoch": 0.8827586206896552, "grad_norm": 22.65066129559751, "learning_rate": 5e-06, "loss": 0.218, "num_input_tokens_seen": 17329208, "step": 192 }, { "epoch": 0.8827586206896552, "loss": 0.23583698272705078, "loss_ce": 0.004879951477050781, "loss_iou": 0.3984375, "loss_num": 0.046142578125, "loss_xval": 0.23046875, "num_input_tokens_seen": 17329208, "step": 192 }, { "epoch": 0.8873563218390804, "grad_norm": 21.965810935500244, "learning_rate": 5e-06, "loss": 0.2832, "num_input_tokens_seen": 17419616, "step": 193 }, { "epoch": 0.8873563218390804, "loss": 0.2855183780193329, "loss_ce": 0.004756668582558632, "loss_iou": 0.458984375, "loss_num": 0.05615234375, "loss_xval": 0.28125, "num_input_tokens_seen": 17419616, "step": 193 }, { "epoch": 0.8919540229885058, "grad_norm": 8.272351331856266, "learning_rate": 5e-06, "loss": 0.2343, "num_input_tokens_seen": 17507752, "step": 194 }, { "epoch": 0.8919540229885058, "loss": 0.2399004101753235, "loss_ce": 0.0027177799493074417, "loss_iou": 0.451171875, "loss_num": 0.04736328125, "loss_xval": 0.2373046875, "num_input_tokens_seen": 17507752, "step": 194 }, { "epoch": 0.896551724137931, "grad_norm": 5.04648132108404, "learning_rate": 5e-06, "loss": 0.247, "num_input_tokens_seen": 17595748, "step": 195 }, { "epoch": 0.896551724137931, "loss": 0.2049858421087265, "loss_ce": 0.02108692191541195, "loss_iou": 0.421875, "loss_num": 0.036865234375, "loss_xval": 0.18359375, "num_input_tokens_seen": 17595748, "step": 195 }, { "epoch": 0.9011494252873563, "grad_norm": 11.7279486833154, "learning_rate": 5e-06, "loss": 0.3365, "num_input_tokens_seen": 17686040, "step": 196 }, { "epoch": 0.9011494252873563, "loss": 0.37410539388656616, "loss_ce": 0.002889568218961358, "loss_iou": 0.375, "loss_num": 0.07421875, "loss_xval": 0.37109375, "num_input_tokens_seen": 17686040, "step": 196 }, { "epoch": 0.9057471264367816, "grad_norm": 9.565742677061259, "learning_rate": 5e-06, "loss": 0.293, "num_input_tokens_seen": 17776348, "step": 197 }, { "epoch": 0.9057471264367816, "loss": 0.3795655369758606, "loss_ce": 0.0005372293526306748, "loss_iou": 0.361328125, "loss_num": 0.07568359375, "loss_xval": 0.37890625, "num_input_tokens_seen": 17776348, "step": 197 }, { "epoch": 0.9103448275862069, "grad_norm": 5.3718887593573275, "learning_rate": 5e-06, "loss": 0.245, "num_input_tokens_seen": 17866780, "step": 198 }, { "epoch": 0.9103448275862069, "loss": 0.23624387383460999, "loss_ce": 0.004615448880940676, "loss_iou": 0.515625, "loss_num": 0.04638671875, "loss_xval": 0.2314453125, "num_input_tokens_seen": 17866780, "step": 198 }, { "epoch": 0.9149425287356322, "grad_norm": 10.445265942368462, "learning_rate": 5e-06, "loss": 0.3073, "num_input_tokens_seen": 17957092, "step": 199 }, { "epoch": 0.9149425287356322, "loss": 0.4080125093460083, "loss_ce": 0.0026475111953914165, "loss_iou": 0.3984375, "loss_num": 0.0810546875, "loss_xval": 0.40625, "num_input_tokens_seen": 17957092, "step": 199 }, { "epoch": 0.9195402298850575, "grad_norm": 13.36367583066565, "learning_rate": 5e-06, "loss": 0.2221, "num_input_tokens_seen": 18047500, "step": 200 }, { "epoch": 0.9195402298850575, "loss": 0.18272346258163452, "loss_ce": 0.0006555922445841134, "loss_iou": 0.421875, "loss_num": 0.036376953125, "loss_xval": 0.181640625, "num_input_tokens_seen": 18047500, "step": 200 }, { "epoch": 0.9241379310344827, "grad_norm": 7.033010833292816, "learning_rate": 5e-06, "loss": 0.3052, "num_input_tokens_seen": 18137988, "step": 201 }, { "epoch": 0.9241379310344827, "loss": 0.37599673867225647, "loss_ce": 0.007222320418804884, "loss_iou": 0.39453125, "loss_num": 0.07373046875, "loss_xval": 0.369140625, "num_input_tokens_seen": 18137988, "step": 201 }, { "epoch": 0.9287356321839081, "grad_norm": 3.214939505030853, "learning_rate": 5e-06, "loss": 0.2099, "num_input_tokens_seen": 18228316, "step": 202 }, { "epoch": 0.9287356321839081, "loss": 0.2917129397392273, "loss_ce": 0.0024673594161868095, "loss_iou": 0.447265625, "loss_num": 0.057861328125, "loss_xval": 0.2890625, "num_input_tokens_seen": 18228316, "step": 202 }, { "epoch": 0.9333333333333333, "grad_norm": 47.44674206476791, "learning_rate": 5e-06, "loss": 0.2881, "num_input_tokens_seen": 18318712, "step": 203 }, { "epoch": 0.9333333333333333, "loss": 0.27268558740615845, "loss_ce": 0.0027881115674972534, "loss_iou": 0.369140625, "loss_num": 0.053955078125, "loss_xval": 0.26953125, "num_input_tokens_seen": 18318712, "step": 203 }, { "epoch": 0.9379310344827586, "grad_norm": 15.635747866161521, "learning_rate": 5e-06, "loss": 0.2178, "num_input_tokens_seen": 18409128, "step": 204 }, { "epoch": 0.9379310344827586, "loss": 0.21751438081264496, "loss_ce": 0.009933818131685257, "loss_iou": 0.3828125, "loss_num": 0.04150390625, "loss_xval": 0.2080078125, "num_input_tokens_seen": 18409128, "step": 204 }, { "epoch": 0.9425287356321839, "grad_norm": 14.842085686479773, "learning_rate": 5e-06, "loss": 0.2294, "num_input_tokens_seen": 18499512, "step": 205 }, { "epoch": 0.9425287356321839, "loss": 0.2684970498085022, "loss_ce": 0.005435529164969921, "loss_iou": 0.41015625, "loss_num": 0.052490234375, "loss_xval": 0.263671875, "num_input_tokens_seen": 18499512, "step": 205 }, { "epoch": 0.9471264367816092, "grad_norm": 10.75647435799341, "learning_rate": 5e-06, "loss": 0.2631, "num_input_tokens_seen": 18589756, "step": 206 }, { "epoch": 0.9471264367816092, "loss": 0.3199033737182617, "loss_ce": 0.0008725962834432721, "loss_iou": 0.4296875, "loss_num": 0.06396484375, "loss_xval": 0.318359375, "num_input_tokens_seen": 18589756, "step": 206 }, { "epoch": 0.9517241379310345, "grad_norm": 10.014451793263762, "learning_rate": 5e-06, "loss": 0.3506, "num_input_tokens_seen": 18680168, "step": 207 }, { "epoch": 0.9517241379310345, "loss": 0.41780880093574524, "loss_ce": 0.004295613616704941, "loss_iou": 0.5078125, "loss_num": 0.0830078125, "loss_xval": 0.4140625, "num_input_tokens_seen": 18680168, "step": 207 }, { "epoch": 0.9563218390804598, "grad_norm": 81.22703034283288, "learning_rate": 5e-06, "loss": 0.2683, "num_input_tokens_seen": 18768984, "step": 208 }, { "epoch": 0.9563218390804598, "loss": 0.22123399376869202, "loss_ce": 0.00040880130836740136, "loss_iou": 0.451171875, "loss_num": 0.044189453125, "loss_xval": 0.220703125, "num_input_tokens_seen": 18768984, "step": 208 }, { "epoch": 0.960919540229885, "grad_norm": 18.16487091628093, "learning_rate": 5e-06, "loss": 0.2017, "num_input_tokens_seen": 18859384, "step": 209 }, { "epoch": 0.960919540229885, "loss": 0.1849987506866455, "loss_ce": 0.00650144275277853, "loss_iou": 0.419921875, "loss_num": 0.03564453125, "loss_xval": 0.1787109375, "num_input_tokens_seen": 18859384, "step": 209 }, { "epoch": 0.9655172413793104, "grad_norm": 6.997216906550849, "learning_rate": 5e-06, "loss": 0.3129, "num_input_tokens_seen": 18949688, "step": 210 }, { "epoch": 0.9655172413793104, "loss": 0.38702672719955444, "loss_ce": 0.0003079898888245225, "loss_iou": 0.49609375, "loss_num": 0.0771484375, "loss_xval": 0.38671875, "num_input_tokens_seen": 18949688, "step": 210 }, { "epoch": 0.9701149425287356, "grad_norm": 7.51470392722959, "learning_rate": 5e-06, "loss": 0.2452, "num_input_tokens_seen": 19039948, "step": 211 }, { "epoch": 0.9701149425287356, "loss": 0.30642956495285034, "loss_ce": 0.0001551464811200276, "loss_iou": 0.408203125, "loss_num": 0.061279296875, "loss_xval": 0.306640625, "num_input_tokens_seen": 19039948, "step": 211 }, { "epoch": 0.9747126436781609, "grad_norm": 5.659981606083217, "learning_rate": 5e-06, "loss": 0.2912, "num_input_tokens_seen": 19130216, "step": 212 }, { "epoch": 0.9747126436781609, "loss": 0.28307363390922546, "loss_ce": 0.0021898397244513035, "loss_iou": 0.458984375, "loss_num": 0.05615234375, "loss_xval": 0.28125, "num_input_tokens_seen": 19130216, "step": 212 }, { "epoch": 0.9793103448275862, "grad_norm": 15.905977611839273, "learning_rate": 5e-06, "loss": 0.2863, "num_input_tokens_seen": 19220568, "step": 213 }, { "epoch": 0.9793103448275862, "loss": 0.39371466636657715, "loss_ce": 0.0007703077862970531, "loss_iou": 0.44140625, "loss_num": 0.07861328125, "loss_xval": 0.392578125, "num_input_tokens_seen": 19220568, "step": 213 }, { "epoch": 0.9839080459770115, "grad_norm": 6.117701022668606, "learning_rate": 5e-06, "loss": 0.2468, "num_input_tokens_seen": 19311004, "step": 214 }, { "epoch": 0.9839080459770115, "loss": 0.2368617057800293, "loss_ce": 0.001754299970343709, "loss_iou": 0.3984375, "loss_num": 0.046875, "loss_xval": 0.2353515625, "num_input_tokens_seen": 19311004, "step": 214 }, { "epoch": 0.9885057471264368, "grad_norm": 13.736398962240743, "learning_rate": 5e-06, "loss": 0.2513, "num_input_tokens_seen": 19401384, "step": 215 }, { "epoch": 0.9885057471264368, "loss": 0.24910268187522888, "loss_ce": 0.004153335001319647, "loss_iou": 0.44140625, "loss_num": 0.049072265625, "loss_xval": 0.2451171875, "num_input_tokens_seen": 19401384, "step": 215 }, { "epoch": 0.993103448275862, "grad_norm": 5.219663028608165, "learning_rate": 5e-06, "loss": 0.2163, "num_input_tokens_seen": 19491860, "step": 216 }, { "epoch": 0.993103448275862, "loss": 0.2344357669353485, "loss_ce": 0.0007016360759735107, "loss_iou": 0.486328125, "loss_num": 0.046875, "loss_xval": 0.2333984375, "num_input_tokens_seen": 19491860, "step": 216 }, { "epoch": 0.9977011494252873, "grad_norm": 9.37488702763018, "learning_rate": 5e-06, "loss": 0.1707, "num_input_tokens_seen": 19582284, "step": 217 }, { "epoch": 0.9977011494252873, "loss": 0.14838698506355286, "loss_ce": 0.00046829067287035286, "loss_iou": 0.51171875, "loss_num": 0.029541015625, "loss_xval": 0.1474609375, "num_input_tokens_seen": 19582284, "step": 217 }, { "epoch": 0.9977011494252873, "loss": 0.24481603503227234, "loss_ce": 0.001377313630655408, "loss_iou": 0.41015625, "loss_num": 0.048583984375, "loss_xval": 0.2431640625, "num_input_tokens_seen": 19627492, "step": 217 }, { "epoch": 1.0022988505747126, "grad_norm": 13.179380791300144, "learning_rate": 5e-06, "loss": 0.237, "num_input_tokens_seen": 19672672, "step": 218 }, { "epoch": 1.0022988505747126, "loss": 0.22918623685836792, "loss_ce": 0.0003043994656763971, "loss_iou": 0.4140625, "loss_num": 0.045654296875, "loss_xval": 0.228515625, "num_input_tokens_seen": 19672672, "step": 218 }, { "epoch": 1.006896551724138, "grad_norm": 11.575549644295782, "learning_rate": 5e-06, "loss": 0.1865, "num_input_tokens_seen": 19762928, "step": 219 }, { "epoch": 1.006896551724138, "loss": 0.20134694874286652, "loss_ce": 5.299979602568783e-05, "loss_iou": 0.52734375, "loss_num": 0.040283203125, "loss_xval": 0.201171875, "num_input_tokens_seen": 19762928, "step": 219 }, { "epoch": 1.0114942528735633, "grad_norm": 7.770217563711411, "learning_rate": 5e-06, "loss": 0.2716, "num_input_tokens_seen": 19853264, "step": 220 }, { "epoch": 1.0114942528735633, "loss": 0.23982341587543488, "loss_ce": 0.0010538852075114846, "loss_iou": 0.484375, "loss_num": 0.0478515625, "loss_xval": 0.23828125, "num_input_tokens_seen": 19853264, "step": 220 }, { "epoch": 1.0160919540229885, "grad_norm": 13.318468212320983, "learning_rate": 5e-06, "loss": 0.2188, "num_input_tokens_seen": 19942828, "step": 221 }, { "epoch": 1.0160919540229885, "loss": 0.19837090373039246, "loss_ce": 0.000189754442544654, "loss_iou": 0.412109375, "loss_num": 0.03955078125, "loss_xval": 0.1982421875, "num_input_tokens_seen": 19942828, "step": 221 }, { "epoch": 1.0206896551724138, "grad_norm": 4.7677670902469735, "learning_rate": 5e-06, "loss": 0.1753, "num_input_tokens_seen": 20033268, "step": 222 }, { "epoch": 1.0206896551724138, "loss": 0.23340031504631042, "loss_ce": 0.01013370230793953, "loss_iou": 0.46484375, "loss_num": 0.044677734375, "loss_xval": 0.2236328125, "num_input_tokens_seen": 20033268, "step": 222 }, { "epoch": 1.025287356321839, "grad_norm": 7.294150569078434, "learning_rate": 5e-06, "loss": 0.2063, "num_input_tokens_seen": 20123744, "step": 223 }, { "epoch": 1.025287356321839, "loss": 0.16170556843280792, "loss_ce": 0.00222071073949337, "loss_iou": 0.44921875, "loss_num": 0.03173828125, "loss_xval": 0.1591796875, "num_input_tokens_seen": 20123744, "step": 223 }, { "epoch": 1.0298850574712644, "grad_norm": 13.473703879708541, "learning_rate": 5e-06, "loss": 0.1737, "num_input_tokens_seen": 20214216, "step": 224 }, { "epoch": 1.0298850574712644, "loss": 0.12107338011264801, "loss_ce": 0.0005899769021198153, "loss_iou": 0.45703125, "loss_num": 0.0240478515625, "loss_xval": 0.12060546875, "num_input_tokens_seen": 20214216, "step": 224 }, { "epoch": 1.0344827586206897, "grad_norm": 16.323294537818267, "learning_rate": 5e-06, "loss": 0.2242, "num_input_tokens_seen": 20304620, "step": 225 }, { "epoch": 1.0344827586206897, "loss": 0.2116052806377411, "loss_ce": 0.002529359422624111, "loss_iou": 0.361328125, "loss_num": 0.041748046875, "loss_xval": 0.208984375, "num_input_tokens_seen": 20304620, "step": 225 }, { "epoch": 1.0390804597701149, "grad_norm": 5.045052986814076, "learning_rate": 5e-06, "loss": 0.2254, "num_input_tokens_seen": 20394932, "step": 226 }, { "epoch": 1.0390804597701149, "loss": 0.19622498750686646, "loss_ce": 0.0004242146678734571, "loss_iou": 0.494140625, "loss_num": 0.0390625, "loss_xval": 0.1953125, "num_input_tokens_seen": 20394932, "step": 226 }, { "epoch": 1.0436781609195402, "grad_norm": 10.85002200148262, "learning_rate": 5e-06, "loss": 0.2326, "num_input_tokens_seen": 20485364, "step": 227 }, { "epoch": 1.0436781609195402, "loss": 0.16513219475746155, "loss_ce": 0.0007034791633486748, "loss_iou": 0.4453125, "loss_num": 0.032958984375, "loss_xval": 0.1640625, "num_input_tokens_seen": 20485364, "step": 227 }, { "epoch": 1.0482758620689656, "grad_norm": 10.36785476657075, "learning_rate": 5e-06, "loss": 0.189, "num_input_tokens_seen": 20575780, "step": 228 }, { "epoch": 1.0482758620689656, "loss": 0.1746249496936798, "loss_ce": 0.0005526923341676593, "loss_iou": 0.3828125, "loss_num": 0.034912109375, "loss_xval": 0.173828125, "num_input_tokens_seen": 20575780, "step": 228 }, { "epoch": 1.0528735632183908, "grad_norm": 11.993359880118176, "learning_rate": 5e-06, "loss": 0.2183, "num_input_tokens_seen": 20666132, "step": 229 }, { "epoch": 1.0528735632183908, "loss": 0.24332331120967865, "loss_ce": 0.001074782107025385, "loss_iou": 0.498046875, "loss_num": 0.04833984375, "loss_xval": 0.2421875, "num_input_tokens_seen": 20666132, "step": 229 }, { "epoch": 1.0574712643678161, "grad_norm": 21.11605854531589, "learning_rate": 5e-06, "loss": 0.2293, "num_input_tokens_seen": 20756592, "step": 230 }, { "epoch": 1.0574712643678161, "loss": 0.21172873675823212, "loss_ce": 0.005735091865062714, "loss_iou": 0.51953125, "loss_num": 0.041259765625, "loss_xval": 0.2060546875, "num_input_tokens_seen": 20756592, "step": 230 }, { "epoch": 1.0620689655172413, "grad_norm": 13.639736851512211, "learning_rate": 5e-06, "loss": 0.1664, "num_input_tokens_seen": 20846888, "step": 231 }, { "epoch": 1.0620689655172413, "loss": 0.1343887746334076, "loss_ce": 0.010151727125048637, "loss_iou": 0.53515625, "loss_num": 0.0247802734375, "loss_xval": 0.1240234375, "num_input_tokens_seen": 20846888, "step": 231 }, { "epoch": 1.0666666666666667, "grad_norm": 4.8416225246295195, "learning_rate": 5e-06, "loss": 0.2231, "num_input_tokens_seen": 20937220, "step": 232 }, { "epoch": 1.0666666666666667, "loss": 0.19790634512901306, "loss_ce": 0.00024397407833021134, "loss_iou": 0.341796875, "loss_num": 0.03955078125, "loss_xval": 0.197265625, "num_input_tokens_seen": 20937220, "step": 232 }, { "epoch": 1.071264367816092, "grad_norm": 8.735510518442098, "learning_rate": 5e-06, "loss": 0.2111, "num_input_tokens_seen": 21027540, "step": 233 }, { "epoch": 1.071264367816092, "loss": 0.2129209041595459, "loss_ce": 0.014556641690433025, "loss_iou": 0.5234375, "loss_num": 0.03955078125, "loss_xval": 0.1982421875, "num_input_tokens_seen": 21027540, "step": 233 }, { "epoch": 1.0758620689655172, "grad_norm": 64.22232050337796, "learning_rate": 5e-06, "loss": 0.243, "num_input_tokens_seen": 21117908, "step": 234 }, { "epoch": 1.0758620689655172, "loss": 0.19178733229637146, "loss_ce": 0.000259012304013595, "loss_iou": 0.482421875, "loss_num": 0.038330078125, "loss_xval": 0.19140625, "num_input_tokens_seen": 21117908, "step": 234 }, { "epoch": 1.0804597701149425, "grad_norm": 29.01468538309071, "learning_rate": 5e-06, "loss": 0.2216, "num_input_tokens_seen": 21208340, "step": 235 }, { "epoch": 1.0804597701149425, "loss": 0.22727806866168976, "loss_ce": 0.0060866596177220345, "loss_iou": 0.400390625, "loss_num": 0.044189453125, "loss_xval": 0.220703125, "num_input_tokens_seen": 21208340, "step": 235 }, { "epoch": 1.085057471264368, "grad_norm": 11.877399551682112, "learning_rate": 5e-06, "loss": 0.193, "num_input_tokens_seen": 21298700, "step": 236 }, { "epoch": 1.085057471264368, "loss": 0.2526742219924927, "loss_ce": 0.00011075517249992117, "loss_iou": 0.396484375, "loss_num": 0.050537109375, "loss_xval": 0.251953125, "num_input_tokens_seen": 21298700, "step": 236 }, { "epoch": 1.089655172413793, "grad_norm": 11.06433974753488, "learning_rate": 5e-06, "loss": 0.2513, "num_input_tokens_seen": 21389112, "step": 237 }, { "epoch": 1.089655172413793, "loss": 0.2866198718547821, "loss_ce": 0.0012194736627861857, "loss_iou": 0.443359375, "loss_num": 0.05712890625, "loss_xval": 0.28515625, "num_input_tokens_seen": 21389112, "step": 237 }, { "epoch": 1.0942528735632184, "grad_norm": 5.522127581505815, "learning_rate": 5e-06, "loss": 0.2329, "num_input_tokens_seen": 21479544, "step": 238 }, { "epoch": 1.0942528735632184, "loss": 0.24488475918769836, "loss_ce": 0.0002558681007940322, "loss_iou": 0.474609375, "loss_num": 0.048828125, "loss_xval": 0.244140625, "num_input_tokens_seen": 21479544, "step": 238 }, { "epoch": 1.0988505747126436, "grad_norm": 20.5377108131709, "learning_rate": 5e-06, "loss": 0.183, "num_input_tokens_seen": 21569880, "step": 239 }, { "epoch": 1.0988505747126436, "loss": 0.17812752723693848, "loss_ce": 2.6934067136608064e-05, "loss_iou": 0.482421875, "loss_num": 0.03564453125, "loss_xval": 0.177734375, "num_input_tokens_seen": 21569880, "step": 239 }, { "epoch": 1.103448275862069, "grad_norm": 12.413266667265427, "learning_rate": 5e-06, "loss": 0.2067, "num_input_tokens_seen": 21660268, "step": 240 }, { "epoch": 1.103448275862069, "loss": 0.21825896203517914, "loss_ce": 0.03881560266017914, "loss_iou": 0.48828125, "loss_num": 0.035888671875, "loss_xval": 0.1796875, "num_input_tokens_seen": 21660268, "step": 240 }, { "epoch": 1.1080459770114943, "grad_norm": 3.800385893974569, "learning_rate": 5e-06, "loss": 0.2301, "num_input_tokens_seen": 21750640, "step": 241 }, { "epoch": 1.1080459770114943, "loss": 0.2575361132621765, "loss_ce": 0.000211891092476435, "loss_iou": 0.482421875, "loss_num": 0.051513671875, "loss_xval": 0.2578125, "num_input_tokens_seen": 21750640, "step": 241 }, { "epoch": 1.1126436781609195, "grad_norm": 8.560187437534255, "learning_rate": 5e-06, "loss": 0.2571, "num_input_tokens_seen": 21841068, "step": 242 }, { "epoch": 1.1126436781609195, "loss": 0.2956390976905823, "loss_ce": 0.0056000337935984135, "loss_iou": 0.38671875, "loss_num": 0.05810546875, "loss_xval": 0.2890625, "num_input_tokens_seen": 21841068, "step": 242 }, { "epoch": 1.1172413793103448, "grad_norm": 20.63729856829291, "learning_rate": 5e-06, "loss": 0.2309, "num_input_tokens_seen": 21931304, "step": 243 }, { "epoch": 1.1172413793103448, "loss": 0.21533158421516418, "loss_ce": 0.00045730240526609123, "loss_iou": 0.50390625, "loss_num": 0.04296875, "loss_xval": 0.21484375, "num_input_tokens_seen": 21931304, "step": 243 }, { "epoch": 1.1218390804597702, "grad_norm": 25.579280798369926, "learning_rate": 5e-06, "loss": 0.2176, "num_input_tokens_seen": 22021568, "step": 244 }, { "epoch": 1.1218390804597702, "loss": 0.25474822521209717, "loss_ce": 0.0018490497022867203, "loss_iou": 0.4140625, "loss_num": 0.050537109375, "loss_xval": 0.251953125, "num_input_tokens_seen": 22021568, "step": 244 }, { "epoch": 1.1264367816091954, "grad_norm": 32.89831549609003, "learning_rate": 5e-06, "loss": 0.2793, "num_input_tokens_seen": 22111816, "step": 245 }, { "epoch": 1.1264367816091954, "loss": 0.2946988344192505, "loss_ce": 0.0027066715992987156, "loss_iou": 0.3984375, "loss_num": 0.058349609375, "loss_xval": 0.29296875, "num_input_tokens_seen": 22111816, "step": 245 }, { "epoch": 1.1310344827586207, "grad_norm": 11.404530774167966, "learning_rate": 5e-06, "loss": 0.2252, "num_input_tokens_seen": 22202224, "step": 246 }, { "epoch": 1.1310344827586207, "loss": 0.17997342348098755, "loss_ce": 0.0004079932114109397, "loss_iou": 0.5, "loss_num": 0.035888671875, "loss_xval": 0.1796875, "num_input_tokens_seen": 22202224, "step": 246 }, { "epoch": 1.1356321839080459, "grad_norm": 24.132647296050727, "learning_rate": 5e-06, "loss": 0.2185, "num_input_tokens_seen": 22292492, "step": 247 }, { "epoch": 1.1356321839080459, "loss": 0.17757512629032135, "loss_ce": 0.000573178636841476, "loss_iou": 0.38671875, "loss_num": 0.035400390625, "loss_xval": 0.1767578125, "num_input_tokens_seen": 22292492, "step": 247 }, { "epoch": 1.1402298850574712, "grad_norm": 7.434676789770062, "learning_rate": 5e-06, "loss": 0.2615, "num_input_tokens_seen": 22382928, "step": 248 }, { "epoch": 1.1402298850574712, "loss": 0.27751120924949646, "loss_ce": 0.010482416488230228, "loss_iou": 0.35546875, "loss_num": 0.053466796875, "loss_xval": 0.267578125, "num_input_tokens_seen": 22382928, "step": 248 }, { "epoch": 1.1448275862068966, "grad_norm": 10.379288916413433, "learning_rate": 5e-06, "loss": 0.2081, "num_input_tokens_seen": 22473304, "step": 249 }, { "epoch": 1.1448275862068966, "loss": 0.14225530624389648, "loss_ce": 0.0012641068315133452, "loss_iou": 0.46484375, "loss_num": 0.0281982421875, "loss_xval": 0.140625, "num_input_tokens_seen": 22473304, "step": 249 }, { "epoch": 1.1494252873563218, "grad_norm": 7.839282118755005, "learning_rate": 5e-06, "loss": 0.2665, "num_input_tokens_seen": 22563700, "step": 250 }, { "epoch": 1.1494252873563218, "eval_seeclick_CIoU": 0.4888755977153778, "eval_seeclick_GIoU": 0.47952449321746826, "eval_seeclick_IoU": 0.5278463363647461, "eval_seeclick_MAE_all": 0.056948138400912285, "eval_seeclick_MAE_h": 0.04946732707321644, "eval_seeclick_MAE_w": 0.09706718474626541, "eval_seeclick_MAE_x_boxes": 0.09927782043814659, "eval_seeclick_MAE_y_boxes": 0.046883879229426384, "eval_seeclick_NUM_probability": 0.9999977946281433, "eval_seeclick_inside_bbox": 0.8451704680919647, "eval_seeclick_loss": 0.3242720365524292, "eval_seeclick_loss_ce": 0.040716785937547684, "eval_seeclick_loss_iou": 0.467529296875, "eval_seeclick_loss_num": 0.0596923828125, "eval_seeclick_loss_xval": 0.29864501953125, "eval_seeclick_runtime": 75.8262, "eval_seeclick_samples_per_second": 0.567, "eval_seeclick_steps_per_second": 0.026, "num_input_tokens_seen": 22563700, "step": 250 }, { "epoch": 1.1494252873563218, "eval_icons_CIoU": 0.5218705832958221, "eval_icons_GIoU": 0.5266680121421814, "eval_icons_IoU": 0.5595913529396057, "eval_icons_MAE_all": 0.043722933158278465, "eval_icons_MAE_h": 0.08452420309185982, "eval_icons_MAE_w": 0.06406798399984837, "eval_icons_MAE_x_boxes": 0.05667761527001858, "eval_icons_MAE_y_boxes": 0.08344150707125664, "eval_icons_NUM_probability": 0.9999994039535522, "eval_icons_inside_bbox": 0.7239583432674408, "eval_icons_loss": 0.2156449854373932, "eval_icons_loss_ce": 4.674579088259634e-07, "eval_icons_loss_iou": 0.43145751953125, "eval_icons_loss_num": 0.04747772216796875, "eval_icons_loss_xval": 0.237518310546875, "eval_icons_runtime": 84.5354, "eval_icons_samples_per_second": 0.591, "eval_icons_steps_per_second": 0.024, "num_input_tokens_seen": 22563700, "step": 250 }, { "epoch": 1.1494252873563218, "eval_screenspot_CIoU": 0.30689453333616257, "eval_screenspot_GIoU": 0.2762495552500089, "eval_screenspot_IoU": 0.3886234064896901, "eval_screenspot_MAE_all": 0.10496337960163753, "eval_screenspot_MAE_h": 0.11666570603847504, "eval_screenspot_MAE_w": 0.208540049691995, "eval_screenspot_MAE_x_boxes": 0.18961978455384573, "eval_screenspot_MAE_y_boxes": 0.1133890226483345, "eval_screenspot_NUM_probability": 0.9999755620956421, "eval_screenspot_inside_bbox": 0.6433333357175192, "eval_screenspot_loss": 0.5237547755241394, "eval_screenspot_loss_ce": 0.0012614075288486977, "eval_screenspot_loss_iou": 0.3485921223958333, "eval_screenspot_loss_num": 0.10777791341145833, "eval_screenspot_loss_xval": 0.5385335286458334, "eval_screenspot_runtime": 158.8594, "eval_screenspot_samples_per_second": 0.56, "eval_screenspot_steps_per_second": 0.019, "num_input_tokens_seen": 22563700, "step": 250 }, { "epoch": 1.1494252873563218, "eval_compot_CIoU": 0.44808267056941986, "eval_compot_GIoU": 0.4227418601512909, "eval_compot_IoU": 0.5081988573074341, "eval_compot_MAE_all": 0.05795424245297909, "eval_compot_MAE_h": 0.09416088834404945, "eval_compot_MAE_w": 0.09847152419388294, "eval_compot_MAE_x_boxes": 0.08727088011801243, "eval_compot_MAE_y_boxes": 0.09673519805073738, "eval_compot_NUM_probability": 0.9999892115592957, "eval_compot_inside_bbox": 0.7604166567325592, "eval_compot_loss": 0.32618048787117004, "eval_compot_loss_ce": 0.013961461605504155, "eval_compot_loss_iou": 0.4866943359375, "eval_compot_loss_num": 0.0546875, "eval_compot_loss_xval": 0.273345947265625, "eval_compot_runtime": 88.276, "eval_compot_samples_per_second": 0.566, "eval_compot_steps_per_second": 0.023, "num_input_tokens_seen": 22563700, "step": 250 }, { "epoch": 1.1494252873563218, "loss": 0.19636714458465576, "loss_ce": 0.007402303162962198, "loss_iou": 0.546875, "loss_num": 0.037841796875, "loss_xval": 0.189453125, "num_input_tokens_seen": 22563700, "step": 250 }, { "epoch": 1.1540229885057471, "grad_norm": 7.43698227348526, "learning_rate": 5e-06, "loss": 0.1928, "num_input_tokens_seen": 22654036, "step": 251 }, { "epoch": 1.1540229885057471, "loss": 0.19022439420223236, "loss_ce": 0.00022195614292286336, "loss_iou": 0.5, "loss_num": 0.0380859375, "loss_xval": 0.1904296875, "num_input_tokens_seen": 22654036, "step": 251 }, { "epoch": 1.1586206896551725, "grad_norm": 7.724863758957877, "learning_rate": 5e-06, "loss": 0.235, "num_input_tokens_seen": 22744428, "step": 252 }, { "epoch": 1.1586206896551725, "loss": 0.28669023513793945, "loss_ce": 0.0006184765952639282, "loss_iou": 0.375, "loss_num": 0.05712890625, "loss_xval": 0.28515625, "num_input_tokens_seen": 22744428, "step": 252 }, { "epoch": 1.1632183908045977, "grad_norm": 52.54527766863086, "learning_rate": 5e-06, "loss": 0.2173, "num_input_tokens_seen": 22834692, "step": 253 }, { "epoch": 1.1632183908045977, "loss": 0.1744190752506256, "loss_ce": 0.0014149189228191972, "loss_iou": 0.451171875, "loss_num": 0.03466796875, "loss_xval": 0.1728515625, "num_input_tokens_seen": 22834692, "step": 253 }, { "epoch": 1.167816091954023, "grad_norm": 4.696366017744483, "learning_rate": 5e-06, "loss": 0.1373, "num_input_tokens_seen": 22924280, "step": 254 }, { "epoch": 1.167816091954023, "loss": 0.12924961745738983, "loss_ce": 0.0024185676593333483, "loss_iou": 0.44140625, "loss_num": 0.025390625, "loss_xval": 0.126953125, "num_input_tokens_seen": 22924280, "step": 254 }, { "epoch": 1.1724137931034484, "grad_norm": 6.684225949401091, "learning_rate": 5e-06, "loss": 0.1772, "num_input_tokens_seen": 23014664, "step": 255 }, { "epoch": 1.1724137931034484, "loss": 0.1868850439786911, "loss_ce": 0.00048367734416387975, "loss_iou": 0.5078125, "loss_num": 0.037353515625, "loss_xval": 0.1865234375, "num_input_tokens_seen": 23014664, "step": 255 }, { "epoch": 1.1770114942528735, "grad_norm": 10.115688964897386, "learning_rate": 5e-06, "loss": 0.2414, "num_input_tokens_seen": 23104224, "step": 256 }, { "epoch": 1.1770114942528735, "loss": 0.2677534222602844, "loss_ce": 0.00048048628377728164, "loss_iou": 0.435546875, "loss_num": 0.053466796875, "loss_xval": 0.267578125, "num_input_tokens_seen": 23104224, "step": 256 }, { "epoch": 1.181609195402299, "grad_norm": 5.045379741734736, "learning_rate": 5e-06, "loss": 0.2122, "num_input_tokens_seen": 23194388, "step": 257 }, { "epoch": 1.181609195402299, "loss": 0.24545620381832123, "loss_ce": 9.487089118920267e-05, "loss_iou": 0.3671875, "loss_num": 0.049072265625, "loss_xval": 0.2451171875, "num_input_tokens_seen": 23194388, "step": 257 }, { "epoch": 1.186206896551724, "grad_norm": 26.131469875864838, "learning_rate": 5e-06, "loss": 0.2115, "num_input_tokens_seen": 23284736, "step": 258 }, { "epoch": 1.186206896551724, "loss": 0.21708114445209503, "loss_ce": 0.000528403848875314, "loss_iou": 0.482421875, "loss_num": 0.043212890625, "loss_xval": 0.216796875, "num_input_tokens_seen": 23284736, "step": 258 }, { "epoch": 1.1908045977011494, "grad_norm": 19.645438145213465, "learning_rate": 5e-06, "loss": 0.2313, "num_input_tokens_seen": 23375184, "step": 259 }, { "epoch": 1.1908045977011494, "loss": 0.268694132566452, "loss_ce": 0.0001394439022988081, "loss_iou": 0.41015625, "loss_num": 0.0537109375, "loss_xval": 0.26953125, "num_input_tokens_seen": 23375184, "step": 259 }, { "epoch": 1.1954022988505748, "grad_norm": 16.21307669612568, "learning_rate": 5e-06, "loss": 0.2792, "num_input_tokens_seen": 23465472, "step": 260 }, { "epoch": 1.1954022988505748, "loss": 0.29360634088516235, "loss_ce": 0.0001798336743377149, "loss_iou": 0.314453125, "loss_num": 0.05859375, "loss_xval": 0.29296875, "num_input_tokens_seen": 23465472, "step": 260 }, { "epoch": 1.2, "grad_norm": 12.81808028387753, "learning_rate": 5e-06, "loss": 0.2153, "num_input_tokens_seen": 23555724, "step": 261 }, { "epoch": 1.2, "loss": 0.23135723173618317, "loss_ce": 0.009524943307042122, "loss_iou": 0.53125, "loss_num": 0.04443359375, "loss_xval": 0.2216796875, "num_input_tokens_seen": 23555724, "step": 261 }, { "epoch": 1.2045977011494253, "grad_norm": 4.1999709843832616, "learning_rate": 5e-06, "loss": 0.2374, "num_input_tokens_seen": 23645988, "step": 262 }, { "epoch": 1.2045977011494253, "loss": 0.22801579535007477, "loss_ce": 0.0014532884815707803, "loss_iou": 0.44140625, "loss_num": 0.04541015625, "loss_xval": 0.2265625, "num_input_tokens_seen": 23645988, "step": 262 }, { "epoch": 1.2091954022988505, "grad_norm": 6.414285711751042, "learning_rate": 5e-06, "loss": 0.1998, "num_input_tokens_seen": 23736268, "step": 263 }, { "epoch": 1.2091954022988505, "loss": 0.15405645966529846, "loss_ce": 0.0007971944869495928, "loss_iou": 0.53125, "loss_num": 0.0306396484375, "loss_xval": 0.1533203125, "num_input_tokens_seen": 23736268, "step": 263 }, { "epoch": 1.2137931034482758, "grad_norm": 4.480437290662561, "learning_rate": 5e-06, "loss": 0.2126, "num_input_tokens_seen": 23826792, "step": 264 }, { "epoch": 1.2137931034482758, "loss": 0.20262828469276428, "loss_ce": 0.000479854759760201, "loss_iou": 0.435546875, "loss_num": 0.04052734375, "loss_xval": 0.2021484375, "num_input_tokens_seen": 23826792, "step": 264 }, { "epoch": 1.2183908045977012, "grad_norm": 15.731375012073059, "learning_rate": 5e-06, "loss": 0.1946, "num_input_tokens_seen": 23916428, "step": 265 }, { "epoch": 1.2183908045977012, "loss": 0.15348592400550842, "loss_ce": 0.00010457600728841498, "loss_iou": 0.427734375, "loss_num": 0.03076171875, "loss_xval": 0.1533203125, "num_input_tokens_seen": 23916428, "step": 265 }, { "epoch": 1.2229885057471264, "grad_norm": 7.176014460557662, "learning_rate": 5e-06, "loss": 0.304, "num_input_tokens_seen": 24006752, "step": 266 }, { "epoch": 1.2229885057471264, "loss": 0.3059394955635071, "loss_ce": 0.00015336027718149126, "loss_iou": 0.51953125, "loss_num": 0.06103515625, "loss_xval": 0.306640625, "num_input_tokens_seen": 24006752, "step": 266 }, { "epoch": 1.2275862068965517, "grad_norm": 6.940435918405699, "learning_rate": 5e-06, "loss": 0.1946, "num_input_tokens_seen": 24097012, "step": 267 }, { "epoch": 1.2275862068965517, "loss": 0.18653042614459991, "loss_ce": 0.0003121576155535877, "loss_iou": 0.50390625, "loss_num": 0.037353515625, "loss_xval": 0.1865234375, "num_input_tokens_seen": 24097012, "step": 267 }, { "epoch": 1.232183908045977, "grad_norm": 3.7469836271055623, "learning_rate": 5e-06, "loss": 0.3164, "num_input_tokens_seen": 24187256, "step": 268 }, { "epoch": 1.232183908045977, "loss": 0.27794623374938965, "loss_ce": 0.00569892255589366, "loss_iou": 0.35546875, "loss_num": 0.054443359375, "loss_xval": 0.271484375, "num_input_tokens_seen": 24187256, "step": 268 }, { "epoch": 1.2367816091954023, "grad_norm": 7.413553829750902, "learning_rate": 5e-06, "loss": 0.2141, "num_input_tokens_seen": 24277688, "step": 269 }, { "epoch": 1.2367816091954023, "loss": 0.19044940173625946, "loss_ce": 8.075028017628938e-05, "loss_iou": 0.45703125, "loss_num": 0.0380859375, "loss_xval": 0.1904296875, "num_input_tokens_seen": 24277688, "step": 269 }, { "epoch": 1.2413793103448276, "grad_norm": 7.193196745882768, "learning_rate": 5e-06, "loss": 0.2068, "num_input_tokens_seen": 24368024, "step": 270 }, { "epoch": 1.2413793103448276, "loss": 0.24729135632514954, "loss_ce": 3.7946036172797903e-05, "loss_iou": 0.3515625, "loss_num": 0.04931640625, "loss_xval": 0.2470703125, "num_input_tokens_seen": 24368024, "step": 270 }, { "epoch": 1.245977011494253, "grad_norm": 23.36412717520471, "learning_rate": 5e-06, "loss": 0.1889, "num_input_tokens_seen": 24458400, "step": 271 }, { "epoch": 1.245977011494253, "loss": 0.18987199664115906, "loss_ce": 0.00023576414969284087, "loss_iou": 0.421875, "loss_num": 0.037841796875, "loss_xval": 0.189453125, "num_input_tokens_seen": 24458400, "step": 271 }, { "epoch": 1.2505747126436781, "grad_norm": 6.27939746197536, "learning_rate": 5e-06, "loss": 0.1642, "num_input_tokens_seen": 24548712, "step": 272 }, { "epoch": 1.2505747126436781, "loss": 0.193972647190094, "loss_ce": 0.0008268996607512236, "loss_iou": 0.484375, "loss_num": 0.03857421875, "loss_xval": 0.193359375, "num_input_tokens_seen": 24548712, "step": 272 }, { "epoch": 1.2551724137931035, "grad_norm": 25.45105413808657, "learning_rate": 5e-06, "loss": 0.2115, "num_input_tokens_seen": 24639064, "step": 273 }, { "epoch": 1.2551724137931035, "loss": 0.2186853289604187, "loss_ce": 0.0026818893384188414, "loss_iou": 0.416015625, "loss_num": 0.043212890625, "loss_xval": 0.2158203125, "num_input_tokens_seen": 24639064, "step": 273 }, { "epoch": 1.2597701149425287, "grad_norm": 37.080183589133725, "learning_rate": 5e-06, "loss": 0.1826, "num_input_tokens_seen": 24729388, "step": 274 }, { "epoch": 1.2597701149425287, "loss": 0.19386309385299683, "loss_ce": 0.0006868370110169053, "loss_iou": 0.447265625, "loss_num": 0.03857421875, "loss_xval": 0.193359375, "num_input_tokens_seen": 24729388, "step": 274 }, { "epoch": 1.264367816091954, "grad_norm": 16.018753076692136, "learning_rate": 5e-06, "loss": 0.2302, "num_input_tokens_seen": 24819812, "step": 275 }, { "epoch": 1.264367816091954, "loss": 0.2251913845539093, "loss_ce": 0.0006735554779879749, "loss_iou": 0.5, "loss_num": 0.044921875, "loss_xval": 0.224609375, "num_input_tokens_seen": 24819812, "step": 275 }, { "epoch": 1.2689655172413792, "grad_norm": 6.147405946334659, "learning_rate": 5e-06, "loss": 0.1915, "num_input_tokens_seen": 24910200, "step": 276 }, { "epoch": 1.2689655172413792, "loss": 0.14948949217796326, "loss_ce": 0.00031957216560840607, "loss_iou": 0.408203125, "loss_num": 0.02978515625, "loss_xval": 0.1494140625, "num_input_tokens_seen": 24910200, "step": 276 }, { "epoch": 1.2735632183908046, "grad_norm": 6.051687547447403, "learning_rate": 5e-06, "loss": 0.1815, "num_input_tokens_seen": 25000476, "step": 277 }, { "epoch": 1.2735632183908046, "loss": 0.2223796248435974, "loss_ce": 0.008512439206242561, "loss_iou": 0.474609375, "loss_num": 0.042724609375, "loss_xval": 0.2138671875, "num_input_tokens_seen": 25000476, "step": 277 }, { "epoch": 1.27816091954023, "grad_norm": 8.095544751091358, "learning_rate": 5e-06, "loss": 0.2494, "num_input_tokens_seen": 25090868, "step": 278 }, { "epoch": 1.27816091954023, "loss": 0.21112553775310516, "loss_ce": 0.00047794863348826766, "loss_iou": 0.44140625, "loss_num": 0.042236328125, "loss_xval": 0.2109375, "num_input_tokens_seen": 25090868, "step": 278 }, { "epoch": 1.282758620689655, "grad_norm": 29.745999578899646, "learning_rate": 5e-06, "loss": 0.2185, "num_input_tokens_seen": 25181160, "step": 279 }, { "epoch": 1.282758620689655, "loss": 0.16160300374031067, "loss_ce": 0.00013449997641146183, "loss_iou": 0.421875, "loss_num": 0.0322265625, "loss_xval": 0.1611328125, "num_input_tokens_seen": 25181160, "step": 279 }, { "epoch": 1.2873563218390804, "grad_norm": 9.770076744230986, "learning_rate": 5e-06, "loss": 0.2388, "num_input_tokens_seen": 25270696, "step": 280 }, { "epoch": 1.2873563218390804, "loss": 0.2805434465408325, "loss_ce": 0.00020895421039313078, "loss_iou": 0.44921875, "loss_num": 0.05615234375, "loss_xval": 0.28125, "num_input_tokens_seen": 25270696, "step": 280 }, { "epoch": 1.2919540229885058, "grad_norm": 6.866286661032349, "learning_rate": 5e-06, "loss": 0.2209, "num_input_tokens_seen": 25361132, "step": 281 }, { "epoch": 1.2919540229885058, "loss": 0.21573258936405182, "loss_ce": 9.537945152260363e-05, "loss_iou": 0.44140625, "loss_num": 0.043212890625, "loss_xval": 0.2158203125, "num_input_tokens_seen": 25361132, "step": 281 }, { "epoch": 1.296551724137931, "grad_norm": 14.632378384320322, "learning_rate": 5e-06, "loss": 0.1523, "num_input_tokens_seen": 25451508, "step": 282 }, { "epoch": 1.296551724137931, "loss": 0.14539626240730286, "loss_ce": 0.002390899695456028, "loss_iou": 0.443359375, "loss_num": 0.028564453125, "loss_xval": 0.142578125, "num_input_tokens_seen": 25451508, "step": 282 }, { "epoch": 1.3011494252873563, "grad_norm": 10.586676404677785, "learning_rate": 5e-06, "loss": 0.2193, "num_input_tokens_seen": 25541988, "step": 283 }, { "epoch": 1.3011494252873563, "loss": 0.18449945747852325, "loss_ce": 0.0038964273408055305, "loss_iou": 0.470703125, "loss_num": 0.0361328125, "loss_xval": 0.1806640625, "num_input_tokens_seen": 25541988, "step": 283 }, { "epoch": 1.3057471264367817, "grad_norm": 20.999614027385967, "learning_rate": 5e-06, "loss": 0.2048, "num_input_tokens_seen": 25632416, "step": 284 }, { "epoch": 1.3057471264367817, "loss": 0.2276066094636917, "loss_ce": 0.00018961683963425457, "loss_iou": 0.361328125, "loss_num": 0.04541015625, "loss_xval": 0.2275390625, "num_input_tokens_seen": 25632416, "step": 284 }, { "epoch": 1.3103448275862069, "grad_norm": 5.543128202063141, "learning_rate": 5e-06, "loss": 0.1622, "num_input_tokens_seen": 25722744, "step": 285 }, { "epoch": 1.3103448275862069, "loss": 0.16011224687099457, "loss_ce": 0.0001390949182678014, "loss_iou": 0.4140625, "loss_num": 0.031982421875, "loss_xval": 0.16015625, "num_input_tokens_seen": 25722744, "step": 285 }, { "epoch": 1.3149425287356322, "grad_norm": 11.112725830839327, "learning_rate": 5e-06, "loss": 0.2203, "num_input_tokens_seen": 25813124, "step": 286 }, { "epoch": 1.3149425287356322, "loss": 0.3086729943752289, "loss_ce": 0.0008421677048318088, "loss_iou": 0.41015625, "loss_num": 0.0615234375, "loss_xval": 0.30859375, "num_input_tokens_seen": 25813124, "step": 286 }, { "epoch": 1.3195402298850576, "grad_norm": 9.783123070255048, "learning_rate": 5e-06, "loss": 0.1712, "num_input_tokens_seen": 25903612, "step": 287 }, { "epoch": 1.3195402298850576, "loss": 0.17264115810394287, "loss_ce": 0.0005983194569125772, "loss_iou": 0.40625, "loss_num": 0.034423828125, "loss_xval": 0.171875, "num_input_tokens_seen": 25903612, "step": 287 }, { "epoch": 1.3241379310344827, "grad_norm": 4.636122741394509, "learning_rate": 5e-06, "loss": 0.2552, "num_input_tokens_seen": 25994072, "step": 288 }, { "epoch": 1.3241379310344827, "loss": 0.22548282146453857, "loss_ce": 0.00018680733046494424, "loss_iou": 0.43359375, "loss_num": 0.045166015625, "loss_xval": 0.2255859375, "num_input_tokens_seen": 25994072, "step": 288 }, { "epoch": 1.328735632183908, "grad_norm": 3.861529643068919, "learning_rate": 5e-06, "loss": 0.1707, "num_input_tokens_seen": 26084532, "step": 289 }, { "epoch": 1.328735632183908, "loss": 0.16129527986049652, "loss_ce": 0.00016246250015683472, "loss_iou": 0.515625, "loss_num": 0.0322265625, "loss_xval": 0.1611328125, "num_input_tokens_seen": 26084532, "step": 289 }, { "epoch": 1.3333333333333333, "grad_norm": 57.38016500276973, "learning_rate": 5e-06, "loss": 0.2326, "num_input_tokens_seen": 26174984, "step": 290 }, { "epoch": 1.3333333333333333, "loss": 0.15951338410377502, "loss_ce": 0.00039472500793635845, "loss_iou": 0.443359375, "loss_num": 0.03173828125, "loss_xval": 0.1591796875, "num_input_tokens_seen": 26174984, "step": 290 }, { "epoch": 1.3379310344827586, "grad_norm": 8.111819603516635, "learning_rate": 5e-06, "loss": 0.1983, "num_input_tokens_seen": 26265348, "step": 291 }, { "epoch": 1.3379310344827586, "loss": 0.1832209974527359, "loss_ce": 0.0004817442095372826, "loss_iou": 0.4765625, "loss_num": 0.03662109375, "loss_xval": 0.1826171875, "num_input_tokens_seen": 26265348, "step": 291 }, { "epoch": 1.3425287356321838, "grad_norm": 18.16139378563598, "learning_rate": 5e-06, "loss": 0.1927, "num_input_tokens_seen": 26355688, "step": 292 }, { "epoch": 1.3425287356321838, "loss": 0.17206496000289917, "loss_ce": 6.788483005948365e-05, "loss_iou": 0.5078125, "loss_num": 0.034423828125, "loss_xval": 0.171875, "num_input_tokens_seen": 26355688, "step": 292 }, { "epoch": 1.3471264367816091, "grad_norm": 20.460245063855925, "learning_rate": 5e-06, "loss": 0.237, "num_input_tokens_seen": 26446172, "step": 293 }, { "epoch": 1.3471264367816091, "loss": 0.23456808924674988, "loss_ce": 0.005442116409540176, "loss_iou": 0.388671875, "loss_num": 0.0458984375, "loss_xval": 0.2294921875, "num_input_tokens_seen": 26446172, "step": 293 }, { "epoch": 1.3517241379310345, "grad_norm": 12.205499115713149, "learning_rate": 5e-06, "loss": 0.2191, "num_input_tokens_seen": 26536628, "step": 294 }, { "epoch": 1.3517241379310345, "loss": 0.2167300283908844, "loss_ce": 0.00017729138198774308, "loss_iou": 0.494140625, "loss_num": 0.043212890625, "loss_xval": 0.216796875, "num_input_tokens_seen": 26536628, "step": 294 }, { "epoch": 1.3563218390804597, "grad_norm": 7.426284095641791, "learning_rate": 5e-06, "loss": 0.1613, "num_input_tokens_seen": 26627064, "step": 295 }, { "epoch": 1.3563218390804597, "loss": 0.19740846753120422, "loss_ce": 0.0002038878737948835, "loss_iou": 0.38671875, "loss_num": 0.03955078125, "loss_xval": 0.197265625, "num_input_tokens_seen": 26627064, "step": 295 }, { "epoch": 1.360919540229885, "grad_norm": 39.074194628927266, "learning_rate": 5e-06, "loss": 0.2518, "num_input_tokens_seen": 26717480, "step": 296 }, { "epoch": 1.360919540229885, "loss": 0.3336067795753479, "loss_ce": 0.0002937709796242416, "loss_iou": 0.44921875, "loss_num": 0.06640625, "loss_xval": 0.333984375, "num_input_tokens_seen": 26717480, "step": 296 }, { "epoch": 1.3655172413793104, "grad_norm": 20.276024886618437, "learning_rate": 5e-06, "loss": 0.209, "num_input_tokens_seen": 26807844, "step": 297 }, { "epoch": 1.3655172413793104, "loss": 0.3174262046813965, "loss_ce": 0.0001654599909670651, "loss_iou": 0.451171875, "loss_num": 0.0634765625, "loss_xval": 0.31640625, "num_input_tokens_seen": 26807844, "step": 297 }, { "epoch": 1.3701149425287356, "grad_norm": 4.705748915237839, "learning_rate": 5e-06, "loss": 0.2168, "num_input_tokens_seen": 26898224, "step": 298 }, { "epoch": 1.3701149425287356, "loss": 0.24118977785110474, "loss_ce": 0.00010090330033563077, "loss_iou": 0.4296875, "loss_num": 0.048095703125, "loss_xval": 0.2412109375, "num_input_tokens_seen": 26898224, "step": 298 }, { "epoch": 1.374712643678161, "grad_norm": 25.22660122284678, "learning_rate": 5e-06, "loss": 0.2173, "num_input_tokens_seen": 26988588, "step": 299 }, { "epoch": 1.374712643678161, "loss": 0.18158341944217682, "loss_ce": 6.486591883003712e-05, "loss_iou": 0.455078125, "loss_num": 0.036376953125, "loss_xval": 0.181640625, "num_input_tokens_seen": 26988588, "step": 299 }, { "epoch": 1.3793103448275863, "grad_norm": 8.272745007832095, "learning_rate": 5e-06, "loss": 0.229, "num_input_tokens_seen": 27078844, "step": 300 }, { "epoch": 1.3793103448275863, "loss": 0.19429120421409607, "loss_ce": 0.00010784986079670489, "loss_iou": 0.4140625, "loss_num": 0.038818359375, "loss_xval": 0.1943359375, "num_input_tokens_seen": 27078844, "step": 300 }, { "epoch": 1.3839080459770114, "grad_norm": 11.158840002018794, "learning_rate": 5e-06, "loss": 0.1863, "num_input_tokens_seen": 27169132, "step": 301 }, { "epoch": 1.3839080459770114, "loss": 0.24286043643951416, "loss_ce": 6.258698704186827e-05, "loss_iou": 0.443359375, "loss_num": 0.048583984375, "loss_xval": 0.2431640625, "num_input_tokens_seen": 27169132, "step": 301 }, { "epoch": 1.3885057471264368, "grad_norm": 11.626395028754814, "learning_rate": 5e-06, "loss": 0.1938, "num_input_tokens_seen": 27259360, "step": 302 }, { "epoch": 1.3885057471264368, "loss": 0.22401869297027588, "loss_ce": 0.0002638171426951885, "loss_iou": 0.412109375, "loss_num": 0.044677734375, "loss_xval": 0.2236328125, "num_input_tokens_seen": 27259360, "step": 302 }, { "epoch": 1.3931034482758622, "grad_norm": 8.347578373209931, "learning_rate": 5e-06, "loss": 0.2332, "num_input_tokens_seen": 27349756, "step": 303 }, { "epoch": 1.3931034482758622, "loss": 0.24985189735889435, "loss_ce": 0.00015708088176324964, "loss_iou": 0.421875, "loss_num": 0.050048828125, "loss_xval": 0.25, "num_input_tokens_seen": 27349756, "step": 303 }, { "epoch": 1.3977011494252873, "grad_norm": 9.382990132866077, "learning_rate": 5e-06, "loss": 0.2427, "num_input_tokens_seen": 27440160, "step": 304 }, { "epoch": 1.3977011494252873, "loss": 0.23031748831272125, "loss_ce": 0.002442737342789769, "loss_iou": 0.390625, "loss_num": 0.045654296875, "loss_xval": 0.2275390625, "num_input_tokens_seen": 27440160, "step": 304 }, { "epoch": 1.4022988505747127, "grad_norm": 13.016846770254826, "learning_rate": 5e-06, "loss": 0.2354, "num_input_tokens_seen": 27530484, "step": 305 }, { "epoch": 1.4022988505747127, "loss": 0.2382659912109375, "loss_ce": 0.0012664890382438898, "loss_iou": 0.384765625, "loss_num": 0.04736328125, "loss_xval": 0.2373046875, "num_input_tokens_seen": 27530484, "step": 305 }, { "epoch": 1.4068965517241379, "grad_norm": 13.820143768808174, "learning_rate": 5e-06, "loss": 0.1966, "num_input_tokens_seen": 27620888, "step": 306 }, { "epoch": 1.4068965517241379, "loss": 0.1968221664428711, "loss_ce": 0.00025843450566753745, "loss_iou": 0.455078125, "loss_num": 0.039306640625, "loss_xval": 0.1962890625, "num_input_tokens_seen": 27620888, "step": 306 }, { "epoch": 1.4114942528735632, "grad_norm": 3.5475816109452896, "learning_rate": 5e-06, "loss": 0.1937, "num_input_tokens_seen": 27711272, "step": 307 }, { "epoch": 1.4114942528735632, "loss": 0.1471329778432846, "loss_ce": 0.00022135992185212672, "loss_iou": 0.46875, "loss_num": 0.029296875, "loss_xval": 0.146484375, "num_input_tokens_seen": 27711272, "step": 307 }, { "epoch": 1.4160919540229884, "grad_norm": 10.22489016272008, "learning_rate": 5e-06, "loss": 0.2147, "num_input_tokens_seen": 27801544, "step": 308 }, { "epoch": 1.4160919540229884, "loss": 0.19165240228176117, "loss_ce": 0.008699517697095871, "loss_iou": 0.47265625, "loss_num": 0.03662109375, "loss_xval": 0.1826171875, "num_input_tokens_seen": 27801544, "step": 308 }, { "epoch": 1.4206896551724137, "grad_norm": 9.134110088295508, "learning_rate": 5e-06, "loss": 0.2337, "num_input_tokens_seen": 27891884, "step": 309 }, { "epoch": 1.4206896551724137, "loss": 0.2654152512550354, "loss_ce": 0.005771718919277191, "loss_iou": 0.451171875, "loss_num": 0.052001953125, "loss_xval": 0.259765625, "num_input_tokens_seen": 27891884, "step": 309 }, { "epoch": 1.4252873563218391, "grad_norm": 10.161788829631169, "learning_rate": 5e-06, "loss": 0.2401, "num_input_tokens_seen": 27982204, "step": 310 }, { "epoch": 1.4252873563218391, "loss": 0.2308727651834488, "loss_ce": 0.003730440977960825, "loss_iou": 0.4296875, "loss_num": 0.04541015625, "loss_xval": 0.2275390625, "num_input_tokens_seen": 27982204, "step": 310 }, { "epoch": 1.4298850574712643, "grad_norm": 8.186672197144189, "learning_rate": 5e-06, "loss": 0.1798, "num_input_tokens_seen": 28072556, "step": 311 }, { "epoch": 1.4298850574712643, "loss": 0.19206635653972626, "loss_ce": 0.0002023405977524817, "loss_iou": 0.435546875, "loss_num": 0.038330078125, "loss_xval": 0.19140625, "num_input_tokens_seen": 28072556, "step": 311 }, { "epoch": 1.4344827586206896, "grad_norm": 20.204462885292834, "learning_rate": 5e-06, "loss": 0.1739, "num_input_tokens_seen": 28162972, "step": 312 }, { "epoch": 1.4344827586206896, "loss": 0.15177220106124878, "loss_ce": 9.983143536373973e-05, "loss_iou": 0.353515625, "loss_num": 0.0302734375, "loss_xval": 0.1513671875, "num_input_tokens_seen": 28162972, "step": 312 }, { "epoch": 1.439080459770115, "grad_norm": 10.349632633496206, "learning_rate": 5e-06, "loss": 0.2239, "num_input_tokens_seen": 28252592, "step": 313 }, { "epoch": 1.439080459770115, "loss": 0.23639342188835144, "loss_ce": 0.0029949769377708435, "loss_iou": 0.45703125, "loss_num": 0.046630859375, "loss_xval": 0.2333984375, "num_input_tokens_seen": 28252592, "step": 313 }, { "epoch": 1.4436781609195402, "grad_norm": 4.037411009142002, "learning_rate": 5e-06, "loss": 0.2111, "num_input_tokens_seen": 28342884, "step": 314 }, { "epoch": 1.4436781609195402, "loss": 0.14021849632263184, "loss_ce": 0.00011228647781535983, "loss_iou": 0.431640625, "loss_num": 0.028076171875, "loss_xval": 0.1396484375, "num_input_tokens_seen": 28342884, "step": 314 }, { "epoch": 1.4482758620689655, "grad_norm": 10.393150532292086, "learning_rate": 5e-06, "loss": 0.118, "num_input_tokens_seen": 28433376, "step": 315 }, { "epoch": 1.4482758620689655, "loss": 0.12029355764389038, "loss_ce": 0.00014585554890800267, "loss_iou": 0.48046875, "loss_num": 0.0240478515625, "loss_xval": 0.1201171875, "num_input_tokens_seen": 28433376, "step": 315 }, { "epoch": 1.452873563218391, "grad_norm": 3.824936350612978, "learning_rate": 5e-06, "loss": 0.2174, "num_input_tokens_seen": 28523648, "step": 316 }, { "epoch": 1.452873563218391, "loss": 0.24938887357711792, "loss_ce": 0.0014030258171260357, "loss_iou": 0.484375, "loss_num": 0.049560546875, "loss_xval": 0.248046875, "num_input_tokens_seen": 28523648, "step": 316 }, { "epoch": 1.457471264367816, "grad_norm": 11.988045018065144, "learning_rate": 5e-06, "loss": 0.1848, "num_input_tokens_seen": 28613988, "step": 317 }, { "epoch": 1.457471264367816, "loss": 0.17838840186595917, "loss_ce": 0.009259981103241444, "loss_iou": 0.515625, "loss_num": 0.03369140625, "loss_xval": 0.1689453125, "num_input_tokens_seen": 28613988, "step": 317 }, { "epoch": 1.4620689655172414, "grad_norm": 4.034511511486509, "learning_rate": 5e-06, "loss": 0.1713, "num_input_tokens_seen": 28704372, "step": 318 }, { "epoch": 1.4620689655172414, "loss": 0.1520136594772339, "loss_ce": 0.0004633643548004329, "loss_iou": 0.384765625, "loss_num": 0.0302734375, "loss_xval": 0.1513671875, "num_input_tokens_seen": 28704372, "step": 318 }, { "epoch": 1.4666666666666668, "grad_norm": 15.615735006728443, "learning_rate": 5e-06, "loss": 0.2133, "num_input_tokens_seen": 28794788, "step": 319 }, { "epoch": 1.4666666666666668, "loss": 0.23856469988822937, "loss_ce": 0.00043603702215477824, "loss_iou": 0.34375, "loss_num": 0.047607421875, "loss_xval": 0.23828125, "num_input_tokens_seen": 28794788, "step": 319 }, { "epoch": 1.471264367816092, "grad_norm": 22.79226873920817, "learning_rate": 5e-06, "loss": 0.2221, "num_input_tokens_seen": 28885240, "step": 320 }, { "epoch": 1.471264367816092, "loss": 0.1695161610841751, "loss_ce": 5.20510075148195e-05, "loss_iou": 0.396484375, "loss_num": 0.033935546875, "loss_xval": 0.169921875, "num_input_tokens_seen": 28885240, "step": 320 }, { "epoch": 1.4758620689655173, "grad_norm": 12.555178366315296, "learning_rate": 5e-06, "loss": 0.2411, "num_input_tokens_seen": 28975712, "step": 321 }, { "epoch": 1.4758620689655173, "loss": 0.24558743834495544, "loss_ce": 0.00040922165499068797, "loss_iou": 0.419921875, "loss_num": 0.049072265625, "loss_xval": 0.2451171875, "num_input_tokens_seen": 28975712, "step": 321 }, { "epoch": 1.4804597701149425, "grad_norm": 6.643673356109838, "learning_rate": 5e-06, "loss": 0.2199, "num_input_tokens_seen": 29066048, "step": 322 }, { "epoch": 1.4804597701149425, "loss": 0.18256893754005432, "loss_ce": 0.001660746755078435, "loss_iou": 0.35546875, "loss_num": 0.0361328125, "loss_xval": 0.1806640625, "num_input_tokens_seen": 29066048, "step": 322 }, { "epoch": 1.4850574712643678, "grad_norm": 5.317104474065933, "learning_rate": 5e-06, "loss": 0.1692, "num_input_tokens_seen": 29156544, "step": 323 }, { "epoch": 1.4850574712643678, "loss": 0.19987276196479797, "loss_ce": 0.00025728094624355435, "loss_iou": 0.45703125, "loss_num": 0.039794921875, "loss_xval": 0.19921875, "num_input_tokens_seen": 29156544, "step": 323 }, { "epoch": 1.489655172413793, "grad_norm": 12.208298681282333, "learning_rate": 5e-06, "loss": 0.1693, "num_input_tokens_seen": 29246832, "step": 324 }, { "epoch": 1.489655172413793, "loss": 0.15619094669818878, "loss_ce": 3.249588917242363e-05, "loss_iou": 0.48828125, "loss_num": 0.03125, "loss_xval": 0.15625, "num_input_tokens_seen": 29246832, "step": 324 }, { "epoch": 1.4942528735632183, "grad_norm": 14.551980500080187, "learning_rate": 5e-06, "loss": 0.2611, "num_input_tokens_seen": 29337088, "step": 325 }, { "epoch": 1.4942528735632183, "loss": 0.348785400390625, "loss_ce": 0.0024414421059191227, "loss_iou": 0.419921875, "loss_num": 0.0693359375, "loss_xval": 0.345703125, "num_input_tokens_seen": 29337088, "step": 325 }, { "epoch": 1.4988505747126437, "grad_norm": 11.379206303993785, "learning_rate": 5e-06, "loss": 0.1849, "num_input_tokens_seen": 29427352, "step": 326 }, { "epoch": 1.4988505747126437, "loss": 0.22499850392341614, "loss_ce": 0.0005417080246843398, "loss_iou": 0.435546875, "loss_num": 0.044921875, "loss_xval": 0.224609375, "num_input_tokens_seen": 29427352, "step": 326 }, { "epoch": 1.5034482758620689, "grad_norm": 90.0439810100679, "learning_rate": 5e-06, "loss": 0.2161, "num_input_tokens_seen": 29517724, "step": 327 }, { "epoch": 1.5034482758620689, "loss": 0.18729592859745026, "loss_ce": 0.0005893875495530665, "loss_iou": 0.478515625, "loss_num": 0.037353515625, "loss_xval": 0.1865234375, "num_input_tokens_seen": 29517724, "step": 327 }, { "epoch": 1.5080459770114942, "grad_norm": 19.33964708457954, "learning_rate": 5e-06, "loss": 0.2298, "num_input_tokens_seen": 29608140, "step": 328 }, { "epoch": 1.5080459770114942, "loss": 0.24188189208507538, "loss_ce": 0.0005641538882628083, "loss_iou": 0.44921875, "loss_num": 0.048095703125, "loss_xval": 0.2412109375, "num_input_tokens_seen": 29608140, "step": 328 }, { "epoch": 1.5126436781609196, "grad_norm": 6.833812883588884, "learning_rate": 5e-06, "loss": 0.2319, "num_input_tokens_seen": 29697696, "step": 329 }, { "epoch": 1.5126436781609196, "loss": 0.2876734733581543, "loss_ce": 0.0008998108096420765, "loss_iou": 0.353515625, "loss_num": 0.057373046875, "loss_xval": 0.287109375, "num_input_tokens_seen": 29697696, "step": 329 }, { "epoch": 1.5172413793103448, "grad_norm": 10.08968474441476, "learning_rate": 5e-06, "loss": 0.1793, "num_input_tokens_seen": 29787960, "step": 330 }, { "epoch": 1.5172413793103448, "loss": 0.15798088908195496, "loss_ce": 0.00012871227227151394, "loss_iou": 0.474609375, "loss_num": 0.031494140625, "loss_xval": 0.158203125, "num_input_tokens_seen": 29787960, "step": 330 }, { "epoch": 1.5218390804597701, "grad_norm": 10.48777960826385, "learning_rate": 5e-06, "loss": 0.2303, "num_input_tokens_seen": 29878288, "step": 331 }, { "epoch": 1.5218390804597701, "loss": 0.27822285890579224, "loss_ce": 0.00014670401287730783, "loss_iou": 0.42578125, "loss_num": 0.0556640625, "loss_xval": 0.27734375, "num_input_tokens_seen": 29878288, "step": 331 }, { "epoch": 1.5264367816091955, "grad_norm": 15.669568573881612, "learning_rate": 5e-06, "loss": 0.1935, "num_input_tokens_seen": 29968616, "step": 332 }, { "epoch": 1.5264367816091955, "loss": 0.1327584981918335, "loss_ce": 0.00019014105782844126, "loss_iou": 0.3984375, "loss_num": 0.0264892578125, "loss_xval": 0.1328125, "num_input_tokens_seen": 29968616, "step": 332 }, { "epoch": 1.5310344827586206, "grad_norm": 10.895228295518322, "learning_rate": 5e-06, "loss": 0.1781, "num_input_tokens_seen": 30058964, "step": 333 }, { "epoch": 1.5310344827586206, "loss": 0.13389872014522552, "loss_ce": 0.00044535251799970865, "loss_iou": 0.4453125, "loss_num": 0.0267333984375, "loss_xval": 0.1337890625, "num_input_tokens_seen": 30058964, "step": 333 }, { "epoch": 1.535632183908046, "grad_norm": 12.555340714374447, "learning_rate": 5e-06, "loss": 0.2026, "num_input_tokens_seen": 30149276, "step": 334 }, { "epoch": 1.535632183908046, "loss": 0.22426463663578033, "loss_ce": 0.0006928644143044949, "loss_iou": 0.390625, "loss_num": 0.044677734375, "loss_xval": 0.2236328125, "num_input_tokens_seen": 30149276, "step": 334 }, { "epoch": 1.5402298850574714, "grad_norm": 6.653750036819891, "learning_rate": 5e-06, "loss": 0.1895, "num_input_tokens_seen": 30239660, "step": 335 }, { "epoch": 1.5402298850574714, "loss": 0.2286517322063446, "loss_ce": 0.009657589718699455, "loss_iou": 0.47265625, "loss_num": 0.0439453125, "loss_xval": 0.21875, "num_input_tokens_seen": 30239660, "step": 335 }, { "epoch": 1.5448275862068965, "grad_norm": 7.196935162615249, "learning_rate": 5e-06, "loss": 0.2105, "num_input_tokens_seen": 30328584, "step": 336 }, { "epoch": 1.5448275862068965, "loss": 0.2146240770816803, "loss_ce": 0.00045170937664806843, "loss_iou": 0.34375, "loss_num": 0.042724609375, "loss_xval": 0.2138671875, "num_input_tokens_seen": 30328584, "step": 336 }, { "epoch": 1.5494252873563217, "grad_norm": 15.439783012231938, "learning_rate": 5e-06, "loss": 0.2042, "num_input_tokens_seen": 30418984, "step": 337 }, { "epoch": 1.5494252873563217, "loss": 0.2664060592651367, "loss_ce": 0.000842072709929198, "loss_iou": 0.46484375, "loss_num": 0.052978515625, "loss_xval": 0.265625, "num_input_tokens_seen": 30418984, "step": 337 }, { "epoch": 1.5540229885057473, "grad_norm": 9.972362426319057, "learning_rate": 5e-06, "loss": 0.1914, "num_input_tokens_seen": 30509544, "step": 338 }, { "epoch": 1.5540229885057473, "loss": 0.20261165499687195, "loss_ce": 0.0002190732047893107, "loss_iou": 0.443359375, "loss_num": 0.04052734375, "loss_xval": 0.2021484375, "num_input_tokens_seen": 30509544, "step": 338 }, { "epoch": 1.5586206896551724, "grad_norm": 15.96705640498333, "learning_rate": 5e-06, "loss": 0.1748, "num_input_tokens_seen": 30599924, "step": 339 }, { "epoch": 1.5586206896551724, "loss": 0.14146339893341064, "loss_ce": 0.0009604630176909268, "loss_iou": 0.46484375, "loss_num": 0.028076171875, "loss_xval": 0.140625, "num_input_tokens_seen": 30599924, "step": 339 }, { "epoch": 1.5632183908045976, "grad_norm": 24.02833413826726, "learning_rate": 5e-06, "loss": 0.1581, "num_input_tokens_seen": 30690300, "step": 340 }, { "epoch": 1.5632183908045976, "loss": 0.14789816737174988, "loss_ce": 0.0007423943607136607, "loss_iou": 0.451171875, "loss_num": 0.0294189453125, "loss_xval": 0.1474609375, "num_input_tokens_seen": 30690300, "step": 340 }, { "epoch": 1.567816091954023, "grad_norm": 11.132936405519033, "learning_rate": 5e-06, "loss": 0.1858, "num_input_tokens_seen": 30780564, "step": 341 }, { "epoch": 1.567816091954023, "loss": 0.18376675248146057, "loss_ce": 0.00038663047598674893, "loss_iou": 0.455078125, "loss_num": 0.03662109375, "loss_xval": 0.18359375, "num_input_tokens_seen": 30780564, "step": 341 }, { "epoch": 1.5724137931034483, "grad_norm": 11.301299582007728, "learning_rate": 5e-06, "loss": 0.1446, "num_input_tokens_seen": 30870936, "step": 342 }, { "epoch": 1.5724137931034483, "loss": 0.15917402505874634, "loss_ce": 0.00011640776210697368, "loss_iou": 0.51953125, "loss_num": 0.03173828125, "loss_xval": 0.1591796875, "num_input_tokens_seen": 30870936, "step": 342 }, { "epoch": 1.5770114942528735, "grad_norm": 4.3022352764349225, "learning_rate": 5e-06, "loss": 0.1434, "num_input_tokens_seen": 30961368, "step": 343 }, { "epoch": 1.5770114942528735, "loss": 0.13939973711967468, "loss_ce": 0.0004837117448914796, "loss_iou": 0.4765625, "loss_num": 0.02783203125, "loss_xval": 0.138671875, "num_input_tokens_seen": 30961368, "step": 343 }, { "epoch": 1.5816091954022988, "grad_norm": 21.124622330389396, "learning_rate": 5e-06, "loss": 0.1911, "num_input_tokens_seen": 31051716, "step": 344 }, { "epoch": 1.5816091954022988, "loss": 0.2080269455909729, "loss_ce": 0.00011068060121033341, "loss_iou": 0.412109375, "loss_num": 0.04150390625, "loss_xval": 0.2080078125, "num_input_tokens_seen": 31051716, "step": 344 }, { "epoch": 1.5862068965517242, "grad_norm": 6.603147217700077, "learning_rate": 5e-06, "loss": 0.2052, "num_input_tokens_seen": 31142164, "step": 345 }, { "epoch": 1.5862068965517242, "loss": 0.19146263599395752, "loss_ce": 5.637338836095296e-05, "loss_iou": 0.359375, "loss_num": 0.038330078125, "loss_xval": 0.19140625, "num_input_tokens_seen": 31142164, "step": 345 }, { "epoch": 1.5908045977011493, "grad_norm": 18.413342940258104, "learning_rate": 5e-06, "loss": 0.1885, "num_input_tokens_seen": 31232568, "step": 346 }, { "epoch": 1.5908045977011493, "loss": 0.21431368589401245, "loss_ce": 8.028043521335348e-05, "loss_iou": 0.4296875, "loss_num": 0.04296875, "loss_xval": 0.2138671875, "num_input_tokens_seen": 31232568, "step": 346 }, { "epoch": 1.5954022988505747, "grad_norm": 3.750494996396292, "learning_rate": 5e-06, "loss": 0.2055, "num_input_tokens_seen": 31322940, "step": 347 }, { "epoch": 1.5954022988505747, "loss": 0.2030225545167923, "loss_ce": 0.00014168729830998927, "loss_iou": 0.470703125, "loss_num": 0.04052734375, "loss_xval": 0.203125, "num_input_tokens_seen": 31322940, "step": 347 }, { "epoch": 1.6, "grad_norm": 19.608744946070097, "learning_rate": 5e-06, "loss": 0.2093, "num_input_tokens_seen": 31412484, "step": 348 }, { "epoch": 1.6, "loss": 0.21284618973731995, "loss_ce": 0.00016918416076805443, "loss_iou": 0.4375, "loss_num": 0.04248046875, "loss_xval": 0.212890625, "num_input_tokens_seen": 31412484, "step": 348 }, { "epoch": 1.6045977011494252, "grad_norm": 16.926454810756088, "learning_rate": 5e-06, "loss": 0.1911, "num_input_tokens_seen": 31502796, "step": 349 }, { "epoch": 1.6045977011494252, "loss": 0.236657053232193, "loss_ce": 0.002068430185317993, "loss_iou": 0.3828125, "loss_num": 0.046875, "loss_xval": 0.234375, "num_input_tokens_seen": 31502796, "step": 349 }, { "epoch": 1.6091954022988506, "grad_norm": 8.440280678490312, "learning_rate": 5e-06, "loss": 0.2148, "num_input_tokens_seen": 31593140, "step": 350 }, { "epoch": 1.6091954022988506, "loss": 0.1520470678806305, "loss_ce": 3.901964009855874e-05, "loss_iou": 0.41796875, "loss_num": 0.0303955078125, "loss_xval": 0.15234375, "num_input_tokens_seen": 31593140, "step": 350 }, { "epoch": 1.613793103448276, "grad_norm": 12.531015324402237, "learning_rate": 5e-06, "loss": 0.2053, "num_input_tokens_seen": 31683560, "step": 351 }, { "epoch": 1.613793103448276, "loss": 0.28503406047821045, "loss_ce": 0.00012196854368085042, "loss_iou": 0.515625, "loss_num": 0.056884765625, "loss_xval": 0.28515625, "num_input_tokens_seen": 31683560, "step": 351 }, { "epoch": 1.6183908045977011, "grad_norm": 13.56199610824294, "learning_rate": 5e-06, "loss": 0.2573, "num_input_tokens_seen": 31774020, "step": 352 }, { "epoch": 1.6183908045977011, "loss": 0.2630327045917511, "loss_ce": 0.0005510285845957696, "loss_iou": 0.39453125, "loss_num": 0.052490234375, "loss_xval": 0.26171875, "num_input_tokens_seen": 31774020, "step": 352 }, { "epoch": 1.6229885057471263, "grad_norm": 16.539672672228185, "learning_rate": 5e-06, "loss": 0.218, "num_input_tokens_seen": 31864284, "step": 353 }, { "epoch": 1.6229885057471263, "loss": 0.2166557013988495, "loss_ce": 0.004497497342526913, "loss_iou": 0.4140625, "loss_num": 0.04248046875, "loss_xval": 0.2119140625, "num_input_tokens_seen": 31864284, "step": 353 }, { "epoch": 1.6275862068965519, "grad_norm": 6.7039057056478555, "learning_rate": 5e-06, "loss": 0.1477, "num_input_tokens_seen": 31954736, "step": 354 }, { "epoch": 1.6275862068965519, "loss": 0.12781822681427002, "loss_ce": 0.00013267630129121244, "loss_iou": 0.421875, "loss_num": 0.0255126953125, "loss_xval": 0.1279296875, "num_input_tokens_seen": 31954736, "step": 354 }, { "epoch": 1.632183908045977, "grad_norm": 24.22876257944078, "learning_rate": 5e-06, "loss": 0.1899, "num_input_tokens_seen": 32045092, "step": 355 }, { "epoch": 1.632183908045977, "loss": 0.21440809965133667, "loss_ce": 5.264465289656073e-05, "loss_iou": 0.439453125, "loss_num": 0.04296875, "loss_xval": 0.21484375, "num_input_tokens_seen": 32045092, "step": 355 }, { "epoch": 1.6367816091954022, "grad_norm": 6.350667267782482, "learning_rate": 5e-06, "loss": 0.1784, "num_input_tokens_seen": 32135436, "step": 356 }, { "epoch": 1.6367816091954022, "loss": 0.20722705125808716, "loss_ce": 0.005749986041337252, "loss_iou": 0.39453125, "loss_num": 0.040283203125, "loss_xval": 0.201171875, "num_input_tokens_seen": 32135436, "step": 356 }, { "epoch": 1.6413793103448275, "grad_norm": 24.037632779184456, "learning_rate": 5e-06, "loss": 0.2292, "num_input_tokens_seen": 32225760, "step": 357 }, { "epoch": 1.6413793103448275, "loss": 0.1975146234035492, "loss_ce": 0.0004473668523132801, "loss_iou": 0.546875, "loss_num": 0.039306640625, "loss_xval": 0.197265625, "num_input_tokens_seen": 32225760, "step": 357 }, { "epoch": 1.645977011494253, "grad_norm": 10.97286648357755, "learning_rate": 5e-06, "loss": 0.2958, "num_input_tokens_seen": 32314628, "step": 358 }, { "epoch": 1.645977011494253, "loss": 0.30159226059913635, "loss_ce": 7.8596654930152e-05, "loss_iou": 0.322265625, "loss_num": 0.060302734375, "loss_xval": 0.30078125, "num_input_tokens_seen": 32314628, "step": 358 }, { "epoch": 1.650574712643678, "grad_norm": 14.188317923001321, "learning_rate": 5e-06, "loss": 0.1884, "num_input_tokens_seen": 32405028, "step": 359 }, { "epoch": 1.650574712643678, "loss": 0.1701948344707489, "loss_ce": 0.0005018344381824136, "loss_iou": 0.46875, "loss_num": 0.033935546875, "loss_xval": 0.169921875, "num_input_tokens_seen": 32405028, "step": 359 }, { "epoch": 1.6551724137931034, "grad_norm": 15.866397059455883, "learning_rate": 5e-06, "loss": 0.2404, "num_input_tokens_seen": 32495460, "step": 360 }, { "epoch": 1.6551724137931034, "loss": 0.21389362215995789, "loss_ce": 0.00789997074753046, "loss_iou": 0.447265625, "loss_num": 0.041259765625, "loss_xval": 0.2060546875, "num_input_tokens_seen": 32495460, "step": 360 }, { "epoch": 1.6597701149425288, "grad_norm": 22.63781592648334, "learning_rate": 5e-06, "loss": 0.1604, "num_input_tokens_seen": 32585812, "step": 361 }, { "epoch": 1.6597701149425288, "loss": 0.1410699486732483, "loss_ce": 0.0007196052465587854, "loss_iou": 0.37890625, "loss_num": 0.028076171875, "loss_xval": 0.140625, "num_input_tokens_seen": 32585812, "step": 361 }, { "epoch": 1.664367816091954, "grad_norm": 24.104025272898923, "learning_rate": 5e-06, "loss": 0.1888, "num_input_tokens_seen": 32676080, "step": 362 }, { "epoch": 1.664367816091954, "loss": 0.18728001415729523, "loss_ce": 2.41522429860197e-05, "loss_iou": 0.478515625, "loss_num": 0.037353515625, "loss_xval": 0.1875, "num_input_tokens_seen": 32676080, "step": 362 }, { "epoch": 1.6689655172413793, "grad_norm": 4.251795484895364, "learning_rate": 5e-06, "loss": 0.2331, "num_input_tokens_seen": 32766524, "step": 363 }, { "epoch": 1.6689655172413793, "loss": 0.26753073930740356, "loss_ce": 0.00013572408352047205, "loss_iou": 0.408203125, "loss_num": 0.053466796875, "loss_xval": 0.267578125, "num_input_tokens_seen": 32766524, "step": 363 }, { "epoch": 1.6735632183908047, "grad_norm": 16.06020227141594, "learning_rate": 5e-06, "loss": 0.2075, "num_input_tokens_seen": 32856880, "step": 364 }, { "epoch": 1.6735632183908047, "loss": 0.20815755426883698, "loss_ce": 8.871030149748549e-05, "loss_iou": 0.392578125, "loss_num": 0.041748046875, "loss_xval": 0.2080078125, "num_input_tokens_seen": 32856880, "step": 364 }, { "epoch": 1.6781609195402298, "grad_norm": 4.038583152340203, "learning_rate": 5e-06, "loss": 0.199, "num_input_tokens_seen": 32947228, "step": 365 }, { "epoch": 1.6781609195402298, "loss": 0.1920887678861618, "loss_ce": 0.00013320505968295038, "loss_iou": 0.4765625, "loss_num": 0.038330078125, "loss_xval": 0.1923828125, "num_input_tokens_seen": 32947228, "step": 365 }, { "epoch": 1.6827586206896552, "grad_norm": 10.849275070130034, "learning_rate": 5e-06, "loss": 0.2501, "num_input_tokens_seen": 33037680, "step": 366 }, { "epoch": 1.6827586206896552, "loss": 0.28570684790611267, "loss_ce": 0.00018438987899571657, "loss_iou": 0.37109375, "loss_num": 0.05712890625, "loss_xval": 0.28515625, "num_input_tokens_seen": 33037680, "step": 366 }, { "epoch": 1.6873563218390806, "grad_norm": 15.71736414697125, "learning_rate": 5e-06, "loss": 0.2526, "num_input_tokens_seen": 33127980, "step": 367 }, { "epoch": 1.6873563218390806, "loss": 0.19320005178451538, "loss_ce": 0.00048154895193874836, "loss_iou": 0.4296875, "loss_num": 0.03857421875, "loss_xval": 0.1923828125, "num_input_tokens_seen": 33127980, "step": 367 }, { "epoch": 1.6919540229885057, "grad_norm": 11.7136055813051, "learning_rate": 5e-06, "loss": 0.214, "num_input_tokens_seen": 33218308, "step": 368 }, { "epoch": 1.6919540229885057, "loss": 0.21528667211532593, "loss_ce": 0.003677786560729146, "loss_iou": 0.48046875, "loss_num": 0.04248046875, "loss_xval": 0.2119140625, "num_input_tokens_seen": 33218308, "step": 368 }, { "epoch": 1.6965517241379309, "grad_norm": 9.03276554803375, "learning_rate": 5e-06, "loss": 0.1567, "num_input_tokens_seen": 33308680, "step": 369 }, { "epoch": 1.6965517241379309, "loss": 0.20362484455108643, "loss_ce": 0.00010311185906175524, "loss_iou": 0.44140625, "loss_num": 0.040771484375, "loss_xval": 0.203125, "num_input_tokens_seen": 33308680, "step": 369 }, { "epoch": 1.7011494252873565, "grad_norm": 4.661137477640627, "learning_rate": 5e-06, "loss": 0.2119, "num_input_tokens_seen": 33399264, "step": 370 }, { "epoch": 1.7011494252873565, "loss": 0.13792553544044495, "loss_ce": 0.00016917982429731637, "loss_iou": 0.439453125, "loss_num": 0.027587890625, "loss_xval": 0.1376953125, "num_input_tokens_seen": 33399264, "step": 370 }, { "epoch": 1.7057471264367816, "grad_norm": 3.2251449715057787, "learning_rate": 5e-06, "loss": 0.1771, "num_input_tokens_seen": 33489564, "step": 371 }, { "epoch": 1.7057471264367816, "loss": 0.14613063633441925, "loss_ce": 0.00024134977138601243, "loss_iou": 0.3984375, "loss_num": 0.0291748046875, "loss_xval": 0.1455078125, "num_input_tokens_seen": 33489564, "step": 371 }, { "epoch": 1.7103448275862068, "grad_norm": 8.964171049100557, "learning_rate": 5e-06, "loss": 0.2087, "num_input_tokens_seen": 33579112, "step": 372 }, { "epoch": 1.7103448275862068, "loss": 0.22618785500526428, "loss_ce": 0.006644395180046558, "loss_iou": 0.578125, "loss_num": 0.0439453125, "loss_xval": 0.2197265625, "num_input_tokens_seen": 33579112, "step": 372 }, { "epoch": 1.7149425287356321, "grad_norm": 5.141600835893525, "learning_rate": 5e-06, "loss": 0.1954, "num_input_tokens_seen": 33669472, "step": 373 }, { "epoch": 1.7149425287356321, "loss": 0.24420268833637238, "loss_ce": 0.0002451670588925481, "loss_iou": 0.37890625, "loss_num": 0.048828125, "loss_xval": 0.244140625, "num_input_tokens_seen": 33669472, "step": 373 }, { "epoch": 1.7195402298850575, "grad_norm": 8.029154993860924, "learning_rate": 5e-06, "loss": 0.2167, "num_input_tokens_seen": 33759812, "step": 374 }, { "epoch": 1.7195402298850575, "loss": 0.21689020097255707, "loss_ce": 0.005037169903516769, "loss_iou": 0.40234375, "loss_num": 0.04248046875, "loss_xval": 0.2119140625, "num_input_tokens_seen": 33759812, "step": 374 }, { "epoch": 1.7241379310344827, "grad_norm": 8.857319520624422, "learning_rate": 5e-06, "loss": 0.2094, "num_input_tokens_seen": 33848696, "step": 375 }, { "epoch": 1.7241379310344827, "loss": 0.24398654699325562, "loss_ce": 5.9540048823691905e-05, "loss_iou": 0.388671875, "loss_num": 0.048828125, "loss_xval": 0.244140625, "num_input_tokens_seen": 33848696, "step": 375 }, { "epoch": 1.728735632183908, "grad_norm": 29.84409998795184, "learning_rate": 5e-06, "loss": 0.1964, "num_input_tokens_seen": 33939052, "step": 376 }, { "epoch": 1.728735632183908, "loss": 0.2298891395330429, "loss_ce": 3.074748383369297e-05, "loss_iou": 0.41796875, "loss_num": 0.0458984375, "loss_xval": 0.2294921875, "num_input_tokens_seen": 33939052, "step": 376 }, { "epoch": 1.7333333333333334, "grad_norm": 16.908297420855593, "learning_rate": 5e-06, "loss": 0.1799, "num_input_tokens_seen": 34029428, "step": 377 }, { "epoch": 1.7333333333333334, "loss": 0.17225658893585205, "loss_ce": 0.00032055945484898984, "loss_iou": 0.486328125, "loss_num": 0.034423828125, "loss_xval": 0.171875, "num_input_tokens_seen": 34029428, "step": 377 }, { "epoch": 1.7379310344827585, "grad_norm": 5.924418877591673, "learning_rate": 5e-06, "loss": 0.1529, "num_input_tokens_seen": 34119072, "step": 378 }, { "epoch": 1.7379310344827585, "loss": 0.1289907693862915, "loss_ce": 0.00023710176174063236, "loss_iou": 0.43359375, "loss_num": 0.0257568359375, "loss_xval": 0.12890625, "num_input_tokens_seen": 34119072, "step": 378 }, { "epoch": 1.742528735632184, "grad_norm": 4.126805999853441, "learning_rate": 5e-06, "loss": 0.216, "num_input_tokens_seen": 34209448, "step": 379 }, { "epoch": 1.742528735632184, "loss": 0.21730422973632812, "loss_ce": 0.00011062939302064478, "loss_iou": 0.5625, "loss_num": 0.04345703125, "loss_xval": 0.216796875, "num_input_tokens_seen": 34209448, "step": 379 }, { "epoch": 1.7471264367816093, "grad_norm": 6.583943856738861, "learning_rate": 5e-06, "loss": 0.2227, "num_input_tokens_seen": 34299756, "step": 380 }, { "epoch": 1.7471264367816093, "loss": 0.23842403292655945, "loss_ce": 8.174288086593151e-05, "loss_iou": 0.416015625, "loss_num": 0.047607421875, "loss_xval": 0.23828125, "num_input_tokens_seen": 34299756, "step": 380 }, { "epoch": 1.7517241379310344, "grad_norm": 6.121020487954506, "learning_rate": 5e-06, "loss": 0.1894, "num_input_tokens_seen": 34390068, "step": 381 }, { "epoch": 1.7517241379310344, "loss": 0.2400810569524765, "loss_ce": 0.00012134698044974357, "loss_iou": 0.365234375, "loss_num": 0.048095703125, "loss_xval": 0.240234375, "num_input_tokens_seen": 34390068, "step": 381 }, { "epoch": 1.7563218390804598, "grad_norm": 14.275086476453184, "learning_rate": 5e-06, "loss": 0.1368, "num_input_tokens_seen": 34480520, "step": 382 }, { "epoch": 1.7563218390804598, "loss": 0.08874941617250443, "loss_ce": 9.585011866874993e-05, "loss_iou": 0.333984375, "loss_num": 0.0177001953125, "loss_xval": 0.0888671875, "num_input_tokens_seen": 34480520, "step": 382 }, { "epoch": 1.7609195402298852, "grad_norm": 10.514767544788414, "learning_rate": 5e-06, "loss": 0.1745, "num_input_tokens_seen": 34570948, "step": 383 }, { "epoch": 1.7609195402298852, "loss": 0.1786903589963913, "loss_ce": 0.001032268744893372, "loss_iou": 0.4921875, "loss_num": 0.035400390625, "loss_xval": 0.177734375, "num_input_tokens_seen": 34570948, "step": 383 }, { "epoch": 1.7655172413793103, "grad_norm": 61.44054500522531, "learning_rate": 5e-06, "loss": 0.1866, "num_input_tokens_seen": 34661360, "step": 384 }, { "epoch": 1.7655172413793103, "loss": 0.2136577069759369, "loss_ce": 0.0018352140905335546, "loss_iou": 0.3203125, "loss_num": 0.04248046875, "loss_xval": 0.2119140625, "num_input_tokens_seen": 34661360, "step": 384 }, { "epoch": 1.7701149425287355, "grad_norm": 20.341892049512285, "learning_rate": 5e-06, "loss": 0.2173, "num_input_tokens_seen": 34751736, "step": 385 }, { "epoch": 1.7701149425287355, "loss": 0.29879891872406006, "loss_ce": 0.00033700710628181696, "loss_iou": 0.42578125, "loss_num": 0.0595703125, "loss_xval": 0.298828125, "num_input_tokens_seen": 34751736, "step": 385 }, { "epoch": 1.774712643678161, "grad_norm": 10.713533548556878, "learning_rate": 5e-06, "loss": 0.2004, "num_input_tokens_seen": 34841944, "step": 386 }, { "epoch": 1.774712643678161, "loss": 0.2071501910686493, "loss_ce": 0.00011893494956893846, "loss_iou": 0.421875, "loss_num": 0.041259765625, "loss_xval": 0.20703125, "num_input_tokens_seen": 34841944, "step": 386 }, { "epoch": 1.7793103448275862, "grad_norm": 6.168349303523133, "learning_rate": 5e-06, "loss": 0.1741, "num_input_tokens_seen": 34932320, "step": 387 }, { "epoch": 1.7793103448275862, "loss": 0.18104185163974762, "loss_ce": 7.260293932631612e-05, "loss_iou": 0.46484375, "loss_num": 0.0361328125, "loss_xval": 0.1806640625, "num_input_tokens_seen": 34932320, "step": 387 }, { "epoch": 1.7839080459770114, "grad_norm": 10.622510358830738, "learning_rate": 5e-06, "loss": 0.2128, "num_input_tokens_seen": 35022772, "step": 388 }, { "epoch": 1.7839080459770114, "loss": 0.18907198309898376, "loss_ce": 0.00041230578790418804, "loss_iou": 0.42578125, "loss_num": 0.037841796875, "loss_xval": 0.1884765625, "num_input_tokens_seen": 35022772, "step": 388 }, { "epoch": 1.7885057471264367, "grad_norm": 11.951897075599353, "learning_rate": 5e-06, "loss": 0.1647, "num_input_tokens_seen": 35113260, "step": 389 }, { "epoch": 1.7885057471264367, "loss": 0.16620348393917084, "loss_ce": 0.0003404413000680506, "loss_iou": 0.357421875, "loss_num": 0.033203125, "loss_xval": 0.166015625, "num_input_tokens_seen": 35113260, "step": 389 }, { "epoch": 1.793103448275862, "grad_norm": 4.828903526280168, "learning_rate": 5e-06, "loss": 0.2304, "num_input_tokens_seen": 35203688, "step": 390 }, { "epoch": 1.793103448275862, "loss": 0.29129573702812195, "loss_ce": 0.0006463380996137857, "loss_iou": 0.4921875, "loss_num": 0.05810546875, "loss_xval": 0.291015625, "num_input_tokens_seen": 35203688, "step": 390 }, { "epoch": 1.7977011494252872, "grad_norm": 6.198908160148479, "learning_rate": 5e-06, "loss": 0.1866, "num_input_tokens_seen": 35294020, "step": 391 }, { "epoch": 1.7977011494252872, "loss": 0.18983140587806702, "loss_ce": 0.00046984368236735463, "loss_iou": 0.435546875, "loss_num": 0.037841796875, "loss_xval": 0.189453125, "num_input_tokens_seen": 35294020, "step": 391 }, { "epoch": 1.8022988505747126, "grad_norm": 11.14648078385914, "learning_rate": 5e-06, "loss": 0.1362, "num_input_tokens_seen": 35384488, "step": 392 }, { "epoch": 1.8022988505747126, "loss": 0.1650119572877884, "loss_ce": 0.0010715241078287363, "loss_iou": 0.439453125, "loss_num": 0.03271484375, "loss_xval": 0.1640625, "num_input_tokens_seen": 35384488, "step": 392 }, { "epoch": 1.806896551724138, "grad_norm": 18.559559905574613, "learning_rate": 5e-06, "loss": 0.1779, "num_input_tokens_seen": 35474708, "step": 393 }, { "epoch": 1.806896551724138, "loss": 0.19601336121559143, "loss_ce": 0.000517760228831321, "loss_iou": 0.4609375, "loss_num": 0.0390625, "loss_xval": 0.1953125, "num_input_tokens_seen": 35474708, "step": 393 }, { "epoch": 1.8114942528735631, "grad_norm": 5.146339340993819, "learning_rate": 5e-06, "loss": 0.1527, "num_input_tokens_seen": 35564984, "step": 394 }, { "epoch": 1.8114942528735631, "loss": 0.1250748336315155, "loss_ce": 9.00992818060331e-05, "loss_iou": 0.416015625, "loss_num": 0.0250244140625, "loss_xval": 0.125, "num_input_tokens_seen": 35564984, "step": 394 }, { "epoch": 1.8160919540229885, "grad_norm": 13.805918822000008, "learning_rate": 5e-06, "loss": 0.2097, "num_input_tokens_seen": 35655304, "step": 395 }, { "epoch": 1.8160919540229885, "loss": 0.1994381844997406, "loss_ce": 6.685496191494167e-05, "loss_iou": 0.43359375, "loss_num": 0.039794921875, "loss_xval": 0.19921875, "num_input_tokens_seen": 35655304, "step": 395 }, { "epoch": 1.8206896551724139, "grad_norm": 17.32882624388584, "learning_rate": 5e-06, "loss": 0.2631, "num_input_tokens_seen": 35744700, "step": 396 }, { "epoch": 1.8206896551724139, "loss": 0.3162878155708313, "loss_ce": 0.006351289339363575, "loss_iou": 0.3671875, "loss_num": 0.06201171875, "loss_xval": 0.310546875, "num_input_tokens_seen": 35744700, "step": 396 }, { "epoch": 1.825287356321839, "grad_norm": 26.732334449078184, "learning_rate": 5e-06, "loss": 0.2161, "num_input_tokens_seen": 35835144, "step": 397 }, { "epoch": 1.825287356321839, "loss": 0.2433795928955078, "loss_ce": 0.0004596640937961638, "loss_iou": 0.408203125, "loss_num": 0.048583984375, "loss_xval": 0.2431640625, "num_input_tokens_seen": 35835144, "step": 397 }, { "epoch": 1.8298850574712644, "grad_norm": 5.47457435472519, "learning_rate": 5e-06, "loss": 0.1831, "num_input_tokens_seen": 35925388, "step": 398 }, { "epoch": 1.8298850574712644, "loss": 0.1931520402431488, "loss_ce": 0.00018938624998554587, "loss_iou": 0.4296875, "loss_num": 0.03857421875, "loss_xval": 0.193359375, "num_input_tokens_seen": 35925388, "step": 398 }, { "epoch": 1.8344827586206898, "grad_norm": 6.840227842822447, "learning_rate": 5e-06, "loss": 0.1699, "num_input_tokens_seen": 36015004, "step": 399 }, { "epoch": 1.8344827586206898, "loss": 0.142633855342865, "loss_ce": 2.5207998987752944e-05, "loss_iou": 0.46484375, "loss_num": 0.028564453125, "loss_xval": 0.142578125, "num_input_tokens_seen": 36015004, "step": 399 }, { "epoch": 1.839080459770115, "grad_norm": 8.746845766431045, "learning_rate": 5e-06, "loss": 0.1852, "num_input_tokens_seen": 36105580, "step": 400 }, { "epoch": 1.839080459770115, "loss": 0.15033434331417084, "loss_ce": 0.000493036350235343, "loss_iou": 0.443359375, "loss_num": 0.030029296875, "loss_xval": 0.1494140625, "num_input_tokens_seen": 36105580, "step": 400 }, { "epoch": 1.84367816091954, "grad_norm": 12.05362200433303, "learning_rate": 5e-06, "loss": 0.1043, "num_input_tokens_seen": 36196032, "step": 401 }, { "epoch": 1.84367816091954, "loss": 0.10613197833299637, "loss_ce": 0.00014442511019296944, "loss_iou": 0.4609375, "loss_num": 0.021240234375, "loss_xval": 0.10595703125, "num_input_tokens_seen": 36196032, "step": 401 }, { "epoch": 1.8482758620689657, "grad_norm": 12.871135990489064, "learning_rate": 5e-06, "loss": 0.1111, "num_input_tokens_seen": 36286396, "step": 402 }, { "epoch": 1.8482758620689657, "loss": 0.1379774510860443, "loss_ce": 3.800613194471225e-05, "loss_iou": 0.373046875, "loss_num": 0.027587890625, "loss_xval": 0.1376953125, "num_input_tokens_seen": 36286396, "step": 402 }, { "epoch": 1.8528735632183908, "grad_norm": 14.529023016496748, "learning_rate": 5e-06, "loss": 0.193, "num_input_tokens_seen": 36376740, "step": 403 }, { "epoch": 1.8528735632183908, "loss": 0.23211072385311127, "loss_ce": 0.003534059040248394, "loss_iou": 0.455078125, "loss_num": 0.0458984375, "loss_xval": 0.228515625, "num_input_tokens_seen": 36376740, "step": 403 }, { "epoch": 1.857471264367816, "grad_norm": 21.924185342651707, "learning_rate": 5e-06, "loss": 0.1735, "num_input_tokens_seen": 36466984, "step": 404 }, { "epoch": 1.857471264367816, "loss": 0.15595406293869019, "loss_ce": 0.00013130568549968302, "loss_iou": 0.470703125, "loss_num": 0.0311279296875, "loss_xval": 0.15625, "num_input_tokens_seen": 36466984, "step": 404 }, { "epoch": 1.8620689655172413, "grad_norm": 13.574410791488862, "learning_rate": 5e-06, "loss": 0.1739, "num_input_tokens_seen": 36557356, "step": 405 }, { "epoch": 1.8620689655172413, "loss": 0.17355313897132874, "loss_ce": 3.0175322535797022e-05, "loss_iou": 0.478515625, "loss_num": 0.03466796875, "loss_xval": 0.173828125, "num_input_tokens_seen": 36557356, "step": 405 }, { "epoch": 1.8666666666666667, "grad_norm": 7.0386023660360335, "learning_rate": 5e-06, "loss": 0.2389, "num_input_tokens_seen": 36647736, "step": 406 }, { "epoch": 1.8666666666666667, "loss": 0.25896725058555603, "loss_ce": 0.0009106049546971917, "loss_iou": 0.404296875, "loss_num": 0.0517578125, "loss_xval": 0.2578125, "num_input_tokens_seen": 36647736, "step": 406 }, { "epoch": 1.8712643678160918, "grad_norm": 22.160625349053383, "learning_rate": 5e-06, "loss": 0.122, "num_input_tokens_seen": 36738104, "step": 407 }, { "epoch": 1.8712643678160918, "loss": 0.1394781917333603, "loss_ce": 7.390179962385446e-05, "loss_iou": 0.474609375, "loss_num": 0.02783203125, "loss_xval": 0.1396484375, "num_input_tokens_seen": 36738104, "step": 407 }, { "epoch": 1.8758620689655172, "grad_norm": 17.35116305455773, "learning_rate": 5e-06, "loss": 0.2099, "num_input_tokens_seen": 36828500, "step": 408 }, { "epoch": 1.8758620689655172, "loss": 0.1852053552865982, "loss_ce": 0.00032986659789457917, "loss_iou": 0.380859375, "loss_num": 0.037109375, "loss_xval": 0.1845703125, "num_input_tokens_seen": 36828500, "step": 408 }, { "epoch": 1.8804597701149426, "grad_norm": 28.24772029390849, "learning_rate": 5e-06, "loss": 0.2476, "num_input_tokens_seen": 36918796, "step": 409 }, { "epoch": 1.8804597701149426, "loss": 0.2876579761505127, "loss_ce": 0.00016715031233616173, "loss_iou": 0.388671875, "loss_num": 0.0576171875, "loss_xval": 0.287109375, "num_input_tokens_seen": 36918796, "step": 409 }, { "epoch": 1.8850574712643677, "grad_norm": 11.025181667958632, "learning_rate": 5e-06, "loss": 0.1974, "num_input_tokens_seen": 37008328, "step": 410 }, { "epoch": 1.8850574712643677, "loss": 0.19846147298812866, "loss_ce": 0.0004329003859311342, "loss_iou": 0.40625, "loss_num": 0.03955078125, "loss_xval": 0.1982421875, "num_input_tokens_seen": 37008328, "step": 410 }, { "epoch": 1.889655172413793, "grad_norm": 9.446092449238963, "learning_rate": 5e-06, "loss": 0.2362, "num_input_tokens_seen": 37098728, "step": 411 }, { "epoch": 1.889655172413793, "loss": 0.2067907452583313, "loss_ce": 0.00021725523401983082, "loss_iou": 0.453125, "loss_num": 0.041259765625, "loss_xval": 0.20703125, "num_input_tokens_seen": 37098728, "step": 411 }, { "epoch": 1.8942528735632185, "grad_norm": 5.183189456998249, "learning_rate": 5e-06, "loss": 0.159, "num_input_tokens_seen": 37189052, "step": 412 }, { "epoch": 1.8942528735632185, "loss": 0.13279348611831665, "loss_ce": 4.201898264000192e-05, "loss_iou": 0.4453125, "loss_num": 0.026611328125, "loss_xval": 0.1328125, "num_input_tokens_seen": 37189052, "step": 412 }, { "epoch": 1.8988505747126436, "grad_norm": 9.650000103120663, "learning_rate": 5e-06, "loss": 0.1621, "num_input_tokens_seen": 37279516, "step": 413 }, { "epoch": 1.8988505747126436, "loss": 0.16115236282348633, "loss_ce": 8.058699313551188e-05, "loss_iou": 0.421875, "loss_num": 0.0322265625, "loss_xval": 0.1611328125, "num_input_tokens_seen": 37279516, "step": 413 }, { "epoch": 1.903448275862069, "grad_norm": 4.0534506430610495, "learning_rate": 5e-06, "loss": 0.2039, "num_input_tokens_seen": 37369960, "step": 414 }, { "epoch": 1.903448275862069, "loss": 0.19549965858459473, "loss_ce": 0.0015299279475584626, "loss_iou": 0.50390625, "loss_num": 0.038818359375, "loss_xval": 0.1943359375, "num_input_tokens_seen": 37369960, "step": 414 }, { "epoch": 1.9080459770114944, "grad_norm": 10.593561048071034, "learning_rate": 5e-06, "loss": 0.1738, "num_input_tokens_seen": 37460424, "step": 415 }, { "epoch": 1.9080459770114944, "loss": 0.16938260197639465, "loss_ce": 0.00025418028235435486, "loss_iou": 0.419921875, "loss_num": 0.033935546875, "loss_xval": 0.1689453125, "num_input_tokens_seen": 37460424, "step": 415 }, { "epoch": 1.9126436781609195, "grad_norm": 2.9328584393134003, "learning_rate": 5e-06, "loss": 0.1799, "num_input_tokens_seen": 37550816, "step": 416 }, { "epoch": 1.9126436781609195, "loss": 0.17176763713359833, "loss_ce": 4.522378003457561e-05, "loss_iou": 0.41015625, "loss_num": 0.034423828125, "loss_xval": 0.171875, "num_input_tokens_seen": 37550816, "step": 416 }, { "epoch": 1.9172413793103447, "grad_norm": 6.966385105625737, "learning_rate": 5e-06, "loss": 0.1852, "num_input_tokens_seen": 37640308, "step": 417 }, { "epoch": 1.9172413793103447, "loss": 0.18929776549339294, "loss_ce": 8.878001244738698e-05, "loss_iou": 0.3359375, "loss_num": 0.037841796875, "loss_xval": 0.189453125, "num_input_tokens_seen": 37640308, "step": 417 }, { "epoch": 1.9218390804597703, "grad_norm": 28.585591957284574, "learning_rate": 5e-06, "loss": 0.1666, "num_input_tokens_seen": 37730788, "step": 418 }, { "epoch": 1.9218390804597703, "loss": 0.20673710107803345, "loss_ce": 0.005260062403976917, "loss_iou": 0.373046875, "loss_num": 0.040283203125, "loss_xval": 0.201171875, "num_input_tokens_seen": 37730788, "step": 418 }, { "epoch": 1.9264367816091954, "grad_norm": 14.331276777730162, "learning_rate": 5e-06, "loss": 0.1901, "num_input_tokens_seen": 37821240, "step": 419 }, { "epoch": 1.9264367816091954, "loss": 0.17201045155525208, "loss_ce": 0.0006237286143004894, "loss_iou": 0.474609375, "loss_num": 0.0341796875, "loss_xval": 0.171875, "num_input_tokens_seen": 37821240, "step": 419 }, { "epoch": 1.9310344827586206, "grad_norm": 51.49279529680463, "learning_rate": 5e-06, "loss": 0.1523, "num_input_tokens_seen": 37911548, "step": 420 }, { "epoch": 1.9310344827586206, "loss": 0.10174712538719177, "loss_ce": 0.0005813572788611054, "loss_iou": 0.5, "loss_num": 0.020263671875, "loss_xval": 0.10107421875, "num_input_tokens_seen": 37911548, "step": 420 }, { "epoch": 1.935632183908046, "grad_norm": 20.70402147732612, "learning_rate": 5e-06, "loss": 0.1432, "num_input_tokens_seen": 38001920, "step": 421 }, { "epoch": 1.935632183908046, "loss": 0.1458110809326172, "loss_ce": 0.00012014327512588352, "loss_iou": 0.396484375, "loss_num": 0.0291748046875, "loss_xval": 0.1455078125, "num_input_tokens_seen": 38001920, "step": 421 }, { "epoch": 1.9402298850574713, "grad_norm": 15.948122093541322, "learning_rate": 5e-06, "loss": 0.2476, "num_input_tokens_seen": 38092224, "step": 422 }, { "epoch": 1.9402298850574713, "loss": 0.23001374304294586, "loss_ce": 0.00039948339690454304, "loss_iou": 0.4140625, "loss_num": 0.0458984375, "loss_xval": 0.2294921875, "num_input_tokens_seen": 38092224, "step": 422 }, { "epoch": 1.9448275862068964, "grad_norm": 4.692813126881015, "learning_rate": 5e-06, "loss": 0.19, "num_input_tokens_seen": 38181780, "step": 423 }, { "epoch": 1.9448275862068964, "loss": 0.2313835322856903, "loss_ce": 0.00016710199997760355, "loss_iou": 0.41015625, "loss_num": 0.046142578125, "loss_xval": 0.2314453125, "num_input_tokens_seen": 38181780, "step": 423 }, { "epoch": 1.9494252873563218, "grad_norm": 10.416976166382188, "learning_rate": 5e-06, "loss": 0.1808, "num_input_tokens_seen": 38272044, "step": 424 }, { "epoch": 1.9494252873563218, "loss": 0.20144467055797577, "loss_ce": 0.0002117574622388929, "loss_iou": 0.39453125, "loss_num": 0.040283203125, "loss_xval": 0.201171875, "num_input_tokens_seen": 38272044, "step": 424 }, { "epoch": 1.9540229885057472, "grad_norm": 19.032893916715878, "learning_rate": 5e-06, "loss": 0.1744, "num_input_tokens_seen": 38362384, "step": 425 }, { "epoch": 1.9540229885057472, "loss": 0.14751462638378143, "loss_ce": 0.00011472392361611128, "loss_iou": 0.302734375, "loss_num": 0.0294189453125, "loss_xval": 0.1474609375, "num_input_tokens_seen": 38362384, "step": 425 }, { "epoch": 1.9586206896551723, "grad_norm": 4.116116262537347, "learning_rate": 5e-06, "loss": 0.1736, "num_input_tokens_seen": 38452876, "step": 426 }, { "epoch": 1.9586206896551723, "loss": 0.20857056975364685, "loss_ce": 0.00031861409661360085, "loss_iou": 0.375, "loss_num": 0.04150390625, "loss_xval": 0.2080078125, "num_input_tokens_seen": 38452876, "step": 426 }, { "epoch": 1.9632183908045977, "grad_norm": 12.769621257575608, "learning_rate": 5e-06, "loss": 0.2186, "num_input_tokens_seen": 38543180, "step": 427 }, { "epoch": 1.9632183908045977, "loss": 0.21470102667808533, "loss_ce": 0.0008338369661942124, "loss_iou": 0.42578125, "loss_num": 0.042724609375, "loss_xval": 0.2138671875, "num_input_tokens_seen": 38543180, "step": 427 }, { "epoch": 1.967816091954023, "grad_norm": 4.963161851563267, "learning_rate": 5e-06, "loss": 0.1961, "num_input_tokens_seen": 38633460, "step": 428 }, { "epoch": 1.967816091954023, "loss": 0.26360833644866943, "loss_ce": 0.00018057512352243066, "loss_iou": 0.4140625, "loss_num": 0.052734375, "loss_xval": 0.263671875, "num_input_tokens_seen": 38633460, "step": 428 }, { "epoch": 1.9724137931034482, "grad_norm": 9.041750000087218, "learning_rate": 5e-06, "loss": 0.1099, "num_input_tokens_seen": 38723828, "step": 429 }, { "epoch": 1.9724137931034482, "loss": 0.12721416354179382, "loss_ce": 7.794749399181455e-05, "loss_iou": 0.39453125, "loss_num": 0.025390625, "loss_xval": 0.126953125, "num_input_tokens_seen": 38723828, "step": 429 }, { "epoch": 1.9770114942528736, "grad_norm": 4.689071794501053, "learning_rate": 5e-06, "loss": 0.1893, "num_input_tokens_seen": 38814248, "step": 430 }, { "epoch": 1.9770114942528736, "loss": 0.1737414002418518, "loss_ce": 0.0002489687467459589, "loss_iou": 0.458984375, "loss_num": 0.03466796875, "loss_xval": 0.173828125, "num_input_tokens_seen": 38814248, "step": 430 }, { "epoch": 1.981609195402299, "grad_norm": 27.154301658955358, "learning_rate": 5e-06, "loss": 0.1834, "num_input_tokens_seen": 38904564, "step": 431 }, { "epoch": 1.981609195402299, "loss": 0.1887126863002777, "loss_ce": 5.3021052735857666e-05, "loss_iou": 0.375, "loss_num": 0.03759765625, "loss_xval": 0.1884765625, "num_input_tokens_seen": 38904564, "step": 431 }, { "epoch": 1.986206896551724, "grad_norm": 15.753748795396127, "learning_rate": 5e-06, "loss": 0.2578, "num_input_tokens_seen": 38993440, "step": 432 }, { "epoch": 1.986206896551724, "loss": 0.17184701561927795, "loss_ce": 0.00027720603975467384, "loss_iou": 0.357421875, "loss_num": 0.034423828125, "loss_xval": 0.171875, "num_input_tokens_seen": 38993440, "step": 432 }, { "epoch": 1.9908045977011493, "grad_norm": 16.878556629349514, "learning_rate": 5e-06, "loss": 0.1694, "num_input_tokens_seen": 39083816, "step": 433 }, { "epoch": 1.9908045977011493, "loss": 0.18325263261795044, "loss_ce": 2.5101442588493228e-05, "loss_iou": 0.40234375, "loss_num": 0.03662109375, "loss_xval": 0.18359375, "num_input_tokens_seen": 39083816, "step": 433 }, { "epoch": 1.9954022988505749, "grad_norm": 12.650814063184473, "learning_rate": 5e-06, "loss": 0.1683, "num_input_tokens_seen": 39174208, "step": 434 }, { "epoch": 1.9954022988505749, "loss": 0.2204468846321106, "loss_ce": 0.000476180954137817, "loss_iou": 0.46875, "loss_num": 0.0439453125, "loss_xval": 0.2197265625, "num_input_tokens_seen": 39174208, "step": 434 }, { "epoch": 2.0, "grad_norm": 10.19888475335077, "learning_rate": 5e-06, "loss": 0.2245, "num_input_tokens_seen": 39264536, "step": 435 }, { "epoch": 2.0, "loss": 0.16308581829071045, "loss_ce": 0.00015246294788084924, "loss_iou": 0.333984375, "loss_num": 0.032470703125, "loss_xval": 0.1630859375, "num_input_tokens_seen": 39264536, "step": 435 }, { "epoch": 2.004597701149425, "grad_norm": 5.446086415432793, "learning_rate": 5e-06, "loss": 0.1492, "num_input_tokens_seen": 39355008, "step": 436 }, { "epoch": 2.004597701149425, "loss": 0.11079922318458557, "loss_ce": 0.0006002452573738992, "loss_iou": 0.388671875, "loss_num": 0.02197265625, "loss_xval": 0.1103515625, "num_input_tokens_seen": 39355008, "step": 436 }, { "epoch": 2.0091954022988507, "grad_norm": 11.525128525925364, "learning_rate": 5e-06, "loss": 0.1542, "num_input_tokens_seen": 39445364, "step": 437 }, { "epoch": 2.0091954022988507, "loss": 0.1240125373005867, "loss_ce": 0.0001722091546980664, "loss_iou": 0.3515625, "loss_num": 0.0247802734375, "loss_xval": 0.1240234375, "num_input_tokens_seen": 39445364, "step": 437 }, { "epoch": 2.013793103448276, "grad_norm": 5.953661546303923, "learning_rate": 5e-06, "loss": 0.1436, "num_input_tokens_seen": 39535692, "step": 438 }, { "epoch": 2.013793103448276, "loss": 0.14717864990234375, "loss_ce": 5.339417839422822e-05, "loss_iou": 0.435546875, "loss_num": 0.0294189453125, "loss_xval": 0.1474609375, "num_input_tokens_seen": 39535692, "step": 438 }, { "epoch": 2.018390804597701, "grad_norm": 9.622272916635373, "learning_rate": 5e-06, "loss": 0.2175, "num_input_tokens_seen": 39625276, "step": 439 }, { "epoch": 2.018390804597701, "loss": 0.19843432307243347, "loss_ce": 0.00040575824095867574, "loss_iou": 0.3671875, "loss_num": 0.03955078125, "loss_xval": 0.1982421875, "num_input_tokens_seen": 39625276, "step": 439 }, { "epoch": 2.0229885057471266, "grad_norm": 11.722688812201046, "learning_rate": 5e-06, "loss": 0.1435, "num_input_tokens_seen": 39715664, "step": 440 }, { "epoch": 2.0229885057471266, "loss": 0.13606533408164978, "loss_ce": 1.7956510419026017e-05, "loss_iou": 0.45703125, "loss_num": 0.0272216796875, "loss_xval": 0.1357421875, "num_input_tokens_seen": 39715664, "step": 440 }, { "epoch": 2.027586206896552, "grad_norm": 9.734047142024144, "learning_rate": 5e-06, "loss": 0.1546, "num_input_tokens_seen": 39806120, "step": 441 }, { "epoch": 2.027586206896552, "loss": 0.15544164180755615, "loss_ce": 0.00019872028497047722, "loss_iou": 0.330078125, "loss_num": 0.031005859375, "loss_xval": 0.1552734375, "num_input_tokens_seen": 39806120, "step": 441 }, { "epoch": 2.032183908045977, "grad_norm": 4.317712136329391, "learning_rate": 5e-06, "loss": 0.1521, "num_input_tokens_seen": 39896512, "step": 442 }, { "epoch": 2.032183908045977, "loss": 0.19860993325710297, "loss_ce": 0.004579176660627127, "loss_iou": 0.390625, "loss_num": 0.038818359375, "loss_xval": 0.1943359375, "num_input_tokens_seen": 39896512, "step": 442 }, { "epoch": 2.036781609195402, "grad_norm": 20.253389515904008, "learning_rate": 5e-06, "loss": 0.1573, "num_input_tokens_seen": 39987032, "step": 443 }, { "epoch": 2.036781609195402, "loss": 0.19383695721626282, "loss_ce": 0.0006912024109624326, "loss_iou": 0.3984375, "loss_num": 0.03857421875, "loss_xval": 0.193359375, "num_input_tokens_seen": 39987032, "step": 443 }, { "epoch": 2.0413793103448277, "grad_norm": 4.97212014387009, "learning_rate": 5e-06, "loss": 0.1301, "num_input_tokens_seen": 40077476, "step": 444 }, { "epoch": 2.0413793103448277, "loss": 0.12523597478866577, "loss_ce": 0.00011389970313757658, "loss_iou": 0.427734375, "loss_num": 0.0250244140625, "loss_xval": 0.125, "num_input_tokens_seen": 40077476, "step": 444 }, { "epoch": 2.045977011494253, "grad_norm": 20.311552823682124, "learning_rate": 5e-06, "loss": 0.1806, "num_input_tokens_seen": 40168036, "step": 445 }, { "epoch": 2.045977011494253, "loss": 0.18241354823112488, "loss_ce": 0.00022360245930030942, "loss_iou": 0.3828125, "loss_num": 0.036376953125, "loss_xval": 0.1826171875, "num_input_tokens_seen": 40168036, "step": 445 }, { "epoch": 2.050574712643678, "grad_norm": 20.574323382128274, "learning_rate": 5e-06, "loss": 0.2583, "num_input_tokens_seen": 40258332, "step": 446 }, { "epoch": 2.050574712643678, "loss": 0.2653703987598419, "loss_ce": 0.002186804311349988, "loss_iou": 0.474609375, "loss_num": 0.052490234375, "loss_xval": 0.263671875, "num_input_tokens_seen": 40258332, "step": 446 }, { "epoch": 2.0551724137931036, "grad_norm": 7.1556843698299, "learning_rate": 5e-06, "loss": 0.2328, "num_input_tokens_seen": 40348728, "step": 447 }, { "epoch": 2.0551724137931036, "loss": 0.2722781300544739, "loss_ce": 0.00015285555855371058, "loss_iou": 0.384765625, "loss_num": 0.054443359375, "loss_xval": 0.271484375, "num_input_tokens_seen": 40348728, "step": 447 }, { "epoch": 2.0597701149425287, "grad_norm": 2.328436671702478, "learning_rate": 5e-06, "loss": 0.1258, "num_input_tokens_seen": 40439056, "step": 448 }, { "epoch": 2.0597701149425287, "loss": 0.1083475798368454, "loss_ce": 2.544172093621455e-05, "loss_iou": 0.4375, "loss_num": 0.021728515625, "loss_xval": 0.1083984375, "num_input_tokens_seen": 40439056, "step": 448 }, { "epoch": 2.064367816091954, "grad_norm": 7.50649292907234, "learning_rate": 5e-06, "loss": 0.1104, "num_input_tokens_seen": 40529452, "step": 449 }, { "epoch": 2.064367816091954, "loss": 0.08954399824142456, "loss_ce": 6.646315159741789e-05, "loss_iou": 0.326171875, "loss_num": 0.017822265625, "loss_xval": 0.08935546875, "num_input_tokens_seen": 40529452, "step": 449 }, { "epoch": 2.0689655172413794, "grad_norm": 10.363487196000653, "learning_rate": 5e-06, "loss": 0.155, "num_input_tokens_seen": 40619852, "step": 450 }, { "epoch": 2.0689655172413794, "loss": 0.19379128515720367, "loss_ce": 3.5169647162547335e-05, "loss_iou": 0.34765625, "loss_num": 0.038818359375, "loss_xval": 0.193359375, "num_input_tokens_seen": 40619852, "step": 450 }, { "epoch": 2.0735632183908046, "grad_norm": 8.83788070087072, "learning_rate": 5e-06, "loss": 0.1502, "num_input_tokens_seen": 40710184, "step": 451 }, { "epoch": 2.0735632183908046, "loss": 0.1824960559606552, "loss_ce": 3.146529707009904e-05, "loss_iou": 0.453125, "loss_num": 0.036376953125, "loss_xval": 0.1826171875, "num_input_tokens_seen": 40710184, "step": 451 }, { "epoch": 2.0781609195402297, "grad_norm": 22.605606537183156, "learning_rate": 5e-06, "loss": 0.2029, "num_input_tokens_seen": 40799700, "step": 452 }, { "epoch": 2.0781609195402297, "loss": 0.24570101499557495, "loss_ce": 0.00021761911921203136, "loss_iou": 0.458984375, "loss_num": 0.049072265625, "loss_xval": 0.2451171875, "num_input_tokens_seen": 40799700, "step": 452 }, { "epoch": 2.0827586206896553, "grad_norm": 8.39747671515735, "learning_rate": 5e-06, "loss": 0.1604, "num_input_tokens_seen": 40890160, "step": 453 }, { "epoch": 2.0827586206896553, "loss": 0.11327225714921951, "loss_ce": 0.00023515010252594948, "loss_iou": 0.34765625, "loss_num": 0.0225830078125, "loss_xval": 0.11328125, "num_input_tokens_seen": 40890160, "step": 453 }, { "epoch": 2.0873563218390805, "grad_norm": 12.581825524675565, "learning_rate": 5e-06, "loss": 0.161, "num_input_tokens_seen": 40980460, "step": 454 }, { "epoch": 2.0873563218390805, "loss": 0.19595982134342194, "loss_ce": 0.0002200792368967086, "loss_iou": 0.4609375, "loss_num": 0.0390625, "loss_xval": 0.1953125, "num_input_tokens_seen": 40980460, "step": 454 }, { "epoch": 2.0919540229885056, "grad_norm": 4.810948555506714, "learning_rate": 5e-06, "loss": 0.1872, "num_input_tokens_seen": 41070800, "step": 455 }, { "epoch": 2.0919540229885056, "loss": 0.195834219455719, "loss_ce": 0.0006132656708359718, "loss_iou": 0.443359375, "loss_num": 0.0390625, "loss_xval": 0.1953125, "num_input_tokens_seen": 41070800, "step": 455 }, { "epoch": 2.0965517241379312, "grad_norm": 16.611126353595154, "learning_rate": 5e-06, "loss": 0.1576, "num_input_tokens_seen": 41161136, "step": 456 }, { "epoch": 2.0965517241379312, "loss": 0.18046724796295166, "loss_ce": 4.7336572606582195e-05, "loss_iou": 0.37890625, "loss_num": 0.0361328125, "loss_xval": 0.1806640625, "num_input_tokens_seen": 41161136, "step": 456 }, { "epoch": 2.1011494252873564, "grad_norm": 4.0448675422540346, "learning_rate": 5e-06, "loss": 0.1711, "num_input_tokens_seen": 41251520, "step": 457 }, { "epoch": 2.1011494252873564, "loss": 0.17239469289779663, "loss_ce": 0.0003060669405385852, "loss_iou": 0.44921875, "loss_num": 0.034423828125, "loss_xval": 0.171875, "num_input_tokens_seen": 41251520, "step": 457 }, { "epoch": 2.1057471264367815, "grad_norm": 6.408813044691335, "learning_rate": 5e-06, "loss": 0.1917, "num_input_tokens_seen": 41341836, "step": 458 }, { "epoch": 2.1057471264367815, "loss": 0.22308608889579773, "loss_ce": 0.034731604158878326, "loss_iou": 0.3515625, "loss_num": 0.03759765625, "loss_xval": 0.1884765625, "num_input_tokens_seen": 41341836, "step": 458 }, { "epoch": 2.110344827586207, "grad_norm": 13.658825889797921, "learning_rate": 5e-06, "loss": 0.2127, "num_input_tokens_seen": 41432320, "step": 459 }, { "epoch": 2.110344827586207, "loss": 0.27129611372947693, "loss_ce": 5.588199564954266e-05, "loss_iou": 0.40625, "loss_num": 0.05419921875, "loss_xval": 0.271484375, "num_input_tokens_seen": 41432320, "step": 459 }, { "epoch": 2.1149425287356323, "grad_norm": 48.461654169692885, "learning_rate": 5e-06, "loss": 0.1478, "num_input_tokens_seen": 41522668, "step": 460 }, { "epoch": 2.1149425287356323, "loss": 0.15583747625350952, "loss_ce": 0.0002588740608189255, "loss_iou": 0.42578125, "loss_num": 0.0311279296875, "loss_xval": 0.1552734375, "num_input_tokens_seen": 41522668, "step": 460 }, { "epoch": 2.1195402298850574, "grad_norm": 20.517893200082028, "learning_rate": 5e-06, "loss": 0.1576, "num_input_tokens_seen": 41613056, "step": 461 }, { "epoch": 2.1195402298850574, "loss": 0.16513849794864655, "loss_ce": 0.00016046430391725153, "loss_iou": 0.451171875, "loss_num": 0.032958984375, "loss_xval": 0.1650390625, "num_input_tokens_seen": 41613056, "step": 461 }, { "epoch": 2.1241379310344826, "grad_norm": 5.0252606370642505, "learning_rate": 5e-06, "loss": 0.1313, "num_input_tokens_seen": 41703492, "step": 462 }, { "epoch": 2.1241379310344826, "loss": 0.16197888553142548, "loss_ce": 0.00014416445628739893, "loss_iou": 0.455078125, "loss_num": 0.032470703125, "loss_xval": 0.162109375, "num_input_tokens_seen": 41703492, "step": 462 }, { "epoch": 2.128735632183908, "grad_norm": 35.99843113408324, "learning_rate": 5e-06, "loss": 0.1223, "num_input_tokens_seen": 41792988, "step": 463 }, { "epoch": 2.128735632183908, "loss": 0.11322343349456787, "loss_ce": 3.226540684408974e-06, "loss_iou": 0.423828125, "loss_num": 0.022705078125, "loss_xval": 0.11328125, "num_input_tokens_seen": 41792988, "step": 463 }, { "epoch": 2.1333333333333333, "grad_norm": 19.56124215547498, "learning_rate": 5e-06, "loss": 0.1097, "num_input_tokens_seen": 41883320, "step": 464 }, { "epoch": 2.1333333333333333, "loss": 0.10433776676654816, "loss_ce": 1.3423067684925627e-05, "loss_iou": 0.384765625, "loss_num": 0.0208740234375, "loss_xval": 0.1044921875, "num_input_tokens_seen": 41883320, "step": 464 }, { "epoch": 2.1379310344827585, "grad_norm": 5.283946883842529, "learning_rate": 5e-06, "loss": 0.1595, "num_input_tokens_seen": 41972864, "step": 465 }, { "epoch": 2.1379310344827585, "loss": 0.18815144896507263, "loss_ce": 0.00022419335437007248, "loss_iou": 0.5, "loss_num": 0.03759765625, "loss_xval": 0.1875, "num_input_tokens_seen": 41972864, "step": 465 }, { "epoch": 2.142528735632184, "grad_norm": 12.855612844319221, "learning_rate": 5e-06, "loss": 0.1953, "num_input_tokens_seen": 42063268, "step": 466 }, { "epoch": 2.142528735632184, "loss": 0.1868751049041748, "loss_ce": 4.649303446058184e-05, "loss_iou": 0.375, "loss_num": 0.037353515625, "loss_xval": 0.1865234375, "num_input_tokens_seen": 42063268, "step": 466 }, { "epoch": 2.147126436781609, "grad_norm": 9.976883452360386, "learning_rate": 5e-06, "loss": 0.1689, "num_input_tokens_seen": 42152812, "step": 467 }, { "epoch": 2.147126436781609, "loss": 0.18522751331329346, "loss_ce": 0.0004130652523599565, "loss_iou": 0.341796875, "loss_num": 0.036865234375, "loss_xval": 0.1845703125, "num_input_tokens_seen": 42152812, "step": 467 }, { "epoch": 2.1517241379310343, "grad_norm": 16.988301795489004, "learning_rate": 5e-06, "loss": 0.1362, "num_input_tokens_seen": 42243232, "step": 468 }, { "epoch": 2.1517241379310343, "loss": 0.16275331377983093, "loss_ce": 0.0045196665450930595, "loss_iou": 0.447265625, "loss_num": 0.03173828125, "loss_xval": 0.158203125, "num_input_tokens_seen": 42243232, "step": 468 }, { "epoch": 2.15632183908046, "grad_norm": 18.341882617271445, "learning_rate": 5e-06, "loss": 0.1953, "num_input_tokens_seen": 42333576, "step": 469 }, { "epoch": 2.15632183908046, "loss": 0.2384186089038849, "loss_ce": 0.0006256342749111354, "loss_iou": 0.36328125, "loss_num": 0.047607421875, "loss_xval": 0.23828125, "num_input_tokens_seen": 42333576, "step": 469 }, { "epoch": 2.160919540229885, "grad_norm": 23.406338359813535, "learning_rate": 5e-06, "loss": 0.1333, "num_input_tokens_seen": 42423888, "step": 470 }, { "epoch": 2.160919540229885, "loss": 0.09170129895210266, "loss_ce": 1.1235326383030042e-05, "loss_iou": 0.482421875, "loss_num": 0.018310546875, "loss_xval": 0.091796875, "num_input_tokens_seen": 42423888, "step": 470 }, { "epoch": 2.1655172413793102, "grad_norm": 5.704299050854984, "learning_rate": 5e-06, "loss": 0.1674, "num_input_tokens_seen": 42512692, "step": 471 }, { "epoch": 2.1655172413793102, "loss": 0.1880715936422348, "loss_ce": 5.279548349790275e-05, "loss_iou": 0.287109375, "loss_num": 0.03759765625, "loss_xval": 0.1884765625, "num_input_tokens_seen": 42512692, "step": 471 }, { "epoch": 2.170114942528736, "grad_norm": 7.3341829258742095, "learning_rate": 5e-06, "loss": 0.1509, "num_input_tokens_seen": 42602996, "step": 472 }, { "epoch": 2.170114942528736, "loss": 0.13333070278167725, "loss_ce": 0.00010622564150253311, "loss_iou": 0.4375, "loss_num": 0.026611328125, "loss_xval": 0.1328125, "num_input_tokens_seen": 42602996, "step": 472 }, { "epoch": 2.174712643678161, "grad_norm": 7.267255134108421, "learning_rate": 5e-06, "loss": 0.2403, "num_input_tokens_seen": 42693276, "step": 473 }, { "epoch": 2.174712643678161, "loss": 0.16120409965515137, "loss_ce": 4.077212361153215e-05, "loss_iou": 0.404296875, "loss_num": 0.0322265625, "loss_xval": 0.1611328125, "num_input_tokens_seen": 42693276, "step": 473 }, { "epoch": 2.179310344827586, "grad_norm": 128.39041769732546, "learning_rate": 5e-06, "loss": 0.2092, "num_input_tokens_seen": 42782804, "step": 474 }, { "epoch": 2.179310344827586, "loss": 0.18164223432540894, "loss_ce": 3.213401942048222e-05, "loss_iou": 0.33984375, "loss_num": 0.036376953125, "loss_xval": 0.181640625, "num_input_tokens_seen": 42782804, "step": 474 }, { "epoch": 2.1839080459770113, "grad_norm": 21.574370414672206, "learning_rate": 5e-06, "loss": 0.1485, "num_input_tokens_seen": 42873080, "step": 475 }, { "epoch": 2.1839080459770113, "loss": 0.14893415570259094, "loss_ce": 8.381912266486324e-06, "loss_iou": 0.416015625, "loss_num": 0.02978515625, "loss_xval": 0.1484375, "num_input_tokens_seen": 42873080, "step": 475 }, { "epoch": 2.188505747126437, "grad_norm": 4.41738665455178, "learning_rate": 5e-06, "loss": 0.2296, "num_input_tokens_seen": 42962612, "step": 476 }, { "epoch": 2.188505747126437, "loss": 0.2648059129714966, "loss_ce": 0.001317158224992454, "loss_iou": 0.2734375, "loss_num": 0.052734375, "loss_xval": 0.263671875, "num_input_tokens_seen": 42962612, "step": 476 }, { "epoch": 2.193103448275862, "grad_norm": 6.382558470057758, "learning_rate": 5e-06, "loss": 0.1859, "num_input_tokens_seen": 43052192, "step": 477 }, { "epoch": 2.193103448275862, "loss": 0.18961870670318604, "loss_ce": 4.350763629190624e-05, "loss_iou": 0.490234375, "loss_num": 0.037841796875, "loss_xval": 0.189453125, "num_input_tokens_seen": 43052192, "step": 477 }, { "epoch": 2.197701149425287, "grad_norm": 9.913316189631466, "learning_rate": 5e-06, "loss": 0.1934, "num_input_tokens_seen": 43142436, "step": 478 }, { "epoch": 2.197701149425287, "loss": 0.20609432458877563, "loss_ce": 0.0001006659513222985, "loss_iou": 0.416015625, "loss_num": 0.041259765625, "loss_xval": 0.2060546875, "num_input_tokens_seen": 43142436, "step": 478 }, { "epoch": 2.2022988505747128, "grad_norm": 3.649045492362426, "learning_rate": 5e-06, "loss": 0.1473, "num_input_tokens_seen": 43232740, "step": 479 }, { "epoch": 2.2022988505747128, "loss": 0.15660002827644348, "loss_ce": 0.00016692971985321492, "loss_iou": 0.34375, "loss_num": 0.03125, "loss_xval": 0.15625, "num_input_tokens_seen": 43232740, "step": 479 }, { "epoch": 2.206896551724138, "grad_norm": 16.113831960073153, "learning_rate": 5e-06, "loss": 0.1596, "num_input_tokens_seen": 43323116, "step": 480 }, { "epoch": 2.206896551724138, "loss": 0.14228999614715576, "loss_ce": 0.00010859394387807697, "loss_iou": 0.396484375, "loss_num": 0.0284423828125, "loss_xval": 0.142578125, "num_input_tokens_seen": 43323116, "step": 480 }, { "epoch": 2.211494252873563, "grad_norm": 13.400014157011977, "learning_rate": 5e-06, "loss": 0.2149, "num_input_tokens_seen": 43413536, "step": 481 }, { "epoch": 2.211494252873563, "loss": 0.181840181350708, "loss_ce": 0.00013852809206582606, "loss_iou": 0.337890625, "loss_num": 0.036376953125, "loss_xval": 0.181640625, "num_input_tokens_seen": 43413536, "step": 481 }, { "epoch": 2.2160919540229886, "grad_norm": 4.019097019730536, "learning_rate": 5e-06, "loss": 0.1358, "num_input_tokens_seen": 43503912, "step": 482 }, { "epoch": 2.2160919540229886, "loss": 0.11794019490480423, "loss_ce": 2.0275372662581503e-05, "loss_iou": 0.31640625, "loss_num": 0.0235595703125, "loss_xval": 0.1181640625, "num_input_tokens_seen": 43503912, "step": 482 }, { "epoch": 2.220689655172414, "grad_norm": 10.618191683361344, "learning_rate": 5e-06, "loss": 0.1288, "num_input_tokens_seen": 43594288, "step": 483 }, { "epoch": 2.220689655172414, "loss": 0.11793670058250427, "loss_ce": 9.307506843470037e-05, "loss_iou": 0.490234375, "loss_num": 0.0235595703125, "loss_xval": 0.11767578125, "num_input_tokens_seen": 43594288, "step": 483 }, { "epoch": 2.225287356321839, "grad_norm": 3.8979874019779968, "learning_rate": 5e-06, "loss": 0.1525, "num_input_tokens_seen": 43684704, "step": 484 }, { "epoch": 2.225287356321839, "loss": 0.16006925702095032, "loss_ce": 0.0007064603269100189, "loss_iou": 0.404296875, "loss_num": 0.031982421875, "loss_xval": 0.1591796875, "num_input_tokens_seen": 43684704, "step": 484 }, { "epoch": 2.2298850574712645, "grad_norm": 6.611052035871228, "learning_rate": 5e-06, "loss": 0.19, "num_input_tokens_seen": 43775036, "step": 485 }, { "epoch": 2.2298850574712645, "loss": 0.1399611234664917, "loss_ce": 6.85493359924294e-05, "loss_iou": 0.369140625, "loss_num": 0.0279541015625, "loss_xval": 0.1396484375, "num_input_tokens_seen": 43775036, "step": 485 }, { "epoch": 2.2344827586206897, "grad_norm": 4.314197235501585, "learning_rate": 5e-06, "loss": 0.1728, "num_input_tokens_seen": 43865412, "step": 486 }, { "epoch": 2.2344827586206897, "loss": 0.2288285195827484, "loss_ce": 0.00016030190454330295, "loss_iou": 0.3828125, "loss_num": 0.045654296875, "loss_xval": 0.228515625, "num_input_tokens_seen": 43865412, "step": 486 }, { "epoch": 2.239080459770115, "grad_norm": 9.468032363551144, "learning_rate": 5e-06, "loss": 0.1385, "num_input_tokens_seen": 43955720, "step": 487 }, { "epoch": 2.239080459770115, "loss": 0.152041494846344, "loss_ce": 9.446687181480229e-05, "loss_iou": 0.3984375, "loss_num": 0.0303955078125, "loss_xval": 0.15234375, "num_input_tokens_seen": 43955720, "step": 487 }, { "epoch": 2.2436781609195404, "grad_norm": 13.431440995922976, "learning_rate": 5e-06, "loss": 0.145, "num_input_tokens_seen": 44046112, "step": 488 }, { "epoch": 2.2436781609195404, "loss": 0.1250520944595337, "loss_ce": 0.00011312306742183864, "loss_iou": 0.421875, "loss_num": 0.0250244140625, "loss_xval": 0.125, "num_input_tokens_seen": 44046112, "step": 488 }, { "epoch": 2.2482758620689656, "grad_norm": 3.5222867230041612, "learning_rate": 5e-06, "loss": 0.1261, "num_input_tokens_seen": 44136408, "step": 489 }, { "epoch": 2.2482758620689656, "loss": 0.10490469634532928, "loss_ce": 4.629993418348022e-05, "loss_iou": 0.443359375, "loss_num": 0.02099609375, "loss_xval": 0.10498046875, "num_input_tokens_seen": 44136408, "step": 489 }, { "epoch": 2.2528735632183907, "grad_norm": 11.11111499247831, "learning_rate": 5e-06, "loss": 0.1954, "num_input_tokens_seen": 44226780, "step": 490 }, { "epoch": 2.2528735632183907, "loss": 0.196798175573349, "loss_ce": 5.1357543270569295e-05, "loss_iou": 0.43359375, "loss_num": 0.039306640625, "loss_xval": 0.1962890625, "num_input_tokens_seen": 44226780, "step": 490 }, { "epoch": 2.2574712643678163, "grad_norm": 5.957507308754348, "learning_rate": 5e-06, "loss": 0.1419, "num_input_tokens_seen": 44317112, "step": 491 }, { "epoch": 2.2574712643678163, "loss": 0.18355637788772583, "loss_ce": 0.004021458327770233, "loss_iou": 0.44921875, "loss_num": 0.035888671875, "loss_xval": 0.1796875, "num_input_tokens_seen": 44317112, "step": 491 }, { "epoch": 2.2620689655172415, "grad_norm": 6.742726892392994, "learning_rate": 5e-06, "loss": 0.1891, "num_input_tokens_seen": 44407476, "step": 492 }, { "epoch": 2.2620689655172415, "loss": 0.1890466809272766, "loss_ce": 0.0002039193786913529, "loss_iou": 0.41015625, "loss_num": 0.037841796875, "loss_xval": 0.1884765625, "num_input_tokens_seen": 44407476, "step": 492 }, { "epoch": 2.2666666666666666, "grad_norm": 11.363198722775557, "learning_rate": 5e-06, "loss": 0.1315, "num_input_tokens_seen": 44497844, "step": 493 }, { "epoch": 2.2666666666666666, "loss": 0.15371349453926086, "loss_ce": 0.00021006805764045566, "loss_iou": 0.384765625, "loss_num": 0.0306396484375, "loss_xval": 0.1533203125, "num_input_tokens_seen": 44497844, "step": 493 }, { "epoch": 2.2712643678160918, "grad_norm": 20.995205197621296, "learning_rate": 5e-06, "loss": 0.136, "num_input_tokens_seen": 44588252, "step": 494 }, { "epoch": 2.2712643678160918, "loss": 0.14532320201396942, "loss_ce": 5.9530953876674175e-05, "loss_iou": 0.42578125, "loss_num": 0.029052734375, "loss_xval": 0.1455078125, "num_input_tokens_seen": 44588252, "step": 494 }, { "epoch": 2.2758620689655173, "grad_norm": 13.929286977818037, "learning_rate": 5e-06, "loss": 0.2026, "num_input_tokens_seen": 44678616, "step": 495 }, { "epoch": 2.2758620689655173, "loss": 0.18880240619182587, "loss_ce": 0.0003868812054861337, "loss_iou": 0.30859375, "loss_num": 0.03759765625, "loss_xval": 0.1884765625, "num_input_tokens_seen": 44678616, "step": 495 }, { "epoch": 2.2804597701149425, "grad_norm": 5.471350156799274, "learning_rate": 5e-06, "loss": 0.1187, "num_input_tokens_seen": 44769036, "step": 496 }, { "epoch": 2.2804597701149425, "loss": 0.09233726561069489, "loss_ce": 9.788307215785608e-05, "loss_iou": 0.392578125, "loss_num": 0.0184326171875, "loss_xval": 0.09228515625, "num_input_tokens_seen": 44769036, "step": 496 }, { "epoch": 2.2850574712643676, "grad_norm": 3.59565500880543, "learning_rate": 5e-06, "loss": 0.1109, "num_input_tokens_seen": 44859352, "step": 497 }, { "epoch": 2.2850574712643676, "loss": 0.11752472817897797, "loss_ce": 9.308746666647494e-05, "loss_iou": 0.39453125, "loss_num": 0.0234375, "loss_xval": 0.1171875, "num_input_tokens_seen": 44859352, "step": 497 }, { "epoch": 2.2896551724137932, "grad_norm": 9.014441044097715, "learning_rate": 5e-06, "loss": 0.1408, "num_input_tokens_seen": 44949672, "step": 498 }, { "epoch": 2.2896551724137932, "loss": 0.17396917939186096, "loss_ce": 0.00014104516594670713, "loss_iou": 0.40234375, "loss_num": 0.03466796875, "loss_xval": 0.173828125, "num_input_tokens_seen": 44949672, "step": 498 }, { "epoch": 2.2942528735632184, "grad_norm": 56.84361696439731, "learning_rate": 5e-06, "loss": 0.2118, "num_input_tokens_seen": 45040108, "step": 499 }, { "epoch": 2.2942528735632184, "loss": 0.16254353523254395, "loss_ce": 0.0002510526101104915, "loss_iou": 0.337890625, "loss_num": 0.032470703125, "loss_xval": 0.162109375, "num_input_tokens_seen": 45040108, "step": 499 }, { "epoch": 2.2988505747126435, "grad_norm": 4.499065914254272, "learning_rate": 5e-06, "loss": 0.2092, "num_input_tokens_seen": 45130652, "step": 500 }, { "epoch": 2.2988505747126435, "eval_seeclick_CIoU": 0.4563731998205185, "eval_seeclick_GIoU": 0.4370184391736984, "eval_seeclick_IoU": 0.49749037623405457, "eval_seeclick_MAE_all": 0.0592306274920702, "eval_seeclick_MAE_h": 0.04618253372609615, "eval_seeclick_MAE_w": 0.10425780341029167, "eval_seeclick_MAE_x_boxes": 0.10805046185851097, "eval_seeclick_MAE_y_boxes": 0.042274054139852524, "eval_seeclick_NUM_probability": 0.9999992847442627, "eval_seeclick_inside_bbox": 0.8607954680919647, "eval_seeclick_loss": 0.3510092794895172, "eval_seeclick_loss_ce": 0.05598363280296326, "eval_seeclick_loss_iou": 0.519287109375, "eval_seeclick_loss_num": 0.06208038330078125, "eval_seeclick_loss_xval": 0.310455322265625, "eval_seeclick_runtime": 73.6206, "eval_seeclick_samples_per_second": 0.584, "eval_seeclick_steps_per_second": 0.027, "num_input_tokens_seen": 45130652, "step": 500 }, { "epoch": 2.2988505747126435, "eval_icons_CIoU": 0.5248142629861832, "eval_icons_GIoU": 0.5294373333454132, "eval_icons_IoU": 0.5619192123413086, "eval_icons_MAE_all": 0.04526199400424957, "eval_icons_MAE_h": 0.08578694611787796, "eval_icons_MAE_w": 0.06393218040466309, "eval_icons_MAE_x_boxes": 0.05692756548523903, "eval_icons_MAE_y_boxes": 0.08431091904640198, "eval_icons_NUM_probability": 0.9999994337558746, "eval_icons_inside_bbox": 0.7118055522441864, "eval_icons_loss": 0.22472724318504333, "eval_icons_loss_ce": 1.0411458504222537e-06, "eval_icons_loss_iou": 0.43853759765625, "eval_icons_loss_num": 0.049304962158203125, "eval_icons_loss_xval": 0.246337890625, "eval_icons_runtime": 97.2319, "eval_icons_samples_per_second": 0.514, "eval_icons_steps_per_second": 0.021, "num_input_tokens_seen": 45130652, "step": 500 }, { "epoch": 2.2988505747126435, "eval_screenspot_CIoU": 0.36121458808581036, "eval_screenspot_GIoU": 0.3352409948905309, "eval_screenspot_IoU": 0.4333700935045878, "eval_screenspot_MAE_all": 0.09693960969646771, "eval_screenspot_MAE_h": 0.09257866690556209, "eval_screenspot_MAE_w": 0.19348373264074326, "eval_screenspot_MAE_x_boxes": 0.18671841422716776, "eval_screenspot_MAE_y_boxes": 0.08937582621971767, "eval_screenspot_NUM_probability": 0.9999986489613851, "eval_screenspot_inside_bbox": 0.7145833373069763, "eval_screenspot_loss": 0.4828945994377136, "eval_screenspot_loss_ce": 0.0005104693118482828, "eval_screenspot_loss_iou": 0.3957926432291667, "eval_screenspot_loss_num": 0.099456787109375, "eval_screenspot_loss_xval": 0.4974365234375, "eval_screenspot_runtime": 150.5805, "eval_screenspot_samples_per_second": 0.591, "eval_screenspot_steps_per_second": 0.02, "num_input_tokens_seen": 45130652, "step": 500 }, { "epoch": 2.2988505747126435, "eval_compot_CIoU": 0.4746464341878891, "eval_compot_GIoU": 0.444294735789299, "eval_compot_IoU": 0.5337270200252533, "eval_compot_MAE_all": 0.05769064649939537, "eval_compot_MAE_h": 0.08894045650959015, "eval_compot_MAE_w": 0.10224151611328125, "eval_compot_MAE_x_boxes": 0.08807999640703201, "eval_compot_MAE_y_boxes": 0.08862848207354546, "eval_compot_NUM_probability": 0.9999935030937195, "eval_compot_inside_bbox": 0.7638888955116272, "eval_compot_loss": 0.31889885663986206, "eval_compot_loss_ce": 0.01438705949112773, "eval_compot_loss_iou": 0.5205078125, "eval_compot_loss_num": 0.05315399169921875, "eval_compot_loss_xval": 0.265472412109375, "eval_compot_runtime": 86.6902, "eval_compot_samples_per_second": 0.577, "eval_compot_steps_per_second": 0.023, "num_input_tokens_seen": 45130652, "step": 500 }, { "epoch": 2.2988505747126435, "loss": 0.19853872060775757, "loss_ce": 0.008414196781814098, "loss_iou": 0.5703125, "loss_num": 0.0380859375, "loss_xval": 0.1904296875, "num_input_tokens_seen": 45130652, "step": 500 }, { "epoch": 2.303448275862069, "grad_norm": 25.629104321788386, "learning_rate": 5e-06, "loss": 0.1748, "num_input_tokens_seen": 45221168, "step": 501 }, { "epoch": 2.303448275862069, "loss": 0.22815854847431183, "loss_ce": 0.00013120633957441896, "loss_iou": 0.3671875, "loss_num": 0.045654296875, "loss_xval": 0.228515625, "num_input_tokens_seen": 45221168, "step": 501 }, { "epoch": 2.3080459770114943, "grad_norm": 5.708666769215059, "learning_rate": 5e-06, "loss": 0.1472, "num_input_tokens_seen": 45311552, "step": 502 }, { "epoch": 2.3080459770114943, "loss": 0.14808592200279236, "loss_ce": 0.00019773890380747616, "loss_iou": 0.37109375, "loss_num": 0.029541015625, "loss_xval": 0.1474609375, "num_input_tokens_seen": 45311552, "step": 502 }, { "epoch": 2.3126436781609194, "grad_norm": 5.468912782546553, "learning_rate": 5e-06, "loss": 0.1312, "num_input_tokens_seen": 45401820, "step": 503 }, { "epoch": 2.3126436781609194, "loss": 0.17252948880195618, "loss_ce": 0.00022724125301465392, "loss_iou": 0.4375, "loss_num": 0.034423828125, "loss_xval": 0.171875, "num_input_tokens_seen": 45401820, "step": 503 }, { "epoch": 2.317241379310345, "grad_norm": 3.4368379842335006, "learning_rate": 5e-06, "loss": 0.1426, "num_input_tokens_seen": 45491352, "step": 504 }, { "epoch": 2.317241379310345, "loss": 0.17304499447345734, "loss_ce": 0.004771072883158922, "loss_iou": 0.3046875, "loss_num": 0.03369140625, "loss_xval": 0.16796875, "num_input_tokens_seen": 45491352, "step": 504 }, { "epoch": 2.32183908045977, "grad_norm": 7.438752813468672, "learning_rate": 5e-06, "loss": 0.1578, "num_input_tokens_seen": 45581828, "step": 505 }, { "epoch": 2.32183908045977, "loss": 0.12591499090194702, "loss_ce": 6.0488393501145765e-05, "loss_iou": 0.40625, "loss_num": 0.025146484375, "loss_xval": 0.1259765625, "num_input_tokens_seen": 45581828, "step": 505 }, { "epoch": 2.3264367816091953, "grad_norm": 2.55941882010794, "learning_rate": 5e-06, "loss": 0.1551, "num_input_tokens_seen": 45672168, "step": 506 }, { "epoch": 2.3264367816091953, "loss": 0.15659506618976593, "loss_ce": 9.366869562654756e-06, "loss_iou": 0.408203125, "loss_num": 0.03125, "loss_xval": 0.15625, "num_input_tokens_seen": 45672168, "step": 506 }, { "epoch": 2.3310344827586205, "grad_norm": 3.8697008798484838, "learning_rate": 5e-06, "loss": 0.1594, "num_input_tokens_seen": 45761816, "step": 507 }, { "epoch": 2.3310344827586205, "loss": 0.1356024444103241, "loss_ce": 0.00010439224570291117, "loss_iou": 0.42578125, "loss_num": 0.027099609375, "loss_xval": 0.1357421875, "num_input_tokens_seen": 45761816, "step": 507 }, { "epoch": 2.335632183908046, "grad_norm": 9.154165977355914, "learning_rate": 5e-06, "loss": 0.165, "num_input_tokens_seen": 45851328, "step": 508 }, { "epoch": 2.335632183908046, "loss": 0.17103195190429688, "loss_ce": 1.1456304491730407e-05, "loss_iou": 0.41015625, "loss_num": 0.0341796875, "loss_xval": 0.1708984375, "num_input_tokens_seen": 45851328, "step": 508 }, { "epoch": 2.340229885057471, "grad_norm": 6.692072817800835, "learning_rate": 5e-06, "loss": 0.1542, "num_input_tokens_seen": 45941700, "step": 509 }, { "epoch": 2.340229885057471, "loss": 0.149922713637352, "loss_ce": 0.00017295028374064714, "loss_iou": 0.38671875, "loss_num": 0.0299072265625, "loss_xval": 0.1494140625, "num_input_tokens_seen": 45941700, "step": 509 }, { "epoch": 2.344827586206897, "grad_norm": 6.611698710045344, "learning_rate": 5e-06, "loss": 0.1692, "num_input_tokens_seen": 46032036, "step": 510 }, { "epoch": 2.344827586206897, "loss": 0.22002628445625305, "loss_ce": 2.505266820662655e-05, "loss_iou": 0.39453125, "loss_num": 0.0439453125, "loss_xval": 0.2197265625, "num_input_tokens_seen": 46032036, "step": 510 }, { "epoch": 2.349425287356322, "grad_norm": 11.300552233375646, "learning_rate": 5e-06, "loss": 0.1201, "num_input_tokens_seen": 46122476, "step": 511 }, { "epoch": 2.349425287356322, "loss": 0.1223573088645935, "loss_ce": 4.286051989765838e-05, "loss_iou": 0.4296875, "loss_num": 0.0244140625, "loss_xval": 0.1220703125, "num_input_tokens_seen": 46122476, "step": 511 }, { "epoch": 2.354022988505747, "grad_norm": 4.419461671248112, "learning_rate": 5e-06, "loss": 0.1836, "num_input_tokens_seen": 46213036, "step": 512 }, { "epoch": 2.354022988505747, "loss": 0.21517296135425568, "loss_ce": 8.506829908583313e-05, "loss_iou": 0.3984375, "loss_num": 0.04296875, "loss_xval": 0.21484375, "num_input_tokens_seen": 46213036, "step": 512 }, { "epoch": 2.3586206896551722, "grad_norm": 4.8376770471091755, "learning_rate": 5e-06, "loss": 0.1198, "num_input_tokens_seen": 46303360, "step": 513 }, { "epoch": 2.3586206896551722, "loss": 0.13146010041236877, "loss_ce": 5.1406939746811986e-05, "loss_iou": 0.345703125, "loss_num": 0.0263671875, "loss_xval": 0.1318359375, "num_input_tokens_seen": 46303360, "step": 513 }, { "epoch": 2.363218390804598, "grad_norm": 14.293011444773292, "learning_rate": 5e-06, "loss": 0.1544, "num_input_tokens_seen": 46393732, "step": 514 }, { "epoch": 2.363218390804598, "loss": 0.1853524148464203, "loss_ce": 0.000568474642932415, "loss_iou": 0.255859375, "loss_num": 0.036865234375, "loss_xval": 0.1845703125, "num_input_tokens_seen": 46393732, "step": 514 }, { "epoch": 2.367816091954023, "grad_norm": 5.9219449761184135, "learning_rate": 5e-06, "loss": 0.1525, "num_input_tokens_seen": 46484084, "step": 515 }, { "epoch": 2.367816091954023, "loss": 0.13725048303604126, "loss_ce": 0.005231434479355812, "loss_iou": 0.4140625, "loss_num": 0.0263671875, "loss_xval": 0.1318359375, "num_input_tokens_seen": 46484084, "step": 515 }, { "epoch": 2.372413793103448, "grad_norm": 11.485628452257725, "learning_rate": 5e-06, "loss": 0.1312, "num_input_tokens_seen": 46574492, "step": 516 }, { "epoch": 2.372413793103448, "loss": 0.10670308768749237, "loss_ce": 4.415482544573024e-05, "loss_iou": 0.423828125, "loss_num": 0.0213623046875, "loss_xval": 0.1064453125, "num_input_tokens_seen": 46574492, "step": 516 }, { "epoch": 2.3770114942528737, "grad_norm": 3.642033089699345, "learning_rate": 5e-06, "loss": 0.1123, "num_input_tokens_seen": 46664972, "step": 517 }, { "epoch": 2.3770114942528737, "loss": 0.11098849028348923, "loss_ce": 5.709293327527121e-05, "loss_iou": 0.318359375, "loss_num": 0.022216796875, "loss_xval": 0.11083984375, "num_input_tokens_seen": 46664972, "step": 517 }, { "epoch": 2.381609195402299, "grad_norm": 5.135666742103272, "learning_rate": 5e-06, "loss": 0.1262, "num_input_tokens_seen": 46755488, "step": 518 }, { "epoch": 2.381609195402299, "loss": 0.10365065187215805, "loss_ce": 1.295450420002453e-05, "loss_iou": 0.3671875, "loss_num": 0.020751953125, "loss_xval": 0.103515625, "num_input_tokens_seen": 46755488, "step": 518 }, { "epoch": 2.386206896551724, "grad_norm": 14.48664391569232, "learning_rate": 5e-06, "loss": 0.1674, "num_input_tokens_seen": 46845788, "step": 519 }, { "epoch": 2.386206896551724, "loss": 0.1562279760837555, "loss_ce": 3.902369280694984e-05, "loss_iou": 0.41015625, "loss_num": 0.03125, "loss_xval": 0.15625, "num_input_tokens_seen": 46845788, "step": 519 }, { "epoch": 2.3908045977011496, "grad_norm": 5.121624515541806, "learning_rate": 5e-06, "loss": 0.1334, "num_input_tokens_seen": 46936132, "step": 520 }, { "epoch": 2.3908045977011496, "loss": 0.15609031915664673, "loss_ce": 0.002129129832610488, "loss_iou": 0.400390625, "loss_num": 0.03076171875, "loss_xval": 0.154296875, "num_input_tokens_seen": 46936132, "step": 520 }, { "epoch": 2.3954022988505748, "grad_norm": 12.984930412372549, "learning_rate": 5e-06, "loss": 0.1588, "num_input_tokens_seen": 47026436, "step": 521 }, { "epoch": 2.3954022988505748, "loss": 0.13674747943878174, "loss_ce": 5.9245572629151866e-05, "loss_iou": 0.333984375, "loss_num": 0.02734375, "loss_xval": 0.13671875, "num_input_tokens_seen": 47026436, "step": 521 }, { "epoch": 2.4, "grad_norm": 4.283346201311201, "learning_rate": 5e-06, "loss": 0.2249, "num_input_tokens_seen": 47116864, "step": 522 }, { "epoch": 2.4, "loss": 0.1752316951751709, "loss_ce": 0.00030492368387058377, "loss_iou": 0.421875, "loss_num": 0.034912109375, "loss_xval": 0.1748046875, "num_input_tokens_seen": 47116864, "step": 522 }, { "epoch": 2.4045977011494255, "grad_norm": 5.233495198593664, "learning_rate": 5e-06, "loss": 0.1417, "num_input_tokens_seen": 47207228, "step": 523 }, { "epoch": 2.4045977011494255, "loss": 0.18661803007125854, "loss_ce": 3.356490924488753e-05, "loss_iou": 0.341796875, "loss_num": 0.037353515625, "loss_xval": 0.1865234375, "num_input_tokens_seen": 47207228, "step": 523 }, { "epoch": 2.4091954022988507, "grad_norm": 5.2220413793779175, "learning_rate": 5e-06, "loss": 0.1413, "num_input_tokens_seen": 47297444, "step": 524 }, { "epoch": 2.4091954022988507, "loss": 0.1606762409210205, "loss_ce": 6.222333468031138e-05, "loss_iou": 0.5078125, "loss_num": 0.0322265625, "loss_xval": 0.16015625, "num_input_tokens_seen": 47297444, "step": 524 }, { "epoch": 2.413793103448276, "grad_norm": 18.74625653768912, "learning_rate": 5e-06, "loss": 0.132, "num_input_tokens_seen": 47387760, "step": 525 }, { "epoch": 2.413793103448276, "loss": 0.15789783000946045, "loss_ce": 0.00012196188617963344, "loss_iou": 0.361328125, "loss_num": 0.031494140625, "loss_xval": 0.158203125, "num_input_tokens_seen": 47387760, "step": 525 }, { "epoch": 2.418390804597701, "grad_norm": 5.191217680993693, "learning_rate": 5e-06, "loss": 0.1316, "num_input_tokens_seen": 47478124, "step": 526 }, { "epoch": 2.418390804597701, "loss": 0.13596881926059723, "loss_ce": 4.352313044364564e-05, "loss_iou": 0.396484375, "loss_num": 0.027099609375, "loss_xval": 0.1357421875, "num_input_tokens_seen": 47478124, "step": 526 }, { "epoch": 2.4229885057471265, "grad_norm": 4.706845583881529, "learning_rate": 5e-06, "loss": 0.1297, "num_input_tokens_seen": 47568600, "step": 527 }, { "epoch": 2.4229885057471265, "loss": 0.15037593245506287, "loss_ce": 4.634057768271305e-05, "loss_iou": 0.369140625, "loss_num": 0.030029296875, "loss_xval": 0.150390625, "num_input_tokens_seen": 47568600, "step": 527 }, { "epoch": 2.4275862068965517, "grad_norm": 7.095455232957096, "learning_rate": 5e-06, "loss": 0.1918, "num_input_tokens_seen": 47658948, "step": 528 }, { "epoch": 2.4275862068965517, "loss": 0.25855281949043274, "loss_ce": 0.00025203393306583166, "loss_iou": 0.34375, "loss_num": 0.0517578125, "loss_xval": 0.2578125, "num_input_tokens_seen": 47658948, "step": 528 }, { "epoch": 2.432183908045977, "grad_norm": 6.0875597343514265, "learning_rate": 5e-06, "loss": 0.1973, "num_input_tokens_seen": 47749252, "step": 529 }, { "epoch": 2.432183908045977, "loss": 0.21008528769016266, "loss_ce": 0.001833330374211073, "loss_iou": 0.330078125, "loss_num": 0.04150390625, "loss_xval": 0.2080078125, "num_input_tokens_seen": 47749252, "step": 529 }, { "epoch": 2.4367816091954024, "grad_norm": 43.1300948642533, "learning_rate": 5e-06, "loss": 0.1807, "num_input_tokens_seen": 47838180, "step": 530 }, { "epoch": 2.4367816091954024, "loss": 0.1620747148990631, "loss_ce": 1.1108362741651945e-05, "loss_iou": 0.4453125, "loss_num": 0.032470703125, "loss_xval": 0.162109375, "num_input_tokens_seen": 47838180, "step": 530 }, { "epoch": 2.4413793103448276, "grad_norm": 34.04511460331946, "learning_rate": 5e-06, "loss": 0.1328, "num_input_tokens_seen": 47928576, "step": 531 }, { "epoch": 2.4413793103448276, "loss": 0.14182758331298828, "loss_ce": 4.29103929491248e-05, "loss_iou": 0.3984375, "loss_num": 0.0283203125, "loss_xval": 0.1416015625, "num_input_tokens_seen": 47928576, "step": 531 }, { "epoch": 2.4459770114942527, "grad_norm": 14.495961279987304, "learning_rate": 5e-06, "loss": 0.207, "num_input_tokens_seen": 48019012, "step": 532 }, { "epoch": 2.4459770114942527, "loss": 0.205206036567688, "loss_ce": 0.00012791430344805121, "loss_iou": 0.421875, "loss_num": 0.041015625, "loss_xval": 0.205078125, "num_input_tokens_seen": 48019012, "step": 532 }, { "epoch": 2.4505747126436783, "grad_norm": 47.008419334105845, "learning_rate": 5e-06, "loss": 0.1181, "num_input_tokens_seen": 48109540, "step": 533 }, { "epoch": 2.4505747126436783, "loss": 0.09603258222341537, "loss_ce": 8.53195961099118e-05, "loss_iou": 0.416015625, "loss_num": 0.0191650390625, "loss_xval": 0.095703125, "num_input_tokens_seen": 48109540, "step": 533 }, { "epoch": 2.4551724137931035, "grad_norm": 22.246765917362193, "learning_rate": 5e-06, "loss": 0.1418, "num_input_tokens_seen": 48200004, "step": 534 }, { "epoch": 2.4551724137931035, "loss": 0.15302670001983643, "loss_ce": 7.259696576511487e-05, "loss_iou": 0.40234375, "loss_num": 0.0306396484375, "loss_xval": 0.1533203125, "num_input_tokens_seen": 48200004, "step": 534 }, { "epoch": 2.4597701149425286, "grad_norm": 10.021203723893532, "learning_rate": 5e-06, "loss": 0.1197, "num_input_tokens_seen": 48290512, "step": 535 }, { "epoch": 2.4597701149425286, "loss": 0.11590239405632019, "loss_ce": 0.00024077783746179193, "loss_iou": 0.3984375, "loss_num": 0.023193359375, "loss_xval": 0.11572265625, "num_input_tokens_seen": 48290512, "step": 535 }, { "epoch": 2.464367816091954, "grad_norm": 11.067971491143098, "learning_rate": 5e-06, "loss": 0.1704, "num_input_tokens_seen": 48380860, "step": 536 }, { "epoch": 2.464367816091954, "loss": 0.19783857464790344, "loss_ce": 0.00023726305516902357, "loss_iou": 0.44921875, "loss_num": 0.03955078125, "loss_xval": 0.197265625, "num_input_tokens_seen": 48380860, "step": 536 }, { "epoch": 2.4689655172413794, "grad_norm": 5.653254921318319, "learning_rate": 5e-06, "loss": 0.1489, "num_input_tokens_seen": 48471104, "step": 537 }, { "epoch": 2.4689655172413794, "loss": 0.1712353229522705, "loss_ce": 6.221771764103323e-05, "loss_iou": 0.408203125, "loss_num": 0.0341796875, "loss_xval": 0.1708984375, "num_input_tokens_seen": 48471104, "step": 537 }, { "epoch": 2.4735632183908045, "grad_norm": 2.9377901589977085, "learning_rate": 5e-06, "loss": 0.1349, "num_input_tokens_seen": 48561516, "step": 538 }, { "epoch": 2.4735632183908045, "loss": 0.12568363547325134, "loss_ce": 1.2256616173544899e-05, "loss_iou": 0.396484375, "loss_num": 0.025146484375, "loss_xval": 0.1259765625, "num_input_tokens_seen": 48561516, "step": 538 }, { "epoch": 2.4781609195402297, "grad_norm": 90.38237762494742, "learning_rate": 5e-06, "loss": 0.1775, "num_input_tokens_seen": 48651772, "step": 539 }, { "epoch": 2.4781609195402297, "loss": 0.20320890843868256, "loss_ce": 8.390971197513863e-05, "loss_iou": 0.46484375, "loss_num": 0.04052734375, "loss_xval": 0.203125, "num_input_tokens_seen": 48651772, "step": 539 }, { "epoch": 2.4827586206896552, "grad_norm": 32.249804456709384, "learning_rate": 5e-06, "loss": 0.1349, "num_input_tokens_seen": 48742128, "step": 540 }, { "epoch": 2.4827586206896552, "loss": 0.13325834274291992, "loss_ce": 7.96284293755889e-05, "loss_iou": 0.451171875, "loss_num": 0.026611328125, "loss_xval": 0.1328125, "num_input_tokens_seen": 48742128, "step": 540 }, { "epoch": 2.4873563218390804, "grad_norm": 4.3167432311394505, "learning_rate": 5e-06, "loss": 0.1451, "num_input_tokens_seen": 48832520, "step": 541 }, { "epoch": 2.4873563218390804, "loss": 0.1864563375711441, "loss_ce": 5.4961776186246425e-05, "loss_iou": 0.376953125, "loss_num": 0.037353515625, "loss_xval": 0.1865234375, "num_input_tokens_seen": 48832520, "step": 541 }, { "epoch": 2.491954022988506, "grad_norm": 9.206979571070748, "learning_rate": 5e-06, "loss": 0.1701, "num_input_tokens_seen": 48922880, "step": 542 }, { "epoch": 2.491954022988506, "loss": 0.14663755893707275, "loss_ce": 0.00021421856945380569, "loss_iou": 0.44140625, "loss_num": 0.029296875, "loss_xval": 0.146484375, "num_input_tokens_seen": 48922880, "step": 542 }, { "epoch": 2.496551724137931, "grad_norm": 9.250882650158083, "learning_rate": 5e-06, "loss": 0.1586, "num_input_tokens_seen": 49013144, "step": 543 }, { "epoch": 2.496551724137931, "loss": 0.1472419798374176, "loss_ce": 9.938361472450197e-06, "loss_iou": 0.412109375, "loss_num": 0.029541015625, "loss_xval": 0.1474609375, "num_input_tokens_seen": 49013144, "step": 543 }, { "epoch": 2.5011494252873563, "grad_norm": 4.218071296940112, "learning_rate": 5e-06, "loss": 0.1509, "num_input_tokens_seen": 49103612, "step": 544 }, { "epoch": 2.5011494252873563, "loss": 0.13012462854385376, "loss_ce": 0.00018077107961289585, "loss_iou": 0.45703125, "loss_num": 0.0260009765625, "loss_xval": 0.1298828125, "num_input_tokens_seen": 49103612, "step": 544 }, { "epoch": 2.5057471264367814, "grad_norm": 5.7187751949461285, "learning_rate": 5e-06, "loss": 0.1346, "num_input_tokens_seen": 49194032, "step": 545 }, { "epoch": 2.5057471264367814, "loss": 0.1249690055847168, "loss_ce": 0.00015211128629744053, "loss_iou": 0.357421875, "loss_num": 0.0250244140625, "loss_xval": 0.125, "num_input_tokens_seen": 49194032, "step": 545 }, { "epoch": 2.510344827586207, "grad_norm": 4.115869504252711, "learning_rate": 5e-06, "loss": 0.127, "num_input_tokens_seen": 49284388, "step": 546 }, { "epoch": 2.510344827586207, "loss": 0.13758164644241333, "loss_ce": 6.943976768525317e-05, "loss_iou": 0.3828125, "loss_num": 0.0274658203125, "loss_xval": 0.1376953125, "num_input_tokens_seen": 49284388, "step": 546 }, { "epoch": 2.514942528735632, "grad_norm": 9.743639046477583, "learning_rate": 5e-06, "loss": 0.1385, "num_input_tokens_seen": 49374780, "step": 547 }, { "epoch": 2.514942528735632, "loss": 0.18289527297019958, "loss_ce": 3.3943615562748164e-05, "loss_iou": 0.359375, "loss_num": 0.03662109375, "loss_xval": 0.1826171875, "num_input_tokens_seen": 49374780, "step": 547 }, { "epoch": 2.5195402298850573, "grad_norm": 10.069950958132628, "learning_rate": 5e-06, "loss": 0.1805, "num_input_tokens_seen": 49465116, "step": 548 }, { "epoch": 2.5195402298850573, "loss": 0.1874997317790985, "loss_ce": 3.024038960575126e-05, "loss_iou": 0.392578125, "loss_num": 0.03759765625, "loss_xval": 0.1875, "num_input_tokens_seen": 49465116, "step": 548 }, { "epoch": 2.524137931034483, "grad_norm": 19.870338599096762, "learning_rate": 5e-06, "loss": 0.1563, "num_input_tokens_seen": 49555324, "step": 549 }, { "epoch": 2.524137931034483, "loss": 0.1507517397403717, "loss_ce": 5.592878733295947e-05, "loss_iou": 0.408203125, "loss_num": 0.0301513671875, "loss_xval": 0.150390625, "num_input_tokens_seen": 49555324, "step": 549 }, { "epoch": 2.528735632183908, "grad_norm": 10.407984592997497, "learning_rate": 5e-06, "loss": 0.1614, "num_input_tokens_seen": 49645776, "step": 550 }, { "epoch": 2.528735632183908, "loss": 0.18005026876926422, "loss_ce": 2.7080646759714e-05, "loss_iou": 0.392578125, "loss_num": 0.035888671875, "loss_xval": 0.1796875, "num_input_tokens_seen": 49645776, "step": 550 }, { "epoch": 2.533333333333333, "grad_norm": 7.734767791315441, "learning_rate": 5e-06, "loss": 0.1349, "num_input_tokens_seen": 49736148, "step": 551 }, { "epoch": 2.533333333333333, "loss": 0.16183626651763916, "loss_ce": 0.00012362068810034543, "loss_iou": 0.392578125, "loss_num": 0.0322265625, "loss_xval": 0.162109375, "num_input_tokens_seen": 49736148, "step": 551 }, { "epoch": 2.5379310344827584, "grad_norm": 6.4853103024455505, "learning_rate": 5e-06, "loss": 0.1397, "num_input_tokens_seen": 49826532, "step": 552 }, { "epoch": 2.5379310344827584, "loss": 0.10679163783788681, "loss_ce": 7.166996510932222e-05, "loss_iou": 0.43359375, "loss_num": 0.0213623046875, "loss_xval": 0.10693359375, "num_input_tokens_seen": 49826532, "step": 552 }, { "epoch": 2.542528735632184, "grad_norm": 37.47119278150993, "learning_rate": 5e-06, "loss": 0.1699, "num_input_tokens_seen": 49917020, "step": 553 }, { "epoch": 2.542528735632184, "loss": 0.1629483699798584, "loss_ce": 0.0001065782635123469, "loss_iou": 0.4375, "loss_num": 0.032470703125, "loss_xval": 0.1630859375, "num_input_tokens_seen": 49917020, "step": 553 }, { "epoch": 2.547126436781609, "grad_norm": 6.379696690058362, "learning_rate": 5e-06, "loss": 0.1496, "num_input_tokens_seen": 50007432, "step": 554 }, { "epoch": 2.547126436781609, "loss": 0.15533311665058136, "loss_ce": 4.4418171455617994e-05, "loss_iou": 0.48828125, "loss_num": 0.031005859375, "loss_xval": 0.1552734375, "num_input_tokens_seen": 50007432, "step": 554 }, { "epoch": 2.5517241379310347, "grad_norm": 4.14549282032518, "learning_rate": 5e-06, "loss": 0.151, "num_input_tokens_seen": 50097820, "step": 555 }, { "epoch": 2.5517241379310347, "loss": 0.1373203992843628, "loss_ce": 5.233901902101934e-05, "loss_iou": 0.443359375, "loss_num": 0.0274658203125, "loss_xval": 0.1376953125, "num_input_tokens_seen": 50097820, "step": 555 }, { "epoch": 2.55632183908046, "grad_norm": 3.4318598421901934, "learning_rate": 5e-06, "loss": 0.1171, "num_input_tokens_seen": 50188248, "step": 556 }, { "epoch": 2.55632183908046, "loss": 0.13454991579055786, "loss_ce": 8.948062895797193e-05, "loss_iou": 0.443359375, "loss_num": 0.02685546875, "loss_xval": 0.134765625, "num_input_tokens_seen": 50188248, "step": 556 }, { "epoch": 2.560919540229885, "grad_norm": 3.7413568064084655, "learning_rate": 5e-06, "loss": 0.1643, "num_input_tokens_seen": 50278580, "step": 557 }, { "epoch": 2.560919540229885, "loss": 0.10955231636762619, "loss_ce": 8.576564869144931e-05, "loss_iou": 0.40625, "loss_num": 0.02197265625, "loss_xval": 0.109375, "num_input_tokens_seen": 50278580, "step": 557 }, { "epoch": 2.56551724137931, "grad_norm": 16.960520342941066, "learning_rate": 5e-06, "loss": 0.1843, "num_input_tokens_seen": 50368852, "step": 558 }, { "epoch": 2.56551724137931, "loss": 0.19905152916908264, "loss_ce": 1.5889523638179526e-05, "loss_iou": 0.349609375, "loss_num": 0.039794921875, "loss_xval": 0.19921875, "num_input_tokens_seen": 50368852, "step": 558 }, { "epoch": 2.5701149425287357, "grad_norm": 2.4502760696851618, "learning_rate": 5e-06, "loss": 0.1506, "num_input_tokens_seen": 50459208, "step": 559 }, { "epoch": 2.5701149425287357, "loss": 0.15352153778076172, "loss_ce": 0.00010967279376927763, "loss_iou": 0.451171875, "loss_num": 0.0306396484375, "loss_xval": 0.1533203125, "num_input_tokens_seen": 50459208, "step": 559 }, { "epoch": 2.574712643678161, "grad_norm": 3.003683699720249, "learning_rate": 5e-06, "loss": 0.1435, "num_input_tokens_seen": 50549548, "step": 560 }, { "epoch": 2.574712643678161, "loss": 0.14211556315422058, "loss_ce": 0.00011727242235792801, "loss_iou": 0.37890625, "loss_num": 0.0284423828125, "loss_xval": 0.1416015625, "num_input_tokens_seen": 50549548, "step": 560 }, { "epoch": 2.5793103448275865, "grad_norm": 8.360372471761355, "learning_rate": 5e-06, "loss": 0.136, "num_input_tokens_seen": 50639992, "step": 561 }, { "epoch": 2.5793103448275865, "loss": 0.12777529656887054, "loss_ce": 4.39732575614471e-05, "loss_iou": 0.396484375, "loss_num": 0.0255126953125, "loss_xval": 0.1279296875, "num_input_tokens_seen": 50639992, "step": 561 }, { "epoch": 2.5839080459770116, "grad_norm": 17.063605502973996, "learning_rate": 5e-06, "loss": 0.124, "num_input_tokens_seen": 50730300, "step": 562 }, { "epoch": 2.5839080459770116, "loss": 0.1004580706357956, "loss_ce": 2.4719898647163063e-05, "loss_iou": 0.349609375, "loss_num": 0.02001953125, "loss_xval": 0.1005859375, "num_input_tokens_seen": 50730300, "step": 562 }, { "epoch": 2.5885057471264368, "grad_norm": 5.4210250306263585, "learning_rate": 5e-06, "loss": 0.179, "num_input_tokens_seen": 50819996, "step": 563 }, { "epoch": 2.5885057471264368, "loss": 0.18996095657348633, "loss_ce": 1.954480103449896e-05, "loss_iou": 0.44921875, "loss_num": 0.0380859375, "loss_xval": 0.189453125, "num_input_tokens_seen": 50819996, "step": 563 }, { "epoch": 2.593103448275862, "grad_norm": 7.275873200745777, "learning_rate": 5e-06, "loss": 0.1733, "num_input_tokens_seen": 50910296, "step": 564 }, { "epoch": 2.593103448275862, "loss": 0.1952449530363083, "loss_ce": 8.50430442369543e-05, "loss_iou": 0.484375, "loss_num": 0.0390625, "loss_xval": 0.1953125, "num_input_tokens_seen": 50910296, "step": 564 }, { "epoch": 2.5977011494252875, "grad_norm": 15.23898519773349, "learning_rate": 5e-06, "loss": 0.1368, "num_input_tokens_seen": 51000576, "step": 565 }, { "epoch": 2.5977011494252875, "loss": 0.1579952836036682, "loss_ce": 2.1030613424954936e-05, "loss_iou": 0.44921875, "loss_num": 0.03173828125, "loss_xval": 0.158203125, "num_input_tokens_seen": 51000576, "step": 565 }, { "epoch": 2.6022988505747127, "grad_norm": 4.894484835323628, "learning_rate": 5e-06, "loss": 0.1135, "num_input_tokens_seen": 51090992, "step": 566 }, { "epoch": 2.6022988505747127, "loss": 0.12843066453933716, "loss_ce": 4.321142114349641e-05, "loss_iou": 0.42578125, "loss_num": 0.025634765625, "loss_xval": 0.1279296875, "num_input_tokens_seen": 51090992, "step": 566 }, { "epoch": 2.606896551724138, "grad_norm": 20.80440116305703, "learning_rate": 5e-06, "loss": 0.1439, "num_input_tokens_seen": 51181196, "step": 567 }, { "epoch": 2.606896551724138, "loss": 0.14310401678085327, "loss_ce": 3.761286643566564e-05, "loss_iou": 0.3515625, "loss_num": 0.028564453125, "loss_xval": 0.142578125, "num_input_tokens_seen": 51181196, "step": 567 }, { "epoch": 2.6114942528735634, "grad_norm": 4.997998048110718, "learning_rate": 5e-06, "loss": 0.1867, "num_input_tokens_seen": 51271612, "step": 568 }, { "epoch": 2.6114942528735634, "loss": 0.24052223563194275, "loss_ce": 4.3717802327591926e-05, "loss_iou": 0.357421875, "loss_num": 0.048095703125, "loss_xval": 0.240234375, "num_input_tokens_seen": 51271612, "step": 568 }, { "epoch": 2.6160919540229886, "grad_norm": 14.548525343911681, "learning_rate": 5e-06, "loss": 0.1517, "num_input_tokens_seen": 51362012, "step": 569 }, { "epoch": 2.6160919540229886, "loss": 0.15177559852600098, "loss_ce": 1.1683812772389501e-05, "loss_iou": 0.34765625, "loss_num": 0.0302734375, "loss_xval": 0.1513671875, "num_input_tokens_seen": 51362012, "step": 569 }, { "epoch": 2.6206896551724137, "grad_norm": 8.538017397932672, "learning_rate": 5e-06, "loss": 0.138, "num_input_tokens_seen": 51452264, "step": 570 }, { "epoch": 2.6206896551724137, "loss": 0.16324067115783691, "loss_ce": 3.2669748179614544e-05, "loss_iou": 0.51171875, "loss_num": 0.03271484375, "loss_xval": 0.1630859375, "num_input_tokens_seen": 51452264, "step": 570 }, { "epoch": 2.625287356321839, "grad_norm": 5.458750598266798, "learning_rate": 5e-06, "loss": 0.1717, "num_input_tokens_seen": 51542688, "step": 571 }, { "epoch": 2.625287356321839, "loss": 0.1548556387424469, "loss_ce": 3.9948965422809124e-05, "loss_iou": 0.458984375, "loss_num": 0.031005859375, "loss_xval": 0.1552734375, "num_input_tokens_seen": 51542688, "step": 571 }, { "epoch": 2.6298850574712644, "grad_norm": 6.9963748084758475, "learning_rate": 5e-06, "loss": 0.1511, "num_input_tokens_seen": 51633028, "step": 572 }, { "epoch": 2.6298850574712644, "loss": 0.10907353460788727, "loss_ce": 0.00012577624875120819, "loss_iou": 0.45703125, "loss_num": 0.0218505859375, "loss_xval": 0.10888671875, "num_input_tokens_seen": 51633028, "step": 572 }, { "epoch": 2.6344827586206896, "grad_norm": 5.485383848252606, "learning_rate": 5e-06, "loss": 0.2015, "num_input_tokens_seen": 51723304, "step": 573 }, { "epoch": 2.6344827586206896, "loss": 0.20509332418441772, "loss_ce": 0.009658743627369404, "loss_iou": 0.39453125, "loss_num": 0.0390625, "loss_xval": 0.1953125, "num_input_tokens_seen": 51723304, "step": 573 }, { "epoch": 2.639080459770115, "grad_norm": 5.145735431943079, "learning_rate": 5e-06, "loss": 0.1051, "num_input_tokens_seen": 51813780, "step": 574 }, { "epoch": 2.639080459770115, "loss": 0.14565084874629974, "loss_ce": 5.1484184950822964e-05, "loss_iou": 0.40234375, "loss_num": 0.0291748046875, "loss_xval": 0.1455078125, "num_input_tokens_seen": 51813780, "step": 574 }, { "epoch": 2.6436781609195403, "grad_norm": 11.268920979351746, "learning_rate": 5e-06, "loss": 0.1035, "num_input_tokens_seen": 51904132, "step": 575 }, { "epoch": 2.6436781609195403, "loss": 0.11697958409786224, "loss_ce": 6.674315227428451e-05, "loss_iou": 0.337890625, "loss_num": 0.0233154296875, "loss_xval": 0.11669921875, "num_input_tokens_seen": 51904132, "step": 575 }, { "epoch": 2.6482758620689655, "grad_norm": 5.697746433151331, "learning_rate": 5e-06, "loss": 0.1411, "num_input_tokens_seen": 51993764, "step": 576 }, { "epoch": 2.6482758620689655, "loss": 0.12572216987609863, "loss_ce": 5.002422767574899e-06, "loss_iou": 0.431640625, "loss_num": 0.025146484375, "loss_xval": 0.1259765625, "num_input_tokens_seen": 51993764, "step": 576 }, { "epoch": 2.6528735632183906, "grad_norm": 23.221495764418574, "learning_rate": 5e-06, "loss": 0.1201, "num_input_tokens_seen": 52084092, "step": 577 }, { "epoch": 2.6528735632183906, "loss": 0.10617563128471375, "loss_ce": 3.5495380870997906e-05, "loss_iou": 0.3671875, "loss_num": 0.021240234375, "loss_xval": 0.10595703125, "num_input_tokens_seen": 52084092, "step": 577 }, { "epoch": 2.657471264367816, "grad_norm": 9.375076718437624, "learning_rate": 5e-06, "loss": 0.1349, "num_input_tokens_seen": 52174520, "step": 578 }, { "epoch": 2.657471264367816, "loss": 0.15085318684577942, "loss_ce": 3.5313376429257914e-05, "loss_iou": 0.37109375, "loss_num": 0.0301513671875, "loss_xval": 0.150390625, "num_input_tokens_seen": 52174520, "step": 578 }, { "epoch": 2.6620689655172414, "grad_norm": 14.09120273250363, "learning_rate": 5e-06, "loss": 0.1184, "num_input_tokens_seen": 52264944, "step": 579 }, { "epoch": 2.6620689655172414, "loss": 0.13439422845840454, "loss_ce": 2.5327437469968572e-05, "loss_iou": 0.42578125, "loss_num": 0.02685546875, "loss_xval": 0.134765625, "num_input_tokens_seen": 52264944, "step": 579 }, { "epoch": 2.6666666666666665, "grad_norm": 7.150633563903932, "learning_rate": 5e-06, "loss": 0.12, "num_input_tokens_seen": 52355440, "step": 580 }, { "epoch": 2.6666666666666665, "loss": 0.12812362611293793, "loss_ce": 0.0001328981015831232, "loss_iou": 0.421875, "loss_num": 0.025634765625, "loss_xval": 0.1279296875, "num_input_tokens_seen": 52355440, "step": 580 }, { "epoch": 2.671264367816092, "grad_norm": 17.68235051618041, "learning_rate": 5e-06, "loss": 0.1257, "num_input_tokens_seen": 52445736, "step": 581 }, { "epoch": 2.671264367816092, "loss": 0.11226825416088104, "loss_ce": 2.46073159360094e-05, "loss_iou": 0.3828125, "loss_num": 0.0224609375, "loss_xval": 0.1123046875, "num_input_tokens_seen": 52445736, "step": 581 }, { "epoch": 2.6758620689655173, "grad_norm": 9.382663357540876, "learning_rate": 5e-06, "loss": 0.1607, "num_input_tokens_seen": 52536112, "step": 582 }, { "epoch": 2.6758620689655173, "loss": 0.12408533692359924, "loss_ce": 0.0001839685719460249, "loss_iou": 0.388671875, "loss_num": 0.0247802734375, "loss_xval": 0.1240234375, "num_input_tokens_seen": 52536112, "step": 582 }, { "epoch": 2.6804597701149424, "grad_norm": 4.0581740149048615, "learning_rate": 5e-06, "loss": 0.1348, "num_input_tokens_seen": 52626460, "step": 583 }, { "epoch": 2.6804597701149424, "loss": 0.1088830754160881, "loss_ce": 4.213307693134993e-05, "loss_iou": 0.375, "loss_num": 0.021728515625, "loss_xval": 0.10888671875, "num_input_tokens_seen": 52626460, "step": 583 }, { "epoch": 2.6850574712643676, "grad_norm": 21.41413357247865, "learning_rate": 5e-06, "loss": 0.1527, "num_input_tokens_seen": 52716824, "step": 584 }, { "epoch": 2.6850574712643676, "loss": 0.1613665223121643, "loss_ce": 8.111814531730488e-05, "loss_iou": 0.349609375, "loss_num": 0.0322265625, "loss_xval": 0.1611328125, "num_input_tokens_seen": 52716824, "step": 584 }, { "epoch": 2.689655172413793, "grad_norm": 4.051520688977973, "learning_rate": 5e-06, "loss": 0.1631, "num_input_tokens_seen": 52807040, "step": 585 }, { "epoch": 2.689655172413793, "loss": 0.14354471862316132, "loss_ce": 5.1069248002022505e-05, "loss_iou": 0.41015625, "loss_num": 0.0286865234375, "loss_xval": 0.1435546875, "num_input_tokens_seen": 52807040, "step": 585 }, { "epoch": 2.6942528735632183, "grad_norm": 8.143732797882166, "learning_rate": 5e-06, "loss": 0.2078, "num_input_tokens_seen": 52897404, "step": 586 }, { "epoch": 2.6942528735632183, "loss": 0.21973484754562378, "loss_ce": 0.0002524274750612676, "loss_iou": 0.482421875, "loss_num": 0.0439453125, "loss_xval": 0.2197265625, "num_input_tokens_seen": 52897404, "step": 586 }, { "epoch": 2.698850574712644, "grad_norm": 9.812036652439687, "learning_rate": 5e-06, "loss": 0.1997, "num_input_tokens_seen": 52987688, "step": 587 }, { "epoch": 2.698850574712644, "loss": 0.2230190932750702, "loss_ce": 5.7667512010084465e-05, "loss_iou": 0.3515625, "loss_num": 0.044677734375, "loss_xval": 0.22265625, "num_input_tokens_seen": 52987688, "step": 587 }, { "epoch": 2.703448275862069, "grad_norm": 11.85150221226106, "learning_rate": 5e-06, "loss": 0.162, "num_input_tokens_seen": 53078184, "step": 588 }, { "epoch": 2.703448275862069, "loss": 0.16252008080482483, "loss_ce": 1.3969104657007847e-05, "loss_iou": 0.44921875, "loss_num": 0.032470703125, "loss_xval": 0.162109375, "num_input_tokens_seen": 53078184, "step": 588 }, { "epoch": 2.708045977011494, "grad_norm": 4.7260277066471446, "learning_rate": 5e-06, "loss": 0.1668, "num_input_tokens_seen": 53168516, "step": 589 }, { "epoch": 2.708045977011494, "loss": 0.21362201869487762, "loss_ce": 0.0016469230176880956, "loss_iou": 0.349609375, "loss_num": 0.04248046875, "loss_xval": 0.2119140625, "num_input_tokens_seen": 53168516, "step": 589 }, { "epoch": 2.7126436781609193, "grad_norm": 18.524483984567052, "learning_rate": 5e-06, "loss": 0.1301, "num_input_tokens_seen": 53258748, "step": 590 }, { "epoch": 2.7126436781609193, "loss": 0.1348852813243866, "loss_ce": 2.8092332286178134e-05, "loss_iou": 0.345703125, "loss_num": 0.0269775390625, "loss_xval": 0.134765625, "num_input_tokens_seen": 53258748, "step": 590 }, { "epoch": 2.717241379310345, "grad_norm": 9.8553207385232, "learning_rate": 5e-06, "loss": 0.216, "num_input_tokens_seen": 53349272, "step": 591 }, { "epoch": 2.717241379310345, "loss": 0.26626330614089966, "loss_ce": 8.898475061869249e-05, "loss_iou": 0.5, "loss_num": 0.05322265625, "loss_xval": 0.265625, "num_input_tokens_seen": 53349272, "step": 591 }, { "epoch": 2.72183908045977, "grad_norm": 10.070351523808545, "learning_rate": 5e-06, "loss": 0.1794, "num_input_tokens_seen": 53438892, "step": 592 }, { "epoch": 2.72183908045977, "loss": 0.17163211107254028, "loss_ce": 6.230298458831385e-05, "loss_iou": 0.48046875, "loss_num": 0.034423828125, "loss_xval": 0.171875, "num_input_tokens_seen": 53438892, "step": 592 }, { "epoch": 2.7264367816091957, "grad_norm": 4.7799609797973535, "learning_rate": 5e-06, "loss": 0.229, "num_input_tokens_seen": 53529204, "step": 593 }, { "epoch": 2.7264367816091957, "loss": 0.2294745147228241, "loss_ce": 0.00010438873869134113, "loss_iou": 0.33203125, "loss_num": 0.0458984375, "loss_xval": 0.2294921875, "num_input_tokens_seen": 53529204, "step": 593 }, { "epoch": 2.731034482758621, "grad_norm": 10.927919604630798, "learning_rate": 5e-06, "loss": 0.1659, "num_input_tokens_seen": 53619564, "step": 594 }, { "epoch": 2.731034482758621, "loss": 0.16697654128074646, "loss_ce": 1.487006011302583e-05, "loss_iou": 0.38671875, "loss_num": 0.033447265625, "loss_xval": 0.1669921875, "num_input_tokens_seen": 53619564, "step": 594 }, { "epoch": 2.735632183908046, "grad_norm": 14.00357432861095, "learning_rate": 5e-06, "loss": 0.1539, "num_input_tokens_seen": 53709964, "step": 595 }, { "epoch": 2.735632183908046, "loss": 0.1489291936159134, "loss_ce": 9.496298298472539e-05, "loss_iou": 0.40625, "loss_num": 0.02978515625, "loss_xval": 0.1484375, "num_input_tokens_seen": 53709964, "step": 595 }, { "epoch": 2.740229885057471, "grad_norm": 18.78198114746188, "learning_rate": 5e-06, "loss": 0.1208, "num_input_tokens_seen": 53799628, "step": 596 }, { "epoch": 2.740229885057471, "loss": 0.17056533694267273, "loss_ce": 0.006441800855100155, "loss_iou": 0.375, "loss_num": 0.032958984375, "loss_xval": 0.1640625, "num_input_tokens_seen": 53799628, "step": 596 }, { "epoch": 2.7448275862068967, "grad_norm": 10.48163904175623, "learning_rate": 5e-06, "loss": 0.157, "num_input_tokens_seen": 53889936, "step": 597 }, { "epoch": 2.7448275862068967, "loss": 0.13397589325904846, "loss_ce": 1.898076334327925e-05, "loss_iou": 0.2890625, "loss_num": 0.02685546875, "loss_xval": 0.1337890625, "num_input_tokens_seen": 53889936, "step": 597 }, { "epoch": 2.749425287356322, "grad_norm": 7.682690937441725, "learning_rate": 5e-06, "loss": 0.114, "num_input_tokens_seen": 53980312, "step": 598 }, { "epoch": 2.749425287356322, "loss": 0.12832111120224, "loss_ce": 7.098779315128922e-05, "loss_iou": 0.443359375, "loss_num": 0.025634765625, "loss_xval": 0.1279296875, "num_input_tokens_seen": 53980312, "step": 598 }, { "epoch": 2.754022988505747, "grad_norm": 5.73861918938428, "learning_rate": 5e-06, "loss": 0.1738, "num_input_tokens_seen": 54070552, "step": 599 }, { "epoch": 2.754022988505747, "loss": 0.12556979060173035, "loss_ce": 8.150518988259137e-05, "loss_iou": 0.3828125, "loss_num": 0.025146484375, "loss_xval": 0.125, "num_input_tokens_seen": 54070552, "step": 599 }, { "epoch": 2.7586206896551726, "grad_norm": 6.309494256468956, "learning_rate": 5e-06, "loss": 0.1187, "num_input_tokens_seen": 54161016, "step": 600 }, { "epoch": 2.7586206896551726, "loss": 0.08496278524398804, "loss_ce": 3.236144038964994e-05, "loss_iou": 0.388671875, "loss_num": 0.0169677734375, "loss_xval": 0.0849609375, "num_input_tokens_seen": 54161016, "step": 600 }, { "epoch": 2.7632183908045977, "grad_norm": 8.032143628331568, "learning_rate": 5e-06, "loss": 0.124, "num_input_tokens_seen": 54251360, "step": 601 }, { "epoch": 2.7632183908045977, "loss": 0.14277827739715576, "loss_ce": 1.7043152183759958e-05, "loss_iou": 0.453125, "loss_num": 0.028564453125, "loss_xval": 0.142578125, "num_input_tokens_seen": 54251360, "step": 601 }, { "epoch": 2.767816091954023, "grad_norm": 4.475275047608208, "learning_rate": 5e-06, "loss": 0.1901, "num_input_tokens_seen": 54340280, "step": 602 }, { "epoch": 2.767816091954023, "loss": 0.16928395628929138, "loss_ce": 3.346416269778274e-05, "loss_iou": 0.453125, "loss_num": 0.033935546875, "loss_xval": 0.1689453125, "num_input_tokens_seen": 54340280, "step": 602 }, { "epoch": 2.772413793103448, "grad_norm": 20.093795622997725, "learning_rate": 5e-06, "loss": 0.1989, "num_input_tokens_seen": 54430644, "step": 603 }, { "epoch": 2.772413793103448, "loss": 0.24863861501216888, "loss_ce": 4.241707210894674e-05, "loss_iou": 0.37890625, "loss_num": 0.0498046875, "loss_xval": 0.2490234375, "num_input_tokens_seen": 54430644, "step": 603 }, { "epoch": 2.7770114942528736, "grad_norm": 7.529800946044408, "learning_rate": 5e-06, "loss": 0.108, "num_input_tokens_seen": 54521100, "step": 604 }, { "epoch": 2.7770114942528736, "loss": 0.060331616550683975, "loss_ce": 0.002012524986639619, "loss_iou": 0.390625, "loss_num": 0.01165771484375, "loss_xval": 0.058349609375, "num_input_tokens_seen": 54521100, "step": 604 }, { "epoch": 2.781609195402299, "grad_norm": 4.796037523923617, "learning_rate": 5e-06, "loss": 0.1482, "num_input_tokens_seen": 54611424, "step": 605 }, { "epoch": 2.781609195402299, "loss": 0.1368238478899002, "loss_ce": 1.3542239685193636e-05, "loss_iou": 0.38671875, "loss_num": 0.02734375, "loss_xval": 0.13671875, "num_input_tokens_seen": 54611424, "step": 605 }, { "epoch": 2.7862068965517244, "grad_norm": 6.220654742397298, "learning_rate": 5e-06, "loss": 0.1168, "num_input_tokens_seen": 54701824, "step": 606 }, { "epoch": 2.7862068965517244, "loss": 0.12376809120178223, "loss_ce": 4.983101462130435e-05, "loss_iou": 0.392578125, "loss_num": 0.0247802734375, "loss_xval": 0.12353515625, "num_input_tokens_seen": 54701824, "step": 606 }, { "epoch": 2.7908045977011495, "grad_norm": 6.281261617719392, "learning_rate": 5e-06, "loss": 0.1303, "num_input_tokens_seen": 54792204, "step": 607 }, { "epoch": 2.7908045977011495, "loss": 0.13932999968528748, "loss_ce": 4.776205605594441e-05, "loss_iou": 0.435546875, "loss_num": 0.02783203125, "loss_xval": 0.1396484375, "num_input_tokens_seen": 54792204, "step": 607 }, { "epoch": 2.7954022988505747, "grad_norm": 5.897586685245029, "learning_rate": 5e-06, "loss": 0.1316, "num_input_tokens_seen": 54882476, "step": 608 }, { "epoch": 2.7954022988505747, "loss": 0.11914113909006119, "loss_ce": 0.00033620704198256135, "loss_iou": 0.478515625, "loss_num": 0.0238037109375, "loss_xval": 0.11865234375, "num_input_tokens_seen": 54882476, "step": 608 }, { "epoch": 2.8, "grad_norm": 18.865031038984814, "learning_rate": 5e-06, "loss": 0.1713, "num_input_tokens_seen": 54972752, "step": 609 }, { "epoch": 2.8, "loss": 0.1230660229921341, "loss_ce": 1.914523818413727e-05, "loss_iou": 0.390625, "loss_num": 0.024658203125, "loss_xval": 0.123046875, "num_input_tokens_seen": 54972752, "step": 609 }, { "epoch": 2.8045977011494254, "grad_norm": 12.75408299711661, "learning_rate": 5e-06, "loss": 0.1575, "num_input_tokens_seen": 55062340, "step": 610 }, { "epoch": 2.8045977011494254, "loss": 0.16492819786071777, "loss_ce": 1.1197053026990034e-05, "loss_iou": 0.330078125, "loss_num": 0.032958984375, "loss_xval": 0.1650390625, "num_input_tokens_seen": 55062340, "step": 610 }, { "epoch": 2.8091954022988506, "grad_norm": 12.5815565888982, "learning_rate": 5e-06, "loss": 0.1924, "num_input_tokens_seen": 55151980, "step": 611 }, { "epoch": 2.8091954022988506, "loss": 0.19428588449954987, "loss_ce": 0.006968984380364418, "loss_iou": 0.44140625, "loss_num": 0.03759765625, "loss_xval": 0.1875, "num_input_tokens_seen": 55151980, "step": 611 }, { "epoch": 2.8137931034482757, "grad_norm": 3.291240590141606, "learning_rate": 5e-06, "loss": 0.1739, "num_input_tokens_seen": 55242284, "step": 612 }, { "epoch": 2.8137931034482757, "loss": 0.15410566329956055, "loss_ce": 0.00022076835739426315, "loss_iou": 0.37109375, "loss_num": 0.03076171875, "loss_xval": 0.154296875, "num_input_tokens_seen": 55242284, "step": 612 }, { "epoch": 2.8183908045977013, "grad_norm": 16.71758430849643, "learning_rate": 5e-06, "loss": 0.1686, "num_input_tokens_seen": 55332764, "step": 613 }, { "epoch": 2.8183908045977013, "loss": 0.21246647834777832, "loss_ce": 0.00018619894399307668, "loss_iou": 0.400390625, "loss_num": 0.04248046875, "loss_xval": 0.2119140625, "num_input_tokens_seen": 55332764, "step": 613 }, { "epoch": 2.8229885057471265, "grad_norm": 9.529924266178037, "learning_rate": 5e-06, "loss": 0.1512, "num_input_tokens_seen": 55423028, "step": 614 }, { "epoch": 2.8229885057471265, "loss": 0.10561984032392502, "loss_ce": 2.9023341994616203e-05, "loss_iou": 0.40234375, "loss_num": 0.0211181640625, "loss_xval": 0.10546875, "num_input_tokens_seen": 55423028, "step": 614 }, { "epoch": 2.8275862068965516, "grad_norm": 4.245318747599123, "learning_rate": 5e-06, "loss": 0.1232, "num_input_tokens_seen": 55513508, "step": 615 }, { "epoch": 2.8275862068965516, "loss": 0.1072404682636261, "loss_ce": 3.2217059924732894e-05, "loss_iou": 0.33984375, "loss_num": 0.021484375, "loss_xval": 0.107421875, "num_input_tokens_seen": 55513508, "step": 615 }, { "epoch": 2.8321839080459768, "grad_norm": 18.179797794436265, "learning_rate": 5e-06, "loss": 0.1493, "num_input_tokens_seen": 55603844, "step": 616 }, { "epoch": 2.8321839080459768, "loss": 0.15227092802524567, "loss_ce": 4.924844688503072e-05, "loss_iou": 0.373046875, "loss_num": 0.0303955078125, "loss_xval": 0.15234375, "num_input_tokens_seen": 55603844, "step": 616 }, { "epoch": 2.8367816091954023, "grad_norm": 5.57072118153668, "learning_rate": 5e-06, "loss": 0.1349, "num_input_tokens_seen": 55693488, "step": 617 }, { "epoch": 2.8367816091954023, "loss": 0.13283629715442657, "loss_ce": 0.00017638025747146457, "loss_iou": 0.47265625, "loss_num": 0.026611328125, "loss_xval": 0.1328125, "num_input_tokens_seen": 55693488, "step": 617 }, { "epoch": 2.8413793103448275, "grad_norm": 3.7001051290248053, "learning_rate": 5e-06, "loss": 0.23, "num_input_tokens_seen": 55783768, "step": 618 }, { "epoch": 2.8413793103448275, "loss": 0.18909384310245514, "loss_ce": 3.744277637451887e-05, "loss_iou": 0.423828125, "loss_num": 0.037841796875, "loss_xval": 0.189453125, "num_input_tokens_seen": 55783768, "step": 618 }, { "epoch": 2.845977011494253, "grad_norm": 8.633989870151986, "learning_rate": 5e-06, "loss": 0.1286, "num_input_tokens_seen": 55874088, "step": 619 }, { "epoch": 2.845977011494253, "loss": 0.16746875643730164, "loss_ce": 4.932547381031327e-05, "loss_iou": 0.400390625, "loss_num": 0.033447265625, "loss_xval": 0.1669921875, "num_input_tokens_seen": 55874088, "step": 619 }, { "epoch": 2.8505747126436782, "grad_norm": 7.508204824741638, "learning_rate": 5e-06, "loss": 0.1742, "num_input_tokens_seen": 55964516, "step": 620 }, { "epoch": 2.8505747126436782, "loss": 0.18894179165363312, "loss_ce": 0.00011428301513660699, "loss_iou": 0.3515625, "loss_num": 0.037841796875, "loss_xval": 0.1884765625, "num_input_tokens_seen": 55964516, "step": 620 }, { "epoch": 2.8551724137931034, "grad_norm": 11.720485380185297, "learning_rate": 5e-06, "loss": 0.1838, "num_input_tokens_seen": 56054816, "step": 621 }, { "epoch": 2.8551724137931034, "loss": 0.12343352288007736, "loss_ce": 2.0439181753317825e-05, "loss_iou": 0.333984375, "loss_num": 0.024658203125, "loss_xval": 0.12353515625, "num_input_tokens_seen": 56054816, "step": 621 }, { "epoch": 2.8597701149425285, "grad_norm": 6.132110793900122, "learning_rate": 5e-06, "loss": 0.1194, "num_input_tokens_seen": 56145008, "step": 622 }, { "epoch": 2.8597701149425285, "loss": 0.11308827996253967, "loss_ce": 2.0656258129747584e-05, "loss_iou": 0.359375, "loss_num": 0.0225830078125, "loss_xval": 0.11328125, "num_input_tokens_seen": 56145008, "step": 622 }, { "epoch": 2.864367816091954, "grad_norm": 27.370691194991768, "learning_rate": 5e-06, "loss": 0.2575, "num_input_tokens_seen": 56234656, "step": 623 }, { "epoch": 2.864367816091954, "loss": 0.2561437487602234, "loss_ce": 0.0002538765547797084, "loss_iou": 0.3984375, "loss_num": 0.051025390625, "loss_xval": 0.255859375, "num_input_tokens_seen": 56234656, "step": 623 }, { "epoch": 2.8689655172413793, "grad_norm": 4.250971216520788, "learning_rate": 5e-06, "loss": 0.1461, "num_input_tokens_seen": 56325056, "step": 624 }, { "epoch": 2.8689655172413793, "loss": 0.15083430707454681, "loss_ce": 1.643798896111548e-05, "loss_iou": 0.419921875, "loss_num": 0.0301513671875, "loss_xval": 0.150390625, "num_input_tokens_seen": 56325056, "step": 624 }, { "epoch": 2.873563218390805, "grad_norm": 23.371099875991142, "learning_rate": 5e-06, "loss": 0.1696, "num_input_tokens_seen": 56415548, "step": 625 }, { "epoch": 2.873563218390805, "loss": 0.2244938164949417, "loss_ce": 0.00018961615569423884, "loss_iou": 0.4453125, "loss_num": 0.044921875, "loss_xval": 0.224609375, "num_input_tokens_seen": 56415548, "step": 625 }, { "epoch": 2.87816091954023, "grad_norm": 19.44155033716697, "learning_rate": 5e-06, "loss": 0.1712, "num_input_tokens_seen": 56505852, "step": 626 }, { "epoch": 2.87816091954023, "loss": 0.12483800947666168, "loss_ce": 0.008840695023536682, "loss_iou": 0.4296875, "loss_num": 0.023193359375, "loss_xval": 0.1162109375, "num_input_tokens_seen": 56505852, "step": 626 }, { "epoch": 2.882758620689655, "grad_norm": 2.7000059987309135, "learning_rate": 5e-06, "loss": 0.132, "num_input_tokens_seen": 56596200, "step": 627 }, { "epoch": 2.882758620689655, "loss": 0.16553373634815216, "loss_ce": 3.6913388612447307e-05, "loss_iou": 0.318359375, "loss_num": 0.033203125, "loss_xval": 0.1650390625, "num_input_tokens_seen": 56596200, "step": 627 }, { "epoch": 2.8873563218390803, "grad_norm": 11.738060980857469, "learning_rate": 5e-06, "loss": 0.1943, "num_input_tokens_seen": 56686468, "step": 628 }, { "epoch": 2.8873563218390803, "loss": 0.13238102197647095, "loss_ce": 0.00013309504720382392, "loss_iou": 0.40625, "loss_num": 0.0264892578125, "loss_xval": 0.1318359375, "num_input_tokens_seen": 56686468, "step": 628 }, { "epoch": 2.891954022988506, "grad_norm": 5.374412860929261, "learning_rate": 5e-06, "loss": 0.1573, "num_input_tokens_seen": 56776724, "step": 629 }, { "epoch": 2.891954022988506, "loss": 0.10836595296859741, "loss_ce": 0.00018113740952685475, "loss_iou": 0.388671875, "loss_num": 0.0216064453125, "loss_xval": 0.1083984375, "num_input_tokens_seen": 56776724, "step": 629 }, { "epoch": 2.896551724137931, "grad_norm": 17.458934517040042, "learning_rate": 5e-06, "loss": 0.1121, "num_input_tokens_seen": 56867076, "step": 630 }, { "epoch": 2.896551724137931, "loss": 0.0917268842458725, "loss_ce": 8.260130562121049e-05, "loss_iou": 0.408203125, "loss_num": 0.018310546875, "loss_xval": 0.091796875, "num_input_tokens_seen": 56867076, "step": 630 }, { "epoch": 2.901149425287356, "grad_norm": 7.144445161490093, "learning_rate": 5e-06, "loss": 0.1952, "num_input_tokens_seen": 56957452, "step": 631 }, { "epoch": 2.901149425287356, "loss": 0.17814010381698608, "loss_ce": 3.951931284973398e-05, "loss_iou": 0.36328125, "loss_num": 0.03564453125, "loss_xval": 0.177734375, "num_input_tokens_seen": 56957452, "step": 631 }, { "epoch": 2.905747126436782, "grad_norm": 4.881468932307776, "learning_rate": 5e-06, "loss": 0.1611, "num_input_tokens_seen": 57046320, "step": 632 }, { "epoch": 2.905747126436782, "loss": 0.158255934715271, "loss_ce": 8.333477308042347e-05, "loss_iou": 0.373046875, "loss_num": 0.03173828125, "loss_xval": 0.158203125, "num_input_tokens_seen": 57046320, "step": 632 }, { "epoch": 2.910344827586207, "grad_norm": 6.020225715103822, "learning_rate": 5e-06, "loss": 0.1193, "num_input_tokens_seen": 57136760, "step": 633 }, { "epoch": 2.910344827586207, "loss": 0.08915083855390549, "loss_ce": 0.0001921012590173632, "loss_iou": 0.373046875, "loss_num": 0.0177001953125, "loss_xval": 0.0888671875, "num_input_tokens_seen": 57136760, "step": 633 }, { "epoch": 2.914942528735632, "grad_norm": 18.893533969376247, "learning_rate": 5e-06, "loss": 0.0845, "num_input_tokens_seen": 57227264, "step": 634 }, { "epoch": 2.914942528735632, "loss": 0.0713462084531784, "loss_ce": 8.766089740674943e-05, "loss_iou": 0.404296875, "loss_num": 0.0142822265625, "loss_xval": 0.0712890625, "num_input_tokens_seen": 57227264, "step": 634 }, { "epoch": 2.9195402298850572, "grad_norm": 18.876600553359836, "learning_rate": 5e-06, "loss": 0.1505, "num_input_tokens_seen": 57316096, "step": 635 }, { "epoch": 2.9195402298850572, "loss": 0.14804166555404663, "loss_ce": 0.0002297760802321136, "loss_iou": 0.3828125, "loss_num": 0.029541015625, "loss_xval": 0.1474609375, "num_input_tokens_seen": 57316096, "step": 635 }, { "epoch": 2.924137931034483, "grad_norm": 14.779947666699496, "learning_rate": 5e-06, "loss": 0.1474, "num_input_tokens_seen": 57406464, "step": 636 }, { "epoch": 2.924137931034483, "loss": 0.1926122009754181, "loss_ce": 1.5764764611958526e-05, "loss_iou": 0.369140625, "loss_num": 0.03857421875, "loss_xval": 0.1923828125, "num_input_tokens_seen": 57406464, "step": 636 }, { "epoch": 2.928735632183908, "grad_norm": 33.4524141554006, "learning_rate": 5e-06, "loss": 0.1697, "num_input_tokens_seen": 57496896, "step": 637 }, { "epoch": 2.928735632183908, "loss": 0.19599446654319763, "loss_ce": 7.161433313740417e-05, "loss_iou": 0.431640625, "loss_num": 0.0390625, "loss_xval": 0.1962890625, "num_input_tokens_seen": 57496896, "step": 637 }, { "epoch": 2.9333333333333336, "grad_norm": 8.837724303528487, "learning_rate": 5e-06, "loss": 0.1276, "num_input_tokens_seen": 57587328, "step": 638 }, { "epoch": 2.9333333333333336, "loss": 0.11598608642816544, "loss_ce": 8.032606274355203e-05, "loss_iou": 0.4375, "loss_num": 0.023193359375, "loss_xval": 0.11572265625, "num_input_tokens_seen": 57587328, "step": 638 }, { "epoch": 2.9379310344827587, "grad_norm": 5.458845083322578, "learning_rate": 5e-06, "loss": 0.1521, "num_input_tokens_seen": 57676000, "step": 639 }, { "epoch": 2.9379310344827587, "loss": 0.1924465298652649, "loss_ce": 6.370765186147764e-05, "loss_iou": 0.3671875, "loss_num": 0.038330078125, "loss_xval": 0.1923828125, "num_input_tokens_seen": 57676000, "step": 639 }, { "epoch": 2.942528735632184, "grad_norm": 3.502931679203287, "learning_rate": 5e-06, "loss": 0.1517, "num_input_tokens_seen": 57766436, "step": 640 }, { "epoch": 2.942528735632184, "loss": 0.22913488745689392, "loss_ce": 0.00022254293435253203, "loss_iou": 0.39453125, "loss_num": 0.0458984375, "loss_xval": 0.228515625, "num_input_tokens_seen": 57766436, "step": 640 }, { "epoch": 2.947126436781609, "grad_norm": 27.45097976056178, "learning_rate": 5e-06, "loss": 0.1282, "num_input_tokens_seen": 57856748, "step": 641 }, { "epoch": 2.947126436781609, "loss": 0.1526871919631958, "loss_ce": 9.930751548381522e-05, "loss_iou": 0.330078125, "loss_num": 0.030517578125, "loss_xval": 0.15234375, "num_input_tokens_seen": 57856748, "step": 641 }, { "epoch": 2.9517241379310346, "grad_norm": 12.914503955582164, "learning_rate": 5e-06, "loss": 0.1521, "num_input_tokens_seen": 57946960, "step": 642 }, { "epoch": 2.9517241379310346, "loss": 0.17462590336799622, "loss_ce": 6.536281580338255e-05, "loss_iou": 0.373046875, "loss_num": 0.034912109375, "loss_xval": 0.1748046875, "num_input_tokens_seen": 57946960, "step": 642 }, { "epoch": 2.9563218390804598, "grad_norm": 11.39446414052775, "learning_rate": 5e-06, "loss": 0.115, "num_input_tokens_seen": 58035688, "step": 643 }, { "epoch": 2.9563218390804598, "loss": 0.11201904714107513, "loss_ce": 8.057546801865101e-05, "loss_iou": 0.345703125, "loss_num": 0.0223388671875, "loss_xval": 0.11181640625, "num_input_tokens_seen": 58035688, "step": 643 }, { "epoch": 2.960919540229885, "grad_norm": 3.1615458390497326, "learning_rate": 5e-06, "loss": 0.1129, "num_input_tokens_seen": 58126192, "step": 644 }, { "epoch": 2.960919540229885, "loss": 0.11491385102272034, "loss_ce": 0.00010671426571207121, "loss_iou": 0.3984375, "loss_num": 0.02294921875, "loss_xval": 0.11474609375, "num_input_tokens_seen": 58126192, "step": 644 }, { "epoch": 2.9655172413793105, "grad_norm": 11.115986632833536, "learning_rate": 5e-06, "loss": 0.1975, "num_input_tokens_seen": 58216656, "step": 645 }, { "epoch": 2.9655172413793105, "loss": 0.18091078102588654, "loss_ce": 0.00015516624262090772, "loss_iou": 0.38671875, "loss_num": 0.0361328125, "loss_xval": 0.1806640625, "num_input_tokens_seen": 58216656, "step": 645 }, { "epoch": 2.9701149425287356, "grad_norm": 10.595330403567505, "learning_rate": 5e-06, "loss": 0.1393, "num_input_tokens_seen": 58306940, "step": 646 }, { "epoch": 2.9701149425287356, "loss": 0.18637780845165253, "loss_ce": 3.746678703464568e-05, "loss_iou": 0.41015625, "loss_num": 0.037353515625, "loss_xval": 0.1865234375, "num_input_tokens_seen": 58306940, "step": 646 }, { "epoch": 2.974712643678161, "grad_norm": 12.224220904653137, "learning_rate": 5e-06, "loss": 0.1602, "num_input_tokens_seen": 58397184, "step": 647 }, { "epoch": 2.974712643678161, "loss": 0.1820899099111557, "loss_ce": 2.2039628674974665e-05, "loss_iou": 0.34765625, "loss_num": 0.036376953125, "loss_xval": 0.181640625, "num_input_tokens_seen": 58397184, "step": 647 }, { "epoch": 2.979310344827586, "grad_norm": 12.36377678211748, "learning_rate": 5e-06, "loss": 0.1605, "num_input_tokens_seen": 58487644, "step": 648 }, { "epoch": 2.979310344827586, "loss": 0.09505946934223175, "loss_ce": 4.29864194302354e-05, "loss_iou": 0.384765625, "loss_num": 0.01904296875, "loss_xval": 0.09521484375, "num_input_tokens_seen": 58487644, "step": 648 }, { "epoch": 2.9839080459770115, "grad_norm": 44.77338962392459, "learning_rate": 5e-06, "loss": 0.1624, "num_input_tokens_seen": 58578064, "step": 649 }, { "epoch": 2.9839080459770115, "loss": 0.19933252036571503, "loss_ce": 0.00029687874484807253, "loss_iou": 0.4375, "loss_num": 0.039794921875, "loss_xval": 0.19921875, "num_input_tokens_seen": 58578064, "step": 649 }, { "epoch": 2.9885057471264367, "grad_norm": 10.434155317304565, "learning_rate": 5e-06, "loss": 0.1712, "num_input_tokens_seen": 58668520, "step": 650 }, { "epoch": 2.9885057471264367, "loss": 0.17232680320739746, "loss_ce": 2.456527727190405e-05, "loss_iou": 0.349609375, "loss_num": 0.034423828125, "loss_xval": 0.171875, "num_input_tokens_seen": 58668520, "step": 650 }, { "epoch": 2.9931034482758623, "grad_norm": 11.343545620466347, "learning_rate": 5e-06, "loss": 0.1625, "num_input_tokens_seen": 58758840, "step": 651 }, { "epoch": 2.9931034482758623, "loss": 0.17710255086421967, "loss_ce": 2.4299104552483186e-05, "loss_iou": 0.296875, "loss_num": 0.035400390625, "loss_xval": 0.1767578125, "num_input_tokens_seen": 58758840, "step": 651 }, { "epoch": 2.9977011494252874, "grad_norm": 4.426473466923714, "learning_rate": 5e-06, "loss": 0.1199, "num_input_tokens_seen": 58849212, "step": 652 }, { "epoch": 2.9977011494252874, "loss": 0.1025293692946434, "loss_ce": 0.00014289778482634574, "loss_iou": 0.375, "loss_num": 0.0205078125, "loss_xval": 0.1025390625, "num_input_tokens_seen": 58849212, "step": 652 }, { "epoch": 2.9977011494252874, "loss": 0.18885542452335358, "loss_ce": 1.2649360542127397e-05, "loss_iou": 0.396484375, "loss_num": 0.037841796875, "loss_xval": 0.1884765625, "num_input_tokens_seen": 58894312, "step": 652 }, { "epoch": 3.0022988505747126, "grad_norm": 5.109594045617307, "learning_rate": 5e-06, "loss": 0.1499, "num_input_tokens_seen": 58939548, "step": 653 }, { "epoch": 3.0022988505747126, "loss": 0.11101669818162918, "loss_ce": 5.478527600644156e-05, "loss_iou": 0.3671875, "loss_num": 0.022216796875, "loss_xval": 0.11083984375, "num_input_tokens_seen": 58939548, "step": 653 }, { "epoch": 3.0068965517241377, "grad_norm": 10.518496135733074, "learning_rate": 5e-06, "loss": 0.1424, "num_input_tokens_seen": 59029104, "step": 654 }, { "epoch": 3.0068965517241377, "loss": 0.15445493161678314, "loss_ce": 0.00015805772272869945, "loss_iou": 0.4609375, "loss_num": 0.0308837890625, "loss_xval": 0.154296875, "num_input_tokens_seen": 59029104, "step": 654 }, { "epoch": 3.0114942528735633, "grad_norm": 14.317668018009412, "learning_rate": 5e-06, "loss": 0.1034, "num_input_tokens_seen": 59119440, "step": 655 }, { "epoch": 3.0114942528735633, "loss": 0.1157938614487648, "loss_ce": 1.0168273547606077e-05, "loss_iou": 0.431640625, "loss_num": 0.023193359375, "loss_xval": 0.11572265625, "num_input_tokens_seen": 59119440, "step": 655 }, { "epoch": 3.0160919540229885, "grad_norm": 6.462788772652226, "learning_rate": 5e-06, "loss": 0.0712, "num_input_tokens_seen": 59209864, "step": 656 }, { "epoch": 3.0160919540229885, "loss": 0.06505613029003143, "loss_ce": 0.00011471970356069505, "loss_iou": 0.390625, "loss_num": 0.01300048828125, "loss_xval": 0.06494140625, "num_input_tokens_seen": 59209864, "step": 656 }, { "epoch": 3.0206896551724136, "grad_norm": 5.315176931861487, "learning_rate": 5e-06, "loss": 0.0992, "num_input_tokens_seen": 59300316, "step": 657 }, { "epoch": 3.0206896551724136, "loss": 0.064435213804245, "loss_ce": 0.002515045227482915, "loss_iou": 0.326171875, "loss_num": 0.01239013671875, "loss_xval": 0.06201171875, "num_input_tokens_seen": 59300316, "step": 657 }, { "epoch": 3.025287356321839, "grad_norm": 4.182380926387102, "learning_rate": 5e-06, "loss": 0.1232, "num_input_tokens_seen": 59390752, "step": 658 }, { "epoch": 3.025287356321839, "loss": 0.13093848526477814, "loss_ce": 1.8076902051689103e-05, "loss_iou": 0.435546875, "loss_num": 0.026123046875, "loss_xval": 0.130859375, "num_input_tokens_seen": 59390752, "step": 658 }, { "epoch": 3.0298850574712644, "grad_norm": 14.571610617843975, "learning_rate": 5e-06, "loss": 0.0944, "num_input_tokens_seen": 59481036, "step": 659 }, { "epoch": 3.0298850574712644, "loss": 0.08019035309553146, "loss_ce": 8.17138425190933e-05, "loss_iou": 0.359375, "loss_num": 0.0159912109375, "loss_xval": 0.080078125, "num_input_tokens_seen": 59481036, "step": 659 }, { "epoch": 3.0344827586206895, "grad_norm": 16.65916183345602, "learning_rate": 5e-06, "loss": 0.1709, "num_input_tokens_seen": 59571308, "step": 660 }, { "epoch": 3.0344827586206895, "loss": 0.15776613354682922, "loss_ce": 5.127684198669158e-05, "loss_iou": 0.44140625, "loss_num": 0.031494140625, "loss_xval": 0.158203125, "num_input_tokens_seen": 59571308, "step": 660 }, { "epoch": 3.039080459770115, "grad_norm": 3.372941981569247, "learning_rate": 5e-06, "loss": 0.0978, "num_input_tokens_seen": 59661712, "step": 661 }, { "epoch": 3.039080459770115, "loss": 0.0939127653837204, "loss_ce": 4.0693001210456714e-05, "loss_iou": 0.384765625, "loss_num": 0.018798828125, "loss_xval": 0.09375, "num_input_tokens_seen": 59661712, "step": 661 }, { "epoch": 3.0436781609195402, "grad_norm": 7.8989890189247, "learning_rate": 5e-06, "loss": 0.1298, "num_input_tokens_seen": 59752088, "step": 662 }, { "epoch": 3.0436781609195402, "loss": 0.11880500614643097, "loss_ce": 3.058691436308436e-05, "loss_iou": 0.388671875, "loss_num": 0.0238037109375, "loss_xval": 0.11865234375, "num_input_tokens_seen": 59752088, "step": 662 }, { "epoch": 3.0482758620689654, "grad_norm": 9.966522515655097, "learning_rate": 5e-06, "loss": 0.1321, "num_input_tokens_seen": 59842376, "step": 663 }, { "epoch": 3.0482758620689654, "loss": 0.1415707916021347, "loss_ce": 3.027227103302721e-05, "loss_iou": 0.453125, "loss_num": 0.0283203125, "loss_xval": 0.1416015625, "num_input_tokens_seen": 59842376, "step": 663 }, { "epoch": 3.052873563218391, "grad_norm": 14.738658620476503, "learning_rate": 5e-06, "loss": 0.1078, "num_input_tokens_seen": 59932932, "step": 664 }, { "epoch": 3.052873563218391, "loss": 0.15776753425598145, "loss_ce": 2.216372377006337e-05, "loss_iou": 0.390625, "loss_num": 0.031494140625, "loss_xval": 0.158203125, "num_input_tokens_seen": 59932932, "step": 664 }, { "epoch": 3.057471264367816, "grad_norm": 11.861563289243291, "learning_rate": 5e-06, "loss": 0.161, "num_input_tokens_seen": 60023360, "step": 665 }, { "epoch": 3.057471264367816, "loss": 0.15695565938949585, "loss_ce": 3.427857154747471e-05, "loss_iou": 0.373046875, "loss_num": 0.031494140625, "loss_xval": 0.1572265625, "num_input_tokens_seen": 60023360, "step": 665 }, { "epoch": 3.0620689655172413, "grad_norm": 8.820370554295812, "learning_rate": 5e-06, "loss": 0.1466, "num_input_tokens_seen": 60112940, "step": 666 }, { "epoch": 3.0620689655172413, "loss": 0.13253626227378845, "loss_ce": 8.997396798804402e-05, "loss_iou": 0.2890625, "loss_num": 0.0264892578125, "loss_xval": 0.1328125, "num_input_tokens_seen": 60112940, "step": 666 }, { "epoch": 3.066666666666667, "grad_norm": 3.1489432695249526, "learning_rate": 5e-06, "loss": 0.1686, "num_input_tokens_seen": 60203192, "step": 667 }, { "epoch": 3.066666666666667, "loss": 0.14637088775634766, "loss_ce": 8.589012395532336e-06, "loss_iou": 0.439453125, "loss_num": 0.029296875, "loss_xval": 0.146484375, "num_input_tokens_seen": 60203192, "step": 667 }, { "epoch": 3.071264367816092, "grad_norm": 5.702383945793617, "learning_rate": 5e-06, "loss": 0.1081, "num_input_tokens_seen": 60293508, "step": 668 }, { "epoch": 3.071264367816092, "loss": 0.09078279137611389, "loss_ce": 2.3513095584348775e-05, "loss_iou": 0.3828125, "loss_num": 0.0181884765625, "loss_xval": 0.0908203125, "num_input_tokens_seen": 60293508, "step": 668 }, { "epoch": 3.075862068965517, "grad_norm": 21.441294074851363, "learning_rate": 5e-06, "loss": 0.1243, "num_input_tokens_seen": 60383820, "step": 669 }, { "epoch": 3.075862068965517, "loss": 0.10486976057291031, "loss_ce": 1.1363279554643668e-05, "loss_iou": 0.33203125, "loss_num": 0.02099609375, "loss_xval": 0.10498046875, "num_input_tokens_seen": 60383820, "step": 669 }, { "epoch": 3.0804597701149423, "grad_norm": 3.914442501475495, "learning_rate": 5e-06, "loss": 0.193, "num_input_tokens_seen": 60474148, "step": 670 }, { "epoch": 3.0804597701149423, "loss": 0.16388007998466492, "loss_ce": 1.594252353243064e-05, "loss_iou": 0.34765625, "loss_num": 0.03271484375, "loss_xval": 0.1640625, "num_input_tokens_seen": 60474148, "step": 670 }, { "epoch": 3.085057471264368, "grad_norm": 4.9620660179246885, "learning_rate": 5e-06, "loss": 0.1414, "num_input_tokens_seen": 60564532, "step": 671 }, { "epoch": 3.085057471264368, "loss": 0.18953979015350342, "loss_ce": 8.66625050548464e-05, "loss_iou": 0.359375, "loss_num": 0.037841796875, "loss_xval": 0.189453125, "num_input_tokens_seen": 60564532, "step": 671 }, { "epoch": 3.089655172413793, "grad_norm": 16.0733034397525, "learning_rate": 5e-06, "loss": 0.1503, "num_input_tokens_seen": 60654828, "step": 672 }, { "epoch": 3.089655172413793, "loss": 0.14448416233062744, "loss_ce": 2.920163024100475e-05, "loss_iou": 0.349609375, "loss_num": 0.0289306640625, "loss_xval": 0.14453125, "num_input_tokens_seen": 60654828, "step": 672 }, { "epoch": 3.094252873563218, "grad_norm": 2.850437311995024, "learning_rate": 5e-06, "loss": 0.0937, "num_input_tokens_seen": 60745316, "step": 673 }, { "epoch": 3.094252873563218, "loss": 0.08657945692539215, "loss_ce": 0.00012315809726715088, "loss_iou": 0.41015625, "loss_num": 0.017333984375, "loss_xval": 0.08642578125, "num_input_tokens_seen": 60745316, "step": 673 }, { "epoch": 3.098850574712644, "grad_norm": 6.711572483738216, "learning_rate": 5e-06, "loss": 0.1611, "num_input_tokens_seen": 60835616, "step": 674 }, { "epoch": 3.098850574712644, "loss": 0.11418451368808746, "loss_ce": 1.8256445400766097e-05, "loss_iou": 0.27734375, "loss_num": 0.0228271484375, "loss_xval": 0.1142578125, "num_input_tokens_seen": 60835616, "step": 674 }, { "epoch": 3.103448275862069, "grad_norm": 2.9732106658271267, "learning_rate": 5e-06, "loss": 0.2022, "num_input_tokens_seen": 60926020, "step": 675 }, { "epoch": 3.103448275862069, "loss": 0.22187387943267822, "loss_ce": 0.0012928310316056013, "loss_iou": 0.408203125, "loss_num": 0.044189453125, "loss_xval": 0.220703125, "num_input_tokens_seen": 60926020, "step": 675 }, { "epoch": 3.108045977011494, "grad_norm": 22.393897827054342, "learning_rate": 5e-06, "loss": 0.1741, "num_input_tokens_seen": 61016500, "step": 676 }, { "epoch": 3.108045977011494, "loss": 0.17077524960041046, "loss_ce": 0.0004261271096765995, "loss_iou": 0.34375, "loss_num": 0.033935546875, "loss_xval": 0.169921875, "num_input_tokens_seen": 61016500, "step": 676 }, { "epoch": 3.1126436781609197, "grad_norm": 4.958182554412022, "learning_rate": 5e-06, "loss": 0.1796, "num_input_tokens_seen": 61106876, "step": 677 }, { "epoch": 3.1126436781609197, "loss": 0.17057394981384277, "loss_ce": 8.749846165301278e-05, "loss_iou": 0.4375, "loss_num": 0.0341796875, "loss_xval": 0.1708984375, "num_input_tokens_seen": 61106876, "step": 677 }, { "epoch": 3.117241379310345, "grad_norm": 5.763483977522711, "learning_rate": 5e-06, "loss": 0.1333, "num_input_tokens_seen": 61197312, "step": 678 }, { "epoch": 3.117241379310345, "loss": 0.16354021430015564, "loss_ce": 0.00014910995378158987, "loss_iou": 0.408203125, "loss_num": 0.03271484375, "loss_xval": 0.1630859375, "num_input_tokens_seen": 61197312, "step": 678 }, { "epoch": 3.12183908045977, "grad_norm": 22.61759363539356, "learning_rate": 5e-06, "loss": 0.1438, "num_input_tokens_seen": 61287748, "step": 679 }, { "epoch": 3.12183908045977, "loss": 0.16320019960403442, "loss_ce": 8.373754099011421e-05, "loss_iou": 0.39453125, "loss_num": 0.03271484375, "loss_xval": 0.1630859375, "num_input_tokens_seen": 61287748, "step": 679 }, { "epoch": 3.1264367816091956, "grad_norm": 21.37560854861523, "learning_rate": 5e-06, "loss": 0.157, "num_input_tokens_seen": 61378228, "step": 680 }, { "epoch": 3.1264367816091956, "loss": 0.1652674674987793, "loss_ce": 4.528817953541875e-05, "loss_iou": 0.41796875, "loss_num": 0.033203125, "loss_xval": 0.1650390625, "num_input_tokens_seen": 61378228, "step": 680 }, { "epoch": 3.1310344827586207, "grad_norm": 9.912277304010486, "learning_rate": 5e-06, "loss": 0.168, "num_input_tokens_seen": 61468564, "step": 681 }, { "epoch": 3.1310344827586207, "loss": 0.14338386058807373, "loss_ce": 7.332136738114059e-05, "loss_iou": 0.36328125, "loss_num": 0.0286865234375, "loss_xval": 0.1435546875, "num_input_tokens_seen": 61468564, "step": 681 }, { "epoch": 3.135632183908046, "grad_norm": 14.24831883027225, "learning_rate": 5e-06, "loss": 0.1808, "num_input_tokens_seen": 61559020, "step": 682 }, { "epoch": 3.135632183908046, "loss": 0.15874823927879333, "loss_ce": 0.000117871692054905, "loss_iou": 0.341796875, "loss_num": 0.03173828125, "loss_xval": 0.158203125, "num_input_tokens_seen": 61559020, "step": 682 }, { "epoch": 3.1402298850574715, "grad_norm": 10.805661680356696, "learning_rate": 5e-06, "loss": 0.1006, "num_input_tokens_seen": 61649428, "step": 683 }, { "epoch": 3.1402298850574715, "loss": 0.11267475038766861, "loss_ce": 8.014314516913146e-05, "loss_iou": 0.427734375, "loss_num": 0.0224609375, "loss_xval": 0.11279296875, "num_input_tokens_seen": 61649428, "step": 683 }, { "epoch": 3.1448275862068966, "grad_norm": 10.580707465651063, "learning_rate": 5e-06, "loss": 0.1058, "num_input_tokens_seen": 61739904, "step": 684 }, { "epoch": 3.1448275862068966, "loss": 0.11008670926094055, "loss_ce": 2.5071400159504265e-05, "loss_iou": 0.435546875, "loss_num": 0.02197265625, "loss_xval": 0.10986328125, "num_input_tokens_seen": 61739904, "step": 684 }, { "epoch": 3.1494252873563218, "grad_norm": 9.787018998948723, "learning_rate": 5e-06, "loss": 0.1187, "num_input_tokens_seen": 61830168, "step": 685 }, { "epoch": 3.1494252873563218, "loss": 0.11129388213157654, "loss_ce": 7.25619393051602e-05, "loss_iou": 0.44921875, "loss_num": 0.022216796875, "loss_xval": 0.111328125, "num_input_tokens_seen": 61830168, "step": 685 }, { "epoch": 3.154022988505747, "grad_norm": 25.016543912767162, "learning_rate": 5e-06, "loss": 0.1306, "num_input_tokens_seen": 61920580, "step": 686 }, { "epoch": 3.154022988505747, "loss": 0.11655561625957489, "loss_ce": 8.985316526377574e-06, "loss_iou": 0.380859375, "loss_num": 0.0233154296875, "loss_xval": 0.11669921875, "num_input_tokens_seen": 61920580, "step": 686 }, { "epoch": 3.1586206896551725, "grad_norm": 27.789033510951985, "learning_rate": 5e-06, "loss": 0.104, "num_input_tokens_seen": 62010984, "step": 687 }, { "epoch": 3.1586206896551725, "loss": 0.10948251187801361, "loss_ce": 1.596645415702369e-05, "loss_iou": 0.421875, "loss_num": 0.0218505859375, "loss_xval": 0.109375, "num_input_tokens_seen": 62010984, "step": 687 }, { "epoch": 3.1632183908045977, "grad_norm": 3.436728324433976, "learning_rate": 5e-06, "loss": 0.1108, "num_input_tokens_seen": 62101364, "step": 688 }, { "epoch": 3.1632183908045977, "loss": 0.12181434035301208, "loss_ce": 0.0001102387614082545, "loss_iou": 0.3671875, "loss_num": 0.0244140625, "loss_xval": 0.12158203125, "num_input_tokens_seen": 62101364, "step": 688 }, { "epoch": 3.167816091954023, "grad_norm": 9.993289616742318, "learning_rate": 5e-06, "loss": 0.1475, "num_input_tokens_seen": 62190176, "step": 689 }, { "epoch": 3.167816091954023, "loss": 0.14744696021080017, "loss_ce": 0.003327697515487671, "loss_iou": 0.375, "loss_num": 0.02880859375, "loss_xval": 0.14453125, "num_input_tokens_seen": 62190176, "step": 689 }, { "epoch": 3.1724137931034484, "grad_norm": 12.827182492237142, "learning_rate": 5e-06, "loss": 0.1437, "num_input_tokens_seen": 62280480, "step": 690 }, { "epoch": 3.1724137931034484, "loss": 0.15000127255916595, "loss_ce": 3.789434413192794e-05, "loss_iou": 0.42578125, "loss_num": 0.030029296875, "loss_xval": 0.150390625, "num_input_tokens_seen": 62280480, "step": 690 }, { "epoch": 3.1770114942528735, "grad_norm": 2.7933249990009013, "learning_rate": 5e-06, "loss": 0.1374, "num_input_tokens_seen": 62370884, "step": 691 }, { "epoch": 3.1770114942528735, "loss": 0.13926616311073303, "loss_ce": 0.00022808580251876265, "loss_iou": 0.357421875, "loss_num": 0.02783203125, "loss_xval": 0.138671875, "num_input_tokens_seen": 62370884, "step": 691 }, { "epoch": 3.1816091954022987, "grad_norm": 3.400237110893925, "learning_rate": 5e-06, "loss": 0.1168, "num_input_tokens_seen": 62461280, "step": 692 }, { "epoch": 3.1816091954022987, "loss": 0.07589490711688995, "loss_ce": 5.872425026609562e-05, "loss_iou": 0.392578125, "loss_num": 0.01513671875, "loss_xval": 0.07568359375, "num_input_tokens_seen": 62461280, "step": 692 }, { "epoch": 3.1862068965517243, "grad_norm": 10.582817694578983, "learning_rate": 5e-06, "loss": 0.137, "num_input_tokens_seen": 62551580, "step": 693 }, { "epoch": 3.1862068965517243, "loss": 0.14650990068912506, "loss_ce": 2.5522436772007495e-05, "loss_iou": 0.462890625, "loss_num": 0.029296875, "loss_xval": 0.146484375, "num_input_tokens_seen": 62551580, "step": 693 }, { "epoch": 3.1908045977011494, "grad_norm": 13.951673209631243, "learning_rate": 5e-06, "loss": 0.0746, "num_input_tokens_seen": 62641896, "step": 694 }, { "epoch": 3.1908045977011494, "loss": 0.10015565156936646, "loss_ce": 0.00011902584810741246, "loss_iou": 0.380859375, "loss_num": 0.02001953125, "loss_xval": 0.10009765625, "num_input_tokens_seen": 62641896, "step": 694 }, { "epoch": 3.1954022988505746, "grad_norm": 8.105786030944435, "learning_rate": 5e-06, "loss": 0.171, "num_input_tokens_seen": 62731508, "step": 695 }, { "epoch": 3.1954022988505746, "loss": 0.2043384611606598, "loss_ce": 0.0001148240189650096, "loss_iou": 0.37109375, "loss_num": 0.040771484375, "loss_xval": 0.2041015625, "num_input_tokens_seen": 62731508, "step": 695 }, { "epoch": 3.2, "grad_norm": 29.137424088774004, "learning_rate": 5e-06, "loss": 0.1248, "num_input_tokens_seen": 62821956, "step": 696 }, { "epoch": 3.2, "loss": 0.13569773733615875, "loss_ce": 7.761328015476465e-05, "loss_iou": 0.267578125, "loss_num": 0.027099609375, "loss_xval": 0.1357421875, "num_input_tokens_seen": 62821956, "step": 696 }, { "epoch": 3.2045977011494253, "grad_norm": 15.53880122139471, "learning_rate": 5e-06, "loss": 0.1688, "num_input_tokens_seen": 62912308, "step": 697 }, { "epoch": 3.2045977011494253, "loss": 0.1253717541694641, "loss_ce": 8.183569298125803e-05, "loss_iou": 0.423828125, "loss_num": 0.0250244140625, "loss_xval": 0.125, "num_input_tokens_seen": 62912308, "step": 697 }, { "epoch": 3.2091954022988505, "grad_norm": 10.527367140209382, "learning_rate": 5e-06, "loss": 0.1387, "num_input_tokens_seen": 63002620, "step": 698 }, { "epoch": 3.2091954022988505, "loss": 0.1609758883714676, "loss_ce": 5.669322854373604e-05, "loss_iou": 0.32421875, "loss_num": 0.0322265625, "loss_xval": 0.1611328125, "num_input_tokens_seen": 63002620, "step": 698 }, { "epoch": 3.213793103448276, "grad_norm": 18.955512801085465, "learning_rate": 5e-06, "loss": 0.0993, "num_input_tokens_seen": 63093068, "step": 699 }, { "epoch": 3.213793103448276, "loss": 0.09753237664699554, "loss_ce": 2.871520700864494e-05, "loss_iou": 0.431640625, "loss_num": 0.01953125, "loss_xval": 0.09765625, "num_input_tokens_seen": 63093068, "step": 699 }, { "epoch": 3.218390804597701, "grad_norm": 19.835239072673776, "learning_rate": 5e-06, "loss": 0.1563, "num_input_tokens_seen": 63183464, "step": 700 }, { "epoch": 3.218390804597701, "loss": 0.14680443704128265, "loss_ce": 0.00010644025314832106, "loss_iou": 0.306640625, "loss_num": 0.029296875, "loss_xval": 0.146484375, "num_input_tokens_seen": 63183464, "step": 700 }, { "epoch": 3.2229885057471264, "grad_norm": 8.057607073965688, "learning_rate": 5e-06, "loss": 0.1078, "num_input_tokens_seen": 63273704, "step": 701 }, { "epoch": 3.2229885057471264, "loss": 0.10005685687065125, "loss_ce": 1.2611541023943573e-05, "loss_iou": 0.28125, "loss_num": 0.02001953125, "loss_xval": 0.10009765625, "num_input_tokens_seen": 63273704, "step": 701 }, { "epoch": 3.227586206896552, "grad_norm": 8.719404497052812, "learning_rate": 5e-06, "loss": 0.1474, "num_input_tokens_seen": 63363992, "step": 702 }, { "epoch": 3.227586206896552, "loss": 0.12116096913814545, "loss_ce": 3.67055217793677e-05, "loss_iou": 0.48828125, "loss_num": 0.0242919921875, "loss_xval": 0.12109375, "num_input_tokens_seen": 63363992, "step": 702 }, { "epoch": 3.232183908045977, "grad_norm": 11.659344031825642, "learning_rate": 5e-06, "loss": 0.1215, "num_input_tokens_seen": 63454440, "step": 703 }, { "epoch": 3.232183908045977, "loss": 0.0965924859046936, "loss_ce": 1.9609713490353897e-05, "loss_iou": 0.392578125, "loss_num": 0.019287109375, "loss_xval": 0.0966796875, "num_input_tokens_seen": 63454440, "step": 703 }, { "epoch": 3.2367816091954023, "grad_norm": 8.032198591188585, "learning_rate": 5e-06, "loss": 0.1404, "num_input_tokens_seen": 63544912, "step": 704 }, { "epoch": 3.2367816091954023, "loss": 0.1306506246328354, "loss_ce": 6.59109718981199e-05, "loss_iou": 0.427734375, "loss_num": 0.026123046875, "loss_xval": 0.130859375, "num_input_tokens_seen": 63544912, "step": 704 }, { "epoch": 3.2413793103448274, "grad_norm": 6.297596133480101, "learning_rate": 5e-06, "loss": 0.1689, "num_input_tokens_seen": 63634348, "step": 705 }, { "epoch": 3.2413793103448274, "loss": 0.19925595819950104, "loss_ce": 9.824423614190891e-05, "loss_iou": 0.50390625, "loss_num": 0.039794921875, "loss_xval": 0.19921875, "num_input_tokens_seen": 63634348, "step": 705 }, { "epoch": 3.245977011494253, "grad_norm": 34.35621373849989, "learning_rate": 5e-06, "loss": 0.2007, "num_input_tokens_seen": 63724764, "step": 706 }, { "epoch": 3.245977011494253, "loss": 0.22871080040931702, "loss_ce": 0.0001493933523306623, "loss_iou": 0.33984375, "loss_num": 0.045654296875, "loss_xval": 0.228515625, "num_input_tokens_seen": 63724764, "step": 706 }, { "epoch": 3.250574712643678, "grad_norm": 11.328882741656939, "learning_rate": 5e-06, "loss": 0.1145, "num_input_tokens_seen": 63815068, "step": 707 }, { "epoch": 3.250574712643678, "loss": 0.12251278012990952, "loss_ce": 4.574016566039063e-05, "loss_iou": 0.396484375, "loss_num": 0.0245361328125, "loss_xval": 0.12255859375, "num_input_tokens_seen": 63815068, "step": 707 }, { "epoch": 3.2551724137931033, "grad_norm": 2.795600172154129, "learning_rate": 5e-06, "loss": 0.1224, "num_input_tokens_seen": 63905392, "step": 708 }, { "epoch": 3.2551724137931033, "loss": 0.1475527286529541, "loss_ce": 0.00013756597763858736, "loss_iou": 0.36328125, "loss_num": 0.0294189453125, "loss_xval": 0.1474609375, "num_input_tokens_seen": 63905392, "step": 708 }, { "epoch": 3.259770114942529, "grad_norm": 6.617540471617317, "learning_rate": 5e-06, "loss": 0.1119, "num_input_tokens_seen": 63995760, "step": 709 }, { "epoch": 3.259770114942529, "loss": 0.1187780499458313, "loss_ce": 0.00021725612168665975, "loss_iou": 0.369140625, "loss_num": 0.023681640625, "loss_xval": 0.11865234375, "num_input_tokens_seen": 63995760, "step": 709 }, { "epoch": 3.264367816091954, "grad_norm": 3.758683889886017, "learning_rate": 5e-06, "loss": 0.0999, "num_input_tokens_seen": 64086168, "step": 710 }, { "epoch": 3.264367816091954, "loss": 0.12687164545059204, "loss_ce": 0.0001016306850942783, "loss_iou": 0.451171875, "loss_num": 0.025390625, "loss_xval": 0.126953125, "num_input_tokens_seen": 64086168, "step": 710 }, { "epoch": 3.268965517241379, "grad_norm": 7.276014322969384, "learning_rate": 5e-06, "loss": 0.0923, "num_input_tokens_seen": 64174996, "step": 711 }, { "epoch": 3.268965517241379, "loss": 0.10789632797241211, "loss_ce": 1.669170342211146e-05, "loss_iou": 0.419921875, "loss_num": 0.0216064453125, "loss_xval": 0.10791015625, "num_input_tokens_seen": 64174996, "step": 711 }, { "epoch": 3.2735632183908048, "grad_norm": 9.531874918868848, "learning_rate": 5e-06, "loss": 0.1484, "num_input_tokens_seen": 64265440, "step": 712 }, { "epoch": 3.2735632183908048, "loss": 0.14500431716442108, "loss_ce": 4.581706161843613e-05, "loss_iou": 0.427734375, "loss_num": 0.029052734375, "loss_xval": 0.14453125, "num_input_tokens_seen": 64265440, "step": 712 }, { "epoch": 3.27816091954023, "grad_norm": 4.769697015733171, "learning_rate": 5e-06, "loss": 0.1576, "num_input_tokens_seen": 64355800, "step": 713 }, { "epoch": 3.27816091954023, "loss": 0.1640397310256958, "loss_ce": 6.877434498164803e-05, "loss_iou": 0.421875, "loss_num": 0.03271484375, "loss_xval": 0.1640625, "num_input_tokens_seen": 64355800, "step": 713 }, { "epoch": 3.282758620689655, "grad_norm": 10.346574150082782, "learning_rate": 5e-06, "loss": 0.147, "num_input_tokens_seen": 64446020, "step": 714 }, { "epoch": 3.282758620689655, "loss": 0.13765498995780945, "loss_ce": 2.0713385310955346e-05, "loss_iou": 0.419921875, "loss_num": 0.0274658203125, "loss_xval": 0.1376953125, "num_input_tokens_seen": 64446020, "step": 714 }, { "epoch": 3.2873563218390807, "grad_norm": 3.797921190886921, "learning_rate": 5e-06, "loss": 0.1116, "num_input_tokens_seen": 64536336, "step": 715 }, { "epoch": 3.2873563218390807, "loss": 0.11108782887458801, "loss_ce": 6.487606151495129e-05, "loss_iou": 0.34375, "loss_num": 0.022216796875, "loss_xval": 0.11083984375, "num_input_tokens_seen": 64536336, "step": 715 }, { "epoch": 3.291954022988506, "grad_norm": 7.680464395220625, "learning_rate": 5e-06, "loss": 0.1062, "num_input_tokens_seen": 64626672, "step": 716 }, { "epoch": 3.291954022988506, "loss": 0.0874263346195221, "loss_ce": 8.502809214405715e-05, "loss_iou": 0.296875, "loss_num": 0.0174560546875, "loss_xval": 0.08740234375, "num_input_tokens_seen": 64626672, "step": 716 }, { "epoch": 3.296551724137931, "grad_norm": 29.00223362689999, "learning_rate": 5e-06, "loss": 0.1172, "num_input_tokens_seen": 64717188, "step": 717 }, { "epoch": 3.296551724137931, "loss": 0.09892787039279938, "loss_ce": 2.0401252186275087e-05, "loss_iou": 0.380859375, "loss_num": 0.019775390625, "loss_xval": 0.09912109375, "num_input_tokens_seen": 64717188, "step": 717 }, { "epoch": 3.301149425287356, "grad_norm": 2.963145490619517, "learning_rate": 5e-06, "loss": 0.1561, "num_input_tokens_seen": 64806948, "step": 718 }, { "epoch": 3.301149425287356, "loss": 0.13925355672836304, "loss_ce": 1.7103782738558948e-05, "loss_iou": 0.34375, "loss_num": 0.02783203125, "loss_xval": 0.1396484375, "num_input_tokens_seen": 64806948, "step": 718 }, { "epoch": 3.3057471264367817, "grad_norm": 3.512627488510696, "learning_rate": 5e-06, "loss": 0.0934, "num_input_tokens_seen": 64897416, "step": 719 }, { "epoch": 3.3057471264367817, "loss": 0.09657322615385056, "loss_ce": 1.5609241017955355e-05, "loss_iou": 0.34375, "loss_num": 0.019287109375, "loss_xval": 0.0966796875, "num_input_tokens_seen": 64897416, "step": 719 }, { "epoch": 3.310344827586207, "grad_norm": 9.222756441936497, "learning_rate": 5e-06, "loss": 0.1217, "num_input_tokens_seen": 64986932, "step": 720 }, { "epoch": 3.310344827586207, "loss": 0.11704221367835999, "loss_ce": 3.782210478675552e-05, "loss_iou": 0.2890625, "loss_num": 0.0234375, "loss_xval": 0.1171875, "num_input_tokens_seen": 64986932, "step": 720 }, { "epoch": 3.314942528735632, "grad_norm": 9.715496271101662, "learning_rate": 5e-06, "loss": 0.1336, "num_input_tokens_seen": 65077284, "step": 721 }, { "epoch": 3.314942528735632, "loss": 0.09352642297744751, "loss_ce": 9.685942495707422e-05, "loss_iou": 0.38671875, "loss_num": 0.0186767578125, "loss_xval": 0.09326171875, "num_input_tokens_seen": 65077284, "step": 721 }, { "epoch": 3.3195402298850576, "grad_norm": 5.296050089954301, "learning_rate": 5e-06, "loss": 0.1506, "num_input_tokens_seen": 65167528, "step": 722 }, { "epoch": 3.3195402298850576, "loss": 0.19446220993995667, "loss_ce": 4.202499439998064e-06, "loss_iou": 0.361328125, "loss_num": 0.038818359375, "loss_xval": 0.1943359375, "num_input_tokens_seen": 65167528, "step": 722 }, { "epoch": 3.3241379310344827, "grad_norm": 4.298113327898983, "learning_rate": 5e-06, "loss": 0.1306, "num_input_tokens_seen": 65257968, "step": 723 }, { "epoch": 3.3241379310344827, "loss": 0.12031020224094391, "loss_ce": 9.91147771856049e-06, "loss_iou": 0.41015625, "loss_num": 0.0240478515625, "loss_xval": 0.1201171875, "num_input_tokens_seen": 65257968, "step": 723 }, { "epoch": 3.328735632183908, "grad_norm": 5.779893006341727, "learning_rate": 5e-06, "loss": 0.1416, "num_input_tokens_seen": 65348448, "step": 724 }, { "epoch": 3.328735632183908, "loss": 0.19137105345726013, "loss_ce": 5.6363111070822924e-05, "loss_iou": 0.34375, "loss_num": 0.038330078125, "loss_xval": 0.19140625, "num_input_tokens_seen": 65348448, "step": 724 }, { "epoch": 3.3333333333333335, "grad_norm": 22.187024011658394, "learning_rate": 5e-06, "loss": 0.0947, "num_input_tokens_seen": 65438876, "step": 725 }, { "epoch": 3.3333333333333335, "loss": 0.06496837735176086, "loss_ce": 2.6973326384904794e-05, "loss_iou": 0.359375, "loss_num": 0.01300048828125, "loss_xval": 0.06494140625, "num_input_tokens_seen": 65438876, "step": 725 }, { "epoch": 3.3379310344827586, "grad_norm": 12.062384100708867, "learning_rate": 5e-06, "loss": 0.1188, "num_input_tokens_seen": 65529280, "step": 726 }, { "epoch": 3.3379310344827586, "loss": 0.1274305284023285, "loss_ce": 1.9637809600681067e-05, "loss_iou": 0.359375, "loss_num": 0.025390625, "loss_xval": 0.126953125, "num_input_tokens_seen": 65529280, "step": 726 }, { "epoch": 3.342528735632184, "grad_norm": 3.5029991469109394, "learning_rate": 5e-06, "loss": 0.1202, "num_input_tokens_seen": 65619760, "step": 727 }, { "epoch": 3.342528735632184, "loss": 0.11032354086637497, "loss_ce": 1.7757560272002593e-05, "loss_iou": 0.36328125, "loss_num": 0.0220947265625, "loss_xval": 0.1103515625, "num_input_tokens_seen": 65619760, "step": 727 }, { "epoch": 3.3471264367816094, "grad_norm": 4.52005838961759, "learning_rate": 5e-06, "loss": 0.1815, "num_input_tokens_seen": 65710148, "step": 728 }, { "epoch": 3.3471264367816094, "loss": 0.2110876739025116, "loss_ce": 0.00016542623052373528, "loss_iou": 0.3203125, "loss_num": 0.042236328125, "loss_xval": 0.2109375, "num_input_tokens_seen": 65710148, "step": 728 }, { "epoch": 3.3517241379310345, "grad_norm": 6.440524482881178, "learning_rate": 5e-06, "loss": 0.1338, "num_input_tokens_seen": 65800676, "step": 729 }, { "epoch": 3.3517241379310345, "loss": 0.10761566460132599, "loss_ce": 1.0684560038498603e-05, "loss_iou": 0.458984375, "loss_num": 0.021484375, "loss_xval": 0.107421875, "num_input_tokens_seen": 65800676, "step": 729 }, { "epoch": 3.3563218390804597, "grad_norm": 4.6993643427058425, "learning_rate": 5e-06, "loss": 0.1129, "num_input_tokens_seen": 65890984, "step": 730 }, { "epoch": 3.3563218390804597, "loss": 0.15184611082077026, "loss_ce": 0.003957913722842932, "loss_iou": 0.421875, "loss_num": 0.029541015625, "loss_xval": 0.1474609375, "num_input_tokens_seen": 65890984, "step": 730 }, { "epoch": 3.360919540229885, "grad_norm": 5.235041599676305, "learning_rate": 5e-06, "loss": 0.0842, "num_input_tokens_seen": 65981292, "step": 731 }, { "epoch": 3.360919540229885, "loss": 0.08815214037895203, "loss_ce": 6.314841448329389e-05, "loss_iou": 0.341796875, "loss_num": 0.017578125, "loss_xval": 0.087890625, "num_input_tokens_seen": 65981292, "step": 731 }, { "epoch": 3.3655172413793104, "grad_norm": 2.2738505705233805, "learning_rate": 5e-06, "loss": 0.1447, "num_input_tokens_seen": 66071596, "step": 732 }, { "epoch": 3.3655172413793104, "loss": 0.10933637619018555, "loss_ce": 2.240698267996777e-05, "loss_iou": 0.33203125, "loss_num": 0.0218505859375, "loss_xval": 0.109375, "num_input_tokens_seen": 66071596, "step": 732 }, { "epoch": 3.3701149425287356, "grad_norm": 3.85647849045901, "learning_rate": 5e-06, "loss": 0.1753, "num_input_tokens_seen": 66162068, "step": 733 }, { "epoch": 3.3701149425287356, "loss": 0.20265939831733704, "loss_ce": 8.371137664653361e-05, "loss_iou": 0.4140625, "loss_num": 0.04052734375, "loss_xval": 0.2021484375, "num_input_tokens_seen": 66162068, "step": 733 }, { "epoch": 3.374712643678161, "grad_norm": 8.57033500179484, "learning_rate": 5e-06, "loss": 0.1791, "num_input_tokens_seen": 66252320, "step": 734 }, { "epoch": 3.374712643678161, "loss": 0.14232100546360016, "loss_ce": 1.754688855726272e-05, "loss_iou": 0.3984375, "loss_num": 0.0284423828125, "loss_xval": 0.142578125, "num_input_tokens_seen": 66252320, "step": 734 }, { "epoch": 3.3793103448275863, "grad_norm": 9.318942386130336, "learning_rate": 5e-06, "loss": 0.1153, "num_input_tokens_seen": 66342832, "step": 735 }, { "epoch": 3.3793103448275863, "loss": 0.11867457628250122, "loss_ce": 2.2227392037166283e-05, "loss_iou": 0.40234375, "loss_num": 0.0238037109375, "loss_xval": 0.11865234375, "num_input_tokens_seen": 66342832, "step": 735 }, { "epoch": 3.3839080459770114, "grad_norm": 19.001081916459317, "learning_rate": 5e-06, "loss": 0.1317, "num_input_tokens_seen": 66433196, "step": 736 }, { "epoch": 3.3839080459770114, "loss": 0.12269207835197449, "loss_ce": 1.1412482308514882e-05, "loss_iou": 0.2890625, "loss_num": 0.0245361328125, "loss_xval": 0.12255859375, "num_input_tokens_seen": 66433196, "step": 736 }, { "epoch": 3.3885057471264366, "grad_norm": 24.510560741814796, "learning_rate": 5e-06, "loss": 0.1506, "num_input_tokens_seen": 66523456, "step": 737 }, { "epoch": 3.3885057471264366, "loss": 0.12029193341732025, "loss_ce": 6.896796548971906e-06, "loss_iou": 0.38671875, "loss_num": 0.0240478515625, "loss_xval": 0.1201171875, "num_input_tokens_seen": 66523456, "step": 737 }, { "epoch": 3.393103448275862, "grad_norm": 5.6235077208306095, "learning_rate": 5e-06, "loss": 0.1803, "num_input_tokens_seen": 66613868, "step": 738 }, { "epoch": 3.393103448275862, "loss": 0.16011008620262146, "loss_ce": 1.4873921827529557e-05, "loss_iou": 0.40625, "loss_num": 0.031982421875, "loss_xval": 0.16015625, "num_input_tokens_seen": 66613868, "step": 738 }, { "epoch": 3.3977011494252873, "grad_norm": 5.027102839955047, "learning_rate": 5e-06, "loss": 0.1569, "num_input_tokens_seen": 66704308, "step": 739 }, { "epoch": 3.3977011494252873, "loss": 0.17483371496200562, "loss_ce": 0.00015872399671934545, "loss_iou": 0.31640625, "loss_num": 0.034912109375, "loss_xval": 0.1748046875, "num_input_tokens_seen": 66704308, "step": 739 }, { "epoch": 3.4022988505747125, "grad_norm": 7.661403465008269, "learning_rate": 5e-06, "loss": 0.1496, "num_input_tokens_seen": 66794608, "step": 740 }, { "epoch": 3.4022988505747125, "loss": 0.1427173763513565, "loss_ce": 1.7180107533931732e-05, "loss_iou": 0.435546875, "loss_num": 0.028564453125, "loss_xval": 0.142578125, "num_input_tokens_seen": 66794608, "step": 740 }, { "epoch": 3.406896551724138, "grad_norm": 10.312505826375784, "learning_rate": 5e-06, "loss": 0.1326, "num_input_tokens_seen": 66884952, "step": 741 }, { "epoch": 3.406896551724138, "loss": 0.19520951807498932, "loss_ce": 1.9079348930972628e-05, "loss_iou": 0.361328125, "loss_num": 0.0390625, "loss_xval": 0.1953125, "num_input_tokens_seen": 66884952, "step": 741 }, { "epoch": 3.4114942528735632, "grad_norm": 13.89068989533051, "learning_rate": 5e-06, "loss": 0.1618, "num_input_tokens_seen": 66975348, "step": 742 }, { "epoch": 3.4114942528735632, "loss": 0.15027475357055664, "loss_ce": 0.00012826563033740968, "loss_iou": 0.439453125, "loss_num": 0.030029296875, "loss_xval": 0.150390625, "num_input_tokens_seen": 66975348, "step": 742 }, { "epoch": 3.4160919540229884, "grad_norm": 4.196181753619327, "learning_rate": 5e-06, "loss": 0.161, "num_input_tokens_seen": 67065664, "step": 743 }, { "epoch": 3.4160919540229884, "loss": 0.13950249552726746, "loss_ce": 0.00028130554710514843, "loss_iou": 0.390625, "loss_num": 0.02783203125, "loss_xval": 0.1396484375, "num_input_tokens_seen": 67065664, "step": 743 }, { "epoch": 3.420689655172414, "grad_norm": 211.3346828186652, "learning_rate": 5e-06, "loss": 0.137, "num_input_tokens_seen": 67155980, "step": 744 }, { "epoch": 3.420689655172414, "loss": 0.17403674125671387, "loss_ce": 2.5510042178211734e-05, "loss_iou": 0.3984375, "loss_num": 0.03466796875, "loss_xval": 0.173828125, "num_input_tokens_seen": 67155980, "step": 744 }, { "epoch": 3.425287356321839, "grad_norm": 13.754629395643422, "learning_rate": 5e-06, "loss": 0.1489, "num_input_tokens_seen": 67246368, "step": 745 }, { "epoch": 3.425287356321839, "loss": 0.13543261587619781, "loss_ce": 0.0008653545519337058, "loss_iou": 0.38671875, "loss_num": 0.02685546875, "loss_xval": 0.134765625, "num_input_tokens_seen": 67246368, "step": 745 }, { "epoch": 3.4298850574712643, "grad_norm": 3.5887963625227863, "learning_rate": 5e-06, "loss": 0.0877, "num_input_tokens_seen": 67336752, "step": 746 }, { "epoch": 3.4298850574712643, "loss": 0.08411843329668045, "loss_ce": 1.1989444828941487e-05, "loss_iou": 0.33984375, "loss_num": 0.016845703125, "loss_xval": 0.083984375, "num_input_tokens_seen": 67336752, "step": 746 }, { "epoch": 3.43448275862069, "grad_norm": 7.58822953581897, "learning_rate": 5e-06, "loss": 0.1642, "num_input_tokens_seen": 67427208, "step": 747 }, { "epoch": 3.43448275862069, "loss": 0.15234288573265076, "loss_ce": 6.0174254031153396e-05, "loss_iou": 0.376953125, "loss_num": 0.0303955078125, "loss_xval": 0.15234375, "num_input_tokens_seen": 67427208, "step": 747 }, { "epoch": 3.439080459770115, "grad_norm": 11.338073894134098, "learning_rate": 5e-06, "loss": 0.1288, "num_input_tokens_seen": 67517560, "step": 748 }, { "epoch": 3.439080459770115, "loss": 0.13652345538139343, "loss_ce": 1.832299676607363e-05, "loss_iou": 0.296875, "loss_num": 0.02734375, "loss_xval": 0.13671875, "num_input_tokens_seen": 67517560, "step": 748 }, { "epoch": 3.44367816091954, "grad_norm": 6.967548952942209, "learning_rate": 5e-06, "loss": 0.1256, "num_input_tokens_seen": 67607920, "step": 749 }, { "epoch": 3.44367816091954, "loss": 0.08051057159900665, "loss_ce": 2.0457886421354488e-05, "loss_iou": 0.40234375, "loss_num": 0.01611328125, "loss_xval": 0.08056640625, "num_input_tokens_seen": 67607920, "step": 749 }, { "epoch": 3.4482758620689653, "grad_norm": 6.965059583863805, "learning_rate": 5e-06, "loss": 0.1495, "num_input_tokens_seen": 67698360, "step": 750 }, { "epoch": 3.4482758620689653, "eval_seeclick_CIoU": 0.484468474984169, "eval_seeclick_GIoU": 0.467425137758255, "eval_seeclick_IoU": 0.5228271484375, "eval_seeclick_MAE_all": 0.05892105959355831, "eval_seeclick_MAE_h": 0.04424383118748665, "eval_seeclick_MAE_w": 0.1074240393936634, "eval_seeclick_MAE_x_boxes": 0.10459480062127113, "eval_seeclick_MAE_y_boxes": 0.04638782888650894, "eval_seeclick_NUM_probability": 0.9999992251396179, "eval_seeclick_inside_bbox": 0.8764204680919647, "eval_seeclick_loss": 0.3601701557636261, "eval_seeclick_loss_ce": 0.06872005760669708, "eval_seeclick_loss_iou": 0.46417236328125, "eval_seeclick_loss_num": 0.061737060546875, "eval_seeclick_loss_xval": 0.30859375, "eval_seeclick_runtime": 76.4319, "eval_seeclick_samples_per_second": 0.563, "eval_seeclick_steps_per_second": 0.026, "num_input_tokens_seen": 67698360, "step": 750 }, { "epoch": 3.4482758620689653, "eval_icons_CIoU": 0.5398552417755127, "eval_icons_GIoU": 0.5399449467658997, "eval_icons_IoU": 0.5800551474094391, "eval_icons_MAE_all": 0.044397372752428055, "eval_icons_MAE_h": 0.0814935453236103, "eval_icons_MAE_w": 0.0699278824031353, "eval_icons_MAE_x_boxes": 0.06737468019127846, "eval_icons_MAE_y_boxes": 0.08113038912415504, "eval_icons_NUM_probability": 0.9999993741512299, "eval_icons_inside_bbox": 0.7795138955116272, "eval_icons_loss": 0.22252009809017181, "eval_icons_loss_ce": 7.370136074769107e-07, "eval_icons_loss_iou": 0.4132080078125, "eval_icons_loss_num": 0.047031402587890625, "eval_icons_loss_xval": 0.234954833984375, "eval_icons_runtime": 96.2118, "eval_icons_samples_per_second": 0.52, "eval_icons_steps_per_second": 0.021, "num_input_tokens_seen": 67698360, "step": 750 }, { "epoch": 3.4482758620689653, "eval_screenspot_CIoU": 0.41030613084634143, "eval_screenspot_GIoU": 0.3897818972667058, "eval_screenspot_IoU": 0.4746982256571452, "eval_screenspot_MAE_all": 0.08831173926591873, "eval_screenspot_MAE_h": 0.0866956611474355, "eval_screenspot_MAE_w": 0.17451148480176926, "eval_screenspot_MAE_x_boxes": 0.16739005843798319, "eval_screenspot_MAE_y_boxes": 0.08148317784070969, "eval_screenspot_NUM_probability": 0.9999986886978149, "eval_screenspot_inside_bbox": 0.753333330154419, "eval_screenspot_loss": 0.44167619943618774, "eval_screenspot_loss_ce": 0.00013617607813406116, "eval_screenspot_loss_iou": 0.4222005208333333, "eval_screenspot_loss_num": 0.09005228678385417, "eval_screenspot_loss_xval": 0.4504191080729167, "eval_screenspot_runtime": 157.7137, "eval_screenspot_samples_per_second": 0.564, "eval_screenspot_steps_per_second": 0.019, "num_input_tokens_seen": 67698360, "step": 750 }, { "epoch": 3.4482758620689653, "eval_compot_CIoU": 0.4846196174621582, "eval_compot_GIoU": 0.46163466572761536, "eval_compot_IoU": 0.5454895496368408, "eval_compot_MAE_all": 0.05586290545761585, "eval_compot_MAE_h": 0.07578141614794731, "eval_compot_MAE_w": 0.11417003348469734, "eval_compot_MAE_x_boxes": 0.09730804339051247, "eval_compot_MAE_y_boxes": 0.07610557973384857, "eval_compot_NUM_probability": 0.9999949038028717, "eval_compot_inside_bbox": 0.7638888955116272, "eval_compot_loss": 0.30647769570350647, "eval_compot_loss_ce": 0.012538184644654393, "eval_compot_loss_iou": 0.5220947265625, "eval_compot_loss_num": 0.050449371337890625, "eval_compot_loss_xval": 0.2524261474609375, "eval_compot_runtime": 90.9711, "eval_compot_samples_per_second": 0.55, "eval_compot_steps_per_second": 0.022, "num_input_tokens_seen": 67698360, "step": 750 }, { "epoch": 3.4482758620689653, "loss": 0.18523982167243958, "loss_ce": 0.007658033166080713, "loss_iou": 0.55078125, "loss_num": 0.03564453125, "loss_xval": 0.177734375, "num_input_tokens_seen": 67698360, "step": 750 }, { "epoch": 3.452873563218391, "grad_norm": 5.387146730993583, "learning_rate": 5e-06, "loss": 0.141, "num_input_tokens_seen": 67788820, "step": 751 }, { "epoch": 3.452873563218391, "loss": 0.1267109513282776, "loss_ce": 3.247974018449895e-05, "loss_iou": 0.373046875, "loss_num": 0.025390625, "loss_xval": 0.126953125, "num_input_tokens_seen": 67788820, "step": 751 }, { "epoch": 3.457471264367816, "grad_norm": 13.911926672220762, "learning_rate": 5e-06, "loss": 0.1078, "num_input_tokens_seen": 67879216, "step": 752 }, { "epoch": 3.457471264367816, "loss": 0.10455399006605148, "loss_ce": 6.180434866109863e-05, "loss_iou": 0.330078125, "loss_num": 0.0208740234375, "loss_xval": 0.1044921875, "num_input_tokens_seen": 67879216, "step": 752 }, { "epoch": 3.462068965517241, "grad_norm": 5.6856250871143486, "learning_rate": 5e-06, "loss": 0.1927, "num_input_tokens_seen": 67969500, "step": 753 }, { "epoch": 3.462068965517241, "loss": 0.19283702969551086, "loss_ce": 8.800413343124092e-05, "loss_iou": 0.5, "loss_num": 0.03857421875, "loss_xval": 0.1923828125, "num_input_tokens_seen": 67969500, "step": 753 }, { "epoch": 3.466666666666667, "grad_norm": 7.868167270167063, "learning_rate": 5e-06, "loss": 0.0733, "num_input_tokens_seen": 68059952, "step": 754 }, { "epoch": 3.466666666666667, "loss": 0.05673067271709442, "loss_ce": 1.3753228813584428e-05, "loss_iou": 0.34375, "loss_num": 0.0113525390625, "loss_xval": 0.056640625, "num_input_tokens_seen": 68059952, "step": 754 }, { "epoch": 3.471264367816092, "grad_norm": 12.024869069661348, "learning_rate": 5e-06, "loss": 0.1488, "num_input_tokens_seen": 68150204, "step": 755 }, { "epoch": 3.471264367816092, "loss": 0.16925933957099915, "loss_ce": 8.86403904587496e-06, "loss_iou": 0.302734375, "loss_num": 0.033935546875, "loss_xval": 0.1689453125, "num_input_tokens_seen": 68150204, "step": 755 }, { "epoch": 3.475862068965517, "grad_norm": 10.067626482591868, "learning_rate": 5e-06, "loss": 0.1228, "num_input_tokens_seen": 68240748, "step": 756 }, { "epoch": 3.475862068965517, "loss": 0.12415396422147751, "loss_ce": 8.463855920126662e-06, "loss_iou": 0.310546875, "loss_num": 0.02490234375, "loss_xval": 0.1240234375, "num_input_tokens_seen": 68240748, "step": 756 }, { "epoch": 3.4804597701149427, "grad_norm": 13.763593458927147, "learning_rate": 5e-06, "loss": 0.2071, "num_input_tokens_seen": 68331096, "step": 757 }, { "epoch": 3.4804597701149427, "loss": 0.22060084342956543, "loss_ce": 1.9778890418820083e-05, "loss_iou": 0.392578125, "loss_num": 0.044189453125, "loss_xval": 0.220703125, "num_input_tokens_seen": 68331096, "step": 757 }, { "epoch": 3.485057471264368, "grad_norm": 37.000929048972885, "learning_rate": 5e-06, "loss": 0.1891, "num_input_tokens_seen": 68421584, "step": 758 }, { "epoch": 3.485057471264368, "loss": 0.1451530158519745, "loss_ce": 7.243985601235181e-05, "loss_iou": 0.40625, "loss_num": 0.029052734375, "loss_xval": 0.1455078125, "num_input_tokens_seen": 68421584, "step": 758 }, { "epoch": 3.489655172413793, "grad_norm": 12.987634056120026, "learning_rate": 5e-06, "loss": 0.0816, "num_input_tokens_seen": 68511940, "step": 759 }, { "epoch": 3.489655172413793, "loss": 0.051099397242069244, "loss_ce": 1.2974151104572229e-05, "loss_iou": 0.37890625, "loss_num": 0.01025390625, "loss_xval": 0.051025390625, "num_input_tokens_seen": 68511940, "step": 759 }, { "epoch": 3.4942528735632186, "grad_norm": 4.05554262967813, "learning_rate": 5e-06, "loss": 0.1256, "num_input_tokens_seen": 68602372, "step": 760 }, { "epoch": 3.4942528735632186, "loss": 0.10796726495027542, "loss_ce": 1.1330963388900273e-05, "loss_iou": 0.333984375, "loss_num": 0.0216064453125, "loss_xval": 0.10791015625, "num_input_tokens_seen": 68602372, "step": 760 }, { "epoch": 3.4988505747126437, "grad_norm": 2.872197937123995, "learning_rate": 5e-06, "loss": 0.0834, "num_input_tokens_seen": 68692692, "step": 761 }, { "epoch": 3.4988505747126437, "loss": 0.10546976327896118, "loss_ce": 1.009384050121298e-06, "loss_iou": 0.380859375, "loss_num": 0.0211181640625, "loss_xval": 0.10546875, "num_input_tokens_seen": 68692692, "step": 761 }, { "epoch": 3.503448275862069, "grad_norm": 2.7436969971531573, "learning_rate": 5e-06, "loss": 0.125, "num_input_tokens_seen": 68783112, "step": 762 }, { "epoch": 3.503448275862069, "loss": 0.0926920622587204, "loss_ce": 1.017252270685276e-05, "loss_iou": 0.38671875, "loss_num": 0.0185546875, "loss_xval": 0.0927734375, "num_input_tokens_seen": 68783112, "step": 762 }, { "epoch": 3.508045977011494, "grad_norm": 12.740068722129191, "learning_rate": 5e-06, "loss": 0.1955, "num_input_tokens_seen": 68872560, "step": 763 }, { "epoch": 3.508045977011494, "loss": 0.20338374376296997, "loss_ce": 0.0019982485100626945, "loss_iou": 0.310546875, "loss_num": 0.040283203125, "loss_xval": 0.201171875, "num_input_tokens_seen": 68872560, "step": 763 }, { "epoch": 3.5126436781609196, "grad_norm": 7.842380690749216, "learning_rate": 5e-06, "loss": 0.1222, "num_input_tokens_seen": 68962972, "step": 764 }, { "epoch": 3.5126436781609196, "loss": 0.14544086158275604, "loss_ce": 2.4600009055575356e-05, "loss_iou": 0.423828125, "loss_num": 0.029052734375, "loss_xval": 0.1455078125, "num_input_tokens_seen": 68962972, "step": 764 }, { "epoch": 3.5172413793103448, "grad_norm": 5.499498755138326, "learning_rate": 5e-06, "loss": 0.1058, "num_input_tokens_seen": 69053244, "step": 765 }, { "epoch": 3.5172413793103448, "loss": 0.09770219027996063, "loss_ce": 4.593187622958794e-05, "loss_iou": 0.373046875, "loss_num": 0.01953125, "loss_xval": 0.09765625, "num_input_tokens_seen": 69053244, "step": 765 }, { "epoch": 3.5218390804597703, "grad_norm": 4.548325999877122, "learning_rate": 5e-06, "loss": 0.0985, "num_input_tokens_seen": 69143592, "step": 766 }, { "epoch": 3.5218390804597703, "loss": 0.06963898241519928, "loss_ce": 2.8384643883327954e-05, "loss_iou": 0.298828125, "loss_num": 0.013916015625, "loss_xval": 0.06982421875, "num_input_tokens_seen": 69143592, "step": 766 }, { "epoch": 3.5264367816091955, "grad_norm": 13.276586752236186, "learning_rate": 5e-06, "loss": 0.1276, "num_input_tokens_seen": 69233884, "step": 767 }, { "epoch": 3.5264367816091955, "loss": 0.14585262537002563, "loss_ce": 0.0010162012185901403, "loss_iou": 0.267578125, "loss_num": 0.0289306640625, "loss_xval": 0.14453125, "num_input_tokens_seen": 69233884, "step": 767 }, { "epoch": 3.5310344827586206, "grad_norm": 3.288042195364263, "learning_rate": 5e-06, "loss": 0.1279, "num_input_tokens_seen": 69324272, "step": 768 }, { "epoch": 3.5310344827586206, "loss": 0.10515020787715912, "loss_ce": 0.00013922779180575162, "loss_iou": 0.470703125, "loss_num": 0.02099609375, "loss_xval": 0.10498046875, "num_input_tokens_seen": 69324272, "step": 768 }, { "epoch": 3.535632183908046, "grad_norm": 20.003220474729524, "learning_rate": 5e-06, "loss": 0.1411, "num_input_tokens_seen": 69413868, "step": 769 }, { "epoch": 3.535632183908046, "loss": 0.1137920469045639, "loss_ce": 2.2520007405546494e-05, "loss_iou": 0.3984375, "loss_num": 0.022705078125, "loss_xval": 0.11376953125, "num_input_tokens_seen": 69413868, "step": 769 }, { "epoch": 3.5402298850574714, "grad_norm": 14.09650460702761, "learning_rate": 5e-06, "loss": 0.1811, "num_input_tokens_seen": 69504204, "step": 770 }, { "epoch": 3.5402298850574714, "loss": 0.16392265260219574, "loss_ce": 1.2739261364913546e-05, "loss_iou": 0.41796875, "loss_num": 0.03271484375, "loss_xval": 0.1640625, "num_input_tokens_seen": 69504204, "step": 770 }, { "epoch": 3.5448275862068965, "grad_norm": 5.589999305942861, "learning_rate": 5e-06, "loss": 0.1689, "num_input_tokens_seen": 69594556, "step": 771 }, { "epoch": 3.5448275862068965, "loss": 0.1477949023246765, "loss_ce": 2.8777922125300393e-05, "loss_iou": 0.390625, "loss_num": 0.029541015625, "loss_xval": 0.1474609375, "num_input_tokens_seen": 69594556, "step": 771 }, { "epoch": 3.5494252873563217, "grad_norm": 6.623212624230002, "learning_rate": 5e-06, "loss": 0.1129, "num_input_tokens_seen": 69684820, "step": 772 }, { "epoch": 3.5494252873563217, "loss": 0.06927981972694397, "loss_ce": 4.912180884275585e-06, "loss_iou": 0.30859375, "loss_num": 0.01385498046875, "loss_xval": 0.0693359375, "num_input_tokens_seen": 69684820, "step": 772 }, { "epoch": 3.5540229885057473, "grad_norm": 6.206244364237178, "learning_rate": 5e-06, "loss": 0.127, "num_input_tokens_seen": 69773688, "step": 773 }, { "epoch": 3.5540229885057473, "loss": 0.12721136212348938, "loss_ce": 1.4094775906414725e-05, "loss_iou": 0.42578125, "loss_num": 0.025390625, "loss_xval": 0.126953125, "num_input_tokens_seen": 69773688, "step": 773 }, { "epoch": 3.5586206896551724, "grad_norm": 6.055994010471258, "learning_rate": 5e-06, "loss": 0.1533, "num_input_tokens_seen": 69864012, "step": 774 }, { "epoch": 3.5586206896551724, "loss": 0.1802106499671936, "loss_ce": 9.590051922714338e-05, "loss_iou": 0.375, "loss_num": 0.035888671875, "loss_xval": 0.1796875, "num_input_tokens_seen": 69864012, "step": 774 }, { "epoch": 3.5632183908045976, "grad_norm": 4.739416821882011, "learning_rate": 5e-06, "loss": 0.1308, "num_input_tokens_seen": 69954324, "step": 775 }, { "epoch": 3.5632183908045976, "loss": 0.16380281746387482, "loss_ce": 0.0002896270889323205, "loss_iou": 0.384765625, "loss_num": 0.03271484375, "loss_xval": 0.1630859375, "num_input_tokens_seen": 69954324, "step": 775 }, { "epoch": 3.5678160919540227, "grad_norm": 8.653175282625192, "learning_rate": 5e-06, "loss": 0.1024, "num_input_tokens_seen": 70044580, "step": 776 }, { "epoch": 3.5678160919540227, "loss": 0.09520787745714188, "loss_ce": 8.29543569125235e-06, "loss_iou": 0.333984375, "loss_num": 0.01904296875, "loss_xval": 0.09521484375, "num_input_tokens_seen": 70044580, "step": 776 }, { "epoch": 3.5724137931034483, "grad_norm": 21.81495303272478, "learning_rate": 5e-06, "loss": 0.1423, "num_input_tokens_seen": 70134884, "step": 777 }, { "epoch": 3.5724137931034483, "loss": 0.11333857476711273, "loss_ce": 0.000103099322586786, "loss_iou": 0.33203125, "loss_num": 0.0225830078125, "loss_xval": 0.11328125, "num_input_tokens_seen": 70134884, "step": 777 }, { "epoch": 3.5770114942528735, "grad_norm": 15.12571431595152, "learning_rate": 5e-06, "loss": 0.1656, "num_input_tokens_seen": 70225164, "step": 778 }, { "epoch": 3.5770114942528735, "loss": 0.2089380919933319, "loss_ce": 1.4762490536668338e-05, "loss_iou": 0.4609375, "loss_num": 0.041748046875, "loss_xval": 0.208984375, "num_input_tokens_seen": 70225164, "step": 778 }, { "epoch": 3.581609195402299, "grad_norm": 10.383950864076555, "learning_rate": 5e-06, "loss": 0.1416, "num_input_tokens_seen": 70315464, "step": 779 }, { "epoch": 3.581609195402299, "loss": 0.11370711028575897, "loss_ce": 2.912801573984325e-05, "loss_iou": 0.40625, "loss_num": 0.022705078125, "loss_xval": 0.11376953125, "num_input_tokens_seen": 70315464, "step": 779 }, { "epoch": 3.586206896551724, "grad_norm": 2.1421328839083684, "learning_rate": 5e-06, "loss": 0.1222, "num_input_tokens_seen": 70405788, "step": 780 }, { "epoch": 3.586206896551724, "loss": 0.1896582543849945, "loss_ce": 2.202134237450082e-05, "loss_iou": 0.3203125, "loss_num": 0.037841796875, "loss_xval": 0.189453125, "num_input_tokens_seen": 70405788, "step": 780 }, { "epoch": 3.5908045977011493, "grad_norm": 9.380525583505861, "learning_rate": 5e-06, "loss": 0.2045, "num_input_tokens_seen": 70496136, "step": 781 }, { "epoch": 3.5908045977011493, "loss": 0.21958574652671814, "loss_ce": 1.1782582078012638e-05, "loss_iou": 0.267578125, "loss_num": 0.0439453125, "loss_xval": 0.2197265625, "num_input_tokens_seen": 70496136, "step": 781 }, { "epoch": 3.5954022988505745, "grad_norm": 3.9371848580056517, "learning_rate": 5e-06, "loss": 0.0866, "num_input_tokens_seen": 70586496, "step": 782 }, { "epoch": 3.5954022988505745, "loss": 0.05908702313899994, "loss_ce": 2.025284084083978e-05, "loss_iou": 0.3828125, "loss_num": 0.0118408203125, "loss_xval": 0.05908203125, "num_input_tokens_seen": 70586496, "step": 782 }, { "epoch": 3.6, "grad_norm": 10.508397334914141, "learning_rate": 5e-06, "loss": 0.0969, "num_input_tokens_seen": 70676784, "step": 783 }, { "epoch": 3.6, "loss": 0.11284907907247543, "loss_ce": 1.0333604222978465e-05, "loss_iou": 0.27734375, "loss_num": 0.0225830078125, "loss_xval": 0.11279296875, "num_input_tokens_seen": 70676784, "step": 783 }, { "epoch": 3.6045977011494252, "grad_norm": 12.149659037494654, "learning_rate": 5e-06, "loss": 0.1, "num_input_tokens_seen": 70767128, "step": 784 }, { "epoch": 3.6045977011494252, "loss": 0.10474172979593277, "loss_ce": 5.399918336479459e-06, "loss_iou": 0.44140625, "loss_num": 0.0208740234375, "loss_xval": 0.1044921875, "num_input_tokens_seen": 70767128, "step": 784 }, { "epoch": 3.609195402298851, "grad_norm": 32.31492723615795, "learning_rate": 5e-06, "loss": 0.1335, "num_input_tokens_seen": 70857476, "step": 785 }, { "epoch": 3.609195402298851, "loss": 0.11612387001514435, "loss_ce": 4.4922308006789535e-06, "loss_iou": 0.404296875, "loss_num": 0.023193359375, "loss_xval": 0.1162109375, "num_input_tokens_seen": 70857476, "step": 785 }, { "epoch": 3.613793103448276, "grad_norm": 13.784691745076751, "learning_rate": 5e-06, "loss": 0.0975, "num_input_tokens_seen": 70947968, "step": 786 }, { "epoch": 3.613793103448276, "loss": 0.09685492515563965, "loss_ce": 2.2650790924672037e-05, "loss_iou": 0.37890625, "loss_num": 0.0194091796875, "loss_xval": 0.0966796875, "num_input_tokens_seen": 70947968, "step": 786 }, { "epoch": 3.618390804597701, "grad_norm": 5.938316209673259, "learning_rate": 5e-06, "loss": 0.1268, "num_input_tokens_seen": 71038404, "step": 787 }, { "epoch": 3.618390804597701, "loss": 0.13221478462219238, "loss_ce": 1.2635980965569615e-05, "loss_iou": 0.421875, "loss_num": 0.0264892578125, "loss_xval": 0.1318359375, "num_input_tokens_seen": 71038404, "step": 787 }, { "epoch": 3.6229885057471263, "grad_norm": 13.011958974161733, "learning_rate": 5e-06, "loss": 0.1329, "num_input_tokens_seen": 71128788, "step": 788 }, { "epoch": 3.6229885057471263, "loss": 0.14329373836517334, "loss_ce": 1.3719867638428695e-05, "loss_iou": 0.451171875, "loss_num": 0.0286865234375, "loss_xval": 0.1435546875, "num_input_tokens_seen": 71128788, "step": 788 }, { "epoch": 3.627586206896552, "grad_norm": 11.936048890187932, "learning_rate": 5e-06, "loss": 0.149, "num_input_tokens_seen": 71219196, "step": 789 }, { "epoch": 3.627586206896552, "loss": 0.11237190663814545, "loss_ce": 8.247944788308814e-05, "loss_iou": 0.3046875, "loss_num": 0.0224609375, "loss_xval": 0.1123046875, "num_input_tokens_seen": 71219196, "step": 789 }, { "epoch": 3.632183908045977, "grad_norm": 6.308330166762513, "learning_rate": 5e-06, "loss": 0.1135, "num_input_tokens_seen": 71309520, "step": 790 }, { "epoch": 3.632183908045977, "loss": 0.1292654573917389, "loss_ce": 5.403875547926873e-05, "loss_iou": 0.341796875, "loss_num": 0.02587890625, "loss_xval": 0.12890625, "num_input_tokens_seen": 71309520, "step": 790 }, { "epoch": 3.636781609195402, "grad_norm": 5.137471316885716, "learning_rate": 5e-06, "loss": 0.1365, "num_input_tokens_seen": 71399780, "step": 791 }, { "epoch": 3.636781609195402, "loss": 0.13990908861160278, "loss_ce": 1.6500023775734007e-05, "loss_iou": 0.419921875, "loss_num": 0.0279541015625, "loss_xval": 0.1396484375, "num_input_tokens_seen": 71399780, "step": 791 }, { "epoch": 3.6413793103448278, "grad_norm": 9.275105119831409, "learning_rate": 5e-06, "loss": 0.0812, "num_input_tokens_seen": 71490120, "step": 792 }, { "epoch": 3.6413793103448278, "loss": 0.06399580091238022, "loss_ce": 3.09558781736996e-05, "loss_iou": 0.34375, "loss_num": 0.01275634765625, "loss_xval": 0.06396484375, "num_input_tokens_seen": 71490120, "step": 792 }, { "epoch": 3.645977011494253, "grad_norm": 11.0637012290359, "learning_rate": 5e-06, "loss": 0.1358, "num_input_tokens_seen": 71580584, "step": 793 }, { "epoch": 3.645977011494253, "loss": 0.15851935744285583, "loss_ce": 4.156233626417816e-05, "loss_iou": 0.421875, "loss_num": 0.03173828125, "loss_xval": 0.158203125, "num_input_tokens_seen": 71580584, "step": 793 }, { "epoch": 3.650574712643678, "grad_norm": 16.625197592400877, "learning_rate": 5e-06, "loss": 0.1372, "num_input_tokens_seen": 71670196, "step": 794 }, { "epoch": 3.650574712643678, "loss": 0.15688209235668182, "loss_ce": 2.1741370801464655e-05, "loss_iou": 0.380859375, "loss_num": 0.03125, "loss_xval": 0.1572265625, "num_input_tokens_seen": 71670196, "step": 794 }, { "epoch": 3.655172413793103, "grad_norm": 5.3921763962194005, "learning_rate": 5e-06, "loss": 0.1317, "num_input_tokens_seen": 71760612, "step": 795 }, { "epoch": 3.655172413793103, "loss": 0.10780765116214752, "loss_ce": 4.304832600610098e-06, "loss_iou": 0.349609375, "loss_num": 0.0216064453125, "loss_xval": 0.10791015625, "num_input_tokens_seen": 71760612, "step": 795 }, { "epoch": 3.659770114942529, "grad_norm": 11.704393512510705, "learning_rate": 5e-06, "loss": 0.1639, "num_input_tokens_seen": 71850916, "step": 796 }, { "epoch": 3.659770114942529, "loss": 0.1306782066822052, "loss_ce": 1.929806785483379e-06, "loss_iou": 0.451171875, "loss_num": 0.026123046875, "loss_xval": 0.130859375, "num_input_tokens_seen": 71850916, "step": 796 }, { "epoch": 3.664367816091954, "grad_norm": 16.992640998013997, "learning_rate": 5e-06, "loss": 0.1134, "num_input_tokens_seen": 71941204, "step": 797 }, { "epoch": 3.664367816091954, "loss": 0.11268985271453857, "loss_ce": 1.8956783605972305e-05, "loss_iou": 0.435546875, "loss_num": 0.0225830078125, "loss_xval": 0.11279296875, "num_input_tokens_seen": 71941204, "step": 797 }, { "epoch": 3.6689655172413795, "grad_norm": 11.094649026288351, "learning_rate": 5e-06, "loss": 0.1312, "num_input_tokens_seen": 72031516, "step": 798 }, { "epoch": 3.6689655172413795, "loss": 0.15380123257637024, "loss_ce": 2.316595782758668e-05, "loss_iou": 0.4765625, "loss_num": 0.03076171875, "loss_xval": 0.1533203125, "num_input_tokens_seen": 72031516, "step": 798 }, { "epoch": 3.6735632183908047, "grad_norm": 9.218816872173734, "learning_rate": 5e-06, "loss": 0.0814, "num_input_tokens_seen": 72121944, "step": 799 }, { "epoch": 3.6735632183908047, "loss": 0.10079614073038101, "loss_ce": 2.710282933549024e-05, "loss_iou": 0.365234375, "loss_num": 0.0201416015625, "loss_xval": 0.1005859375, "num_input_tokens_seen": 72121944, "step": 799 }, { "epoch": 3.67816091954023, "grad_norm": 15.692187876785969, "learning_rate": 5e-06, "loss": 0.09, "num_input_tokens_seen": 72212332, "step": 800 }, { "epoch": 3.67816091954023, "loss": 0.08001083880662918, "loss_ce": 9.010436770040542e-06, "loss_iou": 0.470703125, "loss_num": 0.0159912109375, "loss_xval": 0.080078125, "num_input_tokens_seen": 72212332, "step": 800 }, { "epoch": 3.682758620689655, "grad_norm": 3.364192050864013, "learning_rate": 5e-06, "loss": 0.0848, "num_input_tokens_seen": 72302688, "step": 801 }, { "epoch": 3.682758620689655, "loss": 0.060924116522073746, "loss_ce": 0.00010258361726300791, "loss_iou": 0.359375, "loss_num": 0.01214599609375, "loss_xval": 0.060791015625, "num_input_tokens_seen": 72302688, "step": 801 }, { "epoch": 3.6873563218390806, "grad_norm": 13.859096616510685, "learning_rate": 5e-06, "loss": 0.1194, "num_input_tokens_seen": 72393104, "step": 802 }, { "epoch": 3.6873563218390806, "loss": 0.1117105633020401, "loss_ce": 1.622791023692116e-05, "loss_iou": 0.423828125, "loss_num": 0.0223388671875, "loss_xval": 0.11181640625, "num_input_tokens_seen": 72393104, "step": 802 }, { "epoch": 3.6919540229885057, "grad_norm": 13.956089000343894, "learning_rate": 5e-06, "loss": 0.0844, "num_input_tokens_seen": 72483416, "step": 803 }, { "epoch": 3.6919540229885057, "loss": 0.0864303782582283, "loss_ce": 4.60219871456502e-06, "loss_iou": 0.39453125, "loss_num": 0.017333984375, "loss_xval": 0.08642578125, "num_input_tokens_seen": 72483416, "step": 803 }, { "epoch": 3.696551724137931, "grad_norm": 10.769508987312188, "learning_rate": 5e-06, "loss": 0.117, "num_input_tokens_seen": 72573912, "step": 804 }, { "epoch": 3.696551724137931, "loss": 0.12728144228458405, "loss_ce": 7.876707968534902e-06, "loss_iou": 0.3203125, "loss_num": 0.025390625, "loss_xval": 0.126953125, "num_input_tokens_seen": 72573912, "step": 804 }, { "epoch": 3.7011494252873565, "grad_norm": 3.244021220688608, "learning_rate": 5e-06, "loss": 0.1219, "num_input_tokens_seen": 72664244, "step": 805 }, { "epoch": 3.7011494252873565, "loss": 0.14200298488140106, "loss_ce": 5.0472979637561366e-05, "loss_iou": 0.388671875, "loss_num": 0.0284423828125, "loss_xval": 0.1416015625, "num_input_tokens_seen": 72664244, "step": 805 }, { "epoch": 3.7057471264367816, "grad_norm": 4.828124192128039, "learning_rate": 5e-06, "loss": 0.0874, "num_input_tokens_seen": 72753736, "step": 806 }, { "epoch": 3.7057471264367816, "loss": 0.0866774469614029, "loss_ce": 7.529814411100233e-06, "loss_iou": 0.35546875, "loss_num": 0.017333984375, "loss_xval": 0.0869140625, "num_input_tokens_seen": 72753736, "step": 806 }, { "epoch": 3.7103448275862068, "grad_norm": 10.090685737843089, "learning_rate": 5e-06, "loss": 0.1321, "num_input_tokens_seen": 72844080, "step": 807 }, { "epoch": 3.7103448275862068, "loss": 0.14415577054023743, "loss_ce": 3.650196231319569e-05, "loss_iou": 0.416015625, "loss_num": 0.02880859375, "loss_xval": 0.14453125, "num_input_tokens_seen": 72844080, "step": 807 }, { "epoch": 3.714942528735632, "grad_norm": 11.901790776803901, "learning_rate": 5e-06, "loss": 0.196, "num_input_tokens_seen": 72934468, "step": 808 }, { "epoch": 3.714942528735632, "loss": 0.21217146515846252, "loss_ce": 2.8517068130895495e-05, "loss_iou": 0.44921875, "loss_num": 0.04248046875, "loss_xval": 0.2119140625, "num_input_tokens_seen": 72934468, "step": 808 }, { "epoch": 3.7195402298850575, "grad_norm": 24.55270500907039, "learning_rate": 5e-06, "loss": 0.1236, "num_input_tokens_seen": 73024692, "step": 809 }, { "epoch": 3.7195402298850575, "loss": 0.10771137475967407, "loss_ce": 4.536272172117606e-05, "loss_iou": 0.427734375, "loss_num": 0.021484375, "loss_xval": 0.107421875, "num_input_tokens_seen": 73024692, "step": 809 }, { "epoch": 3.7241379310344827, "grad_norm": 15.553335565571802, "learning_rate": 5e-06, "loss": 0.1397, "num_input_tokens_seen": 73115056, "step": 810 }, { "epoch": 3.7241379310344827, "loss": 0.14749327301979065, "loss_ce": 3.232985909562558e-05, "loss_iou": 0.404296875, "loss_num": 0.029541015625, "loss_xval": 0.1474609375, "num_input_tokens_seen": 73115056, "step": 810 }, { "epoch": 3.7287356321839082, "grad_norm": 21.65190443149484, "learning_rate": 5e-06, "loss": 0.1222, "num_input_tokens_seen": 73205556, "step": 811 }, { "epoch": 3.7287356321839082, "loss": 0.10954000800848007, "loss_ce": 0.0008363968227058649, "loss_iou": 0.3671875, "loss_num": 0.021728515625, "loss_xval": 0.10888671875, "num_input_tokens_seen": 73205556, "step": 811 }, { "epoch": 3.7333333333333334, "grad_norm": 3.28808735511797, "learning_rate": 5e-06, "loss": 0.1166, "num_input_tokens_seen": 73296004, "step": 812 }, { "epoch": 3.7333333333333334, "loss": 0.09380966424942017, "loss_ce": 1.3889082765672356e-05, "loss_iou": 0.392578125, "loss_num": 0.018798828125, "loss_xval": 0.09375, "num_input_tokens_seen": 73296004, "step": 812 }, { "epoch": 3.7379310344827585, "grad_norm": 7.828759411600456, "learning_rate": 5e-06, "loss": 0.1538, "num_input_tokens_seen": 73386480, "step": 813 }, { "epoch": 3.7379310344827585, "loss": 0.167840838432312, "loss_ce": 0.0001162344342446886, "loss_iou": 0.412109375, "loss_num": 0.033447265625, "loss_xval": 0.16796875, "num_input_tokens_seen": 73386480, "step": 813 }, { "epoch": 3.7425287356321837, "grad_norm": 27.164938076027006, "learning_rate": 5e-06, "loss": 0.1196, "num_input_tokens_seen": 73476720, "step": 814 }, { "epoch": 3.7425287356321837, "loss": 0.13790945708751678, "loss_ce": 6.155119626782835e-05, "loss_iou": 0.46875, "loss_num": 0.027587890625, "loss_xval": 0.1376953125, "num_input_tokens_seen": 73476720, "step": 814 }, { "epoch": 3.7471264367816093, "grad_norm": 7.036276194280488, "learning_rate": 5e-06, "loss": 0.1105, "num_input_tokens_seen": 73566368, "step": 815 }, { "epoch": 3.7471264367816093, "loss": 0.11970978230237961, "loss_ce": 3.510632086545229e-05, "loss_iou": 0.29296875, "loss_num": 0.02392578125, "loss_xval": 0.11962890625, "num_input_tokens_seen": 73566368, "step": 815 }, { "epoch": 3.7517241379310344, "grad_norm": 6.291798562723388, "learning_rate": 5e-06, "loss": 0.1301, "num_input_tokens_seen": 73656748, "step": 816 }, { "epoch": 3.7517241379310344, "loss": 0.15857738256454468, "loss_ce": 3.092413680860773e-05, "loss_iou": 0.255859375, "loss_num": 0.03173828125, "loss_xval": 0.158203125, "num_input_tokens_seen": 73656748, "step": 816 }, { "epoch": 3.75632183908046, "grad_norm": 4.395310325624689, "learning_rate": 5e-06, "loss": 0.119, "num_input_tokens_seen": 73747096, "step": 817 }, { "epoch": 3.75632183908046, "loss": 0.1381591111421585, "loss_ce": 6.042053428245708e-06, "loss_iou": 0.314453125, "loss_num": 0.027587890625, "loss_xval": 0.1376953125, "num_input_tokens_seen": 73747096, "step": 817 }, { "epoch": 3.760919540229885, "grad_norm": 4.574856648294534, "learning_rate": 5e-06, "loss": 0.1236, "num_input_tokens_seen": 73837368, "step": 818 }, { "epoch": 3.760919540229885, "loss": 0.1333150714635849, "loss_ce": 0.00028894448769278824, "loss_iou": 0.419921875, "loss_num": 0.026611328125, "loss_xval": 0.1328125, "num_input_tokens_seen": 73837368, "step": 818 }, { "epoch": 3.7655172413793103, "grad_norm": 20.895031812009428, "learning_rate": 5e-06, "loss": 0.111, "num_input_tokens_seen": 73927844, "step": 819 }, { "epoch": 3.7655172413793103, "loss": 0.10864730179309845, "loss_ce": 3.5239758290117607e-05, "loss_iou": 0.375, "loss_num": 0.021728515625, "loss_xval": 0.1083984375, "num_input_tokens_seen": 73927844, "step": 819 }, { "epoch": 3.7701149425287355, "grad_norm": 15.134371186789252, "learning_rate": 5e-06, "loss": 0.0982, "num_input_tokens_seen": 74018240, "step": 820 }, { "epoch": 3.7701149425287355, "loss": 0.09329765290021896, "loss_ce": 0.00020377803593873978, "loss_iou": 0.27734375, "loss_num": 0.0186767578125, "loss_xval": 0.09326171875, "num_input_tokens_seen": 74018240, "step": 820 }, { "epoch": 3.774712643678161, "grad_norm": 15.175525511944207, "learning_rate": 5e-06, "loss": 0.1241, "num_input_tokens_seen": 74108676, "step": 821 }, { "epoch": 3.774712643678161, "loss": 0.13372212648391724, "loss_ce": 7.039559568511322e-05, "loss_iou": 0.400390625, "loss_num": 0.0267333984375, "loss_xval": 0.1337890625, "num_input_tokens_seen": 74108676, "step": 821 }, { "epoch": 3.779310344827586, "grad_norm": 27.19521596346353, "learning_rate": 5e-06, "loss": 0.1087, "num_input_tokens_seen": 74199076, "step": 822 }, { "epoch": 3.779310344827586, "loss": 0.14935675263404846, "loss_ce": 0.00011052708578063175, "loss_iou": 0.3828125, "loss_num": 0.0299072265625, "loss_xval": 0.1494140625, "num_input_tokens_seen": 74199076, "step": 822 }, { "epoch": 3.7839080459770114, "grad_norm": 10.150253946725481, "learning_rate": 5e-06, "loss": 0.1654, "num_input_tokens_seen": 74288004, "step": 823 }, { "epoch": 3.7839080459770114, "loss": 0.1666683554649353, "loss_ce": 0.0001491915318183601, "loss_iou": 0.51953125, "loss_num": 0.033203125, "loss_xval": 0.1669921875, "num_input_tokens_seen": 74288004, "step": 823 }, { "epoch": 3.788505747126437, "grad_norm": 3.7134569300100755, "learning_rate": 5e-06, "loss": 0.1074, "num_input_tokens_seen": 74378480, "step": 824 }, { "epoch": 3.788505747126437, "loss": 0.118888720870018, "loss_ce": 5.327704275259748e-05, "loss_iou": 0.337890625, "loss_num": 0.0238037109375, "loss_xval": 0.11865234375, "num_input_tokens_seen": 74378480, "step": 824 }, { "epoch": 3.793103448275862, "grad_norm": 5.136391279757606, "learning_rate": 5e-06, "loss": 0.0807, "num_input_tokens_seen": 74468904, "step": 825 }, { "epoch": 3.793103448275862, "loss": 0.10247914493083954, "loss_ce": 0.00015370690380223095, "loss_iou": 0.380859375, "loss_num": 0.0205078125, "loss_xval": 0.1025390625, "num_input_tokens_seen": 74468904, "step": 825 }, { "epoch": 3.7977011494252872, "grad_norm": 3.4776597012299777, "learning_rate": 5e-06, "loss": 0.0855, "num_input_tokens_seen": 74559308, "step": 826 }, { "epoch": 3.7977011494252872, "loss": 0.08007881045341492, "loss_ce": 0.00010749474313342944, "loss_iou": 0.328125, "loss_num": 0.0159912109375, "loss_xval": 0.080078125, "num_input_tokens_seen": 74559308, "step": 826 }, { "epoch": 3.8022988505747124, "grad_norm": 13.304246207016769, "learning_rate": 5e-06, "loss": 0.142, "num_input_tokens_seen": 74649636, "step": 827 }, { "epoch": 3.8022988505747124, "loss": 0.144487202167511, "loss_ce": 1.7336749351670733e-06, "loss_iou": 0.33984375, "loss_num": 0.0289306640625, "loss_xval": 0.14453125, "num_input_tokens_seen": 74649636, "step": 827 }, { "epoch": 3.806896551724138, "grad_norm": 8.020543075324909, "learning_rate": 5e-06, "loss": 0.1005, "num_input_tokens_seen": 74740060, "step": 828 }, { "epoch": 3.806896551724138, "loss": 0.09062141180038452, "loss_ce": 1.4721297702635638e-05, "loss_iou": 0.416015625, "loss_num": 0.0181884765625, "loss_xval": 0.0908203125, "num_input_tokens_seen": 74740060, "step": 828 }, { "epoch": 3.811494252873563, "grad_norm": 14.35279728790665, "learning_rate": 5e-06, "loss": 0.0936, "num_input_tokens_seen": 74830480, "step": 829 }, { "epoch": 3.811494252873563, "loss": 0.06638382375240326, "loss_ce": 8.095349585346412e-06, "loss_iou": 0.37109375, "loss_num": 0.0133056640625, "loss_xval": 0.06640625, "num_input_tokens_seen": 74830480, "step": 829 }, { "epoch": 3.8160919540229887, "grad_norm": 14.243365874652318, "learning_rate": 5e-06, "loss": 0.1133, "num_input_tokens_seen": 74920808, "step": 830 }, { "epoch": 3.8160919540229887, "loss": 0.1177852526307106, "loss_ce": 2.654241143318359e-06, "loss_iou": 0.28125, "loss_num": 0.0235595703125, "loss_xval": 0.11767578125, "num_input_tokens_seen": 74920808, "step": 830 }, { "epoch": 3.820689655172414, "grad_norm": 10.427621446470681, "learning_rate": 5e-06, "loss": 0.1127, "num_input_tokens_seen": 75011112, "step": 831 }, { "epoch": 3.820689655172414, "loss": 0.09321253001689911, "loss_ce": 0.00024072162341326475, "loss_iou": 0.482421875, "loss_num": 0.0185546875, "loss_xval": 0.0927734375, "num_input_tokens_seen": 75011112, "step": 831 }, { "epoch": 3.825287356321839, "grad_norm": 3.5623265997795768, "learning_rate": 5e-06, "loss": 0.1278, "num_input_tokens_seen": 75101356, "step": 832 }, { "epoch": 3.825287356321839, "loss": 0.1735125035047531, "loss_ce": 5.05839052493684e-05, "loss_iou": 0.455078125, "loss_num": 0.03466796875, "loss_xval": 0.173828125, "num_input_tokens_seen": 75101356, "step": 832 }, { "epoch": 3.829885057471264, "grad_norm": 4.146150429296354, "learning_rate": 5e-06, "loss": 0.1608, "num_input_tokens_seen": 75190144, "step": 833 }, { "epoch": 3.829885057471264, "loss": 0.1830693781375885, "loss_ce": 0.0004521902301348746, "loss_iou": 0.439453125, "loss_num": 0.03662109375, "loss_xval": 0.1826171875, "num_input_tokens_seen": 75190144, "step": 833 }, { "epoch": 3.8344827586206898, "grad_norm": 6.028978250099342, "learning_rate": 5e-06, "loss": 0.1115, "num_input_tokens_seen": 75280528, "step": 834 }, { "epoch": 3.8344827586206898, "loss": 0.12013056129217148, "loss_ce": 4.389049718156457e-05, "loss_iou": 0.29296875, "loss_num": 0.0240478515625, "loss_xval": 0.1201171875, "num_input_tokens_seen": 75280528, "step": 834 }, { "epoch": 3.839080459770115, "grad_norm": 5.005804908205812, "learning_rate": 5e-06, "loss": 0.1431, "num_input_tokens_seen": 75370936, "step": 835 }, { "epoch": 3.839080459770115, "loss": 0.17678186297416687, "loss_ce": 2.404209772066679e-05, "loss_iou": 0.41015625, "loss_num": 0.035400390625, "loss_xval": 0.1767578125, "num_input_tokens_seen": 75370936, "step": 835 }, { "epoch": 3.84367816091954, "grad_norm": 11.780827894531459, "learning_rate": 5e-06, "loss": 0.0943, "num_input_tokens_seen": 75461384, "step": 836 }, { "epoch": 3.84367816091954, "loss": 0.0846051275730133, "loss_ce": 8.669264207128435e-05, "loss_iou": 0.38671875, "loss_num": 0.0169677734375, "loss_xval": 0.08447265625, "num_input_tokens_seen": 75461384, "step": 836 }, { "epoch": 3.8482758620689657, "grad_norm": 9.132134187365157, "learning_rate": 5e-06, "loss": 0.1159, "num_input_tokens_seen": 75551680, "step": 837 }, { "epoch": 3.8482758620689657, "loss": 0.12721320986747742, "loss_ce": 6.894965736137237e-07, "loss_iou": 0.37109375, "loss_num": 0.025390625, "loss_xval": 0.126953125, "num_input_tokens_seen": 75551680, "step": 837 }, { "epoch": 3.852873563218391, "grad_norm": 26.114035834080024, "learning_rate": 5e-06, "loss": 0.0882, "num_input_tokens_seen": 75642112, "step": 838 }, { "epoch": 3.852873563218391, "loss": 0.08660604059696198, "loss_ce": 5.8186371461488307e-05, "loss_iou": 0.333984375, "loss_num": 0.017333984375, "loss_xval": 0.08642578125, "num_input_tokens_seen": 75642112, "step": 838 }, { "epoch": 3.857471264367816, "grad_norm": 11.804335104556394, "learning_rate": 5e-06, "loss": 0.0909, "num_input_tokens_seen": 75732404, "step": 839 }, { "epoch": 3.857471264367816, "loss": 0.06891857087612152, "loss_ce": 9.877001502900384e-06, "loss_iou": 0.4296875, "loss_num": 0.0137939453125, "loss_xval": 0.06884765625, "num_input_tokens_seen": 75732404, "step": 839 }, { "epoch": 3.862068965517241, "grad_norm": 8.148540211603276, "learning_rate": 5e-06, "loss": 0.1853, "num_input_tokens_seen": 75822708, "step": 840 }, { "epoch": 3.862068965517241, "loss": 0.1470450758934021, "loss_ce": 1.1382823686290067e-05, "loss_iou": 0.263671875, "loss_num": 0.0294189453125, "loss_xval": 0.1474609375, "num_input_tokens_seen": 75822708, "step": 840 }, { "epoch": 3.8666666666666667, "grad_norm": 2.959579079163402, "learning_rate": 5e-06, "loss": 0.1515, "num_input_tokens_seen": 75912948, "step": 841 }, { "epoch": 3.8666666666666667, "loss": 0.14687013626098633, "loss_ce": 4.280491793906549e-06, "loss_iou": 0.33984375, "loss_num": 0.0294189453125, "loss_xval": 0.146484375, "num_input_tokens_seen": 75912948, "step": 841 }, { "epoch": 3.871264367816092, "grad_norm": 3.2788587846844477, "learning_rate": 5e-06, "loss": 0.1256, "num_input_tokens_seen": 76003272, "step": 842 }, { "epoch": 3.871264367816092, "loss": 0.1434180587530136, "loss_ce": 1.5959709344315343e-05, "loss_iou": 0.330078125, "loss_num": 0.0286865234375, "loss_xval": 0.1435546875, "num_input_tokens_seen": 76003272, "step": 842 }, { "epoch": 3.8758620689655174, "grad_norm": 6.603617127888935, "learning_rate": 5e-06, "loss": 0.1436, "num_input_tokens_seen": 76093704, "step": 843 }, { "epoch": 3.8758620689655174, "loss": 0.18056106567382812, "loss_ce": 4.960661317454651e-05, "loss_iou": 0.39453125, "loss_num": 0.0361328125, "loss_xval": 0.1806640625, "num_input_tokens_seen": 76093704, "step": 843 }, { "epoch": 3.8804597701149426, "grad_norm": 2.2614424128970096, "learning_rate": 5e-06, "loss": 0.1349, "num_input_tokens_seen": 76183952, "step": 844 }, { "epoch": 3.8804597701149426, "loss": 0.11200746148824692, "loss_ce": 7.948267921165098e-06, "loss_iou": 0.298828125, "loss_num": 0.0224609375, "loss_xval": 0.11181640625, "num_input_tokens_seen": 76183952, "step": 844 }, { "epoch": 3.8850574712643677, "grad_norm": 9.237162950267727, "learning_rate": 5e-06, "loss": 0.1355, "num_input_tokens_seen": 76274384, "step": 845 }, { "epoch": 3.8850574712643677, "loss": 0.13711686432361603, "loss_ce": 3.19016762659885e-05, "loss_iou": 0.291015625, "loss_num": 0.0274658203125, "loss_xval": 0.13671875, "num_input_tokens_seen": 76274384, "step": 845 }, { "epoch": 3.889655172413793, "grad_norm": 3.4174539263582626, "learning_rate": 5e-06, "loss": 0.1565, "num_input_tokens_seen": 76364656, "step": 846 }, { "epoch": 3.889655172413793, "loss": 0.15261296927928925, "loss_ce": 5.560090721701272e-05, "loss_iou": 0.4609375, "loss_num": 0.030517578125, "loss_xval": 0.15234375, "num_input_tokens_seen": 76364656, "step": 846 }, { "epoch": 3.8942528735632185, "grad_norm": 5.269387019291706, "learning_rate": 5e-06, "loss": 0.127, "num_input_tokens_seen": 76455084, "step": 847 }, { "epoch": 3.8942528735632185, "loss": 0.12015949189662933, "loss_ce": 4.230613194522448e-05, "loss_iou": 0.306640625, "loss_num": 0.0240478515625, "loss_xval": 0.1201171875, "num_input_tokens_seen": 76455084, "step": 847 }, { "epoch": 3.8988505747126436, "grad_norm": 33.611586771121246, "learning_rate": 5e-06, "loss": 0.1088, "num_input_tokens_seen": 76545436, "step": 848 }, { "epoch": 3.8988505747126436, "loss": 0.11951969563961029, "loss_ce": 1.2851842257077806e-05, "loss_iou": 0.28125, "loss_num": 0.02392578125, "loss_xval": 0.11962890625, "num_input_tokens_seen": 76545436, "step": 848 }, { "epoch": 3.903448275862069, "grad_norm": 3.5055722837258783, "learning_rate": 5e-06, "loss": 0.1269, "num_input_tokens_seen": 76634196, "step": 849 }, { "epoch": 3.903448275862069, "loss": 0.12913568317890167, "loss_ce": 1.5821904526092112e-05, "loss_iou": 0.30859375, "loss_num": 0.02587890625, "loss_xval": 0.12890625, "num_input_tokens_seen": 76634196, "step": 849 }, { "epoch": 3.9080459770114944, "grad_norm": 8.97172584427251, "learning_rate": 5e-06, "loss": 0.0848, "num_input_tokens_seen": 76724524, "step": 850 }, { "epoch": 3.9080459770114944, "loss": 0.10759300738573074, "loss_ce": 1.8539070879342034e-05, "loss_iou": 0.41015625, "loss_num": 0.021484375, "loss_xval": 0.107421875, "num_input_tokens_seen": 76724524, "step": 850 }, { "epoch": 3.9126436781609195, "grad_norm": 2.3536210529744888, "learning_rate": 5e-06, "loss": 0.0893, "num_input_tokens_seen": 76815068, "step": 851 }, { "epoch": 3.9126436781609195, "loss": 0.0927945151925087, "loss_ce": 0.00011263469059485942, "loss_iou": 0.33984375, "loss_num": 0.0185546875, "loss_xval": 0.0927734375, "num_input_tokens_seen": 76815068, "step": 851 }, { "epoch": 3.9172413793103447, "grad_norm": 6.886385253300276, "learning_rate": 5e-06, "loss": 0.0821, "num_input_tokens_seen": 76905484, "step": 852 }, { "epoch": 3.9172413793103447, "loss": 0.09015928208827972, "loss_ce": 7.139760418795049e-05, "loss_iou": 0.390625, "loss_num": 0.01806640625, "loss_xval": 0.08984375, "num_input_tokens_seen": 76905484, "step": 852 }, { "epoch": 3.9218390804597703, "grad_norm": 8.891761743105253, "learning_rate": 5e-06, "loss": 0.1436, "num_input_tokens_seen": 76995800, "step": 853 }, { "epoch": 3.9218390804597703, "loss": 0.15627999603748322, "loss_ce": 3.000300239364151e-05, "loss_iou": 0.388671875, "loss_num": 0.03125, "loss_xval": 0.15625, "num_input_tokens_seen": 76995800, "step": 853 }, { "epoch": 3.9264367816091954, "grad_norm": 5.132513236049143, "learning_rate": 5e-06, "loss": 0.1321, "num_input_tokens_seen": 77085280, "step": 854 }, { "epoch": 3.9264367816091954, "loss": 0.10330615192651749, "loss_ce": 4.150414042669581e-06, "loss_iou": 0.33984375, "loss_num": 0.0206298828125, "loss_xval": 0.103515625, "num_input_tokens_seen": 77085280, "step": 854 }, { "epoch": 3.9310344827586206, "grad_norm": 11.013320716371243, "learning_rate": 5e-06, "loss": 0.1201, "num_input_tokens_seen": 77174948, "step": 855 }, { "epoch": 3.9310344827586206, "loss": 0.09394749999046326, "loss_ce": 1.439205152564682e-05, "loss_iou": 0.279296875, "loss_num": 0.018798828125, "loss_xval": 0.09375, "num_input_tokens_seen": 77174948, "step": 855 }, { "epoch": 3.935632183908046, "grad_norm": 6.10534980851738, "learning_rate": 5e-06, "loss": 0.0767, "num_input_tokens_seen": 77265376, "step": 856 }, { "epoch": 3.935632183908046, "loss": 0.06424582004547119, "loss_ce": 6.319404747046065e-06, "loss_iou": 0.359375, "loss_num": 0.01287841796875, "loss_xval": 0.064453125, "num_input_tokens_seen": 77265376, "step": 856 }, { "epoch": 3.9402298850574713, "grad_norm": 15.651592669210814, "learning_rate": 5e-06, "loss": 0.0864, "num_input_tokens_seen": 77355760, "step": 857 }, { "epoch": 3.9402298850574713, "loss": 0.0786062479019165, "loss_ce": 2.3480024538002908e-05, "loss_iou": 0.322265625, "loss_num": 0.0157470703125, "loss_xval": 0.07861328125, "num_input_tokens_seen": 77355760, "step": 857 }, { "epoch": 3.9448275862068964, "grad_norm": 6.800574472479352, "learning_rate": 5e-06, "loss": 0.1223, "num_input_tokens_seen": 77446208, "step": 858 }, { "epoch": 3.9448275862068964, "loss": 0.09560946375131607, "loss_ce": 0.0001809898967621848, "loss_iou": 0.384765625, "loss_num": 0.01904296875, "loss_xval": 0.09521484375, "num_input_tokens_seen": 77446208, "step": 858 }, { "epoch": 3.9494252873563216, "grad_norm": 14.49037795832371, "learning_rate": 5e-06, "loss": 0.1543, "num_input_tokens_seen": 77536448, "step": 859 }, { "epoch": 3.9494252873563216, "loss": 0.14268243312835693, "loss_ce": 1.2764152415911667e-05, "loss_iou": 0.369140625, "loss_num": 0.028564453125, "loss_xval": 0.142578125, "num_input_tokens_seen": 77536448, "step": 859 }, { "epoch": 3.954022988505747, "grad_norm": 3.136445093906524, "learning_rate": 5e-06, "loss": 0.099, "num_input_tokens_seen": 77626764, "step": 860 }, { "epoch": 3.954022988505747, "loss": 0.09950034320354462, "loss_ce": 1.3036394193477463e-05, "loss_iou": 0.3203125, "loss_num": 0.0198974609375, "loss_xval": 0.099609375, "num_input_tokens_seen": 77626764, "step": 860 }, { "epoch": 3.9586206896551723, "grad_norm": 5.399769066083071, "learning_rate": 5e-06, "loss": 0.2064, "num_input_tokens_seen": 77717140, "step": 861 }, { "epoch": 3.9586206896551723, "loss": 0.19695694744586945, "loss_ce": 1.1762541362259071e-05, "loss_iou": 0.392578125, "loss_num": 0.039306640625, "loss_xval": 0.197265625, "num_input_tokens_seen": 77717140, "step": 861 }, { "epoch": 3.963218390804598, "grad_norm": 18.891060577417118, "learning_rate": 5e-06, "loss": 0.1318, "num_input_tokens_seen": 77807488, "step": 862 }, { "epoch": 3.963218390804598, "loss": 0.09799706935882568, "loss_ce": 5.1299293772899546e-06, "loss_iou": 0.4296875, "loss_num": 0.01953125, "loss_xval": 0.09814453125, "num_input_tokens_seen": 77807488, "step": 862 }, { "epoch": 3.967816091954023, "grad_norm": 5.482893632604601, "learning_rate": 5e-06, "loss": 0.0955, "num_input_tokens_seen": 77897924, "step": 863 }, { "epoch": 3.967816091954023, "loss": 0.08787819743156433, "loss_ce": 1.808757588150911e-05, "loss_iou": 0.341796875, "loss_num": 0.017578125, "loss_xval": 0.087890625, "num_input_tokens_seen": 77897924, "step": 863 }, { "epoch": 3.972413793103448, "grad_norm": 22.690601947302575, "learning_rate": 5e-06, "loss": 0.1084, "num_input_tokens_seen": 77988320, "step": 864 }, { "epoch": 3.972413793103448, "loss": 0.14658576250076294, "loss_ce": 9.828477232076693e-06, "loss_iou": 0.337890625, "loss_num": 0.029296875, "loss_xval": 0.146484375, "num_input_tokens_seen": 77988320, "step": 864 }, { "epoch": 3.9770114942528734, "grad_norm": 6.60885795449724, "learning_rate": 5e-06, "loss": 0.1123, "num_input_tokens_seen": 78078664, "step": 865 }, { "epoch": 3.9770114942528734, "loss": 0.14846225082874298, "loss_ce": 4.0008031646721065e-05, "loss_iou": 0.470703125, "loss_num": 0.0296630859375, "loss_xval": 0.1484375, "num_input_tokens_seen": 78078664, "step": 865 }, { "epoch": 3.981609195402299, "grad_norm": 8.750514149287007, "learning_rate": 5e-06, "loss": 0.1133, "num_input_tokens_seen": 78168928, "step": 866 }, { "epoch": 3.981609195402299, "loss": 0.11572016775608063, "loss_ce": 2.8033886337652802e-05, "loss_iou": 0.306640625, "loss_num": 0.023193359375, "loss_xval": 0.11572265625, "num_input_tokens_seen": 78168928, "step": 866 }, { "epoch": 3.986206896551724, "grad_norm": 7.203279541438859, "learning_rate": 5e-06, "loss": 0.1171, "num_input_tokens_seen": 78259188, "step": 867 }, { "epoch": 3.986206896551724, "loss": 0.10868757218122482, "loss_ce": 1.4477924196398817e-05, "loss_iou": 0.306640625, "loss_num": 0.021728515625, "loss_xval": 0.10888671875, "num_input_tokens_seen": 78259188, "step": 867 }, { "epoch": 3.9908045977011493, "grad_norm": 3.5527677092629077, "learning_rate": 5e-06, "loss": 0.0888, "num_input_tokens_seen": 78349456, "step": 868 }, { "epoch": 3.9908045977011493, "loss": 0.061215296387672424, "loss_ce": 2.7547748686629348e-05, "loss_iou": 0.326171875, "loss_num": 0.01220703125, "loss_xval": 0.061279296875, "num_input_tokens_seen": 78349456, "step": 868 }, { "epoch": 3.995402298850575, "grad_norm": 13.601719729469064, "learning_rate": 5e-06, "loss": 0.1335, "num_input_tokens_seen": 78439776, "step": 869 }, { "epoch": 3.995402298850575, "loss": 0.06812618672847748, "loss_ce": 2.621453495521564e-05, "loss_iou": 0.2734375, "loss_num": 0.01361083984375, "loss_xval": 0.06787109375, "num_input_tokens_seen": 78439776, "step": 869 }, { "epoch": 4.0, "grad_norm": 12.850021260799155, "learning_rate": 5e-06, "loss": 0.0894, "num_input_tokens_seen": 78530044, "step": 870 }, { "epoch": 4.0, "loss": 0.07683277875185013, "loss_ce": 4.778397851623595e-06, "loss_iou": 0.322265625, "loss_num": 0.015380859375, "loss_xval": 0.07666015625, "num_input_tokens_seen": 78530044, "step": 870 }, { "epoch": 4.004597701149425, "grad_norm": 12.896033435436056, "learning_rate": 5e-06, "loss": 0.0941, "num_input_tokens_seen": 78620484, "step": 871 }, { "epoch": 4.004597701149425, "loss": 0.07385321706533432, "loss_ce": 1.593406886968296e-05, "loss_iou": 0.3515625, "loss_num": 0.0147705078125, "loss_xval": 0.07373046875, "num_input_tokens_seen": 78620484, "step": 871 }, { "epoch": 4.00919540229885, "grad_norm": 10.038095509850997, "learning_rate": 5e-06, "loss": 0.1039, "num_input_tokens_seen": 78710856, "step": 872 }, { "epoch": 4.00919540229885, "loss": 0.12039355933666229, "loss_ce": 0.00012378332030493766, "loss_iou": 0.36328125, "loss_num": 0.0240478515625, "loss_xval": 0.1201171875, "num_input_tokens_seen": 78710856, "step": 872 }, { "epoch": 4.0137931034482754, "grad_norm": 7.577320908621721, "learning_rate": 5e-06, "loss": 0.0845, "num_input_tokens_seen": 78801316, "step": 873 }, { "epoch": 4.0137931034482754, "loss": 0.08987805247306824, "loss_ce": 3.429901698837057e-05, "loss_iou": 0.29296875, "loss_num": 0.0179443359375, "loss_xval": 0.08984375, "num_input_tokens_seen": 78801316, "step": 873 }, { "epoch": 4.0183908045977015, "grad_norm": 8.350163864558647, "learning_rate": 5e-06, "loss": 0.0975, "num_input_tokens_seen": 78891520, "step": 874 }, { "epoch": 4.0183908045977015, "loss": 0.0876561775803566, "loss_ce": 9.694716936792247e-06, "loss_iou": 0.333984375, "loss_num": 0.0174560546875, "loss_xval": 0.087890625, "num_input_tokens_seen": 78891520, "step": 874 }, { "epoch": 4.022988505747127, "grad_norm": 8.769669621263414, "learning_rate": 5e-06, "loss": 0.155, "num_input_tokens_seen": 78981876, "step": 875 }, { "epoch": 4.022988505747127, "loss": 0.15618135035037994, "loss_ce": 2.289763870066963e-05, "loss_iou": 0.390625, "loss_num": 0.03125, "loss_xval": 0.15625, "num_input_tokens_seen": 78981876, "step": 875 }, { "epoch": 4.027586206896552, "grad_norm": 16.167283950116676, "learning_rate": 5e-06, "loss": 0.1022, "num_input_tokens_seen": 79072252, "step": 876 }, { "epoch": 4.027586206896552, "loss": 0.09752378612756729, "loss_ce": 2.0128632968408056e-05, "loss_iou": 0.35546875, "loss_num": 0.01953125, "loss_xval": 0.09765625, "num_input_tokens_seen": 79072252, "step": 876 }, { "epoch": 4.032183908045977, "grad_norm": 8.509588312264029, "learning_rate": 5e-06, "loss": 0.1147, "num_input_tokens_seen": 79162600, "step": 877 }, { "epoch": 4.032183908045977, "loss": 0.1270662546157837, "loss_ce": 8.26121904538013e-05, "loss_iou": 0.34765625, "loss_num": 0.025390625, "loss_xval": 0.126953125, "num_input_tokens_seen": 79162600, "step": 877 }, { "epoch": 4.036781609195402, "grad_norm": 10.729375299726154, "learning_rate": 5e-06, "loss": 0.0858, "num_input_tokens_seen": 79253000, "step": 878 }, { "epoch": 4.036781609195402, "loss": 0.07850313931703568, "loss_ce": 1.1927713785553351e-05, "loss_iou": 0.3125, "loss_num": 0.0157470703125, "loss_xval": 0.07861328125, "num_input_tokens_seen": 79253000, "step": 878 }, { "epoch": 4.041379310344827, "grad_norm": 9.480711461561928, "learning_rate": 5e-06, "loss": 0.092, "num_input_tokens_seen": 79343448, "step": 879 }, { "epoch": 4.041379310344827, "loss": 0.11301976442337036, "loss_ce": 4.369192538433708e-05, "loss_iou": 0.361328125, "loss_num": 0.0225830078125, "loss_xval": 0.11279296875, "num_input_tokens_seen": 79343448, "step": 879 }, { "epoch": 4.045977011494253, "grad_norm": 20.962984913528285, "learning_rate": 5e-06, "loss": 0.0826, "num_input_tokens_seen": 79433948, "step": 880 }, { "epoch": 4.045977011494253, "loss": 0.060304559767246246, "loss_ce": 1.8244732018501963e-06, "loss_iou": 0.33203125, "loss_num": 0.0120849609375, "loss_xval": 0.060302734375, "num_input_tokens_seen": 79433948, "step": 880 }, { "epoch": 4.050574712643678, "grad_norm": 27.220075486414753, "learning_rate": 5e-06, "loss": 0.082, "num_input_tokens_seen": 79524228, "step": 881 }, { "epoch": 4.050574712643678, "loss": 0.07480692863464355, "loss_ce": 8.341698048752733e-06, "loss_iou": 0.4375, "loss_num": 0.01495361328125, "loss_xval": 0.07470703125, "num_input_tokens_seen": 79524228, "step": 881 }, { "epoch": 4.055172413793104, "grad_norm": 9.60955652874984, "learning_rate": 5e-06, "loss": 0.1416, "num_input_tokens_seen": 79614560, "step": 882 }, { "epoch": 4.055172413793104, "loss": 0.2088443636894226, "loss_ce": 0.00010411619587102905, "loss_iou": 0.42578125, "loss_num": 0.041748046875, "loss_xval": 0.208984375, "num_input_tokens_seen": 79614560, "step": 882 }, { "epoch": 4.059770114942529, "grad_norm": 13.229041315882611, "learning_rate": 5e-06, "loss": 0.1282, "num_input_tokens_seen": 79705048, "step": 883 }, { "epoch": 4.059770114942529, "loss": 0.11971049755811691, "loss_ce": 0.00014262790500652045, "loss_iou": 0.38671875, "loss_num": 0.02392578125, "loss_xval": 0.11962890625, "num_input_tokens_seen": 79705048, "step": 883 }, { "epoch": 4.064367816091954, "grad_norm": 11.345287474225346, "learning_rate": 5e-06, "loss": 0.1273, "num_input_tokens_seen": 79795468, "step": 884 }, { "epoch": 4.064367816091954, "loss": 0.15525750815868378, "loss_ce": 7.562241080449894e-05, "loss_iou": 0.4453125, "loss_num": 0.031005859375, "loss_xval": 0.1552734375, "num_input_tokens_seen": 79795468, "step": 884 }, { "epoch": 4.068965517241379, "grad_norm": 5.046401225257881, "learning_rate": 5e-06, "loss": 0.1107, "num_input_tokens_seen": 79885904, "step": 885 }, { "epoch": 4.068965517241379, "loss": 0.09430328011512756, "loss_ce": 3.965928954130504e-06, "loss_iou": 0.3671875, "loss_num": 0.0189208984375, "loss_xval": 0.09423828125, "num_input_tokens_seen": 79885904, "step": 885 }, { "epoch": 4.073563218390804, "grad_norm": 7.149591467541043, "learning_rate": 5e-06, "loss": 0.0859, "num_input_tokens_seen": 79976308, "step": 886 }, { "epoch": 4.073563218390804, "loss": 0.06827942281961441, "loss_ce": 2.6857467673835345e-05, "loss_iou": 0.322265625, "loss_num": 0.013671875, "loss_xval": 0.068359375, "num_input_tokens_seen": 79976308, "step": 886 }, { "epoch": 4.07816091954023, "grad_norm": 14.125533156236072, "learning_rate": 5e-06, "loss": 0.1158, "num_input_tokens_seen": 80066680, "step": 887 }, { "epoch": 4.07816091954023, "loss": 0.11485080420970917, "loss_ce": 1.3159661648387555e-05, "loss_iou": 0.302734375, "loss_num": 0.02294921875, "loss_xval": 0.11474609375, "num_input_tokens_seen": 80066680, "step": 887 }, { "epoch": 4.082758620689655, "grad_norm": 7.34272733165029, "learning_rate": 5e-06, "loss": 0.1636, "num_input_tokens_seen": 80157060, "step": 888 }, { "epoch": 4.082758620689655, "loss": 0.13511279225349426, "loss_ce": 7.250368071254343e-05, "loss_iou": 0.37890625, "loss_num": 0.027099609375, "loss_xval": 0.134765625, "num_input_tokens_seen": 80157060, "step": 888 }, { "epoch": 4.0873563218390805, "grad_norm": 7.121447843023311, "learning_rate": 5e-06, "loss": 0.1113, "num_input_tokens_seen": 80247628, "step": 889 }, { "epoch": 4.0873563218390805, "loss": 0.1320275515317917, "loss_ce": 8.516525667801034e-06, "loss_iou": 0.3984375, "loss_num": 0.0263671875, "loss_xval": 0.1318359375, "num_input_tokens_seen": 80247628, "step": 889 }, { "epoch": 4.091954022988506, "grad_norm": 9.329754076401972, "learning_rate": 5e-06, "loss": 0.1159, "num_input_tokens_seen": 80337164, "step": 890 }, { "epoch": 4.091954022988506, "loss": 0.10165956616401672, "loss_ce": 5.516031251318054e-06, "loss_iou": 0.3828125, "loss_num": 0.0203857421875, "loss_xval": 0.1015625, "num_input_tokens_seen": 80337164, "step": 890 }, { "epoch": 4.096551724137931, "grad_norm": 4.7416331340636875, "learning_rate": 5e-06, "loss": 0.1112, "num_input_tokens_seen": 80426632, "step": 891 }, { "epoch": 4.096551724137931, "loss": 0.14343947172164917, "loss_ce": 3.7382742448244244e-05, "loss_iou": 0.353515625, "loss_num": 0.0286865234375, "loss_xval": 0.1435546875, "num_input_tokens_seen": 80426632, "step": 891 }, { "epoch": 4.101149425287356, "grad_norm": 23.466418685698162, "learning_rate": 5e-06, "loss": 0.1543, "num_input_tokens_seen": 80516208, "step": 892 }, { "epoch": 4.101149425287356, "loss": 0.18008266389369965, "loss_ce": 2.8955022571608424e-05, "loss_iou": 0.4375, "loss_num": 0.0361328125, "loss_xval": 0.1796875, "num_input_tokens_seen": 80516208, "step": 892 }, { "epoch": 4.105747126436782, "grad_norm": 20.195736171276238, "learning_rate": 5e-06, "loss": 0.103, "num_input_tokens_seen": 80606536, "step": 893 }, { "epoch": 4.105747126436782, "loss": 0.09322860836982727, "loss_ce": 5.844266706844792e-05, "loss_iou": 0.390625, "loss_num": 0.0186767578125, "loss_xval": 0.09326171875, "num_input_tokens_seen": 80606536, "step": 893 }, { "epoch": 4.110344827586207, "grad_norm": 4.925478130148734, "learning_rate": 5e-06, "loss": 0.1001, "num_input_tokens_seen": 80696904, "step": 894 }, { "epoch": 4.110344827586207, "loss": 0.1146705150604248, "loss_ce": 1.5975076166796498e-05, "loss_iou": 0.29296875, "loss_num": 0.02294921875, "loss_xval": 0.11474609375, "num_input_tokens_seen": 80696904, "step": 894 }, { "epoch": 4.114942528735632, "grad_norm": 18.783331502764156, "learning_rate": 5e-06, "loss": 0.1221, "num_input_tokens_seen": 80787324, "step": 895 }, { "epoch": 4.114942528735632, "loss": 0.10798490047454834, "loss_ce": 0.0011123453732579947, "loss_iou": 0.384765625, "loss_num": 0.0213623046875, "loss_xval": 0.10693359375, "num_input_tokens_seen": 80787324, "step": 895 }, { "epoch": 4.119540229885057, "grad_norm": 6.031295114157052, "learning_rate": 5e-06, "loss": 0.1003, "num_input_tokens_seen": 80877804, "step": 896 }, { "epoch": 4.119540229885057, "loss": 0.12141025811433792, "loss_ce": 8.762812649365515e-05, "loss_iou": 0.34765625, "loss_num": 0.0242919921875, "loss_xval": 0.12109375, "num_input_tokens_seen": 80877804, "step": 896 }, { "epoch": 4.124137931034483, "grad_norm": 4.270748379873812, "learning_rate": 5e-06, "loss": 0.0957, "num_input_tokens_seen": 80967380, "step": 897 }, { "epoch": 4.124137931034483, "loss": 0.08645440638065338, "loss_ce": 2.8627207939280197e-05, "loss_iou": 0.349609375, "loss_num": 0.017333984375, "loss_xval": 0.08642578125, "num_input_tokens_seen": 80967380, "step": 897 }, { "epoch": 4.128735632183908, "grad_norm": 4.246728442816423, "learning_rate": 5e-06, "loss": 0.1038, "num_input_tokens_seen": 81057784, "step": 898 }, { "epoch": 4.128735632183908, "loss": 0.09482555836439133, "loss_ce": 6.847563054179773e-05, "loss_iou": 0.369140625, "loss_num": 0.0189208984375, "loss_xval": 0.0947265625, "num_input_tokens_seen": 81057784, "step": 898 }, { "epoch": 4.133333333333334, "grad_norm": 4.522025434777465, "learning_rate": 5e-06, "loss": 0.1549, "num_input_tokens_seen": 81148164, "step": 899 }, { "epoch": 4.133333333333334, "loss": 0.20047786831855774, "loss_ce": 3.8414596929214895e-05, "loss_iou": 0.349609375, "loss_num": 0.0400390625, "loss_xval": 0.2001953125, "num_input_tokens_seen": 81148164, "step": 899 }, { "epoch": 4.137931034482759, "grad_norm": 12.60515016043542, "learning_rate": 5e-06, "loss": 0.1024, "num_input_tokens_seen": 81237064, "step": 900 }, { "epoch": 4.137931034482759, "loss": 0.09033560007810593, "loss_ce": 3.5681719054991845e-06, "loss_iou": 0.326171875, "loss_num": 0.01806640625, "loss_xval": 0.09033203125, "num_input_tokens_seen": 81237064, "step": 900 }, { "epoch": 4.142528735632184, "grad_norm": 12.203513946052261, "learning_rate": 5e-06, "loss": 0.1242, "num_input_tokens_seen": 81327320, "step": 901 }, { "epoch": 4.142528735632184, "loss": 0.09698610007762909, "loss_ce": 1.236280354532937e-06, "loss_iou": 0.322265625, "loss_num": 0.0194091796875, "loss_xval": 0.09716796875, "num_input_tokens_seen": 81327320, "step": 901 }, { "epoch": 4.147126436781609, "grad_norm": 5.265820346765914, "learning_rate": 5e-06, "loss": 0.0975, "num_input_tokens_seen": 81417700, "step": 902 }, { "epoch": 4.147126436781609, "loss": 0.13237115740776062, "loss_ce": 1.6427336959168315e-05, "loss_iou": 0.412109375, "loss_num": 0.0264892578125, "loss_xval": 0.1328125, "num_input_tokens_seen": 81417700, "step": 902 }, { "epoch": 4.151724137931034, "grad_norm": 5.402431273469456, "learning_rate": 5e-06, "loss": 0.0975, "num_input_tokens_seen": 81508048, "step": 903 }, { "epoch": 4.151724137931034, "loss": 0.10200782120227814, "loss_ce": 1.8078684661304578e-05, "loss_iou": 0.373046875, "loss_num": 0.0203857421875, "loss_xval": 0.10205078125, "num_input_tokens_seen": 81508048, "step": 903 }, { "epoch": 4.1563218390804595, "grad_norm": 7.802227527691613, "learning_rate": 5e-06, "loss": 0.1275, "num_input_tokens_seen": 81598384, "step": 904 }, { "epoch": 4.1563218390804595, "loss": 0.0613405816257, "loss_ce": 4.60283481515944e-05, "loss_iou": 0.345703125, "loss_num": 0.01226806640625, "loss_xval": 0.061279296875, "num_input_tokens_seen": 81598384, "step": 904 }, { "epoch": 4.160919540229885, "grad_norm": 16.9420578658568, "learning_rate": 5e-06, "loss": 0.1179, "num_input_tokens_seen": 81688732, "step": 905 }, { "epoch": 4.160919540229885, "loss": 0.16473174095153809, "loss_ce": 1.3099732314003631e-05, "loss_iou": 0.41796875, "loss_num": 0.032958984375, "loss_xval": 0.1650390625, "num_input_tokens_seen": 81688732, "step": 905 }, { "epoch": 4.165517241379311, "grad_norm": 3.5038961317329633, "learning_rate": 5e-06, "loss": 0.0949, "num_input_tokens_seen": 81779124, "step": 906 }, { "epoch": 4.165517241379311, "loss": 0.09057803452014923, "loss_ce": 1.8635427068147692e-06, "loss_iou": 0.345703125, "loss_num": 0.01806640625, "loss_xval": 0.0908203125, "num_input_tokens_seen": 81779124, "step": 906 }, { "epoch": 4.170114942528736, "grad_norm": 6.320699544904618, "learning_rate": 5e-06, "loss": 0.0818, "num_input_tokens_seen": 81869528, "step": 907 }, { "epoch": 4.170114942528736, "loss": 0.0921754315495491, "loss_ce": 4.286535840947181e-05, "loss_iou": 0.341796875, "loss_num": 0.0184326171875, "loss_xval": 0.09228515625, "num_input_tokens_seen": 81869528, "step": 907 }, { "epoch": 4.174712643678161, "grad_norm": 20.78428241303839, "learning_rate": 5e-06, "loss": 0.1552, "num_input_tokens_seen": 81959744, "step": 908 }, { "epoch": 4.174712643678161, "loss": 0.17669041454792023, "loss_ce": 8.907296432880685e-06, "loss_iou": 0.37109375, "loss_num": 0.035400390625, "loss_xval": 0.1767578125, "num_input_tokens_seen": 81959744, "step": 908 }, { "epoch": 4.179310344827586, "grad_norm": 8.303007780105153, "learning_rate": 5e-06, "loss": 0.0563, "num_input_tokens_seen": 82050236, "step": 909 }, { "epoch": 4.179310344827586, "loss": 0.04546171426773071, "loss_ce": 2.1037711121607572e-05, "loss_iou": 0.279296875, "loss_num": 0.00909423828125, "loss_xval": 0.04541015625, "num_input_tokens_seen": 82050236, "step": 909 }, { "epoch": 4.183908045977011, "grad_norm": 11.886180061278875, "learning_rate": 5e-06, "loss": 0.1232, "num_input_tokens_seen": 82140600, "step": 910 }, { "epoch": 4.183908045977011, "loss": 0.10049276053905487, "loss_ce": 2.888959352276288e-05, "loss_iou": 0.41796875, "loss_num": 0.0201416015625, "loss_xval": 0.1005859375, "num_input_tokens_seen": 82140600, "step": 910 }, { "epoch": 4.188505747126436, "grad_norm": 8.89001202277643, "learning_rate": 5e-06, "loss": 0.1434, "num_input_tokens_seen": 82231004, "step": 911 }, { "epoch": 4.188505747126436, "loss": 0.156759113073349, "loss_ce": 2.0833926100749522e-05, "loss_iou": 0.34765625, "loss_num": 0.03125, "loss_xval": 0.15625, "num_input_tokens_seen": 82231004, "step": 911 }, { "epoch": 4.1931034482758625, "grad_norm": 12.14512131743274, "learning_rate": 5e-06, "loss": 0.07, "num_input_tokens_seen": 82321456, "step": 912 }, { "epoch": 4.1931034482758625, "loss": 0.0638713464140892, "loss_ce": 2.8572279916261323e-05, "loss_iou": 0.365234375, "loss_num": 0.01275634765625, "loss_xval": 0.06396484375, "num_input_tokens_seen": 82321456, "step": 912 }, { "epoch": 4.197701149425288, "grad_norm": 6.273360573385607, "learning_rate": 5e-06, "loss": 0.1092, "num_input_tokens_seen": 82411704, "step": 913 }, { "epoch": 4.197701149425288, "loss": 0.08992119133472443, "loss_ce": 1.1499548691062955e-06, "loss_iou": 0.376953125, "loss_num": 0.0179443359375, "loss_xval": 0.08984375, "num_input_tokens_seen": 82411704, "step": 913 }, { "epoch": 4.202298850574713, "grad_norm": 4.863327392449604, "learning_rate": 5e-06, "loss": 0.1271, "num_input_tokens_seen": 82500532, "step": 914 }, { "epoch": 4.202298850574713, "loss": 0.15036578476428986, "loss_ce": 5.677235094481148e-06, "loss_iou": 0.296875, "loss_num": 0.030029296875, "loss_xval": 0.150390625, "num_input_tokens_seen": 82500532, "step": 914 }, { "epoch": 4.206896551724138, "grad_norm": 6.0320903674371795, "learning_rate": 5e-06, "loss": 0.0956, "num_input_tokens_seen": 82591044, "step": 915 }, { "epoch": 4.206896551724138, "loss": 0.10052984952926636, "loss_ce": 3.5464396205497906e-05, "loss_iou": 0.34765625, "loss_num": 0.0201416015625, "loss_xval": 0.1005859375, "num_input_tokens_seen": 82591044, "step": 915 }, { "epoch": 4.211494252873563, "grad_norm": 15.82805235612327, "learning_rate": 5e-06, "loss": 0.1294, "num_input_tokens_seen": 82681520, "step": 916 }, { "epoch": 4.211494252873563, "loss": 0.09116413444280624, "loss_ce": 2.3386701286653988e-05, "loss_iou": 0.3125, "loss_num": 0.0181884765625, "loss_xval": 0.09130859375, "num_input_tokens_seen": 82681520, "step": 916 }, { "epoch": 4.216091954022988, "grad_norm": 2.3437945707030305, "learning_rate": 5e-06, "loss": 0.095, "num_input_tokens_seen": 82771904, "step": 917 }, { "epoch": 4.216091954022988, "loss": 0.09512968361377716, "loss_ce": 6.396087883331347e-06, "loss_iou": 0.392578125, "loss_num": 0.01904296875, "loss_xval": 0.09521484375, "num_input_tokens_seen": 82771904, "step": 917 }, { "epoch": 4.220689655172414, "grad_norm": 8.364208607807933, "learning_rate": 5e-06, "loss": 0.0796, "num_input_tokens_seen": 82862244, "step": 918 }, { "epoch": 4.220689655172414, "loss": 0.08399280905723572, "loss_ce": 8.435705240117386e-06, "loss_iou": 0.375, "loss_num": 0.016845703125, "loss_xval": 0.083984375, "num_input_tokens_seen": 82862244, "step": 918 }, { "epoch": 4.225287356321839, "grad_norm": 3.326406479061023, "learning_rate": 5e-06, "loss": 0.0811, "num_input_tokens_seen": 82952644, "step": 919 }, { "epoch": 4.225287356321839, "loss": 0.06023194640874863, "loss_ce": 2.0768720787600614e-05, "loss_iou": 0.32421875, "loss_num": 0.01202392578125, "loss_xval": 0.060302734375, "num_input_tokens_seen": 82952644, "step": 919 }, { "epoch": 4.2298850574712645, "grad_norm": 3.4665111443776944, "learning_rate": 5e-06, "loss": 0.131, "num_input_tokens_seen": 83042996, "step": 920 }, { "epoch": 4.2298850574712645, "loss": 0.19263532757759094, "loss_ce": 8.370232535526156e-06, "loss_iou": 0.40625, "loss_num": 0.03857421875, "loss_xval": 0.1923828125, "num_input_tokens_seen": 83042996, "step": 920 }, { "epoch": 4.23448275862069, "grad_norm": 9.069959650004307, "learning_rate": 5e-06, "loss": 0.1234, "num_input_tokens_seen": 83133396, "step": 921 }, { "epoch": 4.23448275862069, "loss": 0.11099687963724136, "loss_ce": 3.496557837934233e-05, "loss_iou": 0.32421875, "loss_num": 0.022216796875, "loss_xval": 0.11083984375, "num_input_tokens_seen": 83133396, "step": 921 }, { "epoch": 4.239080459770115, "grad_norm": 2.5690493136384003, "learning_rate": 5e-06, "loss": 0.0982, "num_input_tokens_seen": 83223872, "step": 922 }, { "epoch": 4.239080459770115, "loss": 0.11521795392036438, "loss_ce": 1.4101822671364062e-05, "loss_iou": 0.33203125, "loss_num": 0.0230712890625, "loss_xval": 0.115234375, "num_input_tokens_seen": 83223872, "step": 922 }, { "epoch": 4.24367816091954, "grad_norm": 7.69995972428386, "learning_rate": 5e-06, "loss": 0.1505, "num_input_tokens_seen": 83314296, "step": 923 }, { "epoch": 4.24367816091954, "loss": 0.125550776720047, "loss_ce": 1.4667482446384383e-06, "loss_iou": 0.30078125, "loss_num": 0.025146484375, "loss_xval": 0.1259765625, "num_input_tokens_seen": 83314296, "step": 923 }, { "epoch": 4.248275862068965, "grad_norm": 6.582981564440316, "learning_rate": 5e-06, "loss": 0.1091, "num_input_tokens_seen": 83404576, "step": 924 }, { "epoch": 4.248275862068965, "loss": 0.11165105551481247, "loss_ce": 1.7746649973560125e-05, "loss_iou": 0.427734375, "loss_num": 0.0223388671875, "loss_xval": 0.11181640625, "num_input_tokens_seen": 83404576, "step": 924 }, { "epoch": 4.252873563218391, "grad_norm": 3.1233405856262006, "learning_rate": 5e-06, "loss": 0.1114, "num_input_tokens_seen": 83494884, "step": 925 }, { "epoch": 4.252873563218391, "loss": 0.13092860579490662, "loss_ce": 8.193834219127893e-06, "loss_iou": 0.345703125, "loss_num": 0.0262451171875, "loss_xval": 0.130859375, "num_input_tokens_seen": 83494884, "step": 925 }, { "epoch": 4.257471264367816, "grad_norm": 11.295201522679497, "learning_rate": 5e-06, "loss": 0.1043, "num_input_tokens_seen": 83585372, "step": 926 }, { "epoch": 4.257471264367816, "loss": 0.09147991985082626, "loss_ce": 3.3995776902884245e-05, "loss_iou": 0.37109375, "loss_num": 0.018310546875, "loss_xval": 0.09130859375, "num_input_tokens_seen": 83585372, "step": 926 }, { "epoch": 4.2620689655172415, "grad_norm": 8.74331179817824, "learning_rate": 5e-06, "loss": 0.1084, "num_input_tokens_seen": 83675776, "step": 927 }, { "epoch": 4.2620689655172415, "loss": 0.11818952858448029, "loss_ce": 1.0204122190771159e-05, "loss_iou": 0.43359375, "loss_num": 0.023681640625, "loss_xval": 0.1181640625, "num_input_tokens_seen": 83675776, "step": 927 }, { "epoch": 4.266666666666667, "grad_norm": 36.691461336314475, "learning_rate": 5e-06, "loss": 0.1246, "num_input_tokens_seen": 83766024, "step": 928 }, { "epoch": 4.266666666666667, "loss": 0.11256686598062515, "loss_ce": 2.7873388717125636e-06, "loss_iou": 0.404296875, "loss_num": 0.0224609375, "loss_xval": 0.11279296875, "num_input_tokens_seen": 83766024, "step": 928 }, { "epoch": 4.271264367816092, "grad_norm": 20.28534876927393, "learning_rate": 5e-06, "loss": 0.1201, "num_input_tokens_seen": 83856372, "step": 929 }, { "epoch": 4.271264367816092, "loss": 0.1561427265405655, "loss_ce": 0.0033259547781199217, "loss_iou": 0.376953125, "loss_num": 0.0306396484375, "loss_xval": 0.15234375, "num_input_tokens_seen": 83856372, "step": 929 }, { "epoch": 4.275862068965517, "grad_norm": 21.074305399488686, "learning_rate": 5e-06, "loss": 0.1115, "num_input_tokens_seen": 83946688, "step": 930 }, { "epoch": 4.275862068965517, "loss": 0.10977804660797119, "loss_ce": 6.307951480266638e-06, "loss_iou": 0.298828125, "loss_num": 0.02197265625, "loss_xval": 0.10986328125, "num_input_tokens_seen": 83946688, "step": 930 }, { "epoch": 4.280459770114943, "grad_norm": 18.474400113113084, "learning_rate": 5e-06, "loss": 0.1092, "num_input_tokens_seen": 84037100, "step": 931 }, { "epoch": 4.280459770114943, "loss": 0.08449871093034744, "loss_ce": 5.657064320985228e-05, "loss_iou": 0.431640625, "loss_num": 0.0169677734375, "loss_xval": 0.08447265625, "num_input_tokens_seen": 84037100, "step": 931 }, { "epoch": 4.285057471264368, "grad_norm": 6.907766753439544, "learning_rate": 5e-06, "loss": 0.105, "num_input_tokens_seen": 84127468, "step": 932 }, { "epoch": 4.285057471264368, "loss": 0.10681600868701935, "loss_ce": 4.487107617023867e-06, "loss_iou": 0.400390625, "loss_num": 0.0213623046875, "loss_xval": 0.10693359375, "num_input_tokens_seen": 84127468, "step": 932 }, { "epoch": 4.289655172413793, "grad_norm": 11.040747277806908, "learning_rate": 5e-06, "loss": 0.1174, "num_input_tokens_seen": 84216928, "step": 933 }, { "epoch": 4.289655172413793, "loss": 0.1147032380104065, "loss_ce": 4.869817348662764e-05, "loss_iou": 0.40234375, "loss_num": 0.02294921875, "loss_xval": 0.11474609375, "num_input_tokens_seen": 84216928, "step": 933 }, { "epoch": 4.294252873563218, "grad_norm": 5.536586522856837, "learning_rate": 5e-06, "loss": 0.1089, "num_input_tokens_seen": 84307292, "step": 934 }, { "epoch": 4.294252873563218, "loss": 0.14283494651317596, "loss_ce": 0.0001957785279955715, "loss_iou": 0.349609375, "loss_num": 0.028564453125, "loss_xval": 0.142578125, "num_input_tokens_seen": 84307292, "step": 934 }, { "epoch": 4.2988505747126435, "grad_norm": 2.131997523716501, "learning_rate": 5e-06, "loss": 0.1153, "num_input_tokens_seen": 84396872, "step": 935 }, { "epoch": 4.2988505747126435, "loss": 0.12867018580436707, "loss_ce": 8.078463906713296e-06, "loss_iou": 0.359375, "loss_num": 0.0257568359375, "loss_xval": 0.12890625, "num_input_tokens_seen": 84396872, "step": 935 }, { "epoch": 4.303448275862069, "grad_norm": 11.27288052453101, "learning_rate": 5e-06, "loss": 0.1092, "num_input_tokens_seen": 84487316, "step": 936 }, { "epoch": 4.303448275862069, "loss": 0.06220962479710579, "loss_ce": 0.0010371371172368526, "loss_iou": 0.349609375, "loss_num": 0.01226806640625, "loss_xval": 0.061279296875, "num_input_tokens_seen": 84487316, "step": 936 }, { "epoch": 4.308045977011494, "grad_norm": 3.4892119545715974, "learning_rate": 5e-06, "loss": 0.1456, "num_input_tokens_seen": 84577824, "step": 937 }, { "epoch": 4.308045977011494, "loss": 0.1438092142343521, "loss_ce": 1.0385462701378856e-05, "loss_iou": 0.37109375, "loss_num": 0.0286865234375, "loss_xval": 0.1435546875, "num_input_tokens_seen": 84577824, "step": 937 }, { "epoch": 4.31264367816092, "grad_norm": 6.088518946016204, "learning_rate": 5e-06, "loss": 0.0929, "num_input_tokens_seen": 84668208, "step": 938 }, { "epoch": 4.31264367816092, "loss": 0.08154906332492828, "loss_ce": 2.134743226633873e-05, "loss_iou": 0.37109375, "loss_num": 0.016357421875, "loss_xval": 0.08154296875, "num_input_tokens_seen": 84668208, "step": 938 }, { "epoch": 4.317241379310345, "grad_norm": 10.741692080234259, "learning_rate": 5e-06, "loss": 0.1028, "num_input_tokens_seen": 84758388, "step": 939 }, { "epoch": 4.317241379310345, "loss": 0.12125536799430847, "loss_ce": 9.027476153278258e-06, "loss_iou": 0.4140625, "loss_num": 0.024169921875, "loss_xval": 0.12109375, "num_input_tokens_seen": 84758388, "step": 939 }, { "epoch": 4.32183908045977, "grad_norm": 12.104640117183845, "learning_rate": 5e-06, "loss": 0.1025, "num_input_tokens_seen": 84848688, "step": 940 }, { "epoch": 4.32183908045977, "loss": 0.125244140625, "loss_ce": 0.0001831004919949919, "loss_iou": 0.40234375, "loss_num": 0.0250244140625, "loss_xval": 0.125, "num_input_tokens_seen": 84848688, "step": 940 }, { "epoch": 4.326436781609195, "grad_norm": 13.842704032927974, "learning_rate": 5e-06, "loss": 0.1121, "num_input_tokens_seen": 84938896, "step": 941 }, { "epoch": 4.326436781609195, "loss": 0.08458052575588226, "loss_ce": 1.0556854022070183e-06, "loss_iou": 0.345703125, "loss_num": 0.016845703125, "loss_xval": 0.08447265625, "num_input_tokens_seen": 84938896, "step": 941 }, { "epoch": 4.3310344827586205, "grad_norm": 27.64969062986627, "learning_rate": 5e-06, "loss": 0.0912, "num_input_tokens_seen": 85029124, "step": 942 }, { "epoch": 4.3310344827586205, "loss": 0.11936682462692261, "loss_ce": 0.00025671368348412216, "loss_iou": 0.294921875, "loss_num": 0.0238037109375, "loss_xval": 0.119140625, "num_input_tokens_seen": 85029124, "step": 942 }, { "epoch": 4.335632183908046, "grad_norm": 7.051941352640006, "learning_rate": 5e-06, "loss": 0.0948, "num_input_tokens_seen": 85119572, "step": 943 }, { "epoch": 4.335632183908046, "loss": 0.08214512467384338, "loss_ce": 7.063375960569829e-06, "loss_iou": 0.34375, "loss_num": 0.0164794921875, "loss_xval": 0.08203125, "num_input_tokens_seen": 85119572, "step": 943 }, { "epoch": 4.340229885057472, "grad_norm": 5.222428783369038, "learning_rate": 5e-06, "loss": 0.1232, "num_input_tokens_seen": 85209984, "step": 944 }, { "epoch": 4.340229885057472, "loss": 0.13096053898334503, "loss_ce": 9.61720070336014e-06, "loss_iou": 0.32421875, "loss_num": 0.026123046875, "loss_xval": 0.130859375, "num_input_tokens_seen": 85209984, "step": 944 }, { "epoch": 4.344827586206897, "grad_norm": 8.261994222444807, "learning_rate": 5e-06, "loss": 0.1248, "num_input_tokens_seen": 85300436, "step": 945 }, { "epoch": 4.344827586206897, "loss": 0.11582186818122864, "loss_ce": 0.0001144730849773623, "loss_iou": 0.28515625, "loss_num": 0.023193359375, "loss_xval": 0.11572265625, "num_input_tokens_seen": 85300436, "step": 945 }, { "epoch": 4.349425287356322, "grad_norm": 4.239609844753722, "learning_rate": 5e-06, "loss": 0.1083, "num_input_tokens_seen": 85390688, "step": 946 }, { "epoch": 4.349425287356322, "loss": 0.08635582774877548, "loss_ce": 6.338353159662802e-06, "loss_iou": 0.37109375, "loss_num": 0.017333984375, "loss_xval": 0.08642578125, "num_input_tokens_seen": 85390688, "step": 946 }, { "epoch": 4.354022988505747, "grad_norm": 15.411419922368433, "learning_rate": 5e-06, "loss": 0.1401, "num_input_tokens_seen": 85481212, "step": 947 }, { "epoch": 4.354022988505747, "loss": 0.11237078905105591, "loss_ce": 5.069013241154607e-06, "loss_iou": 0.39453125, "loss_num": 0.0224609375, "loss_xval": 0.1123046875, "num_input_tokens_seen": 85481212, "step": 947 }, { "epoch": 4.358620689655172, "grad_norm": 3.634268474402103, "learning_rate": 5e-06, "loss": 0.127, "num_input_tokens_seen": 85570804, "step": 948 }, { "epoch": 4.358620689655172, "loss": 0.1522293984889984, "loss_ce": 7.724566785327625e-06, "loss_iou": 0.31640625, "loss_num": 0.030517578125, "loss_xval": 0.15234375, "num_input_tokens_seen": 85570804, "step": 948 }, { "epoch": 4.363218390804597, "grad_norm": 8.013548594881062, "learning_rate": 5e-06, "loss": 0.1133, "num_input_tokens_seen": 85661312, "step": 949 }, { "epoch": 4.363218390804597, "loss": 0.14923954010009766, "loss_ce": 8.487531158607453e-05, "loss_iou": 0.32421875, "loss_num": 0.0299072265625, "loss_xval": 0.1494140625, "num_input_tokens_seen": 85661312, "step": 949 }, { "epoch": 4.3678160919540225, "grad_norm": 2.762105405490039, "learning_rate": 5e-06, "loss": 0.1162, "num_input_tokens_seen": 85751636, "step": 950 }, { "epoch": 4.3678160919540225, "loss": 0.1016370952129364, "loss_ce": 1.3553465578297619e-05, "loss_iou": 0.35546875, "loss_num": 0.020263671875, "loss_xval": 0.1015625, "num_input_tokens_seen": 85751636, "step": 950 }, { "epoch": 4.372413793103449, "grad_norm": 5.105256776246609, "learning_rate": 5e-06, "loss": 0.1105, "num_input_tokens_seen": 85842060, "step": 951 }, { "epoch": 4.372413793103449, "loss": 0.10755231976509094, "loss_ce": 8.379087375942618e-06, "loss_iou": 0.35546875, "loss_num": 0.021484375, "loss_xval": 0.107421875, "num_input_tokens_seen": 85842060, "step": 951 }, { "epoch": 4.377011494252874, "grad_norm": 5.570764796509669, "learning_rate": 5e-06, "loss": 0.0922, "num_input_tokens_seen": 85932444, "step": 952 }, { "epoch": 4.377011494252874, "loss": 0.0883231908082962, "loss_ce": 3.58409270120319e-05, "loss_iou": 0.392578125, "loss_num": 0.0177001953125, "loss_xval": 0.08837890625, "num_input_tokens_seen": 85932444, "step": 952 }, { "epoch": 4.381609195402299, "grad_norm": 10.856984323349382, "learning_rate": 5e-06, "loss": 0.1382, "num_input_tokens_seen": 86022780, "step": 953 }, { "epoch": 4.381609195402299, "loss": 0.18059954047203064, "loss_ce": 5.753596997237764e-05, "loss_iou": 0.37109375, "loss_num": 0.0361328125, "loss_xval": 0.1806640625, "num_input_tokens_seen": 86022780, "step": 953 }, { "epoch": 4.386206896551724, "grad_norm": 13.39000045516796, "learning_rate": 5e-06, "loss": 0.1156, "num_input_tokens_seen": 86113048, "step": 954 }, { "epoch": 4.386206896551724, "loss": 0.10757958889007568, "loss_ce": 3.564245707821101e-05, "loss_iou": 0.37890625, "loss_num": 0.021484375, "loss_xval": 0.107421875, "num_input_tokens_seen": 86113048, "step": 954 }, { "epoch": 4.390804597701149, "grad_norm": 3.0587557281723505, "learning_rate": 5e-06, "loss": 0.0907, "num_input_tokens_seen": 86203536, "step": 955 }, { "epoch": 4.390804597701149, "loss": 0.08970493823289871, "loss_ce": 1.377277476422023e-05, "loss_iou": 0.47265625, "loss_num": 0.0179443359375, "loss_xval": 0.08984375, "num_input_tokens_seen": 86203536, "step": 955 }, { "epoch": 4.395402298850574, "grad_norm": 7.714066456128423, "learning_rate": 5e-06, "loss": 0.1126, "num_input_tokens_seen": 86293828, "step": 956 }, { "epoch": 4.395402298850574, "loss": 0.14037680625915527, "loss_ce": 8.751240238780156e-05, "loss_iou": 0.40234375, "loss_num": 0.028076171875, "loss_xval": 0.140625, "num_input_tokens_seen": 86293828, "step": 956 }, { "epoch": 4.4, "grad_norm": 6.43769065897863, "learning_rate": 5e-06, "loss": 0.0938, "num_input_tokens_seen": 86384244, "step": 957 }, { "epoch": 4.4, "loss": 0.12558409571647644, "loss_ce": 0.0003399590204935521, "loss_iou": 0.326171875, "loss_num": 0.0250244140625, "loss_xval": 0.125, "num_input_tokens_seen": 86384244, "step": 957 }, { "epoch": 4.4045977011494255, "grad_norm": 7.325715877251544, "learning_rate": 5e-06, "loss": 0.1319, "num_input_tokens_seen": 86474564, "step": 958 }, { "epoch": 4.4045977011494255, "loss": 0.07348855584859848, "loss_ce": 3.274789924034849e-05, "loss_iou": 0.32421875, "loss_num": 0.01470947265625, "loss_xval": 0.0732421875, "num_input_tokens_seen": 86474564, "step": 958 }, { "epoch": 4.409195402298851, "grad_norm": 3.1430755970159763, "learning_rate": 5e-06, "loss": 0.0965, "num_input_tokens_seen": 86565012, "step": 959 }, { "epoch": 4.409195402298851, "loss": 0.10845954716205597, "loss_ce": 9.163121285382658e-05, "loss_iou": 0.267578125, "loss_num": 0.021728515625, "loss_xval": 0.1083984375, "num_input_tokens_seen": 86565012, "step": 959 }, { "epoch": 4.413793103448276, "grad_norm": 2.479253070842102, "learning_rate": 5e-06, "loss": 0.0849, "num_input_tokens_seen": 86655508, "step": 960 }, { "epoch": 4.413793103448276, "loss": 0.09556898474693298, "loss_ce": 3.370657577761449e-05, "loss_iou": 0.328125, "loss_num": 0.01904296875, "loss_xval": 0.095703125, "num_input_tokens_seen": 86655508, "step": 960 }, { "epoch": 4.418390804597701, "grad_norm": 8.037612054391559, "learning_rate": 5e-06, "loss": 0.1075, "num_input_tokens_seen": 86744356, "step": 961 }, { "epoch": 4.418390804597701, "loss": 0.13194900751113892, "loss_ce": 2.151706939912401e-05, "loss_iou": 0.365234375, "loss_num": 0.0263671875, "loss_xval": 0.1318359375, "num_input_tokens_seen": 86744356, "step": 961 }, { "epoch": 4.422988505747126, "grad_norm": 4.558636962872201, "learning_rate": 5e-06, "loss": 0.0655, "num_input_tokens_seen": 86834692, "step": 962 }, { "epoch": 4.422988505747126, "loss": 0.05887310206890106, "loss_ce": 4.69345968667767e-06, "loss_iou": 0.31640625, "loss_num": 0.01177978515625, "loss_xval": 0.058837890625, "num_input_tokens_seen": 86834692, "step": 962 }, { "epoch": 4.427586206896552, "grad_norm": 11.446145366052058, "learning_rate": 5e-06, "loss": 0.1219, "num_input_tokens_seen": 86925088, "step": 963 }, { "epoch": 4.427586206896552, "loss": 0.0905739963054657, "loss_ce": 1.3087726983940229e-05, "loss_iou": 0.349609375, "loss_num": 0.01806640625, "loss_xval": 0.09033203125, "num_input_tokens_seen": 86925088, "step": 963 }, { "epoch": 4.432183908045977, "grad_norm": 9.136099668207656, "learning_rate": 5e-06, "loss": 0.1263, "num_input_tokens_seen": 87015388, "step": 964 }, { "epoch": 4.432183908045977, "loss": 0.16606704890727997, "loss_ce": 2.0905536075588316e-05, "loss_iou": 0.42578125, "loss_num": 0.033203125, "loss_xval": 0.166015625, "num_input_tokens_seen": 87015388, "step": 964 }, { "epoch": 4.436781609195402, "grad_norm": 5.4148068009345645, "learning_rate": 5e-06, "loss": 0.1154, "num_input_tokens_seen": 87104260, "step": 965 }, { "epoch": 4.436781609195402, "loss": 0.1504197120666504, "loss_ce": 5.959868576610461e-05, "loss_iou": 0.296875, "loss_num": 0.030029296875, "loss_xval": 0.150390625, "num_input_tokens_seen": 87104260, "step": 965 }, { "epoch": 4.441379310344828, "grad_norm": 7.551140001822664, "learning_rate": 5e-06, "loss": 0.072, "num_input_tokens_seen": 87194688, "step": 966 }, { "epoch": 4.441379310344828, "loss": 0.0605931356549263, "loss_ce": 0.0001225552405230701, "loss_iou": 0.34375, "loss_num": 0.0120849609375, "loss_xval": 0.060546875, "num_input_tokens_seen": 87194688, "step": 966 }, { "epoch": 4.445977011494253, "grad_norm": 5.492004326107786, "learning_rate": 5e-06, "loss": 0.1128, "num_input_tokens_seen": 87285044, "step": 967 }, { "epoch": 4.445977011494253, "loss": 0.13285866379737854, "loss_ce": 4.616224396158941e-05, "loss_iou": 0.333984375, "loss_num": 0.026611328125, "loss_xval": 0.1328125, "num_input_tokens_seen": 87285044, "step": 967 }, { "epoch": 4.450574712643678, "grad_norm": 7.815560936649822, "learning_rate": 5e-06, "loss": 0.0891, "num_input_tokens_seen": 87375332, "step": 968 }, { "epoch": 4.450574712643678, "loss": 0.12036024034023285, "loss_ce": 2.943439358205069e-05, "loss_iou": 0.337890625, "loss_num": 0.0240478515625, "loss_xval": 0.1201171875, "num_input_tokens_seen": 87375332, "step": 968 }, { "epoch": 4.455172413793104, "grad_norm": 19.127660857216004, "learning_rate": 5e-06, "loss": 0.1198, "num_input_tokens_seen": 87465720, "step": 969 }, { "epoch": 4.455172413793104, "loss": 0.09645415097475052, "loss_ce": 3.339698650961509e-06, "loss_iou": 0.380859375, "loss_num": 0.019287109375, "loss_xval": 0.0966796875, "num_input_tokens_seen": 87465720, "step": 969 }, { "epoch": 4.459770114942529, "grad_norm": 2.8889092802079226, "learning_rate": 5e-06, "loss": 0.0854, "num_input_tokens_seen": 87555976, "step": 970 }, { "epoch": 4.459770114942529, "loss": 0.07163398712873459, "loss_ce": 9.234347089659423e-06, "loss_iou": 0.287109375, "loss_num": 0.01434326171875, "loss_xval": 0.07177734375, "num_input_tokens_seen": 87555976, "step": 970 }, { "epoch": 4.464367816091954, "grad_norm": 4.010750930380829, "learning_rate": 5e-06, "loss": 0.1133, "num_input_tokens_seen": 87646328, "step": 971 }, { "epoch": 4.464367816091954, "loss": 0.11213794350624084, "loss_ce": 1.108814331018948e-06, "loss_iou": 0.435546875, "loss_num": 0.0224609375, "loss_xval": 0.1123046875, "num_input_tokens_seen": 87646328, "step": 971 }, { "epoch": 4.468965517241379, "grad_norm": 8.210323216736478, "learning_rate": 5e-06, "loss": 0.1592, "num_input_tokens_seen": 87736608, "step": 972 }, { "epoch": 4.468965517241379, "loss": 0.18182888627052307, "loss_ce": 5.140444045537151e-06, "loss_iou": 0.412109375, "loss_num": 0.036376953125, "loss_xval": 0.181640625, "num_input_tokens_seen": 87736608, "step": 972 }, { "epoch": 4.4735632183908045, "grad_norm": 9.865783278700588, "learning_rate": 5e-06, "loss": 0.129, "num_input_tokens_seen": 87826276, "step": 973 }, { "epoch": 4.4735632183908045, "loss": 0.06133665144443512, "loss_ce": 1.1581903891055845e-05, "loss_iou": 0.33984375, "loss_num": 0.01226806640625, "loss_xval": 0.061279296875, "num_input_tokens_seen": 87826276, "step": 973 }, { "epoch": 4.47816091954023, "grad_norm": 4.060783363175244, "learning_rate": 5e-06, "loss": 0.1517, "num_input_tokens_seen": 87915748, "step": 974 }, { "epoch": 4.47816091954023, "loss": 0.1866350769996643, "loss_ce": 2.0090221369173378e-05, "loss_iou": 0.337890625, "loss_num": 0.037353515625, "loss_xval": 0.1865234375, "num_input_tokens_seen": 87915748, "step": 974 }, { "epoch": 4.482758620689655, "grad_norm": 4.078469036538909, "learning_rate": 5e-06, "loss": 0.1309, "num_input_tokens_seen": 88006180, "step": 975 }, { "epoch": 4.482758620689655, "loss": 0.16530264914035797, "loss_ce": 1.9446521037025377e-05, "loss_iou": 0.345703125, "loss_num": 0.032958984375, "loss_xval": 0.1650390625, "num_input_tokens_seen": 88006180, "step": 975 }, { "epoch": 4.487356321839081, "grad_norm": 14.551219328912012, "learning_rate": 5e-06, "loss": 0.1101, "num_input_tokens_seen": 88096576, "step": 976 }, { "epoch": 4.487356321839081, "loss": 0.09992989897727966, "loss_ce": 1.5359226381406188e-05, "loss_iou": 0.3515625, "loss_num": 0.02001953125, "loss_xval": 0.10009765625, "num_input_tokens_seen": 88096576, "step": 976 }, { "epoch": 4.491954022988506, "grad_norm": 15.074496002244894, "learning_rate": 5e-06, "loss": 0.1115, "num_input_tokens_seen": 88186864, "step": 977 }, { "epoch": 4.491954022988506, "loss": 0.098409004509449, "loss_ce": 5.076651632407447e-06, "loss_iou": 0.205078125, "loss_num": 0.0196533203125, "loss_xval": 0.0986328125, "num_input_tokens_seen": 88186864, "step": 977 }, { "epoch": 4.496551724137931, "grad_norm": 8.357602751730505, "learning_rate": 5e-06, "loss": 0.094, "num_input_tokens_seen": 88277108, "step": 978 }, { "epoch": 4.496551724137931, "loss": 0.12131184339523315, "loss_ce": 4.475433343031909e-06, "loss_iou": 0.318359375, "loss_num": 0.0242919921875, "loss_xval": 0.12109375, "num_input_tokens_seen": 88277108, "step": 978 }, { "epoch": 4.501149425287356, "grad_norm": 4.402022300916744, "learning_rate": 5e-06, "loss": 0.0903, "num_input_tokens_seen": 88367416, "step": 979 }, { "epoch": 4.501149425287356, "loss": 0.07329022884368896, "loss_ce": 7.855774310883135e-05, "loss_iou": 0.3046875, "loss_num": 0.0146484375, "loss_xval": 0.0732421875, "num_input_tokens_seen": 88367416, "step": 979 }, { "epoch": 4.505747126436781, "grad_norm": 14.341101022137071, "learning_rate": 5e-06, "loss": 0.0673, "num_input_tokens_seen": 88457876, "step": 980 }, { "epoch": 4.505747126436781, "loss": 0.053125277161598206, "loss_ce": 9.428293196833692e-06, "loss_iou": 0.296875, "loss_num": 0.0106201171875, "loss_xval": 0.05322265625, "num_input_tokens_seen": 88457876, "step": 980 }, { "epoch": 4.510344827586207, "grad_norm": 10.397148286106711, "learning_rate": 5e-06, "loss": 0.1395, "num_input_tokens_seen": 88548152, "step": 981 }, { "epoch": 4.510344827586207, "loss": 0.17081327736377716, "loss_ce": 2.164947181881871e-05, "loss_iou": 0.392578125, "loss_num": 0.0341796875, "loss_xval": 0.1708984375, "num_input_tokens_seen": 88548152, "step": 981 }, { "epoch": 4.514942528735633, "grad_norm": 4.078102449108354, "learning_rate": 5e-06, "loss": 0.1652, "num_input_tokens_seen": 88638536, "step": 982 }, { "epoch": 4.514942528735633, "loss": 0.1220250129699707, "loss_ce": 7.67759484006092e-05, "loss_iou": 0.37109375, "loss_num": 0.0244140625, "loss_xval": 0.1220703125, "num_input_tokens_seen": 88638536, "step": 982 }, { "epoch": 4.519540229885058, "grad_norm": 11.283949648161077, "learning_rate": 5e-06, "loss": 0.0946, "num_input_tokens_seen": 88728900, "step": 983 }, { "epoch": 4.519540229885058, "loss": 0.09133091568946838, "loss_ce": 7.060511507006595e-06, "loss_iou": 0.29296875, "loss_num": 0.018310546875, "loss_xval": 0.09130859375, "num_input_tokens_seen": 88728900, "step": 983 }, { "epoch": 4.524137931034483, "grad_norm": 9.40893126127556, "learning_rate": 5e-06, "loss": 0.0908, "num_input_tokens_seen": 88819192, "step": 984 }, { "epoch": 4.524137931034483, "loss": 0.08349698781967163, "loss_ce": 8.912971338759235e-07, "loss_iou": 0.380859375, "loss_num": 0.0167236328125, "loss_xval": 0.08349609375, "num_input_tokens_seen": 88819192, "step": 984 }, { "epoch": 4.528735632183908, "grad_norm": 7.932363219291443, "learning_rate": 5e-06, "loss": 0.1179, "num_input_tokens_seen": 88909616, "step": 985 }, { "epoch": 4.528735632183908, "loss": 0.10644952207803726, "loss_ce": 3.472416938166134e-05, "loss_iou": 0.396484375, "loss_num": 0.021240234375, "loss_xval": 0.1064453125, "num_input_tokens_seen": 88909616, "step": 985 }, { "epoch": 4.533333333333333, "grad_norm": 2.4934351424602568, "learning_rate": 5e-06, "loss": 0.0931, "num_input_tokens_seen": 88999228, "step": 986 }, { "epoch": 4.533333333333333, "loss": 0.09027735888957977, "loss_ce": 6.3577076616638806e-06, "loss_iou": 0.349609375, "loss_num": 0.01806640625, "loss_xval": 0.09033203125, "num_input_tokens_seen": 88999228, "step": 986 }, { "epoch": 4.537931034482758, "grad_norm": 4.191824583419648, "learning_rate": 5e-06, "loss": 0.0875, "num_input_tokens_seen": 89089628, "step": 987 }, { "epoch": 4.537931034482758, "loss": 0.05755450576543808, "loss_ce": 5.9386413340689614e-05, "loss_iou": 0.36328125, "loss_num": 0.011474609375, "loss_xval": 0.0576171875, "num_input_tokens_seen": 89089628, "step": 987 }, { "epoch": 4.5425287356321835, "grad_norm": 4.980888074280528, "learning_rate": 5e-06, "loss": 0.0923, "num_input_tokens_seen": 89179956, "step": 988 }, { "epoch": 4.5425287356321835, "loss": 0.06999941170215607, "loss_ce": 7.347244263655739e-06, "loss_iou": 0.330078125, "loss_num": 0.0140380859375, "loss_xval": 0.06982421875, "num_input_tokens_seen": 89179956, "step": 988 }, { "epoch": 4.5471264367816095, "grad_norm": 6.574375021382182, "learning_rate": 5e-06, "loss": 0.1534, "num_input_tokens_seen": 89270268, "step": 989 }, { "epoch": 4.5471264367816095, "loss": 0.1247449517250061, "loss_ce": 4.3468216972541995e-06, "loss_iou": 0.359375, "loss_num": 0.02490234375, "loss_xval": 0.12451171875, "num_input_tokens_seen": 89270268, "step": 989 }, { "epoch": 4.551724137931035, "grad_norm": 5.770679845977342, "learning_rate": 5e-06, "loss": 0.0699, "num_input_tokens_seen": 89360680, "step": 990 }, { "epoch": 4.551724137931035, "loss": 0.05505823343992233, "loss_ce": 4.520721631706692e-06, "loss_iou": 0.38671875, "loss_num": 0.010986328125, "loss_xval": 0.05517578125, "num_input_tokens_seen": 89360680, "step": 990 }, { "epoch": 4.55632183908046, "grad_norm": 3.4680196649545714, "learning_rate": 5e-06, "loss": 0.1073, "num_input_tokens_seen": 89451124, "step": 991 }, { "epoch": 4.55632183908046, "loss": 0.1168176680803299, "loss_ce": 2.6902296667685732e-05, "loss_iou": 0.435546875, "loss_num": 0.0233154296875, "loss_xval": 0.11669921875, "num_input_tokens_seen": 89451124, "step": 991 }, { "epoch": 4.560919540229885, "grad_norm": 6.00004334718162, "learning_rate": 5e-06, "loss": 0.1098, "num_input_tokens_seen": 89541568, "step": 992 }, { "epoch": 4.560919540229885, "loss": 0.11234864592552185, "loss_ce": 1.3441143892123364e-05, "loss_iou": 0.380859375, "loss_num": 0.0224609375, "loss_xval": 0.1123046875, "num_input_tokens_seen": 89541568, "step": 992 }, { "epoch": 4.56551724137931, "grad_norm": 6.73223448714399, "learning_rate": 5e-06, "loss": 0.0733, "num_input_tokens_seen": 89632052, "step": 993 }, { "epoch": 4.56551724137931, "loss": 0.07431768625974655, "loss_ce": 7.383117917925119e-06, "loss_iou": 0.32421875, "loss_num": 0.01483154296875, "loss_xval": 0.07421875, "num_input_tokens_seen": 89632052, "step": 993 }, { "epoch": 4.570114942528735, "grad_norm": 11.420047670032142, "learning_rate": 5e-06, "loss": 0.0873, "num_input_tokens_seen": 89722536, "step": 994 }, { "epoch": 4.570114942528735, "loss": 0.09373007714748383, "loss_ce": 0.008082492277026176, "loss_iou": 0.32421875, "loss_num": 0.01708984375, "loss_xval": 0.08544921875, "num_input_tokens_seen": 89722536, "step": 994 }, { "epoch": 4.574712643678161, "grad_norm": 14.176346167444665, "learning_rate": 5e-06, "loss": 0.113, "num_input_tokens_seen": 89812932, "step": 995 }, { "epoch": 4.574712643678161, "loss": 0.12351539731025696, "loss_ce": 1.0755041330412496e-05, "loss_iou": 0.373046875, "loss_num": 0.024658203125, "loss_xval": 0.12353515625, "num_input_tokens_seen": 89812932, "step": 995 }, { "epoch": 4.5793103448275865, "grad_norm": 10.667470789331928, "learning_rate": 5e-06, "loss": 0.1011, "num_input_tokens_seen": 89903292, "step": 996 }, { "epoch": 4.5793103448275865, "loss": 0.11320923268795013, "loss_ce": 1.1899982382601593e-05, "loss_iou": 0.32421875, "loss_num": 0.0225830078125, "loss_xval": 0.11328125, "num_input_tokens_seen": 89903292, "step": 996 }, { "epoch": 4.583908045977012, "grad_norm": 7.2249750864413205, "learning_rate": 5e-06, "loss": 0.115, "num_input_tokens_seen": 89993576, "step": 997 }, { "epoch": 4.583908045977012, "loss": 0.11794456839561462, "loss_ce": 2.465084253344685e-05, "loss_iou": 0.306640625, "loss_num": 0.0235595703125, "loss_xval": 0.1181640625, "num_input_tokens_seen": 89993576, "step": 997 }, { "epoch": 4.588505747126437, "grad_norm": 10.042877155900863, "learning_rate": 5e-06, "loss": 0.1231, "num_input_tokens_seen": 90083884, "step": 998 }, { "epoch": 4.588505747126437, "loss": 0.1203087866306305, "loss_ce": 8.497871931467671e-06, "loss_iou": 0.345703125, "loss_num": 0.0240478515625, "loss_xval": 0.1201171875, "num_input_tokens_seen": 90083884, "step": 998 }, { "epoch": 4.593103448275862, "grad_norm": 4.373260926369358, "learning_rate": 5e-06, "loss": 0.1232, "num_input_tokens_seen": 90174300, "step": 999 }, { "epoch": 4.593103448275862, "loss": 0.08949369937181473, "loss_ce": 1.6161524399649352e-05, "loss_iou": 0.3984375, "loss_num": 0.017822265625, "loss_xval": 0.08935546875, "num_input_tokens_seen": 90174300, "step": 999 }, { "epoch": 4.597701149425287, "grad_norm": 25.49877787319248, "learning_rate": 5e-06, "loss": 0.1146, "num_input_tokens_seen": 90264624, "step": 1000 }, { "epoch": 4.597701149425287, "eval_seeclick_CIoU": 0.4661611318588257, "eval_seeclick_GIoU": 0.4498722702264786, "eval_seeclick_IoU": 0.5080912709236145, "eval_seeclick_MAE_all": 0.060889746993780136, "eval_seeclick_MAE_h": 0.05166458524763584, "eval_seeclick_MAE_w": 0.10992535948753357, "eval_seeclick_MAE_x_boxes": 0.1068052388727665, "eval_seeclick_MAE_y_boxes": 0.054549889639019966, "eval_seeclick_NUM_probability": 0.9999993443489075, "eval_seeclick_inside_bbox": 0.7698863744735718, "eval_seeclick_loss": 0.3709671199321747, "eval_seeclick_loss_ce": 0.07325447350740433, "eval_seeclick_loss_iou": 0.47247314453125, "eval_seeclick_loss_num": 0.06253814697265625, "eval_seeclick_loss_xval": 0.3125, "eval_seeclick_runtime": 75.293, "eval_seeclick_samples_per_second": 0.571, "eval_seeclick_steps_per_second": 0.027, "num_input_tokens_seen": 90264624, "step": 1000 }, { "epoch": 4.597701149425287, "eval_icons_CIoU": 0.5760022103786469, "eval_icons_GIoU": 0.576844722032547, "eval_icons_IoU": 0.613362729549408, "eval_icons_MAE_all": 0.04183553345501423, "eval_icons_MAE_h": 0.07954326272010803, "eval_icons_MAE_w": 0.06392070464789867, "eval_icons_MAE_x_boxes": 0.059913450852036476, "eval_icons_MAE_y_boxes": 0.07867859303951263, "eval_icons_NUM_probability": 0.9999994337558746, "eval_icons_inside_bbox": 0.8107638955116272, "eval_icons_loss": 0.20698395371437073, "eval_icons_loss_ce": 1.277818611811199e-06, "eval_icons_loss_iou": 0.42694091796875, "eval_icons_loss_num": 0.04441070556640625, "eval_icons_loss_xval": 0.2221221923828125, "eval_icons_runtime": 88.2999, "eval_icons_samples_per_second": 0.566, "eval_icons_steps_per_second": 0.023, "num_input_tokens_seen": 90264624, "step": 1000 }, { "epoch": 4.597701149425287, "eval_screenspot_CIoU": 0.4215618173281352, "eval_screenspot_GIoU": 0.4092308084170024, "eval_screenspot_IoU": 0.48668718338012695, "eval_screenspot_MAE_all": 0.08650621399283409, "eval_screenspot_MAE_h": 0.08026436219612758, "eval_screenspot_MAE_w": 0.17552465697129568, "eval_screenspot_MAE_x_boxes": 0.16668692231178284, "eval_screenspot_MAE_y_boxes": 0.07600387185811996, "eval_screenspot_NUM_probability": 0.9999993443489075, "eval_screenspot_inside_bbox": 0.7637499968210856, "eval_screenspot_loss": 0.43140459060668945, "eval_screenspot_loss_ce": 0.00010789081594945553, "eval_screenspot_loss_iou": 0.4072265625, "eval_screenspot_loss_num": 0.08810933430989583, "eval_screenspot_loss_xval": 0.4403483072916667, "eval_screenspot_runtime": 149.3508, "eval_screenspot_samples_per_second": 0.596, "eval_screenspot_steps_per_second": 0.02, "num_input_tokens_seen": 90264624, "step": 1000 }, { "epoch": 4.597701149425287, "eval_compot_CIoU": 0.47925493121147156, "eval_compot_GIoU": 0.4560560882091522, "eval_compot_IoU": 0.5442279279232025, "eval_compot_MAE_all": 0.0558595210313797, "eval_compot_MAE_h": 0.07545717805624008, "eval_compot_MAE_w": 0.11159718781709671, "eval_compot_MAE_x_boxes": 0.10114440321922302, "eval_compot_MAE_y_boxes": 0.07544495910406113, "eval_compot_NUM_probability": 0.9999988079071045, "eval_compot_inside_bbox": 0.8072916567325592, "eval_compot_loss": 0.3027111887931824, "eval_compot_loss_ce": 0.01121709169819951, "eval_compot_loss_iou": 0.4888916015625, "eval_compot_loss_num": 0.05005645751953125, "eval_compot_loss_xval": 0.250244140625, "eval_compot_runtime": 86.8458, "eval_compot_samples_per_second": 0.576, "eval_compot_steps_per_second": 0.023, "num_input_tokens_seen": 90264624, "step": 1000 }, { "epoch": 4.597701149425287, "loss": 0.18884405493736267, "loss_ce": 0.006959293968975544, "loss_iou": 0.51171875, "loss_num": 0.036376953125, "loss_xval": 0.181640625, "num_input_tokens_seen": 90264624, "step": 1000 }, { "epoch": 4.602298850574712, "grad_norm": 9.40244134173418, "learning_rate": 5e-06, "loss": 0.1057, "num_input_tokens_seen": 90355016, "step": 1001 }, { "epoch": 4.602298850574712, "loss": 0.08515298366546631, "loss_ce": 5.4719315812690184e-05, "loss_iou": 0.33984375, "loss_num": 0.0169677734375, "loss_xval": 0.0849609375, "num_input_tokens_seen": 90355016, "step": 1001 }, { "epoch": 4.606896551724138, "grad_norm": 11.782183850460743, "learning_rate": 5e-06, "loss": 0.1077, "num_input_tokens_seen": 90445512, "step": 1002 }, { "epoch": 4.606896551724138, "loss": 0.08575557172298431, "loss_ce": 1.6436308214906603e-05, "loss_iou": 0.365234375, "loss_num": 0.01708984375, "loss_xval": 0.0859375, "num_input_tokens_seen": 90445512, "step": 1002 }, { "epoch": 4.611494252873563, "grad_norm": 17.076982393187414, "learning_rate": 5e-06, "loss": 0.1481, "num_input_tokens_seen": 90536000, "step": 1003 }, { "epoch": 4.611494252873563, "loss": 0.10569582879543304, "loss_ce": 4.39713076048065e-05, "loss_iou": 0.37890625, "loss_num": 0.0211181640625, "loss_xval": 0.10546875, "num_input_tokens_seen": 90536000, "step": 1003 }, { "epoch": 4.6160919540229886, "grad_norm": 16.92181833640432, "learning_rate": 5e-06, "loss": 0.0887, "num_input_tokens_seen": 90626368, "step": 1004 }, { "epoch": 4.6160919540229886, "loss": 0.10335642099380493, "loss_ce": 8.644953595648985e-06, "loss_iou": 0.2890625, "loss_num": 0.0206298828125, "loss_xval": 0.103515625, "num_input_tokens_seen": 90626368, "step": 1004 }, { "epoch": 4.620689655172414, "grad_norm": 5.272669970898714, "learning_rate": 5e-06, "loss": 0.111, "num_input_tokens_seen": 90716756, "step": 1005 }, { "epoch": 4.620689655172414, "loss": 0.12034394592046738, "loss_ce": 1.3136214874975849e-05, "loss_iou": 0.3125, "loss_num": 0.0240478515625, "loss_xval": 0.1201171875, "num_input_tokens_seen": 90716756, "step": 1005 }, { "epoch": 4.625287356321839, "grad_norm": 5.5450374658240476, "learning_rate": 5e-06, "loss": 0.1157, "num_input_tokens_seen": 90806388, "step": 1006 }, { "epoch": 4.625287356321839, "loss": 0.12717095017433167, "loss_ce": 4.190186700725462e-06, "loss_iou": 0.333984375, "loss_num": 0.025390625, "loss_xval": 0.126953125, "num_input_tokens_seen": 90806388, "step": 1006 }, { "epoch": 4.629885057471264, "grad_norm": 10.252060177606076, "learning_rate": 5e-06, "loss": 0.0924, "num_input_tokens_seen": 90896652, "step": 1007 }, { "epoch": 4.629885057471264, "loss": 0.10986798256635666, "loss_ce": 4.700279987446265e-06, "loss_iou": 0.306640625, "loss_num": 0.02197265625, "loss_xval": 0.10986328125, "num_input_tokens_seen": 90896652, "step": 1007 }, { "epoch": 4.63448275862069, "grad_norm": 18.179346667786145, "learning_rate": 5e-06, "loss": 0.1041, "num_input_tokens_seen": 90986972, "step": 1008 }, { "epoch": 4.63448275862069, "loss": 0.09801249206066132, "loss_ce": 5.289793534757337e-06, "loss_iou": 0.341796875, "loss_num": 0.0196533203125, "loss_xval": 0.09814453125, "num_input_tokens_seen": 90986972, "step": 1008 }, { "epoch": 4.639080459770115, "grad_norm": 7.486242267110328, "learning_rate": 5e-06, "loss": 0.0906, "num_input_tokens_seen": 91077320, "step": 1009 }, { "epoch": 4.639080459770115, "loss": 0.0817301869392395, "loss_ce": 4.111347152502276e-06, "loss_iou": 0.396484375, "loss_num": 0.016357421875, "loss_xval": 0.08154296875, "num_input_tokens_seen": 91077320, "step": 1009 }, { "epoch": 4.64367816091954, "grad_norm": 7.378317449558524, "learning_rate": 5e-06, "loss": 0.111, "num_input_tokens_seen": 91167668, "step": 1010 }, { "epoch": 4.64367816091954, "loss": 0.11923195421695709, "loss_ce": 1.5039631762192585e-05, "loss_iou": 0.384765625, "loss_num": 0.0238037109375, "loss_xval": 0.119140625, "num_input_tokens_seen": 91167668, "step": 1010 }, { "epoch": 4.6482758620689655, "grad_norm": 6.873925032428975, "learning_rate": 5e-06, "loss": 0.1342, "num_input_tokens_seen": 91257984, "step": 1011 }, { "epoch": 4.6482758620689655, "loss": 0.13492737710475922, "loss_ce": 9.166550626105163e-06, "loss_iou": 0.3828125, "loss_num": 0.0269775390625, "loss_xval": 0.134765625, "num_input_tokens_seen": 91257984, "step": 1011 }, { "epoch": 4.652873563218391, "grad_norm": 13.994177721226423, "learning_rate": 5e-06, "loss": 0.087, "num_input_tokens_seen": 91348380, "step": 1012 }, { "epoch": 4.652873563218391, "loss": 0.07507447898387909, "loss_ce": 1.2354627187960432e-06, "loss_iou": 0.369140625, "loss_num": 0.0150146484375, "loss_xval": 0.0751953125, "num_input_tokens_seen": 91348380, "step": 1012 }, { "epoch": 4.657471264367816, "grad_norm": 3.014366999503986, "learning_rate": 5e-06, "loss": 0.1253, "num_input_tokens_seen": 91438616, "step": 1013 }, { "epoch": 4.657471264367816, "loss": 0.16335362195968628, "loss_ce": 2.354383104830049e-05, "loss_iou": 0.37890625, "loss_num": 0.03271484375, "loss_xval": 0.1630859375, "num_input_tokens_seen": 91438616, "step": 1013 }, { "epoch": 4.662068965517241, "grad_norm": 17.72787254515268, "learning_rate": 5e-06, "loss": 0.1255, "num_input_tokens_seen": 91529032, "step": 1014 }, { "epoch": 4.662068965517241, "loss": 0.13172364234924316, "loss_ce": 9.771740224095993e-06, "loss_iou": 0.4140625, "loss_num": 0.0263671875, "loss_xval": 0.1318359375, "num_input_tokens_seen": 91529032, "step": 1014 }, { "epoch": 4.666666666666667, "grad_norm": 3.6520684848160605, "learning_rate": 5e-06, "loss": 0.126, "num_input_tokens_seen": 91618688, "step": 1015 }, { "epoch": 4.666666666666667, "loss": 0.12861014902591705, "loss_ce": 9.073386536329053e-06, "loss_iou": 0.36328125, "loss_num": 0.0257568359375, "loss_xval": 0.12890625, "num_input_tokens_seen": 91618688, "step": 1015 }, { "epoch": 4.671264367816092, "grad_norm": 6.527601418047689, "learning_rate": 5e-06, "loss": 0.1132, "num_input_tokens_seen": 91709076, "step": 1016 }, { "epoch": 4.671264367816092, "loss": 0.09489475935697556, "loss_ce": 3.086655124207027e-05, "loss_iou": 0.361328125, "loss_num": 0.0189208984375, "loss_xval": 0.0947265625, "num_input_tokens_seen": 91709076, "step": 1016 }, { "epoch": 4.675862068965517, "grad_norm": 14.192420949264795, "learning_rate": 5e-06, "loss": 0.0839, "num_input_tokens_seen": 91799384, "step": 1017 }, { "epoch": 4.675862068965517, "loss": 0.08747270703315735, "loss_ce": 9.329753083875403e-06, "loss_iou": 0.333984375, "loss_num": 0.0174560546875, "loss_xval": 0.08740234375, "num_input_tokens_seen": 91799384, "step": 1017 }, { "epoch": 4.680459770114942, "grad_norm": 5.653695210589391, "learning_rate": 5e-06, "loss": 0.0976, "num_input_tokens_seen": 91889776, "step": 1018 }, { "epoch": 4.680459770114942, "loss": 0.08493378758430481, "loss_ce": 3.372960463821073e-06, "loss_iou": 0.357421875, "loss_num": 0.0169677734375, "loss_xval": 0.0849609375, "num_input_tokens_seen": 91889776, "step": 1018 }, { "epoch": 4.685057471264368, "grad_norm": 4.63777017993729, "learning_rate": 5e-06, "loss": 0.0869, "num_input_tokens_seen": 91980096, "step": 1019 }, { "epoch": 4.685057471264368, "loss": 0.07615326344966888, "loss_ce": 1.1908843589480966e-05, "loss_iou": 0.392578125, "loss_num": 0.0152587890625, "loss_xval": 0.076171875, "num_input_tokens_seen": 91980096, "step": 1019 }, { "epoch": 4.689655172413794, "grad_norm": 4.472710831706082, "learning_rate": 5e-06, "loss": 0.1042, "num_input_tokens_seen": 92070584, "step": 1020 }, { "epoch": 4.689655172413794, "loss": 0.11327888071537018, "loss_ce": 2.8146971089881845e-05, "loss_iou": 0.369140625, "loss_num": 0.022705078125, "loss_xval": 0.11328125, "num_input_tokens_seen": 92070584, "step": 1020 }, { "epoch": 4.694252873563219, "grad_norm": 3.4544586422872907, "learning_rate": 5e-06, "loss": 0.096, "num_input_tokens_seen": 92161012, "step": 1021 }, { "epoch": 4.694252873563219, "loss": 0.06871644407510757, "loss_ce": 6.117389602877665e-06, "loss_iou": 0.328125, "loss_num": 0.01373291015625, "loss_xval": 0.06884765625, "num_input_tokens_seen": 92161012, "step": 1021 }, { "epoch": 4.698850574712644, "grad_norm": 7.42414792402292, "learning_rate": 5e-06, "loss": 0.0925, "num_input_tokens_seen": 92251412, "step": 1022 }, { "epoch": 4.698850574712644, "loss": 0.10006897151470184, "loss_ce": 9.33870833250694e-05, "loss_iou": 0.26953125, "loss_num": 0.02001953125, "loss_xval": 0.10009765625, "num_input_tokens_seen": 92251412, "step": 1022 }, { "epoch": 4.703448275862069, "grad_norm": 6.69514367017083, "learning_rate": 5e-06, "loss": 0.1546, "num_input_tokens_seen": 92341956, "step": 1023 }, { "epoch": 4.703448275862069, "loss": 0.14697983860969543, "loss_ce": 6.821977149229497e-05, "loss_iou": 0.380859375, "loss_num": 0.0294189453125, "loss_xval": 0.146484375, "num_input_tokens_seen": 92341956, "step": 1023 }, { "epoch": 4.708045977011494, "grad_norm": 14.417454722090794, "learning_rate": 5e-06, "loss": 0.1256, "num_input_tokens_seen": 92432144, "step": 1024 }, { "epoch": 4.708045977011494, "loss": 0.13804002106189728, "loss_ce": 9.01172825251706e-06, "loss_iou": 0.359375, "loss_num": 0.027587890625, "loss_xval": 0.1376953125, "num_input_tokens_seen": 92432144, "step": 1024 }, { "epoch": 4.712643678160919, "grad_norm": 7.595947463221086, "learning_rate": 5e-06, "loss": 0.1053, "num_input_tokens_seen": 92522620, "step": 1025 }, { "epoch": 4.712643678160919, "loss": 0.10795444995164871, "loss_ce": 2.1407777239801362e-05, "loss_iou": 0.41796875, "loss_num": 0.0216064453125, "loss_xval": 0.10791015625, "num_input_tokens_seen": 92522620, "step": 1025 }, { "epoch": 4.7172413793103445, "grad_norm": 3.0300131913282713, "learning_rate": 5e-06, "loss": 0.1128, "num_input_tokens_seen": 92612984, "step": 1026 }, { "epoch": 4.7172413793103445, "loss": 0.06870204210281372, "loss_ce": 6.9793131842743605e-06, "loss_iou": 0.302734375, "loss_num": 0.01373291015625, "loss_xval": 0.06884765625, "num_input_tokens_seen": 92612984, "step": 1026 }, { "epoch": 4.72183908045977, "grad_norm": 7.887427930687986, "learning_rate": 5e-06, "loss": 0.0749, "num_input_tokens_seen": 92702504, "step": 1027 }, { "epoch": 4.72183908045977, "loss": 0.07237892597913742, "loss_ce": 2.1748670405941084e-05, "loss_iou": 0.302734375, "loss_num": 0.01446533203125, "loss_xval": 0.072265625, "num_input_tokens_seen": 92702504, "step": 1027 }, { "epoch": 4.726436781609196, "grad_norm": 9.408243473561752, "learning_rate": 5e-06, "loss": 0.0949, "num_input_tokens_seen": 92792888, "step": 1028 }, { "epoch": 4.726436781609196, "loss": 0.07289677858352661, "loss_ce": 5.5426976359740365e-06, "loss_iou": 0.2734375, "loss_num": 0.01458740234375, "loss_xval": 0.07275390625, "num_input_tokens_seen": 92792888, "step": 1028 }, { "epoch": 4.731034482758621, "grad_norm": 9.742419905000565, "learning_rate": 5e-06, "loss": 0.1004, "num_input_tokens_seen": 92883336, "step": 1029 }, { "epoch": 4.731034482758621, "loss": 0.10518839955329895, "loss_ce": 2.4827564629958943e-05, "loss_iou": 0.326171875, "loss_num": 0.02099609375, "loss_xval": 0.10498046875, "num_input_tokens_seen": 92883336, "step": 1029 }, { "epoch": 4.735632183908046, "grad_norm": 2.7055194664846183, "learning_rate": 5e-06, "loss": 0.1231, "num_input_tokens_seen": 92972936, "step": 1030 }, { "epoch": 4.735632183908046, "loss": 0.10875724256038666, "loss_ce": 8.414130570599809e-05, "loss_iou": 0.373046875, "loss_num": 0.021728515625, "loss_xval": 0.10888671875, "num_input_tokens_seen": 92972936, "step": 1030 }, { "epoch": 4.740229885057471, "grad_norm": 11.304101815777114, "learning_rate": 5e-06, "loss": 0.1415, "num_input_tokens_seen": 93063392, "step": 1031 }, { "epoch": 4.740229885057471, "loss": 0.09561444818973541, "loss_ce": 2.8712015591736417e-06, "loss_iou": 0.3203125, "loss_num": 0.0191650390625, "loss_xval": 0.095703125, "num_input_tokens_seen": 93063392, "step": 1031 }, { "epoch": 4.744827586206896, "grad_norm": 15.258461645144374, "learning_rate": 5e-06, "loss": 0.0956, "num_input_tokens_seen": 93152916, "step": 1032 }, { "epoch": 4.744827586206896, "loss": 0.11910735070705414, "loss_ce": 4.878862000623485e-06, "loss_iou": 0.39453125, "loss_num": 0.0238037109375, "loss_xval": 0.119140625, "num_input_tokens_seen": 93152916, "step": 1032 }, { "epoch": 4.749425287356322, "grad_norm": 2.356961429838356, "learning_rate": 5e-06, "loss": 0.0921, "num_input_tokens_seen": 93243212, "step": 1033 }, { "epoch": 4.749425287356322, "loss": 0.11762569844722748, "loss_ce": 0.0005450131138786674, "loss_iou": 0.375, "loss_num": 0.0234375, "loss_xval": 0.1171875, "num_input_tokens_seen": 93243212, "step": 1033 }, { "epoch": 4.7540229885057474, "grad_norm": 15.1692410364505, "learning_rate": 5e-06, "loss": 0.1611, "num_input_tokens_seen": 93333636, "step": 1034 }, { "epoch": 4.7540229885057474, "loss": 0.147009015083313, "loss_ce": 5.839167897647712e-06, "loss_iou": 0.33984375, "loss_num": 0.0294189453125, "loss_xval": 0.1474609375, "num_input_tokens_seen": 93333636, "step": 1034 }, { "epoch": 4.758620689655173, "grad_norm": 7.946502842662023, "learning_rate": 5e-06, "loss": 0.1427, "num_input_tokens_seen": 93423992, "step": 1035 }, { "epoch": 4.758620689655173, "loss": 0.1617765724658966, "loss_ce": 3.341192496009171e-05, "loss_iou": 0.36328125, "loss_num": 0.032470703125, "loss_xval": 0.162109375, "num_input_tokens_seen": 93423992, "step": 1035 }, { "epoch": 4.763218390804598, "grad_norm": 15.109850534366604, "learning_rate": 5e-06, "loss": 0.0785, "num_input_tokens_seen": 93514320, "step": 1036 }, { "epoch": 4.763218390804598, "loss": 0.0966891348361969, "loss_ce": 9.446171134186443e-06, "loss_iou": 0.41015625, "loss_num": 0.019287109375, "loss_xval": 0.0966796875, "num_input_tokens_seen": 93514320, "step": 1036 }, { "epoch": 4.767816091954023, "grad_norm": 4.842981275277206, "learning_rate": 5e-06, "loss": 0.1265, "num_input_tokens_seen": 93603144, "step": 1037 }, { "epoch": 4.767816091954023, "loss": 0.11073972284793854, "loss_ce": 2.194262560806237e-05, "loss_iou": 0.46875, "loss_num": 0.0220947265625, "loss_xval": 0.11083984375, "num_input_tokens_seen": 93603144, "step": 1037 }, { "epoch": 4.772413793103448, "grad_norm": 21.01549868516104, "learning_rate": 5e-06, "loss": 0.0947, "num_input_tokens_seen": 93693660, "step": 1038 }, { "epoch": 4.772413793103448, "loss": 0.08537042886018753, "loss_ce": 1.2760566278302576e-05, "loss_iou": 0.283203125, "loss_num": 0.01708984375, "loss_xval": 0.08544921875, "num_input_tokens_seen": 93693660, "step": 1038 }, { "epoch": 4.777011494252873, "grad_norm": 10.712563478328072, "learning_rate": 5e-06, "loss": 0.1014, "num_input_tokens_seen": 93783296, "step": 1039 }, { "epoch": 4.777011494252873, "loss": 0.11916904151439667, "loss_ce": 2.8420639864634722e-05, "loss_iou": 0.388671875, "loss_num": 0.0238037109375, "loss_xval": 0.119140625, "num_input_tokens_seen": 93783296, "step": 1039 }, { "epoch": 4.781609195402299, "grad_norm": 7.345903871032101, "learning_rate": 5e-06, "loss": 0.0869, "num_input_tokens_seen": 93873768, "step": 1040 }, { "epoch": 4.781609195402299, "loss": 0.05897822231054306, "loss_ce": 3.0012429306225386e-06, "loss_iou": 0.35546875, "loss_num": 0.01177978515625, "loss_xval": 0.05908203125, "num_input_tokens_seen": 93873768, "step": 1040 }, { "epoch": 4.786206896551724, "grad_norm": 4.133426960663791, "learning_rate": 5e-06, "loss": 0.1015, "num_input_tokens_seen": 93964260, "step": 1041 }, { "epoch": 4.786206896551724, "loss": 0.0858168751001358, "loss_ce": 1.6706742826499976e-05, "loss_iou": 0.375, "loss_num": 0.0172119140625, "loss_xval": 0.0859375, "num_input_tokens_seen": 93964260, "step": 1041 }, { "epoch": 4.7908045977011495, "grad_norm": 7.686995682812649, "learning_rate": 5e-06, "loss": 0.12, "num_input_tokens_seen": 94054556, "step": 1042 }, { "epoch": 4.7908045977011495, "loss": 0.12363208830356598, "loss_ce": 5.381094069889514e-06, "loss_iou": 0.322265625, "loss_num": 0.0247802734375, "loss_xval": 0.12353515625, "num_input_tokens_seen": 94054556, "step": 1042 }, { "epoch": 4.795402298850575, "grad_norm": 1.6904479508606451, "learning_rate": 5e-06, "loss": 0.0778, "num_input_tokens_seen": 94145004, "step": 1043 }, { "epoch": 4.795402298850575, "loss": 0.06091611459851265, "loss_ce": 3.0297642297227867e-06, "loss_iou": 0.349609375, "loss_num": 0.01214599609375, "loss_xval": 0.06103515625, "num_input_tokens_seen": 94145004, "step": 1043 }, { "epoch": 4.8, "grad_norm": 3.669144596101602, "learning_rate": 5e-06, "loss": 0.1243, "num_input_tokens_seen": 94235372, "step": 1044 }, { "epoch": 4.8, "loss": 0.13778972625732422, "loss_ce": 2.869950094464002e-06, "loss_iou": 0.3828125, "loss_num": 0.027587890625, "loss_xval": 0.1376953125, "num_input_tokens_seen": 94235372, "step": 1044 }, { "epoch": 4.804597701149425, "grad_norm": 6.97279172429298, "learning_rate": 5e-06, "loss": 0.0761, "num_input_tokens_seen": 94325696, "step": 1045 }, { "epoch": 4.804597701149425, "loss": 0.05801108479499817, "loss_ce": 0.0023165077436715364, "loss_iou": 0.349609375, "loss_num": 0.0111083984375, "loss_xval": 0.0556640625, "num_input_tokens_seen": 94325696, "step": 1045 }, { "epoch": 4.809195402298851, "grad_norm": 11.58305315850037, "learning_rate": 5e-06, "loss": 0.091, "num_input_tokens_seen": 94416004, "step": 1046 }, { "epoch": 4.809195402298851, "loss": 0.1216389536857605, "loss_ce": 1.1142959920107387e-05, "loss_iou": 0.396484375, "loss_num": 0.0242919921875, "loss_xval": 0.12158203125, "num_input_tokens_seen": 94416004, "step": 1046 }, { "epoch": 4.813793103448276, "grad_norm": 16.0940836142544, "learning_rate": 5e-06, "loss": 0.0916, "num_input_tokens_seen": 94506316, "step": 1047 }, { "epoch": 4.813793103448276, "loss": 0.10546806454658508, "loss_ce": 2.9831333449692465e-05, "loss_iou": 0.30859375, "loss_num": 0.0211181640625, "loss_xval": 0.10546875, "num_input_tokens_seen": 94506316, "step": 1047 }, { "epoch": 4.818390804597701, "grad_norm": 10.955820630731282, "learning_rate": 5e-06, "loss": 0.1403, "num_input_tokens_seen": 94596544, "step": 1048 }, { "epoch": 4.818390804597701, "loss": 0.16584840416908264, "loss_ce": 1.5889574569882825e-05, "loss_iou": 0.333984375, "loss_num": 0.033203125, "loss_xval": 0.166015625, "num_input_tokens_seen": 94596544, "step": 1048 }, { "epoch": 4.8229885057471265, "grad_norm": 6.034273447193758, "learning_rate": 5e-06, "loss": 0.0587, "num_input_tokens_seen": 94686864, "step": 1049 }, { "epoch": 4.8229885057471265, "loss": 0.06973493099212646, "loss_ce": 2.2640974748355802e-06, "loss_iou": 0.359375, "loss_num": 0.013916015625, "loss_xval": 0.06982421875, "num_input_tokens_seen": 94686864, "step": 1049 }, { "epoch": 4.827586206896552, "grad_norm": 4.252496465368457, "learning_rate": 5e-06, "loss": 0.097, "num_input_tokens_seen": 94777296, "step": 1050 }, { "epoch": 4.827586206896552, "loss": 0.10622747987508774, "loss_ce": 7.20810130587779e-05, "loss_iou": 0.42578125, "loss_num": 0.021240234375, "loss_xval": 0.10595703125, "num_input_tokens_seen": 94777296, "step": 1050 }, { "epoch": 4.832183908045977, "grad_norm": 3.0187179996735334, "learning_rate": 5e-06, "loss": 0.11, "num_input_tokens_seen": 94867600, "step": 1051 }, { "epoch": 4.832183908045977, "loss": 0.1049966886639595, "loss_ce": 9.584927056494053e-07, "loss_iou": 0.33984375, "loss_num": 0.02099609375, "loss_xval": 0.10498046875, "num_input_tokens_seen": 94867600, "step": 1051 }, { "epoch": 4.836781609195402, "grad_norm": 16.087126169900518, "learning_rate": 5e-06, "loss": 0.082, "num_input_tokens_seen": 94958008, "step": 1052 }, { "epoch": 4.836781609195402, "loss": 0.1101231724023819, "loss_ce": 1.5750989405205473e-05, "loss_iou": 0.4296875, "loss_num": 0.02197265625, "loss_xval": 0.1103515625, "num_input_tokens_seen": 94958008, "step": 1052 }, { "epoch": 4.841379310344828, "grad_norm": 12.550203705796418, "learning_rate": 5e-06, "loss": 0.0795, "num_input_tokens_seen": 95048356, "step": 1053 }, { "epoch": 4.841379310344828, "loss": 0.08209192752838135, "loss_ce": 1.4905896023265086e-05, "loss_iou": 0.31640625, "loss_num": 0.016357421875, "loss_xval": 0.08203125, "num_input_tokens_seen": 95048356, "step": 1053 }, { "epoch": 4.845977011494253, "grad_norm": 4.5783225747939555, "learning_rate": 5e-06, "loss": 0.1238, "num_input_tokens_seen": 95138768, "step": 1054 }, { "epoch": 4.845977011494253, "loss": 0.1782418191432953, "loss_ce": 3.904753612005152e-06, "loss_iou": 0.35546875, "loss_num": 0.03564453125, "loss_xval": 0.1787109375, "num_input_tokens_seen": 95138768, "step": 1054 }, { "epoch": 4.850574712643678, "grad_norm": 17.243051689458277, "learning_rate": 5e-06, "loss": 0.1833, "num_input_tokens_seen": 95229088, "step": 1055 }, { "epoch": 4.850574712643678, "loss": 0.11458714306354523, "loss_ce": 2.4146735086105764e-05, "loss_iou": 0.40234375, "loss_num": 0.02294921875, "loss_xval": 0.11474609375, "num_input_tokens_seen": 95229088, "step": 1055 }, { "epoch": 4.855172413793103, "grad_norm": 6.0951803316443876, "learning_rate": 5e-06, "loss": 0.0885, "num_input_tokens_seen": 95318604, "step": 1056 }, { "epoch": 4.855172413793103, "loss": 0.09546582400798798, "loss_ce": 6.840703463240061e-06, "loss_iou": 0.36328125, "loss_num": 0.01904296875, "loss_xval": 0.095703125, "num_input_tokens_seen": 95318604, "step": 1056 }, { "epoch": 4.8597701149425285, "grad_norm": 2.513374225059268, "learning_rate": 5e-06, "loss": 0.0997, "num_input_tokens_seen": 95409032, "step": 1057 }, { "epoch": 4.8597701149425285, "loss": 0.105290487408638, "loss_ce": 0.00047786219511181116, "loss_iou": 0.330078125, "loss_num": 0.02099609375, "loss_xval": 0.10498046875, "num_input_tokens_seen": 95409032, "step": 1057 }, { "epoch": 4.864367816091954, "grad_norm": 3.376425029688418, "learning_rate": 5e-06, "loss": 0.1288, "num_input_tokens_seen": 95499352, "step": 1058 }, { "epoch": 4.864367816091954, "loss": 0.1294291913509369, "loss_ce": 0.0005839776713401079, "loss_iou": 0.369140625, "loss_num": 0.0257568359375, "loss_xval": 0.12890625, "num_input_tokens_seen": 95499352, "step": 1058 }, { "epoch": 4.86896551724138, "grad_norm": 4.718684401574836, "learning_rate": 5e-06, "loss": 0.1409, "num_input_tokens_seen": 95589880, "step": 1059 }, { "epoch": 4.86896551724138, "loss": 0.15736906230449677, "loss_ce": 2.0424929971341044e-05, "loss_iou": 0.376953125, "loss_num": 0.031494140625, "loss_xval": 0.1572265625, "num_input_tokens_seen": 95589880, "step": 1059 }, { "epoch": 4.873563218390805, "grad_norm": 8.564949517103082, "learning_rate": 5e-06, "loss": 0.0653, "num_input_tokens_seen": 95680168, "step": 1060 }, { "epoch": 4.873563218390805, "loss": 0.07507706433534622, "loss_ce": 3.8181360650924034e-06, "loss_iou": 0.421875, "loss_num": 0.0150146484375, "loss_xval": 0.0751953125, "num_input_tokens_seen": 95680168, "step": 1060 }, { "epoch": 4.87816091954023, "grad_norm": 10.678428697285344, "learning_rate": 5e-06, "loss": 0.102, "num_input_tokens_seen": 95770520, "step": 1061 }, { "epoch": 4.87816091954023, "loss": 0.13752196729183197, "loss_ce": 4.027618342661299e-05, "loss_iou": 0.349609375, "loss_num": 0.0274658203125, "loss_xval": 0.1376953125, "num_input_tokens_seen": 95770520, "step": 1061 }, { "epoch": 4.882758620689655, "grad_norm": 5.476416486972618, "learning_rate": 5e-06, "loss": 0.1138, "num_input_tokens_seen": 95860856, "step": 1062 }, { "epoch": 4.882758620689655, "loss": 0.10755479335784912, "loss_ce": 1.0849686077563092e-05, "loss_iou": 0.302734375, "loss_num": 0.021484375, "loss_xval": 0.107421875, "num_input_tokens_seen": 95860856, "step": 1062 }, { "epoch": 4.88735632183908, "grad_norm": 7.341716557148394, "learning_rate": 5e-06, "loss": 0.1133, "num_input_tokens_seen": 95951252, "step": 1063 }, { "epoch": 4.88735632183908, "loss": 0.08713214844465256, "loss_ce": 4.465159690880682e-06, "loss_iou": 0.29296875, "loss_num": 0.0174560546875, "loss_xval": 0.0869140625, "num_input_tokens_seen": 95951252, "step": 1063 }, { "epoch": 4.8919540229885055, "grad_norm": 17.624799280413747, "learning_rate": 5e-06, "loss": 0.097, "num_input_tokens_seen": 96041592, "step": 1064 }, { "epoch": 4.8919540229885055, "loss": 0.1143837422132492, "loss_ce": 3.4374650567770004e-05, "loss_iou": 0.306640625, "loss_num": 0.0228271484375, "loss_xval": 0.1142578125, "num_input_tokens_seen": 96041592, "step": 1064 }, { "epoch": 4.896551724137931, "grad_norm": 9.416499244614185, "learning_rate": 5e-06, "loss": 0.1198, "num_input_tokens_seen": 96132008, "step": 1065 }, { "epoch": 4.896551724137931, "loss": 0.11976835876703262, "loss_ce": 4.7893059672787786e-05, "loss_iou": 0.42578125, "loss_num": 0.02392578125, "loss_xval": 0.11962890625, "num_input_tokens_seen": 96132008, "step": 1065 }, { "epoch": 4.901149425287357, "grad_norm": 6.806788764833692, "learning_rate": 5e-06, "loss": 0.1071, "num_input_tokens_seen": 96222480, "step": 1066 }, { "epoch": 4.901149425287357, "loss": 0.09189382195472717, "loss_ce": 5.3979856602381915e-06, "loss_iou": 0.302734375, "loss_num": 0.018310546875, "loss_xval": 0.091796875, "num_input_tokens_seen": 96222480, "step": 1066 }, { "epoch": 4.905747126436782, "grad_norm": 5.549592637186866, "learning_rate": 5e-06, "loss": 0.0893, "num_input_tokens_seen": 96312836, "step": 1067 }, { "epoch": 4.905747126436782, "loss": 0.08161412179470062, "loss_ce": 4.0634346078149974e-05, "loss_iou": 0.349609375, "loss_num": 0.016357421875, "loss_xval": 0.08154296875, "num_input_tokens_seen": 96312836, "step": 1067 }, { "epoch": 4.910344827586207, "grad_norm": 6.49206654790828, "learning_rate": 5e-06, "loss": 0.1035, "num_input_tokens_seen": 96403204, "step": 1068 }, { "epoch": 4.910344827586207, "loss": 0.10242622345685959, "loss_ce": 9.231689546140842e-06, "loss_iou": 0.240234375, "loss_num": 0.0205078125, "loss_xval": 0.1025390625, "num_input_tokens_seen": 96403204, "step": 1068 }, { "epoch": 4.914942528735632, "grad_norm": 3.007842215921885, "learning_rate": 5e-06, "loss": 0.1008, "num_input_tokens_seen": 96493592, "step": 1069 }, { "epoch": 4.914942528735632, "loss": 0.09680457413196564, "loss_ce": 6.385076267179102e-05, "loss_iou": 0.337890625, "loss_num": 0.0194091796875, "loss_xval": 0.0966796875, "num_input_tokens_seen": 96493592, "step": 1069 }, { "epoch": 4.919540229885057, "grad_norm": 10.319391804852176, "learning_rate": 5e-06, "loss": 0.1097, "num_input_tokens_seen": 96583944, "step": 1070 }, { "epoch": 4.919540229885057, "loss": 0.08242271840572357, "loss_ce": 5.577308911597356e-05, "loss_iou": 0.310546875, "loss_num": 0.0164794921875, "loss_xval": 0.08251953125, "num_input_tokens_seen": 96583944, "step": 1070 }, { "epoch": 4.924137931034482, "grad_norm": 2.879146983290818, "learning_rate": 5e-06, "loss": 0.0895, "num_input_tokens_seen": 96674328, "step": 1071 }, { "epoch": 4.924137931034482, "loss": 0.10010585933923721, "loss_ce": 8.201654054573737e-06, "loss_iou": 0.38671875, "loss_num": 0.02001953125, "loss_xval": 0.10009765625, "num_input_tokens_seen": 96674328, "step": 1071 }, { "epoch": 4.928735632183908, "grad_norm": 15.403924468504222, "learning_rate": 5e-06, "loss": 0.0968, "num_input_tokens_seen": 96764696, "step": 1072 }, { "epoch": 4.928735632183908, "loss": 0.12627822160720825, "loss_ce": 2.7013094950234517e-05, "loss_iou": 0.34375, "loss_num": 0.0252685546875, "loss_xval": 0.1259765625, "num_input_tokens_seen": 96764696, "step": 1072 }, { "epoch": 4.933333333333334, "grad_norm": 24.679178097861215, "learning_rate": 5e-06, "loss": 0.0828, "num_input_tokens_seen": 96855144, "step": 1073 }, { "epoch": 4.933333333333334, "loss": 0.11346793919801712, "loss_ce": 3.5857883631251752e-06, "loss_iou": 0.400390625, "loss_num": 0.022705078125, "loss_xval": 0.11328125, "num_input_tokens_seen": 96855144, "step": 1073 }, { "epoch": 4.937931034482759, "grad_norm": 17.443099366886536, "learning_rate": 5e-06, "loss": 0.1108, "num_input_tokens_seen": 96945600, "step": 1074 }, { "epoch": 4.937931034482759, "loss": 0.09844504296779633, "loss_ce": 5.636948117171414e-05, "loss_iou": 0.361328125, "loss_num": 0.0196533203125, "loss_xval": 0.0986328125, "num_input_tokens_seen": 96945600, "step": 1074 }, { "epoch": 4.942528735632184, "grad_norm": 11.649607579048189, "learning_rate": 5e-06, "loss": 0.0836, "num_input_tokens_seen": 97035976, "step": 1075 }, { "epoch": 4.942528735632184, "loss": 0.09174925833940506, "loss_ce": 1.3420964933175128e-05, "loss_iou": 0.357421875, "loss_num": 0.018310546875, "loss_xval": 0.091796875, "num_input_tokens_seen": 97035976, "step": 1075 }, { "epoch": 4.947126436781609, "grad_norm": 8.321494891514785, "learning_rate": 5e-06, "loss": 0.0985, "num_input_tokens_seen": 97126184, "step": 1076 }, { "epoch": 4.947126436781609, "loss": 0.1108294129371643, "loss_ce": 4.831304522667779e-06, "loss_iou": 0.3203125, "loss_num": 0.022216796875, "loss_xval": 0.11083984375, "num_input_tokens_seen": 97126184, "step": 1076 }, { "epoch": 4.951724137931034, "grad_norm": 2.7753841701876714, "learning_rate": 5e-06, "loss": 0.0898, "num_input_tokens_seen": 97215208, "step": 1077 }, { "epoch": 4.951724137931034, "loss": 0.10865768790245056, "loss_ce": 7.61427145334892e-05, "loss_iou": 0.265625, "loss_num": 0.021728515625, "loss_xval": 0.1083984375, "num_input_tokens_seen": 97215208, "step": 1077 }, { "epoch": 4.956321839080459, "grad_norm": 3.502619253449482, "learning_rate": 5e-06, "loss": 0.1241, "num_input_tokens_seen": 97305660, "step": 1078 }, { "epoch": 4.956321839080459, "loss": 0.07795646786689758, "loss_ce": 1.4574313354387414e-05, "loss_iou": 0.376953125, "loss_num": 0.01556396484375, "loss_xval": 0.078125, "num_input_tokens_seen": 97305660, "step": 1078 }, { "epoch": 4.960919540229885, "grad_norm": 2.214432026018128, "learning_rate": 5e-06, "loss": 0.106, "num_input_tokens_seen": 97395964, "step": 1079 }, { "epoch": 4.960919540229885, "loss": 0.09012555330991745, "loss_ce": 3.7664180126739666e-05, "loss_iou": 0.322265625, "loss_num": 0.01806640625, "loss_xval": 0.08984375, "num_input_tokens_seen": 97395964, "step": 1079 }, { "epoch": 4.9655172413793105, "grad_norm": 6.4074181213192105, "learning_rate": 5e-06, "loss": 0.0809, "num_input_tokens_seen": 97486300, "step": 1080 }, { "epoch": 4.9655172413793105, "loss": 0.07362109422683716, "loss_ce": 1.2694945326074958e-05, "loss_iou": 0.345703125, "loss_num": 0.01470947265625, "loss_xval": 0.07373046875, "num_input_tokens_seen": 97486300, "step": 1080 }, { "epoch": 4.970114942528736, "grad_norm": 10.501486543841219, "learning_rate": 5e-06, "loss": 0.1014, "num_input_tokens_seen": 97576584, "step": 1081 }, { "epoch": 4.970114942528736, "loss": 0.07947252690792084, "loss_ce": 4.7581602302670944e-06, "loss_iou": 0.3515625, "loss_num": 0.015869140625, "loss_xval": 0.07958984375, "num_input_tokens_seen": 97576584, "step": 1081 }, { "epoch": 4.974712643678161, "grad_norm": 5.4733017757114615, "learning_rate": 5e-06, "loss": 0.1519, "num_input_tokens_seen": 97666176, "step": 1082 }, { "epoch": 4.974712643678161, "loss": 0.2110956907272339, "loss_ce": 5.60448916075984e-06, "loss_iou": 0.4765625, "loss_num": 0.042236328125, "loss_xval": 0.2109375, "num_input_tokens_seen": 97666176, "step": 1082 }, { "epoch": 4.979310344827586, "grad_norm": 4.717726419007518, "learning_rate": 5e-06, "loss": 0.1306, "num_input_tokens_seen": 97756556, "step": 1083 }, { "epoch": 4.979310344827586, "loss": 0.12327490001916885, "loss_ce": 1.4395311154657975e-05, "loss_iou": 0.328125, "loss_num": 0.024658203125, "loss_xval": 0.123046875, "num_input_tokens_seen": 97756556, "step": 1083 }, { "epoch": 4.983908045977012, "grad_norm": 6.386820886831165, "learning_rate": 5e-06, "loss": 0.1261, "num_input_tokens_seen": 97845408, "step": 1084 }, { "epoch": 4.983908045977012, "loss": 0.1393074244260788, "loss_ce": 0.00013201337424106896, "loss_iou": 0.376953125, "loss_num": 0.02783203125, "loss_xval": 0.1396484375, "num_input_tokens_seen": 97845408, "step": 1084 }, { "epoch": 4.988505747126437, "grad_norm": 17.923854404624937, "learning_rate": 5e-06, "loss": 0.1309, "num_input_tokens_seen": 97935700, "step": 1085 }, { "epoch": 4.988505747126437, "loss": 0.1198144182562828, "loss_ce": 2.406018211331684e-06, "loss_iou": 0.33203125, "loss_num": 0.0240478515625, "loss_xval": 0.11962890625, "num_input_tokens_seen": 97935700, "step": 1085 }, { "epoch": 4.993103448275862, "grad_norm": 8.89371007698411, "learning_rate": 5e-06, "loss": 0.1061, "num_input_tokens_seen": 98026076, "step": 1086 }, { "epoch": 4.993103448275862, "loss": 0.08179621398448944, "loss_ce": 3.9620990719413385e-05, "loss_iou": 0.373046875, "loss_num": 0.016357421875, "loss_xval": 0.08154296875, "num_input_tokens_seen": 98026076, "step": 1086 }, { "epoch": 4.997701149425287, "grad_norm": 3.2738674103409777, "learning_rate": 5e-06, "loss": 0.1316, "num_input_tokens_seen": 98116464, "step": 1087 }, { "epoch": 4.997701149425287, "loss": 0.1348174810409546, "loss_ce": 2.13292059925152e-05, "loss_iou": 0.341796875, "loss_num": 0.0269775390625, "loss_xval": 0.134765625, "num_input_tokens_seen": 98116464, "step": 1087 }, { "epoch": 4.997701149425287, "loss": 0.11705206334590912, "loss_ce": 0.00010870777623495087, "loss_iou": 0.447265625, "loss_num": 0.0234375, "loss_xval": 0.1171875, "num_input_tokens_seen": 98160852, "step": 1087 }, { "epoch": 5.002298850574713, "grad_norm": 3.097439221083733, "learning_rate": 5e-06, "loss": 0.1079, "num_input_tokens_seen": 98206020, "step": 1088 }, { "epoch": 5.002298850574713, "loss": 0.09875764697790146, "loss_ce": 6.379698606906459e-05, "loss_iou": 0.328125, "loss_num": 0.019775390625, "loss_xval": 0.0986328125, "num_input_tokens_seen": 98206020, "step": 1088 }, { "epoch": 5.006896551724138, "grad_norm": 12.127549215108248, "learning_rate": 5e-06, "loss": 0.1121, "num_input_tokens_seen": 98296448, "step": 1089 }, { "epoch": 5.006896551724138, "loss": 0.1051742285490036, "loss_ce": 5.6425316870445386e-05, "loss_iou": 0.40234375, "loss_num": 0.02099609375, "loss_xval": 0.10498046875, "num_input_tokens_seen": 98296448, "step": 1089 }, { "epoch": 5.011494252873563, "grad_norm": 3.0110884831761777, "learning_rate": 5e-06, "loss": 0.1177, "num_input_tokens_seen": 98386704, "step": 1090 }, { "epoch": 5.011494252873563, "loss": 0.117917500436306, "loss_ce": 1.2841372154070996e-05, "loss_iou": 0.390625, "loss_num": 0.0235595703125, "loss_xval": 0.11767578125, "num_input_tokens_seen": 98386704, "step": 1090 }, { "epoch": 5.016091954022989, "grad_norm": 8.800434556379146, "learning_rate": 5e-06, "loss": 0.1628, "num_input_tokens_seen": 98475456, "step": 1091 }, { "epoch": 5.016091954022989, "loss": 0.15691357851028442, "loss_ce": 0.0011213450925424695, "loss_iou": 0.390625, "loss_num": 0.0311279296875, "loss_xval": 0.15625, "num_input_tokens_seen": 98475456, "step": 1091 }, { "epoch": 5.020689655172414, "grad_norm": 17.875153503898837, "learning_rate": 5e-06, "loss": 0.1127, "num_input_tokens_seen": 98565748, "step": 1092 }, { "epoch": 5.020689655172414, "loss": 0.08784059435129166, "loss_ce": 2.6260444428771734e-05, "loss_iou": 0.3671875, "loss_num": 0.017578125, "loss_xval": 0.087890625, "num_input_tokens_seen": 98565748, "step": 1092 }, { "epoch": 5.025287356321839, "grad_norm": 1.9310524258242443, "learning_rate": 5e-06, "loss": 0.105, "num_input_tokens_seen": 98656216, "step": 1093 }, { "epoch": 5.025287356321839, "loss": 0.10792776942253113, "loss_ce": 1.7617947378312238e-05, "loss_iou": 0.328125, "loss_num": 0.0216064453125, "loss_xval": 0.10791015625, "num_input_tokens_seen": 98656216, "step": 1093 }, { "epoch": 5.029885057471264, "grad_norm": 5.2811306139564245, "learning_rate": 5e-06, "loss": 0.082, "num_input_tokens_seen": 98746572, "step": 1094 }, { "epoch": 5.029885057471264, "loss": 0.05932975560426712, "loss_ce": 3.58494685315236e-06, "loss_iou": 0.302734375, "loss_num": 0.0118408203125, "loss_xval": 0.059326171875, "num_input_tokens_seen": 98746572, "step": 1094 }, { "epoch": 5.0344827586206895, "grad_norm": 14.24076067000812, "learning_rate": 5e-06, "loss": 0.0862, "num_input_tokens_seen": 98836948, "step": 1095 }, { "epoch": 5.0344827586206895, "loss": 0.08336181938648224, "loss_ce": 4.8835227062227204e-05, "loss_iou": 0.34375, "loss_num": 0.0166015625, "loss_xval": 0.08349609375, "num_input_tokens_seen": 98836948, "step": 1095 }, { "epoch": 5.039080459770115, "grad_norm": 7.5844239875355415, "learning_rate": 5e-06, "loss": 0.1292, "num_input_tokens_seen": 98927324, "step": 1096 }, { "epoch": 5.039080459770115, "loss": 0.16165432333946228, "loss_ce": 3.3243813959416e-05, "loss_iou": 0.271484375, "loss_num": 0.0322265625, "loss_xval": 0.162109375, "num_input_tokens_seen": 98927324, "step": 1096 }, { "epoch": 5.04367816091954, "grad_norm": 8.539898456692784, "learning_rate": 5e-06, "loss": 0.0702, "num_input_tokens_seen": 99017760, "step": 1097 }, { "epoch": 5.04367816091954, "loss": 0.08533644676208496, "loss_ce": 2.455884896335192e-05, "loss_iou": 0.3828125, "loss_num": 0.01708984375, "loss_xval": 0.08544921875, "num_input_tokens_seen": 99017760, "step": 1097 }, { "epoch": 5.048275862068966, "grad_norm": 8.297830161901864, "learning_rate": 5e-06, "loss": 0.1184, "num_input_tokens_seen": 99108192, "step": 1098 }, { "epoch": 5.048275862068966, "loss": 0.084136001765728, "loss_ce": 2.95531572191976e-05, "loss_iou": 0.314453125, "loss_num": 0.016845703125, "loss_xval": 0.083984375, "num_input_tokens_seen": 99108192, "step": 1098 }, { "epoch": 5.052873563218391, "grad_norm": 10.472390357500974, "learning_rate": 5e-06, "loss": 0.1182, "num_input_tokens_seen": 99198528, "step": 1099 }, { "epoch": 5.052873563218391, "loss": 0.10827698558568954, "loss_ce": 3.113792627118528e-05, "loss_iou": 0.318359375, "loss_num": 0.021728515625, "loss_xval": 0.1083984375, "num_input_tokens_seen": 99198528, "step": 1099 }, { "epoch": 5.057471264367816, "grad_norm": 9.292431953931642, "learning_rate": 5e-06, "loss": 0.1232, "num_input_tokens_seen": 99288908, "step": 1100 }, { "epoch": 5.057471264367816, "loss": 0.14652323722839355, "loss_ce": 9.990337275667116e-05, "loss_iou": 0.306640625, "loss_num": 0.029296875, "loss_xval": 0.146484375, "num_input_tokens_seen": 99288908, "step": 1100 }, { "epoch": 5.062068965517241, "grad_norm": 6.538074511107595, "learning_rate": 5e-06, "loss": 0.0735, "num_input_tokens_seen": 99379280, "step": 1101 }, { "epoch": 5.062068965517241, "loss": 0.07154995948076248, "loss_ce": 3.2016840123105794e-05, "loss_iou": 0.279296875, "loss_num": 0.0142822265625, "loss_xval": 0.0712890625, "num_input_tokens_seen": 99379280, "step": 1101 }, { "epoch": 5.066666666666666, "grad_norm": 11.332646756842689, "learning_rate": 5e-06, "loss": 0.142, "num_input_tokens_seen": 99469616, "step": 1102 }, { "epoch": 5.066666666666666, "loss": 0.13791075348854065, "loss_ce": 1.82047119778872e-06, "loss_iou": 0.365234375, "loss_num": 0.027587890625, "loss_xval": 0.1376953125, "num_input_tokens_seen": 99469616, "step": 1102 }, { "epoch": 5.071264367816092, "grad_norm": 4.595074957103007, "learning_rate": 5e-06, "loss": 0.0588, "num_input_tokens_seen": 99559192, "step": 1103 }, { "epoch": 5.071264367816092, "loss": 0.06943254172801971, "loss_ce": 0.0002797079214360565, "loss_iou": 0.39453125, "loss_num": 0.0137939453125, "loss_xval": 0.0693359375, "num_input_tokens_seen": 99559192, "step": 1103 }, { "epoch": 5.075862068965518, "grad_norm": 9.00228465896505, "learning_rate": 5e-06, "loss": 0.0921, "num_input_tokens_seen": 99649544, "step": 1104 }, { "epoch": 5.075862068965518, "loss": 0.09139812737703323, "loss_ce": 5.6064582167891786e-06, "loss_iou": 0.3203125, "loss_num": 0.018310546875, "loss_xval": 0.09130859375, "num_input_tokens_seen": 99649544, "step": 1104 }, { "epoch": 5.080459770114943, "grad_norm": 6.434548797647199, "learning_rate": 5e-06, "loss": 0.0789, "num_input_tokens_seen": 99739820, "step": 1105 }, { "epoch": 5.080459770114943, "loss": 0.07129809260368347, "loss_ce": 2.4284672690555453e-05, "loss_iou": 0.35546875, "loss_num": 0.01422119140625, "loss_xval": 0.0712890625, "num_input_tokens_seen": 99739820, "step": 1105 }, { "epoch": 5.085057471264368, "grad_norm": 4.039364108519724, "learning_rate": 5e-06, "loss": 0.086, "num_input_tokens_seen": 99830196, "step": 1106 }, { "epoch": 5.085057471264368, "loss": 0.07135014235973358, "loss_ce": 1.5301206076401286e-05, "loss_iou": 0.388671875, "loss_num": 0.0142822265625, "loss_xval": 0.0712890625, "num_input_tokens_seen": 99830196, "step": 1106 }, { "epoch": 5.089655172413793, "grad_norm": 3.655688996317076, "learning_rate": 5e-06, "loss": 0.0962, "num_input_tokens_seen": 99920560, "step": 1107 }, { "epoch": 5.089655172413793, "loss": 0.10698340833187103, "loss_ce": 1.9302660803077742e-05, "loss_iou": 0.345703125, "loss_num": 0.0213623046875, "loss_xval": 0.10693359375, "num_input_tokens_seen": 99920560, "step": 1107 }, { "epoch": 5.094252873563218, "grad_norm": 2.852608379852805, "learning_rate": 5e-06, "loss": 0.0846, "num_input_tokens_seen": 100010896, "step": 1108 }, { "epoch": 5.094252873563218, "loss": 0.10475088655948639, "loss_ce": 1.4554200788552407e-05, "loss_iou": 0.37890625, "loss_num": 0.0208740234375, "loss_xval": 0.1044921875, "num_input_tokens_seen": 100010896, "step": 1108 }, { "epoch": 5.098850574712643, "grad_norm": 4.647429106957139, "learning_rate": 5e-06, "loss": 0.0739, "num_input_tokens_seen": 100101200, "step": 1109 }, { "epoch": 5.098850574712643, "loss": 0.0705321729183197, "loss_ce": 6.04609158472158e-06, "loss_iou": 0.36328125, "loss_num": 0.01409912109375, "loss_xval": 0.0703125, "num_input_tokens_seen": 100101200, "step": 1109 }, { "epoch": 5.103448275862069, "grad_norm": 4.542358695843599, "learning_rate": 5e-06, "loss": 0.1007, "num_input_tokens_seen": 100191484, "step": 1110 }, { "epoch": 5.103448275862069, "loss": 0.11061207950115204, "loss_ce": 1.637555760680698e-05, "loss_iou": 0.314453125, "loss_num": 0.0220947265625, "loss_xval": 0.1103515625, "num_input_tokens_seen": 100191484, "step": 1110 }, { "epoch": 5.1080459770114945, "grad_norm": 3.826753741184423, "learning_rate": 5e-06, "loss": 0.0499, "num_input_tokens_seen": 100281800, "step": 1111 }, { "epoch": 5.1080459770114945, "loss": 0.06089954450726509, "loss_ce": 1.7182098872581264e-06, "loss_iou": 0.28515625, "loss_num": 0.01220703125, "loss_xval": 0.060791015625, "num_input_tokens_seen": 100281800, "step": 1111 }, { "epoch": 5.11264367816092, "grad_norm": 6.265815731921294, "learning_rate": 5e-06, "loss": 0.1078, "num_input_tokens_seen": 100371368, "step": 1112 }, { "epoch": 5.11264367816092, "loss": 0.12157793343067169, "loss_ce": 1.1157572771480773e-05, "loss_iou": 0.314453125, "loss_num": 0.0242919921875, "loss_xval": 0.12158203125, "num_input_tokens_seen": 100371368, "step": 1112 }, { "epoch": 5.117241379310345, "grad_norm": 15.357245102652293, "learning_rate": 5e-06, "loss": 0.0944, "num_input_tokens_seen": 100461748, "step": 1113 }, { "epoch": 5.117241379310345, "loss": 0.07990702241659164, "loss_ce": 1.2005009921267629e-05, "loss_iou": 0.234375, "loss_num": 0.0159912109375, "loss_xval": 0.080078125, "num_input_tokens_seen": 100461748, "step": 1113 }, { "epoch": 5.12183908045977, "grad_norm": 18.749188171843112, "learning_rate": 5e-06, "loss": 0.0803, "num_input_tokens_seen": 100552144, "step": 1114 }, { "epoch": 5.12183908045977, "loss": 0.08655305206775665, "loss_ce": 5.20814774063183e-06, "loss_iou": 0.359375, "loss_num": 0.017333984375, "loss_xval": 0.08642578125, "num_input_tokens_seen": 100552144, "step": 1114 }, { "epoch": 5.126436781609195, "grad_norm": 11.884415815340965, "learning_rate": 5e-06, "loss": 0.0712, "num_input_tokens_seen": 100642552, "step": 1115 }, { "epoch": 5.126436781609195, "loss": 0.09320227056741714, "loss_ce": 1.5868474747549044e-06, "loss_iou": 0.337890625, "loss_num": 0.0186767578125, "loss_xval": 0.09326171875, "num_input_tokens_seen": 100642552, "step": 1115 }, { "epoch": 5.13103448275862, "grad_norm": 8.417856730574274, "learning_rate": 5e-06, "loss": 0.1048, "num_input_tokens_seen": 100732936, "step": 1116 }, { "epoch": 5.13103448275862, "loss": 0.10708998143672943, "loss_ce": 1.9059289115830325e-05, "loss_iou": 0.328125, "loss_num": 0.021484375, "loss_xval": 0.10693359375, "num_input_tokens_seen": 100732936, "step": 1116 }, { "epoch": 5.135632183908046, "grad_norm": 10.398734680131996, "learning_rate": 5e-06, "loss": 0.062, "num_input_tokens_seen": 100823412, "step": 1117 }, { "epoch": 5.135632183908046, "loss": 0.0529075488448143, "loss_ce": 3.5846733226208016e-05, "loss_iou": 0.298828125, "loss_num": 0.01055908203125, "loss_xval": 0.052978515625, "num_input_tokens_seen": 100823412, "step": 1117 }, { "epoch": 5.1402298850574715, "grad_norm": 15.371538486553407, "learning_rate": 5e-06, "loss": 0.0618, "num_input_tokens_seen": 100913968, "step": 1118 }, { "epoch": 5.1402298850574715, "loss": 0.05358627811074257, "loss_ce": 8.896154758986086e-05, "loss_iou": 0.345703125, "loss_num": 0.01068115234375, "loss_xval": 0.053466796875, "num_input_tokens_seen": 100913968, "step": 1118 }, { "epoch": 5.144827586206897, "grad_norm": 3.823708765476094, "learning_rate": 5e-06, "loss": 0.1162, "num_input_tokens_seen": 101004252, "step": 1119 }, { "epoch": 5.144827586206897, "loss": 0.08534368127584457, "loss_ce": 1.6531796063645743e-05, "loss_iou": 0.34765625, "loss_num": 0.01708984375, "loss_xval": 0.08544921875, "num_input_tokens_seen": 101004252, "step": 1119 }, { "epoch": 5.149425287356322, "grad_norm": 3.7000663900486743, "learning_rate": 5e-06, "loss": 0.0922, "num_input_tokens_seen": 101094664, "step": 1120 }, { "epoch": 5.149425287356322, "loss": 0.09906667470932007, "loss_ce": 0.0002965327585116029, "loss_iou": 0.341796875, "loss_num": 0.019775390625, "loss_xval": 0.0986328125, "num_input_tokens_seen": 101094664, "step": 1120 }, { "epoch": 5.154022988505747, "grad_norm": 10.501783275375674, "learning_rate": 5e-06, "loss": 0.0775, "num_input_tokens_seen": 101185136, "step": 1121 }, { "epoch": 5.154022988505747, "loss": 0.06211956962943077, "loss_ce": 1.6294376109726727e-05, "loss_iou": 0.310546875, "loss_num": 0.012451171875, "loss_xval": 0.06201171875, "num_input_tokens_seen": 101185136, "step": 1121 }, { "epoch": 5.158620689655172, "grad_norm": 5.293184104736612, "learning_rate": 5e-06, "loss": 0.1864, "num_input_tokens_seen": 101275444, "step": 1122 }, { "epoch": 5.158620689655172, "loss": 0.13397647440433502, "loss_ce": 4.307346898713149e-06, "loss_iou": 0.337890625, "loss_num": 0.02685546875, "loss_xval": 0.1337890625, "num_input_tokens_seen": 101275444, "step": 1122 }, { "epoch": 5.163218390804598, "grad_norm": 7.75756649950046, "learning_rate": 5e-06, "loss": 0.0811, "num_input_tokens_seen": 101365108, "step": 1123 }, { "epoch": 5.163218390804598, "loss": 0.07096749544143677, "loss_ce": 1.4126431779004633e-05, "loss_iou": 0.396484375, "loss_num": 0.01422119140625, "loss_xval": 0.07080078125, "num_input_tokens_seen": 101365108, "step": 1123 }, { "epoch": 5.167816091954023, "grad_norm": 7.295865334996816, "learning_rate": 5e-06, "loss": 0.0943, "num_input_tokens_seen": 101455576, "step": 1124 }, { "epoch": 5.167816091954023, "loss": 0.07797445356845856, "loss_ce": 0.00015463109593838453, "loss_iou": 0.451171875, "loss_num": 0.01556396484375, "loss_xval": 0.07763671875, "num_input_tokens_seen": 101455576, "step": 1124 }, { "epoch": 5.172413793103448, "grad_norm": 13.03580556396787, "learning_rate": 5e-06, "loss": 0.1326, "num_input_tokens_seen": 101546000, "step": 1125 }, { "epoch": 5.172413793103448, "loss": 0.15164653956890106, "loss_ce": 4.69675296699279e-06, "loss_iou": 0.37890625, "loss_num": 0.0302734375, "loss_xval": 0.1513671875, "num_input_tokens_seen": 101546000, "step": 1125 }, { "epoch": 5.1770114942528735, "grad_norm": 6.396126737755581, "learning_rate": 5e-06, "loss": 0.1145, "num_input_tokens_seen": 101636448, "step": 1126 }, { "epoch": 5.1770114942528735, "loss": 0.07902251929044724, "loss_ce": 7.354409899562597e-05, "loss_iou": 0.27734375, "loss_num": 0.015869140625, "loss_xval": 0.0791015625, "num_input_tokens_seen": 101636448, "step": 1126 }, { "epoch": 5.181609195402299, "grad_norm": 10.026494787186246, "learning_rate": 5e-06, "loss": 0.1172, "num_input_tokens_seen": 101725992, "step": 1127 }, { "epoch": 5.181609195402299, "loss": 0.15227390825748444, "loss_ce": 6.44809642835753e-06, "loss_iou": 0.376953125, "loss_num": 0.0303955078125, "loss_xval": 0.15234375, "num_input_tokens_seen": 101725992, "step": 1127 }, { "epoch": 5.186206896551724, "grad_norm": 4.0752860409330935, "learning_rate": 5e-06, "loss": 0.0827, "num_input_tokens_seen": 101816380, "step": 1128 }, { "epoch": 5.186206896551724, "loss": 0.06123349815607071, "loss_ce": 1.5233559679472819e-05, "loss_iou": 0.32421875, "loss_num": 0.01226806640625, "loss_xval": 0.061279296875, "num_input_tokens_seen": 101816380, "step": 1128 }, { "epoch": 5.190804597701149, "grad_norm": 2.938814507290908, "learning_rate": 5e-06, "loss": 0.0785, "num_input_tokens_seen": 101906756, "step": 1129 }, { "epoch": 5.190804597701149, "loss": 0.06633394211530685, "loss_ce": 1.9246750525780953e-05, "loss_iou": 0.310546875, "loss_num": 0.01324462890625, "loss_xval": 0.06640625, "num_input_tokens_seen": 101906756, "step": 1129 }, { "epoch": 5.195402298850575, "grad_norm": 4.357895106909357, "learning_rate": 5e-06, "loss": 0.0746, "num_input_tokens_seen": 101997112, "step": 1130 }, { "epoch": 5.195402298850575, "loss": 0.062284551560878754, "loss_ce": 1.343094845651649e-05, "loss_iou": 0.345703125, "loss_num": 0.012451171875, "loss_xval": 0.062255859375, "num_input_tokens_seen": 101997112, "step": 1130 }, { "epoch": 5.2, "grad_norm": 3.8775406744142624, "learning_rate": 5e-06, "loss": 0.0906, "num_input_tokens_seen": 102087380, "step": 1131 }, { "epoch": 5.2, "loss": 0.08111874014139175, "loss_ce": 3.01617683362565e-06, "loss_iou": 0.33984375, "loss_num": 0.0162353515625, "loss_xval": 0.0810546875, "num_input_tokens_seen": 102087380, "step": 1131 }, { "epoch": 5.204597701149425, "grad_norm": 4.692014408620994, "learning_rate": 5e-06, "loss": 0.087, "num_input_tokens_seen": 102177880, "step": 1132 }, { "epoch": 5.204597701149425, "loss": 0.11302285641431808, "loss_ce": 3.152359931846149e-05, "loss_iou": 0.32421875, "loss_num": 0.0225830078125, "loss_xval": 0.11279296875, "num_input_tokens_seen": 102177880, "step": 1132 }, { "epoch": 5.2091954022988505, "grad_norm": 3.3544978398050005, "learning_rate": 5e-06, "loss": 0.1119, "num_input_tokens_seen": 102267436, "step": 1133 }, { "epoch": 5.2091954022988505, "loss": 0.15970894694328308, "loss_ce": 1.0458235919941217e-05, "loss_iou": 0.35546875, "loss_num": 0.031982421875, "loss_xval": 0.16015625, "num_input_tokens_seen": 102267436, "step": 1133 }, { "epoch": 5.213793103448276, "grad_norm": 2.4937616664035356, "learning_rate": 5e-06, "loss": 0.0925, "num_input_tokens_seen": 102357772, "step": 1134 }, { "epoch": 5.213793103448276, "loss": 0.07588402926921844, "loss_ce": 4.784332122653723e-05, "loss_iou": 0.34765625, "loss_num": 0.01519775390625, "loss_xval": 0.07568359375, "num_input_tokens_seen": 102357772, "step": 1134 }, { "epoch": 5.218390804597701, "grad_norm": 11.78713407810388, "learning_rate": 5e-06, "loss": 0.112, "num_input_tokens_seen": 102448228, "step": 1135 }, { "epoch": 5.218390804597701, "loss": 0.12686412036418915, "loss_ce": 2.5471808839938603e-06, "loss_iou": 0.333984375, "loss_num": 0.025390625, "loss_xval": 0.126953125, "num_input_tokens_seen": 102448228, "step": 1135 }, { "epoch": 5.222988505747127, "grad_norm": 7.345884023163392, "learning_rate": 5e-06, "loss": 0.0977, "num_input_tokens_seen": 102538620, "step": 1136 }, { "epoch": 5.222988505747127, "loss": 0.07740141451358795, "loss_ce": 2.40960180235561e-05, "loss_iou": 0.373046875, "loss_num": 0.0155029296875, "loss_xval": 0.0771484375, "num_input_tokens_seen": 102538620, "step": 1136 }, { "epoch": 5.227586206896552, "grad_norm": 9.68121463931108, "learning_rate": 5e-06, "loss": 0.1204, "num_input_tokens_seen": 102628180, "step": 1137 }, { "epoch": 5.227586206896552, "loss": 0.09852880239486694, "loss_ce": 1.8055738109978847e-05, "loss_iou": 0.412109375, "loss_num": 0.0196533203125, "loss_xval": 0.0986328125, "num_input_tokens_seen": 102628180, "step": 1137 }, { "epoch": 5.232183908045977, "grad_norm": 3.7617569064718013, "learning_rate": 5e-06, "loss": 0.0771, "num_input_tokens_seen": 102717736, "step": 1138 }, { "epoch": 5.232183908045977, "loss": 0.08675423264503479, "loss_ce": 2.3268486984306946e-05, "loss_iou": 0.32421875, "loss_num": 0.017333984375, "loss_xval": 0.0869140625, "num_input_tokens_seen": 102717736, "step": 1138 }, { "epoch": 5.236781609195402, "grad_norm": 3.9125840477144767, "learning_rate": 5e-06, "loss": 0.1054, "num_input_tokens_seen": 102808252, "step": 1139 }, { "epoch": 5.236781609195402, "loss": 0.1034986600279808, "loss_ce": 1.3551202755479608e-05, "loss_iou": 0.328125, "loss_num": 0.020751953125, "loss_xval": 0.103515625, "num_input_tokens_seen": 102808252, "step": 1139 }, { "epoch": 5.241379310344827, "grad_norm": 2.097538565665048, "learning_rate": 5e-06, "loss": 0.0827, "num_input_tokens_seen": 102898628, "step": 1140 }, { "epoch": 5.241379310344827, "loss": 0.06704915314912796, "loss_ce": 2.035472562056384e-06, "loss_iou": 0.296875, "loss_num": 0.013427734375, "loss_xval": 0.06689453125, "num_input_tokens_seen": 102898628, "step": 1140 }, { "epoch": 5.2459770114942526, "grad_norm": 10.818635533541947, "learning_rate": 5e-06, "loss": 0.0848, "num_input_tokens_seen": 102988948, "step": 1141 }, { "epoch": 5.2459770114942526, "loss": 0.07962171733379364, "loss_ce": 2.4237215257016942e-05, "loss_iou": 0.275390625, "loss_num": 0.015869140625, "loss_xval": 0.07958984375, "num_input_tokens_seen": 102988948, "step": 1141 }, { "epoch": 5.250574712643678, "grad_norm": 7.192797833007836, "learning_rate": 5e-06, "loss": 0.0685, "num_input_tokens_seen": 103079352, "step": 1142 }, { "epoch": 5.250574712643678, "loss": 0.08450967818498611, "loss_ce": 6.502830729004927e-06, "loss_iou": 0.34375, "loss_num": 0.0169677734375, "loss_xval": 0.08447265625, "num_input_tokens_seen": 103079352, "step": 1142 }, { "epoch": 5.255172413793104, "grad_norm": 6.327504848695889, "learning_rate": 5e-06, "loss": 0.0917, "num_input_tokens_seen": 103169612, "step": 1143 }, { "epoch": 5.255172413793104, "loss": 0.12516465783119202, "loss_ce": 1.2068423529854044e-05, "loss_iou": 0.28125, "loss_num": 0.0250244140625, "loss_xval": 0.125, "num_input_tokens_seen": 103169612, "step": 1143 }, { "epoch": 5.259770114942529, "grad_norm": 3.5430313631820884, "learning_rate": 5e-06, "loss": 0.0836, "num_input_tokens_seen": 103260048, "step": 1144 }, { "epoch": 5.259770114942529, "loss": 0.06030872091650963, "loss_ce": 5.9857538872165605e-06, "loss_iou": 0.33203125, "loss_num": 0.0120849609375, "loss_xval": 0.060302734375, "num_input_tokens_seen": 103260048, "step": 1144 }, { "epoch": 5.264367816091954, "grad_norm": 15.453671778683573, "learning_rate": 5e-06, "loss": 0.0821, "num_input_tokens_seen": 103350296, "step": 1145 }, { "epoch": 5.264367816091954, "loss": 0.08191235363483429, "loss_ce": 3.175610572725418e-06, "loss_iou": 0.35546875, "loss_num": 0.016357421875, "loss_xval": 0.08203125, "num_input_tokens_seen": 103350296, "step": 1145 }, { "epoch": 5.268965517241379, "grad_norm": 6.718717304064819, "learning_rate": 5e-06, "loss": 0.0728, "num_input_tokens_seen": 103440532, "step": 1146 }, { "epoch": 5.268965517241379, "loss": 0.06786315143108368, "loss_ce": 2.2574939066544175e-05, "loss_iou": 0.40234375, "loss_num": 0.0135498046875, "loss_xval": 0.06787109375, "num_input_tokens_seen": 103440532, "step": 1146 }, { "epoch": 5.273563218390804, "grad_norm": 17.691788722797366, "learning_rate": 5e-06, "loss": 0.1213, "num_input_tokens_seen": 103530900, "step": 1147 }, { "epoch": 5.273563218390804, "loss": 0.11429747939109802, "loss_ce": 9.143472198047675e-06, "loss_iou": 0.38671875, "loss_num": 0.0228271484375, "loss_xval": 0.1142578125, "num_input_tokens_seen": 103530900, "step": 1147 }, { "epoch": 5.2781609195402295, "grad_norm": 18.892872568107364, "learning_rate": 5e-06, "loss": 0.0927, "num_input_tokens_seen": 103621200, "step": 1148 }, { "epoch": 5.2781609195402295, "loss": 0.05986202508211136, "loss_ce": 1.7945828858501045e-06, "loss_iou": 0.373046875, "loss_num": 0.011962890625, "loss_xval": 0.059814453125, "num_input_tokens_seen": 103621200, "step": 1148 }, { "epoch": 5.2827586206896555, "grad_norm": 19.67550320719364, "learning_rate": 5e-06, "loss": 0.1304, "num_input_tokens_seen": 103711424, "step": 1149 }, { "epoch": 5.2827586206896555, "loss": 0.15571549534797668, "loss_ce": 1.4810606444370933e-05, "loss_iou": 0.31640625, "loss_num": 0.0311279296875, "loss_xval": 0.1552734375, "num_input_tokens_seen": 103711424, "step": 1149 }, { "epoch": 5.287356321839081, "grad_norm": 10.959254538309226, "learning_rate": 5e-06, "loss": 0.1328, "num_input_tokens_seen": 103801748, "step": 1150 }, { "epoch": 5.287356321839081, "loss": 0.08585269004106522, "loss_ce": 6.745824521203758e-06, "loss_iou": 0.369140625, "loss_num": 0.0172119140625, "loss_xval": 0.0859375, "num_input_tokens_seen": 103801748, "step": 1150 }, { "epoch": 5.291954022988506, "grad_norm": 6.5980592228912345, "learning_rate": 5e-06, "loss": 0.1155, "num_input_tokens_seen": 103892096, "step": 1151 }, { "epoch": 5.291954022988506, "loss": 0.13581782579421997, "loss_ce": 1.4601598195440602e-05, "loss_iou": 0.291015625, "loss_num": 0.0272216796875, "loss_xval": 0.1357421875, "num_input_tokens_seen": 103892096, "step": 1151 }, { "epoch": 5.296551724137931, "grad_norm": 4.876191398306122, "learning_rate": 5e-06, "loss": 0.0575, "num_input_tokens_seen": 103982468, "step": 1152 }, { "epoch": 5.296551724137931, "loss": 0.03845709189772606, "loss_ce": 4.945307409798261e-06, "loss_iou": 0.283203125, "loss_num": 0.0076904296875, "loss_xval": 0.03857421875, "num_input_tokens_seen": 103982468, "step": 1152 }, { "epoch": 5.301149425287356, "grad_norm": 8.328380265990786, "learning_rate": 5e-06, "loss": 0.0661, "num_input_tokens_seen": 104072856, "step": 1153 }, { "epoch": 5.301149425287356, "loss": 0.07374840974807739, "loss_ce": 2.674235474842135e-06, "loss_iou": 0.296875, "loss_num": 0.0147705078125, "loss_xval": 0.07373046875, "num_input_tokens_seen": 104072856, "step": 1153 }, { "epoch": 5.305747126436781, "grad_norm": 8.532773650525401, "learning_rate": 5e-06, "loss": 0.0816, "num_input_tokens_seen": 104162432, "step": 1154 }, { "epoch": 5.305747126436781, "loss": 0.10141049325466156, "loss_ce": 1.5837054888834246e-05, "loss_iou": 0.302734375, "loss_num": 0.020263671875, "loss_xval": 0.1015625, "num_input_tokens_seen": 104162432, "step": 1154 }, { "epoch": 5.310344827586207, "grad_norm": 3.6361735465088287, "learning_rate": 5e-06, "loss": 0.0861, "num_input_tokens_seen": 104252752, "step": 1155 }, { "epoch": 5.310344827586207, "loss": 0.06680650264024734, "loss_ce": 1.8778815501718782e-05, "loss_iou": 0.3671875, "loss_num": 0.01336669921875, "loss_xval": 0.06689453125, "num_input_tokens_seen": 104252752, "step": 1155 }, { "epoch": 5.314942528735632, "grad_norm": 27.691441920507174, "learning_rate": 5e-06, "loss": 0.1182, "num_input_tokens_seen": 104343184, "step": 1156 }, { "epoch": 5.314942528735632, "loss": 0.11418163776397705, "loss_ce": 1.5387857274618e-05, "loss_iou": 0.294921875, "loss_num": 0.0228271484375, "loss_xval": 0.1142578125, "num_input_tokens_seen": 104343184, "step": 1156 }, { "epoch": 5.319540229885058, "grad_norm": 8.129126905990978, "learning_rate": 5e-06, "loss": 0.1324, "num_input_tokens_seen": 104433600, "step": 1157 }, { "epoch": 5.319540229885058, "loss": 0.1396959125995636, "loss_ce": 4.747589264297858e-05, "loss_iou": 0.306640625, "loss_num": 0.0279541015625, "loss_xval": 0.1396484375, "num_input_tokens_seen": 104433600, "step": 1157 }, { "epoch": 5.324137931034483, "grad_norm": 2.5246901713346284, "learning_rate": 5e-06, "loss": 0.0845, "num_input_tokens_seen": 104524052, "step": 1158 }, { "epoch": 5.324137931034483, "loss": 0.08805723488330841, "loss_ce": 1.4025717064214405e-05, "loss_iou": 0.390625, "loss_num": 0.017578125, "loss_xval": 0.087890625, "num_input_tokens_seen": 104524052, "step": 1158 }, { "epoch": 5.328735632183908, "grad_norm": 6.032560857969416, "learning_rate": 5e-06, "loss": 0.0991, "num_input_tokens_seen": 104613016, "step": 1159 }, { "epoch": 5.328735632183908, "loss": 0.10382804274559021, "loss_ce": 2.2498687030747533e-05, "loss_iou": 0.28515625, "loss_num": 0.020751953125, "loss_xval": 0.10400390625, "num_input_tokens_seen": 104613016, "step": 1159 }, { "epoch": 5.333333333333333, "grad_norm": 3.524592869861034, "learning_rate": 5e-06, "loss": 0.0834, "num_input_tokens_seen": 104703380, "step": 1160 }, { "epoch": 5.333333333333333, "loss": 0.0802207663655281, "loss_ce": 5.31496243638685e-06, "loss_iou": 0.279296875, "loss_num": 0.01611328125, "loss_xval": 0.080078125, "num_input_tokens_seen": 104703380, "step": 1160 }, { "epoch": 5.337931034482759, "grad_norm": 4.969952758545786, "learning_rate": 5e-06, "loss": 0.0857, "num_input_tokens_seen": 104793836, "step": 1161 }, { "epoch": 5.337931034482759, "loss": 0.09672747552394867, "loss_ce": 2.0068077901669312e-06, "loss_iou": 0.33203125, "loss_num": 0.0194091796875, "loss_xval": 0.0966796875, "num_input_tokens_seen": 104793836, "step": 1161 }, { "epoch": 5.342528735632184, "grad_norm": 6.83000765916347, "learning_rate": 5e-06, "loss": 0.0675, "num_input_tokens_seen": 104884288, "step": 1162 }, { "epoch": 5.342528735632184, "loss": 0.061434537172317505, "loss_ce": 1.7906917491927743e-05, "loss_iou": 0.3203125, "loss_num": 0.01226806640625, "loss_xval": 0.0615234375, "num_input_tokens_seen": 104884288, "step": 1162 }, { "epoch": 5.347126436781609, "grad_norm": 15.595761412741526, "learning_rate": 5e-06, "loss": 0.0816, "num_input_tokens_seen": 104974708, "step": 1163 }, { "epoch": 5.347126436781609, "loss": 0.09743352234363556, "loss_ce": 6.154959919513203e-06, "loss_iou": 0.37109375, "loss_num": 0.01953125, "loss_xval": 0.09765625, "num_input_tokens_seen": 104974708, "step": 1163 }, { "epoch": 5.3517241379310345, "grad_norm": 9.98309693591081, "learning_rate": 5e-06, "loss": 0.0775, "num_input_tokens_seen": 105064256, "step": 1164 }, { "epoch": 5.3517241379310345, "loss": 0.06634046137332916, "loss_ce": 7.154278864618391e-05, "loss_iou": 0.322265625, "loss_num": 0.01324462890625, "loss_xval": 0.06640625, "num_input_tokens_seen": 105064256, "step": 1164 }, { "epoch": 5.35632183908046, "grad_norm": 14.072679950902984, "learning_rate": 5e-06, "loss": 0.0807, "num_input_tokens_seen": 105154564, "step": 1165 }, { "epoch": 5.35632183908046, "loss": 0.07407604902982712, "loss_ce": 9.88530428003287e-06, "loss_iou": 0.291015625, "loss_num": 0.01483154296875, "loss_xval": 0.07421875, "num_input_tokens_seen": 105154564, "step": 1165 }, { "epoch": 5.360919540229885, "grad_norm": 18.28267664671943, "learning_rate": 5e-06, "loss": 0.0982, "num_input_tokens_seen": 105244844, "step": 1166 }, { "epoch": 5.360919540229885, "loss": 0.12162074446678162, "loss_ce": 8.195744158001617e-06, "loss_iou": 0.42578125, "loss_num": 0.0242919921875, "loss_xval": 0.12158203125, "num_input_tokens_seen": 105244844, "step": 1166 }, { "epoch": 5.36551724137931, "grad_norm": 20.34315476487668, "learning_rate": 5e-06, "loss": 0.1056, "num_input_tokens_seen": 105335136, "step": 1167 }, { "epoch": 5.36551724137931, "loss": 0.08798626065254211, "loss_ce": 1.933791281771846e-05, "loss_iou": 0.384765625, "loss_num": 0.017578125, "loss_xval": 0.087890625, "num_input_tokens_seen": 105335136, "step": 1167 }, { "epoch": 5.370114942528736, "grad_norm": 3.8998763710651003, "learning_rate": 5e-06, "loss": 0.1586, "num_input_tokens_seen": 105425568, "step": 1168 }, { "epoch": 5.370114942528736, "loss": 0.13412779569625854, "loss_ce": 3.038228669538512e-06, "loss_iou": 0.2890625, "loss_num": 0.02685546875, "loss_xval": 0.1337890625, "num_input_tokens_seen": 105425568, "step": 1168 }, { "epoch": 5.374712643678161, "grad_norm": 3.9686217500508056, "learning_rate": 5e-06, "loss": 0.1032, "num_input_tokens_seen": 105515932, "step": 1169 }, { "epoch": 5.374712643678161, "loss": 0.12314353883266449, "loss_ce": 5.114484793011798e-06, "loss_iou": 0.31640625, "loss_num": 0.024658203125, "loss_xval": 0.123046875, "num_input_tokens_seen": 105515932, "step": 1169 }, { "epoch": 5.379310344827586, "grad_norm": 9.188483207270677, "learning_rate": 5e-06, "loss": 0.0936, "num_input_tokens_seen": 105606460, "step": 1170 }, { "epoch": 5.379310344827586, "loss": 0.0674906075000763, "loss_ce": 3.1501404009759426e-05, "loss_iou": 0.341796875, "loss_num": 0.01348876953125, "loss_xval": 0.0673828125, "num_input_tokens_seen": 105606460, "step": 1170 }, { "epoch": 5.3839080459770114, "grad_norm": 13.62728698765477, "learning_rate": 5e-06, "loss": 0.0687, "num_input_tokens_seen": 105696844, "step": 1171 }, { "epoch": 5.3839080459770114, "loss": 0.05981367826461792, "loss_ce": 1.448656985303387e-05, "loss_iou": 0.30078125, "loss_num": 0.011962890625, "loss_xval": 0.059814453125, "num_input_tokens_seen": 105696844, "step": 1171 }, { "epoch": 5.388505747126437, "grad_norm": 7.114022345551658, "learning_rate": 5e-06, "loss": 0.0833, "num_input_tokens_seen": 105786460, "step": 1172 }, { "epoch": 5.388505747126437, "loss": 0.09999861568212509, "loss_ce": 2.3021595552563667e-05, "loss_iou": 0.3515625, "loss_num": 0.02001953125, "loss_xval": 0.10009765625, "num_input_tokens_seen": 105786460, "step": 1172 }, { "epoch": 5.393103448275862, "grad_norm": 4.472914999467795, "learning_rate": 5e-06, "loss": 0.0697, "num_input_tokens_seen": 105876844, "step": 1173 }, { "epoch": 5.393103448275862, "loss": 0.0646452009677887, "loss_ce": 8.972834621090442e-06, "loss_iou": 0.34375, "loss_num": 0.012939453125, "loss_xval": 0.064453125, "num_input_tokens_seen": 105876844, "step": 1173 }, { "epoch": 5.397701149425288, "grad_norm": 10.796359253655176, "learning_rate": 5e-06, "loss": 0.1288, "num_input_tokens_seen": 105967272, "step": 1174 }, { "epoch": 5.397701149425288, "loss": 0.07185741513967514, "loss_ce": 3.7756042274850188e-06, "loss_iou": 0.37109375, "loss_num": 0.014404296875, "loss_xval": 0.07177734375, "num_input_tokens_seen": 105967272, "step": 1174 }, { "epoch": 5.402298850574713, "grad_norm": 3.5477662420576417, "learning_rate": 5e-06, "loss": 0.1147, "num_input_tokens_seen": 106057604, "step": 1175 }, { "epoch": 5.402298850574713, "loss": 0.07314296066761017, "loss_ce": 5.335675086826086e-05, "loss_iou": 0.35546875, "loss_num": 0.0146484375, "loss_xval": 0.0732421875, "num_input_tokens_seen": 106057604, "step": 1175 }, { "epoch": 5.406896551724138, "grad_norm": 3.654190105765733, "learning_rate": 5e-06, "loss": 0.0883, "num_input_tokens_seen": 106148012, "step": 1176 }, { "epoch": 5.406896551724138, "loss": 0.0644906684756279, "loss_ce": 7.029044809314655e-06, "loss_iou": 0.353515625, "loss_num": 0.01287841796875, "loss_xval": 0.064453125, "num_input_tokens_seen": 106148012, "step": 1176 }, { "epoch": 5.411494252873563, "grad_norm": 2.0857683176392814, "learning_rate": 5e-06, "loss": 0.0829, "num_input_tokens_seen": 106236844, "step": 1177 }, { "epoch": 5.411494252873563, "loss": 0.10485847294330597, "loss_ce": 1.5335266652982682e-05, "loss_iou": 0.41796875, "loss_num": 0.02099609375, "loss_xval": 0.10498046875, "num_input_tokens_seen": 106236844, "step": 1177 }, { "epoch": 5.416091954022988, "grad_norm": 4.7865908582620245, "learning_rate": 5e-06, "loss": 0.058, "num_input_tokens_seen": 106327196, "step": 1178 }, { "epoch": 5.416091954022988, "loss": 0.07083334028720856, "loss_ce": 2.0433958525245544e-06, "loss_iou": 0.400390625, "loss_num": 0.01416015625, "loss_xval": 0.07080078125, "num_input_tokens_seen": 106327196, "step": 1178 }, { "epoch": 5.4206896551724135, "grad_norm": 4.381050617741422, "learning_rate": 5e-06, "loss": 0.1109, "num_input_tokens_seen": 106417488, "step": 1179 }, { "epoch": 5.4206896551724135, "loss": 0.1030469685792923, "loss_ce": 4.370722763269441e-06, "loss_iou": 0.255859375, "loss_num": 0.0206298828125, "loss_xval": 0.10302734375, "num_input_tokens_seen": 106417488, "step": 1179 }, { "epoch": 5.425287356321839, "grad_norm": 13.90099727079854, "learning_rate": 5e-06, "loss": 0.0959, "num_input_tokens_seen": 106507752, "step": 1180 }, { "epoch": 5.425287356321839, "loss": 0.11128388345241547, "loss_ce": 1.5395814898511162e-06, "loss_iou": 0.298828125, "loss_num": 0.022216796875, "loss_xval": 0.111328125, "num_input_tokens_seen": 106507752, "step": 1180 }, { "epoch": 5.429885057471265, "grad_norm": 3.514125111179275, "learning_rate": 5e-06, "loss": 0.0897, "num_input_tokens_seen": 106598104, "step": 1181 }, { "epoch": 5.429885057471265, "loss": 0.05190886929631233, "loss_ce": 1.372814949718304e-05, "loss_iou": 0.2490234375, "loss_num": 0.0103759765625, "loss_xval": 0.052001953125, "num_input_tokens_seen": 106598104, "step": 1181 }, { "epoch": 5.43448275862069, "grad_norm": 7.033128839020526, "learning_rate": 5e-06, "loss": 0.1021, "num_input_tokens_seen": 106688460, "step": 1182 }, { "epoch": 5.43448275862069, "loss": 0.10943891108036041, "loss_ce": 2.8767924504791154e-06, "loss_iou": 0.296875, "loss_num": 0.0218505859375, "loss_xval": 0.109375, "num_input_tokens_seen": 106688460, "step": 1182 }, { "epoch": 5.439080459770115, "grad_norm": 7.6220799806368005, "learning_rate": 5e-06, "loss": 0.1275, "num_input_tokens_seen": 106778892, "step": 1183 }, { "epoch": 5.439080459770115, "loss": 0.13684214651584625, "loss_ce": 3.1839987059356645e-05, "loss_iou": 0.30078125, "loss_num": 0.02734375, "loss_xval": 0.13671875, "num_input_tokens_seen": 106778892, "step": 1183 }, { "epoch": 5.44367816091954, "grad_norm": 3.201233161347327, "learning_rate": 5e-06, "loss": 0.1166, "num_input_tokens_seen": 106869232, "step": 1184 }, { "epoch": 5.44367816091954, "loss": 0.11746550351381302, "loss_ce": 3.350429324200377e-06, "loss_iou": 0.33203125, "loss_num": 0.0234375, "loss_xval": 0.11767578125, "num_input_tokens_seen": 106869232, "step": 1184 }, { "epoch": 5.448275862068965, "grad_norm": 27.107960993966334, "learning_rate": 5e-06, "loss": 0.0706, "num_input_tokens_seen": 106959604, "step": 1185 }, { "epoch": 5.448275862068965, "loss": 0.08980852365493774, "loss_ce": 2.580695945653133e-05, "loss_iou": 0.3359375, "loss_num": 0.0179443359375, "loss_xval": 0.08984375, "num_input_tokens_seen": 106959604, "step": 1185 }, { "epoch": 5.4528735632183905, "grad_norm": 5.056052589774106, "learning_rate": 5e-06, "loss": 0.0971, "num_input_tokens_seen": 107049260, "step": 1186 }, { "epoch": 5.4528735632183905, "loss": 0.0906330794095993, "loss_ce": 1.1129596714454237e-05, "loss_iou": 0.396484375, "loss_num": 0.01806640625, "loss_xval": 0.0908203125, "num_input_tokens_seen": 107049260, "step": 1186 }, { "epoch": 5.4574712643678165, "grad_norm": 12.077013452361433, "learning_rate": 5e-06, "loss": 0.1106, "num_input_tokens_seen": 107139660, "step": 1187 }, { "epoch": 5.4574712643678165, "loss": 0.08940693736076355, "loss_ce": 2.0952680642949417e-05, "loss_iou": 0.392578125, "loss_num": 0.017822265625, "loss_xval": 0.08935546875, "num_input_tokens_seen": 107139660, "step": 1187 }, { "epoch": 5.462068965517242, "grad_norm": 5.117358275086491, "learning_rate": 5e-06, "loss": 0.1005, "num_input_tokens_seen": 107229976, "step": 1188 }, { "epoch": 5.462068965517242, "loss": 0.1243157610297203, "loss_ce": 0.0007195670041255653, "loss_iou": 0.36328125, "loss_num": 0.0247802734375, "loss_xval": 0.12353515625, "num_input_tokens_seen": 107229976, "step": 1188 }, { "epoch": 5.466666666666667, "grad_norm": 3.84883008143984, "learning_rate": 5e-06, "loss": 0.063, "num_input_tokens_seen": 107320400, "step": 1189 }, { "epoch": 5.466666666666667, "loss": 0.04762866720557213, "loss_ce": 5.176388003746979e-05, "loss_iou": 0.30859375, "loss_num": 0.009521484375, "loss_xval": 0.047607421875, "num_input_tokens_seen": 107320400, "step": 1189 }, { "epoch": 5.471264367816092, "grad_norm": 11.839810250471555, "learning_rate": 5e-06, "loss": 0.1302, "num_input_tokens_seen": 107410776, "step": 1190 }, { "epoch": 5.471264367816092, "loss": 0.15773043036460876, "loss_ce": 1.558406620461028e-05, "loss_iou": 0.357421875, "loss_num": 0.031494140625, "loss_xval": 0.158203125, "num_input_tokens_seen": 107410776, "step": 1190 }, { "epoch": 5.475862068965517, "grad_norm": 3.3411639885164677, "learning_rate": 5e-06, "loss": 0.0892, "num_input_tokens_seen": 107500968, "step": 1191 }, { "epoch": 5.475862068965517, "loss": 0.08212100714445114, "loss_ce": 0.00012027601769659668, "loss_iou": 0.310546875, "loss_num": 0.016357421875, "loss_xval": 0.08203125, "num_input_tokens_seen": 107500968, "step": 1191 }, { "epoch": 5.480459770114942, "grad_norm": 2.668518928020011, "learning_rate": 5e-06, "loss": 0.0891, "num_input_tokens_seen": 107591340, "step": 1192 }, { "epoch": 5.480459770114942, "loss": 0.11270363628864288, "loss_ce": 6.325638969428837e-05, "loss_iou": 0.3515625, "loss_num": 0.0225830078125, "loss_xval": 0.11279296875, "num_input_tokens_seen": 107591340, "step": 1192 }, { "epoch": 5.485057471264367, "grad_norm": 3.423089746700433, "learning_rate": 5e-06, "loss": 0.0862, "num_input_tokens_seen": 107681688, "step": 1193 }, { "epoch": 5.485057471264367, "loss": 0.06189355626702309, "loss_ce": 3.9072983781807125e-06, "loss_iou": 0.30859375, "loss_num": 0.01239013671875, "loss_xval": 0.06201171875, "num_input_tokens_seen": 107681688, "step": 1193 }, { "epoch": 5.489655172413793, "grad_norm": 10.782127348669686, "learning_rate": 5e-06, "loss": 0.0853, "num_input_tokens_seen": 107772020, "step": 1194 }, { "epoch": 5.489655172413793, "loss": 0.10825426876544952, "loss_ce": 3.8937610952416435e-05, "loss_iou": 0.33984375, "loss_num": 0.0216064453125, "loss_xval": 0.1083984375, "num_input_tokens_seen": 107772020, "step": 1194 }, { "epoch": 5.494252873563219, "grad_norm": 10.321202311466154, "learning_rate": 5e-06, "loss": 0.058, "num_input_tokens_seen": 107862420, "step": 1195 }, { "epoch": 5.494252873563219, "loss": 0.06930500268936157, "loss_ce": 1.4843457392998971e-05, "loss_iou": 0.41015625, "loss_num": 0.01385498046875, "loss_xval": 0.0693359375, "num_input_tokens_seen": 107862420, "step": 1195 }, { "epoch": 5.498850574712644, "grad_norm": 1.7063662050581758, "learning_rate": 5e-06, "loss": 0.1219, "num_input_tokens_seen": 107952696, "step": 1196 }, { "epoch": 5.498850574712644, "loss": 0.09334798157215118, "loss_ce": 9.968431186280213e-06, "loss_iou": 0.328125, "loss_num": 0.0186767578125, "loss_xval": 0.09326171875, "num_input_tokens_seen": 107952696, "step": 1196 }, { "epoch": 5.503448275862069, "grad_norm": 5.082277526279771, "learning_rate": 5e-06, "loss": 0.1014, "num_input_tokens_seen": 108043016, "step": 1197 }, { "epoch": 5.503448275862069, "loss": 0.0524345263838768, "loss_ce": 5.3265985116013326e-06, "loss_iou": 0.28125, "loss_num": 0.010498046875, "loss_xval": 0.052490234375, "num_input_tokens_seen": 108043016, "step": 1197 }, { "epoch": 5.508045977011494, "grad_norm": 3.926403547610472, "learning_rate": 5e-06, "loss": 0.0637, "num_input_tokens_seen": 108133392, "step": 1198 }, { "epoch": 5.508045977011494, "loss": 0.055503346025943756, "loss_ce": 7.1315121203952e-06, "loss_iou": 0.345703125, "loss_num": 0.0111083984375, "loss_xval": 0.055419921875, "num_input_tokens_seen": 108133392, "step": 1198 }, { "epoch": 5.512643678160919, "grad_norm": 19.466891089230415, "learning_rate": 5e-06, "loss": 0.1123, "num_input_tokens_seen": 108223928, "step": 1199 }, { "epoch": 5.512643678160919, "loss": 0.1550014615058899, "loss_ce": 3.3197462471434847e-05, "loss_iou": 0.3515625, "loss_num": 0.031005859375, "loss_xval": 0.1552734375, "num_input_tokens_seen": 108223928, "step": 1199 }, { "epoch": 5.517241379310345, "grad_norm": 8.883212548201756, "learning_rate": 5e-06, "loss": 0.0952, "num_input_tokens_seen": 108314296, "step": 1200 }, { "epoch": 5.517241379310345, "loss": 0.11142435669898987, "loss_ce": 1.9941233404097147e-05, "loss_iou": 0.39453125, "loss_num": 0.022216796875, "loss_xval": 0.111328125, "num_input_tokens_seen": 108314296, "step": 1200 }, { "epoch": 5.52183908045977, "grad_norm": 4.261788928644466, "learning_rate": 5e-06, "loss": 0.0971, "num_input_tokens_seen": 108404724, "step": 1201 }, { "epoch": 5.52183908045977, "loss": 0.09745465219020844, "loss_ce": 1.2026385775243398e-05, "loss_iou": 0.439453125, "loss_num": 0.01953125, "loss_xval": 0.09765625, "num_input_tokens_seen": 108404724, "step": 1201 }, { "epoch": 5.5264367816091955, "grad_norm": 2.987130600386024, "learning_rate": 5e-06, "loss": 0.1135, "num_input_tokens_seen": 108495024, "step": 1202 }, { "epoch": 5.5264367816091955, "loss": 0.15418361127376556, "loss_ce": 6.983517960179597e-05, "loss_iou": 0.283203125, "loss_num": 0.03076171875, "loss_xval": 0.154296875, "num_input_tokens_seen": 108495024, "step": 1202 }, { "epoch": 5.531034482758621, "grad_norm": 2.3653856974799323, "learning_rate": 5e-06, "loss": 0.0675, "num_input_tokens_seen": 108585416, "step": 1203 }, { "epoch": 5.531034482758621, "loss": 0.06593763828277588, "loss_ce": 1.96732235053787e-05, "loss_iou": 0.318359375, "loss_num": 0.01318359375, "loss_xval": 0.06591796875, "num_input_tokens_seen": 108585416, "step": 1203 }, { "epoch": 5.535632183908046, "grad_norm": 3.330630153861718, "learning_rate": 5e-06, "loss": 0.0991, "num_input_tokens_seen": 108675664, "step": 1204 }, { "epoch": 5.535632183908046, "loss": 0.11529748141765594, "loss_ce": 2.066693014057819e-06, "loss_iou": 0.302734375, "loss_num": 0.0230712890625, "loss_xval": 0.115234375, "num_input_tokens_seen": 108675664, "step": 1204 }, { "epoch": 5.540229885057471, "grad_norm": 6.486089157651149, "learning_rate": 5e-06, "loss": 0.0872, "num_input_tokens_seen": 108765884, "step": 1205 }, { "epoch": 5.540229885057471, "loss": 0.07939766347408295, "loss_ce": 6.185707206896041e-06, "loss_iou": 0.302734375, "loss_num": 0.015869140625, "loss_xval": 0.07958984375, "num_input_tokens_seen": 108765884, "step": 1205 }, { "epoch": 5.544827586206896, "grad_norm": 5.30690182772674, "learning_rate": 5e-06, "loss": 0.0865, "num_input_tokens_seen": 108856308, "step": 1206 }, { "epoch": 5.544827586206896, "loss": 0.07844828069210052, "loss_ce": 1.810871071938891e-05, "loss_iou": 0.44921875, "loss_num": 0.015625, "loss_xval": 0.07861328125, "num_input_tokens_seen": 108856308, "step": 1206 }, { "epoch": 5.549425287356322, "grad_norm": 3.5054972022904183, "learning_rate": 5e-06, "loss": 0.0758, "num_input_tokens_seen": 108946648, "step": 1207 }, { "epoch": 5.549425287356322, "loss": 0.07729589194059372, "loss_ce": 1.0129077963938471e-05, "loss_iou": 0.287109375, "loss_num": 0.0155029296875, "loss_xval": 0.0771484375, "num_input_tokens_seen": 108946648, "step": 1207 }, { "epoch": 5.554022988505747, "grad_norm": 9.78898145858432, "learning_rate": 5e-06, "loss": 0.0716, "num_input_tokens_seen": 109037112, "step": 1208 }, { "epoch": 5.554022988505747, "loss": 0.07796503603458405, "loss_ce": 7.88130364526296e-06, "loss_iou": 0.30859375, "loss_num": 0.015625, "loss_xval": 0.078125, "num_input_tokens_seen": 109037112, "step": 1208 }, { "epoch": 5.558620689655172, "grad_norm": 4.85707186606737, "learning_rate": 5e-06, "loss": 0.109, "num_input_tokens_seen": 109126120, "step": 1209 }, { "epoch": 5.558620689655172, "loss": 0.12425880879163742, "loss_ce": 2.1751711756223813e-05, "loss_iou": 0.365234375, "loss_num": 0.0247802734375, "loss_xval": 0.1240234375, "num_input_tokens_seen": 109126120, "step": 1209 }, { "epoch": 5.563218390804598, "grad_norm": 8.786051137164725, "learning_rate": 5e-06, "loss": 0.0709, "num_input_tokens_seen": 109216380, "step": 1210 }, { "epoch": 5.563218390804598, "loss": 0.0687236338853836, "loss_ce": 2.855926322808955e-05, "loss_iou": 0.3671875, "loss_num": 0.01373291015625, "loss_xval": 0.06884765625, "num_input_tokens_seen": 109216380, "step": 1210 }, { "epoch": 5.567816091954023, "grad_norm": 3.482256798617991, "learning_rate": 5e-06, "loss": 0.0865, "num_input_tokens_seen": 109306644, "step": 1211 }, { "epoch": 5.567816091954023, "loss": 0.0845988318324089, "loss_ce": 4.1002945181389805e-06, "loss_iou": 0.310546875, "loss_num": 0.016845703125, "loss_xval": 0.08447265625, "num_input_tokens_seen": 109306644, "step": 1211 }, { "epoch": 5.572413793103449, "grad_norm": 4.264015760408713, "learning_rate": 5e-06, "loss": 0.0953, "num_input_tokens_seen": 109397144, "step": 1212 }, { "epoch": 5.572413793103449, "loss": 0.07220568507909775, "loss_ce": 1.6351415979443118e-05, "loss_iou": 0.35546875, "loss_num": 0.01446533203125, "loss_xval": 0.072265625, "num_input_tokens_seen": 109397144, "step": 1212 }, { "epoch": 5.577011494252874, "grad_norm": 3.1255568749212816, "learning_rate": 5e-06, "loss": 0.1222, "num_input_tokens_seen": 109487456, "step": 1213 }, { "epoch": 5.577011494252874, "loss": 0.052804555743932724, "loss_ce": 9.145278454525396e-06, "loss_iou": 0.3046875, "loss_num": 0.01055908203125, "loss_xval": 0.052734375, "num_input_tokens_seen": 109487456, "step": 1213 }, { "epoch": 5.581609195402299, "grad_norm": 3.94664764195231, "learning_rate": 5e-06, "loss": 0.067, "num_input_tokens_seen": 109577820, "step": 1214 }, { "epoch": 5.581609195402299, "loss": 0.04463765025138855, "loss_ce": 5.693935236195102e-06, "loss_iou": 0.314453125, "loss_num": 0.0089111328125, "loss_xval": 0.044677734375, "num_input_tokens_seen": 109577820, "step": 1214 }, { "epoch": 5.586206896551724, "grad_norm": 8.212489399334382, "learning_rate": 5e-06, "loss": 0.0566, "num_input_tokens_seen": 109668260, "step": 1215 }, { "epoch": 5.586206896551724, "loss": 0.06496123224496841, "loss_ce": 5.034160494687967e-05, "loss_iou": 0.34765625, "loss_num": 0.01300048828125, "loss_xval": 0.06494140625, "num_input_tokens_seen": 109668260, "step": 1215 }, { "epoch": 5.590804597701149, "grad_norm": 9.692768417013069, "learning_rate": 5e-06, "loss": 0.0697, "num_input_tokens_seen": 109758576, "step": 1216 }, { "epoch": 5.590804597701149, "loss": 0.07001283019781113, "loss_ce": 5.50604636373464e-06, "loss_iou": 0.296875, "loss_num": 0.01397705078125, "loss_xval": 0.06982421875, "num_input_tokens_seen": 109758576, "step": 1216 }, { "epoch": 5.5954022988505745, "grad_norm": 8.222517521217215, "learning_rate": 5e-06, "loss": 0.0974, "num_input_tokens_seen": 109848992, "step": 1217 }, { "epoch": 5.5954022988505745, "loss": 0.09305752068758011, "loss_ce": 2.4687436962267384e-05, "loss_iou": 0.359375, "loss_num": 0.0185546875, "loss_xval": 0.09326171875, "num_input_tokens_seen": 109848992, "step": 1217 }, { "epoch": 5.6, "grad_norm": 14.945824448106285, "learning_rate": 5e-06, "loss": 0.0871, "num_input_tokens_seen": 109939364, "step": 1218 }, { "epoch": 5.6, "loss": 0.09159138798713684, "loss_ce": 8.140663339872845e-06, "loss_iou": 0.3828125, "loss_num": 0.018310546875, "loss_xval": 0.091796875, "num_input_tokens_seen": 109939364, "step": 1218 }, { "epoch": 5.604597701149425, "grad_norm": 28.416530398349355, "learning_rate": 5e-06, "loss": 0.0836, "num_input_tokens_seen": 110029792, "step": 1219 }, { "epoch": 5.604597701149425, "loss": 0.046073831617832184, "loss_ce": 7.547159839305095e-06, "loss_iou": 0.3984375, "loss_num": 0.00921630859375, "loss_xval": 0.046142578125, "num_input_tokens_seen": 110029792, "step": 1219 }, { "epoch": 5.609195402298851, "grad_norm": 12.008787286117224, "learning_rate": 5e-06, "loss": 0.0934, "num_input_tokens_seen": 110120336, "step": 1220 }, { "epoch": 5.609195402298851, "loss": 0.11564863473176956, "loss_ce": 4.804190029972233e-05, "loss_iou": 0.388671875, "loss_num": 0.0230712890625, "loss_xval": 0.11572265625, "num_input_tokens_seen": 110120336, "step": 1220 }, { "epoch": 5.613793103448276, "grad_norm": 22.46676513633078, "learning_rate": 5e-06, "loss": 0.104, "num_input_tokens_seen": 110210540, "step": 1221 }, { "epoch": 5.613793103448276, "loss": 0.07196103781461716, "loss_ce": 5.928850441705436e-07, "loss_iou": 0.390625, "loss_num": 0.014404296875, "loss_xval": 0.07177734375, "num_input_tokens_seen": 110210540, "step": 1221 }, { "epoch": 5.618390804597701, "grad_norm": 23.841340782969, "learning_rate": 5e-06, "loss": 0.1003, "num_input_tokens_seen": 110299420, "step": 1222 }, { "epoch": 5.618390804597701, "loss": 0.11307486146688461, "loss_ce": 7.235145858430769e-06, "loss_iou": 0.37109375, "loss_num": 0.0225830078125, "loss_xval": 0.11328125, "num_input_tokens_seen": 110299420, "step": 1222 }, { "epoch": 5.622988505747126, "grad_norm": 7.337714001096004, "learning_rate": 5e-06, "loss": 0.088, "num_input_tokens_seen": 110389696, "step": 1223 }, { "epoch": 5.622988505747126, "loss": 0.07472731918096542, "loss_ce": 6.606188981095329e-05, "loss_iou": 0.388671875, "loss_num": 0.01495361328125, "loss_xval": 0.07470703125, "num_input_tokens_seen": 110389696, "step": 1223 }, { "epoch": 5.627586206896551, "grad_norm": 11.251552468198575, "learning_rate": 5e-06, "loss": 0.1194, "num_input_tokens_seen": 110480088, "step": 1224 }, { "epoch": 5.627586206896551, "loss": 0.0974973812699318, "loss_ce": 1.3505153901860467e-06, "loss_iou": 0.404296875, "loss_num": 0.01953125, "loss_xval": 0.09765625, "num_input_tokens_seen": 110480088, "step": 1224 }, { "epoch": 5.6321839080459775, "grad_norm": 5.577183353924994, "learning_rate": 5e-06, "loss": 0.0834, "num_input_tokens_seen": 110570532, "step": 1225 }, { "epoch": 5.6321839080459775, "loss": 0.10643593221902847, "loss_ce": 2.1139007003512233e-05, "loss_iou": 0.30078125, "loss_num": 0.021240234375, "loss_xval": 0.1064453125, "num_input_tokens_seen": 110570532, "step": 1225 }, { "epoch": 5.636781609195403, "grad_norm": 6.701001289894439, "learning_rate": 5e-06, "loss": 0.1014, "num_input_tokens_seen": 110659360, "step": 1226 }, { "epoch": 5.636781609195403, "loss": 0.1065259799361229, "loss_ce": 5.0150600145570934e-05, "loss_iou": 0.291015625, "loss_num": 0.0213623046875, "loss_xval": 0.1064453125, "num_input_tokens_seen": 110659360, "step": 1226 }, { "epoch": 5.641379310344828, "grad_norm": 5.386629642258452, "learning_rate": 5e-06, "loss": 0.0618, "num_input_tokens_seen": 110749656, "step": 1227 }, { "epoch": 5.641379310344828, "loss": 0.07365281879901886, "loss_ce": 1.3907001630286686e-05, "loss_iou": 0.27734375, "loss_num": 0.01470947265625, "loss_xval": 0.07373046875, "num_input_tokens_seen": 110749656, "step": 1227 }, { "epoch": 5.645977011494253, "grad_norm": 5.0480129670553175, "learning_rate": 5e-06, "loss": 0.0804, "num_input_tokens_seen": 110839976, "step": 1228 }, { "epoch": 5.645977011494253, "loss": 0.09352147579193115, "loss_ce": 1.5612586139468476e-05, "loss_iou": 0.296875, "loss_num": 0.0186767578125, "loss_xval": 0.09375, "num_input_tokens_seen": 110839976, "step": 1228 }, { "epoch": 5.650574712643678, "grad_norm": 3.2142834138747904, "learning_rate": 5e-06, "loss": 0.1148, "num_input_tokens_seen": 110930364, "step": 1229 }, { "epoch": 5.650574712643678, "loss": 0.1116681694984436, "loss_ce": 4.347142748883925e-06, "loss_iou": 0.427734375, "loss_num": 0.0223388671875, "loss_xval": 0.11181640625, "num_input_tokens_seen": 110930364, "step": 1229 }, { "epoch": 5.655172413793103, "grad_norm": 4.682890336305807, "learning_rate": 5e-06, "loss": 0.1299, "num_input_tokens_seen": 111020076, "step": 1230 }, { "epoch": 5.655172413793103, "loss": 0.14498014748096466, "loss_ce": 6.385195320035564e-06, "loss_iou": 0.29296875, "loss_num": 0.0289306640625, "loss_xval": 0.14453125, "num_input_tokens_seen": 111020076, "step": 1230 }, { "epoch": 5.659770114942528, "grad_norm": 7.235632320302035, "learning_rate": 5e-06, "loss": 0.0682, "num_input_tokens_seen": 111110316, "step": 1231 }, { "epoch": 5.659770114942528, "loss": 0.0729096531867981, "loss_ce": 3.36788289132528e-05, "loss_iou": 0.333984375, "loss_num": 0.01458740234375, "loss_xval": 0.07275390625, "num_input_tokens_seen": 111110316, "step": 1231 }, { "epoch": 5.664367816091954, "grad_norm": 2.3169355950035992, "learning_rate": 5e-06, "loss": 0.0644, "num_input_tokens_seen": 111200728, "step": 1232 }, { "epoch": 5.664367816091954, "loss": 0.07670523226261139, "loss_ce": 1.4556415408151224e-05, "loss_iou": 0.3359375, "loss_num": 0.01531982421875, "loss_xval": 0.07666015625, "num_input_tokens_seen": 111200728, "step": 1232 }, { "epoch": 5.6689655172413795, "grad_norm": 9.054517929993095, "learning_rate": 5e-06, "loss": 0.1226, "num_input_tokens_seen": 111290364, "step": 1233 }, { "epoch": 5.6689655172413795, "loss": 0.13384370505809784, "loss_ce": 2.4130109522957355e-05, "loss_iou": 0.345703125, "loss_num": 0.0267333984375, "loss_xval": 0.1337890625, "num_input_tokens_seen": 111290364, "step": 1233 }, { "epoch": 5.673563218390805, "grad_norm": 2.4147250325583536, "learning_rate": 5e-06, "loss": 0.08, "num_input_tokens_seen": 111380760, "step": 1234 }, { "epoch": 5.673563218390805, "loss": 0.08226744830608368, "loss_ce": 7.31718364477274e-06, "loss_iou": 0.2353515625, "loss_num": 0.0164794921875, "loss_xval": 0.08203125, "num_input_tokens_seen": 111380760, "step": 1234 }, { "epoch": 5.67816091954023, "grad_norm": 4.243893457772928, "learning_rate": 5e-06, "loss": 0.0735, "num_input_tokens_seen": 111471116, "step": 1235 }, { "epoch": 5.67816091954023, "loss": 0.07256424427032471, "loss_ce": 8.707445886102505e-06, "loss_iou": 0.291015625, "loss_num": 0.0145263671875, "loss_xval": 0.07275390625, "num_input_tokens_seen": 111471116, "step": 1235 }, { "epoch": 5.682758620689655, "grad_norm": 1.8993669782662472, "learning_rate": 5e-06, "loss": 0.1057, "num_input_tokens_seen": 111561436, "step": 1236 }, { "epoch": 5.682758620689655, "loss": 0.13686691224575043, "loss_ce": 3.2013495001592673e-06, "loss_iou": 0.28125, "loss_num": 0.0274658203125, "loss_xval": 0.13671875, "num_input_tokens_seen": 111561436, "step": 1236 }, { "epoch": 5.68735632183908, "grad_norm": 4.878685087840704, "learning_rate": 5e-06, "loss": 0.0888, "num_input_tokens_seen": 111651816, "step": 1237 }, { "epoch": 5.68735632183908, "loss": 0.07835354655981064, "loss_ce": 1.4925600225978997e-05, "loss_iou": 0.283203125, "loss_num": 0.015625, "loss_xval": 0.078125, "num_input_tokens_seen": 111651816, "step": 1237 }, { "epoch": 5.691954022988506, "grad_norm": 8.928325096054786, "learning_rate": 5e-06, "loss": 0.1009, "num_input_tokens_seen": 111742112, "step": 1238 }, { "epoch": 5.691954022988506, "loss": 0.12431655079126358, "loss_ce": 1.8453329175827093e-05, "loss_iou": 0.408203125, "loss_num": 0.0247802734375, "loss_xval": 0.12451171875, "num_input_tokens_seen": 111742112, "step": 1238 }, { "epoch": 5.696551724137931, "grad_norm": 4.857562908474219, "learning_rate": 5e-06, "loss": 0.1258, "num_input_tokens_seen": 111832440, "step": 1239 }, { "epoch": 5.696551724137931, "loss": 0.10367751866579056, "loss_ce": 9.310015229857527e-06, "loss_iou": 0.359375, "loss_num": 0.020751953125, "loss_xval": 0.103515625, "num_input_tokens_seen": 111832440, "step": 1239 }, { "epoch": 5.7011494252873565, "grad_norm": 2.1347164816602997, "learning_rate": 5e-06, "loss": 0.0597, "num_input_tokens_seen": 111922904, "step": 1240 }, { "epoch": 5.7011494252873565, "loss": 0.06786856055259705, "loss_ce": 1.2722593055514153e-05, "loss_iou": 0.341796875, "loss_num": 0.0135498046875, "loss_xval": 0.06787109375, "num_input_tokens_seen": 111922904, "step": 1240 }, { "epoch": 5.705747126436782, "grad_norm": 8.531354475094487, "learning_rate": 5e-06, "loss": 0.0715, "num_input_tokens_seen": 112013212, "step": 1241 }, { "epoch": 5.705747126436782, "loss": 0.061662279069423676, "loss_ce": 1.5145051293075085e-06, "loss_iou": 0.35546875, "loss_num": 0.0123291015625, "loss_xval": 0.061767578125, "num_input_tokens_seen": 112013212, "step": 1241 }, { "epoch": 5.710344827586207, "grad_norm": 18.82697007656036, "learning_rate": 5e-06, "loss": 0.1572, "num_input_tokens_seen": 112103552, "step": 1242 }, { "epoch": 5.710344827586207, "loss": 0.1454518437385559, "loss_ce": 5.055922429164639e-06, "loss_iou": 0.267578125, "loss_num": 0.029052734375, "loss_xval": 0.1455078125, "num_input_tokens_seen": 112103552, "step": 1242 }, { "epoch": 5.714942528735632, "grad_norm": 5.439005001756043, "learning_rate": 5e-06, "loss": 0.0997, "num_input_tokens_seen": 112193936, "step": 1243 }, { "epoch": 5.714942528735632, "loss": 0.04381529614329338, "loss_ce": 3.7829264329047874e-05, "loss_iou": 0.236328125, "loss_num": 0.00872802734375, "loss_xval": 0.043701171875, "num_input_tokens_seen": 112193936, "step": 1243 }, { "epoch": 5.719540229885057, "grad_norm": 18.879016825135775, "learning_rate": 5e-06, "loss": 0.0826, "num_input_tokens_seen": 112284184, "step": 1244 }, { "epoch": 5.719540229885057, "loss": 0.10435809940099716, "loss_ce": 1.8497697965358384e-05, "loss_iou": 0.33984375, "loss_num": 0.0208740234375, "loss_xval": 0.1044921875, "num_input_tokens_seen": 112284184, "step": 1244 }, { "epoch": 5.724137931034483, "grad_norm": 4.028482212688753, "learning_rate": 5e-06, "loss": 0.1301, "num_input_tokens_seen": 112374556, "step": 1245 }, { "epoch": 5.724137931034483, "loss": 0.1041383296251297, "loss_ce": 1.2350090401014313e-05, "loss_iou": 0.388671875, "loss_num": 0.0208740234375, "loss_xval": 0.10400390625, "num_input_tokens_seen": 112374556, "step": 1245 }, { "epoch": 5.728735632183908, "grad_norm": 6.123310827199504, "learning_rate": 5e-06, "loss": 0.0711, "num_input_tokens_seen": 112464124, "step": 1246 }, { "epoch": 5.728735632183908, "loss": 0.0810786560177803, "loss_ce": 8.710509973752778e-06, "loss_iou": 0.294921875, "loss_num": 0.0162353515625, "loss_xval": 0.0810546875, "num_input_tokens_seen": 112464124, "step": 1246 }, { "epoch": 5.733333333333333, "grad_norm": 6.108386978174192, "learning_rate": 5e-06, "loss": 0.1032, "num_input_tokens_seen": 112554496, "step": 1247 }, { "epoch": 5.733333333333333, "loss": 0.06712520867586136, "loss_ce": 1.7052239854820073e-05, "loss_iou": 0.32421875, "loss_num": 0.013427734375, "loss_xval": 0.06689453125, "num_input_tokens_seen": 112554496, "step": 1247 }, { "epoch": 5.7379310344827585, "grad_norm": 6.640331304354758, "learning_rate": 5e-06, "loss": 0.0834, "num_input_tokens_seen": 112644988, "step": 1248 }, { "epoch": 5.7379310344827585, "loss": 0.10246039181947708, "loss_ce": 2.814024264807813e-05, "loss_iou": 0.3671875, "loss_num": 0.0205078125, "loss_xval": 0.1025390625, "num_input_tokens_seen": 112644988, "step": 1248 }, { "epoch": 5.742528735632184, "grad_norm": 14.848833163560919, "learning_rate": 5e-06, "loss": 0.0836, "num_input_tokens_seen": 112735364, "step": 1249 }, { "epoch": 5.742528735632184, "loss": 0.0761798620223999, "loss_ce": 7.98174187366385e-06, "loss_iou": 0.330078125, "loss_num": 0.0152587890625, "loss_xval": 0.076171875, "num_input_tokens_seen": 112735364, "step": 1249 }, { "epoch": 5.747126436781609, "grad_norm": 11.72709348916867, "learning_rate": 5e-06, "loss": 0.0848, "num_input_tokens_seen": 112825776, "step": 1250 }, { "epoch": 5.747126436781609, "eval_seeclick_CIoU": 0.4573807418346405, "eval_seeclick_GIoU": 0.4410470873117447, "eval_seeclick_IoU": 0.49894072115421295, "eval_seeclick_MAE_all": 0.061043232679367065, "eval_seeclick_MAE_h": 0.053627705201506615, "eval_seeclick_MAE_w": 0.11304067447781563, "eval_seeclick_MAE_x_boxes": 0.10517356544733047, "eval_seeclick_MAE_y_boxes": 0.05592040531337261, "eval_seeclick_NUM_probability": 0.9999991357326508, "eval_seeclick_inside_bbox": 0.7542613744735718, "eval_seeclick_loss": 0.37355467677116394, "eval_seeclick_loss_ce": 0.07825379073619843, "eval_seeclick_loss_iou": 0.5123291015625, "eval_seeclick_loss_num": 0.06250762939453125, "eval_seeclick_loss_xval": 0.31231689453125, "eval_seeclick_runtime": 85.2206, "eval_seeclick_samples_per_second": 0.505, "eval_seeclick_steps_per_second": 0.023, "num_input_tokens_seen": 112825776, "step": 1250 }, { "epoch": 5.747126436781609, "eval_icons_CIoU": 0.5825834572315216, "eval_icons_GIoU": 0.5834551155567169, "eval_icons_IoU": 0.616531640291214, "eval_icons_MAE_all": 0.043770069256424904, "eval_icons_MAE_h": 0.07383041456341743, "eval_icons_MAE_w": 0.06421982683241367, "eval_icons_MAE_x_boxes": 0.05823229439556599, "eval_icons_MAE_y_boxes": 0.07248594611883163, "eval_icons_NUM_probability": 0.999999612569809, "eval_icons_inside_bbox": 0.7673611044883728, "eval_icons_loss": 0.21254034340381622, "eval_icons_loss_ce": 4.21214093648814e-06, "eval_icons_loss_iou": 0.447265625, "eval_icons_loss_num": 0.04521942138671875, "eval_icons_loss_xval": 0.226104736328125, "eval_icons_runtime": 85.9172, "eval_icons_samples_per_second": 0.582, "eval_icons_steps_per_second": 0.023, "num_input_tokens_seen": 112825776, "step": 1250 }, { "epoch": 5.747126436781609, "eval_screenspot_CIoU": 0.4307680130004883, "eval_screenspot_GIoU": 0.4167415201663971, "eval_screenspot_IoU": 0.4970965087413788, "eval_screenspot_MAE_all": 0.08507336676120758, "eval_screenspot_MAE_h": 0.07819427798191707, "eval_screenspot_MAE_w": 0.17445524781942368, "eval_screenspot_MAE_x_boxes": 0.1695632884899775, "eval_screenspot_MAE_y_boxes": 0.0742349624633789, "eval_screenspot_NUM_probability": 0.9999984900156657, "eval_screenspot_inside_bbox": 0.7637499968210856, "eval_screenspot_loss": 0.4254446029663086, "eval_screenspot_loss_ce": 6.250914157135412e-05, "eval_screenspot_loss_iou": 0.3890787760416667, "eval_screenspot_loss_num": 0.087158203125, "eval_screenspot_loss_xval": 0.4358723958333333, "eval_screenspot_runtime": 160.2493, "eval_screenspot_samples_per_second": 0.555, "eval_screenspot_steps_per_second": 0.019, "num_input_tokens_seen": 112825776, "step": 1250 }, { "epoch": 5.747126436781609, "eval_compot_CIoU": 0.48233287036418915, "eval_compot_GIoU": 0.4589613825082779, "eval_compot_IoU": 0.5473670959472656, "eval_compot_MAE_all": 0.05806807801127434, "eval_compot_MAE_h": 0.07524906471371651, "eval_compot_MAE_w": 0.11501751467585564, "eval_compot_MAE_x_boxes": 0.10463830083608627, "eval_compot_MAE_y_boxes": 0.07366756349802017, "eval_compot_NUM_probability": 0.999998927116394, "eval_compot_inside_bbox": 0.7760416567325592, "eval_compot_loss": 0.3192625343799591, "eval_compot_loss_ce": 0.0109380844514817, "eval_compot_loss_iou": 0.501953125, "eval_compot_loss_num": 0.0531005859375, "eval_compot_loss_xval": 0.265533447265625, "eval_compot_runtime": 86.8999, "eval_compot_samples_per_second": 0.575, "eval_compot_steps_per_second": 0.023, "num_input_tokens_seen": 112825776, "step": 1250 }, { "epoch": 5.747126436781609, "loss": 0.1921016126871109, "loss_ce": 0.006859918590635061, "loss_iou": 0.53515625, "loss_num": 0.037109375, "loss_xval": 0.185546875, "num_input_tokens_seen": 112825776, "step": 1250 }, { "epoch": 5.751724137931035, "grad_norm": 48.39229529580768, "learning_rate": 5e-06, "loss": 0.1628, "num_input_tokens_seen": 112916064, "step": 1251 }, { "epoch": 5.751724137931035, "loss": 0.14167295396327972, "loss_ce": 4.0873368561733514e-05, "loss_iou": 0.3203125, "loss_num": 0.0283203125, "loss_xval": 0.1416015625, "num_input_tokens_seen": 112916064, "step": 1251 }, { "epoch": 5.75632183908046, "grad_norm": 7.309880590381852, "learning_rate": 5e-06, "loss": 0.1105, "num_input_tokens_seen": 113006264, "step": 1252 }, { "epoch": 5.75632183908046, "loss": 0.1379764974117279, "loss_ce": 6.526359356939793e-06, "loss_iou": 0.3984375, "loss_num": 0.027587890625, "loss_xval": 0.1376953125, "num_input_tokens_seen": 113006264, "step": 1252 }, { "epoch": 5.760919540229885, "grad_norm": 4.7842464607453, "learning_rate": 5e-06, "loss": 0.0712, "num_input_tokens_seen": 113096588, "step": 1253 }, { "epoch": 5.760919540229885, "loss": 0.06593406945466995, "loss_ce": 8.451766007055994e-07, "loss_iou": 0.279296875, "loss_num": 0.01318359375, "loss_xval": 0.06591796875, "num_input_tokens_seen": 113096588, "step": 1253 }, { "epoch": 5.76551724137931, "grad_norm": 9.30963816664603, "learning_rate": 5e-06, "loss": 0.1062, "num_input_tokens_seen": 113186932, "step": 1254 }, { "epoch": 5.76551724137931, "loss": 0.10627120733261108, "loss_ce": 9.002267688629217e-06, "loss_iou": 0.396484375, "loss_num": 0.021240234375, "loss_xval": 0.1064453125, "num_input_tokens_seen": 113186932, "step": 1254 }, { "epoch": 5.7701149425287355, "grad_norm": 5.032701026633435, "learning_rate": 5e-06, "loss": 0.0929, "num_input_tokens_seen": 113277316, "step": 1255 }, { "epoch": 5.7701149425287355, "loss": 0.1125558465719223, "loss_ce": 7.017050393187674e-06, "loss_iou": 0.341796875, "loss_num": 0.0224609375, "loss_xval": 0.1123046875, "num_input_tokens_seen": 113277316, "step": 1255 }, { "epoch": 5.774712643678161, "grad_norm": 7.348457813587205, "learning_rate": 5e-06, "loss": 0.0789, "num_input_tokens_seen": 113367640, "step": 1256 }, { "epoch": 5.774712643678161, "loss": 0.06398795545101166, "loss_ce": 0.00011466428986750543, "loss_iou": 0.365234375, "loss_num": 0.01275634765625, "loss_xval": 0.06396484375, "num_input_tokens_seen": 113367640, "step": 1256 }, { "epoch": 5.779310344827586, "grad_norm": 3.6430091269186304, "learning_rate": 5e-06, "loss": 0.1015, "num_input_tokens_seen": 113457940, "step": 1257 }, { "epoch": 5.779310344827586, "loss": 0.07985039055347443, "loss_ce": 1.6411700926255435e-05, "loss_iou": 0.3359375, "loss_num": 0.0159912109375, "loss_xval": 0.080078125, "num_input_tokens_seen": 113457940, "step": 1257 }, { "epoch": 5.783908045977012, "grad_norm": 3.3598636033304454, "learning_rate": 5e-06, "loss": 0.1014, "num_input_tokens_seen": 113548324, "step": 1258 }, { "epoch": 5.783908045977012, "loss": 0.11753726005554199, "loss_ce": 4.458225521375425e-05, "loss_iou": 0.3125, "loss_num": 0.0234375, "loss_xval": 0.11767578125, "num_input_tokens_seen": 113548324, "step": 1258 }, { "epoch": 5.788505747126437, "grad_norm": 4.068043795707238, "learning_rate": 5e-06, "loss": 0.1092, "num_input_tokens_seen": 113638644, "step": 1259 }, { "epoch": 5.788505747126437, "loss": 0.138657346367836, "loss_ce": 7.249969939948642e-07, "loss_iou": 0.2578125, "loss_num": 0.02783203125, "loss_xval": 0.138671875, "num_input_tokens_seen": 113638644, "step": 1259 }, { "epoch": 5.793103448275862, "grad_norm": 2.279662032054951, "learning_rate": 5e-06, "loss": 0.0673, "num_input_tokens_seen": 113729080, "step": 1260 }, { "epoch": 5.793103448275862, "loss": 0.10126355290412903, "loss_ce": 6.22008246864425e-06, "loss_iou": 0.3359375, "loss_num": 0.020263671875, "loss_xval": 0.10107421875, "num_input_tokens_seen": 113729080, "step": 1260 }, { "epoch": 5.797701149425287, "grad_norm": 5.649226738355132, "learning_rate": 5e-06, "loss": 0.0636, "num_input_tokens_seen": 113819484, "step": 1261 }, { "epoch": 5.797701149425287, "loss": 0.08105180412530899, "loss_ce": 4.743245881400071e-06, "loss_iou": 0.275390625, "loss_num": 0.0162353515625, "loss_xval": 0.0810546875, "num_input_tokens_seen": 113819484, "step": 1261 }, { "epoch": 5.802298850574712, "grad_norm": 1.5641332197845963, "learning_rate": 5e-06, "loss": 0.1053, "num_input_tokens_seen": 113909888, "step": 1262 }, { "epoch": 5.802298850574712, "loss": 0.061422187834978104, "loss_ce": 5.559354576689657e-06, "loss_iou": 0.248046875, "loss_num": 0.01226806640625, "loss_xval": 0.0615234375, "num_input_tokens_seen": 113909888, "step": 1262 }, { "epoch": 5.8068965517241375, "grad_norm": 15.670084350710225, "learning_rate": 5e-06, "loss": 0.1055, "num_input_tokens_seen": 114000272, "step": 1263 }, { "epoch": 5.8068965517241375, "loss": 0.11508636921644211, "loss_ce": 4.585368969856063e-06, "loss_iou": 0.326171875, "loss_num": 0.0230712890625, "loss_xval": 0.115234375, "num_input_tokens_seen": 114000272, "step": 1263 }, { "epoch": 5.811494252873564, "grad_norm": 3.835335458461329, "learning_rate": 5e-06, "loss": 0.1287, "num_input_tokens_seen": 114090524, "step": 1264 }, { "epoch": 5.811494252873564, "loss": 0.08618977665901184, "loss_ce": 8.132398761517834e-06, "loss_iou": 0.357421875, "loss_num": 0.0172119140625, "loss_xval": 0.0859375, "num_input_tokens_seen": 114090524, "step": 1264 }, { "epoch": 5.816091954022989, "grad_norm": 9.99731943967783, "learning_rate": 5e-06, "loss": 0.1382, "num_input_tokens_seen": 114180868, "step": 1265 }, { "epoch": 5.816091954022989, "loss": 0.1053665354847908, "loss_ce": 4.596740836859681e-06, "loss_iou": 0.30078125, "loss_num": 0.0211181640625, "loss_xval": 0.10546875, "num_input_tokens_seen": 114180868, "step": 1265 }, { "epoch": 5.820689655172414, "grad_norm": 2.911511418981655, "learning_rate": 5e-06, "loss": 0.0754, "num_input_tokens_seen": 114271204, "step": 1266 }, { "epoch": 5.820689655172414, "loss": 0.07806918770074844, "loss_ce": 5.221021638135426e-06, "loss_iou": 0.40234375, "loss_num": 0.015625, "loss_xval": 0.078125, "num_input_tokens_seen": 114271204, "step": 1266 }, { "epoch": 5.825287356321839, "grad_norm": 6.2344029082542, "learning_rate": 5e-06, "loss": 0.0833, "num_input_tokens_seen": 114361536, "step": 1267 }, { "epoch": 5.825287356321839, "loss": 0.05028009042143822, "loss_ce": 2.380430260018329e-06, "loss_iou": 0.388671875, "loss_num": 0.01007080078125, "loss_xval": 0.05029296875, "num_input_tokens_seen": 114361536, "step": 1267 }, { "epoch": 5.829885057471264, "grad_norm": 3.7663498969437175, "learning_rate": 5e-06, "loss": 0.0774, "num_input_tokens_seen": 114451868, "step": 1268 }, { "epoch": 5.829885057471264, "loss": 0.08314789086580276, "loss_ce": 2.7451824280433357e-06, "loss_iou": 0.3515625, "loss_num": 0.0166015625, "loss_xval": 0.0830078125, "num_input_tokens_seen": 114451868, "step": 1268 }, { "epoch": 5.834482758620689, "grad_norm": 7.1687713447544406, "learning_rate": 5e-06, "loss": 0.0694, "num_input_tokens_seen": 114542224, "step": 1269 }, { "epoch": 5.834482758620689, "loss": 0.08698830008506775, "loss_ce": 4.3723342969315127e-05, "loss_iou": 0.40625, "loss_num": 0.017333984375, "loss_xval": 0.0869140625, "num_input_tokens_seen": 114542224, "step": 1269 }, { "epoch": 5.8390804597701145, "grad_norm": 7.785551994387596, "learning_rate": 5e-06, "loss": 0.0884, "num_input_tokens_seen": 114632548, "step": 1270 }, { "epoch": 5.8390804597701145, "loss": 0.04482455551624298, "loss_ce": 9.493617653788533e-06, "loss_iou": 0.302734375, "loss_num": 0.00897216796875, "loss_xval": 0.044921875, "num_input_tokens_seen": 114632548, "step": 1270 }, { "epoch": 5.8436781609195405, "grad_norm": 4.9792230982606815, "learning_rate": 5e-06, "loss": 0.0807, "num_input_tokens_seen": 114722960, "step": 1271 }, { "epoch": 5.8436781609195405, "loss": 0.0937369242310524, "loss_ce": 2.1818657387484564e-06, "loss_iou": 0.396484375, "loss_num": 0.018798828125, "loss_xval": 0.09375, "num_input_tokens_seen": 114722960, "step": 1271 }, { "epoch": 5.848275862068966, "grad_norm": 7.05750958333928, "learning_rate": 5e-06, "loss": 0.1077, "num_input_tokens_seen": 114813360, "step": 1272 }, { "epoch": 5.848275862068966, "loss": 0.08838266134262085, "loss_ce": 1.9016588339582086e-05, "loss_iou": 0.283203125, "loss_num": 0.0177001953125, "loss_xval": 0.08837890625, "num_input_tokens_seen": 114813360, "step": 1272 }, { "epoch": 5.852873563218391, "grad_norm": 2.2929149537355933, "learning_rate": 5e-06, "loss": 0.0831, "num_input_tokens_seen": 114903664, "step": 1273 }, { "epoch": 5.852873563218391, "loss": 0.08243682235479355, "loss_ce": 8.84367273101816e-06, "loss_iou": 0.375, "loss_num": 0.0164794921875, "loss_xval": 0.08251953125, "num_input_tokens_seen": 114903664, "step": 1273 }, { "epoch": 5.857471264367816, "grad_norm": 11.633034929488161, "learning_rate": 5e-06, "loss": 0.0983, "num_input_tokens_seen": 114994008, "step": 1274 }, { "epoch": 5.857471264367816, "loss": 0.1149655431509018, "loss_ce": 5.821747436129954e-06, "loss_iou": 0.267578125, "loss_num": 0.02294921875, "loss_xval": 0.11474609375, "num_input_tokens_seen": 114994008, "step": 1274 }, { "epoch": 5.862068965517241, "grad_norm": 13.85967700936422, "learning_rate": 5e-06, "loss": 0.0762, "num_input_tokens_seen": 115084404, "step": 1275 }, { "epoch": 5.862068965517241, "loss": 0.07548947632312775, "loss_ce": 4.2469814616197255e-06, "loss_iou": 0.2578125, "loss_num": 0.01507568359375, "loss_xval": 0.07568359375, "num_input_tokens_seen": 115084404, "step": 1275 }, { "epoch": 5.866666666666667, "grad_norm": 8.591339370910877, "learning_rate": 5e-06, "loss": 0.0771, "num_input_tokens_seen": 115174752, "step": 1276 }, { "epoch": 5.866666666666667, "loss": 0.08044753968715668, "loss_ce": 3.201591425749939e-06, "loss_iou": 0.18359375, "loss_num": 0.01611328125, "loss_xval": 0.08056640625, "num_input_tokens_seen": 115174752, "step": 1276 }, { "epoch": 5.871264367816092, "grad_norm": 10.263498686615506, "learning_rate": 5e-06, "loss": 0.1024, "num_input_tokens_seen": 115265232, "step": 1277 }, { "epoch": 5.871264367816092, "loss": 0.12569460272789001, "loss_ce": 7.947778613015544e-06, "loss_iou": 0.2490234375, "loss_num": 0.025146484375, "loss_xval": 0.1259765625, "num_input_tokens_seen": 115265232, "step": 1277 }, { "epoch": 5.875862068965517, "grad_norm": 11.400124255336879, "learning_rate": 5e-06, "loss": 0.0914, "num_input_tokens_seen": 115355708, "step": 1278 }, { "epoch": 5.875862068965517, "loss": 0.1197689026594162, "loss_ce": 2.66920051217312e-06, "loss_iou": 0.3125, "loss_num": 0.02392578125, "loss_xval": 0.11962890625, "num_input_tokens_seen": 115355708, "step": 1278 }, { "epoch": 5.880459770114943, "grad_norm": 4.3793822993822715, "learning_rate": 5e-06, "loss": 0.0979, "num_input_tokens_seen": 115446204, "step": 1279 }, { "epoch": 5.880459770114943, "loss": 0.1235429123044014, "loss_ce": 7.75079388404265e-06, "loss_iou": 0.431640625, "loss_num": 0.0247802734375, "loss_xval": 0.12353515625, "num_input_tokens_seen": 115446204, "step": 1279 }, { "epoch": 5.885057471264368, "grad_norm": 8.059241246157443, "learning_rate": 5e-06, "loss": 0.0801, "num_input_tokens_seen": 115536632, "step": 1280 }, { "epoch": 5.885057471264368, "loss": 0.08218298107385635, "loss_ce": 1.439858169760555e-05, "loss_iou": 0.30078125, "loss_num": 0.0164794921875, "loss_xval": 0.08203125, "num_input_tokens_seen": 115536632, "step": 1280 }, { "epoch": 5.889655172413793, "grad_norm": 9.48342209591454, "learning_rate": 5e-06, "loss": 0.0749, "num_input_tokens_seen": 115627096, "step": 1281 }, { "epoch": 5.889655172413793, "loss": 0.08530169725418091, "loss_ce": 5.068644895800389e-06, "loss_iou": 0.3203125, "loss_num": 0.01708984375, "loss_xval": 0.08544921875, "num_input_tokens_seen": 115627096, "step": 1281 }, { "epoch": 5.894252873563218, "grad_norm": 4.958627136670461, "learning_rate": 5e-06, "loss": 0.1206, "num_input_tokens_seen": 115717508, "step": 1282 }, { "epoch": 5.894252873563218, "loss": 0.10856582224369049, "loss_ce": 1.4800659300817642e-05, "loss_iou": 0.369140625, "loss_num": 0.021728515625, "loss_xval": 0.1083984375, "num_input_tokens_seen": 115717508, "step": 1282 }, { "epoch": 5.898850574712643, "grad_norm": 2.619956686440511, "learning_rate": 5e-06, "loss": 0.0937, "num_input_tokens_seen": 115807116, "step": 1283 }, { "epoch": 5.898850574712643, "loss": 0.1267540454864502, "loss_ce": 1.4542956705554388e-05, "loss_iou": 0.28515625, "loss_num": 0.025390625, "loss_xval": 0.126953125, "num_input_tokens_seen": 115807116, "step": 1283 }, { "epoch": 5.903448275862069, "grad_norm": 1.9178286629575005, "learning_rate": 5e-06, "loss": 0.0906, "num_input_tokens_seen": 115897536, "step": 1284 }, { "epoch": 5.903448275862069, "loss": 0.06888516247272491, "loss_ce": 6.993210263317451e-06, "loss_iou": 0.25390625, "loss_num": 0.01373291015625, "loss_xval": 0.06884765625, "num_input_tokens_seen": 115897536, "step": 1284 }, { "epoch": 5.908045977011494, "grad_norm": 4.687249341290416, "learning_rate": 5e-06, "loss": 0.0932, "num_input_tokens_seen": 115987980, "step": 1285 }, { "epoch": 5.908045977011494, "loss": 0.05946702882647514, "loss_ce": 1.87891364475945e-05, "loss_iou": 0.32421875, "loss_num": 0.01190185546875, "loss_xval": 0.0595703125, "num_input_tokens_seen": 115987980, "step": 1285 }, { "epoch": 5.9126436781609195, "grad_norm": 9.032450106052789, "learning_rate": 5e-06, "loss": 0.0588, "num_input_tokens_seen": 116078456, "step": 1286 }, { "epoch": 5.9126436781609195, "loss": 0.051251400262117386, "loss_ce": 1.238779896084452e-05, "loss_iou": 0.2890625, "loss_num": 0.01025390625, "loss_xval": 0.05126953125, "num_input_tokens_seen": 116078456, "step": 1286 }, { "epoch": 5.917241379310345, "grad_norm": 6.2264189550968805, "learning_rate": 5e-06, "loss": 0.1321, "num_input_tokens_seen": 116168752, "step": 1287 }, { "epoch": 5.917241379310345, "loss": 0.1135876327753067, "loss_ce": 1.2120165138185257e-06, "loss_iou": 0.3359375, "loss_num": 0.022705078125, "loss_xval": 0.11376953125, "num_input_tokens_seen": 116168752, "step": 1287 }, { "epoch": 5.92183908045977, "grad_norm": 4.579123122057683, "learning_rate": 5e-06, "loss": 0.0763, "num_input_tokens_seen": 116259044, "step": 1288 }, { "epoch": 5.92183908045977, "loss": 0.09734739363193512, "loss_ce": 2.6833829906536266e-05, "loss_iou": 0.302734375, "loss_num": 0.0194091796875, "loss_xval": 0.09716796875, "num_input_tokens_seen": 116259044, "step": 1288 }, { "epoch": 5.926436781609196, "grad_norm": 6.37974017894556, "learning_rate": 5e-06, "loss": 0.1262, "num_input_tokens_seen": 116349420, "step": 1289 }, { "epoch": 5.926436781609196, "loss": 0.12337080389261246, "loss_ce": 3.4989259347639745e-06, "loss_iou": 0.37890625, "loss_num": 0.024658203125, "loss_xval": 0.12353515625, "num_input_tokens_seen": 116349420, "step": 1289 }, { "epoch": 5.931034482758621, "grad_norm": 5.236407477085609, "learning_rate": 5e-06, "loss": 0.1198, "num_input_tokens_seen": 116439780, "step": 1290 }, { "epoch": 5.931034482758621, "loss": 0.11725269258022308, "loss_ce": 4.161834567639744e-06, "loss_iou": 0.2353515625, "loss_num": 0.0234375, "loss_xval": 0.1171875, "num_input_tokens_seen": 116439780, "step": 1290 }, { "epoch": 5.935632183908046, "grad_norm": 6.939019502168552, "learning_rate": 5e-06, "loss": 0.0819, "num_input_tokens_seen": 116530192, "step": 1291 }, { "epoch": 5.935632183908046, "loss": 0.10045474022626877, "loss_ce": 5.191009404370561e-05, "loss_iou": 0.36328125, "loss_num": 0.0201416015625, "loss_xval": 0.1005859375, "num_input_tokens_seen": 116530192, "step": 1291 }, { "epoch": 5.940229885057471, "grad_norm": 5.923858548372062, "learning_rate": 5e-06, "loss": 0.0684, "num_input_tokens_seen": 116620592, "step": 1292 }, { "epoch": 5.940229885057471, "loss": 0.061371780931949615, "loss_ce": 9.32706370804226e-07, "loss_iou": 0.28515625, "loss_num": 0.01226806640625, "loss_xval": 0.061279296875, "num_input_tokens_seen": 116620592, "step": 1292 }, { "epoch": 5.944827586206896, "grad_norm": 5.436359048849817, "learning_rate": 5e-06, "loss": 0.0778, "num_input_tokens_seen": 116710964, "step": 1293 }, { "epoch": 5.944827586206896, "loss": 0.05396192520856857, "loss_ce": 6.846322776254965e-06, "loss_iou": 0.32421875, "loss_num": 0.01080322265625, "loss_xval": 0.053955078125, "num_input_tokens_seen": 116710964, "step": 1293 }, { "epoch": 5.949425287356322, "grad_norm": 7.23604737625205, "learning_rate": 5e-06, "loss": 0.0746, "num_input_tokens_seen": 116800480, "step": 1294 }, { "epoch": 5.949425287356322, "loss": 0.08210571110248566, "loss_ce": 1.3423375094134826e-05, "loss_iou": 0.326171875, "loss_num": 0.016357421875, "loss_xval": 0.08203125, "num_input_tokens_seen": 116800480, "step": 1294 }, { "epoch": 5.954022988505747, "grad_norm": 2.028553713875407, "learning_rate": 5e-06, "loss": 0.1246, "num_input_tokens_seen": 116890808, "step": 1295 }, { "epoch": 5.954022988505747, "loss": 0.09970621019601822, "loss_ce": 3.580232078093104e-05, "loss_iou": 0.29296875, "loss_num": 0.0198974609375, "loss_xval": 0.099609375, "num_input_tokens_seen": 116890808, "step": 1295 }, { "epoch": 5.958620689655173, "grad_norm": 8.871343329486912, "learning_rate": 5e-06, "loss": 0.0929, "num_input_tokens_seen": 116981168, "step": 1296 }, { "epoch": 5.958620689655173, "loss": 0.0988428071141243, "loss_ce": 2.6887415515375324e-05, "loss_iou": 0.31640625, "loss_num": 0.019775390625, "loss_xval": 0.0986328125, "num_input_tokens_seen": 116981168, "step": 1296 }, { "epoch": 5.963218390804598, "grad_norm": 6.717350736332018, "learning_rate": 5e-06, "loss": 0.0807, "num_input_tokens_seen": 117071416, "step": 1297 }, { "epoch": 5.963218390804598, "loss": 0.10025615990161896, "loss_ce": 5.9157691794098355e-06, "loss_iou": 0.35546875, "loss_num": 0.02001953125, "loss_xval": 0.10009765625, "num_input_tokens_seen": 117071416, "step": 1297 }, { "epoch": 5.967816091954023, "grad_norm": 8.58100221704676, "learning_rate": 5e-06, "loss": 0.1051, "num_input_tokens_seen": 117161736, "step": 1298 }, { "epoch": 5.967816091954023, "loss": 0.10571430623531342, "loss_ce": 3.193915108568035e-05, "loss_iou": 0.35546875, "loss_num": 0.0211181640625, "loss_xval": 0.10546875, "num_input_tokens_seen": 117161736, "step": 1298 }, { "epoch": 5.972413793103448, "grad_norm": 11.414603962669956, "learning_rate": 5e-06, "loss": 0.0951, "num_input_tokens_seen": 117252180, "step": 1299 }, { "epoch": 5.972413793103448, "loss": 0.09330064058303833, "loss_ce": 3.891930464305915e-05, "loss_iou": 0.337890625, "loss_num": 0.0186767578125, "loss_xval": 0.09326171875, "num_input_tokens_seen": 117252180, "step": 1299 }, { "epoch": 5.977011494252873, "grad_norm": 14.302282290509867, "learning_rate": 5e-06, "loss": 0.1022, "num_input_tokens_seen": 117342672, "step": 1300 }, { "epoch": 5.977011494252873, "loss": 0.09649240970611572, "loss_ce": 1.108789183490444e-05, "loss_iou": 0.322265625, "loss_num": 0.019287109375, "loss_xval": 0.0966796875, "num_input_tokens_seen": 117342672, "step": 1300 }, { "epoch": 5.9816091954022985, "grad_norm": 4.2991078616550515, "learning_rate": 5e-06, "loss": 0.1158, "num_input_tokens_seen": 117433108, "step": 1301 }, { "epoch": 5.9816091954022985, "loss": 0.11118273437023163, "loss_ce": 7.203209406725364e-06, "loss_iou": 0.3984375, "loss_num": 0.022216796875, "loss_xval": 0.111328125, "num_input_tokens_seen": 117433108, "step": 1301 }, { "epoch": 5.9862068965517246, "grad_norm": 8.51577704791888, "learning_rate": 5e-06, "loss": 0.0691, "num_input_tokens_seen": 117523460, "step": 1302 }, { "epoch": 5.9862068965517246, "loss": 0.07513649761676788, "loss_ce": 2.222008106400608e-06, "loss_iou": 0.32421875, "loss_num": 0.0150146484375, "loss_xval": 0.0751953125, "num_input_tokens_seen": 117523460, "step": 1302 }, { "epoch": 5.99080459770115, "grad_norm": 2.677824187892606, "learning_rate": 5e-06, "loss": 0.0844, "num_input_tokens_seen": 117613652, "step": 1303 }, { "epoch": 5.99080459770115, "loss": 0.12823861837387085, "loss_ce": 1.9021113985218108e-05, "loss_iou": 0.359375, "loss_num": 0.025634765625, "loss_xval": 0.1279296875, "num_input_tokens_seen": 117613652, "step": 1303 }, { "epoch": 5.995402298850575, "grad_norm": 7.857525256311372, "learning_rate": 5e-06, "loss": 0.0986, "num_input_tokens_seen": 117703216, "step": 1304 }, { "epoch": 5.995402298850575, "loss": 0.12224046885967255, "loss_ce": 3.282908437540755e-05, "loss_iou": 0.296875, "loss_num": 0.0244140625, "loss_xval": 0.1220703125, "num_input_tokens_seen": 117703216, "step": 1304 }, { "epoch": 6.0, "grad_norm": 3.121211959039489, "learning_rate": 5e-06, "loss": 0.0844, "num_input_tokens_seen": 117793536, "step": 1305 }, { "epoch": 6.0, "loss": 0.0870618224143982, "loss_ce": 2.569025309639983e-05, "loss_iou": 0.34765625, "loss_num": 0.0174560546875, "loss_xval": 0.0869140625, "num_input_tokens_seen": 117793536, "step": 1305 }, { "epoch": 6.004597701149425, "grad_norm": 10.240533439865134, "learning_rate": 5e-06, "loss": 0.1416, "num_input_tokens_seen": 117883776, "step": 1306 }, { "epoch": 6.004597701149425, "loss": 0.1209506243467331, "loss_ce": 1.8270541204401525e-06, "loss_iou": 0.361328125, "loss_num": 0.024169921875, "loss_xval": 0.12109375, "num_input_tokens_seen": 117883776, "step": 1306 }, { "epoch": 6.00919540229885, "grad_norm": 9.764972826473027, "learning_rate": 5e-06, "loss": 0.0707, "num_input_tokens_seen": 117974056, "step": 1307 }, { "epoch": 6.00919540229885, "loss": 0.05394062399864197, "loss_ce": 8.061311973506236e-07, "loss_iou": 0.318359375, "loss_num": 0.01080322265625, "loss_xval": 0.053955078125, "num_input_tokens_seen": 117974056, "step": 1307 }, { "epoch": 6.0137931034482754, "grad_norm": 2.2633345565968166, "learning_rate": 5e-06, "loss": 0.1079, "num_input_tokens_seen": 118064428, "step": 1308 }, { "epoch": 6.0137931034482754, "loss": 0.09164643287658691, "loss_ce": 2.152237811969826e-06, "loss_iou": 0.287109375, "loss_num": 0.018310546875, "loss_xval": 0.091796875, "num_input_tokens_seen": 118064428, "step": 1308 }, { "epoch": 6.0183908045977015, "grad_norm": 16.306990357156884, "learning_rate": 5e-06, "loss": 0.0845, "num_input_tokens_seen": 118154820, "step": 1309 }, { "epoch": 6.0183908045977015, "loss": 0.0783190131187439, "loss_ce": 2.6165927920374088e-05, "loss_iou": 0.279296875, "loss_num": 0.015625, "loss_xval": 0.078125, "num_input_tokens_seen": 118154820, "step": 1309 }, { "epoch": 6.022988505747127, "grad_norm": 5.540727316669466, "learning_rate": 5e-06, "loss": 0.093, "num_input_tokens_seen": 118245100, "step": 1310 }, { "epoch": 6.022988505747127, "loss": 0.07801572233438492, "loss_ce": 1.2792523193638772e-05, "loss_iou": 0.271484375, "loss_num": 0.015625, "loss_xval": 0.078125, "num_input_tokens_seen": 118245100, "step": 1310 }, { "epoch": 6.027586206896552, "grad_norm": 6.643253537988018, "learning_rate": 5e-06, "loss": 0.0683, "num_input_tokens_seen": 118335420, "step": 1311 }, { "epoch": 6.027586206896552, "loss": 0.07764066010713577, "loss_ce": 3.9446958908229135e-06, "loss_iou": 0.322265625, "loss_num": 0.0155029296875, "loss_xval": 0.07763671875, "num_input_tokens_seen": 118335420, "step": 1311 }, { "epoch": 6.032183908045977, "grad_norm": 6.788294653006372, "learning_rate": 5e-06, "loss": 0.0802, "num_input_tokens_seen": 118425852, "step": 1312 }, { "epoch": 6.032183908045977, "loss": 0.09127424657344818, "loss_ce": 2.6687364879762754e-05, "loss_iou": 0.326171875, "loss_num": 0.0181884765625, "loss_xval": 0.09130859375, "num_input_tokens_seen": 118425852, "step": 1312 }, { "epoch": 6.036781609195402, "grad_norm": 3.573469877891761, "learning_rate": 5e-06, "loss": 0.0963, "num_input_tokens_seen": 118516176, "step": 1313 }, { "epoch": 6.036781609195402, "loss": 0.10177914798259735, "loss_ce": 3.0242531465773936e-06, "loss_iou": 0.341796875, "loss_num": 0.0203857421875, "loss_xval": 0.1015625, "num_input_tokens_seen": 118516176, "step": 1313 }, { "epoch": 6.041379310344827, "grad_norm": 2.587490346214488, "learning_rate": 5e-06, "loss": 0.1042, "num_input_tokens_seen": 118606480, "step": 1314 }, { "epoch": 6.041379310344827, "loss": 0.07138314843177795, "loss_ce": 2.536031388444826e-06, "loss_iou": 0.294921875, "loss_num": 0.0142822265625, "loss_xval": 0.0712890625, "num_input_tokens_seen": 118606480, "step": 1314 }, { "epoch": 6.045977011494253, "grad_norm": 8.936376542422272, "learning_rate": 5e-06, "loss": 0.0712, "num_input_tokens_seen": 118696880, "step": 1315 }, { "epoch": 6.045977011494253, "loss": 0.07341183722019196, "loss_ce": 1.8055545751849422e-06, "loss_iou": 0.263671875, "loss_num": 0.01470947265625, "loss_xval": 0.0732421875, "num_input_tokens_seen": 118696880, "step": 1315 }, { "epoch": 6.050574712643678, "grad_norm": 12.767342588452228, "learning_rate": 5e-06, "loss": 0.1053, "num_input_tokens_seen": 118787220, "step": 1316 }, { "epoch": 6.050574712643678, "loss": 0.062339168041944504, "loss_ce": 0.00011382619413780048, "loss_iou": 0.3203125, "loss_num": 0.012451171875, "loss_xval": 0.062255859375, "num_input_tokens_seen": 118787220, "step": 1316 }, { "epoch": 6.055172413793104, "grad_norm": 10.482681799720183, "learning_rate": 5e-06, "loss": 0.0796, "num_input_tokens_seen": 118877572, "step": 1317 }, { "epoch": 6.055172413793104, "loss": 0.04859533905982971, "loss_ce": 1.1354484740877524e-05, "loss_iou": 0.322265625, "loss_num": 0.00970458984375, "loss_xval": 0.048583984375, "num_input_tokens_seen": 118877572, "step": 1317 }, { "epoch": 6.059770114942529, "grad_norm": 12.105797073827482, "learning_rate": 5e-06, "loss": 0.1086, "num_input_tokens_seen": 118967880, "step": 1318 }, { "epoch": 6.059770114942529, "loss": 0.1294127106666565, "loss_ce": 2.920265615102835e-06, "loss_iou": 0.3125, "loss_num": 0.02587890625, "loss_xval": 0.1298828125, "num_input_tokens_seen": 118967880, "step": 1318 }, { "epoch": 6.064367816091954, "grad_norm": 1.9154409626850923, "learning_rate": 5e-06, "loss": 0.0643, "num_input_tokens_seen": 119058296, "step": 1319 }, { "epoch": 6.064367816091954, "loss": 0.09125930815935135, "loss_ce": 2.701348603295628e-05, "loss_iou": 0.40234375, "loss_num": 0.018310546875, "loss_xval": 0.09130859375, "num_input_tokens_seen": 119058296, "step": 1319 }, { "epoch": 6.068965517241379, "grad_norm": 1.9985880290089337, "learning_rate": 5e-06, "loss": 0.0933, "num_input_tokens_seen": 119148684, "step": 1320 }, { "epoch": 6.068965517241379, "loss": 0.1152973622083664, "loss_ce": 1.952744241862092e-06, "loss_iou": 0.353515625, "loss_num": 0.0230712890625, "loss_xval": 0.115234375, "num_input_tokens_seen": 119148684, "step": 1320 }, { "epoch": 6.073563218390804, "grad_norm": 6.069434025871692, "learning_rate": 5e-06, "loss": 0.0762, "num_input_tokens_seen": 119239004, "step": 1321 }, { "epoch": 6.073563218390804, "loss": 0.060495294630527496, "loss_ce": 5.5230746511369944e-05, "loss_iou": 0.306640625, "loss_num": 0.0120849609375, "loss_xval": 0.060546875, "num_input_tokens_seen": 119239004, "step": 1321 }, { "epoch": 6.07816091954023, "grad_norm": 10.274039091733039, "learning_rate": 5e-06, "loss": 0.0822, "num_input_tokens_seen": 119329324, "step": 1322 }, { "epoch": 6.07816091954023, "loss": 0.10678257048130035, "loss_ce": 1.5705069245086634e-06, "loss_iou": 0.302734375, "loss_num": 0.0213623046875, "loss_xval": 0.10693359375, "num_input_tokens_seen": 119329324, "step": 1322 }, { "epoch": 6.082758620689655, "grad_norm": 3.0466261855959544, "learning_rate": 5e-06, "loss": 0.0874, "num_input_tokens_seen": 119419688, "step": 1323 }, { "epoch": 6.082758620689655, "loss": 0.06655121594667435, "loss_ce": 7.638142960786354e-06, "loss_iou": 0.26171875, "loss_num": 0.0133056640625, "loss_xval": 0.06640625, "num_input_tokens_seen": 119419688, "step": 1323 }, { "epoch": 6.0873563218390805, "grad_norm": 3.872617007458599, "learning_rate": 5e-06, "loss": 0.0655, "num_input_tokens_seen": 119510040, "step": 1324 }, { "epoch": 6.0873563218390805, "loss": 0.06537115573883057, "loss_ce": 4.8276036977767944e-05, "loss_iou": 0.28515625, "loss_num": 0.0130615234375, "loss_xval": 0.0654296875, "num_input_tokens_seen": 119510040, "step": 1324 }, { "epoch": 6.091954022988506, "grad_norm": 14.262285949730552, "learning_rate": 5e-06, "loss": 0.0775, "num_input_tokens_seen": 119600444, "step": 1325 }, { "epoch": 6.091954022988506, "loss": 0.06211470812559128, "loss_ce": 1.1435970009188168e-05, "loss_iou": 0.337890625, "loss_num": 0.012451171875, "loss_xval": 0.06201171875, "num_input_tokens_seen": 119600444, "step": 1325 }, { "epoch": 6.096551724137931, "grad_norm": 5.582731893935351, "learning_rate": 5e-06, "loss": 0.0856, "num_input_tokens_seen": 119690784, "step": 1326 }, { "epoch": 6.096551724137931, "loss": 0.08985207229852676, "loss_ce": 6.9361602072604e-05, "loss_iou": 0.404296875, "loss_num": 0.0179443359375, "loss_xval": 0.08984375, "num_input_tokens_seen": 119690784, "step": 1326 }, { "epoch": 6.101149425287356, "grad_norm": 8.165861222533081, "learning_rate": 5e-06, "loss": 0.0707, "num_input_tokens_seen": 119781104, "step": 1327 }, { "epoch": 6.101149425287356, "loss": 0.06276784837245941, "loss_ce": 8.212633133553027e-07, "loss_iou": 0.2890625, "loss_num": 0.0125732421875, "loss_xval": 0.06298828125, "num_input_tokens_seen": 119781104, "step": 1327 }, { "epoch": 6.105747126436782, "grad_norm": 11.7019496158795, "learning_rate": 5e-06, "loss": 0.0855, "num_input_tokens_seen": 119871456, "step": 1328 }, { "epoch": 6.105747126436782, "loss": 0.06695958971977234, "loss_ce": 4.02033947466407e-06, "loss_iou": 0.306640625, "loss_num": 0.01336669921875, "loss_xval": 0.06689453125, "num_input_tokens_seen": 119871456, "step": 1328 }, { "epoch": 6.110344827586207, "grad_norm": 11.09087965893546, "learning_rate": 5e-06, "loss": 0.0629, "num_input_tokens_seen": 119961804, "step": 1329 }, { "epoch": 6.110344827586207, "loss": 0.06232057511806488, "loss_ce": 3.6832166188105475e-06, "loss_iou": 0.36328125, "loss_num": 0.012451171875, "loss_xval": 0.062255859375, "num_input_tokens_seen": 119961804, "step": 1329 }, { "epoch": 6.114942528735632, "grad_norm": 1.7273225707646134, "learning_rate": 5e-06, "loss": 0.0615, "num_input_tokens_seen": 120052288, "step": 1330 }, { "epoch": 6.114942528735632, "loss": 0.05487591400742531, "loss_ce": 5.3095800467417575e-06, "loss_iou": 0.33984375, "loss_num": 0.010986328125, "loss_xval": 0.054931640625, "num_input_tokens_seen": 120052288, "step": 1330 }, { "epoch": 6.119540229885057, "grad_norm": 1.895873348755958, "learning_rate": 5e-06, "loss": 0.0637, "num_input_tokens_seen": 120142748, "step": 1331 }, { "epoch": 6.119540229885057, "loss": 0.08318766951560974, "loss_ce": 1.2013872037641704e-05, "loss_iou": 0.296875, "loss_num": 0.0166015625, "loss_xval": 0.0830078125, "num_input_tokens_seen": 120142748, "step": 1331 }, { "epoch": 6.124137931034483, "grad_norm": 12.365279177414369, "learning_rate": 5e-06, "loss": 0.0879, "num_input_tokens_seen": 120231640, "step": 1332 }, { "epoch": 6.124137931034483, "loss": 0.11951163411140442, "loss_ce": 2.006320391956251e-05, "loss_iou": 0.3671875, "loss_num": 0.02392578125, "loss_xval": 0.11962890625, "num_input_tokens_seen": 120231640, "step": 1332 }, { "epoch": 6.128735632183908, "grad_norm": 8.100321593651932, "learning_rate": 5e-06, "loss": 0.0759, "num_input_tokens_seen": 120322024, "step": 1333 }, { "epoch": 6.128735632183908, "loss": 0.07466816902160645, "loss_ce": 6.915668109286344e-06, "loss_iou": 0.3125, "loss_num": 0.01495361328125, "loss_xval": 0.07470703125, "num_input_tokens_seen": 120322024, "step": 1333 }, { "epoch": 6.133333333333334, "grad_norm": 10.20027536647147, "learning_rate": 5e-06, "loss": 0.0724, "num_input_tokens_seen": 120412328, "step": 1334 }, { "epoch": 6.133333333333334, "loss": 0.07674149423837662, "loss_ce": 5.042035354563268e-06, "loss_iou": 0.36328125, "loss_num": 0.015380859375, "loss_xval": 0.07666015625, "num_input_tokens_seen": 120412328, "step": 1334 }, { "epoch": 6.137931034482759, "grad_norm": 15.927575829987184, "learning_rate": 5e-06, "loss": 0.0715, "num_input_tokens_seen": 120502728, "step": 1335 }, { "epoch": 6.137931034482759, "loss": 0.07090835273265839, "loss_ce": 1.6013153071980923e-05, "loss_iou": 0.296875, "loss_num": 0.01422119140625, "loss_xval": 0.07080078125, "num_input_tokens_seen": 120502728, "step": 1335 }, { "epoch": 6.142528735632184, "grad_norm": 6.003633344367404, "learning_rate": 5e-06, "loss": 0.093, "num_input_tokens_seen": 120592968, "step": 1336 }, { "epoch": 6.142528735632184, "loss": 0.08331505209207535, "loss_ce": 2.065743046841817e-06, "loss_iou": 0.30859375, "loss_num": 0.0167236328125, "loss_xval": 0.08349609375, "num_input_tokens_seen": 120592968, "step": 1336 }, { "epoch": 6.147126436781609, "grad_norm": 16.256143484292767, "learning_rate": 5e-06, "loss": 0.0959, "num_input_tokens_seen": 120683224, "step": 1337 }, { "epoch": 6.147126436781609, "loss": 0.05023811012506485, "loss_ce": 6.1769642343278974e-06, "loss_iou": 0.314453125, "loss_num": 0.01007080078125, "loss_xval": 0.05029296875, "num_input_tokens_seen": 120683224, "step": 1337 }, { "epoch": 6.151724137931034, "grad_norm": 18.84221449609671, "learning_rate": 5e-06, "loss": 0.1238, "num_input_tokens_seen": 120773604, "step": 1338 }, { "epoch": 6.151724137931034, "loss": 0.1148335188627243, "loss_ce": 1.1128912774438504e-05, "loss_iou": 0.2890625, "loss_num": 0.02294921875, "loss_xval": 0.11474609375, "num_input_tokens_seen": 120773604, "step": 1338 }, { "epoch": 6.1563218390804595, "grad_norm": 3.6340664937584357, "learning_rate": 5e-06, "loss": 0.1097, "num_input_tokens_seen": 120863972, "step": 1339 }, { "epoch": 6.1563218390804595, "loss": 0.10637908428907394, "loss_ce": 2.5327462935820222e-05, "loss_iou": 0.376953125, "loss_num": 0.021240234375, "loss_xval": 0.1064453125, "num_input_tokens_seen": 120863972, "step": 1339 }, { "epoch": 6.160919540229885, "grad_norm": 12.35950915110001, "learning_rate": 5e-06, "loss": 0.0642, "num_input_tokens_seen": 120954292, "step": 1340 }, { "epoch": 6.160919540229885, "loss": 0.050802938640117645, "loss_ce": 2.1687461412511766e-05, "loss_iou": 0.279296875, "loss_num": 0.0101318359375, "loss_xval": 0.05078125, "num_input_tokens_seen": 120954292, "step": 1340 }, { "epoch": 6.165517241379311, "grad_norm": 13.05285924563968, "learning_rate": 5e-06, "loss": 0.0771, "num_input_tokens_seen": 121044680, "step": 1341 }, { "epoch": 6.165517241379311, "loss": 0.09323087334632874, "loss_ce": 1.4931548321328592e-05, "loss_iou": 0.26171875, "loss_num": 0.0186767578125, "loss_xval": 0.09326171875, "num_input_tokens_seen": 121044680, "step": 1341 }, { "epoch": 6.170114942528736, "grad_norm": 3.75282762212465, "learning_rate": 5e-06, "loss": 0.0687, "num_input_tokens_seen": 121135088, "step": 1342 }, { "epoch": 6.170114942528736, "loss": 0.06280642002820969, "loss_ce": 3.1762905564391986e-05, "loss_iou": 0.337890625, "loss_num": 0.0125732421875, "loss_xval": 0.06298828125, "num_input_tokens_seen": 121135088, "step": 1342 }, { "epoch": 6.174712643678161, "grad_norm": 9.367037578077188, "learning_rate": 5e-06, "loss": 0.1212, "num_input_tokens_seen": 121225428, "step": 1343 }, { "epoch": 6.174712643678161, "loss": 0.1399770975112915, "loss_ce": 8.234508641180582e-06, "loss_iou": 0.296875, "loss_num": 0.028076171875, "loss_xval": 0.1396484375, "num_input_tokens_seen": 121225428, "step": 1343 }, { "epoch": 6.179310344827586, "grad_norm": 19.171060340321993, "learning_rate": 5e-06, "loss": 0.087, "num_input_tokens_seen": 121315824, "step": 1344 }, { "epoch": 6.179310344827586, "loss": 0.06587206572294235, "loss_ce": 1.513374263595324e-05, "loss_iou": 0.275390625, "loss_num": 0.01318359375, "loss_xval": 0.06591796875, "num_input_tokens_seen": 121315824, "step": 1344 }, { "epoch": 6.183908045977011, "grad_norm": 4.876284313063547, "learning_rate": 5e-06, "loss": 0.0771, "num_input_tokens_seen": 121406176, "step": 1345 }, { "epoch": 6.183908045977011, "loss": 0.06390294432640076, "loss_ce": 6.762678367522312e-06, "loss_iou": 0.30078125, "loss_num": 0.01275634765625, "loss_xval": 0.06396484375, "num_input_tokens_seen": 121406176, "step": 1345 }, { "epoch": 6.188505747126436, "grad_norm": 16.38884750049181, "learning_rate": 5e-06, "loss": 0.108, "num_input_tokens_seen": 121496544, "step": 1346 }, { "epoch": 6.188505747126436, "loss": 0.1316826045513153, "loss_ce": 2.9771730623906478e-05, "loss_iou": 0.29296875, "loss_num": 0.0263671875, "loss_xval": 0.1318359375, "num_input_tokens_seen": 121496544, "step": 1346 }, { "epoch": 6.1931034482758625, "grad_norm": 11.544406525213137, "learning_rate": 5e-06, "loss": 0.0773, "num_input_tokens_seen": 121587024, "step": 1347 }, { "epoch": 6.1931034482758625, "loss": 0.08613596856594086, "loss_ce": 1.535906267235987e-05, "loss_iou": 0.25390625, "loss_num": 0.0172119140625, "loss_xval": 0.0859375, "num_input_tokens_seen": 121587024, "step": 1347 }, { "epoch": 6.197701149425288, "grad_norm": 2.445337746033652, "learning_rate": 5e-06, "loss": 0.0589, "num_input_tokens_seen": 121677320, "step": 1348 }, { "epoch": 6.197701149425288, "loss": 0.04452810809016228, "loss_ce": 2.960706069643493e-06, "loss_iou": 0.267578125, "loss_num": 0.0089111328125, "loss_xval": 0.04443359375, "num_input_tokens_seen": 121677320, "step": 1348 }, { "epoch": 6.202298850574713, "grad_norm": 6.030279032225266, "learning_rate": 5e-06, "loss": 0.1041, "num_input_tokens_seen": 121767660, "step": 1349 }, { "epoch": 6.202298850574713, "loss": 0.10142619907855988, "loss_ce": 1.0309655635865056e-06, "loss_iou": 0.392578125, "loss_num": 0.020263671875, "loss_xval": 0.1015625, "num_input_tokens_seen": 121767660, "step": 1349 }, { "epoch": 6.206896551724138, "grad_norm": 13.218267366174958, "learning_rate": 5e-06, "loss": 0.0749, "num_input_tokens_seen": 121857952, "step": 1350 }, { "epoch": 6.206896551724138, "loss": 0.08160283416509628, "loss_ce": 1.4093273421167396e-05, "loss_iou": 0.31640625, "loss_num": 0.016357421875, "loss_xval": 0.08154296875, "num_input_tokens_seen": 121857952, "step": 1350 }, { "epoch": 6.211494252873563, "grad_norm": 7.3910687621686115, "learning_rate": 5e-06, "loss": 0.1011, "num_input_tokens_seen": 121948348, "step": 1351 }, { "epoch": 6.211494252873563, "loss": 0.1298474818468094, "loss_ce": 4.096345946891233e-05, "loss_iou": 0.2734375, "loss_num": 0.0260009765625, "loss_xval": 0.1298828125, "num_input_tokens_seen": 121948348, "step": 1351 }, { "epoch": 6.216091954022988, "grad_norm": 3.477775072702562, "learning_rate": 5e-06, "loss": 0.0811, "num_input_tokens_seen": 122038552, "step": 1352 }, { "epoch": 6.216091954022988, "loss": 0.06851699203252792, "loss_ce": 5.026361122872913e-06, "loss_iou": 0.27734375, "loss_num": 0.01373291015625, "loss_xval": 0.068359375, "num_input_tokens_seen": 122038552, "step": 1352 }, { "epoch": 6.220689655172414, "grad_norm": 4.659650631220517, "learning_rate": 5e-06, "loss": 0.1012, "num_input_tokens_seen": 122128880, "step": 1353 }, { "epoch": 6.220689655172414, "loss": 0.09352925419807434, "loss_ce": 2.3392831280943938e-05, "loss_iou": 0.361328125, "loss_num": 0.0186767578125, "loss_xval": 0.09375, "num_input_tokens_seen": 122128880, "step": 1353 }, { "epoch": 6.225287356321839, "grad_norm": 7.501436100973512, "learning_rate": 5e-06, "loss": 0.0835, "num_input_tokens_seen": 122219280, "step": 1354 }, { "epoch": 6.225287356321839, "loss": 0.11285711824893951, "loss_ce": 1.0735830983321648e-05, "loss_iou": 0.35546875, "loss_num": 0.0225830078125, "loss_xval": 0.11279296875, "num_input_tokens_seen": 122219280, "step": 1354 }, { "epoch": 6.2298850574712645, "grad_norm": 7.168147699957736, "learning_rate": 5e-06, "loss": 0.0745, "num_input_tokens_seen": 122308872, "step": 1355 }, { "epoch": 6.2298850574712645, "loss": 0.07650821655988693, "loss_ce": 6.474406859524606e-07, "loss_iou": 0.3515625, "loss_num": 0.01531982421875, "loss_xval": 0.07666015625, "num_input_tokens_seen": 122308872, "step": 1355 }, { "epoch": 6.23448275862069, "grad_norm": 2.803107500761448, "learning_rate": 5e-06, "loss": 0.1236, "num_input_tokens_seen": 122399160, "step": 1356 }, { "epoch": 6.23448275862069, "loss": 0.17111368477344513, "loss_ce": 1.622533659428882e-06, "loss_iou": 0.380859375, "loss_num": 0.0341796875, "loss_xval": 0.1708984375, "num_input_tokens_seen": 122399160, "step": 1356 }, { "epoch": 6.239080459770115, "grad_norm": 1.8395032972537242, "learning_rate": 5e-06, "loss": 0.0541, "num_input_tokens_seen": 122489652, "step": 1357 }, { "epoch": 6.239080459770115, "loss": 0.07030150294303894, "loss_ce": 4.265766619937494e-06, "loss_iou": 0.353515625, "loss_num": 0.0140380859375, "loss_xval": 0.0703125, "num_input_tokens_seen": 122489652, "step": 1357 }, { "epoch": 6.24367816091954, "grad_norm": 1.8235275088698975, "learning_rate": 5e-06, "loss": 0.0971, "num_input_tokens_seen": 122580024, "step": 1358 }, { "epoch": 6.24367816091954, "loss": 0.05960645154118538, "loss_ce": 5.619423973257653e-06, "loss_iou": 0.3046875, "loss_num": 0.01190185546875, "loss_xval": 0.0595703125, "num_input_tokens_seen": 122580024, "step": 1358 }, { "epoch": 6.248275862068965, "grad_norm": 2.3308796814869392, "learning_rate": 5e-06, "loss": 0.0665, "num_input_tokens_seen": 122670360, "step": 1359 }, { "epoch": 6.248275862068965, "loss": 0.08280164748430252, "loss_ce": 7.4597710408852436e-06, "loss_iou": 0.298828125, "loss_num": 0.0166015625, "loss_xval": 0.0830078125, "num_input_tokens_seen": 122670360, "step": 1359 }, { "epoch": 6.252873563218391, "grad_norm": 9.546289417744838, "learning_rate": 5e-06, "loss": 0.0745, "num_input_tokens_seen": 122760712, "step": 1360 }, { "epoch": 6.252873563218391, "loss": 0.06621609628200531, "loss_ce": 8.209115549107082e-06, "loss_iou": 0.306640625, "loss_num": 0.01324462890625, "loss_xval": 0.06640625, "num_input_tokens_seen": 122760712, "step": 1360 }, { "epoch": 6.257471264367816, "grad_norm": 21.086995926043166, "learning_rate": 5e-06, "loss": 0.083, "num_input_tokens_seen": 122850908, "step": 1361 }, { "epoch": 6.257471264367816, "loss": 0.08541100472211838, "loss_ce": 7.5566276791505516e-06, "loss_iou": 0.404296875, "loss_num": 0.01708984375, "loss_xval": 0.08544921875, "num_input_tokens_seen": 122850908, "step": 1361 }, { "epoch": 6.2620689655172415, "grad_norm": 9.01280263169444, "learning_rate": 5e-06, "loss": 0.1113, "num_input_tokens_seen": 122941356, "step": 1362 }, { "epoch": 6.2620689655172415, "loss": 0.1276589035987854, "loss_ce": 3.869728971039876e-06, "loss_iou": 0.31640625, "loss_num": 0.0255126953125, "loss_xval": 0.1279296875, "num_input_tokens_seen": 122941356, "step": 1362 }, { "epoch": 6.266666666666667, "grad_norm": 20.135580773469314, "learning_rate": 5e-06, "loss": 0.136, "num_input_tokens_seen": 123031748, "step": 1363 }, { "epoch": 6.266666666666667, "loss": 0.10605251789093018, "loss_ce": 3.935991117032245e-06, "loss_iou": 0.431640625, "loss_num": 0.021240234375, "loss_xval": 0.10595703125, "num_input_tokens_seen": 123031748, "step": 1363 }, { "epoch": 6.271264367816092, "grad_norm": 13.434332845560562, "learning_rate": 5e-06, "loss": 0.0927, "num_input_tokens_seen": 123121280, "step": 1364 }, { "epoch": 6.271264367816092, "loss": 0.07541698217391968, "loss_ce": 8.042437002586666e-06, "loss_iou": 0.38671875, "loss_num": 0.01507568359375, "loss_xval": 0.0751953125, "num_input_tokens_seen": 123121280, "step": 1364 }, { "epoch": 6.275862068965517, "grad_norm": 4.409333354948716, "learning_rate": 5e-06, "loss": 0.1115, "num_input_tokens_seen": 123211620, "step": 1365 }, { "epoch": 6.275862068965517, "loss": 0.10695058107376099, "loss_ce": 1.728949655444012e-06, "loss_iou": 0.333984375, "loss_num": 0.0213623046875, "loss_xval": 0.10693359375, "num_input_tokens_seen": 123211620, "step": 1365 }, { "epoch": 6.280459770114943, "grad_norm": 3.154737625505015, "learning_rate": 5e-06, "loss": 0.075, "num_input_tokens_seen": 123301896, "step": 1366 }, { "epoch": 6.280459770114943, "loss": 0.07635073363780975, "loss_ce": 1.1014501978934277e-05, "loss_iou": 0.328125, "loss_num": 0.0152587890625, "loss_xval": 0.076171875, "num_input_tokens_seen": 123301896, "step": 1366 }, { "epoch": 6.285057471264368, "grad_norm": 2.211275058736632, "learning_rate": 5e-06, "loss": 0.1017, "num_input_tokens_seen": 123392472, "step": 1367 }, { "epoch": 6.285057471264368, "loss": 0.07074186205863953, "loss_ce": 1.7377531548845582e-05, "loss_iou": 0.314453125, "loss_num": 0.01416015625, "loss_xval": 0.07080078125, "num_input_tokens_seen": 123392472, "step": 1367 }, { "epoch": 6.289655172413793, "grad_norm": 10.219890906499858, "learning_rate": 5e-06, "loss": 0.091, "num_input_tokens_seen": 123482900, "step": 1368 }, { "epoch": 6.289655172413793, "loss": 0.07744231820106506, "loss_ce": 3.968204964621691e-06, "loss_iou": 0.365234375, "loss_num": 0.0155029296875, "loss_xval": 0.07763671875, "num_input_tokens_seen": 123482900, "step": 1368 }, { "epoch": 6.294252873563218, "grad_norm": 5.327344530549374, "learning_rate": 5e-06, "loss": 0.0759, "num_input_tokens_seen": 123573300, "step": 1369 }, { "epoch": 6.294252873563218, "loss": 0.09067036956548691, "loss_ce": 9.419472189620137e-05, "loss_iou": 0.37890625, "loss_num": 0.0181884765625, "loss_xval": 0.0908203125, "num_input_tokens_seen": 123573300, "step": 1369 }, { "epoch": 6.2988505747126435, "grad_norm": 9.772421156940164, "learning_rate": 5e-06, "loss": 0.0702, "num_input_tokens_seen": 123663696, "step": 1370 }, { "epoch": 6.2988505747126435, "loss": 0.0640704333782196, "loss_ce": 2.9297052606125362e-05, "loss_iou": 0.37890625, "loss_num": 0.0128173828125, "loss_xval": 0.06396484375, "num_input_tokens_seen": 123663696, "step": 1370 }, { "epoch": 6.303448275862069, "grad_norm": 18.114247114460188, "learning_rate": 5e-06, "loss": 0.0684, "num_input_tokens_seen": 123754076, "step": 1371 }, { "epoch": 6.303448275862069, "loss": 0.07899777591228485, "loss_ce": 3.0194664759619627e-06, "loss_iou": 0.35546875, "loss_num": 0.015869140625, "loss_xval": 0.0791015625, "num_input_tokens_seen": 123754076, "step": 1371 }, { "epoch": 6.308045977011494, "grad_norm": 3.332469449720361, "learning_rate": 5e-06, "loss": 0.0676, "num_input_tokens_seen": 123844508, "step": 1372 }, { "epoch": 6.308045977011494, "loss": 0.06758559495210648, "loss_ce": 4.417397576617077e-06, "loss_iou": 0.275390625, "loss_num": 0.01348876953125, "loss_xval": 0.0673828125, "num_input_tokens_seen": 123844508, "step": 1372 }, { "epoch": 6.31264367816092, "grad_norm": 6.634405744544336, "learning_rate": 5e-06, "loss": 0.0945, "num_input_tokens_seen": 123934812, "step": 1373 }, { "epoch": 6.31264367816092, "loss": 0.08992957323789597, "loss_ce": 9.527670954412315e-06, "loss_iou": 0.33984375, "loss_num": 0.0179443359375, "loss_xval": 0.08984375, "num_input_tokens_seen": 123934812, "step": 1373 }, { "epoch": 6.317241379310345, "grad_norm": 10.333106801799953, "learning_rate": 5e-06, "loss": 0.0718, "num_input_tokens_seen": 124025200, "step": 1374 }, { "epoch": 6.317241379310345, "loss": 0.04593438282608986, "loss_ce": 2.068851244985126e-05, "loss_iou": 0.326171875, "loss_num": 0.0091552734375, "loss_xval": 0.0458984375, "num_input_tokens_seen": 124025200, "step": 1374 }, { "epoch": 6.32183908045977, "grad_norm": 6.513003814868987, "learning_rate": 5e-06, "loss": 0.0896, "num_input_tokens_seen": 124115436, "step": 1375 }, { "epoch": 6.32183908045977, "loss": 0.07307623326778412, "loss_ce": 1.8943998156828457e-06, "loss_iou": 0.29296875, "loss_num": 0.0146484375, "loss_xval": 0.0732421875, "num_input_tokens_seen": 124115436, "step": 1375 }, { "epoch": 6.326436781609195, "grad_norm": 7.020049530325385, "learning_rate": 5e-06, "loss": 0.0601, "num_input_tokens_seen": 124205816, "step": 1376 }, { "epoch": 6.326436781609195, "loss": 0.062137503176927567, "loss_ce": 3.71252508557518e-06, "loss_iou": 0.314453125, "loss_num": 0.012451171875, "loss_xval": 0.06201171875, "num_input_tokens_seen": 124205816, "step": 1376 }, { "epoch": 6.3310344827586205, "grad_norm": 6.7984756030733005, "learning_rate": 5e-06, "loss": 0.0769, "num_input_tokens_seen": 124296160, "step": 1377 }, { "epoch": 6.3310344827586205, "loss": 0.09830499440431595, "loss_ce": 6.89133012201637e-05, "loss_iou": 0.375, "loss_num": 0.0196533203125, "loss_xval": 0.09814453125, "num_input_tokens_seen": 124296160, "step": 1377 }, { "epoch": 6.335632183908046, "grad_norm": 9.550154500204966, "learning_rate": 5e-06, "loss": 0.0343, "num_input_tokens_seen": 124386652, "step": 1378 }, { "epoch": 6.335632183908046, "loss": 0.032549649477005005, "loss_ce": 2.6519669518165756e-06, "loss_iou": 0.29296875, "loss_num": 0.006500244140625, "loss_xval": 0.032470703125, "num_input_tokens_seen": 124386652, "step": 1378 }, { "epoch": 6.340229885057472, "grad_norm": 15.868279771186451, "learning_rate": 5e-06, "loss": 0.1227, "num_input_tokens_seen": 124477136, "step": 1379 }, { "epoch": 6.340229885057472, "loss": 0.10919815301895142, "loss_ce": 6.263924660743214e-06, "loss_iou": 0.244140625, "loss_num": 0.0218505859375, "loss_xval": 0.109375, "num_input_tokens_seen": 124477136, "step": 1379 }, { "epoch": 6.344827586206897, "grad_norm": 9.149745216189178, "learning_rate": 5e-06, "loss": 0.0605, "num_input_tokens_seen": 124567584, "step": 1380 }, { "epoch": 6.344827586206897, "loss": 0.043360911309719086, "loss_ce": 1.0688753718568478e-05, "loss_iou": 0.255859375, "loss_num": 0.0086669921875, "loss_xval": 0.04345703125, "num_input_tokens_seen": 124567584, "step": 1380 }, { "epoch": 6.349425287356322, "grad_norm": 6.148379301187082, "learning_rate": 5e-06, "loss": 0.0698, "num_input_tokens_seen": 124657880, "step": 1381 }, { "epoch": 6.349425287356322, "loss": 0.07773515582084656, "loss_ce": 6.885257789690513e-06, "loss_iou": 0.3671875, "loss_num": 0.0155029296875, "loss_xval": 0.07763671875, "num_input_tokens_seen": 124657880, "step": 1381 }, { "epoch": 6.354022988505747, "grad_norm": 6.06320096249193, "learning_rate": 5e-06, "loss": 0.0709, "num_input_tokens_seen": 124748200, "step": 1382 }, { "epoch": 6.354022988505747, "loss": 0.08277206867933273, "loss_ce": 8.399106263823342e-06, "loss_iou": 0.333984375, "loss_num": 0.0166015625, "loss_xval": 0.0830078125, "num_input_tokens_seen": 124748200, "step": 1382 }, { "epoch": 6.358620689655172, "grad_norm": 6.051287795431972, "learning_rate": 5e-06, "loss": 0.0837, "num_input_tokens_seen": 124838752, "step": 1383 }, { "epoch": 6.358620689655172, "loss": 0.056419774889945984, "loss_ce": 8.03236889623804e-06, "loss_iou": 0.35546875, "loss_num": 0.01129150390625, "loss_xval": 0.056396484375, "num_input_tokens_seen": 124838752, "step": 1383 }, { "epoch": 6.363218390804597, "grad_norm": 13.7747801267887, "learning_rate": 5e-06, "loss": 0.1003, "num_input_tokens_seen": 124929112, "step": 1384 }, { "epoch": 6.363218390804597, "loss": 0.07443118095397949, "loss_ce": 7.509520219173282e-05, "loss_iou": 0.302734375, "loss_num": 0.014892578125, "loss_xval": 0.07421875, "num_input_tokens_seen": 124929112, "step": 1384 }, { "epoch": 6.3678160919540225, "grad_norm": 2.09029899827099, "learning_rate": 5e-06, "loss": 0.0845, "num_input_tokens_seen": 125019604, "step": 1385 }, { "epoch": 6.3678160919540225, "loss": 0.08999022096395493, "loss_ce": 9.14329484658083e-06, "loss_iou": 0.365234375, "loss_num": 0.0179443359375, "loss_xval": 0.08984375, "num_input_tokens_seen": 125019604, "step": 1385 }, { "epoch": 6.372413793103449, "grad_norm": 4.966748140294823, "learning_rate": 5e-06, "loss": 0.1045, "num_input_tokens_seen": 125109988, "step": 1386 }, { "epoch": 6.372413793103449, "loss": 0.08931561559438705, "loss_ce": 5.922461241425481e-06, "loss_iou": 0.298828125, "loss_num": 0.017822265625, "loss_xval": 0.08935546875, "num_input_tokens_seen": 125109988, "step": 1386 }, { "epoch": 6.377011494252874, "grad_norm": 1.8678820550766893, "learning_rate": 5e-06, "loss": 0.0808, "num_input_tokens_seen": 125200440, "step": 1387 }, { "epoch": 6.377011494252874, "loss": 0.0770668312907219, "loss_ce": 5.572132067754865e-05, "loss_iou": 0.283203125, "loss_num": 0.01544189453125, "loss_xval": 0.0771484375, "num_input_tokens_seen": 125200440, "step": 1387 }, { "epoch": 6.381609195402299, "grad_norm": 16.6653828543424, "learning_rate": 5e-06, "loss": 0.0891, "num_input_tokens_seen": 125290784, "step": 1388 }, { "epoch": 6.381609195402299, "loss": 0.09468194842338562, "loss_ce": 1.167914092548017e-06, "loss_iou": 0.25, "loss_num": 0.0189208984375, "loss_xval": 0.0947265625, "num_input_tokens_seen": 125290784, "step": 1388 }, { "epoch": 6.386206896551724, "grad_norm": 9.34884327902768, "learning_rate": 5e-06, "loss": 0.0721, "num_input_tokens_seen": 125381168, "step": 1389 }, { "epoch": 6.386206896551724, "loss": 0.060101285576820374, "loss_ce": 2.7430016416474245e-05, "loss_iou": 0.30078125, "loss_num": 0.01202392578125, "loss_xval": 0.06005859375, "num_input_tokens_seen": 125381168, "step": 1389 }, { "epoch": 6.390804597701149, "grad_norm": 4.121494614520851, "learning_rate": 5e-06, "loss": 0.0703, "num_input_tokens_seen": 125471516, "step": 1390 }, { "epoch": 6.390804597701149, "loss": 0.09581901133060455, "loss_ce": 2.433588269923348e-05, "loss_iou": 0.291015625, "loss_num": 0.0191650390625, "loss_xval": 0.095703125, "num_input_tokens_seen": 125471516, "step": 1390 }, { "epoch": 6.395402298850574, "grad_norm": 14.675052033868381, "learning_rate": 5e-06, "loss": 0.1022, "num_input_tokens_seen": 125561880, "step": 1391 }, { "epoch": 6.395402298850574, "loss": 0.1025380939245224, "loss_ce": 4.480792267713696e-05, "loss_iou": 0.251953125, "loss_num": 0.0205078125, "loss_xval": 0.1025390625, "num_input_tokens_seen": 125561880, "step": 1391 }, { "epoch": 6.4, "grad_norm": 7.391101398441527, "learning_rate": 5e-06, "loss": 0.0976, "num_input_tokens_seen": 125651460, "step": 1392 }, { "epoch": 6.4, "loss": 0.1120477020740509, "loss_ce": 2.4182809283956885e-06, "loss_iou": 0.345703125, "loss_num": 0.0224609375, "loss_xval": 0.11181640625, "num_input_tokens_seen": 125651460, "step": 1392 }, { "epoch": 6.4045977011494255, "grad_norm": 4.598424061793132, "learning_rate": 5e-06, "loss": 0.084, "num_input_tokens_seen": 125741836, "step": 1393 }, { "epoch": 6.4045977011494255, "loss": 0.08984687924385071, "loss_ce": 3.1254223813448334e-06, "loss_iou": 0.296875, "loss_num": 0.0179443359375, "loss_xval": 0.08984375, "num_input_tokens_seen": 125741836, "step": 1393 }, { "epoch": 6.409195402298851, "grad_norm": 10.543183735469755, "learning_rate": 5e-06, "loss": 0.0705, "num_input_tokens_seen": 125832188, "step": 1394 }, { "epoch": 6.409195402298851, "loss": 0.06207028031349182, "loss_ce": 1.2784106729668565e-05, "loss_iou": 0.306640625, "loss_num": 0.01239013671875, "loss_xval": 0.06201171875, "num_input_tokens_seen": 125832188, "step": 1394 }, { "epoch": 6.413793103448276, "grad_norm": 11.282419963710336, "learning_rate": 5e-06, "loss": 0.0785, "num_input_tokens_seen": 125922616, "step": 1395 }, { "epoch": 6.413793103448276, "loss": 0.09065688401460648, "loss_ce": 4.416605861479184e-06, "loss_iou": 0.359375, "loss_num": 0.0181884765625, "loss_xval": 0.0908203125, "num_input_tokens_seen": 125922616, "step": 1395 }, { "epoch": 6.418390804597701, "grad_norm": 6.53067476157625, "learning_rate": 5e-06, "loss": 0.1083, "num_input_tokens_seen": 126012968, "step": 1396 }, { "epoch": 6.418390804597701, "loss": 0.11000474542379379, "loss_ce": 4.139963493798859e-06, "loss_iou": 0.408203125, "loss_num": 0.02197265625, "loss_xval": 0.10986328125, "num_input_tokens_seen": 126012968, "step": 1396 }, { "epoch": 6.422988505747126, "grad_norm": 5.213448872545618, "learning_rate": 5e-06, "loss": 0.0968, "num_input_tokens_seen": 126102532, "step": 1397 }, { "epoch": 6.422988505747126, "loss": 0.10766983777284622, "loss_ce": 3.8197222238522954e-06, "loss_iou": 0.3515625, "loss_num": 0.021484375, "loss_xval": 0.107421875, "num_input_tokens_seen": 126102532, "step": 1397 }, { "epoch": 6.427586206896552, "grad_norm": 19.99194183396297, "learning_rate": 5e-06, "loss": 0.069, "num_input_tokens_seen": 126192844, "step": 1398 }, { "epoch": 6.427586206896552, "loss": 0.07072758674621582, "loss_ce": 3.0986652745923493e-06, "loss_iou": 0.271484375, "loss_num": 0.01416015625, "loss_xval": 0.07080078125, "num_input_tokens_seen": 126192844, "step": 1398 }, { "epoch": 6.432183908045977, "grad_norm": 2.972490756323674, "learning_rate": 5e-06, "loss": 0.0783, "num_input_tokens_seen": 126282472, "step": 1399 }, { "epoch": 6.432183908045977, "loss": 0.0742267370223999, "loss_ce": 5.376310582505539e-05, "loss_iou": 0.279296875, "loss_num": 0.01483154296875, "loss_xval": 0.07421875, "num_input_tokens_seen": 126282472, "step": 1399 }, { "epoch": 6.436781609195402, "grad_norm": 3.093510331762982, "learning_rate": 5e-06, "loss": 0.0807, "num_input_tokens_seen": 126372972, "step": 1400 }, { "epoch": 6.436781609195402, "loss": 0.06427352130413055, "loss_ce": 1.876192618510686e-05, "loss_iou": 0.322265625, "loss_num": 0.0128173828125, "loss_xval": 0.064453125, "num_input_tokens_seen": 126372972, "step": 1400 }, { "epoch": 6.441379310344828, "grad_norm": 8.92440544924096, "learning_rate": 5e-06, "loss": 0.092, "num_input_tokens_seen": 126463292, "step": 1401 }, { "epoch": 6.441379310344828, "loss": 0.07695218920707703, "loss_ce": 2.11300380215107e-06, "loss_iou": 0.314453125, "loss_num": 0.015380859375, "loss_xval": 0.0771484375, "num_input_tokens_seen": 126463292, "step": 1401 }, { "epoch": 6.445977011494253, "grad_norm": 5.049167412556098, "learning_rate": 5e-06, "loss": 0.096, "num_input_tokens_seen": 126553568, "step": 1402 }, { "epoch": 6.445977011494253, "loss": 0.12945988774299622, "loss_ce": 4.316266313253436e-06, "loss_iou": 0.26953125, "loss_num": 0.02587890625, "loss_xval": 0.1298828125, "num_input_tokens_seen": 126553568, "step": 1402 }, { "epoch": 6.450574712643678, "grad_norm": 7.016975178744314, "learning_rate": 5e-06, "loss": 0.0665, "num_input_tokens_seen": 126643956, "step": 1403 }, { "epoch": 6.450574712643678, "loss": 0.0685601681470871, "loss_ce": 2.4308599222422345e-06, "loss_iou": 0.328125, "loss_num": 0.01373291015625, "loss_xval": 0.068359375, "num_input_tokens_seen": 126643956, "step": 1403 }, { "epoch": 6.455172413793104, "grad_norm": 4.991828966505337, "learning_rate": 5e-06, "loss": 0.1069, "num_input_tokens_seen": 126734232, "step": 1404 }, { "epoch": 6.455172413793104, "loss": 0.13235914707183838, "loss_ce": 4.41625525127165e-06, "loss_iou": 0.28515625, "loss_num": 0.0264892578125, "loss_xval": 0.1328125, "num_input_tokens_seen": 126734232, "step": 1404 }, { "epoch": 6.459770114942529, "grad_norm": 4.41915443276764, "learning_rate": 5e-06, "loss": 0.0643, "num_input_tokens_seen": 126824688, "step": 1405 }, { "epoch": 6.459770114942529, "loss": 0.05354461073875427, "loss_ce": 1.52462337155157e-06, "loss_iou": 0.298828125, "loss_num": 0.0107421875, "loss_xval": 0.053466796875, "num_input_tokens_seen": 126824688, "step": 1405 }, { "epoch": 6.464367816091954, "grad_norm": 27.333175354486055, "learning_rate": 5e-06, "loss": 0.0835, "num_input_tokens_seen": 126914992, "step": 1406 }, { "epoch": 6.464367816091954, "loss": 0.08278225362300873, "loss_ce": 3.324659246572992e-06, "loss_iou": 0.25390625, "loss_num": 0.0164794921875, "loss_xval": 0.0830078125, "num_input_tokens_seen": 126914992, "step": 1406 }, { "epoch": 6.468965517241379, "grad_norm": 15.2120482775101, "learning_rate": 5e-06, "loss": 0.0878, "num_input_tokens_seen": 127005376, "step": 1407 }, { "epoch": 6.468965517241379, "loss": 0.07776758074760437, "loss_ce": 8.793870620138478e-06, "loss_iou": 0.306640625, "loss_num": 0.01556396484375, "loss_xval": 0.07763671875, "num_input_tokens_seen": 127005376, "step": 1407 }, { "epoch": 6.4735632183908045, "grad_norm": 5.4209480783540585, "learning_rate": 5e-06, "loss": 0.0926, "num_input_tokens_seen": 127094892, "step": 1408 }, { "epoch": 6.4735632183908045, "loss": 0.11467273533344269, "loss_ce": 2.9248494683997706e-06, "loss_iou": 0.333984375, "loss_num": 0.02294921875, "loss_xval": 0.11474609375, "num_input_tokens_seen": 127094892, "step": 1408 }, { "epoch": 6.47816091954023, "grad_norm": 6.961408738662496, "learning_rate": 5e-06, "loss": 0.1103, "num_input_tokens_seen": 127184420, "step": 1409 }, { "epoch": 6.47816091954023, "loss": 0.08804384618997574, "loss_ce": 6.312624805104861e-07, "loss_iou": 0.294921875, "loss_num": 0.017578125, "loss_xval": 0.087890625, "num_input_tokens_seen": 127184420, "step": 1409 }, { "epoch": 6.482758620689655, "grad_norm": 5.090185336665533, "learning_rate": 5e-06, "loss": 0.0674, "num_input_tokens_seen": 127274716, "step": 1410 }, { "epoch": 6.482758620689655, "loss": 0.09357266128063202, "loss_ce": 2.103199039993342e-05, "loss_iou": 0.294921875, "loss_num": 0.0186767578125, "loss_xval": 0.09375, "num_input_tokens_seen": 127274716, "step": 1410 }, { "epoch": 6.487356321839081, "grad_norm": 3.1966959428594626, "learning_rate": 5e-06, "loss": 0.0994, "num_input_tokens_seen": 127365044, "step": 1411 }, { "epoch": 6.487356321839081, "loss": 0.04377942159771919, "loss_ce": 1.9543890630302485e-06, "loss_iou": 0.26953125, "loss_num": 0.0087890625, "loss_xval": 0.043701171875, "num_input_tokens_seen": 127365044, "step": 1411 }, { "epoch": 6.491954022988506, "grad_norm": 8.572838699176348, "learning_rate": 5e-06, "loss": 0.0988, "num_input_tokens_seen": 127455440, "step": 1412 }, { "epoch": 6.491954022988506, "loss": 0.09225938469171524, "loss_ce": 4.743013505503768e-06, "loss_iou": 0.29296875, "loss_num": 0.0184326171875, "loss_xval": 0.09228515625, "num_input_tokens_seen": 127455440, "step": 1412 }, { "epoch": 6.496551724137931, "grad_norm": 13.582941616327785, "learning_rate": 5e-06, "loss": 0.0462, "num_input_tokens_seen": 127545812, "step": 1413 }, { "epoch": 6.496551724137931, "loss": 0.044346556067466736, "loss_ce": 4.514195552474121e-06, "loss_iou": 0.2734375, "loss_num": 0.00885009765625, "loss_xval": 0.04443359375, "num_input_tokens_seen": 127545812, "step": 1413 }, { "epoch": 6.501149425287356, "grad_norm": 8.548817159191486, "learning_rate": 5e-06, "loss": 0.0947, "num_input_tokens_seen": 127636132, "step": 1414 }, { "epoch": 6.501149425287356, "loss": 0.10972800850868225, "loss_ce": 2.04993671104603e-06, "loss_iou": 0.3125, "loss_num": 0.0218505859375, "loss_xval": 0.10986328125, "num_input_tokens_seen": 127636132, "step": 1414 }, { "epoch": 6.505747126436781, "grad_norm": 5.849875972289943, "learning_rate": 5e-06, "loss": 0.0632, "num_input_tokens_seen": 127724944, "step": 1415 }, { "epoch": 6.505747126436781, "loss": 0.04938147962093353, "loss_ce": 4.03994590669754e-06, "loss_iou": 0.3515625, "loss_num": 0.0098876953125, "loss_xval": 0.04931640625, "num_input_tokens_seen": 127724944, "step": 1415 }, { "epoch": 6.510344827586207, "grad_norm": 2.6655195062146237, "learning_rate": 5e-06, "loss": 0.1079, "num_input_tokens_seen": 127815308, "step": 1416 }, { "epoch": 6.510344827586207, "loss": 0.17061986029148102, "loss_ce": 5.711633275495842e-05, "loss_iou": 0.328125, "loss_num": 0.0341796875, "loss_xval": 0.1708984375, "num_input_tokens_seen": 127815308, "step": 1416 }, { "epoch": 6.514942528735633, "grad_norm": 11.77741222539578, "learning_rate": 5e-06, "loss": 0.0706, "num_input_tokens_seen": 127905652, "step": 1417 }, { "epoch": 6.514942528735633, "loss": 0.06571365892887115, "loss_ce": 9.316358045907691e-06, "loss_iou": 0.2578125, "loss_num": 0.01312255859375, "loss_xval": 0.06591796875, "num_input_tokens_seen": 127905652, "step": 1417 }, { "epoch": 6.519540229885058, "grad_norm": 7.004727703721243, "learning_rate": 5e-06, "loss": 0.0649, "num_input_tokens_seen": 127996084, "step": 1418 }, { "epoch": 6.519540229885058, "loss": 0.0499795600771904, "loss_ce": 7.023472790024243e-06, "loss_iou": 0.28515625, "loss_num": 0.010009765625, "loss_xval": 0.050048828125, "num_input_tokens_seen": 127996084, "step": 1418 }, { "epoch": 6.524137931034483, "grad_norm": 11.550186444604932, "learning_rate": 5e-06, "loss": 0.0998, "num_input_tokens_seen": 128086452, "step": 1419 }, { "epoch": 6.524137931034483, "loss": 0.1050664559006691, "loss_ce": 2.4955639673862606e-05, "loss_iou": 0.29296875, "loss_num": 0.02099609375, "loss_xval": 0.10498046875, "num_input_tokens_seen": 128086452, "step": 1419 }, { "epoch": 6.528735632183908, "grad_norm": 8.09948814876527, "learning_rate": 5e-06, "loss": 0.0952, "num_input_tokens_seen": 128176808, "step": 1420 }, { "epoch": 6.528735632183908, "loss": 0.09395314753055573, "loss_ce": 4.7787834773771465e-06, "loss_iou": 0.314453125, "loss_num": 0.018798828125, "loss_xval": 0.09375, "num_input_tokens_seen": 128176808, "step": 1420 }, { "epoch": 6.533333333333333, "grad_norm": 23.81012046657335, "learning_rate": 5e-06, "loss": 0.085, "num_input_tokens_seen": 128267156, "step": 1421 }, { "epoch": 6.533333333333333, "loss": 0.1023278459906578, "loss_ce": 2.4068172024271917e-06, "loss_iou": 0.322265625, "loss_num": 0.0203857421875, "loss_xval": 0.1025390625, "num_input_tokens_seen": 128267156, "step": 1421 }, { "epoch": 6.537931034482758, "grad_norm": 4.4144379527625315, "learning_rate": 5e-06, "loss": 0.0728, "num_input_tokens_seen": 128357584, "step": 1422 }, { "epoch": 6.537931034482758, "loss": 0.0876457542181015, "loss_ce": 2.978431984956842e-05, "loss_iou": 0.30078125, "loss_num": 0.0174560546875, "loss_xval": 0.08740234375, "num_input_tokens_seen": 128357584, "step": 1422 }, { "epoch": 6.5425287356321835, "grad_norm": 3.0109543272903045, "learning_rate": 5e-06, "loss": 0.0654, "num_input_tokens_seen": 128447236, "step": 1423 }, { "epoch": 6.5425287356321835, "loss": 0.06799499690532684, "loss_ce": 6.287074938882142e-05, "loss_iou": 0.32421875, "loss_num": 0.01361083984375, "loss_xval": 0.06787109375, "num_input_tokens_seen": 128447236, "step": 1423 }, { "epoch": 6.5471264367816095, "grad_norm": 3.6963053002280675, "learning_rate": 5e-06, "loss": 0.0752, "num_input_tokens_seen": 128537560, "step": 1424 }, { "epoch": 6.5471264367816095, "loss": 0.06803764402866364, "loss_ce": 1.3963182937004603e-05, "loss_iou": 0.333984375, "loss_num": 0.01361083984375, "loss_xval": 0.06787109375, "num_input_tokens_seen": 128537560, "step": 1424 }, { "epoch": 6.551724137931035, "grad_norm": 3.0893766848228514, "learning_rate": 5e-06, "loss": 0.0874, "num_input_tokens_seen": 128627956, "step": 1425 }, { "epoch": 6.551724137931035, "loss": 0.0690060555934906, "loss_ce": 2.1069548893137835e-05, "loss_iou": 0.3359375, "loss_num": 0.0137939453125, "loss_xval": 0.06884765625, "num_input_tokens_seen": 128627956, "step": 1425 }, { "epoch": 6.55632183908046, "grad_norm": 2.559876796681711, "learning_rate": 5e-06, "loss": 0.0667, "num_input_tokens_seen": 128718388, "step": 1426 }, { "epoch": 6.55632183908046, "loss": 0.08054003119468689, "loss_ce": 4.145399088884005e-06, "loss_iou": 0.333984375, "loss_num": 0.01611328125, "loss_xval": 0.08056640625, "num_input_tokens_seen": 128718388, "step": 1426 }, { "epoch": 6.560919540229885, "grad_norm": 13.711469163719162, "learning_rate": 5e-06, "loss": 0.1533, "num_input_tokens_seen": 128808732, "step": 1427 }, { "epoch": 6.560919540229885, "loss": 0.1875627636909485, "loss_ce": 3.2246465707430616e-05, "loss_iou": 0.30078125, "loss_num": 0.03759765625, "loss_xval": 0.1875, "num_input_tokens_seen": 128808732, "step": 1427 }, { "epoch": 6.56551724137931, "grad_norm": 8.158037791630148, "learning_rate": 5e-06, "loss": 0.0782, "num_input_tokens_seen": 128898284, "step": 1428 }, { "epoch": 6.56551724137931, "loss": 0.08450591564178467, "loss_ce": 2.7392322863306617e-06, "loss_iou": 0.2578125, "loss_num": 0.016845703125, "loss_xval": 0.08447265625, "num_input_tokens_seen": 128898284, "step": 1428 }, { "epoch": 6.570114942528735, "grad_norm": 16.342416907286246, "learning_rate": 5e-06, "loss": 0.0653, "num_input_tokens_seen": 128988672, "step": 1429 }, { "epoch": 6.570114942528735, "loss": 0.06996608525514603, "loss_ce": 4.540259851637529e-06, "loss_iou": 0.328125, "loss_num": 0.01397705078125, "loss_xval": 0.06982421875, "num_input_tokens_seen": 128988672, "step": 1429 }, { "epoch": 6.574712643678161, "grad_norm": 2.9446395510222763, "learning_rate": 5e-06, "loss": 0.0912, "num_input_tokens_seen": 129079076, "step": 1430 }, { "epoch": 6.574712643678161, "loss": 0.07431189715862274, "loss_ce": 6.262610259000212e-05, "loss_iou": 0.298828125, "loss_num": 0.01483154296875, "loss_xval": 0.07421875, "num_input_tokens_seen": 129079076, "step": 1430 }, { "epoch": 6.5793103448275865, "grad_norm": 5.022862929220284, "learning_rate": 5e-06, "loss": 0.0586, "num_input_tokens_seen": 129169404, "step": 1431 }, { "epoch": 6.5793103448275865, "loss": 0.042931437492370605, "loss_ce": 8.465913197142072e-06, "loss_iou": 0.322265625, "loss_num": 0.00860595703125, "loss_xval": 0.04296875, "num_input_tokens_seen": 129169404, "step": 1431 }, { "epoch": 6.583908045977012, "grad_norm": 5.866439214352878, "learning_rate": 5e-06, "loss": 0.0915, "num_input_tokens_seen": 129258248, "step": 1432 }, { "epoch": 6.583908045977012, "loss": 0.11402902007102966, "loss_ce": 1.534522198198829e-05, "loss_iou": 0.3046875, "loss_num": 0.0228271484375, "loss_xval": 0.1142578125, "num_input_tokens_seen": 129258248, "step": 1432 }, { "epoch": 6.588505747126437, "grad_norm": 2.4783513803938586, "learning_rate": 5e-06, "loss": 0.0603, "num_input_tokens_seen": 129348620, "step": 1433 }, { "epoch": 6.588505747126437, "loss": 0.0649535059928894, "loss_ce": 4.467387498152675e-06, "loss_iou": 0.322265625, "loss_num": 0.01300048828125, "loss_xval": 0.06494140625, "num_input_tokens_seen": 129348620, "step": 1433 }, { "epoch": 6.593103448275862, "grad_norm": 44.36894368647196, "learning_rate": 5e-06, "loss": 0.0781, "num_input_tokens_seen": 129439104, "step": 1434 }, { "epoch": 6.593103448275862, "loss": 0.06779413670301437, "loss_ce": 6.966247383388691e-06, "loss_iou": 0.25, "loss_num": 0.0135498046875, "loss_xval": 0.06787109375, "num_input_tokens_seen": 129439104, "step": 1434 }, { "epoch": 6.597701149425287, "grad_norm": 6.9878597014692225, "learning_rate": 5e-06, "loss": 0.0779, "num_input_tokens_seen": 129529500, "step": 1435 }, { "epoch": 6.597701149425287, "loss": 0.06153077632188797, "loss_ce": 7.337920578720514e-06, "loss_iou": 0.318359375, "loss_num": 0.0123291015625, "loss_xval": 0.0615234375, "num_input_tokens_seen": 129529500, "step": 1435 }, { "epoch": 6.602298850574712, "grad_norm": 14.338290986997505, "learning_rate": 5e-06, "loss": 0.0907, "num_input_tokens_seen": 129619932, "step": 1436 }, { "epoch": 6.602298850574712, "loss": 0.09894341975450516, "loss_ce": 3.59489640686661e-05, "loss_iou": 0.322265625, "loss_num": 0.019775390625, "loss_xval": 0.09912109375, "num_input_tokens_seen": 129619932, "step": 1436 }, { "epoch": 6.606896551724138, "grad_norm": 8.118572650309352, "learning_rate": 5e-06, "loss": 0.0468, "num_input_tokens_seen": 129710368, "step": 1437 }, { "epoch": 6.606896551724138, "loss": 0.05395686253905296, "loss_ce": 1.782671802175173e-06, "loss_iou": 0.357421875, "loss_num": 0.01080322265625, "loss_xval": 0.053955078125, "num_input_tokens_seen": 129710368, "step": 1437 }, { "epoch": 6.611494252873563, "grad_norm": 13.176137538777503, "learning_rate": 5e-06, "loss": 0.0747, "num_input_tokens_seen": 129800728, "step": 1438 }, { "epoch": 6.611494252873563, "loss": 0.10741189867258072, "loss_ce": 2.0546713130897842e-05, "loss_iou": 0.2734375, "loss_num": 0.021484375, "loss_xval": 0.107421875, "num_input_tokens_seen": 129800728, "step": 1438 }, { "epoch": 6.6160919540229886, "grad_norm": 2.9757478665850132, "learning_rate": 5e-06, "loss": 0.1168, "num_input_tokens_seen": 129891160, "step": 1439 }, { "epoch": 6.6160919540229886, "loss": 0.11056976020336151, "loss_ce": 4.582754172588466e-06, "loss_iou": 0.35546875, "loss_num": 0.0220947265625, "loss_xval": 0.1103515625, "num_input_tokens_seen": 129891160, "step": 1439 }, { "epoch": 6.620689655172414, "grad_norm": 2.0836625866660023, "learning_rate": 5e-06, "loss": 0.093, "num_input_tokens_seen": 129981596, "step": 1440 }, { "epoch": 6.620689655172414, "loss": 0.07989851385354996, "loss_ce": 3.400920468266122e-05, "loss_iou": 0.328125, "loss_num": 0.0159912109375, "loss_xval": 0.080078125, "num_input_tokens_seen": 129981596, "step": 1440 }, { "epoch": 6.625287356321839, "grad_norm": 11.346287982435365, "learning_rate": 5e-06, "loss": 0.0822, "num_input_tokens_seen": 130070368, "step": 1441 }, { "epoch": 6.625287356321839, "loss": 0.1127094179391861, "loss_ce": 8.00615543994354e-06, "loss_iou": 0.359375, "loss_num": 0.0225830078125, "loss_xval": 0.11279296875, "num_input_tokens_seen": 130070368, "step": 1441 }, { "epoch": 6.629885057471264, "grad_norm": 11.945688660639014, "learning_rate": 5e-06, "loss": 0.0604, "num_input_tokens_seen": 130160760, "step": 1442 }, { "epoch": 6.629885057471264, "loss": 0.06225637346506119, "loss_ce": 1.5771218386362307e-05, "loss_iou": 0.314453125, "loss_num": 0.012451171875, "loss_xval": 0.062255859375, "num_input_tokens_seen": 130160760, "step": 1442 }, { "epoch": 6.63448275862069, "grad_norm": 5.975406754271047, "learning_rate": 5e-06, "loss": 0.0666, "num_input_tokens_seen": 130251268, "step": 1443 }, { "epoch": 6.63448275862069, "loss": 0.0756722092628479, "loss_ce": 1.9129147403873503e-05, "loss_iou": 0.30078125, "loss_num": 0.01513671875, "loss_xval": 0.07568359375, "num_input_tokens_seen": 130251268, "step": 1443 }, { "epoch": 6.639080459770115, "grad_norm": 4.4111769643184005, "learning_rate": 5e-06, "loss": 0.0914, "num_input_tokens_seen": 130341548, "step": 1444 }, { "epoch": 6.639080459770115, "loss": 0.09213773906230927, "loss_ce": 5.1713404900510795e-06, "loss_iou": 0.275390625, "loss_num": 0.0184326171875, "loss_xval": 0.09228515625, "num_input_tokens_seen": 130341548, "step": 1444 }, { "epoch": 6.64367816091954, "grad_norm": 1.9533172759588555, "learning_rate": 5e-06, "loss": 0.07, "num_input_tokens_seen": 130431868, "step": 1445 }, { "epoch": 6.64367816091954, "loss": 0.03545193374156952, "loss_ce": 5.765972673543729e-06, "loss_iou": 0.2470703125, "loss_num": 0.007080078125, "loss_xval": 0.035400390625, "num_input_tokens_seen": 130431868, "step": 1445 }, { "epoch": 6.6482758620689655, "grad_norm": 1.6643815600000895, "learning_rate": 5e-06, "loss": 0.0661, "num_input_tokens_seen": 130522236, "step": 1446 }, { "epoch": 6.6482758620689655, "loss": 0.06866565346717834, "loss_ce": 1.09890947896929e-06, "loss_iou": 0.287109375, "loss_num": 0.01373291015625, "loss_xval": 0.06884765625, "num_input_tokens_seen": 130522236, "step": 1446 }, { "epoch": 6.652873563218391, "grad_norm": 8.868765611159223, "learning_rate": 5e-06, "loss": 0.0745, "num_input_tokens_seen": 130612552, "step": 1447 }, { "epoch": 6.652873563218391, "loss": 0.07027009129524231, "loss_ce": 3.366212013133918e-06, "loss_iou": 0.3125, "loss_num": 0.0140380859375, "loss_xval": 0.0703125, "num_input_tokens_seen": 130612552, "step": 1447 }, { "epoch": 6.657471264367816, "grad_norm": 8.270525030687544, "learning_rate": 5e-06, "loss": 0.0658, "num_input_tokens_seen": 130702956, "step": 1448 }, { "epoch": 6.657471264367816, "loss": 0.09026643633842468, "loss_ce": 1.069144764187513e-05, "loss_iou": 0.33984375, "loss_num": 0.01806640625, "loss_xval": 0.09033203125, "num_input_tokens_seen": 130702956, "step": 1448 }, { "epoch": 6.662068965517241, "grad_norm": 3.3647597113536123, "learning_rate": 5e-06, "loss": 0.0854, "num_input_tokens_seen": 130793328, "step": 1449 }, { "epoch": 6.662068965517241, "loss": 0.09332942962646484, "loss_ce": 6.675434633507393e-06, "loss_iou": 0.38671875, "loss_num": 0.0186767578125, "loss_xval": 0.09326171875, "num_input_tokens_seen": 130793328, "step": 1449 }, { "epoch": 6.666666666666667, "grad_norm": 131.97107940993814, "learning_rate": 5e-06, "loss": 0.0658, "num_input_tokens_seen": 130883796, "step": 1450 }, { "epoch": 6.666666666666667, "loss": 0.057534217834472656, "loss_ce": 2.383916944381781e-05, "loss_iou": 0.30078125, "loss_num": 0.01153564453125, "loss_xval": 0.0576171875, "num_input_tokens_seen": 130883796, "step": 1450 }, { "epoch": 6.671264367816092, "grad_norm": 23.1470764725479, "learning_rate": 5e-06, "loss": 0.0969, "num_input_tokens_seen": 130974220, "step": 1451 }, { "epoch": 6.671264367816092, "loss": 0.09790711104869843, "loss_ce": 6.722117177559994e-06, "loss_iou": 0.326171875, "loss_num": 0.0196533203125, "loss_xval": 0.09765625, "num_input_tokens_seen": 130974220, "step": 1451 }, { "epoch": 6.675862068965517, "grad_norm": 10.16468406745285, "learning_rate": 5e-06, "loss": 0.0803, "num_input_tokens_seen": 131064624, "step": 1452 }, { "epoch": 6.675862068965517, "loss": 0.061509158462285995, "loss_ce": 3.912944157491438e-05, "loss_iou": 0.30859375, "loss_num": 0.01226806640625, "loss_xval": 0.0615234375, "num_input_tokens_seen": 131064624, "step": 1452 }, { "epoch": 6.680459770114942, "grad_norm": 4.911042024533096, "learning_rate": 5e-06, "loss": 0.0926, "num_input_tokens_seen": 131154952, "step": 1453 }, { "epoch": 6.680459770114942, "loss": 0.09288729727268219, "loss_ce": 2.2301146600511856e-05, "loss_iou": 0.375, "loss_num": 0.0185546875, "loss_xval": 0.0927734375, "num_input_tokens_seen": 131154952, "step": 1453 }, { "epoch": 6.685057471264368, "grad_norm": 2.22095720475405, "learning_rate": 5e-06, "loss": 0.0656, "num_input_tokens_seen": 131245408, "step": 1454 }, { "epoch": 6.685057471264368, "loss": 0.07592545449733734, "loss_ce": 5.346221314539434e-06, "loss_iou": 0.361328125, "loss_num": 0.01519775390625, "loss_xval": 0.07568359375, "num_input_tokens_seen": 131245408, "step": 1454 }, { "epoch": 6.689655172413794, "grad_norm": 11.830161052137443, "learning_rate": 5e-06, "loss": 0.0673, "num_input_tokens_seen": 131335812, "step": 1455 }, { "epoch": 6.689655172413794, "loss": 0.06331153213977814, "loss_ce": 3.3334035833831877e-05, "loss_iou": 0.33984375, "loss_num": 0.01263427734375, "loss_xval": 0.0634765625, "num_input_tokens_seen": 131335812, "step": 1455 }, { "epoch": 6.694252873563219, "grad_norm": 3.0578099363890328, "learning_rate": 5e-06, "loss": 0.0779, "num_input_tokens_seen": 131426204, "step": 1456 }, { "epoch": 6.694252873563219, "loss": 0.07001975178718567, "loss_ce": 1.2430735296220519e-05, "loss_iou": 0.267578125, "loss_num": 0.01397705078125, "loss_xval": 0.06982421875, "num_input_tokens_seen": 131426204, "step": 1456 }, { "epoch": 6.698850574712644, "grad_norm": 5.750992935778732, "learning_rate": 5e-06, "loss": 0.0576, "num_input_tokens_seen": 131516528, "step": 1457 }, { "epoch": 6.698850574712644, "loss": 0.05864344537258148, "loss_ce": 3.921055849787081e-06, "loss_iou": 0.34375, "loss_num": 0.01171875, "loss_xval": 0.05859375, "num_input_tokens_seen": 131516528, "step": 1457 }, { "epoch": 6.703448275862069, "grad_norm": 10.893446380591408, "learning_rate": 5e-06, "loss": 0.1175, "num_input_tokens_seen": 131606848, "step": 1458 }, { "epoch": 6.703448275862069, "loss": 0.11121590435504913, "loss_ce": 9.852826224232558e-06, "loss_iou": 0.322265625, "loss_num": 0.022216796875, "loss_xval": 0.111328125, "num_input_tokens_seen": 131606848, "step": 1458 }, { "epoch": 6.708045977011494, "grad_norm": 27.414007106230482, "learning_rate": 5e-06, "loss": 0.1243, "num_input_tokens_seen": 131697268, "step": 1459 }, { "epoch": 6.708045977011494, "loss": 0.11673033237457275, "loss_ce": 1.5854706362006254e-05, "loss_iou": 0.259765625, "loss_num": 0.0233154296875, "loss_xval": 0.11669921875, "num_input_tokens_seen": 131697268, "step": 1459 }, { "epoch": 6.712643678160919, "grad_norm": 5.804991202555291, "learning_rate": 5e-06, "loss": 0.0791, "num_input_tokens_seen": 131787672, "step": 1460 }, { "epoch": 6.712643678160919, "loss": 0.09766550362110138, "loss_ce": 9.254023098037578e-06, "loss_iou": 0.275390625, "loss_num": 0.01953125, "loss_xval": 0.09765625, "num_input_tokens_seen": 131787672, "step": 1460 }, { "epoch": 6.7172413793103445, "grad_norm": 3.1386099033424135, "learning_rate": 5e-06, "loss": 0.0823, "num_input_tokens_seen": 131878152, "step": 1461 }, { "epoch": 6.7172413793103445, "loss": 0.13510626554489136, "loss_ce": 4.960803835274419e-06, "loss_iou": 0.28515625, "loss_num": 0.027099609375, "loss_xval": 0.134765625, "num_input_tokens_seen": 131878152, "step": 1461 }, { "epoch": 6.72183908045977, "grad_norm": 9.852881694264854, "learning_rate": 5e-06, "loss": 0.0609, "num_input_tokens_seen": 131968480, "step": 1462 }, { "epoch": 6.72183908045977, "loss": 0.06898908317089081, "loss_ce": 4.097350029041991e-06, "loss_iou": 0.328125, "loss_num": 0.0137939453125, "loss_xval": 0.06884765625, "num_input_tokens_seen": 131968480, "step": 1462 }, { "epoch": 6.726436781609196, "grad_norm": 2.7071823032354185, "learning_rate": 5e-06, "loss": 0.0568, "num_input_tokens_seen": 132059024, "step": 1463 }, { "epoch": 6.726436781609196, "loss": 0.059280022978782654, "loss_ce": 1.489080568717327e-05, "loss_iou": 0.3125, "loss_num": 0.0118408203125, "loss_xval": 0.059326171875, "num_input_tokens_seen": 132059024, "step": 1463 }, { "epoch": 6.731034482758621, "grad_norm": 7.777141629876652, "learning_rate": 5e-06, "loss": 0.0897, "num_input_tokens_seen": 132149448, "step": 1464 }, { "epoch": 6.731034482758621, "loss": 0.07515916228294373, "loss_ce": 2.0007742023153696e-06, "loss_iou": 0.337890625, "loss_num": 0.0150146484375, "loss_xval": 0.0751953125, "num_input_tokens_seen": 132149448, "step": 1464 }, { "epoch": 6.735632183908046, "grad_norm": 10.178285238489115, "learning_rate": 5e-06, "loss": 0.0693, "num_input_tokens_seen": 132239800, "step": 1465 }, { "epoch": 6.735632183908046, "loss": 0.08457478135824203, "loss_ce": 7.161090616136789e-05, "loss_iou": 0.328125, "loss_num": 0.016845703125, "loss_xval": 0.08447265625, "num_input_tokens_seen": 132239800, "step": 1465 }, { "epoch": 6.740229885057471, "grad_norm": 6.325965805360473, "learning_rate": 5e-06, "loss": 0.0763, "num_input_tokens_seen": 132330264, "step": 1466 }, { "epoch": 6.740229885057471, "loss": 0.0843062549829483, "loss_ce": 1.4451411516347434e-06, "loss_iou": 0.275390625, "loss_num": 0.016845703125, "loss_xval": 0.08447265625, "num_input_tokens_seen": 132330264, "step": 1466 }, { "epoch": 6.744827586206896, "grad_norm": 6.120131274499989, "learning_rate": 5e-06, "loss": 0.0625, "num_input_tokens_seen": 132420728, "step": 1467 }, { "epoch": 6.744827586206896, "loss": 0.07916628569364548, "loss_ce": 3.6849833122687414e-06, "loss_iou": 0.375, "loss_num": 0.015869140625, "loss_xval": 0.0791015625, "num_input_tokens_seen": 132420728, "step": 1467 }, { "epoch": 6.749425287356322, "grad_norm": 19.37810950613584, "learning_rate": 5e-06, "loss": 0.0693, "num_input_tokens_seen": 132511056, "step": 1468 }, { "epoch": 6.749425287356322, "loss": 0.06741497665643692, "loss_ce": 1.6471570916110068e-06, "loss_iou": 0.353515625, "loss_num": 0.01348876953125, "loss_xval": 0.0673828125, "num_input_tokens_seen": 132511056, "step": 1468 }, { "epoch": 6.7540229885057474, "grad_norm": 1.9425313486502551, "learning_rate": 5e-06, "loss": 0.0781, "num_input_tokens_seen": 132601416, "step": 1469 }, { "epoch": 6.7540229885057474, "loss": 0.09817390143871307, "loss_ce": 1.4117615137365647e-05, "loss_iou": 0.26171875, "loss_num": 0.0196533203125, "loss_xval": 0.09814453125, "num_input_tokens_seen": 132601416, "step": 1469 }, { "epoch": 6.758620689655173, "grad_norm": 2.6250049604837926, "learning_rate": 5e-06, "loss": 0.1059, "num_input_tokens_seen": 132691676, "step": 1470 }, { "epoch": 6.758620689655173, "loss": 0.08522357046604156, "loss_ce": 3.2353859751310665e-06, "loss_iou": 0.421875, "loss_num": 0.01708984375, "loss_xval": 0.08544921875, "num_input_tokens_seen": 132691676, "step": 1470 }, { "epoch": 6.763218390804598, "grad_norm": 2.724148470573844, "learning_rate": 5e-06, "loss": 0.0697, "num_input_tokens_seen": 132782044, "step": 1471 }, { "epoch": 6.763218390804598, "loss": 0.06568065285682678, "loss_ce": 6.820980161137413e-06, "loss_iou": 0.322265625, "loss_num": 0.01318359375, "loss_xval": 0.0654296875, "num_input_tokens_seen": 132782044, "step": 1471 }, { "epoch": 6.767816091954023, "grad_norm": 1.3975437384035634, "learning_rate": 5e-06, "loss": 0.0587, "num_input_tokens_seen": 132872492, "step": 1472 }, { "epoch": 6.767816091954023, "loss": 0.03942399471998215, "loss_ce": 1.0540796210989356e-05, "loss_iou": 0.234375, "loss_num": 0.00787353515625, "loss_xval": 0.039306640625, "num_input_tokens_seen": 132872492, "step": 1472 }, { "epoch": 6.772413793103448, "grad_norm": 3.6657098912800854, "learning_rate": 5e-06, "loss": 0.0834, "num_input_tokens_seen": 132962916, "step": 1473 }, { "epoch": 6.772413793103448, "loss": 0.09079530835151672, "loss_ce": 5.5107075240812264e-06, "loss_iou": 0.330078125, "loss_num": 0.0181884765625, "loss_xval": 0.0908203125, "num_input_tokens_seen": 132962916, "step": 1473 }, { "epoch": 6.777011494252873, "grad_norm": 6.399156287473962, "learning_rate": 5e-06, "loss": 0.0924, "num_input_tokens_seen": 133053228, "step": 1474 }, { "epoch": 6.777011494252873, "loss": 0.10846640169620514, "loss_ce": 6.926821697561536e-06, "loss_iou": 0.30078125, "loss_num": 0.021728515625, "loss_xval": 0.1083984375, "num_input_tokens_seen": 133053228, "step": 1474 }, { "epoch": 6.781609195402299, "grad_norm": 13.807814737472036, "learning_rate": 5e-06, "loss": 0.1142, "num_input_tokens_seen": 133143584, "step": 1475 }, { "epoch": 6.781609195402299, "loss": 0.10274454951286316, "loss_ce": 9.868255438050255e-05, "loss_iou": 0.373046875, "loss_num": 0.0205078125, "loss_xval": 0.1025390625, "num_input_tokens_seen": 133143584, "step": 1475 }, { "epoch": 6.786206896551724, "grad_norm": 4.909907187391009, "learning_rate": 5e-06, "loss": 0.0846, "num_input_tokens_seen": 133233944, "step": 1476 }, { "epoch": 6.786206896551724, "loss": 0.037097539752721786, "loss_ce": 3.4241477351315552e-06, "loss_iou": 0.255859375, "loss_num": 0.007415771484375, "loss_xval": 0.037109375, "num_input_tokens_seen": 133233944, "step": 1476 }, { "epoch": 6.7908045977011495, "grad_norm": 10.088133006314749, "learning_rate": 5e-06, "loss": 0.0748, "num_input_tokens_seen": 133324236, "step": 1477 }, { "epoch": 6.7908045977011495, "loss": 0.07998788356781006, "loss_ce": 1.318014255957678e-06, "loss_iou": 0.380859375, "loss_num": 0.0159912109375, "loss_xval": 0.080078125, "num_input_tokens_seen": 133324236, "step": 1477 }, { "epoch": 6.795402298850575, "grad_norm": 23.2424717637074, "learning_rate": 5e-06, "loss": 0.0614, "num_input_tokens_seen": 133414528, "step": 1478 }, { "epoch": 6.795402298850575, "loss": 0.060220953077077866, "loss_ce": 9.768824384082109e-06, "loss_iou": 0.32421875, "loss_num": 0.01202392578125, "loss_xval": 0.060302734375, "num_input_tokens_seen": 133414528, "step": 1478 }, { "epoch": 6.8, "grad_norm": 5.280653577103845, "learning_rate": 5e-06, "loss": 0.0916, "num_input_tokens_seen": 133504924, "step": 1479 }, { "epoch": 6.8, "loss": 0.09979541599750519, "loss_ce": 2.9406905923679005e-06, "loss_iou": 0.38671875, "loss_num": 0.0198974609375, "loss_xval": 0.099609375, "num_input_tokens_seen": 133504924, "step": 1479 }, { "epoch": 6.804597701149425, "grad_norm": 5.817883669287115, "learning_rate": 5e-06, "loss": 0.078, "num_input_tokens_seen": 133595204, "step": 1480 }, { "epoch": 6.804597701149425, "loss": 0.05428166314959526, "loss_ce": 6.149261480459245e-06, "loss_iou": 0.3203125, "loss_num": 0.0108642578125, "loss_xval": 0.05419921875, "num_input_tokens_seen": 133595204, "step": 1480 }, { "epoch": 6.809195402298851, "grad_norm": 23.814326518568098, "learning_rate": 5e-06, "loss": 0.0782, "num_input_tokens_seen": 133685616, "step": 1481 }, { "epoch": 6.809195402298851, "loss": 0.11151270568370819, "loss_ce": 1.4743751535206684e-06, "loss_iou": 0.26171875, "loss_num": 0.0223388671875, "loss_xval": 0.111328125, "num_input_tokens_seen": 133685616, "step": 1481 }, { "epoch": 6.813793103448276, "grad_norm": 8.881491653313477, "learning_rate": 5e-06, "loss": 0.0439, "num_input_tokens_seen": 133775972, "step": 1482 }, { "epoch": 6.813793103448276, "loss": 0.04635809361934662, "loss_ce": 9.521063475403935e-06, "loss_iou": 0.28125, "loss_num": 0.00927734375, "loss_xval": 0.04638671875, "num_input_tokens_seen": 133775972, "step": 1482 }, { "epoch": 6.818390804597701, "grad_norm": 11.628206791707466, "learning_rate": 5e-06, "loss": 0.076, "num_input_tokens_seen": 133866336, "step": 1483 }, { "epoch": 6.818390804597701, "loss": 0.08055493235588074, "loss_ce": 3.7818542750756023e-06, "loss_iou": 0.291015625, "loss_num": 0.01611328125, "loss_xval": 0.08056640625, "num_input_tokens_seen": 133866336, "step": 1483 }, { "epoch": 6.8229885057471265, "grad_norm": 2.105574615398993, "learning_rate": 5e-06, "loss": 0.0922, "num_input_tokens_seen": 133956784, "step": 1484 }, { "epoch": 6.8229885057471265, "loss": 0.09039635956287384, "loss_ce": 3.2979687603074126e-06, "loss_iou": 0.33203125, "loss_num": 0.01806640625, "loss_xval": 0.09033203125, "num_input_tokens_seen": 133956784, "step": 1484 }, { "epoch": 6.827586206896552, "grad_norm": 3.8079419854599075, "learning_rate": 5e-06, "loss": 0.1018, "num_input_tokens_seen": 134047040, "step": 1485 }, { "epoch": 6.827586206896552, "loss": 0.09102049469947815, "loss_ce": 0.00044431857531890273, "loss_iou": 0.34765625, "loss_num": 0.01806640625, "loss_xval": 0.0908203125, "num_input_tokens_seen": 134047040, "step": 1485 }, { "epoch": 6.832183908045977, "grad_norm": 21.116070086676018, "learning_rate": 5e-06, "loss": 0.1082, "num_input_tokens_seen": 134137528, "step": 1486 }, { "epoch": 6.832183908045977, "loss": 0.0954207181930542, "loss_ce": 7.509706392738735e-06, "loss_iou": 0.31640625, "loss_num": 0.01904296875, "loss_xval": 0.09521484375, "num_input_tokens_seen": 134137528, "step": 1486 }, { "epoch": 6.836781609195402, "grad_norm": 14.756310820481287, "learning_rate": 5e-06, "loss": 0.0697, "num_input_tokens_seen": 134227828, "step": 1487 }, { "epoch": 6.836781609195402, "loss": 0.06289245933294296, "loss_ce": 1.098936172638787e-05, "loss_iou": 0.33203125, "loss_num": 0.0125732421875, "loss_xval": 0.06298828125, "num_input_tokens_seen": 134227828, "step": 1487 }, { "epoch": 6.841379310344828, "grad_norm": 2.276241824617552, "learning_rate": 5e-06, "loss": 0.073, "num_input_tokens_seen": 134318180, "step": 1488 }, { "epoch": 6.841379310344828, "loss": 0.07049933075904846, "loss_ce": 3.7283075471350458e-06, "loss_iou": 0.326171875, "loss_num": 0.01409912109375, "loss_xval": 0.0703125, "num_input_tokens_seen": 134318180, "step": 1488 }, { "epoch": 6.845977011494253, "grad_norm": 9.670203919462505, "learning_rate": 5e-06, "loss": 0.0647, "num_input_tokens_seen": 134408616, "step": 1489 }, { "epoch": 6.845977011494253, "loss": 0.05651029944419861, "loss_ce": 7.007898602751084e-06, "loss_iou": 0.322265625, "loss_num": 0.01129150390625, "loss_xval": 0.056396484375, "num_input_tokens_seen": 134408616, "step": 1489 }, { "epoch": 6.850574712643678, "grad_norm": 4.106681680803513, "learning_rate": 5e-06, "loss": 0.0605, "num_input_tokens_seen": 134498968, "step": 1490 }, { "epoch": 6.850574712643678, "loss": 0.055772751569747925, "loss_ce": 1.8806914567903732e-06, "loss_iou": 0.2353515625, "loss_num": 0.01116943359375, "loss_xval": 0.0556640625, "num_input_tokens_seen": 134498968, "step": 1490 }, { "epoch": 6.855172413793103, "grad_norm": 8.347874999340169, "learning_rate": 5e-06, "loss": 0.0872, "num_input_tokens_seen": 134589296, "step": 1491 }, { "epoch": 6.855172413793103, "loss": 0.10776175558567047, "loss_ce": 1.9452994820312597e-05, "loss_iou": 0.3828125, "loss_num": 0.021484375, "loss_xval": 0.10791015625, "num_input_tokens_seen": 134589296, "step": 1491 }, { "epoch": 6.8597701149425285, "grad_norm": 11.405237946200163, "learning_rate": 5e-06, "loss": 0.0905, "num_input_tokens_seen": 134679776, "step": 1492 }, { "epoch": 6.8597701149425285, "loss": 0.09717725962400436, "loss_ce": 9.286872227676213e-06, "loss_iou": 0.361328125, "loss_num": 0.0194091796875, "loss_xval": 0.09716796875, "num_input_tokens_seen": 134679776, "step": 1492 }, { "epoch": 6.864367816091954, "grad_norm": 3.675422132416597, "learning_rate": 5e-06, "loss": 0.1053, "num_input_tokens_seen": 134769980, "step": 1493 }, { "epoch": 6.864367816091954, "loss": 0.0675213634967804, "loss_ce": 1.2203744290673058e-06, "loss_iou": 0.29296875, "loss_num": 0.01348876953125, "loss_xval": 0.0673828125, "num_input_tokens_seen": 134769980, "step": 1493 }, { "epoch": 6.86896551724138, "grad_norm": 5.2386125622777415, "learning_rate": 5e-06, "loss": 0.0628, "num_input_tokens_seen": 134860216, "step": 1494 }, { "epoch": 6.86896551724138, "loss": 0.07779578119516373, "loss_ce": 2.9361290216911584e-05, "loss_iou": 0.27734375, "loss_num": 0.01556396484375, "loss_xval": 0.07763671875, "num_input_tokens_seen": 134860216, "step": 1494 }, { "epoch": 6.873563218390805, "grad_norm": 3.015206476190183, "learning_rate": 5e-06, "loss": 0.0875, "num_input_tokens_seen": 134950708, "step": 1495 }, { "epoch": 6.873563218390805, "loss": 0.06314820051193237, "loss_ce": 7.3293504101457074e-06, "loss_iou": 0.255859375, "loss_num": 0.01263427734375, "loss_xval": 0.06298828125, "num_input_tokens_seen": 134950708, "step": 1495 }, { "epoch": 6.87816091954023, "grad_norm": 2.4409371869924272, "learning_rate": 5e-06, "loss": 0.0583, "num_input_tokens_seen": 135041104, "step": 1496 }, { "epoch": 6.87816091954023, "loss": 0.046701833605766296, "loss_ce": 9.940345989889465e-06, "loss_iou": 0.271484375, "loss_num": 0.00933837890625, "loss_xval": 0.046630859375, "num_input_tokens_seen": 135041104, "step": 1496 }, { "epoch": 6.882758620689655, "grad_norm": 4.738944993160565, "learning_rate": 5e-06, "loss": 0.0552, "num_input_tokens_seen": 135131460, "step": 1497 }, { "epoch": 6.882758620689655, "loss": 0.04962436109781265, "loss_ce": 2.7832118121295935e-06, "loss_iou": 0.3125, "loss_num": 0.00994873046875, "loss_xval": 0.049560546875, "num_input_tokens_seen": 135131460, "step": 1497 }, { "epoch": 6.88735632183908, "grad_norm": 8.49862390193789, "learning_rate": 5e-06, "loss": 0.0653, "num_input_tokens_seen": 135221032, "step": 1498 }, { "epoch": 6.88735632183908, "loss": 0.041058339178562164, "loss_ce": 4.569673365040217e-06, "loss_iou": 0.2890625, "loss_num": 0.0081787109375, "loss_xval": 0.041015625, "num_input_tokens_seen": 135221032, "step": 1498 }, { "epoch": 6.8919540229885055, "grad_norm": 15.242863888922875, "learning_rate": 5e-06, "loss": 0.064, "num_input_tokens_seen": 135311548, "step": 1499 }, { "epoch": 6.8919540229885055, "loss": 0.06402122974395752, "loss_ce": 1.0608761840558145e-05, "loss_iou": 0.33203125, "loss_num": 0.0128173828125, "loss_xval": 0.06396484375, "num_input_tokens_seen": 135311548, "step": 1499 }, { "epoch": 6.896551724137931, "grad_norm": 2.704452207506696, "learning_rate": 5e-06, "loss": 0.0922, "num_input_tokens_seen": 135401836, "step": 1500 }, { "epoch": 6.896551724137931, "eval_seeclick_CIoU": 0.46876952052116394, "eval_seeclick_GIoU": 0.4556391090154648, "eval_seeclick_IoU": 0.5108004212379456, "eval_seeclick_MAE_all": 0.061132827773690224, "eval_seeclick_MAE_h": 0.04886363446712494, "eval_seeclick_MAE_w": 0.12054737657308578, "eval_seeclick_MAE_x_boxes": 0.10729708895087242, "eval_seeclick_MAE_y_boxes": 0.052372606471180916, "eval_seeclick_NUM_probability": 0.9999991655349731, "eval_seeclick_inside_bbox": 0.7542613744735718, "eval_seeclick_loss": 0.37735113501548767, "eval_seeclick_loss_ce": 0.07988087832927704, "eval_seeclick_loss_iou": 0.4791259765625, "eval_seeclick_loss_num": 0.06346893310546875, "eval_seeclick_loss_xval": 0.317535400390625, "eval_seeclick_runtime": 75.9161, "eval_seeclick_samples_per_second": 0.566, "eval_seeclick_steps_per_second": 0.026, "num_input_tokens_seen": 135401836, "step": 1500 }, { "epoch": 6.896551724137931, "eval_icons_CIoU": 0.6196990609169006, "eval_icons_GIoU": 0.6126499474048615, "eval_icons_IoU": 0.6482784748077393, "eval_icons_MAE_all": 0.03874216973781586, "eval_icons_MAE_h": 0.0633212048560381, "eval_icons_MAE_w": 0.06230618245899677, "eval_icons_MAE_x_boxes": 0.056700803339481354, "eval_icons_MAE_y_boxes": 0.06309299729764462, "eval_icons_NUM_probability": 0.9999997913837433, "eval_icons_inside_bbox": 0.8107638955116272, "eval_icons_loss": 0.1911141723394394, "eval_icons_loss_ce": 1.2011703915959515e-06, "eval_icons_loss_iou": 0.45794677734375, "eval_icons_loss_num": 0.04096221923828125, "eval_icons_loss_xval": 0.2046356201171875, "eval_icons_runtime": 99.4274, "eval_icons_samples_per_second": 0.503, "eval_icons_steps_per_second": 0.02, "num_input_tokens_seen": 135401836, "step": 1500 }, { "epoch": 6.896551724137931, "eval_screenspot_CIoU": 0.4273842175801595, "eval_screenspot_GIoU": 0.4152764479319255, "eval_screenspot_IoU": 0.4939347803592682, "eval_screenspot_MAE_all": 0.083914448817571, "eval_screenspot_MAE_h": 0.07696965336799622, "eval_screenspot_MAE_w": 0.17438126603762308, "eval_screenspot_MAE_x_boxes": 0.1727352738380432, "eval_screenspot_MAE_y_boxes": 0.0719092587629954, "eval_screenspot_NUM_probability": 0.9999995628992716, "eval_screenspot_inside_bbox": 0.7504166762034098, "eval_screenspot_loss": 0.41863083839416504, "eval_screenspot_loss_ce": 4.884489499090705e-05, "eval_screenspot_loss_iou": 0.3917236328125, "eval_screenspot_loss_num": 0.08594767252604167, "eval_screenspot_loss_xval": 0.4297281901041667, "eval_screenspot_runtime": 164.7584, "eval_screenspot_samples_per_second": 0.54, "eval_screenspot_steps_per_second": 0.018, "num_input_tokens_seen": 135401836, "step": 1500 }, { "epoch": 6.896551724137931, "eval_compot_CIoU": 0.4806292653083801, "eval_compot_GIoU": 0.4541057199239731, "eval_compot_IoU": 0.546671450138092, "eval_compot_MAE_all": 0.05766832269728184, "eval_compot_MAE_h": 0.07456954568624496, "eval_compot_MAE_w": 0.11397556215524673, "eval_compot_MAE_x_boxes": 0.10434515029191971, "eval_compot_MAE_y_boxes": 0.07447323948144913, "eval_compot_NUM_probability": 0.9999996423721313, "eval_compot_inside_bbox": 0.7604166567325592, "eval_compot_loss": 0.31393927335739136, "eval_compot_loss_ce": 0.01060745446011424, "eval_compot_loss_iou": 0.49609375, "eval_compot_loss_num": 0.05194854736328125, "eval_compot_loss_xval": 0.259796142578125, "eval_compot_runtime": 92.9548, "eval_compot_samples_per_second": 0.538, "eval_compot_steps_per_second": 0.022, "num_input_tokens_seen": 135401836, "step": 1500 } ], "logging_steps": 1.0, "max_steps": 10000, "num_input_tokens_seen": 135401836, "num_train_epochs": 47, "save_steps": 250, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 839231021613056.0, "train_batch_size": 4, "trial_name": null, "trial_params": null }