diff --git "a/trainer_state.json" "b/trainer_state.json" new file mode 100644--- /dev/null +++ "b/trainer_state.json" @@ -0,0 +1,160743 @@ +{ + "best_metric": null, + "best_model_checkpoint": null, + "epoch": 0.9929078014184397, + "eval_steps": 250, + "global_step": 8750, + "is_hyper_param_search": false, + "is_local_process_zero": true, + "is_world_process_zero": true, + "log_history": [ + { + "epoch": 0.00011347517730496454, + "grad_norm": 90.5235595703125, + "learning_rate": 5e-05, + "loss": 8.3287, + "num_input_tokens_seen": 67560, + "step": 1 + }, + { + "epoch": 0.00011347517730496454, + "loss": 8.36860466003418, + "loss_ce": 4.23383903503418, + "loss_iou": 1.234375, + "loss_num": 0.3359375, + "loss_xval": 4.125, + "num_input_tokens_seen": 67560, + "step": 1 + }, + { + "epoch": 0.00022695035460992908, + "grad_norm": 196.30015563964844, + "learning_rate": 5e-05, + "loss": 10.0887, + "num_input_tokens_seen": 134684, + "step": 2 + }, + { + "epoch": 0.00022695035460992908, + "loss": 10.255687713623047, + "loss_ce": 6.415843963623047, + "loss_iou": 1.1640625, + "loss_num": 0.30078125, + "loss_xval": 3.84375, + "num_input_tokens_seen": 134684, + "step": 2 + }, + { + "epoch": 0.00034042553191489364, + "grad_norm": 41.12844467163086, + "learning_rate": 5e-05, + "loss": 6.8578, + "num_input_tokens_seen": 201604, + "step": 3 + }, + { + "epoch": 0.00034042553191489364, + "loss": 6.811771392822266, + "loss_ce": 3.1340367794036865, + "loss_iou": 1.2890625, + "loss_num": 0.220703125, + "loss_xval": 3.671875, + "num_input_tokens_seen": 201604, + "step": 3 + }, + { + "epoch": 0.00045390070921985816, + "grad_norm": 39.89942169189453, + "learning_rate": 5e-05, + "loss": 5.2252, + "num_input_tokens_seen": 268056, + "step": 4 + }, + { + "epoch": 0.00045390070921985816, + "loss": 5.276898384094238, + "loss_ce": 1.0522890090942383, + "loss_iou": 1.46875, + "loss_num": 0.25390625, + "loss_xval": 4.21875, + "num_input_tokens_seen": 268056, + "step": 4 + }, + { + "epoch": 0.0005673758865248227, + "grad_norm": 34.42279815673828, + "learning_rate": 5e-05, + "loss": 5.2637, + "num_input_tokens_seen": 335760, + "step": 5 + }, + { + "epoch": 0.0005673758865248227, + "loss": 5.27735710144043, + "loss_ce": 0.2929823398590088, + "loss_iou": 1.4453125, + "loss_num": 0.419921875, + "loss_xval": 5.0, + "num_input_tokens_seen": 335760, + "step": 5 + }, + { + "epoch": 0.0006808510638297873, + "grad_norm": 33.10007095336914, + "learning_rate": 5e-05, + "loss": 4.6843, + "num_input_tokens_seen": 402820, + "step": 6 + }, + { + "epoch": 0.0006808510638297873, + "loss": 4.603710174560547, + "loss_ce": 0.6896477341651917, + "loss_iou": 1.3359375, + "loss_num": 0.248046875, + "loss_xval": 3.90625, + "num_input_tokens_seen": 402820, + "step": 6 + }, + { + "epoch": 0.0007943262411347517, + "grad_norm": 15.588043212890625, + "learning_rate": 5e-05, + "loss": 4.4173, + "num_input_tokens_seen": 468516, + "step": 7 + }, + { + "epoch": 0.0007943262411347517, + "loss": 4.321022033691406, + "loss_ce": 0.2995373010635376, + "loss_iou": 1.1875, + "loss_num": 0.330078125, + "loss_xval": 4.03125, + "num_input_tokens_seen": 468516, + "step": 7 + }, + { + "epoch": 0.0009078014184397163, + "grad_norm": 31.09037971496582, + "learning_rate": 5e-05, + "loss": 5.1042, + "num_input_tokens_seen": 536656, + "step": 8 + }, + { + "epoch": 0.0009078014184397163, + "loss": 5.1623077392578125, + "loss_ce": 0.513870120048523, + "loss_iou": 1.6640625, + "loss_num": 0.263671875, + "loss_xval": 4.65625, + "num_input_tokens_seen": 536656, + "step": 8 + }, + { + "epoch": 0.0010212765957446808, + "grad_norm": 18.963869094848633, + "learning_rate": 5e-05, + "loss": 4.7356, + "num_input_tokens_seen": 604644, + "step": 9 + }, + { + "epoch": 0.0010212765957446808, + "loss": 4.693884372711182, + "loss_ce": 0.5688842535018921, + "loss_iou": 1.3828125, + "loss_num": 0.2734375, + "loss_xval": 4.125, + "num_input_tokens_seen": 604644, + "step": 9 + }, + { + "epoch": 0.0011347517730496454, + "grad_norm": 15.757165908813477, + "learning_rate": 5e-05, + "loss": 4.5426, + "num_input_tokens_seen": 671740, + "step": 10 + }, + { + "epoch": 0.0011347517730496454, + "loss": 4.356407165527344, + "loss_ce": 0.30171966552734375, + "loss_iou": 1.15625, + "loss_num": 0.349609375, + "loss_xval": 4.0625, + "num_input_tokens_seen": 671740, + "step": 10 + }, + { + "epoch": 0.00124822695035461, + "grad_norm": 13.840594291687012, + "learning_rate": 5e-05, + "loss": 4.1432, + "num_input_tokens_seen": 738508, + "step": 11 + }, + { + "epoch": 0.00124822695035461, + "loss": 4.0591301918029785, + "loss_ce": 0.19389574229717255, + "loss_iou": 1.2421875, + "loss_num": 0.279296875, + "loss_xval": 3.859375, + "num_input_tokens_seen": 738508, + "step": 11 + }, + { + "epoch": 0.0013617021276595745, + "grad_norm": 17.356962203979492, + "learning_rate": 5e-05, + "loss": 4.2318, + "num_input_tokens_seen": 805704, + "step": 12 + }, + { + "epoch": 0.0013617021276595745, + "loss": 4.3097639083862305, + "loss_ce": 0.16913913190364838, + "loss_iou": 1.4296875, + "loss_num": 0.255859375, + "loss_xval": 4.125, + "num_input_tokens_seen": 805704, + "step": 12 + }, + { + "epoch": 0.001475177304964539, + "grad_norm": 8.666651725769043, + "learning_rate": 5e-05, + "loss": 3.6895, + "num_input_tokens_seen": 871512, + "step": 13 + }, + { + "epoch": 0.001475177304964539, + "loss": 3.736276149749756, + "loss_ce": 0.10737009346485138, + "loss_iou": 1.1171875, + "loss_num": 0.279296875, + "loss_xval": 3.625, + "num_input_tokens_seen": 871512, + "step": 13 + }, + { + "epoch": 0.0015886524822695035, + "grad_norm": 6.279486179351807, + "learning_rate": 5e-05, + "loss": 3.7583, + "num_input_tokens_seen": 937096, + "step": 14 + }, + { + "epoch": 0.0015886524822695035, + "loss": 3.754384994506836, + "loss_ce": 0.049306854605674744, + "loss_iou": 1.265625, + "loss_num": 0.236328125, + "loss_xval": 3.703125, + "num_input_tokens_seen": 937096, + "step": 14 + }, + { + "epoch": 0.001702127659574468, + "grad_norm": 2.6924006938934326, + "learning_rate": 5e-05, + "loss": 3.9531, + "num_input_tokens_seen": 1004368, + "step": 15 + }, + { + "epoch": 0.001702127659574468, + "loss": 3.9087319374084473, + "loss_ce": 0.041544634848833084, + "loss_iou": 1.203125, + "loss_num": 0.291015625, + "loss_xval": 3.875, + "num_input_tokens_seen": 1004368, + "step": 15 + }, + { + "epoch": 0.0018156028368794327, + "grad_norm": 4.079937934875488, + "learning_rate": 5e-05, + "loss": 3.4095, + "num_input_tokens_seen": 1071004, + "step": 16 + }, + { + "epoch": 0.0018156028368794327, + "loss": 3.150897979736328, + "loss_ce": 0.02882775105535984, + "loss_iou": 0.9375, + "loss_num": 0.2490234375, + "loss_xval": 3.125, + "num_input_tokens_seen": 1071004, + "step": 16 + }, + { + "epoch": 0.0019290780141843972, + "grad_norm": 3.5311455726623535, + "learning_rate": 5e-05, + "loss": 3.7254, + "num_input_tokens_seen": 1137736, + "step": 17 + }, + { + "epoch": 0.0019290780141843972, + "loss": 3.6325912475585938, + "loss_ce": 0.01931004226207733, + "loss_iou": 1.203125, + "loss_num": 0.2421875, + "loss_xval": 3.609375, + "num_input_tokens_seen": 1137736, + "step": 17 + }, + { + "epoch": 0.0020425531914893616, + "grad_norm": 2.7977824211120605, + "learning_rate": 5e-05, + "loss": 3.5443, + "num_input_tokens_seen": 1205736, + "step": 18 + }, + { + "epoch": 0.0020425531914893616, + "loss": 3.518348217010498, + "loss_ce": 0.01834831014275551, + "loss_iou": 1.109375, + "loss_num": 0.255859375, + "loss_xval": 3.5, + "num_input_tokens_seen": 1205736, + "step": 18 + }, + { + "epoch": 0.002156028368794326, + "grad_norm": 3.9670426845550537, + "learning_rate": 5e-05, + "loss": 3.6942, + "num_input_tokens_seen": 1272608, + "step": 19 + }, + { + "epoch": 0.002156028368794326, + "loss": 3.572042942047119, + "loss_ce": 0.025167986750602722, + "loss_iou": 1.1875, + "loss_num": 0.236328125, + "loss_xval": 3.546875, + "num_input_tokens_seen": 1272608, + "step": 19 + }, + { + "epoch": 0.0022695035460992908, + "grad_norm": 3.912769317626953, + "learning_rate": 5e-05, + "loss": 3.52, + "num_input_tokens_seen": 1338988, + "step": 20 + }, + { + "epoch": 0.0022695035460992908, + "loss": 3.642185926437378, + "loss_ce": 0.019139010459184647, + "loss_iou": 1.125, + "loss_num": 0.2734375, + "loss_xval": 3.625, + "num_input_tokens_seen": 1338988, + "step": 20 + }, + { + "epoch": 0.0023829787234042553, + "grad_norm": 2.488839626312256, + "learning_rate": 5e-05, + "loss": 3.4574, + "num_input_tokens_seen": 1406196, + "step": 21 + }, + { + "epoch": 0.0023829787234042553, + "loss": 3.2799673080444336, + "loss_ce": 0.01824856549501419, + "loss_iou": 1.0234375, + "loss_num": 0.2421875, + "loss_xval": 3.265625, + "num_input_tokens_seen": 1406196, + "step": 21 + }, + { + "epoch": 0.00249645390070922, + "grad_norm": 6.84212589263916, + "learning_rate": 5e-05, + "loss": 3.5528, + "num_input_tokens_seen": 1472464, + "step": 22 + }, + { + "epoch": 0.00249645390070922, + "loss": 3.775001287460327, + "loss_ce": 0.01718885265290737, + "loss_iou": 1.3125, + "loss_num": 0.2255859375, + "loss_xval": 3.75, + "num_input_tokens_seen": 1472464, + "step": 22 + }, + { + "epoch": 0.0026099290780141845, + "grad_norm": 3.387972116470337, + "learning_rate": 5e-05, + "loss": 3.8574, + "num_input_tokens_seen": 1540132, + "step": 23 + }, + { + "epoch": 0.0026099290780141845, + "loss": 3.813058376312256, + "loss_ce": 0.020089438185095787, + "loss_iou": 1.15625, + "loss_num": 0.294921875, + "loss_xval": 3.796875, + "num_input_tokens_seen": 1540132, + "step": 23 + }, + { + "epoch": 0.002723404255319149, + "grad_norm": 4.467876434326172, + "learning_rate": 5e-05, + "loss": 3.8548, + "num_input_tokens_seen": 1607860, + "step": 24 + }, + { + "epoch": 0.002723404255319149, + "loss": 3.8982768058776855, + "loss_ce": 0.021323656663298607, + "loss_iou": 1.1875, + "loss_num": 0.30078125, + "loss_xval": 3.875, + "num_input_tokens_seen": 1607860, + "step": 24 + }, + { + "epoch": 0.0028368794326241137, + "grad_norm": 3.6098039150238037, + "learning_rate": 5e-05, + "loss": 3.451, + "num_input_tokens_seen": 1673944, + "step": 25 + }, + { + "epoch": 0.0028368794326241137, + "loss": 3.481443405151367, + "loss_ce": 0.01855286955833435, + "loss_iou": 1.1171875, + "loss_num": 0.24609375, + "loss_xval": 3.46875, + "num_input_tokens_seen": 1673944, + "step": 25 + }, + { + "epoch": 0.002950354609929078, + "grad_norm": 3.585299491882324, + "learning_rate": 5e-05, + "loss": 3.4337, + "num_input_tokens_seen": 1739840, + "step": 26 + }, + { + "epoch": 0.002950354609929078, + "loss": 3.3967514038085938, + "loss_ce": 0.019798345863819122, + "loss_iou": 1.15625, + "loss_num": 0.2119140625, + "loss_xval": 3.375, + "num_input_tokens_seen": 1739840, + "step": 26 + }, + { + "epoch": 0.0030638297872340424, + "grad_norm": 2.177669048309326, + "learning_rate": 5e-05, + "loss": 3.3965, + "num_input_tokens_seen": 1806380, + "step": 27 + }, + { + "epoch": 0.0030638297872340424, + "loss": 3.493107318878174, + "loss_ce": 0.020450910553336143, + "loss_iou": 1.1640625, + "loss_num": 0.2294921875, + "loss_xval": 3.46875, + "num_input_tokens_seen": 1806380, + "step": 27 + }, + { + "epoch": 0.003177304964539007, + "grad_norm": 3.5389018058776855, + "learning_rate": 5e-05, + "loss": 3.486, + "num_input_tokens_seen": 1873216, + "step": 28 + }, + { + "epoch": 0.003177304964539007, + "loss": 3.4427835941314697, + "loss_ce": 0.011143043637275696, + "loss_iou": 1.125, + "loss_num": 0.234375, + "loss_xval": 3.4375, + "num_input_tokens_seen": 1873216, + "step": 28 + }, + { + "epoch": 0.0032907801418439716, + "grad_norm": 3.1117992401123047, + "learning_rate": 5e-05, + "loss": 3.3388, + "num_input_tokens_seen": 1939944, + "step": 29 + }, + { + "epoch": 0.0032907801418439716, + "loss": 3.1803548336029053, + "loss_ce": 0.012386108748614788, + "loss_iou": 1.03125, + "loss_num": 0.220703125, + "loss_xval": 3.171875, + "num_input_tokens_seen": 1939944, + "step": 29 + }, + { + "epoch": 0.003404255319148936, + "grad_norm": 2.4219179153442383, + "learning_rate": 5e-05, + "loss": 3.4426, + "num_input_tokens_seen": 2007080, + "step": 30 + }, + { + "epoch": 0.003404255319148936, + "loss": 3.4650442600250244, + "loss_ce": 0.01387243065983057, + "loss_iou": 1.125, + "loss_num": 0.2412109375, + "loss_xval": 3.453125, + "num_input_tokens_seen": 2007080, + "step": 30 + }, + { + "epoch": 0.0035177304964539007, + "grad_norm": 4.935522556304932, + "learning_rate": 5e-05, + "loss": 3.4124, + "num_input_tokens_seen": 2074076, + "step": 31 + }, + { + "epoch": 0.0035177304964539007, + "loss": 3.330991506576538, + "loss_ce": 0.011655498296022415, + "loss_iou": 1.0703125, + "loss_num": 0.236328125, + "loss_xval": 3.3125, + "num_input_tokens_seen": 2074076, + "step": 31 + }, + { + "epoch": 0.0036312056737588653, + "grad_norm": 5.183689117431641, + "learning_rate": 5e-05, + "loss": 3.4611, + "num_input_tokens_seen": 2141424, + "step": 32 + }, + { + "epoch": 0.0036312056737588653, + "loss": 3.40551495552063, + "loss_ce": 0.009030616842210293, + "loss_iou": 1.140625, + "loss_num": 0.224609375, + "loss_xval": 3.390625, + "num_input_tokens_seen": 2141424, + "step": 32 + }, + { + "epoch": 0.00374468085106383, + "grad_norm": 4.60829496383667, + "learning_rate": 5e-05, + "loss": 3.3806, + "num_input_tokens_seen": 2208988, + "step": 33 + }, + { + "epoch": 0.00374468085106383, + "loss": 3.4679558277130127, + "loss_ce": 0.016784006729722023, + "loss_iou": 1.109375, + "loss_num": 0.24609375, + "loss_xval": 3.453125, + "num_input_tokens_seen": 2208988, + "step": 33 + }, + { + "epoch": 0.0038581560283687945, + "grad_norm": 4.031424522399902, + "learning_rate": 5e-05, + "loss": 3.331, + "num_input_tokens_seen": 2276336, + "step": 34 + }, + { + "epoch": 0.0038581560283687945, + "loss": 3.347846031188965, + "loss_ce": 0.009955615736544132, + "loss_iou": 1.109375, + "loss_num": 0.22265625, + "loss_xval": 3.34375, + "num_input_tokens_seen": 2276336, + "step": 34 + }, + { + "epoch": 0.003971631205673759, + "grad_norm": 7.9593586921691895, + "learning_rate": 5e-05, + "loss": 3.2893, + "num_input_tokens_seen": 2344096, + "step": 35 + }, + { + "epoch": 0.003971631205673759, + "loss": 3.2575840950012207, + "loss_ce": 0.0075841862708330154, + "loss_iou": 1.140625, + "loss_num": 0.1923828125, + "loss_xval": 3.25, + "num_input_tokens_seen": 2344096, + "step": 35 + }, + { + "epoch": 0.004085106382978723, + "grad_norm": 7.610040664672852, + "learning_rate": 5e-05, + "loss": 3.6787, + "num_input_tokens_seen": 2411364, + "step": 36 + }, + { + "epoch": 0.004085106382978723, + "loss": 3.6526670455932617, + "loss_ce": 0.015948139131069183, + "loss_iou": 1.203125, + "loss_num": 0.24609375, + "loss_xval": 3.640625, + "num_input_tokens_seen": 2411364, + "step": 36 + }, + { + "epoch": 0.004198581560283688, + "grad_norm": 5.338761329650879, + "learning_rate": 5e-05, + "loss": 3.2705, + "num_input_tokens_seen": 2478556, + "step": 37 + }, + { + "epoch": 0.004198581560283688, + "loss": 3.1766364574432373, + "loss_ce": 0.010620858520269394, + "loss_iou": 1.1328125, + "loss_num": 0.181640625, + "loss_xval": 3.171875, + "num_input_tokens_seen": 2478556, + "step": 37 + }, + { + "epoch": 0.004312056737588652, + "grad_norm": 10.389577865600586, + "learning_rate": 5e-05, + "loss": 3.2395, + "num_input_tokens_seen": 2544552, + "step": 38 + }, + { + "epoch": 0.004312056737588652, + "loss": 3.384225368499756, + "loss_ce": 0.007272328250110149, + "loss_iou": 1.2265625, + "loss_num": 0.185546875, + "loss_xval": 3.375, + "num_input_tokens_seen": 2544552, + "step": 38 + }, + { + "epoch": 0.004425531914893617, + "grad_norm": 13.238604545593262, + "learning_rate": 5e-05, + "loss": 3.7036, + "num_input_tokens_seen": 2611888, + "step": 39 + }, + { + "epoch": 0.004425531914893617, + "loss": 3.6673314571380615, + "loss_ce": 0.032565705478191376, + "loss_iou": 1.2265625, + "loss_num": 0.23828125, + "loss_xval": 3.640625, + "num_input_tokens_seen": 2611888, + "step": 39 + }, + { + "epoch": 0.0045390070921985815, + "grad_norm": 12.695770263671875, + "learning_rate": 5e-05, + "loss": 3.3083, + "num_input_tokens_seen": 2678140, + "step": 40 + }, + { + "epoch": 0.0045390070921985815, + "loss": 3.390463352203369, + "loss_ce": 0.01155702956020832, + "loss_iou": 1.140625, + "loss_num": 0.21875, + "loss_xval": 3.375, + "num_input_tokens_seen": 2678140, + "step": 40 + }, + { + "epoch": 0.004652482269503546, + "grad_norm": 11.37156867980957, + "learning_rate": 5e-05, + "loss": 3.1741, + "num_input_tokens_seen": 2744624, + "step": 41 + }, + { + "epoch": 0.004652482269503546, + "loss": 3.1725645065307617, + "loss_ce": 0.022173890843987465, + "loss_iou": 1.09375, + "loss_num": 0.1923828125, + "loss_xval": 3.15625, + "num_input_tokens_seen": 2744624, + "step": 41 + }, + { + "epoch": 0.004765957446808511, + "grad_norm": 10.418060302734375, + "learning_rate": 5e-05, + "loss": 3.4669, + "num_input_tokens_seen": 2810936, + "step": 42 + }, + { + "epoch": 0.004765957446808511, + "loss": 3.4210896492004395, + "loss_ce": 0.007027336861938238, + "loss_iou": 1.109375, + "loss_num": 0.23828125, + "loss_xval": 3.40625, + "num_input_tokens_seen": 2810936, + "step": 42 + }, + { + "epoch": 0.004879432624113475, + "grad_norm": 8.627490997314453, + "learning_rate": 5e-05, + "loss": 3.0411, + "num_input_tokens_seen": 2877324, + "step": 43 + }, + { + "epoch": 0.004879432624113475, + "loss": 2.9853785037994385, + "loss_ce": 0.014675471931695938, + "loss_iou": 1.015625, + "loss_num": 0.1875, + "loss_xval": 2.96875, + "num_input_tokens_seen": 2877324, + "step": 43 + }, + { + "epoch": 0.00499290780141844, + "grad_norm": 16.632253646850586, + "learning_rate": 5e-05, + "loss": 3.265, + "num_input_tokens_seen": 2944792, + "step": 44 + }, + { + "epoch": 0.00499290780141844, + "loss": 3.265839099884033, + "loss_ce": 0.011932829394936562, + "loss_iou": 1.1640625, + "loss_num": 0.1845703125, + "loss_xval": 3.25, + "num_input_tokens_seen": 2944792, + "step": 44 + }, + { + "epoch": 0.005106382978723404, + "grad_norm": 9.82559871673584, + "learning_rate": 5e-05, + "loss": 3.0431, + "num_input_tokens_seen": 3011688, + "step": 45 + }, + { + "epoch": 0.005106382978723404, + "loss": 3.136415958404541, + "loss_ce": 0.011416109278798103, + "loss_iou": 1.03125, + "loss_num": 0.2119140625, + "loss_xval": 3.125, + "num_input_tokens_seen": 3011688, + "step": 45 + }, + { + "epoch": 0.005219858156028369, + "grad_norm": 16.515214920043945, + "learning_rate": 5e-05, + "loss": 3.2692, + "num_input_tokens_seen": 3079392, + "step": 46 + }, + { + "epoch": 0.005219858156028369, + "loss": 3.2275028228759766, + "loss_ce": 0.006799888797104359, + "loss_iou": 1.25, + "loss_num": 0.14453125, + "loss_xval": 3.21875, + "num_input_tokens_seen": 3079392, + "step": 46 + }, + { + "epoch": 0.005333333333333333, + "grad_norm": 5.54909610748291, + "learning_rate": 5e-05, + "loss": 3.5026, + "num_input_tokens_seen": 3147208, + "step": 47 + }, + { + "epoch": 0.005333333333333333, + "loss": 3.5218372344970703, + "loss_ce": 0.008165373466908932, + "loss_iou": 1.1171875, + "loss_num": 0.255859375, + "loss_xval": 3.515625, + "num_input_tokens_seen": 3147208, + "step": 47 + }, + { + "epoch": 0.005446808510638298, + "grad_norm": 6.135898113250732, + "learning_rate": 5e-05, + "loss": 3.3064, + "num_input_tokens_seen": 3214660, + "step": 48 + }, + { + "epoch": 0.005446808510638298, + "loss": 3.2836520671844482, + "loss_ce": 0.010214436799287796, + "loss_iou": 1.078125, + "loss_num": 0.22265625, + "loss_xval": 3.28125, + "num_input_tokens_seen": 3214660, + "step": 48 + }, + { + "epoch": 0.005560283687943262, + "grad_norm": 5.578330993652344, + "learning_rate": 5e-05, + "loss": 3.0506, + "num_input_tokens_seen": 3281676, + "step": 49 + }, + { + "epoch": 0.005560283687943262, + "loss": 3.146181106567383, + "loss_ce": 0.013368489220738411, + "loss_iou": 1.0859375, + "loss_num": 0.1923828125, + "loss_xval": 3.125, + "num_input_tokens_seen": 3281676, + "step": 49 + }, + { + "epoch": 0.005673758865248227, + "grad_norm": 6.92474365234375, + "learning_rate": 5e-05, + "loss": 2.8251, + "num_input_tokens_seen": 3348316, + "step": 50 + }, + { + "epoch": 0.005673758865248227, + "loss": 2.927767753601074, + "loss_ce": 0.007845941931009293, + "loss_iou": 1.0625, + "loss_num": 0.158203125, + "loss_xval": 2.921875, + "num_input_tokens_seen": 3348316, + "step": 50 + }, + { + "epoch": 0.0057872340425531915, + "grad_norm": 11.40666389465332, + "learning_rate": 5e-05, + "loss": 3.2826, + "num_input_tokens_seen": 3414016, + "step": 51 + }, + { + "epoch": 0.0057872340425531915, + "loss": 3.0495896339416504, + "loss_ce": 0.008574165403842926, + "loss_iou": 1.1328125, + "loss_num": 0.15625, + "loss_xval": 3.046875, + "num_input_tokens_seen": 3414016, + "step": 51 + }, + { + "epoch": 0.005900709219858156, + "grad_norm": 17.173086166381836, + "learning_rate": 5e-05, + "loss": 3.2056, + "num_input_tokens_seen": 3481692, + "step": 52 + }, + { + "epoch": 0.005900709219858156, + "loss": 3.1950430870056152, + "loss_ce": 0.015355678275227547, + "loss_iou": 1.109375, + "loss_num": 0.193359375, + "loss_xval": 3.1875, + "num_input_tokens_seen": 3481692, + "step": 52 + }, + { + "epoch": 0.006014184397163121, + "grad_norm": 10.94240951538086, + "learning_rate": 5e-05, + "loss": 2.9328, + "num_input_tokens_seen": 3549808, + "step": 53 + }, + { + "epoch": 0.006014184397163121, + "loss": 2.91818904876709, + "loss_ce": 0.008032746613025665, + "loss_iou": 1.078125, + "loss_num": 0.1513671875, + "loss_xval": 2.90625, + "num_input_tokens_seen": 3549808, + "step": 53 + }, + { + "epoch": 0.006127659574468085, + "grad_norm": 5.602839469909668, + "learning_rate": 5e-05, + "loss": 2.6637, + "num_input_tokens_seen": 3616348, + "step": 54 + }, + { + "epoch": 0.006127659574468085, + "loss": 2.895622730255127, + "loss_ce": 0.006950977724045515, + "loss_iou": 1.109375, + "loss_num": 0.134765625, + "loss_xval": 2.890625, + "num_input_tokens_seen": 3616348, + "step": 54 + }, + { + "epoch": 0.00624113475177305, + "grad_norm": 6.273082733154297, + "learning_rate": 5e-05, + "loss": 2.8492, + "num_input_tokens_seen": 3683492, + "step": 55 + }, + { + "epoch": 0.00624113475177305, + "loss": 2.956972122192383, + "loss_ce": 0.013612963259220123, + "loss_iou": 1.1015625, + "loss_num": 0.1484375, + "loss_xval": 2.9375, + "num_input_tokens_seen": 3683492, + "step": 55 + }, + { + "epoch": 0.006354609929078014, + "grad_norm": 9.646655082702637, + "learning_rate": 5e-05, + "loss": 2.8677, + "num_input_tokens_seen": 3750624, + "step": 56 + }, + { + "epoch": 0.006354609929078014, + "loss": 2.83807373046875, + "loss_ce": 0.00799555703997612, + "loss_iou": 1.0546875, + "loss_num": 0.14453125, + "loss_xval": 2.828125, + "num_input_tokens_seen": 3750624, + "step": 56 + }, + { + "epoch": 0.006468085106382979, + "grad_norm": 13.432528495788574, + "learning_rate": 5e-05, + "loss": 2.6141, + "num_input_tokens_seen": 3817720, + "step": 57 + }, + { + "epoch": 0.006468085106382979, + "loss": 2.6622002124786377, + "loss_ce": 0.0059502627700567245, + "loss_iou": 1.0546875, + "loss_num": 0.1083984375, + "loss_xval": 2.65625, + "num_input_tokens_seen": 3817720, + "step": 57 + }, + { + "epoch": 0.006581560283687943, + "grad_norm": 18.2421875, + "learning_rate": 5e-05, + "loss": 3.0801, + "num_input_tokens_seen": 3884444, + "step": 58 + }, + { + "epoch": 0.006581560283687943, + "loss": 3.0577456951141357, + "loss_ce": 0.01087074913084507, + "loss_iou": 1.109375, + "loss_num": 0.1650390625, + "loss_xval": 3.046875, + "num_input_tokens_seen": 3884444, + "step": 58 + }, + { + "epoch": 0.006695035460992908, + "grad_norm": 13.229713439941406, + "learning_rate": 5e-05, + "loss": 2.6497, + "num_input_tokens_seen": 3951036, + "step": 59 + }, + { + "epoch": 0.006695035460992908, + "loss": 2.6308655738830566, + "loss_ce": 0.0058656251057982445, + "loss_iou": 1.0078125, + "loss_num": 0.12060546875, + "loss_xval": 2.625, + "num_input_tokens_seen": 3951036, + "step": 59 + }, + { + "epoch": 0.006808510638297872, + "grad_norm": 6.990492820739746, + "learning_rate": 5e-05, + "loss": 2.6705, + "num_input_tokens_seen": 4017340, + "step": 60 + }, + { + "epoch": 0.006808510638297872, + "loss": 2.5699586868286133, + "loss_ce": 0.007458396255970001, + "loss_iou": 0.984375, + "loss_num": 0.1201171875, + "loss_xval": 2.5625, + "num_input_tokens_seen": 4017340, + "step": 60 + }, + { + "epoch": 0.006921985815602837, + "grad_norm": 6.6131110191345215, + "learning_rate": 5e-05, + "loss": 2.6534, + "num_input_tokens_seen": 4084468, + "step": 61 + }, + { + "epoch": 0.006921985815602837, + "loss": 2.8547329902648926, + "loss_ce": 0.009029660373926163, + "loss_iou": 1.0859375, + "loss_num": 0.1337890625, + "loss_xval": 2.84375, + "num_input_tokens_seen": 4084468, + "step": 61 + }, + { + "epoch": 0.0070354609929078014, + "grad_norm": 9.116194725036621, + "learning_rate": 5e-05, + "loss": 2.7026, + "num_input_tokens_seen": 4151828, + "step": 62 + }, + { + "epoch": 0.0070354609929078014, + "loss": 2.6974616050720215, + "loss_ce": 0.008008562959730625, + "loss_iou": 1.0546875, + "loss_num": 0.1171875, + "loss_xval": 2.6875, + "num_input_tokens_seen": 4151828, + "step": 62 + }, + { + "epoch": 0.007148936170212766, + "grad_norm": 10.887898445129395, + "learning_rate": 5e-05, + "loss": 2.9228, + "num_input_tokens_seen": 4219728, + "step": 63 + }, + { + "epoch": 0.007148936170212766, + "loss": 3.015432834625244, + "loss_ce": 0.005667402874678373, + "loss_iou": 1.1875, + "loss_num": 0.126953125, + "loss_xval": 3.015625, + "num_input_tokens_seen": 4219728, + "step": 63 + }, + { + "epoch": 0.007262411347517731, + "grad_norm": 11.986128807067871, + "learning_rate": 5e-05, + "loss": 2.5102, + "num_input_tokens_seen": 4285748, + "step": 64 + }, + { + "epoch": 0.007262411347517731, + "loss": 2.4329214096069336, + "loss_ce": 0.0027457335963845253, + "loss_iou": 0.9375, + "loss_num": 0.111328125, + "loss_xval": 2.4375, + "num_input_tokens_seen": 4285748, + "step": 64 + }, + { + "epoch": 0.007375886524822695, + "grad_norm": 12.336956977844238, + "learning_rate": 5e-05, + "loss": 2.8239, + "num_input_tokens_seen": 4352364, + "step": 65 + }, + { + "epoch": 0.007375886524822695, + "loss": 2.779641628265381, + "loss_ce": 0.008157076314091682, + "loss_iou": 1.1171875, + "loss_num": 0.107421875, + "loss_xval": 2.765625, + "num_input_tokens_seen": 4352364, + "step": 65 + }, + { + "epoch": 0.00748936170212766, + "grad_norm": 9.20727252960205, + "learning_rate": 5e-05, + "loss": 3.0446, + "num_input_tokens_seen": 4420600, + "step": 66 + }, + { + "epoch": 0.00748936170212766, + "loss": 3.011997699737549, + "loss_ce": 0.008091485127806664, + "loss_iou": 1.046875, + "loss_num": 0.1806640625, + "loss_xval": 3.0, + "num_input_tokens_seen": 4420600, + "step": 66 + }, + { + "epoch": 0.007602836879432624, + "grad_norm": 7.669826507568359, + "learning_rate": 5e-05, + "loss": 2.6254, + "num_input_tokens_seen": 4487200, + "step": 67 + }, + { + "epoch": 0.007602836879432624, + "loss": 2.64054536819458, + "loss_ce": 0.009685940109193325, + "loss_iou": 1.0234375, + "loss_num": 0.1162109375, + "loss_xval": 2.625, + "num_input_tokens_seen": 4487200, + "step": 67 + }, + { + "epoch": 0.007716312056737589, + "grad_norm": 13.128554344177246, + "learning_rate": 5e-05, + "loss": 2.5327, + "num_input_tokens_seen": 4554376, + "step": 68 + }, + { + "epoch": 0.007716312056737589, + "loss": 2.6060943603515625, + "loss_ce": 0.010391267016530037, + "loss_iou": 1.0390625, + "loss_num": 0.1015625, + "loss_xval": 2.59375, + "num_input_tokens_seen": 4554376, + "step": 68 + }, + { + "epoch": 0.007829787234042554, + "grad_norm": 12.700163841247559, + "learning_rate": 5e-05, + "loss": 2.5059, + "num_input_tokens_seen": 4621396, + "step": 69 + }, + { + "epoch": 0.007829787234042554, + "loss": 2.4898605346679688, + "loss_ce": 0.003532308153808117, + "loss_iou": 1.03125, + "loss_num": 0.08642578125, + "loss_xval": 2.484375, + "num_input_tokens_seen": 4621396, + "step": 69 + }, + { + "epoch": 0.007943262411347518, + "grad_norm": 21.011924743652344, + "learning_rate": 5e-05, + "loss": 2.7317, + "num_input_tokens_seen": 4687856, + "step": 70 + }, + { + "epoch": 0.007943262411347518, + "loss": 2.686310052871704, + "loss_ce": 0.0046693808399140835, + "loss_iou": 1.109375, + "loss_num": 0.09228515625, + "loss_xval": 2.6875, + "num_input_tokens_seen": 4687856, + "step": 70 + }, + { + "epoch": 0.008056737588652482, + "grad_norm": 7.314874172210693, + "learning_rate": 5e-05, + "loss": 3.0733, + "num_input_tokens_seen": 4755752, + "step": 71 + }, + { + "epoch": 0.008056737588652482, + "loss": 3.1057815551757812, + "loss_ce": 0.01593760773539543, + "loss_iou": 1.1640625, + "loss_num": 0.1533203125, + "loss_xval": 3.09375, + "num_input_tokens_seen": 4755752, + "step": 71 + }, + { + "epoch": 0.008170212765957446, + "grad_norm": 8.18610668182373, + "learning_rate": 5e-05, + "loss": 2.7481, + "num_input_tokens_seen": 4823160, + "step": 72 + }, + { + "epoch": 0.008170212765957446, + "loss": 2.7782371044158936, + "loss_ce": 0.0087058674544096, + "loss_iou": 1.03125, + "loss_num": 0.140625, + "loss_xval": 2.765625, + "num_input_tokens_seen": 4823160, + "step": 72 + }, + { + "epoch": 0.00828368794326241, + "grad_norm": 15.738393783569336, + "learning_rate": 5e-05, + "loss": 2.4942, + "num_input_tokens_seen": 4890716, + "step": 73 + }, + { + "epoch": 0.00828368794326241, + "loss": 2.3533406257629395, + "loss_ce": 0.007881749421358109, + "loss_iou": 0.90234375, + "loss_num": 0.10791015625, + "loss_xval": 2.34375, + "num_input_tokens_seen": 4890716, + "step": 73 + }, + { + "epoch": 0.008397163120567376, + "grad_norm": 8.318984031677246, + "learning_rate": 5e-05, + "loss": 2.4547, + "num_input_tokens_seen": 4958708, + "step": 74 + }, + { + "epoch": 0.008397163120567376, + "loss": 2.4409358501434326, + "loss_ce": 0.011248283088207245, + "loss_iou": 1.015625, + "loss_num": 0.0810546875, + "loss_xval": 2.4375, + "num_input_tokens_seen": 4958708, + "step": 74 + }, + { + "epoch": 0.00851063829787234, + "grad_norm": 36.43742752075195, + "learning_rate": 5e-05, + "loss": 2.6262, + "num_input_tokens_seen": 5026356, + "step": 75 + }, + { + "epoch": 0.00851063829787234, + "loss": 2.6768808364868164, + "loss_ce": 0.008912023156881332, + "loss_iou": 1.0546875, + "loss_num": 0.1123046875, + "loss_xval": 2.671875, + "num_input_tokens_seen": 5026356, + "step": 75 + }, + { + "epoch": 0.008624113475177305, + "grad_norm": 9.238595008850098, + "learning_rate": 5e-05, + "loss": 2.5042, + "num_input_tokens_seen": 5093400, + "step": 76 + }, + { + "epoch": 0.008624113475177305, + "loss": 2.356269359588623, + "loss_ce": 0.004706800449639559, + "loss_iou": 0.9921875, + "loss_num": 0.07421875, + "loss_xval": 2.34375, + "num_input_tokens_seen": 5093400, + "step": 76 + }, + { + "epoch": 0.008737588652482269, + "grad_norm": 13.477540969848633, + "learning_rate": 5e-05, + "loss": 2.6462, + "num_input_tokens_seen": 5159720, + "step": 77 + }, + { + "epoch": 0.008737588652482269, + "loss": 2.5845227241516113, + "loss_ce": 0.006397773511707783, + "loss_iou": 1.0234375, + "loss_num": 0.10498046875, + "loss_xval": 2.578125, + "num_input_tokens_seen": 5159720, + "step": 77 + }, + { + "epoch": 0.008851063829787235, + "grad_norm": 11.283504486083984, + "learning_rate": 5e-05, + "loss": 2.4404, + "num_input_tokens_seen": 5226792, + "step": 78 + }, + { + "epoch": 0.008851063829787235, + "loss": 2.394350528717041, + "loss_ce": 0.007631853222846985, + "loss_iou": 1.0, + "loss_num": 0.07666015625, + "loss_xval": 2.390625, + "num_input_tokens_seen": 5226792, + "step": 78 + }, + { + "epoch": 0.008964539007092199, + "grad_norm": 11.463418006896973, + "learning_rate": 5e-05, + "loss": 2.8799, + "num_input_tokens_seen": 5293220, + "step": 79 + }, + { + "epoch": 0.008964539007092199, + "loss": 2.9511749744415283, + "loss_ce": 0.00976875051856041, + "loss_iou": 1.1796875, + "loss_num": 0.11572265625, + "loss_xval": 2.9375, + "num_input_tokens_seen": 5293220, + "step": 79 + }, + { + "epoch": 0.009078014184397163, + "grad_norm": 15.295690536499023, + "learning_rate": 5e-05, + "loss": 2.6627, + "num_input_tokens_seen": 5360832, + "step": 80 + }, + { + "epoch": 0.009078014184397163, + "loss": 2.5305275917053223, + "loss_ce": 0.007089939434081316, + "loss_iou": 1.046875, + "loss_num": 0.08740234375, + "loss_xval": 2.53125, + "num_input_tokens_seen": 5360832, + "step": 80 + }, + { + "epoch": 0.009191489361702127, + "grad_norm": 15.175496101379395, + "learning_rate": 5e-05, + "loss": 2.5766, + "num_input_tokens_seen": 5427464, + "step": 81 + }, + { + "epoch": 0.009191489361702127, + "loss": 2.704346179962158, + "loss_ce": 0.007080406881868839, + "loss_iou": 1.078125, + "loss_num": 0.1083984375, + "loss_xval": 2.703125, + "num_input_tokens_seen": 5427464, + "step": 81 + }, + { + "epoch": 0.009304964539007091, + "grad_norm": 9.961980819702148, + "learning_rate": 5e-05, + "loss": 2.3606, + "num_input_tokens_seen": 5493840, + "step": 82 + }, + { + "epoch": 0.009304964539007091, + "loss": 2.1796529293060303, + "loss_ce": 0.007533712778240442, + "loss_iou": 0.88671875, + "loss_num": 0.07958984375, + "loss_xval": 2.171875, + "num_input_tokens_seen": 5493840, + "step": 82 + }, + { + "epoch": 0.009418439716312057, + "grad_norm": 9.985162734985352, + "learning_rate": 5e-05, + "loss": 2.4102, + "num_input_tokens_seen": 5561752, + "step": 83 + }, + { + "epoch": 0.009418439716312057, + "loss": 2.4403955936431885, + "loss_ce": 0.008754943497478962, + "loss_iou": 1.015625, + "loss_num": 0.07958984375, + "loss_xval": 2.4375, + "num_input_tokens_seen": 5561752, + "step": 83 + }, + { + "epoch": 0.009531914893617021, + "grad_norm": 32.70734786987305, + "learning_rate": 5e-05, + "loss": 2.7913, + "num_input_tokens_seen": 5628484, + "step": 84 + }, + { + "epoch": 0.009531914893617021, + "loss": 2.967644691467285, + "loss_ce": 0.00670715793967247, + "loss_iou": 1.3046875, + "loss_num": 0.0693359375, + "loss_xval": 2.96875, + "num_input_tokens_seen": 5628484, + "step": 84 + }, + { + "epoch": 0.009645390070921986, + "grad_norm": 10.327466011047363, + "learning_rate": 5e-05, + "loss": 3.309, + "num_input_tokens_seen": 5695340, + "step": 85 + }, + { + "epoch": 0.009645390070921986, + "loss": 3.366931915283203, + "loss_ce": 0.009510194882750511, + "loss_iou": 1.1796875, + "loss_num": 0.2001953125, + "loss_xval": 3.359375, + "num_input_tokens_seen": 5695340, + "step": 85 + }, + { + "epoch": 0.00975886524822695, + "grad_norm": 7.2246928215026855, + "learning_rate": 5e-05, + "loss": 2.9581, + "num_input_tokens_seen": 5762656, + "step": 86 + }, + { + "epoch": 0.00975886524822695, + "loss": 2.818636417388916, + "loss_ce": 0.00808948464691639, + "loss_iou": 0.9921875, + "loss_num": 0.1640625, + "loss_xval": 2.8125, + "num_input_tokens_seen": 5762656, + "step": 86 + }, + { + "epoch": 0.009872340425531916, + "grad_norm": 8.491337776184082, + "learning_rate": 5e-05, + "loss": 2.7215, + "num_input_tokens_seen": 5829504, + "step": 87 + }, + { + "epoch": 0.009872340425531916, + "loss": 2.5756654739379883, + "loss_ce": 0.00877094455063343, + "loss_iou": 0.8984375, + "loss_num": 0.154296875, + "loss_xval": 2.5625, + "num_input_tokens_seen": 5829504, + "step": 87 + }, + { + "epoch": 0.00998581560283688, + "grad_norm": 7.848683834075928, + "learning_rate": 5e-05, + "loss": 2.5085, + "num_input_tokens_seen": 5895564, + "step": 88 + }, + { + "epoch": 0.00998581560283688, + "loss": 2.634725332260132, + "loss_ce": 0.007772136013954878, + "loss_iou": 1.015625, + "loss_num": 0.12060546875, + "loss_xval": 2.625, + "num_input_tokens_seen": 5895564, + "step": 88 + }, + { + "epoch": 0.010099290780141844, + "grad_norm": 7.273768901824951, + "learning_rate": 5e-05, + "loss": 2.5985, + "num_input_tokens_seen": 5963620, + "step": 89 + }, + { + "epoch": 0.010099290780141844, + "loss": 2.588622808456421, + "loss_ce": 0.010497660376131535, + "loss_iou": 0.9921875, + "loss_num": 0.11767578125, + "loss_xval": 2.578125, + "num_input_tokens_seen": 5963620, + "step": 89 + }, + { + "epoch": 0.010212765957446808, + "grad_norm": 18.910690307617188, + "learning_rate": 5e-05, + "loss": 2.4834, + "num_input_tokens_seen": 6030704, + "step": 90 + }, + { + "epoch": 0.010212765957446808, + "loss": 2.498807907104492, + "loss_ce": 0.006620313972234726, + "loss_iou": 0.99609375, + "loss_num": 0.1005859375, + "loss_xval": 2.5, + "num_input_tokens_seen": 6030704, + "step": 90 + }, + { + "epoch": 0.010326241134751774, + "grad_norm": 30.465431213378906, + "learning_rate": 5e-05, + "loss": 2.4956, + "num_input_tokens_seen": 6097956, + "step": 91 + }, + { + "epoch": 0.010326241134751774, + "loss": 2.4898793697357178, + "loss_ce": 0.00941071193665266, + "loss_iou": 1.0078125, + "loss_num": 0.09326171875, + "loss_xval": 2.484375, + "num_input_tokens_seen": 6097956, + "step": 91 + }, + { + "epoch": 0.010439716312056738, + "grad_norm": 11.278782844543457, + "learning_rate": 5e-05, + "loss": 2.548, + "num_input_tokens_seen": 6163920, + "step": 92 + }, + { + "epoch": 0.010439716312056738, + "loss": 2.6571731567382812, + "loss_ce": 0.010688611306250095, + "loss_iou": 1.078125, + "loss_num": 0.09814453125, + "loss_xval": 2.640625, + "num_input_tokens_seen": 6163920, + "step": 92 + }, + { + "epoch": 0.010553191489361702, + "grad_norm": 12.679636001586914, + "learning_rate": 5e-05, + "loss": 2.5286, + "num_input_tokens_seen": 6229992, + "step": 93 + }, + { + "epoch": 0.010553191489361702, + "loss": 2.572551727294922, + "loss_ce": 0.0041922349482774734, + "loss_iou": 1.078125, + "loss_num": 0.0830078125, + "loss_xval": 2.5625, + "num_input_tokens_seen": 6229992, + "step": 93 + }, + { + "epoch": 0.010666666666666666, + "grad_norm": 11.156278610229492, + "learning_rate": 5e-05, + "loss": 2.5555, + "num_input_tokens_seen": 6297076, + "step": 94 + }, + { + "epoch": 0.010666666666666666, + "loss": 2.678504705429077, + "loss_ce": 0.01248905435204506, + "loss_iou": 1.0703125, + "loss_num": 0.10498046875, + "loss_xval": 2.671875, + "num_input_tokens_seen": 6297076, + "step": 94 + }, + { + "epoch": 0.01078014184397163, + "grad_norm": 11.589823722839355, + "learning_rate": 5e-05, + "loss": 2.3912, + "num_input_tokens_seen": 6362696, + "step": 95 + }, + { + "epoch": 0.01078014184397163, + "loss": 2.400977849960327, + "loss_ce": 0.006446637213230133, + "loss_iou": 1.0078125, + "loss_num": 0.076171875, + "loss_xval": 2.390625, + "num_input_tokens_seen": 6362696, + "step": 95 + }, + { + "epoch": 0.010893617021276596, + "grad_norm": 14.1033353805542, + "learning_rate": 5e-05, + "loss": 2.0466, + "num_input_tokens_seen": 6428768, + "step": 96 + }, + { + "epoch": 0.010893617021276596, + "loss": 1.8193912506103516, + "loss_ce": 0.0112856924533844, + "loss_iou": 0.73046875, + "loss_num": 0.0703125, + "loss_xval": 1.8046875, + "num_input_tokens_seen": 6428768, + "step": 96 + }, + { + "epoch": 0.01100709219858156, + "grad_norm": 14.90049934387207, + "learning_rate": 5e-05, + "loss": 2.4124, + "num_input_tokens_seen": 6495888, + "step": 97 + }, + { + "epoch": 0.01100709219858156, + "loss": 2.4790687561035156, + "loss_ce": 0.010318921878933907, + "loss_iou": 1.0390625, + "loss_num": 0.07666015625, + "loss_xval": 2.46875, + "num_input_tokens_seen": 6495888, + "step": 97 + }, + { + "epoch": 0.011120567375886525, + "grad_norm": 22.12006187438965, + "learning_rate": 5e-05, + "loss": 2.6754, + "num_input_tokens_seen": 6562792, + "step": 98 + }, + { + "epoch": 0.011120567375886525, + "loss": 2.8142433166503906, + "loss_ce": 0.005649348255246878, + "loss_iou": 1.15625, + "loss_num": 0.1005859375, + "loss_xval": 2.8125, + "num_input_tokens_seen": 6562792, + "step": 98 + }, + { + "epoch": 0.011234042553191489, + "grad_norm": 9.519781112670898, + "learning_rate": 5e-05, + "loss": 2.5856, + "num_input_tokens_seen": 6628764, + "step": 99 + }, + { + "epoch": 0.011234042553191489, + "loss": 2.4742918014526367, + "loss_ce": 0.01726033166050911, + "loss_iou": 0.99609375, + "loss_num": 0.0927734375, + "loss_xval": 2.453125, + "num_input_tokens_seen": 6628764, + "step": 99 + }, + { + "epoch": 0.011347517730496455, + "grad_norm": 7.815848350524902, + "learning_rate": 5e-05, + "loss": 2.4106, + "num_input_tokens_seen": 6695816, + "step": 100 + }, + { + "epoch": 0.011347517730496455, + "loss": 2.491739273071289, + "loss_ce": 0.011270495131611824, + "loss_iou": 1.0390625, + "loss_num": 0.0810546875, + "loss_xval": 2.484375, + "num_input_tokens_seen": 6695816, + "step": 100 + }, + { + "epoch": 0.011460992907801419, + "grad_norm": 10.62049388885498, + "learning_rate": 5e-05, + "loss": 2.3533, + "num_input_tokens_seen": 6763276, + "step": 101 + }, + { + "epoch": 0.011460992907801419, + "loss": 2.2163610458374023, + "loss_ce": 0.006400047801434994, + "loss_iou": 0.94921875, + "loss_num": 0.0625, + "loss_xval": 2.203125, + "num_input_tokens_seen": 6763276, + "step": 101 + }, + { + "epoch": 0.011574468085106383, + "grad_norm": 12.495512008666992, + "learning_rate": 5e-05, + "loss": 2.4888, + "num_input_tokens_seen": 6830444, + "step": 102 + }, + { + "epoch": 0.011574468085106383, + "loss": 2.5892333984375, + "loss_ce": 0.016967855393886566, + "loss_iou": 1.0546875, + "loss_num": 0.091796875, + "loss_xval": 2.578125, + "num_input_tokens_seen": 6830444, + "step": 102 + }, + { + "epoch": 0.011687943262411347, + "grad_norm": 26.562650680541992, + "learning_rate": 5e-05, + "loss": 2.5084, + "num_input_tokens_seen": 6896724, + "step": 103 + }, + { + "epoch": 0.011687943262411347, + "loss": 2.6496243476867676, + "loss_ce": 0.008999311365187168, + "loss_iou": 1.125, + "loss_num": 0.07861328125, + "loss_xval": 2.640625, + "num_input_tokens_seen": 6896724, + "step": 103 + }, + { + "epoch": 0.011801418439716311, + "grad_norm": 9.034788131713867, + "learning_rate": 5e-05, + "loss": 2.8842, + "num_input_tokens_seen": 6964152, + "step": 104 + }, + { + "epoch": 0.011801418439716311, + "loss": 2.865622043609619, + "loss_ce": 0.008200171403586864, + "loss_iou": 1.09375, + "loss_num": 0.1328125, + "loss_xval": 2.859375, + "num_input_tokens_seen": 6964152, + "step": 104 + }, + { + "epoch": 0.011914893617021277, + "grad_norm": 10.767837524414062, + "learning_rate": 5e-05, + "loss": 2.2865, + "num_input_tokens_seen": 7030408, + "step": 105 + }, + { + "epoch": 0.011914893617021277, + "loss": 2.2559080123901367, + "loss_ce": 0.00761684263125062, + "loss_iou": 0.875, + "loss_num": 0.1005859375, + "loss_xval": 2.25, + "num_input_tokens_seen": 7030408, + "step": 105 + }, + { + "epoch": 0.012028368794326241, + "grad_norm": 7.837512016296387, + "learning_rate": 5e-05, + "loss": 2.4447, + "num_input_tokens_seen": 7097128, + "step": 106 + }, + { + "epoch": 0.012028368794326241, + "loss": 2.5091142654418945, + "loss_ce": 0.007161081302911043, + "loss_iou": 1.015625, + "loss_num": 0.0947265625, + "loss_xval": 2.5, + "num_input_tokens_seen": 7097128, + "step": 106 + }, + { + "epoch": 0.012141843971631205, + "grad_norm": 13.136813163757324, + "learning_rate": 5e-05, + "loss": 2.3716, + "num_input_tokens_seen": 7164304, + "step": 107 + }, + { + "epoch": 0.012141843971631205, + "loss": 2.366776943206787, + "loss_ce": 0.009354899637401104, + "loss_iou": 0.97265625, + "loss_num": 0.08251953125, + "loss_xval": 2.359375, + "num_input_tokens_seen": 7164304, + "step": 107 + }, + { + "epoch": 0.01225531914893617, + "grad_norm": 8.0120267868042, + "learning_rate": 5e-05, + "loss": 2.3944, + "num_input_tokens_seen": 7231808, + "step": 108 + }, + { + "epoch": 0.01225531914893617, + "loss": 2.392862558364868, + "loss_ce": 0.0051672388799488544, + "loss_iou": 0.984375, + "loss_num": 0.083984375, + "loss_xval": 2.390625, + "num_input_tokens_seen": 7231808, + "step": 108 + }, + { + "epoch": 0.012368794326241135, + "grad_norm": 23.499563217163086, + "learning_rate": 5e-05, + "loss": 2.2743, + "num_input_tokens_seen": 7299120, + "step": 109 + }, + { + "epoch": 0.012368794326241135, + "loss": 2.175337314605713, + "loss_ce": 0.007368695456534624, + "loss_iou": 0.91015625, + "loss_num": 0.0703125, + "loss_xval": 2.171875, + "num_input_tokens_seen": 7299120, + "step": 109 + }, + { + "epoch": 0.0124822695035461, + "grad_norm": 9.691670417785645, + "learning_rate": 5e-05, + "loss": 3.0051, + "num_input_tokens_seen": 7366356, + "step": 110 + }, + { + "epoch": 0.0124822695035461, + "loss": 2.8973937034606934, + "loss_ce": 0.010674742981791496, + "loss_iou": 1.109375, + "loss_num": 0.134765625, + "loss_xval": 2.890625, + "num_input_tokens_seen": 7366356, + "step": 110 + }, + { + "epoch": 0.012595744680851064, + "grad_norm": 6.900918960571289, + "learning_rate": 5e-05, + "loss": 2.7265, + "num_input_tokens_seen": 7432788, + "step": 111 + }, + { + "epoch": 0.012595744680851064, + "loss": 2.867152214050293, + "loss_ce": 0.011683541350066662, + "loss_iou": 1.09375, + "loss_num": 0.1328125, + "loss_xval": 2.859375, + "num_input_tokens_seen": 7432788, + "step": 111 + }, + { + "epoch": 0.012709219858156028, + "grad_norm": 10.436232566833496, + "learning_rate": 5e-05, + "loss": 2.4815, + "num_input_tokens_seen": 7500592, + "step": 112 + }, + { + "epoch": 0.012709219858156028, + "loss": 2.561492919921875, + "loss_ce": 0.01266486756503582, + "loss_iou": 1.015625, + "loss_num": 0.10205078125, + "loss_xval": 2.546875, + "num_input_tokens_seen": 7500592, + "step": 112 + }, + { + "epoch": 0.012822695035460992, + "grad_norm": 21.603513717651367, + "learning_rate": 5e-05, + "loss": 2.1651, + "num_input_tokens_seen": 7566208, + "step": 113 + }, + { + "epoch": 0.012822695035460992, + "loss": 2.025979995727539, + "loss_ce": 0.0074251932092010975, + "loss_iou": 0.82421875, + "loss_num": 0.07373046875, + "loss_xval": 2.015625, + "num_input_tokens_seen": 7566208, + "step": 113 + }, + { + "epoch": 0.012936170212765958, + "grad_norm": 15.403499603271484, + "learning_rate": 5e-05, + "loss": 2.5153, + "num_input_tokens_seen": 7633472, + "step": 114 + }, + { + "epoch": 0.012936170212765958, + "loss": 2.4700775146484375, + "loss_ce": 0.007186883594840765, + "loss_iou": 1.0390625, + "loss_num": 0.078125, + "loss_xval": 2.46875, + "num_input_tokens_seen": 7633472, + "step": 114 + }, + { + "epoch": 0.013049645390070922, + "grad_norm": 9.678199768066406, + "learning_rate": 5e-05, + "loss": 2.571, + "num_input_tokens_seen": 7701388, + "step": 115 + }, + { + "epoch": 0.013049645390070922, + "loss": 2.604154109954834, + "loss_ce": 0.008450953289866447, + "loss_iou": 1.0546875, + "loss_num": 0.0986328125, + "loss_xval": 2.59375, + "num_input_tokens_seen": 7701388, + "step": 115 + }, + { + "epoch": 0.013163120567375886, + "grad_norm": 7.475813388824463, + "learning_rate": 5e-05, + "loss": 2.1054, + "num_input_tokens_seen": 7767432, + "step": 116 + }, + { + "epoch": 0.013163120567375886, + "loss": 1.95319664478302, + "loss_ce": 0.007762027904391289, + "loss_iou": 0.796875, + "loss_num": 0.07080078125, + "loss_xval": 1.9453125, + "num_input_tokens_seen": 7767432, + "step": 116 + }, + { + "epoch": 0.01327659574468085, + "grad_norm": 18.160633087158203, + "learning_rate": 5e-05, + "loss": 2.2096, + "num_input_tokens_seen": 7834484, + "step": 117 + }, + { + "epoch": 0.01327659574468085, + "loss": 2.3236114978790283, + "loss_ce": 0.009158373810350895, + "loss_iou": 1.0078125, + "loss_num": 0.059326171875, + "loss_xval": 2.3125, + "num_input_tokens_seen": 7834484, + "step": 117 + }, + { + "epoch": 0.013390070921985816, + "grad_norm": 10.65219497680664, + "learning_rate": 5e-05, + "loss": 2.4445, + "num_input_tokens_seen": 7901544, + "step": 118 + }, + { + "epoch": 0.013390070921985816, + "loss": 2.4725282192230225, + "loss_ce": 0.013055614195764065, + "loss_iou": 0.9453125, + "loss_num": 0.11328125, + "loss_xval": 2.453125, + "num_input_tokens_seen": 7901544, + "step": 118 + }, + { + "epoch": 0.01350354609929078, + "grad_norm": 14.269102096557617, + "learning_rate": 5e-05, + "loss": 2.2962, + "num_input_tokens_seen": 7968748, + "step": 119 + }, + { + "epoch": 0.01350354609929078, + "loss": 2.209003210067749, + "loss_ce": 0.009296232834458351, + "loss_iou": 0.875, + "loss_num": 0.09033203125, + "loss_xval": 2.203125, + "num_input_tokens_seen": 7968748, + "step": 119 + }, + { + "epoch": 0.013617021276595745, + "grad_norm": 9.299524307250977, + "learning_rate": 5e-05, + "loss": 2.4374, + "num_input_tokens_seen": 8035692, + "step": 120 + }, + { + "epoch": 0.013617021276595745, + "loss": 2.3716607093811035, + "loss_ce": 0.008379592560231686, + "loss_iou": 0.94140625, + "loss_num": 0.095703125, + "loss_xval": 2.359375, + "num_input_tokens_seen": 8035692, + "step": 120 + }, + { + "epoch": 0.013730496453900709, + "grad_norm": 7.784370422363281, + "learning_rate": 5e-05, + "loss": 1.9827, + "num_input_tokens_seen": 8101412, + "step": 121 + }, + { + "epoch": 0.013730496453900709, + "loss": 1.881495475769043, + "loss_ce": 0.01186654344201088, + "loss_iou": 0.74609375, + "loss_num": 0.07568359375, + "loss_xval": 1.8671875, + "num_input_tokens_seen": 8101412, + "step": 121 + }, + { + "epoch": 0.013843971631205675, + "grad_norm": 11.692572593688965, + "learning_rate": 5e-05, + "loss": 2.1395, + "num_input_tokens_seen": 8169244, + "step": 122 + }, + { + "epoch": 0.013843971631205675, + "loss": 2.378270387649536, + "loss_ce": 0.009129786863923073, + "loss_iou": 1.015625, + "loss_num": 0.06884765625, + "loss_xval": 2.375, + "num_input_tokens_seen": 8169244, + "step": 122 + }, + { + "epoch": 0.013957446808510639, + "grad_norm": 17.866104125976562, + "learning_rate": 5e-05, + "loss": 2.5533, + "num_input_tokens_seen": 8236468, + "step": 123 + }, + { + "epoch": 0.013957446808510639, + "loss": 2.457265853881836, + "loss_ce": 0.006094034761190414, + "loss_iou": 1.0546875, + "loss_num": 0.0673828125, + "loss_xval": 2.453125, + "num_input_tokens_seen": 8236468, + "step": 123 + }, + { + "epoch": 0.014070921985815603, + "grad_norm": 9.165362358093262, + "learning_rate": 5e-05, + "loss": 2.2207, + "num_input_tokens_seen": 8303000, + "step": 124 + }, + { + "epoch": 0.014070921985815603, + "loss": 2.1637072563171387, + "loss_ce": 0.007457318715751171, + "loss_iou": 0.91796875, + "loss_num": 0.0634765625, + "loss_xval": 2.15625, + "num_input_tokens_seen": 8303000, + "step": 124 + }, + { + "epoch": 0.014184397163120567, + "grad_norm": 9.156044006347656, + "learning_rate": 5e-05, + "loss": 2.2287, + "num_input_tokens_seen": 8371712, + "step": 125 + }, + { + "epoch": 0.014184397163120567, + "loss": 2.153995990753174, + "loss_ce": 0.007511702366173267, + "loss_iou": 0.9375, + "loss_num": 0.055419921875, + "loss_xval": 2.140625, + "num_input_tokens_seen": 8371712, + "step": 125 + }, + { + "epoch": 0.014297872340425531, + "grad_norm": 24.355314254760742, + "learning_rate": 5e-05, + "loss": 2.2482, + "num_input_tokens_seen": 8438572, + "step": 126 + }, + { + "epoch": 0.014297872340425531, + "loss": 2.2000668048858643, + "loss_ce": 0.009881218895316124, + "loss_iou": 0.953125, + "loss_num": 0.0576171875, + "loss_xval": 2.1875, + "num_input_tokens_seen": 8438572, + "step": 126 + }, + { + "epoch": 0.014411347517730497, + "grad_norm": 14.52636432647705, + "learning_rate": 5e-05, + "loss": 2.5859, + "num_input_tokens_seen": 8505876, + "step": 127 + }, + { + "epoch": 0.014411347517730497, + "loss": 2.7124860286712646, + "loss_ce": 0.005454742815345526, + "loss_iou": 1.09375, + "loss_num": 0.10546875, + "loss_xval": 2.703125, + "num_input_tokens_seen": 8505876, + "step": 127 + }, + { + "epoch": 0.014524822695035461, + "grad_norm": 11.758727073669434, + "learning_rate": 5e-05, + "loss": 2.5773, + "num_input_tokens_seen": 8572344, + "step": 128 + }, + { + "epoch": 0.014524822695035461, + "loss": 2.619189739227295, + "loss_ce": 0.010791425593197346, + "loss_iou": 1.0546875, + "loss_num": 0.0986328125, + "loss_xval": 2.609375, + "num_input_tokens_seen": 8572344, + "step": 128 + }, + { + "epoch": 0.014638297872340425, + "grad_norm": 5.697221755981445, + "learning_rate": 5e-05, + "loss": 2.3766, + "num_input_tokens_seen": 8638248, + "step": 129 + }, + { + "epoch": 0.014638297872340425, + "loss": 2.4092721939086914, + "loss_ce": 0.006928377319127321, + "loss_iou": 1.0, + "loss_num": 0.08056640625, + "loss_xval": 2.40625, + "num_input_tokens_seen": 8638248, + "step": 129 + }, + { + "epoch": 0.01475177304964539, + "grad_norm": 6.488142967224121, + "learning_rate": 5e-05, + "loss": 2.0621, + "num_input_tokens_seen": 8704576, + "step": 130 + }, + { + "epoch": 0.01475177304964539, + "loss": 2.3174753189086914, + "loss_ce": 0.007904846221208572, + "loss_iou": 0.96484375, + "loss_num": 0.07568359375, + "loss_xval": 2.3125, + "num_input_tokens_seen": 8704576, + "step": 130 + }, + { + "epoch": 0.014865248226950355, + "grad_norm": 12.161070823669434, + "learning_rate": 5e-05, + "loss": 2.1923, + "num_input_tokens_seen": 8771796, + "step": 131 + }, + { + "epoch": 0.014865248226950355, + "loss": 2.377084732055664, + "loss_ce": 0.009897224605083466, + "loss_iou": 1.03125, + "loss_num": 0.0615234375, + "loss_xval": 2.375, + "num_input_tokens_seen": 8771796, + "step": 131 + }, + { + "epoch": 0.01497872340425532, + "grad_norm": 12.347914695739746, + "learning_rate": 5e-05, + "loss": 2.311, + "num_input_tokens_seen": 8837924, + "step": 132 + }, + { + "epoch": 0.01497872340425532, + "loss": 2.020324945449829, + "loss_ce": 0.006897184997797012, + "loss_iou": 0.83203125, + "loss_num": 0.0703125, + "loss_xval": 2.015625, + "num_input_tokens_seen": 8837924, + "step": 132 + }, + { + "epoch": 0.015092198581560284, + "grad_norm": 17.58194923400879, + "learning_rate": 5e-05, + "loss": 2.1781, + "num_input_tokens_seen": 8905332, + "step": 133 + }, + { + "epoch": 0.015092198581560284, + "loss": 2.245748281478882, + "loss_ce": 0.009420037269592285, + "loss_iou": 0.9609375, + "loss_num": 0.0625, + "loss_xval": 2.234375, + "num_input_tokens_seen": 8905332, + "step": 133 + }, + { + "epoch": 0.015205673758865248, + "grad_norm": 12.385845184326172, + "learning_rate": 5e-05, + "loss": 2.2731, + "num_input_tokens_seen": 8971924, + "step": 134 + }, + { + "epoch": 0.015205673758865248, + "loss": 2.2352652549743652, + "loss_ce": 0.011388363316655159, + "loss_iou": 0.9296875, + "loss_num": 0.0732421875, + "loss_xval": 2.21875, + "num_input_tokens_seen": 8971924, + "step": 134 + }, + { + "epoch": 0.015319148936170212, + "grad_norm": 12.352046966552734, + "learning_rate": 5e-05, + "loss": 2.3171, + "num_input_tokens_seen": 9039020, + "step": 135 + }, + { + "epoch": 0.015319148936170212, + "loss": 2.3259048461914062, + "loss_ce": 0.005592202767729759, + "loss_iou": 0.99609375, + "loss_num": 0.06591796875, + "loss_xval": 2.3125, + "num_input_tokens_seen": 9039020, + "step": 135 + }, + { + "epoch": 0.015432624113475178, + "grad_norm": 13.13872241973877, + "learning_rate": 5e-05, + "loss": 2.4708, + "num_input_tokens_seen": 9106088, + "step": 136 + }, + { + "epoch": 0.015432624113475178, + "loss": 2.5536632537841797, + "loss_ce": 0.0067883385345339775, + "loss_iou": 1.0625, + "loss_num": 0.08544921875, + "loss_xval": 2.546875, + "num_input_tokens_seen": 9106088, + "step": 136 + }, + { + "epoch": 0.015546099290780142, + "grad_norm": 26.445117950439453, + "learning_rate": 5e-05, + "loss": 2.2685, + "num_input_tokens_seen": 9173256, + "step": 137 + }, + { + "epoch": 0.015546099290780142, + "loss": 2.459465503692627, + "loss_ce": 0.008293556049466133, + "loss_iou": 1.0234375, + "loss_num": 0.08056640625, + "loss_xval": 2.453125, + "num_input_tokens_seen": 9173256, + "step": 137 + }, + { + "epoch": 0.015659574468085108, + "grad_norm": 28.742244720458984, + "learning_rate": 5e-05, + "loss": 2.2315, + "num_input_tokens_seen": 9239452, + "step": 138 + }, + { + "epoch": 0.015659574468085108, + "loss": 2.4442527294158936, + "loss_ce": 0.01065905299037695, + "loss_iou": 1.046875, + "loss_num": 0.0693359375, + "loss_xval": 2.4375, + "num_input_tokens_seen": 9239452, + "step": 138 + }, + { + "epoch": 0.015773049645390072, + "grad_norm": 15.194764137268066, + "learning_rate": 5e-05, + "loss": 2.287, + "num_input_tokens_seen": 9306336, + "step": 139 + }, + { + "epoch": 0.015773049645390072, + "loss": 2.299636125564575, + "loss_ce": 0.00959711242467165, + "loss_iou": 0.9765625, + "loss_num": 0.06787109375, + "loss_xval": 2.296875, + "num_input_tokens_seen": 9306336, + "step": 139 + }, + { + "epoch": 0.015886524822695036, + "grad_norm": 15.860917091369629, + "learning_rate": 5e-05, + "loss": 2.5415, + "num_input_tokens_seen": 9372836, + "step": 140 + }, + { + "epoch": 0.015886524822695036, + "loss": 2.4263200759887695, + "loss_ce": 0.008351380005478859, + "loss_iou": 1.0859375, + "loss_num": 0.05029296875, + "loss_xval": 2.421875, + "num_input_tokens_seen": 9372836, + "step": 140 + }, + { + "epoch": 0.016, + "grad_norm": 11.801057815551758, + "learning_rate": 5e-05, + "loss": 2.4627, + "num_input_tokens_seen": 9439144, + "step": 141 + }, + { + "epoch": 0.016, + "loss": 2.525806188583374, + "loss_ce": 0.0043218135833740234, + "loss_iou": 1.03125, + "loss_num": 0.091796875, + "loss_xval": 2.515625, + "num_input_tokens_seen": 9439144, + "step": 141 + }, + { + "epoch": 0.016113475177304964, + "grad_norm": 8.322868347167969, + "learning_rate": 5e-05, + "loss": 2.1026, + "num_input_tokens_seen": 9505856, + "step": 142 + }, + { + "epoch": 0.016113475177304964, + "loss": 1.8941514492034912, + "loss_ce": 0.005479699932038784, + "loss_iou": 0.79296875, + "loss_num": 0.0615234375, + "loss_xval": 1.890625, + "num_input_tokens_seen": 9505856, + "step": 142 + }, + { + "epoch": 0.01622695035460993, + "grad_norm": 25.476938247680664, + "learning_rate": 5e-05, + "loss": 2.2959, + "num_input_tokens_seen": 9571852, + "step": 143 + }, + { + "epoch": 0.01622695035460993, + "loss": 2.658219337463379, + "loss_ce": 0.007828928530216217, + "loss_iou": 1.1328125, + "loss_num": 0.07763671875, + "loss_xval": 2.65625, + "num_input_tokens_seen": 9571852, + "step": 143 + }, + { + "epoch": 0.016340425531914893, + "grad_norm": 7.491905212402344, + "learning_rate": 5e-05, + "loss": 2.6088, + "num_input_tokens_seen": 9637568, + "step": 144 + }, + { + "epoch": 0.016340425531914893, + "loss": 2.470900058746338, + "loss_ce": 0.008009620010852814, + "loss_iou": 0.9375, + "loss_num": 0.11767578125, + "loss_xval": 2.46875, + "num_input_tokens_seen": 9637568, + "step": 144 + }, + { + "epoch": 0.016453900709219857, + "grad_norm": 6.734194755554199, + "learning_rate": 5e-05, + "loss": 2.527, + "num_input_tokens_seen": 9703276, + "step": 145 + }, + { + "epoch": 0.016453900709219857, + "loss": 2.49906587600708, + "loss_ce": 0.006878357380628586, + "loss_iou": 1.0234375, + "loss_num": 0.08984375, + "loss_xval": 2.5, + "num_input_tokens_seen": 9703276, + "step": 145 + }, + { + "epoch": 0.01656737588652482, + "grad_norm": 9.095154762268066, + "learning_rate": 5e-05, + "loss": 2.4167, + "num_input_tokens_seen": 9770664, + "step": 146 + }, + { + "epoch": 0.01656737588652482, + "loss": 2.4846527576446533, + "loss_ce": 0.00809038057923317, + "loss_iou": 1.0, + "loss_num": 0.09423828125, + "loss_xval": 2.46875, + "num_input_tokens_seen": 9770664, + "step": 146 + }, + { + "epoch": 0.01668085106382979, + "grad_norm": 9.772058486938477, + "learning_rate": 5e-05, + "loss": 2.4012, + "num_input_tokens_seen": 9838392, + "step": 147 + }, + { + "epoch": 0.01668085106382979, + "loss": 2.394804000854492, + "loss_ce": 0.004178861156105995, + "loss_iou": 0.97265625, + "loss_num": 0.08984375, + "loss_xval": 2.390625, + "num_input_tokens_seen": 9838392, + "step": 147 + }, + { + "epoch": 0.016794326241134753, + "grad_norm": 15.710801124572754, + "learning_rate": 5e-05, + "loss": 2.1769, + "num_input_tokens_seen": 9905168, + "step": 148 + }, + { + "epoch": 0.016794326241134753, + "loss": 2.273308038711548, + "loss_ce": 0.007683010771870613, + "loss_iou": 0.94921875, + "loss_num": 0.072265625, + "loss_xval": 2.265625, + "num_input_tokens_seen": 9905168, + "step": 148 + }, + { + "epoch": 0.016907801418439717, + "grad_norm": 10.77956485748291, + "learning_rate": 5e-05, + "loss": 2.2434, + "num_input_tokens_seen": 9973072, + "step": 149 + }, + { + "epoch": 0.016907801418439717, + "loss": 2.313485860824585, + "loss_ce": 0.0068451762199401855, + "loss_iou": 0.9609375, + "loss_num": 0.0771484375, + "loss_xval": 2.3125, + "num_input_tokens_seen": 9973072, + "step": 149 + }, + { + "epoch": 0.01702127659574468, + "grad_norm": 40.77583694458008, + "learning_rate": 5e-05, + "loss": 2.2813, + "num_input_tokens_seen": 10039732, + "step": 150 + }, + { + "epoch": 0.01702127659574468, + "loss": 1.9825935363769531, + "loss_ce": 0.008472280576825142, + "loss_iou": 0.83984375, + "loss_num": 0.059326171875, + "loss_xval": 1.9765625, + "num_input_tokens_seen": 10039732, + "step": 150 + }, + { + "epoch": 0.017134751773049645, + "grad_norm": 15.640267372131348, + "learning_rate": 5e-05, + "loss": 2.5361, + "num_input_tokens_seen": 10107288, + "step": 151 + }, + { + "epoch": 0.017134751773049645, + "loss": 2.493063449859619, + "loss_ce": 0.006735186092555523, + "loss_iou": 1.0390625, + "loss_num": 0.0830078125, + "loss_xval": 2.484375, + "num_input_tokens_seen": 10107288, + "step": 151 + }, + { + "epoch": 0.01724822695035461, + "grad_norm": 19.055850982666016, + "learning_rate": 5e-05, + "loss": 2.0099, + "num_input_tokens_seen": 10173236, + "step": 152 + }, + { + "epoch": 0.01724822695035461, + "loss": 1.8129035234451294, + "loss_ce": 0.005774603225290775, + "loss_iou": 0.7578125, + "loss_num": 0.05859375, + "loss_xval": 1.8046875, + "num_input_tokens_seen": 10173236, + "step": 152 + }, + { + "epoch": 0.017361702127659574, + "grad_norm": 14.081707000732422, + "learning_rate": 5e-05, + "loss": 2.4088, + "num_input_tokens_seen": 10238992, + "step": 153 + }, + { + "epoch": 0.017361702127659574, + "loss": 2.377406597137451, + "loss_ce": 0.011195572093129158, + "loss_iou": 1.0234375, + "loss_num": 0.0625, + "loss_xval": 2.359375, + "num_input_tokens_seen": 10238992, + "step": 153 + }, + { + "epoch": 0.017475177304964538, + "grad_norm": 8.095298767089844, + "learning_rate": 5e-05, + "loss": 2.1621, + "num_input_tokens_seen": 10306444, + "step": 154 + }, + { + "epoch": 0.017475177304964538, + "loss": 2.0957350730895996, + "loss_ce": 0.011506602168083191, + "loss_iou": 0.8671875, + "loss_num": 0.0703125, + "loss_xval": 2.078125, + "num_input_tokens_seen": 10306444, + "step": 154 + }, + { + "epoch": 0.017588652482269502, + "grad_norm": 7.532504558563232, + "learning_rate": 5e-05, + "loss": 1.7575, + "num_input_tokens_seen": 10372824, + "step": 155 + }, + { + "epoch": 0.017588652482269502, + "loss": 1.8177725076675415, + "loss_ce": 0.006737336982041597, + "loss_iou": 0.77734375, + "loss_num": 0.0517578125, + "loss_xval": 1.8125, + "num_input_tokens_seen": 10372824, + "step": 155 + }, + { + "epoch": 0.01770212765957447, + "grad_norm": 14.862618446350098, + "learning_rate": 5e-05, + "loss": 2.2321, + "num_input_tokens_seen": 10440644, + "step": 156 + }, + { + "epoch": 0.01770212765957447, + "loss": 2.2479302883148193, + "loss_ce": 0.005742753855884075, + "loss_iou": 0.96484375, + "loss_num": 0.062255859375, + "loss_xval": 2.25, + "num_input_tokens_seen": 10440644, + "step": 156 + }, + { + "epoch": 0.017815602836879434, + "grad_norm": 12.881443977355957, + "learning_rate": 5e-05, + "loss": 2.2026, + "num_input_tokens_seen": 10508236, + "step": 157 + }, + { + "epoch": 0.017815602836879434, + "loss": 2.242950201034546, + "loss_ce": 0.00857517309486866, + "loss_iou": 0.9609375, + "loss_num": 0.06298828125, + "loss_xval": 2.234375, + "num_input_tokens_seen": 10508236, + "step": 157 + }, + { + "epoch": 0.017929078014184398, + "grad_norm": 14.258028030395508, + "learning_rate": 5e-05, + "loss": 2.3204, + "num_input_tokens_seen": 10575600, + "step": 158 + }, + { + "epoch": 0.017929078014184398, + "loss": 2.2515978813171387, + "loss_ce": 0.004527468699961901, + "loss_iou": 0.953125, + "loss_num": 0.0673828125, + "loss_xval": 2.25, + "num_input_tokens_seen": 10575600, + "step": 158 + }, + { + "epoch": 0.018042553191489362, + "grad_norm": 29.44944190979004, + "learning_rate": 5e-05, + "loss": 2.5112, + "num_input_tokens_seen": 10642792, + "step": 159 + }, + { + "epoch": 0.018042553191489362, + "loss": 2.7171785831451416, + "loss_ce": 0.0062411511316895485, + "loss_iou": 1.1875, + "loss_num": 0.06689453125, + "loss_xval": 2.71875, + "num_input_tokens_seen": 10642792, + "step": 159 + }, + { + "epoch": 0.018156028368794326, + "grad_norm": 9.456683158874512, + "learning_rate": 5e-05, + "loss": 2.5736, + "num_input_tokens_seen": 10710472, + "step": 160 + }, + { + "epoch": 0.018156028368794326, + "loss": 2.371212959289551, + "loss_ce": 0.006222669035196304, + "loss_iou": 0.94921875, + "loss_num": 0.09423828125, + "loss_xval": 2.359375, + "num_input_tokens_seen": 10710472, + "step": 160 + }, + { + "epoch": 0.01826950354609929, + "grad_norm": 6.43477201461792, + "learning_rate": 5e-05, + "loss": 2.459, + "num_input_tokens_seen": 10777740, + "step": 161 + }, + { + "epoch": 0.01826950354609929, + "loss": 2.519258499145508, + "loss_ce": 0.005586634390056133, + "loss_iou": 1.0078125, + "loss_num": 0.0986328125, + "loss_xval": 2.515625, + "num_input_tokens_seen": 10777740, + "step": 161 + }, + { + "epoch": 0.018382978723404254, + "grad_norm": 8.330636024475098, + "learning_rate": 5e-05, + "loss": 2.3664, + "num_input_tokens_seen": 10845044, + "step": 162 + }, + { + "epoch": 0.018382978723404254, + "loss": 2.344101905822754, + "loss_ce": 0.003281618934124708, + "loss_iou": 0.984375, + "loss_num": 0.07568359375, + "loss_xval": 2.34375, + "num_input_tokens_seen": 10845044, + "step": 162 + }, + { + "epoch": 0.01849645390070922, + "grad_norm": 6.454036235809326, + "learning_rate": 5e-05, + "loss": 2.1012, + "num_input_tokens_seen": 10912076, + "step": 163 + }, + { + "epoch": 0.01849645390070922, + "loss": 2.0271120071411133, + "loss_ce": 0.006604090798646212, + "loss_iou": 0.84765625, + "loss_num": 0.06494140625, + "loss_xval": 2.015625, + "num_input_tokens_seen": 10912076, + "step": 163 + }, + { + "epoch": 0.018609929078014183, + "grad_norm": 8.917473793029785, + "learning_rate": 5e-05, + "loss": 2.2623, + "num_input_tokens_seen": 10978308, + "step": 164 + }, + { + "epoch": 0.018609929078014183, + "loss": 2.3023457527160645, + "loss_ce": 0.009376918897032738, + "loss_iou": 0.984375, + "loss_num": 0.064453125, + "loss_xval": 2.296875, + "num_input_tokens_seen": 10978308, + "step": 164 + }, + { + "epoch": 0.01872340425531915, + "grad_norm": 15.595718383789062, + "learning_rate": 5e-05, + "loss": 2.0887, + "num_input_tokens_seen": 11045028, + "step": 165 + }, + { + "epoch": 0.01872340425531915, + "loss": 2.3348593711853027, + "loss_ce": 0.008687597699463367, + "loss_iou": 0.9609375, + "loss_num": 0.0810546875, + "loss_xval": 2.328125, + "num_input_tokens_seen": 11045028, + "step": 165 + }, + { + "epoch": 0.018836879432624114, + "grad_norm": 11.929919242858887, + "learning_rate": 5e-05, + "loss": 2.155, + "num_input_tokens_seen": 11111112, + "step": 166 + }, + { + "epoch": 0.018836879432624114, + "loss": 2.239671468734741, + "loss_ce": 0.00920274667441845, + "loss_iou": 0.96484375, + "loss_num": 0.06005859375, + "loss_xval": 2.234375, + "num_input_tokens_seen": 11111112, + "step": 166 + }, + { + "epoch": 0.01895035460992908, + "grad_norm": 10.62031364440918, + "learning_rate": 5e-05, + "loss": 2.1292, + "num_input_tokens_seen": 11177048, + "step": 167 + }, + { + "epoch": 0.01895035460992908, + "loss": 2.040104627609253, + "loss_ce": 0.006901508662849665, + "loss_iou": 0.83984375, + "loss_num": 0.06982421875, + "loss_xval": 2.03125, + "num_input_tokens_seen": 11177048, + "step": 167 + }, + { + "epoch": 0.019063829787234043, + "grad_norm": 17.681596755981445, + "learning_rate": 5e-05, + "loss": 2.238, + "num_input_tokens_seen": 11243676, + "step": 168 + }, + { + "epoch": 0.019063829787234043, + "loss": 2.243464469909668, + "loss_ce": 0.005183221772313118, + "loss_iou": 0.94140625, + "loss_num": 0.07177734375, + "loss_xval": 2.234375, + "num_input_tokens_seen": 11243676, + "step": 168 + }, + { + "epoch": 0.019177304964539007, + "grad_norm": 14.8980712890625, + "learning_rate": 5e-05, + "loss": 2.198, + "num_input_tokens_seen": 11309852, + "step": 169 + }, + { + "epoch": 0.019177304964539007, + "loss": 2.2008564472198486, + "loss_ce": 0.007497118785977364, + "loss_iou": 0.93359375, + "loss_num": 0.06494140625, + "loss_xval": 2.1875, + "num_input_tokens_seen": 11309852, + "step": 169 + }, + { + "epoch": 0.01929078014184397, + "grad_norm": 9.550142288208008, + "learning_rate": 5e-05, + "loss": 2.0831, + "num_input_tokens_seen": 11375876, + "step": 170 + }, + { + "epoch": 0.01929078014184397, + "loss": 2.0494604110717773, + "loss_ce": 0.010886171832680702, + "loss_iou": 0.7734375, + "loss_num": 0.09716796875, + "loss_xval": 2.03125, + "num_input_tokens_seen": 11375876, + "step": 170 + }, + { + "epoch": 0.019404255319148935, + "grad_norm": 14.172388076782227, + "learning_rate": 5e-05, + "loss": 1.8608, + "num_input_tokens_seen": 11441688, + "step": 171 + }, + { + "epoch": 0.019404255319148935, + "loss": 1.9323945045471191, + "loss_ce": 0.011496144346892834, + "loss_iou": 0.7890625, + "loss_num": 0.06787109375, + "loss_xval": 1.921875, + "num_input_tokens_seen": 11441688, + "step": 171 + }, + { + "epoch": 0.0195177304964539, + "grad_norm": 24.15854835510254, + "learning_rate": 5e-05, + "loss": 2.2257, + "num_input_tokens_seen": 11508848, + "step": 172 + }, + { + "epoch": 0.0195177304964539, + "loss": 2.258111000061035, + "loss_ce": 0.005181225016713142, + "loss_iou": 1.0, + "loss_num": 0.049560546875, + "loss_xval": 2.25, + "num_input_tokens_seen": 11508848, + "step": 172 + }, + { + "epoch": 0.019631205673758867, + "grad_norm": 10.075818061828613, + "learning_rate": 5e-05, + "loss": 2.3624, + "num_input_tokens_seen": 11575384, + "step": 173 + }, + { + "epoch": 0.019631205673758867, + "loss": 2.3573083877563477, + "loss_ce": 0.005745973438024521, + "loss_iou": 0.9765625, + "loss_num": 0.0791015625, + "loss_xval": 2.34375, + "num_input_tokens_seen": 11575384, + "step": 173 + }, + { + "epoch": 0.01974468085106383, + "grad_norm": 23.228261947631836, + "learning_rate": 5e-05, + "loss": 2.1602, + "num_input_tokens_seen": 11641596, + "step": 174 + }, + { + "epoch": 0.01974468085106383, + "loss": 2.324051856994629, + "loss_ce": 0.008621986024081707, + "loss_iou": 0.98828125, + "loss_num": 0.06884765625, + "loss_xval": 2.3125, + "num_input_tokens_seen": 11641596, + "step": 174 + }, + { + "epoch": 0.019858156028368795, + "grad_norm": 9.37069320678711, + "learning_rate": 5e-05, + "loss": 2.4694, + "num_input_tokens_seen": 11707268, + "step": 175 + }, + { + "epoch": 0.019858156028368795, + "loss": 2.582968235015869, + "loss_ce": 0.004843095317482948, + "loss_iou": 1.0546875, + "loss_num": 0.0947265625, + "loss_xval": 2.578125, + "num_input_tokens_seen": 11707268, + "step": 175 + }, + { + "epoch": 0.01997163120567376, + "grad_norm": 7.287182807922363, + "learning_rate": 5e-05, + "loss": 2.5182, + "num_input_tokens_seen": 11774812, + "step": 176 + }, + { + "epoch": 0.01997163120567376, + "loss": 2.4732017517089844, + "loss_ce": 0.012264142744243145, + "loss_iou": 1.0, + "loss_num": 0.09130859375, + "loss_xval": 2.46875, + "num_input_tokens_seen": 11774812, + "step": 176 + }, + { + "epoch": 0.020085106382978724, + "grad_norm": 9.82468318939209, + "learning_rate": 5e-05, + "loss": 2.396, + "num_input_tokens_seen": 11839220, + "step": 177 + }, + { + "epoch": 0.020085106382978724, + "loss": 2.357565402984619, + "loss_ce": 0.010885726660490036, + "loss_iou": 0.96875, + "loss_num": 0.08154296875, + "loss_xval": 2.34375, + "num_input_tokens_seen": 11839220, + "step": 177 + }, + { + "epoch": 0.020198581560283688, + "grad_norm": 6.261472225189209, + "learning_rate": 5e-05, + "loss": 1.8897, + "num_input_tokens_seen": 11905056, + "step": 178 + }, + { + "epoch": 0.020198581560283688, + "loss": 1.5840513706207275, + "loss_ce": 0.008245827630162239, + "loss_iou": 0.65625, + "loss_num": 0.052490234375, + "loss_xval": 1.578125, + "num_input_tokens_seen": 11905056, + "step": 178 + }, + { + "epoch": 0.020312056737588652, + "grad_norm": 15.622884750366211, + "learning_rate": 5e-05, + "loss": 1.9574, + "num_input_tokens_seen": 11971676, + "step": 179 + }, + { + "epoch": 0.020312056737588652, + "loss": 1.9973119497299194, + "loss_ce": 0.0021947131026536226, + "loss_iou": 0.88671875, + "loss_num": 0.044921875, + "loss_xval": 1.9921875, + "num_input_tokens_seen": 11971676, + "step": 179 + }, + { + "epoch": 0.020425531914893616, + "grad_norm": 59.51787567138672, + "learning_rate": 5e-05, + "loss": 2.345, + "num_input_tokens_seen": 12037952, + "step": 180 + }, + { + "epoch": 0.020425531914893616, + "loss": 2.379415988922119, + "loss_ce": 0.012228390201926231, + "loss_iou": 0.98828125, + "loss_num": 0.078125, + "loss_xval": 2.375, + "num_input_tokens_seen": 12037952, + "step": 180 + }, + { + "epoch": 0.02053900709219858, + "grad_norm": 11.42221736907959, + "learning_rate": 5e-05, + "loss": 2.0541, + "num_input_tokens_seen": 12105324, + "step": 181 + }, + { + "epoch": 0.02053900709219858, + "loss": 1.8487247228622437, + "loss_ce": 0.013031408190727234, + "loss_iou": 0.76953125, + "loss_num": 0.059814453125, + "loss_xval": 1.8359375, + "num_input_tokens_seen": 12105324, + "step": 181 + }, + { + "epoch": 0.020652482269503548, + "grad_norm": 14.592534065246582, + "learning_rate": 5e-05, + "loss": 2.2953, + "num_input_tokens_seen": 12171556, + "step": 182 + }, + { + "epoch": 0.020652482269503548, + "loss": 2.1845197677612305, + "loss_ce": 0.010203314013779163, + "loss_iou": 0.87109375, + "loss_num": 0.0869140625, + "loss_xval": 2.171875, + "num_input_tokens_seen": 12171556, + "step": 182 + }, + { + "epoch": 0.020765957446808512, + "grad_norm": 30.980749130249023, + "learning_rate": 5e-05, + "loss": 2.5712, + "num_input_tokens_seen": 12239016, + "step": 183 + }, + { + "epoch": 0.020765957446808512, + "loss": 2.712813377380371, + "loss_ce": 0.00871197134256363, + "loss_iou": 1.1640625, + "loss_num": 0.0732421875, + "loss_xval": 2.703125, + "num_input_tokens_seen": 12239016, + "step": 183 + }, + { + "epoch": 0.020879432624113476, + "grad_norm": 8.426289558410645, + "learning_rate": 5e-05, + "loss": 2.3321, + "num_input_tokens_seen": 12306176, + "step": 184 + }, + { + "epoch": 0.020879432624113476, + "loss": 2.161771774291992, + "loss_ce": 0.005521819926798344, + "loss_iou": 0.83984375, + "loss_num": 0.0947265625, + "loss_xval": 2.15625, + "num_input_tokens_seen": 12306176, + "step": 184 + }, + { + "epoch": 0.02099290780141844, + "grad_norm": 9.972886085510254, + "learning_rate": 5e-05, + "loss": 2.1977, + "num_input_tokens_seen": 12372716, + "step": 185 + }, + { + "epoch": 0.02099290780141844, + "loss": 2.157221794128418, + "loss_ce": 0.0029248190112411976, + "loss_iou": 0.87890625, + "loss_num": 0.07958984375, + "loss_xval": 2.15625, + "num_input_tokens_seen": 12372716, + "step": 185 + }, + { + "epoch": 0.021106382978723404, + "grad_norm": 7.874528884887695, + "learning_rate": 5e-05, + "loss": 2.1667, + "num_input_tokens_seen": 12439828, + "step": 186 + }, + { + "epoch": 0.021106382978723404, + "loss": 2.1653811931610107, + "loss_ce": 0.005224934313446283, + "loss_iou": 0.93359375, + "loss_num": 0.058837890625, + "loss_xval": 2.15625, + "num_input_tokens_seen": 12439828, + "step": 186 + }, + { + "epoch": 0.02121985815602837, + "grad_norm": 14.025794982910156, + "learning_rate": 5e-05, + "loss": 1.8013, + "num_input_tokens_seen": 12506732, + "step": 187 + }, + { + "epoch": 0.02121985815602837, + "loss": 1.8064465522766113, + "loss_ce": 0.0046888235956430435, + "loss_iou": 0.77734375, + "loss_num": 0.05029296875, + "loss_xval": 1.8046875, + "num_input_tokens_seen": 12506732, + "step": 187 + }, + { + "epoch": 0.021333333333333333, + "grad_norm": 39.65133285522461, + "learning_rate": 5e-05, + "loss": 2.2144, + "num_input_tokens_seen": 12574328, + "step": 188 + }, + { + "epoch": 0.021333333333333333, + "loss": 1.9959393739700317, + "loss_ce": 0.002775313798338175, + "loss_iou": 0.859375, + "loss_num": 0.05517578125, + "loss_xval": 1.9921875, + "num_input_tokens_seen": 12574328, + "step": 188 + }, + { + "epoch": 0.021446808510638297, + "grad_norm": 15.730154991149902, + "learning_rate": 5e-05, + "loss": 2.0644, + "num_input_tokens_seen": 12641800, + "step": 189 + }, + { + "epoch": 0.021446808510638297, + "loss": 1.9396142959594727, + "loss_ce": 0.005044018384069204, + "loss_iou": 0.82421875, + "loss_num": 0.0576171875, + "loss_xval": 1.9375, + "num_input_tokens_seen": 12641800, + "step": 189 + }, + { + "epoch": 0.02156028368794326, + "grad_norm": 29.442628860473633, + "learning_rate": 5e-05, + "loss": 2.7163, + "num_input_tokens_seen": 12709608, + "step": 190 + }, + { + "epoch": 0.02156028368794326, + "loss": 2.732654571533203, + "loss_ce": 0.008045079186558723, + "loss_iou": 1.1953125, + "loss_num": 0.0654296875, + "loss_xval": 2.71875, + "num_input_tokens_seen": 12709608, + "step": 190 + }, + { + "epoch": 0.02167375886524823, + "grad_norm": 8.636795997619629, + "learning_rate": 5e-05, + "loss": 2.4634, + "num_input_tokens_seen": 12775908, + "step": 191 + }, + { + "epoch": 0.02167375886524823, + "loss": 2.4344940185546875, + "loss_ce": 0.004806513898074627, + "loss_iou": 1.0078125, + "loss_num": 0.0830078125, + "loss_xval": 2.4375, + "num_input_tokens_seen": 12775908, + "step": 191 + }, + { + "epoch": 0.021787234042553193, + "grad_norm": 11.758359909057617, + "learning_rate": 5e-05, + "loss": 2.3311, + "num_input_tokens_seen": 12842548, + "step": 192 + }, + { + "epoch": 0.021787234042553193, + "loss": 2.30667781829834, + "loss_ce": 0.0049200840294361115, + "loss_iou": 0.96875, + "loss_num": 0.07275390625, + "loss_xval": 2.296875, + "num_input_tokens_seen": 12842548, + "step": 192 + }, + { + "epoch": 0.021900709219858157, + "grad_norm": 9.506312370300293, + "learning_rate": 5e-05, + "loss": 1.9877, + "num_input_tokens_seen": 12908936, + "step": 193 + }, + { + "epoch": 0.021900709219858157, + "loss": 2.113755226135254, + "loss_ce": 0.004380068276077509, + "loss_iou": 0.87890625, + "loss_num": 0.06982421875, + "loss_xval": 2.109375, + "num_input_tokens_seen": 12908936, + "step": 193 + }, + { + "epoch": 0.02201418439716312, + "grad_norm": 19.589685440063477, + "learning_rate": 5e-05, + "loss": 2.0752, + "num_input_tokens_seen": 12976060, + "step": 194 + }, + { + "epoch": 0.02201418439716312, + "loss": 2.0855395793914795, + "loss_ce": 0.00741459708660841, + "loss_iou": 0.8828125, + "loss_num": 0.0625, + "loss_xval": 2.078125, + "num_input_tokens_seen": 12976060, + "step": 194 + }, + { + "epoch": 0.022127659574468085, + "grad_norm": 10.653484344482422, + "learning_rate": 5e-05, + "loss": 2.3416, + "num_input_tokens_seen": 13042708, + "step": 195 + }, + { + "epoch": 0.022127659574468085, + "loss": 2.626361846923828, + "loss_ce": 0.005267941392958164, + "loss_iou": 1.0625, + "loss_num": 0.09814453125, + "loss_xval": 2.625, + "num_input_tokens_seen": 13042708, + "step": 195 + }, + { + "epoch": 0.02224113475177305, + "grad_norm": 13.027557373046875, + "learning_rate": 5e-05, + "loss": 2.2496, + "num_input_tokens_seen": 13109248, + "step": 196 + }, + { + "epoch": 0.02224113475177305, + "loss": 2.296215534210205, + "loss_ce": 0.007153189275413752, + "loss_iou": 0.9765625, + "loss_num": 0.0673828125, + "loss_xval": 2.28125, + "num_input_tokens_seen": 13109248, + "step": 196 + }, + { + "epoch": 0.022354609929078013, + "grad_norm": 11.345287322998047, + "learning_rate": 5e-05, + "loss": 2.1497, + "num_input_tokens_seen": 13176516, + "step": 197 + }, + { + "epoch": 0.022354609929078013, + "loss": 2.4519834518432617, + "loss_ce": 0.004717780277132988, + "loss_iou": 1.0234375, + "loss_num": 0.080078125, + "loss_xval": 2.453125, + "num_input_tokens_seen": 13176516, + "step": 197 + }, + { + "epoch": 0.022468085106382978, + "grad_norm": 13.46945858001709, + "learning_rate": 5e-05, + "loss": 2.1934, + "num_input_tokens_seen": 13242888, + "step": 198 + }, + { + "epoch": 0.022468085106382978, + "loss": 2.0941953659057617, + "loss_ce": 0.005328086204826832, + "loss_iou": 0.86328125, + "loss_num": 0.072265625, + "loss_xval": 2.09375, + "num_input_tokens_seen": 13242888, + "step": 198 + }, + { + "epoch": 0.02258156028368794, + "grad_norm": 22.162094116210938, + "learning_rate": 5e-05, + "loss": 2.317, + "num_input_tokens_seen": 13310600, + "step": 199 + }, + { + "epoch": 0.02258156028368794, + "loss": 2.387981653213501, + "loss_ce": 0.00907539576292038, + "loss_iou": 1.0234375, + "loss_num": 0.0673828125, + "loss_xval": 2.375, + "num_input_tokens_seen": 13310600, + "step": 199 + }, + { + "epoch": 0.02269503546099291, + "grad_norm": 16.065637588500977, + "learning_rate": 5e-05, + "loss": 2.5793, + "num_input_tokens_seen": 13378352, + "step": 200 + }, + { + "epoch": 0.02269503546099291, + "loss": 2.7218565940856934, + "loss_ce": 0.00701269693672657, + "loss_iou": 1.09375, + "loss_num": 0.1044921875, + "loss_xval": 2.71875, + "num_input_tokens_seen": 13378352, + "step": 200 + }, + { + "epoch": 0.022808510638297873, + "grad_norm": 14.165543556213379, + "learning_rate": 5e-05, + "loss": 2.2477, + "num_input_tokens_seen": 13444988, + "step": 201 + }, + { + "epoch": 0.022808510638297873, + "loss": 2.3013229370117188, + "loss_ce": 0.008354277350008488, + "loss_iou": 0.984375, + "loss_num": 0.06494140625, + "loss_xval": 2.296875, + "num_input_tokens_seen": 13444988, + "step": 201 + }, + { + "epoch": 0.022921985815602838, + "grad_norm": 11.902998924255371, + "learning_rate": 5e-05, + "loss": 2.0242, + "num_input_tokens_seen": 13512600, + "step": 202 + }, + { + "epoch": 0.022921985815602838, + "loss": 1.9336786270141602, + "loss_ce": 0.013268493115901947, + "loss_iou": 0.7578125, + "loss_num": 0.0810546875, + "loss_xval": 1.921875, + "num_input_tokens_seen": 13512600, + "step": 202 + }, + { + "epoch": 0.023035460992907802, + "grad_norm": 15.115137100219727, + "learning_rate": 5e-05, + "loss": 2.2537, + "num_input_tokens_seen": 13579412, + "step": 203 + }, + { + "epoch": 0.023035460992907802, + "loss": 2.164536714553833, + "loss_ce": 0.008286889642477036, + "loss_iou": 0.9296875, + "loss_num": 0.058837890625, + "loss_xval": 2.15625, + "num_input_tokens_seen": 13579412, + "step": 203 + }, + { + "epoch": 0.023148936170212766, + "grad_norm": 16.98561668395996, + "learning_rate": 5e-05, + "loss": 2.2003, + "num_input_tokens_seen": 13646888, + "step": 204 + }, + { + "epoch": 0.023148936170212766, + "loss": 2.159851551055908, + "loss_ce": 0.00555444648489356, + "loss_iou": 0.93359375, + "loss_num": 0.057861328125, + "loss_xval": 2.15625, + "num_input_tokens_seen": 13646888, + "step": 204 + }, + { + "epoch": 0.02326241134751773, + "grad_norm": 18.33384132385254, + "learning_rate": 5e-05, + "loss": 2.2909, + "num_input_tokens_seen": 13713592, + "step": 205 + }, + { + "epoch": 0.02326241134751773, + "loss": 2.537165641784668, + "loss_ce": 0.00982196256518364, + "loss_iou": 1.0625, + "loss_num": 0.08203125, + "loss_xval": 2.53125, + "num_input_tokens_seen": 13713592, + "step": 205 + }, + { + "epoch": 0.023375886524822694, + "grad_norm": 16.89714241027832, + "learning_rate": 5e-05, + "loss": 2.2636, + "num_input_tokens_seen": 13782684, + "step": 206 + }, + { + "epoch": 0.023375886524822694, + "loss": 2.250458240509033, + "loss_ce": 0.004364306107163429, + "loss_iou": 0.984375, + "loss_num": 0.0556640625, + "loss_xval": 2.25, + "num_input_tokens_seen": 13782684, + "step": 206 + }, + { + "epoch": 0.02348936170212766, + "grad_norm": 31.695232391357422, + "learning_rate": 5e-05, + "loss": 2.365, + "num_input_tokens_seen": 13849328, + "step": 207 + }, + { + "epoch": 0.02348936170212766, + "loss": 2.535949230194092, + "loss_ce": 0.012511824257671833, + "loss_iou": 1.0234375, + "loss_num": 0.0947265625, + "loss_xval": 2.53125, + "num_input_tokens_seen": 13849328, + "step": 207 + }, + { + "epoch": 0.023602836879432623, + "grad_norm": 20.34220314025879, + "learning_rate": 5e-05, + "loss": 2.096, + "num_input_tokens_seen": 13916640, + "step": 208 + }, + { + "epoch": 0.023602836879432623, + "loss": 2.2491819858551025, + "loss_ce": 0.022619549185037613, + "loss_iou": 0.9453125, + "loss_num": 0.0673828125, + "loss_xval": 2.21875, + "num_input_tokens_seen": 13916640, + "step": 208 + }, + { + "epoch": 0.02371631205673759, + "grad_norm": 17.871572494506836, + "learning_rate": 5e-05, + "loss": 2.4367, + "num_input_tokens_seen": 13982540, + "step": 209 + }, + { + "epoch": 0.02371631205673759, + "loss": 2.436494827270508, + "loss_ce": 0.006807452067732811, + "loss_iou": 1.0390625, + "loss_num": 0.0693359375, + "loss_xval": 2.4375, + "num_input_tokens_seen": 13982540, + "step": 209 + }, + { + "epoch": 0.023829787234042554, + "grad_norm": 9.715182304382324, + "learning_rate": 5e-05, + "loss": 2.0218, + "num_input_tokens_seen": 14049752, + "step": 210 + }, + { + "epoch": 0.023829787234042554, + "loss": 2.054166555404663, + "loss_ce": 0.012174433097243309, + "loss_iou": 0.859375, + "loss_num": 0.064453125, + "loss_xval": 2.046875, + "num_input_tokens_seen": 14049752, + "step": 210 + }, + { + "epoch": 0.02394326241134752, + "grad_norm": 11.759466171264648, + "learning_rate": 5e-05, + "loss": 2.0646, + "num_input_tokens_seen": 14116764, + "step": 211 + }, + { + "epoch": 0.02394326241134752, + "loss": 2.145278215408325, + "loss_ce": 0.0046531641855835915, + "loss_iou": 0.921875, + "loss_num": 0.05908203125, + "loss_xval": 2.140625, + "num_input_tokens_seen": 14116764, + "step": 211 + }, + { + "epoch": 0.024056737588652483, + "grad_norm": 11.428049087524414, + "learning_rate": 5e-05, + "loss": 2.1695, + "num_input_tokens_seen": 14183788, + "step": 212 + }, + { + "epoch": 0.024056737588652483, + "loss": 2.1415462493896484, + "loss_ce": 0.001897747628390789, + "loss_iou": 0.921875, + "loss_num": 0.05859375, + "loss_xval": 2.140625, + "num_input_tokens_seen": 14183788, + "step": 212 + }, + { + "epoch": 0.024170212765957447, + "grad_norm": 21.4481143951416, + "learning_rate": 5e-05, + "loss": 2.1289, + "num_input_tokens_seen": 14250744, + "step": 213 + }, + { + "epoch": 0.024170212765957447, + "loss": 2.1781973838806152, + "loss_ce": 0.005345802754163742, + "loss_iou": 0.90234375, + "loss_num": 0.07373046875, + "loss_xval": 2.171875, + "num_input_tokens_seen": 14250744, + "step": 213 + }, + { + "epoch": 0.02428368794326241, + "grad_norm": 11.155306816101074, + "learning_rate": 5e-05, + "loss": 2.4096, + "num_input_tokens_seen": 14319120, + "step": 214 + }, + { + "epoch": 0.02428368794326241, + "loss": 2.3967273235321045, + "loss_ce": 0.006102318875491619, + "loss_iou": 1.0078125, + "loss_num": 0.07421875, + "loss_xval": 2.390625, + "num_input_tokens_seen": 14319120, + "step": 214 + }, + { + "epoch": 0.024397163120567375, + "grad_norm": 9.961708068847656, + "learning_rate": 5e-05, + "loss": 1.9567, + "num_input_tokens_seen": 14386148, + "step": 215 + }, + { + "epoch": 0.024397163120567375, + "loss": 1.7256171703338623, + "loss_ce": 0.0032051322050392628, + "loss_iou": 0.734375, + "loss_num": 0.051513671875, + "loss_xval": 1.71875, + "num_input_tokens_seen": 14386148, + "step": 215 + }, + { + "epoch": 0.02451063829787234, + "grad_norm": 14.514792442321777, + "learning_rate": 5e-05, + "loss": 1.8786, + "num_input_tokens_seen": 14453152, + "step": 216 + }, + { + "epoch": 0.02451063829787234, + "loss": 1.9915218353271484, + "loss_ce": 0.004217098001390696, + "loss_iou": 0.8671875, + "loss_num": 0.05126953125, + "loss_xval": 1.984375, + "num_input_tokens_seen": 14453152, + "step": 216 + }, + { + "epoch": 0.024624113475177303, + "grad_norm": 22.81439971923828, + "learning_rate": 5e-05, + "loss": 2.4335, + "num_input_tokens_seen": 14519708, + "step": 217 + }, + { + "epoch": 0.024624113475177303, + "loss": 2.2442383766174316, + "loss_ce": 0.008398497477173805, + "loss_iou": 0.9609375, + "loss_num": 0.0625, + "loss_xval": 2.234375, + "num_input_tokens_seen": 14519708, + "step": 217 + }, + { + "epoch": 0.02473758865248227, + "grad_norm": 8.119002342224121, + "learning_rate": 5e-05, + "loss": 2.2919, + "num_input_tokens_seen": 14586524, + "step": 218 + }, + { + "epoch": 0.02473758865248227, + "loss": 2.338153839111328, + "loss_ce": 0.010028734803199768, + "loss_iou": 0.96484375, + "loss_num": 0.07958984375, + "loss_xval": 2.328125, + "num_input_tokens_seen": 14586524, + "step": 218 + }, + { + "epoch": 0.024851063829787235, + "grad_norm": 10.298980712890625, + "learning_rate": 5e-05, + "loss": 2.1678, + "num_input_tokens_seen": 14653036, + "step": 219 + }, + { + "epoch": 0.024851063829787235, + "loss": 2.2828779220581055, + "loss_ce": 0.005534076131880283, + "loss_iou": 0.91796875, + "loss_num": 0.08837890625, + "loss_xval": 2.28125, + "num_input_tokens_seen": 14653036, + "step": 219 + }, + { + "epoch": 0.0249645390070922, + "grad_norm": 14.644815444946289, + "learning_rate": 5e-05, + "loss": 2.074, + "num_input_tokens_seen": 14719664, + "step": 220 + }, + { + "epoch": 0.0249645390070922, + "loss": 2.208768367767334, + "loss_ce": 0.003690444165840745, + "loss_iou": 0.9296875, + "loss_num": 0.068359375, + "loss_xval": 2.203125, + "num_input_tokens_seen": 14719664, + "step": 220 + }, + { + "epoch": 0.025078014184397163, + "grad_norm": 27.661447525024414, + "learning_rate": 5e-05, + "loss": 2.3775, + "num_input_tokens_seen": 14786920, + "step": 221 + }, + { + "epoch": 0.025078014184397163, + "loss": 2.493089437484741, + "loss_ce": 0.005784771405160427, + "loss_iou": 1.0390625, + "loss_num": 0.0830078125, + "loss_xval": 2.484375, + "num_input_tokens_seen": 14786920, + "step": 221 + }, + { + "epoch": 0.025191489361702128, + "grad_norm": 10.211207389831543, + "learning_rate": 5e-05, + "loss": 1.8929, + "num_input_tokens_seen": 14853796, + "step": 222 + }, + { + "epoch": 0.025191489361702128, + "loss": 1.7956962585449219, + "loss_ce": 0.010540043003857136, + "loss_iou": 0.76171875, + "loss_num": 0.052490234375, + "loss_xval": 1.78125, + "num_input_tokens_seen": 14853796, + "step": 222 + }, + { + "epoch": 0.02530496453900709, + "grad_norm": 14.632715225219727, + "learning_rate": 5e-05, + "loss": 2.3107, + "num_input_tokens_seen": 14920736, + "step": 223 + }, + { + "epoch": 0.02530496453900709, + "loss": 2.426729202270508, + "loss_ce": 0.005830708891153336, + "loss_iou": 1.046875, + "loss_num": 0.06640625, + "loss_xval": 2.421875, + "num_input_tokens_seen": 14920736, + "step": 223 + }, + { + "epoch": 0.025418439716312056, + "grad_norm": 13.705864906311035, + "learning_rate": 5e-05, + "loss": 2.0474, + "num_input_tokens_seen": 14987488, + "step": 224 + }, + { + "epoch": 0.025418439716312056, + "loss": 2.124129056930542, + "loss_ce": 0.005965095013380051, + "loss_iou": 0.90625, + "loss_num": 0.06201171875, + "loss_xval": 2.125, + "num_input_tokens_seen": 14987488, + "step": 224 + }, + { + "epoch": 0.02553191489361702, + "grad_norm": 15.260683059692383, + "learning_rate": 5e-05, + "loss": 2.1298, + "num_input_tokens_seen": 15055612, + "step": 225 + }, + { + "epoch": 0.02553191489361702, + "loss": 2.2832226753234863, + "loss_ce": 0.007831980474293232, + "loss_iou": 0.98828125, + "loss_num": 0.059326171875, + "loss_xval": 2.28125, + "num_input_tokens_seen": 15055612, + "step": 225 + }, + { + "epoch": 0.025645390070921984, + "grad_norm": 11.914098739624023, + "learning_rate": 5e-05, + "loss": 2.1988, + "num_input_tokens_seen": 15123136, + "step": 226 + }, + { + "epoch": 0.025645390070921984, + "loss": 2.132108211517334, + "loss_ce": 0.01882690191268921, + "loss_iou": 0.9296875, + "loss_num": 0.05029296875, + "loss_xval": 2.109375, + "num_input_tokens_seen": 15123136, + "step": 226 + }, + { + "epoch": 0.02575886524822695, + "grad_norm": 9.40145492553711, + "learning_rate": 5e-05, + "loss": 1.947, + "num_input_tokens_seen": 15189604, + "step": 227 + }, + { + "epoch": 0.02575886524822695, + "loss": 1.9590708017349243, + "loss_ce": 0.005945737473666668, + "loss_iou": 0.8359375, + "loss_num": 0.056884765625, + "loss_xval": 1.953125, + "num_input_tokens_seen": 15189604, + "step": 227 + }, + { + "epoch": 0.025872340425531916, + "grad_norm": 16.560224533081055, + "learning_rate": 5e-05, + "loss": 2.0682, + "num_input_tokens_seen": 15257596, + "step": 228 + }, + { + "epoch": 0.025872340425531916, + "loss": 1.979672908782959, + "loss_ce": 0.005063546821475029, + "loss_iou": 0.8828125, + "loss_num": 0.042236328125, + "loss_xval": 1.9765625, + "num_input_tokens_seen": 15257596, + "step": 228 + }, + { + "epoch": 0.02598581560283688, + "grad_norm": 11.712940216064453, + "learning_rate": 5e-05, + "loss": 2.2313, + "num_input_tokens_seen": 15325072, + "step": 229 + }, + { + "epoch": 0.02598581560283688, + "loss": 2.227478265762329, + "loss_ce": 0.004822056274861097, + "loss_iou": 0.9765625, + "loss_num": 0.054443359375, + "loss_xval": 2.21875, + "num_input_tokens_seen": 15325072, + "step": 229 + }, + { + "epoch": 0.026099290780141844, + "grad_norm": 12.147698402404785, + "learning_rate": 5e-05, + "loss": 2.0748, + "num_input_tokens_seen": 15392584, + "step": 230 + }, + { + "epoch": 0.026099290780141844, + "loss": 2.0002784729003906, + "loss_ce": 0.009067520499229431, + "loss_iou": 0.86328125, + "loss_num": 0.05224609375, + "loss_xval": 1.9921875, + "num_input_tokens_seen": 15392584, + "step": 230 + }, + { + "epoch": 0.02621276595744681, + "grad_norm": 16.654722213745117, + "learning_rate": 5e-05, + "loss": 2.025, + "num_input_tokens_seen": 15459844, + "step": 231 + }, + { + "epoch": 0.02621276595744681, + "loss": 1.8369026184082031, + "loss_ce": 0.0029181567952036858, + "loss_iou": 0.828125, + "loss_num": 0.03564453125, + "loss_xval": 1.8359375, + "num_input_tokens_seen": 15459844, + "step": 231 + }, + { + "epoch": 0.026326241134751772, + "grad_norm": 16.383960723876953, + "learning_rate": 5e-05, + "loss": 2.2349, + "num_input_tokens_seen": 15527580, + "step": 232 + }, + { + "epoch": 0.026326241134751772, + "loss": 2.236982583999634, + "loss_ce": 0.009443474002182484, + "loss_iou": 0.95703125, + "loss_num": 0.0634765625, + "loss_xval": 2.234375, + "num_input_tokens_seen": 15527580, + "step": 232 + }, + { + "epoch": 0.026439716312056737, + "grad_norm": 13.659037590026855, + "learning_rate": 5e-05, + "loss": 2.0123, + "num_input_tokens_seen": 15594212, + "step": 233 + }, + { + "epoch": 0.026439716312056737, + "loss": 1.9384303092956543, + "loss_ce": 0.004836434964090586, + "loss_iou": 0.84765625, + "loss_num": 0.04736328125, + "loss_xval": 1.9375, + "num_input_tokens_seen": 15594212, + "step": 233 + }, + { + "epoch": 0.0265531914893617, + "grad_norm": 15.772038459777832, + "learning_rate": 5e-05, + "loss": 1.9651, + "num_input_tokens_seen": 15661288, + "step": 234 + }, + { + "epoch": 0.0265531914893617, + "loss": 1.9540388584136963, + "loss_ce": 0.007749753072857857, + "loss_iou": 0.83203125, + "loss_num": 0.056396484375, + "loss_xval": 1.9453125, + "num_input_tokens_seen": 15661288, + "step": 234 + }, + { + "epoch": 0.02666666666666667, + "grad_norm": 29.09591293334961, + "learning_rate": 5e-05, + "loss": 2.871, + "num_input_tokens_seen": 15728744, + "step": 235 + }, + { + "epoch": 0.02666666666666667, + "loss": 2.9279699325561523, + "loss_ce": 0.010001220740377903, + "loss_iou": 1.2890625, + "loss_num": 0.0693359375, + "loss_xval": 2.921875, + "num_input_tokens_seen": 15728744, + "step": 235 + }, + { + "epoch": 0.026780141843971633, + "grad_norm": 8.442623138427734, + "learning_rate": 5e-05, + "loss": 2.5535, + "num_input_tokens_seen": 15795204, + "step": 236 + }, + { + "epoch": 0.026780141843971633, + "loss": 2.591973304748535, + "loss_ce": 0.004082642961293459, + "loss_iou": 1.09375, + "loss_num": 0.08203125, + "loss_xval": 2.59375, + "num_input_tokens_seen": 15795204, + "step": 236 + }, + { + "epoch": 0.026893617021276597, + "grad_norm": 9.92133617401123, + "learning_rate": 5e-05, + "loss": 2.2312, + "num_input_tokens_seen": 15862544, + "step": 237 + }, + { + "epoch": 0.026893617021276597, + "loss": 2.2933974266052246, + "loss_ce": 0.008241022005677223, + "loss_iou": 0.9609375, + "loss_num": 0.0712890625, + "loss_xval": 2.28125, + "num_input_tokens_seen": 15862544, + "step": 237 + }, + { + "epoch": 0.02700709219858156, + "grad_norm": 11.868678092956543, + "learning_rate": 5e-05, + "loss": 2.0336, + "num_input_tokens_seen": 15928700, + "step": 238 + }, + { + "epoch": 0.02700709219858156, + "loss": 1.9620743989944458, + "loss_ce": 0.005043056793510914, + "loss_iou": 0.83203125, + "loss_num": 0.058349609375, + "loss_xval": 1.953125, + "num_input_tokens_seen": 15928700, + "step": 238 + }, + { + "epoch": 0.027120567375886525, + "grad_norm": 13.07463264465332, + "learning_rate": 5e-05, + "loss": 2.0741, + "num_input_tokens_seen": 15995004, + "step": 239 + }, + { + "epoch": 0.027120567375886525, + "loss": 2.0220937728881836, + "loss_ce": 0.004515520296990871, + "loss_iou": 0.890625, + "loss_num": 0.04736328125, + "loss_xval": 2.015625, + "num_input_tokens_seen": 15995004, + "step": 239 + }, + { + "epoch": 0.02723404255319149, + "grad_norm": 48.70344543457031, + "learning_rate": 5e-05, + "loss": 2.0038, + "num_input_tokens_seen": 16061764, + "step": 240 + }, + { + "epoch": 0.02723404255319149, + "loss": 2.129913091659546, + "loss_ce": 0.006866262760013342, + "loss_iou": 0.93359375, + "loss_num": 0.05126953125, + "loss_xval": 2.125, + "num_input_tokens_seen": 16061764, + "step": 240 + }, + { + "epoch": 0.027347517730496453, + "grad_norm": 18.327394485473633, + "learning_rate": 5e-05, + "loss": 2.2463, + "num_input_tokens_seen": 16128784, + "step": 241 + }, + { + "epoch": 0.027347517730496453, + "loss": 2.3567495346069336, + "loss_ce": 0.006163523532450199, + "loss_iou": 1.015625, + "loss_num": 0.06396484375, + "loss_xval": 2.34375, + "num_input_tokens_seen": 16128784, + "step": 241 + }, + { + "epoch": 0.027460992907801417, + "grad_norm": 20.137731552124023, + "learning_rate": 5e-05, + "loss": 2.0818, + "num_input_tokens_seen": 16195792, + "step": 242 + }, + { + "epoch": 0.027460992907801417, + "loss": 2.2072885036468506, + "loss_ce": 0.006116527132689953, + "loss_iou": 0.96875, + "loss_num": 0.052490234375, + "loss_xval": 2.203125, + "num_input_tokens_seen": 16195792, + "step": 242 + }, + { + "epoch": 0.02757446808510638, + "grad_norm": 12.905107498168945, + "learning_rate": 5e-05, + "loss": 2.4613, + "num_input_tokens_seen": 16261944, + "step": 243 + }, + { + "epoch": 0.02757446808510638, + "loss": 2.53001070022583, + "loss_ce": 0.00852644070982933, + "loss_iou": 1.0078125, + "loss_num": 0.1015625, + "loss_xval": 2.515625, + "num_input_tokens_seen": 16261944, + "step": 243 + }, + { + "epoch": 0.02768794326241135, + "grad_norm": 30.687755584716797, + "learning_rate": 5e-05, + "loss": 1.9181, + "num_input_tokens_seen": 16329460, + "step": 244 + }, + { + "epoch": 0.02768794326241135, + "loss": 1.950526237487793, + "loss_ce": 0.008143363520503044, + "loss_iou": 0.84375, + "loss_num": 0.051513671875, + "loss_xval": 1.9453125, + "num_input_tokens_seen": 16329460, + "step": 244 + }, + { + "epoch": 0.027801418439716313, + "grad_norm": 24.267324447631836, + "learning_rate": 5e-05, + "loss": 2.2671, + "num_input_tokens_seen": 16396760, + "step": 245 + }, + { + "epoch": 0.027801418439716313, + "loss": 2.194706439971924, + "loss_ce": 0.006230008322745562, + "loss_iou": 0.98046875, + "loss_num": 0.045654296875, + "loss_xval": 2.1875, + "num_input_tokens_seen": 16396760, + "step": 245 + }, + { + "epoch": 0.027914893617021277, + "grad_norm": 12.893952369689941, + "learning_rate": 5e-05, + "loss": 2.365, + "num_input_tokens_seen": 16463324, + "step": 246 + }, + { + "epoch": 0.027914893617021277, + "loss": 2.3139333724975586, + "loss_ce": 0.010222472250461578, + "loss_iou": 0.96875, + "loss_num": 0.0732421875, + "loss_xval": 2.296875, + "num_input_tokens_seen": 16463324, + "step": 246 + }, + { + "epoch": 0.02802836879432624, + "grad_norm": 12.849430084228516, + "learning_rate": 5e-05, + "loss": 2.1331, + "num_input_tokens_seen": 16529868, + "step": 247 + }, + { + "epoch": 0.02802836879432624, + "loss": 2.007035255432129, + "loss_ce": 0.003129031043499708, + "loss_iou": 0.87109375, + "loss_num": 0.052734375, + "loss_xval": 2.0, + "num_input_tokens_seen": 16529868, + "step": 247 + }, + { + "epoch": 0.028141843971631206, + "grad_norm": 20.962507247924805, + "learning_rate": 5e-05, + "loss": 1.978, + "num_input_tokens_seen": 16596884, + "step": 248 + }, + { + "epoch": 0.028141843971631206, + "loss": 1.8284767866134644, + "loss_ce": 0.007920119911432266, + "loss_iou": 0.7734375, + "loss_num": 0.053955078125, + "loss_xval": 1.8203125, + "num_input_tokens_seen": 16596884, + "step": 248 + }, + { + "epoch": 0.02825531914893617, + "grad_norm": 12.846863746643066, + "learning_rate": 5e-05, + "loss": 2.4699, + "num_input_tokens_seen": 16663304, + "step": 249 + }, + { + "epoch": 0.02825531914893617, + "loss": 2.47305965423584, + "loss_ce": 0.014075379818677902, + "loss_iou": 1.015625, + "loss_num": 0.0859375, + "loss_xval": 2.453125, + "num_input_tokens_seen": 16663304, + "step": 249 + }, + { + "epoch": 0.028368794326241134, + "grad_norm": 12.178105354309082, + "learning_rate": 5e-05, + "loss": 2.1096, + "num_input_tokens_seen": 16730680, + "step": 250 + }, + { + "epoch": 0.028368794326241134, + "eval_seeclick_CIoU": 0.32427559792995453, + "eval_seeclick_GIoU": 0.28391535580158234, + "eval_seeclick_IoU": 0.4095884710550308, + "eval_seeclick_MAE_all": 0.11548175290226936, + "eval_seeclick_MAE_h": 0.09852463193237782, + "eval_seeclick_MAE_w": 0.15311992913484573, + "eval_seeclick_MAE_x_boxes": 0.19465716183185577, + "eval_seeclick_MAE_y_boxes": 0.10810388997197151, + "eval_seeclick_NUM_probability": 0.9966083765029907, + "eval_seeclick_inside_bbox": 0.6302083432674408, + "eval_seeclick_loss": 2.7866692543029785, + "eval_seeclick_loss_ce": 0.017056038603186607, + "eval_seeclick_loss_iou": 1.096435546875, + "eval_seeclick_loss_num": 0.12347412109375, + "eval_seeclick_loss_xval": 2.80712890625, + "eval_seeclick_runtime": 66.1518, + "eval_seeclick_samples_per_second": 0.71, + "eval_seeclick_steps_per_second": 0.03, + "num_input_tokens_seen": 16730680, + "step": 250 + }, + { + "epoch": 0.028368794326241134, + "eval_icons_CIoU": 0.3093728572130203, + "eval_icons_GIoU": 0.27377574145793915, + "eval_icons_IoU": 0.35352860391139984, + "eval_icons_MAE_all": 0.1484254151582718, + "eval_icons_MAE_h": 0.1407543197274208, + "eval_icons_MAE_w": 0.17480797320604324, + "eval_icons_MAE_x_boxes": 0.11987460032105446, + "eval_icons_MAE_y_boxes": 0.10343674197793007, + "eval_icons_NUM_probability": 0.9961378872394562, + "eval_icons_inside_bbox": 0.6892361044883728, + "eval_icons_loss": 2.998514413833618, + "eval_icons_loss_ce": 0.0031077886233106256, + "eval_icons_loss_iou": 1.14892578125, + "eval_icons_loss_num": 0.1424407958984375, + "eval_icons_loss_xval": 3.009765625, + "eval_icons_runtime": 70.7262, + "eval_icons_samples_per_second": 0.707, + "eval_icons_steps_per_second": 0.028, + "num_input_tokens_seen": 16730680, + "step": 250 + }, + { + "epoch": 0.028368794326241134, + "eval_screenspot_CIoU": 0.31915730237960815, + "eval_screenspot_GIoU": 0.2788233458995819, + "eval_screenspot_IoU": 0.39820700883865356, + "eval_screenspot_MAE_all": 0.1353934034705162, + "eval_screenspot_MAE_h": 0.10655747105677922, + "eval_screenspot_MAE_w": 0.20005754133065543, + "eval_screenspot_MAE_x_boxes": 0.19660205642382303, + "eval_screenspot_MAE_y_boxes": 0.10931446899970372, + "eval_screenspot_NUM_probability": 0.9963526328404745, + "eval_screenspot_inside_bbox": 0.6670833428700765, + "eval_screenspot_loss": 3.0285592079162598, + "eval_screenspot_loss_ce": 0.007115527056157589, + "eval_screenspot_loss_iou": 1.1783854166666667, + "eval_screenspot_loss_num": 0.14058430989583334, + "eval_screenspot_loss_xval": 3.0592447916666665, + "eval_screenspot_runtime": 118.7351, + "eval_screenspot_samples_per_second": 0.75, + "eval_screenspot_steps_per_second": 0.025, + "num_input_tokens_seen": 16730680, + "step": 250 + }, + { + "epoch": 0.028368794326241134, + "eval_compot_CIoU": 0.30759716033935547, + "eval_compot_GIoU": 0.2786458432674408, + "eval_compot_IoU": 0.3863833546638489, + "eval_compot_MAE_all": 0.11193690448999405, + "eval_compot_MAE_h": 0.10568635165691376, + "eval_compot_MAE_w": 0.1489948257803917, + "eval_compot_MAE_x_boxes": 0.1587367132306099, + "eval_compot_MAE_y_boxes": 0.09995973482728004, + "eval_compot_NUM_probability": 0.9886038899421692, + "eval_compot_inside_bbox": 0.5902777910232544, + "eval_compot_loss": 2.946561813354492, + "eval_compot_loss_ce": 0.014837796799838543, + "eval_compot_loss_iou": 1.169677734375, + "eval_compot_loss_num": 0.105560302734375, + "eval_compot_loss_xval": 2.865234375, + "eval_compot_runtime": 67.605, + "eval_compot_samples_per_second": 0.74, + "eval_compot_steps_per_second": 0.03, + "num_input_tokens_seen": 16730680, + "step": 250 + }, + { + "epoch": 0.028368794326241134, + "loss": 2.859528064727783, + "loss_ce": 0.0157778337597847, + "loss_iou": 1.1640625, + "loss_num": 0.1025390625, + "loss_xval": 2.84375, + "num_input_tokens_seen": 16730680, + "step": 250 + }, + { + "epoch": 0.028482269503546098, + "grad_norm": 16.530298233032227, + "learning_rate": 5e-05, + "loss": 1.9448, + "num_input_tokens_seen": 16797244, + "step": 251 + }, + { + "epoch": 0.028482269503546098, + "loss": 1.8805159330368042, + "loss_ce": 0.011375375092029572, + "loss_iou": 0.84765625, + "loss_num": 0.03515625, + "loss_xval": 1.8671875, + "num_input_tokens_seen": 16797244, + "step": 251 + }, + { + "epoch": 0.028595744680851062, + "grad_norm": 12.706256866455078, + "learning_rate": 5e-05, + "loss": 1.9037, + "num_input_tokens_seen": 16863448, + "step": 252 + }, + { + "epoch": 0.028595744680851062, + "loss": 1.7878880500793457, + "loss_ce": 0.005539409350603819, + "loss_iou": 0.75390625, + "loss_num": 0.054931640625, + "loss_xval": 1.78125, + "num_input_tokens_seen": 16863448, + "step": 252 + }, + { + "epoch": 0.02870921985815603, + "grad_norm": 8.887663841247559, + "learning_rate": 5e-05, + "loss": 2.0412, + "num_input_tokens_seen": 16931296, + "step": 253 + }, + { + "epoch": 0.02870921985815603, + "loss": 2.1030683517456055, + "loss_ce": 0.007365386001765728, + "loss_iou": 0.90625, + "loss_num": 0.057373046875, + "loss_xval": 2.09375, + "num_input_tokens_seen": 16931296, + "step": 253 + }, + { + "epoch": 0.028822695035460994, + "grad_norm": 20.58161163330078, + "learning_rate": 5e-05, + "loss": 1.9607, + "num_input_tokens_seen": 16998204, + "step": 254 + }, + { + "epoch": 0.028822695035460994, + "loss": 2.1494736671447754, + "loss_ce": 0.009825145825743675, + "loss_iou": 0.91015625, + "loss_num": 0.06396484375, + "loss_xval": 2.140625, + "num_input_tokens_seen": 16998204, + "step": 254 + }, + { + "epoch": 0.02893617021276596, + "grad_norm": 23.978988647460938, + "learning_rate": 5e-05, + "loss": 2.192, + "num_input_tokens_seen": 17064576, + "step": 255 + }, + { + "epoch": 0.02893617021276596, + "loss": 2.2063419818878174, + "loss_ce": 0.008099878206849098, + "loss_iou": 0.9921875, + "loss_num": 0.042724609375, + "loss_xval": 2.203125, + "num_input_tokens_seen": 17064576, + "step": 255 + }, + { + "epoch": 0.029049645390070922, + "grad_norm": 10.894899368286133, + "learning_rate": 5e-05, + "loss": 2.1363, + "num_input_tokens_seen": 17130260, + "step": 256 + }, + { + "epoch": 0.029049645390070922, + "loss": 2.1060192584991455, + "loss_ce": 0.007874669507145882, + "loss_iou": 0.859375, + "loss_num": 0.07666015625, + "loss_xval": 2.09375, + "num_input_tokens_seen": 17130260, + "step": 256 + }, + { + "epoch": 0.029163120567375887, + "grad_norm": 12.22327995300293, + "learning_rate": 5e-05, + "loss": 1.9441, + "num_input_tokens_seen": 17197560, + "step": 257 + }, + { + "epoch": 0.029163120567375887, + "loss": 1.9899498224258423, + "loss_ce": 0.007527906447649002, + "loss_iou": 0.88671875, + "loss_num": 0.041748046875, + "loss_xval": 1.984375, + "num_input_tokens_seen": 17197560, + "step": 257 + }, + { + "epoch": 0.02927659574468085, + "grad_norm": 29.612520217895508, + "learning_rate": 5e-05, + "loss": 2.1689, + "num_input_tokens_seen": 17265668, + "step": 258 + }, + { + "epoch": 0.02927659574468085, + "loss": 2.1394004821777344, + "loss_ce": 0.005611300002783537, + "loss_iou": 0.95703125, + "loss_num": 0.043701171875, + "loss_xval": 2.140625, + "num_input_tokens_seen": 17265668, + "step": 258 + }, + { + "epoch": 0.029390070921985815, + "grad_norm": 10.715404510498047, + "learning_rate": 5e-05, + "loss": 2.3815, + "num_input_tokens_seen": 17332476, + "step": 259 + }, + { + "epoch": 0.029390070921985815, + "loss": 2.4313323497772217, + "loss_ce": 0.003597873728722334, + "loss_iou": 1.0390625, + "loss_num": 0.0693359375, + "loss_xval": 2.421875, + "num_input_tokens_seen": 17332476, + "step": 259 + }, + { + "epoch": 0.02950354609929078, + "grad_norm": 20.119739532470703, + "learning_rate": 5e-05, + "loss": 1.977, + "num_input_tokens_seen": 17399516, + "step": 260 + }, + { + "epoch": 0.02950354609929078, + "loss": 1.8278207778930664, + "loss_ce": 0.006531699560582638, + "loss_iou": 0.7265625, + "loss_num": 0.0732421875, + "loss_xval": 1.8203125, + "num_input_tokens_seen": 17399516, + "step": 260 + }, + { + "epoch": 0.029617021276595743, + "grad_norm": 17.739049911499023, + "learning_rate": 5e-05, + "loss": 2.0529, + "num_input_tokens_seen": 17465968, + "step": 261 + }, + { + "epoch": 0.029617021276595743, + "loss": 2.1175010204315186, + "loss_ce": 0.0061728921718895435, + "loss_iou": 0.8671875, + "loss_num": 0.0751953125, + "loss_xval": 2.109375, + "num_input_tokens_seen": 17465968, + "step": 261 + }, + { + "epoch": 0.02973049645390071, + "grad_norm": 12.125555992126465, + "learning_rate": 5e-05, + "loss": 1.7664, + "num_input_tokens_seen": 17533168, + "step": 262 + }, + { + "epoch": 0.02973049645390071, + "loss": 1.8231654167175293, + "loss_ce": 0.007735772989690304, + "loss_iou": 0.80859375, + "loss_num": 0.0400390625, + "loss_xval": 1.8125, + "num_input_tokens_seen": 17533168, + "step": 262 + }, + { + "epoch": 0.029843971631205675, + "grad_norm": 36.605369567871094, + "learning_rate": 5e-05, + "loss": 2.3049, + "num_input_tokens_seen": 17599360, + "step": 263 + }, + { + "epoch": 0.029843971631205675, + "loss": 2.3389480113983154, + "loss_ce": 0.00594024732708931, + "loss_iou": 1.046875, + "loss_num": 0.04833984375, + "loss_xval": 2.328125, + "num_input_tokens_seen": 17599360, + "step": 263 + }, + { + "epoch": 0.02995744680851064, + "grad_norm": 8.869305610656738, + "learning_rate": 5e-05, + "loss": 2.5965, + "num_input_tokens_seen": 17666636, + "step": 264 + }, + { + "epoch": 0.02995744680851064, + "loss": 2.5324809551239014, + "loss_ce": 0.005137257277965546, + "loss_iou": 1.0703125, + "loss_num": 0.078125, + "loss_xval": 2.53125, + "num_input_tokens_seen": 17666636, + "step": 264 + }, + { + "epoch": 0.030070921985815603, + "grad_norm": 7.377559661865234, + "learning_rate": 5e-05, + "loss": 2.3443, + "num_input_tokens_seen": 17733916, + "step": 265 + }, + { + "epoch": 0.030070921985815603, + "loss": 2.296736717224121, + "loss_ce": 0.003767886897549033, + "loss_iou": 0.9609375, + "loss_num": 0.07373046875, + "loss_xval": 2.296875, + "num_input_tokens_seen": 17733916, + "step": 265 + }, + { + "epoch": 0.030184397163120567, + "grad_norm": 14.062477111816406, + "learning_rate": 5e-05, + "loss": 2.2755, + "num_input_tokens_seen": 17802216, + "step": 266 + }, + { + "epoch": 0.030184397163120567, + "loss": 2.205850601196289, + "loss_ce": 0.0046785203740000725, + "loss_iou": 0.93359375, + "loss_num": 0.06689453125, + "loss_xval": 2.203125, + "num_input_tokens_seen": 17802216, + "step": 266 + }, + { + "epoch": 0.03029787234042553, + "grad_norm": 16.201812744140625, + "learning_rate": 5e-05, + "loss": 1.9918, + "num_input_tokens_seen": 17869360, + "step": 267 + }, + { + "epoch": 0.03029787234042553, + "loss": 1.981341004371643, + "loss_ce": 0.004778553731739521, + "loss_iou": 0.859375, + "loss_num": 0.052001953125, + "loss_xval": 1.9765625, + "num_input_tokens_seen": 17869360, + "step": 267 + }, + { + "epoch": 0.030411347517730496, + "grad_norm": 8.225858688354492, + "learning_rate": 5e-05, + "loss": 1.8105, + "num_input_tokens_seen": 17935644, + "step": 268 + }, + { + "epoch": 0.030411347517730496, + "loss": 1.6652848720550537, + "loss_ce": 0.006715471390634775, + "loss_iou": 0.71875, + "loss_num": 0.044189453125, + "loss_xval": 1.65625, + "num_input_tokens_seen": 17935644, + "step": 268 + }, + { + "epoch": 0.03052482269503546, + "grad_norm": 13.133440971374512, + "learning_rate": 5e-05, + "loss": 2.0708, + "num_input_tokens_seen": 18002156, + "step": 269 + }, + { + "epoch": 0.03052482269503546, + "loss": 1.9457656145095825, + "loss_ce": 0.009242206811904907, + "loss_iou": 0.8203125, + "loss_num": 0.06005859375, + "loss_xval": 1.9375, + "num_input_tokens_seen": 18002156, + "step": 269 + }, + { + "epoch": 0.030638297872340424, + "grad_norm": 18.670259475708008, + "learning_rate": 5e-05, + "loss": 1.8239, + "num_input_tokens_seen": 18067916, + "step": 270 + }, + { + "epoch": 0.030638297872340424, + "loss": 1.928750991821289, + "loss_ce": 0.007608296349644661, + "loss_iou": 0.78515625, + "loss_num": 0.06982421875, + "loss_xval": 1.921875, + "num_input_tokens_seen": 18067916, + "step": 270 + }, + { + "epoch": 0.03075177304964539, + "grad_norm": 12.89663314819336, + "learning_rate": 5e-05, + "loss": 2.3815, + "num_input_tokens_seen": 18133816, + "step": 271 + }, + { + "epoch": 0.03075177304964539, + "loss": 2.344308376312256, + "loss_ce": 0.00641779275611043, + "loss_iou": 1.0078125, + "loss_num": 0.06494140625, + "loss_xval": 2.34375, + "num_input_tokens_seen": 18133816, + "step": 271 + }, + { + "epoch": 0.030865248226950356, + "grad_norm": 11.394756317138672, + "learning_rate": 5e-05, + "loss": 2.1308, + "num_input_tokens_seen": 18201148, + "step": 272 + }, + { + "epoch": 0.030865248226950356, + "loss": 2.175142288208008, + "loss_ce": 0.007173540536314249, + "loss_iou": 0.94921875, + "loss_num": 0.05419921875, + "loss_xval": 2.171875, + "num_input_tokens_seen": 18201148, + "step": 272 + }, + { + "epoch": 0.03097872340425532, + "grad_norm": 13.135052680969238, + "learning_rate": 5e-05, + "loss": 2.0685, + "num_input_tokens_seen": 18268480, + "step": 273 + }, + { + "epoch": 0.03097872340425532, + "loss": 2.0116658210754395, + "loss_ce": 0.009712656028568745, + "loss_iou": 0.86328125, + "loss_num": 0.05517578125, + "loss_xval": 2.0, + "num_input_tokens_seen": 18268480, + "step": 273 + }, + { + "epoch": 0.031092198581560284, + "grad_norm": 37.635189056396484, + "learning_rate": 5e-05, + "loss": 2.2904, + "num_input_tokens_seen": 18335168, + "step": 274 + }, + { + "epoch": 0.031092198581560284, + "loss": 2.3303685188293457, + "loss_ce": 0.007126430980861187, + "loss_iou": 1.0390625, + "loss_num": 0.0498046875, + "loss_xval": 2.328125, + "num_input_tokens_seen": 18335168, + "step": 274 + }, + { + "epoch": 0.031205673758865248, + "grad_norm": 9.171529769897461, + "learning_rate": 5e-05, + "loss": 2.6609, + "num_input_tokens_seen": 18401924, + "step": 275 + }, + { + "epoch": 0.031205673758865248, + "loss": 2.7242178916931152, + "loss_ce": 0.013280438259243965, + "loss_iou": 1.1328125, + "loss_num": 0.08984375, + "loss_xval": 2.71875, + "num_input_tokens_seen": 18401924, + "step": 275 + }, + { + "epoch": 0.031319148936170216, + "grad_norm": 10.929903030395508, + "learning_rate": 5e-05, + "loss": 2.3293, + "num_input_tokens_seen": 18469024, + "step": 276 + }, + { + "epoch": 0.031319148936170216, + "loss": 2.211719036102295, + "loss_ce": 0.006641052663326263, + "loss_iou": 0.9296875, + "loss_num": 0.0693359375, + "loss_xval": 2.203125, + "num_input_tokens_seen": 18469024, + "step": 276 + }, + { + "epoch": 0.03143262411347518, + "grad_norm": 11.157551765441895, + "learning_rate": 5e-05, + "loss": 2.0402, + "num_input_tokens_seen": 18534832, + "step": 277 + }, + { + "epoch": 0.03143262411347518, + "loss": 2.0882465839385986, + "loss_ce": 0.004262187518179417, + "loss_iou": 0.875, + "loss_num": 0.06689453125, + "loss_xval": 2.078125, + "num_input_tokens_seen": 18534832, + "step": 277 + }, + { + "epoch": 0.031546099290780144, + "grad_norm": 11.417973518371582, + "learning_rate": 5e-05, + "loss": 2.0852, + "num_input_tokens_seen": 18602876, + "step": 278 + }, + { + "epoch": 0.031546099290780144, + "loss": 2.056793212890625, + "loss_ce": 0.008941804990172386, + "loss_iou": 0.84375, + "loss_num": 0.07080078125, + "loss_xval": 2.046875, + "num_input_tokens_seen": 18602876, + "step": 278 + }, + { + "epoch": 0.03165957446808511, + "grad_norm": 16.818071365356445, + "learning_rate": 5e-05, + "loss": 2.0695, + "num_input_tokens_seen": 18669648, + "step": 279 + }, + { + "epoch": 0.03165957446808511, + "loss": 2.132762908935547, + "loss_ce": 0.009716182015836239, + "loss_iou": 0.90234375, + "loss_num": 0.06298828125, + "loss_xval": 2.125, + "num_input_tokens_seen": 18669648, + "step": 279 + }, + { + "epoch": 0.03177304964539007, + "grad_norm": 15.39789867401123, + "learning_rate": 5e-05, + "loss": 1.8981, + "num_input_tokens_seen": 18735692, + "step": 280 + }, + { + "epoch": 0.03177304964539007, + "loss": 1.8417631387710571, + "loss_ce": 0.005459452047944069, + "loss_iou": 0.79296875, + "loss_num": 0.050048828125, + "loss_xval": 1.8359375, + "num_input_tokens_seen": 18735692, + "step": 280 + }, + { + "epoch": 0.031886524822695037, + "grad_norm": 21.809755325317383, + "learning_rate": 5e-05, + "loss": 2.1069, + "num_input_tokens_seen": 18803004, + "step": 281 + }, + { + "epoch": 0.031886524822695037, + "loss": 2.1319336891174316, + "loss_ce": 0.008886843919754028, + "loss_iou": 0.9453125, + "loss_num": 0.046142578125, + "loss_xval": 2.125, + "num_input_tokens_seen": 18803004, + "step": 281 + }, + { + "epoch": 0.032, + "grad_norm": 12.24260425567627, + "learning_rate": 5e-05, + "loss": 2.4036, + "num_input_tokens_seen": 18870204, + "step": 282 + }, + { + "epoch": 0.032, + "loss": 2.4431052207946777, + "loss_ce": 0.011464760638773441, + "loss_iou": 1.015625, + "loss_num": 0.08056640625, + "loss_xval": 2.4375, + "num_input_tokens_seen": 18870204, + "step": 282 + }, + { + "epoch": 0.032113475177304965, + "grad_norm": 14.834857940673828, + "learning_rate": 5e-05, + "loss": 1.8863, + "num_input_tokens_seen": 18936772, + "step": 283 + }, + { + "epoch": 0.032113475177304965, + "loss": 1.8956716060638428, + "loss_ce": 0.009929394349455833, + "loss_iou": 0.79296875, + "loss_num": 0.06005859375, + "loss_xval": 1.8828125, + "num_input_tokens_seen": 18936772, + "step": 283 + }, + { + "epoch": 0.03222695035460993, + "grad_norm": 24.607969284057617, + "learning_rate": 5e-05, + "loss": 1.7384, + "num_input_tokens_seen": 19003368, + "step": 284 + }, + { + "epoch": 0.03222695035460993, + "loss": 1.7091071605682373, + "loss_ce": 0.006653572432696819, + "loss_iou": 0.72265625, + "loss_num": 0.050537109375, + "loss_xval": 1.703125, + "num_input_tokens_seen": 19003368, + "step": 284 + }, + { + "epoch": 0.03234042553191489, + "grad_norm": 11.258136749267578, + "learning_rate": 5e-05, + "loss": 2.1056, + "num_input_tokens_seen": 19070136, + "step": 285 + }, + { + "epoch": 0.03234042553191489, + "loss": 1.9623677730560303, + "loss_ce": 0.009730996564030647, + "loss_iou": 0.80859375, + "loss_num": 0.06787109375, + "loss_xval": 1.953125, + "num_input_tokens_seen": 19070136, + "step": 285 + }, + { + "epoch": 0.03245390070921986, + "grad_norm": 9.923100471496582, + "learning_rate": 5e-05, + "loss": 2.0012, + "num_input_tokens_seen": 19136788, + "step": 286 + }, + { + "epoch": 0.03245390070921986, + "loss": 1.8890573978424072, + "loss_ce": 0.005268281325697899, + "loss_iou": 0.82421875, + "loss_num": 0.046630859375, + "loss_xval": 1.8828125, + "num_input_tokens_seen": 19136788, + "step": 286 + }, + { + "epoch": 0.03256737588652482, + "grad_norm": 10.654122352600098, + "learning_rate": 5e-05, + "loss": 2.059, + "num_input_tokens_seen": 19203200, + "step": 287 + }, + { + "epoch": 0.03256737588652482, + "loss": 2.064732551574707, + "loss_ce": 0.009068410843610764, + "loss_iou": 0.88671875, + "loss_num": 0.05615234375, + "loss_xval": 2.0625, + "num_input_tokens_seen": 19203200, + "step": 287 + }, + { + "epoch": 0.032680851063829786, + "grad_norm": 187.18394470214844, + "learning_rate": 5e-05, + "loss": 2.0173, + "num_input_tokens_seen": 19270688, + "step": 288 + }, + { + "epoch": 0.032680851063829786, + "loss": 2.111915349960327, + "loss_ce": 0.0035168956965208054, + "loss_iou": 0.93359375, + "loss_num": 0.048828125, + "loss_xval": 2.109375, + "num_input_tokens_seen": 19270688, + "step": 288 + }, + { + "epoch": 0.03279432624113475, + "grad_norm": 28.703584671020508, + "learning_rate": 5e-05, + "loss": 2.0627, + "num_input_tokens_seen": 19337464, + "step": 289 + }, + { + "epoch": 0.03279432624113475, + "loss": 2.046271324157715, + "loss_ce": 0.0062322234734892845, + "loss_iou": 0.89453125, + "loss_num": 0.05126953125, + "loss_xval": 2.046875, + "num_input_tokens_seen": 19337464, + "step": 289 + }, + { + "epoch": 0.032907801418439714, + "grad_norm": 15.595372200012207, + "learning_rate": 5e-05, + "loss": 2.0658, + "num_input_tokens_seen": 19403796, + "step": 290 + }, + { + "epoch": 0.032907801418439714, + "loss": 1.9970816373825073, + "loss_ce": 0.007823779247701168, + "loss_iou": 0.85546875, + "loss_num": 0.05517578125, + "loss_xval": 1.9921875, + "num_input_tokens_seen": 19403796, + "step": 290 + }, + { + "epoch": 0.03302127659574468, + "grad_norm": 24.786096572875977, + "learning_rate": 5e-05, + "loss": 1.7617, + "num_input_tokens_seen": 19471248, + "step": 291 + }, + { + "epoch": 0.03302127659574468, + "loss": 1.448961615562439, + "loss_ce": 0.00828784704208374, + "loss_iou": 0.609375, + "loss_num": 0.045166015625, + "loss_xval": 1.4375, + "num_input_tokens_seen": 19471248, + "step": 291 + }, + { + "epoch": 0.03313475177304964, + "grad_norm": 19.570913314819336, + "learning_rate": 5e-05, + "loss": 2.0752, + "num_input_tokens_seen": 19539304, + "step": 292 + }, + { + "epoch": 0.03313475177304964, + "loss": 2.078388214111328, + "loss_ce": 0.0031929383985698223, + "loss_iou": 0.90625, + "loss_num": 0.052001953125, + "loss_xval": 2.078125, + "num_input_tokens_seen": 19539304, + "step": 292 + }, + { + "epoch": 0.03324822695035461, + "grad_norm": 25.94386863708496, + "learning_rate": 5e-05, + "loss": 2.1166, + "num_input_tokens_seen": 19605524, + "step": 293 + }, + { + "epoch": 0.03324822695035461, + "loss": 2.082476854324341, + "loss_ce": 0.0053283111192286015, + "loss_iou": 0.93359375, + "loss_num": 0.041748046875, + "loss_xval": 2.078125, + "num_input_tokens_seen": 19605524, + "step": 293 + }, + { + "epoch": 0.03336170212765958, + "grad_norm": 13.034207344055176, + "learning_rate": 5e-05, + "loss": 2.2818, + "num_input_tokens_seen": 19672408, + "step": 294 + }, + { + "epoch": 0.03336170212765958, + "loss": 2.35365629196167, + "loss_ce": 0.009906151331961155, + "loss_iou": 1.0, + "loss_num": 0.06884765625, + "loss_xval": 2.34375, + "num_input_tokens_seen": 19672408, + "step": 294 + }, + { + "epoch": 0.03347517730496454, + "grad_norm": 9.039133071899414, + "learning_rate": 5e-05, + "loss": 2.0695, + "num_input_tokens_seen": 19739184, + "step": 295 + }, + { + "epoch": 0.03347517730496454, + "loss": 2.2256674766540527, + "loss_ce": 0.009847037494182587, + "loss_iou": 0.90625, + "loss_num": 0.07958984375, + "loss_xval": 2.21875, + "num_input_tokens_seen": 19739184, + "step": 295 + }, + { + "epoch": 0.033588652482269506, + "grad_norm": 13.72494125366211, + "learning_rate": 5e-05, + "loss": 1.9618, + "num_input_tokens_seen": 19806740, + "step": 296 + }, + { + "epoch": 0.033588652482269506, + "loss": 1.9488213062286377, + "loss_ce": 0.008391615003347397, + "loss_iou": 0.8515625, + "loss_num": 0.04833984375, + "loss_xval": 1.9375, + "num_input_tokens_seen": 19806740, + "step": 296 + }, + { + "epoch": 0.03370212765957447, + "grad_norm": 13.92967414855957, + "learning_rate": 5e-05, + "loss": 2.1887, + "num_input_tokens_seen": 19874496, + "step": 297 + }, + { + "epoch": 0.03370212765957447, + "loss": 2.0813045501708984, + "loss_ce": 0.009039005264639854, + "loss_iou": 0.91015625, + "loss_num": 0.0498046875, + "loss_xval": 2.078125, + "num_input_tokens_seen": 19874496, + "step": 297 + }, + { + "epoch": 0.033815602836879434, + "grad_norm": 22.84300994873047, + "learning_rate": 5e-05, + "loss": 1.8684, + "num_input_tokens_seen": 19940996, + "step": 298 + }, + { + "epoch": 0.033815602836879434, + "loss": 1.9182816743850708, + "loss_ce": 0.0071489098481833935, + "loss_iou": 0.7890625, + "loss_num": 0.06640625, + "loss_xval": 1.9140625, + "num_input_tokens_seen": 19940996, + "step": 298 + }, + { + "epoch": 0.0339290780141844, + "grad_norm": 16.450529098510742, + "learning_rate": 5e-05, + "loss": 1.6587, + "num_input_tokens_seen": 20007312, + "step": 299 + }, + { + "epoch": 0.0339290780141844, + "loss": 1.6375080347061157, + "loss_ce": 0.009822511114180088, + "loss_iou": 0.70703125, + "loss_num": 0.04296875, + "loss_xval": 1.625, + "num_input_tokens_seen": 20007312, + "step": 299 + }, + { + "epoch": 0.03404255319148936, + "grad_norm": 17.05329132080078, + "learning_rate": 5e-05, + "loss": 1.954, + "num_input_tokens_seen": 20073004, + "step": 300 + }, + { + "epoch": 0.03404255319148936, + "loss": 2.1207549571990967, + "loss_ce": 0.0035674578975886106, + "loss_iou": 0.91015625, + "loss_num": 0.059326171875, + "loss_xval": 2.125, + "num_input_tokens_seen": 20073004, + "step": 300 + }, + { + "epoch": 0.034156028368794326, + "grad_norm": 17.390138626098633, + "learning_rate": 5e-05, + "loss": 1.9799, + "num_input_tokens_seen": 20139976, + "step": 301 + }, + { + "epoch": 0.034156028368794326, + "loss": 2.031141519546509, + "loss_ce": 0.005751009099185467, + "loss_iou": 0.890625, + "loss_num": 0.049072265625, + "loss_xval": 2.03125, + "num_input_tokens_seen": 20139976, + "step": 301 + }, + { + "epoch": 0.03426950354609929, + "grad_norm": 23.89825439453125, + "learning_rate": 5e-05, + "loss": 1.9982, + "num_input_tokens_seen": 20207492, + "step": 302 + }, + { + "epoch": 0.03426950354609929, + "loss": 1.8535802364349365, + "loss_ce": 0.006900569424033165, + "loss_iou": 0.80078125, + "loss_num": 0.04931640625, + "loss_xval": 1.84375, + "num_input_tokens_seen": 20207492, + "step": 302 + }, + { + "epoch": 0.034382978723404255, + "grad_norm": 17.60811424255371, + "learning_rate": 5e-05, + "loss": 2.097, + "num_input_tokens_seen": 20274256, + "step": 303 + }, + { + "epoch": 0.034382978723404255, + "loss": 1.7964730262756348, + "loss_ce": 0.0064338273368775845, + "loss_iou": 0.796875, + "loss_num": 0.03955078125, + "loss_xval": 1.7890625, + "num_input_tokens_seen": 20274256, + "step": 303 + }, + { + "epoch": 0.03449645390070922, + "grad_norm": 13.237966537475586, + "learning_rate": 5e-05, + "loss": 2.015, + "num_input_tokens_seen": 20341212, + "step": 304 + }, + { + "epoch": 0.03449645390070922, + "loss": 2.108531951904297, + "loss_ce": 0.005992960184812546, + "loss_iou": 0.9140625, + "loss_num": 0.053955078125, + "loss_xval": 2.109375, + "num_input_tokens_seen": 20341212, + "step": 304 + }, + { + "epoch": 0.03460992907801418, + "grad_norm": 80.10095977783203, + "learning_rate": 5e-05, + "loss": 1.9572, + "num_input_tokens_seen": 20408760, + "step": 305 + }, + { + "epoch": 0.03460992907801418, + "loss": 1.8983691930770874, + "loss_ce": 0.007744227536022663, + "loss_iou": 0.78125, + "loss_num": 0.06640625, + "loss_xval": 1.890625, + "num_input_tokens_seen": 20408760, + "step": 305 + }, + { + "epoch": 0.03472340425531915, + "grad_norm": 17.569639205932617, + "learning_rate": 5e-05, + "loss": 1.8684, + "num_input_tokens_seen": 20475928, + "step": 306 + }, + { + "epoch": 0.03472340425531915, + "loss": 1.8273829221725464, + "loss_ce": 0.00658206082880497, + "loss_iou": 0.76953125, + "loss_num": 0.056884765625, + "loss_xval": 1.8203125, + "num_input_tokens_seen": 20475928, + "step": 306 + }, + { + "epoch": 0.03483687943262411, + "grad_norm": 24.871156692504883, + "learning_rate": 5e-05, + "loss": 1.969, + "num_input_tokens_seen": 20543888, + "step": 307 + }, + { + "epoch": 0.03483687943262411, + "loss": 1.952850103378296, + "loss_ce": 0.0046079885214567184, + "loss_iou": 0.8359375, + "loss_num": 0.05517578125, + "loss_xval": 1.9453125, + "num_input_tokens_seen": 20543888, + "step": 307 + }, + { + "epoch": 0.034950354609929075, + "grad_norm": 23.9863338470459, + "learning_rate": 5e-05, + "loss": 2.0159, + "num_input_tokens_seen": 20610712, + "step": 308 + }, + { + "epoch": 0.034950354609929075, + "loss": 1.9886412620544434, + "loss_ce": 0.007195951417088509, + "loss_iou": 0.875, + "loss_num": 0.04638671875, + "loss_xval": 1.984375, + "num_input_tokens_seen": 20610712, + "step": 308 + }, + { + "epoch": 0.03506382978723404, + "grad_norm": 17.97776985168457, + "learning_rate": 5e-05, + "loss": 2.3001, + "num_input_tokens_seen": 20678580, + "step": 309 + }, + { + "epoch": 0.03506382978723404, + "loss": 2.331566333770752, + "loss_ce": 0.007347676903009415, + "loss_iou": 1.015625, + "loss_num": 0.05712890625, + "loss_xval": 2.328125, + "num_input_tokens_seen": 20678580, + "step": 309 + }, + { + "epoch": 0.035177304964539004, + "grad_norm": 12.606724739074707, + "learning_rate": 5e-05, + "loss": 2.0582, + "num_input_tokens_seen": 20745352, + "step": 310 + }, + { + "epoch": 0.035177304964539004, + "loss": 1.858565092086792, + "loss_ce": 0.009932287037372589, + "loss_iou": 0.8046875, + "loss_num": 0.047607421875, + "loss_xval": 1.8515625, + "num_input_tokens_seen": 20745352, + "step": 310 + }, + { + "epoch": 0.035290780141843975, + "grad_norm": 24.85018539428711, + "learning_rate": 5e-05, + "loss": 1.9257, + "num_input_tokens_seen": 20810976, + "step": 311 + }, + { + "epoch": 0.035290780141843975, + "loss": 2.044266700744629, + "loss_ce": 0.006180662661790848, + "loss_iou": 0.8671875, + "loss_num": 0.06103515625, + "loss_xval": 2.03125, + "num_input_tokens_seen": 20810976, + "step": 311 + }, + { + "epoch": 0.03540425531914894, + "grad_norm": 12.83482837677002, + "learning_rate": 5e-05, + "loss": 2.3324, + "num_input_tokens_seen": 20877244, + "step": 312 + }, + { + "epoch": 0.03540425531914894, + "loss": 2.3788301944732666, + "loss_ce": 0.006759959273040295, + "loss_iou": 0.98828125, + "loss_num": 0.07958984375, + "loss_xval": 2.375, + "num_input_tokens_seen": 20877244, + "step": 312 + }, + { + "epoch": 0.0355177304964539, + "grad_norm": 14.982930183410645, + "learning_rate": 5e-05, + "loss": 1.949, + "num_input_tokens_seen": 20944684, + "step": 313 + }, + { + "epoch": 0.0355177304964539, + "loss": 1.9424282312393188, + "loss_ce": 0.005904749967157841, + "loss_iou": 0.84375, + "loss_num": 0.048828125, + "loss_xval": 1.9375, + "num_input_tokens_seen": 20944684, + "step": 313 + }, + { + "epoch": 0.03563120567375887, + "grad_norm": 27.526506423950195, + "learning_rate": 5e-05, + "loss": 1.9378, + "num_input_tokens_seen": 21011112, + "step": 314 + }, + { + "epoch": 0.03563120567375887, + "loss": 1.8566550016403198, + "loss_ce": 0.009975343942642212, + "loss_iou": 0.796875, + "loss_num": 0.05078125, + "loss_xval": 1.84375, + "num_input_tokens_seen": 21011112, + "step": 314 + }, + { + "epoch": 0.03574468085106383, + "grad_norm": 12.763190269470215, + "learning_rate": 5e-05, + "loss": 2.3626, + "num_input_tokens_seen": 21078524, + "step": 315 + }, + { + "epoch": 0.03574468085106383, + "loss": 2.493199348449707, + "loss_ce": 0.006871393881738186, + "loss_iou": 1.078125, + "loss_num": 0.06640625, + "loss_xval": 2.484375, + "num_input_tokens_seen": 21078524, + "step": 315 + }, + { + "epoch": 0.035858156028368796, + "grad_norm": 13.380082130432129, + "learning_rate": 5e-05, + "loss": 2.0778, + "num_input_tokens_seen": 21146732, + "step": 316 + }, + { + "epoch": 0.035858156028368796, + "loss": 2.1106765270233154, + "loss_ce": 0.0032546264119446278, + "loss_iou": 0.9140625, + "loss_num": 0.055908203125, + "loss_xval": 2.109375, + "num_input_tokens_seen": 21146732, + "step": 316 + }, + { + "epoch": 0.03597163120567376, + "grad_norm": 15.731895446777344, + "learning_rate": 5e-05, + "loss": 1.7834, + "num_input_tokens_seen": 21211720, + "step": 317 + }, + { + "epoch": 0.03597163120567376, + "loss": 1.8188426494598389, + "loss_ce": 0.006342640146613121, + "loss_iou": 0.7890625, + "loss_num": 0.046630859375, + "loss_xval": 1.8125, + "num_input_tokens_seen": 21211720, + "step": 317 + }, + { + "epoch": 0.036085106382978724, + "grad_norm": 16.81956672668457, + "learning_rate": 5e-05, + "loss": 1.811, + "num_input_tokens_seen": 21278112, + "step": 318 + }, + { + "epoch": 0.036085106382978724, + "loss": 1.972276210784912, + "loss_ce": 0.010362189263105392, + "loss_iou": 0.83984375, + "loss_num": 0.055908203125, + "loss_xval": 1.9609375, + "num_input_tokens_seen": 21278112, + "step": 318 + }, + { + "epoch": 0.03619858156028369, + "grad_norm": 31.082778930664062, + "learning_rate": 5e-05, + "loss": 1.9857, + "num_input_tokens_seen": 21344868, + "step": 319 + }, + { + "epoch": 0.03619858156028369, + "loss": 2.0218372344970703, + "loss_ce": 0.012071753852069378, + "loss_iou": 0.88671875, + "loss_num": 0.046630859375, + "loss_xval": 2.015625, + "num_input_tokens_seen": 21344868, + "step": 319 + }, + { + "epoch": 0.03631205673758865, + "grad_norm": 11.371471405029297, + "learning_rate": 5e-05, + "loss": 2.3631, + "num_input_tokens_seen": 21411720, + "step": 320 + }, + { + "epoch": 0.03631205673758865, + "loss": 2.4354872703552246, + "loss_ce": 0.00384648609906435, + "loss_iou": 1.0234375, + "loss_num": 0.0771484375, + "loss_xval": 2.4375, + "num_input_tokens_seen": 21411720, + "step": 320 + }, + { + "epoch": 0.036425531914893616, + "grad_norm": 11.917170524597168, + "learning_rate": 5e-05, + "loss": 2.1742, + "num_input_tokens_seen": 21479168, + "step": 321 + }, + { + "epoch": 0.036425531914893616, + "loss": 2.227330446243286, + "loss_ce": 0.009557077661156654, + "loss_iou": 0.96875, + "loss_num": 0.055908203125, + "loss_xval": 2.21875, + "num_input_tokens_seen": 21479168, + "step": 321 + }, + { + "epoch": 0.03653900709219858, + "grad_norm": 19.4932804107666, + "learning_rate": 5e-05, + "loss": 1.9712, + "num_input_tokens_seen": 21546400, + "step": 322 + }, + { + "epoch": 0.03653900709219858, + "loss": 1.9577786922454834, + "loss_ce": 0.004653653129935265, + "loss_iou": 0.83203125, + "loss_num": 0.058349609375, + "loss_xval": 1.953125, + "num_input_tokens_seen": 21546400, + "step": 322 + }, + { + "epoch": 0.036652482269503545, + "grad_norm": 19.31272315979004, + "learning_rate": 5e-05, + "loss": 1.8821, + "num_input_tokens_seen": 21612536, + "step": 323 + }, + { + "epoch": 0.036652482269503545, + "loss": 1.7742459774017334, + "loss_ce": 0.006667857989668846, + "loss_iou": 0.7421875, + "loss_num": 0.056884765625, + "loss_xval": 1.765625, + "num_input_tokens_seen": 21612536, + "step": 323 + }, + { + "epoch": 0.03676595744680851, + "grad_norm": 22.05327796936035, + "learning_rate": 5e-05, + "loss": 2.1364, + "num_input_tokens_seen": 21679820, + "step": 324 + }, + { + "epoch": 0.03676595744680851, + "loss": 2.1946849822998047, + "loss_ce": 0.005231771618127823, + "loss_iou": 0.94921875, + "loss_num": 0.058349609375, + "loss_xval": 2.1875, + "num_input_tokens_seen": 21679820, + "step": 324 + }, + { + "epoch": 0.03687943262411347, + "grad_norm": 17.050397872924805, + "learning_rate": 5e-05, + "loss": 1.9914, + "num_input_tokens_seen": 21747364, + "step": 325 + }, + { + "epoch": 0.03687943262411347, + "loss": 1.9609088897705078, + "loss_ce": 0.005830854177474976, + "loss_iou": 0.85546875, + "loss_num": 0.04931640625, + "loss_xval": 1.953125, + "num_input_tokens_seen": 21747364, + "step": 325 + }, + { + "epoch": 0.03699290780141844, + "grad_norm": 12.990550994873047, + "learning_rate": 5e-05, + "loss": 1.9473, + "num_input_tokens_seen": 21814560, + "step": 326 + }, + { + "epoch": 0.03699290780141844, + "loss": 1.926056981086731, + "loss_ce": 0.007111618760973215, + "loss_iou": 0.80859375, + "loss_num": 0.059814453125, + "loss_xval": 1.921875, + "num_input_tokens_seen": 21814560, + "step": 326 + }, + { + "epoch": 0.0371063829787234, + "grad_norm": 20.72391700744629, + "learning_rate": 5e-05, + "loss": 1.8068, + "num_input_tokens_seen": 21880964, + "step": 327 + }, + { + "epoch": 0.0371063829787234, + "loss": 1.8409650325775146, + "loss_ce": 0.007957208901643753, + "loss_iou": 0.80078125, + "loss_num": 0.04638671875, + "loss_xval": 1.8359375, + "num_input_tokens_seen": 21880964, + "step": 327 + }, + { + "epoch": 0.037219858156028365, + "grad_norm": 14.223793029785156, + "learning_rate": 5e-05, + "loss": 2.0758, + "num_input_tokens_seen": 21947348, + "step": 328 + }, + { + "epoch": 0.037219858156028365, + "loss": 2.0101938247680664, + "loss_ce": 0.005310883279889822, + "loss_iou": 0.875, + "loss_num": 0.050537109375, + "loss_xval": 2.0, + "num_input_tokens_seen": 21947348, + "step": 328 + }, + { + "epoch": 0.037333333333333336, + "grad_norm": 14.364019393920898, + "learning_rate": 5e-05, + "loss": 1.7181, + "num_input_tokens_seen": 22015288, + "step": 329 + }, + { + "epoch": 0.037333333333333336, + "loss": 1.6094064712524414, + "loss_ce": 0.008820479735732079, + "loss_iou": 0.6875, + "loss_num": 0.044921875, + "loss_xval": 1.6015625, + "num_input_tokens_seen": 22015288, + "step": 329 + }, + { + "epoch": 0.0374468085106383, + "grad_norm": 15.050311088562012, + "learning_rate": 5e-05, + "loss": 2.1588, + "num_input_tokens_seen": 22082068, + "step": 330 + }, + { + "epoch": 0.0374468085106383, + "loss": 2.3089585304260254, + "loss_ce": 0.011107079684734344, + "loss_iou": 0.9453125, + "loss_num": 0.08203125, + "loss_xval": 2.296875, + "num_input_tokens_seen": 22082068, + "step": 330 + }, + { + "epoch": 0.037560283687943265, + "grad_norm": 16.324932098388672, + "learning_rate": 5e-05, + "loss": 2.0549, + "num_input_tokens_seen": 22149196, + "step": 331 + }, + { + "epoch": 0.037560283687943265, + "loss": 2.1143362522125244, + "loss_ce": 0.013750461861491203, + "loss_iou": 0.91015625, + "loss_num": 0.05615234375, + "loss_xval": 2.09375, + "num_input_tokens_seen": 22149196, + "step": 331 + }, + { + "epoch": 0.03767375886524823, + "grad_norm": 23.821434020996094, + "learning_rate": 5e-05, + "loss": 1.9516, + "num_input_tokens_seen": 22216484, + "step": 332 + }, + { + "epoch": 0.03767375886524823, + "loss": 1.9380347728729248, + "loss_ce": 0.004440983757376671, + "loss_iou": 0.87109375, + "loss_num": 0.037841796875, + "loss_xval": 1.9375, + "num_input_tokens_seen": 22216484, + "step": 332 + }, + { + "epoch": 0.03778723404255319, + "grad_norm": 16.641050338745117, + "learning_rate": 5e-05, + "loss": 2.0085, + "num_input_tokens_seen": 22283648, + "step": 333 + }, + { + "epoch": 0.03778723404255319, + "loss": 2.110280990600586, + "loss_ce": 0.003835737006738782, + "loss_iou": 0.9140625, + "loss_num": 0.055419921875, + "loss_xval": 2.109375, + "num_input_tokens_seen": 22283648, + "step": 333 + }, + { + "epoch": 0.03790070921985816, + "grad_norm": 11.583755493164062, + "learning_rate": 5e-05, + "loss": 1.7364, + "num_input_tokens_seen": 22350436, + "step": 334 + }, + { + "epoch": 0.03790070921985816, + "loss": 1.7042889595031738, + "loss_ce": 0.007511581759899855, + "loss_iou": 0.71484375, + "loss_num": 0.052490234375, + "loss_xval": 1.6953125, + "num_input_tokens_seen": 22350436, + "step": 334 + }, + { + "epoch": 0.03801418439716312, + "grad_norm": 15.644664764404297, + "learning_rate": 5e-05, + "loss": 1.8023, + "num_input_tokens_seen": 22417040, + "step": 335 + }, + { + "epoch": 0.03801418439716312, + "loss": 1.5863478183746338, + "loss_ce": 0.004316456150263548, + "loss_iou": 0.7109375, + "loss_num": 0.031494140625, + "loss_xval": 1.578125, + "num_input_tokens_seen": 22417040, + "step": 335 + }, + { + "epoch": 0.038127659574468085, + "grad_norm": 26.321117401123047, + "learning_rate": 5e-05, + "loss": 2.0553, + "num_input_tokens_seen": 22483908, + "step": 336 + }, + { + "epoch": 0.038127659574468085, + "loss": 2.0162549018859863, + "loss_ce": 0.006489438936114311, + "loss_iou": 0.9140625, + "loss_num": 0.036865234375, + "loss_xval": 2.015625, + "num_input_tokens_seen": 22483908, + "step": 336 + }, + { + "epoch": 0.03824113475177305, + "grad_norm": 8.617659568786621, + "learning_rate": 5e-05, + "loss": 2.2736, + "num_input_tokens_seen": 22550284, + "step": 337 + }, + { + "epoch": 0.03824113475177305, + "loss": 2.0531113147735596, + "loss_ce": 0.004283236339688301, + "loss_iou": 0.890625, + "loss_num": 0.052490234375, + "loss_xval": 2.046875, + "num_input_tokens_seen": 22550284, + "step": 337 + }, + { + "epoch": 0.038354609929078014, + "grad_norm": 27.03020477294922, + "learning_rate": 5e-05, + "loss": 1.882, + "num_input_tokens_seen": 22617544, + "step": 338 + }, + { + "epoch": 0.038354609929078014, + "loss": 1.9284515380859375, + "loss_ce": 0.005599904805421829, + "loss_iou": 0.83984375, + "loss_num": 0.049560546875, + "loss_xval": 1.921875, + "num_input_tokens_seen": 22617544, + "step": 338 + }, + { + "epoch": 0.03846808510638298, + "grad_norm": 19.37912368774414, + "learning_rate": 5e-05, + "loss": 1.7219, + "num_input_tokens_seen": 22683704, + "step": 339 + }, + { + "epoch": 0.03846808510638298, + "loss": 1.9599034786224365, + "loss_ce": 0.006778496317565441, + "loss_iou": 0.8359375, + "loss_num": 0.056640625, + "loss_xval": 1.953125, + "num_input_tokens_seen": 22683704, + "step": 339 + }, + { + "epoch": 0.03858156028368794, + "grad_norm": 19.592845916748047, + "learning_rate": 5e-05, + "loss": 2.0827, + "num_input_tokens_seen": 22750628, + "step": 340 + }, + { + "epoch": 0.03858156028368794, + "loss": 1.9885246753692627, + "loss_ce": 0.009032540023326874, + "loss_iou": 0.859375, + "loss_num": 0.05322265625, + "loss_xval": 1.9765625, + "num_input_tokens_seen": 22750628, + "step": 340 + }, + { + "epoch": 0.038695035460992906, + "grad_norm": 27.92302894592285, + "learning_rate": 5e-05, + "loss": 2.027, + "num_input_tokens_seen": 22816900, + "step": 341 + }, + { + "epoch": 0.038695035460992906, + "loss": 2.0320067405700684, + "loss_ce": 0.0066160717979073524, + "loss_iou": 0.8203125, + "loss_num": 0.07666015625, + "loss_xval": 2.03125, + "num_input_tokens_seen": 22816900, + "step": 341 + }, + { + "epoch": 0.03880851063829787, + "grad_norm": 16.177770614624023, + "learning_rate": 5e-05, + "loss": 2.0031, + "num_input_tokens_seen": 22884016, + "step": 342 + }, + { + "epoch": 0.03880851063829787, + "loss": 1.9985079765319824, + "loss_ce": 0.0063204094767570496, + "loss_iou": 0.85546875, + "loss_num": 0.055908203125, + "loss_xval": 1.9921875, + "num_input_tokens_seen": 22884016, + "step": 342 + }, + { + "epoch": 0.038921985815602834, + "grad_norm": 15.160003662109375, + "learning_rate": 5e-05, + "loss": 1.9174, + "num_input_tokens_seen": 22951136, + "step": 343 + }, + { + "epoch": 0.038921985815602834, + "loss": 1.871896743774414, + "loss_ce": 0.004709221422672272, + "loss_iou": 0.8359375, + "loss_num": 0.038818359375, + "loss_xval": 1.8671875, + "num_input_tokens_seen": 22951136, + "step": 343 + }, + { + "epoch": 0.0390354609929078, + "grad_norm": 21.420700073242188, + "learning_rate": 5e-05, + "loss": 1.8554, + "num_input_tokens_seen": 23018376, + "step": 344 + }, + { + "epoch": 0.0390354609929078, + "loss": 1.919924259185791, + "loss_ce": 0.007815049029886723, + "loss_iou": 0.80078125, + "loss_num": 0.06201171875, + "loss_xval": 1.9140625, + "num_input_tokens_seen": 23018376, + "step": 344 + }, + { + "epoch": 0.03914893617021276, + "grad_norm": 37.49563980102539, + "learning_rate": 5e-05, + "loss": 2.4817, + "num_input_tokens_seen": 23084824, + "step": 345 + }, + { + "epoch": 0.03914893617021276, + "loss": 2.4827969074249268, + "loss_ce": 0.00818747840821743, + "loss_iou": 1.1015625, + "loss_num": 0.055908203125, + "loss_xval": 2.46875, + "num_input_tokens_seen": 23084824, + "step": 345 + }, + { + "epoch": 0.039262411347517734, + "grad_norm": 10.814724922180176, + "learning_rate": 5e-05, + "loss": 1.9776, + "num_input_tokens_seen": 23151624, + "step": 346 + }, + { + "epoch": 0.039262411347517734, + "loss": 1.8231441974639893, + "loss_ce": 0.005883420817553997, + "loss_iou": 0.7890625, + "loss_num": 0.046875, + "loss_xval": 1.8203125, + "num_input_tokens_seen": 23151624, + "step": 346 + }, + { + "epoch": 0.0393758865248227, + "grad_norm": 11.98166561126709, + "learning_rate": 5e-05, + "loss": 2.1081, + "num_input_tokens_seen": 23219304, + "step": 347 + }, + { + "epoch": 0.0393758865248227, + "loss": 2.1103925704956055, + "loss_ce": 0.00590042769908905, + "loss_iou": 0.87890625, + "loss_num": 0.06982421875, + "loss_xval": 2.109375, + "num_input_tokens_seen": 23219304, + "step": 347 + }, + { + "epoch": 0.03948936170212766, + "grad_norm": 8.415793418884277, + "learning_rate": 5e-05, + "loss": 1.8075, + "num_input_tokens_seen": 23286940, + "step": 348 + }, + { + "epoch": 0.03948936170212766, + "loss": 1.7353081703186035, + "loss_ce": 0.008135177195072174, + "loss_iou": 0.7421875, + "loss_num": 0.049072265625, + "loss_xval": 1.7265625, + "num_input_tokens_seen": 23286940, + "step": 348 + }, + { + "epoch": 0.039602836879432626, + "grad_norm": 10.83703327178955, + "learning_rate": 5e-05, + "loss": 1.9752, + "num_input_tokens_seen": 23354428, + "step": 349 + }, + { + "epoch": 0.039602836879432626, + "loss": 2.257213592529297, + "loss_ce": 0.015026046894490719, + "loss_iou": 0.921875, + "loss_num": 0.0791015625, + "loss_xval": 2.25, + "num_input_tokens_seen": 23354428, + "step": 349 + }, + { + "epoch": 0.03971631205673759, + "grad_norm": 11.7138090133667, + "learning_rate": 5e-05, + "loss": 1.836, + "num_input_tokens_seen": 23422100, + "step": 350 + }, + { + "epoch": 0.03971631205673759, + "loss": 1.7925347089767456, + "loss_ce": 0.008355054073035717, + "loss_iou": 0.765625, + "loss_num": 0.05126953125, + "loss_xval": 1.78125, + "num_input_tokens_seen": 23422100, + "step": 350 + }, + { + "epoch": 0.039829787234042555, + "grad_norm": 33.9190788269043, + "learning_rate": 5e-05, + "loss": 1.7021, + "num_input_tokens_seen": 23488876, + "step": 351 + }, + { + "epoch": 0.039829787234042555, + "loss": 1.9477087259292603, + "loss_ce": 0.005325893871486187, + "loss_iou": 0.859375, + "loss_num": 0.045166015625, + "loss_xval": 1.9453125, + "num_input_tokens_seen": 23488876, + "step": 351 + }, + { + "epoch": 0.03994326241134752, + "grad_norm": 15.383682250976562, + "learning_rate": 5e-05, + "loss": 1.9611, + "num_input_tokens_seen": 23555700, + "step": 352 + }, + { + "epoch": 0.03994326241134752, + "loss": 1.810509443283081, + "loss_ce": 0.010704773478209972, + "loss_iou": 0.80078125, + "loss_num": 0.0400390625, + "loss_xval": 1.796875, + "num_input_tokens_seen": 23555700, + "step": 352 + }, + { + "epoch": 0.04005673758865248, + "grad_norm": 20.810914993286133, + "learning_rate": 5e-05, + "loss": 1.9245, + "num_input_tokens_seen": 23622976, + "step": 353 + }, + { + "epoch": 0.04005673758865248, + "loss": 1.8417471647262573, + "loss_ce": 0.005809661000967026, + "loss_iou": 0.80078125, + "loss_num": 0.047607421875, + "loss_xval": 1.8359375, + "num_input_tokens_seen": 23622976, + "step": 353 + }, + { + "epoch": 0.04017021276595745, + "grad_norm": 31.200944900512695, + "learning_rate": 5e-05, + "loss": 2.1201, + "num_input_tokens_seen": 23689896, + "step": 354 + }, + { + "epoch": 0.04017021276595745, + "loss": 2.1143691539764404, + "loss_ce": 0.006947353947907686, + "loss_iou": 0.90625, + "loss_num": 0.059326171875, + "loss_xval": 2.109375, + "num_input_tokens_seen": 23689896, + "step": 354 + }, + { + "epoch": 0.04028368794326241, + "grad_norm": 10.366345405578613, + "learning_rate": 5e-05, + "loss": 2.3444, + "num_input_tokens_seen": 23757256, + "step": 355 + }, + { + "epoch": 0.04028368794326241, + "loss": 2.3451461791992188, + "loss_ce": 0.008232180029153824, + "loss_iou": 1.0078125, + "loss_num": 0.0654296875, + "loss_xval": 2.34375, + "num_input_tokens_seen": 23757256, + "step": 355 + }, + { + "epoch": 0.040397163120567375, + "grad_norm": 10.943886756896973, + "learning_rate": 5e-05, + "loss": 1.9184, + "num_input_tokens_seen": 23823564, + "step": 356 + }, + { + "epoch": 0.040397163120567375, + "loss": 1.8347113132476807, + "loss_ce": 0.0037175891920924187, + "loss_iou": 0.7734375, + "loss_num": 0.056640625, + "loss_xval": 1.828125, + "num_input_tokens_seen": 23823564, + "step": 356 + }, + { + "epoch": 0.04051063829787234, + "grad_norm": 7.189153671264648, + "learning_rate": 5e-05, + "loss": 2.0007, + "num_input_tokens_seen": 23890644, + "step": 357 + }, + { + "epoch": 0.04051063829787234, + "loss": 2.008484363555908, + "loss_ce": 0.008484400808811188, + "loss_iou": 0.85546875, + "loss_num": 0.05712890625, + "loss_xval": 2.0, + "num_input_tokens_seen": 23890644, + "step": 357 + }, + { + "epoch": 0.040624113475177304, + "grad_norm": 47.64419937133789, + "learning_rate": 5e-05, + "loss": 2.0353, + "num_input_tokens_seen": 23956292, + "step": 358 + }, + { + "epoch": 0.040624113475177304, + "loss": 1.9517167806625366, + "loss_ce": 0.008662581443786621, + "loss_iou": 0.80859375, + "loss_num": 0.0654296875, + "loss_xval": 1.9453125, + "num_input_tokens_seen": 23956292, + "step": 358 + }, + { + "epoch": 0.04073758865248227, + "grad_norm": 11.886468887329102, + "learning_rate": 5e-05, + "loss": 2.0335, + "num_input_tokens_seen": 24022980, + "step": 359 + }, + { + "epoch": 0.04073758865248227, + "loss": 1.9836323261260986, + "loss_ce": 0.009022878482937813, + "loss_iou": 0.8359375, + "loss_num": 0.060546875, + "loss_xval": 1.9765625, + "num_input_tokens_seen": 24022980, + "step": 359 + }, + { + "epoch": 0.04085106382978723, + "grad_norm": 29.570642471313477, + "learning_rate": 5e-05, + "loss": 1.8009, + "num_input_tokens_seen": 24089852, + "step": 360 + }, + { + "epoch": 0.04085106382978723, + "loss": 2.0421156883239746, + "loss_ce": 0.01086555514484644, + "loss_iou": 0.84765625, + "loss_num": 0.06787109375, + "loss_xval": 2.03125, + "num_input_tokens_seen": 24089852, + "step": 360 + }, + { + "epoch": 0.040964539007092196, + "grad_norm": 17.361576080322266, + "learning_rate": 5e-05, + "loss": 2.4415, + "num_input_tokens_seen": 24155968, + "step": 361 + }, + { + "epoch": 0.040964539007092196, + "loss": 2.4438889026641846, + "loss_ce": 0.006388947367668152, + "loss_iou": 1.0234375, + "loss_num": 0.0771484375, + "loss_xval": 2.4375, + "num_input_tokens_seen": 24155968, + "step": 361 + }, + { + "epoch": 0.04107801418439716, + "grad_norm": 14.371408462524414, + "learning_rate": 5e-05, + "loss": 2.0888, + "num_input_tokens_seen": 24222832, + "step": 362 + }, + { + "epoch": 0.04107801418439716, + "loss": 2.1933016777038574, + "loss_ce": 0.005801578518003225, + "loss_iou": 0.90625, + "loss_num": 0.07470703125, + "loss_xval": 2.1875, + "num_input_tokens_seen": 24222832, + "step": 362 + }, + { + "epoch": 0.041191489361702124, + "grad_norm": 12.326679229736328, + "learning_rate": 5e-05, + "loss": 1.8755, + "num_input_tokens_seen": 24289984, + "step": 363 + }, + { + "epoch": 0.041191489361702124, + "loss": 1.8623886108398438, + "loss_ce": 0.0059433081187307835, + "loss_iou": 0.734375, + "loss_num": 0.0771484375, + "loss_xval": 1.859375, + "num_input_tokens_seen": 24289984, + "step": 363 + }, + { + "epoch": 0.041304964539007095, + "grad_norm": 18.585954666137695, + "learning_rate": 5e-05, + "loss": 1.7945, + "num_input_tokens_seen": 24356684, + "step": 364 + }, + { + "epoch": 0.041304964539007095, + "loss": 2.0290579795837402, + "loss_ce": 0.007573373150080442, + "loss_iou": 0.87890625, + "loss_num": 0.05224609375, + "loss_xval": 2.015625, + "num_input_tokens_seen": 24356684, + "step": 364 + }, + { + "epoch": 0.04141843971631206, + "grad_norm": 11.892111778259277, + "learning_rate": 5e-05, + "loss": 1.9181, + "num_input_tokens_seen": 24424628, + "step": 365 + }, + { + "epoch": 0.04141843971631206, + "loss": 1.9860646724700928, + "loss_ce": 0.002666260115802288, + "loss_iou": 0.84765625, + "loss_num": 0.057861328125, + "loss_xval": 1.984375, + "num_input_tokens_seen": 24424628, + "step": 365 + }, + { + "epoch": 0.041531914893617024, + "grad_norm": 11.419357299804688, + "learning_rate": 5e-05, + "loss": 1.6628, + "num_input_tokens_seen": 24491120, + "step": 366 + }, + { + "epoch": 0.041531914893617024, + "loss": 1.6986411809921265, + "loss_ce": 0.006258298177272081, + "loss_iou": 0.7421875, + "loss_num": 0.041748046875, + "loss_xval": 1.6953125, + "num_input_tokens_seen": 24491120, + "step": 366 + }, + { + "epoch": 0.04164539007092199, + "grad_norm": 24.315397262573242, + "learning_rate": 5e-05, + "loss": 1.8798, + "num_input_tokens_seen": 24557596, + "step": 367 + }, + { + "epoch": 0.04164539007092199, + "loss": 2.0086288452148438, + "loss_ce": 0.009605586528778076, + "loss_iou": 0.83203125, + "loss_num": 0.0673828125, + "loss_xval": 2.0, + "num_input_tokens_seen": 24557596, + "step": 367 + }, + { + "epoch": 0.04175886524822695, + "grad_norm": 14.301680564880371, + "learning_rate": 5e-05, + "loss": 1.8485, + "num_input_tokens_seen": 24624544, + "step": 368 + }, + { + "epoch": 0.04175886524822695, + "loss": 1.9693312644958496, + "loss_ce": 0.006562657654285431, + "loss_iou": 0.83984375, + "loss_num": 0.056884765625, + "loss_xval": 1.9609375, + "num_input_tokens_seen": 24624544, + "step": 368 + }, + { + "epoch": 0.041872340425531916, + "grad_norm": 10.127808570861816, + "learning_rate": 5e-05, + "loss": 1.7363, + "num_input_tokens_seen": 24691768, + "step": 369 + }, + { + "epoch": 0.041872340425531916, + "loss": 1.757197618484497, + "loss_ce": 0.005244539584964514, + "loss_iou": 0.78125, + "loss_num": 0.03759765625, + "loss_xval": 1.75, + "num_input_tokens_seen": 24691768, + "step": 369 + }, + { + "epoch": 0.04198581560283688, + "grad_norm": 24.512542724609375, + "learning_rate": 5e-05, + "loss": 1.7423, + "num_input_tokens_seen": 24758332, + "step": 370 + }, + { + "epoch": 0.04198581560283688, + "loss": 1.4955024719238281, + "loss_ce": 0.007465363014489412, + "loss_iou": 0.65234375, + "loss_num": 0.0361328125, + "loss_xval": 1.484375, + "num_input_tokens_seen": 24758332, + "step": 370 + }, + { + "epoch": 0.042099290780141845, + "grad_norm": 13.494495391845703, + "learning_rate": 5e-05, + "loss": 2.1773, + "num_input_tokens_seen": 24825288, + "step": 371 + }, + { + "epoch": 0.042099290780141845, + "loss": 2.214827299118042, + "loss_ce": 0.011702418327331543, + "loss_iou": 0.94140625, + "loss_num": 0.06396484375, + "loss_xval": 2.203125, + "num_input_tokens_seen": 24825288, + "step": 371 + }, + { + "epoch": 0.04221276595744681, + "grad_norm": 17.149709701538086, + "learning_rate": 5e-05, + "loss": 1.8599, + "num_input_tokens_seen": 24891720, + "step": 372 + }, + { + "epoch": 0.04221276595744681, + "loss": 1.9843990802764893, + "loss_ce": 0.0029537726659327745, + "loss_iou": 0.828125, + "loss_num": 0.064453125, + "loss_xval": 1.984375, + "num_input_tokens_seen": 24891720, + "step": 372 + }, + { + "epoch": 0.04232624113475177, + "grad_norm": 27.133699417114258, + "learning_rate": 5e-05, + "loss": 1.7671, + "num_input_tokens_seen": 24959248, + "step": 373 + }, + { + "epoch": 0.04232624113475177, + "loss": 1.7949475049972534, + "loss_ce": 0.002955310046672821, + "loss_iou": 0.81640625, + "loss_num": 0.03271484375, + "loss_xval": 1.7890625, + "num_input_tokens_seen": 24959248, + "step": 373 + }, + { + "epoch": 0.04243971631205674, + "grad_norm": 18.976503372192383, + "learning_rate": 5e-05, + "loss": 2.2409, + "num_input_tokens_seen": 25027480, + "step": 374 + }, + { + "epoch": 0.04243971631205674, + "loss": 2.1371207237243652, + "loss_ce": 0.0062612853944301605, + "loss_iou": 0.91015625, + "loss_num": 0.0615234375, + "loss_xval": 2.125, + "num_input_tokens_seen": 25027480, + "step": 374 + }, + { + "epoch": 0.0425531914893617, + "grad_norm": 13.973775863647461, + "learning_rate": 5e-05, + "loss": 1.7039, + "num_input_tokens_seen": 25094956, + "step": 375 + }, + { + "epoch": 0.0425531914893617, + "loss": 1.5422462224960327, + "loss_ce": 0.006113489158451557, + "loss_iou": 0.6953125, + "loss_num": 0.029052734375, + "loss_xval": 1.5390625, + "num_input_tokens_seen": 25094956, + "step": 375 + }, + { + "epoch": 0.042666666666666665, + "grad_norm": 30.427942276000977, + "learning_rate": 5e-05, + "loss": 2.2806, + "num_input_tokens_seen": 25162072, + "step": 376 + }, + { + "epoch": 0.042666666666666665, + "loss": 2.1853606700897217, + "loss_ce": 0.004696577787399292, + "loss_iou": 0.9375, + "loss_num": 0.06201171875, + "loss_xval": 2.1875, + "num_input_tokens_seen": 25162072, + "step": 376 + }, + { + "epoch": 0.04278014184397163, + "grad_norm": 15.456019401550293, + "learning_rate": 5e-05, + "loss": 2.3031, + "num_input_tokens_seen": 25228304, + "step": 377 + }, + { + "epoch": 0.04278014184397163, + "loss": 2.298508405685425, + "loss_ce": 0.0074928333051502705, + "loss_iou": 0.95703125, + "loss_num": 0.0751953125, + "loss_xval": 2.296875, + "num_input_tokens_seen": 25228304, + "step": 377 + }, + { + "epoch": 0.042893617021276594, + "grad_norm": 11.896172523498535, + "learning_rate": 5e-05, + "loss": 1.983, + "num_input_tokens_seen": 25295860, + "step": 378 + }, + { + "epoch": 0.042893617021276594, + "loss": 1.909468412399292, + "loss_ce": 0.00614805705845356, + "loss_iou": 0.8046875, + "loss_num": 0.05810546875, + "loss_xval": 1.90625, + "num_input_tokens_seen": 25295860, + "step": 378 + }, + { + "epoch": 0.04300709219858156, + "grad_norm": 21.1990909576416, + "learning_rate": 5e-05, + "loss": 1.8352, + "num_input_tokens_seen": 25362104, + "step": 379 + }, + { + "epoch": 0.04300709219858156, + "loss": 1.758427381515503, + "loss_ce": 0.005497641395777464, + "loss_iou": 0.7734375, + "loss_num": 0.0419921875, + "loss_xval": 1.75, + "num_input_tokens_seen": 25362104, + "step": 379 + }, + { + "epoch": 0.04312056737588652, + "grad_norm": 17.638792037963867, + "learning_rate": 5e-05, + "loss": 1.971, + "num_input_tokens_seen": 25429176, + "step": 380 + }, + { + "epoch": 0.04312056737588652, + "loss": 1.942181944847107, + "loss_ce": 0.006635100580751896, + "loss_iou": 0.82421875, + "loss_num": 0.05712890625, + "loss_xval": 1.9375, + "num_input_tokens_seen": 25429176, + "step": 380 + }, + { + "epoch": 0.043234042553191486, + "grad_norm": 17.683996200561523, + "learning_rate": 5e-05, + "loss": 1.5165, + "num_input_tokens_seen": 25495960, + "step": 381 + }, + { + "epoch": 0.043234042553191486, + "loss": 1.5000046491622925, + "loss_ce": 0.007328850217163563, + "loss_iou": 0.6328125, + "loss_num": 0.0458984375, + "loss_xval": 1.4921875, + "num_input_tokens_seen": 25495960, + "step": 381 + }, + { + "epoch": 0.04334751773049646, + "grad_norm": 20.54450798034668, + "learning_rate": 5e-05, + "loss": 1.7502, + "num_input_tokens_seen": 25562488, + "step": 382 + }, + { + "epoch": 0.04334751773049646, + "loss": 1.6530723571777344, + "loss_ce": 0.008541116490960121, + "loss_iou": 0.7265625, + "loss_num": 0.0390625, + "loss_xval": 1.640625, + "num_input_tokens_seen": 25562488, + "step": 382 + }, + { + "epoch": 0.04346099290780142, + "grad_norm": 29.097423553466797, + "learning_rate": 5e-05, + "loss": 1.8608, + "num_input_tokens_seen": 25627928, + "step": 383 + }, + { + "epoch": 0.04346099290780142, + "loss": 1.6565628051757812, + "loss_ce": 0.005073409993201494, + "loss_iou": 0.68359375, + "loss_num": 0.05712890625, + "loss_xval": 1.6484375, + "num_input_tokens_seen": 25627928, + "step": 383 + }, + { + "epoch": 0.043574468085106385, + "grad_norm": 13.79610538482666, + "learning_rate": 5e-05, + "loss": 2.142, + "num_input_tokens_seen": 25694488, + "step": 384 + }, + { + "epoch": 0.043574468085106385, + "loss": 2.1877968311309814, + "loss_ce": 0.0061562154442071915, + "loss_iou": 0.921875, + "loss_num": 0.0673828125, + "loss_xval": 2.1875, + "num_input_tokens_seen": 25694488, + "step": 384 + }, + { + "epoch": 0.04368794326241135, + "grad_norm": 15.085979461669922, + "learning_rate": 5e-05, + "loss": 1.8249, + "num_input_tokens_seen": 25761256, + "step": 385 + }, + { + "epoch": 0.04368794326241135, + "loss": 1.8089090585708618, + "loss_ce": 0.0022684545256197453, + "loss_iou": 0.78515625, + "loss_num": 0.046630859375, + "loss_xval": 1.8046875, + "num_input_tokens_seen": 25761256, + "step": 385 + }, + { + "epoch": 0.043801418439716314, + "grad_norm": 27.244121551513672, + "learning_rate": 5e-05, + "loss": 1.8088, + "num_input_tokens_seen": 25827964, + "step": 386 + }, + { + "epoch": 0.043801418439716314, + "loss": 1.7166683673858643, + "loss_ce": 0.0037776269018650055, + "loss_iou": 0.7578125, + "loss_num": 0.03857421875, + "loss_xval": 1.7109375, + "num_input_tokens_seen": 25827964, + "step": 386 + }, + { + "epoch": 0.04391489361702128, + "grad_norm": 12.86385726928711, + "learning_rate": 5e-05, + "loss": 2.166, + "num_input_tokens_seen": 25895172, + "step": 387 + }, + { + "epoch": 0.04391489361702128, + "loss": 2.261471748352051, + "loss_ce": 0.004635747522115707, + "loss_iou": 0.97265625, + "loss_num": 0.0625, + "loss_xval": 2.25, + "num_input_tokens_seen": 25895172, + "step": 387 + }, + { + "epoch": 0.04402836879432624, + "grad_norm": 14.875954627990723, + "learning_rate": 5e-05, + "loss": 1.9218, + "num_input_tokens_seen": 25962720, + "step": 388 + }, + { + "epoch": 0.04402836879432624, + "loss": 1.8489075899124146, + "loss_ce": 0.0061342669650912285, + "loss_iou": 0.80859375, + "loss_num": 0.045166015625, + "loss_xval": 1.84375, + "num_input_tokens_seen": 25962720, + "step": 388 + }, + { + "epoch": 0.044141843971631206, + "grad_norm": 16.628625869750977, + "learning_rate": 5e-05, + "loss": 1.9982, + "num_input_tokens_seen": 26028672, + "step": 389 + }, + { + "epoch": 0.044141843971631206, + "loss": 1.9583289623260498, + "loss_ce": 0.010086705908179283, + "loss_iou": 0.875, + "loss_num": 0.039306640625, + "loss_xval": 1.9453125, + "num_input_tokens_seen": 26028672, + "step": 389 + }, + { + "epoch": 0.04425531914893617, + "grad_norm": 13.506182670593262, + "learning_rate": 5e-05, + "loss": 2.0894, + "num_input_tokens_seen": 26097200, + "step": 390 + }, + { + "epoch": 0.04425531914893617, + "loss": 2.038412570953369, + "loss_ce": 0.007162605877965689, + "loss_iou": 0.84765625, + "loss_num": 0.06689453125, + "loss_xval": 2.03125, + "num_input_tokens_seen": 26097200, + "step": 390 + }, + { + "epoch": 0.044368794326241134, + "grad_norm": 17.39616584777832, + "learning_rate": 5e-05, + "loss": 1.6584, + "num_input_tokens_seen": 26164996, + "step": 391 + }, + { + "epoch": 0.044368794326241134, + "loss": 1.6289080381393433, + "loss_ce": 0.005861132405698299, + "loss_iou": 0.7421875, + "loss_num": 0.0283203125, + "loss_xval": 1.625, + "num_input_tokens_seen": 26164996, + "step": 391 + }, + { + "epoch": 0.0444822695035461, + "grad_norm": 21.010517120361328, + "learning_rate": 5e-05, + "loss": 1.9293, + "num_input_tokens_seen": 26231452, + "step": 392 + }, + { + "epoch": 0.0444822695035461, + "loss": 2.014920949935913, + "loss_ce": 0.009061525575816631, + "loss_iou": 0.8515625, + "loss_num": 0.059814453125, + "loss_xval": 2.0, + "num_input_tokens_seen": 26231452, + "step": 392 + }, + { + "epoch": 0.04459574468085106, + "grad_norm": 15.272978782653809, + "learning_rate": 5e-05, + "loss": 1.7866, + "num_input_tokens_seen": 26297872, + "step": 393 + }, + { + "epoch": 0.04459574468085106, + "loss": 1.7726294994354248, + "loss_ce": 0.01042239647358656, + "loss_iou": 0.734375, + "loss_num": 0.058837890625, + "loss_xval": 1.765625, + "num_input_tokens_seen": 26297872, + "step": 393 + }, + { + "epoch": 0.04470921985815603, + "grad_norm": 17.96276092529297, + "learning_rate": 5e-05, + "loss": 2.0989, + "num_input_tokens_seen": 26365088, + "step": 394 + }, + { + "epoch": 0.04470921985815603, + "loss": 1.874945878982544, + "loss_ce": 0.008734903298318386, + "loss_iou": 0.83203125, + "loss_num": 0.04052734375, + "loss_xval": 1.8671875, + "num_input_tokens_seen": 26365088, + "step": 394 + }, + { + "epoch": 0.04482269503546099, + "grad_norm": 19.307680130004883, + "learning_rate": 5e-05, + "loss": 1.9721, + "num_input_tokens_seen": 26432760, + "step": 395 + }, + { + "epoch": 0.04482269503546099, + "loss": 2.066202163696289, + "loss_ce": 0.0027256272733211517, + "loss_iou": 0.875, + "loss_num": 0.06298828125, + "loss_xval": 2.0625, + "num_input_tokens_seen": 26432760, + "step": 395 + }, + { + "epoch": 0.044936170212765955, + "grad_norm": 19.767057418823242, + "learning_rate": 5e-05, + "loss": 1.9033, + "num_input_tokens_seen": 26500084, + "step": 396 + }, + { + "epoch": 0.044936170212765955, + "loss": 1.8677425384521484, + "loss_ce": 0.005437841638922691, + "loss_iou": 0.7890625, + "loss_num": 0.05712890625, + "loss_xval": 1.859375, + "num_input_tokens_seen": 26500084, + "step": 396 + }, + { + "epoch": 0.04504964539007092, + "grad_norm": 18.39133071899414, + "learning_rate": 5e-05, + "loss": 1.883, + "num_input_tokens_seen": 26566264, + "step": 397 + }, + { + "epoch": 0.04504964539007092, + "loss": 1.8298132419586182, + "loss_ce": 0.007547713816165924, + "loss_iou": 0.79296875, + "loss_num": 0.046630859375, + "loss_xval": 1.8203125, + "num_input_tokens_seen": 26566264, + "step": 397 + }, + { + "epoch": 0.04516312056737588, + "grad_norm": 17.197866439819336, + "learning_rate": 5e-05, + "loss": 1.8493, + "num_input_tokens_seen": 26633552, + "step": 398 + }, + { + "epoch": 0.04516312056737588, + "loss": 1.8741916418075562, + "loss_ce": 0.007004134822636843, + "loss_iou": 0.796875, + "loss_num": 0.05419921875, + "loss_xval": 1.8671875, + "num_input_tokens_seen": 26633552, + "step": 398 + }, + { + "epoch": 0.045276595744680855, + "grad_norm": 26.693199157714844, + "learning_rate": 5e-05, + "loss": 1.7141, + "num_input_tokens_seen": 26700668, + "step": 399 + }, + { + "epoch": 0.045276595744680855, + "loss": 1.7023206949234009, + "loss_ce": 0.0070082335732877254, + "loss_iou": 0.734375, + "loss_num": 0.044677734375, + "loss_xval": 1.6953125, + "num_input_tokens_seen": 26700668, + "step": 399 + }, + { + "epoch": 0.04539007092198582, + "grad_norm": 13.10283088684082, + "learning_rate": 5e-05, + "loss": 2.1334, + "num_input_tokens_seen": 26768080, + "step": 400 + }, + { + "epoch": 0.04539007092198582, + "loss": 1.9935219287872314, + "loss_ce": 0.004264097195118666, + "loss_iou": 0.83984375, + "loss_num": 0.06201171875, + "loss_xval": 1.9921875, + "num_input_tokens_seen": 26768080, + "step": 400 + }, + { + "epoch": 0.04550354609929078, + "grad_norm": 17.129901885986328, + "learning_rate": 5e-05, + "loss": 1.8802, + "num_input_tokens_seen": 26834496, + "step": 401 + }, + { + "epoch": 0.04550354609929078, + "loss": 1.943077802658081, + "loss_ce": 0.01339033618569374, + "loss_iou": 0.83203125, + "loss_num": 0.05322265625, + "loss_xval": 1.9296875, + "num_input_tokens_seen": 26834496, + "step": 401 + }, + { + "epoch": 0.04561702127659575, + "grad_norm": 17.141881942749023, + "learning_rate": 5e-05, + "loss": 1.7355, + "num_input_tokens_seen": 26901308, + "step": 402 + }, + { + "epoch": 0.04561702127659575, + "loss": 1.8338518142700195, + "loss_ce": 0.01353929378092289, + "loss_iou": 0.78515625, + "loss_num": 0.050048828125, + "loss_xval": 1.8203125, + "num_input_tokens_seen": 26901308, + "step": 402 + }, + { + "epoch": 0.04573049645390071, + "grad_norm": 14.657854080200195, + "learning_rate": 5e-05, + "loss": 2.0074, + "num_input_tokens_seen": 26968500, + "step": 403 + }, + { + "epoch": 0.04573049645390071, + "loss": 1.9217885732650757, + "loss_ce": 0.012120627798140049, + "loss_iou": 0.85546875, + "loss_num": 0.039794921875, + "loss_xval": 1.90625, + "num_input_tokens_seen": 26968500, + "step": 403 + }, + { + "epoch": 0.045843971631205675, + "grad_norm": 22.068668365478516, + "learning_rate": 5e-05, + "loss": 1.8078, + "num_input_tokens_seen": 27035828, + "step": 404 + }, + { + "epoch": 0.045843971631205675, + "loss": 1.827065110206604, + "loss_ce": 0.0067526125349104404, + "loss_iou": 0.796875, + "loss_num": 0.045654296875, + "loss_xval": 1.8203125, + "num_input_tokens_seen": 27035828, + "step": 404 + }, + { + "epoch": 0.04595744680851064, + "grad_norm": 20.81964111328125, + "learning_rate": 5e-05, + "loss": 1.7289, + "num_input_tokens_seen": 27102964, + "step": 405 + }, + { + "epoch": 0.04595744680851064, + "loss": 1.7249011993408203, + "loss_ce": 0.0032214459497481585, + "loss_iou": 0.75, + "loss_num": 0.044921875, + "loss_xval": 1.71875, + "num_input_tokens_seen": 27102964, + "step": 405 + }, + { + "epoch": 0.046070921985815604, + "grad_norm": 56.11640548706055, + "learning_rate": 5e-05, + "loss": 2.3406, + "num_input_tokens_seen": 27169756, + "step": 406 + }, + { + "epoch": 0.046070921985815604, + "loss": 2.3455066680908203, + "loss_ce": 0.003709995886310935, + "loss_iou": 1.0625, + "loss_num": 0.04345703125, + "loss_xval": 2.34375, + "num_input_tokens_seen": 27169756, + "step": 406 + }, + { + "epoch": 0.04618439716312057, + "grad_norm": 12.530000686645508, + "learning_rate": 5e-05, + "loss": 2.1508, + "num_input_tokens_seen": 27235808, + "step": 407 + }, + { + "epoch": 0.04618439716312057, + "loss": 2.127427339553833, + "loss_ce": 0.0043803672306239605, + "loss_iou": 0.92578125, + "loss_num": 0.053955078125, + "loss_xval": 2.125, + "num_input_tokens_seen": 27235808, + "step": 407 + }, + { + "epoch": 0.04629787234042553, + "grad_norm": 15.539756774902344, + "learning_rate": 5e-05, + "loss": 1.8271, + "num_input_tokens_seen": 27302280, + "step": 408 + }, + { + "epoch": 0.04629787234042553, + "loss": 1.778533935546875, + "loss_ce": 0.004913278389722109, + "loss_iou": 0.75, + "loss_num": 0.05419921875, + "loss_xval": 1.7734375, + "num_input_tokens_seen": 27302280, + "step": 408 + }, + { + "epoch": 0.046411347517730496, + "grad_norm": 9.61454963684082, + "learning_rate": 5e-05, + "loss": 1.617, + "num_input_tokens_seen": 27369344, + "step": 409 + }, + { + "epoch": 0.046411347517730496, + "loss": 1.6264002323150635, + "loss_ce": 0.005306573584675789, + "loss_iou": 0.6953125, + "loss_num": 0.04541015625, + "loss_xval": 1.625, + "num_input_tokens_seen": 27369344, + "step": 409 + }, + { + "epoch": 0.04652482269503546, + "grad_norm": 10.622180938720703, + "learning_rate": 5e-05, + "loss": 1.5892, + "num_input_tokens_seen": 27436968, + "step": 410 + }, + { + "epoch": 0.04652482269503546, + "loss": 1.7291643619537354, + "loss_ce": 0.005531529430299997, + "loss_iou": 0.765625, + "loss_num": 0.0390625, + "loss_xval": 1.7265625, + "num_input_tokens_seen": 27436968, + "step": 410 + }, + { + "epoch": 0.046638297872340424, + "grad_norm": 15.376582145690918, + "learning_rate": 5e-05, + "loss": 1.8487, + "num_input_tokens_seen": 27505404, + "step": 411 + }, + { + "epoch": 0.046638297872340424, + "loss": 1.8327088356018066, + "loss_ce": 0.0026307208463549614, + "loss_iou": 0.8359375, + "loss_num": 0.032470703125, + "loss_xval": 1.828125, + "num_input_tokens_seen": 27505404, + "step": 411 + }, + { + "epoch": 0.04675177304964539, + "grad_norm": 11.872221946716309, + "learning_rate": 5e-05, + "loss": 1.6469, + "num_input_tokens_seen": 27572352, + "step": 412 + }, + { + "epoch": 0.04675177304964539, + "loss": 1.5787746906280518, + "loss_ce": 0.004067587666213512, + "loss_iou": 0.6953125, + "loss_num": 0.03662109375, + "loss_xval": 1.578125, + "num_input_tokens_seen": 27572352, + "step": 412 + }, + { + "epoch": 0.04686524822695035, + "grad_norm": 18.680891036987305, + "learning_rate": 5e-05, + "loss": 1.6542, + "num_input_tokens_seen": 27638188, + "step": 413 + }, + { + "epoch": 0.04686524822695035, + "loss": 1.6176832914352417, + "loss_ce": 0.004890418611466885, + "loss_iou": 0.6953125, + "loss_num": 0.044189453125, + "loss_xval": 1.609375, + "num_input_tokens_seen": 27638188, + "step": 413 + }, + { + "epoch": 0.04697872340425532, + "grad_norm": 22.172426223754883, + "learning_rate": 5e-05, + "loss": 2.0528, + "num_input_tokens_seen": 27704732, + "step": 414 + }, + { + "epoch": 0.04697872340425532, + "loss": 2.256389617919922, + "loss_ce": 0.005412990693002939, + "loss_iou": 0.96875, + "loss_num": 0.0625, + "loss_xval": 2.25, + "num_input_tokens_seen": 27704732, + "step": 414 + }, + { + "epoch": 0.04709219858156028, + "grad_norm": 17.14741325378418, + "learning_rate": 5e-05, + "loss": 1.8043, + "num_input_tokens_seen": 27772420, + "step": 415 + }, + { + "epoch": 0.04709219858156028, + "loss": 1.8111803531646729, + "loss_ce": 0.0074694352224469185, + "loss_iou": 0.78515625, + "loss_num": 0.046875, + "loss_xval": 1.8046875, + "num_input_tokens_seen": 27772420, + "step": 415 + }, + { + "epoch": 0.047205673758865245, + "grad_norm": 32.592994689941406, + "learning_rate": 5e-05, + "loss": 1.7625, + "num_input_tokens_seen": 27839640, + "step": 416 + }, + { + "epoch": 0.047205673758865245, + "loss": 1.7363560199737549, + "loss_ce": 0.0029575261287391186, + "loss_iou": 0.77734375, + "loss_num": 0.035888671875, + "loss_xval": 1.734375, + "num_input_tokens_seen": 27839640, + "step": 416 + }, + { + "epoch": 0.047319148936170216, + "grad_norm": 13.362582206726074, + "learning_rate": 5e-05, + "loss": 2.0609, + "num_input_tokens_seen": 27906964, + "step": 417 + }, + { + "epoch": 0.047319148936170216, + "loss": 2.083091974258423, + "loss_ce": 0.0020371973514556885, + "loss_iou": 0.890625, + "loss_num": 0.059326171875, + "loss_xval": 2.078125, + "num_input_tokens_seen": 27906964, + "step": 417 + }, + { + "epoch": 0.04743262411347518, + "grad_norm": 11.892099380493164, + "learning_rate": 5e-05, + "loss": 1.9758, + "num_input_tokens_seen": 27972572, + "step": 418 + }, + { + "epoch": 0.04743262411347518, + "loss": 1.8345317840576172, + "loss_ce": 0.003477022284641862, + "loss_iou": 0.828125, + "loss_num": 0.03466796875, + "loss_xval": 1.828125, + "num_input_tokens_seen": 27972572, + "step": 418 + }, + { + "epoch": 0.047546099290780144, + "grad_norm": 12.974828720092773, + "learning_rate": 5e-05, + "loss": 2.0553, + "num_input_tokens_seen": 28040116, + "step": 419 + }, + { + "epoch": 0.047546099290780144, + "loss": 2.1069111824035645, + "loss_ce": 0.004372338764369488, + "loss_iou": 0.921875, + "loss_num": 0.05224609375, + "loss_xval": 2.109375, + "num_input_tokens_seen": 28040116, + "step": 419 + }, + { + "epoch": 0.04765957446808511, + "grad_norm": 15.413476943969727, + "learning_rate": 5e-05, + "loss": 1.5704, + "num_input_tokens_seen": 28106012, + "step": 420 + }, + { + "epoch": 0.04765957446808511, + "loss": 1.5764727592468262, + "loss_ce": 0.006160158663988113, + "loss_iou": 0.7109375, + "loss_num": 0.02978515625, + "loss_xval": 1.5703125, + "num_input_tokens_seen": 28106012, + "step": 420 + }, + { + "epoch": 0.04777304964539007, + "grad_norm": 43.5842170715332, + "learning_rate": 5e-05, + "loss": 1.8986, + "num_input_tokens_seen": 28173232, + "step": 421 + }, + { + "epoch": 0.04777304964539007, + "loss": 2.0166220664978027, + "loss_ce": 0.009786136448383331, + "loss_iou": 0.875, + "loss_num": 0.05224609375, + "loss_xval": 2.0, + "num_input_tokens_seen": 28173232, + "step": 421 + }, + { + "epoch": 0.04788652482269504, + "grad_norm": 14.385200500488281, + "learning_rate": 5e-05, + "loss": 2.342, + "num_input_tokens_seen": 28240552, + "step": 422 + }, + { + "epoch": 0.04788652482269504, + "loss": 2.291661024093628, + "loss_ce": 0.003575080307200551, + "loss_iou": 0.98046875, + "loss_num": 0.06494140625, + "loss_xval": 2.28125, + "num_input_tokens_seen": 28240552, + "step": 422 + }, + { + "epoch": 0.048, + "grad_norm": 14.22166633605957, + "learning_rate": 5e-05, + "loss": 2.0707, + "num_input_tokens_seen": 28307892, + "step": 423 + }, + { + "epoch": 0.048, + "loss": 2.004258632659912, + "loss_ce": 0.0052353194914758205, + "loss_iou": 0.8671875, + "loss_num": 0.052734375, + "loss_xval": 2.0, + "num_input_tokens_seen": 28307892, + "step": 423 + }, + { + "epoch": 0.048113475177304965, + "grad_norm": 14.730013847351074, + "learning_rate": 5e-05, + "loss": 1.8701, + "num_input_tokens_seen": 28375712, + "step": 424 + }, + { + "epoch": 0.048113475177304965, + "loss": 1.9105980396270752, + "loss_ce": 0.002394943730905652, + "loss_iou": 0.82421875, + "loss_num": 0.052001953125, + "loss_xval": 1.90625, + "num_input_tokens_seen": 28375712, + "step": 424 + }, + { + "epoch": 0.04822695035460993, + "grad_norm": 26.609506607055664, + "learning_rate": 5e-05, + "loss": 1.9807, + "num_input_tokens_seen": 28442992, + "step": 425 + }, + { + "epoch": 0.04822695035460993, + "loss": 1.723523497581482, + "loss_ce": 0.004407280590385199, + "loss_iou": 0.71484375, + "loss_num": 0.057373046875, + "loss_xval": 1.71875, + "num_input_tokens_seen": 28442992, + "step": 425 + }, + { + "epoch": 0.04834042553191489, + "grad_norm": 27.97642707824707, + "learning_rate": 5e-05, + "loss": 1.5819, + "num_input_tokens_seen": 28509048, + "step": 426 + }, + { + "epoch": 0.04834042553191489, + "loss": 1.4827972650527954, + "loss_ce": 0.008187826722860336, + "loss_iou": 0.6640625, + "loss_num": 0.0291748046875, + "loss_xval": 1.4765625, + "num_input_tokens_seen": 28509048, + "step": 426 + }, + { + "epoch": 0.04845390070921986, + "grad_norm": 15.974344253540039, + "learning_rate": 5e-05, + "loss": 1.6493, + "num_input_tokens_seen": 28577208, + "step": 427 + }, + { + "epoch": 0.04845390070921986, + "loss": 1.7086944580078125, + "loss_ce": 0.0026397653855383396, + "loss_iou": 0.78125, + "loss_num": 0.0277099609375, + "loss_xval": 1.703125, + "num_input_tokens_seen": 28577208, + "step": 427 + }, + { + "epoch": 0.04856737588652482, + "grad_norm": 28.1011962890625, + "learning_rate": 5e-05, + "loss": 1.8516, + "num_input_tokens_seen": 28643628, + "step": 428 + }, + { + "epoch": 0.04856737588652482, + "loss": 2.223907470703125, + "loss_ce": 0.007110418751835823, + "loss_iou": 0.9765625, + "loss_num": 0.053466796875, + "loss_xval": 2.21875, + "num_input_tokens_seen": 28643628, + "step": 428 + }, + { + "epoch": 0.048680851063829786, + "grad_norm": 12.834981918334961, + "learning_rate": 5e-05, + "loss": 1.8158, + "num_input_tokens_seen": 28710732, + "step": 429 + }, + { + "epoch": 0.048680851063829786, + "loss": 1.7045972347259521, + "loss_ce": 0.003913596272468567, + "loss_iou": 0.7421875, + "loss_num": 0.043212890625, + "loss_xval": 1.703125, + "num_input_tokens_seen": 28710732, + "step": 429 + }, + { + "epoch": 0.04879432624113475, + "grad_norm": 16.12135124206543, + "learning_rate": 5e-05, + "loss": 1.7662, + "num_input_tokens_seen": 28777148, + "step": 430 + }, + { + "epoch": 0.04879432624113475, + "loss": 1.725816249847412, + "loss_ce": 0.009019425138831139, + "loss_iou": 0.75390625, + "loss_num": 0.04150390625, + "loss_xval": 1.71875, + "num_input_tokens_seen": 28777148, + "step": 430 + }, + { + "epoch": 0.048907801418439714, + "grad_norm": 32.157508850097656, + "learning_rate": 5e-05, + "loss": 1.6668, + "num_input_tokens_seen": 28844640, + "step": 431 + }, + { + "epoch": 0.048907801418439714, + "loss": 1.588759422302246, + "loss_ce": 0.005751689430326223, + "loss_iou": 0.703125, + "loss_num": 0.03466796875, + "loss_xval": 1.5859375, + "num_input_tokens_seen": 28844640, + "step": 431 + }, + { + "epoch": 0.04902127659574468, + "grad_norm": 11.871682167053223, + "learning_rate": 5e-05, + "loss": 2.0238, + "num_input_tokens_seen": 28911908, + "step": 432 + }, + { + "epoch": 0.04902127659574468, + "loss": 2.2106776237487793, + "loss_ce": 0.00559955183416605, + "loss_iou": 0.9609375, + "loss_num": 0.055908203125, + "loss_xval": 2.203125, + "num_input_tokens_seen": 28911908, + "step": 432 + }, + { + "epoch": 0.04913475177304964, + "grad_norm": 18.667699813842773, + "learning_rate": 5e-05, + "loss": 1.876, + "num_input_tokens_seen": 28978748, + "step": 433 + }, + { + "epoch": 0.04913475177304964, + "loss": 1.7919790744781494, + "loss_ce": 0.003893126267939806, + "loss_iou": 0.80078125, + "loss_num": 0.036865234375, + "loss_xval": 1.7890625, + "num_input_tokens_seen": 28978748, + "step": 433 + }, + { + "epoch": 0.04924822695035461, + "grad_norm": 15.345803260803223, + "learning_rate": 5e-05, + "loss": 1.8145, + "num_input_tokens_seen": 29044848, + "step": 434 + }, + { + "epoch": 0.04924822695035461, + "loss": 1.8462426662445068, + "loss_ce": 0.004445869475603104, + "loss_iou": 0.796875, + "loss_num": 0.04931640625, + "loss_xval": 1.84375, + "num_input_tokens_seen": 29044848, + "step": 434 + }, + { + "epoch": 0.04936170212765958, + "grad_norm": 19.918176651000977, + "learning_rate": 5e-05, + "loss": 1.9222, + "num_input_tokens_seen": 29111776, + "step": 435 + }, + { + "epoch": 0.04936170212765958, + "loss": 1.8073511123657227, + "loss_ce": 0.00949951633810997, + "loss_iou": 0.80078125, + "loss_num": 0.039306640625, + "loss_xval": 1.796875, + "num_input_tokens_seen": 29111776, + "step": 435 + }, + { + "epoch": 0.04947517730496454, + "grad_norm": 15.709465980529785, + "learning_rate": 5e-05, + "loss": 1.7676, + "num_input_tokens_seen": 29178652, + "step": 436 + }, + { + "epoch": 0.04947517730496454, + "loss": 1.886029839515686, + "loss_ce": 0.005170505493879318, + "loss_iou": 0.83203125, + "loss_num": 0.043212890625, + "loss_xval": 1.8828125, + "num_input_tokens_seen": 29178652, + "step": 436 + }, + { + "epoch": 0.049588652482269506, + "grad_norm": 15.689748764038086, + "learning_rate": 5e-05, + "loss": 1.7474, + "num_input_tokens_seen": 29244812, + "step": 437 + }, + { + "epoch": 0.049588652482269506, + "loss": 1.8377293348312378, + "loss_ce": 0.011557423509657383, + "loss_iou": 0.76171875, + "loss_num": 0.06005859375, + "loss_xval": 1.828125, + "num_input_tokens_seen": 29244812, + "step": 437 + }, + { + "epoch": 0.04970212765957447, + "grad_norm": 16.3919677734375, + "learning_rate": 5e-05, + "loss": 1.9327, + "num_input_tokens_seen": 29312016, + "step": 438 + }, + { + "epoch": 0.04970212765957447, + "loss": 1.7913858890533447, + "loss_ce": 0.005131026264280081, + "loss_iou": 0.76953125, + "loss_num": 0.049560546875, + "loss_xval": 1.7890625, + "num_input_tokens_seen": 29312016, + "step": 438 + }, + { + "epoch": 0.049815602836879434, + "grad_norm": 20.11259651184082, + "learning_rate": 5e-05, + "loss": 1.9031, + "num_input_tokens_seen": 29378240, + "step": 439 + }, + { + "epoch": 0.049815602836879434, + "loss": 1.8458583354949951, + "loss_ce": 0.005038070492446423, + "loss_iou": 0.765625, + "loss_num": 0.061767578125, + "loss_xval": 1.84375, + "num_input_tokens_seen": 29378240, + "step": 439 + }, + { + "epoch": 0.0499290780141844, + "grad_norm": 16.678369522094727, + "learning_rate": 5e-05, + "loss": 2.0528, + "num_input_tokens_seen": 29443208, + "step": 440 + }, + { + "epoch": 0.0499290780141844, + "loss": 1.919705867767334, + "loss_ce": 0.004666781984269619, + "loss_iou": 0.8671875, + "loss_num": 0.036376953125, + "loss_xval": 1.9140625, + "num_input_tokens_seen": 29443208, + "step": 440 + }, + { + "epoch": 0.05004255319148936, + "grad_norm": 18.385400772094727, + "learning_rate": 5e-05, + "loss": 1.7291, + "num_input_tokens_seen": 29510676, + "step": 441 + }, + { + "epoch": 0.05004255319148936, + "loss": 1.703932285308838, + "loss_ce": 0.0086197629570961, + "loss_iou": 0.7421875, + "loss_num": 0.04150390625, + "loss_xval": 1.6953125, + "num_input_tokens_seen": 29510676, + "step": 441 + }, + { + "epoch": 0.05015602836879433, + "grad_norm": 26.662628173828125, + "learning_rate": 5e-05, + "loss": 1.5999, + "num_input_tokens_seen": 29577388, + "step": 442 + }, + { + "epoch": 0.05015602836879433, + "loss": 1.5883127450942993, + "loss_ce": 0.005060762632638216, + "loss_iou": 0.6875, + "loss_num": 0.041015625, + "loss_xval": 1.5859375, + "num_input_tokens_seen": 29577388, + "step": 442 + }, + { + "epoch": 0.05026950354609929, + "grad_norm": 23.787948608398438, + "learning_rate": 5e-05, + "loss": 1.8857, + "num_input_tokens_seen": 29644896, + "step": 443 + }, + { + "epoch": 0.05026950354609929, + "loss": 1.8763279914855957, + "loss_ce": 0.003281082957983017, + "loss_iou": 0.8046875, + "loss_num": 0.05322265625, + "loss_xval": 1.875, + "num_input_tokens_seen": 29644896, + "step": 443 + }, + { + "epoch": 0.050382978723404255, + "grad_norm": 14.178801536560059, + "learning_rate": 5e-05, + "loss": 1.8033, + "num_input_tokens_seen": 29711208, + "step": 444 + }, + { + "epoch": 0.050382978723404255, + "loss": 1.8651177883148193, + "loss_ce": 0.004277949221432209, + "loss_iou": 0.77734375, + "loss_num": 0.061279296875, + "loss_xval": 1.859375, + "num_input_tokens_seen": 29711208, + "step": 444 + }, + { + "epoch": 0.05049645390070922, + "grad_norm": 16.82393455505371, + "learning_rate": 5e-05, + "loss": 1.602, + "num_input_tokens_seen": 29778100, + "step": 445 + }, + { + "epoch": 0.05049645390070922, + "loss": 1.7924050092697144, + "loss_ce": 0.006272176280617714, + "loss_iou": 0.796875, + "loss_num": 0.03759765625, + "loss_xval": 1.7890625, + "num_input_tokens_seen": 29778100, + "step": 445 + }, + { + "epoch": 0.05060992907801418, + "grad_norm": 20.641925811767578, + "learning_rate": 5e-05, + "loss": 1.7391, + "num_input_tokens_seen": 29844460, + "step": 446 + }, + { + "epoch": 0.05060992907801418, + "loss": 1.6806879043579102, + "loss_ce": 0.007836369797587395, + "loss_iou": 0.765625, + "loss_num": 0.0283203125, + "loss_xval": 1.671875, + "num_input_tokens_seen": 29844460, + "step": 446 + }, + { + "epoch": 0.05072340425531915, + "grad_norm": 16.847999572753906, + "learning_rate": 5e-05, + "loss": 1.8047, + "num_input_tokens_seen": 29912188, + "step": 447 + }, + { + "epoch": 0.05072340425531915, + "loss": 1.7125816345214844, + "loss_ce": 0.008480105549097061, + "loss_iou": 0.73828125, + "loss_num": 0.04541015625, + "loss_xval": 1.703125, + "num_input_tokens_seen": 29912188, + "step": 447 + }, + { + "epoch": 0.05083687943262411, + "grad_norm": 29.198991775512695, + "learning_rate": 5e-05, + "loss": 1.7333, + "num_input_tokens_seen": 29978576, + "step": 448 + }, + { + "epoch": 0.05083687943262411, + "loss": 1.581322431564331, + "loss_ce": 0.004174027591943741, + "loss_iou": 0.703125, + "loss_num": 0.03466796875, + "loss_xval": 1.578125, + "num_input_tokens_seen": 29978576, + "step": 448 + }, + { + "epoch": 0.050950354609929076, + "grad_norm": 37.53202438354492, + "learning_rate": 5e-05, + "loss": 2.0893, + "num_input_tokens_seen": 30045076, + "step": 449 + }, + { + "epoch": 0.050950354609929076, + "loss": 2.154865026473999, + "loss_ce": 0.004474320448935032, + "loss_iou": 0.953125, + "loss_num": 0.047607421875, + "loss_xval": 2.15625, + "num_input_tokens_seen": 30045076, + "step": 449 + }, + { + "epoch": 0.05106382978723404, + "grad_norm": 12.828842163085938, + "learning_rate": 5e-05, + "loss": 1.8488, + "num_input_tokens_seen": 30111984, + "step": 450 + }, + { + "epoch": 0.05106382978723404, + "loss": 1.8974921703338623, + "loss_ce": 0.004914054647088051, + "loss_iou": 0.828125, + "loss_num": 0.046630859375, + "loss_xval": 1.890625, + "num_input_tokens_seen": 30111984, + "step": 450 + }, + { + "epoch": 0.051177304964539004, + "grad_norm": 20.155534744262695, + "learning_rate": 5e-05, + "loss": 1.7468, + "num_input_tokens_seen": 30179340, + "step": 451 + }, + { + "epoch": 0.051177304964539004, + "loss": 1.9067672491073608, + "loss_ce": 0.0073531754314899445, + "loss_iou": 0.8125, + "loss_num": 0.05419921875, + "loss_xval": 1.8984375, + "num_input_tokens_seen": 30179340, + "step": 451 + }, + { + "epoch": 0.05129078014184397, + "grad_norm": 18.95163917541504, + "learning_rate": 5e-05, + "loss": 1.8774, + "num_input_tokens_seen": 30246704, + "step": 452 + }, + { + "epoch": 0.05129078014184397, + "loss": 1.65592622756958, + "loss_ce": 0.0035823718644678593, + "loss_iou": 0.71875, + "loss_num": 0.04248046875, + "loss_xval": 1.65625, + "num_input_tokens_seen": 30246704, + "step": 452 + }, + { + "epoch": 0.05140425531914894, + "grad_norm": 18.39621353149414, + "learning_rate": 5e-05, + "loss": 1.7149, + "num_input_tokens_seen": 30313880, + "step": 453 + }, + { + "epoch": 0.05140425531914894, + "loss": 1.4568465948104858, + "loss_ce": 0.006163023877888918, + "loss_iou": 0.65625, + "loss_num": 0.028076171875, + "loss_xval": 1.453125, + "num_input_tokens_seen": 30313880, + "step": 453 + }, + { + "epoch": 0.0515177304964539, + "grad_norm": 14.48949146270752, + "learning_rate": 5e-05, + "loss": 1.8896, + "num_input_tokens_seen": 30381020, + "step": 454 + }, + { + "epoch": 0.0515177304964539, + "loss": 2.050734519958496, + "loss_ce": 0.005812693387269974, + "loss_iou": 0.890625, + "loss_num": 0.052978515625, + "loss_xval": 2.046875, + "num_input_tokens_seen": 30381020, + "step": 454 + }, + { + "epoch": 0.05163120567375887, + "grad_norm": 9.998414039611816, + "learning_rate": 5e-05, + "loss": 1.583, + "num_input_tokens_seen": 30447816, + "step": 455 + }, + { + "epoch": 0.05163120567375887, + "loss": 1.6678377389907837, + "loss_ce": 0.004751769360154867, + "loss_iou": 0.7265625, + "loss_num": 0.042236328125, + "loss_xval": 1.6640625, + "num_input_tokens_seen": 30447816, + "step": 455 + }, + { + "epoch": 0.05174468085106383, + "grad_norm": 13.091293334960938, + "learning_rate": 5e-05, + "loss": 1.5934, + "num_input_tokens_seen": 30514876, + "step": 456 + }, + { + "epoch": 0.05174468085106383, + "loss": 1.3308260440826416, + "loss_ce": 0.005630653351545334, + "loss_iou": 0.609375, + "loss_num": 0.021484375, + "loss_xval": 1.328125, + "num_input_tokens_seen": 30514876, + "step": 456 + }, + { + "epoch": 0.051858156028368796, + "grad_norm": 16.565256118774414, + "learning_rate": 5e-05, + "loss": 1.6661, + "num_input_tokens_seen": 30581384, + "step": 457 + }, + { + "epoch": 0.051858156028368796, + "loss": 1.584135890007019, + "loss_ce": 0.004057720303535461, + "loss_iou": 0.7109375, + "loss_num": 0.031494140625, + "loss_xval": 1.578125, + "num_input_tokens_seen": 30581384, + "step": 457 + }, + { + "epoch": 0.05197163120567376, + "grad_norm": 17.234827041625977, + "learning_rate": 5e-05, + "loss": 1.835, + "num_input_tokens_seen": 30647236, + "step": 458 + }, + { + "epoch": 0.05197163120567376, + "loss": 1.961125135421753, + "loss_ce": 0.004093952011317015, + "loss_iou": 0.8359375, + "loss_num": 0.0576171875, + "loss_xval": 1.953125, + "num_input_tokens_seen": 30647236, + "step": 458 + }, + { + "epoch": 0.052085106382978724, + "grad_norm": 16.93844223022461, + "learning_rate": 5e-05, + "loss": 1.7352, + "num_input_tokens_seen": 30714296, + "step": 459 + }, + { + "epoch": 0.052085106382978724, + "loss": 1.764148235321045, + "loss_ce": 0.0014528923202306032, + "loss_iou": 0.75390625, + "loss_num": 0.0517578125, + "loss_xval": 1.765625, + "num_input_tokens_seen": 30714296, + "step": 459 + }, + { + "epoch": 0.05219858156028369, + "grad_norm": 56.34878921508789, + "learning_rate": 5e-05, + "loss": 1.9018, + "num_input_tokens_seen": 30781476, + "step": 460 + }, + { + "epoch": 0.05219858156028369, + "loss": 1.7753562927246094, + "loss_ce": 0.003871969413012266, + "loss_iou": 0.77734375, + "loss_num": 0.04296875, + "loss_xval": 1.7734375, + "num_input_tokens_seen": 30781476, + "step": 460 + }, + { + "epoch": 0.05231205673758865, + "grad_norm": 14.536331176757812, + "learning_rate": 5e-05, + "loss": 1.9647, + "num_input_tokens_seen": 30849172, + "step": 461 + }, + { + "epoch": 0.05231205673758865, + "loss": 1.9756975173950195, + "loss_ce": 0.004994324408471584, + "loss_iou": 0.85546875, + "loss_num": 0.052734375, + "loss_xval": 1.96875, + "num_input_tokens_seen": 30849172, + "step": 461 + }, + { + "epoch": 0.05242553191489362, + "grad_norm": 14.186684608459473, + "learning_rate": 5e-05, + "loss": 1.7168, + "num_input_tokens_seen": 30915884, + "step": 462 + }, + { + "epoch": 0.05242553191489362, + "loss": 1.7926448583602905, + "loss_ce": 0.005535459145903587, + "loss_iou": 0.8046875, + "loss_num": 0.03466796875, + "loss_xval": 1.7890625, + "num_input_tokens_seen": 30915884, + "step": 462 + }, + { + "epoch": 0.05253900709219858, + "grad_norm": 27.914928436279297, + "learning_rate": 5e-05, + "loss": 1.9929, + "num_input_tokens_seen": 30983564, + "step": 463 + }, + { + "epoch": 0.05253900709219858, + "loss": 1.9725689888000488, + "loss_ce": 0.008701816201210022, + "loss_iou": 0.828125, + "loss_num": 0.0625, + "loss_xval": 1.9609375, + "num_input_tokens_seen": 30983564, + "step": 463 + }, + { + "epoch": 0.052652482269503545, + "grad_norm": 14.861388206481934, + "learning_rate": 5e-05, + "loss": 2.1049, + "num_input_tokens_seen": 31050428, + "step": 464 + }, + { + "epoch": 0.052652482269503545, + "loss": 2.025031089782715, + "loss_ce": 0.009406102821230888, + "loss_iou": 0.8828125, + "loss_num": 0.04931640625, + "loss_xval": 2.015625, + "num_input_tokens_seen": 31050428, + "step": 464 + }, + { + "epoch": 0.05276595744680851, + "grad_norm": 13.904372215270996, + "learning_rate": 5e-05, + "loss": 1.808, + "num_input_tokens_seen": 31117272, + "step": 465 + }, + { + "epoch": 0.05276595744680851, + "loss": 1.8502850532531738, + "loss_ce": 0.006535022519528866, + "loss_iou": 0.796875, + "loss_num": 0.050537109375, + "loss_xval": 1.84375, + "num_input_tokens_seen": 31117272, + "step": 465 + }, + { + "epoch": 0.05287943262411347, + "grad_norm": 13.449167251586914, + "learning_rate": 5e-05, + "loss": 1.8736, + "num_input_tokens_seen": 31184324, + "step": 466 + }, + { + "epoch": 0.05287943262411347, + "loss": 1.8294992446899414, + "loss_ce": 0.009186818264424801, + "loss_iou": 0.79296875, + "loss_num": 0.04638671875, + "loss_xval": 1.8203125, + "num_input_tokens_seen": 31184324, + "step": 466 + }, + { + "epoch": 0.05299290780141844, + "grad_norm": 14.974588394165039, + "learning_rate": 5e-05, + "loss": 1.826, + "num_input_tokens_seen": 31251184, + "step": 467 + }, + { + "epoch": 0.05299290780141844, + "loss": 1.988067865371704, + "loss_ce": 0.006622529588639736, + "loss_iou": 0.84375, + "loss_num": 0.05810546875, + "loss_xval": 1.984375, + "num_input_tokens_seen": 31251184, + "step": 467 + }, + { + "epoch": 0.0531063829787234, + "grad_norm": 27.92066764831543, + "learning_rate": 5e-05, + "loss": 1.5861, + "num_input_tokens_seen": 31318000, + "step": 468 + }, + { + "epoch": 0.0531063829787234, + "loss": 1.5720558166503906, + "loss_ce": 0.006626160349696875, + "loss_iou": 0.70703125, + "loss_num": 0.0306396484375, + "loss_xval": 1.5625, + "num_input_tokens_seen": 31318000, + "step": 468 + }, + { + "epoch": 0.053219858156028366, + "grad_norm": 11.846701622009277, + "learning_rate": 5e-05, + "loss": 2.2184, + "num_input_tokens_seen": 31385032, + "step": 469 + }, + { + "epoch": 0.053219858156028366, + "loss": 2.2774927616119385, + "loss_ce": 0.0069850143045187, + "loss_iou": 0.9609375, + "loss_num": 0.0693359375, + "loss_xval": 2.265625, + "num_input_tokens_seen": 31385032, + "step": 469 + }, + { + "epoch": 0.05333333333333334, + "grad_norm": 24.641067504882812, + "learning_rate": 5e-05, + "loss": 1.8831, + "num_input_tokens_seen": 31451824, + "step": 470 + }, + { + "epoch": 0.05333333333333334, + "loss": 1.865307331085205, + "loss_ce": 0.004955767188221216, + "loss_iou": 0.8203125, + "loss_num": 0.04443359375, + "loss_xval": 1.859375, + "num_input_tokens_seen": 31451824, + "step": 470 + }, + { + "epoch": 0.0534468085106383, + "grad_norm": 12.863563537597656, + "learning_rate": 5e-05, + "loss": 1.7695, + "num_input_tokens_seen": 31519036, + "step": 471 + }, + { + "epoch": 0.0534468085106383, + "loss": 1.7237727642059326, + "loss_ce": 0.0030695577152073383, + "loss_iou": 0.78125, + "loss_num": 0.03271484375, + "loss_xval": 1.71875, + "num_input_tokens_seen": 31519036, + "step": 471 + }, + { + "epoch": 0.053560283687943265, + "grad_norm": 27.22347640991211, + "learning_rate": 5e-05, + "loss": 1.5874, + "num_input_tokens_seen": 31587096, + "step": 472 + }, + { + "epoch": 0.053560283687943265, + "loss": 1.5669457912445068, + "loss_ce": 0.0034692443441599607, + "loss_iou": 0.69140625, + "loss_num": 0.035400390625, + "loss_xval": 1.5625, + "num_input_tokens_seen": 31587096, + "step": 472 + }, + { + "epoch": 0.05367375886524823, + "grad_norm": 18.264036178588867, + "learning_rate": 5e-05, + "loss": 1.8191, + "num_input_tokens_seen": 31654240, + "step": 473 + }, + { + "epoch": 0.05367375886524823, + "loss": 1.7429664134979248, + "loss_ce": 0.0037084869109094143, + "loss_iou": 0.78125, + "loss_num": 0.035888671875, + "loss_xval": 1.7421875, + "num_input_tokens_seen": 31654240, + "step": 473 + }, + { + "epoch": 0.05378723404255319, + "grad_norm": 18.404027938842773, + "learning_rate": 5e-05, + "loss": 1.5807, + "num_input_tokens_seen": 31721244, + "step": 474 + }, + { + "epoch": 0.05378723404255319, + "loss": 1.5907866954803467, + "loss_ce": 0.004849134013056755, + "loss_iou": 0.70703125, + "loss_num": 0.033935546875, + "loss_xval": 1.5859375, + "num_input_tokens_seen": 31721244, + "step": 474 + }, + { + "epoch": 0.05390070921985816, + "grad_norm": 20.07286834716797, + "learning_rate": 5e-05, + "loss": 1.5235, + "num_input_tokens_seen": 31787384, + "step": 475 + }, + { + "epoch": 0.05390070921985816, + "loss": 1.7349066734313965, + "loss_ce": 0.002973106224089861, + "loss_iou": 0.77734375, + "loss_num": 0.0361328125, + "loss_xval": 1.734375, + "num_input_tokens_seen": 31787384, + "step": 475 + }, + { + "epoch": 0.05401418439716312, + "grad_norm": 16.9139404296875, + "learning_rate": 5e-05, + "loss": 1.9671, + "num_input_tokens_seen": 31855040, + "step": 476 + }, + { + "epoch": 0.05401418439716312, + "loss": 1.9531457424163818, + "loss_ce": 0.010762862861156464, + "loss_iou": 0.87109375, + "loss_num": 0.040283203125, + "loss_xval": 1.9453125, + "num_input_tokens_seen": 31855040, + "step": 476 + }, + { + "epoch": 0.054127659574468086, + "grad_norm": 18.149900436401367, + "learning_rate": 5e-05, + "loss": 1.9113, + "num_input_tokens_seen": 31921380, + "step": 477 + }, + { + "epoch": 0.054127659574468086, + "loss": 1.9605334997177124, + "loss_ce": 0.004478826653212309, + "loss_iou": 0.8671875, + "loss_num": 0.045166015625, + "loss_xval": 1.953125, + "num_input_tokens_seen": 31921380, + "step": 477 + }, + { + "epoch": 0.05424113475177305, + "grad_norm": 14.876150131225586, + "learning_rate": 5e-05, + "loss": 1.6897, + "num_input_tokens_seen": 31987088, + "step": 478 + }, + { + "epoch": 0.05424113475177305, + "loss": 1.6967155933380127, + "loss_ce": 0.004332706332206726, + "loss_iou": 0.7265625, + "loss_num": 0.046875, + "loss_xval": 1.6953125, + "num_input_tokens_seen": 31987088, + "step": 478 + }, + { + "epoch": 0.054354609929078014, + "grad_norm": 16.95387077331543, + "learning_rate": 5e-05, + "loss": 1.7193, + "num_input_tokens_seen": 32054652, + "step": 479 + }, + { + "epoch": 0.054354609929078014, + "loss": 1.7380940914154053, + "loss_ce": 0.004695648793131113, + "loss_iou": 0.80078125, + "loss_num": 0.0262451171875, + "loss_xval": 1.734375, + "num_input_tokens_seen": 32054652, + "step": 479 + }, + { + "epoch": 0.05446808510638298, + "grad_norm": 26.43242645263672, + "learning_rate": 5e-05, + "loss": 1.7469, + "num_input_tokens_seen": 32122256, + "step": 480 + }, + { + "epoch": 0.05446808510638298, + "loss": 1.8175668716430664, + "loss_ce": 0.007019917480647564, + "loss_iou": 0.796875, + "loss_num": 0.04345703125, + "loss_xval": 1.8125, + "num_input_tokens_seen": 32122256, + "step": 480 + }, + { + "epoch": 0.05458156028368794, + "grad_norm": 16.85852813720703, + "learning_rate": 5e-05, + "loss": 2.1315, + "num_input_tokens_seen": 32188972, + "step": 481 + }, + { + "epoch": 0.05458156028368794, + "loss": 2.125433921813965, + "loss_ce": 0.0062933010049164295, + "loss_iou": 0.890625, + "loss_num": 0.06689453125, + "loss_xval": 2.125, + "num_input_tokens_seen": 32188972, + "step": 481 + }, + { + "epoch": 0.054695035460992907, + "grad_norm": 17.447099685668945, + "learning_rate": 5e-05, + "loss": 1.5436, + "num_input_tokens_seen": 32255776, + "step": 482 + }, + { + "epoch": 0.054695035460992907, + "loss": 1.4522266387939453, + "loss_ce": 0.004961016587913036, + "loss_iou": 0.65234375, + "loss_num": 0.02880859375, + "loss_xval": 1.4453125, + "num_input_tokens_seen": 32255776, + "step": 482 + }, + { + "epoch": 0.05480851063829787, + "grad_norm": 40.85325622558594, + "learning_rate": 5e-05, + "loss": 2.0576, + "num_input_tokens_seen": 32323804, + "step": 483 + }, + { + "epoch": 0.05480851063829787, + "loss": 2.176208972930908, + "loss_ce": 0.006287143100053072, + "loss_iou": 0.9453125, + "loss_num": 0.05615234375, + "loss_xval": 2.171875, + "num_input_tokens_seen": 32323804, + "step": 483 + }, + { + "epoch": 0.054921985815602835, + "grad_norm": 12.550103187561035, + "learning_rate": 5e-05, + "loss": 2.0926, + "num_input_tokens_seen": 32390072, + "step": 484 + }, + { + "epoch": 0.054921985815602835, + "loss": 2.095818042755127, + "loss_ce": 0.007927569560706615, + "loss_iou": 0.890625, + "loss_num": 0.0615234375, + "loss_xval": 2.09375, + "num_input_tokens_seen": 32390072, + "step": 484 + }, + { + "epoch": 0.0550354609929078, + "grad_norm": 16.776363372802734, + "learning_rate": 5e-05, + "loss": 1.7821, + "num_input_tokens_seen": 32456472, + "step": 485 + }, + { + "epoch": 0.0550354609929078, + "loss": 1.6837393045425415, + "loss_ce": 0.004051723517477512, + "loss_iou": 0.76171875, + "loss_num": 0.031005859375, + "loss_xval": 1.6796875, + "num_input_tokens_seen": 32456472, + "step": 485 + }, + { + "epoch": 0.05514893617021276, + "grad_norm": 11.00390338897705, + "learning_rate": 5e-05, + "loss": 1.6557, + "num_input_tokens_seen": 32523264, + "step": 486 + }, + { + "epoch": 0.05514893617021276, + "loss": 1.5232501029968262, + "loss_ce": 0.0037188236601650715, + "loss_iou": 0.69140625, + "loss_num": 0.02685546875, + "loss_xval": 1.515625, + "num_input_tokens_seen": 32523264, + "step": 486 + }, + { + "epoch": 0.05526241134751773, + "grad_norm": 15.09769058227539, + "learning_rate": 5e-05, + "loss": 1.6505, + "num_input_tokens_seen": 32590036, + "step": 487 + }, + { + "epoch": 0.05526241134751773, + "loss": 1.6616194248199463, + "loss_ce": 0.006345957517623901, + "loss_iou": 0.73046875, + "loss_num": 0.0390625, + "loss_xval": 1.65625, + "num_input_tokens_seen": 32590036, + "step": 487 + }, + { + "epoch": 0.0553758865248227, + "grad_norm": 16.690452575683594, + "learning_rate": 5e-05, + "loss": 1.7599, + "num_input_tokens_seen": 32657960, + "step": 488 + }, + { + "epoch": 0.0553758865248227, + "loss": 1.929264783859253, + "loss_ce": 0.004460081458091736, + "loss_iou": 0.8515625, + "loss_num": 0.043701171875, + "loss_xval": 1.921875, + "num_input_tokens_seen": 32657960, + "step": 488 + }, + { + "epoch": 0.05548936170212766, + "grad_norm": 17.8673038482666, + "learning_rate": 5e-05, + "loss": 1.7368, + "num_input_tokens_seen": 32725108, + "step": 489 + }, + { + "epoch": 0.05548936170212766, + "loss": 1.740478277206421, + "loss_ce": 0.0070797959342598915, + "loss_iou": 0.765625, + "loss_num": 0.0400390625, + "loss_xval": 1.734375, + "num_input_tokens_seen": 32725108, + "step": 489 + }, + { + "epoch": 0.05560283687943263, + "grad_norm": 28.986629486083984, + "learning_rate": 5e-05, + "loss": 1.6352, + "num_input_tokens_seen": 32792940, + "step": 490 + }, + { + "epoch": 0.05560283687943263, + "loss": 1.655182123184204, + "loss_ce": 0.004577907733619213, + "loss_iou": 0.734375, + "loss_num": 0.037353515625, + "loss_xval": 1.6484375, + "num_input_tokens_seen": 32792940, + "step": 490 + }, + { + "epoch": 0.05571631205673759, + "grad_norm": 17.518753051757812, + "learning_rate": 5e-05, + "loss": 1.8386, + "num_input_tokens_seen": 32860936, + "step": 491 + }, + { + "epoch": 0.05571631205673759, + "loss": 1.8705873489379883, + "loss_ce": 0.007306218612939119, + "loss_iou": 0.82421875, + "loss_num": 0.043701171875, + "loss_xval": 1.859375, + "num_input_tokens_seen": 32860936, + "step": 491 + }, + { + "epoch": 0.055829787234042555, + "grad_norm": 18.854087829589844, + "learning_rate": 5e-05, + "loss": 1.6949, + "num_input_tokens_seen": 32927628, + "step": 492 + }, + { + "epoch": 0.055829787234042555, + "loss": 1.7307536602020264, + "loss_ce": 0.006144308485090733, + "loss_iou": 0.7578125, + "loss_num": 0.04248046875, + "loss_xval": 1.7265625, + "num_input_tokens_seen": 32927628, + "step": 492 + }, + { + "epoch": 0.05594326241134752, + "grad_norm": 41.258792877197266, + "learning_rate": 5e-05, + "loss": 1.8674, + "num_input_tokens_seen": 32995868, + "step": 493 + }, + { + "epoch": 0.05594326241134752, + "loss": 1.9919437170028687, + "loss_ce": 0.005615574307739735, + "loss_iou": 0.859375, + "loss_num": 0.0537109375, + "loss_xval": 1.984375, + "num_input_tokens_seen": 32995868, + "step": 493 + }, + { + "epoch": 0.05605673758865248, + "grad_norm": 16.930034637451172, + "learning_rate": 5e-05, + "loss": 1.8587, + "num_input_tokens_seen": 33062884, + "step": 494 + }, + { + "epoch": 0.05605673758865248, + "loss": 1.9667967557907104, + "loss_ce": 0.005859252065420151, + "loss_iou": 0.859375, + "loss_num": 0.048095703125, + "loss_xval": 1.9609375, + "num_input_tokens_seen": 33062884, + "step": 494 + }, + { + "epoch": 0.05617021276595745, + "grad_norm": 17.537187576293945, + "learning_rate": 5e-05, + "loss": 1.8695, + "num_input_tokens_seen": 33128360, + "step": 495 + }, + { + "epoch": 0.05617021276595745, + "loss": 1.824951171875, + "loss_ce": 0.009521433152258396, + "loss_iou": 0.78515625, + "loss_num": 0.0498046875, + "loss_xval": 1.8125, + "num_input_tokens_seen": 33128360, + "step": 495 + }, + { + "epoch": 0.05628368794326241, + "grad_norm": 16.505859375, + "learning_rate": 5e-05, + "loss": 1.8071, + "num_input_tokens_seen": 33195268, + "step": 496 + }, + { + "epoch": 0.05628368794326241, + "loss": 1.869712471961975, + "loss_ce": 0.010337421670556068, + "loss_iou": 0.84375, + "loss_num": 0.034423828125, + "loss_xval": 1.859375, + "num_input_tokens_seen": 33195268, + "step": 496 + }, + { + "epoch": 0.056397163120567376, + "grad_norm": 15.579615592956543, + "learning_rate": 5e-05, + "loss": 1.5516, + "num_input_tokens_seen": 33262248, + "step": 497 + }, + { + "epoch": 0.056397163120567376, + "loss": 1.5239934921264648, + "loss_ce": 0.001532452180981636, + "loss_iou": 0.69140625, + "loss_num": 0.028076171875, + "loss_xval": 1.5234375, + "num_input_tokens_seen": 33262248, + "step": 497 + }, + { + "epoch": 0.05651063829787234, + "grad_norm": 21.267019271850586, + "learning_rate": 5e-05, + "loss": 1.7544, + "num_input_tokens_seen": 33328812, + "step": 498 + }, + { + "epoch": 0.05651063829787234, + "loss": 1.7380146980285645, + "loss_ce": 0.004616203717887402, + "loss_iou": 0.76953125, + "loss_num": 0.03955078125, + "loss_xval": 1.734375, + "num_input_tokens_seen": 33328812, + "step": 498 + }, + { + "epoch": 0.056624113475177304, + "grad_norm": 21.656267166137695, + "learning_rate": 5e-05, + "loss": 2.1568, + "num_input_tokens_seen": 33395540, + "step": 499 + }, + { + "epoch": 0.056624113475177304, + "loss": 2.0941357612609863, + "loss_ce": 0.005268476903438568, + "loss_iou": 0.890625, + "loss_num": 0.06201171875, + "loss_xval": 2.09375, + "num_input_tokens_seen": 33395540, + "step": 499 + }, + { + "epoch": 0.05673758865248227, + "grad_norm": 32.38337707519531, + "learning_rate": 5e-05, + "loss": 1.7074, + "num_input_tokens_seen": 33461724, + "step": 500 + }, + { + "epoch": 0.05673758865248227, + "eval_seeclick_CIoU": 0.33865346014499664, + "eval_seeclick_GIoU": 0.2991623878479004, + "eval_seeclick_IoU": 0.42661620676517487, + "eval_seeclick_MAE_all": 0.13601338118314743, + "eval_seeclick_MAE_h": 0.083095483481884, + "eval_seeclick_MAE_w": 0.155178964138031, + "eval_seeclick_MAE_x_boxes": 0.21663396060466766, + "eval_seeclick_MAE_y_boxes": 0.12643355131149292, + "eval_seeclick_NUM_probability": 0.9994606673717499, + "eval_seeclick_inside_bbox": 0.628125011920929, + "eval_seeclick_loss": 2.6845502853393555, + "eval_seeclick_loss_ce": 0.01725900825113058, + "eval_seeclick_loss_iou": 1.012451171875, + "eval_seeclick_loss_num": 0.1344757080078125, + "eval_seeclick_loss_xval": 2.6982421875, + "eval_seeclick_runtime": 65.5282, + "eval_seeclick_samples_per_second": 0.717, + "eval_seeclick_steps_per_second": 0.031, + "num_input_tokens_seen": 33461724, + "step": 500 + }, + { + "epoch": 0.05673758865248227, + "eval_icons_CIoU": 0.40222132205963135, + "eval_icons_GIoU": 0.3799079954624176, + "eval_icons_IoU": 0.4372557997703552, + "eval_icons_MAE_all": 0.1533055528998375, + "eval_icons_MAE_h": 0.1419243887066841, + "eval_icons_MAE_w": 0.16972815990447998, + "eval_icons_MAE_x_boxes": 0.10452849045395851, + "eval_icons_MAE_y_boxes": 0.09756794571876526, + "eval_icons_NUM_probability": 0.9998435378074646, + "eval_icons_inside_bbox": 0.7986111044883728, + "eval_icons_loss": 2.755291223526001, + "eval_icons_loss_ce": 0.0003269288135925308, + "eval_icons_loss_iou": 1.0068359375, + "eval_icons_loss_num": 0.14710235595703125, + "eval_icons_loss_xval": 2.74951171875, + "eval_icons_runtime": 66.2616, + "eval_icons_samples_per_second": 0.755, + "eval_icons_steps_per_second": 0.03, + "num_input_tokens_seen": 33461724, + "step": 500 + }, + { + "epoch": 0.05673758865248227, + "eval_screenspot_CIoU": 0.35266049206256866, + "eval_screenspot_GIoU": 0.31387072801589966, + "eval_screenspot_IoU": 0.42824824651082355, + "eval_screenspot_MAE_all": 0.14570995916922888, + "eval_screenspot_MAE_h": 0.07303617149591446, + "eval_screenspot_MAE_w": 0.16847039262453714, + "eval_screenspot_MAE_x_boxes": 0.22992166380087534, + "eval_screenspot_MAE_y_boxes": 0.10540262361367543, + "eval_screenspot_NUM_probability": 0.9996037681897482, + "eval_screenspot_inside_bbox": 0.6879166762034098, + "eval_screenspot_loss": 2.853323221206665, + "eval_screenspot_loss_ce": 0.01056032751997312, + "eval_screenspot_loss_iou": 1.0725911458333333, + "eval_screenspot_loss_num": 0.157745361328125, + "eval_screenspot_loss_xval": 2.9342447916666665, + "eval_screenspot_runtime": 126.4546, + "eval_screenspot_samples_per_second": 0.704, + "eval_screenspot_steps_per_second": 0.024, + "num_input_tokens_seen": 33461724, + "step": 500 + }, + { + "epoch": 0.05673758865248227, + "eval_compot_CIoU": 0.39955495297908783, + "eval_compot_GIoU": 0.37072110176086426, + "eval_compot_IoU": 0.460318848490715, + "eval_compot_MAE_all": 0.11998276039958, + "eval_compot_MAE_h": 0.07390171103179455, + "eval_compot_MAE_w": 0.10969673469662666, + "eval_compot_MAE_x_boxes": 0.12593408674001694, + "eval_compot_MAE_y_boxes": 0.12005547434091568, + "eval_compot_NUM_probability": 0.9996008276939392, + "eval_compot_inside_bbox": 0.6614583432674408, + "eval_compot_loss": 2.7516956329345703, + "eval_compot_loss_ce": 0.006225240183994174, + "eval_compot_loss_iou": 1.05322265625, + "eval_compot_loss_num": 0.1226654052734375, + "eval_compot_loss_xval": 2.72021484375, + "eval_compot_runtime": 72.6289, + "eval_compot_samples_per_second": 0.688, + "eval_compot_steps_per_second": 0.028, + "num_input_tokens_seen": 33461724, + "step": 500 + }, + { + "epoch": 0.05673758865248227, + "loss": 2.6813271045684814, + "loss_ce": 0.004569365177303553, + "loss_iou": 1.0625, + "loss_num": 0.109375, + "loss_xval": 2.671875, + "num_input_tokens_seen": 33461724, + "step": 500 + }, + { + "epoch": 0.05685106382978723, + "grad_norm": 9.927964210510254, + "learning_rate": 5e-05, + "loss": 1.6579, + "num_input_tokens_seen": 33527956, + "step": 501 + }, + { + "epoch": 0.05685106382978723, + "loss": 1.6656379699707031, + "loss_ce": 0.004505157470703125, + "loss_iou": 0.734375, + "loss_num": 0.0380859375, + "loss_xval": 1.6640625, + "num_input_tokens_seen": 33527956, + "step": 501 + }, + { + "epoch": 0.056964539007092196, + "grad_norm": 15.336969375610352, + "learning_rate": 5e-05, + "loss": 1.723, + "num_input_tokens_seen": 33595680, + "step": 502 + }, + { + "epoch": 0.056964539007092196, + "loss": 1.547257423400879, + "loss_ce": 0.0042886752635240555, + "loss_iou": 0.67578125, + "loss_num": 0.038330078125, + "loss_xval": 1.546875, + "num_input_tokens_seen": 33595680, + "step": 502 + }, + { + "epoch": 0.05707801418439716, + "grad_norm": 37.03543472290039, + "learning_rate": 5e-05, + "loss": 1.8667, + "num_input_tokens_seen": 33662220, + "step": 503 + }, + { + "epoch": 0.05707801418439716, + "loss": 1.8445765972137451, + "loss_ce": 0.0037563536316156387, + "loss_iou": 0.8125, + "loss_num": 0.04248046875, + "loss_xval": 1.84375, + "num_input_tokens_seen": 33662220, + "step": 503 + }, + { + "epoch": 0.057191489361702125, + "grad_norm": 12.613948822021484, + "learning_rate": 5e-05, + "loss": 2.0824, + "num_input_tokens_seen": 33728636, + "step": 504 + }, + { + "epoch": 0.057191489361702125, + "loss": 2.1450679302215576, + "loss_ce": 0.004442976787686348, + "loss_iou": 0.90625, + "loss_num": 0.0654296875, + "loss_xval": 2.140625, + "num_input_tokens_seen": 33728636, + "step": 504 + }, + { + "epoch": 0.05730496453900709, + "grad_norm": 26.202863693237305, + "learning_rate": 5e-05, + "loss": 1.987, + "num_input_tokens_seen": 33796092, + "step": 505 + }, + { + "epoch": 0.05730496453900709, + "loss": 1.97000253200531, + "loss_ce": 0.005158800631761551, + "loss_iou": 0.84375, + "loss_num": 0.05517578125, + "loss_xval": 1.96875, + "num_input_tokens_seen": 33796092, + "step": 505 + }, + { + "epoch": 0.05741843971631206, + "grad_norm": 11.607341766357422, + "learning_rate": 5e-05, + "loss": 1.6481, + "num_input_tokens_seen": 33863928, + "step": 506 + }, + { + "epoch": 0.05741843971631206, + "loss": 1.4929484128952026, + "loss_ce": 0.0036905594170093536, + "loss_iou": 0.64453125, + "loss_num": 0.040283203125, + "loss_xval": 1.4921875, + "num_input_tokens_seen": 33863928, + "step": 506 + }, + { + "epoch": 0.057531914893617024, + "grad_norm": 17.151288986206055, + "learning_rate": 5e-05, + "loss": 1.658, + "num_input_tokens_seen": 33931420, + "step": 507 + }, + { + "epoch": 0.057531914893617024, + "loss": 1.6835674047470093, + "loss_ce": 0.002903369953855872, + "loss_iou": 0.73828125, + "loss_num": 0.04052734375, + "loss_xval": 1.6796875, + "num_input_tokens_seen": 33931420, + "step": 507 + }, + { + "epoch": 0.05764539007092199, + "grad_norm": 12.774615287780762, + "learning_rate": 5e-05, + "loss": 1.7056, + "num_input_tokens_seen": 33998700, + "step": 508 + }, + { + "epoch": 0.05764539007092199, + "loss": 1.7969893217086792, + "loss_ce": 0.007926814258098602, + "loss_iou": 0.77734375, + "loss_num": 0.046630859375, + "loss_xval": 1.7890625, + "num_input_tokens_seen": 33998700, + "step": 508 + }, + { + "epoch": 0.05775886524822695, + "grad_norm": 18.94835662841797, + "learning_rate": 5e-05, + "loss": 1.8649, + "num_input_tokens_seen": 34065244, + "step": 509 + }, + { + "epoch": 0.05775886524822695, + "loss": 1.8767831325531006, + "loss_ce": 0.006665830034762621, + "loss_iou": 0.77734375, + "loss_num": 0.06298828125, + "loss_xval": 1.8671875, + "num_input_tokens_seen": 34065244, + "step": 509 + }, + { + "epoch": 0.05787234042553192, + "grad_norm": 16.830333709716797, + "learning_rate": 5e-05, + "loss": 2.0858, + "num_input_tokens_seen": 34131516, + "step": 510 + }, + { + "epoch": 0.05787234042553192, + "loss": 1.9188733100891113, + "loss_ce": 0.004810838028788567, + "loss_iou": 0.80859375, + "loss_num": 0.059814453125, + "loss_xval": 1.9140625, + "num_input_tokens_seen": 34131516, + "step": 510 + }, + { + "epoch": 0.05798581560283688, + "grad_norm": 22.24744415283203, + "learning_rate": 5e-05, + "loss": 1.69, + "num_input_tokens_seen": 34198132, + "step": 511 + }, + { + "epoch": 0.05798581560283688, + "loss": 1.5659008026123047, + "loss_ce": 0.005353884771466255, + "loss_iou": 0.69921875, + "loss_num": 0.03271484375, + "loss_xval": 1.5625, + "num_input_tokens_seen": 34198132, + "step": 511 + }, + { + "epoch": 0.058099290780141845, + "grad_norm": 16.553009033203125, + "learning_rate": 5e-05, + "loss": 1.7972, + "num_input_tokens_seen": 34264680, + "step": 512 + }, + { + "epoch": 0.058099290780141845, + "loss": 1.9784189462661743, + "loss_ce": 0.007715813349932432, + "loss_iou": 0.8671875, + "loss_num": 0.047119140625, + "loss_xval": 1.96875, + "num_input_tokens_seen": 34264680, + "step": 512 + }, + { + "epoch": 0.05821276595744681, + "grad_norm": 17.448020935058594, + "learning_rate": 5e-05, + "loss": 1.9145, + "num_input_tokens_seen": 34332212, + "step": 513 + }, + { + "epoch": 0.05821276595744681, + "loss": 2.0494165420532227, + "loss_ce": 0.009377341717481613, + "loss_iou": 0.82421875, + "loss_num": 0.0791015625, + "loss_xval": 2.046875, + "num_input_tokens_seen": 34332212, + "step": 513 + }, + { + "epoch": 0.05832624113475177, + "grad_norm": 34.932010650634766, + "learning_rate": 5e-05, + "loss": 1.759, + "num_input_tokens_seen": 34399064, + "step": 514 + }, + { + "epoch": 0.05832624113475177, + "loss": 1.8284952640533447, + "loss_ce": 0.004276429768651724, + "loss_iou": 0.80859375, + "loss_num": 0.0419921875, + "loss_xval": 1.828125, + "num_input_tokens_seen": 34399064, + "step": 514 + }, + { + "epoch": 0.05843971631205674, + "grad_norm": 11.88154411315918, + "learning_rate": 5e-05, + "loss": 2.1805, + "num_input_tokens_seen": 34465852, + "step": 515 + }, + { + "epoch": 0.05843971631205674, + "loss": 2.080181360244751, + "loss_ce": 0.003032926470041275, + "loss_iou": 0.921875, + "loss_num": 0.04638671875, + "loss_xval": 2.078125, + "num_input_tokens_seen": 34465852, + "step": 515 + }, + { + "epoch": 0.0585531914893617, + "grad_norm": 13.548694610595703, + "learning_rate": 5e-05, + "loss": 1.8112, + "num_input_tokens_seen": 34531936, + "step": 516 + }, + { + "epoch": 0.0585531914893617, + "loss": 1.839542031288147, + "loss_ce": 0.0036045718006789684, + "loss_iou": 0.78125, + "loss_num": 0.05419921875, + "loss_xval": 1.8359375, + "num_input_tokens_seen": 34531936, + "step": 516 + }, + { + "epoch": 0.058666666666666666, + "grad_norm": 12.60060977935791, + "learning_rate": 5e-05, + "loss": 1.6168, + "num_input_tokens_seen": 34599228, + "step": 517 + }, + { + "epoch": 0.058666666666666666, + "loss": 1.5476207733154297, + "loss_ce": 0.003675434272736311, + "loss_iou": 0.69140625, + "loss_num": 0.03271484375, + "loss_xval": 1.546875, + "num_input_tokens_seen": 34599228, + "step": 517 + }, + { + "epoch": 0.05878014184397163, + "grad_norm": 11.212620735168457, + "learning_rate": 5e-05, + "loss": 1.606, + "num_input_tokens_seen": 34665604, + "step": 518 + }, + { + "epoch": 0.05878014184397163, + "loss": 1.6227413415908813, + "loss_ce": 0.0036007219459861517, + "loss_iou": 0.69921875, + "loss_num": 0.0439453125, + "loss_xval": 1.6171875, + "num_input_tokens_seen": 34665604, + "step": 518 + }, + { + "epoch": 0.058893617021276594, + "grad_norm": 10.35257625579834, + "learning_rate": 5e-05, + "loss": 1.6028, + "num_input_tokens_seen": 34733168, + "step": 519 + }, + { + "epoch": 0.058893617021276594, + "loss": 1.4511488676071167, + "loss_ce": 0.005836380645632744, + "loss_iou": 0.6171875, + "loss_num": 0.041748046875, + "loss_xval": 1.4453125, + "num_input_tokens_seen": 34733168, + "step": 519 + }, + { + "epoch": 0.05900709219858156, + "grad_norm": 10.087233543395996, + "learning_rate": 5e-05, + "loss": 1.6143, + "num_input_tokens_seen": 34800360, + "step": 520 + }, + { + "epoch": 0.05900709219858156, + "loss": 1.6431810855865479, + "loss_ce": 0.005485733039677143, + "loss_iou": 0.71484375, + "loss_num": 0.04052734375, + "loss_xval": 1.640625, + "num_input_tokens_seen": 34800360, + "step": 520 + }, + { + "epoch": 0.05912056737588652, + "grad_norm": 96.50509643554688, + "learning_rate": 5e-05, + "loss": 1.7135, + "num_input_tokens_seen": 34866112, + "step": 521 + }, + { + "epoch": 0.05912056737588652, + "loss": 1.6259483098983765, + "loss_ce": 0.007295987103134394, + "loss_iou": 0.7421875, + "loss_num": 0.02734375, + "loss_xval": 1.6171875, + "num_input_tokens_seen": 34866112, + "step": 521 + }, + { + "epoch": 0.059234042553191486, + "grad_norm": 27.14364242553711, + "learning_rate": 5e-05, + "loss": 1.9442, + "num_input_tokens_seen": 34933136, + "step": 522 + }, + { + "epoch": 0.059234042553191486, + "loss": 2.0798401832580566, + "loss_ce": 0.0056213499046862125, + "loss_iou": 0.890625, + "loss_num": 0.058837890625, + "loss_xval": 2.078125, + "num_input_tokens_seen": 34933136, + "step": 522 + }, + { + "epoch": 0.05934751773049645, + "grad_norm": 23.030364990234375, + "learning_rate": 5e-05, + "loss": 1.7259, + "num_input_tokens_seen": 35000800, + "step": 523 + }, + { + "epoch": 0.05934751773049645, + "loss": 1.7205592393875122, + "loss_ce": 0.004738873336464167, + "loss_iou": 0.76171875, + "loss_num": 0.038330078125, + "loss_xval": 1.71875, + "num_input_tokens_seen": 35000800, + "step": 523 + }, + { + "epoch": 0.05946099290780142, + "grad_norm": 35.709556579589844, + "learning_rate": 5e-05, + "loss": 1.8271, + "num_input_tokens_seen": 35067688, + "step": 524 + }, + { + "epoch": 0.05946099290780142, + "loss": 1.7660861015319824, + "loss_ce": 0.005343875847756863, + "loss_iou": 0.7578125, + "loss_num": 0.048583984375, + "loss_xval": 1.7578125, + "num_input_tokens_seen": 35067688, + "step": 524 + }, + { + "epoch": 0.059574468085106386, + "grad_norm": 13.621169090270996, + "learning_rate": 5e-05, + "loss": 1.9801, + "num_input_tokens_seen": 35134988, + "step": 525 + }, + { + "epoch": 0.059574468085106386, + "loss": 1.947714924812317, + "loss_ce": 0.004355520009994507, + "loss_iou": 0.87109375, + "loss_num": 0.0400390625, + "loss_xval": 1.9453125, + "num_input_tokens_seen": 35134988, + "step": 525 + }, + { + "epoch": 0.05968794326241135, + "grad_norm": 25.50764274597168, + "learning_rate": 5e-05, + "loss": 1.6889, + "num_input_tokens_seen": 35201876, + "step": 526 + }, + { + "epoch": 0.05968794326241135, + "loss": 1.7652301788330078, + "loss_ce": 0.0025348812341690063, + "loss_iou": 0.77734375, + "loss_num": 0.041015625, + "loss_xval": 1.765625, + "num_input_tokens_seen": 35201876, + "step": 526 + }, + { + "epoch": 0.059801418439716314, + "grad_norm": 16.913545608520508, + "learning_rate": 5e-05, + "loss": 1.5122, + "num_input_tokens_seen": 35268312, + "step": 527 + }, + { + "epoch": 0.059801418439716314, + "loss": 1.579519510269165, + "loss_ce": 0.010183566249907017, + "loss_iou": 0.6796875, + "loss_num": 0.042724609375, + "loss_xval": 1.5703125, + "num_input_tokens_seen": 35268312, + "step": 527 + }, + { + "epoch": 0.05991489361702128, + "grad_norm": 19.254709243774414, + "learning_rate": 5e-05, + "loss": 1.7794, + "num_input_tokens_seen": 35335636, + "step": 528 + }, + { + "epoch": 0.05991489361702128, + "loss": 1.7847877740859985, + "loss_ce": 0.005490846466273069, + "loss_iou": 0.8046875, + "loss_num": 0.03369140625, + "loss_xval": 1.78125, + "num_input_tokens_seen": 35335636, + "step": 528 + }, + { + "epoch": 0.06002836879432624, + "grad_norm": 19.000782012939453, + "learning_rate": 5e-05, + "loss": 1.7019, + "num_input_tokens_seen": 35403144, + "step": 529 + }, + { + "epoch": 0.06002836879432624, + "loss": 1.6729457378387451, + "loss_ce": 0.003023950383067131, + "loss_iou": 0.71484375, + "loss_num": 0.04833984375, + "loss_xval": 1.671875, + "num_input_tokens_seen": 35403144, + "step": 529 + }, + { + "epoch": 0.060141843971631206, + "grad_norm": 23.39441680908203, + "learning_rate": 5e-05, + "loss": 1.596, + "num_input_tokens_seen": 35469976, + "step": 530 + }, + { + "epoch": 0.060141843971631206, + "loss": 1.753605842590332, + "loss_ce": 0.010441720485687256, + "loss_iou": 0.7734375, + "loss_num": 0.039306640625, + "loss_xval": 1.7421875, + "num_input_tokens_seen": 35469976, + "step": 530 + }, + { + "epoch": 0.06025531914893617, + "grad_norm": 20.822715759277344, + "learning_rate": 5e-05, + "loss": 1.7138, + "num_input_tokens_seen": 35536292, + "step": 531 + }, + { + "epoch": 0.06025531914893617, + "loss": 1.4097504615783691, + "loss_ce": 0.00740666501224041, + "loss_iou": 0.60546875, + "loss_num": 0.03759765625, + "loss_xval": 1.40625, + "num_input_tokens_seen": 35536292, + "step": 531 + }, + { + "epoch": 0.060368794326241135, + "grad_norm": 20.405210494995117, + "learning_rate": 5e-05, + "loss": 1.6056, + "num_input_tokens_seen": 35603836, + "step": 532 + }, + { + "epoch": 0.060368794326241135, + "loss": 1.6589752435684204, + "loss_ce": 0.0027252279687672853, + "loss_iou": 0.75390625, + "loss_num": 0.0302734375, + "loss_xval": 1.65625, + "num_input_tokens_seen": 35603836, + "step": 532 + }, + { + "epoch": 0.0604822695035461, + "grad_norm": 18.58323097229004, + "learning_rate": 5e-05, + "loss": 1.9152, + "num_input_tokens_seen": 35670728, + "step": 533 + }, + { + "epoch": 0.0604822695035461, + "loss": 1.8888335227966309, + "loss_ce": 0.004067913629114628, + "loss_iou": 0.8125, + "loss_num": 0.0517578125, + "loss_xval": 1.8828125, + "num_input_tokens_seen": 35670728, + "step": 533 + }, + { + "epoch": 0.06059574468085106, + "grad_norm": 25.258344650268555, + "learning_rate": 5e-05, + "loss": 1.8273, + "num_input_tokens_seen": 35737436, + "step": 534 + }, + { + "epoch": 0.06059574468085106, + "loss": 1.7773313522338867, + "loss_ce": 0.004870472941547632, + "loss_iou": 0.7578125, + "loss_num": 0.0517578125, + "loss_xval": 1.7734375, + "num_input_tokens_seen": 35737436, + "step": 534 + }, + { + "epoch": 0.06070921985815603, + "grad_norm": 20.252824783325195, + "learning_rate": 5e-05, + "loss": 1.8809, + "num_input_tokens_seen": 35804404, + "step": 535 + }, + { + "epoch": 0.06070921985815603, + "loss": 1.8921219110488892, + "loss_ce": 0.006257691420614719, + "loss_iou": 0.8046875, + "loss_num": 0.056396484375, + "loss_xval": 1.8828125, + "num_input_tokens_seen": 35804404, + "step": 535 + }, + { + "epoch": 0.06082269503546099, + "grad_norm": 14.86203670501709, + "learning_rate": 5e-05, + "loss": 1.7341, + "num_input_tokens_seen": 35871276, + "step": 536 + }, + { + "epoch": 0.06082269503546099, + "loss": 1.5810725688934326, + "loss_ce": 0.003924145363271236, + "loss_iou": 0.71875, + "loss_num": 0.0279541015625, + "loss_xval": 1.578125, + "num_input_tokens_seen": 35871276, + "step": 536 + }, + { + "epoch": 0.060936170212765955, + "grad_norm": 14.194769859313965, + "learning_rate": 5e-05, + "loss": 1.5622, + "num_input_tokens_seen": 35938012, + "step": 537 + }, + { + "epoch": 0.060936170212765955, + "loss": 1.627314567565918, + "loss_ce": 0.0013379884185269475, + "loss_iou": 0.7265625, + "loss_num": 0.0341796875, + "loss_xval": 1.625, + "num_input_tokens_seen": 35938012, + "step": 537 + }, + { + "epoch": 0.06104964539007092, + "grad_norm": 17.0379581451416, + "learning_rate": 5e-05, + "loss": 1.5731, + "num_input_tokens_seen": 36003924, + "step": 538 + }, + { + "epoch": 0.06104964539007092, + "loss": 1.7681517601013184, + "loss_ce": 0.007409567013382912, + "loss_iou": 0.734375, + "loss_num": 0.05859375, + "loss_xval": 1.7578125, + "num_input_tokens_seen": 36003924, + "step": 538 + }, + { + "epoch": 0.061163120567375884, + "grad_norm": 20.948001861572266, + "learning_rate": 5e-05, + "loss": 1.908, + "num_input_tokens_seen": 36071564, + "step": 539 + }, + { + "epoch": 0.061163120567375884, + "loss": 2.020449638366699, + "loss_ce": 0.005801196675747633, + "loss_iou": 0.9140625, + "loss_num": 0.03759765625, + "loss_xval": 2.015625, + "num_input_tokens_seen": 36071564, + "step": 539 + }, + { + "epoch": 0.06127659574468085, + "grad_norm": 20.01818084716797, + "learning_rate": 5e-05, + "loss": 1.7123, + "num_input_tokens_seen": 36138760, + "step": 540 + }, + { + "epoch": 0.06127659574468085, + "loss": 1.7278000116348267, + "loss_ce": 0.0031906291842460632, + "loss_iou": 0.77734375, + "loss_num": 0.033447265625, + "loss_xval": 1.7265625, + "num_input_tokens_seen": 36138760, + "step": 540 + }, + { + "epoch": 0.06139007092198582, + "grad_norm": 39.919002532958984, + "learning_rate": 5e-05, + "loss": 1.9079, + "num_input_tokens_seen": 36206164, + "step": 541 + }, + { + "epoch": 0.06139007092198582, + "loss": 1.961904764175415, + "loss_ce": 0.006826545111835003, + "loss_iou": 0.87109375, + "loss_num": 0.042724609375, + "loss_xval": 1.953125, + "num_input_tokens_seen": 36206164, + "step": 541 + }, + { + "epoch": 0.06150354609929078, + "grad_norm": 11.24838924407959, + "learning_rate": 5e-05, + "loss": 2.1925, + "num_input_tokens_seen": 36274936, + "step": 542 + }, + { + "epoch": 0.06150354609929078, + "loss": 2.2931246757507324, + "loss_ce": 0.0021091499365866184, + "loss_iou": 0.98046875, + "loss_num": 0.0654296875, + "loss_xval": 2.296875, + "num_input_tokens_seen": 36274936, + "step": 542 + }, + { + "epoch": 0.06161702127659575, + "grad_norm": 14.300247192382812, + "learning_rate": 5e-05, + "loss": 1.8586, + "num_input_tokens_seen": 36342100, + "step": 543 + }, + { + "epoch": 0.06161702127659575, + "loss": 1.840743064880371, + "loss_ce": 0.007491080090403557, + "loss_iou": 0.8046875, + "loss_num": 0.045166015625, + "loss_xval": 1.8359375, + "num_input_tokens_seen": 36342100, + "step": 543 + }, + { + "epoch": 0.06173049645390071, + "grad_norm": 20.419960021972656, + "learning_rate": 5e-05, + "loss": 1.7275, + "num_input_tokens_seen": 36408516, + "step": 544 + }, + { + "epoch": 0.06173049645390071, + "loss": 1.734588623046875, + "loss_ce": 0.005096470471471548, + "loss_iou": 0.765625, + "loss_num": 0.04052734375, + "loss_xval": 1.7265625, + "num_input_tokens_seen": 36408516, + "step": 544 + }, + { + "epoch": 0.061843971631205676, + "grad_norm": 17.53101348876953, + "learning_rate": 5e-05, + "loss": 1.5373, + "num_input_tokens_seen": 36475432, + "step": 545 + }, + { + "epoch": 0.061843971631205676, + "loss": 1.191654086112976, + "loss_ce": 0.003543720580637455, + "loss_iou": 0.5234375, + "loss_num": 0.027587890625, + "loss_xval": 1.1875, + "num_input_tokens_seen": 36475432, + "step": 545 + }, + { + "epoch": 0.06195744680851064, + "grad_norm": 22.710166931152344, + "learning_rate": 5e-05, + "loss": 1.4326, + "num_input_tokens_seen": 36541316, + "step": 546 + }, + { + "epoch": 0.06195744680851064, + "loss": 1.3182244300842285, + "loss_ce": 0.0076775928027927876, + "loss_iou": 0.5859375, + "loss_num": 0.02783203125, + "loss_xval": 1.3125, + "num_input_tokens_seen": 36541316, + "step": 546 + }, + { + "epoch": 0.062070921985815604, + "grad_norm": 34.3032341003418, + "learning_rate": 5e-05, + "loss": 1.981, + "num_input_tokens_seen": 36608312, + "step": 547 + }, + { + "epoch": 0.062070921985815604, + "loss": 1.8957520723342896, + "loss_ce": 0.0067140525206923485, + "loss_iou": 0.80859375, + "loss_num": 0.0546875, + "loss_xval": 1.890625, + "num_input_tokens_seen": 36608312, + "step": 547 + }, + { + "epoch": 0.06218439716312057, + "grad_norm": 17.283754348754883, + "learning_rate": 5e-05, + "loss": 1.9001, + "num_input_tokens_seen": 36675056, + "step": 548 + }, + { + "epoch": 0.06218439716312057, + "loss": 1.8998531103134155, + "loss_ce": 0.004345144610852003, + "loss_iou": 0.8203125, + "loss_num": 0.051025390625, + "loss_xval": 1.8984375, + "num_input_tokens_seen": 36675056, + "step": 548 + }, + { + "epoch": 0.06229787234042553, + "grad_norm": 16.461484909057617, + "learning_rate": 5e-05, + "loss": 1.768, + "num_input_tokens_seen": 36742528, + "step": 549 + }, + { + "epoch": 0.06229787234042553, + "loss": 1.811762809753418, + "loss_ce": 0.007075282745063305, + "loss_iou": 0.796875, + "loss_num": 0.041748046875, + "loss_xval": 1.8046875, + "num_input_tokens_seen": 36742528, + "step": 549 + }, + { + "epoch": 0.062411347517730496, + "grad_norm": 12.02273178100586, + "learning_rate": 5e-05, + "loss": 1.6257, + "num_input_tokens_seen": 36809732, + "step": 550 + }, + { + "epoch": 0.062411347517730496, + "loss": 1.6335277557373047, + "loss_ce": 0.004621502477675676, + "loss_iou": 0.71875, + "loss_num": 0.0380859375, + "loss_xval": 1.625, + "num_input_tokens_seen": 36809732, + "step": 550 + }, + { + "epoch": 0.06252482269503547, + "grad_norm": 24.176513671875, + "learning_rate": 5e-05, + "loss": 1.7811, + "num_input_tokens_seen": 36876920, + "step": 551 + }, + { + "epoch": 0.06252482269503547, + "loss": 1.7633376121520996, + "loss_ce": 0.007478253450244665, + "loss_iou": 0.78515625, + "loss_num": 0.036865234375, + "loss_xval": 1.7578125, + "num_input_tokens_seen": 36876920, + "step": 551 + }, + { + "epoch": 0.06263829787234043, + "grad_norm": 30.01481819152832, + "learning_rate": 5e-05, + "loss": 1.7696, + "num_input_tokens_seen": 36944688, + "step": 552 + }, + { + "epoch": 0.06263829787234043, + "loss": 1.6620619297027588, + "loss_ce": 0.004835264757275581, + "loss_iou": 0.7421875, + "loss_num": 0.034912109375, + "loss_xval": 1.65625, + "num_input_tokens_seen": 36944688, + "step": 552 + }, + { + "epoch": 0.0627517730496454, + "grad_norm": 34.17885208129883, + "learning_rate": 5e-05, + "loss": 1.9317, + "num_input_tokens_seen": 37011852, + "step": 553 + }, + { + "epoch": 0.0627517730496454, + "loss": 1.9158542156219482, + "loss_ce": 0.0037448746152222157, + "loss_iou": 0.8671875, + "loss_num": 0.036376953125, + "loss_xval": 1.9140625, + "num_input_tokens_seen": 37011852, + "step": 553 + }, + { + "epoch": 0.06286524822695036, + "grad_norm": 12.19680118560791, + "learning_rate": 5e-05, + "loss": 1.5668, + "num_input_tokens_seen": 37078128, + "step": 554 + }, + { + "epoch": 0.06286524822695036, + "loss": 1.4565728902816772, + "loss_ce": 0.00643862085416913, + "loss_iou": 0.62890625, + "loss_num": 0.037841796875, + "loss_xval": 1.453125, + "num_input_tokens_seen": 37078128, + "step": 554 + }, + { + "epoch": 0.06297872340425532, + "grad_norm": 10.264986991882324, + "learning_rate": 5e-05, + "loss": 1.5638, + "num_input_tokens_seen": 37145788, + "step": 555 + }, + { + "epoch": 0.06297872340425532, + "loss": 1.6320509910583496, + "loss_ce": 0.006074397824704647, + "loss_iou": 0.703125, + "loss_num": 0.04345703125, + "loss_xval": 1.625, + "num_input_tokens_seen": 37145788, + "step": 555 + }, + { + "epoch": 0.06309219858156029, + "grad_norm": 17.436941146850586, + "learning_rate": 5e-05, + "loss": 1.4833, + "num_input_tokens_seen": 37212564, + "step": 556 + }, + { + "epoch": 0.06309219858156029, + "loss": 1.5139812231063843, + "loss_ce": 0.0032389964908361435, + "loss_iou": 0.6796875, + "loss_num": 0.029541015625, + "loss_xval": 1.5078125, + "num_input_tokens_seen": 37212564, + "step": 556 + }, + { + "epoch": 0.06320567375886525, + "grad_norm": 37.04698944091797, + "learning_rate": 5e-05, + "loss": 1.9496, + "num_input_tokens_seen": 37279736, + "step": 557 + }, + { + "epoch": 0.06320567375886525, + "loss": 2.0118601322174072, + "loss_ce": 0.006977184675633907, + "loss_iou": 0.890625, + "loss_num": 0.04541015625, + "loss_xval": 2.0, + "num_input_tokens_seen": 37279736, + "step": 557 + }, + { + "epoch": 0.06331914893617022, + "grad_norm": 16.589229583740234, + "learning_rate": 5e-05, + "loss": 1.7451, + "num_input_tokens_seen": 37346948, + "step": 558 + }, + { + "epoch": 0.06331914893617022, + "loss": 1.6826958656311035, + "loss_ce": 0.0030082426965236664, + "loss_iou": 0.69921875, + "loss_num": 0.05615234375, + "loss_xval": 1.6796875, + "num_input_tokens_seen": 37346948, + "step": 558 + }, + { + "epoch": 0.06343262411347518, + "grad_norm": 13.61721420288086, + "learning_rate": 5e-05, + "loss": 1.6473, + "num_input_tokens_seen": 37414164, + "step": 559 + }, + { + "epoch": 0.06343262411347518, + "loss": 1.6174900531768799, + "loss_ce": 0.004208875820040703, + "loss_iou": 0.6640625, + "loss_num": 0.056640625, + "loss_xval": 1.609375, + "num_input_tokens_seen": 37414164, + "step": 559 + }, + { + "epoch": 0.06354609929078014, + "grad_norm": 35.18513488769531, + "learning_rate": 5e-05, + "loss": 1.4045, + "num_input_tokens_seen": 37479356, + "step": 560 + }, + { + "epoch": 0.06354609929078014, + "loss": 1.2530691623687744, + "loss_ce": 0.0068533592857420444, + "loss_iou": 0.55859375, + "loss_num": 0.0263671875, + "loss_xval": 1.25, + "num_input_tokens_seen": 37479356, + "step": 560 + }, + { + "epoch": 0.06365957446808511, + "grad_norm": 12.840065956115723, + "learning_rate": 5e-05, + "loss": 2.0335, + "num_input_tokens_seen": 37547240, + "step": 561 + }, + { + "epoch": 0.06365957446808511, + "loss": 2.186563014984131, + "loss_ce": 0.006875517778098583, + "loss_iou": 0.9140625, + "loss_num": 0.06982421875, + "loss_xval": 2.1875, + "num_input_tokens_seen": 37547240, + "step": 561 + }, + { + "epoch": 0.06377304964539007, + "grad_norm": 16.3641357421875, + "learning_rate": 5e-05, + "loss": 1.7715, + "num_input_tokens_seen": 37614288, + "step": 562 + }, + { + "epoch": 0.06377304964539007, + "loss": 1.605953335762024, + "loss_ce": 0.0034142928197979927, + "loss_iou": 0.71875, + "loss_num": 0.033935546875, + "loss_xval": 1.6015625, + "num_input_tokens_seen": 37614288, + "step": 562 + }, + { + "epoch": 0.06388652482269504, + "grad_norm": 17.587444305419922, + "learning_rate": 5e-05, + "loss": 1.5267, + "num_input_tokens_seen": 37681012, + "step": 563 + }, + { + "epoch": 0.06388652482269504, + "loss": 1.5818374156951904, + "loss_ce": 0.012013217434287071, + "loss_iou": 0.68359375, + "loss_num": 0.04052734375, + "loss_xval": 1.5703125, + "num_input_tokens_seen": 37681012, + "step": 563 + }, + { + "epoch": 0.064, + "grad_norm": 22.08576011657715, + "learning_rate": 5e-05, + "loss": 1.5907, + "num_input_tokens_seen": 37748060, + "step": 564 + }, + { + "epoch": 0.064, + "loss": 1.6009418964385986, + "loss_ce": 0.004140220582485199, + "loss_iou": 0.671875, + "loss_num": 0.050048828125, + "loss_xval": 1.59375, + "num_input_tokens_seen": 37748060, + "step": 564 + }, + { + "epoch": 0.06411347517730497, + "grad_norm": 17.273958206176758, + "learning_rate": 5e-05, + "loss": 1.6593, + "num_input_tokens_seen": 37815040, + "step": 565 + }, + { + "epoch": 0.06411347517730497, + "loss": 1.7452912330627441, + "loss_ce": 0.0099396463483572, + "loss_iou": 0.7578125, + "loss_num": 0.04345703125, + "loss_xval": 1.734375, + "num_input_tokens_seen": 37815040, + "step": 565 + }, + { + "epoch": 0.06422695035460993, + "grad_norm": 31.871431350708008, + "learning_rate": 5e-05, + "loss": 1.6696, + "num_input_tokens_seen": 37882260, + "step": 566 + }, + { + "epoch": 0.06422695035460993, + "loss": 1.651158332824707, + "loss_ce": 0.002110453322529793, + "loss_iou": 0.70703125, + "loss_num": 0.047607421875, + "loss_xval": 1.6484375, + "num_input_tokens_seen": 37882260, + "step": 566 + }, + { + "epoch": 0.0643404255319149, + "grad_norm": 17.15639305114746, + "learning_rate": 5e-05, + "loss": 1.9853, + "num_input_tokens_seen": 37950128, + "step": 567 + }, + { + "epoch": 0.0643404255319149, + "loss": 2.0380375385284424, + "loss_ce": 0.005811103619635105, + "loss_iou": 0.875, + "loss_num": 0.05712890625, + "loss_xval": 2.03125, + "num_input_tokens_seen": 37950128, + "step": 567 + }, + { + "epoch": 0.06445390070921986, + "grad_norm": 19.684322357177734, + "learning_rate": 5e-05, + "loss": 1.5924, + "num_input_tokens_seen": 38017920, + "step": 568 + }, + { + "epoch": 0.06445390070921986, + "loss": 1.5636759996414185, + "loss_ce": 0.007035387214273214, + "loss_iou": 0.6875, + "loss_num": 0.035400390625, + "loss_xval": 1.5546875, + "num_input_tokens_seen": 38017920, + "step": 568 + }, + { + "epoch": 0.06456737588652482, + "grad_norm": 28.268169403076172, + "learning_rate": 5e-05, + "loss": 1.8215, + "num_input_tokens_seen": 38085340, + "step": 569 + }, + { + "epoch": 0.06456737588652482, + "loss": 1.748263955116272, + "loss_ce": 0.004123330116271973, + "loss_iou": 0.765625, + "loss_num": 0.042236328125, + "loss_xval": 1.7421875, + "num_input_tokens_seen": 38085340, + "step": 569 + }, + { + "epoch": 0.06468085106382979, + "grad_norm": 18.407001495361328, + "learning_rate": 5e-05, + "loss": 1.8403, + "num_input_tokens_seen": 38152512, + "step": 570 + }, + { + "epoch": 0.06468085106382979, + "loss": 1.785724401473999, + "loss_ce": 0.002521247137337923, + "loss_iou": 0.80859375, + "loss_num": 0.033447265625, + "loss_xval": 1.78125, + "num_input_tokens_seen": 38152512, + "step": 570 + }, + { + "epoch": 0.06479432624113475, + "grad_norm": 11.4957914352417, + "learning_rate": 5e-05, + "loss": 1.6404, + "num_input_tokens_seen": 38219120, + "step": 571 + }, + { + "epoch": 0.06479432624113475, + "loss": 1.5621229410171509, + "loss_ce": 0.0035292310640215874, + "loss_iou": 0.703125, + "loss_num": 0.030517578125, + "loss_xval": 1.5625, + "num_input_tokens_seen": 38219120, + "step": 571 + }, + { + "epoch": 0.06490780141843971, + "grad_norm": 13.600722312927246, + "learning_rate": 5e-05, + "loss": 1.5701, + "num_input_tokens_seen": 38286396, + "step": 572 + }, + { + "epoch": 0.06490780141843971, + "loss": 1.454299807548523, + "loss_ce": 0.00703424634411931, + "loss_iou": 0.6484375, + "loss_num": 0.02978515625, + "loss_xval": 1.4453125, + "num_input_tokens_seen": 38286396, + "step": 572 + }, + { + "epoch": 0.06502127659574468, + "grad_norm": 10.596799850463867, + "learning_rate": 5e-05, + "loss": 1.5276, + "num_input_tokens_seen": 38353584, + "step": 573 + }, + { + "epoch": 0.06502127659574468, + "loss": 1.7743680477142334, + "loss_ce": 0.002883702050894499, + "loss_iou": 0.77734375, + "loss_num": 0.043701171875, + "loss_xval": 1.7734375, + "num_input_tokens_seen": 38353584, + "step": 573 + }, + { + "epoch": 0.06513475177304964, + "grad_norm": 9.629573822021484, + "learning_rate": 5e-05, + "loss": 1.4846, + "num_input_tokens_seen": 38419416, + "step": 574 + }, + { + "epoch": 0.06513475177304964, + "loss": 1.2836133241653442, + "loss_ce": 0.002790600759908557, + "loss_iou": 0.57421875, + "loss_num": 0.0269775390625, + "loss_xval": 1.28125, + "num_input_tokens_seen": 38419416, + "step": 574 + }, + { + "epoch": 0.06524822695035461, + "grad_norm": 26.703699111938477, + "learning_rate": 5e-05, + "loss": 1.5657, + "num_input_tokens_seen": 38485880, + "step": 575 + }, + { + "epoch": 0.06524822695035461, + "loss": 1.5611311197280884, + "loss_ce": 0.004490496125072241, + "loss_iou": 0.68359375, + "loss_num": 0.037841796875, + "loss_xval": 1.5546875, + "num_input_tokens_seen": 38485880, + "step": 575 + }, + { + "epoch": 0.06536170212765957, + "grad_norm": 20.315994262695312, + "learning_rate": 5e-05, + "loss": 1.6256, + "num_input_tokens_seen": 38552840, + "step": 576 + }, + { + "epoch": 0.06536170212765957, + "loss": 1.717169165611267, + "loss_ce": 0.005682292860001326, + "loss_iou": 0.71875, + "loss_num": 0.0556640625, + "loss_xval": 1.7109375, + "num_input_tokens_seen": 38552840, + "step": 576 + }, + { + "epoch": 0.06547517730496454, + "grad_norm": 31.237037658691406, + "learning_rate": 5e-05, + "loss": 1.6148, + "num_input_tokens_seen": 38619208, + "step": 577 + }, + { + "epoch": 0.06547517730496454, + "loss": 1.52614426612854, + "loss_ce": 0.0034392906818538904, + "loss_iou": 0.67578125, + "loss_num": 0.033935546875, + "loss_xval": 1.5234375, + "num_input_tokens_seen": 38619208, + "step": 577 + }, + { + "epoch": 0.0655886524822695, + "grad_norm": 15.365677833557129, + "learning_rate": 5e-05, + "loss": 1.9115, + "num_input_tokens_seen": 38686140, + "step": 578 + }, + { + "epoch": 0.0655886524822695, + "loss": 1.838338017463684, + "loss_ce": 0.0024004983715713024, + "loss_iou": 0.80859375, + "loss_num": 0.04296875, + "loss_xval": 1.8359375, + "num_input_tokens_seen": 38686140, + "step": 578 + }, + { + "epoch": 0.06570212765957446, + "grad_norm": 15.013236045837402, + "learning_rate": 5e-05, + "loss": 1.4891, + "num_input_tokens_seen": 38752384, + "step": 579 + }, + { + "epoch": 0.06570212765957446, + "loss": 1.4565564393997192, + "loss_ce": 0.00245487317442894, + "loss_iou": 0.65234375, + "loss_num": 0.0299072265625, + "loss_xval": 1.453125, + "num_input_tokens_seen": 38752384, + "step": 579 + }, + { + "epoch": 0.06581560283687943, + "grad_norm": 21.928831100463867, + "learning_rate": 5e-05, + "loss": 1.5883, + "num_input_tokens_seen": 38818528, + "step": 580 + }, + { + "epoch": 0.06581560283687943, + "loss": 1.53806471824646, + "loss_ce": 0.007791226264089346, + "loss_iou": 0.66796875, + "loss_num": 0.0390625, + "loss_xval": 1.53125, + "num_input_tokens_seen": 38818528, + "step": 580 + }, + { + "epoch": 0.06592907801418439, + "grad_norm": 16.580617904663086, + "learning_rate": 5e-05, + "loss": 1.7829, + "num_input_tokens_seen": 38884760, + "step": 581 + }, + { + "epoch": 0.06592907801418439, + "loss": 1.5970311164855957, + "loss_ce": 0.006210782565176487, + "loss_iou": 0.66015625, + "loss_num": 0.053466796875, + "loss_xval": 1.59375, + "num_input_tokens_seen": 38884760, + "step": 581 + }, + { + "epoch": 0.06604255319148936, + "grad_norm": 42.71949005126953, + "learning_rate": 5e-05, + "loss": 1.5962, + "num_input_tokens_seen": 38952592, + "step": 582 + }, + { + "epoch": 0.06604255319148936, + "loss": 1.6867098808288574, + "loss_ce": 0.003116261214017868, + "loss_iou": 0.7265625, + "loss_num": 0.04638671875, + "loss_xval": 1.6875, + "num_input_tokens_seen": 38952592, + "step": 582 + }, + { + "epoch": 0.06615602836879432, + "grad_norm": 42.831298828125, + "learning_rate": 5e-05, + "loss": 1.8295, + "num_input_tokens_seen": 39021236, + "step": 583 + }, + { + "epoch": 0.06615602836879432, + "loss": 1.6382262706756592, + "loss_ce": 0.004437285475432873, + "loss_iou": 0.73828125, + "loss_num": 0.031005859375, + "loss_xval": 1.6328125, + "num_input_tokens_seen": 39021236, + "step": 583 + }, + { + "epoch": 0.06626950354609928, + "grad_norm": 14.004083633422852, + "learning_rate": 5e-05, + "loss": 1.8068, + "num_input_tokens_seen": 39087432, + "step": 584 + }, + { + "epoch": 0.06626950354609928, + "loss": 1.7816723585128784, + "loss_ce": 0.00725825410336256, + "loss_iou": 0.7734375, + "loss_num": 0.044921875, + "loss_xval": 1.7734375, + "num_input_tokens_seen": 39087432, + "step": 584 + }, + { + "epoch": 0.06638297872340425, + "grad_norm": 14.19130802154541, + "learning_rate": 5e-05, + "loss": 1.5991, + "num_input_tokens_seen": 39153868, + "step": 585 + }, + { + "epoch": 0.06638297872340425, + "loss": 1.954810619354248, + "loss_ce": 0.006568385753780603, + "loss_iou": 0.82421875, + "loss_num": 0.0595703125, + "loss_xval": 1.9453125, + "num_input_tokens_seen": 39153868, + "step": 585 + }, + { + "epoch": 0.06649645390070923, + "grad_norm": 27.09761619567871, + "learning_rate": 5e-05, + "loss": 1.4602, + "num_input_tokens_seen": 39219928, + "step": 586 + }, + { + "epoch": 0.06649645390070923, + "loss": 1.6036757230758667, + "loss_ce": 0.00553122628480196, + "loss_iou": 0.6875, + "loss_num": 0.044921875, + "loss_xval": 1.6015625, + "num_input_tokens_seen": 39219928, + "step": 586 + }, + { + "epoch": 0.06660992907801419, + "grad_norm": 17.1959285736084, + "learning_rate": 5e-05, + "loss": 1.8165, + "num_input_tokens_seen": 39287044, + "step": 587 + }, + { + "epoch": 0.06660992907801419, + "loss": 1.8531060218811035, + "loss_ce": 0.0054498109966516495, + "loss_iou": 0.80859375, + "loss_num": 0.046142578125, + "loss_xval": 1.84375, + "num_input_tokens_seen": 39287044, + "step": 587 + }, + { + "epoch": 0.06672340425531915, + "grad_norm": 12.897171020507812, + "learning_rate": 5e-05, + "loss": 1.603, + "num_input_tokens_seen": 39355280, + "step": 588 + }, + { + "epoch": 0.06672340425531915, + "loss": 1.6399366855621338, + "loss_ce": 0.0071242013946175575, + "loss_iou": 0.734375, + "loss_num": 0.032470703125, + "loss_xval": 1.6328125, + "num_input_tokens_seen": 39355280, + "step": 588 + }, + { + "epoch": 0.06683687943262412, + "grad_norm": 21.204404830932617, + "learning_rate": 5e-05, + "loss": 1.5218, + "num_input_tokens_seen": 39422348, + "step": 589 + }, + { + "epoch": 0.06683687943262412, + "loss": 1.488194465637207, + "loss_ce": 0.0033312716986984015, + "loss_iou": 0.66015625, + "loss_num": 0.033203125, + "loss_xval": 1.484375, + "num_input_tokens_seen": 39422348, + "step": 589 + }, + { + "epoch": 0.06695035460992908, + "grad_norm": 18.303844451904297, + "learning_rate": 5e-05, + "loss": 1.7793, + "num_input_tokens_seen": 39490508, + "step": 590 + }, + { + "epoch": 0.06695035460992908, + "loss": 1.6556947231292725, + "loss_ce": 0.008233844302594662, + "loss_iou": 0.7265625, + "loss_num": 0.0380859375, + "loss_xval": 1.6484375, + "num_input_tokens_seen": 39490508, + "step": 590 + }, + { + "epoch": 0.06706382978723405, + "grad_norm": 14.349313735961914, + "learning_rate": 5e-05, + "loss": 1.5069, + "num_input_tokens_seen": 39558164, + "step": 591 + }, + { + "epoch": 0.06706382978723405, + "loss": 1.7117242813110352, + "loss_ce": 0.0066461022943258286, + "loss_iou": 0.72265625, + "loss_num": 0.052001953125, + "loss_xval": 1.703125, + "num_input_tokens_seen": 39558164, + "step": 591 + }, + { + "epoch": 0.06717730496453901, + "grad_norm": 21.23827362060547, + "learning_rate": 5e-05, + "loss": 1.5846, + "num_input_tokens_seen": 39625428, + "step": 592 + }, + { + "epoch": 0.06717730496453901, + "loss": 1.6359304189682007, + "loss_ce": 0.008977305144071579, + "loss_iou": 0.7109375, + "loss_num": 0.04150390625, + "loss_xval": 1.625, + "num_input_tokens_seen": 39625428, + "step": 592 + }, + { + "epoch": 0.06729078014184398, + "grad_norm": 31.057449340820312, + "learning_rate": 5e-05, + "loss": 1.977, + "num_input_tokens_seen": 39691844, + "step": 593 + }, + { + "epoch": 0.06729078014184398, + "loss": 2.0832417011260986, + "loss_ce": 0.005116683430969715, + "loss_iou": 0.94921875, + "loss_num": 0.035888671875, + "loss_xval": 2.078125, + "num_input_tokens_seen": 39691844, + "step": 593 + }, + { + "epoch": 0.06740425531914894, + "grad_norm": 12.890153884887695, + "learning_rate": 5e-05, + "loss": 1.8426, + "num_input_tokens_seen": 39759296, + "step": 594 + }, + { + "epoch": 0.06740425531914894, + "loss": 1.6216418743133545, + "loss_ce": 0.007384100928902626, + "loss_iou": 0.67578125, + "loss_num": 0.05224609375, + "loss_xval": 1.6171875, + "num_input_tokens_seen": 39759296, + "step": 594 + }, + { + "epoch": 0.0675177304964539, + "grad_norm": 24.261539459228516, + "learning_rate": 5e-05, + "loss": 1.5583, + "num_input_tokens_seen": 39826336, + "step": 595 + }, + { + "epoch": 0.0675177304964539, + "loss": 1.448413372039795, + "loss_ce": 0.005176061298698187, + "loss_iou": 0.61328125, + "loss_num": 0.04248046875, + "loss_xval": 1.4453125, + "num_input_tokens_seen": 39826336, + "step": 595 + }, + { + "epoch": 0.06763120567375887, + "grad_norm": 29.720355987548828, + "learning_rate": 5e-05, + "loss": 1.5918, + "num_input_tokens_seen": 39894040, + "step": 596 + }, + { + "epoch": 0.06763120567375887, + "loss": 1.692023754119873, + "loss_ce": 0.004523801617324352, + "loss_iou": 0.7265625, + "loss_num": 0.046875, + "loss_xval": 1.6875, + "num_input_tokens_seen": 39894040, + "step": 596 + }, + { + "epoch": 0.06774468085106383, + "grad_norm": 19.356027603149414, + "learning_rate": 5e-05, + "loss": 2.1152, + "num_input_tokens_seen": 39960728, + "step": 597 + }, + { + "epoch": 0.06774468085106383, + "loss": 1.98872971534729, + "loss_ce": 0.0033782534301280975, + "loss_iou": 0.88671875, + "loss_num": 0.042236328125, + "loss_xval": 1.984375, + "num_input_tokens_seen": 39960728, + "step": 597 + }, + { + "epoch": 0.0678581560283688, + "grad_norm": 28.932636260986328, + "learning_rate": 5e-05, + "loss": 1.647, + "num_input_tokens_seen": 40028548, + "step": 598 + }, + { + "epoch": 0.0678581560283688, + "loss": 1.6793391704559326, + "loss_ce": 0.005510909017175436, + "loss_iou": 0.703125, + "loss_num": 0.053466796875, + "loss_xval": 1.671875, + "num_input_tokens_seen": 40028548, + "step": 598 + }, + { + "epoch": 0.06797163120567376, + "grad_norm": 200.7366485595703, + "learning_rate": 5e-05, + "loss": 1.7279, + "num_input_tokens_seen": 40096236, + "step": 599 + }, + { + "epoch": 0.06797163120567376, + "loss": 1.7584202289581299, + "loss_ce": 0.003537369892001152, + "loss_iou": 0.765625, + "loss_num": 0.044677734375, + "loss_xval": 1.7578125, + "num_input_tokens_seen": 40096236, + "step": 599 + }, + { + "epoch": 0.06808510638297872, + "grad_norm": 17.00959587097168, + "learning_rate": 5e-05, + "loss": 1.9907, + "num_input_tokens_seen": 40164748, + "step": 600 + }, + { + "epoch": 0.06808510638297872, + "loss": 2.033651113510132, + "loss_ce": 0.004354337230324745, + "loss_iou": 0.87109375, + "loss_num": 0.05712890625, + "loss_xval": 2.03125, + "num_input_tokens_seen": 40164748, + "step": 600 + }, + { + "epoch": 0.06819858156028369, + "grad_norm": 14.241485595703125, + "learning_rate": 5e-05, + "loss": 1.3223, + "num_input_tokens_seen": 40230628, + "step": 601 + }, + { + "epoch": 0.06819858156028369, + "loss": 1.3839753866195679, + "loss_ce": 0.001651229104027152, + "loss_iou": 0.6328125, + "loss_num": 0.0235595703125, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 40230628, + "step": 601 + }, + { + "epoch": 0.06831205673758865, + "grad_norm": 19.954608917236328, + "learning_rate": 5e-05, + "loss": 1.6682, + "num_input_tokens_seen": 40297984, + "step": 602 + }, + { + "epoch": 0.06831205673758865, + "loss": 1.5718060731887817, + "loss_ce": 0.012235689908266068, + "loss_iou": 0.6953125, + "loss_num": 0.0341796875, + "loss_xval": 1.5625, + "num_input_tokens_seen": 40297984, + "step": 602 + }, + { + "epoch": 0.06842553191489362, + "grad_norm": 30.409299850463867, + "learning_rate": 5e-05, + "loss": 1.5074, + "num_input_tokens_seen": 40365284, + "step": 603 + }, + { + "epoch": 0.06842553191489362, + "loss": 1.48600435256958, + "loss_ce": 0.005535687319934368, + "loss_iou": 0.6484375, + "loss_num": 0.037109375, + "loss_xval": 1.484375, + "num_input_tokens_seen": 40365284, + "step": 603 + }, + { + "epoch": 0.06853900709219858, + "grad_norm": 19.173152923583984, + "learning_rate": 5e-05, + "loss": 1.558, + "num_input_tokens_seen": 40432952, + "step": 604 + }, + { + "epoch": 0.06853900709219858, + "loss": 1.604048490524292, + "loss_ce": 0.004439132753759623, + "loss_iou": 0.71875, + "loss_num": 0.03173828125, + "loss_xval": 1.6015625, + "num_input_tokens_seen": 40432952, + "step": 604 + }, + { + "epoch": 0.06865248226950355, + "grad_norm": 22.359840393066406, + "learning_rate": 5e-05, + "loss": 1.6639, + "num_input_tokens_seen": 40498860, + "step": 605 + }, + { + "epoch": 0.06865248226950355, + "loss": 1.4971808195114136, + "loss_ce": 0.004993320908397436, + "loss_iou": 0.625, + "loss_num": 0.049072265625, + "loss_xval": 1.4921875, + "num_input_tokens_seen": 40498860, + "step": 605 + }, + { + "epoch": 0.06876595744680851, + "grad_norm": 36.40019226074219, + "learning_rate": 5e-05, + "loss": 1.5656, + "num_input_tokens_seen": 40565664, + "step": 606 + }, + { + "epoch": 0.06876595744680851, + "loss": 1.381683588027954, + "loss_ce": 0.0037539247423410416, + "loss_iou": 0.61328125, + "loss_num": 0.029541015625, + "loss_xval": 1.375, + "num_input_tokens_seen": 40565664, + "step": 606 + }, + { + "epoch": 0.06887943262411347, + "grad_norm": 16.74361801147461, + "learning_rate": 5e-05, + "loss": 1.9768, + "num_input_tokens_seen": 40632388, + "step": 607 + }, + { + "epoch": 0.06887943262411347, + "loss": 1.9825143814086914, + "loss_ce": 0.005951808299869299, + "loss_iou": 0.84765625, + "loss_num": 0.056640625, + "loss_xval": 1.9765625, + "num_input_tokens_seen": 40632388, + "step": 607 + }, + { + "epoch": 0.06899290780141844, + "grad_norm": 19.146493911743164, + "learning_rate": 5e-05, + "loss": 1.6641, + "num_input_tokens_seen": 40699780, + "step": 608 + }, + { + "epoch": 0.06899290780141844, + "loss": 1.5728346109390259, + "loss_ce": 0.00838145986199379, + "loss_iou": 0.66796875, + "loss_num": 0.045166015625, + "loss_xval": 1.5625, + "num_input_tokens_seen": 40699780, + "step": 608 + }, + { + "epoch": 0.0691063829787234, + "grad_norm": 46.978729248046875, + "learning_rate": 5e-05, + "loss": 1.8443, + "num_input_tokens_seen": 40767008, + "step": 609 + }, + { + "epoch": 0.0691063829787234, + "loss": 1.9258244037628174, + "loss_ce": 0.003949346020817757, + "loss_iou": 0.859375, + "loss_num": 0.041015625, + "loss_xval": 1.921875, + "num_input_tokens_seen": 40767008, + "step": 609 + }, + { + "epoch": 0.06921985815602837, + "grad_norm": 13.73018741607666, + "learning_rate": 5e-05, + "loss": 2.0167, + "num_input_tokens_seen": 40833984, + "step": 610 + }, + { + "epoch": 0.06921985815602837, + "loss": 1.966957449913025, + "loss_ce": 0.011879325844347477, + "loss_iou": 0.83984375, + "loss_num": 0.054443359375, + "loss_xval": 1.953125, + "num_input_tokens_seen": 40833984, + "step": 610 + }, + { + "epoch": 0.06933333333333333, + "grad_norm": 15.586463928222656, + "learning_rate": 5e-05, + "loss": 1.9009, + "num_input_tokens_seen": 40901004, + "step": 611 + }, + { + "epoch": 0.06933333333333333, + "loss": 2.065028667449951, + "loss_ce": 0.006434790324419737, + "loss_iou": 0.875, + "loss_num": 0.06298828125, + "loss_xval": 2.0625, + "num_input_tokens_seen": 40901004, + "step": 611 + }, + { + "epoch": 0.0694468085106383, + "grad_norm": 15.493803024291992, + "learning_rate": 5e-05, + "loss": 1.5776, + "num_input_tokens_seen": 40967072, + "step": 612 + }, + { + "epoch": 0.0694468085106383, + "loss": 1.6776156425476074, + "loss_ce": 0.007693830877542496, + "loss_iou": 0.7109375, + "loss_num": 0.050048828125, + "loss_xval": 1.671875, + "num_input_tokens_seen": 40967072, + "step": 612 + }, + { + "epoch": 0.06956028368794326, + "grad_norm": 17.19147300720215, + "learning_rate": 5e-05, + "loss": 1.5957, + "num_input_tokens_seen": 41033496, + "step": 613 + }, + { + "epoch": 0.06956028368794326, + "loss": 1.6243622303009033, + "loss_ce": 0.005221731029450893, + "loss_iou": 0.70703125, + "loss_num": 0.040283203125, + "loss_xval": 1.6171875, + "num_input_tokens_seen": 41033496, + "step": 613 + }, + { + "epoch": 0.06967375886524822, + "grad_norm": 23.183326721191406, + "learning_rate": 5e-05, + "loss": 1.4734, + "num_input_tokens_seen": 41100352, + "step": 614 + }, + { + "epoch": 0.06967375886524822, + "loss": 1.5417850017547607, + "loss_ce": 0.005133495200425386, + "loss_iou": 0.671875, + "loss_num": 0.038818359375, + "loss_xval": 1.5390625, + "num_input_tokens_seen": 41100352, + "step": 614 + }, + { + "epoch": 0.06978723404255319, + "grad_norm": 15.63121509552002, + "learning_rate": 5e-05, + "loss": 1.9798, + "num_input_tokens_seen": 41167780, + "step": 615 + }, + { + "epoch": 0.06978723404255319, + "loss": 2.0402727127075195, + "loss_ce": 0.004139935132116079, + "loss_iou": 0.8671875, + "loss_num": 0.060302734375, + "loss_xval": 2.03125, + "num_input_tokens_seen": 41167780, + "step": 615 + }, + { + "epoch": 0.06990070921985815, + "grad_norm": 16.013456344604492, + "learning_rate": 5e-05, + "loss": 1.673, + "num_input_tokens_seen": 41234492, + "step": 616 + }, + { + "epoch": 0.06990070921985815, + "loss": 1.5676500797271729, + "loss_ce": 0.004173522815108299, + "loss_iou": 0.7109375, + "loss_num": 0.02880859375, + "loss_xval": 1.5625, + "num_input_tokens_seen": 41234492, + "step": 616 + }, + { + "epoch": 0.07001418439716312, + "grad_norm": 31.274873733520508, + "learning_rate": 5e-05, + "loss": 1.6486, + "num_input_tokens_seen": 41300380, + "step": 617 + }, + { + "epoch": 0.07001418439716312, + "loss": 1.7307112216949463, + "loss_ce": 0.005125194322317839, + "loss_iou": 0.7578125, + "loss_num": 0.0419921875, + "loss_xval": 1.7265625, + "num_input_tokens_seen": 41300380, + "step": 617 + }, + { + "epoch": 0.07012765957446808, + "grad_norm": 16.607988357543945, + "learning_rate": 5e-05, + "loss": 1.8753, + "num_input_tokens_seen": 41367124, + "step": 618 + }, + { + "epoch": 0.07012765957446808, + "loss": 1.872063159942627, + "loss_ce": 0.005852239206433296, + "loss_iou": 0.796875, + "loss_num": 0.053955078125, + "loss_xval": 1.8671875, + "num_input_tokens_seen": 41367124, + "step": 618 + }, + { + "epoch": 0.07024113475177304, + "grad_norm": 17.751312255859375, + "learning_rate": 5e-05, + "loss": 1.3613, + "num_input_tokens_seen": 41433912, + "step": 619 + }, + { + "epoch": 0.07024113475177304, + "loss": 1.3870794773101807, + "loss_ce": 0.008661406114697456, + "loss_iou": 0.56640625, + "loss_num": 0.048583984375, + "loss_xval": 1.375, + "num_input_tokens_seen": 41433912, + "step": 619 + }, + { + "epoch": 0.07035460992907801, + "grad_norm": 27.065013885498047, + "learning_rate": 5e-05, + "loss": 1.7426, + "num_input_tokens_seen": 41500728, + "step": 620 + }, + { + "epoch": 0.07035460992907801, + "loss": 1.9215978384017944, + "loss_ce": 0.010465006344020367, + "loss_iou": 0.8203125, + "loss_num": 0.053955078125, + "loss_xval": 1.9140625, + "num_input_tokens_seen": 41500728, + "step": 620 + }, + { + "epoch": 0.07046808510638299, + "grad_norm": 18.261484146118164, + "learning_rate": 5e-05, + "loss": 1.6751, + "num_input_tokens_seen": 41568252, + "step": 621 + }, + { + "epoch": 0.07046808510638299, + "loss": 1.6646322011947632, + "loss_ce": 0.0054524922743439674, + "loss_iou": 0.76171875, + "loss_num": 0.0267333984375, + "loss_xval": 1.65625, + "num_input_tokens_seen": 41568252, + "step": 621 + }, + { + "epoch": 0.07058156028368795, + "grad_norm": 41.93157196044922, + "learning_rate": 5e-05, + "loss": 1.4697, + "num_input_tokens_seen": 41635800, + "step": 622 + }, + { + "epoch": 0.07058156028368795, + "loss": 1.495113492012024, + "loss_ce": 0.005855731200426817, + "loss_iou": 0.66796875, + "loss_num": 0.03076171875, + "loss_xval": 1.4921875, + "num_input_tokens_seen": 41635800, + "step": 622 + }, + { + "epoch": 0.07069503546099291, + "grad_norm": 14.47586441040039, + "learning_rate": 5e-05, + "loss": 2.0572, + "num_input_tokens_seen": 41702936, + "step": 623 + }, + { + "epoch": 0.07069503546099291, + "loss": 2.1247575283050537, + "loss_ce": 0.004640319384634495, + "loss_iou": 0.921875, + "loss_num": 0.054443359375, + "loss_xval": 2.125, + "num_input_tokens_seen": 41702936, + "step": 623 + }, + { + "epoch": 0.07080851063829788, + "grad_norm": 21.522937774658203, + "learning_rate": 5e-05, + "loss": 1.699, + "num_input_tokens_seen": 41769348, + "step": 624 + }, + { + "epoch": 0.07080851063829788, + "loss": 1.8664143085479736, + "loss_ce": 0.007039325311779976, + "loss_iou": 0.80078125, + "loss_num": 0.0517578125, + "loss_xval": 1.859375, + "num_input_tokens_seen": 41769348, + "step": 624 + }, + { + "epoch": 0.07092198581560284, + "grad_norm": 32.21743392944336, + "learning_rate": 5e-05, + "loss": 1.6392, + "num_input_tokens_seen": 41836048, + "step": 625 + }, + { + "epoch": 0.07092198581560284, + "loss": 1.499435544013977, + "loss_ce": 0.005294930189847946, + "loss_iou": 0.64453125, + "loss_num": 0.040771484375, + "loss_xval": 1.4921875, + "num_input_tokens_seen": 41836048, + "step": 625 + }, + { + "epoch": 0.0710354609929078, + "grad_norm": 15.72370719909668, + "learning_rate": 5e-05, + "loss": 1.8721, + "num_input_tokens_seen": 41902316, + "step": 626 + }, + { + "epoch": 0.0710354609929078, + "loss": 1.9179742336273193, + "loss_ce": 0.0029350793920457363, + "loss_iou": 0.8359375, + "loss_num": 0.048828125, + "loss_xval": 1.9140625, + "num_input_tokens_seen": 41902316, + "step": 626 + }, + { + "epoch": 0.07114893617021277, + "grad_norm": 20.297466278076172, + "learning_rate": 5e-05, + "loss": 1.669, + "num_input_tokens_seen": 41970564, + "step": 627 + }, + { + "epoch": 0.07114893617021277, + "loss": 1.5398619174957275, + "loss_ce": 0.008611924946308136, + "loss_iou": 0.6484375, + "loss_num": 0.04736328125, + "loss_xval": 1.53125, + "num_input_tokens_seen": 41970564, + "step": 627 + }, + { + "epoch": 0.07126241134751773, + "grad_norm": 25.501161575317383, + "learning_rate": 5e-05, + "loss": 1.5661, + "num_input_tokens_seen": 42037140, + "step": 628 + }, + { + "epoch": 0.07126241134751773, + "loss": 1.6113084554672241, + "loss_ce": 0.0048630982637405396, + "loss_iou": 0.703125, + "loss_num": 0.039794921875, + "loss_xval": 1.609375, + "num_input_tokens_seen": 42037140, + "step": 628 + }, + { + "epoch": 0.0713758865248227, + "grad_norm": 14.903608322143555, + "learning_rate": 5e-05, + "loss": 1.8732, + "num_input_tokens_seen": 42103692, + "step": 629 + }, + { + "epoch": 0.0713758865248227, + "loss": 2.121582508087158, + "loss_ce": 0.0034185638651251793, + "loss_iou": 0.91796875, + "loss_num": 0.0556640625, + "loss_xval": 2.125, + "num_input_tokens_seen": 42103692, + "step": 629 + }, + { + "epoch": 0.07148936170212766, + "grad_norm": 15.451590538024902, + "learning_rate": 5e-05, + "loss": 1.6522, + "num_input_tokens_seen": 42170080, + "step": 630 + }, + { + "epoch": 0.07148936170212766, + "loss": 1.7428996562957764, + "loss_ce": 0.006571552250534296, + "loss_iou": 0.734375, + "loss_num": 0.052734375, + "loss_xval": 1.734375, + "num_input_tokens_seen": 42170080, + "step": 630 + }, + { + "epoch": 0.07160283687943263, + "grad_norm": 17.9222412109375, + "learning_rate": 5e-05, + "loss": 1.6141, + "num_input_tokens_seen": 42237444, + "step": 631 + }, + { + "epoch": 0.07160283687943263, + "loss": 1.5796597003936768, + "loss_ce": 0.001534764189273119, + "loss_iou": 0.72265625, + "loss_num": 0.026123046875, + "loss_xval": 1.578125, + "num_input_tokens_seen": 42237444, + "step": 631 + }, + { + "epoch": 0.07171631205673759, + "grad_norm": 21.73451042175293, + "learning_rate": 5e-05, + "loss": 1.5552, + "num_input_tokens_seen": 42304136, + "step": 632 + }, + { + "epoch": 0.07171631205673759, + "loss": 1.5439156293869019, + "loss_ce": 0.00875934585928917, + "loss_iou": 0.6875, + "loss_num": 0.032470703125, + "loss_xval": 1.53125, + "num_input_tokens_seen": 42304136, + "step": 632 + }, + { + "epoch": 0.07182978723404256, + "grad_norm": 37.95937728881836, + "learning_rate": 5e-05, + "loss": 1.9069, + "num_input_tokens_seen": 42371628, + "step": 633 + }, + { + "epoch": 0.07182978723404256, + "loss": 2.047238349914551, + "loss_ce": 0.00719942944124341, + "loss_iou": 0.890625, + "loss_num": 0.051025390625, + "loss_xval": 2.046875, + "num_input_tokens_seen": 42371628, + "step": 633 + }, + { + "epoch": 0.07194326241134752, + "grad_norm": 22.096595764160156, + "learning_rate": 5e-05, + "loss": 1.7623, + "num_input_tokens_seen": 42438312, + "step": 634 + }, + { + "epoch": 0.07194326241134752, + "loss": 1.6697536706924438, + "loss_ce": 0.0037380566354840994, + "loss_iou": 0.73828125, + "loss_num": 0.03759765625, + "loss_xval": 1.6640625, + "num_input_tokens_seen": 42438312, + "step": 634 + }, + { + "epoch": 0.07205673758865248, + "grad_norm": 24.83920669555664, + "learning_rate": 5e-05, + "loss": 1.7479, + "num_input_tokens_seen": 42505548, + "step": 635 + }, + { + "epoch": 0.07205673758865248, + "loss": 1.7047648429870605, + "loss_ce": 0.002616544719785452, + "loss_iou": 0.7421875, + "loss_num": 0.042724609375, + "loss_xval": 1.703125, + "num_input_tokens_seen": 42505548, + "step": 635 + }, + { + "epoch": 0.07217021276595745, + "grad_norm": 19.136213302612305, + "learning_rate": 5e-05, + "loss": 1.7385, + "num_input_tokens_seen": 42572764, + "step": 636 + }, + { + "epoch": 0.07217021276595745, + "loss": 1.7231671810150146, + "loss_ce": 0.006858613342046738, + "loss_iou": 0.75, + "loss_num": 0.042724609375, + "loss_xval": 1.71875, + "num_input_tokens_seen": 42572764, + "step": 636 + }, + { + "epoch": 0.07228368794326241, + "grad_norm": 11.685190200805664, + "learning_rate": 5e-05, + "loss": 1.6859, + "num_input_tokens_seen": 42639768, + "step": 637 + }, + { + "epoch": 0.07228368794326241, + "loss": 1.7636207342147827, + "loss_ce": 0.005319918505847454, + "loss_iou": 0.7265625, + "loss_num": 0.060791015625, + "loss_xval": 1.7578125, + "num_input_tokens_seen": 42639768, + "step": 637 + }, + { + "epoch": 0.07239716312056738, + "grad_norm": 16.970767974853516, + "learning_rate": 5e-05, + "loss": 1.545, + "num_input_tokens_seen": 42706912, + "step": 638 + }, + { + "epoch": 0.07239716312056738, + "loss": 1.5565366744995117, + "loss_ce": 0.0033140783198177814, + "loss_iou": 0.6796875, + "loss_num": 0.038818359375, + "loss_xval": 1.5546875, + "num_input_tokens_seen": 42706912, + "step": 638 + }, + { + "epoch": 0.07251063829787234, + "grad_norm": 34.866512298583984, + "learning_rate": 5e-05, + "loss": 1.7656, + "num_input_tokens_seen": 42773784, + "step": 639 + }, + { + "epoch": 0.07251063829787234, + "loss": 1.3847923278808594, + "loss_ce": 0.002468063961714506, + "loss_iou": 0.64453125, + "loss_num": 0.018798828125, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 42773784, + "step": 639 + }, + { + "epoch": 0.0726241134751773, + "grad_norm": 19.520736694335938, + "learning_rate": 5e-05, + "loss": 1.7767, + "num_input_tokens_seen": 42840152, + "step": 640 + }, + { + "epoch": 0.0726241134751773, + "loss": 1.769209623336792, + "loss_ce": 0.010420620441436768, + "loss_iou": 0.79296875, + "loss_num": 0.03515625, + "loss_xval": 1.7578125, + "num_input_tokens_seen": 42840152, + "step": 640 + }, + { + "epoch": 0.07273758865248227, + "grad_norm": 18.07004165649414, + "learning_rate": 5e-05, + "loss": 1.8674, + "num_input_tokens_seen": 42905536, + "step": 641 + }, + { + "epoch": 0.07273758865248227, + "loss": 1.7019239664077759, + "loss_ce": 0.001728626899421215, + "loss_iou": 0.7421875, + "loss_num": 0.04345703125, + "loss_xval": 1.703125, + "num_input_tokens_seen": 42905536, + "step": 641 + }, + { + "epoch": 0.07285106382978723, + "grad_norm": 14.855606079101562, + "learning_rate": 5e-05, + "loss": 1.6885, + "num_input_tokens_seen": 42971868, + "step": 642 + }, + { + "epoch": 0.07285106382978723, + "loss": 1.5356006622314453, + "loss_ce": 0.010210078209638596, + "loss_iou": 0.6875, + "loss_num": 0.0308837890625, + "loss_xval": 1.5234375, + "num_input_tokens_seen": 42971868, + "step": 642 + }, + { + "epoch": 0.0729645390070922, + "grad_norm": 19.09854507446289, + "learning_rate": 5e-05, + "loss": 1.7215, + "num_input_tokens_seen": 43038428, + "step": 643 + }, + { + "epoch": 0.0729645390070922, + "loss": 1.831164002418518, + "loss_ce": 0.004992165602743626, + "loss_iou": 0.78515625, + "loss_num": 0.05126953125, + "loss_xval": 1.828125, + "num_input_tokens_seen": 43038428, + "step": 643 + }, + { + "epoch": 0.07307801418439716, + "grad_norm": 23.550798416137695, + "learning_rate": 5e-05, + "loss": 1.4365, + "num_input_tokens_seen": 43105356, + "step": 644 + }, + { + "epoch": 0.07307801418439716, + "loss": 1.555166244506836, + "loss_ce": 0.0034084077924489975, + "loss_iou": 0.68359375, + "loss_num": 0.037353515625, + "loss_xval": 1.5546875, + "num_input_tokens_seen": 43105356, + "step": 644 + }, + { + "epoch": 0.07319148936170213, + "grad_norm": 14.13135051727295, + "learning_rate": 5e-05, + "loss": 1.9366, + "num_input_tokens_seen": 43172232, + "step": 645 + }, + { + "epoch": 0.07319148936170213, + "loss": 1.9200648069381714, + "loss_ce": 0.0011194264516234398, + "loss_iou": 0.85546875, + "loss_num": 0.0419921875, + "loss_xval": 1.921875, + "num_input_tokens_seen": 43172232, + "step": 645 + }, + { + "epoch": 0.07330496453900709, + "grad_norm": 16.558300018310547, + "learning_rate": 5e-05, + "loss": 1.7602, + "num_input_tokens_seen": 43239328, + "step": 646 + }, + { + "epoch": 0.07330496453900709, + "loss": 1.7658650875091553, + "loss_ce": 0.004146285355091095, + "loss_iou": 0.76171875, + "loss_num": 0.04833984375, + "loss_xval": 1.765625, + "num_input_tokens_seen": 43239328, + "step": 646 + }, + { + "epoch": 0.07341843971631205, + "grad_norm": 25.28520393371582, + "learning_rate": 5e-05, + "loss": 1.619, + "num_input_tokens_seen": 43305616, + "step": 647 + }, + { + "epoch": 0.07341843971631205, + "loss": 1.721062183380127, + "loss_ce": 0.011101281270384789, + "loss_iou": 0.70703125, + "loss_num": 0.0595703125, + "loss_xval": 1.7109375, + "num_input_tokens_seen": 43305616, + "step": 647 + }, + { + "epoch": 0.07353191489361702, + "grad_norm": 19.347118377685547, + "learning_rate": 5e-05, + "loss": 1.5432, + "num_input_tokens_seen": 43372660, + "step": 648 + }, + { + "epoch": 0.07353191489361702, + "loss": 1.5130882263183594, + "loss_ce": 0.008693680167198181, + "loss_iou": 0.6484375, + "loss_num": 0.041259765625, + "loss_xval": 1.5078125, + "num_input_tokens_seen": 43372660, + "step": 648 + }, + { + "epoch": 0.07364539007092198, + "grad_norm": 75.85086059570312, + "learning_rate": 5e-05, + "loss": 1.442, + "num_input_tokens_seen": 43439632, + "step": 649 + }, + { + "epoch": 0.07364539007092198, + "loss": 1.3828843832015991, + "loss_ce": 0.0030321278609335423, + "loss_iou": 0.61328125, + "loss_num": 0.031005859375, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 43439632, + "step": 649 + }, + { + "epoch": 0.07375886524822695, + "grad_norm": 16.225360870361328, + "learning_rate": 5e-05, + "loss": 1.6598, + "num_input_tokens_seen": 43505772, + "step": 650 + }, + { + "epoch": 0.07375886524822695, + "loss": 1.7034411430358887, + "loss_ce": 0.0032458030618727207, + "loss_iou": 0.78515625, + "loss_num": 0.02587890625, + "loss_xval": 1.703125, + "num_input_tokens_seen": 43505772, + "step": 650 + }, + { + "epoch": 0.07387234042553191, + "grad_norm": 13.557869911193848, + "learning_rate": 5e-05, + "loss": 1.3636, + "num_input_tokens_seen": 43573512, + "step": 651 + }, + { + "epoch": 0.07387234042553191, + "loss": 1.447350263595581, + "loss_ce": 0.0035025973338633776, + "loss_iou": 0.6328125, + "loss_num": 0.035400390625, + "loss_xval": 1.4453125, + "num_input_tokens_seen": 43573512, + "step": 651 + }, + { + "epoch": 0.07398581560283687, + "grad_norm": 17.007198333740234, + "learning_rate": 5e-05, + "loss": 1.5999, + "num_input_tokens_seen": 43640072, + "step": 652 + }, + { + "epoch": 0.07398581560283687, + "loss": 1.8017982244491577, + "loss_ce": 0.010782568715512753, + "loss_iou": 0.7734375, + "loss_num": 0.04833984375, + "loss_xval": 1.7890625, + "num_input_tokens_seen": 43640072, + "step": 652 + }, + { + "epoch": 0.07409929078014184, + "grad_norm": 25.4150333404541, + "learning_rate": 5e-05, + "loss": 1.4667, + "num_input_tokens_seen": 43706236, + "step": 653 + }, + { + "epoch": 0.07409929078014184, + "loss": 1.546722412109375, + "loss_ce": 0.006500257179141045, + "loss_iou": 0.6796875, + "loss_num": 0.03662109375, + "loss_xval": 1.5390625, + "num_input_tokens_seen": 43706236, + "step": 653 + }, + { + "epoch": 0.0742127659574468, + "grad_norm": 15.181449890136719, + "learning_rate": 5e-05, + "loss": 1.8084, + "num_input_tokens_seen": 43773352, + "step": 654 + }, + { + "epoch": 0.0742127659574468, + "loss": 1.9355227947235107, + "loss_ce": 0.006811723578721285, + "loss_iou": 0.81640625, + "loss_num": 0.05859375, + "loss_xval": 1.9296875, + "num_input_tokens_seen": 43773352, + "step": 654 + }, + { + "epoch": 0.07432624113475177, + "grad_norm": 13.813889503479004, + "learning_rate": 5e-05, + "loss": 1.6617, + "num_input_tokens_seen": 43840176, + "step": 655 + }, + { + "epoch": 0.07432624113475177, + "loss": 1.6676431894302368, + "loss_ce": 0.004557274281978607, + "loss_iou": 0.72265625, + "loss_num": 0.044189453125, + "loss_xval": 1.6640625, + "num_input_tokens_seen": 43840176, + "step": 655 + }, + { + "epoch": 0.07443971631205673, + "grad_norm": 24.443912506103516, + "learning_rate": 5e-05, + "loss": 1.5317, + "num_input_tokens_seen": 43906288, + "step": 656 + }, + { + "epoch": 0.07443971631205673, + "loss": 1.5114715099334717, + "loss_ce": 0.00439140060916543, + "loss_iou": 0.6484375, + "loss_num": 0.043212890625, + "loss_xval": 1.5078125, + "num_input_tokens_seen": 43906288, + "step": 656 + }, + { + "epoch": 0.07455319148936171, + "grad_norm": 23.912803649902344, + "learning_rate": 5e-05, + "loss": 1.7148, + "num_input_tokens_seen": 43972972, + "step": 657 + }, + { + "epoch": 0.07455319148936171, + "loss": 1.762267827987671, + "loss_ce": 0.00787319801747799, + "loss_iou": 0.71875, + "loss_num": 0.064453125, + "loss_xval": 1.7578125, + "num_input_tokens_seen": 43972972, + "step": 657 + }, + { + "epoch": 0.07466666666666667, + "grad_norm": 15.014365196228027, + "learning_rate": 5e-05, + "loss": 1.7383, + "num_input_tokens_seen": 44040464, + "step": 658 + }, + { + "epoch": 0.07466666666666667, + "loss": 1.7010999917984009, + "loss_ce": 0.0038343046326190233, + "loss_iou": 0.75390625, + "loss_num": 0.03759765625, + "loss_xval": 1.6953125, + "num_input_tokens_seen": 44040464, + "step": 658 + }, + { + "epoch": 0.07478014184397164, + "grad_norm": 35.84579849243164, + "learning_rate": 5e-05, + "loss": 1.4936, + "num_input_tokens_seen": 44106816, + "step": 659 + }, + { + "epoch": 0.07478014184397164, + "loss": 1.5964808464050293, + "loss_ce": 0.004683970473706722, + "loss_iou": 0.73046875, + "loss_num": 0.02587890625, + "loss_xval": 1.59375, + "num_input_tokens_seen": 44106816, + "step": 659 + }, + { + "epoch": 0.0748936170212766, + "grad_norm": 20.988666534423828, + "learning_rate": 5e-05, + "loss": 1.8931, + "num_input_tokens_seen": 44173532, + "step": 660 + }, + { + "epoch": 0.0748936170212766, + "loss": 1.898197889328003, + "loss_ce": 0.006596465595066547, + "loss_iou": 0.85546875, + "loss_num": 0.036376953125, + "loss_xval": 1.890625, + "num_input_tokens_seen": 44173532, + "step": 660 + }, + { + "epoch": 0.07500709219858157, + "grad_norm": 15.152715682983398, + "learning_rate": 5e-05, + "loss": 1.4625, + "num_input_tokens_seen": 44240696, + "step": 661 + }, + { + "epoch": 0.07500709219858157, + "loss": 1.534885048866272, + "loss_ce": 0.0036350812297314405, + "loss_iou": 0.68359375, + "loss_num": 0.032470703125, + "loss_xval": 1.53125, + "num_input_tokens_seen": 44240696, + "step": 661 + }, + { + "epoch": 0.07512056737588653, + "grad_norm": 18.247312545776367, + "learning_rate": 5e-05, + "loss": 1.5794, + "num_input_tokens_seen": 44307836, + "step": 662 + }, + { + "epoch": 0.07512056737588653, + "loss": 1.6667065620422363, + "loss_ce": 0.004597214981913567, + "loss_iou": 0.7265625, + "loss_num": 0.04150390625, + "loss_xval": 1.6640625, + "num_input_tokens_seen": 44307836, + "step": 662 + }, + { + "epoch": 0.0752340425531915, + "grad_norm": 36.51422882080078, + "learning_rate": 5e-05, + "loss": 1.8603, + "num_input_tokens_seen": 44375128, + "step": 663 + }, + { + "epoch": 0.0752340425531915, + "loss": 1.9620225429534912, + "loss_ce": 0.0020616992842406034, + "loss_iou": 0.8359375, + "loss_num": 0.05712890625, + "loss_xval": 1.9609375, + "num_input_tokens_seen": 44375128, + "step": 663 + }, + { + "epoch": 0.07534751773049646, + "grad_norm": 14.708157539367676, + "learning_rate": 5e-05, + "loss": 1.9694, + "num_input_tokens_seen": 44442864, + "step": 664 + }, + { + "epoch": 0.07534751773049646, + "loss": 1.8741908073425293, + "loss_ce": 0.005050225183367729, + "loss_iou": 0.8203125, + "loss_num": 0.046142578125, + "loss_xval": 1.8671875, + "num_input_tokens_seen": 44442864, + "step": 664 + }, + { + "epoch": 0.07546099290780142, + "grad_norm": 16.209083557128906, + "learning_rate": 5e-05, + "loss": 1.8724, + "num_input_tokens_seen": 44509540, + "step": 665 + }, + { + "epoch": 0.07546099290780142, + "loss": 1.771254301071167, + "loss_ce": 0.006605927366763353, + "loss_iou": 0.73828125, + "loss_num": 0.05712890625, + "loss_xval": 1.765625, + "num_input_tokens_seen": 44509540, + "step": 665 + }, + { + "epoch": 0.07557446808510639, + "grad_norm": 17.41396141052246, + "learning_rate": 5e-05, + "loss": 1.7246, + "num_input_tokens_seen": 44577284, + "step": 666 + }, + { + "epoch": 0.07557446808510639, + "loss": 1.8856382369995117, + "loss_ce": 0.00966175738722086, + "loss_iou": 0.85546875, + "loss_num": 0.0322265625, + "loss_xval": 1.875, + "num_input_tokens_seen": 44577284, + "step": 666 + }, + { + "epoch": 0.07568794326241135, + "grad_norm": 19.168411254882812, + "learning_rate": 5e-05, + "loss": 1.6312, + "num_input_tokens_seen": 44643312, + "step": 667 + }, + { + "epoch": 0.07568794326241135, + "loss": 1.7313214540481567, + "loss_ce": 0.0037823510356247425, + "loss_iou": 0.7578125, + "loss_num": 0.04248046875, + "loss_xval": 1.7265625, + "num_input_tokens_seen": 44643312, + "step": 667 + }, + { + "epoch": 0.07580141843971631, + "grad_norm": 24.56133270263672, + "learning_rate": 5e-05, + "loss": 1.6067, + "num_input_tokens_seen": 44710608, + "step": 668 + }, + { + "epoch": 0.07580141843971631, + "loss": 1.5522712469100952, + "loss_ce": 0.007349381688982248, + "loss_iou": 0.6953125, + "loss_num": 0.0301513671875, + "loss_xval": 1.546875, + "num_input_tokens_seen": 44710608, + "step": 668 + }, + { + "epoch": 0.07591489361702128, + "grad_norm": 16.823291778564453, + "learning_rate": 5e-05, + "loss": 1.6121, + "num_input_tokens_seen": 44776796, + "step": 669 + }, + { + "epoch": 0.07591489361702128, + "loss": 1.7769262790679932, + "loss_ce": 0.005197878926992416, + "loss_iou": 0.7734375, + "loss_num": 0.045654296875, + "loss_xval": 1.7734375, + "num_input_tokens_seen": 44776796, + "step": 669 + }, + { + "epoch": 0.07602836879432624, + "grad_norm": 17.722679138183594, + "learning_rate": 5e-05, + "loss": 1.5642, + "num_input_tokens_seen": 44843748, + "step": 670 + }, + { + "epoch": 0.07602836879432624, + "loss": 1.6051466464996338, + "loss_ce": 0.0035841413773596287, + "loss_iou": 0.69140625, + "loss_num": 0.043212890625, + "loss_xval": 1.6015625, + "num_input_tokens_seen": 44843748, + "step": 670 + }, + { + "epoch": 0.0761418439716312, + "grad_norm": 17.240638732910156, + "learning_rate": 5e-05, + "loss": 1.582, + "num_input_tokens_seen": 44910748, + "step": 671 + }, + { + "epoch": 0.0761418439716312, + "loss": 1.5371999740600586, + "loss_ce": 0.005949885118752718, + "loss_iou": 0.6875, + "loss_num": 0.030517578125, + "loss_xval": 1.53125, + "num_input_tokens_seen": 44910748, + "step": 671 + }, + { + "epoch": 0.07625531914893617, + "grad_norm": 25.420331954956055, + "learning_rate": 5e-05, + "loss": 1.6871, + "num_input_tokens_seen": 44978596, + "step": 672 + }, + { + "epoch": 0.07625531914893617, + "loss": 1.6853806972503662, + "loss_ce": 0.0076462929137051105, + "loss_iou": 0.734375, + "loss_num": 0.042236328125, + "loss_xval": 1.6796875, + "num_input_tokens_seen": 44978596, + "step": 672 + }, + { + "epoch": 0.07636879432624114, + "grad_norm": 18.401710510253906, + "learning_rate": 5e-05, + "loss": 1.7236, + "num_input_tokens_seen": 45044736, + "step": 673 + }, + { + "epoch": 0.07636879432624114, + "loss": 1.6822423934936523, + "loss_ce": 0.006461138837039471, + "loss_iou": 0.703125, + "loss_num": 0.053955078125, + "loss_xval": 1.671875, + "num_input_tokens_seen": 45044736, + "step": 673 + }, + { + "epoch": 0.0764822695035461, + "grad_norm": 24.96916961669922, + "learning_rate": 5e-05, + "loss": 1.5907, + "num_input_tokens_seen": 45111384, + "step": 674 + }, + { + "epoch": 0.0764822695035461, + "loss": 1.7365130186080933, + "loss_ce": 0.002138033276423812, + "loss_iou": 0.765625, + "loss_num": 0.04052734375, + "loss_xval": 1.734375, + "num_input_tokens_seen": 45111384, + "step": 674 + }, + { + "epoch": 0.07659574468085106, + "grad_norm": 25.16655921936035, + "learning_rate": 5e-05, + "loss": 1.8105, + "num_input_tokens_seen": 45178384, + "step": 675 + }, + { + "epoch": 0.07659574468085106, + "loss": 1.686153531074524, + "loss_ce": 0.005489415489137173, + "loss_iou": 0.72265625, + "loss_num": 0.04736328125, + "loss_xval": 1.6796875, + "num_input_tokens_seen": 45178384, + "step": 675 + }, + { + "epoch": 0.07670921985815603, + "grad_norm": 29.298078536987305, + "learning_rate": 5e-05, + "loss": 1.6105, + "num_input_tokens_seen": 45244944, + "step": 676 + }, + { + "epoch": 0.07670921985815603, + "loss": 1.513092279434204, + "loss_ce": 0.00235012941993773, + "loss_iou": 0.67578125, + "loss_num": 0.0322265625, + "loss_xval": 1.5078125, + "num_input_tokens_seen": 45244944, + "step": 676 + }, + { + "epoch": 0.07682269503546099, + "grad_norm": 18.944591522216797, + "learning_rate": 5e-05, + "loss": 1.7726, + "num_input_tokens_seen": 45312136, + "step": 677 + }, + { + "epoch": 0.07682269503546099, + "loss": 1.9149370193481445, + "loss_ce": 0.009663672186434269, + "loss_iou": 0.83984375, + "loss_num": 0.04443359375, + "loss_xval": 1.90625, + "num_input_tokens_seen": 45312136, + "step": 677 + }, + { + "epoch": 0.07693617021276596, + "grad_norm": 25.274906158447266, + "learning_rate": 5e-05, + "loss": 1.8417, + "num_input_tokens_seen": 45378896, + "step": 678 + }, + { + "epoch": 0.07693617021276596, + "loss": 2.047476053237915, + "loss_ce": 0.0064604151993989944, + "loss_iou": 0.89453125, + "loss_num": 0.05029296875, + "loss_xval": 2.046875, + "num_input_tokens_seen": 45378896, + "step": 678 + }, + { + "epoch": 0.07704964539007092, + "grad_norm": 18.454076766967773, + "learning_rate": 5e-05, + "loss": 2.0034, + "num_input_tokens_seen": 45445868, + "step": 679 + }, + { + "epoch": 0.07704964539007092, + "loss": 1.9297082424163818, + "loss_ce": 0.004903499502688646, + "loss_iou": 0.8671875, + "loss_num": 0.038330078125, + "loss_xval": 1.921875, + "num_input_tokens_seen": 45445868, + "step": 679 + }, + { + "epoch": 0.07716312056737588, + "grad_norm": 15.275842666625977, + "learning_rate": 5e-05, + "loss": 1.7064, + "num_input_tokens_seen": 45512380, + "step": 680 + }, + { + "epoch": 0.07716312056737588, + "loss": 1.6094061136245728, + "loss_ce": 0.0039373598992824554, + "loss_iou": 0.703125, + "loss_num": 0.038818359375, + "loss_xval": 1.609375, + "num_input_tokens_seen": 45512380, + "step": 680 + }, + { + "epoch": 0.07727659574468085, + "grad_norm": 18.573942184448242, + "learning_rate": 5e-05, + "loss": 1.5068, + "num_input_tokens_seen": 45579360, + "step": 681 + }, + { + "epoch": 0.07727659574468085, + "loss": 1.530566692352295, + "loss_ce": 0.0051760259084403515, + "loss_iou": 0.6953125, + "loss_num": 0.0264892578125, + "loss_xval": 1.5234375, + "num_input_tokens_seen": 45579360, + "step": 681 + }, + { + "epoch": 0.07739007092198581, + "grad_norm": 24.739429473876953, + "learning_rate": 5e-05, + "loss": 1.5698, + "num_input_tokens_seen": 45645660, + "step": 682 + }, + { + "epoch": 0.07739007092198581, + "loss": 1.6541723012924194, + "loss_ce": 0.005002310499548912, + "loss_iou": 0.66015625, + "loss_num": 0.0654296875, + "loss_xval": 1.6484375, + "num_input_tokens_seen": 45645660, + "step": 682 + }, + { + "epoch": 0.07750354609929078, + "grad_norm": 19.822736740112305, + "learning_rate": 5e-05, + "loss": 1.6541, + "num_input_tokens_seen": 45712708, + "step": 683 + }, + { + "epoch": 0.07750354609929078, + "loss": 1.5968027114868164, + "loss_ce": 0.0030527219641953707, + "loss_iou": 0.69140625, + "loss_num": 0.042236328125, + "loss_xval": 1.59375, + "num_input_tokens_seen": 45712708, + "step": 683 + }, + { + "epoch": 0.07761702127659574, + "grad_norm": 15.604487419128418, + "learning_rate": 5e-05, + "loss": 1.6046, + "num_input_tokens_seen": 45780960, + "step": 684 + }, + { + "epoch": 0.07761702127659574, + "loss": 1.5589898824691772, + "loss_ce": 0.006255472078919411, + "loss_iou": 0.7109375, + "loss_num": 0.0267333984375, + "loss_xval": 1.5546875, + "num_input_tokens_seen": 45780960, + "step": 684 + }, + { + "epoch": 0.0777304964539007, + "grad_norm": 20.92840003967285, + "learning_rate": 5e-05, + "loss": 1.5745, + "num_input_tokens_seen": 45848008, + "step": 685 + }, + { + "epoch": 0.0777304964539007, + "loss": 1.6048240661621094, + "loss_ce": 0.011074049398303032, + "loss_iou": 0.7109375, + "loss_num": 0.03466796875, + "loss_xval": 1.59375, + "num_input_tokens_seen": 45848008, + "step": 685 + }, + { + "epoch": 0.07784397163120567, + "grad_norm": 34.17900848388672, + "learning_rate": 5e-05, + "loss": 1.7449, + "num_input_tokens_seen": 45915008, + "step": 686 + }, + { + "epoch": 0.07784397163120567, + "loss": 1.6700234413146973, + "loss_ce": 0.00693745631724596, + "loss_iou": 0.734375, + "loss_num": 0.03759765625, + "loss_xval": 1.6640625, + "num_input_tokens_seen": 45915008, + "step": 686 + }, + { + "epoch": 0.07795744680851063, + "grad_norm": 17.689729690551758, + "learning_rate": 5e-05, + "loss": 1.9119, + "num_input_tokens_seen": 45982056, + "step": 687 + }, + { + "epoch": 0.07795744680851063, + "loss": 1.9450008869171143, + "loss_ce": 0.017266523092985153, + "loss_iou": 0.8515625, + "loss_num": 0.045654296875, + "loss_xval": 1.9296875, + "num_input_tokens_seen": 45982056, + "step": 687 + }, + { + "epoch": 0.0780709219858156, + "grad_norm": 11.768641471862793, + "learning_rate": 5e-05, + "loss": 1.6309, + "num_input_tokens_seen": 46048756, + "step": 688 + }, + { + "epoch": 0.0780709219858156, + "loss": 1.5130690336227417, + "loss_ce": 0.003303410252556205, + "loss_iou": 0.67578125, + "loss_num": 0.031494140625, + "loss_xval": 1.5078125, + "num_input_tokens_seen": 46048756, + "step": 688 + }, + { + "epoch": 0.07818439716312056, + "grad_norm": 16.67736053466797, + "learning_rate": 5e-05, + "loss": 1.5638, + "num_input_tokens_seen": 46116116, + "step": 689 + }, + { + "epoch": 0.07818439716312056, + "loss": 1.4591083526611328, + "loss_ce": 0.003053701017051935, + "loss_iou": 0.62890625, + "loss_num": 0.040283203125, + "loss_xval": 1.453125, + "num_input_tokens_seen": 46116116, + "step": 689 + }, + { + "epoch": 0.07829787234042553, + "grad_norm": 14.722221374511719, + "learning_rate": 5e-05, + "loss": 1.6835, + "num_input_tokens_seen": 46183712, + "step": 690 + }, + { + "epoch": 0.07829787234042553, + "loss": 1.5509679317474365, + "loss_ce": 0.00604605209082365, + "loss_iou": 0.6953125, + "loss_num": 0.030029296875, + "loss_xval": 1.546875, + "num_input_tokens_seen": 46183712, + "step": 690 + }, + { + "epoch": 0.07841134751773049, + "grad_norm": 21.824424743652344, + "learning_rate": 5e-05, + "loss": 1.5329, + "num_input_tokens_seen": 46251600, + "step": 691 + }, + { + "epoch": 0.07841134751773049, + "loss": 1.4416675567626953, + "loss_ce": 0.003190958872437477, + "loss_iou": 0.6328125, + "loss_num": 0.03369140625, + "loss_xval": 1.4375, + "num_input_tokens_seen": 46251600, + "step": 691 + }, + { + "epoch": 0.07852482269503547, + "grad_norm": 14.664298057556152, + "learning_rate": 5e-05, + "loss": 1.7757, + "num_input_tokens_seen": 46319840, + "step": 692 + }, + { + "epoch": 0.07852482269503547, + "loss": 1.6435751914978027, + "loss_ce": 0.004414989147335291, + "loss_iou": 0.73828125, + "loss_num": 0.03173828125, + "loss_xval": 1.640625, + "num_input_tokens_seen": 46319840, + "step": 692 + }, + { + "epoch": 0.07863829787234043, + "grad_norm": 20.9152889251709, + "learning_rate": 5e-05, + "loss": 1.5539, + "num_input_tokens_seen": 46386240, + "step": 693 + }, + { + "epoch": 0.07863829787234043, + "loss": 1.4101169109344482, + "loss_ce": 0.009237967431545258, + "loss_iou": 0.5859375, + "loss_num": 0.04638671875, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 46386240, + "step": 693 + }, + { + "epoch": 0.0787517730496454, + "grad_norm": 26.207489013671875, + "learning_rate": 5e-05, + "loss": 1.8918, + "num_input_tokens_seen": 46453296, + "step": 694 + }, + { + "epoch": 0.0787517730496454, + "loss": 1.8923001289367676, + "loss_ce": 0.005886686034500599, + "loss_iou": 0.78125, + "loss_num": 0.06494140625, + "loss_xval": 1.8828125, + "num_input_tokens_seen": 46453296, + "step": 694 + }, + { + "epoch": 0.07886524822695036, + "grad_norm": 44.54231262207031, + "learning_rate": 5e-05, + "loss": 1.6717, + "num_input_tokens_seen": 46521172, + "step": 695 + }, + { + "epoch": 0.07886524822695036, + "loss": 1.6627938747406006, + "loss_ce": 0.007520325481891632, + "loss_iou": 0.72265625, + "loss_num": 0.0419921875, + "loss_xval": 1.65625, + "num_input_tokens_seen": 46521172, + "step": 695 + }, + { + "epoch": 0.07897872340425532, + "grad_norm": 19.849374771118164, + "learning_rate": 5e-05, + "loss": 1.5282, + "num_input_tokens_seen": 46588172, + "step": 696 + }, + { + "epoch": 0.07897872340425532, + "loss": 1.4238678216934204, + "loss_ce": 0.0034576449543237686, + "loss_iou": 0.640625, + "loss_num": 0.027099609375, + "loss_xval": 1.421875, + "num_input_tokens_seen": 46588172, + "step": 696 + }, + { + "epoch": 0.07909219858156029, + "grad_norm": 22.37194061279297, + "learning_rate": 5e-05, + "loss": 1.8162, + "num_input_tokens_seen": 46655460, + "step": 697 + }, + { + "epoch": 0.07909219858156029, + "loss": 1.8253737688064575, + "loss_ce": 0.0031081296037882566, + "loss_iou": 0.79296875, + "loss_num": 0.046630859375, + "loss_xval": 1.8203125, + "num_input_tokens_seen": 46655460, + "step": 697 + }, + { + "epoch": 0.07920567375886525, + "grad_norm": 17.294023513793945, + "learning_rate": 5e-05, + "loss": 1.7438, + "num_input_tokens_seen": 46722616, + "step": 698 + }, + { + "epoch": 0.07920567375886525, + "loss": 1.8074175119400024, + "loss_ce": 0.0032183341681957245, + "loss_iou": 0.765625, + "loss_num": 0.053955078125, + "loss_xval": 1.8046875, + "num_input_tokens_seen": 46722616, + "step": 698 + }, + { + "epoch": 0.07931914893617022, + "grad_norm": 18.429819107055664, + "learning_rate": 5e-05, + "loss": 1.7254, + "num_input_tokens_seen": 46789580, + "step": 699 + }, + { + "epoch": 0.07931914893617022, + "loss": 1.6513421535491943, + "loss_ce": 0.0048578958958387375, + "loss_iou": 0.7265625, + "loss_num": 0.03857421875, + "loss_xval": 1.6484375, + "num_input_tokens_seen": 46789580, + "step": 699 + }, + { + "epoch": 0.07943262411347518, + "grad_norm": 18.2891845703125, + "learning_rate": 5e-05, + "loss": 1.4253, + "num_input_tokens_seen": 46855912, + "step": 700 + }, + { + "epoch": 0.07943262411347518, + "loss": 1.6239516735076904, + "loss_ce": 0.010670489631593227, + "loss_iou": 0.6484375, + "loss_num": 0.0634765625, + "loss_xval": 1.609375, + "num_input_tokens_seen": 46855912, + "step": 700 + }, + { + "epoch": 0.07954609929078015, + "grad_norm": 19.264623641967773, + "learning_rate": 5e-05, + "loss": 1.547, + "num_input_tokens_seen": 46921832, + "step": 701 + }, + { + "epoch": 0.07954609929078015, + "loss": 1.5456373691558838, + "loss_ce": 0.010969508439302444, + "loss_iou": 0.6953125, + "loss_num": 0.029296875, + "loss_xval": 1.53125, + "num_input_tokens_seen": 46921832, + "step": 701 + }, + { + "epoch": 0.07965957446808511, + "grad_norm": 30.672008514404297, + "learning_rate": 5e-05, + "loss": 1.5984, + "num_input_tokens_seen": 46988656, + "step": 702 + }, + { + "epoch": 0.07965957446808511, + "loss": 1.6855719089508057, + "loss_ce": 0.0029547729063779116, + "loss_iou": 0.73828125, + "loss_num": 0.041748046875, + "loss_xval": 1.6796875, + "num_input_tokens_seen": 46988656, + "step": 702 + }, + { + "epoch": 0.07977304964539007, + "grad_norm": 17.927194595336914, + "learning_rate": 5e-05, + "loss": 1.9502, + "num_input_tokens_seen": 47055192, + "step": 703 + }, + { + "epoch": 0.07977304964539007, + "loss": 2.0135908126831055, + "loss_ce": 0.010661034844815731, + "loss_iou": 0.83984375, + "loss_num": 0.064453125, + "loss_xval": 2.0, + "num_input_tokens_seen": 47055192, + "step": 703 + }, + { + "epoch": 0.07988652482269504, + "grad_norm": 15.953852653503418, + "learning_rate": 5e-05, + "loss": 1.4741, + "num_input_tokens_seen": 47122772, + "step": 704 + }, + { + "epoch": 0.07988652482269504, + "loss": 1.590693473815918, + "loss_ce": 0.003291067434474826, + "loss_iou": 0.65625, + "loss_num": 0.055419921875, + "loss_xval": 1.5859375, + "num_input_tokens_seen": 47122772, + "step": 704 + }, + { + "epoch": 0.08, + "grad_norm": 35.095245361328125, + "learning_rate": 5e-05, + "loss": 1.7231, + "num_input_tokens_seen": 47189716, + "step": 705 + }, + { + "epoch": 0.08, + "loss": 1.6356078386306763, + "loss_ce": 0.006701529026031494, + "loss_iou": 0.71875, + "loss_num": 0.037353515625, + "loss_xval": 1.625, + "num_input_tokens_seen": 47189716, + "step": 705 + }, + { + "epoch": 0.08011347517730497, + "grad_norm": 19.01734733581543, + "learning_rate": 5e-05, + "loss": 1.7875, + "num_input_tokens_seen": 47255848, + "step": 706 + }, + { + "epoch": 0.08011347517730497, + "loss": 1.838727355003357, + "loss_ce": 0.005658562760800123, + "loss_iou": 0.75, + "loss_num": 0.06591796875, + "loss_xval": 1.8359375, + "num_input_tokens_seen": 47255848, + "step": 706 + }, + { + "epoch": 0.08022695035460993, + "grad_norm": 22.959360122680664, + "learning_rate": 5e-05, + "loss": 1.6839, + "num_input_tokens_seen": 47322840, + "step": 707 + }, + { + "epoch": 0.08022695035460993, + "loss": 1.6669367551803589, + "loss_ce": 0.003850857727229595, + "loss_iou": 0.7265625, + "loss_num": 0.04248046875, + "loss_xval": 1.6640625, + "num_input_tokens_seen": 47322840, + "step": 707 + }, + { + "epoch": 0.0803404255319149, + "grad_norm": 23.11376190185547, + "learning_rate": 5e-05, + "loss": 1.3179, + "num_input_tokens_seen": 47389008, + "step": 708 + }, + { + "epoch": 0.0803404255319149, + "loss": 1.3358192443847656, + "loss_ce": 0.005588560365140438, + "loss_iou": 0.5859375, + "loss_num": 0.032470703125, + "loss_xval": 1.328125, + "num_input_tokens_seen": 47389008, + "step": 708 + }, + { + "epoch": 0.08045390070921986, + "grad_norm": 18.23868179321289, + "learning_rate": 5e-05, + "loss": 1.7991, + "num_input_tokens_seen": 47456164, + "step": 709 + }, + { + "epoch": 0.08045390070921986, + "loss": 1.8743302822113037, + "loss_ce": 0.004213044885545969, + "loss_iou": 0.81640625, + "loss_num": 0.0478515625, + "loss_xval": 1.8671875, + "num_input_tokens_seen": 47456164, + "step": 709 + }, + { + "epoch": 0.08056737588652482, + "grad_norm": 13.362150192260742, + "learning_rate": 5e-05, + "loss": 1.4796, + "num_input_tokens_seen": 47523892, + "step": 710 + }, + { + "epoch": 0.08056737588652482, + "loss": 1.5270318984985352, + "loss_ce": 0.0055474936962127686, + "loss_iou": 0.6640625, + "loss_num": 0.0380859375, + "loss_xval": 1.5234375, + "num_input_tokens_seen": 47523892, + "step": 710 + }, + { + "epoch": 0.08068085106382979, + "grad_norm": 18.79097557067871, + "learning_rate": 5e-05, + "loss": 1.5174, + "num_input_tokens_seen": 47588596, + "step": 711 + }, + { + "epoch": 0.08068085106382979, + "loss": 1.3246132135391235, + "loss_ce": 0.004544837865978479, + "loss_iou": 0.5703125, + "loss_num": 0.03564453125, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 47588596, + "step": 711 + }, + { + "epoch": 0.08079432624113475, + "grad_norm": 29.6666259765625, + "learning_rate": 5e-05, + "loss": 1.6657, + "num_input_tokens_seen": 47654828, + "step": 712 + }, + { + "epoch": 0.08079432624113475, + "loss": 1.7086384296417236, + "loss_ce": 0.007466704584658146, + "loss_iou": 0.75390625, + "loss_num": 0.0390625, + "loss_xval": 1.703125, + "num_input_tokens_seen": 47654828, + "step": 712 + }, + { + "epoch": 0.08090780141843971, + "grad_norm": 24.22553253173828, + "learning_rate": 5e-05, + "loss": 1.8094, + "num_input_tokens_seen": 47721972, + "step": 713 + }, + { + "epoch": 0.08090780141843971, + "loss": 1.8500189781188965, + "loss_ce": 0.005292512010782957, + "loss_iou": 0.82421875, + "loss_num": 0.0390625, + "loss_xval": 1.84375, + "num_input_tokens_seen": 47721972, + "step": 713 + }, + { + "epoch": 0.08102127659574468, + "grad_norm": 15.596417427062988, + "learning_rate": 5e-05, + "loss": 1.7158, + "num_input_tokens_seen": 47788472, + "step": 714 + }, + { + "epoch": 0.08102127659574468, + "loss": 1.6099066734313965, + "loss_ce": 0.0024847066961228848, + "loss_iou": 0.703125, + "loss_num": 0.04052734375, + "loss_xval": 1.609375, + "num_input_tokens_seen": 47788472, + "step": 714 + }, + { + "epoch": 0.08113475177304964, + "grad_norm": 34.03910827636719, + "learning_rate": 5e-05, + "loss": 1.5951, + "num_input_tokens_seen": 47855048, + "step": 715 + }, + { + "epoch": 0.08113475177304964, + "loss": 1.3524625301361084, + "loss_ce": 0.005782815162092447, + "loss_iou": 0.61328125, + "loss_num": 0.0240478515625, + "loss_xval": 1.34375, + "num_input_tokens_seen": 47855048, + "step": 715 + }, + { + "epoch": 0.08124822695035461, + "grad_norm": 12.811441421508789, + "learning_rate": 5e-05, + "loss": 1.9778, + "num_input_tokens_seen": 47921344, + "step": 716 + }, + { + "epoch": 0.08124822695035461, + "loss": 1.8780174255371094, + "loss_ce": 0.003993901424109936, + "loss_iou": 0.83203125, + "loss_num": 0.0419921875, + "loss_xval": 1.875, + "num_input_tokens_seen": 47921344, + "step": 716 + }, + { + "epoch": 0.08136170212765957, + "grad_norm": 14.915495872497559, + "learning_rate": 5e-05, + "loss": 1.6379, + "num_input_tokens_seen": 47988872, + "step": 717 + }, + { + "epoch": 0.08136170212765957, + "loss": 1.7214757204055786, + "loss_ce": 0.006631943397223949, + "loss_iou": 0.76171875, + "loss_num": 0.038818359375, + "loss_xval": 1.71875, + "num_input_tokens_seen": 47988872, + "step": 717 + }, + { + "epoch": 0.08147517730496454, + "grad_norm": 9.608490943908691, + "learning_rate": 5e-05, + "loss": 1.5943, + "num_input_tokens_seen": 48055676, + "step": 718 + }, + { + "epoch": 0.08147517730496454, + "loss": 1.5301756858825684, + "loss_ce": 0.0028320499695837498, + "loss_iou": 0.6875, + "loss_num": 0.0308837890625, + "loss_xval": 1.53125, + "num_input_tokens_seen": 48055676, + "step": 718 + }, + { + "epoch": 0.0815886524822695, + "grad_norm": 16.70472526550293, + "learning_rate": 5e-05, + "loss": 1.5083, + "num_input_tokens_seen": 48123300, + "step": 719 + }, + { + "epoch": 0.0815886524822695, + "loss": 1.4887166023254395, + "loss_ce": 0.005318161100149155, + "loss_iou": 0.6640625, + "loss_num": 0.03173828125, + "loss_xval": 1.484375, + "num_input_tokens_seen": 48123300, + "step": 719 + }, + { + "epoch": 0.08170212765957446, + "grad_norm": 18.88929557800293, + "learning_rate": 5e-05, + "loss": 1.594, + "num_input_tokens_seen": 48190748, + "step": 720 + }, + { + "epoch": 0.08170212765957446, + "loss": 1.4999183416366577, + "loss_ce": 0.005777747370302677, + "loss_iou": 0.6953125, + "loss_num": 0.0211181640625, + "loss_xval": 1.4921875, + "num_input_tokens_seen": 48190748, + "step": 720 + }, + { + "epoch": 0.08181560283687943, + "grad_norm": 15.317617416381836, + "learning_rate": 5e-05, + "loss": 1.5096, + "num_input_tokens_seen": 48257292, + "step": 721 + }, + { + "epoch": 0.08181560283687943, + "loss": 1.6792237758636475, + "loss_ce": 0.007348775863647461, + "loss_iou": 0.7265625, + "loss_num": 0.044189453125, + "loss_xval": 1.671875, + "num_input_tokens_seen": 48257292, + "step": 721 + }, + { + "epoch": 0.08192907801418439, + "grad_norm": 27.355871200561523, + "learning_rate": 5e-05, + "loss": 1.6545, + "num_input_tokens_seen": 48325244, + "step": 722 + }, + { + "epoch": 0.08192907801418439, + "loss": 1.580106258392334, + "loss_ce": 0.005887486506253481, + "loss_iou": 0.67578125, + "loss_num": 0.044189453125, + "loss_xval": 1.578125, + "num_input_tokens_seen": 48325244, + "step": 722 + }, + { + "epoch": 0.08204255319148936, + "grad_norm": 17.77208137512207, + "learning_rate": 5e-05, + "loss": 1.7525, + "num_input_tokens_seen": 48391756, + "step": 723 + }, + { + "epoch": 0.08204255319148936, + "loss": 1.665010929107666, + "loss_ce": 0.0048547666519880295, + "loss_iou": 0.7421875, + "loss_num": 0.0361328125, + "loss_xval": 1.65625, + "num_input_tokens_seen": 48391756, + "step": 723 + }, + { + "epoch": 0.08215602836879432, + "grad_norm": 25.50852394104004, + "learning_rate": 5e-05, + "loss": 1.8983, + "num_input_tokens_seen": 48458856, + "step": 724 + }, + { + "epoch": 0.08215602836879432, + "loss": 1.9236544370651245, + "loss_ce": 0.004709198605269194, + "loss_iou": 0.828125, + "loss_num": 0.052978515625, + "loss_xval": 1.921875, + "num_input_tokens_seen": 48458856, + "step": 724 + }, + { + "epoch": 0.08226950354609928, + "grad_norm": 15.623908996582031, + "learning_rate": 5e-05, + "loss": 1.4431, + "num_input_tokens_seen": 48525276, + "step": 725 + }, + { + "epoch": 0.08226950354609928, + "loss": 1.6218777894973755, + "loss_ce": 0.00664338655769825, + "loss_iou": 0.71875, + "loss_num": 0.03564453125, + "loss_xval": 1.6171875, + "num_input_tokens_seen": 48525276, + "step": 725 + }, + { + "epoch": 0.08238297872340425, + "grad_norm": 11.794387817382812, + "learning_rate": 5e-05, + "loss": 1.4428, + "num_input_tokens_seen": 48591092, + "step": 726 + }, + { + "epoch": 0.08238297872340425, + "loss": 1.5326839685440063, + "loss_ce": 0.005340162664651871, + "loss_iou": 0.6484375, + "loss_num": 0.0458984375, + "loss_xval": 1.53125, + "num_input_tokens_seen": 48591092, + "step": 726 + }, + { + "epoch": 0.08249645390070923, + "grad_norm": 14.238585472106934, + "learning_rate": 5e-05, + "loss": 1.3313, + "num_input_tokens_seen": 48657728, + "step": 727 + }, + { + "epoch": 0.08249645390070923, + "loss": 1.309812068939209, + "loss_ce": 0.0031713340431451797, + "loss_iou": 0.59765625, + "loss_num": 0.0220947265625, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 48657728, + "step": 727 + }, + { + "epoch": 0.08260992907801419, + "grad_norm": 20.2222900390625, + "learning_rate": 5e-05, + "loss": 1.5529, + "num_input_tokens_seen": 48725744, + "step": 728 + }, + { + "epoch": 0.08260992907801419, + "loss": 1.6375668048858643, + "loss_ce": 0.0018245965475216508, + "loss_iou": 0.71484375, + "loss_num": 0.04052734375, + "loss_xval": 1.6328125, + "num_input_tokens_seen": 48725744, + "step": 728 + }, + { + "epoch": 0.08272340425531916, + "grad_norm": 17.30886459350586, + "learning_rate": 5e-05, + "loss": 1.4905, + "num_input_tokens_seen": 48792864, + "step": 729 + }, + { + "epoch": 0.08272340425531916, + "loss": 1.6549975872039795, + "loss_ce": 0.004118672572076321, + "loss_iou": 0.6796875, + "loss_num": 0.057373046875, + "loss_xval": 1.6484375, + "num_input_tokens_seen": 48792864, + "step": 729 + }, + { + "epoch": 0.08283687943262412, + "grad_norm": 23.5001163482666, + "learning_rate": 5e-05, + "loss": 1.509, + "num_input_tokens_seen": 48859692, + "step": 730 + }, + { + "epoch": 0.08283687943262412, + "loss": 1.4416532516479492, + "loss_ce": 0.006106395274400711, + "loss_iou": 0.625, + "loss_num": 0.036865234375, + "loss_xval": 1.4375, + "num_input_tokens_seen": 48859692, + "step": 730 + }, + { + "epoch": 0.08295035460992908, + "grad_norm": 18.617036819458008, + "learning_rate": 5e-05, + "loss": 1.5792, + "num_input_tokens_seen": 48926172, + "step": 731 + }, + { + "epoch": 0.08295035460992908, + "loss": 1.539473533630371, + "loss_ce": 0.011397325433790684, + "loss_iou": 0.671875, + "loss_num": 0.03662109375, + "loss_xval": 1.53125, + "num_input_tokens_seen": 48926172, + "step": 731 + }, + { + "epoch": 0.08306382978723405, + "grad_norm": 13.127779960632324, + "learning_rate": 5e-05, + "loss": 1.4595, + "num_input_tokens_seen": 48994012, + "step": 732 + }, + { + "epoch": 0.08306382978723405, + "loss": 1.5338952541351318, + "loss_ce": 0.0036217770539224148, + "loss_iou": 0.6953125, + "loss_num": 0.0281982421875, + "loss_xval": 1.53125, + "num_input_tokens_seen": 48994012, + "step": 732 + }, + { + "epoch": 0.08317730496453901, + "grad_norm": 25.603330612182617, + "learning_rate": 5e-05, + "loss": 1.7457, + "num_input_tokens_seen": 49059504, + "step": 733 + }, + { + "epoch": 0.08317730496453901, + "loss": 1.8315131664276123, + "loss_ce": 0.00436465535312891, + "loss_iou": 0.75390625, + "loss_num": 0.0634765625, + "loss_xval": 1.828125, + "num_input_tokens_seen": 49059504, + "step": 733 + }, + { + "epoch": 0.08329078014184398, + "grad_norm": 31.105947494506836, + "learning_rate": 5e-05, + "loss": 1.7832, + "num_input_tokens_seen": 49126404, + "step": 734 + }, + { + "epoch": 0.08329078014184398, + "loss": 1.7615737915039062, + "loss_ce": 0.006691013928502798, + "loss_iou": 0.78125, + "loss_num": 0.0390625, + "loss_xval": 1.7578125, + "num_input_tokens_seen": 49126404, + "step": 734 + }, + { + "epoch": 0.08340425531914894, + "grad_norm": 15.67389965057373, + "learning_rate": 5e-05, + "loss": 1.8833, + "num_input_tokens_seen": 49193988, + "step": 735 + }, + { + "epoch": 0.08340425531914894, + "loss": 1.874096393585205, + "loss_ce": 0.0049558174796402454, + "loss_iou": 0.8203125, + "loss_num": 0.0458984375, + "loss_xval": 1.8671875, + "num_input_tokens_seen": 49193988, + "step": 735 + }, + { + "epoch": 0.0835177304964539, + "grad_norm": 43.83589172363281, + "learning_rate": 5e-05, + "loss": 1.5453, + "num_input_tokens_seen": 49260432, + "step": 736 + }, + { + "epoch": 0.0835177304964539, + "loss": 1.6416093111038208, + "loss_ce": 0.002937480341643095, + "loss_iou": 0.7265625, + "loss_num": 0.036865234375, + "loss_xval": 1.640625, + "num_input_tokens_seen": 49260432, + "step": 736 + }, + { + "epoch": 0.08363120567375887, + "grad_norm": 27.64581871032715, + "learning_rate": 5e-05, + "loss": 1.5786, + "num_input_tokens_seen": 49328164, + "step": 737 + }, + { + "epoch": 0.08363120567375887, + "loss": 1.4501533508300781, + "loss_ce": 0.0019111440051347017, + "loss_iou": 0.68359375, + "loss_num": 0.0164794921875, + "loss_xval": 1.4453125, + "num_input_tokens_seen": 49328164, + "step": 737 + }, + { + "epoch": 0.08374468085106383, + "grad_norm": 15.874715805053711, + "learning_rate": 5e-05, + "loss": 1.7739, + "num_input_tokens_seen": 49394920, + "step": 738 + }, + { + "epoch": 0.08374468085106383, + "loss": 1.6602613925933838, + "loss_ce": 0.005232160910964012, + "loss_iou": 0.6875, + "loss_num": 0.05517578125, + "loss_xval": 1.65625, + "num_input_tokens_seen": 49394920, + "step": 738 + }, + { + "epoch": 0.0838581560283688, + "grad_norm": 21.814470291137695, + "learning_rate": 5e-05, + "loss": 1.6566, + "num_input_tokens_seen": 49462132, + "step": 739 + }, + { + "epoch": 0.0838581560283688, + "loss": 1.4969618320465088, + "loss_ce": 0.005750866606831551, + "loss_iou": 0.66796875, + "loss_num": 0.0303955078125, + "loss_xval": 1.4921875, + "num_input_tokens_seen": 49462132, + "step": 739 + }, + { + "epoch": 0.08397163120567376, + "grad_norm": 33.32607650756836, + "learning_rate": 5e-05, + "loss": 1.8095, + "num_input_tokens_seen": 49529224, + "step": 740 + }, + { + "epoch": 0.08397163120567376, + "loss": 1.7045317888259888, + "loss_ce": 0.007266193628311157, + "loss_iou": 0.7421875, + "loss_num": 0.04296875, + "loss_xval": 1.6953125, + "num_input_tokens_seen": 49529224, + "step": 740 + }, + { + "epoch": 0.08408510638297872, + "grad_norm": 19.886857986450195, + "learning_rate": 5e-05, + "loss": 1.9462, + "num_input_tokens_seen": 49595808, + "step": 741 + }, + { + "epoch": 0.08408510638297872, + "loss": 2.1168980598449707, + "loss_ce": 0.0026402492076158524, + "loss_iou": 0.90625, + "loss_num": 0.060791015625, + "loss_xval": 2.109375, + "num_input_tokens_seen": 49595808, + "step": 741 + }, + { + "epoch": 0.08419858156028369, + "grad_norm": 10.396571159362793, + "learning_rate": 5e-05, + "loss": 1.4471, + "num_input_tokens_seen": 49663048, + "step": 742 + }, + { + "epoch": 0.08419858156028369, + "loss": 1.4481205940246582, + "loss_ce": 0.007202534936368465, + "loss_iou": 0.640625, + "loss_num": 0.031982421875, + "loss_xval": 1.4375, + "num_input_tokens_seen": 49663048, + "step": 742 + }, + { + "epoch": 0.08431205673758865, + "grad_norm": 30.685224533081055, + "learning_rate": 5e-05, + "loss": 1.4894, + "num_input_tokens_seen": 49729964, + "step": 743 + }, + { + "epoch": 0.08431205673758865, + "loss": 1.476668119430542, + "loss_ce": 0.003035320434719324, + "loss_iou": 0.66015625, + "loss_num": 0.030029296875, + "loss_xval": 1.4765625, + "num_input_tokens_seen": 49729964, + "step": 743 + }, + { + "epoch": 0.08442553191489362, + "grad_norm": 21.946016311645508, + "learning_rate": 5e-05, + "loss": 2.136, + "num_input_tokens_seen": 49797640, + "step": 744 + }, + { + "epoch": 0.08442553191489362, + "loss": 2.1260461807250977, + "loss_ce": 0.002999422140419483, + "loss_iou": 0.93359375, + "loss_num": 0.050537109375, + "loss_xval": 2.125, + "num_input_tokens_seen": 49797640, + "step": 744 + }, + { + "epoch": 0.08453900709219858, + "grad_norm": 16.902101516723633, + "learning_rate": 5e-05, + "loss": 1.6968, + "num_input_tokens_seen": 49863832, + "step": 745 + }, + { + "epoch": 0.08453900709219858, + "loss": 1.7303838729858398, + "loss_ce": 0.006751071661710739, + "loss_iou": 0.76953125, + "loss_num": 0.037109375, + "loss_xval": 1.7265625, + "num_input_tokens_seen": 49863832, + "step": 745 + }, + { + "epoch": 0.08465248226950355, + "grad_norm": 30.047975540161133, + "learning_rate": 5e-05, + "loss": 1.3745, + "num_input_tokens_seen": 49930452, + "step": 746 + }, + { + "epoch": 0.08465248226950355, + "loss": 1.334491491317749, + "loss_ce": 0.0014836408663541079, + "loss_iou": 0.61328125, + "loss_num": 0.0220947265625, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 49930452, + "step": 746 + }, + { + "epoch": 0.08476595744680851, + "grad_norm": 22.1312255859375, + "learning_rate": 5e-05, + "loss": 1.6329, + "num_input_tokens_seen": 49998184, + "step": 747 + }, + { + "epoch": 0.08476595744680851, + "loss": 1.5850231647491455, + "loss_ce": 0.0034801478032022715, + "loss_iou": 0.69140625, + "loss_num": 0.039794921875, + "loss_xval": 1.578125, + "num_input_tokens_seen": 49998184, + "step": 747 + }, + { + "epoch": 0.08487943262411347, + "grad_norm": 31.97064971923828, + "learning_rate": 5e-05, + "loss": 1.6058, + "num_input_tokens_seen": 50065020, + "step": 748 + }, + { + "epoch": 0.08487943262411347, + "loss": 1.6711034774780273, + "loss_ce": 0.008994065225124359, + "loss_iou": 0.734375, + "loss_num": 0.039306640625, + "loss_xval": 1.6640625, + "num_input_tokens_seen": 50065020, + "step": 748 + }, + { + "epoch": 0.08499290780141844, + "grad_norm": 19.90222930908203, + "learning_rate": 5e-05, + "loss": 1.8274, + "num_input_tokens_seen": 50130552, + "step": 749 + }, + { + "epoch": 0.08499290780141844, + "loss": 1.8674767017364502, + "loss_ce": 0.0032188165932893753, + "loss_iou": 0.8203125, + "loss_num": 0.04443359375, + "loss_xval": 1.8671875, + "num_input_tokens_seen": 50130552, + "step": 749 + }, + { + "epoch": 0.0851063829787234, + "grad_norm": 13.823019981384277, + "learning_rate": 5e-05, + "loss": 1.7015, + "num_input_tokens_seen": 50196792, + "step": 750 + }, + { + "epoch": 0.0851063829787234, + "eval_seeclick_CIoU": 0.3458217531442642, + "eval_seeclick_GIoU": 0.3055790066719055, + "eval_seeclick_IoU": 0.4260387569665909, + "eval_seeclick_MAE_all": 0.1500919908285141, + "eval_seeclick_MAE_h": 0.06563599035143852, + "eval_seeclick_MAE_w": 0.14298540353775024, + "eval_seeclick_MAE_x_boxes": 0.23025017231702805, + "eval_seeclick_MAE_y_boxes": 0.1258874535560608, + "eval_seeclick_NUM_probability": 0.997836172580719, + "eval_seeclick_inside_bbox": 0.6614583432674408, + "eval_seeclick_loss": 2.6708343029022217, + "eval_seeclick_loss_ce": 0.01571960188448429, + "eval_seeclick_loss_iou": 0.96875, + "eval_seeclick_loss_num": 0.14892578125, + "eval_seeclick_loss_xval": 2.684814453125, + "eval_seeclick_runtime": 62.6481, + "eval_seeclick_samples_per_second": 0.75, + "eval_seeclick_steps_per_second": 0.032, + "num_input_tokens_seen": 50196792, + "step": 750 + }, + { + "epoch": 0.0851063829787234, + "eval_icons_CIoU": 0.47106143832206726, + "eval_icons_GIoU": 0.44230514764785767, + "eval_icons_IoU": 0.508140042424202, + "eval_icons_MAE_all": 0.13126909732818604, + "eval_icons_MAE_h": 0.12224357575178146, + "eval_icons_MAE_w": 0.10312636941671371, + "eval_icons_MAE_x_boxes": 0.10615378990769386, + "eval_icons_MAE_y_boxes": 0.09745434299111366, + "eval_icons_NUM_probability": 0.9995380938053131, + "eval_icons_inside_bbox": 0.8420138955116272, + "eval_icons_loss": 2.6187522411346436, + "eval_icons_loss_ce": 0.0018099055741913617, + "eval_icons_loss_iou": 0.99267578125, + "eval_icons_loss_num": 0.11902618408203125, + "eval_icons_loss_xval": 2.57958984375, + "eval_icons_runtime": 76.2017, + "eval_icons_samples_per_second": 0.656, + "eval_icons_steps_per_second": 0.026, + "num_input_tokens_seen": 50196792, + "step": 750 + }, + { + "epoch": 0.0851063829787234, + "eval_screenspot_CIoU": 0.3965337773164113, + "eval_screenspot_GIoU": 0.3738982478777568, + "eval_screenspot_IoU": 0.46775155266125995, + "eval_screenspot_MAE_all": 0.14087557047605515, + "eval_screenspot_MAE_h": 0.08582950383424759, + "eval_screenspot_MAE_w": 0.1655890941619873, + "eval_screenspot_MAE_x_boxes": 0.21583973368008932, + "eval_screenspot_MAE_y_boxes": 0.08186382551987965, + "eval_screenspot_NUM_probability": 0.999514659245809, + "eval_screenspot_inside_bbox": 0.6654166579246521, + "eval_screenspot_loss": 2.7550675868988037, + "eval_screenspot_loss_ce": 0.015450346594055494, + "eval_screenspot_loss_iou": 1.0030924479166667, + "eval_screenspot_loss_num": 0.14992268880208334, + "eval_screenspot_loss_xval": 2.7552083333333335, + "eval_screenspot_runtime": 115.2149, + "eval_screenspot_samples_per_second": 0.772, + "eval_screenspot_steps_per_second": 0.026, + "num_input_tokens_seen": 50196792, + "step": 750 + }, + { + "epoch": 0.0851063829787234, + "eval_compot_CIoU": 0.35109545290470123, + "eval_compot_GIoU": 0.3158632069826126, + "eval_compot_IoU": 0.4339875429868698, + "eval_compot_MAE_all": 0.14379336684942245, + "eval_compot_MAE_h": 0.07161831669509411, + "eval_compot_MAE_w": 0.13649139925837517, + "eval_compot_MAE_x_boxes": 0.1429363712668419, + "eval_compot_MAE_y_boxes": 0.15114034712314606, + "eval_compot_NUM_probability": 0.9996274411678314, + "eval_compot_inside_bbox": 0.5746527910232544, + "eval_compot_loss": 2.710007905960083, + "eval_compot_loss_ce": 0.009638047777116299, + "eval_compot_loss_iou": 1.025146484375, + "eval_compot_loss_num": 0.1433563232421875, + "eval_compot_loss_xval": 2.76611328125, + "eval_compot_runtime": 66.8146, + "eval_compot_samples_per_second": 0.748, + "eval_compot_steps_per_second": 0.03, + "num_input_tokens_seen": 50196792, + "step": 750 + }, + { + "epoch": 0.0851063829787234, + "loss": 2.8623361587524414, + "loss_ce": 0.005890795961022377, + "loss_iou": 1.03125, + "loss_num": 0.1572265625, + "loss_xval": 2.859375, + "num_input_tokens_seen": 50196792, + "step": 750 + }, + { + "epoch": 0.08521985815602837, + "grad_norm": 31.954574584960938, + "learning_rate": 5e-05, + "loss": 1.6098, + "num_input_tokens_seen": 50264152, + "step": 751 + }, + { + "epoch": 0.08521985815602837, + "loss": 1.441852331161499, + "loss_ce": 0.004352182615548372, + "loss_iou": 0.65625, + "loss_num": 0.0257568359375, + "loss_xval": 1.4375, + "num_input_tokens_seen": 50264152, + "step": 751 + }, + { + "epoch": 0.08533333333333333, + "grad_norm": 21.067598342895508, + "learning_rate": 5e-05, + "loss": 1.9994, + "num_input_tokens_seen": 50330700, + "step": 752 + }, + { + "epoch": 0.08533333333333333, + "loss": 1.9694019556045532, + "loss_ce": 0.006511444225907326, + "loss_iou": 0.86328125, + "loss_num": 0.046630859375, + "loss_xval": 1.9609375, + "num_input_tokens_seen": 50330700, + "step": 752 + }, + { + "epoch": 0.0854468085106383, + "grad_norm": 23.862518310546875, + "learning_rate": 5e-05, + "loss": 1.4903, + "num_input_tokens_seen": 50397080, + "step": 753 + }, + { + "epoch": 0.0854468085106383, + "loss": 1.5908591747283936, + "loss_ce": 0.004921680316329002, + "loss_iou": 0.69921875, + "loss_num": 0.037109375, + "loss_xval": 1.5859375, + "num_input_tokens_seen": 50397080, + "step": 753 + }, + { + "epoch": 0.08556028368794326, + "grad_norm": 27.110946655273438, + "learning_rate": 5e-05, + "loss": 1.8199, + "num_input_tokens_seen": 50464208, + "step": 754 + }, + { + "epoch": 0.08556028368794326, + "loss": 1.630350947380066, + "loss_ce": 0.005350962746888399, + "loss_iou": 0.73828125, + "loss_num": 0.0301513671875, + "loss_xval": 1.625, + "num_input_tokens_seen": 50464208, + "step": 754 + }, + { + "epoch": 0.08567375886524822, + "grad_norm": 21.384849548339844, + "learning_rate": 5e-05, + "loss": 1.6079, + "num_input_tokens_seen": 50531168, + "step": 755 + }, + { + "epoch": 0.08567375886524822, + "loss": 1.7542232275009155, + "loss_ce": 0.006176354363560677, + "loss_iou": 0.7734375, + "loss_num": 0.03955078125, + "loss_xval": 1.75, + "num_input_tokens_seen": 50531168, + "step": 755 + }, + { + "epoch": 0.08578723404255319, + "grad_norm": 20.25864601135254, + "learning_rate": 5e-05, + "loss": 1.4024, + "num_input_tokens_seen": 50598780, + "step": 756 + }, + { + "epoch": 0.08578723404255319, + "loss": 1.2771340608596802, + "loss_ce": 0.006137991324067116, + "loss_iou": 0.5625, + "loss_num": 0.029296875, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 50598780, + "step": 756 + }, + { + "epoch": 0.08590070921985815, + "grad_norm": 20.11908531188965, + "learning_rate": 5e-05, + "loss": 1.4737, + "num_input_tokens_seen": 50666352, + "step": 757 + }, + { + "epoch": 0.08590070921985815, + "loss": 1.4460527896881104, + "loss_ce": 0.0033648000098764896, + "loss_iou": 0.6171875, + "loss_num": 0.041259765625, + "loss_xval": 1.4453125, + "num_input_tokens_seen": 50666352, + "step": 757 + }, + { + "epoch": 0.08601418439716312, + "grad_norm": 21.70969581604004, + "learning_rate": 5e-05, + "loss": 1.4715, + "num_input_tokens_seen": 50733568, + "step": 758 + }, + { + "epoch": 0.08601418439716312, + "loss": 1.5776643753051758, + "loss_ce": 0.0044222078286111355, + "loss_iou": 0.671875, + "loss_num": 0.0458984375, + "loss_xval": 1.5703125, + "num_input_tokens_seen": 50733568, + "step": 758 + }, + { + "epoch": 0.08612765957446808, + "grad_norm": 16.853792190551758, + "learning_rate": 5e-05, + "loss": 1.3654, + "num_input_tokens_seen": 50799808, + "step": 759 + }, + { + "epoch": 0.08612765957446808, + "loss": 1.3710861206054688, + "loss_ce": 0.007377680391073227, + "loss_iou": 0.625, + "loss_num": 0.0234375, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 50799808, + "step": 759 + }, + { + "epoch": 0.08624113475177304, + "grad_norm": 107.35345458984375, + "learning_rate": 5e-05, + "loss": 1.6086, + "num_input_tokens_seen": 50865888, + "step": 760 + }, + { + "epoch": 0.08624113475177304, + "loss": 1.5944398641586304, + "loss_ce": 0.00703756557777524, + "loss_iou": 0.6953125, + "loss_num": 0.040283203125, + "loss_xval": 1.5859375, + "num_input_tokens_seen": 50865888, + "step": 760 + }, + { + "epoch": 0.08635460992907801, + "grad_norm": 25.430572509765625, + "learning_rate": 5e-05, + "loss": 1.5255, + "num_input_tokens_seen": 50932980, + "step": 761 + }, + { + "epoch": 0.08635460992907801, + "loss": 1.4833720922470093, + "loss_ce": 0.0029034046456217766, + "loss_iou": 0.65234375, + "loss_num": 0.03564453125, + "loss_xval": 1.484375, + "num_input_tokens_seen": 50932980, + "step": 761 + }, + { + "epoch": 0.08646808510638297, + "grad_norm": 32.683509826660156, + "learning_rate": 5e-05, + "loss": 1.9148, + "num_input_tokens_seen": 50999924, + "step": 762 + }, + { + "epoch": 0.08646808510638297, + "loss": 1.8358068466186523, + "loss_ce": 0.0057287756353616714, + "loss_iou": 0.8359375, + "loss_num": 0.03173828125, + "loss_xval": 1.828125, + "num_input_tokens_seen": 50999924, + "step": 762 + }, + { + "epoch": 0.08658156028368795, + "grad_norm": 13.172839164733887, + "learning_rate": 5e-05, + "loss": 1.8618, + "num_input_tokens_seen": 51067112, + "step": 763 + }, + { + "epoch": 0.08658156028368795, + "loss": 1.9397773742675781, + "loss_ce": 0.004230424761772156, + "loss_iou": 0.83203125, + "loss_num": 0.054931640625, + "loss_xval": 1.9375, + "num_input_tokens_seen": 51067112, + "step": 763 + }, + { + "epoch": 0.08669503546099291, + "grad_norm": 16.790111541748047, + "learning_rate": 5e-05, + "loss": 1.8445, + "num_input_tokens_seen": 51134104, + "step": 764 + }, + { + "epoch": 0.08669503546099291, + "loss": 1.654052734375, + "loss_ce": 0.009521461091935635, + "loss_iou": 0.73046875, + "loss_num": 0.035888671875, + "loss_xval": 1.640625, + "num_input_tokens_seen": 51134104, + "step": 764 + }, + { + "epoch": 0.08680851063829788, + "grad_norm": 25.196741104125977, + "learning_rate": 5e-05, + "loss": 1.5967, + "num_input_tokens_seen": 51201784, + "step": 765 + }, + { + "epoch": 0.08680851063829788, + "loss": 1.6444792747497559, + "loss_ce": 0.005807449109852314, + "loss_iou": 0.72265625, + "loss_num": 0.038330078125, + "loss_xval": 1.640625, + "num_input_tokens_seen": 51201784, + "step": 765 + }, + { + "epoch": 0.08692198581560284, + "grad_norm": 16.418292999267578, + "learning_rate": 5e-05, + "loss": 1.7325, + "num_input_tokens_seen": 51268212, + "step": 766 + }, + { + "epoch": 0.08692198581560284, + "loss": 1.8376942873001099, + "loss_ce": 0.005662995856255293, + "loss_iou": 0.78125, + "loss_num": 0.05419921875, + "loss_xval": 1.828125, + "num_input_tokens_seen": 51268212, + "step": 766 + }, + { + "epoch": 0.0870354609929078, + "grad_norm": 13.571344375610352, + "learning_rate": 5e-05, + "loss": 1.4812, + "num_input_tokens_seen": 51335660, + "step": 767 + }, + { + "epoch": 0.0870354609929078, + "loss": 1.6855804920196533, + "loss_ce": 0.008822662755846977, + "loss_iou": 0.7265625, + "loss_num": 0.04443359375, + "loss_xval": 1.6796875, + "num_input_tokens_seen": 51335660, + "step": 767 + }, + { + "epoch": 0.08714893617021277, + "grad_norm": 20.61594009399414, + "learning_rate": 5e-05, + "loss": 1.532, + "num_input_tokens_seen": 51401668, + "step": 768 + }, + { + "epoch": 0.08714893617021277, + "loss": 1.5734541416168213, + "loss_ce": 0.0027144551277160645, + "loss_iou": 0.67578125, + "loss_num": 0.04443359375, + "loss_xval": 1.5703125, + "num_input_tokens_seen": 51401668, + "step": 768 + }, + { + "epoch": 0.08726241134751773, + "grad_norm": 17.899015426635742, + "learning_rate": 5e-05, + "loss": 1.4982, + "num_input_tokens_seen": 51469000, + "step": 769 + }, + { + "epoch": 0.08726241134751773, + "loss": 1.499408483505249, + "loss_ce": 0.006244363728910685, + "loss_iou": 0.62890625, + "loss_num": 0.047119140625, + "loss_xval": 1.4921875, + "num_input_tokens_seen": 51469000, + "step": 769 + }, + { + "epoch": 0.0873758865248227, + "grad_norm": 18.31424903869629, + "learning_rate": 5e-05, + "loss": 1.6725, + "num_input_tokens_seen": 51536848, + "step": 770 + }, + { + "epoch": 0.0873758865248227, + "loss": 1.7936346530914307, + "loss_ce": 0.0026190278586000204, + "loss_iou": 0.765625, + "loss_num": 0.052001953125, + "loss_xval": 1.7890625, + "num_input_tokens_seen": 51536848, + "step": 770 + }, + { + "epoch": 0.08748936170212766, + "grad_norm": 16.443689346313477, + "learning_rate": 5e-05, + "loss": 1.6107, + "num_input_tokens_seen": 51605028, + "step": 771 + }, + { + "epoch": 0.08748936170212766, + "loss": 1.4856681823730469, + "loss_ce": 0.005687798373401165, + "loss_iou": 0.640625, + "loss_num": 0.040283203125, + "loss_xval": 1.4765625, + "num_input_tokens_seen": 51605028, + "step": 771 + }, + { + "epoch": 0.08760283687943263, + "grad_norm": 28.619789123535156, + "learning_rate": 5e-05, + "loss": 1.3842, + "num_input_tokens_seen": 51672516, + "step": 772 + }, + { + "epoch": 0.08760283687943263, + "loss": 1.3776134252548218, + "loss_ce": 0.0026134110521525145, + "loss_iou": 0.60546875, + "loss_num": 0.03271484375, + "loss_xval": 1.375, + "num_input_tokens_seen": 51672516, + "step": 772 + }, + { + "epoch": 0.08771631205673759, + "grad_norm": 15.28627872467041, + "learning_rate": 5e-05, + "loss": 1.5755, + "num_input_tokens_seen": 51737740, + "step": 773 + }, + { + "epoch": 0.08771631205673759, + "loss": 1.450331687927246, + "loss_ce": 0.004958034493029118, + "loss_iou": 0.6015625, + "loss_num": 0.048583984375, + "loss_xval": 1.4453125, + "num_input_tokens_seen": 51737740, + "step": 773 + }, + { + "epoch": 0.08782978723404256, + "grad_norm": 18.55095863342285, + "learning_rate": 5e-05, + "loss": 1.645, + "num_input_tokens_seen": 51805056, + "step": 774 + }, + { + "epoch": 0.08782978723404256, + "loss": 1.8248589038848877, + "loss_ce": 0.009429270401597023, + "loss_iou": 0.75390625, + "loss_num": 0.060791015625, + "loss_xval": 1.8125, + "num_input_tokens_seen": 51805056, + "step": 774 + }, + { + "epoch": 0.08794326241134752, + "grad_norm": 297.16156005859375, + "learning_rate": 5e-05, + "loss": 1.5584, + "num_input_tokens_seen": 51872032, + "step": 775 + }, + { + "epoch": 0.08794326241134752, + "loss": 1.6126866340637207, + "loss_ce": 0.0033116918057203293, + "loss_iou": 0.703125, + "loss_num": 0.041259765625, + "loss_xval": 1.609375, + "num_input_tokens_seen": 51872032, + "step": 775 + }, + { + "epoch": 0.08805673758865248, + "grad_norm": 17.563514709472656, + "learning_rate": 5e-05, + "loss": 1.767, + "num_input_tokens_seen": 51938476, + "step": 776 + }, + { + "epoch": 0.08805673758865248, + "loss": 1.7161575555801392, + "loss_ce": 0.006196639034897089, + "loss_iou": 0.73046875, + "loss_num": 0.05029296875, + "loss_xval": 1.7109375, + "num_input_tokens_seen": 51938476, + "step": 776 + }, + { + "epoch": 0.08817021276595745, + "grad_norm": 15.906930923461914, + "learning_rate": 5e-05, + "loss": 1.3709, + "num_input_tokens_seen": 52005500, + "step": 777 + }, + { + "epoch": 0.08817021276595745, + "loss": 1.544050693511963, + "loss_ce": 0.004988114349544048, + "loss_iou": 0.6484375, + "loss_num": 0.048095703125, + "loss_xval": 1.5390625, + "num_input_tokens_seen": 52005500, + "step": 777 + }, + { + "epoch": 0.08828368794326241, + "grad_norm": 20.823074340820312, + "learning_rate": 5e-05, + "loss": 1.7504, + "num_input_tokens_seen": 52072120, + "step": 778 + }, + { + "epoch": 0.08828368794326241, + "loss": 1.7718614339828491, + "loss_ce": 0.007212874013930559, + "loss_iou": 0.7734375, + "loss_num": 0.042724609375, + "loss_xval": 1.765625, + "num_input_tokens_seen": 52072120, + "step": 778 + }, + { + "epoch": 0.08839716312056738, + "grad_norm": 27.406944274902344, + "learning_rate": 5e-05, + "loss": 1.6182, + "num_input_tokens_seen": 52139704, + "step": 779 + }, + { + "epoch": 0.08839716312056738, + "loss": 1.9171180725097656, + "loss_ce": 0.004032071679830551, + "loss_iou": 0.84765625, + "loss_num": 0.042724609375, + "loss_xval": 1.9140625, + "num_input_tokens_seen": 52139704, + "step": 779 + }, + { + "epoch": 0.08851063829787234, + "grad_norm": 18.435264587402344, + "learning_rate": 5e-05, + "loss": 1.7326, + "num_input_tokens_seen": 52207076, + "step": 780 + }, + { + "epoch": 0.08851063829787234, + "loss": 1.6159323453903198, + "loss_ce": 0.006557329557836056, + "loss_iou": 0.7109375, + "loss_num": 0.03759765625, + "loss_xval": 1.609375, + "num_input_tokens_seen": 52207076, + "step": 780 + }, + { + "epoch": 0.0886241134751773, + "grad_norm": 11.830121040344238, + "learning_rate": 5e-05, + "loss": 1.2875, + "num_input_tokens_seen": 52273116, + "step": 781 + }, + { + "epoch": 0.0886241134751773, + "loss": 1.179785966873169, + "loss_ce": 0.004981283098459244, + "loss_iou": 0.44921875, + "loss_num": 0.055419921875, + "loss_xval": 1.171875, + "num_input_tokens_seen": 52273116, + "step": 781 + }, + { + "epoch": 0.08873758865248227, + "grad_norm": 49.63690185546875, + "learning_rate": 5e-05, + "loss": 1.5919, + "num_input_tokens_seen": 52340164, + "step": 782 + }, + { + "epoch": 0.08873758865248227, + "loss": 1.4546170234680176, + "loss_ce": 0.005886581260710955, + "loss_iou": 0.62890625, + "loss_num": 0.03857421875, + "loss_xval": 1.4453125, + "num_input_tokens_seen": 52340164, + "step": 782 + }, + { + "epoch": 0.08885106382978723, + "grad_norm": 18.686660766601562, + "learning_rate": 5e-05, + "loss": 1.4578, + "num_input_tokens_seen": 52408076, + "step": 783 + }, + { + "epoch": 0.08885106382978723, + "loss": 1.5704801082611084, + "loss_ce": 0.0021207660902291536, + "loss_iou": 0.6796875, + "loss_num": 0.04248046875, + "loss_xval": 1.5703125, + "num_input_tokens_seen": 52408076, + "step": 783 + }, + { + "epoch": 0.0889645390070922, + "grad_norm": 20.594905853271484, + "learning_rate": 5e-05, + "loss": 1.4181, + "num_input_tokens_seen": 52475352, + "step": 784 + }, + { + "epoch": 0.0889645390070922, + "loss": 1.279883623123169, + "loss_ce": 0.005469520576298237, + "loss_iou": 0.53515625, + "loss_num": 0.040771484375, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 52475352, + "step": 784 + }, + { + "epoch": 0.08907801418439716, + "grad_norm": 33.4904899597168, + "learning_rate": 5e-05, + "loss": 1.7938, + "num_input_tokens_seen": 52542136, + "step": 785 + }, + { + "epoch": 0.08907801418439716, + "loss": 1.9261808395385742, + "loss_ce": 0.004305732902139425, + "loss_iou": 0.859375, + "loss_num": 0.041015625, + "loss_xval": 1.921875, + "num_input_tokens_seen": 52542136, + "step": 785 + }, + { + "epoch": 0.08919148936170213, + "grad_norm": 16.518077850341797, + "learning_rate": 5e-05, + "loss": 1.7987, + "num_input_tokens_seen": 52609960, + "step": 786 + }, + { + "epoch": 0.08919148936170213, + "loss": 1.817613124847412, + "loss_ce": 0.004136563744395971, + "loss_iou": 0.82421875, + "loss_num": 0.033447265625, + "loss_xval": 1.8125, + "num_input_tokens_seen": 52609960, + "step": 786 + }, + { + "epoch": 0.08930496453900709, + "grad_norm": 14.95706844329834, + "learning_rate": 5e-05, + "loss": 1.7958, + "num_input_tokens_seen": 52676956, + "step": 787 + }, + { + "epoch": 0.08930496453900709, + "loss": 1.8679392337799072, + "loss_ce": 0.004169677384197712, + "loss_iou": 0.7890625, + "loss_num": 0.056884765625, + "loss_xval": 1.8671875, + "num_input_tokens_seen": 52676956, + "step": 787 + }, + { + "epoch": 0.08941843971631205, + "grad_norm": 18.268545150756836, + "learning_rate": 5e-05, + "loss": 1.5143, + "num_input_tokens_seen": 52743524, + "step": 788 + }, + { + "epoch": 0.08941843971631205, + "loss": 1.4463658332824707, + "loss_ce": 0.007889213971793652, + "loss_iou": 0.62109375, + "loss_num": 0.038818359375, + "loss_xval": 1.4375, + "num_input_tokens_seen": 52743524, + "step": 788 + }, + { + "epoch": 0.08953191489361702, + "grad_norm": 20.99711799621582, + "learning_rate": 5e-05, + "loss": 1.7541, + "num_input_tokens_seen": 52810996, + "step": 789 + }, + { + "epoch": 0.08953191489361702, + "loss": 1.8711323738098145, + "loss_ce": 0.008827675133943558, + "loss_iou": 0.78125, + "loss_num": 0.05908203125, + "loss_xval": 1.859375, + "num_input_tokens_seen": 52810996, + "step": 789 + }, + { + "epoch": 0.08964539007092198, + "grad_norm": 30.979150772094727, + "learning_rate": 5e-05, + "loss": 1.6095, + "num_input_tokens_seen": 52877024, + "step": 790 + }, + { + "epoch": 0.08964539007092198, + "loss": 1.5111528635025024, + "loss_ce": 0.002363745355978608, + "loss_iou": 0.6875, + "loss_num": 0.027099609375, + "loss_xval": 1.5078125, + "num_input_tokens_seen": 52877024, + "step": 790 + }, + { + "epoch": 0.08975886524822695, + "grad_norm": 25.440275192260742, + "learning_rate": 5e-05, + "loss": 1.836, + "num_input_tokens_seen": 52944440, + "step": 791 + }, + { + "epoch": 0.08975886524822695, + "loss": 1.9535841941833496, + "loss_ce": 0.006318553350865841, + "loss_iou": 0.83203125, + "loss_num": 0.056884765625, + "loss_xval": 1.9453125, + "num_input_tokens_seen": 52944440, + "step": 791 + }, + { + "epoch": 0.08987234042553191, + "grad_norm": 12.426030158996582, + "learning_rate": 5e-05, + "loss": 1.4652, + "num_input_tokens_seen": 53011324, + "step": 792 + }, + { + "epoch": 0.08987234042553191, + "loss": 1.3815033435821533, + "loss_ce": 0.0024750875309109688, + "loss_iou": 0.5625, + "loss_num": 0.05029296875, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 53011324, + "step": 792 + }, + { + "epoch": 0.08998581560283687, + "grad_norm": 25.717350006103516, + "learning_rate": 5e-05, + "loss": 1.4892, + "num_input_tokens_seen": 53077744, + "step": 793 + }, + { + "epoch": 0.08998581560283687, + "loss": 1.4541170597076416, + "loss_ce": 0.005874848924577236, + "loss_iou": 0.6015625, + "loss_num": 0.048095703125, + "loss_xval": 1.4453125, + "num_input_tokens_seen": 53077744, + "step": 793 + }, + { + "epoch": 0.09009929078014184, + "grad_norm": 23.216840744018555, + "learning_rate": 5e-05, + "loss": 1.7282, + "num_input_tokens_seen": 53145104, + "step": 794 + }, + { + "epoch": 0.09009929078014184, + "loss": 1.742894172668457, + "loss_ce": 0.002659738063812256, + "loss_iou": 0.7421875, + "loss_num": 0.050048828125, + "loss_xval": 1.7421875, + "num_input_tokens_seen": 53145104, + "step": 794 + }, + { + "epoch": 0.0902127659574468, + "grad_norm": 22.56049156188965, + "learning_rate": 5e-05, + "loss": 1.4995, + "num_input_tokens_seen": 53212216, + "step": 795 + }, + { + "epoch": 0.0902127659574468, + "loss": 1.5821959972381592, + "loss_ce": 0.005047498736530542, + "loss_iou": 0.6796875, + "loss_num": 0.044189453125, + "loss_xval": 1.578125, + "num_input_tokens_seen": 53212216, + "step": 795 + }, + { + "epoch": 0.09032624113475177, + "grad_norm": 18.144521713256836, + "learning_rate": 5e-05, + "loss": 1.6556, + "num_input_tokens_seen": 53277184, + "step": 796 + }, + { + "epoch": 0.09032624113475177, + "loss": 1.6488068103790283, + "loss_ce": 0.01208798959851265, + "loss_iou": 0.703125, + "loss_num": 0.046630859375, + "loss_xval": 1.640625, + "num_input_tokens_seen": 53277184, + "step": 796 + }, + { + "epoch": 0.09043971631205673, + "grad_norm": 96.45632934570312, + "learning_rate": 5e-05, + "loss": 1.5055, + "num_input_tokens_seen": 53344080, + "step": 797 + }, + { + "epoch": 0.09043971631205673, + "loss": 1.4548461437225342, + "loss_ce": 0.005627441219985485, + "loss_iou": 0.59375, + "loss_num": 0.05224609375, + "loss_xval": 1.453125, + "num_input_tokens_seen": 53344080, + "step": 797 + }, + { + "epoch": 0.09055319148936171, + "grad_norm": 23.396320343017578, + "learning_rate": 5e-05, + "loss": 1.6745, + "num_input_tokens_seen": 53411636, + "step": 798 + }, + { + "epoch": 0.09055319148936171, + "loss": 1.6814558506011963, + "loss_ce": 0.009580906480550766, + "loss_iou": 0.71484375, + "loss_num": 0.048095703125, + "loss_xval": 1.671875, + "num_input_tokens_seen": 53411636, + "step": 798 + }, + { + "epoch": 0.09066666666666667, + "grad_norm": 17.971681594848633, + "learning_rate": 5e-05, + "loss": 1.5637, + "num_input_tokens_seen": 53478980, + "step": 799 + }, + { + "epoch": 0.09066666666666667, + "loss": 1.5615715980529785, + "loss_ce": 0.004381589125841856, + "loss_iou": 0.6484375, + "loss_num": 0.052001953125, + "loss_xval": 1.5546875, + "num_input_tokens_seen": 53478980, + "step": 799 + }, + { + "epoch": 0.09078014184397164, + "grad_norm": 17.585458755493164, + "learning_rate": 5e-05, + "loss": 1.6275, + "num_input_tokens_seen": 53546332, + "step": 800 + }, + { + "epoch": 0.09078014184397164, + "loss": 1.5641915798187256, + "loss_ce": 0.0046212137676775455, + "loss_iou": 0.6796875, + "loss_num": 0.03955078125, + "loss_xval": 1.5625, + "num_input_tokens_seen": 53546332, + "step": 800 + }, + { + "epoch": 0.0908936170212766, + "grad_norm": 31.67969512939453, + "learning_rate": 5e-05, + "loss": 1.4374, + "num_input_tokens_seen": 53611268, + "step": 801 + }, + { + "epoch": 0.0908936170212766, + "loss": 1.5715404748916626, + "loss_ce": 0.005134223960340023, + "loss_iou": 0.69140625, + "loss_num": 0.0361328125, + "loss_xval": 1.5625, + "num_input_tokens_seen": 53611268, + "step": 801 + }, + { + "epoch": 0.09100709219858157, + "grad_norm": 42.907127380371094, + "learning_rate": 5e-05, + "loss": 1.956, + "num_input_tokens_seen": 53677744, + "step": 802 + }, + { + "epoch": 0.09100709219858157, + "loss": 1.8957459926605225, + "loss_ce": 0.004144371021538973, + "loss_iou": 0.8125, + "loss_num": 0.052734375, + "loss_xval": 1.890625, + "num_input_tokens_seen": 53677744, + "step": 802 + }, + { + "epoch": 0.09112056737588653, + "grad_norm": 20.152509689331055, + "learning_rate": 5e-05, + "loss": 1.3844, + "num_input_tokens_seen": 53744736, + "step": 803 + }, + { + "epoch": 0.09112056737588653, + "loss": 1.381718397140503, + "loss_ce": 0.004765315912663937, + "loss_iou": 0.625, + "loss_num": 0.0255126953125, + "loss_xval": 1.375, + "num_input_tokens_seen": 53744736, + "step": 803 + }, + { + "epoch": 0.0912340425531915, + "grad_norm": 40.77336120605469, + "learning_rate": 5e-05, + "loss": 1.7435, + "num_input_tokens_seen": 53812168, + "step": 804 + }, + { + "epoch": 0.0912340425531915, + "loss": 1.638660192489624, + "loss_ce": 0.0019414651906117797, + "loss_iou": 0.72265625, + "loss_num": 0.03759765625, + "loss_xval": 1.640625, + "num_input_tokens_seen": 53812168, + "step": 804 + }, + { + "epoch": 0.09134751773049646, + "grad_norm": 16.956029891967773, + "learning_rate": 5e-05, + "loss": 1.8306, + "num_input_tokens_seen": 53879428, + "step": 805 + }, + { + "epoch": 0.09134751773049646, + "loss": 1.8310602903366089, + "loss_ce": 0.004888429772108793, + "loss_iou": 0.79296875, + "loss_num": 0.0478515625, + "loss_xval": 1.828125, + "num_input_tokens_seen": 53879428, + "step": 805 + }, + { + "epoch": 0.09146099290780142, + "grad_norm": 20.882156372070312, + "learning_rate": 5e-05, + "loss": 1.7159, + "num_input_tokens_seen": 53946596, + "step": 806 + }, + { + "epoch": 0.09146099290780142, + "loss": 1.4161516427993774, + "loss_ce": 0.00892513059079647, + "loss_iou": 0.6171875, + "loss_num": 0.034423828125, + "loss_xval": 1.40625, + "num_input_tokens_seen": 53946596, + "step": 806 + }, + { + "epoch": 0.09157446808510639, + "grad_norm": 31.85503387451172, + "learning_rate": 5e-05, + "loss": 1.486, + "num_input_tokens_seen": 54013708, + "step": 807 + }, + { + "epoch": 0.09157446808510639, + "loss": 1.3896002769470215, + "loss_ce": 0.0048346747644245625, + "loss_iou": 0.59375, + "loss_num": 0.039306640625, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 54013708, + "step": 807 + }, + { + "epoch": 0.09168794326241135, + "grad_norm": 56.699317932128906, + "learning_rate": 5e-05, + "loss": 1.6016, + "num_input_tokens_seen": 54081740, + "step": 808 + }, + { + "epoch": 0.09168794326241135, + "loss": 1.5749431848526, + "loss_ce": 0.0036540974397212267, + "loss_iou": 0.703125, + "loss_num": 0.033935546875, + "loss_xval": 1.5703125, + "num_input_tokens_seen": 54081740, + "step": 808 + }, + { + "epoch": 0.09180141843971631, + "grad_norm": 91.795654296875, + "learning_rate": 5e-05, + "loss": 1.8093, + "num_input_tokens_seen": 54148156, + "step": 809 + }, + { + "epoch": 0.09180141843971631, + "loss": 1.8489314317703247, + "loss_ce": 0.004204841330647469, + "loss_iou": 0.78515625, + "loss_num": 0.054931640625, + "loss_xval": 1.84375, + "num_input_tokens_seen": 54148156, + "step": 809 + }, + { + "epoch": 0.09191489361702128, + "grad_norm": 18.249204635620117, + "learning_rate": 5e-05, + "loss": 1.9335, + "num_input_tokens_seen": 54214472, + "step": 810 + }, + { + "epoch": 0.09191489361702128, + "loss": 1.8565943241119385, + "loss_ce": 0.00600834097713232, + "loss_iou": 0.82421875, + "loss_num": 0.0400390625, + "loss_xval": 1.8515625, + "num_input_tokens_seen": 54214472, + "step": 810 + }, + { + "epoch": 0.09202836879432624, + "grad_norm": 21.967472076416016, + "learning_rate": 5e-05, + "loss": 1.4951, + "num_input_tokens_seen": 54281784, + "step": 811 + }, + { + "epoch": 0.09202836879432624, + "loss": 1.3790892362594604, + "loss_ce": 0.0050658052787184715, + "loss_iou": 0.640625, + "loss_num": 0.018310546875, + "loss_xval": 1.375, + "num_input_tokens_seen": 54281784, + "step": 811 + }, + { + "epoch": 0.09214184397163121, + "grad_norm": 33.2818489074707, + "learning_rate": 5e-05, + "loss": 1.743, + "num_input_tokens_seen": 54349708, + "step": 812 + }, + { + "epoch": 0.09214184397163121, + "loss": 1.5917787551879883, + "loss_ce": 0.006817752029746771, + "loss_iou": 0.71484375, + "loss_num": 0.031494140625, + "loss_xval": 1.5859375, + "num_input_tokens_seen": 54349708, + "step": 812 + }, + { + "epoch": 0.09225531914893617, + "grad_norm": 19.342716217041016, + "learning_rate": 5e-05, + "loss": 1.8352, + "num_input_tokens_seen": 54416316, + "step": 813 + }, + { + "epoch": 0.09225531914893617, + "loss": 1.9130656719207764, + "loss_ce": 0.005839050747454166, + "loss_iou": 0.83203125, + "loss_num": 0.048828125, + "loss_xval": 1.90625, + "num_input_tokens_seen": 54416316, + "step": 813 + }, + { + "epoch": 0.09236879432624114, + "grad_norm": 13.55836009979248, + "learning_rate": 5e-05, + "loss": 1.4809, + "num_input_tokens_seen": 54483464, + "step": 814 + }, + { + "epoch": 0.09236879432624114, + "loss": 1.6139030456542969, + "loss_ce": 0.002574947662651539, + "loss_iou": 0.69921875, + "loss_num": 0.042236328125, + "loss_xval": 1.609375, + "num_input_tokens_seen": 54483464, + "step": 814 + }, + { + "epoch": 0.0924822695035461, + "grad_norm": 19.430496215820312, + "learning_rate": 5e-05, + "loss": 1.2848, + "num_input_tokens_seen": 54550668, + "step": 815 + }, + { + "epoch": 0.0924822695035461, + "loss": 1.3044404983520508, + "loss_ce": 0.010006858967244625, + "loss_iou": 0.5703125, + "loss_num": 0.03076171875, + "loss_xval": 1.296875, + "num_input_tokens_seen": 54550668, + "step": 815 + }, + { + "epoch": 0.09259574468085106, + "grad_norm": 23.241497039794922, + "learning_rate": 5e-05, + "loss": 1.6463, + "num_input_tokens_seen": 54617716, + "step": 816 + }, + { + "epoch": 0.09259574468085106, + "loss": 1.4167349338531494, + "loss_ce": 0.004625573754310608, + "loss_iou": 0.5859375, + "loss_num": 0.047607421875, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 54617716, + "step": 816 + }, + { + "epoch": 0.09270921985815603, + "grad_norm": 27.60972023010254, + "learning_rate": 5e-05, + "loss": 1.3969, + "num_input_tokens_seen": 54684152, + "step": 817 + }, + { + "epoch": 0.09270921985815603, + "loss": 1.308793544769287, + "loss_ce": 0.002366553293541074, + "loss_iou": 0.5546875, + "loss_num": 0.040283203125, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 54684152, + "step": 817 + }, + { + "epoch": 0.09282269503546099, + "grad_norm": 17.119647979736328, + "learning_rate": 5e-05, + "loss": 1.3524, + "num_input_tokens_seen": 54750848, + "step": 818 + }, + { + "epoch": 0.09282269503546099, + "loss": 1.3363196849822998, + "loss_ce": 0.008194759488105774, + "loss_iou": 0.59765625, + "loss_num": 0.026123046875, + "loss_xval": 1.328125, + "num_input_tokens_seen": 54750848, + "step": 818 + }, + { + "epoch": 0.09293617021276596, + "grad_norm": 26.20050048828125, + "learning_rate": 5e-05, + "loss": 1.611, + "num_input_tokens_seen": 54817616, + "step": 819 + }, + { + "epoch": 0.09293617021276596, + "loss": 1.6719894409179688, + "loss_ce": 0.00743860425427556, + "loss_iou": 0.6953125, + "loss_num": 0.05419921875, + "loss_xval": 1.6640625, + "num_input_tokens_seen": 54817616, + "step": 819 + }, + { + "epoch": 0.09304964539007092, + "grad_norm": 29.775102615356445, + "learning_rate": 5e-05, + "loss": 1.764, + "num_input_tokens_seen": 54885392, + "step": 820 + }, + { + "epoch": 0.09304964539007092, + "loss": 1.9835455417633057, + "loss_ce": 0.006006523035466671, + "loss_iou": 0.8671875, + "loss_num": 0.04833984375, + "loss_xval": 1.9765625, + "num_input_tokens_seen": 54885392, + "step": 820 + }, + { + "epoch": 0.09316312056737588, + "grad_norm": 22.858633041381836, + "learning_rate": 5e-05, + "loss": 1.6432, + "num_input_tokens_seen": 54952640, + "step": 821 + }, + { + "epoch": 0.09316312056737588, + "loss": 1.6680117845535278, + "loss_ce": 0.005902391858398914, + "loss_iou": 0.7109375, + "loss_num": 0.047607421875, + "loss_xval": 1.6640625, + "num_input_tokens_seen": 54952640, + "step": 821 + }, + { + "epoch": 0.09327659574468085, + "grad_norm": 33.157325744628906, + "learning_rate": 5e-05, + "loss": 1.7225, + "num_input_tokens_seen": 55019160, + "step": 822 + }, + { + "epoch": 0.09327659574468085, + "loss": 1.6897547245025635, + "loss_ce": 0.004207851365208626, + "loss_iou": 0.71484375, + "loss_num": 0.05078125, + "loss_xval": 1.6875, + "num_input_tokens_seen": 55019160, + "step": 822 + }, + { + "epoch": 0.09339007092198581, + "grad_norm": 17.0765323638916, + "learning_rate": 5e-05, + "loss": 1.4395, + "num_input_tokens_seen": 55085900, + "step": 823 + }, + { + "epoch": 0.09339007092198581, + "loss": 1.536466360092163, + "loss_ce": 0.003263133578002453, + "loss_iou": 0.63671875, + "loss_num": 0.052734375, + "loss_xval": 1.53125, + "num_input_tokens_seen": 55085900, + "step": 823 + }, + { + "epoch": 0.09350354609929078, + "grad_norm": 21.59145164489746, + "learning_rate": 5e-05, + "loss": 1.6422, + "num_input_tokens_seen": 55153376, + "step": 824 + }, + { + "epoch": 0.09350354609929078, + "loss": 1.4936800003051758, + "loss_ce": 0.002469028811901808, + "loss_iou": 0.66796875, + "loss_num": 0.031005859375, + "loss_xval": 1.4921875, + "num_input_tokens_seen": 55153376, + "step": 824 + }, + { + "epoch": 0.09361702127659574, + "grad_norm": 41.78168869018555, + "learning_rate": 5e-05, + "loss": 1.6876, + "num_input_tokens_seen": 55221424, + "step": 825 + }, + { + "epoch": 0.09361702127659574, + "loss": 1.7759054899215698, + "loss_ce": 0.006374244578182697, + "loss_iou": 0.734375, + "loss_num": 0.060791015625, + "loss_xval": 1.765625, + "num_input_tokens_seen": 55221424, + "step": 825 + }, + { + "epoch": 0.0937304964539007, + "grad_norm": 12.205693244934082, + "learning_rate": 5e-05, + "loss": 2.0094, + "num_input_tokens_seen": 55287656, + "step": 826 + }, + { + "epoch": 0.0937304964539007, + "loss": 2.0648109912872314, + "loss_ce": 0.009146813303232193, + "loss_iou": 0.875, + "loss_num": 0.060546875, + "loss_xval": 2.0625, + "num_input_tokens_seen": 55287656, + "step": 826 + }, + { + "epoch": 0.09384397163120567, + "grad_norm": 25.003089904785156, + "learning_rate": 5e-05, + "loss": 1.6687, + "num_input_tokens_seen": 55355840, + "step": 827 + }, + { + "epoch": 0.09384397163120567, + "loss": 1.8172005414962769, + "loss_ce": 0.004700571298599243, + "loss_iou": 0.78515625, + "loss_num": 0.048583984375, + "loss_xval": 1.8125, + "num_input_tokens_seen": 55355840, + "step": 827 + }, + { + "epoch": 0.09395744680851063, + "grad_norm": 12.09736156463623, + "learning_rate": 5e-05, + "loss": 1.603, + "num_input_tokens_seen": 55423060, + "step": 828 + }, + { + "epoch": 0.09395744680851063, + "loss": 1.6144505739212036, + "loss_ce": 0.0060522109270095825, + "loss_iou": 0.6875, + "loss_num": 0.0458984375, + "loss_xval": 1.609375, + "num_input_tokens_seen": 55423060, + "step": 828 + }, + { + "epoch": 0.0940709219858156, + "grad_norm": 24.894054412841797, + "learning_rate": 5e-05, + "loss": 1.6026, + "num_input_tokens_seen": 55488272, + "step": 829 + }, + { + "epoch": 0.0940709219858156, + "loss": 1.4918451309204102, + "loss_ce": 0.003075604559853673, + "loss_iou": 0.60546875, + "loss_num": 0.054931640625, + "loss_xval": 1.4921875, + "num_input_tokens_seen": 55488272, + "step": 829 + }, + { + "epoch": 0.09418439716312056, + "grad_norm": 20.9389705657959, + "learning_rate": 5e-05, + "loss": 1.9407, + "num_input_tokens_seen": 55556092, + "step": 830 + }, + { + "epoch": 0.09418439716312056, + "loss": 1.9161694049835205, + "loss_ce": 0.006989741697907448, + "loss_iou": 0.84765625, + "loss_num": 0.04296875, + "loss_xval": 1.90625, + "num_input_tokens_seen": 55556092, + "step": 830 + }, + { + "epoch": 0.09429787234042553, + "grad_norm": 17.03272247314453, + "learning_rate": 5e-05, + "loss": 1.6077, + "num_input_tokens_seen": 55624164, + "step": 831 + }, + { + "epoch": 0.09429787234042553, + "loss": 1.483842134475708, + "loss_ce": 0.007279572542756796, + "loss_iou": 0.640625, + "loss_num": 0.03955078125, + "loss_xval": 1.4765625, + "num_input_tokens_seen": 55624164, + "step": 831 + }, + { + "epoch": 0.09441134751773049, + "grad_norm": 28.95868682861328, + "learning_rate": 5e-05, + "loss": 1.5372, + "num_input_tokens_seen": 55691916, + "step": 832 + }, + { + "epoch": 0.09441134751773049, + "loss": 1.4816548824310303, + "loss_ce": 0.0031392446253448725, + "loss_iou": 0.6484375, + "loss_num": 0.036865234375, + "loss_xval": 1.4765625, + "num_input_tokens_seen": 55691916, + "step": 832 + }, + { + "epoch": 0.09452482269503545, + "grad_norm": 32.22027587890625, + "learning_rate": 5e-05, + "loss": 1.7496, + "num_input_tokens_seen": 55758232, + "step": 833 + }, + { + "epoch": 0.09452482269503545, + "loss": 1.6144227981567383, + "loss_ce": 0.011395509354770184, + "loss_iou": 0.63671875, + "loss_num": 0.0654296875, + "loss_xval": 1.6015625, + "num_input_tokens_seen": 55758232, + "step": 833 + }, + { + "epoch": 0.09463829787234043, + "grad_norm": 23.651836395263672, + "learning_rate": 5e-05, + "loss": 1.4498, + "num_input_tokens_seen": 55824204, + "step": 834 + }, + { + "epoch": 0.09463829787234043, + "loss": 1.4191510677337646, + "loss_ce": 0.0016705549787729979, + "loss_iou": 0.609375, + "loss_num": 0.0390625, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 55824204, + "step": 834 + }, + { + "epoch": 0.0947517730496454, + "grad_norm": 49.97290802001953, + "learning_rate": 5e-05, + "loss": 1.8156, + "num_input_tokens_seen": 55891028, + "step": 835 + }, + { + "epoch": 0.0947517730496454, + "loss": 1.7861336469650269, + "loss_ce": 0.008789919316768646, + "loss_iou": 0.75, + "loss_num": 0.054931640625, + "loss_xval": 1.78125, + "num_input_tokens_seen": 55891028, + "step": 835 + }, + { + "epoch": 0.09486524822695036, + "grad_norm": 23.137989044189453, + "learning_rate": 5e-05, + "loss": 1.4204, + "num_input_tokens_seen": 55956904, + "step": 836 + }, + { + "epoch": 0.09486524822695036, + "loss": 1.3993959426879883, + "loss_ce": 0.0024232580326497555, + "loss_iou": 0.6171875, + "loss_num": 0.031982421875, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 55956904, + "step": 836 + }, + { + "epoch": 0.09497872340425532, + "grad_norm": 18.25508689880371, + "learning_rate": 5e-05, + "loss": 1.6392, + "num_input_tokens_seen": 56023608, + "step": 837 + }, + { + "epoch": 0.09497872340425532, + "loss": 1.7368724346160889, + "loss_ce": 0.005427085794508457, + "loss_iou": 0.74609375, + "loss_num": 0.04833984375, + "loss_xval": 1.734375, + "num_input_tokens_seen": 56023608, + "step": 837 + }, + { + "epoch": 0.09509219858156029, + "grad_norm": 17.95037841796875, + "learning_rate": 5e-05, + "loss": 1.4017, + "num_input_tokens_seen": 56089908, + "step": 838 + }, + { + "epoch": 0.09509219858156029, + "loss": 1.291303277015686, + "loss_ce": 0.00565876392647624, + "loss_iou": 0.5546875, + "loss_num": 0.03564453125, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 56089908, + "step": 838 + }, + { + "epoch": 0.09520567375886525, + "grad_norm": 30.767669677734375, + "learning_rate": 5e-05, + "loss": 1.5926, + "num_input_tokens_seen": 56157720, + "step": 839 + }, + { + "epoch": 0.09520567375886525, + "loss": 1.7276768684387207, + "loss_ce": 0.008926905691623688, + "loss_iou": 0.7109375, + "loss_num": 0.058837890625, + "loss_xval": 1.71875, + "num_input_tokens_seen": 56157720, + "step": 839 + }, + { + "epoch": 0.09531914893617022, + "grad_norm": 20.84957504272461, + "learning_rate": 5e-05, + "loss": 1.9966, + "num_input_tokens_seen": 56224764, + "step": 840 + }, + { + "epoch": 0.09531914893617022, + "loss": 2.1027610301971436, + "loss_ce": 0.0090109808370471, + "loss_iou": 0.90234375, + "loss_num": 0.057373046875, + "loss_xval": 2.09375, + "num_input_tokens_seen": 56224764, + "step": 840 + }, + { + "epoch": 0.09543262411347518, + "grad_norm": 18.69204330444336, + "learning_rate": 5e-05, + "loss": 1.4662, + "num_input_tokens_seen": 56292172, + "step": 841 + }, + { + "epoch": 0.09543262411347518, + "loss": 1.3895002603530884, + "loss_ce": 0.006687708664685488, + "loss_iou": 0.6328125, + "loss_num": 0.023681640625, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 56292172, + "step": 841 + }, + { + "epoch": 0.09554609929078015, + "grad_norm": 27.299137115478516, + "learning_rate": 5e-05, + "loss": 1.3396, + "num_input_tokens_seen": 56358388, + "step": 842 + }, + { + "epoch": 0.09554609929078015, + "loss": 1.4001953601837158, + "loss_ce": 0.00566408084705472, + "loss_iou": 0.61328125, + "loss_num": 0.033203125, + "loss_xval": 1.390625, + "num_input_tokens_seen": 56358388, + "step": 842 + }, + { + "epoch": 0.09565957446808511, + "grad_norm": 21.42784881591797, + "learning_rate": 5e-05, + "loss": 1.7548, + "num_input_tokens_seen": 56425588, + "step": 843 + }, + { + "epoch": 0.09565957446808511, + "loss": 1.8288178443908691, + "loss_ce": 0.005575614981353283, + "loss_iou": 0.81640625, + "loss_num": 0.0380859375, + "loss_xval": 1.8203125, + "num_input_tokens_seen": 56425588, + "step": 843 + }, + { + "epoch": 0.09577304964539007, + "grad_norm": 27.521541595458984, + "learning_rate": 5e-05, + "loss": 1.4779, + "num_input_tokens_seen": 56494136, + "step": 844 + }, + { + "epoch": 0.09577304964539007, + "loss": 1.5311412811279297, + "loss_ce": 0.0028209530282765627, + "loss_iou": 0.671875, + "loss_num": 0.0361328125, + "loss_xval": 1.53125, + "num_input_tokens_seen": 56494136, + "step": 844 + }, + { + "epoch": 0.09588652482269504, + "grad_norm": 17.3616943359375, + "learning_rate": 5e-05, + "loss": 1.5568, + "num_input_tokens_seen": 56561468, + "step": 845 + }, + { + "epoch": 0.09588652482269504, + "loss": 1.6194829940795898, + "loss_ce": 0.007178194355219603, + "loss_iou": 0.6484375, + "loss_num": 0.06298828125, + "loss_xval": 1.609375, + "num_input_tokens_seen": 56561468, + "step": 845 + }, + { + "epoch": 0.096, + "grad_norm": 11.454914093017578, + "learning_rate": 5e-05, + "loss": 1.4343, + "num_input_tokens_seen": 56628028, + "step": 846 + }, + { + "epoch": 0.096, + "loss": 1.4420384168624878, + "loss_ce": 0.003073640400543809, + "loss_iou": 0.60546875, + "loss_num": 0.04541015625, + "loss_xval": 1.4375, + "num_input_tokens_seen": 56628028, + "step": 846 + }, + { + "epoch": 0.09611347517730497, + "grad_norm": 20.56400489807129, + "learning_rate": 5e-05, + "loss": 1.4272, + "num_input_tokens_seen": 56694876, + "step": 847 + }, + { + "epoch": 0.09611347517730497, + "loss": 1.4293917417526245, + "loss_ce": 0.001657345099374652, + "loss_iou": 0.625, + "loss_num": 0.034912109375, + "loss_xval": 1.4296875, + "num_input_tokens_seen": 56694876, + "step": 847 + }, + { + "epoch": 0.09622695035460993, + "grad_norm": 90.2083740234375, + "learning_rate": 5e-05, + "loss": 1.7606, + "num_input_tokens_seen": 56761612, + "step": 848 + }, + { + "epoch": 0.09622695035460993, + "loss": 1.675736904144287, + "loss_ce": 0.00483839912340045, + "loss_iou": 0.73046875, + "loss_num": 0.041748046875, + "loss_xval": 1.671875, + "num_input_tokens_seen": 56761612, + "step": 848 + }, + { + "epoch": 0.0963404255319149, + "grad_norm": 20.990720748901367, + "learning_rate": 5e-05, + "loss": 1.6111, + "num_input_tokens_seen": 56828352, + "step": 849 + }, + { + "epoch": 0.0963404255319149, + "loss": 1.744335651397705, + "loss_ce": 0.006054351106286049, + "loss_iou": 0.734375, + "loss_num": 0.053466796875, + "loss_xval": 1.734375, + "num_input_tokens_seen": 56828352, + "step": 849 + }, + { + "epoch": 0.09645390070921986, + "grad_norm": 32.14729309082031, + "learning_rate": 5e-05, + "loss": 1.5696, + "num_input_tokens_seen": 56896448, + "step": 850 + }, + { + "epoch": 0.09645390070921986, + "loss": 1.6601343154907227, + "loss_ce": 0.00632573664188385, + "loss_iou": 0.6796875, + "loss_num": 0.05859375, + "loss_xval": 1.65625, + "num_input_tokens_seen": 56896448, + "step": 850 + }, + { + "epoch": 0.09656737588652482, + "grad_norm": 23.01508903503418, + "learning_rate": 5e-05, + "loss": 1.8103, + "num_input_tokens_seen": 56964724, + "step": 851 + }, + { + "epoch": 0.09656737588652482, + "loss": 1.6989471912384033, + "loss_ce": 0.005587805062532425, + "loss_iou": 0.74609375, + "loss_num": 0.040771484375, + "loss_xval": 1.6953125, + "num_input_tokens_seen": 56964724, + "step": 851 + }, + { + "epoch": 0.09668085106382979, + "grad_norm": 33.06578063964844, + "learning_rate": 5e-05, + "loss": 1.3147, + "num_input_tokens_seen": 57032300, + "step": 852 + }, + { + "epoch": 0.09668085106382979, + "loss": 1.4392099380493164, + "loss_ce": 0.005127930082380772, + "loss_iou": 0.6328125, + "loss_num": 0.03369140625, + "loss_xval": 1.4375, + "num_input_tokens_seen": 57032300, + "step": 852 + }, + { + "epoch": 0.09679432624113475, + "grad_norm": 22.462114334106445, + "learning_rate": 5e-05, + "loss": 1.8455, + "num_input_tokens_seen": 57098512, + "step": 853 + }, + { + "epoch": 0.09679432624113475, + "loss": 1.9380261898040771, + "loss_ce": 0.0073620630428195, + "loss_iou": 0.83203125, + "loss_num": 0.052978515625, + "loss_xval": 1.9296875, + "num_input_tokens_seen": 57098512, + "step": 853 + }, + { + "epoch": 0.09690780141843972, + "grad_norm": 22.937379837036133, + "learning_rate": 5e-05, + "loss": 1.5375, + "num_input_tokens_seen": 57165868, + "step": 854 + }, + { + "epoch": 0.09690780141843972, + "loss": 1.5897343158721924, + "loss_ce": 0.0037967469543218613, + "loss_iou": 0.6953125, + "loss_num": 0.039794921875, + "loss_xval": 1.5859375, + "num_input_tokens_seen": 57165868, + "step": 854 + }, + { + "epoch": 0.09702127659574468, + "grad_norm": 44.4610710144043, + "learning_rate": 5e-05, + "loss": 1.5181, + "num_input_tokens_seen": 57232032, + "step": 855 + }, + { + "epoch": 0.09702127659574468, + "loss": 1.4594223499298096, + "loss_ce": 0.002391199581325054, + "loss_iou": 0.62890625, + "loss_num": 0.0400390625, + "loss_xval": 1.453125, + "num_input_tokens_seen": 57232032, + "step": 855 + }, + { + "epoch": 0.09713475177304964, + "grad_norm": 24.804996490478516, + "learning_rate": 5e-05, + "loss": 1.9102, + "num_input_tokens_seen": 57298764, + "step": 856 + }, + { + "epoch": 0.09713475177304964, + "loss": 1.8566228151321411, + "loss_ce": 0.006036859005689621, + "loss_iou": 0.83984375, + "loss_num": 0.03466796875, + "loss_xval": 1.8515625, + "num_input_tokens_seen": 57298764, + "step": 856 + }, + { + "epoch": 0.09724822695035461, + "grad_norm": 16.310985565185547, + "learning_rate": 5e-05, + "loss": 1.7579, + "num_input_tokens_seen": 57365608, + "step": 857 + }, + { + "epoch": 0.09724822695035461, + "loss": 1.8433642387390137, + "loss_ce": 0.007426684722304344, + "loss_iou": 0.81640625, + "loss_num": 0.041259765625, + "loss_xval": 1.8359375, + "num_input_tokens_seen": 57365608, + "step": 857 + }, + { + "epoch": 0.09736170212765957, + "grad_norm": 16.020423889160156, + "learning_rate": 5e-05, + "loss": 1.3447, + "num_input_tokens_seen": 57431588, + "step": 858 + }, + { + "epoch": 0.09736170212765957, + "loss": 1.3095628023147583, + "loss_ce": 0.0073167006485164165, + "loss_iou": 0.56640625, + "loss_num": 0.034423828125, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 57431588, + "step": 858 + }, + { + "epoch": 0.09747517730496454, + "grad_norm": 36.05369186401367, + "learning_rate": 5e-05, + "loss": 1.5119, + "num_input_tokens_seen": 57498700, + "step": 859 + }, + { + "epoch": 0.09747517730496454, + "loss": 1.267701268196106, + "loss_ce": 0.004029359668493271, + "loss_iou": 0.55078125, + "loss_num": 0.033203125, + "loss_xval": 1.265625, + "num_input_tokens_seen": 57498700, + "step": 859 + }, + { + "epoch": 0.0975886524822695, + "grad_norm": 18.75484275817871, + "learning_rate": 5e-05, + "loss": 1.7625, + "num_input_tokens_seen": 57565216, + "step": 860 + }, + { + "epoch": 0.0975886524822695, + "loss": 1.6322083473205566, + "loss_ce": 0.006231869570910931, + "loss_iou": 0.75, + "loss_num": 0.0263671875, + "loss_xval": 1.625, + "num_input_tokens_seen": 57565216, + "step": 860 + }, + { + "epoch": 0.09770212765957446, + "grad_norm": 27.873546600341797, + "learning_rate": 5e-05, + "loss": 1.6826, + "num_input_tokens_seen": 57633536, + "step": 861 + }, + { + "epoch": 0.09770212765957446, + "loss": 1.5805628299713135, + "loss_ce": 0.005367561709135771, + "loss_iou": 0.69921875, + "loss_num": 0.035400390625, + "loss_xval": 1.578125, + "num_input_tokens_seen": 57633536, + "step": 861 + }, + { + "epoch": 0.09781560283687943, + "grad_norm": 18.129547119140625, + "learning_rate": 5e-05, + "loss": 1.5439, + "num_input_tokens_seen": 57700248, + "step": 862 + }, + { + "epoch": 0.09781560283687943, + "loss": 1.5701566934585571, + "loss_ce": 0.008145034313201904, + "loss_iou": 0.65234375, + "loss_num": 0.0517578125, + "loss_xval": 1.5625, + "num_input_tokens_seen": 57700248, + "step": 862 + }, + { + "epoch": 0.09792907801418439, + "grad_norm": 20.90782356262207, + "learning_rate": 5e-05, + "loss": 1.4136, + "num_input_tokens_seen": 57767920, + "step": 863 + }, + { + "epoch": 0.09792907801418439, + "loss": 1.3815139532089233, + "loss_ce": 0.005537385120987892, + "loss_iou": 0.59375, + "loss_num": 0.03857421875, + "loss_xval": 1.375, + "num_input_tokens_seen": 57767920, + "step": 863 + }, + { + "epoch": 0.09804255319148936, + "grad_norm": 17.702007293701172, + "learning_rate": 5e-05, + "loss": 1.5074, + "num_input_tokens_seen": 57835176, + "step": 864 + }, + { + "epoch": 0.09804255319148936, + "loss": 1.407436490058899, + "loss_ce": 0.004116173833608627, + "loss_iou": 0.62890625, + "loss_num": 0.0296630859375, + "loss_xval": 1.40625, + "num_input_tokens_seen": 57835176, + "step": 864 + }, + { + "epoch": 0.09815602836879432, + "grad_norm": 35.0365104675293, + "learning_rate": 5e-05, + "loss": 1.3945, + "num_input_tokens_seen": 57901800, + "step": 865 + }, + { + "epoch": 0.09815602836879432, + "loss": 1.3120355606079102, + "loss_ce": 0.0014886398566886783, + "loss_iou": 0.5390625, + "loss_num": 0.046875, + "loss_xval": 1.3125, + "num_input_tokens_seen": 57901800, + "step": 865 + }, + { + "epoch": 0.09826950354609928, + "grad_norm": 18.39856719970703, + "learning_rate": 5e-05, + "loss": 1.6643, + "num_input_tokens_seen": 57969932, + "step": 866 + }, + { + "epoch": 0.09826950354609928, + "loss": 1.6784346103668213, + "loss_ce": 0.007536145392805338, + "loss_iou": 0.7421875, + "loss_num": 0.036865234375, + "loss_xval": 1.671875, + "num_input_tokens_seen": 57969932, + "step": 866 + }, + { + "epoch": 0.09838297872340425, + "grad_norm": 17.748214721679688, + "learning_rate": 5e-05, + "loss": 1.47, + "num_input_tokens_seen": 58037784, + "step": 867 + }, + { + "epoch": 0.09838297872340425, + "loss": 1.5890501737594604, + "loss_ce": 0.008972043171525002, + "loss_iou": 0.6875, + "loss_num": 0.040283203125, + "loss_xval": 1.578125, + "num_input_tokens_seen": 58037784, + "step": 867 + }, + { + "epoch": 0.09849645390070921, + "grad_norm": 31.895732879638672, + "learning_rate": 5e-05, + "loss": 1.5606, + "num_input_tokens_seen": 58103644, + "step": 868 + }, + { + "epoch": 0.09849645390070921, + "loss": 1.4891254901885986, + "loss_ce": 0.008290433324873447, + "loss_iou": 0.609375, + "loss_num": 0.052490234375, + "loss_xval": 1.484375, + "num_input_tokens_seen": 58103644, + "step": 868 + }, + { + "epoch": 0.09860992907801419, + "grad_norm": 19.790748596191406, + "learning_rate": 5e-05, + "loss": 1.731, + "num_input_tokens_seen": 58171596, + "step": 869 + }, + { + "epoch": 0.09860992907801419, + "loss": 1.6499003171920776, + "loss_ce": 0.004880791995674372, + "loss_iou": 0.73828125, + "loss_num": 0.033203125, + "loss_xval": 1.6484375, + "num_input_tokens_seen": 58171596, + "step": 869 + }, + { + "epoch": 0.09872340425531916, + "grad_norm": 17.478063583374023, + "learning_rate": 5e-05, + "loss": 1.343, + "num_input_tokens_seen": 58238872, + "step": 870 + }, + { + "epoch": 0.09872340425531916, + "loss": 1.3896980285644531, + "loss_ce": 0.00493237841874361, + "loss_iou": 0.62109375, + "loss_num": 0.0291748046875, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 58238872, + "step": 870 + }, + { + "epoch": 0.09883687943262412, + "grad_norm": 31.929929733276367, + "learning_rate": 5e-05, + "loss": 1.5758, + "num_input_tokens_seen": 58304644, + "step": 871 + }, + { + "epoch": 0.09883687943262412, + "loss": 1.5036602020263672, + "loss_ce": 0.007322383113205433, + "loss_iou": 0.6484375, + "loss_num": 0.039794921875, + "loss_xval": 1.5, + "num_input_tokens_seen": 58304644, + "step": 871 + }, + { + "epoch": 0.09895035460992908, + "grad_norm": 42.264564514160156, + "learning_rate": 5e-05, + "loss": 1.7255, + "num_input_tokens_seen": 58371108, + "step": 872 + }, + { + "epoch": 0.09895035460992908, + "loss": 1.6640915870666504, + "loss_ce": 0.001982156652957201, + "loss_iou": 0.7421875, + "loss_num": 0.034912109375, + "loss_xval": 1.6640625, + "num_input_tokens_seen": 58371108, + "step": 872 + }, + { + "epoch": 0.09906382978723405, + "grad_norm": 13.602814674377441, + "learning_rate": 5e-05, + "loss": 1.494, + "num_input_tokens_seen": 58437224, + "step": 873 + }, + { + "epoch": 0.09906382978723405, + "loss": 1.4920806884765625, + "loss_ce": 0.005752479191869497, + "loss_iou": 0.65625, + "loss_num": 0.03515625, + "loss_xval": 1.484375, + "num_input_tokens_seen": 58437224, + "step": 873 + }, + { + "epoch": 0.09917730496453901, + "grad_norm": 20.358558654785156, + "learning_rate": 5e-05, + "loss": 1.491, + "num_input_tokens_seen": 58503820, + "step": 874 + }, + { + "epoch": 0.09917730496453901, + "loss": 1.4415106773376465, + "loss_ce": 0.004498881753534079, + "loss_iou": 0.640625, + "loss_num": 0.0311279296875, + "loss_xval": 1.4375, + "num_input_tokens_seen": 58503820, + "step": 874 + }, + { + "epoch": 0.09929078014184398, + "grad_norm": 23.66045570373535, + "learning_rate": 5e-05, + "loss": 1.8752, + "num_input_tokens_seen": 58570788, + "step": 875 + }, + { + "epoch": 0.09929078014184398, + "loss": 1.835862398147583, + "loss_ce": 0.010667141526937485, + "loss_iou": 0.76171875, + "loss_num": 0.0595703125, + "loss_xval": 1.828125, + "num_input_tokens_seen": 58570788, + "step": 875 + }, + { + "epoch": 0.09940425531914894, + "grad_norm": 24.66349983215332, + "learning_rate": 5e-05, + "loss": 1.4438, + "num_input_tokens_seen": 58637036, + "step": 876 + }, + { + "epoch": 0.09940425531914894, + "loss": 1.5261316299438477, + "loss_ce": 0.006600457709282637, + "loss_iou": 0.64453125, + "loss_num": 0.046875, + "loss_xval": 1.515625, + "num_input_tokens_seen": 58637036, + "step": 876 + }, + { + "epoch": 0.0995177304964539, + "grad_norm": 35.523162841796875, + "learning_rate": 5e-05, + "loss": 1.7707, + "num_input_tokens_seen": 58703888, + "step": 877 + }, + { + "epoch": 0.0995177304964539, + "loss": 1.8051576614379883, + "loss_ce": 0.0073061538860201836, + "loss_iou": 0.83203125, + "loss_num": 0.0264892578125, + "loss_xval": 1.796875, + "num_input_tokens_seen": 58703888, + "step": 877 + }, + { + "epoch": 0.09963120567375887, + "grad_norm": 17.925031661987305, + "learning_rate": 5e-05, + "loss": 1.9316, + "num_input_tokens_seen": 58770996, + "step": 878 + }, + { + "epoch": 0.09963120567375887, + "loss": 1.9970366954803467, + "loss_ce": 0.0038725549820810556, + "loss_iou": 0.88671875, + "loss_num": 0.043212890625, + "loss_xval": 1.9921875, + "num_input_tokens_seen": 58770996, + "step": 878 + }, + { + "epoch": 0.09974468085106383, + "grad_norm": 19.882434844970703, + "learning_rate": 5e-05, + "loss": 1.4878, + "num_input_tokens_seen": 58836860, + "step": 879 + }, + { + "epoch": 0.09974468085106383, + "loss": 1.5017132759094238, + "loss_ce": 0.006107820197939873, + "loss_iou": 0.66796875, + "loss_num": 0.032470703125, + "loss_xval": 1.4921875, + "num_input_tokens_seen": 58836860, + "step": 879 + }, + { + "epoch": 0.0998581560283688, + "grad_norm": 30.313718795776367, + "learning_rate": 5e-05, + "loss": 1.6811, + "num_input_tokens_seen": 58903712, + "step": 880 + }, + { + "epoch": 0.0998581560283688, + "loss": 1.620743751525879, + "loss_ce": 0.006485968828201294, + "loss_iou": 0.71484375, + "loss_num": 0.037353515625, + "loss_xval": 1.6171875, + "num_input_tokens_seen": 58903712, + "step": 880 + }, + { + "epoch": 0.09997163120567376, + "grad_norm": 69.32795715332031, + "learning_rate": 5e-05, + "loss": 1.716, + "num_input_tokens_seen": 58969852, + "step": 881 + }, + { + "epoch": 0.09997163120567376, + "loss": 1.7467480897903442, + "loss_ce": 0.00602547824382782, + "loss_iou": 0.74609375, + "loss_num": 0.04931640625, + "loss_xval": 1.7421875, + "num_input_tokens_seen": 58969852, + "step": 881 + }, + { + "epoch": 0.10008510638297873, + "grad_norm": 28.238597869873047, + "learning_rate": 5e-05, + "loss": 1.4939, + "num_input_tokens_seen": 59036988, + "step": 882 + }, + { + "epoch": 0.10008510638297873, + "loss": 1.6332108974456787, + "loss_ce": 0.005281249992549419, + "loss_iou": 0.6796875, + "loss_num": 0.0537109375, + "loss_xval": 1.625, + "num_input_tokens_seen": 59036988, + "step": 882 + }, + { + "epoch": 0.10019858156028369, + "grad_norm": 46.2175178527832, + "learning_rate": 5e-05, + "loss": 1.8545, + "num_input_tokens_seen": 59103536, + "step": 883 + }, + { + "epoch": 0.10019858156028369, + "loss": 1.806476354598999, + "loss_ce": 0.002399187535047531, + "loss_iou": 0.78125, + "loss_num": 0.048095703125, + "loss_xval": 1.8046875, + "num_input_tokens_seen": 59103536, + "step": 883 + }, + { + "epoch": 0.10031205673758865, + "grad_norm": 18.944171905517578, + "learning_rate": 5e-05, + "loss": 1.4668, + "num_input_tokens_seen": 59170904, + "step": 884 + }, + { + "epoch": 0.10031205673758865, + "loss": 1.222984790802002, + "loss_ce": 0.005699696019291878, + "loss_iou": 0.546875, + "loss_num": 0.0252685546875, + "loss_xval": 1.21875, + "num_input_tokens_seen": 59170904, + "step": 884 + }, + { + "epoch": 0.10042553191489362, + "grad_norm": 23.227313995361328, + "learning_rate": 5e-05, + "loss": 1.4485, + "num_input_tokens_seen": 59237704, + "step": 885 + }, + { + "epoch": 0.10042553191489362, + "loss": 1.503526210784912, + "loss_ce": 0.006455935537815094, + "loss_iou": 0.66015625, + "loss_num": 0.035400390625, + "loss_xval": 1.5, + "num_input_tokens_seen": 59237704, + "step": 885 + }, + { + "epoch": 0.10053900709219858, + "grad_norm": 39.93471145629883, + "learning_rate": 5e-05, + "loss": 1.775, + "num_input_tokens_seen": 59304720, + "step": 886 + }, + { + "epoch": 0.10053900709219858, + "loss": 1.8191564083099365, + "loss_ce": 0.0047032698057591915, + "loss_iou": 0.78125, + "loss_num": 0.05078125, + "loss_xval": 1.8125, + "num_input_tokens_seen": 59304720, + "step": 886 + }, + { + "epoch": 0.10065248226950355, + "grad_norm": 15.724457740783691, + "learning_rate": 5e-05, + "loss": 1.7058, + "num_input_tokens_seen": 59372044, + "step": 887 + }, + { + "epoch": 0.10065248226950355, + "loss": 1.5735723972320557, + "loss_ce": 0.005823398008942604, + "loss_iou": 0.6640625, + "loss_num": 0.04833984375, + "loss_xval": 1.5703125, + "num_input_tokens_seen": 59372044, + "step": 887 + }, + { + "epoch": 0.10076595744680851, + "grad_norm": 23.819143295288086, + "learning_rate": 5e-05, + "loss": 1.3838, + "num_input_tokens_seen": 59438660, + "step": 888 + }, + { + "epoch": 0.10076595744680851, + "loss": 1.3511333465576172, + "loss_ce": 0.004941939376294613, + "loss_iou": 0.59765625, + "loss_num": 0.0299072265625, + "loss_xval": 1.34375, + "num_input_tokens_seen": 59438660, + "step": 888 + }, + { + "epoch": 0.10087943262411347, + "grad_norm": 18.92305564880371, + "learning_rate": 5e-05, + "loss": 1.502, + "num_input_tokens_seen": 59506848, + "step": 889 + }, + { + "epoch": 0.10087943262411347, + "loss": 1.4711493253707886, + "loss_ce": 0.007282140664756298, + "loss_iou": 0.6328125, + "loss_num": 0.04052734375, + "loss_xval": 1.4609375, + "num_input_tokens_seen": 59506848, + "step": 889 + }, + { + "epoch": 0.10099290780141844, + "grad_norm": 18.321765899658203, + "learning_rate": 5e-05, + "loss": 1.7653, + "num_input_tokens_seen": 59573396, + "step": 890 + }, + { + "epoch": 0.10099290780141844, + "loss": 1.8513023853302002, + "loss_ce": 0.008528854697942734, + "loss_iou": 0.796875, + "loss_num": 0.050537109375, + "loss_xval": 1.84375, + "num_input_tokens_seen": 59573396, + "step": 890 + }, + { + "epoch": 0.1011063829787234, + "grad_norm": 28.603822708129883, + "learning_rate": 5e-05, + "loss": 1.4613, + "num_input_tokens_seen": 59640428, + "step": 891 + }, + { + "epoch": 0.1011063829787234, + "loss": 1.6286158561706543, + "loss_ce": 0.0036158296279609203, + "loss_iou": 0.6953125, + "loss_num": 0.047607421875, + "loss_xval": 1.625, + "num_input_tokens_seen": 59640428, + "step": 891 + }, + { + "epoch": 0.10121985815602837, + "grad_norm": 20.802648544311523, + "learning_rate": 5e-05, + "loss": 1.8469, + "num_input_tokens_seen": 59708252, + "step": 892 + }, + { + "epoch": 0.10121985815602837, + "loss": 1.7525858879089355, + "loss_ce": 0.0045389593578875065, + "loss_iou": 0.75390625, + "loss_num": 0.04833984375, + "loss_xval": 1.75, + "num_input_tokens_seen": 59708252, + "step": 892 + }, + { + "epoch": 0.10133333333333333, + "grad_norm": 12.153145790100098, + "learning_rate": 5e-05, + "loss": 1.3654, + "num_input_tokens_seen": 59775264, + "step": 893 + }, + { + "epoch": 0.10133333333333333, + "loss": 1.4545581340789795, + "loss_ce": 0.006804255302995443, + "loss_iou": 0.609375, + "loss_num": 0.0458984375, + "loss_xval": 1.4453125, + "num_input_tokens_seen": 59775264, + "step": 893 + }, + { + "epoch": 0.1014468085106383, + "grad_norm": 16.20708465576172, + "learning_rate": 5e-05, + "loss": 1.3842, + "num_input_tokens_seen": 59840544, + "step": 894 + }, + { + "epoch": 0.1014468085106383, + "loss": 1.5896589756011963, + "loss_ce": 0.003233234863728285, + "loss_iou": 0.6328125, + "loss_num": 0.06396484375, + "loss_xval": 1.5859375, + "num_input_tokens_seen": 59840544, + "step": 894 + }, + { + "epoch": 0.10156028368794326, + "grad_norm": 31.086328506469727, + "learning_rate": 5e-05, + "loss": 1.8729, + "num_input_tokens_seen": 59907844, + "step": 895 + }, + { + "epoch": 0.10156028368794326, + "loss": 1.9398385286331177, + "loss_ce": 0.003315161680802703, + "loss_iou": 0.83203125, + "loss_num": 0.05419921875, + "loss_xval": 1.9375, + "num_input_tokens_seen": 59907844, + "step": 895 + }, + { + "epoch": 0.10167375886524822, + "grad_norm": 15.264491081237793, + "learning_rate": 5e-05, + "loss": 1.8068, + "num_input_tokens_seen": 59974436, + "step": 896 + }, + { + "epoch": 0.10167375886524822, + "loss": 1.8533861637115479, + "loss_ce": 0.009636146016418934, + "loss_iou": 0.796875, + "loss_num": 0.0498046875, + "loss_xval": 1.84375, + "num_input_tokens_seen": 59974436, + "step": 896 + }, + { + "epoch": 0.10178723404255319, + "grad_norm": 23.90875816345215, + "learning_rate": 5e-05, + "loss": 1.6968, + "num_input_tokens_seen": 60041488, + "step": 897 + }, + { + "epoch": 0.10178723404255319, + "loss": 1.9278690814971924, + "loss_ce": 0.004040875472128391, + "loss_iou": 0.796875, + "loss_num": 0.0654296875, + "loss_xval": 1.921875, + "num_input_tokens_seen": 60041488, + "step": 897 + }, + { + "epoch": 0.10190070921985815, + "grad_norm": 44.71595764160156, + "learning_rate": 5e-05, + "loss": 1.7998, + "num_input_tokens_seen": 60107976, + "step": 898 + }, + { + "epoch": 0.10190070921985815, + "loss": 1.729783296585083, + "loss_ce": 0.00712708430364728, + "loss_iou": 0.7734375, + "loss_num": 0.0341796875, + "loss_xval": 1.71875, + "num_input_tokens_seen": 60107976, + "step": 898 + }, + { + "epoch": 0.10201418439716312, + "grad_norm": 81.52040100097656, + "learning_rate": 5e-05, + "loss": 1.8992, + "num_input_tokens_seen": 60174672, + "step": 899 + }, + { + "epoch": 0.10201418439716312, + "loss": 1.9470680952072144, + "loss_ce": 0.0046853357926011086, + "loss_iou": 0.8515625, + "loss_num": 0.048095703125, + "loss_xval": 1.9453125, + "num_input_tokens_seen": 60174672, + "step": 899 + }, + { + "epoch": 0.10212765957446808, + "grad_norm": 21.822711944580078, + "learning_rate": 5e-05, + "loss": 1.5359, + "num_input_tokens_seen": 60241224, + "step": 900 + }, + { + "epoch": 0.10212765957446808, + "loss": 1.6798338890075684, + "loss_ce": 0.004052627366036177, + "loss_iou": 0.71875, + "loss_num": 0.048583984375, + "loss_xval": 1.671875, + "num_input_tokens_seen": 60241224, + "step": 900 + }, + { + "epoch": 0.10224113475177304, + "grad_norm": 17.455562591552734, + "learning_rate": 5e-05, + "loss": 1.5284, + "num_input_tokens_seen": 60308592, + "step": 901 + }, + { + "epoch": 0.10224113475177304, + "loss": 1.463973045349121, + "loss_ce": 0.004012002144008875, + "loss_iou": 0.640625, + "loss_num": 0.03466796875, + "loss_xval": 1.4609375, + "num_input_tokens_seen": 60308592, + "step": 901 + }, + { + "epoch": 0.10235460992907801, + "grad_norm": 26.125574111938477, + "learning_rate": 5e-05, + "loss": 1.5979, + "num_input_tokens_seen": 60375548, + "step": 902 + }, + { + "epoch": 0.10235460992907801, + "loss": 1.707105278968811, + "loss_ce": 0.003980295732617378, + "loss_iou": 0.76953125, + "loss_num": 0.03271484375, + "loss_xval": 1.703125, + "num_input_tokens_seen": 60375548, + "step": 902 + }, + { + "epoch": 0.10246808510638297, + "grad_norm": 23.547773361206055, + "learning_rate": 5e-05, + "loss": 1.4909, + "num_input_tokens_seen": 60442536, + "step": 903 + }, + { + "epoch": 0.10246808510638297, + "loss": 1.3742583990097046, + "loss_ce": 0.006094346754252911, + "loss_iou": 0.59765625, + "loss_num": 0.033935546875, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 60442536, + "step": 903 + }, + { + "epoch": 0.10258156028368794, + "grad_norm": 28.357574462890625, + "learning_rate": 5e-05, + "loss": 1.8217, + "num_input_tokens_seen": 60509844, + "step": 904 + }, + { + "epoch": 0.10258156028368794, + "loss": 1.843782901763916, + "loss_ce": 0.006868917495012283, + "loss_iou": 0.81640625, + "loss_num": 0.040283203125, + "loss_xval": 1.8359375, + "num_input_tokens_seen": 60509844, + "step": 904 + }, + { + "epoch": 0.10269503546099291, + "grad_norm": 19.474498748779297, + "learning_rate": 5e-05, + "loss": 1.7138, + "num_input_tokens_seen": 60576852, + "step": 905 + }, + { + "epoch": 0.10269503546099291, + "loss": 1.692380428314209, + "loss_ce": 0.006345225498080254, + "loss_iou": 0.74609375, + "loss_num": 0.0390625, + "loss_xval": 1.6875, + "num_input_tokens_seen": 60576852, + "step": 905 + }, + { + "epoch": 0.10280851063829788, + "grad_norm": 17.649824142456055, + "learning_rate": 5e-05, + "loss": 1.301, + "num_input_tokens_seen": 60642556, + "step": 906 + }, + { + "epoch": 0.10280851063829788, + "loss": 1.356646180152893, + "loss_ce": 0.007525085471570492, + "loss_iou": 0.5703125, + "loss_num": 0.04150390625, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 60642556, + "step": 906 + }, + { + "epoch": 0.10292198581560284, + "grad_norm": 17.78717613220215, + "learning_rate": 5e-05, + "loss": 1.4931, + "num_input_tokens_seen": 60709152, + "step": 907 + }, + { + "epoch": 0.10292198581560284, + "loss": 1.443891167640686, + "loss_ce": 0.009320788085460663, + "loss_iou": 0.59765625, + "loss_num": 0.0478515625, + "loss_xval": 1.4375, + "num_input_tokens_seen": 60709152, + "step": 907 + }, + { + "epoch": 0.1030354609929078, + "grad_norm": 21.2817325592041, + "learning_rate": 5e-05, + "loss": 1.3966, + "num_input_tokens_seen": 60776532, + "step": 908 + }, + { + "epoch": 0.1030354609929078, + "loss": 1.5401628017425537, + "loss_ce": 0.0074479239992797375, + "loss_iou": 0.65234375, + "loss_num": 0.0458984375, + "loss_xval": 1.53125, + "num_input_tokens_seen": 60776532, + "step": 908 + }, + { + "epoch": 0.10314893617021277, + "grad_norm": 20.051555633544922, + "learning_rate": 5e-05, + "loss": 1.765, + "num_input_tokens_seen": 60843812, + "step": 909 + }, + { + "epoch": 0.10314893617021277, + "loss": 1.5653290748596191, + "loss_ce": 0.006735365837812424, + "loss_iou": 0.68359375, + "loss_num": 0.03759765625, + "loss_xval": 1.5625, + "num_input_tokens_seen": 60843812, + "step": 909 + }, + { + "epoch": 0.10326241134751774, + "grad_norm": 20.177181243896484, + "learning_rate": 5e-05, + "loss": 1.516, + "num_input_tokens_seen": 60909764, + "step": 910 + }, + { + "epoch": 0.10326241134751774, + "loss": 1.4303252696990967, + "loss_ce": 0.001614270033314824, + "loss_iou": 0.640625, + "loss_num": 0.0302734375, + "loss_xval": 1.4296875, + "num_input_tokens_seen": 60909764, + "step": 910 + }, + { + "epoch": 0.1033758865248227, + "grad_norm": 20.424039840698242, + "learning_rate": 5e-05, + "loss": 1.504, + "num_input_tokens_seen": 60977756, + "step": 911 + }, + { + "epoch": 0.1033758865248227, + "loss": 1.4884953498840332, + "loss_ce": 0.009491406381130219, + "loss_iou": 0.65234375, + "loss_num": 0.034423828125, + "loss_xval": 1.4765625, + "num_input_tokens_seen": 60977756, + "step": 911 + }, + { + "epoch": 0.10348936170212766, + "grad_norm": 19.784936904907227, + "learning_rate": 5e-05, + "loss": 1.6217, + "num_input_tokens_seen": 61045876, + "step": 912 + }, + { + "epoch": 0.10348936170212766, + "loss": 1.5146785974502563, + "loss_ce": 0.009795820340514183, + "loss_iou": 0.65234375, + "loss_num": 0.040283203125, + "loss_xval": 1.5078125, + "num_input_tokens_seen": 61045876, + "step": 912 + }, + { + "epoch": 0.10360283687943263, + "grad_norm": 31.849010467529297, + "learning_rate": 5e-05, + "loss": 1.606, + "num_input_tokens_seen": 61113296, + "step": 913 + }, + { + "epoch": 0.10360283687943263, + "loss": 1.6054539680480957, + "loss_ce": 0.003891567699611187, + "loss_iou": 0.6953125, + "loss_num": 0.042724609375, + "loss_xval": 1.6015625, + "num_input_tokens_seen": 61113296, + "step": 913 + }, + { + "epoch": 0.10371631205673759, + "grad_norm": 19.350252151489258, + "learning_rate": 5e-05, + "loss": 1.6115, + "num_input_tokens_seen": 61179292, + "step": 914 + }, + { + "epoch": 0.10371631205673759, + "loss": 1.4388675689697266, + "loss_ce": 0.005273893475532532, + "loss_iou": 0.6484375, + "loss_num": 0.027587890625, + "loss_xval": 1.4375, + "num_input_tokens_seen": 61179292, + "step": 914 + }, + { + "epoch": 0.10382978723404256, + "grad_norm": 40.16206359863281, + "learning_rate": 5e-05, + "loss": 1.4624, + "num_input_tokens_seen": 61246348, + "step": 915 + }, + { + "epoch": 0.10382978723404256, + "loss": 1.5928733348846436, + "loss_ce": 0.006935757584869862, + "loss_iou": 0.67578125, + "loss_num": 0.047607421875, + "loss_xval": 1.5859375, + "num_input_tokens_seen": 61246348, + "step": 915 + }, + { + "epoch": 0.10394326241134752, + "grad_norm": 18.587316513061523, + "learning_rate": 5e-05, + "loss": 1.4811, + "num_input_tokens_seen": 61313476, + "step": 916 + }, + { + "epoch": 0.10394326241134752, + "loss": 1.4110052585601807, + "loss_ce": 0.003290377324447036, + "loss_iou": 0.640625, + "loss_num": 0.0242919921875, + "loss_xval": 1.40625, + "num_input_tokens_seen": 61313476, + "step": 916 + }, + { + "epoch": 0.10405673758865248, + "grad_norm": 36.84904479980469, + "learning_rate": 5e-05, + "loss": 1.4973, + "num_input_tokens_seen": 61380748, + "step": 917 + }, + { + "epoch": 0.10405673758865248, + "loss": 1.3304578065872192, + "loss_ce": 0.007215614430606365, + "loss_iou": 0.5546875, + "loss_num": 0.0419921875, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 61380748, + "step": 917 + }, + { + "epoch": 0.10417021276595745, + "grad_norm": 25.050100326538086, + "learning_rate": 5e-05, + "loss": 1.7588, + "num_input_tokens_seen": 61447444, + "step": 918 + }, + { + "epoch": 0.10417021276595745, + "loss": 1.7537875175476074, + "loss_ce": 0.0047640311531722546, + "loss_iou": 0.7265625, + "loss_num": 0.0595703125, + "loss_xval": 1.75, + "num_input_tokens_seen": 61447444, + "step": 918 + }, + { + "epoch": 0.10428368794326241, + "grad_norm": 11.57437801361084, + "learning_rate": 5e-05, + "loss": 1.4041, + "num_input_tokens_seen": 61514272, + "step": 919 + }, + { + "epoch": 0.10428368794326241, + "loss": 1.428722858428955, + "loss_ce": 0.006847892422229052, + "loss_iou": 0.6015625, + "loss_num": 0.04345703125, + "loss_xval": 1.421875, + "num_input_tokens_seen": 61514272, + "step": 919 + }, + { + "epoch": 0.10439716312056738, + "grad_norm": 16.0411376953125, + "learning_rate": 5e-05, + "loss": 1.3599, + "num_input_tokens_seen": 61581544, + "step": 920 + }, + { + "epoch": 0.10439716312056738, + "loss": 1.2842106819152832, + "loss_ce": 0.004913900047540665, + "loss_iou": 0.5703125, + "loss_num": 0.0272216796875, + "loss_xval": 1.28125, + "num_input_tokens_seen": 61581544, + "step": 920 + }, + { + "epoch": 0.10451063829787234, + "grad_norm": 20.891162872314453, + "learning_rate": 5e-05, + "loss": 1.3397, + "num_input_tokens_seen": 61647892, + "step": 921 + }, + { + "epoch": 0.10451063829787234, + "loss": 1.3594539165496826, + "loss_ce": 0.003985092509537935, + "loss_iou": 0.609375, + "loss_num": 0.02734375, + "loss_xval": 1.359375, + "num_input_tokens_seen": 61647892, + "step": 921 + }, + { + "epoch": 0.1046241134751773, + "grad_norm": 43.27165985107422, + "learning_rate": 5e-05, + "loss": 1.5181, + "num_input_tokens_seen": 61714572, + "step": 922 + }, + { + "epoch": 0.1046241134751773, + "loss": 1.409366250038147, + "loss_ce": 0.006534216459840536, + "loss_iou": 0.609375, + "loss_num": 0.037109375, + "loss_xval": 1.40625, + "num_input_tokens_seen": 61714572, + "step": 922 + }, + { + "epoch": 0.10473758865248227, + "grad_norm": 21.234527587890625, + "learning_rate": 5e-05, + "loss": 1.6637, + "num_input_tokens_seen": 61781564, + "step": 923 + }, + { + "epoch": 0.10473758865248227, + "loss": 1.6533092260360718, + "loss_ce": 0.0029185800813138485, + "loss_iou": 0.734375, + "loss_num": 0.035400390625, + "loss_xval": 1.6484375, + "num_input_tokens_seen": 61781564, + "step": 923 + }, + { + "epoch": 0.10485106382978723, + "grad_norm": 20.68224334716797, + "learning_rate": 5e-05, + "loss": 1.497, + "num_input_tokens_seen": 61849492, + "step": 924 + }, + { + "epoch": 0.10485106382978723, + "loss": 1.5301393270492554, + "loss_ce": 0.004748648963868618, + "loss_iou": 0.671875, + "loss_num": 0.036376953125, + "loss_xval": 1.5234375, + "num_input_tokens_seen": 61849492, + "step": 924 + }, + { + "epoch": 0.1049645390070922, + "grad_norm": 36.09031677246094, + "learning_rate": 5e-05, + "loss": 1.4923, + "num_input_tokens_seen": 61916836, + "step": 925 + }, + { + "epoch": 0.1049645390070922, + "loss": 1.5014861822128296, + "loss_ce": 0.0034393721725791693, + "loss_iou": 0.65234375, + "loss_num": 0.03857421875, + "loss_xval": 1.5, + "num_input_tokens_seen": 61916836, + "step": 925 + }, + { + "epoch": 0.10507801418439716, + "grad_norm": 26.162826538085938, + "learning_rate": 5e-05, + "loss": 1.7171, + "num_input_tokens_seen": 61982832, + "step": 926 + }, + { + "epoch": 0.10507801418439716, + "loss": 1.935099720954895, + "loss_ce": 0.004435625858604908, + "loss_iou": 0.83984375, + "loss_num": 0.05029296875, + "loss_xval": 1.9296875, + "num_input_tokens_seen": 61982832, + "step": 926 + }, + { + "epoch": 0.10519148936170213, + "grad_norm": 15.766121864318848, + "learning_rate": 5e-05, + "loss": 1.519, + "num_input_tokens_seen": 62049532, + "step": 927 + }, + { + "epoch": 0.10519148936170213, + "loss": 1.705784797668457, + "loss_ce": 0.0026598733384162188, + "loss_iou": 0.74609375, + "loss_num": 0.042724609375, + "loss_xval": 1.703125, + "num_input_tokens_seen": 62049532, + "step": 927 + }, + { + "epoch": 0.10530496453900709, + "grad_norm": 25.66825294494629, + "learning_rate": 5e-05, + "loss": 1.5247, + "num_input_tokens_seen": 62116520, + "step": 928 + }, + { + "epoch": 0.10530496453900709, + "loss": 1.598768711090088, + "loss_ce": 0.0064836833626031876, + "loss_iou": 0.66015625, + "loss_num": 0.0546875, + "loss_xval": 1.59375, + "num_input_tokens_seen": 62116520, + "step": 928 + }, + { + "epoch": 0.10541843971631205, + "grad_norm": 22.701265335083008, + "learning_rate": 5e-05, + "loss": 1.3767, + "num_input_tokens_seen": 62183600, + "step": 929 + }, + { + "epoch": 0.10541843971631205, + "loss": 1.6332752704620361, + "loss_ce": 0.006322155706584454, + "loss_iou": 0.7109375, + "loss_num": 0.04150390625, + "loss_xval": 1.625, + "num_input_tokens_seen": 62183600, + "step": 929 + }, + { + "epoch": 0.10553191489361702, + "grad_norm": 11.599271774291992, + "learning_rate": 5e-05, + "loss": 1.3993, + "num_input_tokens_seen": 62250204, + "step": 930 + }, + { + "epoch": 0.10553191489361702, + "loss": 1.5190402269363403, + "loss_ce": 0.007321394048631191, + "loss_iou": 0.6328125, + "loss_num": 0.04931640625, + "loss_xval": 1.515625, + "num_input_tokens_seen": 62250204, + "step": 930 + }, + { + "epoch": 0.10564539007092198, + "grad_norm": 24.060415267944336, + "learning_rate": 5e-05, + "loss": 1.6156, + "num_input_tokens_seen": 62317688, + "step": 931 + }, + { + "epoch": 0.10564539007092198, + "loss": 1.6038862466812134, + "loss_ce": 0.0023237625136971474, + "loss_iou": 0.6796875, + "loss_num": 0.0478515625, + "loss_xval": 1.6015625, + "num_input_tokens_seen": 62317688, + "step": 931 + }, + { + "epoch": 0.10575886524822695, + "grad_norm": 19.49755096435547, + "learning_rate": 5e-05, + "loss": 1.6032, + "num_input_tokens_seen": 62384084, + "step": 932 + }, + { + "epoch": 0.10575886524822695, + "loss": 1.5829253196716309, + "loss_ce": 0.0028472470585256815, + "loss_iou": 0.67578125, + "loss_num": 0.04541015625, + "loss_xval": 1.578125, + "num_input_tokens_seen": 62384084, + "step": 932 + }, + { + "epoch": 0.10587234042553191, + "grad_norm": 20.87383460998535, + "learning_rate": 5e-05, + "loss": 1.392, + "num_input_tokens_seen": 62450272, + "step": 933 + }, + { + "epoch": 0.10587234042553191, + "loss": 1.3373322486877441, + "loss_ce": 0.0033478960394859314, + "loss_iou": 0.5859375, + "loss_num": 0.032958984375, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 62450272, + "step": 933 + }, + { + "epoch": 0.10598581560283687, + "grad_norm": 21.897951126098633, + "learning_rate": 5e-05, + "loss": 1.6462, + "num_input_tokens_seen": 62517000, + "step": 934 + }, + { + "epoch": 0.10598581560283687, + "loss": 1.6351348161697388, + "loss_ce": 0.006716935429722071, + "loss_iou": 0.75, + "loss_num": 0.02587890625, + "loss_xval": 1.625, + "num_input_tokens_seen": 62517000, + "step": 934 + }, + { + "epoch": 0.10609929078014184, + "grad_norm": 21.751996994018555, + "learning_rate": 5e-05, + "loss": 1.5129, + "num_input_tokens_seen": 62583000, + "step": 935 + }, + { + "epoch": 0.10609929078014184, + "loss": 1.4547611474990845, + "loss_ce": 0.00554237887263298, + "loss_iou": 0.625, + "loss_num": 0.03955078125, + "loss_xval": 1.453125, + "num_input_tokens_seen": 62583000, + "step": 935 + }, + { + "epoch": 0.1062127659574468, + "grad_norm": 34.9748649597168, + "learning_rate": 5e-05, + "loss": 1.515, + "num_input_tokens_seen": 62649876, + "step": 936 + }, + { + "epoch": 0.1062127659574468, + "loss": 1.373112440109253, + "loss_ce": 0.0054367464035749435, + "loss_iou": 0.5859375, + "loss_num": 0.039306640625, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 62649876, + "step": 936 + }, + { + "epoch": 0.10632624113475177, + "grad_norm": 24.05430793762207, + "learning_rate": 5e-05, + "loss": 1.4672, + "num_input_tokens_seen": 62716064, + "step": 937 + }, + { + "epoch": 0.10632624113475177, + "loss": 1.5588011741638184, + "loss_ce": 0.0041136713698506355, + "loss_iou": 0.65234375, + "loss_num": 0.049560546875, + "loss_xval": 1.5546875, + "num_input_tokens_seen": 62716064, + "step": 937 + }, + { + "epoch": 0.10643971631205673, + "grad_norm": 26.99909782409668, + "learning_rate": 5e-05, + "loss": 1.3748, + "num_input_tokens_seen": 62782500, + "step": 938 + }, + { + "epoch": 0.10643971631205673, + "loss": 1.373105764389038, + "loss_ce": 0.005918153561651707, + "loss_iou": 0.62109375, + "loss_num": 0.025390625, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 62782500, + "step": 938 + }, + { + "epoch": 0.1065531914893617, + "grad_norm": 15.031723976135254, + "learning_rate": 5e-05, + "loss": 1.9221, + "num_input_tokens_seen": 62848696, + "step": 939 + }, + { + "epoch": 0.1065531914893617, + "loss": 1.8377063274383545, + "loss_ce": 0.007628265302628279, + "loss_iou": 0.78125, + "loss_num": 0.052734375, + "loss_xval": 1.828125, + "num_input_tokens_seen": 62848696, + "step": 939 + }, + { + "epoch": 0.10666666666666667, + "grad_norm": 14.655712127685547, + "learning_rate": 5e-05, + "loss": 1.6606, + "num_input_tokens_seen": 62917564, + "step": 940 + }, + { + "epoch": 0.10666666666666667, + "loss": 1.6020830869674683, + "loss_ce": 0.0054034097120165825, + "loss_iou": 0.69140625, + "loss_num": 0.04345703125, + "loss_xval": 1.59375, + "num_input_tokens_seen": 62917564, + "step": 940 + }, + { + "epoch": 0.10678014184397164, + "grad_norm": 17.54239273071289, + "learning_rate": 5e-05, + "loss": 1.4331, + "num_input_tokens_seen": 62984508, + "step": 941 + }, + { + "epoch": 0.10678014184397164, + "loss": 1.3016602993011475, + "loss_ce": 0.0038086981512606144, + "loss_iou": 0.57421875, + "loss_num": 0.030029296875, + "loss_xval": 1.296875, + "num_input_tokens_seen": 62984508, + "step": 941 + }, + { + "epoch": 0.1068936170212766, + "grad_norm": 20.97150993347168, + "learning_rate": 5e-05, + "loss": 1.4294, + "num_input_tokens_seen": 63051276, + "step": 942 + }, + { + "epoch": 0.1068936170212766, + "loss": 1.4965410232543945, + "loss_ce": 0.003865240840241313, + "loss_iou": 0.640625, + "loss_num": 0.04150390625, + "loss_xval": 1.4921875, + "num_input_tokens_seen": 63051276, + "step": 942 + }, + { + "epoch": 0.10700709219858157, + "grad_norm": 36.366600036621094, + "learning_rate": 5e-05, + "loss": 1.6939, + "num_input_tokens_seen": 63119036, + "step": 943 + }, + { + "epoch": 0.10700709219858157, + "loss": 1.6698920726776123, + "loss_ce": 0.0087592089548707, + "loss_iou": 0.74609375, + "loss_num": 0.0341796875, + "loss_xval": 1.6640625, + "num_input_tokens_seen": 63119036, + "step": 943 + }, + { + "epoch": 0.10712056737588653, + "grad_norm": 22.416805267333984, + "learning_rate": 5e-05, + "loss": 1.8949, + "num_input_tokens_seen": 63186368, + "step": 944 + }, + { + "epoch": 0.10712056737588653, + "loss": 1.8264784812927246, + "loss_ce": 0.007142486050724983, + "loss_iou": 0.79296875, + "loss_num": 0.046875, + "loss_xval": 1.8203125, + "num_input_tokens_seen": 63186368, + "step": 944 + }, + { + "epoch": 0.1072340425531915, + "grad_norm": 16.294200897216797, + "learning_rate": 5e-05, + "loss": 1.4607, + "num_input_tokens_seen": 63252612, + "step": 945 + }, + { + "epoch": 0.1072340425531915, + "loss": 1.4254059791564941, + "loss_ce": 0.00694888224825263, + "loss_iou": 0.6484375, + "loss_num": 0.0244140625, + "loss_xval": 1.421875, + "num_input_tokens_seen": 63252612, + "step": 945 + }, + { + "epoch": 0.10734751773049646, + "grad_norm": 30.98619270324707, + "learning_rate": 5e-05, + "loss": 1.4262, + "num_input_tokens_seen": 63320308, + "step": 946 + }, + { + "epoch": 0.10734751773049646, + "loss": 1.5618536472320557, + "loss_ce": 0.0061897169798612595, + "loss_iou": 0.64453125, + "loss_num": 0.052734375, + "loss_xval": 1.5546875, + "num_input_tokens_seen": 63320308, + "step": 946 + }, + { + "epoch": 0.10746099290780142, + "grad_norm": 18.276412963867188, + "learning_rate": 5e-05, + "loss": 1.6229, + "num_input_tokens_seen": 63387376, + "step": 947 + }, + { + "epoch": 0.10746099290780142, + "loss": 1.502415418624878, + "loss_ce": 0.0047347666695714, + "loss_iou": 0.6796875, + "loss_num": 0.028564453125, + "loss_xval": 1.5, + "num_input_tokens_seen": 63387376, + "step": 947 + }, + { + "epoch": 0.10757446808510639, + "grad_norm": 17.515579223632812, + "learning_rate": 5e-05, + "loss": 1.4741, + "num_input_tokens_seen": 63454440, + "step": 948 + }, + { + "epoch": 0.10757446808510639, + "loss": 1.4879976511001587, + "loss_ce": 0.0031344122253358364, + "loss_iou": 0.64453125, + "loss_num": 0.03955078125, + "loss_xval": 1.484375, + "num_input_tokens_seen": 63454440, + "step": 948 + }, + { + "epoch": 0.10768794326241135, + "grad_norm": 26.648103713989258, + "learning_rate": 5e-05, + "loss": 1.3938, + "num_input_tokens_seen": 63522336, + "step": 949 + }, + { + "epoch": 0.10768794326241135, + "loss": 1.2829192876815796, + "loss_ce": 0.004599020816385746, + "loss_iou": 0.51171875, + "loss_num": 0.05126953125, + "loss_xval": 1.28125, + "num_input_tokens_seen": 63522336, + "step": 949 + }, + { + "epoch": 0.10780141843971631, + "grad_norm": 20.698162078857422, + "learning_rate": 5e-05, + "loss": 1.6811, + "num_input_tokens_seen": 63589376, + "step": 950 + }, + { + "epoch": 0.10780141843971631, + "loss": 1.8217359781265259, + "loss_ce": 0.0033765570260584354, + "loss_iou": 0.765625, + "loss_num": 0.057861328125, + "loss_xval": 1.8203125, + "num_input_tokens_seen": 63589376, + "step": 950 + }, + { + "epoch": 0.10791489361702128, + "grad_norm": 24.178281784057617, + "learning_rate": 5e-05, + "loss": 1.3933, + "num_input_tokens_seen": 63656176, + "step": 951 + }, + { + "epoch": 0.10791489361702128, + "loss": 1.4755955934524536, + "loss_ce": 0.007333868648856878, + "loss_iou": 0.671875, + "loss_num": 0.0255126953125, + "loss_xval": 1.46875, + "num_input_tokens_seen": 63656176, + "step": 951 + }, + { + "epoch": 0.10802836879432624, + "grad_norm": 20.039470672607422, + "learning_rate": 5e-05, + "loss": 1.9504, + "num_input_tokens_seen": 63722320, + "step": 952 + }, + { + "epoch": 0.10802836879432624, + "loss": 2.150566577911377, + "loss_ce": 0.00798848457634449, + "loss_iou": 0.9453125, + "loss_num": 0.051025390625, + "loss_xval": 2.140625, + "num_input_tokens_seen": 63722320, + "step": 952 + }, + { + "epoch": 0.10814184397163121, + "grad_norm": 16.672168731689453, + "learning_rate": 5e-05, + "loss": 1.4547, + "num_input_tokens_seen": 63788376, + "step": 953 + }, + { + "epoch": 0.10814184397163121, + "loss": 1.3815243244171143, + "loss_ce": 0.00847746804356575, + "loss_iou": 0.6015625, + "loss_num": 0.033935546875, + "loss_xval": 1.375, + "num_input_tokens_seen": 63788376, + "step": 953 + }, + { + "epoch": 0.10825531914893617, + "grad_norm": 19.887357711791992, + "learning_rate": 5e-05, + "loss": 1.4283, + "num_input_tokens_seen": 63854568, + "step": 954 + }, + { + "epoch": 0.10825531914893617, + "loss": 1.6020786762237549, + "loss_ce": 0.008328622207045555, + "loss_iou": 0.70703125, + "loss_num": 0.0361328125, + "loss_xval": 1.59375, + "num_input_tokens_seen": 63854568, + "step": 954 + }, + { + "epoch": 0.10836879432624114, + "grad_norm": 34.70006561279297, + "learning_rate": 5e-05, + "loss": 1.562, + "num_input_tokens_seen": 63920976, + "step": 955 + }, + { + "epoch": 0.10836879432624114, + "loss": 1.52976655960083, + "loss_ce": 0.005840771831572056, + "loss_iou": 0.6796875, + "loss_num": 0.033203125, + "loss_xval": 1.5234375, + "num_input_tokens_seen": 63920976, + "step": 955 + }, + { + "epoch": 0.1084822695035461, + "grad_norm": 16.461166381835938, + "learning_rate": 5e-05, + "loss": 1.8583, + "num_input_tokens_seen": 63986668, + "step": 956 + }, + { + "epoch": 0.1084822695035461, + "loss": 1.7727233171463013, + "loss_ce": 0.0046569169498980045, + "loss_iou": 0.703125, + "loss_num": 0.072265625, + "loss_xval": 1.765625, + "num_input_tokens_seen": 63986668, + "step": 956 + }, + { + "epoch": 0.10859574468085106, + "grad_norm": 18.462480545043945, + "learning_rate": 5e-05, + "loss": 1.556, + "num_input_tokens_seen": 64053784, + "step": 957 + }, + { + "epoch": 0.10859574468085106, + "loss": 1.377368450164795, + "loss_ce": 0.007251286413520575, + "loss_iou": 0.59765625, + "loss_num": 0.03515625, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 64053784, + "step": 957 + }, + { + "epoch": 0.10870921985815603, + "grad_norm": 28.613121032714844, + "learning_rate": 5e-05, + "loss": 1.65, + "num_input_tokens_seen": 64119724, + "step": 958 + }, + { + "epoch": 0.10870921985815603, + "loss": 1.552912950515747, + "loss_ce": 0.004573051817715168, + "loss_iou": 0.65625, + "loss_num": 0.04736328125, + "loss_xval": 1.546875, + "num_input_tokens_seen": 64119724, + "step": 958 + }, + { + "epoch": 0.10882269503546099, + "grad_norm": 19.167898178100586, + "learning_rate": 5e-05, + "loss": 1.5761, + "num_input_tokens_seen": 64186984, + "step": 959 + }, + { + "epoch": 0.10882269503546099, + "loss": 1.4423370361328125, + "loss_ce": 0.007278463803231716, + "loss_iou": 0.60546875, + "loss_num": 0.04443359375, + "loss_xval": 1.4375, + "num_input_tokens_seen": 64186984, + "step": 959 + }, + { + "epoch": 0.10893617021276596, + "grad_norm": 25.507572174072266, + "learning_rate": 5e-05, + "loss": 1.44, + "num_input_tokens_seen": 64254496, + "step": 960 + }, + { + "epoch": 0.10893617021276596, + "loss": 1.4448392391204834, + "loss_ce": 0.0034330515190958977, + "loss_iou": 0.640625, + "loss_num": 0.032958984375, + "loss_xval": 1.4375, + "num_input_tokens_seen": 64254496, + "step": 960 + }, + { + "epoch": 0.10904964539007092, + "grad_norm": 34.548606872558594, + "learning_rate": 5e-05, + "loss": 1.5156, + "num_input_tokens_seen": 64321480, + "step": 961 + }, + { + "epoch": 0.10904964539007092, + "loss": 1.6723253726959229, + "loss_ce": 0.004356619901955128, + "loss_iou": 0.7265625, + "loss_num": 0.042236328125, + "loss_xval": 1.671875, + "num_input_tokens_seen": 64321480, + "step": 961 + }, + { + "epoch": 0.10916312056737588, + "grad_norm": 18.109092712402344, + "learning_rate": 5e-05, + "loss": 1.5405, + "num_input_tokens_seen": 64388576, + "step": 962 + }, + { + "epoch": 0.10916312056737588, + "loss": 1.767910122871399, + "loss_ce": 0.010097632184624672, + "loss_iou": 0.76953125, + "loss_num": 0.043212890625, + "loss_xval": 1.7578125, + "num_input_tokens_seen": 64388576, + "step": 962 + }, + { + "epoch": 0.10927659574468085, + "grad_norm": 21.540069580078125, + "learning_rate": 5e-05, + "loss": 1.4702, + "num_input_tokens_seen": 64455828, + "step": 963 + }, + { + "epoch": 0.10927659574468085, + "loss": 1.4431819915771484, + "loss_ce": 0.003728893119841814, + "loss_iou": 0.640625, + "loss_num": 0.032470703125, + "loss_xval": 1.4375, + "num_input_tokens_seen": 64455828, + "step": 963 + }, + { + "epoch": 0.10939007092198581, + "grad_norm": 36.49156188964844, + "learning_rate": 5e-05, + "loss": 1.6389, + "num_input_tokens_seen": 64522460, + "step": 964 + }, + { + "epoch": 0.10939007092198581, + "loss": 1.5100687742233276, + "loss_ce": 0.004209452774375677, + "loss_iou": 0.67578125, + "loss_num": 0.0301513671875, + "loss_xval": 1.5078125, + "num_input_tokens_seen": 64522460, + "step": 964 + }, + { + "epoch": 0.10950354609929078, + "grad_norm": 20.88112449645996, + "learning_rate": 5e-05, + "loss": 1.6738, + "num_input_tokens_seen": 64589176, + "step": 965 + }, + { + "epoch": 0.10950354609929078, + "loss": 1.7403920888900757, + "loss_ce": 0.00308740371838212, + "loss_iou": 0.7578125, + "loss_num": 0.044921875, + "loss_xval": 1.734375, + "num_input_tokens_seen": 64589176, + "step": 965 + }, + { + "epoch": 0.10961702127659574, + "grad_norm": 26.589534759521484, + "learning_rate": 5e-05, + "loss": 1.4248, + "num_input_tokens_seen": 64656936, + "step": 966 + }, + { + "epoch": 0.10961702127659574, + "loss": 1.4693329334259033, + "loss_ce": 0.005953948013484478, + "loss_iou": 0.66015625, + "loss_num": 0.0284423828125, + "loss_xval": 1.4609375, + "num_input_tokens_seen": 64656936, + "step": 966 + }, + { + "epoch": 0.1097304964539007, + "grad_norm": 81.24604797363281, + "learning_rate": 5e-05, + "loss": 1.449, + "num_input_tokens_seen": 64723108, + "step": 967 + }, + { + "epoch": 0.1097304964539007, + "loss": 1.5792863368988037, + "loss_ce": 0.005067541263997555, + "loss_iou": 0.67578125, + "loss_num": 0.044189453125, + "loss_xval": 1.578125, + "num_input_tokens_seen": 64723108, + "step": 967 + }, + { + "epoch": 0.10984397163120567, + "grad_norm": 18.357561111450195, + "learning_rate": 5e-05, + "loss": 1.6566, + "num_input_tokens_seen": 64790048, + "step": 968 + }, + { + "epoch": 0.10984397163120567, + "loss": 1.7522820234298706, + "loss_ce": 0.005211660638451576, + "loss_iou": 0.7265625, + "loss_num": 0.058837890625, + "loss_xval": 1.75, + "num_input_tokens_seen": 64790048, + "step": 968 + }, + { + "epoch": 0.10995744680851063, + "grad_norm": 18.534198760986328, + "learning_rate": 5e-05, + "loss": 1.4773, + "num_input_tokens_seen": 64856124, + "step": 969 + }, + { + "epoch": 0.10995744680851063, + "loss": 1.3593907356262207, + "loss_ce": 0.005142682231962681, + "loss_iou": 0.55859375, + "loss_num": 0.04638671875, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 64856124, + "step": 969 + }, + { + "epoch": 0.1100709219858156, + "grad_norm": 25.720598220825195, + "learning_rate": 5e-05, + "loss": 1.4817, + "num_input_tokens_seen": 64922380, + "step": 970 + }, + { + "epoch": 0.1100709219858156, + "loss": 1.7976574897766113, + "loss_ce": 0.005665300413966179, + "loss_iou": 0.76171875, + "loss_num": 0.052978515625, + "loss_xval": 1.7890625, + "num_input_tokens_seen": 64922380, + "step": 970 + }, + { + "epoch": 0.11018439716312056, + "grad_norm": 24.457313537597656, + "learning_rate": 5e-05, + "loss": 1.5689, + "num_input_tokens_seen": 64989928, + "step": 971 + }, + { + "epoch": 0.11018439716312056, + "loss": 1.5411114692687988, + "loss_ce": 0.0044904546812176704, + "loss_iou": 0.6640625, + "loss_num": 0.042724609375, + "loss_xval": 1.5390625, + "num_input_tokens_seen": 64989928, + "step": 971 + }, + { + "epoch": 0.11029787234042553, + "grad_norm": 14.963701248168945, + "learning_rate": 5e-05, + "loss": 1.3578, + "num_input_tokens_seen": 65055960, + "step": 972 + }, + { + "epoch": 0.11029787234042553, + "loss": 1.401698350906372, + "loss_ce": 0.005213978700339794, + "loss_iou": 0.58203125, + "loss_num": 0.046875, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 65055960, + "step": 972 + }, + { + "epoch": 0.11041134751773049, + "grad_norm": 33.46040725708008, + "learning_rate": 5e-05, + "loss": 1.6906, + "num_input_tokens_seen": 65123516, + "step": 973 + }, + { + "epoch": 0.11041134751773049, + "loss": 1.8139804601669312, + "loss_ce": 0.003433545120060444, + "loss_iou": 0.78125, + "loss_num": 0.04931640625, + "loss_xval": 1.8125, + "num_input_tokens_seen": 65123516, + "step": 973 + }, + { + "epoch": 0.11052482269503545, + "grad_norm": 30.20699691772461, + "learning_rate": 5e-05, + "loss": 1.4489, + "num_input_tokens_seen": 65190788, + "step": 974 + }, + { + "epoch": 0.11052482269503545, + "loss": 1.4564988613128662, + "loss_ce": 0.006303605157881975, + "loss_iou": 0.64453125, + "loss_num": 0.031982421875, + "loss_xval": 1.453125, + "num_input_tokens_seen": 65190788, + "step": 974 + }, + { + "epoch": 0.11063829787234042, + "grad_norm": 16.78886604309082, + "learning_rate": 5e-05, + "loss": 1.7758, + "num_input_tokens_seen": 65258092, + "step": 975 + }, + { + "epoch": 0.11063829787234042, + "loss": 1.8801778554916382, + "loss_ce": 0.007130986545234919, + "loss_iou": 0.828125, + "loss_num": 0.04296875, + "loss_xval": 1.875, + "num_input_tokens_seen": 65258092, + "step": 975 + }, + { + "epoch": 0.1107517730496454, + "grad_norm": 370.1587829589844, + "learning_rate": 5e-05, + "loss": 1.3423, + "num_input_tokens_seen": 65324284, + "step": 976 + }, + { + "epoch": 0.1107517730496454, + "loss": 1.279093861579895, + "loss_ce": 0.0027267143595963717, + "loss_iou": 0.55859375, + "loss_num": 0.0311279296875, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 65324284, + "step": 976 + }, + { + "epoch": 0.11086524822695036, + "grad_norm": 33.005645751953125, + "learning_rate": 5e-05, + "loss": 1.5934, + "num_input_tokens_seen": 65390168, + "step": 977 + }, + { + "epoch": 0.11086524822695036, + "loss": 1.490673542022705, + "loss_ce": 0.003368770470842719, + "loss_iou": 0.6484375, + "loss_num": 0.0380859375, + "loss_xval": 1.484375, + "num_input_tokens_seen": 65390168, + "step": 977 + }, + { + "epoch": 0.11097872340425533, + "grad_norm": 19.431917190551758, + "learning_rate": 5e-05, + "loss": 1.7221, + "num_input_tokens_seen": 65456856, + "step": 978 + }, + { + "epoch": 0.11097872340425533, + "loss": 1.735149621963501, + "loss_ce": 0.004680921323597431, + "loss_iou": 0.7734375, + "loss_num": 0.0361328125, + "loss_xval": 1.734375, + "num_input_tokens_seen": 65456856, + "step": 978 + }, + { + "epoch": 0.11109219858156029, + "grad_norm": 16.118560791015625, + "learning_rate": 5e-05, + "loss": 1.3471, + "num_input_tokens_seen": 65523184, + "step": 979 + }, + { + "epoch": 0.11109219858156029, + "loss": 1.50213623046875, + "loss_ce": 0.003112807869911194, + "loss_iou": 0.66015625, + "loss_num": 0.035400390625, + "loss_xval": 1.5, + "num_input_tokens_seen": 65523184, + "step": 979 + }, + { + "epoch": 0.11120567375886525, + "grad_norm": 24.320680618286133, + "learning_rate": 5e-05, + "loss": 1.4935, + "num_input_tokens_seen": 65590072, + "step": 980 + }, + { + "epoch": 0.11120567375886525, + "loss": 1.51410973072052, + "loss_ce": 0.004832324106246233, + "loss_iou": 0.625, + "loss_num": 0.051025390625, + "loss_xval": 1.5078125, + "num_input_tokens_seen": 65590072, + "step": 980 + }, + { + "epoch": 0.11131914893617022, + "grad_norm": 33.98814392089844, + "learning_rate": 5e-05, + "loss": 1.4792, + "num_input_tokens_seen": 65656292, + "step": 981 + }, + { + "epoch": 0.11131914893617022, + "loss": 1.331488013267517, + "loss_ce": 0.0072692218236625195, + "loss_iou": 0.58984375, + "loss_num": 0.0286865234375, + "loss_xval": 1.328125, + "num_input_tokens_seen": 65656292, + "step": 981 + }, + { + "epoch": 0.11143262411347518, + "grad_norm": 19.73529052734375, + "learning_rate": 5e-05, + "loss": 1.834, + "num_input_tokens_seen": 65722940, + "step": 982 + }, + { + "epoch": 0.11143262411347518, + "loss": 1.828428864479065, + "loss_ce": 0.007139950059354305, + "loss_iou": 0.75390625, + "loss_num": 0.06298828125, + "loss_xval": 1.8203125, + "num_input_tokens_seen": 65722940, + "step": 982 + }, + { + "epoch": 0.11154609929078015, + "grad_norm": 30.917612075805664, + "learning_rate": 5e-05, + "loss": 1.5532, + "num_input_tokens_seen": 65789292, + "step": 983 + }, + { + "epoch": 0.11154609929078015, + "loss": 1.701123595237732, + "loss_ce": 0.006787740625441074, + "loss_iou": 0.71875, + "loss_num": 0.052490234375, + "loss_xval": 1.6953125, + "num_input_tokens_seen": 65789292, + "step": 983 + }, + { + "epoch": 0.11165957446808511, + "grad_norm": 23.100065231323242, + "learning_rate": 5e-05, + "loss": 1.6234, + "num_input_tokens_seen": 65856516, + "step": 984 + }, + { + "epoch": 0.11165957446808511, + "loss": 1.458715796470642, + "loss_ce": 0.0036376500502228737, + "loss_iou": 0.63671875, + "loss_num": 0.036376953125, + "loss_xval": 1.453125, + "num_input_tokens_seen": 65856516, + "step": 984 + }, + { + "epoch": 0.11177304964539007, + "grad_norm": 18.616674423217773, + "learning_rate": 5e-05, + "loss": 1.5413, + "num_input_tokens_seen": 65922980, + "step": 985 + }, + { + "epoch": 0.11177304964539007, + "loss": 1.3890676498413086, + "loss_ce": 0.0062551349401474, + "loss_iou": 0.6171875, + "loss_num": 0.0296630859375, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 65922980, + "step": 985 + }, + { + "epoch": 0.11188652482269504, + "grad_norm": 21.211524963378906, + "learning_rate": 5e-05, + "loss": 1.5453, + "num_input_tokens_seen": 65989816, + "step": 986 + }, + { + "epoch": 0.11188652482269504, + "loss": 1.5284688472747803, + "loss_ce": 0.002589903539046645, + "loss_iou": 0.6796875, + "loss_num": 0.033203125, + "loss_xval": 1.5234375, + "num_input_tokens_seen": 65989816, + "step": 986 + }, + { + "epoch": 0.112, + "grad_norm": 16.928707122802734, + "learning_rate": 5e-05, + "loss": 1.4824, + "num_input_tokens_seen": 66056648, + "step": 987 + }, + { + "epoch": 0.112, + "loss": 1.41338050365448, + "loss_ce": 0.006642201915383339, + "loss_iou": 0.625, + "loss_num": 0.031982421875, + "loss_xval": 1.40625, + "num_input_tokens_seen": 66056648, + "step": 987 + }, + { + "epoch": 0.11211347517730497, + "grad_norm": 18.27202796936035, + "learning_rate": 5e-05, + "loss": 1.6261, + "num_input_tokens_seen": 66124056, + "step": 988 + }, + { + "epoch": 0.11211347517730497, + "loss": 1.5662332773208618, + "loss_ce": 0.007639446295797825, + "loss_iou": 0.66015625, + "loss_num": 0.046875, + "loss_xval": 1.5625, + "num_input_tokens_seen": 66124056, + "step": 988 + }, + { + "epoch": 0.11222695035460993, + "grad_norm": 16.697107315063477, + "learning_rate": 5e-05, + "loss": 1.3651, + "num_input_tokens_seen": 66191688, + "step": 989 + }, + { + "epoch": 0.11222695035460993, + "loss": 1.363063097000122, + "loss_ce": 0.005809113383293152, + "loss_iou": 0.578125, + "loss_num": 0.04052734375, + "loss_xval": 1.359375, + "num_input_tokens_seen": 66191688, + "step": 989 + }, + { + "epoch": 0.1123404255319149, + "grad_norm": 28.04990005493164, + "learning_rate": 5e-05, + "loss": 1.6554, + "num_input_tokens_seen": 66258408, + "step": 990 + }, + { + "epoch": 0.1123404255319149, + "loss": 1.8393874168395996, + "loss_ce": 0.006379701197147369, + "loss_iou": 0.7578125, + "loss_num": 0.0625, + "loss_xval": 1.8359375, + "num_input_tokens_seen": 66258408, + "step": 990 + }, + { + "epoch": 0.11245390070921986, + "grad_norm": 23.489952087402344, + "learning_rate": 5e-05, + "loss": 1.7164, + "num_input_tokens_seen": 66324804, + "step": 991 + }, + { + "epoch": 0.11245390070921986, + "loss": 1.7678680419921875, + "loss_ce": 0.008102413266897202, + "loss_iou": 0.7265625, + "loss_num": 0.06103515625, + "loss_xval": 1.7578125, + "num_input_tokens_seen": 66324804, + "step": 991 + }, + { + "epoch": 0.11256737588652482, + "grad_norm": 26.8695011138916, + "learning_rate": 5e-05, + "loss": 1.6831, + "num_input_tokens_seen": 66391308, + "step": 992 + }, + { + "epoch": 0.11256737588652482, + "loss": 1.7472071647644043, + "loss_ce": 0.005019734613597393, + "loss_iou": 0.7578125, + "loss_num": 0.046142578125, + "loss_xval": 1.7421875, + "num_input_tokens_seen": 66391308, + "step": 992 + }, + { + "epoch": 0.11268085106382979, + "grad_norm": 57.34007263183594, + "learning_rate": 5e-05, + "loss": 1.2286, + "num_input_tokens_seen": 66457880, + "step": 993 + }, + { + "epoch": 0.11268085106382979, + "loss": 1.1119410991668701, + "loss_ce": 0.00549581553786993, + "loss_iou": 0.50390625, + "loss_num": 0.0205078125, + "loss_xval": 1.109375, + "num_input_tokens_seen": 66457880, + "step": 993 + }, + { + "epoch": 0.11279432624113475, + "grad_norm": 18.45754623413086, + "learning_rate": 5e-05, + "loss": 1.6085, + "num_input_tokens_seen": 66524416, + "step": 994 + }, + { + "epoch": 0.11279432624113475, + "loss": 1.698638677597046, + "loss_ce": 0.00820888951420784, + "loss_iou": 0.70703125, + "loss_num": 0.0556640625, + "loss_xval": 1.6875, + "num_input_tokens_seen": 66524416, + "step": 994 + }, + { + "epoch": 0.11290780141843972, + "grad_norm": 13.792594909667969, + "learning_rate": 5e-05, + "loss": 1.5441, + "num_input_tokens_seen": 66591436, + "step": 995 + }, + { + "epoch": 0.11290780141843972, + "loss": 1.3141109943389893, + "loss_ce": 0.007683923002332449, + "loss_iou": 0.5859375, + "loss_num": 0.0267333984375, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 66591436, + "step": 995 + }, + { + "epoch": 0.11302127659574468, + "grad_norm": 19.352794647216797, + "learning_rate": 5e-05, + "loss": 1.4575, + "num_input_tokens_seen": 66657660, + "step": 996 + }, + { + "epoch": 0.11302127659574468, + "loss": 1.2984788417816162, + "loss_ce": 0.0045335739850997925, + "loss_iou": 0.52734375, + "loss_num": 0.04736328125, + "loss_xval": 1.296875, + "num_input_tokens_seen": 66657660, + "step": 996 + }, + { + "epoch": 0.11313475177304964, + "grad_norm": 28.14486312866211, + "learning_rate": 5e-05, + "loss": 1.7542, + "num_input_tokens_seen": 66724092, + "step": 997 + }, + { + "epoch": 0.11313475177304964, + "loss": 1.7756389379501343, + "loss_ce": 0.005131042096763849, + "loss_iou": 0.7734375, + "loss_num": 0.044677734375, + "loss_xval": 1.7734375, + "num_input_tokens_seen": 66724092, + "step": 997 + }, + { + "epoch": 0.11324822695035461, + "grad_norm": 26.646684646606445, + "learning_rate": 5e-05, + "loss": 1.4963, + "num_input_tokens_seen": 66790812, + "step": 998 + }, + { + "epoch": 0.11324822695035461, + "loss": 1.4103455543518066, + "loss_ce": 0.0031190470326691866, + "loss_iou": 0.609375, + "loss_num": 0.037841796875, + "loss_xval": 1.40625, + "num_input_tokens_seen": 66790812, + "step": 998 + }, + { + "epoch": 0.11336170212765957, + "grad_norm": 37.644657135009766, + "learning_rate": 5e-05, + "loss": 1.6266, + "num_input_tokens_seen": 66858380, + "step": 999 + }, + { + "epoch": 0.11336170212765957, + "loss": 1.5098779201507568, + "loss_ce": 0.00499499449506402, + "loss_iou": 0.671875, + "loss_num": 0.031494140625, + "loss_xval": 1.5078125, + "num_input_tokens_seen": 66858380, + "step": 999 + }, + { + "epoch": 0.11347517730496454, + "grad_norm": 21.852724075317383, + "learning_rate": 5e-05, + "loss": 1.7563, + "num_input_tokens_seen": 66925244, + "step": 1000 + }, + { + "epoch": 0.11347517730496454, + "eval_seeclick_CIoU": 0.3372170031070709, + "eval_seeclick_GIoU": 0.3088449537754059, + "eval_seeclick_IoU": 0.43011267483234406, + "eval_seeclick_MAE_all": 0.16969216614961624, + "eval_seeclick_MAE_h": 0.09587906673550606, + "eval_seeclick_MAE_w": 0.14749066531658173, + "eval_seeclick_MAE_x_boxes": 0.2576962411403656, + "eval_seeclick_MAE_y_boxes": 0.13907130435109138, + "eval_seeclick_NUM_probability": 0.9996863603591919, + "eval_seeclick_inside_bbox": 0.5989583432674408, + "eval_seeclick_loss": 2.7314975261688232, + "eval_seeclick_loss_ce": 0.014189084060490131, + "eval_seeclick_loss_iou": 0.9354248046875, + "eval_seeclick_loss_num": 0.1673583984375, + "eval_seeclick_loss_xval": 2.70849609375, + "eval_seeclick_runtime": 62.8615, + "eval_seeclick_samples_per_second": 0.748, + "eval_seeclick_steps_per_second": 0.032, + "num_input_tokens_seen": 66925244, + "step": 1000 + }, + { + "epoch": 0.11347517730496454, + "eval_icons_CIoU": 0.4121934771537781, + "eval_icons_GIoU": 0.3821878731250763, + "eval_icons_IoU": 0.4548581540584564, + "eval_icons_MAE_all": 0.160062737762928, + "eval_icons_MAE_h": 0.13242103159427643, + "eval_icons_MAE_w": 0.1253330148756504, + "eval_icons_MAE_x_boxes": 0.10702159628272057, + "eval_icons_MAE_y_boxes": 0.12119023874402046, + "eval_icons_NUM_probability": 0.9994466006755829, + "eval_icons_inside_bbox": 0.7951388955116272, + "eval_icons_loss": 2.669114112854004, + "eval_icons_loss_ce": 0.0003051764942938462, + "eval_icons_loss_iou": 0.94677734375, + "eval_icons_loss_num": 0.143768310546875, + "eval_icons_loss_xval": 2.61181640625, + "eval_icons_runtime": 66.4442, + "eval_icons_samples_per_second": 0.753, + "eval_icons_steps_per_second": 0.03, + "num_input_tokens_seen": 66925244, + "step": 1000 + }, + { + "epoch": 0.11347517730496454, + "eval_screenspot_CIoU": 0.3616783817609151, + "eval_screenspot_GIoU": 0.33462725083033246, + "eval_screenspot_IoU": 0.44494465986887616, + "eval_screenspot_MAE_all": 0.15388312190771103, + "eval_screenspot_MAE_h": 0.09883108486731847, + "eval_screenspot_MAE_w": 0.1365153193473816, + "eval_screenspot_MAE_x_boxes": 0.2413888176282247, + "eval_screenspot_MAE_y_boxes": 0.09080426022410393, + "eval_screenspot_NUM_probability": 0.9993979533513387, + "eval_screenspot_inside_bbox": 0.6862499912579855, + "eval_screenspot_loss": 2.776301860809326, + "eval_screenspot_loss_ce": 0.00912293108801047, + "eval_screenspot_loss_iou": 1.0030924479166667, + "eval_screenspot_loss_num": 0.16402180989583334, + "eval_screenspot_loss_xval": 2.8268229166666665, + "eval_screenspot_runtime": 112.0591, + "eval_screenspot_samples_per_second": 0.794, + "eval_screenspot_steps_per_second": 0.027, + "num_input_tokens_seen": 66925244, + "step": 1000 + }, + { + "epoch": 0.11347517730496454, + "eval_compot_CIoU": 0.24028663337230682, + "eval_compot_GIoU": 0.1962457075715065, + "eval_compot_IoU": 0.34565243124961853, + "eval_compot_MAE_all": 0.1840585619211197, + "eval_compot_MAE_h": 0.10284339264035225, + "eval_compot_MAE_w": 0.11520912498235703, + "eval_compot_MAE_x_boxes": 0.22148388624191284, + "eval_compot_MAE_y_boxes": 0.15804358571767807, + "eval_compot_NUM_probability": 0.9996805787086487, + "eval_compot_inside_bbox": 0.4722222238779068, + "eval_compot_loss": 2.958268404006958, + "eval_compot_loss_ce": 0.0021637448808178306, + "eval_compot_loss_iou": 1.0322265625, + "eval_compot_loss_num": 0.1839599609375, + "eval_compot_loss_xval": 2.98193359375, + "eval_compot_runtime": 75.4193, + "eval_compot_samples_per_second": 0.663, + "eval_compot_steps_per_second": 0.027, + "num_input_tokens_seen": 66925244, + "step": 1000 + }, + { + "epoch": 0.11347517730496454, + "loss": 2.9131503105163574, + "loss_ce": 0.0020173443481326103, + "loss_iou": 1.015625, + "loss_num": 0.1767578125, + "loss_xval": 2.90625, + "num_input_tokens_seen": 66925244, + "step": 1000 + }, + { + "epoch": 0.1135886524822695, + "grad_norm": 48.49418640136719, + "learning_rate": 5e-05, + "loss": 1.4536, + "num_input_tokens_seen": 66992156, + "step": 1001 + }, + { + "epoch": 0.1135886524822695, + "loss": 1.452540636062622, + "loss_ce": 0.00966949574649334, + "loss_iou": 0.59375, + "loss_num": 0.050537109375, + "loss_xval": 1.4453125, + "num_input_tokens_seen": 66992156, + "step": 1001 + }, + { + "epoch": 0.11370212765957446, + "grad_norm": 45.15170669555664, + "learning_rate": 5e-05, + "loss": 1.7566, + "num_input_tokens_seen": 67058976, + "step": 1002 + }, + { + "epoch": 0.11370212765957446, + "loss": 1.6646173000335693, + "loss_ce": 0.0064141592010855675, + "loss_iou": 0.7421875, + "loss_num": 0.0341796875, + "loss_xval": 1.65625, + "num_input_tokens_seen": 67058976, + "step": 1002 + }, + { + "epoch": 0.11381560283687943, + "grad_norm": 19.825824737548828, + "learning_rate": 5e-05, + "loss": 1.6817, + "num_input_tokens_seen": 67126092, + "step": 1003 + }, + { + "epoch": 0.11381560283687943, + "loss": 1.7086234092712402, + "loss_ce": 0.003545295912772417, + "loss_iou": 0.7734375, + "loss_num": 0.0322265625, + "loss_xval": 1.703125, + "num_input_tokens_seen": 67126092, + "step": 1003 + }, + { + "epoch": 0.11392907801418439, + "grad_norm": 13.211539268493652, + "learning_rate": 5e-05, + "loss": 1.3849, + "num_input_tokens_seen": 67193080, + "step": 1004 + }, + { + "epoch": 0.11392907801418439, + "loss": 1.5167866945266724, + "loss_ce": 0.003114832565188408, + "loss_iou": 0.63671875, + "loss_num": 0.047607421875, + "loss_xval": 1.515625, + "num_input_tokens_seen": 67193080, + "step": 1004 + }, + { + "epoch": 0.11404255319148936, + "grad_norm": 12.03847599029541, + "learning_rate": 5e-05, + "loss": 1.4667, + "num_input_tokens_seen": 67261280, + "step": 1005 + }, + { + "epoch": 0.11404255319148936, + "loss": 1.4656850099563599, + "loss_ce": 0.006212315522134304, + "loss_iou": 0.6171875, + "loss_num": 0.045654296875, + "loss_xval": 1.4609375, + "num_input_tokens_seen": 67261280, + "step": 1005 + }, + { + "epoch": 0.11415602836879432, + "grad_norm": 18.888261795043945, + "learning_rate": 5e-05, + "loss": 1.4156, + "num_input_tokens_seen": 67328320, + "step": 1006 + }, + { + "epoch": 0.11415602836879432, + "loss": 1.4184272289276123, + "loss_ce": 0.013153797015547752, + "loss_iou": 0.5390625, + "loss_num": 0.0654296875, + "loss_xval": 1.40625, + "num_input_tokens_seen": 67328320, + "step": 1006 + }, + { + "epoch": 0.11426950354609929, + "grad_norm": 21.05299186706543, + "learning_rate": 5e-05, + "loss": 1.6404, + "num_input_tokens_seen": 67394916, + "step": 1007 + }, + { + "epoch": 0.11426950354609929, + "loss": 1.8097875118255615, + "loss_ce": 0.006076638586819172, + "loss_iou": 0.7421875, + "loss_num": 0.06396484375, + "loss_xval": 1.8046875, + "num_input_tokens_seen": 67394916, + "step": 1007 + }, + { + "epoch": 0.11438297872340425, + "grad_norm": 30.18783950805664, + "learning_rate": 5e-05, + "loss": 1.5921, + "num_input_tokens_seen": 67462172, + "step": 1008 + }, + { + "epoch": 0.11438297872340425, + "loss": 1.5897728204727173, + "loss_ce": 0.004811886698007584, + "loss_iou": 0.671875, + "loss_num": 0.04833984375, + "loss_xval": 1.5859375, + "num_input_tokens_seen": 67462172, + "step": 1008 + }, + { + "epoch": 0.11449645390070921, + "grad_norm": 19.085580825805664, + "learning_rate": 5e-05, + "loss": 1.5079, + "num_input_tokens_seen": 67527780, + "step": 1009 + }, + { + "epoch": 0.11449645390070921, + "loss": 1.488473892211914, + "loss_ce": 0.008981749415397644, + "loss_iou": 0.62109375, + "loss_num": 0.04736328125, + "loss_xval": 1.4765625, + "num_input_tokens_seen": 67527780, + "step": 1009 + }, + { + "epoch": 0.11460992907801418, + "grad_norm": 18.60703468322754, + "learning_rate": 5e-05, + "loss": 1.4101, + "num_input_tokens_seen": 67594688, + "step": 1010 + }, + { + "epoch": 0.11460992907801418, + "loss": 1.2888392210006714, + "loss_ce": 0.003194686956703663, + "loss_iou": 0.56640625, + "loss_num": 0.0299072265625, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 67594688, + "step": 1010 + }, + { + "epoch": 0.11472340425531916, + "grad_norm": 25.357534408569336, + "learning_rate": 5e-05, + "loss": 1.519, + "num_input_tokens_seen": 67661468, + "step": 1011 + }, + { + "epoch": 0.11472340425531916, + "loss": 1.5041334629058838, + "loss_ce": 0.005110098980367184, + "loss_iou": 0.640625, + "loss_num": 0.0439453125, + "loss_xval": 1.5, + "num_input_tokens_seen": 67661468, + "step": 1011 + }, + { + "epoch": 0.11483687943262412, + "grad_norm": 18.272233963012695, + "learning_rate": 5e-05, + "loss": 1.5464, + "num_input_tokens_seen": 67729556, + "step": 1012 + }, + { + "epoch": 0.11483687943262412, + "loss": 1.631150245666504, + "loss_ce": 0.0051736487075686455, + "loss_iou": 0.66796875, + "loss_num": 0.057861328125, + "loss_xval": 1.625, + "num_input_tokens_seen": 67729556, + "step": 1012 + }, + { + "epoch": 0.11495035460992908, + "grad_norm": 18.68192481994629, + "learning_rate": 5e-05, + "loss": 1.4566, + "num_input_tokens_seen": 67796076, + "step": 1013 + }, + { + "epoch": 0.11495035460992908, + "loss": 1.4566658735275269, + "loss_ce": 0.0015877000987529755, + "loss_iou": 0.62890625, + "loss_num": 0.0390625, + "loss_xval": 1.453125, + "num_input_tokens_seen": 67796076, + "step": 1013 + }, + { + "epoch": 0.11506382978723405, + "grad_norm": 33.40878677368164, + "learning_rate": 5e-05, + "loss": 1.4357, + "num_input_tokens_seen": 67863332, + "step": 1014 + }, + { + "epoch": 0.11506382978723405, + "loss": 1.6218197345733643, + "loss_ce": 0.004143962636590004, + "loss_iou": 0.66796875, + "loss_num": 0.056640625, + "loss_xval": 1.6171875, + "num_input_tokens_seen": 67863332, + "step": 1014 + }, + { + "epoch": 0.11517730496453901, + "grad_norm": 22.64756202697754, + "learning_rate": 5e-05, + "loss": 1.6798, + "num_input_tokens_seen": 67930484, + "step": 1015 + }, + { + "epoch": 0.11517730496453901, + "loss": 1.5819597244262695, + "loss_ce": 0.005299596581608057, + "loss_iou": 0.6953125, + "loss_num": 0.03759765625, + "loss_xval": 1.578125, + "num_input_tokens_seen": 67930484, + "step": 1015 + }, + { + "epoch": 0.11529078014184398, + "grad_norm": 12.590713500976562, + "learning_rate": 5e-05, + "loss": 1.4159, + "num_input_tokens_seen": 67998052, + "step": 1016 + }, + { + "epoch": 0.11529078014184398, + "loss": 1.2874126434326172, + "loss_ce": 0.0012798584066331387, + "loss_iou": 0.5703125, + "loss_num": 0.028564453125, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 67998052, + "step": 1016 + }, + { + "epoch": 0.11540425531914894, + "grad_norm": 20.744447708129883, + "learning_rate": 5e-05, + "loss": 1.3823, + "num_input_tokens_seen": 68065496, + "step": 1017 + }, + { + "epoch": 0.11540425531914894, + "loss": 1.3270387649536133, + "loss_ce": 0.004284797236323357, + "loss_iou": 0.609375, + "loss_num": 0.020751953125, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 68065496, + "step": 1017 + }, + { + "epoch": 0.1155177304964539, + "grad_norm": 38.8928108215332, + "learning_rate": 5e-05, + "loss": 1.5552, + "num_input_tokens_seen": 68132448, + "step": 1018 + }, + { + "epoch": 0.1155177304964539, + "loss": 1.5072153806686401, + "loss_ce": 0.00721535412594676, + "loss_iou": 0.6875, + "loss_num": 0.026123046875, + "loss_xval": 1.5, + "num_input_tokens_seen": 68132448, + "step": 1018 + }, + { + "epoch": 0.11563120567375887, + "grad_norm": 23.537824630737305, + "learning_rate": 5e-05, + "loss": 1.5913, + "num_input_tokens_seen": 68200184, + "step": 1019 + }, + { + "epoch": 0.11563120567375887, + "loss": 1.4865643978118896, + "loss_ce": 0.0041425107046961784, + "loss_iou": 0.6484375, + "loss_num": 0.036865234375, + "loss_xval": 1.484375, + "num_input_tokens_seen": 68200184, + "step": 1019 + }, + { + "epoch": 0.11574468085106383, + "grad_norm": 24.870359420776367, + "learning_rate": 5e-05, + "loss": 1.2865, + "num_input_tokens_seen": 68268028, + "step": 1020 + }, + { + "epoch": 0.11574468085106383, + "loss": 1.1864848136901855, + "loss_ce": 0.0053324513137340546, + "loss_iou": 0.53125, + "loss_num": 0.0238037109375, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 68268028, + "step": 1020 + }, + { + "epoch": 0.1158581560283688, + "grad_norm": 20.323469161987305, + "learning_rate": 5e-05, + "loss": 1.4458, + "num_input_tokens_seen": 68333808, + "step": 1021 + }, + { + "epoch": 0.1158581560283688, + "loss": 1.3718829154968262, + "loss_ce": 0.003474750556051731, + "loss_iou": 0.5703125, + "loss_num": 0.04541015625, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 68333808, + "step": 1021 + }, + { + "epoch": 0.11597163120567376, + "grad_norm": 46.11212158203125, + "learning_rate": 5e-05, + "loss": 1.5453, + "num_input_tokens_seen": 68400616, + "step": 1022 + }, + { + "epoch": 0.11597163120567376, + "loss": 1.5005879402160645, + "loss_ce": 0.003517566714435816, + "loss_iou": 0.66015625, + "loss_num": 0.03515625, + "loss_xval": 1.5, + "num_input_tokens_seen": 68400616, + "step": 1022 + }, + { + "epoch": 0.11608510638297873, + "grad_norm": 13.618268013000488, + "learning_rate": 5e-05, + "loss": 1.4981, + "num_input_tokens_seen": 68466980, + "step": 1023 + }, + { + "epoch": 0.11608510638297873, + "loss": 1.534700870513916, + "loss_ce": 0.006380595266819, + "loss_iou": 0.625, + "loss_num": 0.056640625, + "loss_xval": 1.53125, + "num_input_tokens_seen": 68466980, + "step": 1023 + }, + { + "epoch": 0.11619858156028369, + "grad_norm": 37.58345413208008, + "learning_rate": 5e-05, + "loss": 1.5338, + "num_input_tokens_seen": 68534180, + "step": 1024 + }, + { + "epoch": 0.11619858156028369, + "loss": 1.4894651174545288, + "loss_ce": 0.0031370162032544613, + "loss_iou": 0.65625, + "loss_num": 0.03515625, + "loss_xval": 1.484375, + "num_input_tokens_seen": 68534180, + "step": 1024 + }, + { + "epoch": 0.11631205673758865, + "grad_norm": 24.073951721191406, + "learning_rate": 5e-05, + "loss": 1.5234, + "num_input_tokens_seen": 68601048, + "step": 1025 + }, + { + "epoch": 0.11631205673758865, + "loss": 1.3981966972351074, + "loss_ce": 0.006350952200591564, + "loss_iou": 0.62890625, + "loss_num": 0.0274658203125, + "loss_xval": 1.390625, + "num_input_tokens_seen": 68601048, + "step": 1025 + }, + { + "epoch": 0.11642553191489362, + "grad_norm": 13.467392921447754, + "learning_rate": 5e-05, + "loss": 1.3945, + "num_input_tokens_seen": 68667844, + "step": 1026 + }, + { + "epoch": 0.11642553191489362, + "loss": 1.5639383792877197, + "loss_ce": 0.0033915333915501833, + "loss_iou": 0.68359375, + "loss_num": 0.039306640625, + "loss_xval": 1.5625, + "num_input_tokens_seen": 68667844, + "step": 1026 + }, + { + "epoch": 0.11653900709219858, + "grad_norm": 30.048683166503906, + "learning_rate": 5e-05, + "loss": 1.4337, + "num_input_tokens_seen": 68734412, + "step": 1027 + }, + { + "epoch": 0.11653900709219858, + "loss": 1.5211931467056274, + "loss_ce": 0.004591598641127348, + "loss_iou": 0.6484375, + "loss_num": 0.0439453125, + "loss_xval": 1.515625, + "num_input_tokens_seen": 68734412, + "step": 1027 + }, + { + "epoch": 0.11665248226950355, + "grad_norm": 21.718215942382812, + "learning_rate": 5e-05, + "loss": 1.7929, + "num_input_tokens_seen": 68801784, + "step": 1028 + }, + { + "epoch": 0.11665248226950355, + "loss": 1.717137098312378, + "loss_ce": 0.0042464216239750385, + "loss_iou": 0.7578125, + "loss_num": 0.039306640625, + "loss_xval": 1.7109375, + "num_input_tokens_seen": 68801784, + "step": 1028 + }, + { + "epoch": 0.11676595744680851, + "grad_norm": 17.123050689697266, + "learning_rate": 5e-05, + "loss": 1.5749, + "num_input_tokens_seen": 68866968, + "step": 1029 + }, + { + "epoch": 0.11676595744680851, + "loss": 1.5201835632324219, + "loss_ce": 0.008586876094341278, + "loss_iou": 0.56640625, + "loss_num": 0.0751953125, + "loss_xval": 1.5078125, + "num_input_tokens_seen": 68866968, + "step": 1029 + }, + { + "epoch": 0.11687943262411347, + "grad_norm": 33.83953094482422, + "learning_rate": 5e-05, + "loss": 1.3585, + "num_input_tokens_seen": 68933164, + "step": 1030 + }, + { + "epoch": 0.11687943262411347, + "loss": 1.2458062171936035, + "loss_ce": 0.007769137620925903, + "loss_iou": 0.546875, + "loss_num": 0.028564453125, + "loss_xval": 1.234375, + "num_input_tokens_seen": 68933164, + "step": 1030 + }, + { + "epoch": 0.11699290780141844, + "grad_norm": 64.93009185791016, + "learning_rate": 5e-05, + "loss": 2.0054, + "num_input_tokens_seen": 69000932, + "step": 1031 + }, + { + "epoch": 0.11699290780141844, + "loss": 1.9974186420440674, + "loss_ce": 0.016949938610196114, + "loss_iou": 0.8515625, + "loss_num": 0.055419921875, + "loss_xval": 1.984375, + "num_input_tokens_seen": 69000932, + "step": 1031 + }, + { + "epoch": 0.1171063829787234, + "grad_norm": 17.209840774536133, + "learning_rate": 5e-05, + "loss": 1.53, + "num_input_tokens_seen": 69068396, + "step": 1032 + }, + { + "epoch": 0.1171063829787234, + "loss": 1.6402699947357178, + "loss_ce": 0.00648094667121768, + "loss_iou": 0.7109375, + "loss_num": 0.04248046875, + "loss_xval": 1.6328125, + "num_input_tokens_seen": 69068396, + "step": 1032 + }, + { + "epoch": 0.11721985815602837, + "grad_norm": 19.892471313476562, + "learning_rate": 5e-05, + "loss": 1.3104, + "num_input_tokens_seen": 69135476, + "step": 1033 + }, + { + "epoch": 0.11721985815602837, + "loss": 1.3724713325500488, + "loss_ce": 0.007237010635435581, + "loss_iou": 0.578125, + "loss_num": 0.041259765625, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 69135476, + "step": 1033 + }, + { + "epoch": 0.11733333333333333, + "grad_norm": 36.65412902832031, + "learning_rate": 5e-05, + "loss": 1.5843, + "num_input_tokens_seen": 69202080, + "step": 1034 + }, + { + "epoch": 0.11733333333333333, + "loss": 1.5974936485290527, + "loss_ce": 0.0056966873817145824, + "loss_iou": 0.6875, + "loss_num": 0.043212890625, + "loss_xval": 1.59375, + "num_input_tokens_seen": 69202080, + "step": 1034 + }, + { + "epoch": 0.1174468085106383, + "grad_norm": 24.844083786010742, + "learning_rate": 5e-05, + "loss": 1.4145, + "num_input_tokens_seen": 69269080, + "step": 1035 + }, + { + "epoch": 0.1174468085106383, + "loss": 1.4669828414916992, + "loss_ce": 0.006045301910489798, + "loss_iou": 0.640625, + "loss_num": 0.03564453125, + "loss_xval": 1.4609375, + "num_input_tokens_seen": 69269080, + "step": 1035 + }, + { + "epoch": 0.11756028368794326, + "grad_norm": 20.448698043823242, + "learning_rate": 5e-05, + "loss": 1.7522, + "num_input_tokens_seen": 69336196, + "step": 1036 + }, + { + "epoch": 0.11756028368794326, + "loss": 1.6773548126220703, + "loss_ce": 0.003526745131239295, + "loss_iou": 0.7109375, + "loss_num": 0.05029296875, + "loss_xval": 1.671875, + "num_input_tokens_seen": 69336196, + "step": 1036 + }, + { + "epoch": 0.11767375886524822, + "grad_norm": 20.847837448120117, + "learning_rate": 5e-05, + "loss": 1.3516, + "num_input_tokens_seen": 69402052, + "step": 1037 + }, + { + "epoch": 0.11767375886524822, + "loss": 1.3475629091262817, + "loss_ce": 0.0038128711748868227, + "loss_iou": 0.5546875, + "loss_num": 0.046875, + "loss_xval": 1.34375, + "num_input_tokens_seen": 69402052, + "step": 1037 + }, + { + "epoch": 0.11778723404255319, + "grad_norm": 23.771955490112305, + "learning_rate": 5e-05, + "loss": 1.7766, + "num_input_tokens_seen": 69469224, + "step": 1038 + }, + { + "epoch": 0.11778723404255319, + "loss": 1.8947291374206543, + "loss_ce": 0.003127476666122675, + "loss_iou": 0.7734375, + "loss_num": 0.0693359375, + "loss_xval": 1.890625, + "num_input_tokens_seen": 69469224, + "step": 1038 + }, + { + "epoch": 0.11790070921985815, + "grad_norm": 23.73886489868164, + "learning_rate": 5e-05, + "loss": 1.3211, + "num_input_tokens_seen": 69535952, + "step": 1039 + }, + { + "epoch": 0.11790070921985815, + "loss": 1.3894157409667969, + "loss_ce": 0.009044725447893143, + "loss_iou": 0.5390625, + "loss_num": 0.061279296875, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 69535952, + "step": 1039 + }, + { + "epoch": 0.11801418439716312, + "grad_norm": 22.98033905029297, + "learning_rate": 5e-05, + "loss": 1.3358, + "num_input_tokens_seen": 69602512, + "step": 1040 + }, + { + "epoch": 0.11801418439716312, + "loss": 1.175792932510376, + "loss_ce": 0.005871109664440155, + "loss_iou": 0.5078125, + "loss_num": 0.0311279296875, + "loss_xval": 1.171875, + "num_input_tokens_seen": 69602512, + "step": 1040 + }, + { + "epoch": 0.11812765957446808, + "grad_norm": 25.46491813659668, + "learning_rate": 5e-05, + "loss": 1.4245, + "num_input_tokens_seen": 69669732, + "step": 1041 + }, + { + "epoch": 0.11812765957446808, + "loss": 1.5047407150268555, + "loss_ce": 0.005717240273952484, + "loss_iou": 0.65234375, + "loss_num": 0.039306640625, + "loss_xval": 1.5, + "num_input_tokens_seen": 69669732, + "step": 1041 + }, + { + "epoch": 0.11824113475177304, + "grad_norm": 19.123859405517578, + "learning_rate": 5e-05, + "loss": 1.8175, + "num_input_tokens_seen": 69737140, + "step": 1042 + }, + { + "epoch": 0.11824113475177304, + "loss": 1.6183900833129883, + "loss_ce": 0.00608536321669817, + "loss_iou": 0.69921875, + "loss_num": 0.04248046875, + "loss_xval": 1.609375, + "num_input_tokens_seen": 69737140, + "step": 1042 + }, + { + "epoch": 0.11835460992907801, + "grad_norm": 11.914936065673828, + "learning_rate": 5e-05, + "loss": 1.4922, + "num_input_tokens_seen": 69803864, + "step": 1043 + }, + { + "epoch": 0.11835460992907801, + "loss": 1.4472683668136597, + "loss_ce": 0.007815269753336906, + "loss_iou": 0.62109375, + "loss_num": 0.039306640625, + "loss_xval": 1.4375, + "num_input_tokens_seen": 69803864, + "step": 1043 + }, + { + "epoch": 0.11846808510638297, + "grad_norm": 15.001970291137695, + "learning_rate": 5e-05, + "loss": 1.3185, + "num_input_tokens_seen": 69870664, + "step": 1044 + }, + { + "epoch": 0.11846808510638297, + "loss": 1.2589397430419922, + "loss_ce": 0.005033512599766254, + "loss_iou": 0.55078125, + "loss_num": 0.031005859375, + "loss_xval": 1.25, + "num_input_tokens_seen": 69870664, + "step": 1044 + }, + { + "epoch": 0.11858156028368794, + "grad_norm": 63.239593505859375, + "learning_rate": 5e-05, + "loss": 1.4015, + "num_input_tokens_seen": 69938432, + "step": 1045 + }, + { + "epoch": 0.11858156028368794, + "loss": 1.2929410934448242, + "loss_ce": 0.0068083894439041615, + "loss_iou": 0.5703125, + "loss_num": 0.028564453125, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 69938432, + "step": 1045 + }, + { + "epoch": 0.1186950354609929, + "grad_norm": 19.91580581665039, + "learning_rate": 5e-05, + "loss": 1.5851, + "num_input_tokens_seen": 70005476, + "step": 1046 + }, + { + "epoch": 0.1186950354609929, + "loss": 1.4612723588943481, + "loss_ce": 0.004729380831122398, + "loss_iou": 0.62890625, + "loss_num": 0.039794921875, + "loss_xval": 1.453125, + "num_input_tokens_seen": 70005476, + "step": 1046 + }, + { + "epoch": 0.11880851063829788, + "grad_norm": 14.196168899536133, + "learning_rate": 5e-05, + "loss": 1.4416, + "num_input_tokens_seen": 70072760, + "step": 1047 + }, + { + "epoch": 0.11880851063829788, + "loss": 1.4763238430023193, + "loss_ce": 0.005132513120770454, + "loss_iou": 0.58984375, + "loss_num": 0.058349609375, + "loss_xval": 1.46875, + "num_input_tokens_seen": 70072760, + "step": 1047 + }, + { + "epoch": 0.11892198581560284, + "grad_norm": 29.14299964904785, + "learning_rate": 5e-05, + "loss": 1.5303, + "num_input_tokens_seen": 70139556, + "step": 1048 + }, + { + "epoch": 0.11892198581560284, + "loss": 1.5607346296310425, + "loss_ce": 0.010929926298558712, + "loss_iou": 0.6640625, + "loss_num": 0.043701171875, + "loss_xval": 1.546875, + "num_input_tokens_seen": 70139556, + "step": 1048 + }, + { + "epoch": 0.11903546099290781, + "grad_norm": 20.905906677246094, + "learning_rate": 5e-05, + "loss": 1.5012, + "num_input_tokens_seen": 70205804, + "step": 1049 + }, + { + "epoch": 0.11903546099290781, + "loss": 1.6398704051971436, + "loss_ce": 0.0051048207096755505, + "loss_iou": 0.734375, + "loss_num": 0.03271484375, + "loss_xval": 1.6328125, + "num_input_tokens_seen": 70205804, + "step": 1049 + }, + { + "epoch": 0.11914893617021277, + "grad_norm": 35.71268081665039, + "learning_rate": 5e-05, + "loss": 1.5367, + "num_input_tokens_seen": 70272612, + "step": 1050 + }, + { + "epoch": 0.11914893617021277, + "loss": 1.7049236297607422, + "loss_ce": 0.004728242754936218, + "loss_iou": 0.7421875, + "loss_num": 0.04296875, + "loss_xval": 1.703125, + "num_input_tokens_seen": 70272612, + "step": 1050 + }, + { + "epoch": 0.11926241134751774, + "grad_norm": 21.4947566986084, + "learning_rate": 5e-05, + "loss": 1.7925, + "num_input_tokens_seen": 70339468, + "step": 1051 + }, + { + "epoch": 0.11926241134751774, + "loss": 1.9006870985031128, + "loss_ce": 0.004202688112854958, + "loss_iou": 0.828125, + "loss_num": 0.048095703125, + "loss_xval": 1.8984375, + "num_input_tokens_seen": 70339468, + "step": 1051 + }, + { + "epoch": 0.1193758865248227, + "grad_norm": 24.8465518951416, + "learning_rate": 5e-05, + "loss": 1.3831, + "num_input_tokens_seen": 70405928, + "step": 1052 + }, + { + "epoch": 0.1193758865248227, + "loss": 1.4306764602661133, + "loss_ce": 0.0014771235873922706, + "loss_iou": 0.60546875, + "loss_num": 0.044189453125, + "loss_xval": 1.4296875, + "num_input_tokens_seen": 70405928, + "step": 1052 + }, + { + "epoch": 0.11948936170212766, + "grad_norm": 46.6567268371582, + "learning_rate": 5e-05, + "loss": 1.7582, + "num_input_tokens_seen": 70472496, + "step": 1053 + }, + { + "epoch": 0.11948936170212766, + "loss": 1.702553153038025, + "loss_ce": 0.009193789213895798, + "loss_iou": 0.75, + "loss_num": 0.0380859375, + "loss_xval": 1.6953125, + "num_input_tokens_seen": 70472496, + "step": 1053 + }, + { + "epoch": 0.11960283687943263, + "grad_norm": 13.54664134979248, + "learning_rate": 5e-05, + "loss": 1.7256, + "num_input_tokens_seen": 70539976, + "step": 1054 + }, + { + "epoch": 0.11960283687943263, + "loss": 1.4445332288742065, + "loss_ce": 0.00593454297631979, + "loss_iou": 0.60546875, + "loss_num": 0.044921875, + "loss_xval": 1.4375, + "num_input_tokens_seen": 70539976, + "step": 1054 + }, + { + "epoch": 0.11971631205673759, + "grad_norm": 17.085397720336914, + "learning_rate": 5e-05, + "loss": 1.54, + "num_input_tokens_seen": 70606964, + "step": 1055 + }, + { + "epoch": 0.11971631205673759, + "loss": 1.5383620262145996, + "loss_ce": 0.002961610909551382, + "loss_iou": 0.65625, + "loss_num": 0.044189453125, + "loss_xval": 1.5390625, + "num_input_tokens_seen": 70606964, + "step": 1055 + }, + { + "epoch": 0.11982978723404256, + "grad_norm": 17.334758758544922, + "learning_rate": 5e-05, + "loss": 1.382, + "num_input_tokens_seen": 70673368, + "step": 1056 + }, + { + "epoch": 0.11982978723404256, + "loss": 1.3155877590179443, + "loss_ce": 0.006505818106234074, + "loss_iou": 0.55078125, + "loss_num": 0.042236328125, + "loss_xval": 1.3125, + "num_input_tokens_seen": 70673368, + "step": 1056 + }, + { + "epoch": 0.11994326241134752, + "grad_norm": 28.855234146118164, + "learning_rate": 5e-05, + "loss": 1.3468, + "num_input_tokens_seen": 70740276, + "step": 1057 + }, + { + "epoch": 0.11994326241134752, + "loss": 1.364157795906067, + "loss_ce": 0.00478280009701848, + "loss_iou": 0.55859375, + "loss_num": 0.049072265625, + "loss_xval": 1.359375, + "num_input_tokens_seen": 70740276, + "step": 1057 + }, + { + "epoch": 0.12005673758865248, + "grad_norm": 19.44704818725586, + "learning_rate": 5e-05, + "loss": 1.7637, + "num_input_tokens_seen": 70807828, + "step": 1058 + }, + { + "epoch": 0.12005673758865248, + "loss": 1.8316447734832764, + "loss_ce": 0.002543151145800948, + "loss_iou": 0.7578125, + "loss_num": 0.0634765625, + "loss_xval": 1.828125, + "num_input_tokens_seen": 70807828, + "step": 1058 + }, + { + "epoch": 0.12017021276595745, + "grad_norm": 20.630643844604492, + "learning_rate": 5e-05, + "loss": 1.2368, + "num_input_tokens_seen": 70874812, + "step": 1059 + }, + { + "epoch": 0.12017021276595745, + "loss": 1.147193193435669, + "loss_ce": 0.006568218115717173, + "loss_iou": 0.515625, + "loss_num": 0.022216796875, + "loss_xval": 1.140625, + "num_input_tokens_seen": 70874812, + "step": 1059 + }, + { + "epoch": 0.12028368794326241, + "grad_norm": 26.520105361938477, + "learning_rate": 5e-05, + "loss": 1.3088, + "num_input_tokens_seen": 70941732, + "step": 1060 + }, + { + "epoch": 0.12028368794326241, + "loss": 1.4233744144439697, + "loss_ce": 0.004429106134921312, + "loss_iou": 0.6171875, + "loss_num": 0.037841796875, + "loss_xval": 1.421875, + "num_input_tokens_seen": 70941732, + "step": 1060 + }, + { + "epoch": 0.12039716312056738, + "grad_norm": 24.43547248840332, + "learning_rate": 5e-05, + "loss": 1.6276, + "num_input_tokens_seen": 71009948, + "step": 1061 + }, + { + "epoch": 0.12039716312056738, + "loss": 1.63724684715271, + "loss_ce": 0.00541095482185483, + "loss_iou": 0.7109375, + "loss_num": 0.04150390625, + "loss_xval": 1.6328125, + "num_input_tokens_seen": 71009948, + "step": 1061 + }, + { + "epoch": 0.12051063829787234, + "grad_norm": 41.858551025390625, + "learning_rate": 5e-05, + "loss": 1.3821, + "num_input_tokens_seen": 71077232, + "step": 1062 + }, + { + "epoch": 0.12051063829787234, + "loss": 1.37339448928833, + "loss_ce": 0.006206928286701441, + "loss_iou": 0.58984375, + "loss_num": 0.037841796875, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 71077232, + "step": 1062 + }, + { + "epoch": 0.1206241134751773, + "grad_norm": 19.38773536682129, + "learning_rate": 5e-05, + "loss": 1.5105, + "num_input_tokens_seen": 71143740, + "step": 1063 + }, + { + "epoch": 0.1206241134751773, + "loss": 1.4178826808929443, + "loss_ce": 0.002904658205807209, + "loss_iou": 0.60546875, + "loss_num": 0.0400390625, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 71143740, + "step": 1063 + }, + { + "epoch": 0.12073758865248227, + "grad_norm": 14.068737983703613, + "learning_rate": 5e-05, + "loss": 1.4164, + "num_input_tokens_seen": 71211368, + "step": 1064 + }, + { + "epoch": 0.12073758865248227, + "loss": 1.3572829961776733, + "loss_ce": 0.007673612795770168, + "loss_iou": 0.6015625, + "loss_num": 0.029541015625, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 71211368, + "step": 1064 + }, + { + "epoch": 0.12085106382978723, + "grad_norm": 27.589065551757812, + "learning_rate": 5e-05, + "loss": 1.5024, + "num_input_tokens_seen": 71279012, + "step": 1065 + }, + { + "epoch": 0.12085106382978723, + "loss": 1.551321268081665, + "loss_ce": 0.0029813633300364017, + "loss_iou": 0.69140625, + "loss_num": 0.033935546875, + "loss_xval": 1.546875, + "num_input_tokens_seen": 71279012, + "step": 1065 + }, + { + "epoch": 0.1209645390070922, + "grad_norm": 17.587902069091797, + "learning_rate": 5e-05, + "loss": 1.4844, + "num_input_tokens_seen": 71344548, + "step": 1066 + }, + { + "epoch": 0.1209645390070922, + "loss": 1.5081167221069336, + "loss_ce": 0.00323390937410295, + "loss_iou": 0.67578125, + "loss_num": 0.03076171875, + "loss_xval": 1.5078125, + "num_input_tokens_seen": 71344548, + "step": 1066 + }, + { + "epoch": 0.12107801418439716, + "grad_norm": 13.013212203979492, + "learning_rate": 5e-05, + "loss": 1.3347, + "num_input_tokens_seen": 71412488, + "step": 1067 + }, + { + "epoch": 0.12107801418439716, + "loss": 1.3471390008926392, + "loss_ce": 0.006806910969316959, + "loss_iou": 0.53125, + "loss_num": 0.054931640625, + "loss_xval": 1.34375, + "num_input_tokens_seen": 71412488, + "step": 1067 + }, + { + "epoch": 0.12119148936170213, + "grad_norm": 30.311031341552734, + "learning_rate": 5e-05, + "loss": 1.536, + "num_input_tokens_seen": 71479480, + "step": 1068 + }, + { + "epoch": 0.12119148936170213, + "loss": 1.6623361110687256, + "loss_ce": 0.006086060777306557, + "loss_iou": 0.70703125, + "loss_num": 0.048583984375, + "loss_xval": 1.65625, + "num_input_tokens_seen": 71479480, + "step": 1068 + }, + { + "epoch": 0.12130496453900709, + "grad_norm": 35.650970458984375, + "learning_rate": 5e-05, + "loss": 1.7403, + "num_input_tokens_seen": 71546844, + "step": 1069 + }, + { + "epoch": 0.12130496453900709, + "loss": 1.8321516513824463, + "loss_ce": 0.004514794796705246, + "loss_iou": 0.765625, + "loss_num": 0.059326171875, + "loss_xval": 1.828125, + "num_input_tokens_seen": 71546844, + "step": 1069 + }, + { + "epoch": 0.12141843971631205, + "grad_norm": 30.837261199951172, + "learning_rate": 5e-05, + "loss": 1.6584, + "num_input_tokens_seen": 71614080, + "step": 1070 + }, + { + "epoch": 0.12141843971631205, + "loss": 1.7160903215408325, + "loss_ce": 0.0022231675684452057, + "loss_iou": 0.734375, + "loss_num": 0.0498046875, + "loss_xval": 1.7109375, + "num_input_tokens_seen": 71614080, + "step": 1070 + }, + { + "epoch": 0.12153191489361702, + "grad_norm": 12.845964431762695, + "learning_rate": 5e-05, + "loss": 1.41, + "num_input_tokens_seen": 71680480, + "step": 1071 + }, + { + "epoch": 0.12153191489361702, + "loss": 1.3083281517028809, + "loss_ce": 0.004617227707058191, + "loss_iou": 0.5546875, + "loss_num": 0.0390625, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 71680480, + "step": 1071 + }, + { + "epoch": 0.12164539007092198, + "grad_norm": 36.46562576293945, + "learning_rate": 5e-05, + "loss": 1.4658, + "num_input_tokens_seen": 71747100, + "step": 1072 + }, + { + "epoch": 0.12164539007092198, + "loss": 1.5694997310638428, + "loss_ce": 0.0069998037070035934, + "loss_iou": 0.69921875, + "loss_num": 0.032958984375, + "loss_xval": 1.5625, + "num_input_tokens_seen": 71747100, + "step": 1072 + }, + { + "epoch": 0.12175886524822695, + "grad_norm": 41.56955337524414, + "learning_rate": 5e-05, + "loss": 1.7258, + "num_input_tokens_seen": 71814808, + "step": 1073 + }, + { + "epoch": 0.12175886524822695, + "loss": 1.505321741104126, + "loss_ce": 0.004345199558883905, + "loss_iou": 0.625, + "loss_num": 0.050537109375, + "loss_xval": 1.5, + "num_input_tokens_seen": 71814808, + "step": 1073 + }, + { + "epoch": 0.12187234042553191, + "grad_norm": 19.620126724243164, + "learning_rate": 5e-05, + "loss": 1.7769, + "num_input_tokens_seen": 71881668, + "step": 1074 + }, + { + "epoch": 0.12187234042553191, + "loss": 1.953679084777832, + "loss_ce": 0.008366652764379978, + "loss_iou": 0.8203125, + "loss_num": 0.061767578125, + "loss_xval": 1.9453125, + "num_input_tokens_seen": 71881668, + "step": 1074 + }, + { + "epoch": 0.12198581560283688, + "grad_norm": 21.361658096313477, + "learning_rate": 5e-05, + "loss": 1.3641, + "num_input_tokens_seen": 71948640, + "step": 1075 + }, + { + "epoch": 0.12198581560283688, + "loss": 1.2887461185455322, + "loss_ce": 0.005054621957242489, + "loss_iou": 0.57421875, + "loss_num": 0.0277099609375, + "loss_xval": 1.28125, + "num_input_tokens_seen": 71948640, + "step": 1075 + }, + { + "epoch": 0.12209929078014184, + "grad_norm": 16.07221031188965, + "learning_rate": 5e-05, + "loss": 1.3527, + "num_input_tokens_seen": 72016464, + "step": 1076 + }, + { + "epoch": 0.12209929078014184, + "loss": 1.3515028953552246, + "loss_ce": 0.005311455111950636, + "loss_iou": 0.5859375, + "loss_num": 0.035400390625, + "loss_xval": 1.34375, + "num_input_tokens_seen": 72016464, + "step": 1076 + }, + { + "epoch": 0.1222127659574468, + "grad_norm": 20.0102596282959, + "learning_rate": 5e-05, + "loss": 1.3419, + "num_input_tokens_seen": 72082560, + "step": 1077 + }, + { + "epoch": 0.1222127659574468, + "loss": 1.0708537101745605, + "loss_ce": 0.00408122967928648, + "loss_iou": 0.451171875, + "loss_num": 0.033203125, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 72082560, + "step": 1077 + }, + { + "epoch": 0.12232624113475177, + "grad_norm": 37.57889175415039, + "learning_rate": 5e-05, + "loss": 1.4512, + "num_input_tokens_seen": 72149224, + "step": 1078 + }, + { + "epoch": 0.12232624113475177, + "loss": 1.5674474239349365, + "loss_ce": 0.004459152463823557, + "loss_iou": 0.6953125, + "loss_num": 0.034912109375, + "loss_xval": 1.5625, + "num_input_tokens_seen": 72149224, + "step": 1078 + }, + { + "epoch": 0.12243971631205673, + "grad_norm": 19.37431526184082, + "learning_rate": 5e-05, + "loss": 1.7479, + "num_input_tokens_seen": 72217260, + "step": 1079 + }, + { + "epoch": 0.12243971631205673, + "loss": 1.9460384845733643, + "loss_ce": 0.00463230162858963, + "loss_iou": 0.87890625, + "loss_num": 0.037109375, + "loss_xval": 1.9375, + "num_input_tokens_seen": 72217260, + "step": 1079 + }, + { + "epoch": 0.1225531914893617, + "grad_norm": 21.806745529174805, + "learning_rate": 5e-05, + "loss": 1.5683, + "num_input_tokens_seen": 72283248, + "step": 1080 + }, + { + "epoch": 0.1225531914893617, + "loss": 1.520694613456726, + "loss_ce": 0.007022779434919357, + "loss_iou": 0.62890625, + "loss_num": 0.05126953125, + "loss_xval": 1.515625, + "num_input_tokens_seen": 72283248, + "step": 1080 + }, + { + "epoch": 0.12266666666666666, + "grad_norm": 29.690343856811523, + "learning_rate": 5e-05, + "loss": 1.5172, + "num_input_tokens_seen": 72350496, + "step": 1081 + }, + { + "epoch": 0.12266666666666666, + "loss": 1.6189708709716797, + "loss_ce": 0.0037365425378084183, + "loss_iou": 0.6953125, + "loss_num": 0.04443359375, + "loss_xval": 1.6171875, + "num_input_tokens_seen": 72350496, + "step": 1081 + }, + { + "epoch": 0.12278014184397164, + "grad_norm": 24.350439071655273, + "learning_rate": 5e-05, + "loss": 1.5459, + "num_input_tokens_seen": 72417592, + "step": 1082 + }, + { + "epoch": 0.12278014184397164, + "loss": 1.7577320337295532, + "loss_ce": 0.007731993682682514, + "loss_iou": 0.76171875, + "loss_num": 0.04541015625, + "loss_xval": 1.75, + "num_input_tokens_seen": 72417592, + "step": 1082 + }, + { + "epoch": 0.1228936170212766, + "grad_norm": 23.486133575439453, + "learning_rate": 5e-05, + "loss": 1.366, + "num_input_tokens_seen": 72485340, + "step": 1083 + }, + { + "epoch": 0.1228936170212766, + "loss": 1.3004717826843262, + "loss_ce": 0.002620209474116564, + "loss_iou": 0.5703125, + "loss_num": 0.03125, + "loss_xval": 1.296875, + "num_input_tokens_seen": 72485340, + "step": 1083 + }, + { + "epoch": 0.12300709219858157, + "grad_norm": 30.23016929626465, + "learning_rate": 5e-05, + "loss": 1.7655, + "num_input_tokens_seen": 72550684, + "step": 1084 + }, + { + "epoch": 0.12300709219858157, + "loss": 1.7637834548950195, + "loss_ce": 0.003041309304535389, + "loss_iou": 0.76953125, + "loss_num": 0.044921875, + "loss_xval": 1.7578125, + "num_input_tokens_seen": 72550684, + "step": 1084 + }, + { + "epoch": 0.12312056737588653, + "grad_norm": 31.37822723388672, + "learning_rate": 5e-05, + "loss": 1.6302, + "num_input_tokens_seen": 72617104, + "step": 1085 + }, + { + "epoch": 0.12312056737588653, + "loss": 1.582824945449829, + "loss_ce": 0.011047661304473877, + "loss_iou": 0.640625, + "loss_num": 0.058349609375, + "loss_xval": 1.5703125, + "num_input_tokens_seen": 72617104, + "step": 1085 + }, + { + "epoch": 0.1232340425531915, + "grad_norm": 23.156600952148438, + "learning_rate": 5e-05, + "loss": 1.3064, + "num_input_tokens_seen": 72684568, + "step": 1086 + }, + { + "epoch": 0.1232340425531915, + "loss": 1.389770746231079, + "loss_ce": 0.005005006678402424, + "loss_iou": 0.61328125, + "loss_num": 0.0322265625, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 72684568, + "step": 1086 + }, + { + "epoch": 0.12334751773049646, + "grad_norm": 20.296064376831055, + "learning_rate": 5e-05, + "loss": 1.4142, + "num_input_tokens_seen": 72750492, + "step": 1087 + }, + { + "epoch": 0.12334751773049646, + "loss": 1.2607426643371582, + "loss_ce": 0.008545328862965107, + "loss_iou": 0.515625, + "loss_num": 0.04443359375, + "loss_xval": 1.25, + "num_input_tokens_seen": 72750492, + "step": 1087 + }, + { + "epoch": 0.12346099290780142, + "grad_norm": 15.329965591430664, + "learning_rate": 5e-05, + "loss": 1.4007, + "num_input_tokens_seen": 72817520, + "step": 1088 + }, + { + "epoch": 0.12346099290780142, + "loss": 1.3719193935394287, + "loss_ce": 0.005708570592105389, + "loss_iou": 0.609375, + "loss_num": 0.0286865234375, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 72817520, + "step": 1088 + }, + { + "epoch": 0.12357446808510639, + "grad_norm": 24.41840171813965, + "learning_rate": 5e-05, + "loss": 1.3885, + "num_input_tokens_seen": 72884348, + "step": 1089 + }, + { + "epoch": 0.12357446808510639, + "loss": 1.4491665363311768, + "loss_ce": 0.00824842881411314, + "loss_iou": 0.6171875, + "loss_num": 0.040771484375, + "loss_xval": 1.4375, + "num_input_tokens_seen": 72884348, + "step": 1089 + }, + { + "epoch": 0.12368794326241135, + "grad_norm": 40.6778678894043, + "learning_rate": 5e-05, + "loss": 1.6101, + "num_input_tokens_seen": 72951096, + "step": 1090 + }, + { + "epoch": 0.12368794326241135, + "loss": 1.7009046077728271, + "loss_ce": 0.003638938767835498, + "loss_iou": 0.7109375, + "loss_num": 0.0556640625, + "loss_xval": 1.6953125, + "num_input_tokens_seen": 72951096, + "step": 1090 + }, + { + "epoch": 0.12380141843971632, + "grad_norm": 16.905630111694336, + "learning_rate": 5e-05, + "loss": 1.4113, + "num_input_tokens_seen": 73016452, + "step": 1091 + }, + { + "epoch": 0.12380141843971632, + "loss": 1.406628966331482, + "loss_ce": 0.007703207898885012, + "loss_iou": 0.6015625, + "loss_num": 0.039306640625, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 73016452, + "step": 1091 + }, + { + "epoch": 0.12391489361702128, + "grad_norm": 18.79330062866211, + "learning_rate": 5e-05, + "loss": 1.6656, + "num_input_tokens_seen": 73084164, + "step": 1092 + }, + { + "epoch": 0.12391489361702128, + "loss": 1.771596908569336, + "loss_ce": 0.004995397757738829, + "loss_iou": 0.76171875, + "loss_num": 0.0478515625, + "loss_xval": 1.765625, + "num_input_tokens_seen": 73084164, + "step": 1092 + }, + { + "epoch": 0.12402836879432624, + "grad_norm": 32.5583381652832, + "learning_rate": 5e-05, + "loss": 1.5611, + "num_input_tokens_seen": 73150888, + "step": 1093 + }, + { + "epoch": 0.12402836879432624, + "loss": 1.6139198541641235, + "loss_ce": 0.003568258136510849, + "loss_iou": 0.70703125, + "loss_num": 0.039794921875, + "loss_xval": 1.609375, + "num_input_tokens_seen": 73150888, + "step": 1093 + }, + { + "epoch": 0.12414184397163121, + "grad_norm": 17.29348373413086, + "learning_rate": 5e-05, + "loss": 1.3729, + "num_input_tokens_seen": 73218228, + "step": 1094 + }, + { + "epoch": 0.12414184397163121, + "loss": 1.2671818733215332, + "loss_ce": 0.007904564030468464, + "loss_iou": 0.54296875, + "loss_num": 0.035400390625, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 73218228, + "step": 1094 + }, + { + "epoch": 0.12425531914893617, + "grad_norm": 17.079622268676758, + "learning_rate": 5e-05, + "loss": 1.3957, + "num_input_tokens_seen": 73284840, + "step": 1095 + }, + { + "epoch": 0.12425531914893617, + "loss": 1.38218092918396, + "loss_ce": 0.007180891931056976, + "loss_iou": 0.58203125, + "loss_num": 0.042236328125, + "loss_xval": 1.375, + "num_input_tokens_seen": 73284840, + "step": 1095 + }, + { + "epoch": 0.12436879432624114, + "grad_norm": 14.335100173950195, + "learning_rate": 5e-05, + "loss": 1.3008, + "num_input_tokens_seen": 73350824, + "step": 1096 + }, + { + "epoch": 0.12436879432624114, + "loss": 1.3798184394836426, + "loss_ce": 0.003353687934577465, + "loss_iou": 0.5546875, + "loss_num": 0.052490234375, + "loss_xval": 1.375, + "num_input_tokens_seen": 73350824, + "step": 1096 + }, + { + "epoch": 0.1244822695035461, + "grad_norm": 16.16689682006836, + "learning_rate": 5e-05, + "loss": 1.2042, + "num_input_tokens_seen": 73419032, + "step": 1097 + }, + { + "epoch": 0.1244822695035461, + "loss": 1.2212886810302734, + "loss_ce": 0.004980003461241722, + "loss_iou": 0.53125, + "loss_num": 0.03173828125, + "loss_xval": 1.21875, + "num_input_tokens_seen": 73419032, + "step": 1097 + }, + { + "epoch": 0.12459574468085106, + "grad_norm": 20.887746810913086, + "learning_rate": 5e-05, + "loss": 1.373, + "num_input_tokens_seen": 73486468, + "step": 1098 + }, + { + "epoch": 0.12459574468085106, + "loss": 1.3229680061340332, + "loss_ce": 0.007050033658742905, + "loss_iou": 0.58203125, + "loss_num": 0.0308837890625, + "loss_xval": 1.3125, + "num_input_tokens_seen": 73486468, + "step": 1098 + }, + { + "epoch": 0.12470921985815603, + "grad_norm": 20.299480438232422, + "learning_rate": 5e-05, + "loss": 1.4182, + "num_input_tokens_seen": 73553104, + "step": 1099 + }, + { + "epoch": 0.12470921985815603, + "loss": 1.341643214225769, + "loss_ce": 0.00668229628354311, + "loss_iou": 0.546875, + "loss_num": 0.04833984375, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 73553104, + "step": 1099 + }, + { + "epoch": 0.12482269503546099, + "grad_norm": 20.099924087524414, + "learning_rate": 5e-05, + "loss": 1.3625, + "num_input_tokens_seen": 73620504, + "step": 1100 + }, + { + "epoch": 0.12482269503546099, + "loss": 1.2543283700942993, + "loss_ce": 0.0013986803824082017, + "loss_iou": 0.5703125, + "loss_num": 0.022705078125, + "loss_xval": 1.25, + "num_input_tokens_seen": 73620504, + "step": 1100 + }, + { + "epoch": 0.12493617021276596, + "grad_norm": 30.501298904418945, + "learning_rate": 5e-05, + "loss": 1.2696, + "num_input_tokens_seen": 73686944, + "step": 1101 + }, + { + "epoch": 0.12493617021276596, + "loss": 1.481467604637146, + "loss_ce": 0.005881625227630138, + "loss_iou": 0.59765625, + "loss_num": 0.056396484375, + "loss_xval": 1.4765625, + "num_input_tokens_seen": 73686944, + "step": 1101 + }, + { + "epoch": 0.12504964539007093, + "grad_norm": 17.485074996948242, + "learning_rate": 5e-05, + "loss": 1.7104, + "num_input_tokens_seen": 73754360, + "step": 1102 + }, + { + "epoch": 0.12504964539007093, + "loss": 1.773779273033142, + "loss_ce": 0.008154258131980896, + "loss_iou": 0.72265625, + "loss_num": 0.06298828125, + "loss_xval": 1.765625, + "num_input_tokens_seen": 73754360, + "step": 1102 + }, + { + "epoch": 0.12516312056737589, + "grad_norm": 19.729915618896484, + "learning_rate": 5e-05, + "loss": 1.5907, + "num_input_tokens_seen": 73821540, + "step": 1103 + }, + { + "epoch": 0.12516312056737589, + "loss": 1.5609056949615479, + "loss_ce": 0.005241633392870426, + "loss_iou": 0.671875, + "loss_num": 0.04345703125, + "loss_xval": 1.5546875, + "num_input_tokens_seen": 73821540, + "step": 1103 + }, + { + "epoch": 0.12527659574468086, + "grad_norm": 21.609113693237305, + "learning_rate": 5e-05, + "loss": 1.3338, + "num_input_tokens_seen": 73888444, + "step": 1104 + }, + { + "epoch": 0.12527659574468086, + "loss": 1.3460172414779663, + "loss_ce": 0.004708635155111551, + "loss_iou": 0.55859375, + "loss_num": 0.04443359375, + "loss_xval": 1.34375, + "num_input_tokens_seen": 73888444, + "step": 1104 + }, + { + "epoch": 0.1253900709219858, + "grad_norm": 33.43806076049805, + "learning_rate": 5e-05, + "loss": 1.4942, + "num_input_tokens_seen": 73955292, + "step": 1105 + }, + { + "epoch": 0.1253900709219858, + "loss": 1.5017110109329224, + "loss_ce": 0.005128991790115833, + "loss_iou": 0.6484375, + "loss_num": 0.039794921875, + "loss_xval": 1.5, + "num_input_tokens_seen": 73955292, + "step": 1105 + }, + { + "epoch": 0.1255035460992908, + "grad_norm": 20.97142219543457, + "learning_rate": 5e-05, + "loss": 1.7619, + "num_input_tokens_seen": 74022200, + "step": 1106 + }, + { + "epoch": 0.1255035460992908, + "loss": 1.7900227308273315, + "loss_ce": 0.00486653670668602, + "loss_iou": 0.75, + "loss_num": 0.0576171875, + "loss_xval": 1.78125, + "num_input_tokens_seen": 74022200, + "step": 1106 + }, + { + "epoch": 0.12561702127659574, + "grad_norm": 25.26389503479004, + "learning_rate": 5e-05, + "loss": 1.1888, + "num_input_tokens_seen": 74087980, + "step": 1107 + }, + { + "epoch": 0.12561702127659574, + "loss": 1.1746598482131958, + "loss_ce": 0.0066910539753735065, + "loss_iou": 0.451171875, + "loss_num": 0.053466796875, + "loss_xval": 1.171875, + "num_input_tokens_seen": 74087980, + "step": 1107 + }, + { + "epoch": 0.12573049645390072, + "grad_norm": 20.751014709472656, + "learning_rate": 5e-05, + "loss": 1.6712, + "num_input_tokens_seen": 74155500, + "step": 1108 + }, + { + "epoch": 0.12573049645390072, + "loss": 1.811992883682251, + "loss_ce": 0.00632884819060564, + "loss_iou": 0.78515625, + "loss_num": 0.046142578125, + "loss_xval": 1.8046875, + "num_input_tokens_seen": 74155500, + "step": 1108 + }, + { + "epoch": 0.12584397163120567, + "grad_norm": 17.114042282104492, + "learning_rate": 5e-05, + "loss": 1.4474, + "num_input_tokens_seen": 74222316, + "step": 1109 + }, + { + "epoch": 0.12584397163120567, + "loss": 1.628985047340393, + "loss_ce": 0.004961572587490082, + "loss_iou": 0.671875, + "loss_num": 0.0556640625, + "loss_xval": 1.625, + "num_input_tokens_seen": 74222316, + "step": 1109 + }, + { + "epoch": 0.12595744680851065, + "grad_norm": 18.13460922241211, + "learning_rate": 5e-05, + "loss": 1.6004, + "num_input_tokens_seen": 74289088, + "step": 1110 + }, + { + "epoch": 0.12595744680851065, + "loss": 1.694447636604309, + "loss_ce": 0.006947624031454325, + "loss_iou": 0.74609375, + "loss_num": 0.03955078125, + "loss_xval": 1.6875, + "num_input_tokens_seen": 74289088, + "step": 1110 + }, + { + "epoch": 0.1260709219858156, + "grad_norm": 42.35512924194336, + "learning_rate": 5e-05, + "loss": 1.5981, + "num_input_tokens_seen": 74356160, + "step": 1111 + }, + { + "epoch": 0.1260709219858156, + "loss": 1.7707762718200684, + "loss_ce": 0.005151239223778248, + "loss_iou": 0.78125, + "loss_num": 0.040283203125, + "loss_xval": 1.765625, + "num_input_tokens_seen": 74356160, + "step": 1111 + }, + { + "epoch": 0.12618439716312058, + "grad_norm": 15.042082786560059, + "learning_rate": 5e-05, + "loss": 1.8542, + "num_input_tokens_seen": 74423188, + "step": 1112 + }, + { + "epoch": 0.12618439716312058, + "loss": 2.2617430686950684, + "loss_ce": 0.00686016958206892, + "loss_iou": 0.8984375, + "loss_num": 0.0908203125, + "loss_xval": 2.25, + "num_input_tokens_seen": 74423188, + "step": 1112 + }, + { + "epoch": 0.12629787234042553, + "grad_norm": 17.913623809814453, + "learning_rate": 5e-05, + "loss": 1.7193, + "num_input_tokens_seen": 74491880, + "step": 1113 + }, + { + "epoch": 0.12629787234042553, + "loss": 1.5889523029327393, + "loss_ce": 0.0035030923318117857, + "loss_iou": 0.69921875, + "loss_num": 0.037353515625, + "loss_xval": 1.5859375, + "num_input_tokens_seen": 74491880, + "step": 1113 + }, + { + "epoch": 0.1264113475177305, + "grad_norm": 18.276342391967773, + "learning_rate": 5e-05, + "loss": 1.3836, + "num_input_tokens_seen": 74559140, + "step": 1114 + }, + { + "epoch": 0.1264113475177305, + "loss": 1.3297455310821533, + "loss_ce": 0.007601978722959757, + "loss_iou": 0.53125, + "loss_num": 0.0517578125, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 74559140, + "step": 1114 + }, + { + "epoch": 0.12652482269503545, + "grad_norm": 17.424585342407227, + "learning_rate": 5e-05, + "loss": 1.2155, + "num_input_tokens_seen": 74626736, + "step": 1115 + }, + { + "epoch": 0.12652482269503545, + "loss": 1.160754919052124, + "loss_ce": 0.006458142772316933, + "loss_iou": 0.53515625, + "loss_num": 0.0162353515625, + "loss_xval": 1.15625, + "num_input_tokens_seen": 74626736, + "step": 1115 + }, + { + "epoch": 0.12663829787234043, + "grad_norm": 21.93143653869629, + "learning_rate": 5e-05, + "loss": 1.4512, + "num_input_tokens_seen": 74694060, + "step": 1116 + }, + { + "epoch": 0.12663829787234043, + "loss": 1.3189842700958252, + "loss_ce": 0.004042801912873983, + "loss_iou": 0.52734375, + "loss_num": 0.051513671875, + "loss_xval": 1.3125, + "num_input_tokens_seen": 74694060, + "step": 1116 + }, + { + "epoch": 0.12675177304964538, + "grad_norm": 18.080873489379883, + "learning_rate": 5e-05, + "loss": 1.5781, + "num_input_tokens_seen": 74761240, + "step": 1117 + }, + { + "epoch": 0.12675177304964538, + "loss": 1.4490303993225098, + "loss_ce": 0.0027412897907197475, + "loss_iou": 0.640625, + "loss_num": 0.03271484375, + "loss_xval": 1.4453125, + "num_input_tokens_seen": 74761240, + "step": 1117 + }, + { + "epoch": 0.12686524822695036, + "grad_norm": 14.25580883026123, + "learning_rate": 5e-05, + "loss": 1.2058, + "num_input_tokens_seen": 74828168, + "step": 1118 + }, + { + "epoch": 0.12686524822695036, + "loss": 1.216654658317566, + "loss_ce": 0.003275759983807802, + "loss_iou": 0.54296875, + "loss_num": 0.0257568359375, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 74828168, + "step": 1118 + }, + { + "epoch": 0.1269787234042553, + "grad_norm": 18.825809478759766, + "learning_rate": 5e-05, + "loss": 1.4229, + "num_input_tokens_seen": 74894700, + "step": 1119 + }, + { + "epoch": 0.1269787234042553, + "loss": 1.412070870399475, + "loss_ce": 0.0036236292216926813, + "loss_iou": 0.59375, + "loss_num": 0.044921875, + "loss_xval": 1.40625, + "num_input_tokens_seen": 74894700, + "step": 1119 + }, + { + "epoch": 0.1270921985815603, + "grad_norm": 10.426859855651855, + "learning_rate": 5e-05, + "loss": 1.2904, + "num_input_tokens_seen": 74961956, + "step": 1120 + }, + { + "epoch": 0.1270921985815603, + "loss": 1.3465044498443604, + "loss_ce": 0.005195802543312311, + "loss_iou": 0.5859375, + "loss_num": 0.033203125, + "loss_xval": 1.34375, + "num_input_tokens_seen": 74961956, + "step": 1120 + }, + { + "epoch": 0.12720567375886524, + "grad_norm": 13.647455215454102, + "learning_rate": 5e-05, + "loss": 1.2054, + "num_input_tokens_seen": 75028980, + "step": 1121 + }, + { + "epoch": 0.12720567375886524, + "loss": 1.391460657119751, + "loss_ce": 0.005718502216041088, + "loss_iou": 0.59765625, + "loss_num": 0.037109375, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 75028980, + "step": 1121 + }, + { + "epoch": 0.12731914893617022, + "grad_norm": 37.70988082885742, + "learning_rate": 5e-05, + "loss": 1.3896, + "num_input_tokens_seen": 75096228, + "step": 1122 + }, + { + "epoch": 0.12731914893617022, + "loss": 1.3341522216796875, + "loss_ce": 0.007247995585203171, + "loss_iou": 0.57421875, + "loss_num": 0.0361328125, + "loss_xval": 1.328125, + "num_input_tokens_seen": 75096228, + "step": 1122 + }, + { + "epoch": 0.12743262411347517, + "grad_norm": 22.411029815673828, + "learning_rate": 5e-05, + "loss": 1.5099, + "num_input_tokens_seen": 75162340, + "step": 1123 + }, + { + "epoch": 0.12743262411347517, + "loss": 1.5062395334243774, + "loss_ce": 0.00916927121579647, + "loss_iou": 0.58203125, + "loss_num": 0.06689453125, + "loss_xval": 1.5, + "num_input_tokens_seen": 75162340, + "step": 1123 + }, + { + "epoch": 0.12754609929078015, + "grad_norm": 13.935193061828613, + "learning_rate": 5e-05, + "loss": 1.2683, + "num_input_tokens_seen": 75230196, + "step": 1124 + }, + { + "epoch": 0.12754609929078015, + "loss": 1.2994954586029053, + "loss_ce": 0.004573510959744453, + "loss_iou": 0.53515625, + "loss_num": 0.044677734375, + "loss_xval": 1.296875, + "num_input_tokens_seen": 75230196, + "step": 1124 + }, + { + "epoch": 0.1276595744680851, + "grad_norm": 103.61581420898438, + "learning_rate": 5e-05, + "loss": 1.3231, + "num_input_tokens_seen": 75296444, + "step": 1125 + }, + { + "epoch": 0.1276595744680851, + "loss": 1.2042781114578247, + "loss_ce": 0.006524210795760155, + "loss_iou": 0.5078125, + "loss_num": 0.035888671875, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 75296444, + "step": 1125 + }, + { + "epoch": 0.12777304964539007, + "grad_norm": 18.80938720703125, + "learning_rate": 5e-05, + "loss": 1.5765, + "num_input_tokens_seen": 75362888, + "step": 1126 + }, + { + "epoch": 0.12777304964539007, + "loss": 1.7812848091125488, + "loss_ce": 0.004917607642710209, + "loss_iou": 0.80859375, + "loss_num": 0.031982421875, + "loss_xval": 1.7734375, + "num_input_tokens_seen": 75362888, + "step": 1126 + }, + { + "epoch": 0.12788652482269502, + "grad_norm": 19.817733764648438, + "learning_rate": 5e-05, + "loss": 1.4103, + "num_input_tokens_seen": 75431632, + "step": 1127 + }, + { + "epoch": 0.12788652482269502, + "loss": 1.4897712469100952, + "loss_ce": 0.0034431726671755314, + "loss_iou": 0.6484375, + "loss_num": 0.03759765625, + "loss_xval": 1.484375, + "num_input_tokens_seen": 75431632, + "step": 1127 + }, + { + "epoch": 0.128, + "grad_norm": 38.061859130859375, + "learning_rate": 5e-05, + "loss": 1.4911, + "num_input_tokens_seen": 75497604, + "step": 1128 + }, + { + "epoch": 0.128, + "loss": 1.4915831089019775, + "loss_ce": 0.010137803852558136, + "loss_iou": 0.66015625, + "loss_num": 0.032470703125, + "loss_xval": 1.484375, + "num_input_tokens_seen": 75497604, + "step": 1128 + }, + { + "epoch": 0.12811347517730495, + "grad_norm": 19.095638275146484, + "learning_rate": 5e-05, + "loss": 1.8287, + "num_input_tokens_seen": 75564584, + "step": 1129 + }, + { + "epoch": 0.12811347517730495, + "loss": 1.8833942413330078, + "loss_ce": 0.004488065838813782, + "loss_iou": 0.8359375, + "loss_num": 0.04052734375, + "loss_xval": 1.875, + "num_input_tokens_seen": 75564584, + "step": 1129 + }, + { + "epoch": 0.12822695035460993, + "grad_norm": 17.35439682006836, + "learning_rate": 5e-05, + "loss": 1.4396, + "num_input_tokens_seen": 75631336, + "step": 1130 + }, + { + "epoch": 0.12822695035460993, + "loss": 1.2625737190246582, + "loss_ce": 0.005249454639852047, + "loss_iou": 0.54296875, + "loss_num": 0.03466796875, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 75631336, + "step": 1130 + }, + { + "epoch": 0.12834042553191488, + "grad_norm": 33.83271026611328, + "learning_rate": 5e-05, + "loss": 1.258, + "num_input_tokens_seen": 75697084, + "step": 1131 + }, + { + "epoch": 0.12834042553191488, + "loss": 1.045494794845581, + "loss_ce": 0.003243256825953722, + "loss_iou": 0.470703125, + "loss_num": 0.019775390625, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 75697084, + "step": 1131 + }, + { + "epoch": 0.12845390070921986, + "grad_norm": 22.54962158203125, + "learning_rate": 5e-05, + "loss": 1.6604, + "num_input_tokens_seen": 75764904, + "step": 1132 + }, + { + "epoch": 0.12845390070921986, + "loss": 1.5064022541046143, + "loss_ce": 0.002496051136404276, + "loss_iou": 0.6875, + "loss_num": 0.0263671875, + "loss_xval": 1.5, + "num_input_tokens_seen": 75764904, + "step": 1132 + }, + { + "epoch": 0.1285673758865248, + "grad_norm": 21.40488624572754, + "learning_rate": 5e-05, + "loss": 1.2234, + "num_input_tokens_seen": 75831320, + "step": 1133 + }, + { + "epoch": 0.1285673758865248, + "loss": 1.4097964763641357, + "loss_ce": 0.01038237102329731, + "loss_iou": 0.609375, + "loss_num": 0.035400390625, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 75831320, + "step": 1133 + }, + { + "epoch": 0.1286808510638298, + "grad_norm": 37.878875732421875, + "learning_rate": 5e-05, + "loss": 1.4229, + "num_input_tokens_seen": 75897928, + "step": 1134 + }, + { + "epoch": 0.1286808510638298, + "loss": 1.2550249099731445, + "loss_ce": 0.010395997203886509, + "loss_iou": 0.546875, + "loss_num": 0.029541015625, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 75897928, + "step": 1134 + }, + { + "epoch": 0.12879432624113477, + "grad_norm": 18.624706268310547, + "learning_rate": 5e-05, + "loss": 1.7927, + "num_input_tokens_seen": 75963672, + "step": 1135 + }, + { + "epoch": 0.12879432624113477, + "loss": 1.7257485389709473, + "loss_ce": 0.002115735085681081, + "loss_iou": 0.77734375, + "loss_num": 0.0341796875, + "loss_xval": 1.7265625, + "num_input_tokens_seen": 75963672, + "step": 1135 + }, + { + "epoch": 0.12890780141843972, + "grad_norm": 19.8697566986084, + "learning_rate": 5e-05, + "loss": 1.4714, + "num_input_tokens_seen": 76032036, + "step": 1136 + }, + { + "epoch": 0.12890780141843972, + "loss": 1.5786631107330322, + "loss_ce": 0.004444362595677376, + "loss_iou": 0.671875, + "loss_num": 0.04541015625, + "loss_xval": 1.578125, + "num_input_tokens_seen": 76032036, + "step": 1136 + }, + { + "epoch": 0.1290212765957447, + "grad_norm": 27.859500885009766, + "learning_rate": 5e-05, + "loss": 1.5154, + "num_input_tokens_seen": 76099492, + "step": 1137 + }, + { + "epoch": 0.1290212765957447, + "loss": 1.4936063289642334, + "loss_ce": 0.006301539484411478, + "loss_iou": 0.62890625, + "loss_num": 0.046142578125, + "loss_xval": 1.484375, + "num_input_tokens_seen": 76099492, + "step": 1137 + }, + { + "epoch": 0.12913475177304964, + "grad_norm": 22.725698471069336, + "learning_rate": 5e-05, + "loss": 1.7517, + "num_input_tokens_seen": 76165760, + "step": 1138 + }, + { + "epoch": 0.12913475177304964, + "loss": 1.8162908554077148, + "loss_ce": 0.0028142635710537434, + "loss_iou": 0.7890625, + "loss_num": 0.0478515625, + "loss_xval": 1.8125, + "num_input_tokens_seen": 76165760, + "step": 1138 + }, + { + "epoch": 0.12924822695035462, + "grad_norm": 12.099393844604492, + "learning_rate": 5e-05, + "loss": 1.279, + "num_input_tokens_seen": 76231488, + "step": 1139 + }, + { + "epoch": 0.12924822695035462, + "loss": 1.2343354225158691, + "loss_ce": 0.004110846668481827, + "loss_iou": 0.51171875, + "loss_num": 0.041015625, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 76231488, + "step": 1139 + }, + { + "epoch": 0.12936170212765957, + "grad_norm": 19.493427276611328, + "learning_rate": 5e-05, + "loss": 1.3799, + "num_input_tokens_seen": 76298060, + "step": 1140 + }, + { + "epoch": 0.12936170212765957, + "loss": 1.2714269161224365, + "loss_ce": 0.0038487049750983715, + "loss_iou": 0.55078125, + "loss_num": 0.033203125, + "loss_xval": 1.265625, + "num_input_tokens_seen": 76298060, + "step": 1140 + }, + { + "epoch": 0.12947517730496455, + "grad_norm": 30.29786491394043, + "learning_rate": 5e-05, + "loss": 1.5148, + "num_input_tokens_seen": 76364952, + "step": 1141 + }, + { + "epoch": 0.12947517730496455, + "loss": 1.610666036605835, + "loss_ce": 0.005197301972657442, + "loss_iou": 0.6953125, + "loss_num": 0.04296875, + "loss_xval": 1.609375, + "num_input_tokens_seen": 76364952, + "step": 1141 + }, + { + "epoch": 0.1295886524822695, + "grad_norm": 19.514965057373047, + "learning_rate": 5e-05, + "loss": 1.7341, + "num_input_tokens_seen": 76431924, + "step": 1142 + }, + { + "epoch": 0.1295886524822695, + "loss": 1.8184354305267334, + "loss_ce": 0.003982197493314743, + "loss_iou": 0.77734375, + "loss_num": 0.052490234375, + "loss_xval": 1.8125, + "num_input_tokens_seen": 76431924, + "step": 1142 + }, + { + "epoch": 0.12970212765957448, + "grad_norm": 20.13730812072754, + "learning_rate": 5e-05, + "loss": 1.194, + "num_input_tokens_seen": 76497932, + "step": 1143 + }, + { + "epoch": 0.12970212765957448, + "loss": 1.2186377048492432, + "loss_ce": 0.009165061637759209, + "loss_iou": 0.53515625, + "loss_num": 0.0286865234375, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 76497932, + "step": 1143 + }, + { + "epoch": 0.12981560283687943, + "grad_norm": 27.00029754638672, + "learning_rate": 5e-05, + "loss": 1.5807, + "num_input_tokens_seen": 76564020, + "step": 1144 + }, + { + "epoch": 0.12981560283687943, + "loss": 1.488149642944336, + "loss_ce": 0.004262838046997786, + "loss_iou": 0.59765625, + "loss_num": 0.057373046875, + "loss_xval": 1.484375, + "num_input_tokens_seen": 76564020, + "step": 1144 + }, + { + "epoch": 0.1299290780141844, + "grad_norm": 45.538230895996094, + "learning_rate": 5e-05, + "loss": 1.5591, + "num_input_tokens_seen": 76631280, + "step": 1145 + }, + { + "epoch": 0.1299290780141844, + "loss": 1.6165380477905273, + "loss_ce": 0.005209962837398052, + "loss_iou": 0.69921875, + "loss_num": 0.042724609375, + "loss_xval": 1.609375, + "num_input_tokens_seen": 76631280, + "step": 1145 + }, + { + "epoch": 0.13004255319148936, + "grad_norm": 21.977394104003906, + "learning_rate": 5e-05, + "loss": 1.7105, + "num_input_tokens_seen": 76698032, + "step": 1146 + }, + { + "epoch": 0.13004255319148936, + "loss": 1.7998039722442627, + "loss_ce": 0.0019523646915331483, + "loss_iou": 0.765625, + "loss_num": 0.052734375, + "loss_xval": 1.796875, + "num_input_tokens_seen": 76698032, + "step": 1146 + }, + { + "epoch": 0.13015602836879434, + "grad_norm": 20.058801651000977, + "learning_rate": 5e-05, + "loss": 1.3005, + "num_input_tokens_seen": 76765224, + "step": 1147 + }, + { + "epoch": 0.13015602836879434, + "loss": 1.1903960704803467, + "loss_ce": 0.00460506696254015, + "loss_iou": 0.50390625, + "loss_num": 0.036376953125, + "loss_xval": 1.1875, + "num_input_tokens_seen": 76765224, + "step": 1147 + }, + { + "epoch": 0.13026950354609929, + "grad_norm": 30.892980575561523, + "learning_rate": 5e-05, + "loss": 1.288, + "num_input_tokens_seen": 76833220, + "step": 1148 + }, + { + "epoch": 0.13026950354609929, + "loss": 1.2096130847930908, + "loss_ce": 0.007952851243317127, + "loss_iou": 0.515625, + "loss_num": 0.03466796875, + "loss_xval": 1.203125, + "num_input_tokens_seen": 76833220, + "step": 1148 + }, + { + "epoch": 0.13038297872340426, + "grad_norm": 20.316179275512695, + "learning_rate": 5e-05, + "loss": 1.8164, + "num_input_tokens_seen": 76900148, + "step": 1149 + }, + { + "epoch": 0.13038297872340426, + "loss": 1.8335130214691162, + "loss_ce": 0.005388055462390184, + "loss_iou": 0.80078125, + "loss_num": 0.044921875, + "loss_xval": 1.828125, + "num_input_tokens_seen": 76900148, + "step": 1149 + }, + { + "epoch": 0.13049645390070921, + "grad_norm": 18.155920028686523, + "learning_rate": 5e-05, + "loss": 1.2585, + "num_input_tokens_seen": 76967396, + "step": 1150 + }, + { + "epoch": 0.13049645390070921, + "loss": 1.2104525566101074, + "loss_ce": 0.004947132896631956, + "loss_iou": 0.53125, + "loss_num": 0.029296875, + "loss_xval": 1.203125, + "num_input_tokens_seen": 76967396, + "step": 1150 + }, + { + "epoch": 0.1306099290780142, + "grad_norm": 24.818269729614258, + "learning_rate": 5e-05, + "loss": 1.4429, + "num_input_tokens_seen": 77033712, + "step": 1151 + }, + { + "epoch": 0.1306099290780142, + "loss": 1.4679821729660034, + "loss_ce": 0.004114977084100246, + "loss_iou": 0.62109375, + "loss_num": 0.0439453125, + "loss_xval": 1.4609375, + "num_input_tokens_seen": 77033712, + "step": 1151 + }, + { + "epoch": 0.13072340425531914, + "grad_norm": 21.470142364501953, + "learning_rate": 5e-05, + "loss": 1.4645, + "num_input_tokens_seen": 77099912, + "step": 1152 + }, + { + "epoch": 0.13072340425531914, + "loss": 1.3839144706726074, + "loss_ce": 0.0016512514557689428, + "loss_iou": 0.6015625, + "loss_num": 0.035888671875, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 77099912, + "step": 1152 + }, + { + "epoch": 0.13083687943262412, + "grad_norm": 18.989181518554688, + "learning_rate": 5e-05, + "loss": 1.4646, + "num_input_tokens_seen": 77166436, + "step": 1153 + }, + { + "epoch": 0.13083687943262412, + "loss": 1.4079911708831787, + "loss_ce": 0.004670875612646341, + "loss_iou": 0.625, + "loss_num": 0.030517578125, + "loss_xval": 1.40625, + "num_input_tokens_seen": 77166436, + "step": 1153 + }, + { + "epoch": 0.13095035460992907, + "grad_norm": 36.58635711669922, + "learning_rate": 5e-05, + "loss": 1.5348, + "num_input_tokens_seen": 77233340, + "step": 1154 + }, + { + "epoch": 0.13095035460992907, + "loss": 1.3037517070770264, + "loss_ce": 0.00443533668294549, + "loss_iou": 0.59765625, + "loss_num": 0.0211181640625, + "loss_xval": 1.296875, + "num_input_tokens_seen": 77233340, + "step": 1154 + }, + { + "epoch": 0.13106382978723405, + "grad_norm": 32.01032638549805, + "learning_rate": 5e-05, + "loss": 1.7449, + "num_input_tokens_seen": 77300456, + "step": 1155 + }, + { + "epoch": 0.13106382978723405, + "loss": 1.6957001686096191, + "loss_ce": 0.0082001443952322, + "loss_iou": 0.77734375, + "loss_num": 0.027099609375, + "loss_xval": 1.6875, + "num_input_tokens_seen": 77300456, + "step": 1155 + }, + { + "epoch": 0.131177304964539, + "grad_norm": 14.94954776763916, + "learning_rate": 5e-05, + "loss": 1.3427, + "num_input_tokens_seen": 77367464, + "step": 1156 + }, + { + "epoch": 0.131177304964539, + "loss": 1.2518984079360962, + "loss_ce": 0.006781219970434904, + "loss_iou": 0.5234375, + "loss_num": 0.039306640625, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 77367464, + "step": 1156 + }, + { + "epoch": 0.13129078014184398, + "grad_norm": 13.352287292480469, + "learning_rate": 5e-05, + "loss": 1.4309, + "num_input_tokens_seen": 77434712, + "step": 1157 + }, + { + "epoch": 0.13129078014184398, + "loss": 1.3041002750396729, + "loss_ce": 0.007225288078188896, + "loss_iou": 0.5546875, + "loss_num": 0.038330078125, + "loss_xval": 1.296875, + "num_input_tokens_seen": 77434712, + "step": 1157 + }, + { + "epoch": 0.13140425531914893, + "grad_norm": 29.35538673400879, + "learning_rate": 5e-05, + "loss": 1.2922, + "num_input_tokens_seen": 77500172, + "step": 1158 + }, + { + "epoch": 0.13140425531914893, + "loss": 1.400982141494751, + "loss_ce": 0.008404011838138103, + "loss_iou": 0.6171875, + "loss_num": 0.031005859375, + "loss_xval": 1.390625, + "num_input_tokens_seen": 77500172, + "step": 1158 + }, + { + "epoch": 0.1315177304964539, + "grad_norm": 17.68467903137207, + "learning_rate": 5e-05, + "loss": 1.5783, + "num_input_tokens_seen": 77566252, + "step": 1159 + }, + { + "epoch": 0.1315177304964539, + "loss": 1.3790677785873413, + "loss_ce": 0.005532565992325544, + "loss_iou": 0.57421875, + "loss_num": 0.045654296875, + "loss_xval": 1.375, + "num_input_tokens_seen": 77566252, + "step": 1159 + }, + { + "epoch": 0.13163120567375886, + "grad_norm": 17.59708023071289, + "learning_rate": 5e-05, + "loss": 1.3442, + "num_input_tokens_seen": 77632788, + "step": 1160 + }, + { + "epoch": 0.13163120567375886, + "loss": 1.355327844619751, + "loss_ce": 0.0037653741892427206, + "loss_iou": 0.5625, + "loss_num": 0.04443359375, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 77632788, + "step": 1160 + }, + { + "epoch": 0.13174468085106383, + "grad_norm": 19.370975494384766, + "learning_rate": 5e-05, + "loss": 1.503, + "num_input_tokens_seen": 77699680, + "step": 1161 + }, + { + "epoch": 0.13174468085106383, + "loss": 1.3580994606018066, + "loss_ce": 0.004583839792758226, + "loss_iou": 0.58203125, + "loss_num": 0.037841796875, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 77699680, + "step": 1161 + }, + { + "epoch": 0.13185815602836878, + "grad_norm": 17.97020149230957, + "learning_rate": 5e-05, + "loss": 1.4196, + "num_input_tokens_seen": 77767264, + "step": 1162 + }, + { + "epoch": 0.13185815602836878, + "loss": 1.2328405380249023, + "loss_ce": 0.0023717619478702545, + "loss_iou": 0.5234375, + "loss_num": 0.03662109375, + "loss_xval": 1.234375, + "num_input_tokens_seen": 77767264, + "step": 1162 + }, + { + "epoch": 0.13197163120567376, + "grad_norm": 34.74858856201172, + "learning_rate": 5e-05, + "loss": 1.4178, + "num_input_tokens_seen": 77834316, + "step": 1163 + }, + { + "epoch": 0.13197163120567376, + "loss": 1.5352740287780762, + "loss_ce": 0.006953738164156675, + "loss_iou": 0.66015625, + "loss_num": 0.0419921875, + "loss_xval": 1.53125, + "num_input_tokens_seen": 77834316, + "step": 1163 + }, + { + "epoch": 0.1320851063829787, + "grad_norm": 22.836156845092773, + "learning_rate": 5e-05, + "loss": 1.6845, + "num_input_tokens_seen": 77901496, + "step": 1164 + }, + { + "epoch": 0.1320851063829787, + "loss": 1.6434693336486816, + "loss_ce": 0.0028442596085369587, + "loss_iou": 0.703125, + "loss_num": 0.047119140625, + "loss_xval": 1.640625, + "num_input_tokens_seen": 77901496, + "step": 1164 + }, + { + "epoch": 0.1321985815602837, + "grad_norm": 18.121164321899414, + "learning_rate": 5e-05, + "loss": 1.3732, + "num_input_tokens_seen": 77968972, + "step": 1165 + }, + { + "epoch": 0.1321985815602837, + "loss": 1.4414541721343994, + "loss_ce": 0.008348705247044563, + "loss_iou": 0.640625, + "loss_num": 0.0311279296875, + "loss_xval": 1.4296875, + "num_input_tokens_seen": 77968972, + "step": 1165 + }, + { + "epoch": 0.13231205673758864, + "grad_norm": 16.240568161010742, + "learning_rate": 5e-05, + "loss": 1.5641, + "num_input_tokens_seen": 78035220, + "step": 1166 + }, + { + "epoch": 0.13231205673758864, + "loss": 1.6144508123397827, + "loss_ce": 0.006052318494766951, + "loss_iou": 0.671875, + "loss_num": 0.05322265625, + "loss_xval": 1.609375, + "num_input_tokens_seen": 78035220, + "step": 1166 + }, + { + "epoch": 0.13242553191489362, + "grad_norm": 27.59115219116211, + "learning_rate": 5e-05, + "loss": 1.3918, + "num_input_tokens_seen": 78101688, + "step": 1167 + }, + { + "epoch": 0.13242553191489362, + "loss": 1.38643217086792, + "loss_ce": 0.005084428936243057, + "loss_iou": 0.59765625, + "loss_num": 0.036376953125, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 78101688, + "step": 1167 + }, + { + "epoch": 0.13253900709219857, + "grad_norm": 43.579681396484375, + "learning_rate": 5e-05, + "loss": 1.6782, + "num_input_tokens_seen": 78169100, + "step": 1168 + }, + { + "epoch": 0.13253900709219857, + "loss": 1.5412015914916992, + "loss_ce": 0.008242607116699219, + "loss_iou": 0.65625, + "loss_num": 0.044189453125, + "loss_xval": 1.53125, + "num_input_tokens_seen": 78169100, + "step": 1168 + }, + { + "epoch": 0.13265248226950355, + "grad_norm": 15.800996780395508, + "learning_rate": 5e-05, + "loss": 1.8648, + "num_input_tokens_seen": 78234956, + "step": 1169 + }, + { + "epoch": 0.13265248226950355, + "loss": 1.7791551351547241, + "loss_ce": 0.006694202311336994, + "loss_iou": 0.765625, + "loss_num": 0.0478515625, + "loss_xval": 1.7734375, + "num_input_tokens_seen": 78234956, + "step": 1169 + }, + { + "epoch": 0.1327659574468085, + "grad_norm": 19.94856834411621, + "learning_rate": 5e-05, + "loss": 1.4505, + "num_input_tokens_seen": 78301360, + "step": 1170 + }, + { + "epoch": 0.1327659574468085, + "loss": 1.4500141143798828, + "loss_ce": 0.004213395528495312, + "loss_iou": 0.61328125, + "loss_num": 0.044189453125, + "loss_xval": 1.4453125, + "num_input_tokens_seen": 78301360, + "step": 1170 + }, + { + "epoch": 0.13287943262411347, + "grad_norm": 21.967172622680664, + "learning_rate": 5e-05, + "loss": 1.4115, + "num_input_tokens_seen": 78367516, + "step": 1171 + }, + { + "epoch": 0.13287943262411347, + "loss": 1.437298059463501, + "loss_ce": 0.005657488014549017, + "loss_iou": 0.59375, + "loss_num": 0.048583984375, + "loss_xval": 1.4296875, + "num_input_tokens_seen": 78367516, + "step": 1171 + }, + { + "epoch": 0.13299290780141845, + "grad_norm": 9.597172737121582, + "learning_rate": 5e-05, + "loss": 1.232, + "num_input_tokens_seen": 78434524, + "step": 1172 + }, + { + "epoch": 0.13299290780141845, + "loss": 1.0731661319732666, + "loss_ce": 0.002853596583008766, + "loss_iou": 0.490234375, + "loss_num": 0.017822265625, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 78434524, + "step": 1172 + }, + { + "epoch": 0.1331063829787234, + "grad_norm": 12.72608470916748, + "learning_rate": 5e-05, + "loss": 1.4952, + "num_input_tokens_seen": 78502028, + "step": 1173 + }, + { + "epoch": 0.1331063829787234, + "loss": 1.3375731706619263, + "loss_ce": 0.003100545145571232, + "loss_iou": 0.5703125, + "loss_num": 0.0390625, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 78502028, + "step": 1173 + }, + { + "epoch": 0.13321985815602838, + "grad_norm": 15.219212532043457, + "learning_rate": 5e-05, + "loss": 1.5376, + "num_input_tokens_seen": 78569336, + "step": 1174 + }, + { + "epoch": 0.13321985815602838, + "loss": 1.5597960948944092, + "loss_ce": 0.005108553916215897, + "loss_iou": 0.65625, + "loss_num": 0.048095703125, + "loss_xval": 1.5546875, + "num_input_tokens_seen": 78569336, + "step": 1174 + }, + { + "epoch": 0.13333333333333333, + "grad_norm": 21.010774612426758, + "learning_rate": 5e-05, + "loss": 1.421, + "num_input_tokens_seen": 78635940, + "step": 1175 + }, + { + "epoch": 0.13333333333333333, + "loss": 1.1527857780456543, + "loss_ce": 0.0095973527058959, + "loss_iou": 0.5234375, + "loss_num": 0.018798828125, + "loss_xval": 1.140625, + "num_input_tokens_seen": 78635940, + "step": 1175 + }, + { + "epoch": 0.1334468085106383, + "grad_norm": 18.879304885864258, + "learning_rate": 5e-05, + "loss": 1.3975, + "num_input_tokens_seen": 78702024, + "step": 1176 + }, + { + "epoch": 0.1334468085106383, + "loss": 1.3100481033325195, + "loss_ce": 0.005360549781471491, + "loss_iou": 0.5546875, + "loss_num": 0.039306640625, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 78702024, + "step": 1176 + }, + { + "epoch": 0.13356028368794326, + "grad_norm": 28.42647361755371, + "learning_rate": 5e-05, + "loss": 1.6142, + "num_input_tokens_seen": 78769744, + "step": 1177 + }, + { + "epoch": 0.13356028368794326, + "loss": 1.750873327255249, + "loss_ce": 0.0038029539864510298, + "loss_iou": 0.76171875, + "loss_num": 0.044677734375, + "loss_xval": 1.75, + "num_input_tokens_seen": 78769744, + "step": 1177 + }, + { + "epoch": 0.13367375886524824, + "grad_norm": 51.254276275634766, + "learning_rate": 5e-05, + "loss": 1.5618, + "num_input_tokens_seen": 78835956, + "step": 1178 + }, + { + "epoch": 0.13367375886524824, + "loss": 1.4123759269714355, + "loss_ce": 0.011985274963080883, + "loss_iou": 0.62109375, + "loss_num": 0.0322265625, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 78835956, + "step": 1178 + }, + { + "epoch": 0.1337872340425532, + "grad_norm": 19.807607650756836, + "learning_rate": 5e-05, + "loss": 1.3265, + "num_input_tokens_seen": 78904200, + "step": 1179 + }, + { + "epoch": 0.1337872340425532, + "loss": 1.4884576797485352, + "loss_ce": 0.0050593409687280655, + "loss_iou": 0.58203125, + "loss_num": 0.0634765625, + "loss_xval": 1.484375, + "num_input_tokens_seen": 78904200, + "step": 1179 + }, + { + "epoch": 0.13390070921985817, + "grad_norm": 19.85332679748535, + "learning_rate": 5e-05, + "loss": 1.2994, + "num_input_tokens_seen": 78971840, + "step": 1180 + }, + { + "epoch": 0.13390070921985817, + "loss": 1.3856432437896729, + "loss_ce": 0.00283078127540648, + "loss_iou": 0.6015625, + "loss_num": 0.036865234375, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 78971840, + "step": 1180 + }, + { + "epoch": 0.13401418439716312, + "grad_norm": 16.98219871520996, + "learning_rate": 5e-05, + "loss": 1.365, + "num_input_tokens_seen": 79038924, + "step": 1181 + }, + { + "epoch": 0.13401418439716312, + "loss": 1.383253574371338, + "loss_ce": 0.002394153969362378, + "loss_iou": 0.60546875, + "loss_num": 0.03369140625, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 79038924, + "step": 1181 + }, + { + "epoch": 0.1341276595744681, + "grad_norm": 19.939971923828125, + "learning_rate": 5e-05, + "loss": 1.2009, + "num_input_tokens_seen": 79106424, + "step": 1182 + }, + { + "epoch": 0.1341276595744681, + "loss": 1.111865758895874, + "loss_ce": 0.00542035698890686, + "loss_iou": 0.486328125, + "loss_num": 0.026611328125, + "loss_xval": 1.109375, + "num_input_tokens_seen": 79106424, + "step": 1182 + }, + { + "epoch": 0.13424113475177304, + "grad_norm": 47.58906555175781, + "learning_rate": 5e-05, + "loss": 1.6005, + "num_input_tokens_seen": 79173664, + "step": 1183 + }, + { + "epoch": 0.13424113475177304, + "loss": 1.532172679901123, + "loss_ce": 0.006293659098446369, + "loss_iou": 0.6875, + "loss_num": 0.0301513671875, + "loss_xval": 1.5234375, + "num_input_tokens_seen": 79173664, + "step": 1183 + }, + { + "epoch": 0.13435460992907802, + "grad_norm": 21.909364700317383, + "learning_rate": 5e-05, + "loss": 1.6805, + "num_input_tokens_seen": 79240144, + "step": 1184 + }, + { + "epoch": 0.13435460992907802, + "loss": 1.5305540561676025, + "loss_ce": 0.010046247392892838, + "loss_iou": 0.62109375, + "loss_num": 0.055419921875, + "loss_xval": 1.5234375, + "num_input_tokens_seen": 79240144, + "step": 1184 + }, + { + "epoch": 0.13446808510638297, + "grad_norm": 23.18252944946289, + "learning_rate": 5e-05, + "loss": 1.5283, + "num_input_tokens_seen": 79306792, + "step": 1185 + }, + { + "epoch": 0.13446808510638297, + "loss": 1.5432246923446655, + "loss_ce": 0.0031856512650847435, + "loss_iou": 0.6875, + "loss_num": 0.03369140625, + "loss_xval": 1.5390625, + "num_input_tokens_seen": 79306792, + "step": 1185 + }, + { + "epoch": 0.13458156028368795, + "grad_norm": 21.391267776489258, + "learning_rate": 5e-05, + "loss": 1.325, + "num_input_tokens_seen": 79372028, + "step": 1186 + }, + { + "epoch": 0.13458156028368795, + "loss": 1.0989161729812622, + "loss_ce": 0.004189653787761927, + "loss_iou": 0.451171875, + "loss_num": 0.03857421875, + "loss_xval": 1.09375, + "num_input_tokens_seen": 79372028, + "step": 1186 + }, + { + "epoch": 0.1346950354609929, + "grad_norm": 26.117963790893555, + "learning_rate": 5e-05, + "loss": 1.3613, + "num_input_tokens_seen": 79438056, + "step": 1187 + }, + { + "epoch": 0.1346950354609929, + "loss": 1.2547872066497803, + "loss_ce": 0.006740288808941841, + "loss_iou": 0.53125, + "loss_num": 0.037841796875, + "loss_xval": 1.25, + "num_input_tokens_seen": 79438056, + "step": 1187 + }, + { + "epoch": 0.13480851063829788, + "grad_norm": 22.434045791625977, + "learning_rate": 5e-05, + "loss": 1.8574, + "num_input_tokens_seen": 79504748, + "step": 1188 + }, + { + "epoch": 0.13480851063829788, + "loss": 1.9079647064208984, + "loss_ce": 0.009527227841317654, + "loss_iou": 0.8046875, + "loss_num": 0.0576171875, + "loss_xval": 1.8984375, + "num_input_tokens_seen": 79504748, + "step": 1188 + }, + { + "epoch": 0.13492198581560283, + "grad_norm": 14.074780464172363, + "learning_rate": 5e-05, + "loss": 1.2558, + "num_input_tokens_seen": 79570736, + "step": 1189 + }, + { + "epoch": 0.13492198581560283, + "loss": 1.263122320175171, + "loss_ce": 0.005309822969138622, + "loss_iou": 0.53125, + "loss_num": 0.038818359375, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 79570736, + "step": 1189 + }, + { + "epoch": 0.1350354609929078, + "grad_norm": 40.81941223144531, + "learning_rate": 5e-05, + "loss": 1.1647, + "num_input_tokens_seen": 79637120, + "step": 1190 + }, + { + "epoch": 0.1350354609929078, + "loss": 1.0572423934936523, + "loss_ce": 0.0030432783532887697, + "loss_iou": 0.46484375, + "loss_num": 0.0250244140625, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 79637120, + "step": 1190 + }, + { + "epoch": 0.13514893617021276, + "grad_norm": 19.20269203186035, + "learning_rate": 5e-05, + "loss": 1.3265, + "num_input_tokens_seen": 79704240, + "step": 1191 + }, + { + "epoch": 0.13514893617021276, + "loss": 1.4163997173309326, + "loss_ce": 0.006243528798222542, + "loss_iou": 0.62109375, + "loss_num": 0.033203125, + "loss_xval": 1.40625, + "num_input_tokens_seen": 79704240, + "step": 1191 + }, + { + "epoch": 0.13526241134751774, + "grad_norm": 17.32764434814453, + "learning_rate": 5e-05, + "loss": 1.3012, + "num_input_tokens_seen": 79770300, + "step": 1192 + }, + { + "epoch": 0.13526241134751774, + "loss": 1.1895222663879395, + "loss_ce": 0.005318235605955124, + "loss_iou": 0.5390625, + "loss_num": 0.0218505859375, + "loss_xval": 1.1875, + "num_input_tokens_seen": 79770300, + "step": 1192 + }, + { + "epoch": 0.1353758865248227, + "grad_norm": 22.577350616455078, + "learning_rate": 5e-05, + "loss": 1.3881, + "num_input_tokens_seen": 79836608, + "step": 1193 + }, + { + "epoch": 0.1353758865248227, + "loss": 1.3994818925857544, + "loss_ce": 0.00958932563662529, + "loss_iou": 0.609375, + "loss_num": 0.03466796875, + "loss_xval": 1.390625, + "num_input_tokens_seen": 79836608, + "step": 1193 + }, + { + "epoch": 0.13548936170212766, + "grad_norm": 36.0549430847168, + "learning_rate": 5e-05, + "loss": 1.3794, + "num_input_tokens_seen": 79902832, + "step": 1194 + }, + { + "epoch": 0.13548936170212766, + "loss": 1.4362719058990479, + "loss_ce": 0.006096197292208672, + "loss_iou": 0.6171875, + "loss_num": 0.038330078125, + "loss_xval": 1.4296875, + "num_input_tokens_seen": 79902832, + "step": 1194 + }, + { + "epoch": 0.13560283687943261, + "grad_norm": 21.327428817749023, + "learning_rate": 5e-05, + "loss": 1.8243, + "num_input_tokens_seen": 79969060, + "step": 1195 + }, + { + "epoch": 0.13560283687943261, + "loss": 1.979425072669983, + "loss_ce": 0.008721861056983471, + "loss_iou": 0.79296875, + "loss_num": 0.07666015625, + "loss_xval": 1.96875, + "num_input_tokens_seen": 79969060, + "step": 1195 + }, + { + "epoch": 0.1357163120567376, + "grad_norm": 17.286949157714844, + "learning_rate": 5e-05, + "loss": 1.4149, + "num_input_tokens_seen": 80035956, + "step": 1196 + }, + { + "epoch": 0.1357163120567376, + "loss": 1.3608438968658447, + "loss_ce": 0.0019571264274418354, + "loss_iou": 0.578125, + "loss_num": 0.0400390625, + "loss_xval": 1.359375, + "num_input_tokens_seen": 80035956, + "step": 1196 + }, + { + "epoch": 0.13582978723404254, + "grad_norm": 36.952877044677734, + "learning_rate": 5e-05, + "loss": 1.2643, + "num_input_tokens_seen": 80103532, + "step": 1197 + }, + { + "epoch": 0.13582978723404254, + "loss": 1.3216041326522827, + "loss_ce": 0.003244720632210374, + "loss_iou": 0.578125, + "loss_num": 0.032470703125, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 80103532, + "step": 1197 + }, + { + "epoch": 0.13594326241134752, + "grad_norm": 14.392904281616211, + "learning_rate": 5e-05, + "loss": 1.4214, + "num_input_tokens_seen": 80170144, + "step": 1198 + }, + { + "epoch": 0.13594326241134752, + "loss": 1.3676351308822632, + "loss_ce": 0.0035909346770495176, + "loss_iou": 0.5703125, + "loss_num": 0.044677734375, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 80170144, + "step": 1198 + }, + { + "epoch": 0.13605673758865247, + "grad_norm": 22.86275291442871, + "learning_rate": 5e-05, + "loss": 1.4852, + "num_input_tokens_seen": 80236880, + "step": 1199 + }, + { + "epoch": 0.13605673758865247, + "loss": 1.5411748886108398, + "loss_ce": 0.007971711456775665, + "loss_iou": 0.6171875, + "loss_num": 0.059326171875, + "loss_xval": 1.53125, + "num_input_tokens_seen": 80236880, + "step": 1199 + }, + { + "epoch": 0.13617021276595745, + "grad_norm": 24.697717666625977, + "learning_rate": 5e-05, + "loss": 1.4264, + "num_input_tokens_seen": 80303320, + "step": 1200 + }, + { + "epoch": 0.13617021276595745, + "loss": 1.3468399047851562, + "loss_ce": 0.014320479705929756, + "loss_iou": 0.59375, + "loss_num": 0.0284423828125, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 80303320, + "step": 1200 + }, + { + "epoch": 0.1362836879432624, + "grad_norm": 24.1299991607666, + "learning_rate": 5e-05, + "loss": 1.5434, + "num_input_tokens_seen": 80369372, + "step": 1201 + }, + { + "epoch": 0.1362836879432624, + "loss": 1.7295103073120117, + "loss_ce": 0.0029478941578418016, + "loss_iou": 0.7578125, + "loss_num": 0.041015625, + "loss_xval": 1.7265625, + "num_input_tokens_seen": 80369372, + "step": 1201 + }, + { + "epoch": 0.13639716312056738, + "grad_norm": 20.42946434020996, + "learning_rate": 5e-05, + "loss": 1.4309, + "num_input_tokens_seen": 80436824, + "step": 1202 + }, + { + "epoch": 0.13639716312056738, + "loss": 1.5422236919403076, + "loss_ce": 0.005114227067679167, + "loss_iou": 0.63671875, + "loss_num": 0.052978515625, + "loss_xval": 1.5390625, + "num_input_tokens_seen": 80436824, + "step": 1202 + }, + { + "epoch": 0.13651063829787233, + "grad_norm": 25.667789459228516, + "learning_rate": 5e-05, + "loss": 1.5179, + "num_input_tokens_seen": 80503924, + "step": 1203 + }, + { + "epoch": 0.13651063829787233, + "loss": 1.7345032691955566, + "loss_ce": 0.0059876590967178345, + "loss_iou": 0.74609375, + "loss_num": 0.047119140625, + "loss_xval": 1.7265625, + "num_input_tokens_seen": 80503924, + "step": 1203 + }, + { + "epoch": 0.1366241134751773, + "grad_norm": 38.565860748291016, + "learning_rate": 5e-05, + "loss": 1.4832, + "num_input_tokens_seen": 80570208, + "step": 1204 + }, + { + "epoch": 0.1366241134751773, + "loss": 1.4793400764465332, + "loss_ce": 0.0013127439888194203, + "loss_iou": 0.6484375, + "loss_num": 0.035888671875, + "loss_xval": 1.4765625, + "num_input_tokens_seen": 80570208, + "step": 1204 + }, + { + "epoch": 0.13673758865248226, + "grad_norm": 19.331098556518555, + "learning_rate": 5e-05, + "loss": 1.7071, + "num_input_tokens_seen": 80637760, + "step": 1205 + }, + { + "epoch": 0.13673758865248226, + "loss": 1.8045861721038818, + "loss_ce": 0.00478156004101038, + "loss_iou": 0.77734375, + "loss_num": 0.048828125, + "loss_xval": 1.796875, + "num_input_tokens_seen": 80637760, + "step": 1205 + }, + { + "epoch": 0.13685106382978723, + "grad_norm": 15.148218154907227, + "learning_rate": 5e-05, + "loss": 1.369, + "num_input_tokens_seen": 80704392, + "step": 1206 + }, + { + "epoch": 0.13685106382978723, + "loss": 1.3731275796890259, + "loss_ce": 0.007893264293670654, + "loss_iou": 0.609375, + "loss_num": 0.02978515625, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 80704392, + "step": 1206 + }, + { + "epoch": 0.1369645390070922, + "grad_norm": 30.49707794189453, + "learning_rate": 5e-05, + "loss": 1.4834, + "num_input_tokens_seen": 80771936, + "step": 1207 + }, + { + "epoch": 0.1369645390070922, + "loss": 1.3982770442962646, + "loss_ce": 0.0017926094587892294, + "loss_iou": 0.65234375, + "loss_num": 0.0184326171875, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 80771936, + "step": 1207 + }, + { + "epoch": 0.13707801418439716, + "grad_norm": 17.35027503967285, + "learning_rate": 5e-05, + "loss": 1.6141, + "num_input_tokens_seen": 80838560, + "step": 1208 + }, + { + "epoch": 0.13707801418439716, + "loss": 1.6880419254302979, + "loss_ce": 0.007377892732620239, + "loss_iou": 0.7421875, + "loss_num": 0.039794921875, + "loss_xval": 1.6796875, + "num_input_tokens_seen": 80838560, + "step": 1208 + }, + { + "epoch": 0.13719148936170214, + "grad_norm": 37.463260650634766, + "learning_rate": 5e-05, + "loss": 1.2923, + "num_input_tokens_seen": 80904924, + "step": 1209 + }, + { + "epoch": 0.13719148936170214, + "loss": 1.292744517326355, + "loss_ce": 0.0017288398230448365, + "loss_iou": 0.5546875, + "loss_num": 0.03564453125, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 80904924, + "step": 1209 + }, + { + "epoch": 0.1373049645390071, + "grad_norm": 38.97140884399414, + "learning_rate": 5e-05, + "loss": 1.5143, + "num_input_tokens_seen": 80972104, + "step": 1210 + }, + { + "epoch": 0.1373049645390071, + "loss": 1.3575387001037598, + "loss_ce": 0.002069971291348338, + "loss_iou": 0.625, + "loss_num": 0.020751953125, + "loss_xval": 1.359375, + "num_input_tokens_seen": 80972104, + "step": 1210 + }, + { + "epoch": 0.13741843971631207, + "grad_norm": 28.530628204345703, + "learning_rate": 5e-05, + "loss": 1.6489, + "num_input_tokens_seen": 81039164, + "step": 1211 + }, + { + "epoch": 0.13741843971631207, + "loss": 1.6475305557250977, + "loss_ce": 0.00690543744713068, + "loss_iou": 0.71484375, + "loss_num": 0.0419921875, + "loss_xval": 1.640625, + "num_input_tokens_seen": 81039164, + "step": 1211 + }, + { + "epoch": 0.13753191489361702, + "grad_norm": 15.670174598693848, + "learning_rate": 5e-05, + "loss": 1.4233, + "num_input_tokens_seen": 81106420, + "step": 1212 + }, + { + "epoch": 0.13753191489361702, + "loss": 1.3804807662963867, + "loss_ce": 0.0020628594793379307, + "loss_iou": 0.6328125, + "loss_num": 0.0218505859375, + "loss_xval": 1.375, + "num_input_tokens_seen": 81106420, + "step": 1212 + }, + { + "epoch": 0.137645390070922, + "grad_norm": 24.299354553222656, + "learning_rate": 5e-05, + "loss": 1.3071, + "num_input_tokens_seen": 81173256, + "step": 1213 + }, + { + "epoch": 0.137645390070922, + "loss": 1.4372704029083252, + "loss_ce": 0.003676598658785224, + "loss_iou": 0.61328125, + "loss_num": 0.041259765625, + "loss_xval": 1.4375, + "num_input_tokens_seen": 81173256, + "step": 1213 + }, + { + "epoch": 0.13775886524822695, + "grad_norm": 27.55335235595703, + "learning_rate": 5e-05, + "loss": 1.586, + "num_input_tokens_seen": 81240112, + "step": 1214 + }, + { + "epoch": 0.13775886524822695, + "loss": 1.7488603591918945, + "loss_ce": 0.006672969553619623, + "loss_iou": 0.75, + "loss_num": 0.048828125, + "loss_xval": 1.7421875, + "num_input_tokens_seen": 81240112, + "step": 1214 + }, + { + "epoch": 0.13787234042553193, + "grad_norm": 23.642131805419922, + "learning_rate": 5e-05, + "loss": 1.5609, + "num_input_tokens_seen": 81307472, + "step": 1215 + }, + { + "epoch": 0.13787234042553193, + "loss": 1.5963404178619385, + "loss_ce": 0.005031908862292767, + "loss_iou": 0.64453125, + "loss_num": 0.060302734375, + "loss_xval": 1.59375, + "num_input_tokens_seen": 81307472, + "step": 1215 + }, + { + "epoch": 0.13798581560283688, + "grad_norm": 20.755388259887695, + "learning_rate": 5e-05, + "loss": 1.367, + "num_input_tokens_seen": 81375436, + "step": 1216 + }, + { + "epoch": 0.13798581560283688, + "loss": 1.2941069602966309, + "loss_ce": 0.006448197178542614, + "loss_iou": 0.546875, + "loss_num": 0.03955078125, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 81375436, + "step": 1216 + }, + { + "epoch": 0.13809929078014185, + "grad_norm": 21.885238647460938, + "learning_rate": 5e-05, + "loss": 1.3487, + "num_input_tokens_seen": 81441336, + "step": 1217 + }, + { + "epoch": 0.13809929078014185, + "loss": 1.3473072052001953, + "loss_ce": 0.005510379560291767, + "loss_iou": 0.56640625, + "loss_num": 0.0419921875, + "loss_xval": 1.34375, + "num_input_tokens_seen": 81441336, + "step": 1217 + }, + { + "epoch": 0.1382127659574468, + "grad_norm": 52.69109344482422, + "learning_rate": 5e-05, + "loss": 1.401, + "num_input_tokens_seen": 81507832, + "step": 1218 + }, + { + "epoch": 0.1382127659574468, + "loss": 1.3849750757217407, + "loss_ce": 0.0036274187732487917, + "loss_iou": 0.609375, + "loss_num": 0.031982421875, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 81507832, + "step": 1218 + }, + { + "epoch": 0.13832624113475178, + "grad_norm": 22.810930252075195, + "learning_rate": 5e-05, + "loss": 1.6342, + "num_input_tokens_seen": 81573916, + "step": 1219 + }, + { + "epoch": 0.13832624113475178, + "loss": 1.51918625831604, + "loss_ce": 0.0072234198451042175, + "loss_iou": 0.64453125, + "loss_num": 0.04443359375, + "loss_xval": 1.515625, + "num_input_tokens_seen": 81573916, + "step": 1219 + }, + { + "epoch": 0.13843971631205673, + "grad_norm": 13.40014934539795, + "learning_rate": 5e-05, + "loss": 1.3616, + "num_input_tokens_seen": 81641684, + "step": 1220 + }, + { + "epoch": 0.13843971631205673, + "loss": 1.423602819442749, + "loss_ce": 0.005633961409330368, + "loss_iou": 0.5703125, + "loss_num": 0.05517578125, + "loss_xval": 1.421875, + "num_input_tokens_seen": 81641684, + "step": 1220 + }, + { + "epoch": 0.1385531914893617, + "grad_norm": 14.489871978759766, + "learning_rate": 5e-05, + "loss": 1.3462, + "num_input_tokens_seen": 81709312, + "step": 1221 + }, + { + "epoch": 0.1385531914893617, + "loss": 1.4026029109954834, + "loss_ce": 0.005141993053257465, + "loss_iou": 0.6171875, + "loss_num": 0.032470703125, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 81709312, + "step": 1221 + }, + { + "epoch": 0.13866666666666666, + "grad_norm": 21.008548736572266, + "learning_rate": 5e-05, + "loss": 1.55, + "num_input_tokens_seen": 81776904, + "step": 1222 + }, + { + "epoch": 0.13866666666666666, + "loss": 1.4986374378204346, + "loss_ce": 0.010356185957789421, + "loss_iou": 0.60546875, + "loss_num": 0.055419921875, + "loss_xval": 1.484375, + "num_input_tokens_seen": 81776904, + "step": 1222 + }, + { + "epoch": 0.13878014184397164, + "grad_norm": 16.903751373291016, + "learning_rate": 5e-05, + "loss": 1.3169, + "num_input_tokens_seen": 81843300, + "step": 1223 + }, + { + "epoch": 0.13878014184397164, + "loss": 1.2334771156311035, + "loss_ce": 0.009600206278264523, + "loss_iou": 0.5078125, + "loss_num": 0.041015625, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 81843300, + "step": 1223 + }, + { + "epoch": 0.1388936170212766, + "grad_norm": 17.040407180786133, + "learning_rate": 5e-05, + "loss": 1.3464, + "num_input_tokens_seen": 81909260, + "step": 1224 + }, + { + "epoch": 0.1388936170212766, + "loss": 1.5150294303894043, + "loss_ce": 0.008193493820726871, + "loss_iou": 0.66796875, + "loss_num": 0.034912109375, + "loss_xval": 1.5078125, + "num_input_tokens_seen": 81909260, + "step": 1224 + }, + { + "epoch": 0.13900709219858157, + "grad_norm": 18.777664184570312, + "learning_rate": 5e-05, + "loss": 1.4028, + "num_input_tokens_seen": 81975380, + "step": 1225 + }, + { + "epoch": 0.13900709219858157, + "loss": 1.3265271186828613, + "loss_ce": 0.0027965246699750423, + "loss_iou": 0.56640625, + "loss_num": 0.037841796875, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 81975380, + "step": 1225 + }, + { + "epoch": 0.13912056737588652, + "grad_norm": 20.821178436279297, + "learning_rate": 5e-05, + "loss": 1.3293, + "num_input_tokens_seen": 82041308, + "step": 1226 + }, + { + "epoch": 0.13912056737588652, + "loss": 1.353590488433838, + "loss_ce": 0.0010514522437006235, + "loss_iou": 0.59375, + "loss_num": 0.033447265625, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 82041308, + "step": 1226 + }, + { + "epoch": 0.1392340425531915, + "grad_norm": 16.72079849243164, + "learning_rate": 5e-05, + "loss": 1.415, + "num_input_tokens_seen": 82108108, + "step": 1227 + }, + { + "epoch": 0.1392340425531915, + "loss": 1.2221217155456543, + "loss_ce": 0.00959724746644497, + "loss_iou": 0.52734375, + "loss_num": 0.031982421875, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 82108108, + "step": 1227 + }, + { + "epoch": 0.13934751773049645, + "grad_norm": 35.00901412963867, + "learning_rate": 5e-05, + "loss": 1.4974, + "num_input_tokens_seen": 82176532, + "step": 1228 + }, + { + "epoch": 0.13934751773049645, + "loss": 1.6323833465576172, + "loss_ce": 0.003477208549156785, + "loss_iou": 0.6640625, + "loss_num": 0.059326171875, + "loss_xval": 1.625, + "num_input_tokens_seen": 82176532, + "step": 1228 + }, + { + "epoch": 0.13946099290780142, + "grad_norm": 53.17935562133789, + "learning_rate": 5e-05, + "loss": 1.5581, + "num_input_tokens_seen": 82242800, + "step": 1229 + }, + { + "epoch": 0.13946099290780142, + "loss": 1.6357554197311401, + "loss_ce": 0.007825721055269241, + "loss_iou": 0.6953125, + "loss_num": 0.047119140625, + "loss_xval": 1.625, + "num_input_tokens_seen": 82242800, + "step": 1229 + }, + { + "epoch": 0.13957446808510637, + "grad_norm": 23.382482528686523, + "learning_rate": 5e-05, + "loss": 1.3192, + "num_input_tokens_seen": 82308860, + "step": 1230 + }, + { + "epoch": 0.13957446808510637, + "loss": 1.3961982727050781, + "loss_ce": 0.004108482506126165, + "loss_iou": 0.61328125, + "loss_num": 0.03271484375, + "loss_xval": 1.390625, + "num_input_tokens_seen": 82308860, + "step": 1230 + }, + { + "epoch": 0.13968794326241135, + "grad_norm": 34.420387268066406, + "learning_rate": 5e-05, + "loss": 1.2426, + "num_input_tokens_seen": 82375732, + "step": 1231 + }, + { + "epoch": 0.13968794326241135, + "loss": 1.249476671218872, + "loss_ce": 0.003382823895663023, + "loss_iou": 0.55859375, + "loss_num": 0.026123046875, + "loss_xval": 1.25, + "num_input_tokens_seen": 82375732, + "step": 1231 + }, + { + "epoch": 0.1398014184397163, + "grad_norm": 19.88636016845703, + "learning_rate": 5e-05, + "loss": 1.5567, + "num_input_tokens_seen": 82443140, + "step": 1232 + }, + { + "epoch": 0.1398014184397163, + "loss": 1.6640522480010986, + "loss_ce": 0.002919495804235339, + "loss_iou": 0.71875, + "loss_num": 0.044921875, + "loss_xval": 1.6640625, + "num_input_tokens_seen": 82443140, + "step": 1232 + }, + { + "epoch": 0.13991489361702128, + "grad_norm": 99.34906005859375, + "learning_rate": 5e-05, + "loss": 1.8166, + "num_input_tokens_seen": 82510152, + "step": 1233 + }, + { + "epoch": 0.13991489361702128, + "loss": 1.8904184103012085, + "loss_ce": 0.009559042751789093, + "loss_iou": 0.796875, + "loss_num": 0.05810546875, + "loss_xval": 1.8828125, + "num_input_tokens_seen": 82510152, + "step": 1233 + }, + { + "epoch": 0.14002836879432623, + "grad_norm": 35.57815170288086, + "learning_rate": 5e-05, + "loss": 1.3345, + "num_input_tokens_seen": 82577908, + "step": 1234 + }, + { + "epoch": 0.14002836879432623, + "loss": 1.321537971496582, + "loss_ce": 0.0026903185062110424, + "loss_iou": 0.55859375, + "loss_num": 0.041015625, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 82577908, + "step": 1234 + }, + { + "epoch": 0.1401418439716312, + "grad_norm": 20.491300582885742, + "learning_rate": 5e-05, + "loss": 1.3836, + "num_input_tokens_seen": 82645528, + "step": 1235 + }, + { + "epoch": 0.1401418439716312, + "loss": 1.39552640914917, + "loss_ce": 0.003924878314137459, + "loss_iou": 0.60546875, + "loss_num": 0.036865234375, + "loss_xval": 1.390625, + "num_input_tokens_seen": 82645528, + "step": 1235 + }, + { + "epoch": 0.14025531914893616, + "grad_norm": 34.151187896728516, + "learning_rate": 5e-05, + "loss": 1.1299, + "num_input_tokens_seen": 82711260, + "step": 1236 + }, + { + "epoch": 0.14025531914893616, + "loss": 1.102052927017212, + "loss_ce": 0.002565537579357624, + "loss_iou": 0.470703125, + "loss_num": 0.031982421875, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 82711260, + "step": 1236 + }, + { + "epoch": 0.14036879432624114, + "grad_norm": 39.794368743896484, + "learning_rate": 5e-05, + "loss": 1.3865, + "num_input_tokens_seen": 82778264, + "step": 1237 + }, + { + "epoch": 0.14036879432624114, + "loss": 1.3191828727722168, + "loss_ce": 0.0037532318383455276, + "loss_iou": 0.57421875, + "loss_num": 0.033447265625, + "loss_xval": 1.3125, + "num_input_tokens_seen": 82778264, + "step": 1237 + }, + { + "epoch": 0.1404822695035461, + "grad_norm": 22.60887336730957, + "learning_rate": 5e-05, + "loss": 1.1742, + "num_input_tokens_seen": 82843800, + "step": 1238 + }, + { + "epoch": 0.1404822695035461, + "loss": 1.1498268842697144, + "loss_ce": 0.0018166849622502923, + "loss_iou": 0.494140625, + "loss_num": 0.031982421875, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 82843800, + "step": 1238 + }, + { + "epoch": 0.14059574468085106, + "grad_norm": 30.69041633605957, + "learning_rate": 5e-05, + "loss": 1.4335, + "num_input_tokens_seen": 82910708, + "step": 1239 + }, + { + "epoch": 0.14059574468085106, + "loss": 1.4689209461212158, + "loss_ce": 0.0060303229838609695, + "loss_iou": 0.640625, + "loss_num": 0.037109375, + "loss_xval": 1.4609375, + "num_input_tokens_seen": 82910708, + "step": 1239 + }, + { + "epoch": 0.14070921985815601, + "grad_norm": 18.99515724182129, + "learning_rate": 5e-05, + "loss": 1.6952, + "num_input_tokens_seen": 82977808, + "step": 1240 + }, + { + "epoch": 0.14070921985815601, + "loss": 1.6955128908157349, + "loss_ce": 0.008012907579541206, + "loss_iou": 0.75390625, + "loss_num": 0.03564453125, + "loss_xval": 1.6875, + "num_input_tokens_seen": 82977808, + "step": 1240 + }, + { + "epoch": 0.140822695035461, + "grad_norm": 16.018024444580078, + "learning_rate": 5e-05, + "loss": 1.4754, + "num_input_tokens_seen": 83044512, + "step": 1241 + }, + { + "epoch": 0.140822695035461, + "loss": 1.5090053081512451, + "loss_ce": 0.00314600532874465, + "loss_iou": 0.640625, + "loss_num": 0.04541015625, + "loss_xval": 1.5078125, + "num_input_tokens_seen": 83044512, + "step": 1241 + }, + { + "epoch": 0.14093617021276597, + "grad_norm": 34.381431579589844, + "learning_rate": 5e-05, + "loss": 1.3697, + "num_input_tokens_seen": 83111568, + "step": 1242 + }, + { + "epoch": 0.14093617021276597, + "loss": 1.3025689125061035, + "loss_ce": 0.003252419177442789, + "loss_iou": 0.54296875, + "loss_num": 0.042236328125, + "loss_xval": 1.296875, + "num_input_tokens_seen": 83111568, + "step": 1242 + }, + { + "epoch": 0.14104964539007092, + "grad_norm": 19.652851104736328, + "learning_rate": 5e-05, + "loss": 1.5053, + "num_input_tokens_seen": 83178832, + "step": 1243 + }, + { + "epoch": 0.14104964539007092, + "loss": 1.4770689010620117, + "loss_ce": 0.01246921718120575, + "loss_iou": 0.59375, + "loss_num": 0.055908203125, + "loss_xval": 1.4609375, + "num_input_tokens_seen": 83178832, + "step": 1243 + }, + { + "epoch": 0.1411631205673759, + "grad_norm": 29.28629493713379, + "learning_rate": 5e-05, + "loss": 1.7784, + "num_input_tokens_seen": 83245684, + "step": 1244 + }, + { + "epoch": 0.1411631205673759, + "loss": 1.8505866527557373, + "loss_ce": 0.0058601126074790955, + "loss_iou": 0.78515625, + "loss_num": 0.054443359375, + "loss_xval": 1.84375, + "num_input_tokens_seen": 83245684, + "step": 1244 + }, + { + "epoch": 0.14127659574468085, + "grad_norm": 32.99074172973633, + "learning_rate": 5e-05, + "loss": 1.3892, + "num_input_tokens_seen": 83311732, + "step": 1245 + }, + { + "epoch": 0.14127659574468085, + "loss": 1.5613733530044556, + "loss_ce": 0.007174075581133366, + "loss_iou": 0.6328125, + "loss_num": 0.057373046875, + "loss_xval": 1.5546875, + "num_input_tokens_seen": 83311732, + "step": 1245 + }, + { + "epoch": 0.14139007092198583, + "grad_norm": 20.989307403564453, + "learning_rate": 5e-05, + "loss": 1.5556, + "num_input_tokens_seen": 83378504, + "step": 1246 + }, + { + "epoch": 0.14139007092198583, + "loss": 1.5263569355010986, + "loss_ce": 0.007802245207130909, + "loss_iou": 0.66015625, + "loss_num": 0.040283203125, + "loss_xval": 1.515625, + "num_input_tokens_seen": 83378504, + "step": 1246 + }, + { + "epoch": 0.14150354609929078, + "grad_norm": 16.075763702392578, + "learning_rate": 5e-05, + "loss": 1.3399, + "num_input_tokens_seen": 83446696, + "step": 1247 + }, + { + "epoch": 0.14150354609929078, + "loss": 1.4105980396270752, + "loss_ce": 0.006301229353994131, + "loss_iou": 0.59375, + "loss_num": 0.04345703125, + "loss_xval": 1.40625, + "num_input_tokens_seen": 83446696, + "step": 1247 + }, + { + "epoch": 0.14161702127659576, + "grad_norm": 31.072378158569336, + "learning_rate": 5e-05, + "loss": 1.3182, + "num_input_tokens_seen": 83513260, + "step": 1248 + }, + { + "epoch": 0.14161702127659576, + "loss": 1.314172625541687, + "loss_ce": 0.0041140420362353325, + "loss_iou": 0.6015625, + "loss_num": 0.0211181640625, + "loss_xval": 1.3125, + "num_input_tokens_seen": 83513260, + "step": 1248 + }, + { + "epoch": 0.1417304964539007, + "grad_norm": 25.461591720581055, + "learning_rate": 5e-05, + "loss": 1.5348, + "num_input_tokens_seen": 83580020, + "step": 1249 + }, + { + "epoch": 0.1417304964539007, + "loss": 1.411663293838501, + "loss_ce": 0.0029718028381466866, + "loss_iou": 0.6328125, + "loss_num": 0.0289306640625, + "loss_xval": 1.40625, + "num_input_tokens_seen": 83580020, + "step": 1249 + }, + { + "epoch": 0.14184397163120568, + "grad_norm": 21.89375877380371, + "learning_rate": 5e-05, + "loss": 1.1445, + "num_input_tokens_seen": 83647824, + "step": 1250 + }, + { + "epoch": 0.14184397163120568, + "eval_seeclick_CIoU": 0.35534368455410004, + "eval_seeclick_GIoU": 0.3230367451906204, + "eval_seeclick_IoU": 0.4360887259244919, + "eval_seeclick_MAE_all": 0.16445186734199524, + "eval_seeclick_MAE_h": 0.10260070860385895, + "eval_seeclick_MAE_w": 0.11237437278032303, + "eval_seeclick_MAE_x_boxes": 0.24701351672410965, + "eval_seeclick_MAE_y_boxes": 0.11459737084805965, + "eval_seeclick_NUM_probability": 0.9998001456260681, + "eval_seeclick_inside_bbox": 0.6458333432674408, + "eval_seeclick_loss": 2.5841243267059326, + "eval_seeclick_loss_ce": 0.015271092765033245, + "eval_seeclick_loss_iou": 0.8865966796875, + "eval_seeclick_loss_num": 0.1575164794921875, + "eval_seeclick_loss_xval": 2.56103515625, + "eval_seeclick_runtime": 62.844, + "eval_seeclick_samples_per_second": 0.748, + "eval_seeclick_steps_per_second": 0.032, + "num_input_tokens_seen": 83647824, + "step": 1250 + }, + { + "epoch": 0.14184397163120568, + "eval_icons_CIoU": 0.45511914789676666, + "eval_icons_GIoU": 0.43582502007484436, + "eval_icons_IoU": 0.49918705224990845, + "eval_icons_MAE_all": 0.1431579813361168, + "eval_icons_MAE_h": 0.1264801099896431, + "eval_icons_MAE_w": 0.10986915230751038, + "eval_icons_MAE_x_boxes": 0.1041191853582859, + "eval_icons_MAE_y_boxes": 0.11276685446500778, + "eval_icons_NUM_probability": 0.9997867941856384, + "eval_icons_inside_bbox": 0.7395833432674408, + "eval_icons_loss": 2.5688021183013916, + "eval_icons_loss_ce": 5.150009565113578e-05, + "eval_icons_loss_iou": 0.919921875, + "eval_icons_loss_num": 0.13809967041015625, + "eval_icons_loss_xval": 2.53125, + "eval_icons_runtime": 65.2537, + "eval_icons_samples_per_second": 0.766, + "eval_icons_steps_per_second": 0.031, + "num_input_tokens_seen": 83647824, + "step": 1250 + }, + { + "epoch": 0.14184397163120568, + "eval_screenspot_CIoU": 0.37544311086336773, + "eval_screenspot_GIoU": 0.35494911670684814, + "eval_screenspot_IoU": 0.46189379692077637, + "eval_screenspot_MAE_all": 0.14354566733042398, + "eval_screenspot_MAE_h": 0.06346188361446063, + "eval_screenspot_MAE_w": 0.1482609063386917, + "eval_screenspot_MAE_x_boxes": 0.22244171798229218, + "eval_screenspot_MAE_y_boxes": 0.11818373451630275, + "eval_screenspot_NUM_probability": 0.9996840755144755, + "eval_screenspot_inside_bbox": 0.6879166762034098, + "eval_screenspot_loss": 2.701646327972412, + "eval_screenspot_loss_ce": 0.009468633060654005, + "eval_screenspot_loss_iou": 0.9983723958333334, + "eval_screenspot_loss_num": 0.15602620442708334, + "eval_screenspot_loss_xval": 2.7770182291666665, + "eval_screenspot_runtime": 124.0022, + "eval_screenspot_samples_per_second": 0.718, + "eval_screenspot_steps_per_second": 0.024, + "num_input_tokens_seen": 83647824, + "step": 1250 + }, + { + "epoch": 0.14184397163120568, + "eval_compot_CIoU": 0.3888193219900131, + "eval_compot_GIoU": 0.36958639323711395, + "eval_compot_IoU": 0.47874514758586884, + "eval_compot_MAE_all": 0.14617571979761124, + "eval_compot_MAE_h": 0.04498503729701042, + "eval_compot_MAE_w": 0.14000533521175385, + "eval_compot_MAE_x_boxes": 0.20828162878751755, + "eval_compot_MAE_y_boxes": 0.12768446654081345, + "eval_compot_NUM_probability": 0.9996825158596039, + "eval_compot_inside_bbox": 0.6493055522441864, + "eval_compot_loss": 2.6246838569641113, + "eval_compot_loss_ce": 0.006993250455707312, + "eval_compot_loss_iou": 0.968505859375, + "eval_compot_loss_num": 0.13965606689453125, + "eval_compot_loss_xval": 2.63427734375, + "eval_compot_runtime": 69.8113, + "eval_compot_samples_per_second": 0.716, + "eval_compot_steps_per_second": 0.029, + "num_input_tokens_seen": 83647824, + "step": 1250 + }, + { + "epoch": 0.14184397163120568, + "loss": 2.673769474029541, + "loss_ce": 0.004824299365282059, + "loss_iou": 0.96875, + "loss_num": 0.146484375, + "loss_xval": 2.671875, + "num_input_tokens_seen": 83647824, + "step": 1250 + }, + { + "epoch": 0.14195744680851063, + "grad_norm": 23.104665756225586, + "learning_rate": 5e-05, + "loss": 1.5728, + "num_input_tokens_seen": 83715488, + "step": 1251 + }, + { + "epoch": 0.14195744680851063, + "loss": 1.4913071393966675, + "loss_ce": 0.0049790143966674805, + "loss_iou": 0.6328125, + "loss_num": 0.044677734375, + "loss_xval": 1.484375, + "num_input_tokens_seen": 83715488, + "step": 1251 + }, + { + "epoch": 0.1420709219858156, + "grad_norm": 89.99991607666016, + "learning_rate": 5e-05, + "loss": 1.2201, + "num_input_tokens_seen": 83782176, + "step": 1252 + }, + { + "epoch": 0.1420709219858156, + "loss": 1.4072693586349487, + "loss_ce": 0.005902207922190428, + "loss_iou": 0.64453125, + "loss_num": 0.02197265625, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 83782176, + "step": 1252 + }, + { + "epoch": 0.14218439716312056, + "grad_norm": 22.880544662475586, + "learning_rate": 5e-05, + "loss": 1.6521, + "num_input_tokens_seen": 83850512, + "step": 1253 + }, + { + "epoch": 0.14218439716312056, + "loss": 1.617642879486084, + "loss_ce": 0.004361619241535664, + "loss_iou": 0.68359375, + "loss_num": 0.049560546875, + "loss_xval": 1.609375, + "num_input_tokens_seen": 83850512, + "step": 1253 + }, + { + "epoch": 0.14229787234042554, + "grad_norm": 39.49728012084961, + "learning_rate": 5e-05, + "loss": 1.3073, + "num_input_tokens_seen": 83917632, + "step": 1254 + }, + { + "epoch": 0.14229787234042554, + "loss": 1.4510488510131836, + "loss_ce": 0.006224633660167456, + "loss_iou": 0.5859375, + "loss_num": 0.05419921875, + "loss_xval": 1.4453125, + "num_input_tokens_seen": 83917632, + "step": 1254 + }, + { + "epoch": 0.1424113475177305, + "grad_norm": 19.207666397094727, + "learning_rate": 5e-05, + "loss": 1.7466, + "num_input_tokens_seen": 83983884, + "step": 1255 + }, + { + "epoch": 0.1424113475177305, + "loss": 1.8959105014801025, + "loss_ce": 0.006262082606554031, + "loss_iou": 0.7890625, + "loss_num": 0.0615234375, + "loss_xval": 1.890625, + "num_input_tokens_seen": 83983884, + "step": 1255 + }, + { + "epoch": 0.14252482269503547, + "grad_norm": 21.705059051513672, + "learning_rate": 5e-05, + "loss": 1.1817, + "num_input_tokens_seen": 84050900, + "step": 1256 + }, + { + "epoch": 0.14252482269503547, + "loss": 1.1838637590408325, + "loss_ce": 0.003138656262308359, + "loss_iou": 0.53125, + "loss_num": 0.0242919921875, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 84050900, + "step": 1256 + }, + { + "epoch": 0.14263829787234042, + "grad_norm": 33.35017776489258, + "learning_rate": 5e-05, + "loss": 1.57, + "num_input_tokens_seen": 84117900, + "step": 1257 + }, + { + "epoch": 0.14263829787234042, + "loss": 1.7908754348754883, + "loss_ce": 0.005719222128391266, + "loss_iou": 0.7578125, + "loss_num": 0.05322265625, + "loss_xval": 1.78125, + "num_input_tokens_seen": 84117900, + "step": 1257 + }, + { + "epoch": 0.1427517730496454, + "grad_norm": 16.660888671875, + "learning_rate": 5e-05, + "loss": 1.5323, + "num_input_tokens_seen": 84185932, + "step": 1258 + }, + { + "epoch": 0.1427517730496454, + "loss": 1.4248194694519043, + "loss_ce": 0.002944433130323887, + "loss_iou": 0.640625, + "loss_num": 0.027587890625, + "loss_xval": 1.421875, + "num_input_tokens_seen": 84185932, + "step": 1258 + }, + { + "epoch": 0.14286524822695035, + "grad_norm": 19.679264068603516, + "learning_rate": 5e-05, + "loss": 1.487, + "num_input_tokens_seen": 84252008, + "step": 1259 + }, + { + "epoch": 0.14286524822695035, + "loss": 1.5165657997131348, + "loss_ce": 0.003870561718940735, + "loss_iou": 0.66015625, + "loss_num": 0.037841796875, + "loss_xval": 1.515625, + "num_input_tokens_seen": 84252008, + "step": 1259 + }, + { + "epoch": 0.14297872340425533, + "grad_norm": 26.991609573364258, + "learning_rate": 5e-05, + "loss": 1.3037, + "num_input_tokens_seen": 84319012, + "step": 1260 + }, + { + "epoch": 0.14297872340425533, + "loss": 1.275674819946289, + "loss_ce": 0.003213902236893773, + "loss_iou": 0.5859375, + "loss_num": 0.020263671875, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 84319012, + "step": 1260 + }, + { + "epoch": 0.14309219858156028, + "grad_norm": 20.15938377380371, + "learning_rate": 5e-05, + "loss": 1.5302, + "num_input_tokens_seen": 84387116, + "step": 1261 + }, + { + "epoch": 0.14309219858156028, + "loss": 1.4571900367736816, + "loss_ce": 0.005041563883423805, + "loss_iou": 0.6328125, + "loss_num": 0.037353515625, + "loss_xval": 1.453125, + "num_input_tokens_seen": 84387116, + "step": 1261 + }, + { + "epoch": 0.14320567375886525, + "grad_norm": 17.457637786865234, + "learning_rate": 5e-05, + "loss": 1.3634, + "num_input_tokens_seen": 84453336, + "step": 1262 + }, + { + "epoch": 0.14320567375886525, + "loss": 1.3392393589019775, + "loss_ce": 0.004400388337671757, + "loss_iou": 0.546875, + "loss_num": 0.0478515625, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 84453336, + "step": 1262 + }, + { + "epoch": 0.1433191489361702, + "grad_norm": 19.0693416595459, + "learning_rate": 5e-05, + "loss": 1.7089, + "num_input_tokens_seen": 84520440, + "step": 1263 + }, + { + "epoch": 0.1433191489361702, + "loss": 1.5996140241622925, + "loss_ce": 0.004887450952082872, + "loss_iou": 0.671875, + "loss_num": 0.050048828125, + "loss_xval": 1.59375, + "num_input_tokens_seen": 84520440, + "step": 1263 + }, + { + "epoch": 0.14343262411347518, + "grad_norm": 22.67519187927246, + "learning_rate": 5e-05, + "loss": 1.3051, + "num_input_tokens_seen": 84588164, + "step": 1264 + }, + { + "epoch": 0.14343262411347518, + "loss": 1.1696977615356445, + "loss_ce": 0.0066118501126766205, + "loss_iou": 0.51953125, + "loss_num": 0.02490234375, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 84588164, + "step": 1264 + }, + { + "epoch": 0.14354609929078013, + "grad_norm": 23.912017822265625, + "learning_rate": 5e-05, + "loss": 1.5299, + "num_input_tokens_seen": 84655172, + "step": 1265 + }, + { + "epoch": 0.14354609929078013, + "loss": 1.4501495361328125, + "loss_ce": 0.008255008608102798, + "loss_iou": 0.59765625, + "loss_num": 0.048828125, + "loss_xval": 1.4453125, + "num_input_tokens_seen": 84655172, + "step": 1265 + }, + { + "epoch": 0.1436595744680851, + "grad_norm": 35.148441314697266, + "learning_rate": 5e-05, + "loss": 1.291, + "num_input_tokens_seen": 84721388, + "step": 1266 + }, + { + "epoch": 0.1436595744680851, + "loss": 1.2760717868804932, + "loss_ce": 0.005563922226428986, + "loss_iou": 0.5703125, + "loss_num": 0.0264892578125, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 84721388, + "step": 1266 + }, + { + "epoch": 0.14377304964539006, + "grad_norm": 22.54648208618164, + "learning_rate": 5e-05, + "loss": 1.5625, + "num_input_tokens_seen": 84788324, + "step": 1267 + }, + { + "epoch": 0.14377304964539006, + "loss": 1.4042866230010986, + "loss_ce": 0.005299702286720276, + "loss_iou": 0.6171875, + "loss_num": 0.032470703125, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 84788324, + "step": 1267 + }, + { + "epoch": 0.14388652482269504, + "grad_norm": 14.795693397521973, + "learning_rate": 5e-05, + "loss": 1.3543, + "num_input_tokens_seen": 84856220, + "step": 1268 + }, + { + "epoch": 0.14388652482269504, + "loss": 1.5055372714996338, + "loss_ce": 0.008466976694762707, + "loss_iou": 0.6328125, + "loss_num": 0.046630859375, + "loss_xval": 1.5, + "num_input_tokens_seen": 84856220, + "step": 1268 + }, + { + "epoch": 0.144, + "grad_norm": 25.36332130432129, + "learning_rate": 5e-05, + "loss": 1.253, + "num_input_tokens_seen": 84923616, + "step": 1269 + }, + { + "epoch": 0.144, + "loss": 1.1057672500610352, + "loss_ce": 0.006157920230180025, + "loss_iou": 0.5078125, + "loss_num": 0.0167236328125, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 84923616, + "step": 1269 + }, + { + "epoch": 0.14411347517730497, + "grad_norm": 19.687204360961914, + "learning_rate": 5e-05, + "loss": 1.5529, + "num_input_tokens_seen": 84990936, + "step": 1270 + }, + { + "epoch": 0.14411347517730497, + "loss": 1.570521354675293, + "loss_ce": 0.003138601779937744, + "loss_iou": 0.7109375, + "loss_num": 0.0291748046875, + "loss_xval": 1.5703125, + "num_input_tokens_seen": 84990936, + "step": 1270 + }, + { + "epoch": 0.14422695035460992, + "grad_norm": 21.73642349243164, + "learning_rate": 5e-05, + "loss": 1.5103, + "num_input_tokens_seen": 85058952, + "step": 1271 + }, + { + "epoch": 0.14422695035460992, + "loss": 1.4701350927352905, + "loss_ce": 0.006267893593758345, + "loss_iou": 0.625, + "loss_num": 0.04296875, + "loss_xval": 1.4609375, + "num_input_tokens_seen": 85058952, + "step": 1271 + }, + { + "epoch": 0.1443404255319149, + "grad_norm": 26.395143508911133, + "learning_rate": 5e-05, + "loss": 1.493, + "num_input_tokens_seen": 85126704, + "step": 1272 + }, + { + "epoch": 0.1443404255319149, + "loss": 1.3636934757232666, + "loss_ce": 0.007736537139862776, + "loss_iou": 0.60546875, + "loss_num": 0.0284423828125, + "loss_xval": 1.359375, + "num_input_tokens_seen": 85126704, + "step": 1272 + }, + { + "epoch": 0.14445390070921985, + "grad_norm": 25.712158203125, + "learning_rate": 5e-05, + "loss": 1.5127, + "num_input_tokens_seen": 85193132, + "step": 1273 + }, + { + "epoch": 0.14445390070921985, + "loss": 1.5804246664047241, + "loss_ce": 0.005229274742305279, + "loss_iou": 0.6953125, + "loss_num": 0.036376953125, + "loss_xval": 1.578125, + "num_input_tokens_seen": 85193132, + "step": 1273 + }, + { + "epoch": 0.14456737588652482, + "grad_norm": 48.47491455078125, + "learning_rate": 5e-05, + "loss": 1.4295, + "num_input_tokens_seen": 85259256, + "step": 1274 + }, + { + "epoch": 0.14456737588652482, + "loss": 1.496551752090454, + "loss_ce": 0.0028993836604058743, + "loss_iou": 0.6484375, + "loss_num": 0.0390625, + "loss_xval": 1.4921875, + "num_input_tokens_seen": 85259256, + "step": 1274 + }, + { + "epoch": 0.14468085106382977, + "grad_norm": 21.542951583862305, + "learning_rate": 5e-05, + "loss": 1.3163, + "num_input_tokens_seen": 85325920, + "step": 1275 + }, + { + "epoch": 0.14468085106382977, + "loss": 1.2259358167648315, + "loss_ce": 0.008162369020283222, + "loss_iou": 0.55859375, + "loss_num": 0.0205078125, + "loss_xval": 1.21875, + "num_input_tokens_seen": 85325920, + "step": 1275 + }, + { + "epoch": 0.14479432624113475, + "grad_norm": 19.89081382751465, + "learning_rate": 5e-05, + "loss": 1.3954, + "num_input_tokens_seen": 85391956, + "step": 1276 + }, + { + "epoch": 0.14479432624113475, + "loss": 1.3486804962158203, + "loss_ce": 0.0061512659303843975, + "loss_iou": 0.55859375, + "loss_num": 0.044921875, + "loss_xval": 1.34375, + "num_input_tokens_seen": 85391956, + "step": 1276 + }, + { + "epoch": 0.14490780141843973, + "grad_norm": 33.10628890991211, + "learning_rate": 5e-05, + "loss": 1.4922, + "num_input_tokens_seen": 85459108, + "step": 1277 + }, + { + "epoch": 0.14490780141843973, + "loss": 1.4686174392700195, + "loss_ce": 0.0027972571551799774, + "loss_iou": 0.63671875, + "loss_num": 0.038818359375, + "loss_xval": 1.46875, + "num_input_tokens_seen": 85459108, + "step": 1277 + }, + { + "epoch": 0.14502127659574468, + "grad_norm": 20.98089027404785, + "learning_rate": 5e-05, + "loss": 1.6278, + "num_input_tokens_seen": 85525672, + "step": 1278 + }, + { + "epoch": 0.14502127659574468, + "loss": 1.6187314987182617, + "loss_ce": 0.009356459602713585, + "loss_iou": 0.7109375, + "loss_num": 0.036376953125, + "loss_xval": 1.609375, + "num_input_tokens_seen": 85525672, + "step": 1278 + }, + { + "epoch": 0.14513475177304966, + "grad_norm": 23.314123153686523, + "learning_rate": 5e-05, + "loss": 1.2374, + "num_input_tokens_seen": 85592220, + "step": 1279 + }, + { + "epoch": 0.14513475177304966, + "loss": 1.1303927898406982, + "loss_ce": 0.0014865901321172714, + "loss_iou": 0.5078125, + "loss_num": 0.02294921875, + "loss_xval": 1.125, + "num_input_tokens_seen": 85592220, + "step": 1279 + }, + { + "epoch": 0.1452482269503546, + "grad_norm": 23.345767974853516, + "learning_rate": 5e-05, + "loss": 1.5345, + "num_input_tokens_seen": 85659896, + "step": 1280 + }, + { + "epoch": 0.1452482269503546, + "loss": 1.6871885061264038, + "loss_ce": 0.005547807086259127, + "loss_iou": 0.7578125, + "loss_num": 0.0341796875, + "loss_xval": 1.6796875, + "num_input_tokens_seen": 85659896, + "step": 1280 + }, + { + "epoch": 0.1453617021276596, + "grad_norm": 27.71649169921875, + "learning_rate": 5e-05, + "loss": 1.4165, + "num_input_tokens_seen": 85727604, + "step": 1281 + }, + { + "epoch": 0.1453617021276596, + "loss": 1.445253849029541, + "loss_ce": 0.006777266506105661, + "loss_iou": 0.62109375, + "loss_num": 0.039306640625, + "loss_xval": 1.4375, + "num_input_tokens_seen": 85727604, + "step": 1281 + }, + { + "epoch": 0.14547517730496454, + "grad_norm": 19.060155868530273, + "learning_rate": 5e-05, + "loss": 1.5371, + "num_input_tokens_seen": 85794804, + "step": 1282 + }, + { + "epoch": 0.14547517730496454, + "loss": 1.5616226196289062, + "loss_ce": 0.0049820952117443085, + "loss_iou": 0.7109375, + "loss_num": 0.026611328125, + "loss_xval": 1.5546875, + "num_input_tokens_seen": 85794804, + "step": 1282 + }, + { + "epoch": 0.14558865248226951, + "grad_norm": 36.65122985839844, + "learning_rate": 5e-05, + "loss": 1.4422, + "num_input_tokens_seen": 85861924, + "step": 1283 + }, + { + "epoch": 0.14558865248226951, + "loss": 1.392329454421997, + "loss_ce": 0.0036576255224645138, + "loss_iou": 0.62109375, + "loss_num": 0.0299072265625, + "loss_xval": 1.390625, + "num_input_tokens_seen": 85861924, + "step": 1283 + }, + { + "epoch": 0.14570212765957447, + "grad_norm": 50.20028305053711, + "learning_rate": 5e-05, + "loss": 1.4092, + "num_input_tokens_seen": 85929664, + "step": 1284 + }, + { + "epoch": 0.14570212765957447, + "loss": 1.3304567337036133, + "loss_ce": 0.003308376995846629, + "loss_iou": 0.58203125, + "loss_num": 0.033447265625, + "loss_xval": 1.328125, + "num_input_tokens_seen": 85929664, + "step": 1284 + }, + { + "epoch": 0.14581560283687944, + "grad_norm": 36.9982795715332, + "learning_rate": 5e-05, + "loss": 1.5325, + "num_input_tokens_seen": 85995888, + "step": 1285 + }, + { + "epoch": 0.14581560283687944, + "loss": 1.4955884218215942, + "loss_ce": 0.002424384467303753, + "loss_iou": 0.68359375, + "loss_num": 0.0255126953125, + "loss_xval": 1.4921875, + "num_input_tokens_seen": 85995888, + "step": 1285 + }, + { + "epoch": 0.1459290780141844, + "grad_norm": 27.300844192504883, + "learning_rate": 5e-05, + "loss": 1.5988, + "num_input_tokens_seen": 86062172, + "step": 1286 + }, + { + "epoch": 0.1459290780141844, + "loss": 1.5350995063781738, + "loss_ce": 0.005802600644528866, + "loss_iou": 0.6796875, + "loss_num": 0.034423828125, + "loss_xval": 1.53125, + "num_input_tokens_seen": 86062172, + "step": 1286 + }, + { + "epoch": 0.14604255319148937, + "grad_norm": 34.43598937988281, + "learning_rate": 5e-05, + "loss": 1.4802, + "num_input_tokens_seen": 86128520, + "step": 1287 + }, + { + "epoch": 0.14604255319148937, + "loss": 1.5697935819625854, + "loss_ce": 0.00729357311502099, + "loss_iou": 0.625, + "loss_num": 0.06298828125, + "loss_xval": 1.5625, + "num_input_tokens_seen": 86128520, + "step": 1287 + }, + { + "epoch": 0.14615602836879432, + "grad_norm": 45.84560012817383, + "learning_rate": 5e-05, + "loss": 1.4506, + "num_input_tokens_seen": 86195332, + "step": 1288 + }, + { + "epoch": 0.14615602836879432, + "loss": 1.58847177028656, + "loss_ce": 0.006440514698624611, + "loss_iou": 0.65234375, + "loss_num": 0.05615234375, + "loss_xval": 1.578125, + "num_input_tokens_seen": 86195332, + "step": 1288 + }, + { + "epoch": 0.1462695035460993, + "grad_norm": 25.893476486206055, + "learning_rate": 5e-05, + "loss": 1.646, + "num_input_tokens_seen": 86263444, + "step": 1289 + }, + { + "epoch": 0.1462695035460993, + "loss": 1.6560659408569336, + "loss_ce": 0.005675372667610645, + "loss_iou": 0.73046875, + "loss_num": 0.038330078125, + "loss_xval": 1.6484375, + "num_input_tokens_seen": 86263444, + "step": 1289 + }, + { + "epoch": 0.14638297872340425, + "grad_norm": 23.81976890563965, + "learning_rate": 5e-05, + "loss": 1.4672, + "num_input_tokens_seen": 86330416, + "step": 1290 + }, + { + "epoch": 0.14638297872340425, + "loss": 1.5897024869918823, + "loss_ce": 0.005229849833995104, + "loss_iou": 0.62890625, + "loss_num": 0.06640625, + "loss_xval": 1.5859375, + "num_input_tokens_seen": 86330416, + "step": 1290 + }, + { + "epoch": 0.14649645390070923, + "grad_norm": 45.55655288696289, + "learning_rate": 5e-05, + "loss": 1.5638, + "num_input_tokens_seen": 86396884, + "step": 1291 + }, + { + "epoch": 0.14649645390070923, + "loss": 1.5570671558380127, + "loss_ce": 0.0057976702228188515, + "loss_iou": 0.68359375, + "loss_num": 0.037353515625, + "loss_xval": 1.5546875, + "num_input_tokens_seen": 86396884, + "step": 1291 + }, + { + "epoch": 0.14660992907801418, + "grad_norm": 20.70387840270996, + "learning_rate": 5e-05, + "loss": 1.7103, + "num_input_tokens_seen": 86464248, + "step": 1292 + }, + { + "epoch": 0.14660992907801418, + "loss": 1.7631964683532715, + "loss_ce": 0.004407284315675497, + "loss_iou": 0.76171875, + "loss_num": 0.046875, + "loss_xval": 1.7578125, + "num_input_tokens_seen": 86464248, + "step": 1292 + }, + { + "epoch": 0.14672340425531916, + "grad_norm": 18.667028427124023, + "learning_rate": 5e-05, + "loss": 1.3508, + "num_input_tokens_seen": 86530440, + "step": 1293 + }, + { + "epoch": 0.14672340425531916, + "loss": 1.1692137718200684, + "loss_ce": 0.004907158203423023, + "loss_iou": 0.515625, + "loss_num": 0.027099609375, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 86530440, + "step": 1293 + }, + { + "epoch": 0.1468368794326241, + "grad_norm": 12.54991340637207, + "learning_rate": 5e-05, + "loss": 1.4255, + "num_input_tokens_seen": 86597472, + "step": 1294 + }, + { + "epoch": 0.1468368794326241, + "loss": 1.510319709777832, + "loss_ce": 0.008854799903929234, + "loss_iou": 0.609375, + "loss_num": 0.056396484375, + "loss_xval": 1.5, + "num_input_tokens_seen": 86597472, + "step": 1294 + }, + { + "epoch": 0.14695035460992908, + "grad_norm": 29.967374801635742, + "learning_rate": 5e-05, + "loss": 1.3464, + "num_input_tokens_seen": 86663892, + "step": 1295 + }, + { + "epoch": 0.14695035460992908, + "loss": 1.3096106052398682, + "loss_ce": 0.004923174157738686, + "loss_iou": 0.55078125, + "loss_num": 0.040771484375, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 86663892, + "step": 1295 + }, + { + "epoch": 0.14706382978723403, + "grad_norm": 22.299840927124023, + "learning_rate": 5e-05, + "loss": 1.5423, + "num_input_tokens_seen": 86730124, + "step": 1296 + }, + { + "epoch": 0.14706382978723403, + "loss": 1.4547605514526367, + "loss_ce": 0.006518366746604443, + "loss_iou": 0.62890625, + "loss_num": 0.037841796875, + "loss_xval": 1.4453125, + "num_input_tokens_seen": 86730124, + "step": 1296 + }, + { + "epoch": 0.147177304964539, + "grad_norm": 24.39252471923828, + "learning_rate": 5e-05, + "loss": 1.2675, + "num_input_tokens_seen": 86797596, + "step": 1297 + }, + { + "epoch": 0.147177304964539, + "loss": 1.1520748138427734, + "loss_ce": 0.00522430706769228, + "loss_iou": 0.51953125, + "loss_num": 0.021484375, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 86797596, + "step": 1297 + }, + { + "epoch": 0.14729078014184396, + "grad_norm": 20.3963623046875, + "learning_rate": 5e-05, + "loss": 1.407, + "num_input_tokens_seen": 86862984, + "step": 1298 + }, + { + "epoch": 0.14729078014184396, + "loss": 1.3799612522125244, + "loss_ce": 0.006425987463444471, + "loss_iou": 0.59765625, + "loss_num": 0.03564453125, + "loss_xval": 1.375, + "num_input_tokens_seen": 86862984, + "step": 1298 + }, + { + "epoch": 0.14740425531914894, + "grad_norm": 29.38516616821289, + "learning_rate": 5e-05, + "loss": 1.3016, + "num_input_tokens_seen": 86930988, + "step": 1299 + }, + { + "epoch": 0.14740425531914894, + "loss": 1.1973681449890137, + "loss_ce": 0.0044970205053687096, + "loss_iou": 0.53125, + "loss_num": 0.025634765625, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 86930988, + "step": 1299 + }, + { + "epoch": 0.1475177304964539, + "grad_norm": 26.14555549621582, + "learning_rate": 5e-05, + "loss": 1.4342, + "num_input_tokens_seen": 86996768, + "step": 1300 + }, + { + "epoch": 0.1475177304964539, + "loss": 1.4338159561157227, + "loss_ce": 0.006447749212384224, + "loss_iou": 0.58984375, + "loss_num": 0.050048828125, + "loss_xval": 1.4296875, + "num_input_tokens_seen": 86996768, + "step": 1300 + }, + { + "epoch": 0.14763120567375887, + "grad_norm": 26.963953018188477, + "learning_rate": 5e-05, + "loss": 1.4429, + "num_input_tokens_seen": 87064600, + "step": 1301 + }, + { + "epoch": 0.14763120567375887, + "loss": 1.518078327178955, + "loss_ce": 0.007336154580116272, + "loss_iou": 0.65625, + "loss_num": 0.040283203125, + "loss_xval": 1.5078125, + "num_input_tokens_seen": 87064600, + "step": 1301 + }, + { + "epoch": 0.14774468085106382, + "grad_norm": 21.259740829467773, + "learning_rate": 5e-05, + "loss": 1.4435, + "num_input_tokens_seen": 87131072, + "step": 1302 + }, + { + "epoch": 0.14774468085106382, + "loss": 1.0970572233200073, + "loss_ce": 0.004527924582362175, + "loss_iou": 0.46484375, + "loss_num": 0.03271484375, + "loss_xval": 1.09375, + "num_input_tokens_seen": 87131072, + "step": 1302 + }, + { + "epoch": 0.1478581560283688, + "grad_norm": 31.432504653930664, + "learning_rate": 5e-05, + "loss": 1.3646, + "num_input_tokens_seen": 87196384, + "step": 1303 + }, + { + "epoch": 0.1478581560283688, + "loss": 1.484288215637207, + "loss_ce": 0.0038193881046026945, + "loss_iou": 0.64453125, + "loss_num": 0.038818359375, + "loss_xval": 1.484375, + "num_input_tokens_seen": 87196384, + "step": 1303 + }, + { + "epoch": 0.14797163120567375, + "grad_norm": 18.16376495361328, + "learning_rate": 5e-05, + "loss": 1.9419, + "num_input_tokens_seen": 87262884, + "step": 1304 + }, + { + "epoch": 0.14797163120567375, + "loss": 2.002736806869507, + "loss_ce": 0.004690042231231928, + "loss_iou": 0.8203125, + "loss_num": 0.072265625, + "loss_xval": 2.0, + "num_input_tokens_seen": 87262884, + "step": 1304 + }, + { + "epoch": 0.14808510638297873, + "grad_norm": 11.11935043334961, + "learning_rate": 5e-05, + "loss": 1.1973, + "num_input_tokens_seen": 87330388, + "step": 1305 + }, + { + "epoch": 0.14808510638297873, + "loss": 1.1713640689849854, + "loss_ce": 0.00827821809798479, + "loss_iou": 0.5234375, + "loss_num": 0.02294921875, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 87330388, + "step": 1305 + }, + { + "epoch": 0.14819858156028368, + "grad_norm": 15.846238136291504, + "learning_rate": 5e-05, + "loss": 1.3543, + "num_input_tokens_seen": 87396848, + "step": 1306 + }, + { + "epoch": 0.14819858156028368, + "loss": 1.3870909214019775, + "loss_ce": 0.008184690028429031, + "loss_iou": 0.6171875, + "loss_num": 0.02978515625, + "loss_xval": 1.375, + "num_input_tokens_seen": 87396848, + "step": 1306 + }, + { + "epoch": 0.14831205673758865, + "grad_norm": 30.55608367919922, + "learning_rate": 5e-05, + "loss": 1.36, + "num_input_tokens_seen": 87463056, + "step": 1307 + }, + { + "epoch": 0.14831205673758865, + "loss": 1.34807550907135, + "loss_ce": 0.005180013366043568, + "loss_iou": 0.546875, + "loss_num": 0.05029296875, + "loss_xval": 1.34375, + "num_input_tokens_seen": 87463056, + "step": 1307 + }, + { + "epoch": 0.1484255319148936, + "grad_norm": 131.8466796875, + "learning_rate": 5e-05, + "loss": 1.5923, + "num_input_tokens_seen": 87530008, + "step": 1308 + }, + { + "epoch": 0.1484255319148936, + "loss": 1.6604068279266357, + "loss_ce": 0.005133303813636303, + "loss_iou": 0.734375, + "loss_num": 0.037841796875, + "loss_xval": 1.65625, + "num_input_tokens_seen": 87530008, + "step": 1308 + }, + { + "epoch": 0.14853900709219858, + "grad_norm": 26.52593231201172, + "learning_rate": 5e-05, + "loss": 1.2208, + "num_input_tokens_seen": 87597104, + "step": 1309 + }, + { + "epoch": 0.14853900709219858, + "loss": 1.1116611957550049, + "loss_ce": 0.0037510301917791367, + "loss_iou": 0.51953125, + "loss_num": 0.0145263671875, + "loss_xval": 1.109375, + "num_input_tokens_seen": 87597104, + "step": 1309 + }, + { + "epoch": 0.14865248226950353, + "grad_norm": 32.560489654541016, + "learning_rate": 5e-05, + "loss": 1.6305, + "num_input_tokens_seen": 87664344, + "step": 1310 + }, + { + "epoch": 0.14865248226950353, + "loss": 1.6433274745941162, + "loss_ce": 0.0027024473529309034, + "loss_iou": 0.73046875, + "loss_num": 0.03564453125, + "loss_xval": 1.640625, + "num_input_tokens_seen": 87664344, + "step": 1310 + }, + { + "epoch": 0.1487659574468085, + "grad_norm": 18.625362396240234, + "learning_rate": 5e-05, + "loss": 1.5323, + "num_input_tokens_seen": 87730404, + "step": 1311 + }, + { + "epoch": 0.1487659574468085, + "loss": 1.6039080619812012, + "loss_ce": 0.0013690838823094964, + "loss_iou": 0.73046875, + "loss_num": 0.028564453125, + "loss_xval": 1.6015625, + "num_input_tokens_seen": 87730404, + "step": 1311 + }, + { + "epoch": 0.14887943262411346, + "grad_norm": 17.95171546936035, + "learning_rate": 5e-05, + "loss": 1.2088, + "num_input_tokens_seen": 87797688, + "step": 1312 + }, + { + "epoch": 0.14887943262411346, + "loss": 1.2553317546844482, + "loss_ce": 0.006308343261480331, + "loss_iou": 0.53125, + "loss_num": 0.037353515625, + "loss_xval": 1.25, + "num_input_tokens_seen": 87797688, + "step": 1312 + }, + { + "epoch": 0.14899290780141844, + "grad_norm": 36.34787368774414, + "learning_rate": 5e-05, + "loss": 1.2982, + "num_input_tokens_seen": 87864468, + "step": 1313 + }, + { + "epoch": 0.14899290780141844, + "loss": 1.273520827293396, + "loss_ce": 0.007895877584815025, + "loss_iou": 0.55859375, + "loss_num": 0.0291748046875, + "loss_xval": 1.265625, + "num_input_tokens_seen": 87864468, + "step": 1313 + }, + { + "epoch": 0.14910638297872342, + "grad_norm": 19.083904266357422, + "learning_rate": 5e-05, + "loss": 1.6311, + "num_input_tokens_seen": 87931332, + "step": 1314 + }, + { + "epoch": 0.14910638297872342, + "loss": 1.6248027086257935, + "loss_ce": 0.007126990705728531, + "loss_iou": 0.72265625, + "loss_num": 0.034423828125, + "loss_xval": 1.6171875, + "num_input_tokens_seen": 87931332, + "step": 1314 + }, + { + "epoch": 0.14921985815602837, + "grad_norm": 19.092147827148438, + "learning_rate": 5e-05, + "loss": 1.2973, + "num_input_tokens_seen": 87998932, + "step": 1315 + }, + { + "epoch": 0.14921985815602837, + "loss": 1.3417994976043701, + "loss_ce": 0.0029322972986847162, + "loss_iou": 0.54296875, + "loss_num": 0.05029296875, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 87998932, + "step": 1315 + }, + { + "epoch": 0.14933333333333335, + "grad_norm": 28.77894401550293, + "learning_rate": 5e-05, + "loss": 1.4129, + "num_input_tokens_seen": 88066592, + "step": 1316 + }, + { + "epoch": 0.14933333333333335, + "loss": 1.4974095821380615, + "loss_ce": 0.0032690782099962234, + "loss_iou": 0.65234375, + "loss_num": 0.037353515625, + "loss_xval": 1.4921875, + "num_input_tokens_seen": 88066592, + "step": 1316 + }, + { + "epoch": 0.1494468085106383, + "grad_norm": 22.24178695678711, + "learning_rate": 5e-05, + "loss": 1.438, + "num_input_tokens_seen": 88133772, + "step": 1317 + }, + { + "epoch": 0.1494468085106383, + "loss": 1.4850013256072998, + "loss_ce": 0.003067680401727557, + "loss_iou": 0.6484375, + "loss_num": 0.03662109375, + "loss_xval": 1.484375, + "num_input_tokens_seen": 88133772, + "step": 1317 + }, + { + "epoch": 0.14956028368794327, + "grad_norm": 30.215608596801758, + "learning_rate": 5e-05, + "loss": 1.2694, + "num_input_tokens_seen": 88200272, + "step": 1318 + }, + { + "epoch": 0.14956028368794327, + "loss": 1.2670977115631104, + "loss_ce": 0.0029375902377068996, + "loss_iou": 0.57421875, + "loss_num": 0.0235595703125, + "loss_xval": 1.265625, + "num_input_tokens_seen": 88200272, + "step": 1318 + }, + { + "epoch": 0.14967375886524822, + "grad_norm": 23.84105110168457, + "learning_rate": 5e-05, + "loss": 1.3874, + "num_input_tokens_seen": 88266688, + "step": 1319 + }, + { + "epoch": 0.14967375886524822, + "loss": 1.34384024143219, + "loss_ce": 0.00301999575458467, + "loss_iou": 0.59765625, + "loss_num": 0.0294189453125, + "loss_xval": 1.34375, + "num_input_tokens_seen": 88266688, + "step": 1319 + }, + { + "epoch": 0.1497872340425532, + "grad_norm": 30.594383239746094, + "learning_rate": 5e-05, + "loss": 1.503, + "num_input_tokens_seen": 88333712, + "step": 1320 + }, + { + "epoch": 0.1497872340425532, + "loss": 1.3129856586456299, + "loss_ce": 0.006100820377469063, + "loss_iou": 0.56640625, + "loss_num": 0.03466796875, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 88333712, + "step": 1320 + }, + { + "epoch": 0.14990070921985815, + "grad_norm": 23.04979133605957, + "learning_rate": 5e-05, + "loss": 1.4322, + "num_input_tokens_seen": 88400552, + "step": 1321 + }, + { + "epoch": 0.14990070921985815, + "loss": 1.515491008758545, + "loss_ce": 0.0042605772614479065, + "loss_iou": 0.6484375, + "loss_num": 0.04345703125, + "loss_xval": 1.5078125, + "num_input_tokens_seen": 88400552, + "step": 1321 + }, + { + "epoch": 0.15001418439716313, + "grad_norm": 25.429798126220703, + "learning_rate": 5e-05, + "loss": 1.3557, + "num_input_tokens_seen": 88467188, + "step": 1322 + }, + { + "epoch": 0.15001418439716313, + "loss": 1.3536975383758545, + "loss_ce": 0.003111570607870817, + "loss_iou": 0.6015625, + "loss_num": 0.030029296875, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 88467188, + "step": 1322 + }, + { + "epoch": 0.15012765957446808, + "grad_norm": 22.38202476501465, + "learning_rate": 5e-05, + "loss": 1.5347, + "num_input_tokens_seen": 88532300, + "step": 1323 + }, + { + "epoch": 0.15012765957446808, + "loss": 1.5672866106033325, + "loss_ce": 0.002833486534655094, + "loss_iou": 0.66015625, + "loss_num": 0.049072265625, + "loss_xval": 1.5625, + "num_input_tokens_seen": 88532300, + "step": 1323 + }, + { + "epoch": 0.15024113475177306, + "grad_norm": 43.44551086425781, + "learning_rate": 5e-05, + "loss": 1.5141, + "num_input_tokens_seen": 88598792, + "step": 1324 + }, + { + "epoch": 0.15024113475177306, + "loss": 1.6895990371704102, + "loss_ce": 0.00649357819929719, + "loss_iou": 0.71484375, + "loss_num": 0.05126953125, + "loss_xval": 1.6796875, + "num_input_tokens_seen": 88598792, + "step": 1324 + }, + { + "epoch": 0.150354609929078, + "grad_norm": 15.687923431396484, + "learning_rate": 5e-05, + "loss": 1.3574, + "num_input_tokens_seen": 88665100, + "step": 1325 + }, + { + "epoch": 0.150354609929078, + "loss": 1.268775224685669, + "loss_ce": 0.0014412363525480032, + "loss_iou": 0.53125, + "loss_num": 0.040771484375, + "loss_xval": 1.265625, + "num_input_tokens_seen": 88665100, + "step": 1325 + }, + { + "epoch": 0.150468085106383, + "grad_norm": 16.29924964904785, + "learning_rate": 5e-05, + "loss": 1.2754, + "num_input_tokens_seen": 88731464, + "step": 1326 + }, + { + "epoch": 0.150468085106383, + "loss": 1.254408836364746, + "loss_ce": 0.007338502909988165, + "loss_iou": 0.5546875, + "loss_num": 0.0283203125, + "loss_xval": 1.25, + "num_input_tokens_seen": 88731464, + "step": 1326 + }, + { + "epoch": 0.15058156028368794, + "grad_norm": 53.738182067871094, + "learning_rate": 5e-05, + "loss": 1.3078, + "num_input_tokens_seen": 88798464, + "step": 1327 + }, + { + "epoch": 0.15058156028368794, + "loss": 1.371898889541626, + "loss_ce": 0.006176131311804056, + "loss_iou": 0.5625, + "loss_num": 0.047607421875, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 88798464, + "step": 1327 + }, + { + "epoch": 0.15069503546099292, + "grad_norm": 47.25907897949219, + "learning_rate": 5e-05, + "loss": 1.6371, + "num_input_tokens_seen": 88865816, + "step": 1328 + }, + { + "epoch": 0.15069503546099292, + "loss": 1.5530340671539307, + "loss_ce": 0.003229305613785982, + "loss_iou": 0.671875, + "loss_num": 0.041748046875, + "loss_xval": 1.546875, + "num_input_tokens_seen": 88865816, + "step": 1328 + }, + { + "epoch": 0.15080851063829787, + "grad_norm": 15.325007438659668, + "learning_rate": 5e-05, + "loss": 1.1567, + "num_input_tokens_seen": 88932244, + "step": 1329 + }, + { + "epoch": 0.15080851063829787, + "loss": 1.2155239582061768, + "loss_ce": 0.005074711516499519, + "loss_iou": 0.49609375, + "loss_num": 0.0439453125, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 88932244, + "step": 1329 + }, + { + "epoch": 0.15092198581560284, + "grad_norm": 23.777345657348633, + "learning_rate": 5e-05, + "loss": 1.3193, + "num_input_tokens_seen": 89000140, + "step": 1330 + }, + { + "epoch": 0.15092198581560284, + "loss": 1.5056676864624023, + "loss_ce": 0.004691022913902998, + "loss_iou": 0.64453125, + "loss_num": 0.04150390625, + "loss_xval": 1.5, + "num_input_tokens_seen": 89000140, + "step": 1330 + }, + { + "epoch": 0.1510354609929078, + "grad_norm": 40.137977600097656, + "learning_rate": 5e-05, + "loss": 1.5105, + "num_input_tokens_seen": 89067440, + "step": 1331 + }, + { + "epoch": 0.1510354609929078, + "loss": 1.4940109252929688, + "loss_ce": 0.0009079549345187843, + "loss_iou": 0.6796875, + "loss_num": 0.02734375, + "loss_xval": 1.4921875, + "num_input_tokens_seen": 89067440, + "step": 1331 + }, + { + "epoch": 0.15114893617021277, + "grad_norm": 19.203996658325195, + "learning_rate": 5e-05, + "loss": 1.6002, + "num_input_tokens_seen": 89134808, + "step": 1332 + }, + { + "epoch": 0.15114893617021277, + "loss": 1.665140151977539, + "loss_ce": 0.003519084770232439, + "loss_iou": 0.69921875, + "loss_num": 0.052734375, + "loss_xval": 1.6640625, + "num_input_tokens_seen": 89134808, + "step": 1332 + }, + { + "epoch": 0.15126241134751772, + "grad_norm": 16.332576751708984, + "learning_rate": 5e-05, + "loss": 1.3398, + "num_input_tokens_seen": 89201804, + "step": 1333 + }, + { + "epoch": 0.15126241134751772, + "loss": 1.283353567123413, + "loss_ce": 0.007962837815284729, + "loss_iou": 0.546875, + "loss_num": 0.0361328125, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 89201804, + "step": 1333 + }, + { + "epoch": 0.1513758865248227, + "grad_norm": 31.501789093017578, + "learning_rate": 5e-05, + "loss": 1.1255, + "num_input_tokens_seen": 89269872, + "step": 1334 + }, + { + "epoch": 0.1513758865248227, + "loss": 1.2078235149383545, + "loss_ce": 0.006163432262837887, + "loss_iou": 0.5234375, + "loss_num": 0.031494140625, + "loss_xval": 1.203125, + "num_input_tokens_seen": 89269872, + "step": 1334 + }, + { + "epoch": 0.15148936170212765, + "grad_norm": 33.277523040771484, + "learning_rate": 5e-05, + "loss": 1.5181, + "num_input_tokens_seen": 89336780, + "step": 1335 + }, + { + "epoch": 0.15148936170212765, + "loss": 1.5452415943145752, + "loss_ce": 0.00178461370524019, + "loss_iou": 0.6875, + "loss_num": 0.033203125, + "loss_xval": 1.546875, + "num_input_tokens_seen": 89336780, + "step": 1335 + }, + { + "epoch": 0.15160283687943263, + "grad_norm": 40.19322204589844, + "learning_rate": 5e-05, + "loss": 1.398, + "num_input_tokens_seen": 89403552, + "step": 1336 + }, + { + "epoch": 0.15160283687943263, + "loss": 1.219425082206726, + "loss_ce": 0.005557941272854805, + "loss_iou": 0.515625, + "loss_num": 0.03662109375, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 89403552, + "step": 1336 + }, + { + "epoch": 0.15171631205673758, + "grad_norm": 54.804229736328125, + "learning_rate": 5e-05, + "loss": 1.5188, + "num_input_tokens_seen": 89470536, + "step": 1337 + }, + { + "epoch": 0.15171631205673758, + "loss": 1.4517970085144043, + "loss_ce": 0.007217020262032747, + "loss_iou": 0.64453125, + "loss_num": 0.03173828125, + "loss_xval": 1.4453125, + "num_input_tokens_seen": 89470536, + "step": 1337 + }, + { + "epoch": 0.15182978723404256, + "grad_norm": 72.18270874023438, + "learning_rate": 5e-05, + "loss": 1.6611, + "num_input_tokens_seen": 89537156, + "step": 1338 + }, + { + "epoch": 0.15182978723404256, + "loss": 1.6472914218902588, + "loss_ce": 0.0047133611515164375, + "loss_iou": 0.73046875, + "loss_num": 0.036865234375, + "loss_xval": 1.640625, + "num_input_tokens_seen": 89537156, + "step": 1338 + }, + { + "epoch": 0.1519432624113475, + "grad_norm": 21.567813873291016, + "learning_rate": 5e-05, + "loss": 1.3182, + "num_input_tokens_seen": 89603728, + "step": 1339 + }, + { + "epoch": 0.1519432624113475, + "loss": 1.3729610443115234, + "loss_ce": 0.007726723328232765, + "loss_iou": 0.57421875, + "loss_num": 0.043701171875, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 89603728, + "step": 1339 + }, + { + "epoch": 0.15205673758865249, + "grad_norm": 28.647380828857422, + "learning_rate": 5e-05, + "loss": 1.3365, + "num_input_tokens_seen": 89670972, + "step": 1340 + }, + { + "epoch": 0.15205673758865249, + "loss": 1.568037986755371, + "loss_ce": 0.0030965320765972137, + "loss_iou": 0.6640625, + "loss_num": 0.0478515625, + "loss_xval": 1.5625, + "num_input_tokens_seen": 89670972, + "step": 1340 + }, + { + "epoch": 0.15217021276595744, + "grad_norm": 18.30323028564453, + "learning_rate": 5e-05, + "loss": 1.4093, + "num_input_tokens_seen": 89737440, + "step": 1341 + }, + { + "epoch": 0.15217021276595744, + "loss": 1.6600297689437866, + "loss_ce": 0.002803241601213813, + "loss_iou": 0.76171875, + "loss_num": 0.02734375, + "loss_xval": 1.65625, + "num_input_tokens_seen": 89737440, + "step": 1341 + }, + { + "epoch": 0.1522836879432624, + "grad_norm": 19.870264053344727, + "learning_rate": 5e-05, + "loss": 1.1482, + "num_input_tokens_seen": 89803888, + "step": 1342 + }, + { + "epoch": 0.1522836879432624, + "loss": 1.2534723281860352, + "loss_ce": 0.005425491835922003, + "loss_iou": 0.50390625, + "loss_num": 0.04833984375, + "loss_xval": 1.25, + "num_input_tokens_seen": 89803888, + "step": 1342 + }, + { + "epoch": 0.15239716312056736, + "grad_norm": 44.718772888183594, + "learning_rate": 5e-05, + "loss": 1.3572, + "num_input_tokens_seen": 89870108, + "step": 1343 + }, + { + "epoch": 0.15239716312056736, + "loss": 1.7103073596954346, + "loss_ce": 0.0032760906033217907, + "loss_iou": 0.796875, + "loss_num": 0.022705078125, + "loss_xval": 1.703125, + "num_input_tokens_seen": 89870108, + "step": 1343 + }, + { + "epoch": 0.15251063829787234, + "grad_norm": 21.38633155822754, + "learning_rate": 5e-05, + "loss": 1.5642, + "num_input_tokens_seen": 89937072, + "step": 1344 + }, + { + "epoch": 0.15251063829787234, + "loss": 1.6256990432739258, + "loss_ce": 0.0036286981776356697, + "loss_iou": 0.69921875, + "loss_num": 0.04443359375, + "loss_xval": 1.625, + "num_input_tokens_seen": 89937072, + "step": 1344 + }, + { + "epoch": 0.1526241134751773, + "grad_norm": 14.071433067321777, + "learning_rate": 5e-05, + "loss": 1.1269, + "num_input_tokens_seen": 90003472, + "step": 1345 + }, + { + "epoch": 0.1526241134751773, + "loss": 1.2913217544555664, + "loss_ce": 0.0061654821038246155, + "loss_iou": 0.55859375, + "loss_num": 0.032958984375, + "loss_xval": 1.28125, + "num_input_tokens_seen": 90003472, + "step": 1345 + }, + { + "epoch": 0.15273758865248227, + "grad_norm": 18.457983016967773, + "learning_rate": 5e-05, + "loss": 1.4593, + "num_input_tokens_seen": 90067720, + "step": 1346 + }, + { + "epoch": 0.15273758865248227, + "loss": 1.3933038711547852, + "loss_ce": 0.004631969146430492, + "loss_iou": 0.5234375, + "loss_num": 0.06884765625, + "loss_xval": 1.390625, + "num_input_tokens_seen": 90067720, + "step": 1346 + }, + { + "epoch": 0.15285106382978722, + "grad_norm": 43.51057434082031, + "learning_rate": 5e-05, + "loss": 1.2823, + "num_input_tokens_seen": 90134580, + "step": 1347 + }, + { + "epoch": 0.15285106382978722, + "loss": 1.299647569656372, + "loss_ce": 0.0017959459219127893, + "loss_iou": 0.5859375, + "loss_num": 0.0255126953125, + "loss_xval": 1.296875, + "num_input_tokens_seen": 90134580, + "step": 1347 + }, + { + "epoch": 0.1529645390070922, + "grad_norm": 39.90752410888672, + "learning_rate": 5e-05, + "loss": 1.5134, + "num_input_tokens_seen": 90201144, + "step": 1348 + }, + { + "epoch": 0.1529645390070922, + "loss": 1.4388731718063354, + "loss_ce": 0.008270066231489182, + "loss_iou": 0.62109375, + "loss_num": 0.037109375, + "loss_xval": 1.4296875, + "num_input_tokens_seen": 90201144, + "step": 1348 + }, + { + "epoch": 0.15307801418439718, + "grad_norm": 45.20566177368164, + "learning_rate": 5e-05, + "loss": 1.3303, + "num_input_tokens_seen": 90268756, + "step": 1349 + }, + { + "epoch": 0.15307801418439718, + "loss": 1.4306793212890625, + "loss_ce": 0.003921461757272482, + "loss_iou": 0.625, + "loss_num": 0.03564453125, + "loss_xval": 1.4296875, + "num_input_tokens_seen": 90268756, + "step": 1349 + }, + { + "epoch": 0.15319148936170213, + "grad_norm": 36.97539138793945, + "learning_rate": 5e-05, + "loss": 1.6136, + "num_input_tokens_seen": 90336100, + "step": 1350 + }, + { + "epoch": 0.15319148936170213, + "loss": 1.5533440113067627, + "loss_ce": 0.008422227576375008, + "loss_iou": 0.67578125, + "loss_num": 0.038330078125, + "loss_xval": 1.546875, + "num_input_tokens_seen": 90336100, + "step": 1350 + }, + { + "epoch": 0.1533049645390071, + "grad_norm": 25.69546890258789, + "learning_rate": 5e-05, + "loss": 1.8265, + "num_input_tokens_seen": 90402600, + "step": 1351 + }, + { + "epoch": 0.1533049645390071, + "loss": 1.7640340328216553, + "loss_ce": 0.005244891624897718, + "loss_iou": 0.7578125, + "loss_num": 0.047607421875, + "loss_xval": 1.7578125, + "num_input_tokens_seen": 90402600, + "step": 1351 + }, + { + "epoch": 0.15341843971631206, + "grad_norm": 30.114797592163086, + "learning_rate": 5e-05, + "loss": 1.3739, + "num_input_tokens_seen": 90469484, + "step": 1352 + }, + { + "epoch": 0.15341843971631206, + "loss": 1.4780101776123047, + "loss_ce": 0.005353855900466442, + "loss_iou": 0.66796875, + "loss_num": 0.0281982421875, + "loss_xval": 1.46875, + "num_input_tokens_seen": 90469484, + "step": 1352 + }, + { + "epoch": 0.15353191489361703, + "grad_norm": 19.005199432373047, + "learning_rate": 5e-05, + "loss": 1.2766, + "num_input_tokens_seen": 90537308, + "step": 1353 + }, + { + "epoch": 0.15353191489361703, + "loss": 1.326777458190918, + "loss_ce": 0.006953287869691849, + "loss_iou": 0.58984375, + "loss_num": 0.028076171875, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 90537308, + "step": 1353 + }, + { + "epoch": 0.15364539007092198, + "grad_norm": 15.683085441589355, + "learning_rate": 5e-05, + "loss": 1.1925, + "num_input_tokens_seen": 90603720, + "step": 1354 + }, + { + "epoch": 0.15364539007092198, + "loss": 1.1946871280670166, + "loss_ce": 0.004257380496710539, + "loss_iou": 0.515625, + "loss_num": 0.031005859375, + "loss_xval": 1.1875, + "num_input_tokens_seen": 90603720, + "step": 1354 + }, + { + "epoch": 0.15375886524822696, + "grad_norm": 14.901130676269531, + "learning_rate": 5e-05, + "loss": 1.4121, + "num_input_tokens_seen": 90670180, + "step": 1355 + }, + { + "epoch": 0.15375886524822696, + "loss": 1.3641841411590576, + "loss_ce": 0.00773893017321825, + "loss_iou": 0.58203125, + "loss_num": 0.038818359375, + "loss_xval": 1.359375, + "num_input_tokens_seen": 90670180, + "step": 1355 + }, + { + "epoch": 0.1538723404255319, + "grad_norm": 21.161256790161133, + "learning_rate": 5e-05, + "loss": 1.3343, + "num_input_tokens_seen": 90737136, + "step": 1356 + }, + { + "epoch": 0.1538723404255319, + "loss": 1.3309085369110107, + "loss_ce": 0.0055300770327448845, + "loss_iou": 0.58984375, + "loss_num": 0.0294189453125, + "loss_xval": 1.328125, + "num_input_tokens_seen": 90737136, + "step": 1356 + }, + { + "epoch": 0.1539858156028369, + "grad_norm": 46.244178771972656, + "learning_rate": 5e-05, + "loss": 1.2808, + "num_input_tokens_seen": 90804324, + "step": 1357 + }, + { + "epoch": 0.1539858156028369, + "loss": 1.3070281744003296, + "loss_ce": 0.0033172271214425564, + "loss_iou": 0.57421875, + "loss_num": 0.03173828125, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 90804324, + "step": 1357 + }, + { + "epoch": 0.15409929078014184, + "grad_norm": 18.116413116455078, + "learning_rate": 5e-05, + "loss": 1.5826, + "num_input_tokens_seen": 90870236, + "step": 1358 + }, + { + "epoch": 0.15409929078014184, + "loss": 1.629413366317749, + "loss_ce": 0.004413431975990534, + "loss_iou": 0.671875, + "loss_num": 0.056640625, + "loss_xval": 1.625, + "num_input_tokens_seen": 90870236, + "step": 1358 + }, + { + "epoch": 0.15421276595744682, + "grad_norm": 20.526281356811523, + "learning_rate": 5e-05, + "loss": 1.4205, + "num_input_tokens_seen": 90936092, + "step": 1359 + }, + { + "epoch": 0.15421276595744682, + "loss": 1.58791983127594, + "loss_ce": 0.008818247355520725, + "loss_iou": 0.703125, + "loss_num": 0.034912109375, + "loss_xval": 1.578125, + "num_input_tokens_seen": 90936092, + "step": 1359 + }, + { + "epoch": 0.15432624113475177, + "grad_norm": 11.766497611999512, + "learning_rate": 5e-05, + "loss": 1.2709, + "num_input_tokens_seen": 91002028, + "step": 1360 + }, + { + "epoch": 0.15432624113475177, + "loss": 1.3968586921691895, + "loss_ce": 0.006721937097609043, + "loss_iou": 0.55078125, + "loss_num": 0.058349609375, + "loss_xval": 1.390625, + "num_input_tokens_seen": 91002028, + "step": 1360 + }, + { + "epoch": 0.15443971631205675, + "grad_norm": 17.351390838623047, + "learning_rate": 5e-05, + "loss": 1.2212, + "num_input_tokens_seen": 91069516, + "step": 1361 + }, + { + "epoch": 0.15443971631205675, + "loss": 1.3107273578643799, + "loss_ce": 0.005063238553702831, + "loss_iou": 0.55859375, + "loss_num": 0.038330078125, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 91069516, + "step": 1361 + }, + { + "epoch": 0.1545531914893617, + "grad_norm": 23.04197883605957, + "learning_rate": 5e-05, + "loss": 1.3187, + "num_input_tokens_seen": 91135364, + "step": 1362 + }, + { + "epoch": 0.1545531914893617, + "loss": 1.2058062553405762, + "loss_ce": 0.006099250167608261, + "loss_iou": 0.546875, + "loss_num": 0.0205078125, + "loss_xval": 1.203125, + "num_input_tokens_seen": 91135364, + "step": 1362 + }, + { + "epoch": 0.15466666666666667, + "grad_norm": 16.824413299560547, + "learning_rate": 5e-05, + "loss": 1.3703, + "num_input_tokens_seen": 91201824, + "step": 1363 + }, + { + "epoch": 0.15466666666666667, + "loss": 1.422565221786499, + "loss_ce": 0.004596465267241001, + "loss_iou": 0.58203125, + "loss_num": 0.05126953125, + "loss_xval": 1.421875, + "num_input_tokens_seen": 91201824, + "step": 1363 + }, + { + "epoch": 0.15478014184397162, + "grad_norm": 23.650577545166016, + "learning_rate": 5e-05, + "loss": 1.4882, + "num_input_tokens_seen": 91269228, + "step": 1364 + }, + { + "epoch": 0.15478014184397162, + "loss": 1.4713191986083984, + "loss_ce": 0.0025691664777696133, + "loss_iou": 0.61328125, + "loss_num": 0.048095703125, + "loss_xval": 1.46875, + "num_input_tokens_seen": 91269228, + "step": 1364 + }, + { + "epoch": 0.1548936170212766, + "grad_norm": 37.21215057373047, + "learning_rate": 5e-05, + "loss": 1.3919, + "num_input_tokens_seen": 91334968, + "step": 1365 + }, + { + "epoch": 0.1548936170212766, + "loss": 1.2927732467651367, + "loss_ce": 0.0037106466479599476, + "loss_iou": 0.5703125, + "loss_num": 0.029541015625, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 91334968, + "step": 1365 + }, + { + "epoch": 0.15500709219858155, + "grad_norm": 22.1804256439209, + "learning_rate": 5e-05, + "loss": 1.3812, + "num_input_tokens_seen": 91401764, + "step": 1366 + }, + { + "epoch": 0.15500709219858155, + "loss": 1.2366242408752441, + "loss_ce": 0.007132025435566902, + "loss_iou": 0.5234375, + "loss_num": 0.037109375, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 91401764, + "step": 1366 + }, + { + "epoch": 0.15512056737588653, + "grad_norm": 15.567319869995117, + "learning_rate": 5e-05, + "loss": 1.2785, + "num_input_tokens_seen": 91468260, + "step": 1367 + }, + { + "epoch": 0.15512056737588653, + "loss": 1.5738526582717896, + "loss_ce": 0.004028474446386099, + "loss_iou": 0.65625, + "loss_num": 0.05224609375, + "loss_xval": 1.5703125, + "num_input_tokens_seen": 91468260, + "step": 1367 + }, + { + "epoch": 0.15523404255319148, + "grad_norm": 23.154062271118164, + "learning_rate": 5e-05, + "loss": 1.1691, + "num_input_tokens_seen": 91534628, + "step": 1368 + }, + { + "epoch": 0.15523404255319148, + "loss": 1.2201457023620605, + "loss_ce": 0.005790105555206537, + "loss_iou": 0.53125, + "loss_num": 0.030517578125, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 91534628, + "step": 1368 + }, + { + "epoch": 0.15534751773049646, + "grad_norm": 24.188779830932617, + "learning_rate": 5e-05, + "loss": 1.5339, + "num_input_tokens_seen": 91601936, + "step": 1369 + }, + { + "epoch": 0.15534751773049646, + "loss": 1.5021262168884277, + "loss_ce": 0.00310288998298347, + "loss_iou": 0.6640625, + "loss_num": 0.03466796875, + "loss_xval": 1.5, + "num_input_tokens_seen": 91601936, + "step": 1369 + }, + { + "epoch": 0.1554609929078014, + "grad_norm": 31.827089309692383, + "learning_rate": 5e-05, + "loss": 1.2546, + "num_input_tokens_seen": 91667968, + "step": 1370 + }, + { + "epoch": 0.1554609929078014, + "loss": 1.1247382164001465, + "loss_ce": 0.0041327280923724174, + "loss_iou": 0.5, + "loss_num": 0.024169921875, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 91667968, + "step": 1370 + }, + { + "epoch": 0.1555744680851064, + "grad_norm": 18.617355346679688, + "learning_rate": 5e-05, + "loss": 1.6777, + "num_input_tokens_seen": 91735488, + "step": 1371 + }, + { + "epoch": 0.1555744680851064, + "loss": 1.698040246963501, + "loss_ce": 0.004680949728935957, + "loss_iou": 0.7265625, + "loss_num": 0.048095703125, + "loss_xval": 1.6953125, + "num_input_tokens_seen": 91735488, + "step": 1371 + }, + { + "epoch": 0.15568794326241134, + "grad_norm": 32.96821594238281, + "learning_rate": 5e-05, + "loss": 1.0845, + "num_input_tokens_seen": 91802636, + "step": 1372 + }, + { + "epoch": 0.15568794326241134, + "loss": 1.0120246410369873, + "loss_ce": 0.004700392484664917, + "loss_iou": 0.470703125, + "loss_num": 0.0130615234375, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 91802636, + "step": 1372 + }, + { + "epoch": 0.15580141843971632, + "grad_norm": 24.858501434326172, + "learning_rate": 5e-05, + "loss": 1.3131, + "num_input_tokens_seen": 91870468, + "step": 1373 + }, + { + "epoch": 0.15580141843971632, + "loss": 1.4251823425292969, + "loss_ce": 0.006237129680812359, + "loss_iou": 0.59765625, + "loss_num": 0.044189453125, + "loss_xval": 1.421875, + "num_input_tokens_seen": 91870468, + "step": 1373 + }, + { + "epoch": 0.15591489361702127, + "grad_norm": 17.894351959228516, + "learning_rate": 5e-05, + "loss": 1.2899, + "num_input_tokens_seen": 91937588, + "step": 1374 + }, + { + "epoch": 0.15591489361702127, + "loss": 1.1471444368362427, + "loss_ce": 0.00261323107406497, + "loss_iou": 0.53515625, + "loss_num": 0.0155029296875, + "loss_xval": 1.140625, + "num_input_tokens_seen": 91937588, + "step": 1374 + }, + { + "epoch": 0.15602836879432624, + "grad_norm": 18.379383087158203, + "learning_rate": 5e-05, + "loss": 1.1633, + "num_input_tokens_seen": 92004180, + "step": 1375 + }, + { + "epoch": 0.15602836879432624, + "loss": 1.1049127578735352, + "loss_ce": 0.0033502872101962566, + "loss_iou": 0.5078125, + "loss_num": 0.0169677734375, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 92004180, + "step": 1375 + }, + { + "epoch": 0.1561418439716312, + "grad_norm": 31.969707489013672, + "learning_rate": 5e-05, + "loss": 1.4084, + "num_input_tokens_seen": 92070864, + "step": 1376 + }, + { + "epoch": 0.1561418439716312, + "loss": 1.420957326889038, + "loss_ce": 0.003965111915022135, + "loss_iou": 0.62109375, + "loss_num": 0.03515625, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 92070864, + "step": 1376 + }, + { + "epoch": 0.15625531914893617, + "grad_norm": 39.44655990600586, + "learning_rate": 5e-05, + "loss": 1.6807, + "num_input_tokens_seen": 92138004, + "step": 1377 + }, + { + "epoch": 0.15625531914893617, + "loss": 1.5945696830749512, + "loss_ce": 0.005702439695596695, + "loss_iou": 0.6953125, + "loss_num": 0.040283203125, + "loss_xval": 1.5859375, + "num_input_tokens_seen": 92138004, + "step": 1377 + }, + { + "epoch": 0.15636879432624112, + "grad_norm": 18.147369384765625, + "learning_rate": 5e-05, + "loss": 1.1846, + "num_input_tokens_seen": 92205128, + "step": 1378 + }, + { + "epoch": 0.15636879432624112, + "loss": 1.109562635421753, + "loss_ce": 0.007511892355978489, + "loss_iou": 0.490234375, + "loss_num": 0.0240478515625, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 92205128, + "step": 1378 + }, + { + "epoch": 0.1564822695035461, + "grad_norm": 32.61055374145508, + "learning_rate": 5e-05, + "loss": 1.2809, + "num_input_tokens_seen": 92272376, + "step": 1379 + }, + { + "epoch": 0.1564822695035461, + "loss": 1.293632984161377, + "loss_ce": 0.003105674870312214, + "loss_iou": 0.5546875, + "loss_num": 0.03515625, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 92272376, + "step": 1379 + }, + { + "epoch": 0.15659574468085105, + "grad_norm": 12.650940895080566, + "learning_rate": 5e-05, + "loss": 1.378, + "num_input_tokens_seen": 92339244, + "step": 1380 + }, + { + "epoch": 0.15659574468085105, + "loss": 1.3828403949737549, + "loss_ce": 0.003934201784431934, + "loss_iou": 0.59375, + "loss_num": 0.0390625, + "loss_xval": 1.375, + "num_input_tokens_seen": 92339244, + "step": 1380 + }, + { + "epoch": 0.15670921985815603, + "grad_norm": 12.53935718536377, + "learning_rate": 5e-05, + "loss": 1.1725, + "num_input_tokens_seen": 92406236, + "step": 1381 + }, + { + "epoch": 0.15670921985815603, + "loss": 1.0685477256774902, + "loss_ce": 0.0040944963693618774, + "loss_iou": 0.462890625, + "loss_num": 0.0277099609375, + "loss_xval": 1.0625, + "num_input_tokens_seen": 92406236, + "step": 1381 + }, + { + "epoch": 0.15682269503546098, + "grad_norm": 18.331872940063477, + "learning_rate": 5e-05, + "loss": 1.2387, + "num_input_tokens_seen": 92473648, + "step": 1382 + }, + { + "epoch": 0.15682269503546098, + "loss": 1.2127772569656372, + "loss_ce": 0.010628847405314445, + "loss_iou": 0.5390625, + "loss_num": 0.02392578125, + "loss_xval": 1.203125, + "num_input_tokens_seen": 92473648, + "step": 1382 + }, + { + "epoch": 0.15693617021276596, + "grad_norm": 38.12130355834961, + "learning_rate": 5e-05, + "loss": 1.2633, + "num_input_tokens_seen": 92539972, + "step": 1383 + }, + { + "epoch": 0.15693617021276596, + "loss": 1.231189250946045, + "loss_ce": 0.003650241531431675, + "loss_iou": 0.52734375, + "loss_num": 0.0341796875, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 92539972, + "step": 1383 + }, + { + "epoch": 0.15704964539007094, + "grad_norm": 17.765104293823242, + "learning_rate": 5e-05, + "loss": 1.6642, + "num_input_tokens_seen": 92607812, + "step": 1384 + }, + { + "epoch": 0.15704964539007094, + "loss": 1.7928417921066284, + "loss_ce": 0.0076855807565152645, + "loss_iou": 0.78515625, + "loss_num": 0.043701171875, + "loss_xval": 1.78125, + "num_input_tokens_seen": 92607812, + "step": 1384 + }, + { + "epoch": 0.15716312056737589, + "grad_norm": 22.487810134887695, + "learning_rate": 5e-05, + "loss": 1.4091, + "num_input_tokens_seen": 92675012, + "step": 1385 + }, + { + "epoch": 0.15716312056737589, + "loss": 1.5882363319396973, + "loss_ce": 0.0062050651758909225, + "loss_iou": 0.671875, + "loss_num": 0.0478515625, + "loss_xval": 1.578125, + "num_input_tokens_seen": 92675012, + "step": 1385 + }, + { + "epoch": 0.15727659574468086, + "grad_norm": 28.956083297729492, + "learning_rate": 5e-05, + "loss": 1.4748, + "num_input_tokens_seen": 92741736, + "step": 1386 + }, + { + "epoch": 0.15727659574468086, + "loss": 1.3492623567581177, + "loss_ce": 0.004535795655101538, + "loss_iou": 0.609375, + "loss_num": 0.025146484375, + "loss_xval": 1.34375, + "num_input_tokens_seen": 92741736, + "step": 1386 + }, + { + "epoch": 0.15739007092198581, + "grad_norm": 50.92910385131836, + "learning_rate": 5e-05, + "loss": 1.5846, + "num_input_tokens_seen": 92808560, + "step": 1387 + }, + { + "epoch": 0.15739007092198581, + "loss": 1.606842279434204, + "loss_ce": 0.0018618226749822497, + "loss_iou": 0.65625, + "loss_num": 0.05810546875, + "loss_xval": 1.6015625, + "num_input_tokens_seen": 92808560, + "step": 1387 + }, + { + "epoch": 0.1575035460992908, + "grad_norm": 27.498180389404297, + "learning_rate": 5e-05, + "loss": 1.5027, + "num_input_tokens_seen": 92876064, + "step": 1388 + }, + { + "epoch": 0.1575035460992908, + "loss": 1.5918192863464355, + "loss_ce": 0.010764554142951965, + "loss_iou": 0.65234375, + "loss_num": 0.05517578125, + "loss_xval": 1.578125, + "num_input_tokens_seen": 92876064, + "step": 1388 + }, + { + "epoch": 0.15761702127659574, + "grad_norm": 23.942522048950195, + "learning_rate": 5e-05, + "loss": 1.3949, + "num_input_tokens_seen": 92943048, + "step": 1389 + }, + { + "epoch": 0.15761702127659574, + "loss": 1.3999953269958496, + "loss_ce": 0.009858600795269012, + "loss_iou": 0.59375, + "loss_num": 0.040771484375, + "loss_xval": 1.390625, + "num_input_tokens_seen": 92943048, + "step": 1389 + }, + { + "epoch": 0.15773049645390072, + "grad_norm": 39.49870300292969, + "learning_rate": 5e-05, + "loss": 1.4558, + "num_input_tokens_seen": 93009736, + "step": 1390 + }, + { + "epoch": 0.15773049645390072, + "loss": 1.4393961429595947, + "loss_ce": 0.00677884928882122, + "loss_iou": 0.55859375, + "loss_num": 0.06298828125, + "loss_xval": 1.4296875, + "num_input_tokens_seen": 93009736, + "step": 1390 + }, + { + "epoch": 0.15784397163120567, + "grad_norm": 19.24323272705078, + "learning_rate": 5e-05, + "loss": 1.5471, + "num_input_tokens_seen": 93075864, + "step": 1391 + }, + { + "epoch": 0.15784397163120567, + "loss": 1.6856744289398193, + "loss_ce": 0.005987050477415323, + "loss_iou": 0.7109375, + "loss_num": 0.052490234375, + "loss_xval": 1.6796875, + "num_input_tokens_seen": 93075864, + "step": 1391 + }, + { + "epoch": 0.15795744680851065, + "grad_norm": 36.51688766479492, + "learning_rate": 5e-05, + "loss": 1.4576, + "num_input_tokens_seen": 93141740, + "step": 1392 + }, + { + "epoch": 0.15795744680851065, + "loss": 1.7025197744369507, + "loss_ce": 0.004521877039223909, + "loss_iou": 0.67578125, + "loss_num": 0.06982421875, + "loss_xval": 1.6953125, + "num_input_tokens_seen": 93141740, + "step": 1392 + }, + { + "epoch": 0.1580709219858156, + "grad_norm": 29.804399490356445, + "learning_rate": 5e-05, + "loss": 1.5875, + "num_input_tokens_seen": 93209688, + "step": 1393 + }, + { + "epoch": 0.1580709219858156, + "loss": 1.6070996522903442, + "loss_ce": 0.005537157412618399, + "loss_iou": 0.703125, + "loss_num": 0.038818359375, + "loss_xval": 1.6015625, + "num_input_tokens_seen": 93209688, + "step": 1393 + }, + { + "epoch": 0.15818439716312058, + "grad_norm": 20.788650512695312, + "learning_rate": 5e-05, + "loss": 1.3211, + "num_input_tokens_seen": 93277384, + "step": 1394 + }, + { + "epoch": 0.15818439716312058, + "loss": 1.2497215270996094, + "loss_ce": 0.011684361845254898, + "loss_iou": 0.55859375, + "loss_num": 0.024658203125, + "loss_xval": 1.234375, + "num_input_tokens_seen": 93277384, + "step": 1394 + }, + { + "epoch": 0.15829787234042553, + "grad_norm": 28.783672332763672, + "learning_rate": 5e-05, + "loss": 1.2878, + "num_input_tokens_seen": 93344584, + "step": 1395 + }, + { + "epoch": 0.15829787234042553, + "loss": 1.2628042697906494, + "loss_ce": 0.004015154205262661, + "loss_iou": 0.546875, + "loss_num": 0.032958984375, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 93344584, + "step": 1395 + }, + { + "epoch": 0.1584113475177305, + "grad_norm": 23.85704803466797, + "learning_rate": 5e-05, + "loss": 1.5126, + "num_input_tokens_seen": 93412240, + "step": 1396 + }, + { + "epoch": 0.1584113475177305, + "loss": 1.419162631034851, + "loss_ce": 0.0036353091709315777, + "loss_iou": 0.6328125, + "loss_num": 0.0296630859375, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 93412240, + "step": 1396 + }, + { + "epoch": 0.15852482269503546, + "grad_norm": 15.094533920288086, + "learning_rate": 5e-05, + "loss": 1.3959, + "num_input_tokens_seen": 93479788, + "step": 1397 + }, + { + "epoch": 0.15852482269503546, + "loss": 1.3642773628234863, + "loss_ce": 0.0034374836832284927, + "loss_iou": 0.5234375, + "loss_num": 0.0625, + "loss_xval": 1.359375, + "num_input_tokens_seen": 93479788, + "step": 1397 + }, + { + "epoch": 0.15863829787234043, + "grad_norm": 19.201356887817383, + "learning_rate": 5e-05, + "loss": 1.3868, + "num_input_tokens_seen": 93546976, + "step": 1398 + }, + { + "epoch": 0.15863829787234043, + "loss": 1.347615122795105, + "loss_ce": 0.004841729998588562, + "loss_iou": 0.56640625, + "loss_num": 0.041748046875, + "loss_xval": 1.34375, + "num_input_tokens_seen": 93546976, + "step": 1398 + }, + { + "epoch": 0.15875177304964538, + "grad_norm": 21.355201721191406, + "learning_rate": 5e-05, + "loss": 1.3233, + "num_input_tokens_seen": 93613312, + "step": 1399 + }, + { + "epoch": 0.15875177304964538, + "loss": 1.2923245429992676, + "loss_ce": 0.004726899787783623, + "loss_iou": 0.5390625, + "loss_num": 0.0419921875, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 93613312, + "step": 1399 + }, + { + "epoch": 0.15886524822695036, + "grad_norm": 14.224013328552246, + "learning_rate": 5e-05, + "loss": 1.1242, + "num_input_tokens_seen": 93679648, + "step": 1400 + }, + { + "epoch": 0.15886524822695036, + "loss": 1.227968454360962, + "loss_ce": 0.002748686820268631, + "loss_iou": 0.53125, + "loss_num": 0.032470703125, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 93679648, + "step": 1400 + }, + { + "epoch": 0.1589787234042553, + "grad_norm": 18.453062057495117, + "learning_rate": 5e-05, + "loss": 1.1425, + "num_input_tokens_seen": 93746156, + "step": 1401 + }, + { + "epoch": 0.1589787234042553, + "loss": 1.0093094110488892, + "loss_ce": 0.0034500123001635075, + "loss_iou": 0.451171875, + "loss_num": 0.0205078125, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 93746156, + "step": 1401 + }, + { + "epoch": 0.1590921985815603, + "grad_norm": 66.59143829345703, + "learning_rate": 5e-05, + "loss": 1.4361, + "num_input_tokens_seen": 93813096, + "step": 1402 + }, + { + "epoch": 0.1590921985815603, + "loss": 1.3888654708862305, + "loss_ce": 0.0031234021298587322, + "loss_iou": 0.5625, + "loss_num": 0.052734375, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 93813096, + "step": 1402 + }, + { + "epoch": 0.15920567375886524, + "grad_norm": 41.567325592041016, + "learning_rate": 5e-05, + "loss": 1.4573, + "num_input_tokens_seen": 93880736, + "step": 1403 + }, + { + "epoch": 0.15920567375886524, + "loss": 1.6992911100387573, + "loss_ce": 0.0025137588381767273, + "loss_iou": 0.734375, + "loss_num": 0.046630859375, + "loss_xval": 1.6953125, + "num_input_tokens_seen": 93880736, + "step": 1403 + }, + { + "epoch": 0.15931914893617022, + "grad_norm": 33.03921127319336, + "learning_rate": 5e-05, + "loss": 1.593, + "num_input_tokens_seen": 93947984, + "step": 1404 + }, + { + "epoch": 0.15931914893617022, + "loss": 1.438969373703003, + "loss_ce": 0.008305240422487259, + "loss_iou": 0.63671875, + "loss_num": 0.031982421875, + "loss_xval": 1.4296875, + "num_input_tokens_seen": 93947984, + "step": 1404 + }, + { + "epoch": 0.15943262411347517, + "grad_norm": 30.453556060791016, + "learning_rate": 5e-05, + "loss": 1.3927, + "num_input_tokens_seen": 94014396, + "step": 1405 + }, + { + "epoch": 0.15943262411347517, + "loss": 1.388251781463623, + "loss_ce": 0.007392475381493568, + "loss_iou": 0.60546875, + "loss_num": 0.034423828125, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 94014396, + "step": 1405 + }, + { + "epoch": 0.15954609929078015, + "grad_norm": 25.083738327026367, + "learning_rate": 5e-05, + "loss": 1.2442, + "num_input_tokens_seen": 94081216, + "step": 1406 + }, + { + "epoch": 0.15954609929078015, + "loss": 1.0857148170471191, + "loss_ce": 0.005148348398506641, + "loss_iou": 0.50390625, + "loss_num": 0.01519775390625, + "loss_xval": 1.078125, + "num_input_tokens_seen": 94081216, + "step": 1406 + }, + { + "epoch": 0.1596595744680851, + "grad_norm": 24.726619720458984, + "learning_rate": 5e-05, + "loss": 1.6437, + "num_input_tokens_seen": 94148432, + "step": 1407 + }, + { + "epoch": 0.1596595744680851, + "loss": 1.679570198059082, + "loss_ce": 0.004765571095049381, + "loss_iou": 0.69140625, + "loss_num": 0.057861328125, + "loss_xval": 1.671875, + "num_input_tokens_seen": 94148432, + "step": 1407 + }, + { + "epoch": 0.15977304964539008, + "grad_norm": 38.99140930175781, + "learning_rate": 5e-05, + "loss": 1.444, + "num_input_tokens_seen": 94215404, + "step": 1408 + }, + { + "epoch": 0.15977304964539008, + "loss": 1.400331735610962, + "loss_ce": 0.005312158726155758, + "loss_iou": 0.57421875, + "loss_num": 0.049560546875, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 94215404, + "step": 1408 + }, + { + "epoch": 0.15988652482269503, + "grad_norm": 22.523021697998047, + "learning_rate": 5e-05, + "loss": 1.3627, + "num_input_tokens_seen": 94282596, + "step": 1409 + }, + { + "epoch": 0.15988652482269503, + "loss": 1.2792434692382812, + "loss_ce": 0.003364439820870757, + "loss_iou": 0.578125, + "loss_num": 0.0234375, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 94282596, + "step": 1409 + }, + { + "epoch": 0.16, + "grad_norm": 38.39946746826172, + "learning_rate": 5e-05, + "loss": 1.5942, + "num_input_tokens_seen": 94350008, + "step": 1410 + }, + { + "epoch": 0.16, + "loss": 1.4759876728057861, + "loss_ce": 0.004307943396270275, + "loss_iou": 0.6015625, + "loss_num": 0.053466796875, + "loss_xval": 1.46875, + "num_input_tokens_seen": 94350008, + "step": 1410 + }, + { + "epoch": 0.16011347517730495, + "grad_norm": 21.726795196533203, + "learning_rate": 5e-05, + "loss": 1.6216, + "num_input_tokens_seen": 94416364, + "step": 1411 + }, + { + "epoch": 0.16011347517730495, + "loss": 1.6459157466888428, + "loss_ce": 0.009196922183036804, + "loss_iou": 0.7265625, + "loss_num": 0.03662109375, + "loss_xval": 1.640625, + "num_input_tokens_seen": 94416364, + "step": 1411 + }, + { + "epoch": 0.16022695035460993, + "grad_norm": 22.730215072631836, + "learning_rate": 5e-05, + "loss": 1.2861, + "num_input_tokens_seen": 94483904, + "step": 1412 + }, + { + "epoch": 0.16022695035460993, + "loss": 1.2237051725387573, + "loss_ce": 0.004466916900128126, + "loss_iou": 0.53125, + "loss_num": 0.031982421875, + "loss_xval": 1.21875, + "num_input_tokens_seen": 94483904, + "step": 1412 + }, + { + "epoch": 0.16034042553191488, + "grad_norm": 44.6351432800293, + "learning_rate": 5e-05, + "loss": 1.4992, + "num_input_tokens_seen": 94550324, + "step": 1413 + }, + { + "epoch": 0.16034042553191488, + "loss": 1.6185182332992554, + "loss_ce": 0.018908875063061714, + "loss_iou": 0.65234375, + "loss_num": 0.058837890625, + "loss_xval": 1.6015625, + "num_input_tokens_seen": 94550324, + "step": 1413 + }, + { + "epoch": 0.16045390070921986, + "grad_norm": 50.50934982299805, + "learning_rate": 5e-05, + "loss": 1.5205, + "num_input_tokens_seen": 94617944, + "step": 1414 + }, + { + "epoch": 0.16045390070921986, + "loss": 1.4503462314605713, + "loss_ce": 0.005033812485635281, + "loss_iou": 0.59765625, + "loss_num": 0.0498046875, + "loss_xval": 1.4453125, + "num_input_tokens_seen": 94617944, + "step": 1414 + }, + { + "epoch": 0.1605673758865248, + "grad_norm": 23.921512603759766, + "learning_rate": 5e-05, + "loss": 1.3747, + "num_input_tokens_seen": 94683568, + "step": 1415 + }, + { + "epoch": 0.1605673758865248, + "loss": 1.5307536125183105, + "loss_ce": 0.005363067612051964, + "loss_iou": 0.6875, + "loss_num": 0.0301513671875, + "loss_xval": 1.5234375, + "num_input_tokens_seen": 94683568, + "step": 1415 + }, + { + "epoch": 0.1606808510638298, + "grad_norm": 30.10498046875, + "learning_rate": 5e-05, + "loss": 1.2795, + "num_input_tokens_seen": 94750752, + "step": 1416 + }, + { + "epoch": 0.1606808510638298, + "loss": 1.4065062999725342, + "loss_ce": 0.0031859264709055424, + "loss_iou": 0.60546875, + "loss_num": 0.038818359375, + "loss_xval": 1.40625, + "num_input_tokens_seen": 94750752, + "step": 1416 + }, + { + "epoch": 0.16079432624113474, + "grad_norm": 24.4038143157959, + "learning_rate": 5e-05, + "loss": 1.3604, + "num_input_tokens_seen": 94818172, + "step": 1417 + }, + { + "epoch": 0.16079432624113474, + "loss": 1.1757440567016602, + "loss_ce": 0.003869038075208664, + "loss_iou": 0.53515625, + "loss_num": 0.0201416015625, + "loss_xval": 1.171875, + "num_input_tokens_seen": 94818172, + "step": 1417 + }, + { + "epoch": 0.16090780141843972, + "grad_norm": 33.857154846191406, + "learning_rate": 5e-05, + "loss": 1.5052, + "num_input_tokens_seen": 94885908, + "step": 1418 + }, + { + "epoch": 0.16090780141843972, + "loss": 1.5999641418457031, + "loss_ce": 0.001331255305558443, + "loss_iou": 0.6875, + "loss_num": 0.04443359375, + "loss_xval": 1.6015625, + "num_input_tokens_seen": 94885908, + "step": 1418 + }, + { + "epoch": 0.1610212765957447, + "grad_norm": 36.55307388305664, + "learning_rate": 5e-05, + "loss": 1.6459, + "num_input_tokens_seen": 94953636, + "step": 1419 + }, + { + "epoch": 0.1610212765957447, + "loss": 1.3633986711502075, + "loss_ce": 0.009394821710884571, + "loss_iou": 0.6015625, + "loss_num": 0.030517578125, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 94953636, + "step": 1419 + }, + { + "epoch": 0.16113475177304964, + "grad_norm": 20.821714401245117, + "learning_rate": 5e-05, + "loss": 1.1792, + "num_input_tokens_seen": 95020460, + "step": 1420 + }, + { + "epoch": 0.16113475177304964, + "loss": 1.0877606868743896, + "loss_ce": 0.0042644827626645565, + "loss_iou": 0.486328125, + "loss_num": 0.021728515625, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 95020460, + "step": 1420 + }, + { + "epoch": 0.16124822695035462, + "grad_norm": 42.285945892333984, + "learning_rate": 5e-05, + "loss": 1.2538, + "num_input_tokens_seen": 95087304, + "step": 1421 + }, + { + "epoch": 0.16124822695035462, + "loss": 1.174832820892334, + "loss_ce": 0.004422661382704973, + "loss_iou": 0.5234375, + "loss_num": 0.0242919921875, + "loss_xval": 1.171875, + "num_input_tokens_seen": 95087304, + "step": 1421 + }, + { + "epoch": 0.16136170212765957, + "grad_norm": 42.113773345947266, + "learning_rate": 5e-05, + "loss": 1.6904, + "num_input_tokens_seen": 95154612, + "step": 1422 + }, + { + "epoch": 0.16136170212765957, + "loss": 1.7112860679626465, + "loss_ce": 0.006696177646517754, + "loss_iou": 0.7734375, + "loss_num": 0.03125, + "loss_xval": 1.703125, + "num_input_tokens_seen": 95154612, + "step": 1422 + }, + { + "epoch": 0.16147517730496455, + "grad_norm": 22.64089012145996, + "learning_rate": 5e-05, + "loss": 1.6004, + "num_input_tokens_seen": 95221692, + "step": 1423 + }, + { + "epoch": 0.16147517730496455, + "loss": 1.79758882522583, + "loss_ce": 0.005596687085926533, + "loss_iou": 0.734375, + "loss_num": 0.064453125, + "loss_xval": 1.7890625, + "num_input_tokens_seen": 95221692, + "step": 1423 + }, + { + "epoch": 0.1615886524822695, + "grad_norm": 13.115741729736328, + "learning_rate": 5e-05, + "loss": 1.5802, + "num_input_tokens_seen": 95288412, + "step": 1424 + }, + { + "epoch": 0.1615886524822695, + "loss": 1.4457597732543945, + "loss_ce": 0.004353449679911137, + "loss_iou": 0.609375, + "loss_num": 0.043701171875, + "loss_xval": 1.4375, + "num_input_tokens_seen": 95288412, + "step": 1424 + }, + { + "epoch": 0.16170212765957448, + "grad_norm": 23.07511329650879, + "learning_rate": 5e-05, + "loss": 1.4063, + "num_input_tokens_seen": 95356512, + "step": 1425 + }, + { + "epoch": 0.16170212765957448, + "loss": 1.4681141376495361, + "loss_ce": 0.003758698236197233, + "loss_iou": 0.6171875, + "loss_num": 0.04638671875, + "loss_xval": 1.4609375, + "num_input_tokens_seen": 95356512, + "step": 1425 + }, + { + "epoch": 0.16181560283687943, + "grad_norm": 17.38796615600586, + "learning_rate": 5e-05, + "loss": 1.3174, + "num_input_tokens_seen": 95423644, + "step": 1426 + }, + { + "epoch": 0.16181560283687943, + "loss": 0.9631972908973694, + "loss_ce": 0.0037246306892484426, + "loss_iou": 0.43359375, + "loss_num": 0.01904296875, + "loss_xval": 0.9609375, + "num_input_tokens_seen": 95423644, + "step": 1426 + }, + { + "epoch": 0.1619290780141844, + "grad_norm": 25.567916870117188, + "learning_rate": 5e-05, + "loss": 1.392, + "num_input_tokens_seen": 95490652, + "step": 1427 + }, + { + "epoch": 0.1619290780141844, + "loss": 1.6146681308746338, + "loss_ce": 0.009199297986924648, + "loss_iou": 0.671875, + "loss_num": 0.052490234375, + "loss_xval": 1.609375, + "num_input_tokens_seen": 95490652, + "step": 1427 + }, + { + "epoch": 0.16204255319148936, + "grad_norm": 21.4984188079834, + "learning_rate": 5e-05, + "loss": 1.3979, + "num_input_tokens_seen": 95558072, + "step": 1428 + }, + { + "epoch": 0.16204255319148936, + "loss": 1.2100920677185059, + "loss_ce": 0.004037249833345413, + "loss_iou": 0.546875, + "loss_num": 0.02294921875, + "loss_xval": 1.203125, + "num_input_tokens_seen": 95558072, + "step": 1428 + }, + { + "epoch": 0.16215602836879434, + "grad_norm": 26.298498153686523, + "learning_rate": 5e-05, + "loss": 1.4124, + "num_input_tokens_seen": 95623364, + "step": 1429 + }, + { + "epoch": 0.16215602836879434, + "loss": 1.4905102252960205, + "loss_ce": 0.0032054453622549772, + "loss_iou": 0.66796875, + "loss_num": 0.0303955078125, + "loss_xval": 1.484375, + "num_input_tokens_seen": 95623364, + "step": 1429 + }, + { + "epoch": 0.1622695035460993, + "grad_norm": 27.643165588378906, + "learning_rate": 5e-05, + "loss": 1.5931, + "num_input_tokens_seen": 95690120, + "step": 1430 + }, + { + "epoch": 0.1622695035460993, + "loss": 1.7808769941329956, + "loss_ce": 0.004021477419883013, + "loss_iou": 0.7421875, + "loss_num": 0.05810546875, + "loss_xval": 1.7734375, + "num_input_tokens_seen": 95690120, + "step": 1430 + }, + { + "epoch": 0.16238297872340426, + "grad_norm": 33.58243179321289, + "learning_rate": 5e-05, + "loss": 1.2484, + "num_input_tokens_seen": 95756732, + "step": 1431 + }, + { + "epoch": 0.16238297872340426, + "loss": 1.3167545795440674, + "loss_ce": 0.009137367829680443, + "loss_iou": 0.5703125, + "loss_num": 0.0341796875, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 95756732, + "step": 1431 + }, + { + "epoch": 0.16249645390070921, + "grad_norm": 16.946487426757812, + "learning_rate": 5e-05, + "loss": 1.2454, + "num_input_tokens_seen": 95823416, + "step": 1432 + }, + { + "epoch": 0.16249645390070921, + "loss": 1.330282211303711, + "loss_ce": 0.005086924880743027, + "loss_iou": 0.5390625, + "loss_num": 0.049072265625, + "loss_xval": 1.328125, + "num_input_tokens_seen": 95823416, + "step": 1432 + }, + { + "epoch": 0.1626099290780142, + "grad_norm": 23.932472229003906, + "learning_rate": 5e-05, + "loss": 1.3489, + "num_input_tokens_seen": 95890288, + "step": 1433 + }, + { + "epoch": 0.1626099290780142, + "loss": 1.2600212097167969, + "loss_ce": 0.007335656322538853, + "loss_iou": 0.5234375, + "loss_num": 0.040771484375, + "loss_xval": 1.25, + "num_input_tokens_seen": 95890288, + "step": 1433 + }, + { + "epoch": 0.16272340425531914, + "grad_norm": 37.80769348144531, + "learning_rate": 5e-05, + "loss": 1.6092, + "num_input_tokens_seen": 95956748, + "step": 1434 + }, + { + "epoch": 0.16272340425531914, + "loss": 1.6296859979629517, + "loss_ce": 0.0027328995056450367, + "loss_iou": 0.67578125, + "loss_num": 0.054931640625, + "loss_xval": 1.625, + "num_input_tokens_seen": 95956748, + "step": 1434 + }, + { + "epoch": 0.16283687943262412, + "grad_norm": 20.53826332092285, + "learning_rate": 5e-05, + "loss": 1.4577, + "num_input_tokens_seen": 96023008, + "step": 1435 + }, + { + "epoch": 0.16283687943262412, + "loss": 1.403676152229309, + "loss_ce": 0.004140029661357403, + "loss_iou": 0.625, + "loss_num": 0.0299072265625, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 96023008, + "step": 1435 + }, + { + "epoch": 0.16295035460992907, + "grad_norm": 21.817943572998047, + "learning_rate": 5e-05, + "loss": 1.1335, + "num_input_tokens_seen": 96090252, + "step": 1436 + }, + { + "epoch": 0.16295035460992907, + "loss": 1.047298550605774, + "loss_ce": 0.00481809675693512, + "loss_iou": 0.4375, + "loss_num": 0.03369140625, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 96090252, + "step": 1436 + }, + { + "epoch": 0.16306382978723405, + "grad_norm": 38.023765563964844, + "learning_rate": 5e-05, + "loss": 1.4604, + "num_input_tokens_seen": 96158176, + "step": 1437 + }, + { + "epoch": 0.16306382978723405, + "loss": 1.3360669612884521, + "loss_ce": 0.005500554107129574, + "loss_iou": 0.6015625, + "loss_num": 0.0245361328125, + "loss_xval": 1.328125, + "num_input_tokens_seen": 96158176, + "step": 1437 + }, + { + "epoch": 0.163177304964539, + "grad_norm": 24.376617431640625, + "learning_rate": 5e-05, + "loss": 1.691, + "num_input_tokens_seen": 96224580, + "step": 1438 + }, + { + "epoch": 0.163177304964539, + "loss": 1.5829079151153564, + "loss_ce": 0.0057595293037593365, + "loss_iou": 0.703125, + "loss_num": 0.034423828125, + "loss_xval": 1.578125, + "num_input_tokens_seen": 96224580, + "step": 1438 + }, + { + "epoch": 0.16329078014184398, + "grad_norm": 23.051786422729492, + "learning_rate": 5e-05, + "loss": 1.2926, + "num_input_tokens_seen": 96291228, + "step": 1439 + }, + { + "epoch": 0.16329078014184398, + "loss": 1.2865984439849854, + "loss_ce": 0.003395350417122245, + "loss_iou": 0.5546875, + "loss_num": 0.03515625, + "loss_xval": 1.28125, + "num_input_tokens_seen": 96291228, + "step": 1439 + }, + { + "epoch": 0.16340425531914893, + "grad_norm": 22.817651748657227, + "learning_rate": 5e-05, + "loss": 1.3646, + "num_input_tokens_seen": 96357856, + "step": 1440 + }, + { + "epoch": 0.16340425531914893, + "loss": 1.4351881742477417, + "loss_ce": 0.005012440495193005, + "loss_iou": 0.60546875, + "loss_num": 0.04345703125, + "loss_xval": 1.4296875, + "num_input_tokens_seen": 96357856, + "step": 1440 + }, + { + "epoch": 0.1635177304964539, + "grad_norm": 26.09381866455078, + "learning_rate": 5e-05, + "loss": 1.5939, + "num_input_tokens_seen": 96424296, + "step": 1441 + }, + { + "epoch": 0.1635177304964539, + "loss": 1.528671383857727, + "loss_ce": 0.005233873147517443, + "loss_iou": 0.5859375, + "loss_num": 0.06982421875, + "loss_xval": 1.5234375, + "num_input_tokens_seen": 96424296, + "step": 1441 + }, + { + "epoch": 0.16363120567375886, + "grad_norm": 32.26089096069336, + "learning_rate": 5e-05, + "loss": 1.3071, + "num_input_tokens_seen": 96491000, + "step": 1442 + }, + { + "epoch": 0.16363120567375886, + "loss": 1.4495906829833984, + "loss_ce": 0.006231298204511404, + "loss_iou": 0.625, + "loss_num": 0.03955078125, + "loss_xval": 1.4453125, + "num_input_tokens_seen": 96491000, + "step": 1442 + }, + { + "epoch": 0.16374468085106383, + "grad_norm": 15.914433479309082, + "learning_rate": 5e-05, + "loss": 1.3271, + "num_input_tokens_seen": 96558332, + "step": 1443 + }, + { + "epoch": 0.16374468085106383, + "loss": 1.2748092412948608, + "loss_ce": 0.009184177033603191, + "loss_iou": 0.56640625, + "loss_num": 0.0260009765625, + "loss_xval": 1.265625, + "num_input_tokens_seen": 96558332, + "step": 1443 + }, + { + "epoch": 0.16385815602836878, + "grad_norm": 35.92223358154297, + "learning_rate": 5e-05, + "loss": 1.4563, + "num_input_tokens_seen": 96625432, + "step": 1444 + }, + { + "epoch": 0.16385815602836878, + "loss": 1.472663402557373, + "loss_ce": 0.0029369371477514505, + "loss_iou": 0.61328125, + "loss_num": 0.0478515625, + "loss_xval": 1.46875, + "num_input_tokens_seen": 96625432, + "step": 1444 + }, + { + "epoch": 0.16397163120567376, + "grad_norm": 57.21689224243164, + "learning_rate": 5e-05, + "loss": 1.2259, + "num_input_tokens_seen": 96692888, + "step": 1445 + }, + { + "epoch": 0.16397163120567376, + "loss": 1.2812305688858032, + "loss_ce": 0.006328223273158073, + "loss_iou": 0.5390625, + "loss_num": 0.039794921875, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 96692888, + "step": 1445 + }, + { + "epoch": 0.1640851063829787, + "grad_norm": 27.887786865234375, + "learning_rate": 5e-05, + "loss": 1.367, + "num_input_tokens_seen": 96759812, + "step": 1446 + }, + { + "epoch": 0.1640851063829787, + "loss": 1.410744071006775, + "loss_ce": 0.008400285616517067, + "loss_iou": 0.59375, + "loss_num": 0.042724609375, + "loss_xval": 1.40625, + "num_input_tokens_seen": 96759812, + "step": 1446 + }, + { + "epoch": 0.1641985815602837, + "grad_norm": 26.82257652282715, + "learning_rate": 5e-05, + "loss": 1.4782, + "num_input_tokens_seen": 96826772, + "step": 1447 + }, + { + "epoch": 0.1641985815602837, + "loss": 1.3864299058914185, + "loss_ce": 0.008500264957547188, + "loss_iou": 0.5546875, + "loss_num": 0.052978515625, + "loss_xval": 1.375, + "num_input_tokens_seen": 96826772, + "step": 1447 + }, + { + "epoch": 0.16431205673758864, + "grad_norm": 31.328495025634766, + "learning_rate": 5e-05, + "loss": 1.2333, + "num_input_tokens_seen": 96891240, + "step": 1448 + }, + { + "epoch": 0.16431205673758864, + "loss": 1.401232361793518, + "loss_ce": 0.008654315024614334, + "loss_iou": 0.55859375, + "loss_num": 0.0546875, + "loss_xval": 1.390625, + "num_input_tokens_seen": 96891240, + "step": 1448 + }, + { + "epoch": 0.16442553191489362, + "grad_norm": 35.84320068359375, + "learning_rate": 5e-05, + "loss": 1.4497, + "num_input_tokens_seen": 96957508, + "step": 1449 + }, + { + "epoch": 0.16442553191489362, + "loss": 1.5435035228729248, + "loss_ce": 0.004441011697053909, + "loss_iou": 0.671875, + "loss_num": 0.039306640625, + "loss_xval": 1.5390625, + "num_input_tokens_seen": 96957508, + "step": 1449 + }, + { + "epoch": 0.16453900709219857, + "grad_norm": 20.688451766967773, + "learning_rate": 5e-05, + "loss": 1.3384, + "num_input_tokens_seen": 97024356, + "step": 1450 + }, + { + "epoch": 0.16453900709219857, + "loss": 1.442873239517212, + "loss_ce": 0.006838122382760048, + "loss_iou": 0.61328125, + "loss_num": 0.04150390625, + "loss_xval": 1.4375, + "num_input_tokens_seen": 97024356, + "step": 1450 + }, + { + "epoch": 0.16465248226950355, + "grad_norm": 21.572616577148438, + "learning_rate": 5e-05, + "loss": 1.1904, + "num_input_tokens_seen": 97090540, + "step": 1451 + }, + { + "epoch": 0.16465248226950355, + "loss": 1.2216044664382935, + "loss_ce": 0.007554125972092152, + "loss_iou": 0.470703125, + "loss_num": 0.054443359375, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 97090540, + "step": 1451 + }, + { + "epoch": 0.1647659574468085, + "grad_norm": 23.397375106811523, + "learning_rate": 5e-05, + "loss": 1.5875, + "num_input_tokens_seen": 97158156, + "step": 1452 + }, + { + "epoch": 0.1647659574468085, + "loss": 1.7389628887176514, + "loss_ce": 0.0036113921087235212, + "loss_iou": 0.73828125, + "loss_num": 0.0517578125, + "loss_xval": 1.734375, + "num_input_tokens_seen": 97158156, + "step": 1452 + }, + { + "epoch": 0.16487943262411348, + "grad_norm": 24.104740142822266, + "learning_rate": 5e-05, + "loss": 1.5919, + "num_input_tokens_seen": 97225160, + "step": 1453 + }, + { + "epoch": 0.16487943262411348, + "loss": 1.8354196548461914, + "loss_ce": 0.00827120803296566, + "loss_iou": 0.7578125, + "loss_num": 0.0634765625, + "loss_xval": 1.828125, + "num_input_tokens_seen": 97225160, + "step": 1453 + }, + { + "epoch": 0.16499290780141845, + "grad_norm": 25.37748146057129, + "learning_rate": 5e-05, + "loss": 1.161, + "num_input_tokens_seen": 97291564, + "step": 1454 + }, + { + "epoch": 0.16499290780141845, + "loss": 1.0973637104034424, + "loss_ce": 0.0065434155985713005, + "loss_iou": 0.455078125, + "loss_num": 0.03662109375, + "loss_xval": 1.09375, + "num_input_tokens_seen": 97291564, + "step": 1454 + }, + { + "epoch": 0.1651063829787234, + "grad_norm": 30.489192962646484, + "learning_rate": 5e-05, + "loss": 1.3208, + "num_input_tokens_seen": 97357436, + "step": 1455 + }, + { + "epoch": 0.1651063829787234, + "loss": 1.0959283113479614, + "loss_ce": 0.00217838236130774, + "loss_iou": 0.49609375, + "loss_num": 0.02001953125, + "loss_xval": 1.09375, + "num_input_tokens_seen": 97357436, + "step": 1455 + }, + { + "epoch": 0.16521985815602838, + "grad_norm": 21.0488224029541, + "learning_rate": 5e-05, + "loss": 1.4853, + "num_input_tokens_seen": 97423892, + "step": 1456 + }, + { + "epoch": 0.16521985815602838, + "loss": 1.365999460220337, + "loss_ce": 0.006563376635313034, + "loss_iou": 0.6015625, + "loss_num": 0.031494140625, + "loss_xval": 1.359375, + "num_input_tokens_seen": 97423892, + "step": 1456 + }, + { + "epoch": 0.16533333333333333, + "grad_norm": 31.726165771484375, + "learning_rate": 5e-05, + "loss": 1.3677, + "num_input_tokens_seen": 97490800, + "step": 1457 + }, + { + "epoch": 0.16533333333333333, + "loss": 1.2327532768249512, + "loss_ce": 0.0045427437871694565, + "loss_iou": 0.5, + "loss_num": 0.0458984375, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 97490800, + "step": 1457 + }, + { + "epoch": 0.1654468085106383, + "grad_norm": 16.500457763671875, + "learning_rate": 5e-05, + "loss": 1.2192, + "num_input_tokens_seen": 97558400, + "step": 1458 + }, + { + "epoch": 0.1654468085106383, + "loss": 1.3959393501281738, + "loss_ce": 0.007755795493721962, + "loss_iou": 0.6015625, + "loss_num": 0.037109375, + "loss_xval": 1.390625, + "num_input_tokens_seen": 97558400, + "step": 1458 + }, + { + "epoch": 0.16556028368794326, + "grad_norm": 27.220361709594727, + "learning_rate": 5e-05, + "loss": 1.3957, + "num_input_tokens_seen": 97624824, + "step": 1459 + }, + { + "epoch": 0.16556028368794326, + "loss": 1.3116284608840942, + "loss_ce": 0.008894074708223343, + "loss_iou": 0.59375, + "loss_num": 0.023193359375, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 97624824, + "step": 1459 + }, + { + "epoch": 0.16567375886524824, + "grad_norm": 21.191598892211914, + "learning_rate": 5e-05, + "loss": 1.6661, + "num_input_tokens_seen": 97691688, + "step": 1460 + }, + { + "epoch": 0.16567375886524824, + "loss": 1.6598697900772095, + "loss_ce": 0.003619779134169221, + "loss_iou": 0.73046875, + "loss_num": 0.0390625, + "loss_xval": 1.65625, + "num_input_tokens_seen": 97691688, + "step": 1460 + }, + { + "epoch": 0.1657872340425532, + "grad_norm": 38.055057525634766, + "learning_rate": 5e-05, + "loss": 1.4343, + "num_input_tokens_seen": 97758540, + "step": 1461 + }, + { + "epoch": 0.1657872340425532, + "loss": 1.2253727912902832, + "loss_ce": 0.0019536272156983614, + "loss_iou": 0.53515625, + "loss_num": 0.031005859375, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 97758540, + "step": 1461 + }, + { + "epoch": 0.16590070921985817, + "grad_norm": 424.5186462402344, + "learning_rate": 5e-05, + "loss": 1.8316, + "num_input_tokens_seen": 97825988, + "step": 1462 + }, + { + "epoch": 0.16590070921985817, + "loss": 1.8880770206451416, + "loss_ce": 0.004287923686206341, + "loss_iou": 0.7890625, + "loss_num": 0.06103515625, + "loss_xval": 1.8828125, + "num_input_tokens_seen": 97825988, + "step": 1462 + }, + { + "epoch": 0.16601418439716312, + "grad_norm": 27.811918258666992, + "learning_rate": 5e-05, + "loss": 1.3183, + "num_input_tokens_seen": 97892368, + "step": 1463 + }, + { + "epoch": 0.16601418439716312, + "loss": 1.2533859014511108, + "loss_ce": 0.006315615959465504, + "loss_iou": 0.5546875, + "loss_num": 0.0277099609375, + "loss_xval": 1.25, + "num_input_tokens_seen": 97892368, + "step": 1463 + }, + { + "epoch": 0.1661276595744681, + "grad_norm": 24.245166778564453, + "learning_rate": 5e-05, + "loss": 1.2, + "num_input_tokens_seen": 97960600, + "step": 1464 + }, + { + "epoch": 0.1661276595744681, + "loss": 1.1538692712783813, + "loss_ce": 0.004546819254755974, + "loss_iou": 0.50390625, + "loss_num": 0.02783203125, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 97960600, + "step": 1464 + }, + { + "epoch": 0.16624113475177305, + "grad_norm": 34.72902297973633, + "learning_rate": 5e-05, + "loss": 1.548, + "num_input_tokens_seen": 98027036, + "step": 1465 + }, + { + "epoch": 0.16624113475177305, + "loss": 1.5323960781097412, + "loss_ce": 0.0050523909740149975, + "loss_iou": 0.66796875, + "loss_num": 0.0380859375, + "loss_xval": 1.53125, + "num_input_tokens_seen": 98027036, + "step": 1465 + }, + { + "epoch": 0.16635460992907802, + "grad_norm": 29.500917434692383, + "learning_rate": 5e-05, + "loss": 1.2561, + "num_input_tokens_seen": 98094744, + "step": 1466 + }, + { + "epoch": 0.16635460992907802, + "loss": 1.2057849168777466, + "loss_ce": 0.006077905185520649, + "loss_iou": 0.5390625, + "loss_num": 0.023681640625, + "loss_xval": 1.203125, + "num_input_tokens_seen": 98094744, + "step": 1466 + }, + { + "epoch": 0.16646808510638297, + "grad_norm": 23.344463348388672, + "learning_rate": 5e-05, + "loss": 1.5569, + "num_input_tokens_seen": 98162096, + "step": 1467 + }, + { + "epoch": 0.16646808510638297, + "loss": 1.5655384063720703, + "loss_ce": 0.006456367671489716, + "loss_iou": 0.6875, + "loss_num": 0.03759765625, + "loss_xval": 1.5625, + "num_input_tokens_seen": 98162096, + "step": 1467 + }, + { + "epoch": 0.16658156028368795, + "grad_norm": 14.585994720458984, + "learning_rate": 5e-05, + "loss": 1.1208, + "num_input_tokens_seen": 98228028, + "step": 1468 + }, + { + "epoch": 0.16658156028368795, + "loss": 1.1995770931243896, + "loss_ce": 0.005729406140744686, + "loss_iou": 0.52734375, + "loss_num": 0.0274658203125, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 98228028, + "step": 1468 + }, + { + "epoch": 0.1666950354609929, + "grad_norm": 18.86631202697754, + "learning_rate": 5e-05, + "loss": 1.38, + "num_input_tokens_seen": 98294736, + "step": 1469 + }, + { + "epoch": 0.1666950354609929, + "loss": 1.1892995834350586, + "loss_ce": 0.003264414146542549, + "loss_iou": 0.49609375, + "loss_num": 0.038330078125, + "loss_xval": 1.1875, + "num_input_tokens_seen": 98294736, + "step": 1469 + }, + { + "epoch": 0.16680851063829788, + "grad_norm": 30.92275619506836, + "learning_rate": 5e-05, + "loss": 1.4332, + "num_input_tokens_seen": 98361968, + "step": 1470 + }, + { + "epoch": 0.16680851063829788, + "loss": 1.3166730403900146, + "loss_ce": 0.007102668285369873, + "loss_iou": 0.57421875, + "loss_num": 0.03271484375, + "loss_xval": 1.3125, + "num_input_tokens_seen": 98361968, + "step": 1470 + }, + { + "epoch": 0.16692198581560283, + "grad_norm": 25.262672424316406, + "learning_rate": 5e-05, + "loss": 1.5039, + "num_input_tokens_seen": 98428772, + "step": 1471 + }, + { + "epoch": 0.16692198581560283, + "loss": 1.463072657585144, + "loss_ce": 0.004088252782821655, + "loss_iou": 0.62890625, + "loss_num": 0.041015625, + "loss_xval": 1.4609375, + "num_input_tokens_seen": 98428772, + "step": 1471 + }, + { + "epoch": 0.1670354609929078, + "grad_norm": 14.580304145812988, + "learning_rate": 5e-05, + "loss": 1.2212, + "num_input_tokens_seen": 98495240, + "step": 1472 + }, + { + "epoch": 0.1670354609929078, + "loss": 1.2914528846740723, + "loss_ce": 0.00824972614645958, + "loss_iou": 0.5859375, + "loss_num": 0.02294921875, + "loss_xval": 1.28125, + "num_input_tokens_seen": 98495240, + "step": 1472 + }, + { + "epoch": 0.16714893617021276, + "grad_norm": 27.52585220336914, + "learning_rate": 5e-05, + "loss": 1.2662, + "num_input_tokens_seen": 98561416, + "step": 1473 + }, + { + "epoch": 0.16714893617021276, + "loss": 1.2806737422943115, + "loss_ce": 0.00381830264814198, + "loss_iou": 0.546875, + "loss_num": 0.03662109375, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 98561416, + "step": 1473 + }, + { + "epoch": 0.16726241134751774, + "grad_norm": 47.88740921020508, + "learning_rate": 5e-05, + "loss": 1.6896, + "num_input_tokens_seen": 98629160, + "step": 1474 + }, + { + "epoch": 0.16726241134751774, + "loss": 1.5139180421829224, + "loss_ce": 0.009035298600792885, + "loss_iou": 0.6796875, + "loss_num": 0.0284423828125, + "loss_xval": 1.5078125, + "num_input_tokens_seen": 98629160, + "step": 1474 + }, + { + "epoch": 0.1673758865248227, + "grad_norm": 14.886201858520508, + "learning_rate": 5e-05, + "loss": 1.6097, + "num_input_tokens_seen": 98696528, + "step": 1475 + }, + { + "epoch": 0.1673758865248227, + "loss": 1.7023037672042847, + "loss_ce": 0.0030850055627524853, + "loss_iou": 0.75390625, + "loss_num": 0.03857421875, + "loss_xval": 1.703125, + "num_input_tokens_seen": 98696528, + "step": 1475 + }, + { + "epoch": 0.16748936170212766, + "grad_norm": 19.381032943725586, + "learning_rate": 5e-05, + "loss": 1.3333, + "num_input_tokens_seen": 98763088, + "step": 1476 + }, + { + "epoch": 0.16748936170212766, + "loss": 1.3863070011138916, + "loss_ce": 0.004104873165488243, + "loss_iou": 0.5625, + "loss_num": 0.050537109375, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 98763088, + "step": 1476 + }, + { + "epoch": 0.16760283687943262, + "grad_norm": 20.505300521850586, + "learning_rate": 5e-05, + "loss": 1.3055, + "num_input_tokens_seen": 98829992, + "step": 1477 + }, + { + "epoch": 0.16760283687943262, + "loss": 1.4282262325286865, + "loss_ce": 0.004398132674396038, + "loss_iou": 0.59765625, + "loss_num": 0.0458984375, + "loss_xval": 1.421875, + "num_input_tokens_seen": 98829992, + "step": 1477 + }, + { + "epoch": 0.1677163120567376, + "grad_norm": 22.874101638793945, + "learning_rate": 5e-05, + "loss": 1.3465, + "num_input_tokens_seen": 98897300, + "step": 1478 + }, + { + "epoch": 0.1677163120567376, + "loss": 1.5260050296783447, + "loss_ce": 0.009159315377473831, + "loss_iou": 0.640625, + "loss_num": 0.046875, + "loss_xval": 1.515625, + "num_input_tokens_seen": 98897300, + "step": 1478 + }, + { + "epoch": 0.16782978723404254, + "grad_norm": 22.481815338134766, + "learning_rate": 5e-05, + "loss": 1.3259, + "num_input_tokens_seen": 98961572, + "step": 1479 + }, + { + "epoch": 0.16782978723404254, + "loss": 1.465559720993042, + "loss_ce": 0.007185642141848803, + "loss_iou": 0.5703125, + "loss_num": 0.06396484375, + "loss_xval": 1.4609375, + "num_input_tokens_seen": 98961572, + "step": 1479 + }, + { + "epoch": 0.16794326241134752, + "grad_norm": 23.551807403564453, + "learning_rate": 5e-05, + "loss": 1.2358, + "num_input_tokens_seen": 99029332, + "step": 1480 + }, + { + "epoch": 0.16794326241134752, + "loss": 1.3161590099334717, + "loss_ce": 0.006100350059568882, + "loss_iou": 0.5703125, + "loss_num": 0.033203125, + "loss_xval": 1.3125, + "num_input_tokens_seen": 99029332, + "step": 1480 + }, + { + "epoch": 0.16805673758865247, + "grad_norm": 25.916025161743164, + "learning_rate": 5e-05, + "loss": 1.3946, + "num_input_tokens_seen": 99095292, + "step": 1481 + }, + { + "epoch": 0.16805673758865247, + "loss": 1.3856544494628906, + "loss_ce": 0.008091025985777378, + "loss_iou": 0.57421875, + "loss_num": 0.0458984375, + "loss_xval": 1.375, + "num_input_tokens_seen": 99095292, + "step": 1481 + }, + { + "epoch": 0.16817021276595745, + "grad_norm": 36.21730041503906, + "learning_rate": 5e-05, + "loss": 1.3617, + "num_input_tokens_seen": 99162700, + "step": 1482 + }, + { + "epoch": 0.16817021276595745, + "loss": 1.3843227624893188, + "loss_ce": 0.005904826335608959, + "loss_iou": 0.63671875, + "loss_num": 0.021240234375, + "loss_xval": 1.375, + "num_input_tokens_seen": 99162700, + "step": 1482 + }, + { + "epoch": 0.1682836879432624, + "grad_norm": 23.224609375, + "learning_rate": 5e-05, + "loss": 1.5161, + "num_input_tokens_seen": 99229324, + "step": 1483 + }, + { + "epoch": 0.1682836879432624, + "loss": 1.5423758029937744, + "loss_ce": 0.004289960488677025, + "loss_iou": 0.69140625, + "loss_num": 0.031494140625, + "loss_xval": 1.5390625, + "num_input_tokens_seen": 99229324, + "step": 1483 + }, + { + "epoch": 0.16839716312056738, + "grad_norm": 15.24223518371582, + "learning_rate": 5e-05, + "loss": 1.0938, + "num_input_tokens_seen": 99295556, + "step": 1484 + }, + { + "epoch": 0.16839716312056738, + "loss": 0.978603184223175, + "loss_ce": 0.006923487409949303, + "loss_iou": 0.4453125, + "loss_num": 0.016357421875, + "loss_xval": 0.97265625, + "num_input_tokens_seen": 99295556, + "step": 1484 + }, + { + "epoch": 0.16851063829787233, + "grad_norm": 19.662750244140625, + "learning_rate": 5e-05, + "loss": 1.1119, + "num_input_tokens_seen": 99361580, + "step": 1485 + }, + { + "epoch": 0.16851063829787233, + "loss": 1.0264986753463745, + "loss_ce": 0.007699855603277683, + "loss_iou": 0.44921875, + "loss_num": 0.0240478515625, + "loss_xval": 1.015625, + "num_input_tokens_seen": 99361580, + "step": 1485 + }, + { + "epoch": 0.1686241134751773, + "grad_norm": 105.3409194946289, + "learning_rate": 5e-05, + "loss": 1.3567, + "num_input_tokens_seen": 99428856, + "step": 1486 + }, + { + "epoch": 0.1686241134751773, + "loss": 1.1675132513046265, + "loss_ce": 0.003939040470868349, + "loss_iou": 0.515625, + "loss_num": 0.02685546875, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 99428856, + "step": 1486 + }, + { + "epoch": 0.16873758865248226, + "grad_norm": 25.26343536376953, + "learning_rate": 5e-05, + "loss": 1.5666, + "num_input_tokens_seen": 99496248, + "step": 1487 + }, + { + "epoch": 0.16873758865248226, + "loss": 1.5267245769500732, + "loss_ce": 0.005240214057266712, + "loss_iou": 0.65625, + "loss_num": 0.041748046875, + "loss_xval": 1.5234375, + "num_input_tokens_seen": 99496248, + "step": 1487 + }, + { + "epoch": 0.16885106382978723, + "grad_norm": 42.41560745239258, + "learning_rate": 5e-05, + "loss": 1.5307, + "num_input_tokens_seen": 99563524, + "step": 1488 + }, + { + "epoch": 0.16885106382978723, + "loss": 1.6865386962890625, + "loss_ce": 0.004409718792885542, + "loss_iou": 0.71875, + "loss_num": 0.04931640625, + "loss_xval": 1.6796875, + "num_input_tokens_seen": 99563524, + "step": 1488 + }, + { + "epoch": 0.16896453900709218, + "grad_norm": 18.602066040039062, + "learning_rate": 5e-05, + "loss": 1.6919, + "num_input_tokens_seen": 99630784, + "step": 1489 + }, + { + "epoch": 0.16896453900709218, + "loss": 1.6378512382507324, + "loss_ce": 0.00406227121129632, + "loss_iou": 0.734375, + "loss_num": 0.033447265625, + "loss_xval": 1.6328125, + "num_input_tokens_seen": 99630784, + "step": 1489 + }, + { + "epoch": 0.16907801418439716, + "grad_norm": 23.160255432128906, + "learning_rate": 5e-05, + "loss": 1.3943, + "num_input_tokens_seen": 99697328, + "step": 1490 + }, + { + "epoch": 0.16907801418439716, + "loss": 1.3519089221954346, + "loss_ce": 0.004252796992659569, + "loss_iou": 0.55859375, + "loss_num": 0.04638671875, + "loss_xval": 1.34375, + "num_input_tokens_seen": 99697328, + "step": 1490 + }, + { + "epoch": 0.16919148936170214, + "grad_norm": 29.85942840576172, + "learning_rate": 5e-05, + "loss": 1.2343, + "num_input_tokens_seen": 99763268, + "step": 1491 + }, + { + "epoch": 0.16919148936170214, + "loss": 1.233814001083374, + "loss_ce": 0.0037115479353815317, + "loss_iou": 0.56640625, + "loss_num": 0.0198974609375, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 99763268, + "step": 1491 + }, + { + "epoch": 0.1693049645390071, + "grad_norm": 36.65243911743164, + "learning_rate": 5e-05, + "loss": 1.4076, + "num_input_tokens_seen": 99831536, + "step": 1492 + }, + { + "epoch": 0.1693049645390071, + "loss": 1.4199793338775635, + "loss_ce": 0.005916828755289316, + "loss_iou": 0.6328125, + "loss_num": 0.030029296875, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 99831536, + "step": 1492 + }, + { + "epoch": 0.16941843971631207, + "grad_norm": 24.73849105834961, + "learning_rate": 5e-05, + "loss": 1.2721, + "num_input_tokens_seen": 99897764, + "step": 1493 + }, + { + "epoch": 0.16941843971631207, + "loss": 1.1725544929504395, + "loss_ce": 0.005562379024922848, + "loss_iou": 0.515625, + "loss_num": 0.026611328125, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 99897764, + "step": 1493 + }, + { + "epoch": 0.16953191489361702, + "grad_norm": 21.93195343017578, + "learning_rate": 5e-05, + "loss": 1.363, + "num_input_tokens_seen": 99963960, + "step": 1494 + }, + { + "epoch": 0.16953191489361702, + "loss": 1.2714269161224365, + "loss_ce": 0.0067785196006298065, + "loss_iou": 0.57421875, + "loss_num": 0.0228271484375, + "loss_xval": 1.265625, + "num_input_tokens_seen": 99963960, + "step": 1494 + }, + { + "epoch": 0.169645390070922, + "grad_norm": 30.62963104248047, + "learning_rate": 5e-05, + "loss": 1.4306, + "num_input_tokens_seen": 100031652, + "step": 1495 + }, + { + "epoch": 0.169645390070922, + "loss": 1.3794103860855103, + "loss_ce": 0.005386976059526205, + "loss_iou": 0.6015625, + "loss_num": 0.033447265625, + "loss_xval": 1.375, + "num_input_tokens_seen": 100031652, + "step": 1495 + }, + { + "epoch": 0.16975886524822695, + "grad_norm": 28.81067657470703, + "learning_rate": 5e-05, + "loss": 1.4261, + "num_input_tokens_seen": 100098860, + "step": 1496 + }, + { + "epoch": 0.16975886524822695, + "loss": 1.5277912616729736, + "loss_ce": 0.004842008929699659, + "loss_iou": 0.64453125, + "loss_num": 0.04638671875, + "loss_xval": 1.5234375, + "num_input_tokens_seen": 100098860, + "step": 1496 + }, + { + "epoch": 0.16987234042553193, + "grad_norm": 27.57518196105957, + "learning_rate": 5e-05, + "loss": 1.351, + "num_input_tokens_seen": 100165200, + "step": 1497 + }, + { + "epoch": 0.16987234042553193, + "loss": 1.4985744953155518, + "loss_ce": 0.0019924312364310026, + "loss_iou": 0.64453125, + "loss_num": 0.041748046875, + "loss_xval": 1.5, + "num_input_tokens_seen": 100165200, + "step": 1497 + }, + { + "epoch": 0.16998581560283688, + "grad_norm": 25.776615142822266, + "learning_rate": 5e-05, + "loss": 1.5667, + "num_input_tokens_seen": 100232236, + "step": 1498 + }, + { + "epoch": 0.16998581560283688, + "loss": 1.531759262084961, + "loss_ce": 0.004415405448526144, + "loss_iou": 0.6796875, + "loss_num": 0.03369140625, + "loss_xval": 1.53125, + "num_input_tokens_seen": 100232236, + "step": 1498 + }, + { + "epoch": 0.17009929078014185, + "grad_norm": 25.154254913330078, + "learning_rate": 5e-05, + "loss": 1.3046, + "num_input_tokens_seen": 100298376, + "step": 1499 + }, + { + "epoch": 0.17009929078014185, + "loss": 1.289010763168335, + "loss_ce": 0.00531928613781929, + "loss_iou": 0.56640625, + "loss_num": 0.03076171875, + "loss_xval": 1.28125, + "num_input_tokens_seen": 100298376, + "step": 1499 + }, + { + "epoch": 0.1702127659574468, + "grad_norm": 24.301605224609375, + "learning_rate": 5e-05, + "loss": 1.4888, + "num_input_tokens_seen": 100366704, + "step": 1500 + }, + { + "epoch": 0.1702127659574468, + "eval_seeclick_CIoU": 0.3633064776659012, + "eval_seeclick_GIoU": 0.3227757662534714, + "eval_seeclick_IoU": 0.4528391361236572, + "eval_seeclick_MAE_all": 0.17845654487609863, + "eval_seeclick_MAE_h": 0.10305338725447655, + "eval_seeclick_MAE_w": 0.12234250083565712, + "eval_seeclick_MAE_x_boxes": 0.25182636082172394, + "eval_seeclick_MAE_y_boxes": 0.13418130204081535, + "eval_seeclick_NUM_probability": 0.9996703267097473, + "eval_seeclick_inside_bbox": 0.6614583432674408, + "eval_seeclick_loss": 2.641179084777832, + "eval_seeclick_loss_ce": 0.015376354567706585, + "eval_seeclick_loss_iou": 0.8858642578125, + "eval_seeclick_loss_num": 0.16890716552734375, + "eval_seeclick_loss_xval": 2.616455078125, + "eval_seeclick_runtime": 66.9981, + "eval_seeclick_samples_per_second": 0.702, + "eval_seeclick_steps_per_second": 0.03, + "num_input_tokens_seen": 100366704, + "step": 1500 + }, + { + "epoch": 0.1702127659574468, + "eval_icons_CIoU": 0.44868944585323334, + "eval_icons_GIoU": 0.42771391570568085, + "eval_icons_IoU": 0.49963515996932983, + "eval_icons_MAE_all": 0.1411273181438446, + "eval_icons_MAE_h": 0.08707606792449951, + "eval_icons_MAE_w": 0.1034458726644516, + "eval_icons_MAE_x_boxes": 0.114631537348032, + "eval_icons_MAE_y_boxes": 0.14064187556505203, + "eval_icons_NUM_probability": 0.999829113483429, + "eval_icons_inside_bbox": 0.7517361044883728, + "eval_icons_loss": 2.604473352432251, + "eval_icons_loss_ce": 8.474261176161235e-05, + "eval_icons_loss_iou": 0.9423828125, + "eval_icons_loss_num": 0.14340972900390625, + "eval_icons_loss_xval": 2.60400390625, + "eval_icons_runtime": 67.5402, + "eval_icons_samples_per_second": 0.74, + "eval_icons_steps_per_second": 0.03, + "num_input_tokens_seen": 100366704, + "step": 1500 + }, + { + "epoch": 0.1702127659574468, + "eval_screenspot_CIoU": 0.40785400072733563, + "eval_screenspot_GIoU": 0.3994733989238739, + "eval_screenspot_IoU": 0.478279451529185, + "eval_screenspot_MAE_all": 0.13809962073961893, + "eval_screenspot_MAE_h": 0.07747493435939153, + "eval_screenspot_MAE_w": 0.16229389111200967, + "eval_screenspot_MAE_x_boxes": 0.19194862246513367, + "eval_screenspot_MAE_y_boxes": 0.08692146092653275, + "eval_screenspot_NUM_probability": 0.9997092882792155, + "eval_screenspot_inside_bbox": 0.7487499912579855, + "eval_screenspot_loss": 2.6195950508117676, + "eval_screenspot_loss_ce": 0.007849094613144795, + "eval_screenspot_loss_iou": 0.9772135416666666, + "eval_screenspot_loss_num": 0.15012613932291666, + "eval_screenspot_loss_xval": 2.7041015625, + "eval_screenspot_runtime": 119.7076, + "eval_screenspot_samples_per_second": 0.743, + "eval_screenspot_steps_per_second": 0.025, + "num_input_tokens_seen": 100366704, + "step": 1500 + }, + { + "epoch": 0.1702127659574468, + "eval_compot_CIoU": 0.34233179688453674, + "eval_compot_GIoU": 0.2934027910232544, + "eval_compot_IoU": 0.4338478147983551, + "eval_compot_MAE_all": 0.17414939403533936, + "eval_compot_MAE_h": 0.057375501841306686, + "eval_compot_MAE_w": 0.19616135954856873, + "eval_compot_MAE_x_boxes": 0.16620351374149323, + "eval_compot_MAE_y_boxes": 0.1773752123117447, + "eval_compot_NUM_probability": 0.9995885789394379, + "eval_compot_inside_bbox": 0.53125, + "eval_compot_loss": 2.766892433166504, + "eval_compot_loss_ce": 0.0070859589613974094, + "eval_compot_loss_iou": 0.969970703125, + "eval_compot_loss_num": 0.1533355712890625, + "eval_compot_loss_xval": 2.7041015625, + "eval_compot_runtime": 77.132, + "eval_compot_samples_per_second": 0.648, + "eval_compot_steps_per_second": 0.026, + "num_input_tokens_seen": 100366704, + "step": 1500 + }, + { + "epoch": 0.1702127659574468, + "loss": 2.7952966690063477, + "loss_ce": 0.0062339892610907555, + "loss_iou": 0.94921875, + "loss_num": 0.177734375, + "loss_xval": 2.78125, + "num_input_tokens_seen": 100366704, + "step": 1500 + }, + { + "epoch": 0.17032624113475178, + "grad_norm": 13.033895492553711, + "learning_rate": 5e-05, + "loss": 1.3904, + "num_input_tokens_seen": 100433388, + "step": 1501 + }, + { + "epoch": 0.17032624113475178, + "loss": 1.370243787765503, + "loss_ce": 0.0047348435036838055, + "loss_iou": 0.59375, + "loss_num": 0.034912109375, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 100433388, + "step": 1501 + }, + { + "epoch": 0.17043971631205673, + "grad_norm": 42.985740661621094, + "learning_rate": 5e-05, + "loss": 1.1519, + "num_input_tokens_seen": 100499464, + "step": 1502 + }, + { + "epoch": 0.17043971631205673, + "loss": 1.3439195156097412, + "loss_ce": 0.006517224013805389, + "loss_iou": 0.578125, + "loss_num": 0.03564453125, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 100499464, + "step": 1502 + }, + { + "epoch": 0.1705531914893617, + "grad_norm": 39.53863525390625, + "learning_rate": 5e-05, + "loss": 1.3971, + "num_input_tokens_seen": 100565476, + "step": 1503 + }, + { + "epoch": 0.1705531914893617, + "loss": 1.127745270729065, + "loss_ce": 0.005674978718161583, + "loss_iou": 0.51953125, + "loss_num": 0.0159912109375, + "loss_xval": 1.125, + "num_input_tokens_seen": 100565476, + "step": 1503 + }, + { + "epoch": 0.17066666666666666, + "grad_norm": 25.246204376220703, + "learning_rate": 5e-05, + "loss": 1.4776, + "num_input_tokens_seen": 100632964, + "step": 1504 + }, + { + "epoch": 0.17066666666666666, + "loss": 1.4023921489715576, + "loss_ce": 0.0029780245386064053, + "loss_iou": 0.6015625, + "loss_num": 0.039794921875, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 100632964, + "step": 1504 + }, + { + "epoch": 0.17078014184397164, + "grad_norm": 18.882165908813477, + "learning_rate": 5e-05, + "loss": 1.205, + "num_input_tokens_seen": 100698828, + "step": 1505 + }, + { + "epoch": 0.17078014184397164, + "loss": 1.1988525390625, + "loss_ce": 0.005981383845210075, + "loss_iou": 0.5078125, + "loss_num": 0.03466796875, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 100698828, + "step": 1505 + }, + { + "epoch": 0.1708936170212766, + "grad_norm": 20.793636322021484, + "learning_rate": 5e-05, + "loss": 1.584, + "num_input_tokens_seen": 100765264, + "step": 1506 + }, + { + "epoch": 0.1708936170212766, + "loss": 1.6270403861999512, + "loss_ce": 0.006923207081854343, + "loss_iou": 0.6796875, + "loss_num": 0.052734375, + "loss_xval": 1.6171875, + "num_input_tokens_seen": 100765264, + "step": 1506 + }, + { + "epoch": 0.17100709219858157, + "grad_norm": 23.41921615600586, + "learning_rate": 5e-05, + "loss": 1.3277, + "num_input_tokens_seen": 100832380, + "step": 1507 + }, + { + "epoch": 0.17100709219858157, + "loss": 1.3111693859100342, + "loss_ce": 0.009411489591002464, + "loss_iou": 0.52734375, + "loss_num": 0.050048828125, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 100832380, + "step": 1507 + }, + { + "epoch": 0.17112056737588652, + "grad_norm": 21.658309936523438, + "learning_rate": 5e-05, + "loss": 1.6362, + "num_input_tokens_seen": 100900644, + "step": 1508 + }, + { + "epoch": 0.17112056737588652, + "loss": 1.693943977355957, + "loss_ce": 0.006444046273827553, + "loss_iou": 0.734375, + "loss_num": 0.04443359375, + "loss_xval": 1.6875, + "num_input_tokens_seen": 100900644, + "step": 1508 + }, + { + "epoch": 0.1712340425531915, + "grad_norm": 24.176517486572266, + "learning_rate": 5e-05, + "loss": 1.3698, + "num_input_tokens_seen": 100967908, + "step": 1509 + }, + { + "epoch": 0.1712340425531915, + "loss": 1.2965668439865112, + "loss_ce": 0.003598080947995186, + "loss_iou": 0.52734375, + "loss_num": 0.047119140625, + "loss_xval": 1.296875, + "num_input_tokens_seen": 100967908, + "step": 1509 + }, + { + "epoch": 0.17134751773049645, + "grad_norm": 33.627044677734375, + "learning_rate": 5e-05, + "loss": 1.5141, + "num_input_tokens_seen": 101035932, + "step": 1510 + }, + { + "epoch": 0.17134751773049645, + "loss": 1.5637872219085693, + "loss_ce": 0.00226379930973053, + "loss_iou": 0.6640625, + "loss_num": 0.047119140625, + "loss_xval": 1.5625, + "num_input_tokens_seen": 101035932, + "step": 1510 + }, + { + "epoch": 0.17146099290780142, + "grad_norm": 38.3928108215332, + "learning_rate": 5e-05, + "loss": 1.3726, + "num_input_tokens_seen": 101102860, + "step": 1511 + }, + { + "epoch": 0.17146099290780142, + "loss": 1.1861345767974854, + "loss_ce": 0.004493927117437124, + "loss_iou": 0.53515625, + "loss_num": 0.021728515625, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 101102860, + "step": 1511 + }, + { + "epoch": 0.17157446808510637, + "grad_norm": 20.477628707885742, + "learning_rate": 5e-05, + "loss": 1.6226, + "num_input_tokens_seen": 101169992, + "step": 1512 + }, + { + "epoch": 0.17157446808510637, + "loss": 1.9075367450714111, + "loss_ce": 0.0032398924231529236, + "loss_iou": 0.77734375, + "loss_num": 0.06982421875, + "loss_xval": 1.90625, + "num_input_tokens_seen": 101169992, + "step": 1512 + }, + { + "epoch": 0.17168794326241135, + "grad_norm": 16.404205322265625, + "learning_rate": 5e-05, + "loss": 1.3741, + "num_input_tokens_seen": 101236692, + "step": 1513 + }, + { + "epoch": 0.17168794326241135, + "loss": 1.1107193231582642, + "loss_ce": 0.007356285583227873, + "loss_iou": 0.45703125, + "loss_num": 0.0380859375, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 101236692, + "step": 1513 + }, + { + "epoch": 0.1718014184397163, + "grad_norm": 33.833229064941406, + "learning_rate": 5e-05, + "loss": 1.4369, + "num_input_tokens_seen": 101303972, + "step": 1514 + }, + { + "epoch": 0.1718014184397163, + "loss": 1.5798323154449463, + "loss_ce": 0.007566617336124182, + "loss_iou": 0.66796875, + "loss_num": 0.048095703125, + "loss_xval": 1.5703125, + "num_input_tokens_seen": 101303972, + "step": 1514 + }, + { + "epoch": 0.17191489361702128, + "grad_norm": 19.746000289916992, + "learning_rate": 5e-05, + "loss": 1.4642, + "num_input_tokens_seen": 101369640, + "step": 1515 + }, + { + "epoch": 0.17191489361702128, + "loss": 1.5444402694702148, + "loss_ce": 0.0024481453001499176, + "loss_iou": 0.71875, + "loss_num": 0.0213623046875, + "loss_xval": 1.5390625, + "num_input_tokens_seen": 101369640, + "step": 1515 + }, + { + "epoch": 0.17202836879432623, + "grad_norm": 31.26228141784668, + "learning_rate": 5e-05, + "loss": 1.2943, + "num_input_tokens_seen": 101435884, + "step": 1516 + }, + { + "epoch": 0.17202836879432623, + "loss": 1.1129732131958008, + "loss_ce": 0.002133413450792432, + "loss_iou": 0.474609375, + "loss_num": 0.03271484375, + "loss_xval": 1.109375, + "num_input_tokens_seen": 101435884, + "step": 1516 + }, + { + "epoch": 0.1721418439716312, + "grad_norm": 21.49285316467285, + "learning_rate": 5e-05, + "loss": 1.3976, + "num_input_tokens_seen": 101501132, + "step": 1517 + }, + { + "epoch": 0.1721418439716312, + "loss": 1.3990650177001953, + "loss_ce": 0.007463391404598951, + "loss_iou": 0.5234375, + "loss_num": 0.06884765625, + "loss_xval": 1.390625, + "num_input_tokens_seen": 101501132, + "step": 1517 + }, + { + "epoch": 0.17225531914893616, + "grad_norm": 20.566347122192383, + "learning_rate": 5e-05, + "loss": 1.2773, + "num_input_tokens_seen": 101567804, + "step": 1518 + }, + { + "epoch": 0.17225531914893616, + "loss": 1.410090446472168, + "loss_ce": 0.0033522313460707664, + "loss_iou": 0.59375, + "loss_num": 0.0439453125, + "loss_xval": 1.40625, + "num_input_tokens_seen": 101567804, + "step": 1518 + }, + { + "epoch": 0.17236879432624114, + "grad_norm": 20.690387725830078, + "learning_rate": 5e-05, + "loss": 1.257, + "num_input_tokens_seen": 101634232, + "step": 1519 + }, + { + "epoch": 0.17236879432624114, + "loss": 1.3347196578979492, + "loss_ce": 0.00415330845862627, + "loss_iou": 0.5625, + "loss_num": 0.040771484375, + "loss_xval": 1.328125, + "num_input_tokens_seen": 101634232, + "step": 1519 + }, + { + "epoch": 0.1724822695035461, + "grad_norm": 26.46270751953125, + "learning_rate": 5e-05, + "loss": 1.2988, + "num_input_tokens_seen": 101701228, + "step": 1520 + }, + { + "epoch": 0.1724822695035461, + "loss": 1.4172112941741943, + "loss_ce": 0.007054985500872135, + "loss_iou": 0.6015625, + "loss_num": 0.041748046875, + "loss_xval": 1.40625, + "num_input_tokens_seen": 101701228, + "step": 1520 + }, + { + "epoch": 0.17259574468085107, + "grad_norm": 29.239194869995117, + "learning_rate": 5e-05, + "loss": 1.4363, + "num_input_tokens_seen": 101768212, + "step": 1521 + }, + { + "epoch": 0.17259574468085107, + "loss": 1.5420794486999512, + "loss_ce": 0.0020403843373060226, + "loss_iou": 0.66796875, + "loss_num": 0.041015625, + "loss_xval": 1.5390625, + "num_input_tokens_seen": 101768212, + "step": 1521 + }, + { + "epoch": 0.17270921985815602, + "grad_norm": 21.097164154052734, + "learning_rate": 5e-05, + "loss": 1.5003, + "num_input_tokens_seen": 101835988, + "step": 1522 + }, + { + "epoch": 0.17270921985815602, + "loss": 1.700077772140503, + "loss_ce": 0.0037887198850512505, + "loss_iou": 0.70703125, + "loss_num": 0.056396484375, + "loss_xval": 1.6953125, + "num_input_tokens_seen": 101835988, + "step": 1522 + }, + { + "epoch": 0.172822695035461, + "grad_norm": 52.254638671875, + "learning_rate": 5e-05, + "loss": 1.4376, + "num_input_tokens_seen": 101902380, + "step": 1523 + }, + { + "epoch": 0.172822695035461, + "loss": 1.4069581031799316, + "loss_ce": 0.007544109597802162, + "loss_iou": 0.578125, + "loss_num": 0.048828125, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 101902380, + "step": 1523 + }, + { + "epoch": 0.17293617021276594, + "grad_norm": 26.20392417907715, + "learning_rate": 5e-05, + "loss": 1.3332, + "num_input_tokens_seen": 101970116, + "step": 1524 + }, + { + "epoch": 0.17293617021276594, + "loss": 1.159214973449707, + "loss_ce": 0.002476735273376107, + "loss_iou": 0.484375, + "loss_num": 0.03759765625, + "loss_xval": 1.15625, + "num_input_tokens_seen": 101970116, + "step": 1524 + }, + { + "epoch": 0.17304964539007092, + "grad_norm": 17.964616775512695, + "learning_rate": 5e-05, + "loss": 1.3077, + "num_input_tokens_seen": 102037416, + "step": 1525 + }, + { + "epoch": 0.17304964539007092, + "loss": 1.4234004020690918, + "loss_ce": 0.0029902211390435696, + "loss_iou": 0.61328125, + "loss_num": 0.038330078125, + "loss_xval": 1.421875, + "num_input_tokens_seen": 102037416, + "step": 1525 + }, + { + "epoch": 0.1731631205673759, + "grad_norm": 28.4901065826416, + "learning_rate": 5e-05, + "loss": 1.5805, + "num_input_tokens_seen": 102103832, + "step": 1526 + }, + { + "epoch": 0.1731631205673759, + "loss": 1.583054542541504, + "loss_ce": 0.011765414848923683, + "loss_iou": 0.64453125, + "loss_num": 0.05712890625, + "loss_xval": 1.5703125, + "num_input_tokens_seen": 102103832, + "step": 1526 + }, + { + "epoch": 0.17327659574468085, + "grad_norm": 27.712158203125, + "learning_rate": 5e-05, + "loss": 1.4612, + "num_input_tokens_seen": 102170452, + "step": 1527 + }, + { + "epoch": 0.17327659574468085, + "loss": 1.4655671119689941, + "loss_ce": 0.005606204271316528, + "loss_iou": 0.6328125, + "loss_num": 0.038818359375, + "loss_xval": 1.4609375, + "num_input_tokens_seen": 102170452, + "step": 1527 + }, + { + "epoch": 0.17339007092198583, + "grad_norm": 23.574155807495117, + "learning_rate": 5e-05, + "loss": 1.675, + "num_input_tokens_seen": 102237708, + "step": 1528 + }, + { + "epoch": 0.17339007092198583, + "loss": 1.6939843893051147, + "loss_ce": 0.00257816887460649, + "loss_iou": 0.73046875, + "loss_num": 0.04638671875, + "loss_xval": 1.6875, + "num_input_tokens_seen": 102237708, + "step": 1528 + }, + { + "epoch": 0.17350354609929078, + "grad_norm": 20.531627655029297, + "learning_rate": 5e-05, + "loss": 1.2815, + "num_input_tokens_seen": 102304864, + "step": 1529 + }, + { + "epoch": 0.17350354609929078, + "loss": 1.239065408706665, + "loss_ce": 0.006155246868729591, + "loss_iou": 0.515625, + "loss_num": 0.039794921875, + "loss_xval": 1.234375, + "num_input_tokens_seen": 102304864, + "step": 1529 + }, + { + "epoch": 0.17361702127659576, + "grad_norm": 20.82017707824707, + "learning_rate": 5e-05, + "loss": 1.6308, + "num_input_tokens_seen": 102370724, + "step": 1530 + }, + { + "epoch": 0.17361702127659576, + "loss": 1.6285228729248047, + "loss_ce": 0.004499472677707672, + "loss_iou": 0.67578125, + "loss_num": 0.05419921875, + "loss_xval": 1.625, + "num_input_tokens_seen": 102370724, + "step": 1530 + }, + { + "epoch": 0.1737304964539007, + "grad_norm": 25.09425163269043, + "learning_rate": 5e-05, + "loss": 1.346, + "num_input_tokens_seen": 102436776, + "step": 1531 + }, + { + "epoch": 0.1737304964539007, + "loss": 1.2662006616592407, + "loss_ce": 0.003993619699031115, + "loss_iou": 0.4765625, + "loss_num": 0.06201171875, + "loss_xval": 1.265625, + "num_input_tokens_seen": 102436776, + "step": 1531 + }, + { + "epoch": 0.17384397163120568, + "grad_norm": 20.524301528930664, + "learning_rate": 5e-05, + "loss": 1.5523, + "num_input_tokens_seen": 102503644, + "step": 1532 + }, + { + "epoch": 0.17384397163120568, + "loss": 1.6848822832107544, + "loss_ce": 0.008124417625367641, + "loss_iou": 0.70703125, + "loss_num": 0.05224609375, + "loss_xval": 1.6796875, + "num_input_tokens_seen": 102503644, + "step": 1532 + }, + { + "epoch": 0.17395744680851064, + "grad_norm": 32.57455062866211, + "learning_rate": 5e-05, + "loss": 1.4208, + "num_input_tokens_seen": 102571768, + "step": 1533 + }, + { + "epoch": 0.17395744680851064, + "loss": 1.2352406978607178, + "loss_ce": 0.00770165678113699, + "loss_iou": 0.51953125, + "loss_num": 0.03759765625, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 102571768, + "step": 1533 + }, + { + "epoch": 0.1740709219858156, + "grad_norm": 22.117778778076172, + "learning_rate": 5e-05, + "loss": 1.3857, + "num_input_tokens_seen": 102639880, + "step": 1534 + }, + { + "epoch": 0.1740709219858156, + "loss": 1.3138737678527832, + "loss_ce": 0.0062565309926867485, + "loss_iou": 0.578125, + "loss_num": 0.02978515625, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 102639880, + "step": 1534 + }, + { + "epoch": 0.17418439716312056, + "grad_norm": 44.69984436035156, + "learning_rate": 5e-05, + "loss": 1.5093, + "num_input_tokens_seen": 102707764, + "step": 1535 + }, + { + "epoch": 0.17418439716312056, + "loss": 1.392115592956543, + "loss_ce": 0.003932053688913584, + "loss_iou": 0.61328125, + "loss_num": 0.03271484375, + "loss_xval": 1.390625, + "num_input_tokens_seen": 102707764, + "step": 1535 + }, + { + "epoch": 0.17429787234042554, + "grad_norm": 20.79380989074707, + "learning_rate": 5e-05, + "loss": 1.7761, + "num_input_tokens_seen": 102776036, + "step": 1536 + }, + { + "epoch": 0.17429787234042554, + "loss": 1.7945234775543213, + "loss_ce": 0.0035078157670795918, + "loss_iou": 0.74609375, + "loss_num": 0.059326171875, + "loss_xval": 1.7890625, + "num_input_tokens_seen": 102776036, + "step": 1536 + }, + { + "epoch": 0.1744113475177305, + "grad_norm": 50.28203201293945, + "learning_rate": 5e-05, + "loss": 1.3236, + "num_input_tokens_seen": 102843080, + "step": 1537 + }, + { + "epoch": 0.1744113475177305, + "loss": 1.311899185180664, + "loss_ce": 0.004282089881598949, + "loss_iou": 0.53125, + "loss_num": 0.04833984375, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 102843080, + "step": 1537 + }, + { + "epoch": 0.17452482269503547, + "grad_norm": 24.215293884277344, + "learning_rate": 5e-05, + "loss": 1.24, + "num_input_tokens_seen": 102911080, + "step": 1538 + }, + { + "epoch": 0.17452482269503547, + "loss": 1.1315233707427979, + "loss_ce": 0.007011712528765202, + "loss_iou": 0.50390625, + "loss_num": 0.0238037109375, + "loss_xval": 1.125, + "num_input_tokens_seen": 102911080, + "step": 1538 + }, + { + "epoch": 0.17463829787234042, + "grad_norm": 21.632749557495117, + "learning_rate": 5e-05, + "loss": 1.1494, + "num_input_tokens_seen": 102976844, + "step": 1539 + }, + { + "epoch": 0.17463829787234042, + "loss": 1.0484594106674194, + "loss_ce": 0.0025610127486288548, + "loss_iou": 0.484375, + "loss_num": 0.015869140625, + "loss_xval": 1.046875, + "num_input_tokens_seen": 102976844, + "step": 1539 + }, + { + "epoch": 0.1747517730496454, + "grad_norm": 19.359525680541992, + "learning_rate": 5e-05, + "loss": 1.479, + "num_input_tokens_seen": 103042616, + "step": 1540 + }, + { + "epoch": 0.1747517730496454, + "loss": 1.3325066566467285, + "loss_ce": 0.009997007437050343, + "loss_iou": 0.51953125, + "loss_num": 0.056640625, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 103042616, + "step": 1540 + }, + { + "epoch": 0.17486524822695035, + "grad_norm": 29.135215759277344, + "learning_rate": 5e-05, + "loss": 1.4372, + "num_input_tokens_seen": 103109624, + "step": 1541 + }, + { + "epoch": 0.17486524822695035, + "loss": 1.5098817348480225, + "loss_ce": 0.00792872253805399, + "loss_iou": 0.6484375, + "loss_num": 0.04052734375, + "loss_xval": 1.5, + "num_input_tokens_seen": 103109624, + "step": 1541 + }, + { + "epoch": 0.17497872340425533, + "grad_norm": 20.974767684936523, + "learning_rate": 5e-05, + "loss": 1.6145, + "num_input_tokens_seen": 103177316, + "step": 1542 + }, + { + "epoch": 0.17497872340425533, + "loss": 1.654951572418213, + "loss_ce": 0.004560940898954868, + "loss_iou": 0.7109375, + "loss_num": 0.04541015625, + "loss_xval": 1.6484375, + "num_input_tokens_seen": 103177316, + "step": 1542 + }, + { + "epoch": 0.17509219858156028, + "grad_norm": 17.895061492919922, + "learning_rate": 5e-05, + "loss": 1.5047, + "num_input_tokens_seen": 103243776, + "step": 1543 + }, + { + "epoch": 0.17509219858156028, + "loss": 1.3031506538391113, + "loss_ce": 0.0050549590960145, + "loss_iou": 0.55859375, + "loss_num": 0.035888671875, + "loss_xval": 1.296875, + "num_input_tokens_seen": 103243776, + "step": 1543 + }, + { + "epoch": 0.17520567375886525, + "grad_norm": 23.739065170288086, + "learning_rate": 5e-05, + "loss": 1.4959, + "num_input_tokens_seen": 103310236, + "step": 1544 + }, + { + "epoch": 0.17520567375886525, + "loss": 1.508547067642212, + "loss_ce": 0.004640812985599041, + "loss_iou": 0.61328125, + "loss_num": 0.054931640625, + "loss_xval": 1.5, + "num_input_tokens_seen": 103310236, + "step": 1544 + }, + { + "epoch": 0.1753191489361702, + "grad_norm": 37.03984451293945, + "learning_rate": 5e-05, + "loss": 1.4602, + "num_input_tokens_seen": 103377172, + "step": 1545 + }, + { + "epoch": 0.1753191489361702, + "loss": 1.4451167583465576, + "loss_ce": 0.0061518787406384945, + "loss_iou": 0.62109375, + "loss_num": 0.038818359375, + "loss_xval": 1.4375, + "num_input_tokens_seen": 103377172, + "step": 1545 + }, + { + "epoch": 0.17543262411347518, + "grad_norm": 18.83721351623535, + "learning_rate": 5e-05, + "loss": 1.8077, + "num_input_tokens_seen": 103444900, + "step": 1546 + }, + { + "epoch": 0.17543262411347518, + "loss": 1.7299394607543945, + "loss_ce": 0.0072832065634429455, + "loss_iou": 0.75390625, + "loss_num": 0.04345703125, + "loss_xval": 1.71875, + "num_input_tokens_seen": 103444900, + "step": 1546 + }, + { + "epoch": 0.17554609929078013, + "grad_norm": 24.66327476501465, + "learning_rate": 5e-05, + "loss": 1.4043, + "num_input_tokens_seen": 103510696, + "step": 1547 + }, + { + "epoch": 0.17554609929078013, + "loss": 1.5850989818572998, + "loss_ce": 0.009903731755912304, + "loss_iou": 0.6484375, + "loss_num": 0.0556640625, + "loss_xval": 1.578125, + "num_input_tokens_seen": 103510696, + "step": 1547 + }, + { + "epoch": 0.1756595744680851, + "grad_norm": 19.281034469604492, + "learning_rate": 5e-05, + "loss": 1.4674, + "num_input_tokens_seen": 103576512, + "step": 1548 + }, + { + "epoch": 0.1756595744680851, + "loss": 1.319642186164856, + "loss_ce": 0.004212433472275734, + "loss_iou": 0.58203125, + "loss_num": 0.0306396484375, + "loss_xval": 1.3125, + "num_input_tokens_seen": 103576512, + "step": 1548 + }, + { + "epoch": 0.17577304964539006, + "grad_norm": 37.54100799560547, + "learning_rate": 5e-05, + "loss": 1.2807, + "num_input_tokens_seen": 103643764, + "step": 1549 + }, + { + "epoch": 0.17577304964539006, + "loss": 1.3876194953918457, + "loss_ce": 0.004135646857321262, + "loss_iou": 0.578125, + "loss_num": 0.044921875, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 103643764, + "step": 1549 + }, + { + "epoch": 0.17588652482269504, + "grad_norm": 36.58296585083008, + "learning_rate": 5e-05, + "loss": 1.4851, + "num_input_tokens_seen": 103710964, + "step": 1550 + }, + { + "epoch": 0.17588652482269504, + "loss": 1.4166626930236816, + "loss_ce": 0.007482909131795168, + "loss_iou": 0.59375, + "loss_num": 0.0439453125, + "loss_xval": 1.40625, + "num_input_tokens_seen": 103710964, + "step": 1550 + }, + { + "epoch": 0.176, + "grad_norm": 20.313186645507812, + "learning_rate": 5e-05, + "loss": 1.6898, + "num_input_tokens_seen": 103778804, + "step": 1551 + }, + { + "epoch": 0.176, + "loss": 1.5915627479553223, + "loss_ce": 0.007578337099403143, + "loss_iou": 0.71484375, + "loss_num": 0.031005859375, + "loss_xval": 1.5859375, + "num_input_tokens_seen": 103778804, + "step": 1551 + }, + { + "epoch": 0.17611347517730497, + "grad_norm": 28.149049758911133, + "learning_rate": 5e-05, + "loss": 1.2242, + "num_input_tokens_seen": 103846112, + "step": 1552 + }, + { + "epoch": 0.17611347517730497, + "loss": 1.318758249282837, + "loss_ce": 0.0043051717802882195, + "loss_iou": 0.56640625, + "loss_num": 0.0361328125, + "loss_xval": 1.3125, + "num_input_tokens_seen": 103846112, + "step": 1552 + }, + { + "epoch": 0.17622695035460992, + "grad_norm": 21.607681274414062, + "learning_rate": 5e-05, + "loss": 1.2749, + "num_input_tokens_seen": 103913068, + "step": 1553 + }, + { + "epoch": 0.17622695035460992, + "loss": 1.2999745607376099, + "loss_ce": 0.0030995653942227364, + "loss_iou": 0.515625, + "loss_num": 0.05224609375, + "loss_xval": 1.296875, + "num_input_tokens_seen": 103913068, + "step": 1553 + }, + { + "epoch": 0.1763404255319149, + "grad_norm": 26.347761154174805, + "learning_rate": 5e-05, + "loss": 1.5394, + "num_input_tokens_seen": 103980512, + "step": 1554 + }, + { + "epoch": 0.1763404255319149, + "loss": 1.5537410974502563, + "loss_ce": 0.005889576859772205, + "loss_iou": 0.65625, + "loss_num": 0.04638671875, + "loss_xval": 1.546875, + "num_input_tokens_seen": 103980512, + "step": 1554 + }, + { + "epoch": 0.17645390070921985, + "grad_norm": 61.287437438964844, + "learning_rate": 5e-05, + "loss": 1.3947, + "num_input_tokens_seen": 104046848, + "step": 1555 + }, + { + "epoch": 0.17645390070921985, + "loss": 1.4635112285614014, + "loss_ce": 0.005503435619175434, + "loss_iou": 0.625, + "loss_num": 0.0419921875, + "loss_xval": 1.4609375, + "num_input_tokens_seen": 104046848, + "step": 1555 + }, + { + "epoch": 0.17656737588652482, + "grad_norm": 19.37053680419922, + "learning_rate": 5e-05, + "loss": 1.5311, + "num_input_tokens_seen": 104111688, + "step": 1556 + }, + { + "epoch": 0.17656737588652482, + "loss": 1.6245869398117065, + "loss_ce": 0.002516669686883688, + "loss_iou": 0.6875, + "loss_num": 0.050048828125, + "loss_xval": 1.625, + "num_input_tokens_seen": 104111688, + "step": 1556 + }, + { + "epoch": 0.17668085106382977, + "grad_norm": 13.40495491027832, + "learning_rate": 5e-05, + "loss": 1.347, + "num_input_tokens_seen": 104178864, + "step": 1557 + }, + { + "epoch": 0.17668085106382977, + "loss": 1.4340254068374634, + "loss_ce": 0.004826278425753117, + "loss_iou": 0.609375, + "loss_num": 0.042236328125, + "loss_xval": 1.4296875, + "num_input_tokens_seen": 104178864, + "step": 1557 + }, + { + "epoch": 0.17679432624113475, + "grad_norm": 24.19254493713379, + "learning_rate": 5e-05, + "loss": 1.422, + "num_input_tokens_seen": 104245464, + "step": 1558 + }, + { + "epoch": 0.17679432624113475, + "loss": 1.4616801738739014, + "loss_ce": 0.006602123379707336, + "loss_iou": 0.58984375, + "loss_num": 0.055419921875, + "loss_xval": 1.453125, + "num_input_tokens_seen": 104245464, + "step": 1558 + }, + { + "epoch": 0.1769078014184397, + "grad_norm": 19.23759651184082, + "learning_rate": 5e-05, + "loss": 1.3137, + "num_input_tokens_seen": 104312444, + "step": 1559 + }, + { + "epoch": 0.1769078014184397, + "loss": 1.3542349338531494, + "loss_ce": 0.004625614266842604, + "loss_iou": 0.62890625, + "loss_num": 0.01806640625, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 104312444, + "step": 1559 + }, + { + "epoch": 0.17702127659574468, + "grad_norm": 23.2930908203125, + "learning_rate": 5e-05, + "loss": 1.3894, + "num_input_tokens_seen": 104379084, + "step": 1560 + }, + { + "epoch": 0.17702127659574468, + "loss": 1.3019061088562012, + "loss_ce": 0.007472492754459381, + "loss_iou": 0.5625, + "loss_num": 0.034423828125, + "loss_xval": 1.296875, + "num_input_tokens_seen": 104379084, + "step": 1560 + }, + { + "epoch": 0.17713475177304966, + "grad_norm": 25.223392486572266, + "learning_rate": 5e-05, + "loss": 1.4751, + "num_input_tokens_seen": 104445952, + "step": 1561 + }, + { + "epoch": 0.17713475177304966, + "loss": 1.5452046394348145, + "loss_ce": 0.007118617184460163, + "loss_iou": 0.6484375, + "loss_num": 0.0478515625, + "loss_xval": 1.5390625, + "num_input_tokens_seen": 104445952, + "step": 1561 + }, + { + "epoch": 0.1772482269503546, + "grad_norm": 25.25143814086914, + "learning_rate": 5e-05, + "loss": 1.2798, + "num_input_tokens_seen": 104513280, + "step": 1562 + }, + { + "epoch": 0.1772482269503546, + "loss": 1.4744794368743896, + "loss_ce": 0.007682641036808491, + "loss_iou": 0.6328125, + "loss_num": 0.041015625, + "loss_xval": 1.46875, + "num_input_tokens_seen": 104513280, + "step": 1562 + }, + { + "epoch": 0.1773617021276596, + "grad_norm": 21.50188446044922, + "learning_rate": 5e-05, + "loss": 1.6016, + "num_input_tokens_seen": 104579332, + "step": 1563 + }, + { + "epoch": 0.1773617021276596, + "loss": 1.4093120098114014, + "loss_ce": 0.00593066168949008, + "loss_iou": 0.5859375, + "loss_num": 0.04541015625, + "loss_xval": 1.40625, + "num_input_tokens_seen": 104579332, + "step": 1563 + }, + { + "epoch": 0.17747517730496454, + "grad_norm": 18.70534896850586, + "learning_rate": 5e-05, + "loss": 1.1562, + "num_input_tokens_seen": 104645500, + "step": 1564 + }, + { + "epoch": 0.17747517730496454, + "loss": 1.3231450319290161, + "loss_ce": 0.006250503472983837, + "loss_iou": 0.56640625, + "loss_num": 0.037109375, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 104645500, + "step": 1564 + }, + { + "epoch": 0.17758865248226952, + "grad_norm": 17.698619842529297, + "learning_rate": 5e-05, + "loss": 1.4059, + "num_input_tokens_seen": 104712412, + "step": 1565 + }, + { + "epoch": 0.17758865248226952, + "loss": 1.3468880653381348, + "loss_ce": 0.0026497396174818277, + "loss_iou": 0.57421875, + "loss_num": 0.038818359375, + "loss_xval": 1.34375, + "num_input_tokens_seen": 104712412, + "step": 1565 + }, + { + "epoch": 0.17770212765957447, + "grad_norm": 31.243925094604492, + "learning_rate": 5e-05, + "loss": 1.2683, + "num_input_tokens_seen": 104778808, + "step": 1566 + }, + { + "epoch": 0.17770212765957447, + "loss": 1.5631437301635742, + "loss_ce": 0.00845616776496172, + "loss_iou": 0.66015625, + "loss_num": 0.04736328125, + "loss_xval": 1.5546875, + "num_input_tokens_seen": 104778808, + "step": 1566 + }, + { + "epoch": 0.17781560283687944, + "grad_norm": 19.322399139404297, + "learning_rate": 5e-05, + "loss": 1.8222, + "num_input_tokens_seen": 104845200, + "step": 1567 + }, + { + "epoch": 0.17781560283687944, + "loss": 1.7529610395431519, + "loss_ce": 0.007843857631087303, + "loss_iou": 0.7421875, + "loss_num": 0.05224609375, + "loss_xval": 1.7421875, + "num_input_tokens_seen": 104845200, + "step": 1567 + }, + { + "epoch": 0.1779290780141844, + "grad_norm": 35.93505859375, + "learning_rate": 5e-05, + "loss": 1.1999, + "num_input_tokens_seen": 104912244, + "step": 1568 + }, + { + "epoch": 0.1779290780141844, + "loss": 1.0633591413497925, + "loss_ce": 0.005741942208260298, + "loss_iou": 0.46875, + "loss_num": 0.0240478515625, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 104912244, + "step": 1568 + }, + { + "epoch": 0.17804255319148937, + "grad_norm": 33.91901397705078, + "learning_rate": 5e-05, + "loss": 1.4044, + "num_input_tokens_seen": 104979480, + "step": 1569 + }, + { + "epoch": 0.17804255319148937, + "loss": 1.2838482856750488, + "loss_ce": 0.0021100384183228016, + "loss_iou": 0.57421875, + "loss_num": 0.0267333984375, + "loss_xval": 1.28125, + "num_input_tokens_seen": 104979480, + "step": 1569 + }, + { + "epoch": 0.17815602836879432, + "grad_norm": 21.137296676635742, + "learning_rate": 5e-05, + "loss": 1.3878, + "num_input_tokens_seen": 105045348, + "step": 1570 + }, + { + "epoch": 0.17815602836879432, + "loss": 1.34056556224823, + "loss_ce": 0.005604610312730074, + "loss_iou": 0.59765625, + "loss_num": 0.0286865234375, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 105045348, + "step": 1570 + }, + { + "epoch": 0.1782695035460993, + "grad_norm": 16.44196319580078, + "learning_rate": 5e-05, + "loss": 1.3784, + "num_input_tokens_seen": 105111000, + "step": 1571 + }, + { + "epoch": 0.1782695035460993, + "loss": 1.4577686786651611, + "loss_ce": 0.01001480221748352, + "loss_iou": 0.6640625, + "loss_num": 0.024658203125, + "loss_xval": 1.4453125, + "num_input_tokens_seen": 105111000, + "step": 1571 + }, + { + "epoch": 0.17838297872340425, + "grad_norm": 22.55048179626465, + "learning_rate": 5e-05, + "loss": 1.4175, + "num_input_tokens_seen": 105178704, + "step": 1572 + }, + { + "epoch": 0.17838297872340425, + "loss": 1.4241247177124023, + "loss_ce": 0.002737964503467083, + "loss_iou": 0.59375, + "loss_num": 0.046630859375, + "loss_xval": 1.421875, + "num_input_tokens_seen": 105178704, + "step": 1572 + }, + { + "epoch": 0.17849645390070923, + "grad_norm": 23.27818489074707, + "learning_rate": 5e-05, + "loss": 1.3013, + "num_input_tokens_seen": 105246164, + "step": 1573 + }, + { + "epoch": 0.17849645390070923, + "loss": 1.1992452144622803, + "loss_ce": 0.010768728330731392, + "loss_iou": 0.52734375, + "loss_num": 0.0262451171875, + "loss_xval": 1.1875, + "num_input_tokens_seen": 105246164, + "step": 1573 + }, + { + "epoch": 0.17860992907801418, + "grad_norm": 40.867515563964844, + "learning_rate": 5e-05, + "loss": 1.3575, + "num_input_tokens_seen": 105312812, + "step": 1574 + }, + { + "epoch": 0.17860992907801418, + "loss": 1.1291719675064087, + "loss_ce": 0.0035616070963442326, + "loss_iou": 0.490234375, + "loss_num": 0.029052734375, + "loss_xval": 1.125, + "num_input_tokens_seen": 105312812, + "step": 1574 + }, + { + "epoch": 0.17872340425531916, + "grad_norm": 21.090496063232422, + "learning_rate": 5e-05, + "loss": 1.8006, + "num_input_tokens_seen": 105380540, + "step": 1575 + }, + { + "epoch": 0.17872340425531916, + "loss": 1.6639091968536377, + "loss_ce": 0.0057060373947024345, + "loss_iou": 0.70703125, + "loss_num": 0.048583984375, + "loss_xval": 1.65625, + "num_input_tokens_seen": 105380540, + "step": 1575 + }, + { + "epoch": 0.1788368794326241, + "grad_norm": 15.411458969116211, + "learning_rate": 5e-05, + "loss": 1.3822, + "num_input_tokens_seen": 105447156, + "step": 1576 + }, + { + "epoch": 0.1788368794326241, + "loss": 1.4599699974060059, + "loss_ce": 0.0039152828976511955, + "loss_iou": 0.58203125, + "loss_num": 0.05859375, + "loss_xval": 1.453125, + "num_input_tokens_seen": 105447156, + "step": 1576 + }, + { + "epoch": 0.17895035460992909, + "grad_norm": 33.3551025390625, + "learning_rate": 5e-05, + "loss": 1.4828, + "num_input_tokens_seen": 105513904, + "step": 1577 + }, + { + "epoch": 0.17895035460992909, + "loss": 1.311246633529663, + "loss_ce": 0.004117733798921108, + "loss_iou": 0.55078125, + "loss_num": 0.04150390625, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 105513904, + "step": 1577 + }, + { + "epoch": 0.17906382978723404, + "grad_norm": 23.905324935913086, + "learning_rate": 5e-05, + "loss": 1.2157, + "num_input_tokens_seen": 105579944, + "step": 1578 + }, + { + "epoch": 0.17906382978723404, + "loss": 1.147951364517212, + "loss_ce": 0.004396645352244377, + "loss_iou": 0.4921875, + "loss_num": 0.0322265625, + "loss_xval": 1.140625, + "num_input_tokens_seen": 105579944, + "step": 1578 + }, + { + "epoch": 0.179177304964539, + "grad_norm": 28.229076385498047, + "learning_rate": 5e-05, + "loss": 1.6254, + "num_input_tokens_seen": 105646608, + "step": 1579 + }, + { + "epoch": 0.179177304964539, + "loss": 1.8342487812042236, + "loss_ce": 0.004170733969658613, + "loss_iou": 0.73046875, + "loss_num": 0.07421875, + "loss_xval": 1.828125, + "num_input_tokens_seen": 105646608, + "step": 1579 + }, + { + "epoch": 0.17929078014184396, + "grad_norm": 17.80910873413086, + "learning_rate": 5e-05, + "loss": 1.4702, + "num_input_tokens_seen": 105713704, + "step": 1580 + }, + { + "epoch": 0.17929078014184396, + "loss": 1.6703262329101562, + "loss_ce": 0.0043106647208333015, + "loss_iou": 0.71875, + "loss_num": 0.045166015625, + "loss_xval": 1.6640625, + "num_input_tokens_seen": 105713704, + "step": 1580 + }, + { + "epoch": 0.17940425531914894, + "grad_norm": 16.404096603393555, + "learning_rate": 5e-05, + "loss": 1.1524, + "num_input_tokens_seen": 105780800, + "step": 1581 + }, + { + "epoch": 0.17940425531914894, + "loss": 1.3035367727279663, + "loss_ce": 0.004708660300821066, + "loss_iou": 0.5546875, + "loss_num": 0.037109375, + "loss_xval": 1.296875, + "num_input_tokens_seen": 105780800, + "step": 1581 + }, + { + "epoch": 0.1795177304964539, + "grad_norm": 115.88936614990234, + "learning_rate": 5e-05, + "loss": 1.3777, + "num_input_tokens_seen": 105847404, + "step": 1582 + }, + { + "epoch": 0.1795177304964539, + "loss": 1.4820998907089233, + "loss_ce": 0.0026076731737703085, + "loss_iou": 0.6171875, + "loss_num": 0.048583984375, + "loss_xval": 1.4765625, + "num_input_tokens_seen": 105847404, + "step": 1582 + }, + { + "epoch": 0.17963120567375887, + "grad_norm": 19.958423614501953, + "learning_rate": 5e-05, + "loss": 1.2649, + "num_input_tokens_seen": 105913856, + "step": 1583 + }, + { + "epoch": 0.17963120567375887, + "loss": 1.2134511470794678, + "loss_ce": 0.006419820245355368, + "loss_iou": 0.5078125, + "loss_num": 0.0380859375, + "loss_xval": 1.203125, + "num_input_tokens_seen": 105913856, + "step": 1583 + }, + { + "epoch": 0.17974468085106382, + "grad_norm": 17.167400360107422, + "learning_rate": 5e-05, + "loss": 1.2741, + "num_input_tokens_seen": 105980496, + "step": 1584 + }, + { + "epoch": 0.17974468085106382, + "loss": 1.1944483518600464, + "loss_ce": 0.004995248280465603, + "loss_iou": 0.455078125, + "loss_num": 0.055908203125, + "loss_xval": 1.1875, + "num_input_tokens_seen": 105980496, + "step": 1584 + }, + { + "epoch": 0.1798581560283688, + "grad_norm": 18.49695587158203, + "learning_rate": 5e-05, + "loss": 1.2876, + "num_input_tokens_seen": 106047760, + "step": 1585 + }, + { + "epoch": 0.1798581560283688, + "loss": 1.1930568218231201, + "loss_ce": 0.007021688856184483, + "loss_iou": 0.52734375, + "loss_num": 0.0255126953125, + "loss_xval": 1.1875, + "num_input_tokens_seen": 106047760, + "step": 1585 + }, + { + "epoch": 0.17997163120567375, + "grad_norm": 35.536399841308594, + "learning_rate": 5e-05, + "loss": 1.4126, + "num_input_tokens_seen": 106114392, + "step": 1586 + }, + { + "epoch": 0.17997163120567375, + "loss": 1.390749216079712, + "loss_ce": 0.004030501935631037, + "loss_iou": 0.59375, + "loss_num": 0.0400390625, + "loss_xval": 1.390625, + "num_input_tokens_seen": 106114392, + "step": 1586 + }, + { + "epoch": 0.18008510638297873, + "grad_norm": 19.610519409179688, + "learning_rate": 5e-05, + "loss": 1.568, + "num_input_tokens_seen": 106181148, + "step": 1587 + }, + { + "epoch": 0.18008510638297873, + "loss": 1.4541301727294922, + "loss_ce": 0.0029583896975964308, + "loss_iou": 0.6484375, + "loss_num": 0.0308837890625, + "loss_xval": 1.453125, + "num_input_tokens_seen": 106181148, + "step": 1587 + }, + { + "epoch": 0.18019858156028368, + "grad_norm": 18.35902976989746, + "learning_rate": 5e-05, + "loss": 1.308, + "num_input_tokens_seen": 106249004, + "step": 1588 + }, + { + "epoch": 0.18019858156028368, + "loss": 1.1413753032684326, + "loss_ce": 0.004656510427594185, + "loss_iou": 0.515625, + "loss_num": 0.0208740234375, + "loss_xval": 1.140625, + "num_input_tokens_seen": 106249004, + "step": 1588 + }, + { + "epoch": 0.18031205673758866, + "grad_norm": 19.658828735351562, + "learning_rate": 5e-05, + "loss": 1.3198, + "num_input_tokens_seen": 106315720, + "step": 1589 + }, + { + "epoch": 0.18031205673758866, + "loss": 1.3555186986923218, + "loss_ce": 0.020069444552063942, + "loss_iou": 0.5625, + "loss_num": 0.0419921875, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 106315720, + "step": 1589 + }, + { + "epoch": 0.1804255319148936, + "grad_norm": 25.360599517822266, + "learning_rate": 5e-05, + "loss": 1.6291, + "num_input_tokens_seen": 106382348, + "step": 1590 + }, + { + "epoch": 0.1804255319148936, + "loss": 1.8529595136642456, + "loss_ce": 0.010186044499278069, + "loss_iou": 0.68359375, + "loss_num": 0.09423828125, + "loss_xval": 1.84375, + "num_input_tokens_seen": 106382348, + "step": 1590 + }, + { + "epoch": 0.18053900709219858, + "grad_norm": 53.093143463134766, + "learning_rate": 5e-05, + "loss": 1.151, + "num_input_tokens_seen": 106448476, + "step": 1591 + }, + { + "epoch": 0.18053900709219858, + "loss": 1.1868067979812622, + "loss_ce": 0.00861470215022564, + "loss_iou": 0.5078125, + "loss_num": 0.03271484375, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 106448476, + "step": 1591 + }, + { + "epoch": 0.18065248226950353, + "grad_norm": 19.568721771240234, + "learning_rate": 5e-05, + "loss": 1.2118, + "num_input_tokens_seen": 106514960, + "step": 1592 + }, + { + "epoch": 0.18065248226950353, + "loss": 1.3047938346862793, + "loss_ce": 0.006453921552747488, + "loss_iou": 0.5703125, + "loss_num": 0.032470703125, + "loss_xval": 1.296875, + "num_input_tokens_seen": 106514960, + "step": 1592 + }, + { + "epoch": 0.1807659574468085, + "grad_norm": 50.984622955322266, + "learning_rate": 5e-05, + "loss": 1.2992, + "num_input_tokens_seen": 106581736, + "step": 1593 + }, + { + "epoch": 0.1807659574468085, + "loss": 1.2316153049468994, + "loss_ce": 0.0026114347856491804, + "loss_iou": 0.5703125, + "loss_num": 0.0169677734375, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 106581736, + "step": 1593 + }, + { + "epoch": 0.18087943262411346, + "grad_norm": 22.966934204101562, + "learning_rate": 5e-05, + "loss": 1.5432, + "num_input_tokens_seen": 106648536, + "step": 1594 + }, + { + "epoch": 0.18087943262411346, + "loss": 1.4156025648117065, + "loss_ce": 0.0047748638316988945, + "loss_iou": 0.59375, + "loss_num": 0.044921875, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 106648536, + "step": 1594 + }, + { + "epoch": 0.18099290780141844, + "grad_norm": 27.908750534057617, + "learning_rate": 5e-05, + "loss": 1.2759, + "num_input_tokens_seen": 106715248, + "step": 1595 + }, + { + "epoch": 0.18099290780141844, + "loss": 1.2542554140090942, + "loss_ce": 0.0066968295723199844, + "loss_iou": 0.53515625, + "loss_num": 0.0361328125, + "loss_xval": 1.25, + "num_input_tokens_seen": 106715248, + "step": 1595 + }, + { + "epoch": 0.18110638297872342, + "grad_norm": 44.1150016784668, + "learning_rate": 5e-05, + "loss": 1.3522, + "num_input_tokens_seen": 106782104, + "step": 1596 + }, + { + "epoch": 0.18110638297872342, + "loss": 1.4097788333892822, + "loss_ce": 0.009388112463057041, + "loss_iou": 0.63671875, + "loss_num": 0.0260009765625, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 106782104, + "step": 1596 + }, + { + "epoch": 0.18121985815602837, + "grad_norm": 23.27513885498047, + "learning_rate": 5e-05, + "loss": 1.5314, + "num_input_tokens_seen": 106849712, + "step": 1597 + }, + { + "epoch": 0.18121985815602837, + "loss": 1.3807191848754883, + "loss_ce": 0.0023011495359241962, + "loss_iou": 0.6328125, + "loss_num": 0.0218505859375, + "loss_xval": 1.375, + "num_input_tokens_seen": 106849712, + "step": 1597 + }, + { + "epoch": 0.18133333333333335, + "grad_norm": 20.158611297607422, + "learning_rate": 5e-05, + "loss": 1.4031, + "num_input_tokens_seen": 106916008, + "step": 1598 + }, + { + "epoch": 0.18133333333333335, + "loss": 1.3759822845458984, + "loss_ce": 0.002935508033260703, + "loss_iou": 0.58203125, + "loss_num": 0.0419921875, + "loss_xval": 1.375, + "num_input_tokens_seen": 106916008, + "step": 1598 + }, + { + "epoch": 0.1814468085106383, + "grad_norm": 28.58029556274414, + "learning_rate": 5e-05, + "loss": 1.4852, + "num_input_tokens_seen": 106982368, + "step": 1599 + }, + { + "epoch": 0.1814468085106383, + "loss": 1.5120748281478882, + "loss_ce": 0.0052389162592589855, + "loss_iou": 0.63671875, + "loss_num": 0.047119140625, + "loss_xval": 1.5078125, + "num_input_tokens_seen": 106982368, + "step": 1599 + }, + { + "epoch": 0.18156028368794327, + "grad_norm": 41.838436126708984, + "learning_rate": 5e-05, + "loss": 1.6399, + "num_input_tokens_seen": 107049300, + "step": 1600 + }, + { + "epoch": 0.18156028368794327, + "loss": 1.7443528175354004, + "loss_ce": 0.0031419266015291214, + "loss_iou": 0.703125, + "loss_num": 0.0673828125, + "loss_xval": 1.7421875, + "num_input_tokens_seen": 107049300, + "step": 1600 + }, + { + "epoch": 0.18167375886524822, + "grad_norm": 24.821325302124023, + "learning_rate": 5e-05, + "loss": 1.2721, + "num_input_tokens_seen": 107115696, + "step": 1601 + }, + { + "epoch": 0.18167375886524822, + "loss": 1.1471295356750488, + "loss_ce": 0.0030866065062582493, + "loss_iou": 0.515625, + "loss_num": 0.0230712890625, + "loss_xval": 1.140625, + "num_input_tokens_seen": 107115696, + "step": 1601 + }, + { + "epoch": 0.1817872340425532, + "grad_norm": 18.862140655517578, + "learning_rate": 5e-05, + "loss": 1.2238, + "num_input_tokens_seen": 107180744, + "step": 1602 + }, + { + "epoch": 0.1817872340425532, + "loss": 1.074562907218933, + "loss_ce": 0.005776361562311649, + "loss_iou": 0.412109375, + "loss_num": 0.048583984375, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 107180744, + "step": 1602 + }, + { + "epoch": 0.18190070921985815, + "grad_norm": 21.62657356262207, + "learning_rate": 5e-05, + "loss": 1.3199, + "num_input_tokens_seen": 107247308, + "step": 1603 + }, + { + "epoch": 0.18190070921985815, + "loss": 0.8942767381668091, + "loss_ce": 0.0031023891642689705, + "loss_iou": 0.4140625, + "loss_num": 0.01239013671875, + "loss_xval": 0.890625, + "num_input_tokens_seen": 107247308, + "step": 1603 + }, + { + "epoch": 0.18201418439716313, + "grad_norm": 22.55217742919922, + "learning_rate": 5e-05, + "loss": 1.4462, + "num_input_tokens_seen": 107314368, + "step": 1604 + }, + { + "epoch": 0.18201418439716313, + "loss": 1.4625619649887085, + "loss_ce": 0.003089308040216565, + "loss_iou": 0.64453125, + "loss_num": 0.033203125, + "loss_xval": 1.4609375, + "num_input_tokens_seen": 107314368, + "step": 1604 + }, + { + "epoch": 0.18212765957446808, + "grad_norm": 35.5177001953125, + "learning_rate": 5e-05, + "loss": 1.5048, + "num_input_tokens_seen": 107381208, + "step": 1605 + }, + { + "epoch": 0.18212765957446808, + "loss": 1.6606642007827759, + "loss_ce": 0.004414317198097706, + "loss_iou": 0.70703125, + "loss_num": 0.048828125, + "loss_xval": 1.65625, + "num_input_tokens_seen": 107381208, + "step": 1605 + }, + { + "epoch": 0.18224113475177306, + "grad_norm": 22.812450408935547, + "learning_rate": 5e-05, + "loss": 1.3313, + "num_input_tokens_seen": 107448136, + "step": 1606 + }, + { + "epoch": 0.18224113475177306, + "loss": 1.2483983039855957, + "loss_ce": 0.007797745056450367, + "loss_iou": 0.53125, + "loss_num": 0.034912109375, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 107448136, + "step": 1606 + }, + { + "epoch": 0.182354609929078, + "grad_norm": 18.525657653808594, + "learning_rate": 5e-05, + "loss": 1.2634, + "num_input_tokens_seen": 107515084, + "step": 1607 + }, + { + "epoch": 0.182354609929078, + "loss": 1.419631004333496, + "loss_ce": 0.008009925484657288, + "loss_iou": 0.59375, + "loss_num": 0.04443359375, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 107515084, + "step": 1607 + }, + { + "epoch": 0.182468085106383, + "grad_norm": 24.029672622680664, + "learning_rate": 5e-05, + "loss": 1.1405, + "num_input_tokens_seen": 107581708, + "step": 1608 + }, + { + "epoch": 0.182468085106383, + "loss": 1.1924984455108643, + "loss_ce": 0.005730768665671349, + "loss_iou": 0.4921875, + "loss_num": 0.0400390625, + "loss_xval": 1.1875, + "num_input_tokens_seen": 107581708, + "step": 1608 + }, + { + "epoch": 0.18258156028368794, + "grad_norm": 31.02732276916504, + "learning_rate": 5e-05, + "loss": 1.4532, + "num_input_tokens_seen": 107648916, + "step": 1609 + }, + { + "epoch": 0.18258156028368794, + "loss": 1.334265947341919, + "loss_ce": 0.005652640946209431, + "loss_iou": 0.5859375, + "loss_num": 0.031494140625, + "loss_xval": 1.328125, + "num_input_tokens_seen": 107648916, + "step": 1609 + }, + { + "epoch": 0.18269503546099292, + "grad_norm": 24.657352447509766, + "learning_rate": 5e-05, + "loss": 1.4496, + "num_input_tokens_seen": 107716304, + "step": 1610 + }, + { + "epoch": 0.18269503546099292, + "loss": 1.6568245887756348, + "loss_ce": 0.005457415245473385, + "loss_iou": 0.6640625, + "loss_num": 0.064453125, + "loss_xval": 1.6484375, + "num_input_tokens_seen": 107716304, + "step": 1610 + }, + { + "epoch": 0.18280851063829787, + "grad_norm": 28.42765998840332, + "learning_rate": 5e-05, + "loss": 1.3598, + "num_input_tokens_seen": 107783592, + "step": 1611 + }, + { + "epoch": 0.18280851063829787, + "loss": 1.4078885316848755, + "loss_ce": 0.007711565587669611, + "loss_iou": 0.57421875, + "loss_num": 0.049560546875, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 107783592, + "step": 1611 + }, + { + "epoch": 0.18292198581560284, + "grad_norm": 30.16036605834961, + "learning_rate": 5e-05, + "loss": 1.2936, + "num_input_tokens_seen": 107850300, + "step": 1612 + }, + { + "epoch": 0.18292198581560284, + "loss": 1.3291258811950684, + "loss_ce": 0.00637194374576211, + "loss_iou": 0.5546875, + "loss_num": 0.04248046875, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 107850300, + "step": 1612 + }, + { + "epoch": 0.1830354609929078, + "grad_norm": 25.982648849487305, + "learning_rate": 5e-05, + "loss": 1.4625, + "num_input_tokens_seen": 107917028, + "step": 1613 + }, + { + "epoch": 0.1830354609929078, + "loss": 1.5728206634521484, + "loss_ce": 0.0054378630593419075, + "loss_iou": 0.6875, + "loss_num": 0.03759765625, + "loss_xval": 1.5703125, + "num_input_tokens_seen": 107917028, + "step": 1613 + }, + { + "epoch": 0.18314893617021277, + "grad_norm": 31.459369659423828, + "learning_rate": 5e-05, + "loss": 1.2225, + "num_input_tokens_seen": 107984556, + "step": 1614 + }, + { + "epoch": 0.18314893617021277, + "loss": 1.1500874757766724, + "loss_ce": 0.006044536828994751, + "loss_iou": 0.5078125, + "loss_num": 0.0252685546875, + "loss_xval": 1.140625, + "num_input_tokens_seen": 107984556, + "step": 1614 + }, + { + "epoch": 0.18326241134751772, + "grad_norm": 22.055583953857422, + "learning_rate": 5e-05, + "loss": 1.34, + "num_input_tokens_seen": 108051996, + "step": 1615 + }, + { + "epoch": 0.18326241134751772, + "loss": 1.3302106857299805, + "loss_ce": 0.005015470553189516, + "loss_iou": 0.5859375, + "loss_num": 0.02978515625, + "loss_xval": 1.328125, + "num_input_tokens_seen": 108051996, + "step": 1615 + }, + { + "epoch": 0.1833758865248227, + "grad_norm": 24.12533950805664, + "learning_rate": 5e-05, + "loss": 1.3954, + "num_input_tokens_seen": 108118508, + "step": 1616 + }, + { + "epoch": 0.1833758865248227, + "loss": 1.4976667165756226, + "loss_ce": 0.003526069689542055, + "loss_iou": 0.625, + "loss_num": 0.04833984375, + "loss_xval": 1.4921875, + "num_input_tokens_seen": 108118508, + "step": 1616 + }, + { + "epoch": 0.18348936170212765, + "grad_norm": 30.14169692993164, + "learning_rate": 5e-05, + "loss": 1.548, + "num_input_tokens_seen": 108185584, + "step": 1617 + }, + { + "epoch": 0.18348936170212765, + "loss": 1.4370262622833252, + "loss_ce": 0.00636230269446969, + "loss_iou": 0.61328125, + "loss_num": 0.04052734375, + "loss_xval": 1.4296875, + "num_input_tokens_seen": 108185584, + "step": 1617 + }, + { + "epoch": 0.18360283687943263, + "grad_norm": 24.321056365966797, + "learning_rate": 5e-05, + "loss": 1.357, + "num_input_tokens_seen": 108253216, + "step": 1618 + }, + { + "epoch": 0.18360283687943263, + "loss": 1.2647746801376343, + "loss_ce": 0.0020793024450540543, + "loss_iou": 0.56640625, + "loss_num": 0.0264892578125, + "loss_xval": 1.265625, + "num_input_tokens_seen": 108253216, + "step": 1618 + }, + { + "epoch": 0.18371631205673758, + "grad_norm": 21.492961883544922, + "learning_rate": 5e-05, + "loss": 1.3713, + "num_input_tokens_seen": 108319988, + "step": 1619 + }, + { + "epoch": 0.18371631205673758, + "loss": 1.3515076637268066, + "loss_ce": 0.004339759238064289, + "loss_iou": 0.58984375, + "loss_num": 0.033203125, + "loss_xval": 1.34375, + "num_input_tokens_seen": 108319988, + "step": 1619 + }, + { + "epoch": 0.18382978723404256, + "grad_norm": 25.56041717529297, + "learning_rate": 5e-05, + "loss": 1.1205, + "num_input_tokens_seen": 108386136, + "step": 1620 + }, + { + "epoch": 0.18382978723404256, + "loss": 0.8886277675628662, + "loss_ce": 0.003862173995003104, + "loss_iou": 0.41015625, + "loss_num": 0.01275634765625, + "loss_xval": 0.8828125, + "num_input_tokens_seen": 108386136, + "step": 1620 + }, + { + "epoch": 0.1839432624113475, + "grad_norm": 23.312673568725586, + "learning_rate": 5e-05, + "loss": 1.3232, + "num_input_tokens_seen": 108452480, + "step": 1621 + }, + { + "epoch": 0.1839432624113475, + "loss": 1.320819616317749, + "loss_ce": 0.00490163080394268, + "loss_iou": 0.56640625, + "loss_num": 0.0361328125, + "loss_xval": 1.3125, + "num_input_tokens_seen": 108452480, + "step": 1621 + }, + { + "epoch": 0.18405673758865249, + "grad_norm": 34.35400390625, + "learning_rate": 5e-05, + "loss": 1.3603, + "num_input_tokens_seen": 108520224, + "step": 1622 + }, + { + "epoch": 0.18405673758865249, + "loss": 1.3381562232971191, + "loss_ce": 0.008077969774603844, + "loss_iou": 0.57421875, + "loss_num": 0.03564453125, + "loss_xval": 1.328125, + "num_input_tokens_seen": 108520224, + "step": 1622 + }, + { + "epoch": 0.18417021276595744, + "grad_norm": 22.072317123413086, + "learning_rate": 5e-05, + "loss": 1.3761, + "num_input_tokens_seen": 108587552, + "step": 1623 + }, + { + "epoch": 0.18417021276595744, + "loss": 1.543907642364502, + "loss_ce": 0.0028919971082359552, + "loss_iou": 0.6875, + "loss_num": 0.03369140625, + "loss_xval": 1.5390625, + "num_input_tokens_seen": 108587552, + "step": 1623 + }, + { + "epoch": 0.18428368794326241, + "grad_norm": 33.984256744384766, + "learning_rate": 5e-05, + "loss": 1.3477, + "num_input_tokens_seen": 108654916, + "step": 1624 + }, + { + "epoch": 0.18428368794326241, + "loss": 1.4144179821014404, + "loss_ce": 0.0037733931094408035, + "loss_iou": 0.61328125, + "loss_num": 0.037109375, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 108654916, + "step": 1624 + }, + { + "epoch": 0.18439716312056736, + "grad_norm": 20.639007568359375, + "learning_rate": 5e-05, + "loss": 1.5626, + "num_input_tokens_seen": 108722768, + "step": 1625 + }, + { + "epoch": 0.18439716312056736, + "loss": 1.592851161956787, + "loss_ce": 0.005448800511658192, + "loss_iou": 0.69921875, + "loss_num": 0.037109375, + "loss_xval": 1.5859375, + "num_input_tokens_seen": 108722768, + "step": 1625 + }, + { + "epoch": 0.18451063829787234, + "grad_norm": 20.888463973999023, + "learning_rate": 5e-05, + "loss": 1.177, + "num_input_tokens_seen": 108789100, + "step": 1626 + }, + { + "epoch": 0.18451063829787234, + "loss": 1.1366088390350342, + "loss_ce": 0.002819752786308527, + "loss_iou": 0.52734375, + "loss_num": 0.0164794921875, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 108789100, + "step": 1626 + }, + { + "epoch": 0.1846241134751773, + "grad_norm": 23.642803192138672, + "learning_rate": 5e-05, + "loss": 1.4291, + "num_input_tokens_seen": 108856136, + "step": 1627 + }, + { + "epoch": 0.1846241134751773, + "loss": 1.3257474899291992, + "loss_ce": 0.0034819107968360186, + "loss_iou": 0.55078125, + "loss_num": 0.04443359375, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 108856136, + "step": 1627 + }, + { + "epoch": 0.18473758865248227, + "grad_norm": 36.768104553222656, + "learning_rate": 5e-05, + "loss": 1.379, + "num_input_tokens_seen": 108922940, + "step": 1628 + }, + { + "epoch": 0.18473758865248227, + "loss": 1.37961745262146, + "loss_ce": 0.006082277279347181, + "loss_iou": 0.578125, + "loss_num": 0.043701171875, + "loss_xval": 1.375, + "num_input_tokens_seen": 108922940, + "step": 1628 + }, + { + "epoch": 0.18485106382978722, + "grad_norm": 17.79433250427246, + "learning_rate": 5e-05, + "loss": 1.8288, + "num_input_tokens_seen": 108990428, + "step": 1629 + }, + { + "epoch": 0.18485106382978722, + "loss": 1.725973129272461, + "loss_ce": 0.0052698878571391106, + "loss_iou": 0.734375, + "loss_num": 0.05078125, + "loss_xval": 1.71875, + "num_input_tokens_seen": 108990428, + "step": 1629 + }, + { + "epoch": 0.1849645390070922, + "grad_norm": 40.61050796508789, + "learning_rate": 5e-05, + "loss": 1.3976, + "num_input_tokens_seen": 109056812, + "step": 1630 + }, + { + "epoch": 0.1849645390070922, + "loss": 1.4244359731674194, + "loss_ce": 0.006467191502451897, + "loss_iou": 0.62890625, + "loss_num": 0.0322265625, + "loss_xval": 1.421875, + "num_input_tokens_seen": 109056812, + "step": 1630 + }, + { + "epoch": 0.18507801418439715, + "grad_norm": 37.362159729003906, + "learning_rate": 5e-05, + "loss": 1.2156, + "num_input_tokens_seen": 109123936, + "step": 1631 + }, + { + "epoch": 0.18507801418439715, + "loss": 1.3268373012542725, + "loss_ce": 0.002618519589304924, + "loss_iou": 0.57421875, + "loss_num": 0.035400390625, + "loss_xval": 1.328125, + "num_input_tokens_seen": 109123936, + "step": 1631 + }, + { + "epoch": 0.18519148936170213, + "grad_norm": 20.64363670349121, + "learning_rate": 5e-05, + "loss": 1.6765, + "num_input_tokens_seen": 109190408, + "step": 1632 + }, + { + "epoch": 0.18519148936170213, + "loss": 1.6596441268920898, + "loss_ce": 0.004370660986751318, + "loss_iou": 0.703125, + "loss_num": 0.049072265625, + "loss_xval": 1.65625, + "num_input_tokens_seen": 109190408, + "step": 1632 + }, + { + "epoch": 0.1853049645390071, + "grad_norm": 40.43166732788086, + "learning_rate": 5e-05, + "loss": 1.3928, + "num_input_tokens_seen": 109258036, + "step": 1633 + }, + { + "epoch": 0.1853049645390071, + "loss": 1.3969075679779053, + "loss_ce": 0.005794279742985964, + "loss_iou": 0.5625, + "loss_num": 0.05322265625, + "loss_xval": 1.390625, + "num_input_tokens_seen": 109258036, + "step": 1633 + }, + { + "epoch": 0.18541843971631206, + "grad_norm": 37.578495025634766, + "learning_rate": 5e-05, + "loss": 1.3347, + "num_input_tokens_seen": 109324892, + "step": 1634 + }, + { + "epoch": 0.18541843971631206, + "loss": 1.4781970977783203, + "loss_ce": 0.0035876973997801542, + "loss_iou": 0.65234375, + "loss_num": 0.034423828125, + "loss_xval": 1.4765625, + "num_input_tokens_seen": 109324892, + "step": 1634 + }, + { + "epoch": 0.18553191489361703, + "grad_norm": 18.454570770263672, + "learning_rate": 5e-05, + "loss": 1.7212, + "num_input_tokens_seen": 109392280, + "step": 1635 + }, + { + "epoch": 0.18553191489361703, + "loss": 1.814410924911499, + "loss_ce": 0.005817204248160124, + "loss_iou": 0.80859375, + "loss_num": 0.0390625, + "loss_xval": 1.8125, + "num_input_tokens_seen": 109392280, + "step": 1635 + }, + { + "epoch": 0.18564539007092198, + "grad_norm": 21.866046905517578, + "learning_rate": 5e-05, + "loss": 1.4779, + "num_input_tokens_seen": 109458720, + "step": 1636 + }, + { + "epoch": 0.18564539007092198, + "loss": 1.6885406970977783, + "loss_ce": 0.010806364938616753, + "loss_iou": 0.69140625, + "loss_num": 0.058837890625, + "loss_xval": 1.6796875, + "num_input_tokens_seen": 109458720, + "step": 1636 + }, + { + "epoch": 0.18575886524822696, + "grad_norm": 29.417692184448242, + "learning_rate": 5e-05, + "loss": 1.2687, + "num_input_tokens_seen": 109526632, + "step": 1637 + }, + { + "epoch": 0.18575886524822696, + "loss": 1.1185927391052246, + "loss_ce": 0.0014052166370674968, + "loss_iou": 0.515625, + "loss_num": 0.0177001953125, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 109526632, + "step": 1637 + }, + { + "epoch": 0.1858723404255319, + "grad_norm": 28.996559143066406, + "learning_rate": 5e-05, + "loss": 1.3985, + "num_input_tokens_seen": 109593824, + "step": 1638 + }, + { + "epoch": 0.1858723404255319, + "loss": 1.529344916343689, + "loss_ce": 0.008837087079882622, + "loss_iou": 0.62890625, + "loss_num": 0.052978515625, + "loss_xval": 1.5234375, + "num_input_tokens_seen": 109593824, + "step": 1638 + }, + { + "epoch": 0.1859858156028369, + "grad_norm": 21.89948272705078, + "learning_rate": 5e-05, + "loss": 1.3761, + "num_input_tokens_seen": 109660608, + "step": 1639 + }, + { + "epoch": 0.1859858156028369, + "loss": 1.473021388053894, + "loss_ce": 0.004271399695426226, + "loss_iou": 0.5703125, + "loss_num": 0.06494140625, + "loss_xval": 1.46875, + "num_input_tokens_seen": 109660608, + "step": 1639 + }, + { + "epoch": 0.18609929078014184, + "grad_norm": 27.388418197631836, + "learning_rate": 5e-05, + "loss": 1.5414, + "num_input_tokens_seen": 109727452, + "step": 1640 + }, + { + "epoch": 0.18609929078014184, + "loss": 1.7286033630371094, + "loss_ce": 0.005458838306367397, + "loss_iou": 0.7109375, + "loss_num": 0.06005859375, + "loss_xval": 1.7265625, + "num_input_tokens_seen": 109727452, + "step": 1640 + }, + { + "epoch": 0.18621276595744682, + "grad_norm": 20.976133346557617, + "learning_rate": 5e-05, + "loss": 1.4646, + "num_input_tokens_seen": 109795228, + "step": 1641 + }, + { + "epoch": 0.18621276595744682, + "loss": 1.5921611785888672, + "loss_ce": 0.0037822669837623835, + "loss_iou": 0.6796875, + "loss_num": 0.045654296875, + "loss_xval": 1.5859375, + "num_input_tokens_seen": 109795228, + "step": 1641 + }, + { + "epoch": 0.18632624113475177, + "grad_norm": 17.728391647338867, + "learning_rate": 5e-05, + "loss": 1.3701, + "num_input_tokens_seen": 109863180, + "step": 1642 + }, + { + "epoch": 0.18632624113475177, + "loss": 1.4138400554656982, + "loss_ce": 0.006125199608504772, + "loss_iou": 0.59375, + "loss_num": 0.04443359375, + "loss_xval": 1.40625, + "num_input_tokens_seen": 109863180, + "step": 1642 + }, + { + "epoch": 0.18643971631205675, + "grad_norm": 28.75685691833496, + "learning_rate": 5e-05, + "loss": 1.3268, + "num_input_tokens_seen": 109930212, + "step": 1643 + }, + { + "epoch": 0.18643971631205675, + "loss": 1.200746774673462, + "loss_ce": 0.007875731214880943, + "loss_iou": 0.51171875, + "loss_num": 0.033203125, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 109930212, + "step": 1643 + }, + { + "epoch": 0.1865531914893617, + "grad_norm": 39.1341552734375, + "learning_rate": 5e-05, + "loss": 1.47, + "num_input_tokens_seen": 109996108, + "step": 1644 + }, + { + "epoch": 0.1865531914893617, + "loss": 1.685747504234314, + "loss_ce": 0.009966226294636726, + "loss_iou": 0.70703125, + "loss_num": 0.05322265625, + "loss_xval": 1.671875, + "num_input_tokens_seen": 109996108, + "step": 1644 + }, + { + "epoch": 0.18666666666666668, + "grad_norm": 17.19142723083496, + "learning_rate": 5e-05, + "loss": 1.3064, + "num_input_tokens_seen": 110062732, + "step": 1645 + }, + { + "epoch": 0.18666666666666668, + "loss": 1.3708469867706299, + "loss_ce": 0.007077428512275219, + "loss_iou": 0.55078125, + "loss_num": 0.052490234375, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 110062732, + "step": 1645 + }, + { + "epoch": 0.18678014184397163, + "grad_norm": 24.431875228881836, + "learning_rate": 5e-05, + "loss": 1.5021, + "num_input_tokens_seen": 110130096, + "step": 1646 + }, + { + "epoch": 0.18678014184397163, + "loss": 1.404362440109253, + "loss_ce": 0.005924842786043882, + "loss_iou": 0.57421875, + "loss_num": 0.049560546875, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 110130096, + "step": 1646 + }, + { + "epoch": 0.1868936170212766, + "grad_norm": 37.75514221191406, + "learning_rate": 5e-05, + "loss": 1.2976, + "num_input_tokens_seen": 110197104, + "step": 1647 + }, + { + "epoch": 0.1868936170212766, + "loss": 1.2709565162658691, + "loss_ce": 0.006308184936642647, + "loss_iou": 0.51953125, + "loss_num": 0.04541015625, + "loss_xval": 1.265625, + "num_input_tokens_seen": 110197104, + "step": 1647 + }, + { + "epoch": 0.18700709219858155, + "grad_norm": 20.576904296875, + "learning_rate": 5e-05, + "loss": 1.4817, + "num_input_tokens_seen": 110264104, + "step": 1648 + }, + { + "epoch": 0.18700709219858155, + "loss": 1.4870109558105469, + "loss_ce": 0.009471883997321129, + "loss_iou": 0.6796875, + "loss_num": 0.0233154296875, + "loss_xval": 1.4765625, + "num_input_tokens_seen": 110264104, + "step": 1648 + }, + { + "epoch": 0.18712056737588653, + "grad_norm": 15.440882682800293, + "learning_rate": 5e-05, + "loss": 1.2798, + "num_input_tokens_seen": 110330716, + "step": 1649 + }, + { + "epoch": 0.18712056737588653, + "loss": 1.2046089172363281, + "loss_ce": 0.0063667893409729, + "loss_iou": 0.50390625, + "loss_num": 0.038330078125, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 110330716, + "step": 1649 + }, + { + "epoch": 0.18723404255319148, + "grad_norm": 19.362497329711914, + "learning_rate": 5e-05, + "loss": 1.2797, + "num_input_tokens_seen": 110397888, + "step": 1650 + }, + { + "epoch": 0.18723404255319148, + "loss": 1.4337743520736694, + "loss_ce": 0.004086825530976057, + "loss_iou": 0.59375, + "loss_num": 0.048095703125, + "loss_xval": 1.4296875, + "num_input_tokens_seen": 110397888, + "step": 1650 + }, + { + "epoch": 0.18734751773049646, + "grad_norm": 20.720401763916016, + "learning_rate": 5e-05, + "loss": 1.4477, + "num_input_tokens_seen": 110465156, + "step": 1651 + }, + { + "epoch": 0.18734751773049646, + "loss": 1.4766618013381958, + "loss_ce": 0.003028996754437685, + "loss_iou": 0.64453125, + "loss_num": 0.037353515625, + "loss_xval": 1.4765625, + "num_input_tokens_seen": 110465156, + "step": 1651 + }, + { + "epoch": 0.1874609929078014, + "grad_norm": 38.58795166015625, + "learning_rate": 5e-05, + "loss": 1.3882, + "num_input_tokens_seen": 110532528, + "step": 1652 + }, + { + "epoch": 0.1874609929078014, + "loss": 1.2840831279754639, + "loss_ce": 0.0018566198414191604, + "loss_iou": 0.54296875, + "loss_num": 0.03955078125, + "loss_xval": 1.28125, + "num_input_tokens_seen": 110532528, + "step": 1652 + }, + { + "epoch": 0.1875744680851064, + "grad_norm": 20.83867073059082, + "learning_rate": 5e-05, + "loss": 1.532, + "num_input_tokens_seen": 110600016, + "step": 1653 + }, + { + "epoch": 0.1875744680851064, + "loss": 1.673654556274414, + "loss_ce": 0.002756234258413315, + "loss_iou": 0.75390625, + "loss_num": 0.032470703125, + "loss_xval": 1.671875, + "num_input_tokens_seen": 110600016, + "step": 1653 + }, + { + "epoch": 0.18768794326241134, + "grad_norm": 299.7466125488281, + "learning_rate": 5e-05, + "loss": 1.4822, + "num_input_tokens_seen": 110667380, + "step": 1654 + }, + { + "epoch": 0.18768794326241134, + "loss": 1.4074673652648926, + "loss_ce": 0.004147074650973082, + "loss_iou": 0.5859375, + "loss_num": 0.04638671875, + "loss_xval": 1.40625, + "num_input_tokens_seen": 110667380, + "step": 1654 + }, + { + "epoch": 0.18780141843971632, + "grad_norm": 35.661468505859375, + "learning_rate": 5e-05, + "loss": 1.5181, + "num_input_tokens_seen": 110733836, + "step": 1655 + }, + { + "epoch": 0.18780141843971632, + "loss": 1.5584932565689087, + "loss_ce": 0.005270575173199177, + "loss_iou": 0.640625, + "loss_num": 0.05419921875, + "loss_xval": 1.5546875, + "num_input_tokens_seen": 110733836, + "step": 1655 + }, + { + "epoch": 0.18791489361702127, + "grad_norm": 24.4388427734375, + "learning_rate": 5e-05, + "loss": 1.4696, + "num_input_tokens_seen": 110801164, + "step": 1656 + }, + { + "epoch": 0.18791489361702127, + "loss": 1.3438622951507568, + "loss_ce": 0.006948186084628105, + "loss_iou": 0.6171875, + "loss_num": 0.0205078125, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 110801164, + "step": 1656 + }, + { + "epoch": 0.18802836879432624, + "grad_norm": 52.87391662597656, + "learning_rate": 5e-05, + "loss": 1.1827, + "num_input_tokens_seen": 110867600, + "step": 1657 + }, + { + "epoch": 0.18802836879432624, + "loss": 1.0635299682617188, + "loss_ce": 0.0071335164830088615, + "loss_iou": 0.4296875, + "loss_num": 0.03955078125, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 110867600, + "step": 1657 + }, + { + "epoch": 0.1881418439716312, + "grad_norm": 21.499767303466797, + "learning_rate": 5e-05, + "loss": 1.445, + "num_input_tokens_seen": 110935256, + "step": 1658 + }, + { + "epoch": 0.1881418439716312, + "loss": 1.471495270729065, + "loss_ce": 0.005919098388403654, + "loss_iou": 0.58984375, + "loss_num": 0.056640625, + "loss_xval": 1.46875, + "num_input_tokens_seen": 110935256, + "step": 1658 + }, + { + "epoch": 0.18825531914893617, + "grad_norm": 17.252416610717773, + "learning_rate": 5e-05, + "loss": 1.1482, + "num_input_tokens_seen": 111001096, + "step": 1659 + }, + { + "epoch": 0.18825531914893617, + "loss": 1.1423208713531494, + "loss_ce": 0.002550306962803006, + "loss_iou": 0.4765625, + "loss_num": 0.037353515625, + "loss_xval": 1.140625, + "num_input_tokens_seen": 111001096, + "step": 1659 + }, + { + "epoch": 0.18836879432624112, + "grad_norm": 45.67679977416992, + "learning_rate": 5e-05, + "loss": 1.283, + "num_input_tokens_seen": 111067280, + "step": 1660 + }, + { + "epoch": 0.18836879432624112, + "loss": 1.4065985679626465, + "loss_ce": 0.003522462211549282, + "loss_iou": 0.56640625, + "loss_num": 0.05419921875, + "loss_xval": 1.40625, + "num_input_tokens_seen": 111067280, + "step": 1660 + }, + { + "epoch": 0.1884822695035461, + "grad_norm": 18.50098419189453, + "learning_rate": 5e-05, + "loss": 1.0531, + "num_input_tokens_seen": 111134084, + "step": 1661 + }, + { + "epoch": 0.1884822695035461, + "loss": 1.1498006582260132, + "loss_ce": 0.0023397437762469053, + "loss_iou": 0.50390625, + "loss_num": 0.0277099609375, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 111134084, + "step": 1661 + }, + { + "epoch": 0.18859574468085105, + "grad_norm": 22.217544555664062, + "learning_rate": 5e-05, + "loss": 1.4103, + "num_input_tokens_seen": 111201332, + "step": 1662 + }, + { + "epoch": 0.18859574468085105, + "loss": 1.2726593017578125, + "loss_ce": 0.007522599305957556, + "loss_iou": 0.55078125, + "loss_num": 0.031982421875, + "loss_xval": 1.265625, + "num_input_tokens_seen": 111201332, + "step": 1662 + }, + { + "epoch": 0.18870921985815603, + "grad_norm": 35.11845779418945, + "learning_rate": 5e-05, + "loss": 1.4863, + "num_input_tokens_seen": 111268392, + "step": 1663 + }, + { + "epoch": 0.18870921985815603, + "loss": 1.5535647869110107, + "loss_ce": 0.0032718575093895197, + "loss_iou": 0.63671875, + "loss_num": 0.054931640625, + "loss_xval": 1.546875, + "num_input_tokens_seen": 111268392, + "step": 1663 + }, + { + "epoch": 0.18882269503546098, + "grad_norm": 21.208341598510742, + "learning_rate": 5e-05, + "loss": 1.5608, + "num_input_tokens_seen": 111335176, + "step": 1664 + }, + { + "epoch": 0.18882269503546098, + "loss": 1.6476037502288818, + "loss_ce": 0.006002211943268776, + "loss_iou": 0.7109375, + "loss_num": 0.044189453125, + "loss_xval": 1.640625, + "num_input_tokens_seen": 111335176, + "step": 1664 + }, + { + "epoch": 0.18893617021276596, + "grad_norm": 18.31223487854004, + "learning_rate": 5e-05, + "loss": 1.264, + "num_input_tokens_seen": 111401384, + "step": 1665 + }, + { + "epoch": 0.18893617021276596, + "loss": 1.2400612831115723, + "loss_ce": 0.0037331385537981987, + "loss_iou": 0.54296875, + "loss_num": 0.0306396484375, + "loss_xval": 1.234375, + "num_input_tokens_seen": 111401384, + "step": 1665 + }, + { + "epoch": 0.1890496453900709, + "grad_norm": 25.199398040771484, + "learning_rate": 5e-05, + "loss": 1.1436, + "num_input_tokens_seen": 111468724, + "step": 1666 + }, + { + "epoch": 0.1890496453900709, + "loss": 1.2094382047653198, + "loss_ce": 0.004848301410675049, + "loss_iou": 0.546875, + "loss_num": 0.021484375, + "loss_xval": 1.203125, + "num_input_tokens_seen": 111468724, + "step": 1666 + }, + { + "epoch": 0.1891631205673759, + "grad_norm": 56.12299728393555, + "learning_rate": 5e-05, + "loss": 1.652, + "num_input_tokens_seen": 111535988, + "step": 1667 + }, + { + "epoch": 0.1891631205673759, + "loss": 1.4921823740005493, + "loss_ce": 0.0058541977778077126, + "loss_iou": 0.609375, + "loss_num": 0.052978515625, + "loss_xval": 1.484375, + "num_input_tokens_seen": 111535988, + "step": 1667 + }, + { + "epoch": 0.18927659574468086, + "grad_norm": 28.489870071411133, + "learning_rate": 5e-05, + "loss": 1.4322, + "num_input_tokens_seen": 111604192, + "step": 1668 + }, + { + "epoch": 0.18927659574468086, + "loss": 1.3138033151626587, + "loss_ce": 0.005209492519497871, + "loss_iou": 0.56640625, + "loss_num": 0.035400390625, + "loss_xval": 1.3125, + "num_input_tokens_seen": 111604192, + "step": 1668 + }, + { + "epoch": 0.18939007092198581, + "grad_norm": 23.437259674072266, + "learning_rate": 5e-05, + "loss": 1.3764, + "num_input_tokens_seen": 111671668, + "step": 1669 + }, + { + "epoch": 0.18939007092198581, + "loss": 1.7248001098632812, + "loss_ce": 0.008003261871635914, + "loss_iou": 0.7421875, + "loss_num": 0.047119140625, + "loss_xval": 1.71875, + "num_input_tokens_seen": 111671668, + "step": 1669 + }, + { + "epoch": 0.1895035460992908, + "grad_norm": 15.297697067260742, + "learning_rate": 5e-05, + "loss": 1.0315, + "num_input_tokens_seen": 111738016, + "step": 1670 + }, + { + "epoch": 0.1895035460992908, + "loss": 1.0790050029754639, + "loss_ce": 0.005030425265431404, + "loss_iou": 0.451171875, + "loss_num": 0.034423828125, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 111738016, + "step": 1670 + }, + { + "epoch": 0.18961702127659574, + "grad_norm": 38.07169723510742, + "learning_rate": 5e-05, + "loss": 1.3955, + "num_input_tokens_seen": 111804672, + "step": 1671 + }, + { + "epoch": 0.18961702127659574, + "loss": 1.4414900541305542, + "loss_ce": 0.003990008495748043, + "loss_iou": 0.62109375, + "loss_num": 0.039306640625, + "loss_xval": 1.4375, + "num_input_tokens_seen": 111804672, + "step": 1671 + }, + { + "epoch": 0.18973049645390072, + "grad_norm": 43.12104034423828, + "learning_rate": 5e-05, + "loss": 1.4089, + "num_input_tokens_seen": 111871724, + "step": 1672 + }, + { + "epoch": 0.18973049645390072, + "loss": 1.2216918468475342, + "loss_ce": 0.0039182668551802635, + "loss_iou": 0.5390625, + "loss_num": 0.02783203125, + "loss_xval": 1.21875, + "num_input_tokens_seen": 111871724, + "step": 1672 + }, + { + "epoch": 0.18984397163120567, + "grad_norm": 13.617021560668945, + "learning_rate": 5e-05, + "loss": 1.3452, + "num_input_tokens_seen": 111938104, + "step": 1673 + }, + { + "epoch": 0.18984397163120567, + "loss": 1.2122626304626465, + "loss_ce": 0.007672854699194431, + "loss_iou": 0.474609375, + "loss_num": 0.051025390625, + "loss_xval": 1.203125, + "num_input_tokens_seen": 111938104, + "step": 1673 + }, + { + "epoch": 0.18995744680851065, + "grad_norm": 22.63729476928711, + "learning_rate": 5e-05, + "loss": 1.5357, + "num_input_tokens_seen": 112005236, + "step": 1674 + }, + { + "epoch": 0.18995744680851065, + "loss": 1.4736108779907227, + "loss_ce": 0.0038843490183353424, + "loss_iou": 0.640625, + "loss_num": 0.038818359375, + "loss_xval": 1.46875, + "num_input_tokens_seen": 112005236, + "step": 1674 + }, + { + "epoch": 0.1900709219858156, + "grad_norm": 24.37607765197754, + "learning_rate": 5e-05, + "loss": 1.4067, + "num_input_tokens_seen": 112072264, + "step": 1675 + }, + { + "epoch": 0.1900709219858156, + "loss": 1.432771921157837, + "loss_ce": 0.0021078430581837893, + "loss_iou": 0.625, + "loss_num": 0.03662109375, + "loss_xval": 1.4296875, + "num_input_tokens_seen": 112072264, + "step": 1675 + }, + { + "epoch": 0.19018439716312058, + "grad_norm": 38.3804817199707, + "learning_rate": 5e-05, + "loss": 1.1861, + "num_input_tokens_seen": 112137928, + "step": 1676 + }, + { + "epoch": 0.19018439716312058, + "loss": 1.1547253131866455, + "loss_ce": 0.007157464977353811, + "loss_iou": 0.4765625, + "loss_num": 0.0390625, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 112137928, + "step": 1676 + }, + { + "epoch": 0.19029787234042553, + "grad_norm": 21.33831787109375, + "learning_rate": 5e-05, + "loss": 1.4165, + "num_input_tokens_seen": 112205552, + "step": 1677 + }, + { + "epoch": 0.19029787234042553, + "loss": 1.550480842590332, + "loss_ce": 0.00458245724439621, + "loss_iou": 0.6640625, + "loss_num": 0.044677734375, + "loss_xval": 1.546875, + "num_input_tokens_seen": 112205552, + "step": 1677 + }, + { + "epoch": 0.1904113475177305, + "grad_norm": 29.68856430053711, + "learning_rate": 5e-05, + "loss": 1.4, + "num_input_tokens_seen": 112273236, + "step": 1678 + }, + { + "epoch": 0.1904113475177305, + "loss": 1.2819843292236328, + "loss_ce": 0.007570184767246246, + "loss_iou": 0.5859375, + "loss_num": 0.020263671875, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 112273236, + "step": 1678 + }, + { + "epoch": 0.19052482269503546, + "grad_norm": 19.480424880981445, + "learning_rate": 5e-05, + "loss": 1.234, + "num_input_tokens_seen": 112340372, + "step": 1679 + }, + { + "epoch": 0.19052482269503546, + "loss": 1.168894648551941, + "loss_ce": 0.0058087510988116264, + "loss_iou": 0.5, + "loss_num": 0.03271484375, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 112340372, + "step": 1679 + }, + { + "epoch": 0.19063829787234043, + "grad_norm": 31.894119262695312, + "learning_rate": 5e-05, + "loss": 1.4653, + "num_input_tokens_seen": 112407208, + "step": 1680 + }, + { + "epoch": 0.19063829787234043, + "loss": 1.428551197052002, + "loss_ce": 0.005211310926824808, + "loss_iou": 0.54296875, + "loss_num": 0.06787109375, + "loss_xval": 1.421875, + "num_input_tokens_seen": 112407208, + "step": 1680 + }, + { + "epoch": 0.19075177304964538, + "grad_norm": 19.772607803344727, + "learning_rate": 5e-05, + "loss": 1.4913, + "num_input_tokens_seen": 112472884, + "step": 1681 + }, + { + "epoch": 0.19075177304964538, + "loss": 1.8160293102264404, + "loss_ce": 0.007923897355794907, + "loss_iou": 0.734375, + "loss_num": 0.06689453125, + "loss_xval": 1.8046875, + "num_input_tokens_seen": 112472884, + "step": 1681 + }, + { + "epoch": 0.19086524822695036, + "grad_norm": 24.90383529663086, + "learning_rate": 5e-05, + "loss": 1.3709, + "num_input_tokens_seen": 112540356, + "step": 1682 + }, + { + "epoch": 0.19086524822695036, + "loss": 1.2292962074279785, + "loss_ce": 0.008593074977397919, + "loss_iou": 0.515625, + "loss_num": 0.037109375, + "loss_xval": 1.21875, + "num_input_tokens_seen": 112540356, + "step": 1682 + }, + { + "epoch": 0.1909787234042553, + "grad_norm": 57.63435745239258, + "learning_rate": 5e-05, + "loss": 1.2526, + "num_input_tokens_seen": 112606408, + "step": 1683 + }, + { + "epoch": 0.1909787234042553, + "loss": 1.031533122062683, + "loss_ce": 0.002510856604203582, + "loss_iou": 0.447265625, + "loss_num": 0.0269775390625, + "loss_xval": 1.03125, + "num_input_tokens_seen": 112606408, + "step": 1683 + }, + { + "epoch": 0.1910921985815603, + "grad_norm": 24.412479400634766, + "learning_rate": 5e-05, + "loss": 1.5238, + "num_input_tokens_seen": 112674212, + "step": 1684 + }, + { + "epoch": 0.1910921985815603, + "loss": 1.3369667530059814, + "loss_ce": 0.0034706327132880688, + "loss_iou": 0.59765625, + "loss_num": 0.0279541015625, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 112674212, + "step": 1684 + }, + { + "epoch": 0.19120567375886524, + "grad_norm": 34.589111328125, + "learning_rate": 5e-05, + "loss": 1.2922, + "num_input_tokens_seen": 112741124, + "step": 1685 + }, + { + "epoch": 0.19120567375886524, + "loss": 1.2786710262298584, + "loss_ce": 0.004745249170809984, + "loss_iou": 0.56640625, + "loss_num": 0.027587890625, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 112741124, + "step": 1685 + }, + { + "epoch": 0.19131914893617022, + "grad_norm": 28.890058517456055, + "learning_rate": 5e-05, + "loss": 1.384, + "num_input_tokens_seen": 112807248, + "step": 1686 + }, + { + "epoch": 0.19131914893617022, + "loss": 1.2975611686706543, + "loss_ce": 0.0075220270082354546, + "loss_iou": 0.546875, + "loss_num": 0.0390625, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 112807248, + "step": 1686 + }, + { + "epoch": 0.19143262411347517, + "grad_norm": 27.486961364746094, + "learning_rate": 5e-05, + "loss": 1.4967, + "num_input_tokens_seen": 112874568, + "step": 1687 + }, + { + "epoch": 0.19143262411347517, + "loss": 1.33351731300354, + "loss_ce": 0.002950905356556177, + "loss_iou": 0.5859375, + "loss_num": 0.032470703125, + "loss_xval": 1.328125, + "num_input_tokens_seen": 112874568, + "step": 1687 + }, + { + "epoch": 0.19154609929078015, + "grad_norm": 22.82539176940918, + "learning_rate": 5e-05, + "loss": 1.3352, + "num_input_tokens_seen": 112941356, + "step": 1688 + }, + { + "epoch": 0.19154609929078015, + "loss": 1.1888236999511719, + "loss_ce": 0.006206413730978966, + "loss_iou": 0.5390625, + "loss_num": 0.0213623046875, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 112941356, + "step": 1688 + }, + { + "epoch": 0.1916595744680851, + "grad_norm": 25.635648727416992, + "learning_rate": 5e-05, + "loss": 1.3029, + "num_input_tokens_seen": 113008160, + "step": 1689 + }, + { + "epoch": 0.1916595744680851, + "loss": 1.186838984489441, + "loss_ce": 0.005930878221988678, + "loss_iou": 0.458984375, + "loss_num": 0.052490234375, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 113008160, + "step": 1689 + }, + { + "epoch": 0.19177304964539008, + "grad_norm": 27.1005916595459, + "learning_rate": 5e-05, + "loss": 1.1986, + "num_input_tokens_seen": 113073204, + "step": 1690 + }, + { + "epoch": 0.19177304964539008, + "loss": 1.1365416049957275, + "loss_ce": 0.003484988585114479, + "loss_iou": 0.52734375, + "loss_num": 0.0162353515625, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 113073204, + "step": 1690 + }, + { + "epoch": 0.19188652482269503, + "grad_norm": 33.52342987060547, + "learning_rate": 5e-05, + "loss": 1.3796, + "num_input_tokens_seen": 113139872, + "step": 1691 + }, + { + "epoch": 0.19188652482269503, + "loss": 1.5300476551055908, + "loss_ce": 0.0075867800042033195, + "loss_iou": 0.65234375, + "loss_num": 0.04296875, + "loss_xval": 1.5234375, + "num_input_tokens_seen": 113139872, + "step": 1691 + }, + { + "epoch": 0.192, + "grad_norm": 34.171634674072266, + "learning_rate": 5e-05, + "loss": 1.3311, + "num_input_tokens_seen": 113206284, + "step": 1692 + }, + { + "epoch": 0.192, + "loss": 1.3665919303894043, + "loss_ce": 0.0037990128621459007, + "loss_iou": 0.6171875, + "loss_num": 0.026123046875, + "loss_xval": 1.359375, + "num_input_tokens_seen": 113206284, + "step": 1692 + }, + { + "epoch": 0.19211347517730495, + "grad_norm": 25.811891555786133, + "learning_rate": 5e-05, + "loss": 1.5961, + "num_input_tokens_seen": 113273240, + "step": 1693 + }, + { + "epoch": 0.19211347517730495, + "loss": 1.6635539531707764, + "loss_ce": 0.004374241456389427, + "loss_iou": 0.67578125, + "loss_num": 0.06103515625, + "loss_xval": 1.65625, + "num_input_tokens_seen": 113273240, + "step": 1693 + }, + { + "epoch": 0.19222695035460993, + "grad_norm": 14.67483139038086, + "learning_rate": 5e-05, + "loss": 1.3601, + "num_input_tokens_seen": 113339260, + "step": 1694 + }, + { + "epoch": 0.19222695035460993, + "loss": 1.4860737323760986, + "loss_ce": 0.005604945123195648, + "loss_iou": 0.59375, + "loss_num": 0.057861328125, + "loss_xval": 1.484375, + "num_input_tokens_seen": 113339260, + "step": 1694 + }, + { + "epoch": 0.19234042553191488, + "grad_norm": 19.207172393798828, + "learning_rate": 5e-05, + "loss": 1.3746, + "num_input_tokens_seen": 113406312, + "step": 1695 + }, + { + "epoch": 0.19234042553191488, + "loss": 1.5111967325210571, + "loss_ce": 0.004177652299404144, + "loss_iou": 0.61328125, + "loss_num": 0.056396484375, + "loss_xval": 1.5078125, + "num_input_tokens_seen": 113406312, + "step": 1695 + }, + { + "epoch": 0.19245390070921986, + "grad_norm": 27.846498489379883, + "learning_rate": 5e-05, + "loss": 1.6796, + "num_input_tokens_seen": 113473020, + "step": 1696 + }, + { + "epoch": 0.19245390070921986, + "loss": 1.6458284854888916, + "loss_ce": 0.0022738513071089983, + "loss_iou": 0.7265625, + "loss_num": 0.0390625, + "loss_xval": 1.640625, + "num_input_tokens_seen": 113473020, + "step": 1696 + }, + { + "epoch": 0.1925673758865248, + "grad_norm": 19.869789123535156, + "learning_rate": 5e-05, + "loss": 1.381, + "num_input_tokens_seen": 113539968, + "step": 1697 + }, + { + "epoch": 0.1925673758865248, + "loss": 1.288120150566101, + "loss_ce": 0.005405280739068985, + "loss_iou": 0.5390625, + "loss_num": 0.041015625, + "loss_xval": 1.28125, + "num_input_tokens_seen": 113539968, + "step": 1697 + }, + { + "epoch": 0.1926808510638298, + "grad_norm": 40.278533935546875, + "learning_rate": 5e-05, + "loss": 1.5407, + "num_input_tokens_seen": 113606072, + "step": 1698 + }, + { + "epoch": 0.1926808510638298, + "loss": 1.724462866783142, + "loss_ce": 0.011572219431400299, + "loss_iou": 0.73046875, + "loss_num": 0.051025390625, + "loss_xval": 1.7109375, + "num_input_tokens_seen": 113606072, + "step": 1698 + }, + { + "epoch": 0.19279432624113474, + "grad_norm": 21.487552642822266, + "learning_rate": 5e-05, + "loss": 1.5828, + "num_input_tokens_seen": 113672084, + "step": 1699 + }, + { + "epoch": 0.19279432624113474, + "loss": 1.7643132209777832, + "loss_ce": 0.0074772401712834835, + "loss_iou": 0.75, + "loss_num": 0.050537109375, + "loss_xval": 1.7578125, + "num_input_tokens_seen": 113672084, + "step": 1699 + }, + { + "epoch": 0.19290780141843972, + "grad_norm": 17.27366065979004, + "learning_rate": 5e-05, + "loss": 1.2954, + "num_input_tokens_seen": 113738880, + "step": 1700 + }, + { + "epoch": 0.19290780141843972, + "loss": 1.2565821409225464, + "loss_ce": 0.002675952622666955, + "loss_iou": 0.56640625, + "loss_num": 0.023681640625, + "loss_xval": 1.25, + "num_input_tokens_seen": 113738880, + "step": 1700 + }, + { + "epoch": 0.19302127659574467, + "grad_norm": 24.10215187072754, + "learning_rate": 5e-05, + "loss": 1.4849, + "num_input_tokens_seen": 113806284, + "step": 1701 + }, + { + "epoch": 0.19302127659574467, + "loss": 1.3677277565002441, + "loss_ce": 0.006399650126695633, + "loss_iou": 0.58984375, + "loss_num": 0.035888671875, + "loss_xval": 1.359375, + "num_input_tokens_seen": 113806284, + "step": 1701 + }, + { + "epoch": 0.19313475177304965, + "grad_norm": 23.356067657470703, + "learning_rate": 5e-05, + "loss": 1.338, + "num_input_tokens_seen": 113873276, + "step": 1702 + }, + { + "epoch": 0.19313475177304965, + "loss": 1.3461531400680542, + "loss_ce": 0.0043562352657318115, + "loss_iou": 0.55078125, + "loss_num": 0.04833984375, + "loss_xval": 1.34375, + "num_input_tokens_seen": 113873276, + "step": 1702 + }, + { + "epoch": 0.19324822695035462, + "grad_norm": 35.618595123291016, + "learning_rate": 5e-05, + "loss": 1.4644, + "num_input_tokens_seen": 113939964, + "step": 1703 + }, + { + "epoch": 0.19324822695035462, + "loss": 1.4984759092330933, + "loss_ce": 0.0053118453361094, + "loss_iou": 0.66015625, + "loss_num": 0.03466796875, + "loss_xval": 1.4921875, + "num_input_tokens_seen": 113939964, + "step": 1703 + }, + { + "epoch": 0.19336170212765957, + "grad_norm": 23.256681442260742, + "learning_rate": 5e-05, + "loss": 1.5197, + "num_input_tokens_seen": 114005864, + "step": 1704 + }, + { + "epoch": 0.19336170212765957, + "loss": 1.5505738258361816, + "loss_ce": 0.006628434173762798, + "loss_iou": 0.63671875, + "loss_num": 0.0537109375, + "loss_xval": 1.546875, + "num_input_tokens_seen": 114005864, + "step": 1704 + }, + { + "epoch": 0.19347517730496455, + "grad_norm": 70.02191162109375, + "learning_rate": 5e-05, + "loss": 1.6412, + "num_input_tokens_seen": 114072252, + "step": 1705 + }, + { + "epoch": 0.19347517730496455, + "loss": 1.489469051361084, + "loss_ce": 0.003140921238809824, + "loss_iou": 0.65625, + "loss_num": 0.03515625, + "loss_xval": 1.484375, + "num_input_tokens_seen": 114072252, + "step": 1705 + }, + { + "epoch": 0.1935886524822695, + "grad_norm": 25.170183181762695, + "learning_rate": 5e-05, + "loss": 1.6592, + "num_input_tokens_seen": 114139864, + "step": 1706 + }, + { + "epoch": 0.1935886524822695, + "loss": 1.5053844451904297, + "loss_ce": 0.005384416319429874, + "loss_iou": 0.6875, + "loss_num": 0.0252685546875, + "loss_xval": 1.5, + "num_input_tokens_seen": 114139864, + "step": 1706 + }, + { + "epoch": 0.19370212765957448, + "grad_norm": 16.36402130126953, + "learning_rate": 5e-05, + "loss": 1.1647, + "num_input_tokens_seen": 114206436, + "step": 1707 + }, + { + "epoch": 0.19370212765957448, + "loss": 1.2931904792785645, + "loss_ce": 0.007057568058371544, + "loss_iou": 0.55859375, + "loss_num": 0.033447265625, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 114206436, + "step": 1707 + }, + { + "epoch": 0.19381560283687943, + "grad_norm": 37.36726379394531, + "learning_rate": 5e-05, + "loss": 1.4568, + "num_input_tokens_seen": 114273208, + "step": 1708 + }, + { + "epoch": 0.19381560283687943, + "loss": 1.5170038938522339, + "loss_ce": 0.004308554343879223, + "loss_iou": 0.66015625, + "loss_num": 0.038818359375, + "loss_xval": 1.515625, + "num_input_tokens_seen": 114273208, + "step": 1708 + }, + { + "epoch": 0.1939290780141844, + "grad_norm": 22.09754753112793, + "learning_rate": 5e-05, + "loss": 1.5589, + "num_input_tokens_seen": 114340552, + "step": 1709 + }, + { + "epoch": 0.1939290780141844, + "loss": 1.3234261274337769, + "loss_ce": 0.003601813456043601, + "loss_iou": 0.5625, + "loss_num": 0.03955078125, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 114340552, + "step": 1709 + }, + { + "epoch": 0.19404255319148936, + "grad_norm": 18.837535858154297, + "learning_rate": 5e-05, + "loss": 1.204, + "num_input_tokens_seen": 114407840, + "step": 1710 + }, + { + "epoch": 0.19404255319148936, + "loss": 1.3341768980026245, + "loss_ce": 0.007028486579656601, + "loss_iou": 0.55859375, + "loss_num": 0.04248046875, + "loss_xval": 1.328125, + "num_input_tokens_seen": 114407840, + "step": 1710 + }, + { + "epoch": 0.19415602836879434, + "grad_norm": 32.701656341552734, + "learning_rate": 5e-05, + "loss": 1.2725, + "num_input_tokens_seen": 114473876, + "step": 1711 + }, + { + "epoch": 0.19415602836879434, + "loss": 1.11211097240448, + "loss_ce": 0.005665620788931847, + "loss_iou": 0.470703125, + "loss_num": 0.033203125, + "loss_xval": 1.109375, + "num_input_tokens_seen": 114473876, + "step": 1711 + }, + { + "epoch": 0.1942695035460993, + "grad_norm": 24.164703369140625, + "learning_rate": 5e-05, + "loss": 1.6067, + "num_input_tokens_seen": 114541364, + "step": 1712 + }, + { + "epoch": 0.1942695035460993, + "loss": 1.5400677919387817, + "loss_ce": 0.003934991545975208, + "loss_iou": 0.6171875, + "loss_num": 0.059326171875, + "loss_xval": 1.5390625, + "num_input_tokens_seen": 114541364, + "step": 1712 + }, + { + "epoch": 0.19438297872340426, + "grad_norm": 22.300947189331055, + "learning_rate": 5e-05, + "loss": 1.4698, + "num_input_tokens_seen": 114608284, + "step": 1713 + }, + { + "epoch": 0.19438297872340426, + "loss": 1.5829944610595703, + "loss_ce": 0.008775720372796059, + "loss_iou": 0.625, + "loss_num": 0.06396484375, + "loss_xval": 1.578125, + "num_input_tokens_seen": 114608284, + "step": 1713 + }, + { + "epoch": 0.19449645390070922, + "grad_norm": 19.64942169189453, + "learning_rate": 5e-05, + "loss": 1.4072, + "num_input_tokens_seen": 114674804, + "step": 1714 + }, + { + "epoch": 0.19449645390070922, + "loss": 1.5354655981063843, + "loss_ce": 0.005192145239561796, + "loss_iou": 0.640625, + "loss_num": 0.049072265625, + "loss_xval": 1.53125, + "num_input_tokens_seen": 114674804, + "step": 1714 + }, + { + "epoch": 0.1946099290780142, + "grad_norm": 31.398386001586914, + "learning_rate": 5e-05, + "loss": 1.1942, + "num_input_tokens_seen": 114741212, + "step": 1715 + }, + { + "epoch": 0.1946099290780142, + "loss": 1.3276050090789795, + "loss_ce": 0.0028979810886085033, + "loss_iou": 0.5390625, + "loss_num": 0.0498046875, + "loss_xval": 1.328125, + "num_input_tokens_seen": 114741212, + "step": 1715 + }, + { + "epoch": 0.19472340425531914, + "grad_norm": 19.83119010925293, + "learning_rate": 5e-05, + "loss": 1.4056, + "num_input_tokens_seen": 114807476, + "step": 1716 + }, + { + "epoch": 0.19472340425531914, + "loss": 1.3536689281463623, + "loss_ce": 0.008698212914168835, + "loss_iou": 0.5234375, + "loss_num": 0.059326171875, + "loss_xval": 1.34375, + "num_input_tokens_seen": 114807476, + "step": 1716 + }, + { + "epoch": 0.19483687943262412, + "grad_norm": 20.462350845336914, + "learning_rate": 5e-05, + "loss": 1.2228, + "num_input_tokens_seen": 114874588, + "step": 1717 + }, + { + "epoch": 0.19483687943262412, + "loss": 1.1102286577224731, + "loss_ce": 0.0025626695714890957, + "loss_iou": 0.50390625, + "loss_num": 0.0198974609375, + "loss_xval": 1.109375, + "num_input_tokens_seen": 114874588, + "step": 1717 + }, + { + "epoch": 0.19495035460992907, + "grad_norm": 42.206199645996094, + "learning_rate": 5e-05, + "loss": 1.289, + "num_input_tokens_seen": 114941252, + "step": 1718 + }, + { + "epoch": 0.19495035460992907, + "loss": 1.2392356395721436, + "loss_ce": 0.0019309679046273232, + "loss_iou": 0.55859375, + "loss_num": 0.023681640625, + "loss_xval": 1.234375, + "num_input_tokens_seen": 114941252, + "step": 1718 + }, + { + "epoch": 0.19506382978723405, + "grad_norm": 18.426475524902344, + "learning_rate": 5e-05, + "loss": 1.4565, + "num_input_tokens_seen": 115007300, + "step": 1719 + }, + { + "epoch": 0.19506382978723405, + "loss": 1.6026877164840698, + "loss_ce": 0.007961135357618332, + "loss_iou": 0.6640625, + "loss_num": 0.0537109375, + "loss_xval": 1.59375, + "num_input_tokens_seen": 115007300, + "step": 1719 + }, + { + "epoch": 0.195177304964539, + "grad_norm": 26.856901168823242, + "learning_rate": 5e-05, + "loss": 1.2079, + "num_input_tokens_seen": 115074056, + "step": 1720 + }, + { + "epoch": 0.195177304964539, + "loss": 1.1729650497436523, + "loss_ce": 0.002554918872192502, + "loss_iou": 0.5234375, + "loss_num": 0.0247802734375, + "loss_xval": 1.171875, + "num_input_tokens_seen": 115074056, + "step": 1720 + }, + { + "epoch": 0.19529078014184398, + "grad_norm": 21.58884048461914, + "learning_rate": 5e-05, + "loss": 1.2878, + "num_input_tokens_seen": 115140504, + "step": 1721 + }, + { + "epoch": 0.19529078014184398, + "loss": 1.1125156879425049, + "loss_ce": 0.005261698737740517, + "loss_iou": 0.44140625, + "loss_num": 0.044677734375, + "loss_xval": 1.109375, + "num_input_tokens_seen": 115140504, + "step": 1721 + }, + { + "epoch": 0.19540425531914893, + "grad_norm": 36.84272384643555, + "learning_rate": 5e-05, + "loss": 1.3566, + "num_input_tokens_seen": 115207160, + "step": 1722 + }, + { + "epoch": 0.19540425531914893, + "loss": 1.386744499206543, + "loss_ce": 0.0029554462525993586, + "loss_iou": 0.6015625, + "loss_num": 0.037109375, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 115207160, + "step": 1722 + }, + { + "epoch": 0.1955177304964539, + "grad_norm": 17.557619094848633, + "learning_rate": 5e-05, + "loss": 1.6308, + "num_input_tokens_seen": 115273328, + "step": 1723 + }, + { + "epoch": 0.1955177304964539, + "loss": 1.5449483394622803, + "loss_ce": 0.005031340755522251, + "loss_iou": 0.66015625, + "loss_num": 0.0439453125, + "loss_xval": 1.5390625, + "num_input_tokens_seen": 115273328, + "step": 1723 + }, + { + "epoch": 0.19563120567375886, + "grad_norm": 22.385009765625, + "learning_rate": 5e-05, + "loss": 1.416, + "num_input_tokens_seen": 115340516, + "step": 1724 + }, + { + "epoch": 0.19563120567375886, + "loss": 1.340869665145874, + "loss_ce": 0.00688538933172822, + "loss_iou": 0.5703125, + "loss_num": 0.039306640625, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 115340516, + "step": 1724 + }, + { + "epoch": 0.19574468085106383, + "grad_norm": 51.01015853881836, + "learning_rate": 5e-05, + "loss": 1.4319, + "num_input_tokens_seen": 115406816, + "step": 1725 + }, + { + "epoch": 0.19574468085106383, + "loss": 1.455830693244934, + "loss_ce": 0.005147084128111601, + "loss_iou": 0.6484375, + "loss_num": 0.030517578125, + "loss_xval": 1.453125, + "num_input_tokens_seen": 115406816, + "step": 1725 + }, + { + "epoch": 0.19585815602836879, + "grad_norm": 23.323810577392578, + "learning_rate": 5e-05, + "loss": 1.5192, + "num_input_tokens_seen": 115473700, + "step": 1726 + }, + { + "epoch": 0.19585815602836879, + "loss": 1.5938477516174316, + "loss_ce": 0.006933695636689663, + "loss_iou": 0.69921875, + "loss_num": 0.038330078125, + "loss_xval": 1.5859375, + "num_input_tokens_seen": 115473700, + "step": 1726 + }, + { + "epoch": 0.19597163120567376, + "grad_norm": 19.39344024658203, + "learning_rate": 5e-05, + "loss": 1.2431, + "num_input_tokens_seen": 115540328, + "step": 1727 + }, + { + "epoch": 0.19597163120567376, + "loss": 1.1502629518508911, + "loss_ce": 0.004266845062375069, + "loss_iou": 0.48046875, + "loss_num": 0.036865234375, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 115540328, + "step": 1727 + }, + { + "epoch": 0.1960851063829787, + "grad_norm": 24.28586196899414, + "learning_rate": 5e-05, + "loss": 1.3477, + "num_input_tokens_seen": 115607432, + "step": 1728 + }, + { + "epoch": 0.1960851063829787, + "loss": 1.335130214691162, + "loss_ce": 0.004075503442436457, + "loss_iou": 0.57421875, + "loss_num": 0.0361328125, + "loss_xval": 1.328125, + "num_input_tokens_seen": 115607432, + "step": 1728 + }, + { + "epoch": 0.1961985815602837, + "grad_norm": 47.48444366455078, + "learning_rate": 5e-05, + "loss": 1.5712, + "num_input_tokens_seen": 115673836, + "step": 1729 + }, + { + "epoch": 0.1961985815602837, + "loss": 1.7704720497131348, + "loss_ce": 0.0028938758186995983, + "loss_iou": 0.7578125, + "loss_num": 0.051025390625, + "loss_xval": 1.765625, + "num_input_tokens_seen": 115673836, + "step": 1729 + }, + { + "epoch": 0.19631205673758864, + "grad_norm": 24.225187301635742, + "learning_rate": 5e-05, + "loss": 1.6693, + "num_input_tokens_seen": 115740856, + "step": 1730 + }, + { + "epoch": 0.19631205673758864, + "loss": 1.7975823879241943, + "loss_ce": 0.006566651165485382, + "loss_iou": 0.7890625, + "loss_num": 0.043212890625, + "loss_xval": 1.7890625, + "num_input_tokens_seen": 115740856, + "step": 1730 + }, + { + "epoch": 0.19642553191489362, + "grad_norm": 29.333812713623047, + "learning_rate": 5e-05, + "loss": 1.1893, + "num_input_tokens_seen": 115807600, + "step": 1731 + }, + { + "epoch": 0.19642553191489362, + "loss": 1.303526520729065, + "loss_ce": 0.004698349628597498, + "loss_iou": 0.57421875, + "loss_num": 0.0303955078125, + "loss_xval": 1.296875, + "num_input_tokens_seen": 115807600, + "step": 1731 + }, + { + "epoch": 0.19653900709219857, + "grad_norm": 31.053621292114258, + "learning_rate": 5e-05, + "loss": 1.4339, + "num_input_tokens_seen": 115874824, + "step": 1732 + }, + { + "epoch": 0.19653900709219857, + "loss": 1.371816873550415, + "loss_ce": 0.006582492031157017, + "loss_iou": 0.5859375, + "loss_num": 0.03857421875, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 115874824, + "step": 1732 + }, + { + "epoch": 0.19665248226950355, + "grad_norm": 32.01290512084961, + "learning_rate": 5e-05, + "loss": 1.608, + "num_input_tokens_seen": 115941736, + "step": 1733 + }, + { + "epoch": 0.19665248226950355, + "loss": 1.7825359106063843, + "loss_ce": 0.003239138051867485, + "loss_iou": 0.78125, + "loss_num": 0.04296875, + "loss_xval": 1.78125, + "num_input_tokens_seen": 115941736, + "step": 1733 + }, + { + "epoch": 0.1967659574468085, + "grad_norm": 24.080772399902344, + "learning_rate": 5e-05, + "loss": 1.47, + "num_input_tokens_seen": 116007740, + "step": 1734 + }, + { + "epoch": 0.1967659574468085, + "loss": 1.660369873046875, + "loss_ce": 0.006072975695133209, + "loss_iou": 0.67578125, + "loss_num": 0.06005859375, + "loss_xval": 1.65625, + "num_input_tokens_seen": 116007740, + "step": 1734 + }, + { + "epoch": 0.19687943262411348, + "grad_norm": 15.187326431274414, + "learning_rate": 5e-05, + "loss": 1.2495, + "num_input_tokens_seen": 116074608, + "step": 1735 + }, + { + "epoch": 0.19687943262411348, + "loss": 1.372673511505127, + "loss_ce": 0.020134471356868744, + "loss_iou": 0.53125, + "loss_num": 0.058349609375, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 116074608, + "step": 1735 + }, + { + "epoch": 0.19699290780141843, + "grad_norm": 18.296707153320312, + "learning_rate": 5e-05, + "loss": 1.1015, + "num_input_tokens_seen": 116141452, + "step": 1736 + }, + { + "epoch": 0.19699290780141843, + "loss": 0.8486695885658264, + "loss_ce": 0.006628589704632759, + "loss_iou": 0.36328125, + "loss_num": 0.022705078125, + "loss_xval": 0.84375, + "num_input_tokens_seen": 116141452, + "step": 1736 + }, + { + "epoch": 0.1971063829787234, + "grad_norm": 24.77526092529297, + "learning_rate": 5e-05, + "loss": 1.4209, + "num_input_tokens_seen": 116207924, + "step": 1737 + }, + { + "epoch": 0.1971063829787234, + "loss": 1.4749963283538818, + "loss_ce": 0.004781579133123159, + "loss_iou": 0.6484375, + "loss_num": 0.033935546875, + "loss_xval": 1.46875, + "num_input_tokens_seen": 116207924, + "step": 1737 + }, + { + "epoch": 0.19721985815602838, + "grad_norm": 21.337480545043945, + "learning_rate": 5e-05, + "loss": 1.5985, + "num_input_tokens_seen": 116274716, + "step": 1738 + }, + { + "epoch": 0.19721985815602838, + "loss": 1.51265287399292, + "loss_ce": 0.006793412379920483, + "loss_iou": 0.65625, + "loss_num": 0.038330078125, + "loss_xval": 1.5078125, + "num_input_tokens_seen": 116274716, + "step": 1738 + }, + { + "epoch": 0.19733333333333333, + "grad_norm": 28.64930534362793, + "learning_rate": 5e-05, + "loss": 1.3596, + "num_input_tokens_seen": 116342120, + "step": 1739 + }, + { + "epoch": 0.19733333333333333, + "loss": 1.2596579790115356, + "loss_ce": 0.003798652905970812, + "loss_iou": 0.5390625, + "loss_num": 0.03466796875, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 116342120, + "step": 1739 + }, + { + "epoch": 0.1974468085106383, + "grad_norm": 24.923351287841797, + "learning_rate": 5e-05, + "loss": 1.268, + "num_input_tokens_seen": 116409652, + "step": 1740 + }, + { + "epoch": 0.1974468085106383, + "loss": 1.2260942459106445, + "loss_ce": 0.003437922801822424, + "loss_iou": 0.52734375, + "loss_num": 0.033447265625, + "loss_xval": 1.21875, + "num_input_tokens_seen": 116409652, + "step": 1740 + }, + { + "epoch": 0.19756028368794326, + "grad_norm": 36.14959716796875, + "learning_rate": 5e-05, + "loss": 1.4857, + "num_input_tokens_seen": 116476224, + "step": 1741 + }, + { + "epoch": 0.19756028368794326, + "loss": 1.4588322639465332, + "loss_ce": 0.0076603046618402, + "loss_iou": 0.6171875, + "loss_num": 0.043212890625, + "loss_xval": 1.453125, + "num_input_tokens_seen": 116476224, + "step": 1741 + }, + { + "epoch": 0.19767375886524824, + "grad_norm": 22.151769638061523, + "learning_rate": 5e-05, + "loss": 1.6049, + "num_input_tokens_seen": 116544956, + "step": 1742 + }, + { + "epoch": 0.19767375886524824, + "loss": 1.4603639841079712, + "loss_ce": 0.004309364594519138, + "loss_iou": 0.65625, + "loss_num": 0.0294189453125, + "loss_xval": 1.453125, + "num_input_tokens_seen": 116544956, + "step": 1742 + }, + { + "epoch": 0.1977872340425532, + "grad_norm": 24.298423767089844, + "learning_rate": 5e-05, + "loss": 1.2029, + "num_input_tokens_seen": 116611548, + "step": 1743 + }, + { + "epoch": 0.1977872340425532, + "loss": 1.2924634218215942, + "loss_ce": 0.007795431651175022, + "loss_iou": 0.5234375, + "loss_num": 0.048095703125, + "loss_xval": 1.28125, + "num_input_tokens_seen": 116611548, + "step": 1743 + }, + { + "epoch": 0.19790070921985817, + "grad_norm": 19.289196014404297, + "learning_rate": 5e-05, + "loss": 1.1724, + "num_input_tokens_seen": 116679060, + "step": 1744 + }, + { + "epoch": 0.19790070921985817, + "loss": 1.1193203926086426, + "loss_ce": 0.0040860846638679504, + "loss_iou": 0.494140625, + "loss_num": 0.0255126953125, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 116679060, + "step": 1744 + }, + { + "epoch": 0.19801418439716312, + "grad_norm": 31.51619529724121, + "learning_rate": 5e-05, + "loss": 1.3096, + "num_input_tokens_seen": 116745580, + "step": 1745 + }, + { + "epoch": 0.19801418439716312, + "loss": 1.4624607563018799, + "loss_ce": 0.005429489072412252, + "loss_iou": 0.59765625, + "loss_num": 0.05224609375, + "loss_xval": 1.453125, + "num_input_tokens_seen": 116745580, + "step": 1745 + }, + { + "epoch": 0.1981276595744681, + "grad_norm": 24.555702209472656, + "learning_rate": 5e-05, + "loss": 1.549, + "num_input_tokens_seen": 116812088, + "step": 1746 + }, + { + "epoch": 0.1981276595744681, + "loss": 1.5721526145935059, + "loss_ce": 0.0037931951228529215, + "loss_iou": 0.6640625, + "loss_num": 0.047607421875, + "loss_xval": 1.5703125, + "num_input_tokens_seen": 116812088, + "step": 1746 + }, + { + "epoch": 0.19824113475177305, + "grad_norm": 18.738134384155273, + "learning_rate": 5e-05, + "loss": 1.2859, + "num_input_tokens_seen": 116878496, + "step": 1747 + }, + { + "epoch": 0.19824113475177305, + "loss": 1.2310067415237427, + "loss_ce": 0.0051766191609203815, + "loss_iou": 0.546875, + "loss_num": 0.02734375, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 116878496, + "step": 1747 + }, + { + "epoch": 0.19835460992907802, + "grad_norm": 22.39378547668457, + "learning_rate": 5e-05, + "loss": 1.4618, + "num_input_tokens_seen": 116944932, + "step": 1748 + }, + { + "epoch": 0.19835460992907802, + "loss": 1.5523772239685059, + "loss_ce": 0.00550219789147377, + "loss_iou": 0.63671875, + "loss_num": 0.05517578125, + "loss_xval": 1.546875, + "num_input_tokens_seen": 116944932, + "step": 1748 + }, + { + "epoch": 0.19846808510638297, + "grad_norm": 27.173500061035156, + "learning_rate": 5e-05, + "loss": 1.3209, + "num_input_tokens_seen": 117011296, + "step": 1749 + }, + { + "epoch": 0.19846808510638297, + "loss": 1.3161981105804443, + "loss_ce": 0.0035760903265327215, + "loss_iou": 0.56640625, + "loss_num": 0.036376953125, + "loss_xval": 1.3125, + "num_input_tokens_seen": 117011296, + "step": 1749 + }, + { + "epoch": 0.19858156028368795, + "grad_norm": 24.563825607299805, + "learning_rate": 5e-05, + "loss": 1.3188, + "num_input_tokens_seen": 117078700, + "step": 1750 + }, + { + "epoch": 0.19858156028368795, + "eval_seeclick_CIoU": 0.40624378621578217, + "eval_seeclick_GIoU": 0.37907280027866364, + "eval_seeclick_IoU": 0.48010386526584625, + "eval_seeclick_MAE_all": 0.1390305608510971, + "eval_seeclick_MAE_h": 0.05916961468756199, + "eval_seeclick_MAE_w": 0.15215174853801727, + "eval_seeclick_MAE_x_boxes": 0.2222456932067871, + "eval_seeclick_MAE_y_boxes": 0.09547068178653717, + "eval_seeclick_NUM_probability": 0.9999082088470459, + "eval_seeclick_inside_bbox": 0.7083333432674408, + "eval_seeclick_loss": 2.5150251388549805, + "eval_seeclick_loss_ce": 0.01665485091507435, + "eval_seeclick_loss_iou": 0.8905029296875, + "eval_seeclick_loss_num": 0.14373016357421875, + "eval_seeclick_loss_xval": 2.500244140625, + "eval_seeclick_runtime": 71.8059, + "eval_seeclick_samples_per_second": 0.655, + "eval_seeclick_steps_per_second": 0.028, + "num_input_tokens_seen": 117078700, + "step": 1750 + }, + { + "epoch": 0.19858156028368795, + "eval_icons_CIoU": 0.5433892011642456, + "eval_icons_GIoU": 0.5343434810638428, + "eval_icons_IoU": 0.5684720873832703, + "eval_icons_MAE_all": 0.11533700674772263, + "eval_icons_MAE_h": 0.10853087902069092, + "eval_icons_MAE_w": 0.1204865574836731, + "eval_icons_MAE_x_boxes": 0.07463653013110161, + "eval_icons_MAE_y_boxes": 0.06945887207984924, + "eval_icons_NUM_probability": 0.999940812587738, + "eval_icons_inside_bbox": 0.8975694477558136, + "eval_icons_loss": 2.366302013397217, + "eval_icons_loss_ce": 4.331818581704283e-05, + "eval_icons_loss_iou": 0.90380859375, + "eval_icons_loss_num": 0.11038589477539062, + "eval_icons_loss_xval": 2.35791015625, + "eval_icons_runtime": 74.7442, + "eval_icons_samples_per_second": 0.669, + "eval_icons_steps_per_second": 0.027, + "num_input_tokens_seen": 117078700, + "step": 1750 + }, + { + "epoch": 0.19858156028368795, + "eval_screenspot_CIoU": 0.4026324252287547, + "eval_screenspot_GIoU": 0.38486941655476886, + "eval_screenspot_IoU": 0.46728434165318805, + "eval_screenspot_MAE_all": 0.15543009837468466, + "eval_screenspot_MAE_h": 0.09798002739747365, + "eval_screenspot_MAE_w": 0.1844031016031901, + "eval_screenspot_MAE_x_boxes": 0.19654945035775503, + "eval_screenspot_MAE_y_boxes": 0.08721882104873657, + "eval_screenspot_NUM_probability": 0.9998394449551901, + "eval_screenspot_inside_bbox": 0.725000003973643, + "eval_screenspot_loss": 2.6471831798553467, + "eval_screenspot_loss_ce": 0.014615753665566444, + "eval_screenspot_loss_iou": 0.94482421875, + "eval_screenspot_loss_num": 0.164093017578125, + "eval_screenspot_loss_xval": 2.7099609375, + "eval_screenspot_runtime": 123.3207, + "eval_screenspot_samples_per_second": 0.722, + "eval_screenspot_steps_per_second": 0.024, + "num_input_tokens_seen": 117078700, + "step": 1750 + }, + { + "epoch": 0.19858156028368795, + "eval_compot_CIoU": 0.3692883402109146, + "eval_compot_GIoU": 0.34662632644176483, + "eval_compot_IoU": 0.44485072791576385, + "eval_compot_MAE_all": 0.16543706506490707, + "eval_compot_MAE_h": 0.09690085053443909, + "eval_compot_MAE_w": 0.23519432544708252, + "eval_compot_MAE_x_boxes": 0.13115395978093147, + "eval_compot_MAE_y_boxes": 0.14017460867762566, + "eval_compot_NUM_probability": 0.9998684823513031, + "eval_compot_inside_bbox": 0.5902777910232544, + "eval_compot_loss": 2.83296275138855, + "eval_compot_loss_ce": 0.007954437751322985, + "eval_compot_loss_iou": 1.015869140625, + "eval_compot_loss_num": 0.143585205078125, + "eval_compot_loss_xval": 2.74853515625, + "eval_compot_runtime": 68.7992, + "eval_compot_samples_per_second": 0.727, + "eval_compot_steps_per_second": 0.029, + "num_input_tokens_seen": 117078700, + "step": 1750 + }, + { + "epoch": 0.19858156028368795, + "loss": 2.729569911956787, + "loss_ce": 0.005936935544013977, + "loss_iou": 0.9921875, + "loss_num": 0.1484375, + "loss_xval": 2.71875, + "num_input_tokens_seen": 117078700, + "step": 1750 + }, + { + "epoch": 0.1986950354609929, + "grad_norm": 21.978471755981445, + "learning_rate": 5e-05, + "loss": 1.2915, + "num_input_tokens_seen": 117145056, + "step": 1751 + }, + { + "epoch": 0.1986950354609929, + "loss": 1.2878117561340332, + "loss_ce": 0.006561813410371542, + "loss_iou": 0.53515625, + "loss_num": 0.04248046875, + "loss_xval": 1.28125, + "num_input_tokens_seen": 117145056, + "step": 1751 + }, + { + "epoch": 0.19880851063829788, + "grad_norm": 30.830846786499023, + "learning_rate": 5e-05, + "loss": 1.2967, + "num_input_tokens_seen": 117211196, + "step": 1752 + }, + { + "epoch": 0.19880851063829788, + "loss": 1.221449851989746, + "loss_ce": 0.004164587706327438, + "loss_iou": 0.55078125, + "loss_num": 0.023681640625, + "loss_xval": 1.21875, + "num_input_tokens_seen": 117211196, + "step": 1752 + }, + { + "epoch": 0.19892198581560283, + "grad_norm": 34.89544677734375, + "learning_rate": 5e-05, + "loss": 1.3959, + "num_input_tokens_seen": 117277816, + "step": 1753 + }, + { + "epoch": 0.19892198581560283, + "loss": 1.3691117763519287, + "loss_ce": 0.0043656593188643456, + "loss_iou": 0.609375, + "loss_num": 0.02978515625, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 117277816, + "step": 1753 + }, + { + "epoch": 0.1990354609929078, + "grad_norm": 30.407289505004883, + "learning_rate": 5e-05, + "loss": 1.5261, + "num_input_tokens_seen": 117345104, + "step": 1754 + }, + { + "epoch": 0.1990354609929078, + "loss": 1.6776745319366455, + "loss_ce": 0.003846498904749751, + "loss_iou": 0.703125, + "loss_num": 0.05419921875, + "loss_xval": 1.671875, + "num_input_tokens_seen": 117345104, + "step": 1754 + }, + { + "epoch": 0.19914893617021276, + "grad_norm": 19.009599685668945, + "learning_rate": 5e-05, + "loss": 1.3012, + "num_input_tokens_seen": 117412416, + "step": 1755 + }, + { + "epoch": 0.19914893617021276, + "loss": 1.2359510660171509, + "loss_ce": 0.0025526690296828747, + "loss_iou": 0.54296875, + "loss_num": 0.0299072265625, + "loss_xval": 1.234375, + "num_input_tokens_seen": 117412416, + "step": 1755 + }, + { + "epoch": 0.19926241134751774, + "grad_norm": 15.663090705871582, + "learning_rate": 5e-05, + "loss": 1.2586, + "num_input_tokens_seen": 117481012, + "step": 1756 + }, + { + "epoch": 0.19926241134751774, + "loss": 1.2012040615081787, + "loss_ce": 0.001985328970476985, + "loss_iou": 0.53125, + "loss_num": 0.02685546875, + "loss_xval": 1.203125, + "num_input_tokens_seen": 117481012, + "step": 1756 + }, + { + "epoch": 0.1993758865248227, + "grad_norm": 28.103679656982422, + "learning_rate": 5e-05, + "loss": 1.273, + "num_input_tokens_seen": 117548120, + "step": 1757 + }, + { + "epoch": 0.1993758865248227, + "loss": 1.3319841623306274, + "loss_ce": 0.008741993457078934, + "loss_iou": 0.5625, + "loss_num": 0.039794921875, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 117548120, + "step": 1757 + }, + { + "epoch": 0.19948936170212767, + "grad_norm": 24.328025817871094, + "learning_rate": 5e-05, + "loss": 1.2016, + "num_input_tokens_seen": 117616036, + "step": 1758 + }, + { + "epoch": 0.19948936170212767, + "loss": 1.1278156042099, + "loss_ce": 0.006477711722254753, + "loss_iou": 0.48828125, + "loss_num": 0.029052734375, + "loss_xval": 1.125, + "num_input_tokens_seen": 117616036, + "step": 1758 + }, + { + "epoch": 0.19960283687943262, + "grad_norm": 27.254085540771484, + "learning_rate": 5e-05, + "loss": 1.2772, + "num_input_tokens_seen": 117682928, + "step": 1759 + }, + { + "epoch": 0.19960283687943262, + "loss": 1.23945951461792, + "loss_ce": 0.003131409175693989, + "loss_iou": 0.5390625, + "loss_num": 0.032470703125, + "loss_xval": 1.234375, + "num_input_tokens_seen": 117682928, + "step": 1759 + }, + { + "epoch": 0.1997163120567376, + "grad_norm": 24.71839714050293, + "learning_rate": 5e-05, + "loss": 1.6028, + "num_input_tokens_seen": 117750476, + "step": 1760 + }, + { + "epoch": 0.1997163120567376, + "loss": 1.6130374670028687, + "loss_ce": 0.006592154502868652, + "loss_iou": 0.66015625, + "loss_num": 0.0576171875, + "loss_xval": 1.609375, + "num_input_tokens_seen": 117750476, + "step": 1760 + }, + { + "epoch": 0.19982978723404254, + "grad_norm": 22.693586349487305, + "learning_rate": 5e-05, + "loss": 1.4442, + "num_input_tokens_seen": 117818216, + "step": 1761 + }, + { + "epoch": 0.19982978723404254, + "loss": 1.5019510984420776, + "loss_ce": 0.007322258781641722, + "loss_iou": 0.65234375, + "loss_num": 0.03759765625, + "loss_xval": 1.4921875, + "num_input_tokens_seen": 117818216, + "step": 1761 + }, + { + "epoch": 0.19994326241134752, + "grad_norm": 20.760162353515625, + "learning_rate": 5e-05, + "loss": 1.179, + "num_input_tokens_seen": 117884608, + "step": 1762 + }, + { + "epoch": 0.19994326241134752, + "loss": 1.0646922588348389, + "loss_ce": 0.008051667362451553, + "loss_iou": 0.431640625, + "loss_num": 0.038330078125, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 117884608, + "step": 1762 + }, + { + "epoch": 0.20005673758865247, + "grad_norm": 33.367244720458984, + "learning_rate": 5e-05, + "loss": 1.4633, + "num_input_tokens_seen": 117951668, + "step": 1763 + }, + { + "epoch": 0.20005673758865247, + "loss": 1.5685003995895386, + "loss_ce": 0.0040472932159900665, + "loss_iou": 0.67578125, + "loss_num": 0.043212890625, + "loss_xval": 1.5625, + "num_input_tokens_seen": 117951668, + "step": 1763 + }, + { + "epoch": 0.20017021276595745, + "grad_norm": 30.502605438232422, + "learning_rate": 5e-05, + "loss": 1.6157, + "num_input_tokens_seen": 118018444, + "step": 1764 + }, + { + "epoch": 0.20017021276595745, + "loss": 1.5112318992614746, + "loss_ce": 0.01220841333270073, + "loss_iou": 0.6484375, + "loss_num": 0.0400390625, + "loss_xval": 1.5, + "num_input_tokens_seen": 118018444, + "step": 1764 + }, + { + "epoch": 0.2002836879432624, + "grad_norm": 21.72279930114746, + "learning_rate": 5e-05, + "loss": 1.3427, + "num_input_tokens_seen": 118084840, + "step": 1765 + }, + { + "epoch": 0.2002836879432624, + "loss": 1.402201771736145, + "loss_ce": 0.004252564162015915, + "loss_iou": 0.59375, + "loss_num": 0.042236328125, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 118084840, + "step": 1765 + }, + { + "epoch": 0.20039716312056738, + "grad_norm": 13.82384204864502, + "learning_rate": 5e-05, + "loss": 1.112, + "num_input_tokens_seen": 118151520, + "step": 1766 + }, + { + "epoch": 0.20039716312056738, + "loss": 0.9756250381469727, + "loss_ce": 0.003151876851916313, + "loss_iou": 0.423828125, + "loss_num": 0.0250244140625, + "loss_xval": 0.97265625, + "num_input_tokens_seen": 118151520, + "step": 1766 + }, + { + "epoch": 0.20051063829787233, + "grad_norm": 29.599716186523438, + "learning_rate": 5e-05, + "loss": 1.2996, + "num_input_tokens_seen": 118218756, + "step": 1767 + }, + { + "epoch": 0.20051063829787233, + "loss": 1.4082927703857422, + "loss_ce": 0.003995891660451889, + "loss_iou": 0.54296875, + "loss_num": 0.0634765625, + "loss_xval": 1.40625, + "num_input_tokens_seen": 118218756, + "step": 1767 + }, + { + "epoch": 0.2006241134751773, + "grad_norm": 24.57575798034668, + "learning_rate": 5e-05, + "loss": 1.4573, + "num_input_tokens_seen": 118286844, + "step": 1768 + }, + { + "epoch": 0.2006241134751773, + "loss": 1.361972451210022, + "loss_ce": 0.005527128931134939, + "loss_iou": 0.625, + "loss_num": 0.02197265625, + "loss_xval": 1.359375, + "num_input_tokens_seen": 118286844, + "step": 1768 + }, + { + "epoch": 0.20073758865248226, + "grad_norm": 20.095434188842773, + "learning_rate": 5e-05, + "loss": 1.221, + "num_input_tokens_seen": 118352988, + "step": 1769 + }, + { + "epoch": 0.20073758865248226, + "loss": 1.2561373710632324, + "loss_ce": 0.004184145946055651, + "loss_iou": 0.484375, + "loss_num": 0.056884765625, + "loss_xval": 1.25, + "num_input_tokens_seen": 118352988, + "step": 1769 + }, + { + "epoch": 0.20085106382978724, + "grad_norm": 24.82451820373535, + "learning_rate": 5e-05, + "loss": 1.4537, + "num_input_tokens_seen": 118419088, + "step": 1770 + }, + { + "epoch": 0.20085106382978724, + "loss": 1.21501886844635, + "loss_ce": 0.010673204436898232, + "loss_iou": 0.474609375, + "loss_num": 0.05078125, + "loss_xval": 1.203125, + "num_input_tokens_seen": 118419088, + "step": 1770 + }, + { + "epoch": 0.20096453900709219, + "grad_norm": 36.33395767211914, + "learning_rate": 5e-05, + "loss": 1.5811, + "num_input_tokens_seen": 118486432, + "step": 1771 + }, + { + "epoch": 0.20096453900709219, + "loss": 1.610029697418213, + "loss_ce": 0.009443771094083786, + "loss_iou": 0.68359375, + "loss_num": 0.0458984375, + "loss_xval": 1.6015625, + "num_input_tokens_seen": 118486432, + "step": 1771 + }, + { + "epoch": 0.20107801418439716, + "grad_norm": 21.045467376708984, + "learning_rate": 5e-05, + "loss": 1.5886, + "num_input_tokens_seen": 118552508, + "step": 1772 + }, + { + "epoch": 0.20107801418439716, + "loss": 1.7561984062194824, + "loss_ce": 0.006198433227837086, + "loss_iou": 0.71875, + "loss_num": 0.0625, + "loss_xval": 1.75, + "num_input_tokens_seen": 118552508, + "step": 1772 + }, + { + "epoch": 0.2011914893617021, + "grad_norm": 14.697307586669922, + "learning_rate": 5e-05, + "loss": 1.1615, + "num_input_tokens_seen": 118620008, + "step": 1773 + }, + { + "epoch": 0.2011914893617021, + "loss": 1.1230484247207642, + "loss_ce": 0.002931199036538601, + "loss_iou": 0.46875, + "loss_num": 0.03662109375, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 118620008, + "step": 1773 + }, + { + "epoch": 0.2013049645390071, + "grad_norm": 13.284406661987305, + "learning_rate": 5e-05, + "loss": 1.3735, + "num_input_tokens_seen": 118686872, + "step": 1774 + }, + { + "epoch": 0.2013049645390071, + "loss": 1.4962202310562134, + "loss_ce": 0.0037886088248342276, + "loss_iou": 0.62890625, + "loss_num": 0.046630859375, + "loss_xval": 1.4921875, + "num_input_tokens_seen": 118686872, + "step": 1774 + }, + { + "epoch": 0.20141843971631207, + "grad_norm": 18.134517669677734, + "learning_rate": 5e-05, + "loss": 1.054, + "num_input_tokens_seen": 118754280, + "step": 1775 + }, + { + "epoch": 0.20141843971631207, + "loss": 1.0819215774536133, + "loss_ce": 0.013318000361323357, + "loss_iou": 0.4140625, + "loss_num": 0.04833984375, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 118754280, + "step": 1775 + }, + { + "epoch": 0.20153191489361702, + "grad_norm": 23.225465774536133, + "learning_rate": 5e-05, + "loss": 1.3931, + "num_input_tokens_seen": 118821680, + "step": 1776 + }, + { + "epoch": 0.20153191489361702, + "loss": 1.4298555850982666, + "loss_ce": 0.0021211716812103987, + "loss_iou": 0.640625, + "loss_num": 0.0289306640625, + "loss_xval": 1.4296875, + "num_input_tokens_seen": 118821680, + "step": 1776 + }, + { + "epoch": 0.201645390070922, + "grad_norm": 26.690670013427734, + "learning_rate": 5e-05, + "loss": 1.2994, + "num_input_tokens_seen": 118888848, + "step": 1777 + }, + { + "epoch": 0.201645390070922, + "loss": 1.5093708038330078, + "loss_ce": 0.0020466893911361694, + "loss_iou": 0.6484375, + "loss_num": 0.041748046875, + "loss_xval": 1.5078125, + "num_input_tokens_seen": 118888848, + "step": 1777 + }, + { + "epoch": 0.20175886524822695, + "grad_norm": 21.392913818359375, + "learning_rate": 5e-05, + "loss": 1.138, + "num_input_tokens_seen": 118955540, + "step": 1778 + }, + { + "epoch": 0.20175886524822695, + "loss": 1.1890579462051392, + "loss_ce": 0.002870170632377267, + "loss_iou": 0.46875, + "loss_num": 0.0498046875, + "loss_xval": 1.1875, + "num_input_tokens_seen": 118955540, + "step": 1778 + }, + { + "epoch": 0.20187234042553193, + "grad_norm": 34.46961975097656, + "learning_rate": 5e-05, + "loss": 1.3248, + "num_input_tokens_seen": 119022484, + "step": 1779 + }, + { + "epoch": 0.20187234042553193, + "loss": 1.2637468576431274, + "loss_ce": 0.002516374457627535, + "loss_iou": 0.5859375, + "loss_num": 0.0181884765625, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 119022484, + "step": 1779 + }, + { + "epoch": 0.20198581560283688, + "grad_norm": 24.621732711791992, + "learning_rate": 5e-05, + "loss": 1.5773, + "num_input_tokens_seen": 119088844, + "step": 1780 + }, + { + "epoch": 0.20198581560283688, + "loss": 1.7755317687988281, + "loss_ce": 0.006976981647312641, + "loss_iou": 0.76171875, + "loss_num": 0.0498046875, + "loss_xval": 1.765625, + "num_input_tokens_seen": 119088844, + "step": 1780 + }, + { + "epoch": 0.20209929078014185, + "grad_norm": 15.510872840881348, + "learning_rate": 5e-05, + "loss": 1.214, + "num_input_tokens_seen": 119155784, + "step": 1781 + }, + { + "epoch": 0.20209929078014185, + "loss": 1.2542948722839355, + "loss_ce": 0.009666087105870247, + "loss_iou": 0.52734375, + "loss_num": 0.0380859375, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 119155784, + "step": 1781 + }, + { + "epoch": 0.2022127659574468, + "grad_norm": 28.12854766845703, + "learning_rate": 5e-05, + "loss": 1.2644, + "num_input_tokens_seen": 119222136, + "step": 1782 + }, + { + "epoch": 0.2022127659574468, + "loss": 1.173579216003418, + "loss_ce": 0.007563601713627577, + "loss_iou": 0.53515625, + "loss_num": 0.01904296875, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 119222136, + "step": 1782 + }, + { + "epoch": 0.20232624113475178, + "grad_norm": 22.177644729614258, + "learning_rate": 5e-05, + "loss": 1.5567, + "num_input_tokens_seen": 119288484, + "step": 1783 + }, + { + "epoch": 0.20232624113475178, + "loss": 1.4824755191802979, + "loss_ce": 0.0015183768700808287, + "loss_iou": 0.640625, + "loss_num": 0.039794921875, + "loss_xval": 1.484375, + "num_input_tokens_seen": 119288484, + "step": 1783 + }, + { + "epoch": 0.20243971631205673, + "grad_norm": 14.34970760345459, + "learning_rate": 5e-05, + "loss": 1.1679, + "num_input_tokens_seen": 119354952, + "step": 1784 + }, + { + "epoch": 0.20243971631205673, + "loss": 1.0343639850616455, + "loss_ce": 0.004090471658855677, + "loss_iou": 0.4453125, + "loss_num": 0.0279541015625, + "loss_xval": 1.03125, + "num_input_tokens_seen": 119354952, + "step": 1784 + }, + { + "epoch": 0.2025531914893617, + "grad_norm": 21.225074768066406, + "learning_rate": 5e-05, + "loss": 1.2699, + "num_input_tokens_seen": 119422072, + "step": 1785 + }, + { + "epoch": 0.2025531914893617, + "loss": 1.2582910060882568, + "loss_ce": 0.001943225972354412, + "loss_iou": 0.52734375, + "loss_num": 0.04052734375, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 119422072, + "step": 1785 + }, + { + "epoch": 0.20266666666666666, + "grad_norm": 29.937301635742188, + "learning_rate": 5e-05, + "loss": 1.549, + "num_input_tokens_seen": 119489188, + "step": 1786 + }, + { + "epoch": 0.20266666666666666, + "loss": 1.460572600364685, + "loss_ce": 0.006471062544733286, + "loss_iou": 0.62109375, + "loss_num": 0.042236328125, + "loss_xval": 1.453125, + "num_input_tokens_seen": 119489188, + "step": 1786 + }, + { + "epoch": 0.20278014184397164, + "grad_norm": 22.083263397216797, + "learning_rate": 5e-05, + "loss": 1.5489, + "num_input_tokens_seen": 119556052, + "step": 1787 + }, + { + "epoch": 0.20278014184397164, + "loss": 1.649175763130188, + "loss_ce": 0.0056211212649941444, + "loss_iou": 0.73046875, + "loss_num": 0.03564453125, + "loss_xval": 1.640625, + "num_input_tokens_seen": 119556052, + "step": 1787 + }, + { + "epoch": 0.2028936170212766, + "grad_norm": 35.23084259033203, + "learning_rate": 5e-05, + "loss": 1.2392, + "num_input_tokens_seen": 119622872, + "step": 1788 + }, + { + "epoch": 0.2028936170212766, + "loss": 1.3452527523040771, + "loss_ce": 0.004432412330061197, + "loss_iou": 0.55859375, + "loss_num": 0.04443359375, + "loss_xval": 1.34375, + "num_input_tokens_seen": 119622872, + "step": 1788 + }, + { + "epoch": 0.20300709219858157, + "grad_norm": 31.23100471496582, + "learning_rate": 5e-05, + "loss": 1.5693, + "num_input_tokens_seen": 119689632, + "step": 1789 + }, + { + "epoch": 0.20300709219858157, + "loss": 1.4660370349884033, + "loss_ce": 0.0031464239582419395, + "loss_iou": 0.640625, + "loss_num": 0.037109375, + "loss_xval": 1.4609375, + "num_input_tokens_seen": 119689632, + "step": 1789 + }, + { + "epoch": 0.20312056737588652, + "grad_norm": 26.703121185302734, + "learning_rate": 5e-05, + "loss": 1.2927, + "num_input_tokens_seen": 119756196, + "step": 1790 + }, + { + "epoch": 0.20312056737588652, + "loss": 1.3208117485046387, + "loss_ce": 0.00489372294396162, + "loss_iou": 0.54296875, + "loss_num": 0.046630859375, + "loss_xval": 1.3125, + "num_input_tokens_seen": 119756196, + "step": 1790 + }, + { + "epoch": 0.2032340425531915, + "grad_norm": 23.128128051757812, + "learning_rate": 5e-05, + "loss": 1.4147, + "num_input_tokens_seen": 119823508, + "step": 1791 + }, + { + "epoch": 0.2032340425531915, + "loss": 1.288408637046814, + "loss_ce": 0.0027641397900879383, + "loss_iou": 0.53125, + "loss_num": 0.0439453125, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 119823508, + "step": 1791 + }, + { + "epoch": 0.20334751773049645, + "grad_norm": 19.368749618530273, + "learning_rate": 5e-05, + "loss": 1.1317, + "num_input_tokens_seen": 119891064, + "step": 1792 + }, + { + "epoch": 0.20334751773049645, + "loss": 1.2716243267059326, + "loss_ce": 0.008440744131803513, + "loss_iou": 0.5234375, + "loss_num": 0.04248046875, + "loss_xval": 1.265625, + "num_input_tokens_seen": 119891064, + "step": 1792 + }, + { + "epoch": 0.20346099290780142, + "grad_norm": 23.539398193359375, + "learning_rate": 5e-05, + "loss": 1.3076, + "num_input_tokens_seen": 119957588, + "step": 1793 + }, + { + "epoch": 0.20346099290780142, + "loss": 1.5255088806152344, + "loss_ce": 0.010372268036007881, + "loss_iou": 0.66015625, + "loss_num": 0.0390625, + "loss_xval": 1.515625, + "num_input_tokens_seen": 119957588, + "step": 1793 + }, + { + "epoch": 0.20357446808510637, + "grad_norm": 27.867090225219727, + "learning_rate": 5e-05, + "loss": 1.3192, + "num_input_tokens_seen": 120024528, + "step": 1794 + }, + { + "epoch": 0.20357446808510637, + "loss": 1.244905710220337, + "loss_ce": 0.005159596912562847, + "loss_iou": 0.51953125, + "loss_num": 0.041015625, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 120024528, + "step": 1794 + }, + { + "epoch": 0.20368794326241135, + "grad_norm": 20.595247268676758, + "learning_rate": 5e-05, + "loss": 1.5074, + "num_input_tokens_seen": 120092264, + "step": 1795 + }, + { + "epoch": 0.20368794326241135, + "loss": 1.6159480810165405, + "loss_ce": 0.003155106445774436, + "loss_iou": 0.640625, + "loss_num": 0.0654296875, + "loss_xval": 1.609375, + "num_input_tokens_seen": 120092264, + "step": 1795 + }, + { + "epoch": 0.2038014184397163, + "grad_norm": 37.812007904052734, + "learning_rate": 5e-05, + "loss": 1.2444, + "num_input_tokens_seen": 120159352, + "step": 1796 + }, + { + "epoch": 0.2038014184397163, + "loss": 1.026224136352539, + "loss_ce": 0.005960481707006693, + "loss_iou": 0.439453125, + "loss_num": 0.0284423828125, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 120159352, + "step": 1796 + }, + { + "epoch": 0.20391489361702128, + "grad_norm": 18.810779571533203, + "learning_rate": 5e-05, + "loss": 1.5641, + "num_input_tokens_seen": 120226912, + "step": 1797 + }, + { + "epoch": 0.20391489361702128, + "loss": 1.6238772869110107, + "loss_ce": 0.008643001317977905, + "loss_iou": 0.71484375, + "loss_num": 0.036376953125, + "loss_xval": 1.6171875, + "num_input_tokens_seen": 120226912, + "step": 1797 + }, + { + "epoch": 0.20402836879432623, + "grad_norm": 21.62908363342285, + "learning_rate": 5e-05, + "loss": 1.312, + "num_input_tokens_seen": 120293128, + "step": 1798 + }, + { + "epoch": 0.20402836879432623, + "loss": 1.1984922885894775, + "loss_ce": 0.005132866557687521, + "loss_iou": 0.515625, + "loss_num": 0.031982421875, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 120293128, + "step": 1798 + }, + { + "epoch": 0.2041418439716312, + "grad_norm": 28.877525329589844, + "learning_rate": 5e-05, + "loss": 1.3253, + "num_input_tokens_seen": 120360692, + "step": 1799 + }, + { + "epoch": 0.2041418439716312, + "loss": 1.0772494077682495, + "loss_ce": 0.0035189641639590263, + "loss_iou": 0.46484375, + "loss_num": 0.029296875, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 120360692, + "step": 1799 + }, + { + "epoch": 0.20425531914893616, + "grad_norm": 25.471500396728516, + "learning_rate": 5e-05, + "loss": 1.4539, + "num_input_tokens_seen": 120427236, + "step": 1800 + }, + { + "epoch": 0.20425531914893616, + "loss": 1.4342007637023926, + "loss_ce": 0.010372553952038288, + "loss_iou": 0.64453125, + "loss_num": 0.02685546875, + "loss_xval": 1.421875, + "num_input_tokens_seen": 120427236, + "step": 1800 + }, + { + "epoch": 0.20436879432624114, + "grad_norm": 32.74164581298828, + "learning_rate": 5e-05, + "loss": 1.1935, + "num_input_tokens_seen": 120495280, + "step": 1801 + }, + { + "epoch": 0.20436879432624114, + "loss": 1.1848630905151367, + "loss_ce": 0.003222428262233734, + "loss_iou": 0.51953125, + "loss_num": 0.0279541015625, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 120495280, + "step": 1801 + }, + { + "epoch": 0.2044822695035461, + "grad_norm": 30.609399795532227, + "learning_rate": 5e-05, + "loss": 1.5562, + "num_input_tokens_seen": 120561556, + "step": 1802 + }, + { + "epoch": 0.2044822695035461, + "loss": 1.5777161121368408, + "loss_ce": 0.010333279147744179, + "loss_iou": 0.6484375, + "loss_num": 0.0546875, + "loss_xval": 1.5703125, + "num_input_tokens_seen": 120561556, + "step": 1802 + }, + { + "epoch": 0.20459574468085107, + "grad_norm": 29.716848373413086, + "learning_rate": 5e-05, + "loss": 1.2107, + "num_input_tokens_seen": 120627804, + "step": 1803 + }, + { + "epoch": 0.20459574468085107, + "loss": 1.2168970108032227, + "loss_ce": 0.007424375973641872, + "loss_iou": 0.5078125, + "loss_num": 0.03955078125, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 120627804, + "step": 1803 + }, + { + "epoch": 0.20470921985815602, + "grad_norm": 27.42653465270996, + "learning_rate": 5e-05, + "loss": 1.3263, + "num_input_tokens_seen": 120695008, + "step": 1804 + }, + { + "epoch": 0.20470921985815602, + "loss": 1.2192063331604004, + "loss_ce": 0.0019211958860978484, + "loss_iou": 0.53125, + "loss_num": 0.030029296875, + "loss_xval": 1.21875, + "num_input_tokens_seen": 120695008, + "step": 1804 + }, + { + "epoch": 0.204822695035461, + "grad_norm": 40.22596740722656, + "learning_rate": 5e-05, + "loss": 1.252, + "num_input_tokens_seen": 120762020, + "step": 1805 + }, + { + "epoch": 0.204822695035461, + "loss": 1.2079299688339233, + "loss_ce": 0.002851803321391344, + "loss_iou": 0.53515625, + "loss_num": 0.02685546875, + "loss_xval": 1.203125, + "num_input_tokens_seen": 120762020, + "step": 1805 + }, + { + "epoch": 0.20493617021276594, + "grad_norm": 23.267314910888672, + "learning_rate": 5e-05, + "loss": 1.5737, + "num_input_tokens_seen": 120829664, + "step": 1806 + }, + { + "epoch": 0.20493617021276594, + "loss": 1.4569538831710815, + "loss_ce": 0.0033405942376703024, + "loss_iou": 0.62109375, + "loss_num": 0.042236328125, + "loss_xval": 1.453125, + "num_input_tokens_seen": 120829664, + "step": 1806 + }, + { + "epoch": 0.20504964539007092, + "grad_norm": 13.679203033447266, + "learning_rate": 5e-05, + "loss": 1.1957, + "num_input_tokens_seen": 120896348, + "step": 1807 + }, + { + "epoch": 0.20504964539007092, + "loss": 1.0947091579437256, + "loss_ce": 0.00510955136269331, + "loss_iou": 0.458984375, + "loss_num": 0.0341796875, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 120896348, + "step": 1807 + }, + { + "epoch": 0.20516312056737587, + "grad_norm": 24.164274215698242, + "learning_rate": 5e-05, + "loss": 1.1992, + "num_input_tokens_seen": 120963448, + "step": 1808 + }, + { + "epoch": 0.20516312056737587, + "loss": 1.2729268074035645, + "loss_ce": 0.007561208680272102, + "loss_iou": 0.52734375, + "loss_num": 0.042724609375, + "loss_xval": 1.265625, + "num_input_tokens_seen": 120963448, + "step": 1808 + }, + { + "epoch": 0.20527659574468085, + "grad_norm": 42.293087005615234, + "learning_rate": 5e-05, + "loss": 1.6645, + "num_input_tokens_seen": 121029656, + "step": 1809 + }, + { + "epoch": 0.20527659574468085, + "loss": 1.602476716041565, + "loss_ce": 0.002867286093533039, + "loss_iou": 0.7109375, + "loss_num": 0.03515625, + "loss_xval": 1.6015625, + "num_input_tokens_seen": 121029656, + "step": 1809 + }, + { + "epoch": 0.20539007092198583, + "grad_norm": 23.06837272644043, + "learning_rate": 5e-05, + "loss": 1.6306, + "num_input_tokens_seen": 121097328, + "step": 1810 + }, + { + "epoch": 0.20539007092198583, + "loss": 1.5548241138458252, + "loss_ce": 0.004042844288051128, + "loss_iou": 0.66015625, + "loss_num": 0.045654296875, + "loss_xval": 1.546875, + "num_input_tokens_seen": 121097328, + "step": 1810 + }, + { + "epoch": 0.20550354609929078, + "grad_norm": 21.174076080322266, + "learning_rate": 5e-05, + "loss": 1.385, + "num_input_tokens_seen": 121163872, + "step": 1811 + }, + { + "epoch": 0.20550354609929078, + "loss": 1.3783833980560303, + "loss_ce": 0.006190907675772905, + "loss_iou": 0.5390625, + "loss_num": 0.05810546875, + "loss_xval": 1.375, + "num_input_tokens_seen": 121163872, + "step": 1811 + }, + { + "epoch": 0.20561702127659576, + "grad_norm": 36.07659912109375, + "learning_rate": 5e-05, + "loss": 1.255, + "num_input_tokens_seen": 121230424, + "step": 1812 + }, + { + "epoch": 0.20561702127659576, + "loss": 1.3102445602416992, + "loss_ce": 0.003115731757134199, + "loss_iou": 0.58203125, + "loss_num": 0.0283203125, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 121230424, + "step": 1812 + }, + { + "epoch": 0.2057304964539007, + "grad_norm": 46.319786071777344, + "learning_rate": 5e-05, + "loss": 1.3794, + "num_input_tokens_seen": 121297656, + "step": 1813 + }, + { + "epoch": 0.2057304964539007, + "loss": 1.3064532279968262, + "loss_ce": 0.006648514419794083, + "loss_iou": 0.53515625, + "loss_num": 0.045654296875, + "loss_xval": 1.296875, + "num_input_tokens_seen": 121297656, + "step": 1813 + }, + { + "epoch": 0.20584397163120569, + "grad_norm": 17.52973747253418, + "learning_rate": 5e-05, + "loss": 1.338, + "num_input_tokens_seen": 121364632, + "step": 1814 + }, + { + "epoch": 0.20584397163120569, + "loss": 1.3693804740905762, + "loss_ce": 0.005122591741383076, + "loss_iou": 0.609375, + "loss_num": 0.0296630859375, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 121364632, + "step": 1814 + }, + { + "epoch": 0.20595744680851064, + "grad_norm": 26.554758071899414, + "learning_rate": 5e-05, + "loss": 1.3072, + "num_input_tokens_seen": 121431232, + "step": 1815 + }, + { + "epoch": 0.20595744680851064, + "loss": 1.4059817790985107, + "loss_ce": 0.0026615255046635866, + "loss_iou": 0.62890625, + "loss_num": 0.0294189453125, + "loss_xval": 1.40625, + "num_input_tokens_seen": 121431232, + "step": 1815 + }, + { + "epoch": 0.2060709219858156, + "grad_norm": 24.453765869140625, + "learning_rate": 5e-05, + "loss": 1.3651, + "num_input_tokens_seen": 121497808, + "step": 1816 + }, + { + "epoch": 0.2060709219858156, + "loss": 1.2343679666519165, + "loss_ce": 0.008782019838690758, + "loss_iou": 0.5390625, + "loss_num": 0.0296630859375, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 121497808, + "step": 1816 + }, + { + "epoch": 0.20618439716312056, + "grad_norm": 26.29030418395996, + "learning_rate": 5e-05, + "loss": 1.4656, + "num_input_tokens_seen": 121564248, + "step": 1817 + }, + { + "epoch": 0.20618439716312056, + "loss": 1.3787076473236084, + "loss_ce": 0.007613937370479107, + "loss_iou": 0.6171875, + "loss_num": 0.027587890625, + "loss_xval": 1.375, + "num_input_tokens_seen": 121564248, + "step": 1817 + }, + { + "epoch": 0.20629787234042554, + "grad_norm": 23.305356979370117, + "learning_rate": 5e-05, + "loss": 1.3202, + "num_input_tokens_seen": 121630004, + "step": 1818 + }, + { + "epoch": 0.20629787234042554, + "loss": 1.3794515132904053, + "loss_ce": 0.007381257601082325, + "loss_iou": 0.55078125, + "loss_num": 0.054443359375, + "loss_xval": 1.375, + "num_input_tokens_seen": 121630004, + "step": 1818 + }, + { + "epoch": 0.2064113475177305, + "grad_norm": 21.461503982543945, + "learning_rate": 5e-05, + "loss": 1.3914, + "num_input_tokens_seen": 121696100, + "step": 1819 + }, + { + "epoch": 0.2064113475177305, + "loss": 1.2313389778137207, + "loss_ce": 0.003311594482511282, + "loss_iou": 0.515625, + "loss_num": 0.039306640625, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 121696100, + "step": 1819 + }, + { + "epoch": 0.20652482269503547, + "grad_norm": 23.219404220581055, + "learning_rate": 5e-05, + "loss": 1.3014, + "num_input_tokens_seen": 121762232, + "step": 1820 + }, + { + "epoch": 0.20652482269503547, + "loss": 1.486249327659607, + "loss_ce": 0.004804056603461504, + "loss_iou": 0.63671875, + "loss_num": 0.0419921875, + "loss_xval": 1.484375, + "num_input_tokens_seen": 121762232, + "step": 1820 + }, + { + "epoch": 0.20663829787234042, + "grad_norm": 19.289600372314453, + "learning_rate": 5e-05, + "loss": 1.3396, + "num_input_tokens_seen": 121828472, + "step": 1821 + }, + { + "epoch": 0.20663829787234042, + "loss": 1.267971158027649, + "loss_ce": 0.006252384278923273, + "loss_iou": 0.5625, + "loss_num": 0.027587890625, + "loss_xval": 1.265625, + "num_input_tokens_seen": 121828472, + "step": 1821 + }, + { + "epoch": 0.2067517730496454, + "grad_norm": 18.616207122802734, + "learning_rate": 5e-05, + "loss": 1.4055, + "num_input_tokens_seen": 121896184, + "step": 1822 + }, + { + "epoch": 0.2067517730496454, + "loss": 1.3945283889770508, + "loss_ce": 0.0043915994465351105, + "loss_iou": 0.57421875, + "loss_num": 0.049072265625, + "loss_xval": 1.390625, + "num_input_tokens_seen": 121896184, + "step": 1822 + }, + { + "epoch": 0.20686524822695035, + "grad_norm": 15.89615249633789, + "learning_rate": 5e-05, + "loss": 1.2869, + "num_input_tokens_seen": 121962308, + "step": 1823 + }, + { + "epoch": 0.20686524822695035, + "loss": 1.1691327095031738, + "loss_ce": 0.006291009020060301, + "loss_iou": 0.453125, + "loss_num": 0.051513671875, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 121962308, + "step": 1823 + }, + { + "epoch": 0.20697872340425533, + "grad_norm": 19.455537796020508, + "learning_rate": 5e-05, + "loss": 1.352, + "num_input_tokens_seen": 122028552, + "step": 1824 + }, + { + "epoch": 0.20697872340425533, + "loss": 1.5711737871170044, + "loss_ce": 0.013739744201302528, + "loss_iou": 0.625, + "loss_num": 0.06201171875, + "loss_xval": 1.5546875, + "num_input_tokens_seen": 122028552, + "step": 1824 + }, + { + "epoch": 0.20709219858156028, + "grad_norm": 37.2281379699707, + "learning_rate": 5e-05, + "loss": 1.4287, + "num_input_tokens_seen": 122095596, + "step": 1825 + }, + { + "epoch": 0.20709219858156028, + "loss": 1.440447211265564, + "loss_ce": 0.003923735581338406, + "loss_iou": 0.58203125, + "loss_num": 0.055419921875, + "loss_xval": 1.4375, + "num_input_tokens_seen": 122095596, + "step": 1825 + }, + { + "epoch": 0.20720567375886526, + "grad_norm": 38.5538330078125, + "learning_rate": 5e-05, + "loss": 1.4014, + "num_input_tokens_seen": 122161172, + "step": 1826 + }, + { + "epoch": 0.20720567375886526, + "loss": 1.4206510782241821, + "loss_ce": 0.006588544696569443, + "loss_iou": 0.59765625, + "loss_num": 0.0439453125, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 122161172, + "step": 1826 + }, + { + "epoch": 0.2073191489361702, + "grad_norm": 44.040184020996094, + "learning_rate": 5e-05, + "loss": 1.3533, + "num_input_tokens_seen": 122227896, + "step": 1827 + }, + { + "epoch": 0.2073191489361702, + "loss": 1.3952606916427612, + "loss_ce": 0.004757757298648357, + "loss_iou": 0.5546875, + "loss_num": 0.056396484375, + "loss_xval": 1.390625, + "num_input_tokens_seen": 122227896, + "step": 1827 + }, + { + "epoch": 0.20743262411347518, + "grad_norm": 14.848538398742676, + "learning_rate": 5e-05, + "loss": 1.2997, + "num_input_tokens_seen": 122294464, + "step": 1828 + }, + { + "epoch": 0.20743262411347518, + "loss": 1.1870465278625488, + "loss_ce": 0.004429388791322708, + "loss_iou": 0.5234375, + "loss_num": 0.0277099609375, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 122294464, + "step": 1828 + }, + { + "epoch": 0.20754609929078013, + "grad_norm": 14.377166748046875, + "learning_rate": 5e-05, + "loss": 1.1766, + "num_input_tokens_seen": 122362032, + "step": 1829 + }, + { + "epoch": 0.20754609929078013, + "loss": 1.3474047183990479, + "loss_ce": 0.007072759792208672, + "loss_iou": 0.55078125, + "loss_num": 0.048095703125, + "loss_xval": 1.34375, + "num_input_tokens_seen": 122362032, + "step": 1829 + }, + { + "epoch": 0.2076595744680851, + "grad_norm": 18.754884719848633, + "learning_rate": 5e-05, + "loss": 1.1793, + "num_input_tokens_seen": 122428588, + "step": 1830 + }, + { + "epoch": 0.2076595744680851, + "loss": 1.050290584564209, + "loss_ce": 0.0034155254252254963, + "loss_iou": 0.451171875, + "loss_num": 0.029052734375, + "loss_xval": 1.046875, + "num_input_tokens_seen": 122428588, + "step": 1830 + }, + { + "epoch": 0.20777304964539006, + "grad_norm": 43.98708724975586, + "learning_rate": 5e-05, + "loss": 1.3666, + "num_input_tokens_seen": 122496060, + "step": 1831 + }, + { + "epoch": 0.20777304964539006, + "loss": 1.2264161109924316, + "loss_ce": 0.005713014863431454, + "loss_iou": 0.5390625, + "loss_num": 0.0277099609375, + "loss_xval": 1.21875, + "num_input_tokens_seen": 122496060, + "step": 1831 + }, + { + "epoch": 0.20788652482269504, + "grad_norm": 23.903839111328125, + "learning_rate": 5e-05, + "loss": 1.5456, + "num_input_tokens_seen": 122562744, + "step": 1832 + }, + { + "epoch": 0.20788652482269504, + "loss": 1.699760913848877, + "loss_ce": 0.007866406813263893, + "loss_iou": 0.65625, + "loss_num": 0.0751953125, + "loss_xval": 1.6953125, + "num_input_tokens_seen": 122562744, + "step": 1832 + }, + { + "epoch": 0.208, + "grad_norm": 38.76064682006836, + "learning_rate": 5e-05, + "loss": 1.3264, + "num_input_tokens_seen": 122630188, + "step": 1833 + }, + { + "epoch": 0.208, + "loss": 1.3958957195281982, + "loss_ce": 0.012594997882843018, + "loss_iou": 0.578125, + "loss_num": 0.0458984375, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 122630188, + "step": 1833 + }, + { + "epoch": 0.20811347517730497, + "grad_norm": 37.056339263916016, + "learning_rate": 5e-05, + "loss": 1.4465, + "num_input_tokens_seen": 122697948, + "step": 1834 + }, + { + "epoch": 0.20811347517730497, + "loss": 1.5363545417785645, + "loss_ce": 0.005104526877403259, + "loss_iou": 0.6640625, + "loss_num": 0.0400390625, + "loss_xval": 1.53125, + "num_input_tokens_seen": 122697948, + "step": 1834 + }, + { + "epoch": 0.20822695035460992, + "grad_norm": 54.86750793457031, + "learning_rate": 5e-05, + "loss": 1.7229, + "num_input_tokens_seen": 122764472, + "step": 1835 + }, + { + "epoch": 0.20822695035460992, + "loss": 1.80054771900177, + "loss_ce": 0.011485285125672817, + "loss_iou": 0.75, + "loss_num": 0.058349609375, + "loss_xval": 1.7890625, + "num_input_tokens_seen": 122764472, + "step": 1835 + }, + { + "epoch": 0.2083404255319149, + "grad_norm": 47.47636032104492, + "learning_rate": 5e-05, + "loss": 1.5395, + "num_input_tokens_seen": 122831176, + "step": 1836 + }, + { + "epoch": 0.2083404255319149, + "loss": 1.5014232397079468, + "loss_ce": 0.005817776545882225, + "loss_iou": 0.59765625, + "loss_num": 0.06005859375, + "loss_xval": 1.4921875, + "num_input_tokens_seen": 122831176, + "step": 1836 + }, + { + "epoch": 0.20845390070921985, + "grad_norm": 36.50994110107422, + "learning_rate": 5e-05, + "loss": 1.3205, + "num_input_tokens_seen": 122898664, + "step": 1837 + }, + { + "epoch": 0.20845390070921985, + "loss": 1.3632936477661133, + "loss_ce": 0.007824897766113281, + "loss_iou": 0.609375, + "loss_num": 0.0277099609375, + "loss_xval": 1.359375, + "num_input_tokens_seen": 122898664, + "step": 1837 + }, + { + "epoch": 0.20856737588652483, + "grad_norm": 17.487760543823242, + "learning_rate": 5e-05, + "loss": 1.6723, + "num_input_tokens_seen": 122966192, + "step": 1838 + }, + { + "epoch": 0.20856737588652483, + "loss": 1.694272756576538, + "loss_ce": 0.005796268116682768, + "loss_iou": 0.72265625, + "loss_num": 0.0478515625, + "loss_xval": 1.6875, + "num_input_tokens_seen": 122966192, + "step": 1838 + }, + { + "epoch": 0.20868085106382978, + "grad_norm": 14.387699127197266, + "learning_rate": 5e-05, + "loss": 1.4074, + "num_input_tokens_seen": 123033860, + "step": 1839 + }, + { + "epoch": 0.20868085106382978, + "loss": 1.5661722421646118, + "loss_ce": 0.005137084051966667, + "loss_iou": 0.63671875, + "loss_num": 0.057373046875, + "loss_xval": 1.5625, + "num_input_tokens_seen": 123033860, + "step": 1839 + }, + { + "epoch": 0.20879432624113475, + "grad_norm": 16.314453125, + "learning_rate": 5e-05, + "loss": 1.3555, + "num_input_tokens_seen": 123101308, + "step": 1840 + }, + { + "epoch": 0.20879432624113475, + "loss": 1.3283393383026123, + "loss_ce": 0.004120569676160812, + "loss_iou": 0.54296875, + "loss_num": 0.04736328125, + "loss_xval": 1.328125, + "num_input_tokens_seen": 123101308, + "step": 1840 + }, + { + "epoch": 0.2089078014184397, + "grad_norm": 30.44655990600586, + "learning_rate": 5e-05, + "loss": 1.2925, + "num_input_tokens_seen": 123169344, + "step": 1841 + }, + { + "epoch": 0.2089078014184397, + "loss": 1.2545042037963867, + "loss_ce": 0.007922209799289703, + "loss_iou": 0.51171875, + "loss_num": 0.044921875, + "loss_xval": 1.25, + "num_input_tokens_seen": 123169344, + "step": 1841 + }, + { + "epoch": 0.20902127659574468, + "grad_norm": 27.191213607788086, + "learning_rate": 5e-05, + "loss": 1.5771, + "num_input_tokens_seen": 123235984, + "step": 1842 + }, + { + "epoch": 0.20902127659574468, + "loss": 1.7351012229919434, + "loss_ce": 0.009515305981040001, + "loss_iou": 0.6953125, + "loss_num": 0.06689453125, + "loss_xval": 1.7265625, + "num_input_tokens_seen": 123235984, + "step": 1842 + }, + { + "epoch": 0.20913475177304963, + "grad_norm": 17.38056182861328, + "learning_rate": 5e-05, + "loss": 1.2428, + "num_input_tokens_seen": 123302332, + "step": 1843 + }, + { + "epoch": 0.20913475177304963, + "loss": 1.301153540611267, + "loss_ce": 0.0046447878703475, + "loss_iou": 0.546875, + "loss_num": 0.039794921875, + "loss_xval": 1.296875, + "num_input_tokens_seen": 123302332, + "step": 1843 + }, + { + "epoch": 0.2092482269503546, + "grad_norm": 12.640437126159668, + "learning_rate": 5e-05, + "loss": 1.2405, + "num_input_tokens_seen": 123368592, + "step": 1844 + }, + { + "epoch": 0.2092482269503546, + "loss": 1.4218870401382446, + "loss_ce": 0.0078245485201478, + "loss_iou": 0.6015625, + "loss_num": 0.0419921875, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 123368592, + "step": 1844 + }, + { + "epoch": 0.2093617021276596, + "grad_norm": 18.432893753051758, + "learning_rate": 5e-05, + "loss": 1.4058, + "num_input_tokens_seen": 123435012, + "step": 1845 + }, + { + "epoch": 0.2093617021276596, + "loss": 1.4190737009048462, + "loss_ce": 0.006475999020040035, + "loss_iou": 0.5859375, + "loss_num": 0.047607421875, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 123435012, + "step": 1845 + }, + { + "epoch": 0.20947517730496454, + "grad_norm": 29.1065731048584, + "learning_rate": 5e-05, + "loss": 1.2676, + "num_input_tokens_seen": 123502480, + "step": 1846 + }, + { + "epoch": 0.20947517730496454, + "loss": 1.0628876686096191, + "loss_ce": 0.0028291333001106977, + "loss_iou": 0.4921875, + "loss_num": 0.0152587890625, + "loss_xval": 1.0625, + "num_input_tokens_seen": 123502480, + "step": 1846 + }, + { + "epoch": 0.20958865248226952, + "grad_norm": 19.812448501586914, + "learning_rate": 5e-05, + "loss": 1.5678, + "num_input_tokens_seen": 123569876, + "step": 1847 + }, + { + "epoch": 0.20958865248226952, + "loss": 1.7214984893798828, + "loss_ce": 0.0037251675967127085, + "loss_iou": 0.73828125, + "loss_num": 0.048583984375, + "loss_xval": 1.71875, + "num_input_tokens_seen": 123569876, + "step": 1847 + }, + { + "epoch": 0.20970212765957447, + "grad_norm": 17.317716598510742, + "learning_rate": 5e-05, + "loss": 1.3258, + "num_input_tokens_seen": 123636688, + "step": 1848 + }, + { + "epoch": 0.20970212765957447, + "loss": 1.155266284942627, + "loss_ce": 0.009758468717336655, + "loss_iou": 0.455078125, + "loss_num": 0.04736328125, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 123636688, + "step": 1848 + }, + { + "epoch": 0.20981560283687944, + "grad_norm": 21.081987380981445, + "learning_rate": 5e-05, + "loss": 1.2839, + "num_input_tokens_seen": 123703460, + "step": 1849 + }, + { + "epoch": 0.20981560283687944, + "loss": 1.2957266569137573, + "loss_ce": 0.006298033054918051, + "loss_iou": 0.52734375, + "loss_num": 0.047607421875, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 123703460, + "step": 1849 + }, + { + "epoch": 0.2099290780141844, + "grad_norm": 31.785354614257812, + "learning_rate": 5e-05, + "loss": 1.1404, + "num_input_tokens_seen": 123770296, + "step": 1850 + }, + { + "epoch": 0.2099290780141844, + "loss": 1.1461175680160522, + "loss_ce": 0.005980833433568478, + "loss_iou": 0.49609375, + "loss_num": 0.029296875, + "loss_xval": 1.140625, + "num_input_tokens_seen": 123770296, + "step": 1850 + }, + { + "epoch": 0.21004255319148937, + "grad_norm": 24.16352653503418, + "learning_rate": 5e-05, + "loss": 1.5697, + "num_input_tokens_seen": 123837868, + "step": 1851 + }, + { + "epoch": 0.21004255319148937, + "loss": 1.6877485513687134, + "loss_ce": 0.0041548097506165504, + "loss_iou": 0.734375, + "loss_num": 0.043212890625, + "loss_xval": 1.6875, + "num_input_tokens_seen": 123837868, + "step": 1851 + }, + { + "epoch": 0.21015602836879432, + "grad_norm": 10.61952018737793, + "learning_rate": 5e-05, + "loss": 1.2218, + "num_input_tokens_seen": 123904592, + "step": 1852 + }, + { + "epoch": 0.21015602836879432, + "loss": 1.2879869937896729, + "loss_ce": 0.004295593127608299, + "loss_iou": 0.53125, + "loss_num": 0.0439453125, + "loss_xval": 1.28125, + "num_input_tokens_seen": 123904592, + "step": 1852 + }, + { + "epoch": 0.2102695035460993, + "grad_norm": 25.04264259338379, + "learning_rate": 5e-05, + "loss": 1.3567, + "num_input_tokens_seen": 123971424, + "step": 1853 + }, + { + "epoch": 0.2102695035460993, + "loss": 1.38511323928833, + "loss_ce": 0.008648323826491833, + "loss_iou": 0.55078125, + "loss_num": 0.05517578125, + "loss_xval": 1.375, + "num_input_tokens_seen": 123971424, + "step": 1853 + }, + { + "epoch": 0.21038297872340425, + "grad_norm": 38.52370071411133, + "learning_rate": 5e-05, + "loss": 1.6916, + "num_input_tokens_seen": 124038504, + "step": 1854 + }, + { + "epoch": 0.21038297872340425, + "loss": 1.7223215103149414, + "loss_ce": 0.005524619948118925, + "loss_iou": 0.75, + "loss_num": 0.0439453125, + "loss_xval": 1.71875, + "num_input_tokens_seen": 124038504, + "step": 1854 + }, + { + "epoch": 0.21049645390070923, + "grad_norm": 19.591737747192383, + "learning_rate": 5e-05, + "loss": 1.7749, + "num_input_tokens_seen": 124105544, + "step": 1855 + }, + { + "epoch": 0.21049645390070923, + "loss": 1.7365002632141113, + "loss_ce": 0.006031527183949947, + "loss_iou": 0.7265625, + "loss_num": 0.0546875, + "loss_xval": 1.734375, + "num_input_tokens_seen": 124105544, + "step": 1855 + }, + { + "epoch": 0.21060992907801418, + "grad_norm": 18.73748207092285, + "learning_rate": 5e-05, + "loss": 1.1875, + "num_input_tokens_seen": 124172316, + "step": 1856 + }, + { + "epoch": 0.21060992907801418, + "loss": 1.2420618534088135, + "loss_ce": 0.005733661353588104, + "loss_iou": 0.50390625, + "loss_num": 0.045166015625, + "loss_xval": 1.234375, + "num_input_tokens_seen": 124172316, + "step": 1856 + }, + { + "epoch": 0.21072340425531916, + "grad_norm": 15.362834930419922, + "learning_rate": 5e-05, + "loss": 1.0529, + "num_input_tokens_seen": 124239812, + "step": 1857 + }, + { + "epoch": 0.21072340425531916, + "loss": 1.041860818862915, + "loss_ce": 0.004324262961745262, + "loss_iou": 0.474609375, + "loss_num": 0.0177001953125, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 124239812, + "step": 1857 + }, + { + "epoch": 0.2108368794326241, + "grad_norm": 23.74177360534668, + "learning_rate": 5e-05, + "loss": 1.3101, + "num_input_tokens_seen": 124306448, + "step": 1858 + }, + { + "epoch": 0.2108368794326241, + "loss": 1.2549412250518799, + "loss_ce": 0.004941211082041264, + "loss_iou": 0.53515625, + "loss_num": 0.035888671875, + "loss_xval": 1.25, + "num_input_tokens_seen": 124306448, + "step": 1858 + }, + { + "epoch": 0.21095035460992909, + "grad_norm": 26.62175178527832, + "learning_rate": 5e-05, + "loss": 1.6346, + "num_input_tokens_seen": 124374188, + "step": 1859 + }, + { + "epoch": 0.21095035460992909, + "loss": 1.8489713668823242, + "loss_ce": 0.008151007816195488, + "loss_iou": 0.75390625, + "loss_num": 0.06640625, + "loss_xval": 1.84375, + "num_input_tokens_seen": 124374188, + "step": 1859 + }, + { + "epoch": 0.21106382978723404, + "grad_norm": 20.160890579223633, + "learning_rate": 5e-05, + "loss": 1.378, + "num_input_tokens_seen": 124441196, + "step": 1860 + }, + { + "epoch": 0.21106382978723404, + "loss": 1.3230383396148682, + "loss_ce": 0.004678928293287754, + "loss_iou": 0.58203125, + "loss_num": 0.031494140625, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 124441196, + "step": 1860 + }, + { + "epoch": 0.21117730496453901, + "grad_norm": 26.20771598815918, + "learning_rate": 5e-05, + "loss": 1.5658, + "num_input_tokens_seen": 124508472, + "step": 1861 + }, + { + "epoch": 0.21117730496453901, + "loss": 1.5328209400177002, + "loss_ce": 0.004500716924667358, + "loss_iou": 0.67578125, + "loss_num": 0.035888671875, + "loss_xval": 1.53125, + "num_input_tokens_seen": 124508472, + "step": 1861 + }, + { + "epoch": 0.21129078014184396, + "grad_norm": 21.76909828186035, + "learning_rate": 5e-05, + "loss": 1.2733, + "num_input_tokens_seen": 124575076, + "step": 1862 + }, + { + "epoch": 0.21129078014184396, + "loss": 1.3624228239059448, + "loss_ce": 0.006953985430300236, + "loss_iou": 0.578125, + "loss_num": 0.038818359375, + "loss_xval": 1.359375, + "num_input_tokens_seen": 124575076, + "step": 1862 + }, + { + "epoch": 0.21140425531914894, + "grad_norm": 42.06001281738281, + "learning_rate": 5e-05, + "loss": 1.5038, + "num_input_tokens_seen": 124642340, + "step": 1863 + }, + { + "epoch": 0.21140425531914894, + "loss": 1.4064871072769165, + "loss_ce": 0.006096472032368183, + "loss_iou": 0.58984375, + "loss_num": 0.0439453125, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 124642340, + "step": 1863 + }, + { + "epoch": 0.2115177304964539, + "grad_norm": 21.679901123046875, + "learning_rate": 5e-05, + "loss": 1.693, + "num_input_tokens_seen": 124708888, + "step": 1864 + }, + { + "epoch": 0.2115177304964539, + "loss": 1.6649627685546875, + "loss_ce": 0.0077362037263810635, + "loss_iou": 0.72265625, + "loss_num": 0.04296875, + "loss_xval": 1.65625, + "num_input_tokens_seen": 124708888, + "step": 1864 + }, + { + "epoch": 0.21163120567375887, + "grad_norm": 25.682170867919922, + "learning_rate": 5e-05, + "loss": 1.4056, + "num_input_tokens_seen": 124777336, + "step": 1865 + }, + { + "epoch": 0.21163120567375887, + "loss": 1.4398927688598633, + "loss_ce": 0.006299027241766453, + "loss_iou": 0.609375, + "loss_num": 0.042724609375, + "loss_xval": 1.4375, + "num_input_tokens_seen": 124777336, + "step": 1865 + }, + { + "epoch": 0.21174468085106382, + "grad_norm": 23.986711502075195, + "learning_rate": 5e-05, + "loss": 1.2102, + "num_input_tokens_seen": 124843768, + "step": 1866 + }, + { + "epoch": 0.21174468085106382, + "loss": 1.2178170680999756, + "loss_ce": 0.007856158539652824, + "loss_iou": 0.53125, + "loss_num": 0.0302734375, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 124843768, + "step": 1866 + }, + { + "epoch": 0.2118581560283688, + "grad_norm": 64.78337860107422, + "learning_rate": 5e-05, + "loss": 1.533, + "num_input_tokens_seen": 124909544, + "step": 1867 + }, + { + "epoch": 0.2118581560283688, + "loss": 1.5543112754821777, + "loss_ce": 0.009389320388436317, + "loss_iou": 0.6171875, + "loss_num": 0.0615234375, + "loss_xval": 1.546875, + "num_input_tokens_seen": 124909544, + "step": 1867 + }, + { + "epoch": 0.21197163120567375, + "grad_norm": 29.87447166442871, + "learning_rate": 5e-05, + "loss": 1.2433, + "num_input_tokens_seen": 124976820, + "step": 1868 + }, + { + "epoch": 0.21197163120567375, + "loss": 1.248319149017334, + "loss_ce": 0.008084846660494804, + "loss_iou": 0.515625, + "loss_num": 0.041748046875, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 124976820, + "step": 1868 + }, + { + "epoch": 0.21208510638297873, + "grad_norm": 25.336660385131836, + "learning_rate": 5e-05, + "loss": 1.5884, + "num_input_tokens_seen": 125044172, + "step": 1869 + }, + { + "epoch": 0.21208510638297873, + "loss": 1.6234253644943237, + "loss_ce": 0.005261304788291454, + "loss_iou": 0.67578125, + "loss_num": 0.05419921875, + "loss_xval": 1.6171875, + "num_input_tokens_seen": 125044172, + "step": 1869 + }, + { + "epoch": 0.21219858156028368, + "grad_norm": 28.075843811035156, + "learning_rate": 5e-05, + "loss": 1.0977, + "num_input_tokens_seen": 125110352, + "step": 1870 + }, + { + "epoch": 0.21219858156028368, + "loss": 1.239732265472412, + "loss_ce": 0.00682213855907321, + "loss_iou": 0.5078125, + "loss_num": 0.043701171875, + "loss_xval": 1.234375, + "num_input_tokens_seen": 125110352, + "step": 1870 + }, + { + "epoch": 0.21231205673758866, + "grad_norm": 21.43779945373535, + "learning_rate": 5e-05, + "loss": 1.298, + "num_input_tokens_seen": 125176952, + "step": 1871 + }, + { + "epoch": 0.21231205673758866, + "loss": 1.3809468746185303, + "loss_ce": 0.007899945601820946, + "loss_iou": 0.6015625, + "loss_num": 0.03515625, + "loss_xval": 1.375, + "num_input_tokens_seen": 125176952, + "step": 1871 + }, + { + "epoch": 0.2124255319148936, + "grad_norm": 14.164793968200684, + "learning_rate": 5e-05, + "loss": 1.0681, + "num_input_tokens_seen": 125243316, + "step": 1872 + }, + { + "epoch": 0.2124255319148936, + "loss": 1.1276967525482178, + "loss_ce": 0.0026966917794197798, + "loss_iou": 0.46875, + "loss_num": 0.037109375, + "loss_xval": 1.125, + "num_input_tokens_seen": 125243316, + "step": 1872 + }, + { + "epoch": 0.21253900709219858, + "grad_norm": 31.323232650756836, + "learning_rate": 5e-05, + "loss": 1.509, + "num_input_tokens_seen": 125310228, + "step": 1873 + }, + { + "epoch": 0.21253900709219858, + "loss": 1.8556654453277588, + "loss_ce": 0.009962325915694237, + "loss_iou": 0.73828125, + "loss_num": 0.07470703125, + "loss_xval": 1.84375, + "num_input_tokens_seen": 125310228, + "step": 1873 + }, + { + "epoch": 0.21265248226950353, + "grad_norm": 18.623882293701172, + "learning_rate": 5e-05, + "loss": 1.3111, + "num_input_tokens_seen": 125376772, + "step": 1874 + }, + { + "epoch": 0.21265248226950353, + "loss": 1.2358648777008057, + "loss_ce": 0.006372748874127865, + "loss_iou": 0.54296875, + "loss_num": 0.0281982421875, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 125376772, + "step": 1874 + }, + { + "epoch": 0.2127659574468085, + "grad_norm": 46.65225601196289, + "learning_rate": 5e-05, + "loss": 1.2834, + "num_input_tokens_seen": 125443804, + "step": 1875 + }, + { + "epoch": 0.2127659574468085, + "loss": 1.1762192249298096, + "loss_ce": 0.004344169050455093, + "loss_iou": 0.494140625, + "loss_num": 0.036376953125, + "loss_xval": 1.171875, + "num_input_tokens_seen": 125443804, + "step": 1875 + }, + { + "epoch": 0.21287943262411346, + "grad_norm": 27.110445022583008, + "learning_rate": 5e-05, + "loss": 1.4814, + "num_input_tokens_seen": 125510328, + "step": 1876 + }, + { + "epoch": 0.21287943262411346, + "loss": 1.3762750625610352, + "loss_ce": 0.00908753089606762, + "loss_iou": 0.62109375, + "loss_num": 0.025634765625, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 125510328, + "step": 1876 + }, + { + "epoch": 0.21299290780141844, + "grad_norm": 30.31709861755371, + "learning_rate": 5e-05, + "loss": 1.3034, + "num_input_tokens_seen": 125577632, + "step": 1877 + }, + { + "epoch": 0.21299290780141844, + "loss": 1.3135619163513184, + "loss_ce": 0.006433050148189068, + "loss_iou": 0.5546875, + "loss_num": 0.03955078125, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 125577632, + "step": 1877 + }, + { + "epoch": 0.2131063829787234, + "grad_norm": 21.106510162353516, + "learning_rate": 5e-05, + "loss": 1.4185, + "num_input_tokens_seen": 125645280, + "step": 1878 + }, + { + "epoch": 0.2131063829787234, + "loss": 1.4904181957244873, + "loss_ce": 0.004090082366019487, + "loss_iou": 0.62109375, + "loss_num": 0.049072265625, + "loss_xval": 1.484375, + "num_input_tokens_seen": 125645280, + "step": 1878 + }, + { + "epoch": 0.21321985815602837, + "grad_norm": 18.233081817626953, + "learning_rate": 5e-05, + "loss": 1.319, + "num_input_tokens_seen": 125711708, + "step": 1879 + }, + { + "epoch": 0.21321985815602837, + "loss": 1.5530766248703003, + "loss_ce": 0.005225043743848801, + "loss_iou": 0.6015625, + "loss_num": 0.06884765625, + "loss_xval": 1.546875, + "num_input_tokens_seen": 125711708, + "step": 1879 + }, + { + "epoch": 0.21333333333333335, + "grad_norm": 16.901151657104492, + "learning_rate": 5e-05, + "loss": 1.2837, + "num_input_tokens_seen": 125778076, + "step": 1880 + }, + { + "epoch": 0.21333333333333335, + "loss": 1.2563015222549438, + "loss_ce": 0.004836647771298885, + "loss_iou": 0.50390625, + "loss_num": 0.04931640625, + "loss_xval": 1.25, + "num_input_tokens_seen": 125778076, + "step": 1880 + }, + { + "epoch": 0.2134468085106383, + "grad_norm": 36.68320846557617, + "learning_rate": 5e-05, + "loss": 1.3896, + "num_input_tokens_seen": 125844708, + "step": 1881 + }, + { + "epoch": 0.2134468085106383, + "loss": 1.5009279251098633, + "loss_ce": 0.00922868587076664, + "loss_iou": 0.625, + "loss_num": 0.048828125, + "loss_xval": 1.4921875, + "num_input_tokens_seen": 125844708, + "step": 1881 + }, + { + "epoch": 0.21356028368794328, + "grad_norm": 51.717132568359375, + "learning_rate": 5e-05, + "loss": 1.5901, + "num_input_tokens_seen": 125910708, + "step": 1882 + }, + { + "epoch": 0.21356028368794328, + "loss": 1.7481663227081299, + "loss_ce": 0.005002323072403669, + "loss_iou": 0.6953125, + "loss_num": 0.06982421875, + "loss_xval": 1.7421875, + "num_input_tokens_seen": 125910708, + "step": 1882 + }, + { + "epoch": 0.21367375886524823, + "grad_norm": 24.042213439941406, + "learning_rate": 5e-05, + "loss": 1.302, + "num_input_tokens_seen": 125977344, + "step": 1883 + }, + { + "epoch": 0.21367375886524823, + "loss": 1.175896167755127, + "loss_ce": 0.007439052686095238, + "loss_iou": 0.5078125, + "loss_num": 0.030029296875, + "loss_xval": 1.171875, + "num_input_tokens_seen": 125977344, + "step": 1883 + }, + { + "epoch": 0.2137872340425532, + "grad_norm": 12.013107299804688, + "learning_rate": 5e-05, + "loss": 1.1154, + "num_input_tokens_seen": 126043564, + "step": 1884 + }, + { + "epoch": 0.2137872340425532, + "loss": 0.9805540442466736, + "loss_ce": 0.008141888305544853, + "loss_iou": 0.40234375, + "loss_num": 0.033935546875, + "loss_xval": 0.97265625, + "num_input_tokens_seen": 126043564, + "step": 1884 + }, + { + "epoch": 0.21390070921985815, + "grad_norm": 14.981123924255371, + "learning_rate": 5e-05, + "loss": 1.3667, + "num_input_tokens_seen": 126110484, + "step": 1885 + }, + { + "epoch": 0.21390070921985815, + "loss": 1.4075998067855835, + "loss_ce": 0.006720827892422676, + "loss_iou": 0.55078125, + "loss_num": 0.0595703125, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 126110484, + "step": 1885 + }, + { + "epoch": 0.21401418439716313, + "grad_norm": 24.16684341430664, + "learning_rate": 5e-05, + "loss": 1.2105, + "num_input_tokens_seen": 126178432, + "step": 1886 + }, + { + "epoch": 0.21401418439716313, + "loss": 1.2815407514572144, + "loss_ce": 0.004685296211391687, + "loss_iou": 0.578125, + "loss_num": 0.024658203125, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 126178432, + "step": 1886 + }, + { + "epoch": 0.21412765957446808, + "grad_norm": 17.967205047607422, + "learning_rate": 5e-05, + "loss": 1.1548, + "num_input_tokens_seen": 126244640, + "step": 1887 + }, + { + "epoch": 0.21412765957446808, + "loss": 1.354785442352295, + "loss_ce": 0.012012088671326637, + "loss_iou": 0.54296875, + "loss_num": 0.05126953125, + "loss_xval": 1.34375, + "num_input_tokens_seen": 126244640, + "step": 1887 + }, + { + "epoch": 0.21424113475177306, + "grad_norm": 36.40177536010742, + "learning_rate": 5e-05, + "loss": 1.2584, + "num_input_tokens_seen": 126312488, + "step": 1888 + }, + { + "epoch": 0.21424113475177306, + "loss": 1.2839021682739258, + "loss_ce": 0.003628805745393038, + "loss_iou": 0.578125, + "loss_num": 0.0252685546875, + "loss_xval": 1.28125, + "num_input_tokens_seen": 126312488, + "step": 1888 + }, + { + "epoch": 0.214354609929078, + "grad_norm": 14.43301773071289, + "learning_rate": 5e-05, + "loss": 1.4009, + "num_input_tokens_seen": 126379444, + "step": 1889 + }, + { + "epoch": 0.214354609929078, + "loss": 1.342073678970337, + "loss_ce": 0.004671345930546522, + "loss_iou": 0.5546875, + "loss_num": 0.046142578125, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 126379444, + "step": 1889 + }, + { + "epoch": 0.214468085106383, + "grad_norm": 22.466915130615234, + "learning_rate": 5e-05, + "loss": 1.241, + "num_input_tokens_seen": 126445980, + "step": 1890 + }, + { + "epoch": 0.214468085106383, + "loss": 1.0765924453735352, + "loss_ce": 0.005791761912405491, + "loss_iou": 0.46484375, + "loss_num": 0.0284423828125, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 126445980, + "step": 1890 + }, + { + "epoch": 0.21458156028368794, + "grad_norm": 30.058286666870117, + "learning_rate": 5e-05, + "loss": 1.2757, + "num_input_tokens_seen": 126512048, + "step": 1891 + }, + { + "epoch": 0.21458156028368794, + "loss": 1.4246947765350342, + "loss_ce": 0.005993551108986139, + "loss_iou": 0.6171875, + "loss_num": 0.03662109375, + "loss_xval": 1.421875, + "num_input_tokens_seen": 126512048, + "step": 1891 + }, + { + "epoch": 0.21469503546099292, + "grad_norm": 25.328989028930664, + "learning_rate": 5e-05, + "loss": 1.4598, + "num_input_tokens_seen": 126579556, + "step": 1892 + }, + { + "epoch": 0.21469503546099292, + "loss": 1.3002206087112427, + "loss_ce": 0.00481047946959734, + "loss_iou": 0.546875, + "loss_num": 0.0400390625, + "loss_xval": 1.296875, + "num_input_tokens_seen": 126579556, + "step": 1892 + }, + { + "epoch": 0.21480851063829787, + "grad_norm": 14.731558799743652, + "learning_rate": 5e-05, + "loss": 1.2322, + "num_input_tokens_seen": 126646984, + "step": 1893 + }, + { + "epoch": 0.21480851063829787, + "loss": 1.1534560918807983, + "loss_ce": 0.008924838155508041, + "loss_iou": 0.484375, + "loss_num": 0.035400390625, + "loss_xval": 1.140625, + "num_input_tokens_seen": 126646984, + "step": 1893 + }, + { + "epoch": 0.21492198581560285, + "grad_norm": 23.413471221923828, + "learning_rate": 5e-05, + "loss": 1.1906, + "num_input_tokens_seen": 126714112, + "step": 1894 + }, + { + "epoch": 0.21492198581560285, + "loss": 1.1019752025604248, + "loss_ce": 0.010056297294795513, + "loss_iou": 0.431640625, + "loss_num": 0.0458984375, + "loss_xval": 1.09375, + "num_input_tokens_seen": 126714112, + "step": 1894 + }, + { + "epoch": 0.2150354609929078, + "grad_norm": 50.001800537109375, + "learning_rate": 5e-05, + "loss": 1.6113, + "num_input_tokens_seen": 126780644, + "step": 1895 + }, + { + "epoch": 0.2150354609929078, + "loss": 1.6399357318878174, + "loss_ce": 0.006146631669253111, + "loss_iou": 0.6875, + "loss_num": 0.051025390625, + "loss_xval": 1.6328125, + "num_input_tokens_seen": 126780644, + "step": 1895 + }, + { + "epoch": 0.21514893617021277, + "grad_norm": 21.281705856323242, + "learning_rate": 5e-05, + "loss": 1.5955, + "num_input_tokens_seen": 126848480, + "step": 1896 + }, + { + "epoch": 0.21514893617021277, + "loss": 1.5946866273880005, + "loss_ce": 0.005819432437419891, + "loss_iou": 0.6953125, + "loss_num": 0.04052734375, + "loss_xval": 1.5859375, + "num_input_tokens_seen": 126848480, + "step": 1896 + }, + { + "epoch": 0.21526241134751772, + "grad_norm": 17.716943740844727, + "learning_rate": 5e-05, + "loss": 1.2872, + "num_input_tokens_seen": 126915448, + "step": 1897 + }, + { + "epoch": 0.21526241134751772, + "loss": 1.3000805377960205, + "loss_ce": 0.007355953566730022, + "loss_iou": 0.5390625, + "loss_num": 0.043212890625, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 126915448, + "step": 1897 + }, + { + "epoch": 0.2153758865248227, + "grad_norm": 19.019336700439453, + "learning_rate": 5e-05, + "loss": 1.2705, + "num_input_tokens_seen": 126982348, + "step": 1898 + }, + { + "epoch": 0.2153758865248227, + "loss": 1.2703561782836914, + "loss_ce": 0.00814911350607872, + "loss_iou": 0.546875, + "loss_num": 0.033203125, + "loss_xval": 1.265625, + "num_input_tokens_seen": 126982348, + "step": 1898 + }, + { + "epoch": 0.21548936170212765, + "grad_norm": 27.395435333251953, + "learning_rate": 5e-05, + "loss": 1.0581, + "num_input_tokens_seen": 127048720, + "step": 1899 + }, + { + "epoch": 0.21548936170212765, + "loss": 1.2202506065368652, + "loss_ce": 0.004430383909493685, + "loss_iou": 0.5078125, + "loss_num": 0.040771484375, + "loss_xval": 1.21875, + "num_input_tokens_seen": 127048720, + "step": 1899 + }, + { + "epoch": 0.21560283687943263, + "grad_norm": 23.684175491333008, + "learning_rate": 5e-05, + "loss": 1.5744, + "num_input_tokens_seen": 127115456, + "step": 1900 + }, + { + "epoch": 0.21560283687943263, + "loss": 1.6838457584381104, + "loss_ce": 0.006111388094723225, + "loss_iou": 0.6953125, + "loss_num": 0.057373046875, + "loss_xval": 1.6796875, + "num_input_tokens_seen": 127115456, + "step": 1900 + }, + { + "epoch": 0.21571631205673758, + "grad_norm": 28.786264419555664, + "learning_rate": 5e-05, + "loss": 1.2383, + "num_input_tokens_seen": 127181952, + "step": 1901 + }, + { + "epoch": 0.21571631205673758, + "loss": 1.179884433746338, + "loss_ce": 0.003614875953644514, + "loss_iou": 0.54296875, + "loss_num": 0.0177001953125, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 127181952, + "step": 1901 + }, + { + "epoch": 0.21582978723404256, + "grad_norm": 16.882831573486328, + "learning_rate": 5e-05, + "loss": 1.4189, + "num_input_tokens_seen": 127248148, + "step": 1902 + }, + { + "epoch": 0.21582978723404256, + "loss": 1.3499879837036133, + "loss_ce": 0.005871674045920372, + "loss_iou": 0.578125, + "loss_num": 0.037841796875, + "loss_xval": 1.34375, + "num_input_tokens_seen": 127248148, + "step": 1902 + }, + { + "epoch": 0.2159432624113475, + "grad_norm": 18.967687606811523, + "learning_rate": 5e-05, + "loss": 1.198, + "num_input_tokens_seen": 127314876, + "step": 1903 + }, + { + "epoch": 0.2159432624113475, + "loss": 1.1505986452102661, + "loss_ce": 0.0038701342418789864, + "loss_iou": 0.48828125, + "loss_num": 0.034423828125, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 127314876, + "step": 1903 + }, + { + "epoch": 0.2160567375886525, + "grad_norm": 38.73296356201172, + "learning_rate": 5e-05, + "loss": 1.436, + "num_input_tokens_seen": 127381844, + "step": 1904 + }, + { + "epoch": 0.2160567375886525, + "loss": 1.4222970008850098, + "loss_ce": 0.0053047602996230125, + "loss_iou": 0.58984375, + "loss_num": 0.047119140625, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 127381844, + "step": 1904 + }, + { + "epoch": 0.21617021276595744, + "grad_norm": 22.037925720214844, + "learning_rate": 5e-05, + "loss": 1.5888, + "num_input_tokens_seen": 127448956, + "step": 1905 + }, + { + "epoch": 0.21617021276595744, + "loss": 1.7158788442611694, + "loss_ce": 0.002988262800499797, + "loss_iou": 0.76953125, + "loss_num": 0.034912109375, + "loss_xval": 1.7109375, + "num_input_tokens_seen": 127448956, + "step": 1905 + }, + { + "epoch": 0.21628368794326241, + "grad_norm": 23.096527099609375, + "learning_rate": 5e-05, + "loss": 1.4058, + "num_input_tokens_seen": 127516080, + "step": 1906 + }, + { + "epoch": 0.21628368794326241, + "loss": 1.4238537549972534, + "loss_ce": 0.006373278331011534, + "loss_iou": 0.58203125, + "loss_num": 0.05126953125, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 127516080, + "step": 1906 + }, + { + "epoch": 0.21639716312056737, + "grad_norm": 31.259605407714844, + "learning_rate": 5e-05, + "loss": 1.333, + "num_input_tokens_seen": 127582492, + "step": 1907 + }, + { + "epoch": 0.21639716312056737, + "loss": 1.5759891271591187, + "loss_ce": 0.0037235617637634277, + "loss_iou": 0.671875, + "loss_num": 0.044677734375, + "loss_xval": 1.5703125, + "num_input_tokens_seen": 127582492, + "step": 1907 + }, + { + "epoch": 0.21651063829787234, + "grad_norm": 27.54084587097168, + "learning_rate": 5e-05, + "loss": 1.467, + "num_input_tokens_seen": 127649244, + "step": 1908 + }, + { + "epoch": 0.21651063829787234, + "loss": 1.4994257688522339, + "loss_ce": 0.007238264195621014, + "loss_iou": 0.64453125, + "loss_num": 0.040283203125, + "loss_xval": 1.4921875, + "num_input_tokens_seen": 127649244, + "step": 1908 + }, + { + "epoch": 0.2166241134751773, + "grad_norm": 34.25274658203125, + "learning_rate": 5e-05, + "loss": 1.3778, + "num_input_tokens_seen": 127717632, + "step": 1909 + }, + { + "epoch": 0.2166241134751773, + "loss": 1.3811789751052856, + "loss_ce": 0.004225822631269693, + "loss_iou": 0.58203125, + "loss_num": 0.042724609375, + "loss_xval": 1.375, + "num_input_tokens_seen": 127717632, + "step": 1909 + }, + { + "epoch": 0.21673758865248227, + "grad_norm": 23.511240005493164, + "learning_rate": 5e-05, + "loss": 1.7798, + "num_input_tokens_seen": 127784264, + "step": 1910 + }, + { + "epoch": 0.21673758865248227, + "loss": 1.7435593605041504, + "loss_ce": 0.0023484216071665287, + "loss_iou": 0.734375, + "loss_num": 0.05517578125, + "loss_xval": 1.7421875, + "num_input_tokens_seen": 127784264, + "step": 1910 + }, + { + "epoch": 0.21685106382978722, + "grad_norm": 23.150104522705078, + "learning_rate": 5e-05, + "loss": 1.2456, + "num_input_tokens_seen": 127851844, + "step": 1911 + }, + { + "epoch": 0.21685106382978722, + "loss": 1.2255942821502686, + "loss_ce": 0.003914551343768835, + "loss_iou": 0.55078125, + "loss_num": 0.024658203125, + "loss_xval": 1.21875, + "num_input_tokens_seen": 127851844, + "step": 1911 + }, + { + "epoch": 0.2169645390070922, + "grad_norm": 20.955734252929688, + "learning_rate": 5e-05, + "loss": 1.6103, + "num_input_tokens_seen": 127920240, + "step": 1912 + }, + { + "epoch": 0.2169645390070922, + "loss": 1.587541103363037, + "loss_ce": 0.007462971843779087, + "loss_iou": 0.6875, + "loss_num": 0.040283203125, + "loss_xval": 1.578125, + "num_input_tokens_seen": 127920240, + "step": 1912 + }, + { + "epoch": 0.21707801418439715, + "grad_norm": 20.83844757080078, + "learning_rate": 5e-05, + "loss": 1.26, + "num_input_tokens_seen": 127986844, + "step": 1913 + }, + { + "epoch": 0.21707801418439715, + "loss": 1.4184749126434326, + "loss_ce": 0.007830399088561535, + "loss_iou": 0.56640625, + "loss_num": 0.056396484375, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 127986844, + "step": 1913 + }, + { + "epoch": 0.21719148936170213, + "grad_norm": 24.739381790161133, + "learning_rate": 5e-05, + "loss": 1.0784, + "num_input_tokens_seen": 128053012, + "step": 1914 + }, + { + "epoch": 0.21719148936170213, + "loss": 0.9597398042678833, + "loss_ce": 0.008842654526233673, + "loss_iou": 0.38671875, + "loss_num": 0.035400390625, + "loss_xval": 0.94921875, + "num_input_tokens_seen": 128053012, + "step": 1914 + }, + { + "epoch": 0.2173049645390071, + "grad_norm": 32.0253791809082, + "learning_rate": 5e-05, + "loss": 1.4171, + "num_input_tokens_seen": 128119828, + "step": 1915 + }, + { + "epoch": 0.2173049645390071, + "loss": 1.6488118171691895, + "loss_ce": 0.006233798339962959, + "loss_iou": 0.703125, + "loss_num": 0.0478515625, + "loss_xval": 1.640625, + "num_input_tokens_seen": 128119828, + "step": 1915 + }, + { + "epoch": 0.21741843971631206, + "grad_norm": 20.6385440826416, + "learning_rate": 5e-05, + "loss": 1.4587, + "num_input_tokens_seen": 128186388, + "step": 1916 + }, + { + "epoch": 0.21741843971631206, + "loss": 1.5520541667938232, + "loss_ce": 0.009085445664823055, + "loss_iou": 0.67578125, + "loss_num": 0.038330078125, + "loss_xval": 1.546875, + "num_input_tokens_seen": 128186388, + "step": 1916 + }, + { + "epoch": 0.21753191489361703, + "grad_norm": 32.31060028076172, + "learning_rate": 5e-05, + "loss": 1.304, + "num_input_tokens_seen": 128253804, + "step": 1917 + }, + { + "epoch": 0.21753191489361703, + "loss": 1.339881181716919, + "loss_ce": 0.0059579056687653065, + "loss_iou": 0.55078125, + "loss_num": 0.0458984375, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 128253804, + "step": 1917 + }, + { + "epoch": 0.21764539007092198, + "grad_norm": 14.540494918823242, + "learning_rate": 5e-05, + "loss": 1.4967, + "num_input_tokens_seen": 128320452, + "step": 1918 + }, + { + "epoch": 0.21764539007092198, + "loss": 1.3686097860336304, + "loss_ce": 0.007281720172613859, + "loss_iou": 0.5625, + "loss_num": 0.046875, + "loss_xval": 1.359375, + "num_input_tokens_seen": 128320452, + "step": 1918 + }, + { + "epoch": 0.21775886524822696, + "grad_norm": 26.538652420043945, + "learning_rate": 5e-05, + "loss": 1.0848, + "num_input_tokens_seen": 128385600, + "step": 1919 + }, + { + "epoch": 0.21775886524822696, + "loss": 1.3974781036376953, + "loss_ce": 0.0029468312859535217, + "loss_iou": 0.64453125, + "loss_num": 0.0208740234375, + "loss_xval": 1.390625, + "num_input_tokens_seen": 128385600, + "step": 1919 + }, + { + "epoch": 0.2178723404255319, + "grad_norm": 27.207181930541992, + "learning_rate": 5e-05, + "loss": 1.5922, + "num_input_tokens_seen": 128452724, + "step": 1920 + }, + { + "epoch": 0.2178723404255319, + "loss": 1.5194673538208008, + "loss_ce": 0.0038424809463322163, + "loss_iou": 0.65234375, + "loss_num": 0.041748046875, + "loss_xval": 1.515625, + "num_input_tokens_seen": 128452724, + "step": 1920 + }, + { + "epoch": 0.2179858156028369, + "grad_norm": 28.417322158813477, + "learning_rate": 5e-05, + "loss": 1.2728, + "num_input_tokens_seen": 128520312, + "step": 1921 + }, + { + "epoch": 0.2179858156028369, + "loss": 1.2363581657409668, + "loss_ce": 0.0034480267204344273, + "loss_iou": 0.53515625, + "loss_num": 0.032958984375, + "loss_xval": 1.234375, + "num_input_tokens_seen": 128520312, + "step": 1921 + }, + { + "epoch": 0.21809929078014184, + "grad_norm": 25.21616554260254, + "learning_rate": 5e-05, + "loss": 1.5879, + "num_input_tokens_seen": 128587648, + "step": 1922 + }, + { + "epoch": 0.21809929078014184, + "loss": 1.6384212970733643, + "loss_ce": 0.005608859937638044, + "loss_iou": 0.640625, + "loss_num": 0.0703125, + "loss_xval": 1.6328125, + "num_input_tokens_seen": 128587648, + "step": 1922 + }, + { + "epoch": 0.21821276595744682, + "grad_norm": 18.756982803344727, + "learning_rate": 5e-05, + "loss": 1.1185, + "num_input_tokens_seen": 128654428, + "step": 1923 + }, + { + "epoch": 0.21821276595744682, + "loss": 1.2969584465026855, + "loss_ce": 0.0025247465819120407, + "loss_iou": 0.5390625, + "loss_num": 0.043701171875, + "loss_xval": 1.296875, + "num_input_tokens_seen": 128654428, + "step": 1923 + }, + { + "epoch": 0.21832624113475177, + "grad_norm": 36.07487869262695, + "learning_rate": 5e-05, + "loss": 1.3823, + "num_input_tokens_seen": 128721520, + "step": 1924 + }, + { + "epoch": 0.21832624113475177, + "loss": 1.2925912141799927, + "loss_ce": 0.003040448296815157, + "loss_iou": 0.55078125, + "loss_num": 0.037353515625, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 128721520, + "step": 1924 + }, + { + "epoch": 0.21843971631205675, + "grad_norm": 19.11833953857422, + "learning_rate": 5e-05, + "loss": 1.2535, + "num_input_tokens_seen": 128788556, + "step": 1925 + }, + { + "epoch": 0.21843971631205675, + "loss": 1.3486310243606567, + "loss_ce": 0.004392770119011402, + "loss_iou": 0.5859375, + "loss_num": 0.034912109375, + "loss_xval": 1.34375, + "num_input_tokens_seen": 128788556, + "step": 1925 + }, + { + "epoch": 0.2185531914893617, + "grad_norm": 23.1262149810791, + "learning_rate": 5e-05, + "loss": 1.3528, + "num_input_tokens_seen": 128854964, + "step": 1926 + }, + { + "epoch": 0.2185531914893617, + "loss": 1.408139944076538, + "loss_ce": 0.005796266719698906, + "loss_iou": 0.578125, + "loss_num": 0.048828125, + "loss_xval": 1.40625, + "num_input_tokens_seen": 128854964, + "step": 1926 + }, + { + "epoch": 0.21866666666666668, + "grad_norm": 27.5975284576416, + "learning_rate": 5e-05, + "loss": 1.2747, + "num_input_tokens_seen": 128922168, + "step": 1927 + }, + { + "epoch": 0.21866666666666668, + "loss": 1.4323022365570068, + "loss_ce": 0.005544336512684822, + "loss_iou": 0.6015625, + "loss_num": 0.043701171875, + "loss_xval": 1.4296875, + "num_input_tokens_seen": 128922168, + "step": 1927 + }, + { + "epoch": 0.21878014184397163, + "grad_norm": 35.50611114501953, + "learning_rate": 5e-05, + "loss": 1.3016, + "num_input_tokens_seen": 128988736, + "step": 1928 + }, + { + "epoch": 0.21878014184397163, + "loss": 1.4595575332641602, + "loss_ce": 0.005455970298498869, + "loss_iou": 0.62890625, + "loss_num": 0.038818359375, + "loss_xval": 1.453125, + "num_input_tokens_seen": 128988736, + "step": 1928 + }, + { + "epoch": 0.2188936170212766, + "grad_norm": 22.904722213745117, + "learning_rate": 5e-05, + "loss": 1.2804, + "num_input_tokens_seen": 129055240, + "step": 1929 + }, + { + "epoch": 0.2188936170212766, + "loss": 1.056912899017334, + "loss_ce": 0.00954955443739891, + "loss_iou": 0.423828125, + "loss_num": 0.04052734375, + "loss_xval": 1.046875, + "num_input_tokens_seen": 129055240, + "step": 1929 + }, + { + "epoch": 0.21900709219858155, + "grad_norm": 12.51004695892334, + "learning_rate": 5e-05, + "loss": 1.1851, + "num_input_tokens_seen": 129122544, + "step": 1930 + }, + { + "epoch": 0.21900709219858155, + "loss": 1.1662709712982178, + "loss_ce": 0.008067844435572624, + "loss_iou": 0.490234375, + "loss_num": 0.035400390625, + "loss_xval": 1.15625, + "num_input_tokens_seen": 129122544, + "step": 1930 + }, + { + "epoch": 0.21912056737588653, + "grad_norm": 14.537444114685059, + "learning_rate": 5e-05, + "loss": 1.4896, + "num_input_tokens_seen": 129189408, + "step": 1931 + }, + { + "epoch": 0.21912056737588653, + "loss": 1.490706205368042, + "loss_ce": 0.005842948332428932, + "loss_iou": 0.5859375, + "loss_num": 0.0625, + "loss_xval": 1.484375, + "num_input_tokens_seen": 129189408, + "step": 1931 + }, + { + "epoch": 0.21923404255319148, + "grad_norm": 22.248008728027344, + "learning_rate": 5e-05, + "loss": 1.1044, + "num_input_tokens_seen": 129255356, + "step": 1932 + }, + { + "epoch": 0.21923404255319148, + "loss": 1.0307040214538574, + "loss_ce": 0.004184181801974773, + "loss_iou": 0.466796875, + "loss_num": 0.0181884765625, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 129255356, + "step": 1932 + }, + { + "epoch": 0.21934751773049646, + "grad_norm": 23.9533634185791, + "learning_rate": 5e-05, + "loss": 1.5985, + "num_input_tokens_seen": 129322272, + "step": 1933 + }, + { + "epoch": 0.21934751773049646, + "loss": 1.6034340858459473, + "loss_ce": 0.0028481660410761833, + "loss_iou": 0.69921875, + "loss_num": 0.040283203125, + "loss_xval": 1.6015625, + "num_input_tokens_seen": 129322272, + "step": 1933 + }, + { + "epoch": 0.2194609929078014, + "grad_norm": 32.65679931640625, + "learning_rate": 5e-05, + "loss": 1.3036, + "num_input_tokens_seen": 129389836, + "step": 1934 + }, + { + "epoch": 0.2194609929078014, + "loss": 1.288123607635498, + "loss_ce": 0.005896979011595249, + "loss_iou": 0.58984375, + "loss_num": 0.0203857421875, + "loss_xval": 1.28125, + "num_input_tokens_seen": 129389836, + "step": 1934 + }, + { + "epoch": 0.2195744680851064, + "grad_norm": 19.483779907226562, + "learning_rate": 5e-05, + "loss": 1.585, + "num_input_tokens_seen": 129456544, + "step": 1935 + }, + { + "epoch": 0.2195744680851064, + "loss": 1.5200700759887695, + "loss_ce": 0.009816234931349754, + "loss_iou": 0.64453125, + "loss_num": 0.044677734375, + "loss_xval": 1.5078125, + "num_input_tokens_seen": 129456544, + "step": 1935 + }, + { + "epoch": 0.21968794326241134, + "grad_norm": 16.078615188598633, + "learning_rate": 5e-05, + "loss": 1.2472, + "num_input_tokens_seen": 129523216, + "step": 1936 + }, + { + "epoch": 0.21968794326241134, + "loss": 1.2754042148590088, + "loss_ce": 0.003919842187315226, + "loss_iou": 0.5625, + "loss_num": 0.029296875, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 129523216, + "step": 1936 + }, + { + "epoch": 0.21980141843971632, + "grad_norm": 29.21549415588379, + "learning_rate": 5e-05, + "loss": 1.2775, + "num_input_tokens_seen": 129590036, + "step": 1937 + }, + { + "epoch": 0.21980141843971632, + "loss": 1.1629507541656494, + "loss_ce": 0.004747653380036354, + "loss_iou": 0.494140625, + "loss_num": 0.033935546875, + "loss_xval": 1.15625, + "num_input_tokens_seen": 129590036, + "step": 1937 + }, + { + "epoch": 0.21991489361702127, + "grad_norm": 45.53508377075195, + "learning_rate": 5e-05, + "loss": 1.5524, + "num_input_tokens_seen": 129657388, + "step": 1938 + }, + { + "epoch": 0.21991489361702127, + "loss": 1.5295172929763794, + "loss_ce": 0.007544620428234339, + "loss_iou": 0.66796875, + "loss_num": 0.0380859375, + "loss_xval": 1.5234375, + "num_input_tokens_seen": 129657388, + "step": 1938 + }, + { + "epoch": 0.22002836879432625, + "grad_norm": 18.911821365356445, + "learning_rate": 5e-05, + "loss": 1.6721, + "num_input_tokens_seen": 129723208, + "step": 1939 + }, + { + "epoch": 0.22002836879432625, + "loss": 1.509957194328308, + "loss_ce": 0.00483025424182415, + "loss_iou": 0.62109375, + "loss_num": 0.052978515625, + "loss_xval": 1.5078125, + "num_input_tokens_seen": 129723208, + "step": 1939 + }, + { + "epoch": 0.2201418439716312, + "grad_norm": 23.500173568725586, + "learning_rate": 5e-05, + "loss": 1.4893, + "num_input_tokens_seen": 129789084, + "step": 1940 + }, + { + "epoch": 0.2201418439716312, + "loss": 1.5422732830047607, + "loss_ce": 0.007116927299648523, + "loss_iou": 0.671875, + "loss_num": 0.03759765625, + "loss_xval": 1.53125, + "num_input_tokens_seen": 129789084, + "step": 1940 + }, + { + "epoch": 0.22025531914893617, + "grad_norm": 16.767383575439453, + "learning_rate": 5e-05, + "loss": 1.232, + "num_input_tokens_seen": 129855892, + "step": 1941 + }, + { + "epoch": 0.22025531914893617, + "loss": 1.4349684715270996, + "loss_ce": 0.006745944265276194, + "loss_iou": 0.62109375, + "loss_num": 0.03662109375, + "loss_xval": 1.4296875, + "num_input_tokens_seen": 129855892, + "step": 1941 + }, + { + "epoch": 0.22036879432624112, + "grad_norm": 39.816192626953125, + "learning_rate": 5e-05, + "loss": 1.394, + "num_input_tokens_seen": 129922648, + "step": 1942 + }, + { + "epoch": 0.22036879432624112, + "loss": 1.4544597864151, + "loss_ce": 0.010612115263938904, + "loss_iou": 0.55859375, + "loss_num": 0.064453125, + "loss_xval": 1.4453125, + "num_input_tokens_seen": 129922648, + "step": 1942 + }, + { + "epoch": 0.2204822695035461, + "grad_norm": 21.380441665649414, + "learning_rate": 5e-05, + "loss": 1.7069, + "num_input_tokens_seen": 129989556, + "step": 1943 + }, + { + "epoch": 0.2204822695035461, + "loss": 1.5668251514434814, + "loss_ce": 0.00432517658919096, + "loss_iou": 0.67578125, + "loss_num": 0.04248046875, + "loss_xval": 1.5625, + "num_input_tokens_seen": 129989556, + "step": 1943 + }, + { + "epoch": 0.22059574468085105, + "grad_norm": 23.49190902709961, + "learning_rate": 5e-05, + "loss": 1.4488, + "num_input_tokens_seen": 130057648, + "step": 1944 + }, + { + "epoch": 0.22059574468085105, + "loss": 1.362195372581482, + "loss_ce": 0.005750082433223724, + "loss_iou": 0.5703125, + "loss_num": 0.04296875, + "loss_xval": 1.359375, + "num_input_tokens_seen": 130057648, + "step": 1944 + }, + { + "epoch": 0.22070921985815603, + "grad_norm": 34.97682571411133, + "learning_rate": 5e-05, + "loss": 1.3842, + "num_input_tokens_seen": 130124764, + "step": 1945 + }, + { + "epoch": 0.22070921985815603, + "loss": 1.4732059240341187, + "loss_ce": 0.008362239226698875, + "loss_iou": 0.58984375, + "loss_num": 0.0576171875, + "loss_xval": 1.46875, + "num_input_tokens_seen": 130124764, + "step": 1945 + }, + { + "epoch": 0.22082269503546098, + "grad_norm": 32.324031829833984, + "learning_rate": 5e-05, + "loss": 1.4754, + "num_input_tokens_seen": 130191968, + "step": 1946 + }, + { + "epoch": 0.22082269503546098, + "loss": 1.4781477451324463, + "loss_ce": 0.007932949811220169, + "loss_iou": 0.64453125, + "loss_num": 0.037109375, + "loss_xval": 1.46875, + "num_input_tokens_seen": 130191968, + "step": 1946 + }, + { + "epoch": 0.22093617021276596, + "grad_norm": 26.80756187438965, + "learning_rate": 5e-05, + "loss": 1.2601, + "num_input_tokens_seen": 130259512, + "step": 1947 + }, + { + "epoch": 0.22093617021276596, + "loss": 1.2641950845718384, + "loss_ce": 0.008335717022418976, + "loss_iou": 0.55078125, + "loss_num": 0.03125, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 130259512, + "step": 1947 + }, + { + "epoch": 0.2210496453900709, + "grad_norm": 16.80965232849121, + "learning_rate": 5e-05, + "loss": 1.448, + "num_input_tokens_seen": 130326848, + "step": 1948 + }, + { + "epoch": 0.2210496453900709, + "loss": 1.4244804382324219, + "loss_ce": 0.0026054128538817167, + "loss_iou": 0.6328125, + "loss_num": 0.0306396484375, + "loss_xval": 1.421875, + "num_input_tokens_seen": 130326848, + "step": 1948 + }, + { + "epoch": 0.2211631205673759, + "grad_norm": 10.246618270874023, + "learning_rate": 5e-05, + "loss": 1.1126, + "num_input_tokens_seen": 130392756, + "step": 1949 + }, + { + "epoch": 0.2211631205673759, + "loss": 1.1596384048461914, + "loss_ce": 0.0058298250660300255, + "loss_iou": 0.494140625, + "loss_num": 0.03271484375, + "loss_xval": 1.15625, + "num_input_tokens_seen": 130392756, + "step": 1949 + }, + { + "epoch": 0.22127659574468084, + "grad_norm": 18.412677764892578, + "learning_rate": 5e-05, + "loss": 1.0956, + "num_input_tokens_seen": 130459016, + "step": 1950 + }, + { + "epoch": 0.22127659574468084, + "loss": 0.9832940697669983, + "loss_ce": 0.00526669854298234, + "loss_iou": 0.396484375, + "loss_num": 0.036865234375, + "loss_xval": 0.9765625, + "num_input_tokens_seen": 130459016, + "step": 1950 + }, + { + "epoch": 0.22139007092198582, + "grad_norm": 22.291160583496094, + "learning_rate": 5e-05, + "loss": 1.3359, + "num_input_tokens_seen": 130525316, + "step": 1951 + }, + { + "epoch": 0.22139007092198582, + "loss": 1.3996928930282593, + "loss_ce": 0.007114766165614128, + "loss_iou": 0.6015625, + "loss_num": 0.037841796875, + "loss_xval": 1.390625, + "num_input_tokens_seen": 130525316, + "step": 1951 + }, + { + "epoch": 0.2215035460992908, + "grad_norm": 26.633045196533203, + "learning_rate": 5e-05, + "loss": 1.3855, + "num_input_tokens_seen": 130592200, + "step": 1952 + }, + { + "epoch": 0.2215035460992908, + "loss": 1.486738920211792, + "loss_ce": 0.0048053078353405, + "loss_iou": 0.60546875, + "loss_num": 0.0537109375, + "loss_xval": 1.484375, + "num_input_tokens_seen": 130592200, + "step": 1952 + }, + { + "epoch": 0.22161702127659574, + "grad_norm": 21.726341247558594, + "learning_rate": 5e-05, + "loss": 1.2457, + "num_input_tokens_seen": 130659364, + "step": 1953 + }, + { + "epoch": 0.22161702127659574, + "loss": 1.2004218101501465, + "loss_ce": 0.006085854955017567, + "loss_iou": 0.4921875, + "loss_num": 0.042236328125, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 130659364, + "step": 1953 + }, + { + "epoch": 0.22173049645390072, + "grad_norm": 38.00472640991211, + "learning_rate": 5e-05, + "loss": 1.3219, + "num_input_tokens_seen": 130726932, + "step": 1954 + }, + { + "epoch": 0.22173049645390072, + "loss": 1.143416404724121, + "loss_ce": 0.0057210796512663364, + "loss_iou": 0.494140625, + "loss_num": 0.0296630859375, + "loss_xval": 1.140625, + "num_input_tokens_seen": 130726932, + "step": 1954 + }, + { + "epoch": 0.22184397163120567, + "grad_norm": 20.52427101135254, + "learning_rate": 5e-05, + "loss": 1.5977, + "num_input_tokens_seen": 130793360, + "step": 1955 + }, + { + "epoch": 0.22184397163120567, + "loss": 1.6802678108215332, + "loss_ce": 0.005463156383484602, + "loss_iou": 0.7265625, + "loss_num": 0.04443359375, + "loss_xval": 1.671875, + "num_input_tokens_seen": 130793360, + "step": 1955 + }, + { + "epoch": 0.22195744680851065, + "grad_norm": 31.02869987487793, + "learning_rate": 5e-05, + "loss": 1.5339, + "num_input_tokens_seen": 130860148, + "step": 1956 + }, + { + "epoch": 0.22195744680851065, + "loss": 1.7272690534591675, + "loss_ce": 0.004124507308006287, + "loss_iou": 0.7109375, + "loss_num": 0.060791015625, + "loss_xval": 1.7265625, + "num_input_tokens_seen": 130860148, + "step": 1956 + }, + { + "epoch": 0.2220709219858156, + "grad_norm": 44.17657470703125, + "learning_rate": 5e-05, + "loss": 1.3791, + "num_input_tokens_seen": 130927732, + "step": 1957 + }, + { + "epoch": 0.2220709219858156, + "loss": 1.4000471830368042, + "loss_ce": 0.0055158501490950584, + "loss_iou": 0.59765625, + "loss_num": 0.03955078125, + "loss_xval": 1.390625, + "num_input_tokens_seen": 130927732, + "step": 1957 + }, + { + "epoch": 0.22218439716312058, + "grad_norm": 20.483457565307617, + "learning_rate": 5e-05, + "loss": 1.6815, + "num_input_tokens_seen": 130994164, + "step": 1958 + }, + { + "epoch": 0.22218439716312058, + "loss": 1.5384936332702637, + "loss_ce": 0.0065110959112644196, + "loss_iou": 0.62109375, + "loss_num": 0.0576171875, + "loss_xval": 1.53125, + "num_input_tokens_seen": 130994164, + "step": 1958 + }, + { + "epoch": 0.22229787234042553, + "grad_norm": 16.7015438079834, + "learning_rate": 5e-05, + "loss": 1.3365, + "num_input_tokens_seen": 131060988, + "step": 1959 + }, + { + "epoch": 0.22229787234042553, + "loss": 1.3515044450759888, + "loss_ce": 0.0038482099771499634, + "loss_iou": 0.5390625, + "loss_num": 0.054931640625, + "loss_xval": 1.34375, + "num_input_tokens_seen": 131060988, + "step": 1959 + }, + { + "epoch": 0.2224113475177305, + "grad_norm": 21.710803985595703, + "learning_rate": 5e-05, + "loss": 1.0769, + "num_input_tokens_seen": 131126540, + "step": 1960 + }, + { + "epoch": 0.2224113475177305, + "loss": 1.0770902633666992, + "loss_ce": 0.005312926135957241, + "loss_iou": 0.421875, + "loss_num": 0.04541015625, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 131126540, + "step": 1960 + }, + { + "epoch": 0.22252482269503546, + "grad_norm": 21.21295738220215, + "learning_rate": 5e-05, + "loss": 1.3058, + "num_input_tokens_seen": 131192944, + "step": 1961 + }, + { + "epoch": 0.22252482269503546, + "loss": 1.189226746559143, + "loss_ce": 0.006121203303337097, + "loss_iou": 0.484375, + "loss_num": 0.042724609375, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 131192944, + "step": 1961 + }, + { + "epoch": 0.22263829787234043, + "grad_norm": 19.95804214477539, + "learning_rate": 5e-05, + "loss": 1.4213, + "num_input_tokens_seen": 131260136, + "step": 1962 + }, + { + "epoch": 0.22263829787234043, + "loss": 1.42156982421875, + "loss_ce": 0.004577696323394775, + "loss_iou": 0.59765625, + "loss_num": 0.045166015625, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 131260136, + "step": 1962 + }, + { + "epoch": 0.22275177304964539, + "grad_norm": 23.455045700073242, + "learning_rate": 5e-05, + "loss": 1.466, + "num_input_tokens_seen": 131327020, + "step": 1963 + }, + { + "epoch": 0.22275177304964539, + "loss": 1.4411685466766357, + "loss_ce": 0.002692017238587141, + "loss_iou": 0.62890625, + "loss_num": 0.03662109375, + "loss_xval": 1.4375, + "num_input_tokens_seen": 131327020, + "step": 1963 + }, + { + "epoch": 0.22286524822695036, + "grad_norm": 47.51083755493164, + "learning_rate": 5e-05, + "loss": 1.6246, + "num_input_tokens_seen": 131393376, + "step": 1964 + }, + { + "epoch": 0.22286524822695036, + "loss": 1.5896271467208862, + "loss_ce": 0.009549062699079514, + "loss_iou": 0.66015625, + "loss_num": 0.0517578125, + "loss_xval": 1.578125, + "num_input_tokens_seen": 131393376, + "step": 1964 + }, + { + "epoch": 0.2229787234042553, + "grad_norm": 24.48193359375, + "learning_rate": 5e-05, + "loss": 1.5901, + "num_input_tokens_seen": 131459824, + "step": 1965 + }, + { + "epoch": 0.2229787234042553, + "loss": 1.725296974182129, + "loss_ce": 0.002640719059854746, + "loss_iou": 0.73828125, + "loss_num": 0.04931640625, + "loss_xval": 1.71875, + "num_input_tokens_seen": 131459824, + "step": 1965 + }, + { + "epoch": 0.2230921985815603, + "grad_norm": 9.83313274383545, + "learning_rate": 5e-05, + "loss": 1.2236, + "num_input_tokens_seen": 131527764, + "step": 1966 + }, + { + "epoch": 0.2230921985815603, + "loss": 1.2268069982528687, + "loss_ce": 0.005615596659481525, + "loss_iou": 0.4921875, + "loss_num": 0.047119140625, + "loss_xval": 1.21875, + "num_input_tokens_seen": 131527764, + "step": 1966 + }, + { + "epoch": 0.22320567375886524, + "grad_norm": 12.63760757446289, + "learning_rate": 5e-05, + "loss": 1.1789, + "num_input_tokens_seen": 131595456, + "step": 1967 + }, + { + "epoch": 0.22320567375886524, + "loss": 1.1943976879119873, + "loss_ce": 0.0029915007762610912, + "loss_iou": 0.482421875, + "loss_num": 0.045166015625, + "loss_xval": 1.1875, + "num_input_tokens_seen": 131595456, + "step": 1967 + }, + { + "epoch": 0.22331914893617022, + "grad_norm": 31.33421516418457, + "learning_rate": 5e-05, + "loss": 1.3125, + "num_input_tokens_seen": 131662844, + "step": 1968 + }, + { + "epoch": 0.22331914893617022, + "loss": 1.2745131254196167, + "loss_ce": 0.00205221027135849, + "loss_iou": 0.578125, + "loss_num": 0.023681640625, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 131662844, + "step": 1968 + }, + { + "epoch": 0.22343262411347517, + "grad_norm": 20.558086395263672, + "learning_rate": 5e-05, + "loss": 1.6145, + "num_input_tokens_seen": 131729848, + "step": 1969 + }, + { + "epoch": 0.22343262411347517, + "loss": 1.6502952575683594, + "loss_ce": 0.006130259484052658, + "loss_iou": 0.66015625, + "loss_num": 0.064453125, + "loss_xval": 1.640625, + "num_input_tokens_seen": 131729848, + "step": 1969 + }, + { + "epoch": 0.22354609929078015, + "grad_norm": 23.697002410888672, + "learning_rate": 5e-05, + "loss": 1.4546, + "num_input_tokens_seen": 131797204, + "step": 1970 + }, + { + "epoch": 0.22354609929078015, + "loss": 1.5087332725524902, + "loss_ce": 0.011662936769425869, + "loss_iou": 0.61328125, + "loss_num": 0.05419921875, + "loss_xval": 1.5, + "num_input_tokens_seen": 131797204, + "step": 1970 + }, + { + "epoch": 0.2236595744680851, + "grad_norm": 31.708723068237305, + "learning_rate": 5e-05, + "loss": 1.4099, + "num_input_tokens_seen": 131864892, + "step": 1971 + }, + { + "epoch": 0.2236595744680851, + "loss": 1.5159891843795776, + "loss_ce": 0.007200108841061592, + "loss_iou": 0.62890625, + "loss_num": 0.050048828125, + "loss_xval": 1.5078125, + "num_input_tokens_seen": 131864892, + "step": 1971 + }, + { + "epoch": 0.22377304964539008, + "grad_norm": 266.75927734375, + "learning_rate": 5e-05, + "loss": 1.4704, + "num_input_tokens_seen": 131931396, + "step": 1972 + }, + { + "epoch": 0.22377304964539008, + "loss": 1.4441862106323242, + "loss_ce": 0.004733186215162277, + "loss_iou": 0.609375, + "loss_num": 0.044677734375, + "loss_xval": 1.4375, + "num_input_tokens_seen": 131931396, + "step": 1972 + }, + { + "epoch": 0.22388652482269503, + "grad_norm": 25.550682067871094, + "learning_rate": 5e-05, + "loss": 1.2053, + "num_input_tokens_seen": 131998808, + "step": 1973 + }, + { + "epoch": 0.22388652482269503, + "loss": 1.254183053970337, + "loss_ce": 0.005159614607691765, + "loss_iou": 0.51953125, + "loss_num": 0.04150390625, + "loss_xval": 1.25, + "num_input_tokens_seen": 131998808, + "step": 1973 + }, + { + "epoch": 0.224, + "grad_norm": 20.98421859741211, + "learning_rate": 5e-05, + "loss": 1.4565, + "num_input_tokens_seen": 132066688, + "step": 1974 + }, + { + "epoch": 0.224, + "loss": 1.6719105243682861, + "loss_ce": 0.005894833244383335, + "loss_iou": 0.65625, + "loss_num": 0.0703125, + "loss_xval": 1.6640625, + "num_input_tokens_seen": 132066688, + "step": 1974 + }, + { + "epoch": 0.22411347517730495, + "grad_norm": 28.795297622680664, + "learning_rate": 5e-05, + "loss": 1.3363, + "num_input_tokens_seen": 132134056, + "step": 1975 + }, + { + "epoch": 0.22411347517730495, + "loss": 1.3196969032287598, + "loss_ce": 0.00524383457377553, + "loss_iou": 0.5546875, + "loss_num": 0.04052734375, + "loss_xval": 1.3125, + "num_input_tokens_seen": 132134056, + "step": 1975 + }, + { + "epoch": 0.22422695035460993, + "grad_norm": 32.36819839477539, + "learning_rate": 5e-05, + "loss": 1.5494, + "num_input_tokens_seen": 132201820, + "step": 1976 + }, + { + "epoch": 0.22422695035460993, + "loss": 1.2884944677352905, + "loss_ce": 0.004436859395354986, + "loss_iou": 0.55078125, + "loss_num": 0.0361328125, + "loss_xval": 1.28125, + "num_input_tokens_seen": 132201820, + "step": 1976 + }, + { + "epoch": 0.22434042553191488, + "grad_norm": 13.506450653076172, + "learning_rate": 5e-05, + "loss": 1.3259, + "num_input_tokens_seen": 132268260, + "step": 1977 + }, + { + "epoch": 0.22434042553191488, + "loss": 1.291307806968689, + "loss_ce": 0.002977753058075905, + "loss_iou": 0.56640625, + "loss_num": 0.031005859375, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 132268260, + "step": 1977 + }, + { + "epoch": 0.22445390070921986, + "grad_norm": 23.995702743530273, + "learning_rate": 5e-05, + "loss": 1.1996, + "num_input_tokens_seen": 132336384, + "step": 1978 + }, + { + "epoch": 0.22445390070921986, + "loss": 1.2755030393600464, + "loss_ce": 0.004506956320255995, + "loss_iou": 0.5234375, + "loss_num": 0.044677734375, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 132336384, + "step": 1978 + }, + { + "epoch": 0.2245673758865248, + "grad_norm": 27.219226837158203, + "learning_rate": 5e-05, + "loss": 1.3534, + "num_input_tokens_seen": 132403660, + "step": 1979 + }, + { + "epoch": 0.2245673758865248, + "loss": 1.3447027206420898, + "loss_ce": 0.004858972039073706, + "loss_iou": 0.60546875, + "loss_num": 0.0262451171875, + "loss_xval": 1.34375, + "num_input_tokens_seen": 132403660, + "step": 1979 + }, + { + "epoch": 0.2246808510638298, + "grad_norm": 44.116050720214844, + "learning_rate": 5e-05, + "loss": 1.5285, + "num_input_tokens_seen": 132470704, + "step": 1980 + }, + { + "epoch": 0.2246808510638298, + "loss": 1.303457260131836, + "loss_ce": 0.010488496161997318, + "loss_iou": 0.578125, + "loss_num": 0.02685546875, + "loss_xval": 1.296875, + "num_input_tokens_seen": 132470704, + "step": 1980 + }, + { + "epoch": 0.22479432624113474, + "grad_norm": 26.136119842529297, + "learning_rate": 5e-05, + "loss": 1.5083, + "num_input_tokens_seen": 132536720, + "step": 1981 + }, + { + "epoch": 0.22479432624113474, + "loss": 1.489700198173523, + "loss_ce": 0.0033721073996275663, + "loss_iou": 0.66796875, + "loss_num": 0.02978515625, + "loss_xval": 1.484375, + "num_input_tokens_seen": 132536720, + "step": 1981 + }, + { + "epoch": 0.22490780141843972, + "grad_norm": 16.626314163208008, + "learning_rate": 5e-05, + "loss": 1.2525, + "num_input_tokens_seen": 132604096, + "step": 1982 + }, + { + "epoch": 0.22490780141843972, + "loss": 1.1821430921554565, + "loss_ce": 0.002455571200698614, + "loss_iou": 0.50390625, + "loss_num": 0.033935546875, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 132604096, + "step": 1982 + }, + { + "epoch": 0.22502127659574467, + "grad_norm": 25.128185272216797, + "learning_rate": 5e-05, + "loss": 1.3901, + "num_input_tokens_seen": 132671260, + "step": 1983 + }, + { + "epoch": 0.22502127659574467, + "loss": 1.1124144792556763, + "loss_ce": 0.0030394396744668484, + "loss_iou": 0.515625, + "loss_num": 0.015625, + "loss_xval": 1.109375, + "num_input_tokens_seen": 132671260, + "step": 1983 + }, + { + "epoch": 0.22513475177304965, + "grad_norm": 21.03541374206543, + "learning_rate": 5e-05, + "loss": 1.5352, + "num_input_tokens_seen": 132738484, + "step": 1984 + }, + { + "epoch": 0.22513475177304965, + "loss": 1.6253116130828857, + "loss_ce": 0.0022647609002888203, + "loss_iou": 0.67578125, + "loss_num": 0.05322265625, + "loss_xval": 1.625, + "num_input_tokens_seen": 132738484, + "step": 1984 + }, + { + "epoch": 0.2252482269503546, + "grad_norm": 18.13155746459961, + "learning_rate": 5e-05, + "loss": 1.2379, + "num_input_tokens_seen": 132804964, + "step": 1985 + }, + { + "epoch": 0.2252482269503546, + "loss": 1.2339882850646973, + "loss_ce": 0.005472681950777769, + "loss_iou": 0.50390625, + "loss_num": 0.04443359375, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 132804964, + "step": 1985 + }, + { + "epoch": 0.22536170212765957, + "grad_norm": 27.822662353515625, + "learning_rate": 5e-05, + "loss": 1.1903, + "num_input_tokens_seen": 132871296, + "step": 1986 + }, + { + "epoch": 0.22536170212765957, + "loss": 1.2218321561813354, + "loss_ce": 0.004547039046883583, + "loss_iou": 0.51171875, + "loss_num": 0.03955078125, + "loss_xval": 1.21875, + "num_input_tokens_seen": 132871296, + "step": 1986 + }, + { + "epoch": 0.22547517730496455, + "grad_norm": 30.082202911376953, + "learning_rate": 5e-05, + "loss": 1.1506, + "num_input_tokens_seen": 132937440, + "step": 1987 + }, + { + "epoch": 0.22547517730496455, + "loss": 1.2249172925949097, + "loss_ce": 0.002383085899055004, + "loss_iou": 0.52734375, + "loss_num": 0.033447265625, + "loss_xval": 1.21875, + "num_input_tokens_seen": 132937440, + "step": 1987 + }, + { + "epoch": 0.2255886524822695, + "grad_norm": 21.58560562133789, + "learning_rate": 5e-05, + "loss": 1.2016, + "num_input_tokens_seen": 133004052, + "step": 1988 + }, + { + "epoch": 0.2255886524822695, + "loss": 1.2481839656829834, + "loss_ce": 0.0037992852739989758, + "loss_iou": 0.51171875, + "loss_num": 0.043212890625, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 133004052, + "step": 1988 + }, + { + "epoch": 0.22570212765957448, + "grad_norm": 37.787174224853516, + "learning_rate": 5e-05, + "loss": 1.3363, + "num_input_tokens_seen": 133070840, + "step": 1989 + }, + { + "epoch": 0.22570212765957448, + "loss": 1.3909528255462646, + "loss_ce": 0.005698946304619312, + "loss_iou": 0.59375, + "loss_num": 0.0400390625, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 133070840, + "step": 1989 + }, + { + "epoch": 0.22581560283687943, + "grad_norm": 27.63946533203125, + "learning_rate": 5e-05, + "loss": 1.6255, + "num_input_tokens_seen": 133138536, + "step": 1990 + }, + { + "epoch": 0.22581560283687943, + "loss": 1.5831515789031982, + "loss_ce": 0.006491462700068951, + "loss_iou": 0.68359375, + "loss_num": 0.04150390625, + "loss_xval": 1.578125, + "num_input_tokens_seen": 133138536, + "step": 1990 + }, + { + "epoch": 0.2259290780141844, + "grad_norm": 20.305103302001953, + "learning_rate": 5e-05, + "loss": 1.6275, + "num_input_tokens_seen": 133206000, + "step": 1991 + }, + { + "epoch": 0.2259290780141844, + "loss": 1.6989128589630127, + "loss_ce": 0.004088586196303368, + "loss_iou": 0.67578125, + "loss_num": 0.068359375, + "loss_xval": 1.6953125, + "num_input_tokens_seen": 133206000, + "step": 1991 + }, + { + "epoch": 0.22604255319148936, + "grad_norm": 19.842805862426758, + "learning_rate": 5e-05, + "loss": 1.5454, + "num_input_tokens_seen": 133272360, + "step": 1992 + }, + { + "epoch": 0.22604255319148936, + "loss": 1.5679435729980469, + "loss_ce": 0.010814731940627098, + "loss_iou": 0.59765625, + "loss_num": 0.072265625, + "loss_xval": 1.5546875, + "num_input_tokens_seen": 133272360, + "step": 1992 + }, + { + "epoch": 0.22615602836879434, + "grad_norm": 25.7852725982666, + "learning_rate": 5e-05, + "loss": 1.3092, + "num_input_tokens_seen": 133339692, + "step": 1993 + }, + { + "epoch": 0.22615602836879434, + "loss": 1.18706214427948, + "loss_ce": 0.003468393115326762, + "loss_iou": 0.49609375, + "loss_num": 0.03857421875, + "loss_xval": 1.1875, + "num_input_tokens_seen": 133339692, + "step": 1993 + }, + { + "epoch": 0.2262695035460993, + "grad_norm": 37.58256530761719, + "learning_rate": 5e-05, + "loss": 1.5197, + "num_input_tokens_seen": 133407264, + "step": 1994 + }, + { + "epoch": 0.2262695035460993, + "loss": 1.4967929124832153, + "loss_ce": 0.006558561697602272, + "loss_iou": 0.63671875, + "loss_num": 0.043701171875, + "loss_xval": 1.4921875, + "num_input_tokens_seen": 133407264, + "step": 1994 + }, + { + "epoch": 0.22638297872340427, + "grad_norm": 22.805248260498047, + "learning_rate": 5e-05, + "loss": 1.5183, + "num_input_tokens_seen": 133474500, + "step": 1995 + }, + { + "epoch": 0.22638297872340427, + "loss": 1.5285565853118896, + "loss_ce": 0.0021893910598009825, + "loss_iou": 0.64453125, + "loss_num": 0.046630859375, + "loss_xval": 1.5234375, + "num_input_tokens_seen": 133474500, + "step": 1995 + }, + { + "epoch": 0.22649645390070922, + "grad_norm": 16.609554290771484, + "learning_rate": 5e-05, + "loss": 1.1366, + "num_input_tokens_seen": 133541212, + "step": 1996 + }, + { + "epoch": 0.22649645390070922, + "loss": 1.162423849105835, + "loss_ce": 0.0051972330547869205, + "loss_iou": 0.48046875, + "loss_num": 0.039306640625, + "loss_xval": 1.15625, + "num_input_tokens_seen": 133541212, + "step": 1996 + }, + { + "epoch": 0.2266099290780142, + "grad_norm": 28.03524398803711, + "learning_rate": 5e-05, + "loss": 1.4696, + "num_input_tokens_seen": 133608092, + "step": 1997 + }, + { + "epoch": 0.2266099290780142, + "loss": 1.6252012252807617, + "loss_ce": 0.002154434798285365, + "loss_iou": 0.69921875, + "loss_num": 0.04443359375, + "loss_xval": 1.625, + "num_input_tokens_seen": 133608092, + "step": 1997 + }, + { + "epoch": 0.22672340425531914, + "grad_norm": 40.089866638183594, + "learning_rate": 5e-05, + "loss": 1.5384, + "num_input_tokens_seen": 133674096, + "step": 1998 + }, + { + "epoch": 0.22672340425531914, + "loss": 1.6853752136230469, + "loss_ce": 0.0032463606912642717, + "loss_iou": 0.73046875, + "loss_num": 0.0439453125, + "loss_xval": 1.6796875, + "num_input_tokens_seen": 133674096, + "step": 1998 + }, + { + "epoch": 0.22683687943262412, + "grad_norm": 22.613950729370117, + "learning_rate": 5e-05, + "loss": 1.2243, + "num_input_tokens_seen": 133740612, + "step": 1999 + }, + { + "epoch": 0.22683687943262412, + "loss": 1.362693428993225, + "loss_ce": 0.004295044578611851, + "loss_iou": 0.58984375, + "loss_num": 0.03564453125, + "loss_xval": 1.359375, + "num_input_tokens_seen": 133740612, + "step": 1999 + }, + { + "epoch": 0.22695035460992907, + "grad_norm": 13.752289772033691, + "learning_rate": 5e-05, + "loss": 1.2664, + "num_input_tokens_seen": 133807964, + "step": 2000 + }, + { + "epoch": 0.22695035460992907, + "eval_seeclick_CIoU": 0.34426629543304443, + "eval_seeclick_GIoU": 0.31926728785037994, + "eval_seeclick_IoU": 0.43812374770641327, + "eval_seeclick_MAE_all": 0.163093663752079, + "eval_seeclick_MAE_h": 0.10025879368185997, + "eval_seeclick_MAE_w": 0.14377503842115402, + "eval_seeclick_MAE_x_boxes": 0.23835749924182892, + "eval_seeclick_MAE_y_boxes": 0.1347159929573536, + "eval_seeclick_NUM_probability": 0.9998016357421875, + "eval_seeclick_inside_bbox": 0.6614583432674408, + "eval_seeclick_loss": 2.6518149375915527, + "eval_seeclick_loss_ce": 0.01591816684231162, + "eval_seeclick_loss_iou": 0.9163818359375, + "eval_seeclick_loss_num": 0.16298675537109375, + "eval_seeclick_loss_xval": 2.64599609375, + "eval_seeclick_runtime": 86.8158, + "eval_seeclick_samples_per_second": 0.541, + "eval_seeclick_steps_per_second": 0.023, + "num_input_tokens_seen": 133807964, + "step": 2000 + }, + { + "epoch": 0.22695035460992907, + "eval_icons_CIoU": 0.4480409026145935, + "eval_icons_GIoU": 0.42837369441986084, + "eval_icons_IoU": 0.49970248341560364, + "eval_icons_MAE_all": 0.14744016900658607, + "eval_icons_MAE_h": 0.10056149959564209, + "eval_icons_MAE_w": 0.11631055921316147, + "eval_icons_MAE_x_boxes": 0.11812345311045647, + "eval_icons_MAE_y_boxes": 0.1263774037361145, + "eval_icons_NUM_probability": 0.999912679195404, + "eval_icons_inside_bbox": 0.7395833432674408, + "eval_icons_loss": 2.4613287448883057, + "eval_icons_loss_ce": 6.802596726629417e-05, + "eval_icons_loss_iou": 0.885009765625, + "eval_icons_loss_num": 0.1356964111328125, + "eval_icons_loss_xval": 2.44775390625, + "eval_icons_runtime": 92.7021, + "eval_icons_samples_per_second": 0.539, + "eval_icons_steps_per_second": 0.022, + "num_input_tokens_seen": 133807964, + "step": 2000 + }, + { + "epoch": 0.22695035460992907, + "eval_screenspot_CIoU": 0.3551298926273982, + "eval_screenspot_GIoU": 0.3153218775987625, + "eval_screenspot_IoU": 0.43693260351816815, + "eval_screenspot_MAE_all": 0.17736593882242838, + "eval_screenspot_MAE_h": 0.10277784864107768, + "eval_screenspot_MAE_w": 0.17480774720509848, + "eval_screenspot_MAE_x_boxes": 0.23112698396046957, + "eval_screenspot_MAE_y_boxes": 0.13011351476113, + "eval_screenspot_NUM_probability": 0.9998083313306173, + "eval_screenspot_inside_bbox": 0.6699999968210856, + "eval_screenspot_loss": 2.807647705078125, + "eval_screenspot_loss_ce": 0.014993442843357721, + "eval_screenspot_loss_iou": 0.95703125, + "eval_screenspot_loss_num": 0.18742879231770834, + "eval_screenspot_loss_xval": 2.8518880208333335, + "eval_screenspot_runtime": 126.2572, + "eval_screenspot_samples_per_second": 0.705, + "eval_screenspot_steps_per_second": 0.024, + "num_input_tokens_seen": 133807964, + "step": 2000 + }, + { + "epoch": 0.22695035460992907, + "eval_compot_CIoU": 0.38398708403110504, + "eval_compot_GIoU": 0.3428562879562378, + "eval_compot_IoU": 0.47344549000263214, + "eval_compot_MAE_all": 0.15506690740585327, + "eval_compot_MAE_h": 0.05109363608062267, + "eval_compot_MAE_w": 0.16540144383907318, + "eval_compot_MAE_x_boxes": 0.17781401425600052, + "eval_compot_MAE_y_boxes": 0.13905224949121475, + "eval_compot_NUM_probability": 0.9998413026332855, + "eval_compot_inside_bbox": 0.5989583432674408, + "eval_compot_loss": 2.6418304443359375, + "eval_compot_loss_ce": 0.005358418449759483, + "eval_compot_loss_iou": 0.9169921875, + "eval_compot_loss_num": 0.15304183959960938, + "eval_compot_loss_xval": 2.599609375, + "eval_compot_runtime": 71.2105, + "eval_compot_samples_per_second": 0.702, + "eval_compot_steps_per_second": 0.028, + "num_input_tokens_seen": 133807964, + "step": 2000 + }, + { + "epoch": 0.22695035460992907, + "loss": 2.4957566261291504, + "loss_ce": 0.004545603413134813, + "loss_iou": 0.8984375, + "loss_num": 0.138671875, + "loss_xval": 2.484375, + "num_input_tokens_seen": 133807964, + "step": 2000 + }, + { + "epoch": 0.22706382978723405, + "grad_norm": 15.289379119873047, + "learning_rate": 5e-05, + "loss": 1.1772, + "num_input_tokens_seen": 133874924, + "step": 2001 + }, + { + "epoch": 0.22706382978723405, + "loss": 1.095270037651062, + "loss_ce": 0.003961401991546154, + "loss_iou": 0.478515625, + "loss_num": 0.0272216796875, + "loss_xval": 1.09375, + "num_input_tokens_seen": 133874924, + "step": 2001 + }, + { + "epoch": 0.227177304964539, + "grad_norm": 26.859811782836914, + "learning_rate": 5e-05, + "loss": 1.4366, + "num_input_tokens_seen": 133942948, + "step": 2002 + }, + { + "epoch": 0.227177304964539, + "loss": 1.555048942565918, + "loss_ce": 0.005732533987611532, + "loss_iou": 0.66796875, + "loss_num": 0.04296875, + "loss_xval": 1.546875, + "num_input_tokens_seen": 133942948, + "step": 2002 + }, + { + "epoch": 0.22729078014184398, + "grad_norm": 45.3256950378418, + "learning_rate": 5e-05, + "loss": 1.4395, + "num_input_tokens_seen": 134009016, + "step": 2003 + }, + { + "epoch": 0.22729078014184398, + "loss": 1.3472453355789185, + "loss_ce": 0.005204303190112114, + "loss_iou": 0.5625, + "loss_num": 0.042724609375, + "loss_xval": 1.34375, + "num_input_tokens_seen": 134009016, + "step": 2003 + }, + { + "epoch": 0.22740425531914893, + "grad_norm": 22.057201385498047, + "learning_rate": 5e-05, + "loss": 1.1471, + "num_input_tokens_seen": 134075928, + "step": 2004 + }, + { + "epoch": 0.22740425531914893, + "loss": 1.2536946535110474, + "loss_ce": 0.0036946558393538, + "loss_iou": 0.55078125, + "loss_num": 0.0296630859375, + "loss_xval": 1.25, + "num_input_tokens_seen": 134075928, + "step": 2004 + }, + { + "epoch": 0.2275177304964539, + "grad_norm": 30.015384674072266, + "learning_rate": 5e-05, + "loss": 1.357, + "num_input_tokens_seen": 134141884, + "step": 2005 + }, + { + "epoch": 0.2275177304964539, + "loss": 1.6490429639816284, + "loss_ce": 0.0035350704565644264, + "loss_iou": 0.66796875, + "loss_num": 0.061767578125, + "loss_xval": 1.6484375, + "num_input_tokens_seen": 134141884, + "step": 2005 + }, + { + "epoch": 0.22763120567375886, + "grad_norm": 35.33918380737305, + "learning_rate": 5e-05, + "loss": 1.276, + "num_input_tokens_seen": 134210144, + "step": 2006 + }, + { + "epoch": 0.22763120567375886, + "loss": 1.2276334762573242, + "loss_ce": 0.004488867707550526, + "loss_iou": 0.52734375, + "loss_num": 0.032958984375, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 134210144, + "step": 2006 + }, + { + "epoch": 0.22774468085106384, + "grad_norm": 22.396697998046875, + "learning_rate": 5e-05, + "loss": 1.4836, + "num_input_tokens_seen": 134277012, + "step": 2007 + }, + { + "epoch": 0.22774468085106384, + "loss": 1.5110599994659424, + "loss_ce": 0.002759236842393875, + "loss_iou": 0.65625, + "loss_num": 0.039306640625, + "loss_xval": 1.5078125, + "num_input_tokens_seen": 134277012, + "step": 2007 + }, + { + "epoch": 0.22785815602836879, + "grad_norm": 14.94880485534668, + "learning_rate": 5e-05, + "loss": 1.172, + "num_input_tokens_seen": 134343484, + "step": 2008 + }, + { + "epoch": 0.22785815602836879, + "loss": 1.1806801557540894, + "loss_ce": 0.008072719909250736, + "loss_iou": 0.48046875, + "loss_num": 0.04248046875, + "loss_xval": 1.171875, + "num_input_tokens_seen": 134343484, + "step": 2008 + }, + { + "epoch": 0.22797163120567376, + "grad_norm": 28.177486419677734, + "learning_rate": 5e-05, + "loss": 1.2756, + "num_input_tokens_seen": 134410236, + "step": 2009 + }, + { + "epoch": 0.22797163120567376, + "loss": 1.4232925176620483, + "loss_ce": 0.00532371923327446, + "loss_iou": 0.6015625, + "loss_num": 0.04296875, + "loss_xval": 1.421875, + "num_input_tokens_seen": 134410236, + "step": 2009 + }, + { + "epoch": 0.22808510638297871, + "grad_norm": 36.87175750732422, + "learning_rate": 5e-05, + "loss": 1.2659, + "num_input_tokens_seen": 134478452, + "step": 2010 + }, + { + "epoch": 0.22808510638297871, + "loss": 1.195324182510376, + "loss_ce": 0.006847548298537731, + "loss_iou": 0.5, + "loss_num": 0.03662109375, + "loss_xval": 1.1875, + "num_input_tokens_seen": 134478452, + "step": 2010 + }, + { + "epoch": 0.2281985815602837, + "grad_norm": 26.465660095214844, + "learning_rate": 5e-05, + "loss": 1.4167, + "num_input_tokens_seen": 134546260, + "step": 2011 + }, + { + "epoch": 0.2281985815602837, + "loss": 1.4041646718978882, + "loss_ce": 0.00572718121111393, + "loss_iou": 0.609375, + "loss_num": 0.0361328125, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 134546260, + "step": 2011 + }, + { + "epoch": 0.22831205673758864, + "grad_norm": 24.763656616210938, + "learning_rate": 5e-05, + "loss": 1.1257, + "num_input_tokens_seen": 134613120, + "step": 2012 + }, + { + "epoch": 0.22831205673758864, + "loss": 1.3402702808380127, + "loss_ce": 0.006285816431045532, + "loss_iou": 0.5625, + "loss_num": 0.04150390625, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 134613120, + "step": 2012 + }, + { + "epoch": 0.22842553191489362, + "grad_norm": 46.95137405395508, + "learning_rate": 5e-05, + "loss": 1.5816, + "num_input_tokens_seen": 134679568, + "step": 2013 + }, + { + "epoch": 0.22842553191489362, + "loss": 1.4688806533813477, + "loss_ce": 0.008431393653154373, + "loss_iou": 0.62890625, + "loss_num": 0.040283203125, + "loss_xval": 1.4609375, + "num_input_tokens_seen": 134679568, + "step": 2013 + }, + { + "epoch": 0.22853900709219857, + "grad_norm": 26.3516845703125, + "learning_rate": 5e-05, + "loss": 1.2936, + "num_input_tokens_seen": 134747168, + "step": 2014 + }, + { + "epoch": 0.22853900709219857, + "loss": 1.3415887355804443, + "loss_ce": 0.006627822294831276, + "loss_iou": 0.55078125, + "loss_num": 0.0458984375, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 134747168, + "step": 2014 + }, + { + "epoch": 0.22865248226950355, + "grad_norm": 30.557313919067383, + "learning_rate": 5e-05, + "loss": 1.4184, + "num_input_tokens_seen": 134814480, + "step": 2015 + }, + { + "epoch": 0.22865248226950355, + "loss": 1.3582922220230103, + "loss_ce": 0.00380011647939682, + "loss_iou": 0.59375, + "loss_num": 0.033935546875, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 134814480, + "step": 2015 + }, + { + "epoch": 0.2287659574468085, + "grad_norm": 37.435874938964844, + "learning_rate": 5e-05, + "loss": 1.393, + "num_input_tokens_seen": 134881848, + "step": 2016 + }, + { + "epoch": 0.2287659574468085, + "loss": 1.573784589767456, + "loss_ce": 0.005913556553423405, + "loss_iou": 0.6328125, + "loss_num": 0.061279296875, + "loss_xval": 1.5703125, + "num_input_tokens_seen": 134881848, + "step": 2016 + }, + { + "epoch": 0.22887943262411348, + "grad_norm": 21.736892700195312, + "learning_rate": 5e-05, + "loss": 1.6853, + "num_input_tokens_seen": 134949036, + "step": 2017 + }, + { + "epoch": 0.22887943262411348, + "loss": 1.6020491123199463, + "loss_ce": 0.006345873232930899, + "loss_iou": 0.703125, + "loss_num": 0.038330078125, + "loss_xval": 1.59375, + "num_input_tokens_seen": 134949036, + "step": 2017 + }, + { + "epoch": 0.22899290780141843, + "grad_norm": 24.602642059326172, + "learning_rate": 5e-05, + "loss": 1.2993, + "num_input_tokens_seen": 135015648, + "step": 2018 + }, + { + "epoch": 0.22899290780141843, + "loss": 1.191311240196228, + "loss_ce": 0.003811201313510537, + "loss_iou": 0.5, + "loss_num": 0.037841796875, + "loss_xval": 1.1875, + "num_input_tokens_seen": 135015648, + "step": 2018 + }, + { + "epoch": 0.2291063829787234, + "grad_norm": 31.042495727539062, + "learning_rate": 5e-05, + "loss": 1.25, + "num_input_tokens_seen": 135082288, + "step": 2019 + }, + { + "epoch": 0.2291063829787234, + "loss": 1.4482593536376953, + "loss_ce": 0.0029468571301549673, + "loss_iou": 0.640625, + "loss_num": 0.031982421875, + "loss_xval": 1.4453125, + "num_input_tokens_seen": 135082288, + "step": 2019 + }, + { + "epoch": 0.22921985815602836, + "grad_norm": 25.40342903137207, + "learning_rate": 5e-05, + "loss": 1.5081, + "num_input_tokens_seen": 135149748, + "step": 2020 + }, + { + "epoch": 0.22921985815602836, + "loss": 1.522582769393921, + "loss_ce": 0.002074998337775469, + "loss_iou": 0.6328125, + "loss_num": 0.0517578125, + "loss_xval": 1.5234375, + "num_input_tokens_seen": 135149748, + "step": 2020 + }, + { + "epoch": 0.22933333333333333, + "grad_norm": 18.62691879272461, + "learning_rate": 5e-05, + "loss": 1.3122, + "num_input_tokens_seen": 135216788, + "step": 2021 + }, + { + "epoch": 0.22933333333333333, + "loss": 1.2365171909332275, + "loss_ce": 0.005071887746453285, + "loss_iou": 0.5, + "loss_num": 0.04638671875, + "loss_xval": 1.234375, + "num_input_tokens_seen": 135216788, + "step": 2021 + }, + { + "epoch": 0.2294468085106383, + "grad_norm": 46.972171783447266, + "learning_rate": 5e-05, + "loss": 1.1938, + "num_input_tokens_seen": 135283552, + "step": 2022 + }, + { + "epoch": 0.2294468085106383, + "loss": 1.205946683883667, + "loss_ce": 0.004286488518118858, + "loss_iou": 0.5, + "loss_num": 0.041015625, + "loss_xval": 1.203125, + "num_input_tokens_seen": 135283552, + "step": 2022 + }, + { + "epoch": 0.22956028368794326, + "grad_norm": 28.472429275512695, + "learning_rate": 5e-05, + "loss": 1.2358, + "num_input_tokens_seen": 135349804, + "step": 2023 + }, + { + "epoch": 0.22956028368794326, + "loss": 1.224898099899292, + "loss_ce": 0.004683193750679493, + "loss_iou": 0.5, + "loss_num": 0.04443359375, + "loss_xval": 1.21875, + "num_input_tokens_seen": 135349804, + "step": 2023 + }, + { + "epoch": 0.22967375886524824, + "grad_norm": 22.15022850036621, + "learning_rate": 5e-05, + "loss": 1.2698, + "num_input_tokens_seen": 135416720, + "step": 2024 + }, + { + "epoch": 0.22967375886524824, + "loss": 1.3212740421295166, + "loss_ce": 0.004379604011774063, + "loss_iou": 0.55859375, + "loss_num": 0.040283203125, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 135416720, + "step": 2024 + }, + { + "epoch": 0.2297872340425532, + "grad_norm": 20.779464721679688, + "learning_rate": 5e-05, + "loss": 1.2848, + "num_input_tokens_seen": 135484504, + "step": 2025 + }, + { + "epoch": 0.2297872340425532, + "loss": 1.1025192737579346, + "loss_ce": 0.001445042435079813, + "loss_iou": 0.486328125, + "loss_num": 0.025634765625, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 135484504, + "step": 2025 + }, + { + "epoch": 0.22990070921985817, + "grad_norm": 23.939172744750977, + "learning_rate": 5e-05, + "loss": 1.2693, + "num_input_tokens_seen": 135551200, + "step": 2026 + }, + { + "epoch": 0.22990070921985817, + "loss": 1.0693081617355347, + "loss_ce": 0.002901950851082802, + "loss_iou": 0.4765625, + "loss_num": 0.02294921875, + "loss_xval": 1.0625, + "num_input_tokens_seen": 135551200, + "step": 2026 + }, + { + "epoch": 0.23001418439716312, + "grad_norm": 19.07532501220703, + "learning_rate": 5e-05, + "loss": 1.2646, + "num_input_tokens_seen": 135618148, + "step": 2027 + }, + { + "epoch": 0.23001418439716312, + "loss": 1.3553054332733154, + "loss_ce": 0.005695998668670654, + "loss_iou": 0.546875, + "loss_num": 0.05126953125, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 135618148, + "step": 2027 + }, + { + "epoch": 0.2301276595744681, + "grad_norm": 21.542993545532227, + "learning_rate": 5e-05, + "loss": 1.2401, + "num_input_tokens_seen": 135684592, + "step": 2028 + }, + { + "epoch": 0.2301276595744681, + "loss": 1.0857696533203125, + "loss_ce": 0.004226607270538807, + "loss_iou": 0.4765625, + "loss_num": 0.026123046875, + "loss_xval": 1.078125, + "num_input_tokens_seen": 135684592, + "step": 2028 + }, + { + "epoch": 0.23024113475177305, + "grad_norm": 28.628328323364258, + "learning_rate": 5e-05, + "loss": 1.3276, + "num_input_tokens_seen": 135752360, + "step": 2029 + }, + { + "epoch": 0.23024113475177305, + "loss": 1.2580406665802002, + "loss_ce": 0.005110942292958498, + "loss_iou": 0.5234375, + "loss_num": 0.04052734375, + "loss_xval": 1.25, + "num_input_tokens_seen": 135752360, + "step": 2029 + }, + { + "epoch": 0.23035460992907802, + "grad_norm": 43.75878143310547, + "learning_rate": 5e-05, + "loss": 1.3477, + "num_input_tokens_seen": 135820508, + "step": 2030 + }, + { + "epoch": 0.23035460992907802, + "loss": 1.4575709104537964, + "loss_ce": 0.006887244060635567, + "loss_iou": 0.6015625, + "loss_num": 0.049072265625, + "loss_xval": 1.453125, + "num_input_tokens_seen": 135820508, + "step": 2030 + }, + { + "epoch": 0.23046808510638297, + "grad_norm": 21.08272933959961, + "learning_rate": 5e-05, + "loss": 1.3268, + "num_input_tokens_seen": 135887280, + "step": 2031 + }, + { + "epoch": 0.23046808510638297, + "loss": 1.387467861175537, + "loss_ce": 0.002702227095142007, + "loss_iou": 0.5703125, + "loss_num": 0.048828125, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 135887280, + "step": 2031 + }, + { + "epoch": 0.23058156028368795, + "grad_norm": 15.335607528686523, + "learning_rate": 5e-05, + "loss": 1.3723, + "num_input_tokens_seen": 135953620, + "step": 2032 + }, + { + "epoch": 0.23058156028368795, + "loss": 1.4347398281097412, + "loss_ce": 0.0030992156825959682, + "loss_iou": 0.578125, + "loss_num": 0.05517578125, + "loss_xval": 1.4296875, + "num_input_tokens_seen": 135953620, + "step": 2032 + }, + { + "epoch": 0.2306950354609929, + "grad_norm": 26.609085083007812, + "learning_rate": 5e-05, + "loss": 1.3803, + "num_input_tokens_seen": 136020564, + "step": 2033 + }, + { + "epoch": 0.2306950354609929, + "loss": 1.3818200826644897, + "loss_ce": 0.007308430038392544, + "loss_iou": 0.57421875, + "loss_num": 0.044921875, + "loss_xval": 1.375, + "num_input_tokens_seen": 136020564, + "step": 2033 + }, + { + "epoch": 0.23080851063829788, + "grad_norm": 35.458526611328125, + "learning_rate": 5e-05, + "loss": 1.4366, + "num_input_tokens_seen": 136087400, + "step": 2034 + }, + { + "epoch": 0.23080851063829788, + "loss": 1.4207435846328735, + "loss_ce": 0.005216238088905811, + "loss_iou": 0.6328125, + "loss_num": 0.029296875, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 136087400, + "step": 2034 + }, + { + "epoch": 0.23092198581560283, + "grad_norm": 25.101194381713867, + "learning_rate": 5e-05, + "loss": 1.4433, + "num_input_tokens_seen": 136155588, + "step": 2035 + }, + { + "epoch": 0.23092198581560283, + "loss": 1.4715025424957275, + "loss_ce": 0.007635368034243584, + "loss_iou": 0.640625, + "loss_num": 0.0361328125, + "loss_xval": 1.4609375, + "num_input_tokens_seen": 136155588, + "step": 2035 + }, + { + "epoch": 0.2310354609929078, + "grad_norm": 27.711580276489258, + "learning_rate": 5e-05, + "loss": 1.4603, + "num_input_tokens_seen": 136222664, + "step": 2036 + }, + { + "epoch": 0.2310354609929078, + "loss": 1.1517407894134521, + "loss_ce": 0.001838383381254971, + "loss_iou": 0.5078125, + "loss_num": 0.02685546875, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 136222664, + "step": 2036 + }, + { + "epoch": 0.23114893617021276, + "grad_norm": 25.31820297241211, + "learning_rate": 5e-05, + "loss": 1.6098, + "num_input_tokens_seen": 136289652, + "step": 2037 + }, + { + "epoch": 0.23114893617021276, + "loss": 1.4407150745391846, + "loss_ce": 0.009318486787378788, + "loss_iou": 0.6015625, + "loss_num": 0.04638671875, + "loss_xval": 1.4296875, + "num_input_tokens_seen": 136289652, + "step": 2037 + }, + { + "epoch": 0.23126241134751774, + "grad_norm": 21.291994094848633, + "learning_rate": 5e-05, + "loss": 1.3638, + "num_input_tokens_seen": 136356660, + "step": 2038 + }, + { + "epoch": 0.23126241134751774, + "loss": 1.6328544616699219, + "loss_ce": 0.0044365013018250465, + "loss_iou": 0.66015625, + "loss_num": 0.06103515625, + "loss_xval": 1.625, + "num_input_tokens_seen": 136356660, + "step": 2038 + }, + { + "epoch": 0.2313758865248227, + "grad_norm": 22.731719970703125, + "learning_rate": 5e-05, + "loss": 1.2247, + "num_input_tokens_seen": 136422532, + "step": 2039 + }, + { + "epoch": 0.2313758865248227, + "loss": 1.2202749252319336, + "loss_ce": 0.0054312339052557945, + "loss_iou": 0.51953125, + "loss_num": 0.03515625, + "loss_xval": 1.21875, + "num_input_tokens_seen": 136422532, + "step": 2039 + }, + { + "epoch": 0.23148936170212767, + "grad_norm": 44.39201354980469, + "learning_rate": 5e-05, + "loss": 1.3282, + "num_input_tokens_seen": 136490052, + "step": 2040 + }, + { + "epoch": 0.23148936170212767, + "loss": 1.2150123119354248, + "loss_ce": 0.0065161860547959805, + "loss_iou": 0.54296875, + "loss_num": 0.0252685546875, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 136490052, + "step": 2040 + }, + { + "epoch": 0.23160283687943262, + "grad_norm": 20.9036808013916, + "learning_rate": 5e-05, + "loss": 1.3772, + "num_input_tokens_seen": 136557540, + "step": 2041 + }, + { + "epoch": 0.23160283687943262, + "loss": 1.4677550792694092, + "loss_ce": 0.002911216579377651, + "loss_iou": 0.6484375, + "loss_num": 0.033447265625, + "loss_xval": 1.46875, + "num_input_tokens_seen": 136557540, + "step": 2041 + }, + { + "epoch": 0.2317163120567376, + "grad_norm": 14.917709350585938, + "learning_rate": 5e-05, + "loss": 1.3003, + "num_input_tokens_seen": 136624596, + "step": 2042 + }, + { + "epoch": 0.2317163120567376, + "loss": 1.169108271598816, + "loss_ce": 0.005533982999622822, + "loss_iou": 0.453125, + "loss_num": 0.05224609375, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 136624596, + "step": 2042 + }, + { + "epoch": 0.23182978723404254, + "grad_norm": 14.936861991882324, + "learning_rate": 5e-05, + "loss": 1.1518, + "num_input_tokens_seen": 136691112, + "step": 2043 + }, + { + "epoch": 0.23182978723404254, + "loss": 1.2507281303405762, + "loss_ce": 0.005122697446495295, + "loss_iou": 0.5078125, + "loss_num": 0.045654296875, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 136691112, + "step": 2043 + }, + { + "epoch": 0.23194326241134752, + "grad_norm": 35.865726470947266, + "learning_rate": 5e-05, + "loss": 1.1699, + "num_input_tokens_seen": 136758408, + "step": 2044 + }, + { + "epoch": 0.23194326241134752, + "loss": 1.0573309659957886, + "loss_ce": 0.00557315768674016, + "loss_iou": 0.466796875, + "loss_num": 0.0238037109375, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 136758408, + "step": 2044 + }, + { + "epoch": 0.23205673758865247, + "grad_norm": 25.519580841064453, + "learning_rate": 5e-05, + "loss": 1.452, + "num_input_tokens_seen": 136826096, + "step": 2045 + }, + { + "epoch": 0.23205673758865247, + "loss": 1.3130764961242676, + "loss_ce": 0.002529588993638754, + "loss_iou": 0.5859375, + "loss_num": 0.02734375, + "loss_xval": 1.3125, + "num_input_tokens_seen": 136826096, + "step": 2045 + }, + { + "epoch": 0.23217021276595745, + "grad_norm": 24.028642654418945, + "learning_rate": 5e-05, + "loss": 1.1842, + "num_input_tokens_seen": 136893496, + "step": 2046 + }, + { + "epoch": 0.23217021276595745, + "loss": 1.0569978952407837, + "loss_ce": 0.0018221026984974742, + "loss_iou": 0.490234375, + "loss_num": 0.0147705078125, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 136893496, + "step": 2046 + }, + { + "epoch": 0.2322836879432624, + "grad_norm": 36.479713439941406, + "learning_rate": 5e-05, + "loss": 1.4017, + "num_input_tokens_seen": 136960284, + "step": 2047 + }, + { + "epoch": 0.2322836879432624, + "loss": 1.2296922206878662, + "loss_ce": 0.0016649110475555062, + "loss_iou": 0.5546875, + "loss_num": 0.023193359375, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 136960284, + "step": 2047 + }, + { + "epoch": 0.23239716312056738, + "grad_norm": 26.264671325683594, + "learning_rate": 5e-05, + "loss": 1.7452, + "num_input_tokens_seen": 137027556, + "step": 2048 + }, + { + "epoch": 0.23239716312056738, + "loss": 1.8572262525558472, + "loss_ce": 0.010058222338557243, + "loss_iou": 0.80859375, + "loss_num": 0.04638671875, + "loss_xval": 1.84375, + "num_input_tokens_seen": 137027556, + "step": 2048 + }, + { + "epoch": 0.23251063829787233, + "grad_norm": 9.957148551940918, + "learning_rate": 5e-05, + "loss": 0.9188, + "num_input_tokens_seen": 137093940, + "step": 2049 + }, + { + "epoch": 0.23251063829787233, + "loss": 0.8883804082870483, + "loss_ce": 0.005232281517237425, + "loss_iou": 0.40234375, + "loss_num": 0.015380859375, + "loss_xval": 0.8828125, + "num_input_tokens_seen": 137093940, + "step": 2049 + }, + { + "epoch": 0.2326241134751773, + "grad_norm": 25.960311889648438, + "learning_rate": 5e-05, + "loss": 1.1905, + "num_input_tokens_seen": 137161996, + "step": 2050 + }, + { + "epoch": 0.2326241134751773, + "loss": 1.310711145401001, + "loss_ce": 0.004558734595775604, + "loss_iou": 0.5859375, + "loss_num": 0.0263671875, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 137161996, + "step": 2050 + }, + { + "epoch": 0.23273758865248226, + "grad_norm": 36.041629791259766, + "learning_rate": 5e-05, + "loss": 1.2062, + "num_input_tokens_seen": 137229048, + "step": 2051 + }, + { + "epoch": 0.23273758865248226, + "loss": 1.1576271057128906, + "loss_ce": 0.003330232808366418, + "loss_iou": 0.5, + "loss_num": 0.030517578125, + "loss_xval": 1.15625, + "num_input_tokens_seen": 137229048, + "step": 2051 + }, + { + "epoch": 0.23285106382978724, + "grad_norm": 31.17409896850586, + "learning_rate": 5e-05, + "loss": 1.3977, + "num_input_tokens_seen": 137295548, + "step": 2052 + }, + { + "epoch": 0.23285106382978724, + "loss": 1.4793686866760254, + "loss_ce": 0.0028062458150088787, + "loss_iou": 0.6640625, + "loss_num": 0.0296630859375, + "loss_xval": 1.4765625, + "num_input_tokens_seen": 137295548, + "step": 2052 + }, + { + "epoch": 0.2329645390070922, + "grad_norm": 10.73112678527832, + "learning_rate": 5e-05, + "loss": 1.0646, + "num_input_tokens_seen": 137361980, + "step": 2053 + }, + { + "epoch": 0.2329645390070922, + "loss": 1.243431568145752, + "loss_ce": 0.003685547737404704, + "loss_iou": 0.51171875, + "loss_num": 0.04248046875, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 137361980, + "step": 2053 + }, + { + "epoch": 0.23307801418439716, + "grad_norm": 20.441362380981445, + "learning_rate": 5e-05, + "loss": 1.292, + "num_input_tokens_seen": 137428020, + "step": 2054 + }, + { + "epoch": 0.23307801418439716, + "loss": 1.5222448110580444, + "loss_ce": 0.004666633903980255, + "loss_iou": 0.6484375, + "loss_num": 0.045166015625, + "loss_xval": 1.515625, + "num_input_tokens_seen": 137428020, + "step": 2054 + }, + { + "epoch": 0.23319148936170211, + "grad_norm": 124.35025024414062, + "learning_rate": 5e-05, + "loss": 1.238, + "num_input_tokens_seen": 137494912, + "step": 2055 + }, + { + "epoch": 0.23319148936170211, + "loss": 1.154191017150879, + "loss_ce": 0.003800421254709363, + "loss_iou": 0.48046875, + "loss_num": 0.037353515625, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 137494912, + "step": 2055 + }, + { + "epoch": 0.2333049645390071, + "grad_norm": 29.854949951171875, + "learning_rate": 5e-05, + "loss": 1.3157, + "num_input_tokens_seen": 137562516, + "step": 2056 + }, + { + "epoch": 0.2333049645390071, + "loss": 1.4099164009094238, + "loss_ce": 0.008549172431230545, + "loss_iou": 0.609375, + "loss_num": 0.035888671875, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 137562516, + "step": 2056 + }, + { + "epoch": 0.23341843971631207, + "grad_norm": 41.15663146972656, + "learning_rate": 5e-05, + "loss": 1.4604, + "num_input_tokens_seen": 137629392, + "step": 2057 + }, + { + "epoch": 0.23341843971631207, + "loss": 1.40517258644104, + "loss_ce": 0.003317113034427166, + "loss_iou": 0.62890625, + "loss_num": 0.029541015625, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 137629392, + "step": 2057 + }, + { + "epoch": 0.23353191489361702, + "grad_norm": 21.95905876159668, + "learning_rate": 5e-05, + "loss": 1.5941, + "num_input_tokens_seen": 137696816, + "step": 2058 + }, + { + "epoch": 0.23353191489361702, + "loss": 1.7794818878173828, + "loss_ce": 0.005067810416221619, + "loss_iou": 0.7421875, + "loss_num": 0.05712890625, + "loss_xval": 1.7734375, + "num_input_tokens_seen": 137696816, + "step": 2058 + }, + { + "epoch": 0.233645390070922, + "grad_norm": 12.148500442504883, + "learning_rate": 5e-05, + "loss": 1.0351, + "num_input_tokens_seen": 137763792, + "step": 2059 + }, + { + "epoch": 0.233645390070922, + "loss": 0.7435404062271118, + "loss_ce": 0.005930556915700436, + "loss_iou": 0.326171875, + "loss_num": 0.0169677734375, + "loss_xval": 0.73828125, + "num_input_tokens_seen": 137763792, + "step": 2059 + }, + { + "epoch": 0.23375886524822695, + "grad_norm": 12.08680248260498, + "learning_rate": 5e-05, + "loss": 1.2431, + "num_input_tokens_seen": 137830832, + "step": 2060 + }, + { + "epoch": 0.23375886524822695, + "loss": 1.324596881866455, + "loss_ce": 0.005260852165520191, + "loss_iou": 0.5546875, + "loss_num": 0.0419921875, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 137830832, + "step": 2060 + }, + { + "epoch": 0.23387234042553193, + "grad_norm": 39.10596466064453, + "learning_rate": 5e-05, + "loss": 1.1499, + "num_input_tokens_seen": 137897844, + "step": 2061 + }, + { + "epoch": 0.23387234042553193, + "loss": 1.0491108894348145, + "loss_ce": 0.0037007031496614218, + "loss_iou": 0.46875, + "loss_num": 0.021240234375, + "loss_xval": 1.046875, + "num_input_tokens_seen": 137897844, + "step": 2061 + }, + { + "epoch": 0.23398581560283688, + "grad_norm": 25.661209106445312, + "learning_rate": 5e-05, + "loss": 1.6205, + "num_input_tokens_seen": 137963676, + "step": 2062 + }, + { + "epoch": 0.23398581560283688, + "loss": 1.633843183517456, + "loss_ce": 0.008843122981488705, + "loss_iou": 0.6484375, + "loss_num": 0.0654296875, + "loss_xval": 1.625, + "num_input_tokens_seen": 137963676, + "step": 2062 + }, + { + "epoch": 0.23409929078014186, + "grad_norm": 29.752559661865234, + "learning_rate": 5e-05, + "loss": 1.015, + "num_input_tokens_seen": 138029800, + "step": 2063 + }, + { + "epoch": 0.23409929078014186, + "loss": 1.0028702020645142, + "loss_ce": 0.0028702029958367348, + "loss_iou": 0.443359375, + "loss_num": 0.022705078125, + "loss_xval": 1.0, + "num_input_tokens_seen": 138029800, + "step": 2063 + }, + { + "epoch": 0.2342127659574468, + "grad_norm": 35.852439880371094, + "learning_rate": 5e-05, + "loss": 1.3701, + "num_input_tokens_seen": 138096140, + "step": 2064 + }, + { + "epoch": 0.2342127659574468, + "loss": 1.2461440563201904, + "loss_ce": 0.004933053627610207, + "loss_iou": 0.546875, + "loss_num": 0.029052734375, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 138096140, + "step": 2064 + }, + { + "epoch": 0.23432624113475178, + "grad_norm": 22.945711135864258, + "learning_rate": 5e-05, + "loss": 1.2215, + "num_input_tokens_seen": 138163456, + "step": 2065 + }, + { + "epoch": 0.23432624113475178, + "loss": 1.2071146965026855, + "loss_ce": 0.0044780392199754715, + "loss_iou": 0.51953125, + "loss_num": 0.032470703125, + "loss_xval": 1.203125, + "num_input_tokens_seen": 138163456, + "step": 2065 + }, + { + "epoch": 0.23443971631205673, + "grad_norm": 28.790828704833984, + "learning_rate": 5e-05, + "loss": 1.354, + "num_input_tokens_seen": 138230780, + "step": 2066 + }, + { + "epoch": 0.23443971631205673, + "loss": 1.3850150108337402, + "loss_ce": 0.003667325247079134, + "loss_iou": 0.58203125, + "loss_num": 0.04296875, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 138230780, + "step": 2066 + }, + { + "epoch": 0.2345531914893617, + "grad_norm": 37.643829345703125, + "learning_rate": 5e-05, + "loss": 1.3133, + "num_input_tokens_seen": 138296652, + "step": 2067 + }, + { + "epoch": 0.2345531914893617, + "loss": 1.3993010520935059, + "loss_ce": 0.006722989492118359, + "loss_iou": 0.60546875, + "loss_num": 0.036865234375, + "loss_xval": 1.390625, + "num_input_tokens_seen": 138296652, + "step": 2067 + }, + { + "epoch": 0.23466666666666666, + "grad_norm": 27.82712745666504, + "learning_rate": 5e-05, + "loss": 1.4739, + "num_input_tokens_seen": 138362844, + "step": 2068 + }, + { + "epoch": 0.23466666666666666, + "loss": 1.576665997505188, + "loss_ce": 0.006353476084768772, + "loss_iou": 0.6875, + "loss_num": 0.03857421875, + "loss_xval": 1.5703125, + "num_input_tokens_seen": 138362844, + "step": 2068 + }, + { + "epoch": 0.23478014184397164, + "grad_norm": 16.943723678588867, + "learning_rate": 5e-05, + "loss": 1.4529, + "num_input_tokens_seen": 138429044, + "step": 2069 + }, + { + "epoch": 0.23478014184397164, + "loss": 1.3964051008224487, + "loss_ce": 0.004803510382771492, + "loss_iou": 0.5546875, + "loss_num": 0.05712890625, + "loss_xval": 1.390625, + "num_input_tokens_seen": 138429044, + "step": 2069 + }, + { + "epoch": 0.2348936170212766, + "grad_norm": 19.469667434692383, + "learning_rate": 5e-05, + "loss": 1.1948, + "num_input_tokens_seen": 138495712, + "step": 2070 + }, + { + "epoch": 0.2348936170212766, + "loss": 1.1970102787017822, + "loss_ce": 0.004627506248652935, + "loss_iou": 0.53125, + "loss_num": 0.0264892578125, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 138495712, + "step": 2070 + }, + { + "epoch": 0.23500709219858157, + "grad_norm": 21.473461151123047, + "learning_rate": 5e-05, + "loss": 1.3365, + "num_input_tokens_seen": 138563504, + "step": 2071 + }, + { + "epoch": 0.23500709219858157, + "loss": 1.1130914688110352, + "loss_ce": 0.0022515510208904743, + "loss_iou": 0.49609375, + "loss_num": 0.0240478515625, + "loss_xval": 1.109375, + "num_input_tokens_seen": 138563504, + "step": 2071 + }, + { + "epoch": 0.23512056737588652, + "grad_norm": 17.56344223022461, + "learning_rate": 5e-05, + "loss": 1.2937, + "num_input_tokens_seen": 138630896, + "step": 2072 + }, + { + "epoch": 0.23512056737588652, + "loss": 1.281983733177185, + "loss_ce": 0.0090345349162817, + "loss_iou": 0.546875, + "loss_num": 0.0361328125, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 138630896, + "step": 2072 + }, + { + "epoch": 0.2352340425531915, + "grad_norm": 20.37179183959961, + "learning_rate": 5e-05, + "loss": 1.3344, + "num_input_tokens_seen": 138695956, + "step": 2073 + }, + { + "epoch": 0.2352340425531915, + "loss": 1.252246618270874, + "loss_ce": 0.0027349223382771015, + "loss_iou": 0.54296875, + "loss_num": 0.0322265625, + "loss_xval": 1.25, + "num_input_tokens_seen": 138695956, + "step": 2073 + }, + { + "epoch": 0.23534751773049645, + "grad_norm": 17.9854679107666, + "learning_rate": 5e-05, + "loss": 1.0855, + "num_input_tokens_seen": 138763204, + "step": 2074 + }, + { + "epoch": 0.23534751773049645, + "loss": 1.012081265449524, + "loss_ce": 0.0039025265723466873, + "loss_iou": 0.4140625, + "loss_num": 0.03564453125, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 138763204, + "step": 2074 + }, + { + "epoch": 0.23546099290780143, + "grad_norm": 38.06800842285156, + "learning_rate": 5e-05, + "loss": 1.2708, + "num_input_tokens_seen": 138830216, + "step": 2075 + }, + { + "epoch": 0.23546099290780143, + "loss": 1.1075276136398315, + "loss_ce": 0.006941677536815405, + "loss_iou": 0.51171875, + "loss_num": 0.014892578125, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 138830216, + "step": 2075 + }, + { + "epoch": 0.23557446808510638, + "grad_norm": 24.396528244018555, + "learning_rate": 5e-05, + "loss": 1.5541, + "num_input_tokens_seen": 138896472, + "step": 2076 + }, + { + "epoch": 0.23557446808510638, + "loss": 1.4707401990890503, + "loss_ce": 0.004919916391372681, + "loss_iou": 0.6484375, + "loss_num": 0.03369140625, + "loss_xval": 1.46875, + "num_input_tokens_seen": 138896472, + "step": 2076 + }, + { + "epoch": 0.23568794326241135, + "grad_norm": 24.5815486907959, + "learning_rate": 5e-05, + "loss": 1.1254, + "num_input_tokens_seen": 138964056, + "step": 2077 + }, + { + "epoch": 0.23568794326241135, + "loss": 1.27105712890625, + "loss_ce": 0.003967379219830036, + "loss_iou": 0.53515625, + "loss_num": 0.039306640625, + "loss_xval": 1.265625, + "num_input_tokens_seen": 138964056, + "step": 2077 + }, + { + "epoch": 0.2358014184397163, + "grad_norm": 37.927005767822266, + "learning_rate": 5e-05, + "loss": 1.4559, + "num_input_tokens_seen": 139031304, + "step": 2078 + }, + { + "epoch": 0.2358014184397163, + "loss": 1.4460985660552979, + "loss_ce": 0.0027391696348786354, + "loss_iou": 0.609375, + "loss_num": 0.044189453125, + "loss_xval": 1.4453125, + "num_input_tokens_seen": 139031304, + "step": 2078 + }, + { + "epoch": 0.23591489361702128, + "grad_norm": 24.305774688720703, + "learning_rate": 5e-05, + "loss": 1.4405, + "num_input_tokens_seen": 139098720, + "step": 2079 + }, + { + "epoch": 0.23591489361702128, + "loss": 1.6392714977264404, + "loss_ce": 0.0035292988177388906, + "loss_iou": 0.70703125, + "loss_num": 0.044189453125, + "loss_xval": 1.6328125, + "num_input_tokens_seen": 139098720, + "step": 2079 + }, + { + "epoch": 0.23602836879432623, + "grad_norm": 17.04037857055664, + "learning_rate": 5e-05, + "loss": 1.1709, + "num_input_tokens_seen": 139164208, + "step": 2080 + }, + { + "epoch": 0.23602836879432623, + "loss": 1.1183085441589355, + "loss_ce": 0.00868942029774189, + "loss_iou": 0.4375, + "loss_num": 0.046630859375, + "loss_xval": 1.109375, + "num_input_tokens_seen": 139164208, + "step": 2080 + }, + { + "epoch": 0.2361418439716312, + "grad_norm": 17.081283569335938, + "learning_rate": 5e-05, + "loss": 1.1662, + "num_input_tokens_seen": 139230976, + "step": 2081 + }, + { + "epoch": 0.2361418439716312, + "loss": 1.000046730041504, + "loss_ce": 0.003953031729906797, + "loss_iou": 0.44921875, + "loss_num": 0.01953125, + "loss_xval": 0.99609375, + "num_input_tokens_seen": 139230976, + "step": 2081 + }, + { + "epoch": 0.23625531914893616, + "grad_norm": 18.932842254638672, + "learning_rate": 5e-05, + "loss": 1.0389, + "num_input_tokens_seen": 139297732, + "step": 2082 + }, + { + "epoch": 0.23625531914893616, + "loss": 0.8403486013412476, + "loss_ce": 0.002457936992868781, + "loss_iou": 0.349609375, + "loss_num": 0.0277099609375, + "loss_xval": 0.8359375, + "num_input_tokens_seen": 139297732, + "step": 2082 + }, + { + "epoch": 0.23636879432624114, + "grad_norm": 22.244976043701172, + "learning_rate": 5e-05, + "loss": 1.3601, + "num_input_tokens_seen": 139363392, + "step": 2083 + }, + { + "epoch": 0.23636879432624114, + "loss": 1.137681484222412, + "loss_ce": 0.009263508021831512, + "loss_iou": 0.484375, + "loss_num": 0.03173828125, + "loss_xval": 1.125, + "num_input_tokens_seen": 139363392, + "step": 2083 + }, + { + "epoch": 0.2364822695035461, + "grad_norm": 38.50423049926758, + "learning_rate": 5e-05, + "loss": 1.6186, + "num_input_tokens_seen": 139429916, + "step": 2084 + }, + { + "epoch": 0.2364822695035461, + "loss": 1.4590660333633423, + "loss_ce": 0.005941074341535568, + "loss_iou": 0.59375, + "loss_num": 0.052978515625, + "loss_xval": 1.453125, + "num_input_tokens_seen": 139429916, + "step": 2084 + }, + { + "epoch": 0.23659574468085107, + "grad_norm": 32.5299186706543, + "learning_rate": 5e-05, + "loss": 1.6514, + "num_input_tokens_seen": 139496192, + "step": 2085 + }, + { + "epoch": 0.23659574468085107, + "loss": 1.6575347185134888, + "loss_ce": 0.004214459098875523, + "loss_iou": 0.69140625, + "loss_num": 0.05419921875, + "loss_xval": 1.65625, + "num_input_tokens_seen": 139496192, + "step": 2085 + }, + { + "epoch": 0.23670921985815602, + "grad_norm": 18.592504501342773, + "learning_rate": 5e-05, + "loss": 1.4467, + "num_input_tokens_seen": 139563428, + "step": 2086 + }, + { + "epoch": 0.23670921985815602, + "loss": 1.505403995513916, + "loss_ce": 0.008333789184689522, + "loss_iou": 0.6171875, + "loss_num": 0.05224609375, + "loss_xval": 1.5, + "num_input_tokens_seen": 139563428, + "step": 2086 + }, + { + "epoch": 0.236822695035461, + "grad_norm": 25.54380989074707, + "learning_rate": 5e-05, + "loss": 1.2467, + "num_input_tokens_seen": 139630700, + "step": 2087 + }, + { + "epoch": 0.236822695035461, + "loss": 1.215453863143921, + "loss_ce": 0.004028065130114555, + "loss_iou": 0.53125, + "loss_num": 0.0303955078125, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 139630700, + "step": 2087 + }, + { + "epoch": 0.23693617021276595, + "grad_norm": 29.074495315551758, + "learning_rate": 5e-05, + "loss": 1.3149, + "num_input_tokens_seen": 139698244, + "step": 2088 + }, + { + "epoch": 0.23693617021276595, + "loss": 1.4059786796569824, + "loss_ce": 0.0046115051954984665, + "loss_iou": 0.60546875, + "loss_num": 0.037841796875, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 139698244, + "step": 2088 + }, + { + "epoch": 0.23704964539007092, + "grad_norm": 24.31699562072754, + "learning_rate": 5e-05, + "loss": 1.3402, + "num_input_tokens_seen": 139766148, + "step": 2089 + }, + { + "epoch": 0.23704964539007092, + "loss": 1.323918104171753, + "loss_ce": 0.007023650221526623, + "loss_iou": 0.56640625, + "loss_num": 0.036376953125, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 139766148, + "step": 2089 + }, + { + "epoch": 0.23716312056737587, + "grad_norm": 33.02131652832031, + "learning_rate": 5e-05, + "loss": 1.3347, + "num_input_tokens_seen": 139833524, + "step": 2090 + }, + { + "epoch": 0.23716312056737587, + "loss": 1.4548603296279907, + "loss_ce": 0.0017353211296722293, + "loss_iou": 0.6015625, + "loss_num": 0.049072265625, + "loss_xval": 1.453125, + "num_input_tokens_seen": 139833524, + "step": 2090 + }, + { + "epoch": 0.23727659574468085, + "grad_norm": 50.18512725830078, + "learning_rate": 5e-05, + "loss": 1.3849, + "num_input_tokens_seen": 139900684, + "step": 2091 + }, + { + "epoch": 0.23727659574468085, + "loss": 1.4510201215744019, + "loss_ce": 0.00521928770467639, + "loss_iou": 0.62890625, + "loss_num": 0.03759765625, + "loss_xval": 1.4453125, + "num_input_tokens_seen": 139900684, + "step": 2091 + }, + { + "epoch": 0.2373900709219858, + "grad_norm": 21.285188674926758, + "learning_rate": 5e-05, + "loss": 1.2787, + "num_input_tokens_seen": 139967804, + "step": 2092 + }, + { + "epoch": 0.2373900709219858, + "loss": 1.0499804019927979, + "loss_ce": 0.0070116957649588585, + "loss_iou": 0.48828125, + "loss_num": 0.0133056640625, + "loss_xval": 1.046875, + "num_input_tokens_seen": 139967804, + "step": 2092 + }, + { + "epoch": 0.23750354609929078, + "grad_norm": 27.18001365661621, + "learning_rate": 5e-05, + "loss": 1.2908, + "num_input_tokens_seen": 140034888, + "step": 2093 + }, + { + "epoch": 0.23750354609929078, + "loss": 1.2942190170288086, + "loss_ce": 0.007597910240292549, + "loss_iou": 0.55078125, + "loss_num": 0.0361328125, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 140034888, + "step": 2093 + }, + { + "epoch": 0.23761702127659576, + "grad_norm": 41.97334671020508, + "learning_rate": 5e-05, + "loss": 1.2108, + "num_input_tokens_seen": 140102736, + "step": 2094 + }, + { + "epoch": 0.23761702127659576, + "loss": 1.3840067386627197, + "loss_ce": 0.0051004355773329735, + "loss_iou": 0.5546875, + "loss_num": 0.05322265625, + "loss_xval": 1.375, + "num_input_tokens_seen": 140102736, + "step": 2094 + }, + { + "epoch": 0.2377304964539007, + "grad_norm": 27.91741371154785, + "learning_rate": 5e-05, + "loss": 1.4938, + "num_input_tokens_seen": 140170920, + "step": 2095 + }, + { + "epoch": 0.2377304964539007, + "loss": 1.2434849739074707, + "loss_ce": 0.007156821899116039, + "loss_iou": 0.54296875, + "loss_num": 0.0301513671875, + "loss_xval": 1.234375, + "num_input_tokens_seen": 140170920, + "step": 2095 + }, + { + "epoch": 0.2378439716312057, + "grad_norm": 18.509166717529297, + "learning_rate": 5e-05, + "loss": 1.2287, + "num_input_tokens_seen": 140238376, + "step": 2096 + }, + { + "epoch": 0.2378439716312057, + "loss": 1.294339656829834, + "loss_ce": 0.005277186632156372, + "loss_iou": 0.53125, + "loss_num": 0.046142578125, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 140238376, + "step": 2096 + }, + { + "epoch": 0.23795744680851064, + "grad_norm": 28.711885452270508, + "learning_rate": 5e-05, + "loss": 1.0258, + "num_input_tokens_seen": 140305268, + "step": 2097 + }, + { + "epoch": 0.23795744680851064, + "loss": 1.0873998403549194, + "loss_ce": 0.008786547929048538, + "loss_iou": 0.470703125, + "loss_num": 0.0274658203125, + "loss_xval": 1.078125, + "num_input_tokens_seen": 140305268, + "step": 2097 + }, + { + "epoch": 0.23807092198581561, + "grad_norm": 31.537954330444336, + "learning_rate": 5e-05, + "loss": 1.2694, + "num_input_tokens_seen": 140372244, + "step": 2098 + }, + { + "epoch": 0.23807092198581561, + "loss": 1.2699968814849854, + "loss_ce": 0.0029070847667753696, + "loss_iou": 0.55078125, + "loss_num": 0.03369140625, + "loss_xval": 1.265625, + "num_input_tokens_seen": 140372244, + "step": 2098 + }, + { + "epoch": 0.23818439716312056, + "grad_norm": 23.722444534301758, + "learning_rate": 5e-05, + "loss": 1.7376, + "num_input_tokens_seen": 140439320, + "step": 2099 + }, + { + "epoch": 0.23818439716312056, + "loss": 1.7315138578414917, + "loss_ce": 0.009834223426878452, + "loss_iou": 0.734375, + "loss_num": 0.049560546875, + "loss_xval": 1.71875, + "num_input_tokens_seen": 140439320, + "step": 2099 + }, + { + "epoch": 0.23829787234042554, + "grad_norm": 24.19367218017578, + "learning_rate": 5e-05, + "loss": 1.1831, + "num_input_tokens_seen": 140505984, + "step": 2100 + }, + { + "epoch": 0.23829787234042554, + "loss": 1.3222756385803223, + "loss_ce": 0.0039163269102573395, + "loss_iou": 0.546875, + "loss_num": 0.044921875, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 140505984, + "step": 2100 + }, + { + "epoch": 0.2384113475177305, + "grad_norm": 36.6457405090332, + "learning_rate": 5e-05, + "loss": 1.3283, + "num_input_tokens_seen": 140574080, + "step": 2101 + }, + { + "epoch": 0.2384113475177305, + "loss": 1.1938422918319702, + "loss_ce": 0.0068305861204862595, + "loss_iou": 0.53125, + "loss_num": 0.0247802734375, + "loss_xval": 1.1875, + "num_input_tokens_seen": 140574080, + "step": 2101 + }, + { + "epoch": 0.23852482269503547, + "grad_norm": 41.12563705444336, + "learning_rate": 5e-05, + "loss": 1.5529, + "num_input_tokens_seen": 140641076, + "step": 2102 + }, + { + "epoch": 0.23852482269503547, + "loss": 1.5113822221755981, + "loss_ce": 0.005522834602743387, + "loss_iou": 0.63671875, + "loss_num": 0.047119140625, + "loss_xval": 1.5078125, + "num_input_tokens_seen": 140641076, + "step": 2102 + }, + { + "epoch": 0.23863829787234042, + "grad_norm": 20.35697364807129, + "learning_rate": 5e-05, + "loss": 1.369, + "num_input_tokens_seen": 140708096, + "step": 2103 + }, + { + "epoch": 0.23863829787234042, + "loss": 1.2732696533203125, + "loss_ce": 0.005203336477279663, + "loss_iou": 0.52734375, + "loss_num": 0.042236328125, + "loss_xval": 1.265625, + "num_input_tokens_seen": 140708096, + "step": 2103 + }, + { + "epoch": 0.2387517730496454, + "grad_norm": 24.308937072753906, + "learning_rate": 5e-05, + "loss": 0.9863, + "num_input_tokens_seen": 140774252, + "step": 2104 + }, + { + "epoch": 0.2387517730496454, + "loss": 1.0968973636627197, + "loss_ce": 0.0026590684428811073, + "loss_iou": 0.46484375, + "loss_num": 0.032958984375, + "loss_xval": 1.09375, + "num_input_tokens_seen": 140774252, + "step": 2104 + }, + { + "epoch": 0.23886524822695035, + "grad_norm": 31.8422794342041, + "learning_rate": 5e-05, + "loss": 1.4027, + "num_input_tokens_seen": 140841432, + "step": 2105 + }, + { + "epoch": 0.23886524822695035, + "loss": 1.470102310180664, + "loss_ce": 0.007211686577647924, + "loss_iou": 0.640625, + "loss_num": 0.036376953125, + "loss_xval": 1.4609375, + "num_input_tokens_seen": 140841432, + "step": 2105 + }, + { + "epoch": 0.23897872340425533, + "grad_norm": 32.49705505371094, + "learning_rate": 5e-05, + "loss": 1.3509, + "num_input_tokens_seen": 140909100, + "step": 2106 + }, + { + "epoch": 0.23897872340425533, + "loss": 1.2243146896362305, + "loss_ce": 0.006541217211633921, + "loss_iou": 0.515625, + "loss_num": 0.037841796875, + "loss_xval": 1.21875, + "num_input_tokens_seen": 140909100, + "step": 2106 + }, + { + "epoch": 0.23909219858156028, + "grad_norm": 23.098670959472656, + "learning_rate": 5e-05, + "loss": 1.4318, + "num_input_tokens_seen": 140976028, + "step": 2107 + }, + { + "epoch": 0.23909219858156028, + "loss": 1.424094557762146, + "loss_ce": 0.004660971462726593, + "loss_iou": 0.625, + "loss_num": 0.032958984375, + "loss_xval": 1.421875, + "num_input_tokens_seen": 140976028, + "step": 2107 + }, + { + "epoch": 0.23920567375886526, + "grad_norm": 28.326358795166016, + "learning_rate": 5e-05, + "loss": 1.2935, + "num_input_tokens_seen": 141042416, + "step": 2108 + }, + { + "epoch": 0.23920567375886526, + "loss": 1.0542070865631104, + "loss_ce": 0.0063555920496582985, + "loss_iou": 0.4453125, + "loss_num": 0.0311279296875, + "loss_xval": 1.046875, + "num_input_tokens_seen": 141042416, + "step": 2108 + }, + { + "epoch": 0.2393191489361702, + "grad_norm": 26.95307159423828, + "learning_rate": 5e-05, + "loss": 1.5112, + "num_input_tokens_seen": 141108512, + "step": 2109 + }, + { + "epoch": 0.2393191489361702, + "loss": 1.6798866987228394, + "loss_ce": 0.0041054957546293736, + "loss_iou": 0.7109375, + "loss_num": 0.051513671875, + "loss_xval": 1.671875, + "num_input_tokens_seen": 141108512, + "step": 2109 + }, + { + "epoch": 0.23943262411347518, + "grad_norm": 15.88924789428711, + "learning_rate": 5e-05, + "loss": 1.1946, + "num_input_tokens_seen": 141174512, + "step": 2110 + }, + { + "epoch": 0.23943262411347518, + "loss": 1.2167226076126099, + "loss_ce": 0.005190051160752773, + "loss_iou": 0.494140625, + "loss_num": 0.04443359375, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 141174512, + "step": 2110 + }, + { + "epoch": 0.23954609929078013, + "grad_norm": 16.052860260009766, + "learning_rate": 5e-05, + "loss": 1.0847, + "num_input_tokens_seen": 141240564, + "step": 2111 + }, + { + "epoch": 0.23954609929078013, + "loss": 0.9810453653335571, + "loss_ce": 0.00643600057810545, + "loss_iou": 0.443359375, + "loss_num": 0.0177001953125, + "loss_xval": 0.9765625, + "num_input_tokens_seen": 141240564, + "step": 2111 + }, + { + "epoch": 0.2396595744680851, + "grad_norm": 16.463613510131836, + "learning_rate": 5e-05, + "loss": 1.1822, + "num_input_tokens_seen": 141307428, + "step": 2112 + }, + { + "epoch": 0.2396595744680851, + "loss": 1.1751751899719238, + "loss_ce": 0.006962212268263102, + "loss_iou": 0.5234375, + "loss_num": 0.024169921875, + "loss_xval": 1.171875, + "num_input_tokens_seen": 141307428, + "step": 2112 + }, + { + "epoch": 0.23977304964539006, + "grad_norm": 22.638635635375977, + "learning_rate": 5e-05, + "loss": 1.2653, + "num_input_tokens_seen": 141373452, + "step": 2113 + }, + { + "epoch": 0.23977304964539006, + "loss": 1.2961807250976562, + "loss_ce": 0.003700191155076027, + "loss_iou": 0.57421875, + "loss_num": 0.02978515625, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 141373452, + "step": 2113 + }, + { + "epoch": 0.23988652482269504, + "grad_norm": 35.47750473022461, + "learning_rate": 5e-05, + "loss": 1.1612, + "num_input_tokens_seen": 141440616, + "step": 2114 + }, + { + "epoch": 0.23988652482269504, + "loss": 1.2507671117782593, + "loss_ce": 0.0027202432975172997, + "loss_iou": 0.55859375, + "loss_num": 0.0260009765625, + "loss_xval": 1.25, + "num_input_tokens_seen": 141440616, + "step": 2114 + }, + { + "epoch": 0.24, + "grad_norm": 25.126699447631836, + "learning_rate": 5e-05, + "loss": 1.4363, + "num_input_tokens_seen": 141507492, + "step": 2115 + }, + { + "epoch": 0.24, + "loss": 1.3748247623443604, + "loss_ce": 0.004707557149231434, + "loss_iou": 0.5859375, + "loss_num": 0.0400390625, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 141507492, + "step": 2115 + }, + { + "epoch": 0.24011347517730497, + "grad_norm": 18.633275985717773, + "learning_rate": 5e-05, + "loss": 1.3241, + "num_input_tokens_seen": 141574868, + "step": 2116 + }, + { + "epoch": 0.24011347517730497, + "loss": 1.3297353982925415, + "loss_ce": 0.0055166250094771385, + "loss_iou": 0.546875, + "loss_num": 0.045654296875, + "loss_xval": 1.328125, + "num_input_tokens_seen": 141574868, + "step": 2116 + }, + { + "epoch": 0.24022695035460992, + "grad_norm": 22.048809051513672, + "learning_rate": 5e-05, + "loss": 1.107, + "num_input_tokens_seen": 141642032, + "step": 2117 + }, + { + "epoch": 0.24022695035460992, + "loss": 1.2551789283752441, + "loss_ce": 0.006643712520599365, + "loss_iou": 0.494140625, + "loss_num": 0.052490234375, + "loss_xval": 1.25, + "num_input_tokens_seen": 141642032, + "step": 2117 + }, + { + "epoch": 0.2403404255319149, + "grad_norm": 27.654455184936523, + "learning_rate": 5e-05, + "loss": 1.6115, + "num_input_tokens_seen": 141709016, + "step": 2118 + }, + { + "epoch": 0.2403404255319149, + "loss": 1.5946638584136963, + "loss_ce": 0.005796774290502071, + "loss_iou": 0.65234375, + "loss_num": 0.056640625, + "loss_xval": 1.5859375, + "num_input_tokens_seen": 141709016, + "step": 2118 + }, + { + "epoch": 0.24045390070921985, + "grad_norm": 24.23868751525879, + "learning_rate": 5e-05, + "loss": 1.4977, + "num_input_tokens_seen": 141775732, + "step": 2119 + }, + { + "epoch": 0.24045390070921985, + "loss": 1.2224955558776855, + "loss_ce": 0.005759738385677338, + "loss_iou": 0.51171875, + "loss_num": 0.0390625, + "loss_xval": 1.21875, + "num_input_tokens_seen": 141775732, + "step": 2119 + }, + { + "epoch": 0.24056737588652483, + "grad_norm": 18.657726287841797, + "learning_rate": 5e-05, + "loss": 1.1516, + "num_input_tokens_seen": 141843080, + "step": 2120 + }, + { + "epoch": 0.24056737588652483, + "loss": 1.0925437211990356, + "loss_ce": 0.0034324091393500566, + "loss_iou": 0.48828125, + "loss_num": 0.0218505859375, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 141843080, + "step": 2120 + }, + { + "epoch": 0.24068085106382978, + "grad_norm": 18.318111419677734, + "learning_rate": 5e-05, + "loss": 1.3989, + "num_input_tokens_seen": 141909392, + "step": 2121 + }, + { + "epoch": 0.24068085106382978, + "loss": 1.1837220191955566, + "loss_ce": 0.0035462998785078526, + "loss_iou": 0.49609375, + "loss_num": 0.0380859375, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 141909392, + "step": 2121 + }, + { + "epoch": 0.24079432624113475, + "grad_norm": 35.99518966674805, + "learning_rate": 5e-05, + "loss": 1.4308, + "num_input_tokens_seen": 141977052, + "step": 2122 + }, + { + "epoch": 0.24079432624113475, + "loss": 1.4354974031448364, + "loss_ce": 0.007763066329061985, + "loss_iou": 0.60546875, + "loss_num": 0.044189453125, + "loss_xval": 1.4296875, + "num_input_tokens_seen": 141977052, + "step": 2122 + }, + { + "epoch": 0.2409078014184397, + "grad_norm": 14.799403190612793, + "learning_rate": 5e-05, + "loss": 1.0973, + "num_input_tokens_seen": 142042552, + "step": 2123 + }, + { + "epoch": 0.2409078014184397, + "loss": 1.0464881658554077, + "loss_ce": 0.008951467461884022, + "loss_iou": 0.43359375, + "loss_num": 0.033935546875, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 142042552, + "step": 2123 + }, + { + "epoch": 0.24102127659574468, + "grad_norm": 17.5782413482666, + "learning_rate": 5e-05, + "loss": 1.2286, + "num_input_tokens_seen": 142109336, + "step": 2124 + }, + { + "epoch": 0.24102127659574468, + "loss": 1.2320868968963623, + "loss_ce": 0.00796588510274887, + "loss_iou": 0.439453125, + "loss_num": 0.06884765625, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 142109336, + "step": 2124 + }, + { + "epoch": 0.24113475177304963, + "grad_norm": 31.590017318725586, + "learning_rate": 5e-05, + "loss": 1.2449, + "num_input_tokens_seen": 142176260, + "step": 2125 + }, + { + "epoch": 0.24113475177304963, + "loss": 1.2479143142700195, + "loss_ce": 0.002308934461325407, + "loss_iou": 0.53515625, + "loss_num": 0.035400390625, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 142176260, + "step": 2125 + }, + { + "epoch": 0.2412482269503546, + "grad_norm": 35.94646072387695, + "learning_rate": 5e-05, + "loss": 1.4929, + "num_input_tokens_seen": 142242768, + "step": 2126 + }, + { + "epoch": 0.2412482269503546, + "loss": 1.4200465679168701, + "loss_ce": 0.005007459782063961, + "loss_iou": 0.62109375, + "loss_num": 0.034912109375, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 142242768, + "step": 2126 + }, + { + "epoch": 0.24136170212765956, + "grad_norm": 15.948591232299805, + "learning_rate": 5e-05, + "loss": 1.3857, + "num_input_tokens_seen": 142311324, + "step": 2127 + }, + { + "epoch": 0.24136170212765956, + "loss": 1.1799677610397339, + "loss_ce": 0.005651356652379036, + "loss_iou": 0.515625, + "loss_num": 0.0294189453125, + "loss_xval": 1.171875, + "num_input_tokens_seen": 142311324, + "step": 2127 + }, + { + "epoch": 0.24147517730496454, + "grad_norm": 17.884510040283203, + "learning_rate": 5e-05, + "loss": 1.2817, + "num_input_tokens_seen": 142378440, + "step": 2128 + }, + { + "epoch": 0.24147517730496454, + "loss": 1.1925170421600342, + "loss_ce": 0.005505304783582687, + "loss_iou": 0.51171875, + "loss_num": 0.032958984375, + "loss_xval": 1.1875, + "num_input_tokens_seen": 142378440, + "step": 2128 + }, + { + "epoch": 0.24158865248226952, + "grad_norm": 30.72815704345703, + "learning_rate": 5e-05, + "loss": 1.5474, + "num_input_tokens_seen": 142444396, + "step": 2129 + }, + { + "epoch": 0.24158865248226952, + "loss": 1.359363317489624, + "loss_ce": 0.004382775630801916, + "loss_iou": 0.625, + "loss_num": 0.02197265625, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 142444396, + "step": 2129 + }, + { + "epoch": 0.24170212765957447, + "grad_norm": 28.637435913085938, + "learning_rate": 5e-05, + "loss": 1.6272, + "num_input_tokens_seen": 142511840, + "step": 2130 + }, + { + "epoch": 0.24170212765957447, + "loss": 1.2662773132324219, + "loss_ce": 0.005901258438825607, + "loss_iou": 0.546875, + "loss_num": 0.03271484375, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 142511840, + "step": 2130 + }, + { + "epoch": 0.24181560283687945, + "grad_norm": 21.36404800415039, + "learning_rate": 5e-05, + "loss": 1.425, + "num_input_tokens_seen": 142578236, + "step": 2131 + }, + { + "epoch": 0.24181560283687945, + "loss": 1.5128529071807861, + "loss_ce": 0.004063846543431282, + "loss_iou": 0.62890625, + "loss_num": 0.050537109375, + "loss_xval": 1.5078125, + "num_input_tokens_seen": 142578236, + "step": 2131 + }, + { + "epoch": 0.2419290780141844, + "grad_norm": 16.356233596801758, + "learning_rate": 5e-05, + "loss": 1.1497, + "num_input_tokens_seen": 142644564, + "step": 2132 + }, + { + "epoch": 0.2419290780141844, + "loss": 1.1219887733459473, + "loss_ce": 0.006266123615205288, + "loss_iou": 0.45703125, + "loss_num": 0.0400390625, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 142644564, + "step": 2132 + }, + { + "epoch": 0.24204255319148937, + "grad_norm": 23.19045066833496, + "learning_rate": 5e-05, + "loss": 1.6156, + "num_input_tokens_seen": 142710908, + "step": 2133 + }, + { + "epoch": 0.24204255319148937, + "loss": 1.461380958557129, + "loss_ce": 0.007767777889966965, + "loss_iou": 0.6171875, + "loss_num": 0.04443359375, + "loss_xval": 1.453125, + "num_input_tokens_seen": 142710908, + "step": 2133 + }, + { + "epoch": 0.24215602836879432, + "grad_norm": 26.57259178161621, + "learning_rate": 5e-05, + "loss": 1.4117, + "num_input_tokens_seen": 142778528, + "step": 2134 + }, + { + "epoch": 0.24215602836879432, + "loss": 1.5471200942993164, + "loss_ce": 0.006104405969381332, + "loss_iou": 0.671875, + "loss_num": 0.03857421875, + "loss_xval": 1.5390625, + "num_input_tokens_seen": 142778528, + "step": 2134 + }, + { + "epoch": 0.2422695035460993, + "grad_norm": 26.024459838867188, + "learning_rate": 5e-05, + "loss": 1.359, + "num_input_tokens_seen": 142844900, + "step": 2135 + }, + { + "epoch": 0.2422695035460993, + "loss": 1.5060585737228394, + "loss_ce": 0.005082059185951948, + "loss_iou": 0.62890625, + "loss_num": 0.048095703125, + "loss_xval": 1.5, + "num_input_tokens_seen": 142844900, + "step": 2135 + }, + { + "epoch": 0.24238297872340425, + "grad_norm": 31.788639068603516, + "learning_rate": 5e-05, + "loss": 1.3683, + "num_input_tokens_seen": 142912560, + "step": 2136 + }, + { + "epoch": 0.24238297872340425, + "loss": 1.4960581064224243, + "loss_ce": 0.005823730491101742, + "loss_iou": 0.6328125, + "loss_num": 0.045166015625, + "loss_xval": 1.4921875, + "num_input_tokens_seen": 142912560, + "step": 2136 + }, + { + "epoch": 0.24249645390070923, + "grad_norm": 439.2041931152344, + "learning_rate": 5e-05, + "loss": 1.4613, + "num_input_tokens_seen": 142980236, + "step": 2137 + }, + { + "epoch": 0.24249645390070923, + "loss": 1.5019313097000122, + "loss_ce": 0.006814103107899427, + "loss_iou": 0.6640625, + "loss_num": 0.033447265625, + "loss_xval": 1.4921875, + "num_input_tokens_seen": 142980236, + "step": 2137 + }, + { + "epoch": 0.24260992907801418, + "grad_norm": 19.144245147705078, + "learning_rate": 5e-05, + "loss": 1.3036, + "num_input_tokens_seen": 143047364, + "step": 2138 + }, + { + "epoch": 0.24260992907801418, + "loss": 1.434209942817688, + "loss_ce": 0.005499033257365227, + "loss_iou": 0.58203125, + "loss_num": 0.052734375, + "loss_xval": 1.4296875, + "num_input_tokens_seen": 143047364, + "step": 2138 + }, + { + "epoch": 0.24272340425531916, + "grad_norm": 24.486297607421875, + "learning_rate": 5e-05, + "loss": 1.3291, + "num_input_tokens_seen": 143113620, + "step": 2139 + }, + { + "epoch": 0.24272340425531916, + "loss": 1.2486956119537354, + "loss_ce": 0.005165329203009605, + "loss_iou": 0.515625, + "loss_num": 0.04248046875, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 143113620, + "step": 2139 + }, + { + "epoch": 0.2428368794326241, + "grad_norm": 30.798280715942383, + "learning_rate": 5e-05, + "loss": 1.3339, + "num_input_tokens_seen": 143179968, + "step": 2140 + }, + { + "epoch": 0.2428368794326241, + "loss": 1.461129069328308, + "loss_ce": 0.004097845871001482, + "loss_iou": 0.6171875, + "loss_num": 0.045166015625, + "loss_xval": 1.453125, + "num_input_tokens_seen": 143179968, + "step": 2140 + }, + { + "epoch": 0.2429503546099291, + "grad_norm": 34.056358337402344, + "learning_rate": 5e-05, + "loss": 1.2249, + "num_input_tokens_seen": 143246900, + "step": 2141 + }, + { + "epoch": 0.2429503546099291, + "loss": 1.204607367515564, + "loss_ce": 0.009294850751757622, + "loss_iou": 0.48046875, + "loss_num": 0.04638671875, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 143246900, + "step": 2141 + }, + { + "epoch": 0.24306382978723404, + "grad_norm": 41.633872985839844, + "learning_rate": 5e-05, + "loss": 1.4922, + "num_input_tokens_seen": 143314996, + "step": 2142 + }, + { + "epoch": 0.24306382978723404, + "loss": 1.6460351943969727, + "loss_ce": 0.0054101767018437386, + "loss_iou": 0.68359375, + "loss_num": 0.0546875, + "loss_xval": 1.640625, + "num_input_tokens_seen": 143314996, + "step": 2142 + }, + { + "epoch": 0.24317730496453902, + "grad_norm": 29.860036849975586, + "learning_rate": 5e-05, + "loss": 1.344, + "num_input_tokens_seen": 143381916, + "step": 2143 + }, + { + "epoch": 0.24317730496453902, + "loss": 1.2155485153198242, + "loss_ce": 0.0031461953185498714, + "loss_iou": 0.515625, + "loss_num": 0.035888671875, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 143381916, + "step": 2143 + }, + { + "epoch": 0.24329078014184397, + "grad_norm": 26.00592613220215, + "learning_rate": 5e-05, + "loss": 1.6061, + "num_input_tokens_seen": 143449664, + "step": 2144 + }, + { + "epoch": 0.24329078014184397, + "loss": 1.6430656909942627, + "loss_ce": 0.002440624637529254, + "loss_iou": 0.671875, + "loss_num": 0.06005859375, + "loss_xval": 1.640625, + "num_input_tokens_seen": 143449664, + "step": 2144 + }, + { + "epoch": 0.24340425531914894, + "grad_norm": 17.586997985839844, + "learning_rate": 5e-05, + "loss": 1.352, + "num_input_tokens_seen": 143515784, + "step": 2145 + }, + { + "epoch": 0.24340425531914894, + "loss": 1.2578351497650146, + "loss_ce": 0.00685863196849823, + "loss_iou": 0.51953125, + "loss_num": 0.04296875, + "loss_xval": 1.25, + "num_input_tokens_seen": 143515784, + "step": 2145 + }, + { + "epoch": 0.2435177304964539, + "grad_norm": 17.647659301757812, + "learning_rate": 5e-05, + "loss": 1.115, + "num_input_tokens_seen": 143582936, + "step": 2146 + }, + { + "epoch": 0.2435177304964539, + "loss": 1.134355902671814, + "loss_ce": 0.0025199251249432564, + "loss_iou": 0.48046875, + "loss_num": 0.03466796875, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 143582936, + "step": 2146 + }, + { + "epoch": 0.24363120567375887, + "grad_norm": 27.71455192565918, + "learning_rate": 5e-05, + "loss": 1.1469, + "num_input_tokens_seen": 143650080, + "step": 2147 + }, + { + "epoch": 0.24363120567375887, + "loss": 1.1905555725097656, + "loss_ce": 0.005985348951071501, + "loss_iou": 0.515625, + "loss_num": 0.031494140625, + "loss_xval": 1.1875, + "num_input_tokens_seen": 143650080, + "step": 2147 + }, + { + "epoch": 0.24374468085106382, + "grad_norm": 20.923316955566406, + "learning_rate": 5e-05, + "loss": 1.4304, + "num_input_tokens_seen": 143716776, + "step": 2148 + }, + { + "epoch": 0.24374468085106382, + "loss": 1.4875725507736206, + "loss_ce": 0.01003347896039486, + "loss_iou": 0.62890625, + "loss_num": 0.043701171875, + "loss_xval": 1.4765625, + "num_input_tokens_seen": 143716776, + "step": 2148 + }, + { + "epoch": 0.2438581560283688, + "grad_norm": 28.51578712463379, + "learning_rate": 5e-05, + "loss": 1.1653, + "num_input_tokens_seen": 143784244, + "step": 2149 + }, + { + "epoch": 0.2438581560283688, + "loss": 1.0135817527770996, + "loss_ce": 0.006257461383938789, + "loss_iou": 0.4609375, + "loss_num": 0.016845703125, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 143784244, + "step": 2149 + }, + { + "epoch": 0.24397163120567375, + "grad_norm": 29.58493423461914, + "learning_rate": 5e-05, + "loss": 1.3478, + "num_input_tokens_seen": 143850628, + "step": 2150 + }, + { + "epoch": 0.24397163120567375, + "loss": 1.1885493993759155, + "loss_ce": 0.00696979183703661, + "loss_iou": 0.49609375, + "loss_num": 0.037841796875, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 143850628, + "step": 2150 + }, + { + "epoch": 0.24408510638297873, + "grad_norm": 30.462413787841797, + "learning_rate": 5e-05, + "loss": 1.3382, + "num_input_tokens_seen": 143918192, + "step": 2151 + }, + { + "epoch": 0.24408510638297873, + "loss": 1.341451644897461, + "loss_ce": 0.006002428941428661, + "loss_iou": 0.5703125, + "loss_num": 0.038330078125, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 143918192, + "step": 2151 + }, + { + "epoch": 0.24419858156028368, + "grad_norm": 24.80821418762207, + "learning_rate": 5e-05, + "loss": 1.3571, + "num_input_tokens_seen": 143984840, + "step": 2152 + }, + { + "epoch": 0.24419858156028368, + "loss": 1.2618263959884644, + "loss_ce": 0.01081936713308096, + "loss_iou": 0.484375, + "loss_num": 0.056396484375, + "loss_xval": 1.25, + "num_input_tokens_seen": 143984840, + "step": 2152 + }, + { + "epoch": 0.24431205673758866, + "grad_norm": 20.268978118896484, + "learning_rate": 5e-05, + "loss": 1.1767, + "num_input_tokens_seen": 144051684, + "step": 2153 + }, + { + "epoch": 0.24431205673758866, + "loss": 1.0506885051727295, + "loss_ce": 0.0033251806162297726, + "loss_iou": 0.482421875, + "loss_num": 0.0164794921875, + "loss_xval": 1.046875, + "num_input_tokens_seen": 144051684, + "step": 2153 + }, + { + "epoch": 0.2444255319148936, + "grad_norm": 26.563919067382812, + "learning_rate": 5e-05, + "loss": 1.2849, + "num_input_tokens_seen": 144118984, + "step": 2154 + }, + { + "epoch": 0.2444255319148936, + "loss": 1.315901756286621, + "loss_ce": 0.0038900417275726795, + "loss_iou": 0.5703125, + "loss_num": 0.03369140625, + "loss_xval": 1.3125, + "num_input_tokens_seen": 144118984, + "step": 2154 + }, + { + "epoch": 0.24453900709219858, + "grad_norm": 45.42453384399414, + "learning_rate": 5e-05, + "loss": 1.3753, + "num_input_tokens_seen": 144185716, + "step": 2155 + }, + { + "epoch": 0.24453900709219858, + "loss": 1.3388023376464844, + "loss_ce": 0.0038415202870965004, + "loss_iou": 0.59375, + "loss_num": 0.0299072265625, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 144185716, + "step": 2155 + }, + { + "epoch": 0.24465248226950354, + "grad_norm": 25.24507713317871, + "learning_rate": 5e-05, + "loss": 1.38, + "num_input_tokens_seen": 144252536, + "step": 2156 + }, + { + "epoch": 0.24465248226950354, + "loss": 1.504751443862915, + "loss_ce": 0.004751513712108135, + "loss_iou": 0.63671875, + "loss_num": 0.04443359375, + "loss_xval": 1.5, + "num_input_tokens_seen": 144252536, + "step": 2156 + }, + { + "epoch": 0.2447659574468085, + "grad_norm": 15.070289611816406, + "learning_rate": 5e-05, + "loss": 1.1697, + "num_input_tokens_seen": 144319880, + "step": 2157 + }, + { + "epoch": 0.2447659574468085, + "loss": 1.370811939239502, + "loss_ce": 0.003136096987873316, + "loss_iou": 0.5234375, + "loss_num": 0.0634765625, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 144319880, + "step": 2157 + }, + { + "epoch": 0.24487943262411346, + "grad_norm": 34.00269317626953, + "learning_rate": 5e-05, + "loss": 1.3167, + "num_input_tokens_seen": 144387224, + "step": 2158 + }, + { + "epoch": 0.24487943262411346, + "loss": 1.4259899854660034, + "loss_ce": 0.006068069487810135, + "loss_iou": 0.58984375, + "loss_num": 0.04833984375, + "loss_xval": 1.421875, + "num_input_tokens_seen": 144387224, + "step": 2158 + }, + { + "epoch": 0.24499290780141844, + "grad_norm": 31.736610412597656, + "learning_rate": 5e-05, + "loss": 1.7426, + "num_input_tokens_seen": 144454092, + "step": 2159 + }, + { + "epoch": 0.24499290780141844, + "loss": 1.727374792098999, + "loss_ce": 0.004718543495982885, + "loss_iou": 0.73046875, + "loss_num": 0.052734375, + "loss_xval": 1.71875, + "num_input_tokens_seen": 144454092, + "step": 2159 + }, + { + "epoch": 0.2451063829787234, + "grad_norm": 24.190078735351562, + "learning_rate": 5e-05, + "loss": 1.2915, + "num_input_tokens_seen": 144520432, + "step": 2160 + }, + { + "epoch": 0.2451063829787234, + "loss": 1.3363982439041138, + "loss_ce": 0.005587650462985039, + "loss_iou": 0.5234375, + "loss_num": 0.0576171875, + "loss_xval": 1.328125, + "num_input_tokens_seen": 144520432, + "step": 2160 + }, + { + "epoch": 0.24521985815602837, + "grad_norm": 50.16253662109375, + "learning_rate": 5e-05, + "loss": 1.1577, + "num_input_tokens_seen": 144586332, + "step": 2161 + }, + { + "epoch": 0.24521985815602837, + "loss": 1.2751691341400146, + "loss_ce": 0.0022199484519660473, + "loss_iou": 0.54296875, + "loss_num": 0.037109375, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 144586332, + "step": 2161 + }, + { + "epoch": 0.24533333333333332, + "grad_norm": 69.20976257324219, + "learning_rate": 5e-05, + "loss": 1.3227, + "num_input_tokens_seen": 144654276, + "step": 2162 + }, + { + "epoch": 0.24533333333333332, + "loss": 1.3411028385162354, + "loss_ce": 0.0051652854308485985, + "loss_iou": 0.60546875, + "loss_num": 0.025634765625, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 144654276, + "step": 2162 + }, + { + "epoch": 0.2454468085106383, + "grad_norm": 45.61486053466797, + "learning_rate": 5e-05, + "loss": 1.264, + "num_input_tokens_seen": 144721728, + "step": 2163 + }, + { + "epoch": 0.2454468085106383, + "loss": 1.1704434156417847, + "loss_ce": 0.005404401104897261, + "loss_iou": 0.50390625, + "loss_num": 0.031494140625, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 144721728, + "step": 2163 + }, + { + "epoch": 0.24556028368794328, + "grad_norm": 26.780391693115234, + "learning_rate": 5e-05, + "loss": 1.4471, + "num_input_tokens_seen": 144788728, + "step": 2164 + }, + { + "epoch": 0.24556028368794328, + "loss": 1.5584512948989868, + "loss_ce": 0.0027872142381966114, + "loss_iou": 0.62890625, + "loss_num": 0.059326171875, + "loss_xval": 1.5546875, + "num_input_tokens_seen": 144788728, + "step": 2164 + }, + { + "epoch": 0.24567375886524823, + "grad_norm": 29.987899780273438, + "learning_rate": 5e-05, + "loss": 1.21, + "num_input_tokens_seen": 144855544, + "step": 2165 + }, + { + "epoch": 0.24567375886524823, + "loss": 1.1678024530410767, + "loss_ce": 0.012529040686786175, + "loss_iou": 0.41015625, + "loss_num": 0.06689453125, + "loss_xval": 1.15625, + "num_input_tokens_seen": 144855544, + "step": 2165 + }, + { + "epoch": 0.2457872340425532, + "grad_norm": 28.498443603515625, + "learning_rate": 5e-05, + "loss": 1.2045, + "num_input_tokens_seen": 144922068, + "step": 2166 + }, + { + "epoch": 0.2457872340425532, + "loss": 1.2355518341064453, + "loss_ce": 0.005815428681671619, + "loss_iou": 0.48046875, + "loss_num": 0.053466796875, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 144922068, + "step": 2166 + }, + { + "epoch": 0.24590070921985815, + "grad_norm": 82.18123626708984, + "learning_rate": 5e-05, + "loss": 1.4123, + "num_input_tokens_seen": 144988724, + "step": 2167 + }, + { + "epoch": 0.24590070921985815, + "loss": 1.329702377319336, + "loss_ce": 0.005483666900545359, + "loss_iou": 0.5546875, + "loss_num": 0.042236328125, + "loss_xval": 1.328125, + "num_input_tokens_seen": 144988724, + "step": 2167 + }, + { + "epoch": 0.24601418439716313, + "grad_norm": 24.996013641357422, + "learning_rate": 5e-05, + "loss": 1.1896, + "num_input_tokens_seen": 145056124, + "step": 2168 + }, + { + "epoch": 0.24601418439716313, + "loss": 1.314170479774475, + "loss_ce": 0.004600136540830135, + "loss_iou": 0.58203125, + "loss_num": 0.029541015625, + "loss_xval": 1.3125, + "num_input_tokens_seen": 145056124, + "step": 2168 + }, + { + "epoch": 0.24612765957446808, + "grad_norm": 22.875329971313477, + "learning_rate": 5e-05, + "loss": 1.4273, + "num_input_tokens_seen": 145123560, + "step": 2169 + }, + { + "epoch": 0.24612765957446808, + "loss": 1.341855525970459, + "loss_ce": 0.0015844936715438962, + "loss_iou": 0.59375, + "loss_num": 0.031005859375, + "loss_xval": 1.34375, + "num_input_tokens_seen": 145123560, + "step": 2169 + }, + { + "epoch": 0.24624113475177306, + "grad_norm": 37.090213775634766, + "learning_rate": 5e-05, + "loss": 1.5558, + "num_input_tokens_seen": 145189996, + "step": 2170 + }, + { + "epoch": 0.24624113475177306, + "loss": 1.6183197498321533, + "loss_ce": 0.0060150353237986565, + "loss_iou": 0.68359375, + "loss_num": 0.048583984375, + "loss_xval": 1.609375, + "num_input_tokens_seen": 145189996, + "step": 2170 + }, + { + "epoch": 0.246354609929078, + "grad_norm": 28.297693252563477, + "learning_rate": 5e-05, + "loss": 1.5486, + "num_input_tokens_seen": 145256576, + "step": 2171 + }, + { + "epoch": 0.246354609929078, + "loss": 1.588221549987793, + "loss_ce": 0.010584832169115543, + "loss_iou": 0.671875, + "loss_num": 0.046875, + "loss_xval": 1.578125, + "num_input_tokens_seen": 145256576, + "step": 2171 + }, + { + "epoch": 0.246468085106383, + "grad_norm": 50.62351608276367, + "learning_rate": 5e-05, + "loss": 1.253, + "num_input_tokens_seen": 145322168, + "step": 2172 + }, + { + "epoch": 0.246468085106383, + "loss": 1.1264084577560425, + "loss_ce": 0.005314727313816547, + "loss_iou": 0.47265625, + "loss_num": 0.035400390625, + "loss_xval": 1.125, + "num_input_tokens_seen": 145322168, + "step": 2172 + }, + { + "epoch": 0.24658156028368794, + "grad_norm": 36.71396255493164, + "learning_rate": 5e-05, + "loss": 1.0471, + "num_input_tokens_seen": 145388716, + "step": 2173 + }, + { + "epoch": 0.24658156028368794, + "loss": 1.0175095796585083, + "loss_ce": 0.004814271815121174, + "loss_iou": 0.41015625, + "loss_num": 0.0380859375, + "loss_xval": 1.015625, + "num_input_tokens_seen": 145388716, + "step": 2173 + }, + { + "epoch": 0.24669503546099292, + "grad_norm": 29.71967124938965, + "learning_rate": 5e-05, + "loss": 1.55, + "num_input_tokens_seen": 145455764, + "step": 2174 + }, + { + "epoch": 0.24669503546099292, + "loss": 1.5182759761810303, + "loss_ce": 0.006068896967917681, + "loss_iou": 0.64453125, + "loss_num": 0.045166015625, + "loss_xval": 1.515625, + "num_input_tokens_seen": 145455764, + "step": 2174 + }, + { + "epoch": 0.24680851063829787, + "grad_norm": 107.73597717285156, + "learning_rate": 5e-05, + "loss": 1.3301, + "num_input_tokens_seen": 145523296, + "step": 2175 + }, + { + "epoch": 0.24680851063829787, + "loss": 1.4355878829956055, + "loss_ce": 0.004923800006508827, + "loss_iou": 0.56640625, + "loss_num": 0.06005859375, + "loss_xval": 1.4296875, + "num_input_tokens_seen": 145523296, + "step": 2175 + }, + { + "epoch": 0.24692198581560285, + "grad_norm": 52.72064971923828, + "learning_rate": 5e-05, + "loss": 1.1987, + "num_input_tokens_seen": 145591024, + "step": 2176 + }, + { + "epoch": 0.24692198581560285, + "loss": 1.192797064781189, + "loss_ce": 0.006761963479220867, + "loss_iou": 0.515625, + "loss_num": 0.0302734375, + "loss_xval": 1.1875, + "num_input_tokens_seen": 145591024, + "step": 2176 + }, + { + "epoch": 0.2470354609929078, + "grad_norm": 27.491426467895508, + "learning_rate": 5e-05, + "loss": 1.1199, + "num_input_tokens_seen": 145657652, + "step": 2177 + }, + { + "epoch": 0.2470354609929078, + "loss": 0.8249852657318115, + "loss_ce": 0.004428606480360031, + "loss_iou": 0.3359375, + "loss_num": 0.029541015625, + "loss_xval": 0.8203125, + "num_input_tokens_seen": 145657652, + "step": 2177 + }, + { + "epoch": 0.24714893617021277, + "grad_norm": 46.8922233581543, + "learning_rate": 5e-05, + "loss": 1.305, + "num_input_tokens_seen": 145725188, + "step": 2178 + }, + { + "epoch": 0.24714893617021277, + "loss": 1.3793811798095703, + "loss_ce": 0.0038928110152482986, + "loss_iou": 0.58984375, + "loss_num": 0.039306640625, + "loss_xval": 1.375, + "num_input_tokens_seen": 145725188, + "step": 2178 + }, + { + "epoch": 0.24726241134751772, + "grad_norm": 23.521892547607422, + "learning_rate": 5e-05, + "loss": 1.191, + "num_input_tokens_seen": 145792208, + "step": 2179 + }, + { + "epoch": 0.24726241134751772, + "loss": 1.3127291202545166, + "loss_ce": 0.00364706851541996, + "loss_iou": 0.55078125, + "loss_num": 0.042236328125, + "loss_xval": 1.3125, + "num_input_tokens_seen": 145792208, + "step": 2179 + }, + { + "epoch": 0.2473758865248227, + "grad_norm": 41.845802307128906, + "learning_rate": 5e-05, + "loss": 1.1692, + "num_input_tokens_seen": 145859336, + "step": 2180 + }, + { + "epoch": 0.2473758865248227, + "loss": 1.0300614833831787, + "loss_ce": 0.0041824690997600555, + "loss_iou": 0.46484375, + "loss_num": 0.0194091796875, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 145859336, + "step": 2180 + }, + { + "epoch": 0.24748936170212765, + "grad_norm": 31.391944885253906, + "learning_rate": 5e-05, + "loss": 1.2647, + "num_input_tokens_seen": 145925500, + "step": 2181 + }, + { + "epoch": 0.24748936170212765, + "loss": 1.1494044065475464, + "loss_ce": 0.007802807260304689, + "loss_iou": 0.451171875, + "loss_num": 0.048095703125, + "loss_xval": 1.140625, + "num_input_tokens_seen": 145925500, + "step": 2181 + }, + { + "epoch": 0.24760283687943263, + "grad_norm": 25.754077911376953, + "learning_rate": 5e-05, + "loss": 1.3526, + "num_input_tokens_seen": 145991496, + "step": 2182 + }, + { + "epoch": 0.24760283687943263, + "loss": 1.5973864793777466, + "loss_ce": 0.002049621194601059, + "loss_iou": 0.6484375, + "loss_num": 0.05908203125, + "loss_xval": 1.59375, + "num_input_tokens_seen": 145991496, + "step": 2182 + }, + { + "epoch": 0.24771631205673758, + "grad_norm": 18.833040237426758, + "learning_rate": 5e-05, + "loss": 1.0822, + "num_input_tokens_seen": 146059204, + "step": 2183 + }, + { + "epoch": 0.24771631205673758, + "loss": 1.0772690773010254, + "loss_ce": 0.0030504176393151283, + "loss_iou": 0.4296875, + "loss_num": 0.04296875, + "loss_xval": 1.078125, + "num_input_tokens_seen": 146059204, + "step": 2183 + }, + { + "epoch": 0.24782978723404256, + "grad_norm": 43.9871711730957, + "learning_rate": 5e-05, + "loss": 1.4319, + "num_input_tokens_seen": 146126372, + "step": 2184 + }, + { + "epoch": 0.24782978723404256, + "loss": 1.4002448320388794, + "loss_ce": 0.007178472355008125, + "loss_iou": 0.60546875, + "loss_num": 0.036865234375, + "loss_xval": 1.390625, + "num_input_tokens_seen": 146126372, + "step": 2184 + }, + { + "epoch": 0.2479432624113475, + "grad_norm": 43.84333801269531, + "learning_rate": 5e-05, + "loss": 1.5545, + "num_input_tokens_seen": 146193140, + "step": 2185 + }, + { + "epoch": 0.2479432624113475, + "loss": 1.5067074298858643, + "loss_ce": 0.006707493215799332, + "loss_iou": 0.65234375, + "loss_num": 0.038818359375, + "loss_xval": 1.5, + "num_input_tokens_seen": 146193140, + "step": 2185 + }, + { + "epoch": 0.2480567375886525, + "grad_norm": 26.523361206054688, + "learning_rate": 5e-05, + "loss": 1.4542, + "num_input_tokens_seen": 146258724, + "step": 2186 + }, + { + "epoch": 0.2480567375886525, + "loss": 1.278275728225708, + "loss_ce": 0.005005991086363792, + "loss_iou": 0.54296875, + "loss_num": 0.037841796875, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 146258724, + "step": 2186 + }, + { + "epoch": 0.24817021276595744, + "grad_norm": 11.70703411102295, + "learning_rate": 5e-05, + "loss": 1.0333, + "num_input_tokens_seen": 146325000, + "step": 2187 + }, + { + "epoch": 0.24817021276595744, + "loss": 1.0454189777374268, + "loss_ce": 0.005135734565556049, + "loss_iou": 0.44921875, + "loss_num": 0.028564453125, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 146325000, + "step": 2187 + }, + { + "epoch": 0.24828368794326242, + "grad_norm": 66.41378784179688, + "learning_rate": 5e-05, + "loss": 1.2436, + "num_input_tokens_seen": 146391692, + "step": 2188 + }, + { + "epoch": 0.24828368794326242, + "loss": 1.2102359533309937, + "loss_ce": 0.006134379655122757, + "loss_iou": 0.48046875, + "loss_num": 0.048828125, + "loss_xval": 1.203125, + "num_input_tokens_seen": 146391692, + "step": 2188 + }, + { + "epoch": 0.24839716312056737, + "grad_norm": 25.648555755615234, + "learning_rate": 5e-05, + "loss": 1.2843, + "num_input_tokens_seen": 146459876, + "step": 2189 + }, + { + "epoch": 0.24839716312056737, + "loss": 1.3493602275848389, + "loss_ce": 0.004145359620451927, + "loss_iou": 0.55078125, + "loss_num": 0.048095703125, + "loss_xval": 1.34375, + "num_input_tokens_seen": 146459876, + "step": 2189 + }, + { + "epoch": 0.24851063829787234, + "grad_norm": 46.637996673583984, + "learning_rate": 5e-05, + "loss": 1.4342, + "num_input_tokens_seen": 146527384, + "step": 2190 + }, + { + "epoch": 0.24851063829787234, + "loss": 1.3304446935653687, + "loss_ce": 0.008667385205626488, + "loss_iou": 0.60546875, + "loss_num": 0.0228271484375, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 146527384, + "step": 2190 + }, + { + "epoch": 0.2486241134751773, + "grad_norm": 21.29415512084961, + "learning_rate": 5e-05, + "loss": 1.4421, + "num_input_tokens_seen": 146595864, + "step": 2191 + }, + { + "epoch": 0.2486241134751773, + "loss": 1.557183027267456, + "loss_ce": 0.005425149574875832, + "loss_iou": 0.60546875, + "loss_num": 0.06787109375, + "loss_xval": 1.5546875, + "num_input_tokens_seen": 146595864, + "step": 2191 + }, + { + "epoch": 0.24873758865248227, + "grad_norm": 20.550338745117188, + "learning_rate": 5e-05, + "loss": 1.3291, + "num_input_tokens_seen": 146662076, + "step": 2192 + }, + { + "epoch": 0.24873758865248227, + "loss": 1.1073734760284424, + "loss_ce": 0.0069096083752810955, + "loss_iou": 0.423828125, + "loss_num": 0.05029296875, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 146662076, + "step": 2192 + }, + { + "epoch": 0.24885106382978722, + "grad_norm": 29.988027572631836, + "learning_rate": 5e-05, + "loss": 1.5261, + "num_input_tokens_seen": 146729828, + "step": 2193 + }, + { + "epoch": 0.24885106382978722, + "loss": 1.5835144519805908, + "loss_ce": 0.004412836395204067, + "loss_iou": 0.6484375, + "loss_num": 0.056884765625, + "loss_xval": 1.578125, + "num_input_tokens_seen": 146729828, + "step": 2193 + }, + { + "epoch": 0.2489645390070922, + "grad_norm": 26.51302146911621, + "learning_rate": 5e-05, + "loss": 1.3316, + "num_input_tokens_seen": 146796068, + "step": 2194 + }, + { + "epoch": 0.2489645390070922, + "loss": 1.2653329372406006, + "loss_ce": 0.0055674053728580475, + "loss_iou": 0.51953125, + "loss_num": 0.0439453125, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 146796068, + "step": 2194 + }, + { + "epoch": 0.24907801418439715, + "grad_norm": 39.06315612792969, + "learning_rate": 5e-05, + "loss": 1.4854, + "num_input_tokens_seen": 146862640, + "step": 2195 + }, + { + "epoch": 0.24907801418439715, + "loss": 1.521587610244751, + "loss_ce": 0.007915752939879894, + "loss_iou": 0.609375, + "loss_num": 0.058349609375, + "loss_xval": 1.515625, + "num_input_tokens_seen": 146862640, + "step": 2195 + }, + { + "epoch": 0.24919148936170213, + "grad_norm": 25.979732513427734, + "learning_rate": 5e-05, + "loss": 1.5739, + "num_input_tokens_seen": 146929248, + "step": 2196 + }, + { + "epoch": 0.24919148936170213, + "loss": 1.4829380512237549, + "loss_ce": 0.004422470927238464, + "loss_iou": 0.65625, + "loss_num": 0.033203125, + "loss_xval": 1.4765625, + "num_input_tokens_seen": 146929248, + "step": 2196 + }, + { + "epoch": 0.24930496453900708, + "grad_norm": 28.611835479736328, + "learning_rate": 5e-05, + "loss": 1.3686, + "num_input_tokens_seen": 146996152, + "step": 2197 + }, + { + "epoch": 0.24930496453900708, + "loss": 1.2731168270111084, + "loss_ce": 0.009444858878850937, + "loss_iou": 0.498046875, + "loss_num": 0.05322265625, + "loss_xval": 1.265625, + "num_input_tokens_seen": 146996152, + "step": 2197 + }, + { + "epoch": 0.24941843971631206, + "grad_norm": 21.58652687072754, + "learning_rate": 5e-05, + "loss": 1.4038, + "num_input_tokens_seen": 147063116, + "step": 2198 + }, + { + "epoch": 0.24941843971631206, + "loss": 1.5046489238739014, + "loss_ce": 0.005625521764159203, + "loss_iou": 0.625, + "loss_num": 0.04931640625, + "loss_xval": 1.5, + "num_input_tokens_seen": 147063116, + "step": 2198 + }, + { + "epoch": 0.24953191489361704, + "grad_norm": 46.858741760253906, + "learning_rate": 5e-05, + "loss": 1.3319, + "num_input_tokens_seen": 147130500, + "step": 2199 + }, + { + "epoch": 0.24953191489361704, + "loss": 1.4419951438903809, + "loss_ce": 0.01035451889038086, + "loss_iou": 0.59375, + "loss_num": 0.04931640625, + "loss_xval": 1.4296875, + "num_input_tokens_seen": 147130500, + "step": 2199 + }, + { + "epoch": 0.24964539007092199, + "grad_norm": 23.47685432434082, + "learning_rate": 5e-05, + "loss": 1.4546, + "num_input_tokens_seen": 147198296, + "step": 2200 + }, + { + "epoch": 0.24964539007092199, + "loss": 1.5130372047424316, + "loss_ce": 0.009619249030947685, + "loss_iou": 0.6171875, + "loss_num": 0.0537109375, + "loss_xval": 1.5, + "num_input_tokens_seen": 147198296, + "step": 2200 + }, + { + "epoch": 0.24975886524822696, + "grad_norm": 26.805593490600586, + "learning_rate": 5e-05, + "loss": 1.4152, + "num_input_tokens_seen": 147264992, + "step": 2201 + }, + { + "epoch": 0.24975886524822696, + "loss": 1.119579792022705, + "loss_ce": 0.005566146224737167, + "loss_iou": 0.4765625, + "loss_num": 0.03271484375, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 147264992, + "step": 2201 + }, + { + "epoch": 0.2498723404255319, + "grad_norm": 21.90778923034668, + "learning_rate": 5e-05, + "loss": 1.4566, + "num_input_tokens_seen": 147330568, + "step": 2202 + }, + { + "epoch": 0.2498723404255319, + "loss": 1.377942442893982, + "loss_ce": 0.005872141569852829, + "loss_iou": 0.59375, + "loss_num": 0.037353515625, + "loss_xval": 1.375, + "num_input_tokens_seen": 147330568, + "step": 2202 + }, + { + "epoch": 0.2499858156028369, + "grad_norm": 24.026676177978516, + "learning_rate": 5e-05, + "loss": 1.3308, + "num_input_tokens_seen": 147396028, + "step": 2203 + }, + { + "epoch": 0.2499858156028369, + "loss": 1.4125679731369019, + "loss_ce": 0.0038766192737966776, + "loss_iou": 0.58203125, + "loss_num": 0.04931640625, + "loss_xval": 1.40625, + "num_input_tokens_seen": 147396028, + "step": 2203 + }, + { + "epoch": 0.25009929078014187, + "grad_norm": 25.98309326171875, + "learning_rate": 5e-05, + "loss": 1.4913, + "num_input_tokens_seen": 147462740, + "step": 2204 + }, + { + "epoch": 0.25009929078014187, + "loss": 1.6264095306396484, + "loss_ce": 0.009222136810421944, + "loss_iou": 0.703125, + "loss_num": 0.0419921875, + "loss_xval": 1.6171875, + "num_input_tokens_seen": 147462740, + "step": 2204 + }, + { + "epoch": 0.2502127659574468, + "grad_norm": 25.374582290649414, + "learning_rate": 5e-05, + "loss": 1.3066, + "num_input_tokens_seen": 147529268, + "step": 2205 + }, + { + "epoch": 0.2502127659574468, + "loss": 1.2227444648742676, + "loss_ce": 0.00936555489897728, + "loss_iou": 0.484375, + "loss_num": 0.048583984375, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 147529268, + "step": 2205 + }, + { + "epoch": 0.25032624113475177, + "grad_norm": 21.407316207885742, + "learning_rate": 5e-05, + "loss": 1.2379, + "num_input_tokens_seen": 147596128, + "step": 2206 + }, + { + "epoch": 0.25032624113475177, + "loss": 1.2956979274749756, + "loss_ce": 0.0027292650192976, + "loss_iou": 0.5703125, + "loss_num": 0.031005859375, + "loss_xval": 1.296875, + "num_input_tokens_seen": 147596128, + "step": 2206 + }, + { + "epoch": 0.25043971631205675, + "grad_norm": 15.768391609191895, + "learning_rate": 5e-05, + "loss": 1.055, + "num_input_tokens_seen": 147661956, + "step": 2207 + }, + { + "epoch": 0.25043971631205675, + "loss": 1.253631830215454, + "loss_ce": 0.004608343355357647, + "loss_iou": 0.5390625, + "loss_num": 0.034423828125, + "loss_xval": 1.25, + "num_input_tokens_seen": 147661956, + "step": 2207 + }, + { + "epoch": 0.2505531914893617, + "grad_norm": 26.22655487060547, + "learning_rate": 5e-05, + "loss": 1.4919, + "num_input_tokens_seen": 147728788, + "step": 2208 + }, + { + "epoch": 0.2505531914893617, + "loss": 1.2064324617385864, + "loss_ce": 0.00867850985378027, + "loss_iou": 0.49609375, + "loss_num": 0.041015625, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 147728788, + "step": 2208 + }, + { + "epoch": 0.25066666666666665, + "grad_norm": 35.43553161621094, + "learning_rate": 5e-05, + "loss": 1.3017, + "num_input_tokens_seen": 147795604, + "step": 2209 + }, + { + "epoch": 0.25066666666666665, + "loss": 1.2578463554382324, + "loss_ce": 0.003940069116652012, + "loss_iou": 0.5390625, + "loss_num": 0.03466796875, + "loss_xval": 1.25, + "num_input_tokens_seen": 147795604, + "step": 2209 + }, + { + "epoch": 0.2507801418439716, + "grad_norm": 37.75529479980469, + "learning_rate": 5e-05, + "loss": 1.472, + "num_input_tokens_seen": 147862188, + "step": 2210 + }, + { + "epoch": 0.2507801418439716, + "loss": 1.5618634223937988, + "loss_ce": 0.004246204160153866, + "loss_iou": 0.671875, + "loss_num": 0.04248046875, + "loss_xval": 1.5546875, + "num_input_tokens_seen": 147862188, + "step": 2210 + }, + { + "epoch": 0.2508936170212766, + "grad_norm": 17.959150314331055, + "learning_rate": 5e-05, + "loss": 1.1185, + "num_input_tokens_seen": 147929052, + "step": 2211 + }, + { + "epoch": 0.2508936170212766, + "loss": 1.0663630962371826, + "loss_ce": 0.006792728789150715, + "loss_iou": 0.447265625, + "loss_num": 0.03271484375, + "loss_xval": 1.0625, + "num_input_tokens_seen": 147929052, + "step": 2211 + }, + { + "epoch": 0.2510070921985816, + "grad_norm": 27.10935401916504, + "learning_rate": 5e-05, + "loss": 1.2525, + "num_input_tokens_seen": 147996196, + "step": 2212 + }, + { + "epoch": 0.2510070921985816, + "loss": 1.1610233783721924, + "loss_ce": 0.004285055212676525, + "loss_iou": 0.45703125, + "loss_num": 0.048583984375, + "loss_xval": 1.15625, + "num_input_tokens_seen": 147996196, + "step": 2212 + }, + { + "epoch": 0.2511205673758865, + "grad_norm": 44.92762756347656, + "learning_rate": 5e-05, + "loss": 1.3956, + "num_input_tokens_seen": 148063828, + "step": 2213 + }, + { + "epoch": 0.2511205673758865, + "loss": 1.394423246383667, + "loss_ce": 0.007216290570795536, + "loss_iou": 0.5625, + "loss_num": 0.052734375, + "loss_xval": 1.390625, + "num_input_tokens_seen": 148063828, + "step": 2213 + }, + { + "epoch": 0.2512340425531915, + "grad_norm": 25.76276969909668, + "learning_rate": 5e-05, + "loss": 1.358, + "num_input_tokens_seen": 148131052, + "step": 2214 + }, + { + "epoch": 0.2512340425531915, + "loss": 1.3787567615509033, + "loss_ce": 0.004244979005306959, + "loss_iou": 0.625, + "loss_num": 0.0247802734375, + "loss_xval": 1.375, + "num_input_tokens_seen": 148131052, + "step": 2214 + }, + { + "epoch": 0.25134751773049646, + "grad_norm": 14.89449405670166, + "learning_rate": 5e-05, + "loss": 1.1441, + "num_input_tokens_seen": 148197916, + "step": 2215 + }, + { + "epoch": 0.25134751773049646, + "loss": 1.258130669593811, + "loss_ce": 0.0022712962236255407, + "loss_iou": 0.53125, + "loss_num": 0.03857421875, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 148197916, + "step": 2215 + }, + { + "epoch": 0.25146099290780144, + "grad_norm": 18.530439376831055, + "learning_rate": 5e-05, + "loss": 1.2646, + "num_input_tokens_seen": 148266196, + "step": 2216 + }, + { + "epoch": 0.25146099290780144, + "loss": 1.2781310081481934, + "loss_ce": 0.003228757530450821, + "loss_iou": 0.5390625, + "loss_num": 0.03955078125, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 148266196, + "step": 2216 + }, + { + "epoch": 0.25157446808510636, + "grad_norm": 28.9365291595459, + "learning_rate": 5e-05, + "loss": 1.2878, + "num_input_tokens_seen": 148333352, + "step": 2217 + }, + { + "epoch": 0.25157446808510636, + "loss": 1.3443859815597534, + "loss_ce": 0.00551878847181797, + "loss_iou": 0.56640625, + "loss_num": 0.041748046875, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 148333352, + "step": 2217 + }, + { + "epoch": 0.25168794326241134, + "grad_norm": 28.193603515625, + "learning_rate": 5e-05, + "loss": 1.3643, + "num_input_tokens_seen": 148399424, + "step": 2218 + }, + { + "epoch": 0.25168794326241134, + "loss": 1.3798145055770874, + "loss_ce": 0.006767601706087589, + "loss_iou": 0.5703125, + "loss_num": 0.046142578125, + "loss_xval": 1.375, + "num_input_tokens_seen": 148399424, + "step": 2218 + }, + { + "epoch": 0.2518014184397163, + "grad_norm": 32.01054000854492, + "learning_rate": 5e-05, + "loss": 1.1781, + "num_input_tokens_seen": 148466232, + "step": 2219 + }, + { + "epoch": 0.2518014184397163, + "loss": 1.2603956460952759, + "loss_ce": 0.009419109672307968, + "loss_iou": 0.54296875, + "loss_num": 0.03369140625, + "loss_xval": 1.25, + "num_input_tokens_seen": 148466232, + "step": 2219 + }, + { + "epoch": 0.2519148936170213, + "grad_norm": 24.36316680908203, + "learning_rate": 5e-05, + "loss": 1.5772, + "num_input_tokens_seen": 148532960, + "step": 2220 + }, + { + "epoch": 0.2519148936170213, + "loss": 1.4144631624221802, + "loss_ce": 0.006748327985405922, + "loss_iou": 0.6328125, + "loss_num": 0.0279541015625, + "loss_xval": 1.40625, + "num_input_tokens_seen": 148532960, + "step": 2220 + }, + { + "epoch": 0.2520283687943262, + "grad_norm": 17.926921844482422, + "learning_rate": 5e-05, + "loss": 1.0699, + "num_input_tokens_seen": 148600392, + "step": 2221 + }, + { + "epoch": 0.2520283687943262, + "loss": 1.08857262134552, + "loss_ce": 0.00507664680480957, + "loss_iou": 0.4765625, + "loss_num": 0.025634765625, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 148600392, + "step": 2221 + }, + { + "epoch": 0.2521418439716312, + "grad_norm": 19.98357391357422, + "learning_rate": 5e-05, + "loss": 1.26, + "num_input_tokens_seen": 148666812, + "step": 2222 + }, + { + "epoch": 0.2521418439716312, + "loss": 1.44479238986969, + "loss_ce": 0.007780628744512796, + "loss_iou": 0.58984375, + "loss_num": 0.051025390625, + "loss_xval": 1.4375, + "num_input_tokens_seen": 148666812, + "step": 2222 + }, + { + "epoch": 0.2522553191489362, + "grad_norm": 23.592945098876953, + "learning_rate": 5e-05, + "loss": 1.1303, + "num_input_tokens_seen": 148734900, + "step": 2223 + }, + { + "epoch": 0.2522553191489362, + "loss": 1.343103051185608, + "loss_ce": 0.005212480202317238, + "loss_iou": 0.55078125, + "loss_num": 0.04736328125, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 148734900, + "step": 2223 + }, + { + "epoch": 0.25236879432624115, + "grad_norm": 24.11664581298828, + "learning_rate": 5e-05, + "loss": 1.4929, + "num_input_tokens_seen": 148802212, + "step": 2224 + }, + { + "epoch": 0.25236879432624115, + "loss": 1.5274255275726318, + "loss_ce": 0.008870754390954971, + "loss_iou": 0.65234375, + "loss_num": 0.042236328125, + "loss_xval": 1.515625, + "num_input_tokens_seen": 148802212, + "step": 2224 + }, + { + "epoch": 0.2524822695035461, + "grad_norm": 27.282302856445312, + "learning_rate": 5e-05, + "loss": 1.2031, + "num_input_tokens_seen": 148868644, + "step": 2225 + }, + { + "epoch": 0.2524822695035461, + "loss": 1.3311336040496826, + "loss_ce": 0.006426509469747543, + "loss_iou": 0.546875, + "loss_num": 0.045654296875, + "loss_xval": 1.328125, + "num_input_tokens_seen": 148868644, + "step": 2225 + }, + { + "epoch": 0.25259574468085105, + "grad_norm": 31.737422943115234, + "learning_rate": 5e-05, + "loss": 1.3671, + "num_input_tokens_seen": 148935572, + "step": 2226 + }, + { + "epoch": 0.25259574468085105, + "loss": 1.3335036039352417, + "loss_ce": 0.006355207413434982, + "loss_iou": 0.578125, + "loss_num": 0.033935546875, + "loss_xval": 1.328125, + "num_input_tokens_seen": 148935572, + "step": 2226 + }, + { + "epoch": 0.25270921985815603, + "grad_norm": 39.060184478759766, + "learning_rate": 5e-05, + "loss": 1.4564, + "num_input_tokens_seen": 149002084, + "step": 2227 + }, + { + "epoch": 0.25270921985815603, + "loss": 1.4468060731887817, + "loss_ce": 0.0024701408110558987, + "loss_iou": 0.61328125, + "loss_num": 0.044189453125, + "loss_xval": 1.4453125, + "num_input_tokens_seen": 149002084, + "step": 2227 + }, + { + "epoch": 0.252822695035461, + "grad_norm": 23.41057014465332, + "learning_rate": 5e-05, + "loss": 1.4909, + "num_input_tokens_seen": 149069300, + "step": 2228 + }, + { + "epoch": 0.252822695035461, + "loss": 1.4493523836135864, + "loss_ce": 0.004039914347231388, + "loss_iou": 0.640625, + "loss_num": 0.03271484375, + "loss_xval": 1.4453125, + "num_input_tokens_seen": 149069300, + "step": 2228 + }, + { + "epoch": 0.25293617021276593, + "grad_norm": 19.01471519470215, + "learning_rate": 5e-05, + "loss": 1.1736, + "num_input_tokens_seen": 149136132, + "step": 2229 + }, + { + "epoch": 0.25293617021276593, + "loss": 1.328519344329834, + "loss_ce": 0.004300503060221672, + "loss_iou": 0.5390625, + "loss_num": 0.048828125, + "loss_xval": 1.328125, + "num_input_tokens_seen": 149136132, + "step": 2229 + }, + { + "epoch": 0.2530496453900709, + "grad_norm": 24.29515838623047, + "learning_rate": 5e-05, + "loss": 1.221, + "num_input_tokens_seen": 149202880, + "step": 2230 + }, + { + "epoch": 0.2530496453900709, + "loss": 1.148925542831421, + "loss_ce": 0.002441170159727335, + "loss_iou": 0.50390625, + "loss_num": 0.0272216796875, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 149202880, + "step": 2230 + }, + { + "epoch": 0.2531631205673759, + "grad_norm": 24.351245880126953, + "learning_rate": 5e-05, + "loss": 1.2659, + "num_input_tokens_seen": 149270640, + "step": 2231 + }, + { + "epoch": 0.2531631205673759, + "loss": 1.268079161643982, + "loss_ce": 0.004407328553497791, + "loss_iou": 0.515625, + "loss_num": 0.047119140625, + "loss_xval": 1.265625, + "num_input_tokens_seen": 149270640, + "step": 2231 + }, + { + "epoch": 0.25327659574468087, + "grad_norm": 30.239200592041016, + "learning_rate": 5e-05, + "loss": 1.2985, + "num_input_tokens_seen": 149337352, + "step": 2232 + }, + { + "epoch": 0.25327659574468087, + "loss": 1.3245313167572021, + "loss_ce": 0.006660324987024069, + "loss_iou": 0.59765625, + "loss_num": 0.025390625, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 149337352, + "step": 2232 + }, + { + "epoch": 0.2533900709219858, + "grad_norm": 36.6585578918457, + "learning_rate": 5e-05, + "loss": 1.2473, + "num_input_tokens_seen": 149403736, + "step": 2233 + }, + { + "epoch": 0.2533900709219858, + "loss": 1.148462176322937, + "loss_ce": 0.011499189771711826, + "loss_iou": 0.48828125, + "loss_num": 0.0322265625, + "loss_xval": 1.140625, + "num_input_tokens_seen": 149403736, + "step": 2233 + }, + { + "epoch": 0.25350354609929077, + "grad_norm": 39.06591796875, + "learning_rate": 5e-05, + "loss": 1.4845, + "num_input_tokens_seen": 149471176, + "step": 2234 + }, + { + "epoch": 0.25350354609929077, + "loss": 1.5175731182098389, + "loss_ce": 0.003901340998709202, + "loss_iou": 0.65625, + "loss_num": 0.0400390625, + "loss_xval": 1.515625, + "num_input_tokens_seen": 149471176, + "step": 2234 + }, + { + "epoch": 0.25361702127659574, + "grad_norm": 40.609195709228516, + "learning_rate": 5e-05, + "loss": 1.2491, + "num_input_tokens_seen": 149538668, + "step": 2235 + }, + { + "epoch": 0.25361702127659574, + "loss": 1.1660590171813965, + "loss_ce": 0.0073675736784935, + "loss_iou": 0.51171875, + "loss_num": 0.0274658203125, + "loss_xval": 1.15625, + "num_input_tokens_seen": 149538668, + "step": 2235 + }, + { + "epoch": 0.2537304964539007, + "grad_norm": 25.697935104370117, + "learning_rate": 5e-05, + "loss": 1.4122, + "num_input_tokens_seen": 149605248, + "step": 2236 + }, + { + "epoch": 0.2537304964539007, + "loss": 1.4400420188903809, + "loss_ce": 0.008889739401638508, + "loss_iou": 0.62109375, + "loss_num": 0.0380859375, + "loss_xval": 1.4296875, + "num_input_tokens_seen": 149605248, + "step": 2236 + }, + { + "epoch": 0.2538439716312057, + "grad_norm": 27.417619705200195, + "learning_rate": 5e-05, + "loss": 1.207, + "num_input_tokens_seen": 149673136, + "step": 2237 + }, + { + "epoch": 0.2538439716312057, + "loss": 1.1390657424926758, + "loss_ce": 0.0072298613376915455, + "loss_iou": 0.462890625, + "loss_num": 0.04150390625, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 149673136, + "step": 2237 + }, + { + "epoch": 0.2539574468085106, + "grad_norm": 22.526805877685547, + "learning_rate": 5e-05, + "loss": 1.0843, + "num_input_tokens_seen": 149738040, + "step": 2238 + }, + { + "epoch": 0.2539574468085106, + "loss": 1.2068305015563965, + "loss_ce": 0.0066352728754282, + "loss_iou": 0.46875, + "loss_num": 0.052978515625, + "loss_xval": 1.203125, + "num_input_tokens_seen": 149738040, + "step": 2238 + }, + { + "epoch": 0.2540709219858156, + "grad_norm": 33.953369140625, + "learning_rate": 5e-05, + "loss": 1.263, + "num_input_tokens_seen": 149805780, + "step": 2239 + }, + { + "epoch": 0.2540709219858156, + "loss": 1.0557653903961182, + "loss_ce": 0.00498407706618309, + "loss_iou": 0.4609375, + "loss_num": 0.0255126953125, + "loss_xval": 1.046875, + "num_input_tokens_seen": 149805780, + "step": 2239 + }, + { + "epoch": 0.2541843971631206, + "grad_norm": 40.7319221496582, + "learning_rate": 5e-05, + "loss": 1.3908, + "num_input_tokens_seen": 149873072, + "step": 2240 + }, + { + "epoch": 0.2541843971631206, + "loss": 1.318683385848999, + "loss_ce": 0.002277248539030552, + "loss_iou": 0.578125, + "loss_num": 0.031494140625, + "loss_xval": 1.3125, + "num_input_tokens_seen": 149873072, + "step": 2240 + }, + { + "epoch": 0.25429787234042556, + "grad_norm": 27.631837844848633, + "learning_rate": 5e-05, + "loss": 1.3438, + "num_input_tokens_seen": 149940064, + "step": 2241 + }, + { + "epoch": 0.25429787234042556, + "loss": 1.4892442226409912, + "loss_ce": 0.005845748819410801, + "loss_iou": 0.6171875, + "loss_num": 0.05078125, + "loss_xval": 1.484375, + "num_input_tokens_seen": 149940064, + "step": 2241 + }, + { + "epoch": 0.2544113475177305, + "grad_norm": 27.571434020996094, + "learning_rate": 5e-05, + "loss": 1.3883, + "num_input_tokens_seen": 150006876, + "step": 2242 + }, + { + "epoch": 0.2544113475177305, + "loss": 1.30314040184021, + "loss_ce": 0.009195062331855297, + "loss_iou": 0.56640625, + "loss_num": 0.031982421875, + "loss_xval": 1.296875, + "num_input_tokens_seen": 150006876, + "step": 2242 + }, + { + "epoch": 0.25452482269503546, + "grad_norm": 55.708553314208984, + "learning_rate": 5e-05, + "loss": 1.3843, + "num_input_tokens_seen": 150074780, + "step": 2243 + }, + { + "epoch": 0.25452482269503546, + "loss": 1.5354691743850708, + "loss_ce": 0.006660575978457928, + "loss_iou": 0.63671875, + "loss_num": 0.051025390625, + "loss_xval": 1.53125, + "num_input_tokens_seen": 150074780, + "step": 2243 + }, + { + "epoch": 0.25463829787234044, + "grad_norm": 35.14090347290039, + "learning_rate": 5e-05, + "loss": 1.3442, + "num_input_tokens_seen": 150142380, + "step": 2244 + }, + { + "epoch": 0.25463829787234044, + "loss": 1.422736406326294, + "loss_ce": 0.0033027243334800005, + "loss_iou": 0.57421875, + "loss_num": 0.054931640625, + "loss_xval": 1.421875, + "num_input_tokens_seen": 150142380, + "step": 2244 + }, + { + "epoch": 0.2547517730496454, + "grad_norm": 39.768272399902344, + "learning_rate": 5e-05, + "loss": 1.5143, + "num_input_tokens_seen": 150208836, + "step": 2245 + }, + { + "epoch": 0.2547517730496454, + "loss": 1.2619538307189941, + "loss_ce": 0.0041412729769945145, + "loss_iou": 0.49609375, + "loss_num": 0.053466796875, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 150208836, + "step": 2245 + }, + { + "epoch": 0.25486524822695034, + "grad_norm": 25.840503692626953, + "learning_rate": 5e-05, + "loss": 1.3078, + "num_input_tokens_seen": 150276164, + "step": 2246 + }, + { + "epoch": 0.25486524822695034, + "loss": 1.4300901889801025, + "loss_ce": 0.008215243928134441, + "loss_iou": 0.6171875, + "loss_num": 0.038330078125, + "loss_xval": 1.421875, + "num_input_tokens_seen": 150276164, + "step": 2246 + }, + { + "epoch": 0.2549787234042553, + "grad_norm": 30.003398895263672, + "learning_rate": 5e-05, + "loss": 1.35, + "num_input_tokens_seen": 150342656, + "step": 2247 + }, + { + "epoch": 0.2549787234042553, + "loss": 1.3411531448364258, + "loss_ce": 0.007046752609312534, + "loss_iou": 0.54296875, + "loss_num": 0.04931640625, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 150342656, + "step": 2247 + }, + { + "epoch": 0.2550921985815603, + "grad_norm": 22.562026977539062, + "learning_rate": 5e-05, + "loss": 1.2298, + "num_input_tokens_seen": 150410692, + "step": 2248 + }, + { + "epoch": 0.2550921985815603, + "loss": 1.1202547550201416, + "loss_ce": 0.0035554012283682823, + "loss_iou": 0.482421875, + "loss_num": 0.0306396484375, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 150410692, + "step": 2248 + }, + { + "epoch": 0.25520567375886527, + "grad_norm": 24.78208351135254, + "learning_rate": 5e-05, + "loss": 1.2284, + "num_input_tokens_seen": 150476928, + "step": 2249 + }, + { + "epoch": 0.25520567375886527, + "loss": 1.2281947135925293, + "loss_ce": 0.005538458004593849, + "loss_iou": 0.51953125, + "loss_num": 0.0361328125, + "loss_xval": 1.21875, + "num_input_tokens_seen": 150476928, + "step": 2249 + }, + { + "epoch": 0.2553191489361702, + "grad_norm": 28.508895874023438, + "learning_rate": 5e-05, + "loss": 1.2614, + "num_input_tokens_seen": 150543780, + "step": 2250 + }, + { + "epoch": 0.2553191489361702, + "eval_seeclick_CIoU": 0.35964788496494293, + "eval_seeclick_GIoU": 0.3426017314195633, + "eval_seeclick_IoU": 0.44984452426433563, + "eval_seeclick_MAE_all": 0.1581539586186409, + "eval_seeclick_MAE_h": 0.0829925574362278, + "eval_seeclick_MAE_w": 0.13946590945124626, + "eval_seeclick_MAE_x_boxes": 0.2588537633419037, + "eval_seeclick_MAE_y_boxes": 0.109920434653759, + "eval_seeclick_NUM_probability": 0.9992623031139374, + "eval_seeclick_inside_bbox": 0.6770833432674408, + "eval_seeclick_loss": 2.5886716842651367, + "eval_seeclick_loss_ce": 0.01619507558643818, + "eval_seeclick_loss_iou": 0.911865234375, + "eval_seeclick_loss_num": 0.1614990234375, + "eval_seeclick_loss_xval": 2.63232421875, + "eval_seeclick_runtime": 68.292, + "eval_seeclick_samples_per_second": 0.688, + "eval_seeclick_steps_per_second": 0.029, + "num_input_tokens_seen": 150543780, + "step": 2250 + }, + { + "epoch": 0.2553191489361702, + "eval_icons_CIoU": 0.43926337361335754, + "eval_icons_GIoU": 0.42812177538871765, + "eval_icons_IoU": 0.4921083152294159, + "eval_icons_MAE_all": 0.15488801524043083, + "eval_icons_MAE_h": 0.11082700453698635, + "eval_icons_MAE_w": 0.15391966700553894, + "eval_icons_MAE_x_boxes": 0.10545464605093002, + "eval_icons_MAE_y_boxes": 0.12813004665076733, + "eval_icons_NUM_probability": 0.9998693764209747, + "eval_icons_inside_bbox": 0.6805555522441864, + "eval_icons_loss": 2.50883150100708, + "eval_icons_loss_ce": 7.972286039148457e-05, + "eval_icons_loss_iou": 0.91064453125, + "eval_icons_loss_num": 0.12891006469726562, + "eval_icons_loss_xval": 2.466796875, + "eval_icons_runtime": 70.397, + "eval_icons_samples_per_second": 0.71, + "eval_icons_steps_per_second": 0.028, + "num_input_tokens_seen": 150543780, + "step": 2250 + }, + { + "epoch": 0.2553191489361702, + "eval_screenspot_CIoU": 0.34274056553840637, + "eval_screenspot_GIoU": 0.2952997287114461, + "eval_screenspot_IoU": 0.4251477320988973, + "eval_screenspot_MAE_all": 0.1866831729809443, + "eval_screenspot_MAE_h": 0.09984683742125829, + "eval_screenspot_MAE_w": 0.17880670726299286, + "eval_screenspot_MAE_x_boxes": 0.23260989288489023, + "eval_screenspot_MAE_y_boxes": 0.1375646044810613, + "eval_screenspot_NUM_probability": 0.9997116525967916, + "eval_screenspot_inside_bbox": 0.6254166762034098, + "eval_screenspot_loss": 2.8727476596832275, + "eval_screenspot_loss_ce": 0.01665436290204525, + "eval_screenspot_loss_iou": 0.9658203125, + "eval_screenspot_loss_num": 0.19777425130208334, + "eval_screenspot_loss_xval": 2.9208984375, + "eval_screenspot_runtime": 124.2738, + "eval_screenspot_samples_per_second": 0.716, + "eval_screenspot_steps_per_second": 0.024, + "num_input_tokens_seen": 150543780, + "step": 2250 + }, + { + "epoch": 0.2553191489361702, + "eval_compot_CIoU": 0.3386107087135315, + "eval_compot_GIoU": 0.30266381800174713, + "eval_compot_IoU": 0.4394567012786865, + "eval_compot_MAE_all": 0.16634799540042877, + "eval_compot_MAE_h": 0.06553464196622372, + "eval_compot_MAE_w": 0.15342193096876144, + "eval_compot_MAE_x_boxes": 0.18113382160663605, + "eval_compot_MAE_y_boxes": 0.1568208560347557, + "eval_compot_NUM_probability": 0.9997806251049042, + "eval_compot_inside_bbox": 0.5746527910232544, + "eval_compot_loss": 2.719686508178711, + "eval_compot_loss_ce": 0.006127081578597426, + "eval_compot_loss_iou": 0.93994140625, + "eval_compot_loss_num": 0.159423828125, + "eval_compot_loss_xval": 2.6787109375, + "eval_compot_runtime": 70.77, + "eval_compot_samples_per_second": 0.707, + "eval_compot_steps_per_second": 0.028, + "num_input_tokens_seen": 150543780, + "step": 2250 + }, + { + "epoch": 0.2553191489361702, + "loss": 2.5965189933776855, + "loss_ce": 0.005698634311556816, + "loss_iou": 0.9296875, + "loss_num": 0.1455078125, + "loss_xval": 2.59375, + "num_input_tokens_seen": 150543780, + "step": 2250 + }, + { + "epoch": 0.25543262411347517, + "grad_norm": 34.06513214111328, + "learning_rate": 5e-05, + "loss": 1.2634, + "num_input_tokens_seen": 150610832, + "step": 2251 + }, + { + "epoch": 0.25543262411347517, + "loss": 1.2528430223464966, + "loss_ce": 0.006749236956238747, + "loss_iou": 0.546875, + "loss_num": 0.0306396484375, + "loss_xval": 1.25, + "num_input_tokens_seen": 150610832, + "step": 2251 + }, + { + "epoch": 0.25554609929078015, + "grad_norm": 27.10631561279297, + "learning_rate": 5e-05, + "loss": 1.5234, + "num_input_tokens_seen": 150677744, + "step": 2252 + }, + { + "epoch": 0.25554609929078015, + "loss": 1.445225477218628, + "loss_ce": 0.004795846529304981, + "loss_iou": 0.61328125, + "loss_num": 0.04296875, + "loss_xval": 1.4375, + "num_input_tokens_seen": 150677744, + "step": 2252 + }, + { + "epoch": 0.2556595744680851, + "grad_norm": 15.889168739318848, + "learning_rate": 5e-05, + "loss": 1.2283, + "num_input_tokens_seen": 150744884, + "step": 2253 + }, + { + "epoch": 0.2556595744680851, + "loss": 1.373274326324463, + "loss_ce": 0.00803997553884983, + "loss_iou": 0.5703125, + "loss_num": 0.04443359375, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 150744884, + "step": 2253 + }, + { + "epoch": 0.25577304964539005, + "grad_norm": 31.94011688232422, + "learning_rate": 5e-05, + "loss": 1.3257, + "num_input_tokens_seen": 150811924, + "step": 2254 + }, + { + "epoch": 0.25577304964539005, + "loss": 1.2419826984405518, + "loss_ce": 0.0032131322659552097, + "loss_iou": 0.52734375, + "loss_num": 0.036376953125, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 150811924, + "step": 2254 + }, + { + "epoch": 0.255886524822695, + "grad_norm": 44.02924728393555, + "learning_rate": 5e-05, + "loss": 1.3818, + "num_input_tokens_seen": 150878732, + "step": 2255 + }, + { + "epoch": 0.255886524822695, + "loss": 1.278606653213501, + "loss_ce": 0.0046808309853076935, + "loss_iou": 0.5546875, + "loss_num": 0.032958984375, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 150878732, + "step": 2255 + }, + { + "epoch": 0.256, + "grad_norm": 25.573163986206055, + "learning_rate": 5e-05, + "loss": 1.773, + "num_input_tokens_seen": 150944988, + "step": 2256 + }, + { + "epoch": 0.256, + "loss": 1.8631565570831299, + "loss_ce": 0.009640871547162533, + "loss_iou": 0.7890625, + "loss_num": 0.0556640625, + "loss_xval": 1.8515625, + "num_input_tokens_seen": 150944988, + "step": 2256 + }, + { + "epoch": 0.256113475177305, + "grad_norm": 20.14813804626465, + "learning_rate": 5e-05, + "loss": 1.1372, + "num_input_tokens_seen": 151011332, + "step": 2257 + }, + { + "epoch": 0.256113475177305, + "loss": 1.085414171218872, + "loss_ce": 0.003382842056453228, + "loss_iou": 0.484375, + "loss_num": 0.02294921875, + "loss_xval": 1.078125, + "num_input_tokens_seen": 151011332, + "step": 2257 + }, + { + "epoch": 0.2562269503546099, + "grad_norm": 29.773502349853516, + "learning_rate": 5e-05, + "loss": 1.3868, + "num_input_tokens_seen": 151078264, + "step": 2258 + }, + { + "epoch": 0.2562269503546099, + "loss": 1.6100711822509766, + "loss_ce": 0.005579004064202309, + "loss_iou": 0.6640625, + "loss_num": 0.0546875, + "loss_xval": 1.6015625, + "num_input_tokens_seen": 151078264, + "step": 2258 + }, + { + "epoch": 0.2563404255319149, + "grad_norm": 28.497400283813477, + "learning_rate": 5e-05, + "loss": 1.7307, + "num_input_tokens_seen": 151146160, + "step": 2259 + }, + { + "epoch": 0.2563404255319149, + "loss": 1.9740512371063232, + "loss_ce": 0.008231034502387047, + "loss_iou": 0.828125, + "loss_num": 0.061767578125, + "loss_xval": 1.96875, + "num_input_tokens_seen": 151146160, + "step": 2259 + }, + { + "epoch": 0.25645390070921986, + "grad_norm": 37.22575378417969, + "learning_rate": 5e-05, + "loss": 1.1186, + "num_input_tokens_seen": 151213544, + "step": 2260 + }, + { + "epoch": 0.25645390070921986, + "loss": 1.0753874778747559, + "loss_ce": 0.0036101676523685455, + "loss_iou": 0.478515625, + "loss_num": 0.0225830078125, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 151213544, + "step": 2260 + }, + { + "epoch": 0.25656737588652484, + "grad_norm": 27.255552291870117, + "learning_rate": 5e-05, + "loss": 1.355, + "num_input_tokens_seen": 151280640, + "step": 2261 + }, + { + "epoch": 0.25656737588652484, + "loss": 1.3972790241241455, + "loss_ce": 0.002747801598161459, + "loss_iou": 0.60546875, + "loss_num": 0.037353515625, + "loss_xval": 1.390625, + "num_input_tokens_seen": 151280640, + "step": 2261 + }, + { + "epoch": 0.25668085106382976, + "grad_norm": 30.606510162353516, + "learning_rate": 5e-05, + "loss": 1.1807, + "num_input_tokens_seen": 151347108, + "step": 2262 + }, + { + "epoch": 0.25668085106382976, + "loss": 1.1021233797073364, + "loss_ce": 0.0020256717689335346, + "loss_iou": 0.48046875, + "loss_num": 0.0279541015625, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 151347108, + "step": 2262 + }, + { + "epoch": 0.25679432624113474, + "grad_norm": 33.784912109375, + "learning_rate": 5e-05, + "loss": 1.263, + "num_input_tokens_seen": 151414132, + "step": 2263 + }, + { + "epoch": 0.25679432624113474, + "loss": 1.2680449485778809, + "loss_ce": 0.003396520623937249, + "loss_iou": 0.5390625, + "loss_num": 0.037109375, + "loss_xval": 1.265625, + "num_input_tokens_seen": 151414132, + "step": 2263 + }, + { + "epoch": 0.2569078014184397, + "grad_norm": 30.072460174560547, + "learning_rate": 5e-05, + "loss": 1.2743, + "num_input_tokens_seen": 151481556, + "step": 2264 + }, + { + "epoch": 0.2569078014184397, + "loss": 1.2581642866134644, + "loss_ce": 0.0018166124355047941, + "loss_iou": 0.53515625, + "loss_num": 0.036865234375, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 151481556, + "step": 2264 + }, + { + "epoch": 0.2570212765957447, + "grad_norm": 31.63279914855957, + "learning_rate": 5e-05, + "loss": 1.3992, + "num_input_tokens_seen": 151547380, + "step": 2265 + }, + { + "epoch": 0.2570212765957447, + "loss": 1.7367693185806274, + "loss_ce": 0.007277166470885277, + "loss_iou": 0.7109375, + "loss_num": 0.0625, + "loss_xval": 1.7265625, + "num_input_tokens_seen": 151547380, + "step": 2265 + }, + { + "epoch": 0.2571347517730496, + "grad_norm": 27.996383666992188, + "learning_rate": 5e-05, + "loss": 1.502, + "num_input_tokens_seen": 151614124, + "step": 2266 + }, + { + "epoch": 0.2571347517730496, + "loss": 1.5692903995513916, + "loss_ce": 0.0077669741585850716, + "loss_iou": 0.67578125, + "loss_num": 0.04150390625, + "loss_xval": 1.5625, + "num_input_tokens_seen": 151614124, + "step": 2266 + }, + { + "epoch": 0.2572482269503546, + "grad_norm": 19.260160446166992, + "learning_rate": 5e-05, + "loss": 1.4641, + "num_input_tokens_seen": 151680720, + "step": 2267 + }, + { + "epoch": 0.2572482269503546, + "loss": 1.6274292469024658, + "loss_ce": 0.0082886116579175, + "loss_iou": 0.609375, + "loss_num": 0.0791015625, + "loss_xval": 1.6171875, + "num_input_tokens_seen": 151680720, + "step": 2267 + }, + { + "epoch": 0.2573617021276596, + "grad_norm": 39.1953125, + "learning_rate": 5e-05, + "loss": 1.5651, + "num_input_tokens_seen": 151746924, + "step": 2268 + }, + { + "epoch": 0.2573617021276596, + "loss": 1.799630045890808, + "loss_ce": 0.006661265157163143, + "loss_iou": 0.71875, + "loss_num": 0.0712890625, + "loss_xval": 1.796875, + "num_input_tokens_seen": 151746924, + "step": 2268 + }, + { + "epoch": 0.25747517730496455, + "grad_norm": 27.67774200439453, + "learning_rate": 5e-05, + "loss": 1.1871, + "num_input_tokens_seen": 151813828, + "step": 2269 + }, + { + "epoch": 0.25747517730496455, + "loss": 1.0532206296920776, + "loss_ce": 0.00829877145588398, + "loss_iou": 0.439453125, + "loss_num": 0.032958984375, + "loss_xval": 1.046875, + "num_input_tokens_seen": 151813828, + "step": 2269 + }, + { + "epoch": 0.25758865248226953, + "grad_norm": 25.21343421936035, + "learning_rate": 5e-05, + "loss": 1.3461, + "num_input_tokens_seen": 151881516, + "step": 2270 + }, + { + "epoch": 0.25758865248226953, + "loss": 1.463045358657837, + "loss_ce": 0.006502390373498201, + "loss_iou": 0.61328125, + "loss_num": 0.04541015625, + "loss_xval": 1.453125, + "num_input_tokens_seen": 151881516, + "step": 2270 + }, + { + "epoch": 0.25770212765957445, + "grad_norm": 23.208709716796875, + "learning_rate": 5e-05, + "loss": 1.1946, + "num_input_tokens_seen": 151947456, + "step": 2271 + }, + { + "epoch": 0.25770212765957445, + "loss": 0.9938951730728149, + "loss_ce": 0.0048204632475972176, + "loss_iou": 0.41015625, + "loss_num": 0.03369140625, + "loss_xval": 0.98828125, + "num_input_tokens_seen": 151947456, + "step": 2271 + }, + { + "epoch": 0.25781560283687943, + "grad_norm": 17.64348030090332, + "learning_rate": 5e-05, + "loss": 1.1562, + "num_input_tokens_seen": 152013544, + "step": 2272 + }, + { + "epoch": 0.25781560283687943, + "loss": 1.0695902109146118, + "loss_ce": 0.0031838971190154552, + "loss_iou": 0.443359375, + "loss_num": 0.035888671875, + "loss_xval": 1.0625, + "num_input_tokens_seen": 152013544, + "step": 2272 + }, + { + "epoch": 0.2579290780141844, + "grad_norm": 16.7410945892334, + "learning_rate": 5e-05, + "loss": 1.217, + "num_input_tokens_seen": 152079820, + "step": 2273 + }, + { + "epoch": 0.2579290780141844, + "loss": 0.942825198173523, + "loss_ce": 0.0027617320884019136, + "loss_iou": 0.40625, + "loss_num": 0.02587890625, + "loss_xval": 0.94140625, + "num_input_tokens_seen": 152079820, + "step": 2273 + }, + { + "epoch": 0.2580425531914894, + "grad_norm": 25.72906494140625, + "learning_rate": 5e-05, + "loss": 1.2062, + "num_input_tokens_seen": 152146884, + "step": 2274 + }, + { + "epoch": 0.2580425531914894, + "loss": 1.3677071332931519, + "loss_ce": 0.005402416922152042, + "loss_iou": 0.59765625, + "loss_num": 0.032958984375, + "loss_xval": 1.359375, + "num_input_tokens_seen": 152146884, + "step": 2274 + }, + { + "epoch": 0.2581560283687943, + "grad_norm": 44.277645111083984, + "learning_rate": 5e-05, + "loss": 1.7715, + "num_input_tokens_seen": 152213888, + "step": 2275 + }, + { + "epoch": 0.2581560283687943, + "loss": 1.6330158710479736, + "loss_ce": 0.0075276559218764305, + "loss_iou": 0.6796875, + "loss_num": 0.053466796875, + "loss_xval": 1.625, + "num_input_tokens_seen": 152213888, + "step": 2275 + }, + { + "epoch": 0.2582695035460993, + "grad_norm": 62.95450973510742, + "learning_rate": 5e-05, + "loss": 1.7307, + "num_input_tokens_seen": 152280688, + "step": 2276 + }, + { + "epoch": 0.2582695035460993, + "loss": 1.6810766458511353, + "loss_ce": 0.007248427718877792, + "loss_iou": 0.70703125, + "loss_num": 0.05224609375, + "loss_xval": 1.671875, + "num_input_tokens_seen": 152280688, + "step": 2276 + }, + { + "epoch": 0.25838297872340427, + "grad_norm": 14.439287185668945, + "learning_rate": 5e-05, + "loss": 1.2184, + "num_input_tokens_seen": 152346136, + "step": 2277 + }, + { + "epoch": 0.25838297872340427, + "loss": 1.325469970703125, + "loss_ce": 0.005157497711479664, + "loss_iou": 0.5625, + "loss_num": 0.0390625, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 152346136, + "step": 2277 + }, + { + "epoch": 0.25849645390070924, + "grad_norm": 19.33851432800293, + "learning_rate": 5e-05, + "loss": 1.1496, + "num_input_tokens_seen": 152412512, + "step": 2278 + }, + { + "epoch": 0.25849645390070924, + "loss": 0.9638797044754028, + "loss_ce": 0.0024539409205317497, + "loss_iou": 0.408203125, + "loss_num": 0.0289306640625, + "loss_xval": 0.9609375, + "num_input_tokens_seen": 152412512, + "step": 2278 + }, + { + "epoch": 0.25860992907801417, + "grad_norm": 24.391357421875, + "learning_rate": 5e-05, + "loss": 1.2178, + "num_input_tokens_seen": 152479128, + "step": 2279 + }, + { + "epoch": 0.25860992907801417, + "loss": 1.2819761037826538, + "loss_ce": 0.004632395692169666, + "loss_iou": 0.5625, + "loss_num": 0.03125, + "loss_xval": 1.28125, + "num_input_tokens_seen": 152479128, + "step": 2279 + }, + { + "epoch": 0.25872340425531914, + "grad_norm": 28.839702606201172, + "learning_rate": 5e-05, + "loss": 1.2566, + "num_input_tokens_seen": 152545648, + "step": 2280 + }, + { + "epoch": 0.25872340425531914, + "loss": 1.200348138809204, + "loss_ce": 0.004059064667671919, + "loss_iou": 0.46875, + "loss_num": 0.051513671875, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 152545648, + "step": 2280 + }, + { + "epoch": 0.2588368794326241, + "grad_norm": 43.55033493041992, + "learning_rate": 5e-05, + "loss": 1.3099, + "num_input_tokens_seen": 152612348, + "step": 2281 + }, + { + "epoch": 0.2588368794326241, + "loss": 1.3303184509277344, + "loss_ce": 0.006618386134505272, + "loss_iou": 0.5546875, + "loss_num": 0.04248046875, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 152612348, + "step": 2281 + }, + { + "epoch": 0.2589503546099291, + "grad_norm": 22.469348907470703, + "learning_rate": 5e-05, + "loss": 1.0202, + "num_input_tokens_seen": 152678272, + "step": 2282 + }, + { + "epoch": 0.2589503546099291, + "loss": 0.9892443418502808, + "loss_ce": 0.004014847334474325, + "loss_iou": 0.427734375, + "loss_num": 0.02587890625, + "loss_xval": 0.984375, + "num_input_tokens_seen": 152678272, + "step": 2282 + }, + { + "epoch": 0.259063829787234, + "grad_norm": 26.15501594543457, + "learning_rate": 5e-05, + "loss": 1.2836, + "num_input_tokens_seen": 152745916, + "step": 2283 + }, + { + "epoch": 0.259063829787234, + "loss": 1.2390031814575195, + "loss_ce": 0.004628186114132404, + "loss_iou": 0.5625, + "loss_num": 0.022216796875, + "loss_xval": 1.234375, + "num_input_tokens_seen": 152745916, + "step": 2283 + }, + { + "epoch": 0.259177304964539, + "grad_norm": 49.601436614990234, + "learning_rate": 5e-05, + "loss": 1.3811, + "num_input_tokens_seen": 152813360, + "step": 2284 + }, + { + "epoch": 0.259177304964539, + "loss": 1.3705909252166748, + "loss_ce": 0.005356550216674805, + "loss_iou": 0.6015625, + "loss_num": 0.03271484375, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 152813360, + "step": 2284 + }, + { + "epoch": 0.259290780141844, + "grad_norm": 20.45146369934082, + "learning_rate": 5e-05, + "loss": 1.2714, + "num_input_tokens_seen": 152880256, + "step": 2285 + }, + { + "epoch": 0.259290780141844, + "loss": 1.0882809162139893, + "loss_ce": 0.004784763790667057, + "loss_iou": 0.498046875, + "loss_num": 0.01708984375, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 152880256, + "step": 2285 + }, + { + "epoch": 0.25940425531914896, + "grad_norm": 20.79523468017578, + "learning_rate": 5e-05, + "loss": 1.2279, + "num_input_tokens_seen": 152945828, + "step": 2286 + }, + { + "epoch": 0.25940425531914896, + "loss": 1.186261534690857, + "loss_ce": 0.005109212826937437, + "loss_iou": 0.5234375, + "loss_num": 0.0274658203125, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 152945828, + "step": 2286 + }, + { + "epoch": 0.2595177304964539, + "grad_norm": 17.515439987182617, + "learning_rate": 5e-05, + "loss": 1.4037, + "num_input_tokens_seen": 153012560, + "step": 2287 + }, + { + "epoch": 0.2595177304964539, + "loss": 1.4444096088409424, + "loss_ce": 0.00642131082713604, + "loss_iou": 0.59765625, + "loss_num": 0.049560546875, + "loss_xval": 1.4375, + "num_input_tokens_seen": 153012560, + "step": 2287 + }, + { + "epoch": 0.25963120567375886, + "grad_norm": 57.32632064819336, + "learning_rate": 5e-05, + "loss": 1.4577, + "num_input_tokens_seen": 153079492, + "step": 2288 + }, + { + "epoch": 0.25963120567375886, + "loss": 1.4400962591171265, + "loss_ce": 0.0074790907092392445, + "loss_iou": 0.6171875, + "loss_num": 0.039306640625, + "loss_xval": 1.4296875, + "num_input_tokens_seen": 153079492, + "step": 2288 + }, + { + "epoch": 0.25974468085106384, + "grad_norm": 136.37733459472656, + "learning_rate": 5e-05, + "loss": 1.4644, + "num_input_tokens_seen": 153146832, + "step": 2289 + }, + { + "epoch": 0.25974468085106384, + "loss": 1.5626308917999268, + "loss_ce": 0.005013726651668549, + "loss_iou": 0.6328125, + "loss_num": 0.058349609375, + "loss_xval": 1.5546875, + "num_input_tokens_seen": 153146832, + "step": 2289 + }, + { + "epoch": 0.2598581560283688, + "grad_norm": 21.954294204711914, + "learning_rate": 5e-05, + "loss": 1.5844, + "num_input_tokens_seen": 153214088, + "step": 2290 + }, + { + "epoch": 0.2598581560283688, + "loss": 1.597105622291565, + "loss_ce": 0.0033556553535163403, + "loss_iou": 0.68359375, + "loss_num": 0.044921875, + "loss_xval": 1.59375, + "num_input_tokens_seen": 153214088, + "step": 2290 + }, + { + "epoch": 0.25997163120567374, + "grad_norm": 17.743648529052734, + "learning_rate": 5e-05, + "loss": 1.0988, + "num_input_tokens_seen": 153280384, + "step": 2291 + }, + { + "epoch": 0.25997163120567374, + "loss": 0.884011447429657, + "loss_ce": 0.006081734783947468, + "loss_iou": 0.3984375, + "loss_num": 0.015869140625, + "loss_xval": 0.87890625, + "num_input_tokens_seen": 153280384, + "step": 2291 + }, + { + "epoch": 0.2600851063829787, + "grad_norm": 17.81770896911621, + "learning_rate": 5e-05, + "loss": 1.2131, + "num_input_tokens_seen": 153347928, + "step": 2292 + }, + { + "epoch": 0.2600851063829787, + "loss": 1.142275094985962, + "loss_ce": 0.0036032104399055243, + "loss_iou": 0.494140625, + "loss_num": 0.0296630859375, + "loss_xval": 1.140625, + "num_input_tokens_seen": 153347928, + "step": 2292 + }, + { + "epoch": 0.2601985815602837, + "grad_norm": 44.185585021972656, + "learning_rate": 5e-05, + "loss": 1.2281, + "num_input_tokens_seen": 153415548, + "step": 2293 + }, + { + "epoch": 0.2601985815602837, + "loss": 1.230902910232544, + "loss_ce": 0.008246623910963535, + "loss_iou": 0.5078125, + "loss_num": 0.040771484375, + "loss_xval": 1.21875, + "num_input_tokens_seen": 153415548, + "step": 2293 + }, + { + "epoch": 0.26031205673758867, + "grad_norm": 25.082412719726562, + "learning_rate": 5e-05, + "loss": 1.5826, + "num_input_tokens_seen": 153482984, + "step": 2294 + }, + { + "epoch": 0.26031205673758867, + "loss": 1.4971579313278198, + "loss_ce": 0.00692359171807766, + "loss_iou": 0.65234375, + "loss_num": 0.03662109375, + "loss_xval": 1.4921875, + "num_input_tokens_seen": 153482984, + "step": 2294 + }, + { + "epoch": 0.2604255319148936, + "grad_norm": 9.176899909973145, + "learning_rate": 5e-05, + "loss": 0.9883, + "num_input_tokens_seen": 153550628, + "step": 2295 + }, + { + "epoch": 0.2604255319148936, + "loss": 0.9736641049385071, + "loss_ce": 0.005646544974297285, + "loss_iou": 0.408203125, + "loss_num": 0.0303955078125, + "loss_xval": 0.96875, + "num_input_tokens_seen": 153550628, + "step": 2295 + }, + { + "epoch": 0.26053900709219857, + "grad_norm": 35.53776931762695, + "learning_rate": 5e-05, + "loss": 1.1787, + "num_input_tokens_seen": 153618004, + "step": 2296 + }, + { + "epoch": 0.26053900709219857, + "loss": 1.0256402492523193, + "loss_ce": 0.006108998320996761, + "loss_iou": 0.4375, + "loss_num": 0.029052734375, + "loss_xval": 1.015625, + "num_input_tokens_seen": 153618004, + "step": 2296 + }, + { + "epoch": 0.26065248226950355, + "grad_norm": 34.6790885925293, + "learning_rate": 5e-05, + "loss": 1.5063, + "num_input_tokens_seen": 153685384, + "step": 2297 + }, + { + "epoch": 0.26065248226950355, + "loss": 1.5793523788452148, + "loss_ce": 0.0031805154867470264, + "loss_iou": 0.65234375, + "loss_num": 0.054931640625, + "loss_xval": 1.578125, + "num_input_tokens_seen": 153685384, + "step": 2297 + }, + { + "epoch": 0.2607659574468085, + "grad_norm": 23.157154083251953, + "learning_rate": 5e-05, + "loss": 1.5999, + "num_input_tokens_seen": 153751184, + "step": 2298 + }, + { + "epoch": 0.2607659574468085, + "loss": 1.4007337093353271, + "loss_ce": 0.0052259196527302265, + "loss_iou": 0.58203125, + "loss_num": 0.045654296875, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 153751184, + "step": 2298 + }, + { + "epoch": 0.26087943262411345, + "grad_norm": 21.04926872253418, + "learning_rate": 5e-05, + "loss": 1.1775, + "num_input_tokens_seen": 153818164, + "step": 2299 + }, + { + "epoch": 0.26087943262411345, + "loss": 1.0447156429290771, + "loss_ce": 0.006385497748851776, + "loss_iou": 0.462890625, + "loss_num": 0.0228271484375, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 153818164, + "step": 2299 + }, + { + "epoch": 0.26099290780141843, + "grad_norm": 23.359066009521484, + "learning_rate": 5e-05, + "loss": 1.1358, + "num_input_tokens_seen": 153885928, + "step": 2300 + }, + { + "epoch": 0.26099290780141843, + "loss": 1.0874087810516357, + "loss_ce": 0.004889333620667458, + "loss_iou": 0.498046875, + "loss_num": 0.01708984375, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 153885928, + "step": 2300 + }, + { + "epoch": 0.2611063829787234, + "grad_norm": 35.5702018737793, + "learning_rate": 5e-05, + "loss": 1.4436, + "num_input_tokens_seen": 153953140, + "step": 2301 + }, + { + "epoch": 0.2611063829787234, + "loss": 1.3350989818572998, + "loss_ce": 0.004532648250460625, + "loss_iou": 0.58203125, + "loss_num": 0.032958984375, + "loss_xval": 1.328125, + "num_input_tokens_seen": 153953140, + "step": 2301 + }, + { + "epoch": 0.2612198581560284, + "grad_norm": 26.1451473236084, + "learning_rate": 5e-05, + "loss": 1.4671, + "num_input_tokens_seen": 154019984, + "step": 2302 + }, + { + "epoch": 0.2612198581560284, + "loss": 1.401308298110962, + "loss_ce": 0.004823953844606876, + "loss_iou": 0.62890625, + "loss_num": 0.0274658203125, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 154019984, + "step": 2302 + }, + { + "epoch": 0.2613333333333333, + "grad_norm": 18.61838150024414, + "learning_rate": 5e-05, + "loss": 1.4329, + "num_input_tokens_seen": 154086828, + "step": 2303 + }, + { + "epoch": 0.2613333333333333, + "loss": 1.4404690265655518, + "loss_ce": 0.013711133040487766, + "loss_iou": 0.5390625, + "loss_num": 0.06982421875, + "loss_xval": 1.4296875, + "num_input_tokens_seen": 154086828, + "step": 2303 + }, + { + "epoch": 0.2614468085106383, + "grad_norm": 33.06393814086914, + "learning_rate": 5e-05, + "loss": 1.4146, + "num_input_tokens_seen": 154153624, + "step": 2304 + }, + { + "epoch": 0.2614468085106383, + "loss": 1.4208505153656006, + "loss_ce": 0.0038582398556172848, + "loss_iou": 0.625, + "loss_num": 0.033203125, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 154153624, + "step": 2304 + }, + { + "epoch": 0.26156028368794326, + "grad_norm": 35.59825134277344, + "learning_rate": 5e-05, + "loss": 1.2442, + "num_input_tokens_seen": 154220932, + "step": 2305 + }, + { + "epoch": 0.26156028368794326, + "loss": 1.1648932695388794, + "loss_ce": 0.006201889831572771, + "loss_iou": 0.53125, + "loss_num": 0.019775390625, + "loss_xval": 1.15625, + "num_input_tokens_seen": 154220932, + "step": 2305 + }, + { + "epoch": 0.26167375886524824, + "grad_norm": 29.066694259643555, + "learning_rate": 5e-05, + "loss": 1.4112, + "num_input_tokens_seen": 154288120, + "step": 2306 + }, + { + "epoch": 0.26167375886524824, + "loss": 1.5346450805664062, + "loss_ce": 0.012672413140535355, + "loss_iou": 0.6328125, + "loss_num": 0.050537109375, + "loss_xval": 1.5234375, + "num_input_tokens_seen": 154288120, + "step": 2306 + }, + { + "epoch": 0.2617872340425532, + "grad_norm": 71.97068786621094, + "learning_rate": 5e-05, + "loss": 1.3119, + "num_input_tokens_seen": 154354708, + "step": 2307 + }, + { + "epoch": 0.2617872340425532, + "loss": 1.341719150543213, + "loss_ce": 0.004805173724889755, + "loss_iou": 0.54296875, + "loss_num": 0.050048828125, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 154354708, + "step": 2307 + }, + { + "epoch": 0.26190070921985814, + "grad_norm": 23.74489402770996, + "learning_rate": 5e-05, + "loss": 1.2569, + "num_input_tokens_seen": 154422152, + "step": 2308 + }, + { + "epoch": 0.26190070921985814, + "loss": 1.179163932800293, + "loss_ce": 0.006312432698905468, + "loss_iou": 0.53515625, + "loss_num": 0.020751953125, + "loss_xval": 1.171875, + "num_input_tokens_seen": 154422152, + "step": 2308 + }, + { + "epoch": 0.2620141843971631, + "grad_norm": 26.092342376708984, + "learning_rate": 5e-05, + "loss": 1.1679, + "num_input_tokens_seen": 154488648, + "step": 2309 + }, + { + "epoch": 0.2620141843971631, + "loss": 1.064692735671997, + "loss_ce": 0.004634092561900616, + "loss_iou": 0.44921875, + "loss_num": 0.031982421875, + "loss_xval": 1.0625, + "num_input_tokens_seen": 154488648, + "step": 2309 + }, + { + "epoch": 0.2621276595744681, + "grad_norm": 29.011505126953125, + "learning_rate": 5e-05, + "loss": 1.2807, + "num_input_tokens_seen": 154556824, + "step": 2310 + }, + { + "epoch": 0.2621276595744681, + "loss": 1.2629053592681885, + "loss_ce": 0.0046046157367527485, + "loss_iou": 0.546875, + "loss_num": 0.032958984375, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 154556824, + "step": 2310 + }, + { + "epoch": 0.2622411347517731, + "grad_norm": 111.37384796142578, + "learning_rate": 5e-05, + "loss": 1.427, + "num_input_tokens_seen": 154623524, + "step": 2311 + }, + { + "epoch": 0.2622411347517731, + "loss": 1.3715183734893799, + "loss_ce": 0.0033542299643158913, + "loss_iou": 0.59375, + "loss_num": 0.035888671875, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 154623524, + "step": 2311 + }, + { + "epoch": 0.262354609929078, + "grad_norm": 22.7523250579834, + "learning_rate": 5e-05, + "loss": 1.1813, + "num_input_tokens_seen": 154690188, + "step": 2312 + }, + { + "epoch": 0.262354609929078, + "loss": 1.0930163860321045, + "loss_ce": 0.010985152795910835, + "loss_iou": 0.45703125, + "loss_num": 0.03369140625, + "loss_xval": 1.078125, + "num_input_tokens_seen": 154690188, + "step": 2312 + }, + { + "epoch": 0.262468085106383, + "grad_norm": 19.10521697998047, + "learning_rate": 5e-05, + "loss": 1.2695, + "num_input_tokens_seen": 154758184, + "step": 2313 + }, + { + "epoch": 0.262468085106383, + "loss": 1.4537729024887085, + "loss_ce": 0.007483895868062973, + "loss_iou": 0.59375, + "loss_num": 0.052734375, + "loss_xval": 1.4453125, + "num_input_tokens_seen": 154758184, + "step": 2313 + }, + { + "epoch": 0.26258156028368795, + "grad_norm": 37.49919128417969, + "learning_rate": 5e-05, + "loss": 1.4244, + "num_input_tokens_seen": 154825796, + "step": 2314 + }, + { + "epoch": 0.26258156028368795, + "loss": 1.2414294481277466, + "loss_ce": 0.006566105410456657, + "loss_iou": 0.52734375, + "loss_num": 0.0361328125, + "loss_xval": 1.234375, + "num_input_tokens_seen": 154825796, + "step": 2314 + }, + { + "epoch": 0.26269503546099293, + "grad_norm": 26.38422393798828, + "learning_rate": 5e-05, + "loss": 1.5899, + "num_input_tokens_seen": 154892208, + "step": 2315 + }, + { + "epoch": 0.26269503546099293, + "loss": 1.8240435123443604, + "loss_ce": 0.004707591608166695, + "loss_iou": 0.72265625, + "loss_num": 0.07421875, + "loss_xval": 1.8203125, + "num_input_tokens_seen": 154892208, + "step": 2315 + }, + { + "epoch": 0.26280851063829785, + "grad_norm": 15.359116554260254, + "learning_rate": 5e-05, + "loss": 1.284, + "num_input_tokens_seen": 154958980, + "step": 2316 + }, + { + "epoch": 0.26280851063829785, + "loss": 1.2536070346832275, + "loss_ce": 0.00995463877916336, + "loss_iou": 0.515625, + "loss_num": 0.042236328125, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 154958980, + "step": 2316 + }, + { + "epoch": 0.26292198581560283, + "grad_norm": 36.96556854248047, + "learning_rate": 5e-05, + "loss": 1.3036, + "num_input_tokens_seen": 155026184, + "step": 2317 + }, + { + "epoch": 0.26292198581560283, + "loss": 1.2167288064956665, + "loss_ce": 0.008720923215150833, + "loss_iou": 0.5078125, + "loss_num": 0.0390625, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 155026184, + "step": 2317 + }, + { + "epoch": 0.2630354609929078, + "grad_norm": 23.77519989013672, + "learning_rate": 5e-05, + "loss": 1.5333, + "num_input_tokens_seen": 155093960, + "step": 2318 + }, + { + "epoch": 0.2630354609929078, + "loss": 1.6209442615509033, + "loss_ce": 0.004733259789645672, + "loss_iou": 0.69140625, + "loss_num": 0.046875, + "loss_xval": 1.6171875, + "num_input_tokens_seen": 155093960, + "step": 2318 + }, + { + "epoch": 0.2631489361702128, + "grad_norm": 23.132692337036133, + "learning_rate": 5e-05, + "loss": 1.2759, + "num_input_tokens_seen": 155158896, + "step": 2319 + }, + { + "epoch": 0.2631489361702128, + "loss": 1.0764355659484863, + "loss_ce": 0.00612306222319603, + "loss_iou": 0.46484375, + "loss_num": 0.0284423828125, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 155158896, + "step": 2319 + }, + { + "epoch": 0.2632624113475177, + "grad_norm": 25.993019104003906, + "learning_rate": 5e-05, + "loss": 1.4877, + "num_input_tokens_seen": 155226140, + "step": 2320 + }, + { + "epoch": 0.2632624113475177, + "loss": 1.4266362190246582, + "loss_ce": 0.005737695377320051, + "loss_iou": 0.5859375, + "loss_num": 0.049072265625, + "loss_xval": 1.421875, + "num_input_tokens_seen": 155226140, + "step": 2320 + }, + { + "epoch": 0.2633758865248227, + "grad_norm": 22.291460037231445, + "learning_rate": 5e-05, + "loss": 1.3347, + "num_input_tokens_seen": 155293712, + "step": 2321 + }, + { + "epoch": 0.2633758865248227, + "loss": 1.389613389968872, + "loss_ce": 0.007289228029549122, + "loss_iou": 0.58203125, + "loss_num": 0.0439453125, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 155293712, + "step": 2321 + }, + { + "epoch": 0.26348936170212767, + "grad_norm": 20.97493553161621, + "learning_rate": 5e-05, + "loss": 1.2106, + "num_input_tokens_seen": 155360784, + "step": 2322 + }, + { + "epoch": 0.26348936170212767, + "loss": 1.1780407428741455, + "loss_ce": 0.005189194343984127, + "loss_iou": 0.51953125, + "loss_num": 0.0269775390625, + "loss_xval": 1.171875, + "num_input_tokens_seen": 155360784, + "step": 2322 + }, + { + "epoch": 0.26360283687943264, + "grad_norm": 15.991686820983887, + "learning_rate": 5e-05, + "loss": 1.2595, + "num_input_tokens_seen": 155427964, + "step": 2323 + }, + { + "epoch": 0.26360283687943264, + "loss": 1.3260304927825928, + "loss_ce": 0.0047414060682058334, + "loss_iou": 0.546875, + "loss_num": 0.044921875, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 155427964, + "step": 2323 + }, + { + "epoch": 0.26371631205673757, + "grad_norm": 26.11006736755371, + "learning_rate": 5e-05, + "loss": 1.3831, + "num_input_tokens_seen": 155495308, + "step": 2324 + }, + { + "epoch": 0.26371631205673757, + "loss": 1.4004358053207397, + "loss_ce": 0.006881140172481537, + "loss_iou": 0.59375, + "loss_num": 0.041259765625, + "loss_xval": 1.390625, + "num_input_tokens_seen": 155495308, + "step": 2324 + }, + { + "epoch": 0.26382978723404255, + "grad_norm": 37.36357498168945, + "learning_rate": 5e-05, + "loss": 1.2995, + "num_input_tokens_seen": 155562344, + "step": 2325 + }, + { + "epoch": 0.26382978723404255, + "loss": 1.3867278099060059, + "loss_ce": 0.003915244713425636, + "loss_iou": 0.56640625, + "loss_num": 0.049560546875, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 155562344, + "step": 2325 + }, + { + "epoch": 0.2639432624113475, + "grad_norm": 25.427871704101562, + "learning_rate": 5e-05, + "loss": 1.4618, + "num_input_tokens_seen": 155629468, + "step": 2326 + }, + { + "epoch": 0.2639432624113475, + "loss": 1.4048162698745728, + "loss_ce": 0.004425631836056709, + "loss_iou": 0.6015625, + "loss_num": 0.039306640625, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 155629468, + "step": 2326 + }, + { + "epoch": 0.2640567375886525, + "grad_norm": 91.82071685791016, + "learning_rate": 5e-05, + "loss": 1.3215, + "num_input_tokens_seen": 155696156, + "step": 2327 + }, + { + "epoch": 0.2640567375886525, + "loss": 1.3953545093536377, + "loss_ce": 0.0037530134432017803, + "loss_iou": 0.5390625, + "loss_num": 0.06298828125, + "loss_xval": 1.390625, + "num_input_tokens_seen": 155696156, + "step": 2327 + }, + { + "epoch": 0.2641702127659574, + "grad_norm": 22.067651748657227, + "learning_rate": 5e-05, + "loss": 1.3354, + "num_input_tokens_seen": 155763156, + "step": 2328 + }, + { + "epoch": 0.2641702127659574, + "loss": 1.31868577003479, + "loss_ce": 0.003256058320403099, + "loss_iou": 0.54296875, + "loss_num": 0.04541015625, + "loss_xval": 1.3125, + "num_input_tokens_seen": 155763156, + "step": 2328 + }, + { + "epoch": 0.2642836879432624, + "grad_norm": 38.47707748413086, + "learning_rate": 5e-05, + "loss": 1.2894, + "num_input_tokens_seen": 155830232, + "step": 2329 + }, + { + "epoch": 0.2642836879432624, + "loss": 1.355919361114502, + "loss_ce": 0.0048451172187924385, + "loss_iou": 0.58203125, + "loss_num": 0.03759765625, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 155830232, + "step": 2329 + }, + { + "epoch": 0.2643971631205674, + "grad_norm": 24.97203826904297, + "learning_rate": 5e-05, + "loss": 1.6932, + "num_input_tokens_seen": 155897560, + "step": 2330 + }, + { + "epoch": 0.2643971631205674, + "loss": 1.5553443431854248, + "loss_ce": 0.0065161604434251785, + "loss_iou": 0.66015625, + "loss_num": 0.045166015625, + "loss_xval": 1.546875, + "num_input_tokens_seen": 155897560, + "step": 2330 + }, + { + "epoch": 0.26451063829787236, + "grad_norm": 20.854171752929688, + "learning_rate": 5e-05, + "loss": 1.3855, + "num_input_tokens_seen": 155964572, + "step": 2331 + }, + { + "epoch": 0.26451063829787236, + "loss": 1.2639095783233643, + "loss_ce": 0.004143872298300266, + "loss_iou": 0.515625, + "loss_num": 0.045166015625, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 155964572, + "step": 2331 + }, + { + "epoch": 0.2646241134751773, + "grad_norm": 24.602556228637695, + "learning_rate": 5e-05, + "loss": 1.3562, + "num_input_tokens_seen": 156031556, + "step": 2332 + }, + { + "epoch": 0.2646241134751773, + "loss": 1.3749054670333862, + "loss_ce": 0.005276531912386417, + "loss_iou": 0.59375, + "loss_num": 0.036865234375, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 156031556, + "step": 2332 + }, + { + "epoch": 0.26473758865248226, + "grad_norm": 37.272403717041016, + "learning_rate": 5e-05, + "loss": 1.4212, + "num_input_tokens_seen": 156098556, + "step": 2333 + }, + { + "epoch": 0.26473758865248226, + "loss": 1.4258602857589722, + "loss_ce": 0.00545016722753644, + "loss_iou": 0.578125, + "loss_num": 0.052001953125, + "loss_xval": 1.421875, + "num_input_tokens_seen": 156098556, + "step": 2333 + }, + { + "epoch": 0.26485106382978724, + "grad_norm": 23.053119659423828, + "learning_rate": 5e-05, + "loss": 1.6079, + "num_input_tokens_seen": 156165516, + "step": 2334 + }, + { + "epoch": 0.26485106382978724, + "loss": 1.5604965686798096, + "loss_ce": 0.003855883376672864, + "loss_iou": 0.66796875, + "loss_num": 0.043701171875, + "loss_xval": 1.5546875, + "num_input_tokens_seen": 156165516, + "step": 2334 + }, + { + "epoch": 0.2649645390070922, + "grad_norm": 16.722606658935547, + "learning_rate": 5e-05, + "loss": 1.1915, + "num_input_tokens_seen": 156231492, + "step": 2335 + }, + { + "epoch": 0.2649645390070922, + "loss": 1.0721116065979004, + "loss_ce": 0.004728779196739197, + "loss_iou": 0.435546875, + "loss_num": 0.0390625, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 156231492, + "step": 2335 + }, + { + "epoch": 0.26507801418439714, + "grad_norm": 31.67003059387207, + "learning_rate": 5e-05, + "loss": 1.2391, + "num_input_tokens_seen": 156298748, + "step": 2336 + }, + { + "epoch": 0.26507801418439714, + "loss": 1.1970241069793701, + "loss_ce": 0.004153024405241013, + "loss_iou": 0.5078125, + "loss_num": 0.035400390625, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 156298748, + "step": 2336 + }, + { + "epoch": 0.2651914893617021, + "grad_norm": 23.45975112915039, + "learning_rate": 5e-05, + "loss": 1.2006, + "num_input_tokens_seen": 156364824, + "step": 2337 + }, + { + "epoch": 0.2651914893617021, + "loss": 1.052272081375122, + "loss_ce": 0.009059127420186996, + "loss_iou": 0.447265625, + "loss_num": 0.030029296875, + "loss_xval": 1.046875, + "num_input_tokens_seen": 156364824, + "step": 2337 + }, + { + "epoch": 0.2653049645390071, + "grad_norm": 35.537513732910156, + "learning_rate": 5e-05, + "loss": 1.265, + "num_input_tokens_seen": 156432052, + "step": 2338 + }, + { + "epoch": 0.2653049645390071, + "loss": 1.2905478477478027, + "loss_ce": 0.0044149840250611305, + "loss_iou": 0.546875, + "loss_num": 0.038818359375, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 156432052, + "step": 2338 + }, + { + "epoch": 0.26541843971631207, + "grad_norm": 32.23825454711914, + "learning_rate": 5e-05, + "loss": 1.6351, + "num_input_tokens_seen": 156498036, + "step": 2339 + }, + { + "epoch": 0.26541843971631207, + "loss": 1.502480149269104, + "loss_ce": 0.006386379711329937, + "loss_iou": 0.671875, + "loss_num": 0.03125, + "loss_xval": 1.5, + "num_input_tokens_seen": 156498036, + "step": 2339 + }, + { + "epoch": 0.265531914893617, + "grad_norm": 14.869471549987793, + "learning_rate": 5e-05, + "loss": 1.2816, + "num_input_tokens_seen": 156565024, + "step": 2340 + }, + { + "epoch": 0.265531914893617, + "loss": 1.3316712379455566, + "loss_ce": 0.007452401332557201, + "loss_iou": 0.55859375, + "loss_num": 0.0419921875, + "loss_xval": 1.328125, + "num_input_tokens_seen": 156565024, + "step": 2340 + }, + { + "epoch": 0.26564539007092197, + "grad_norm": 27.430370330810547, + "learning_rate": 5e-05, + "loss": 1.3215, + "num_input_tokens_seen": 156631676, + "step": 2341 + }, + { + "epoch": 0.26564539007092197, + "loss": 1.1728754043579102, + "loss_ce": 0.006127288565039635, + "loss_iou": 0.482421875, + "loss_num": 0.04052734375, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 156631676, + "step": 2341 + }, + { + "epoch": 0.26575886524822695, + "grad_norm": 16.68491554260254, + "learning_rate": 5e-05, + "loss": 1.2703, + "num_input_tokens_seen": 156697504, + "step": 2342 + }, + { + "epoch": 0.26575886524822695, + "loss": 1.219269871711731, + "loss_ce": 0.007355756592005491, + "loss_iou": 0.515625, + "loss_num": 0.03564453125, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 156697504, + "step": 2342 + }, + { + "epoch": 0.26587234042553193, + "grad_norm": 21.967533111572266, + "learning_rate": 5e-05, + "loss": 1.3582, + "num_input_tokens_seen": 156764372, + "step": 2343 + }, + { + "epoch": 0.26587234042553193, + "loss": 1.3118562698364258, + "loss_ce": 0.006680441088974476, + "loss_iou": 0.50390625, + "loss_num": 0.059326171875, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 156764372, + "step": 2343 + }, + { + "epoch": 0.2659858156028369, + "grad_norm": 30.741912841796875, + "learning_rate": 5e-05, + "loss": 1.2848, + "num_input_tokens_seen": 156832252, + "step": 2344 + }, + { + "epoch": 0.2659858156028369, + "loss": 1.3270424604415894, + "loss_ce": 0.002823719521984458, + "loss_iou": 0.5625, + "loss_num": 0.03955078125, + "loss_xval": 1.328125, + "num_input_tokens_seen": 156832252, + "step": 2344 + }, + { + "epoch": 0.26609929078014183, + "grad_norm": 35.48207092285156, + "learning_rate": 5e-05, + "loss": 1.4696, + "num_input_tokens_seen": 156898760, + "step": 2345 + }, + { + "epoch": 0.26609929078014183, + "loss": 1.3555189371109009, + "loss_ce": 0.005909531842917204, + "loss_iou": 0.578125, + "loss_num": 0.03857421875, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 156898760, + "step": 2345 + }, + { + "epoch": 0.2662127659574468, + "grad_norm": 26.50129508972168, + "learning_rate": 5e-05, + "loss": 1.4283, + "num_input_tokens_seen": 156965516, + "step": 2346 + }, + { + "epoch": 0.2662127659574468, + "loss": 1.3663041591644287, + "loss_ce": 0.0047318958677351475, + "loss_iou": 0.57421875, + "loss_num": 0.042724609375, + "loss_xval": 1.359375, + "num_input_tokens_seen": 156965516, + "step": 2346 + }, + { + "epoch": 0.2663262411347518, + "grad_norm": 31.3304500579834, + "learning_rate": 5e-05, + "loss": 1.2484, + "num_input_tokens_seen": 157032720, + "step": 2347 + }, + { + "epoch": 0.2663262411347518, + "loss": 1.2012690305709839, + "loss_ce": 0.006933074444532394, + "loss_iou": 0.5078125, + "loss_num": 0.03564453125, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 157032720, + "step": 2347 + }, + { + "epoch": 0.26643971631205676, + "grad_norm": 28.647903442382812, + "learning_rate": 5e-05, + "loss": 1.4878, + "num_input_tokens_seen": 157099396, + "step": 2348 + }, + { + "epoch": 0.26643971631205676, + "loss": 1.7212190628051758, + "loss_ce": 0.006375353783369064, + "loss_iou": 0.70703125, + "loss_num": 0.06005859375, + "loss_xval": 1.71875, + "num_input_tokens_seen": 157099396, + "step": 2348 + }, + { + "epoch": 0.2665531914893617, + "grad_norm": 27.82228660583496, + "learning_rate": 5e-05, + "loss": 1.2446, + "num_input_tokens_seen": 157165992, + "step": 2349 + }, + { + "epoch": 0.2665531914893617, + "loss": 1.2682905197143555, + "loss_ce": 0.0041304780170321465, + "loss_iou": 0.5390625, + "loss_num": 0.037109375, + "loss_xval": 1.265625, + "num_input_tokens_seen": 157165992, + "step": 2349 + }, + { + "epoch": 0.26666666666666666, + "grad_norm": 19.71327781677246, + "learning_rate": 5e-05, + "loss": 1.3909, + "num_input_tokens_seen": 157232588, + "step": 2350 + }, + { + "epoch": 0.26666666666666666, + "loss": 1.5303584337234497, + "loss_ce": 0.006920941174030304, + "loss_iou": 0.6640625, + "loss_num": 0.0390625, + "loss_xval": 1.5234375, + "num_input_tokens_seen": 157232588, + "step": 2350 + }, + { + "epoch": 0.26678014184397164, + "grad_norm": 17.44993019104004, + "learning_rate": 5e-05, + "loss": 1.0317, + "num_input_tokens_seen": 157299484, + "step": 2351 + }, + { + "epoch": 0.26678014184397164, + "loss": 1.1596826314926147, + "loss_ce": 0.008315449580550194, + "loss_iou": 0.51171875, + "loss_num": 0.026123046875, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 157299484, + "step": 2351 + }, + { + "epoch": 0.2668936170212766, + "grad_norm": 29.194229125976562, + "learning_rate": 5e-05, + "loss": 1.4173, + "num_input_tokens_seen": 157367104, + "step": 2352 + }, + { + "epoch": 0.2668936170212766, + "loss": 1.3153955936431885, + "loss_ce": 0.002895654644817114, + "loss_iou": 0.5546875, + "loss_num": 0.040771484375, + "loss_xval": 1.3125, + "num_input_tokens_seen": 157367104, + "step": 2352 + }, + { + "epoch": 0.26700709219858154, + "grad_norm": 41.94213104248047, + "learning_rate": 5e-05, + "loss": 1.3771, + "num_input_tokens_seen": 157434592, + "step": 2353 + }, + { + "epoch": 0.26700709219858154, + "loss": 1.3011455535888672, + "loss_ce": 0.005247170105576515, + "loss_iou": 0.5703125, + "loss_num": 0.0308837890625, + "loss_xval": 1.296875, + "num_input_tokens_seen": 157434592, + "step": 2353 + }, + { + "epoch": 0.2671205673758865, + "grad_norm": 29.429927825927734, + "learning_rate": 5e-05, + "loss": 1.6408, + "num_input_tokens_seen": 157501932, + "step": 2354 + }, + { + "epoch": 0.2671205673758865, + "loss": 1.6174473762512207, + "loss_ce": 0.0070958686992526054, + "loss_iou": 0.703125, + "loss_num": 0.041259765625, + "loss_xval": 1.609375, + "num_input_tokens_seen": 157501932, + "step": 2354 + }, + { + "epoch": 0.2672340425531915, + "grad_norm": 18.552410125732422, + "learning_rate": 5e-05, + "loss": 1.5776, + "num_input_tokens_seen": 157566836, + "step": 2355 + }, + { + "epoch": 0.2672340425531915, + "loss": 1.574362874031067, + "loss_ce": 0.008444865234196186, + "loss_iou": 0.640625, + "loss_num": 0.05712890625, + "loss_xval": 1.5625, + "num_input_tokens_seen": 157566836, + "step": 2355 + }, + { + "epoch": 0.2673475177304965, + "grad_norm": 25.76849365234375, + "learning_rate": 5e-05, + "loss": 1.4407, + "num_input_tokens_seen": 157633688, + "step": 2356 + }, + { + "epoch": 0.2673475177304965, + "loss": 1.408008337020874, + "loss_ce": 0.0037114694714546204, + "loss_iou": 0.609375, + "loss_num": 0.03662109375, + "loss_xval": 1.40625, + "num_input_tokens_seen": 157633688, + "step": 2356 + }, + { + "epoch": 0.2674609929078014, + "grad_norm": 29.258460998535156, + "learning_rate": 5e-05, + "loss": 1.3627, + "num_input_tokens_seen": 157701444, + "step": 2357 + }, + { + "epoch": 0.2674609929078014, + "loss": 1.4073779582977295, + "loss_ce": 0.006987432949244976, + "loss_iou": 0.578125, + "loss_num": 0.048095703125, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 157701444, + "step": 2357 + }, + { + "epoch": 0.2675744680851064, + "grad_norm": 17.57285499572754, + "learning_rate": 5e-05, + "loss": 1.1889, + "num_input_tokens_seen": 157768016, + "step": 2358 + }, + { + "epoch": 0.2675744680851064, + "loss": 1.0267763137817383, + "loss_ce": 0.003827127628028393, + "loss_iou": 0.47265625, + "loss_num": 0.0152587890625, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 157768016, + "step": 2358 + }, + { + "epoch": 0.26768794326241135, + "grad_norm": 25.206802368164062, + "learning_rate": 5e-05, + "loss": 1.2755, + "num_input_tokens_seen": 157834972, + "step": 2359 + }, + { + "epoch": 0.26768794326241135, + "loss": 1.202797293663025, + "loss_ce": 0.004555071704089642, + "loss_iou": 0.515625, + "loss_num": 0.03369140625, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 157834972, + "step": 2359 + }, + { + "epoch": 0.26780141843971633, + "grad_norm": 45.846961975097656, + "learning_rate": 5e-05, + "loss": 1.3768, + "num_input_tokens_seen": 157901480, + "step": 2360 + }, + { + "epoch": 0.26780141843971633, + "loss": 1.5584638118743896, + "loss_ce": 0.005729469005018473, + "loss_iou": 0.640625, + "loss_num": 0.05517578125, + "loss_xval": 1.5546875, + "num_input_tokens_seen": 157901480, + "step": 2360 + }, + { + "epoch": 0.26791489361702125, + "grad_norm": 21.421052932739258, + "learning_rate": 5e-05, + "loss": 1.4839, + "num_input_tokens_seen": 157967612, + "step": 2361 + }, + { + "epoch": 0.26791489361702125, + "loss": 1.763384461402893, + "loss_ce": 0.005571947433054447, + "loss_iou": 0.73046875, + "loss_num": 0.06005859375, + "loss_xval": 1.7578125, + "num_input_tokens_seen": 157967612, + "step": 2361 + }, + { + "epoch": 0.26802836879432623, + "grad_norm": 21.68344497680664, + "learning_rate": 5e-05, + "loss": 1.2877, + "num_input_tokens_seen": 158034516, + "step": 2362 + }, + { + "epoch": 0.26802836879432623, + "loss": 1.2214409112930298, + "loss_ce": 0.006597180850803852, + "loss_iou": 0.515625, + "loss_num": 0.036376953125, + "loss_xval": 1.21875, + "num_input_tokens_seen": 158034516, + "step": 2362 + }, + { + "epoch": 0.2681418439716312, + "grad_norm": 40.57746887207031, + "learning_rate": 5e-05, + "loss": 1.3468, + "num_input_tokens_seen": 158101824, + "step": 2363 + }, + { + "epoch": 0.2681418439716312, + "loss": 1.2873953580856323, + "loss_ce": 0.0066335792653262615, + "loss_iou": 0.50390625, + "loss_num": 0.054931640625, + "loss_xval": 1.28125, + "num_input_tokens_seen": 158101824, + "step": 2363 + }, + { + "epoch": 0.2682553191489362, + "grad_norm": 27.15469741821289, + "learning_rate": 5e-05, + "loss": 1.236, + "num_input_tokens_seen": 158168080, + "step": 2364 + }, + { + "epoch": 0.2682553191489362, + "loss": 1.253347635269165, + "loss_ce": 0.0043241651728749275, + "loss_iou": 0.55078125, + "loss_num": 0.0302734375, + "loss_xval": 1.25, + "num_input_tokens_seen": 158168080, + "step": 2364 + }, + { + "epoch": 0.2683687943262411, + "grad_norm": 29.261940002441406, + "learning_rate": 5e-05, + "loss": 1.2801, + "num_input_tokens_seen": 158236016, + "step": 2365 + }, + { + "epoch": 0.2683687943262411, + "loss": 1.1426548957824707, + "loss_ce": 0.005447900854051113, + "loss_iou": 0.48046875, + "loss_num": 0.035400390625, + "loss_xval": 1.140625, + "num_input_tokens_seen": 158236016, + "step": 2365 + }, + { + "epoch": 0.2684822695035461, + "grad_norm": 28.27101707458496, + "learning_rate": 5e-05, + "loss": 1.2922, + "num_input_tokens_seen": 158303240, + "step": 2366 + }, + { + "epoch": 0.2684822695035461, + "loss": 1.3648231029510498, + "loss_ce": 0.005448108538985252, + "loss_iou": 0.59375, + "loss_num": 0.0341796875, + "loss_xval": 1.359375, + "num_input_tokens_seen": 158303240, + "step": 2366 + }, + { + "epoch": 0.26859574468085107, + "grad_norm": 36.76707458496094, + "learning_rate": 5e-05, + "loss": 1.0678, + "num_input_tokens_seen": 158369208, + "step": 2367 + }, + { + "epoch": 0.26859574468085107, + "loss": 1.0954864025115967, + "loss_ce": 0.005154365673661232, + "loss_iou": 0.49609375, + "loss_num": 0.0196533203125, + "loss_xval": 1.09375, + "num_input_tokens_seen": 158369208, + "step": 2367 + }, + { + "epoch": 0.26870921985815605, + "grad_norm": 28.226608276367188, + "learning_rate": 5e-05, + "loss": 1.1742, + "num_input_tokens_seen": 158435404, + "step": 2368 + }, + { + "epoch": 0.26870921985815605, + "loss": 1.128639817237854, + "loss_ce": 0.006813650019466877, + "loss_iou": 0.455078125, + "loss_num": 0.041748046875, + "loss_xval": 1.125, + "num_input_tokens_seen": 158435404, + "step": 2368 + }, + { + "epoch": 0.26882269503546097, + "grad_norm": 13.105355262756348, + "learning_rate": 5e-05, + "loss": 1.2708, + "num_input_tokens_seen": 158502516, + "step": 2369 + }, + { + "epoch": 0.26882269503546097, + "loss": 1.3943047523498535, + "loss_ce": 0.004656297154724598, + "loss_iou": 0.53515625, + "loss_num": 0.06396484375, + "loss_xval": 1.390625, + "num_input_tokens_seen": 158502516, + "step": 2369 + }, + { + "epoch": 0.26893617021276595, + "grad_norm": 25.035247802734375, + "learning_rate": 5e-05, + "loss": 1.292, + "num_input_tokens_seen": 158569764, + "step": 2370 + }, + { + "epoch": 0.26893617021276595, + "loss": 1.3388113975524902, + "loss_ce": 0.0043386248871684074, + "loss_iou": 0.54296875, + "loss_num": 0.0498046875, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 158569764, + "step": 2370 + }, + { + "epoch": 0.2690496453900709, + "grad_norm": 15.778913497924805, + "learning_rate": 5e-05, + "loss": 1.0443, + "num_input_tokens_seen": 158635156, + "step": 2371 + }, + { + "epoch": 0.2690496453900709, + "loss": 0.9971939325332642, + "loss_ce": 0.0052505964413285255, + "loss_iou": 0.392578125, + "loss_num": 0.041259765625, + "loss_xval": 0.9921875, + "num_input_tokens_seen": 158635156, + "step": 2371 + }, + { + "epoch": 0.2691631205673759, + "grad_norm": 27.770593643188477, + "learning_rate": 5e-05, + "loss": 1.1312, + "num_input_tokens_seen": 158703088, + "step": 2372 + }, + { + "epoch": 0.2691631205673759, + "loss": 1.2111117839813232, + "loss_ce": 0.0049349586479365826, + "loss_iou": 0.515625, + "loss_num": 0.03466796875, + "loss_xval": 1.203125, + "num_input_tokens_seen": 158703088, + "step": 2372 + }, + { + "epoch": 0.2692765957446808, + "grad_norm": 41.868736267089844, + "learning_rate": 5e-05, + "loss": 1.3318, + "num_input_tokens_seen": 158770312, + "step": 2373 + }, + { + "epoch": 0.2692765957446808, + "loss": 1.307919979095459, + "loss_ce": 0.004697449970990419, + "loss_iou": 0.59765625, + "loss_num": 0.0218505859375, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 158770312, + "step": 2373 + }, + { + "epoch": 0.2693900709219858, + "grad_norm": 28.10552215576172, + "learning_rate": 5e-05, + "loss": 1.6284, + "num_input_tokens_seen": 158838360, + "step": 2374 + }, + { + "epoch": 0.2693900709219858, + "loss": 1.722900390625, + "loss_ce": 0.0061033666133880615, + "loss_iou": 0.765625, + "loss_num": 0.037353515625, + "loss_xval": 1.71875, + "num_input_tokens_seen": 158838360, + "step": 2374 + }, + { + "epoch": 0.2695035460992908, + "grad_norm": 31.206958770751953, + "learning_rate": 5e-05, + "loss": 1.2984, + "num_input_tokens_seen": 158905132, + "step": 2375 + }, + { + "epoch": 0.2695035460992908, + "loss": 1.2153499126434326, + "loss_ce": 0.005388912744820118, + "loss_iou": 0.5234375, + "loss_num": 0.033203125, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 158905132, + "step": 2375 + }, + { + "epoch": 0.26961702127659576, + "grad_norm": 36.62555694580078, + "learning_rate": 5e-05, + "loss": 1.3874, + "num_input_tokens_seen": 158972504, + "step": 2376 + }, + { + "epoch": 0.26961702127659576, + "loss": 1.4569077491760254, + "loss_ce": 0.008177257142961025, + "loss_iou": 0.57421875, + "loss_num": 0.060302734375, + "loss_xval": 1.4453125, + "num_input_tokens_seen": 158972504, + "step": 2376 + }, + { + "epoch": 0.26973049645390074, + "grad_norm": 22.119661331176758, + "learning_rate": 5e-05, + "loss": 1.0734, + "num_input_tokens_seen": 159039488, + "step": 2377 + }, + { + "epoch": 0.26973049645390074, + "loss": 1.1531811952590942, + "loss_ce": 0.005231998860836029, + "loss_iou": 0.46484375, + "loss_num": 0.04345703125, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 159039488, + "step": 2377 + }, + { + "epoch": 0.26984397163120566, + "grad_norm": 22.483232498168945, + "learning_rate": 5e-05, + "loss": 1.2226, + "num_input_tokens_seen": 159106440, + "step": 2378 + }, + { + "epoch": 0.26984397163120566, + "loss": 1.1561033725738525, + "loss_ce": 0.0057127708569169044, + "loss_iou": 0.484375, + "loss_num": 0.03662109375, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 159106440, + "step": 2378 + }, + { + "epoch": 0.26995744680851064, + "grad_norm": 40.25628662109375, + "learning_rate": 5e-05, + "loss": 1.2854, + "num_input_tokens_seen": 159172704, + "step": 2379 + }, + { + "epoch": 0.26995744680851064, + "loss": 1.3450913429260254, + "loss_ce": 0.008177336305379868, + "loss_iou": 0.54296875, + "loss_num": 0.05078125, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 159172704, + "step": 2379 + }, + { + "epoch": 0.2700709219858156, + "grad_norm": 29.194746017456055, + "learning_rate": 5e-05, + "loss": 1.228, + "num_input_tokens_seen": 159238628, + "step": 2380 + }, + { + "epoch": 0.2700709219858156, + "loss": 1.385955572128296, + "loss_ce": 0.006072822492569685, + "loss_iou": 0.58203125, + "loss_num": 0.04345703125, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 159238628, + "step": 2380 + }, + { + "epoch": 0.2701843971631206, + "grad_norm": 18.953868865966797, + "learning_rate": 5e-05, + "loss": 1.3488, + "num_input_tokens_seen": 159305964, + "step": 2381 + }, + { + "epoch": 0.2701843971631206, + "loss": 1.3431105613708496, + "loss_ce": 0.006684678606688976, + "loss_iou": 0.54296875, + "loss_num": 0.050048828125, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 159305964, + "step": 2381 + }, + { + "epoch": 0.2702978723404255, + "grad_norm": 27.74530792236328, + "learning_rate": 5e-05, + "loss": 1.3893, + "num_input_tokens_seen": 159372324, + "step": 2382 + }, + { + "epoch": 0.2702978723404255, + "loss": 1.521047830581665, + "loss_ce": 0.0054228585213422775, + "loss_iou": 0.61328125, + "loss_num": 0.058349609375, + "loss_xval": 1.515625, + "num_input_tokens_seen": 159372324, + "step": 2382 + }, + { + "epoch": 0.2704113475177305, + "grad_norm": 37.6505241394043, + "learning_rate": 5e-05, + "loss": 1.3789, + "num_input_tokens_seen": 159438884, + "step": 2383 + }, + { + "epoch": 0.2704113475177305, + "loss": 1.3502897024154663, + "loss_ce": 0.0036100177094340324, + "loss_iou": 0.578125, + "loss_num": 0.038330078125, + "loss_xval": 1.34375, + "num_input_tokens_seen": 159438884, + "step": 2383 + }, + { + "epoch": 0.27052482269503547, + "grad_norm": 23.83799934387207, + "learning_rate": 5e-05, + "loss": 1.4573, + "num_input_tokens_seen": 159505908, + "step": 2384 + }, + { + "epoch": 0.27052482269503547, + "loss": 1.4734916687011719, + "loss_ce": 0.005718137137591839, + "loss_iou": 0.61328125, + "loss_num": 0.047607421875, + "loss_xval": 1.46875, + "num_input_tokens_seen": 159505908, + "step": 2384 + }, + { + "epoch": 0.27063829787234045, + "grad_norm": 18.128868103027344, + "learning_rate": 5e-05, + "loss": 1.1754, + "num_input_tokens_seen": 159573460, + "step": 2385 + }, + { + "epoch": 0.27063829787234045, + "loss": 1.1827183961868286, + "loss_ce": 0.004984060302376747, + "loss_iou": 0.498046875, + "loss_num": 0.035888671875, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 159573460, + "step": 2385 + }, + { + "epoch": 0.2707517730496454, + "grad_norm": 24.58871078491211, + "learning_rate": 5e-05, + "loss": 1.2483, + "num_input_tokens_seen": 159640328, + "step": 2386 + }, + { + "epoch": 0.2707517730496454, + "loss": 1.4338798522949219, + "loss_ce": 0.0032157283276319504, + "loss_iou": 0.58984375, + "loss_num": 0.049560546875, + "loss_xval": 1.4296875, + "num_input_tokens_seen": 159640328, + "step": 2386 + }, + { + "epoch": 0.27086524822695035, + "grad_norm": 29.95407485961914, + "learning_rate": 5e-05, + "loss": 1.5197, + "num_input_tokens_seen": 159707564, + "step": 2387 + }, + { + "epoch": 0.27086524822695035, + "loss": 1.510270118713379, + "loss_ce": 0.006363881751894951, + "loss_iou": 0.63671875, + "loss_num": 0.0458984375, + "loss_xval": 1.5, + "num_input_tokens_seen": 159707564, + "step": 2387 + }, + { + "epoch": 0.27097872340425533, + "grad_norm": 25.224201202392578, + "learning_rate": 5e-05, + "loss": 1.3112, + "num_input_tokens_seen": 159774344, + "step": 2388 + }, + { + "epoch": 0.27097872340425533, + "loss": 1.2800935506820679, + "loss_ce": 0.004702904727309942, + "loss_iou": 0.5546875, + "loss_num": 0.033203125, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 159774344, + "step": 2388 + }, + { + "epoch": 0.2710921985815603, + "grad_norm": 27.393543243408203, + "learning_rate": 5e-05, + "loss": 1.3471, + "num_input_tokens_seen": 159840836, + "step": 2389 + }, + { + "epoch": 0.2710921985815603, + "loss": 1.1670857667922974, + "loss_ce": 0.0035114968195557594, + "loss_iou": 0.5, + "loss_num": 0.033203125, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 159840836, + "step": 2389 + }, + { + "epoch": 0.27120567375886523, + "grad_norm": 27.778635025024414, + "learning_rate": 5e-05, + "loss": 1.3284, + "num_input_tokens_seen": 159908192, + "step": 2390 + }, + { + "epoch": 0.27120567375886523, + "loss": 1.4260311126708984, + "loss_ce": 0.004156215116381645, + "loss_iou": 0.62109375, + "loss_num": 0.03564453125, + "loss_xval": 1.421875, + "num_input_tokens_seen": 159908192, + "step": 2390 + }, + { + "epoch": 0.2713191489361702, + "grad_norm": 42.58454513549805, + "learning_rate": 5e-05, + "loss": 1.2738, + "num_input_tokens_seen": 159974376, + "step": 2391 + }, + { + "epoch": 0.2713191489361702, + "loss": 1.1955361366271973, + "loss_ce": 0.014872045256197453, + "loss_iou": 0.46484375, + "loss_num": 0.050048828125, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 159974376, + "step": 2391 + }, + { + "epoch": 0.2714326241134752, + "grad_norm": 28.795312881469727, + "learning_rate": 5e-05, + "loss": 1.3995, + "num_input_tokens_seen": 160040240, + "step": 2392 + }, + { + "epoch": 0.2714326241134752, + "loss": 1.3903968334197998, + "loss_ce": 0.004166355822235346, + "loss_iou": 0.60546875, + "loss_num": 0.03515625, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 160040240, + "step": 2392 + }, + { + "epoch": 0.27154609929078016, + "grad_norm": 16.413209915161133, + "learning_rate": 5e-05, + "loss": 1.3842, + "num_input_tokens_seen": 160108488, + "step": 2393 + }, + { + "epoch": 0.27154609929078016, + "loss": 1.1997932195663452, + "loss_ce": 0.004969033412635326, + "loss_iou": 0.466796875, + "loss_num": 0.05224609375, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 160108488, + "step": 2393 + }, + { + "epoch": 0.2716595744680851, + "grad_norm": 26.431682586669922, + "learning_rate": 5e-05, + "loss": 1.5778, + "num_input_tokens_seen": 160173724, + "step": 2394 + }, + { + "epoch": 0.2716595744680851, + "loss": 1.402040719985962, + "loss_ce": 0.0050681172870099545, + "loss_iou": 0.578125, + "loss_num": 0.047607421875, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 160173724, + "step": 2394 + }, + { + "epoch": 0.27177304964539006, + "grad_norm": 21.814865112304688, + "learning_rate": 5e-05, + "loss": 1.6864, + "num_input_tokens_seen": 160239988, + "step": 2395 + }, + { + "epoch": 0.27177304964539006, + "loss": 1.935031533241272, + "loss_ce": 0.003848693100735545, + "loss_iou": 0.76171875, + "loss_num": 0.08203125, + "loss_xval": 1.9296875, + "num_input_tokens_seen": 160239988, + "step": 2395 + }, + { + "epoch": 0.27188652482269504, + "grad_norm": 22.603275299072266, + "learning_rate": 5e-05, + "loss": 1.2845, + "num_input_tokens_seen": 160307160, + "step": 2396 + }, + { + "epoch": 0.27188652482269504, + "loss": 1.2502360343933105, + "loss_ce": 0.002189223188906908, + "loss_iou": 0.53125, + "loss_num": 0.03759765625, + "loss_xval": 1.25, + "num_input_tokens_seen": 160307160, + "step": 2396 + }, + { + "epoch": 0.272, + "grad_norm": 29.67661476135254, + "learning_rate": 5e-05, + "loss": 1.1648, + "num_input_tokens_seen": 160375264, + "step": 2397 + }, + { + "epoch": 0.272, + "loss": 1.02250337600708, + "loss_ce": 0.004192808642983437, + "loss_iou": 0.44921875, + "loss_num": 0.02392578125, + "loss_xval": 1.015625, + "num_input_tokens_seen": 160375264, + "step": 2397 + }, + { + "epoch": 0.27211347517730494, + "grad_norm": 26.851621627807617, + "learning_rate": 5e-05, + "loss": 1.2371, + "num_input_tokens_seen": 160442444, + "step": 2398 + }, + { + "epoch": 0.27211347517730494, + "loss": 1.2859352827072144, + "loss_ce": 0.004685232415795326, + "loss_iou": 0.51171875, + "loss_num": 0.0517578125, + "loss_xval": 1.28125, + "num_input_tokens_seen": 160442444, + "step": 2398 + }, + { + "epoch": 0.2722269503546099, + "grad_norm": 24.898422241210938, + "learning_rate": 5e-05, + "loss": 1.0648, + "num_input_tokens_seen": 160508544, + "step": 2399 + }, + { + "epoch": 0.2722269503546099, + "loss": 1.1119446754455566, + "loss_ce": 0.010626313276588917, + "loss_iou": 0.44921875, + "loss_num": 0.041015625, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 160508544, + "step": 2399 + }, + { + "epoch": 0.2723404255319149, + "grad_norm": 18.592243194580078, + "learning_rate": 5e-05, + "loss": 1.3579, + "num_input_tokens_seen": 160576584, + "step": 2400 + }, + { + "epoch": 0.2723404255319149, + "loss": 1.3629040718078613, + "loss_ce": 0.005970562342554331, + "loss_iou": 0.5625, + "loss_num": 0.046630859375, + "loss_xval": 1.359375, + "num_input_tokens_seen": 160576584, + "step": 2400 + }, + { + "epoch": 0.2724539007092199, + "grad_norm": 19.13413429260254, + "learning_rate": 5e-05, + "loss": 1.1342, + "num_input_tokens_seen": 160644012, + "step": 2401 + }, + { + "epoch": 0.2724539007092199, + "loss": 1.2986286878585815, + "loss_ce": 0.009077953174710274, + "loss_iou": 0.5078125, + "loss_num": 0.05517578125, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 160644012, + "step": 2401 + }, + { + "epoch": 0.2725673758865248, + "grad_norm": 25.795068740844727, + "learning_rate": 5e-05, + "loss": 1.0637, + "num_input_tokens_seen": 160710452, + "step": 2402 + }, + { + "epoch": 0.2725673758865248, + "loss": 0.9566891193389893, + "loss_ce": 0.007958637550473213, + "loss_iou": 0.427734375, + "loss_num": 0.018310546875, + "loss_xval": 0.94921875, + "num_input_tokens_seen": 160710452, + "step": 2402 + }, + { + "epoch": 0.2726808510638298, + "grad_norm": 31.100025177001953, + "learning_rate": 5e-05, + "loss": 1.3475, + "num_input_tokens_seen": 160777772, + "step": 2403 + }, + { + "epoch": 0.2726808510638298, + "loss": 1.2257695198059082, + "loss_ce": 0.006531313993036747, + "loss_iou": 0.52734375, + "loss_num": 0.033203125, + "loss_xval": 1.21875, + "num_input_tokens_seen": 160777772, + "step": 2403 + }, + { + "epoch": 0.27279432624113475, + "grad_norm": 23.45183563232422, + "learning_rate": 5e-05, + "loss": 1.4099, + "num_input_tokens_seen": 160844604, + "step": 2404 + }, + { + "epoch": 0.27279432624113475, + "loss": 1.4374094009399414, + "loss_ce": 0.009674940258264542, + "loss_iou": 0.58984375, + "loss_num": 0.049560546875, + "loss_xval": 1.4296875, + "num_input_tokens_seen": 160844604, + "step": 2404 + }, + { + "epoch": 0.27290780141843973, + "grad_norm": 21.805265426635742, + "learning_rate": 5e-05, + "loss": 1.1438, + "num_input_tokens_seen": 160911284, + "step": 2405 + }, + { + "epoch": 0.27290780141843973, + "loss": 1.1330506801605225, + "loss_ce": 0.0056093186140060425, + "loss_iou": 0.46875, + "loss_num": 0.03759765625, + "loss_xval": 1.125, + "num_input_tokens_seen": 160911284, + "step": 2405 + }, + { + "epoch": 0.27302127659574466, + "grad_norm": 24.764198303222656, + "learning_rate": 5e-05, + "loss": 1.2384, + "num_input_tokens_seen": 160978384, + "step": 2406 + }, + { + "epoch": 0.27302127659574466, + "loss": 1.1582112312316895, + "loss_ce": 0.003914301283657551, + "loss_iou": 0.5078125, + "loss_num": 0.027587890625, + "loss_xval": 1.15625, + "num_input_tokens_seen": 160978384, + "step": 2406 + }, + { + "epoch": 0.27313475177304963, + "grad_norm": 49.4785041809082, + "learning_rate": 5e-05, + "loss": 1.297, + "num_input_tokens_seen": 161045096, + "step": 2407 + }, + { + "epoch": 0.27313475177304963, + "loss": 1.3605952262878418, + "loss_ce": 0.0036616679280996323, + "loss_iou": 0.6171875, + "loss_num": 0.0245361328125, + "loss_xval": 1.359375, + "num_input_tokens_seen": 161045096, + "step": 2407 + }, + { + "epoch": 0.2732482269503546, + "grad_norm": 16.99403190612793, + "learning_rate": 5e-05, + "loss": 1.5174, + "num_input_tokens_seen": 161110736, + "step": 2408 + }, + { + "epoch": 0.2732482269503546, + "loss": 1.2363154888153076, + "loss_ce": 0.006457054987549782, + "loss_iou": 0.4765625, + "loss_num": 0.0556640625, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 161110736, + "step": 2408 + }, + { + "epoch": 0.2733617021276596, + "grad_norm": 21.61829376220703, + "learning_rate": 5e-05, + "loss": 1.5266, + "num_input_tokens_seen": 161177660, + "step": 2409 + }, + { + "epoch": 0.2733617021276596, + "loss": 1.5268980264663696, + "loss_ce": 0.006390238180756569, + "loss_iou": 0.62890625, + "loss_num": 0.052490234375, + "loss_xval": 1.5234375, + "num_input_tokens_seen": 161177660, + "step": 2409 + }, + { + "epoch": 0.2734751773049645, + "grad_norm": 15.712510108947754, + "learning_rate": 5e-05, + "loss": 1.1581, + "num_input_tokens_seen": 161245048, + "step": 2410 + }, + { + "epoch": 0.2734751773049645, + "loss": 1.105473518371582, + "loss_ce": 0.009129520505666733, + "loss_iou": 0.4453125, + "loss_num": 0.04150390625, + "loss_xval": 1.09375, + "num_input_tokens_seen": 161245048, + "step": 2410 + }, + { + "epoch": 0.2735886524822695, + "grad_norm": 25.065841674804688, + "learning_rate": 5e-05, + "loss": 1.2877, + "num_input_tokens_seen": 161312104, + "step": 2411 + }, + { + "epoch": 0.2735886524822695, + "loss": 1.3084697723388672, + "loss_ce": 0.0028057005256414413, + "loss_iou": 0.5703125, + "loss_num": 0.033935546875, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 161312104, + "step": 2411 + }, + { + "epoch": 0.27370212765957447, + "grad_norm": 20.38715362548828, + "learning_rate": 5e-05, + "loss": 1.1501, + "num_input_tokens_seen": 161380192, + "step": 2412 + }, + { + "epoch": 0.27370212765957447, + "loss": 1.1314713954925537, + "loss_ce": 0.005494843237102032, + "loss_iou": 0.470703125, + "loss_num": 0.036865234375, + "loss_xval": 1.125, + "num_input_tokens_seen": 161380192, + "step": 2412 + }, + { + "epoch": 0.27381560283687945, + "grad_norm": 31.24575424194336, + "learning_rate": 5e-05, + "loss": 1.442, + "num_input_tokens_seen": 161447776, + "step": 2413 + }, + { + "epoch": 0.27381560283687945, + "loss": 1.3904898166656494, + "loss_ce": 0.006700707133859396, + "loss_iou": 0.59765625, + "loss_num": 0.037841796875, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 161447776, + "step": 2413 + }, + { + "epoch": 0.2739290780141844, + "grad_norm": 25.129112243652344, + "learning_rate": 5e-05, + "loss": 1.6103, + "num_input_tokens_seen": 161515344, + "step": 2414 + }, + { + "epoch": 0.2739290780141844, + "loss": 1.6110379695892334, + "loss_ce": 0.004592670127749443, + "loss_iou": 0.67578125, + "loss_num": 0.051025390625, + "loss_xval": 1.609375, + "num_input_tokens_seen": 161515344, + "step": 2414 + }, + { + "epoch": 0.27404255319148935, + "grad_norm": 15.30652904510498, + "learning_rate": 5e-05, + "loss": 1.448, + "num_input_tokens_seen": 161581436, + "step": 2415 + }, + { + "epoch": 0.27404255319148935, + "loss": 1.4204295873641968, + "loss_ce": 0.002460857154801488, + "loss_iou": 0.58984375, + "loss_num": 0.0478515625, + "loss_xval": 1.421875, + "num_input_tokens_seen": 161581436, + "step": 2415 + }, + { + "epoch": 0.2741560283687943, + "grad_norm": 29.89098358154297, + "learning_rate": 5e-05, + "loss": 1.4311, + "num_input_tokens_seen": 161649112, + "step": 2416 + }, + { + "epoch": 0.2741560283687943, + "loss": 1.4206054210662842, + "loss_ce": 0.006542973220348358, + "loss_iou": 0.5859375, + "loss_num": 0.0478515625, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 161649112, + "step": 2416 + }, + { + "epoch": 0.2742695035460993, + "grad_norm": 23.5784969329834, + "learning_rate": 5e-05, + "loss": 1.6012, + "num_input_tokens_seen": 161716380, + "step": 2417 + }, + { + "epoch": 0.2742695035460993, + "loss": 1.6723589897155762, + "loss_ce": 0.007319982163608074, + "loss_iou": 0.68359375, + "loss_num": 0.059326171875, + "loss_xval": 1.6640625, + "num_input_tokens_seen": 161716380, + "step": 2417 + }, + { + "epoch": 0.2743829787234043, + "grad_norm": 16.632503509521484, + "learning_rate": 5e-05, + "loss": 1.2372, + "num_input_tokens_seen": 161783916, + "step": 2418 + }, + { + "epoch": 0.2743829787234043, + "loss": 1.3116952180862427, + "loss_ce": 0.004077982623130083, + "loss_iou": 0.578125, + "loss_num": 0.0306396484375, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 161783916, + "step": 2418 + }, + { + "epoch": 0.2744964539007092, + "grad_norm": 25.581947326660156, + "learning_rate": 5e-05, + "loss": 1.457, + "num_input_tokens_seen": 161850720, + "step": 2419 + }, + { + "epoch": 0.2744964539007092, + "loss": 1.3649277687072754, + "loss_ce": 0.00674290768802166, + "loss_iou": 0.52734375, + "loss_num": 0.060302734375, + "loss_xval": 1.359375, + "num_input_tokens_seen": 161850720, + "step": 2419 + }, + { + "epoch": 0.2746099290780142, + "grad_norm": 31.57598304748535, + "learning_rate": 5e-05, + "loss": 1.3134, + "num_input_tokens_seen": 161918340, + "step": 2420 + }, + { + "epoch": 0.2746099290780142, + "loss": 1.1710896492004395, + "loss_ce": 0.005074057728052139, + "loss_iou": 0.50390625, + "loss_num": 0.0322265625, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 161918340, + "step": 2420 + }, + { + "epoch": 0.27472340425531916, + "grad_norm": 27.736921310424805, + "learning_rate": 5e-05, + "loss": 1.2043, + "num_input_tokens_seen": 161984544, + "step": 2421 + }, + { + "epoch": 0.27472340425531916, + "loss": 1.1908684968948364, + "loss_ce": 0.006786518730223179, + "loss_iou": 0.51953125, + "loss_num": 0.0291748046875, + "loss_xval": 1.1875, + "num_input_tokens_seen": 161984544, + "step": 2421 + }, + { + "epoch": 0.27483687943262414, + "grad_norm": 29.116613388061523, + "learning_rate": 5e-05, + "loss": 1.1513, + "num_input_tokens_seen": 162049872, + "step": 2422 + }, + { + "epoch": 0.27483687943262414, + "loss": 0.8913017511367798, + "loss_ce": 0.005788424052298069, + "loss_iou": 0.330078125, + "loss_num": 0.045166015625, + "loss_xval": 0.88671875, + "num_input_tokens_seen": 162049872, + "step": 2422 + }, + { + "epoch": 0.27495035460992906, + "grad_norm": 29.201868057250977, + "learning_rate": 5e-05, + "loss": 1.312, + "num_input_tokens_seen": 162115672, + "step": 2423 + }, + { + "epoch": 0.27495035460992906, + "loss": 1.3340762853622437, + "loss_ce": 0.005951305851340294, + "loss_iou": 0.5546875, + "loss_num": 0.04296875, + "loss_xval": 1.328125, + "num_input_tokens_seen": 162115672, + "step": 2423 + }, + { + "epoch": 0.27506382978723404, + "grad_norm": 12.655155181884766, + "learning_rate": 5e-05, + "loss": 1.1451, + "num_input_tokens_seen": 162183696, + "step": 2424 + }, + { + "epoch": 0.27506382978723404, + "loss": 1.068515658378601, + "loss_ce": 0.006030957214534283, + "loss_iou": 0.42578125, + "loss_num": 0.042236328125, + "loss_xval": 1.0625, + "num_input_tokens_seen": 162183696, + "step": 2424 + }, + { + "epoch": 0.275177304964539, + "grad_norm": 35.71864700317383, + "learning_rate": 5e-05, + "loss": 1.1639, + "num_input_tokens_seen": 162251132, + "step": 2425 + }, + { + "epoch": 0.275177304964539, + "loss": 1.2372961044311523, + "loss_ce": 0.006827365141361952, + "loss_iou": 0.52734375, + "loss_num": 0.035888671875, + "loss_xval": 1.234375, + "num_input_tokens_seen": 162251132, + "step": 2425 + }, + { + "epoch": 0.275290780141844, + "grad_norm": 26.644977569580078, + "learning_rate": 5e-05, + "loss": 1.3491, + "num_input_tokens_seen": 162317900, + "step": 2426 + }, + { + "epoch": 0.275290780141844, + "loss": 1.356087327003479, + "loss_ce": 0.004524793475866318, + "loss_iou": 0.56640625, + "loss_num": 0.04443359375, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 162317900, + "step": 2426 + }, + { + "epoch": 0.2754042553191489, + "grad_norm": 19.55602264404297, + "learning_rate": 5e-05, + "loss": 1.4159, + "num_input_tokens_seen": 162384700, + "step": 2427 + }, + { + "epoch": 0.2754042553191489, + "loss": 1.4604763984680176, + "loss_ce": 0.00735128577798605, + "loss_iou": 0.625, + "loss_num": 0.041015625, + "loss_xval": 1.453125, + "num_input_tokens_seen": 162384700, + "step": 2427 + }, + { + "epoch": 0.2755177304964539, + "grad_norm": 18.470869064331055, + "learning_rate": 5e-05, + "loss": 1.535, + "num_input_tokens_seen": 162451504, + "step": 2428 + }, + { + "epoch": 0.2755177304964539, + "loss": 1.610544204711914, + "loss_ce": 0.009469978511333466, + "loss_iou": 0.6171875, + "loss_num": 0.072265625, + "loss_xval": 1.6015625, + "num_input_tokens_seen": 162451504, + "step": 2428 + }, + { + "epoch": 0.2756312056737589, + "grad_norm": 24.071916580200195, + "learning_rate": 5e-05, + "loss": 1.3198, + "num_input_tokens_seen": 162518948, + "step": 2429 + }, + { + "epoch": 0.2756312056737589, + "loss": 1.4025537967681885, + "loss_ce": 0.005092877894639969, + "loss_iou": 0.609375, + "loss_num": 0.0361328125, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 162518948, + "step": 2429 + }, + { + "epoch": 0.27574468085106385, + "grad_norm": 29.649572372436523, + "learning_rate": 5e-05, + "loss": 1.1499, + "num_input_tokens_seen": 162586524, + "step": 2430 + }, + { + "epoch": 0.27574468085106385, + "loss": 1.1488571166992188, + "loss_ce": 0.005790775641798973, + "loss_iou": 0.5, + "loss_num": 0.0284423828125, + "loss_xval": 1.140625, + "num_input_tokens_seen": 162586524, + "step": 2430 + }, + { + "epoch": 0.2758581560283688, + "grad_norm": 33.34419250488281, + "learning_rate": 5e-05, + "loss": 1.3081, + "num_input_tokens_seen": 162652484, + "step": 2431 + }, + { + "epoch": 0.2758581560283688, + "loss": 1.2538514137268066, + "loss_ce": 0.006781080272048712, + "loss_iou": 0.5234375, + "loss_num": 0.039306640625, + "loss_xval": 1.25, + "num_input_tokens_seen": 162652484, + "step": 2431 + }, + { + "epoch": 0.27597163120567375, + "grad_norm": 29.206296920776367, + "learning_rate": 5e-05, + "loss": 1.2368, + "num_input_tokens_seen": 162719080, + "step": 2432 + }, + { + "epoch": 0.27597163120567375, + "loss": 1.073976755142212, + "loss_ce": 0.006105724256485701, + "loss_iou": 0.451171875, + "loss_num": 0.033203125, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 162719080, + "step": 2432 + }, + { + "epoch": 0.27608510638297873, + "grad_norm": 24.566627502441406, + "learning_rate": 5e-05, + "loss": 1.3604, + "num_input_tokens_seen": 162785912, + "step": 2433 + }, + { + "epoch": 0.27608510638297873, + "loss": 1.4269282817840576, + "loss_ce": 0.0031001167371869087, + "loss_iou": 0.6015625, + "loss_num": 0.04345703125, + "loss_xval": 1.421875, + "num_input_tokens_seen": 162785912, + "step": 2433 + }, + { + "epoch": 0.2761985815602837, + "grad_norm": 25.033775329589844, + "learning_rate": 5e-05, + "loss": 1.3991, + "num_input_tokens_seen": 162853012, + "step": 2434 + }, + { + "epoch": 0.2761985815602837, + "loss": 1.2496421337127686, + "loss_ce": 0.005013204179704189, + "loss_iou": 0.51171875, + "loss_num": 0.044677734375, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 162853012, + "step": 2434 + }, + { + "epoch": 0.27631205673758863, + "grad_norm": 31.25701332092285, + "learning_rate": 5e-05, + "loss": 1.4075, + "num_input_tokens_seen": 162920264, + "step": 2435 + }, + { + "epoch": 0.27631205673758863, + "loss": 1.5513983964920044, + "loss_ce": 0.008429668843746185, + "loss_iou": 0.67578125, + "loss_num": 0.038818359375, + "loss_xval": 1.546875, + "num_input_tokens_seen": 162920264, + "step": 2435 + }, + { + "epoch": 0.2764255319148936, + "grad_norm": 76.53617095947266, + "learning_rate": 5e-05, + "loss": 1.3235, + "num_input_tokens_seen": 162987532, + "step": 2436 + }, + { + "epoch": 0.2764255319148936, + "loss": 1.2236535549163818, + "loss_ce": 0.004415293224155903, + "loss_iou": 0.54296875, + "loss_num": 0.0267333984375, + "loss_xval": 1.21875, + "num_input_tokens_seen": 162987532, + "step": 2436 + }, + { + "epoch": 0.2765390070921986, + "grad_norm": 15.77505874633789, + "learning_rate": 5e-05, + "loss": 1.3642, + "num_input_tokens_seen": 163054344, + "step": 2437 + }, + { + "epoch": 0.2765390070921986, + "loss": 1.5644057989120483, + "loss_ce": 0.004835519008338451, + "loss_iou": 0.62109375, + "loss_num": 0.06396484375, + "loss_xval": 1.5625, + "num_input_tokens_seen": 163054344, + "step": 2437 + }, + { + "epoch": 0.27665248226950356, + "grad_norm": 30.963863372802734, + "learning_rate": 5e-05, + "loss": 1.3271, + "num_input_tokens_seen": 163121600, + "step": 2438 + }, + { + "epoch": 0.27665248226950356, + "loss": 1.5595555305480957, + "loss_ce": 0.0043798102997243404, + "loss_iou": 0.6484375, + "loss_num": 0.0517578125, + "loss_xval": 1.5546875, + "num_input_tokens_seen": 163121600, + "step": 2438 + }, + { + "epoch": 0.2767659574468085, + "grad_norm": 44.15012741088867, + "learning_rate": 5e-05, + "loss": 1.4485, + "num_input_tokens_seen": 163188340, + "step": 2439 + }, + { + "epoch": 0.2767659574468085, + "loss": 1.4949766397476196, + "loss_ce": 0.004742310848087072, + "loss_iou": 0.6484375, + "loss_num": 0.037841796875, + "loss_xval": 1.4921875, + "num_input_tokens_seen": 163188340, + "step": 2439 + }, + { + "epoch": 0.27687943262411346, + "grad_norm": 31.956573486328125, + "learning_rate": 5e-05, + "loss": 1.7174, + "num_input_tokens_seen": 163255812, + "step": 2440 + }, + { + "epoch": 0.27687943262411346, + "loss": 1.5554317235946655, + "loss_ce": 0.007580139208585024, + "loss_iou": 0.6640625, + "loss_num": 0.04443359375, + "loss_xval": 1.546875, + "num_input_tokens_seen": 163255812, + "step": 2440 + }, + { + "epoch": 0.27699290780141844, + "grad_norm": 20.61301612854004, + "learning_rate": 5e-05, + "loss": 1.405, + "num_input_tokens_seen": 163323340, + "step": 2441 + }, + { + "epoch": 0.27699290780141844, + "loss": 1.2809258699417114, + "loss_ce": 0.006511781830340624, + "loss_iou": 0.55859375, + "loss_num": 0.031982421875, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 163323340, + "step": 2441 + }, + { + "epoch": 0.2771063829787234, + "grad_norm": 35.67294692993164, + "learning_rate": 5e-05, + "loss": 1.1594, + "num_input_tokens_seen": 163390092, + "step": 2442 + }, + { + "epoch": 0.2771063829787234, + "loss": 1.1389546394348145, + "loss_ce": 0.005165583919733763, + "loss_iou": 0.486328125, + "loss_num": 0.0322265625, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 163390092, + "step": 2442 + }, + { + "epoch": 0.27721985815602834, + "grad_norm": 23.46400260925293, + "learning_rate": 5e-05, + "loss": 1.6213, + "num_input_tokens_seen": 163457912, + "step": 2443 + }, + { + "epoch": 0.27721985815602834, + "loss": 1.5821244716644287, + "loss_ce": 0.0049760909751057625, + "loss_iou": 0.69140625, + "loss_num": 0.03857421875, + "loss_xval": 1.578125, + "num_input_tokens_seen": 163457912, + "step": 2443 + }, + { + "epoch": 0.2773333333333333, + "grad_norm": 23.250717163085938, + "learning_rate": 5e-05, + "loss": 1.1259, + "num_input_tokens_seen": 163524984, + "step": 2444 + }, + { + "epoch": 0.2773333333333333, + "loss": 0.9866396188735962, + "loss_ce": 0.0056826150976121426, + "loss_iou": 0.404296875, + "loss_num": 0.03466796875, + "loss_xval": 0.98046875, + "num_input_tokens_seen": 163524984, + "step": 2444 + }, + { + "epoch": 0.2774468085106383, + "grad_norm": 85.11067962646484, + "learning_rate": 5e-05, + "loss": 1.1885, + "num_input_tokens_seen": 163591564, + "step": 2445 + }, + { + "epoch": 0.2774468085106383, + "loss": 1.1133747100830078, + "loss_ce": 0.005357715301215649, + "loss_iou": 0.484375, + "loss_num": 0.0281982421875, + "loss_xval": 1.109375, + "num_input_tokens_seen": 163591564, + "step": 2445 + }, + { + "epoch": 0.2775602836879433, + "grad_norm": 41.4667854309082, + "learning_rate": 5e-05, + "loss": 1.5259, + "num_input_tokens_seen": 163658292, + "step": 2446 + }, + { + "epoch": 0.2775602836879433, + "loss": 1.718261480331421, + "loss_ce": 0.0043942066840827465, + "loss_iou": 0.6484375, + "loss_num": 0.0830078125, + "loss_xval": 1.7109375, + "num_input_tokens_seen": 163658292, + "step": 2446 + }, + { + "epoch": 0.27767375886524825, + "grad_norm": 29.07421875, + "learning_rate": 5e-05, + "loss": 1.534, + "num_input_tokens_seen": 163725440, + "step": 2447 + }, + { + "epoch": 0.27767375886524825, + "loss": 1.5130252838134766, + "loss_ce": 0.004724492318928242, + "loss_iou": 0.64453125, + "loss_num": 0.04345703125, + "loss_xval": 1.5078125, + "num_input_tokens_seen": 163725440, + "step": 2447 + }, + { + "epoch": 0.2777872340425532, + "grad_norm": 18.38947296142578, + "learning_rate": 5e-05, + "loss": 1.1788, + "num_input_tokens_seen": 163791412, + "step": 2448 + }, + { + "epoch": 0.2777872340425532, + "loss": 1.3658113479614258, + "loss_ce": 0.006436370313167572, + "loss_iou": 0.546875, + "loss_num": 0.052978515625, + "loss_xval": 1.359375, + "num_input_tokens_seen": 163791412, + "step": 2448 + }, + { + "epoch": 0.27790070921985816, + "grad_norm": 16.2481632232666, + "learning_rate": 5e-05, + "loss": 1.1776, + "num_input_tokens_seen": 163858052, + "step": 2449 + }, + { + "epoch": 0.27790070921985816, + "loss": 1.0682129859924316, + "loss_ce": 0.00278333923779428, + "loss_iou": 0.423828125, + "loss_num": 0.04345703125, + "loss_xval": 1.0625, + "num_input_tokens_seen": 163858052, + "step": 2449 + }, + { + "epoch": 0.27801418439716313, + "grad_norm": 34.33193588256836, + "learning_rate": 5e-05, + "loss": 1.5019, + "num_input_tokens_seen": 163925144, + "step": 2450 + }, + { + "epoch": 0.27801418439716313, + "loss": 1.6108782291412354, + "loss_ce": 0.00443287892267108, + "loss_iou": 0.65234375, + "loss_num": 0.06005859375, + "loss_xval": 1.609375, + "num_input_tokens_seen": 163925144, + "step": 2450 + }, + { + "epoch": 0.2781276595744681, + "grad_norm": 23.91892433166504, + "learning_rate": 5e-05, + "loss": 1.5581, + "num_input_tokens_seen": 163992660, + "step": 2451 + }, + { + "epoch": 0.2781276595744681, + "loss": 1.5961499214172363, + "loss_ce": 0.007771036587655544, + "loss_iou": 0.66796875, + "loss_num": 0.049560546875, + "loss_xval": 1.5859375, + "num_input_tokens_seen": 163992660, + "step": 2451 + }, + { + "epoch": 0.27824113475177303, + "grad_norm": 35.97998809814453, + "learning_rate": 5e-05, + "loss": 1.1034, + "num_input_tokens_seen": 164059612, + "step": 2452 + }, + { + "epoch": 0.27824113475177303, + "loss": 1.1235380172729492, + "loss_ce": 0.00537394592538476, + "loss_iou": 0.4921875, + "loss_num": 0.0263671875, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 164059612, + "step": 2452 + }, + { + "epoch": 0.278354609929078, + "grad_norm": 26.94955062866211, + "learning_rate": 5e-05, + "loss": 1.4064, + "num_input_tokens_seen": 164126292, + "step": 2453 + }, + { + "epoch": 0.278354609929078, + "loss": 1.200158715248108, + "loss_ce": 0.012170374393463135, + "loss_iou": 0.53125, + "loss_num": 0.025146484375, + "loss_xval": 1.1875, + "num_input_tokens_seen": 164126292, + "step": 2453 + }, + { + "epoch": 0.278468085106383, + "grad_norm": 41.63530731201172, + "learning_rate": 5e-05, + "loss": 1.4851, + "num_input_tokens_seen": 164193448, + "step": 2454 + }, + { + "epoch": 0.278468085106383, + "loss": 1.5033857822418213, + "loss_ce": 0.002897451166063547, + "loss_iou": 0.6328125, + "loss_num": 0.046630859375, + "loss_xval": 1.5, + "num_input_tokens_seen": 164193448, + "step": 2454 + }, + { + "epoch": 0.27858156028368797, + "grad_norm": 32.02942657470703, + "learning_rate": 5e-05, + "loss": 1.6611, + "num_input_tokens_seen": 164260408, + "step": 2455 + }, + { + "epoch": 0.27858156028368797, + "loss": 1.5203098058700562, + "loss_ce": 0.00566141540184617, + "loss_iou": 0.65625, + "loss_num": 0.041015625, + "loss_xval": 1.515625, + "num_input_tokens_seen": 164260408, + "step": 2455 + }, + { + "epoch": 0.2786950354609929, + "grad_norm": 25.008289337158203, + "learning_rate": 5e-05, + "loss": 1.1892, + "num_input_tokens_seen": 164327736, + "step": 2456 + }, + { + "epoch": 0.2786950354609929, + "loss": 1.147188425064087, + "loss_ce": 0.006075242999941111, + "loss_iou": 0.458984375, + "loss_num": 0.044921875, + "loss_xval": 1.140625, + "num_input_tokens_seen": 164327736, + "step": 2456 + }, + { + "epoch": 0.27880851063829787, + "grad_norm": 44.900718688964844, + "learning_rate": 5e-05, + "loss": 1.2711, + "num_input_tokens_seen": 164393780, + "step": 2457 + }, + { + "epoch": 0.27880851063829787, + "loss": 1.20956289768219, + "loss_ce": 0.0015550258103758097, + "loss_iou": 0.53125, + "loss_num": 0.0289306640625, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 164393780, + "step": 2457 + }, + { + "epoch": 0.27892198581560285, + "grad_norm": 28.958885192871094, + "learning_rate": 5e-05, + "loss": 1.4311, + "num_input_tokens_seen": 164460652, + "step": 2458 + }, + { + "epoch": 0.27892198581560285, + "loss": 1.625955581665039, + "loss_ce": 0.006815043743699789, + "loss_iou": 0.6953125, + "loss_num": 0.045654296875, + "loss_xval": 1.6171875, + "num_input_tokens_seen": 164460652, + "step": 2458 + }, + { + "epoch": 0.2790354609929078, + "grad_norm": 18.96322250366211, + "learning_rate": 5e-05, + "loss": 1.2771, + "num_input_tokens_seen": 164525804, + "step": 2459 + }, + { + "epoch": 0.2790354609929078, + "loss": 1.055408000946045, + "loss_ce": 0.008044692687690258, + "loss_iou": 0.4375, + "loss_num": 0.03466796875, + "loss_xval": 1.046875, + "num_input_tokens_seen": 164525804, + "step": 2459 + }, + { + "epoch": 0.27914893617021275, + "grad_norm": 25.122861862182617, + "learning_rate": 5e-05, + "loss": 1.2889, + "num_input_tokens_seen": 164590936, + "step": 2460 + }, + { + "epoch": 0.27914893617021275, + "loss": 1.1361613273620605, + "loss_ce": 0.0057902950793504715, + "loss_iou": 0.486328125, + "loss_num": 0.031005859375, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 164590936, + "step": 2460 + }, + { + "epoch": 0.2792624113475177, + "grad_norm": 28.502120971679688, + "learning_rate": 5e-05, + "loss": 1.4371, + "num_input_tokens_seen": 164657896, + "step": 2461 + }, + { + "epoch": 0.2792624113475177, + "loss": 1.517765998840332, + "loss_ce": 0.006779602728784084, + "loss_iou": 0.58203125, + "loss_num": 0.06884765625, + "loss_xval": 1.5078125, + "num_input_tokens_seen": 164657896, + "step": 2461 + }, + { + "epoch": 0.2793758865248227, + "grad_norm": 36.13401412963867, + "learning_rate": 5e-05, + "loss": 1.3577, + "num_input_tokens_seen": 164724748, + "step": 2462 + }, + { + "epoch": 0.2793758865248227, + "loss": 1.5208979845046997, + "loss_ce": 0.006249533034861088, + "loss_iou": 0.6171875, + "loss_num": 0.055908203125, + "loss_xval": 1.515625, + "num_input_tokens_seen": 164724748, + "step": 2462 + }, + { + "epoch": 0.2794893617021277, + "grad_norm": 50.383262634277344, + "learning_rate": 5e-05, + "loss": 1.5414, + "num_input_tokens_seen": 164791060, + "step": 2463 + }, + { + "epoch": 0.2794893617021277, + "loss": 1.3958574533462524, + "loss_ce": 0.0071855392307043076, + "loss_iou": 0.58984375, + "loss_num": 0.04248046875, + "loss_xval": 1.390625, + "num_input_tokens_seen": 164791060, + "step": 2463 + }, + { + "epoch": 0.2796028368794326, + "grad_norm": 19.251205444335938, + "learning_rate": 5e-05, + "loss": 1.2884, + "num_input_tokens_seen": 164858756, + "step": 2464 + }, + { + "epoch": 0.2796028368794326, + "loss": 1.342451572418213, + "loss_ce": 0.0060258107259869576, + "loss_iou": 0.55078125, + "loss_num": 0.046875, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 164858756, + "step": 2464 + }, + { + "epoch": 0.2797163120567376, + "grad_norm": 42.85445785522461, + "learning_rate": 5e-05, + "loss": 1.23, + "num_input_tokens_seen": 164925848, + "step": 2465 + }, + { + "epoch": 0.2797163120567376, + "loss": 1.2793666124343872, + "loss_ce": 0.00788228027522564, + "loss_iou": 0.51953125, + "loss_num": 0.04638671875, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 164925848, + "step": 2465 + }, + { + "epoch": 0.27982978723404256, + "grad_norm": 30.566699981689453, + "learning_rate": 5e-05, + "loss": 1.4654, + "num_input_tokens_seen": 164992880, + "step": 2466 + }, + { + "epoch": 0.27982978723404256, + "loss": 1.406442642211914, + "loss_ce": 0.007028556894510984, + "loss_iou": 0.59765625, + "loss_num": 0.04150390625, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 164992880, + "step": 2466 + }, + { + "epoch": 0.27994326241134754, + "grad_norm": 16.47623062133789, + "learning_rate": 5e-05, + "loss": 1.3629, + "num_input_tokens_seen": 165060756, + "step": 2467 + }, + { + "epoch": 0.27994326241134754, + "loss": 1.4251329898834229, + "loss_ce": 0.008140853606164455, + "loss_iou": 0.5546875, + "loss_num": 0.061767578125, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 165060756, + "step": 2467 + }, + { + "epoch": 0.28005673758865246, + "grad_norm": 28.18708610534668, + "learning_rate": 5e-05, + "loss": 1.1904, + "num_input_tokens_seen": 165128820, + "step": 2468 + }, + { + "epoch": 0.28005673758865246, + "loss": 1.1038897037506104, + "loss_ce": 0.007698281668126583, + "loss_iou": 0.48046875, + "loss_num": 0.0272216796875, + "loss_xval": 1.09375, + "num_input_tokens_seen": 165128820, + "step": 2468 + }, + { + "epoch": 0.28017021276595744, + "grad_norm": 23.232133865356445, + "learning_rate": 5e-05, + "loss": 1.682, + "num_input_tokens_seen": 165195360, + "step": 2469 + }, + { + "epoch": 0.28017021276595744, + "loss": 1.9179296493530273, + "loss_ce": 0.010214789770543575, + "loss_iou": 0.74609375, + "loss_num": 0.0830078125, + "loss_xval": 1.90625, + "num_input_tokens_seen": 165195360, + "step": 2469 + }, + { + "epoch": 0.2802836879432624, + "grad_norm": 14.074429512023926, + "learning_rate": 5e-05, + "loss": 1.252, + "num_input_tokens_seen": 165262072, + "step": 2470 + }, + { + "epoch": 0.2802836879432624, + "loss": 1.2249727249145508, + "loss_ce": 0.0076876431703567505, + "loss_iou": 0.5078125, + "loss_num": 0.039794921875, + "loss_xval": 1.21875, + "num_input_tokens_seen": 165262072, + "step": 2470 + }, + { + "epoch": 0.2803971631205674, + "grad_norm": 47.628841400146484, + "learning_rate": 5e-05, + "loss": 1.3938, + "num_input_tokens_seen": 165327944, + "step": 2471 + }, + { + "epoch": 0.2803971631205674, + "loss": 1.338058590888977, + "loss_ce": 0.0055390577763319016, + "loss_iou": 0.56640625, + "loss_num": 0.0400390625, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 165327944, + "step": 2471 + }, + { + "epoch": 0.2805106382978723, + "grad_norm": 37.38849639892578, + "learning_rate": 5e-05, + "loss": 1.4212, + "num_input_tokens_seen": 165394748, + "step": 2472 + }, + { + "epoch": 0.2805106382978723, + "loss": 1.457658290863037, + "loss_ce": 0.007951295003294945, + "loss_iou": 0.625, + "loss_num": 0.03955078125, + "loss_xval": 1.453125, + "num_input_tokens_seen": 165394748, + "step": 2472 + }, + { + "epoch": 0.2806241134751773, + "grad_norm": 22.308551788330078, + "learning_rate": 5e-05, + "loss": 1.2279, + "num_input_tokens_seen": 165461468, + "step": 2473 + }, + { + "epoch": 0.2806241134751773, + "loss": 1.3290811777114868, + "loss_ce": 0.0038858475163578987, + "loss_iou": 0.498046875, + "loss_num": 0.06591796875, + "loss_xval": 1.328125, + "num_input_tokens_seen": 165461468, + "step": 2473 + }, + { + "epoch": 0.2807375886524823, + "grad_norm": 26.085790634155273, + "learning_rate": 5e-05, + "loss": 1.3146, + "num_input_tokens_seen": 165528104, + "step": 2474 + }, + { + "epoch": 0.2807375886524823, + "loss": 1.2065081596374512, + "loss_ce": 0.0019183428958058357, + "loss_iou": 0.52734375, + "loss_num": 0.0294189453125, + "loss_xval": 1.203125, + "num_input_tokens_seen": 165528104, + "step": 2474 + }, + { + "epoch": 0.28085106382978725, + "grad_norm": 30.40644645690918, + "learning_rate": 5e-05, + "loss": 1.5091, + "num_input_tokens_seen": 165594036, + "step": 2475 + }, + { + "epoch": 0.28085106382978725, + "loss": 1.4515976905822754, + "loss_ce": 0.003355491440743208, + "loss_iou": 0.578125, + "loss_num": 0.0576171875, + "loss_xval": 1.4453125, + "num_input_tokens_seen": 165594036, + "step": 2475 + }, + { + "epoch": 0.2809645390070922, + "grad_norm": 23.420602798461914, + "learning_rate": 5e-05, + "loss": 1.4556, + "num_input_tokens_seen": 165661232, + "step": 2476 + }, + { + "epoch": 0.2809645390070922, + "loss": 1.5394244194030762, + "loss_ce": 0.011104075238108635, + "loss_iou": 0.6171875, + "loss_num": 0.0595703125, + "loss_xval": 1.53125, + "num_input_tokens_seen": 165661232, + "step": 2476 + }, + { + "epoch": 0.28107801418439715, + "grad_norm": 21.185745239257812, + "learning_rate": 5e-05, + "loss": 1.4016, + "num_input_tokens_seen": 165727988, + "step": 2477 + }, + { + "epoch": 0.28107801418439715, + "loss": 1.3934048414230347, + "loss_ce": 0.005709472578018904, + "loss_iou": 0.6015625, + "loss_num": 0.03759765625, + "loss_xval": 1.390625, + "num_input_tokens_seen": 165727988, + "step": 2477 + }, + { + "epoch": 0.28119148936170213, + "grad_norm": 22.811567306518555, + "learning_rate": 5e-05, + "loss": 1.5145, + "num_input_tokens_seen": 165794540, + "step": 2478 + }, + { + "epoch": 0.28119148936170213, + "loss": 1.457621693611145, + "loss_ce": 0.0064498367719352245, + "loss_iou": 0.58203125, + "loss_num": 0.057373046875, + "loss_xval": 1.453125, + "num_input_tokens_seen": 165794540, + "step": 2478 + }, + { + "epoch": 0.2813049645390071, + "grad_norm": 36.752445220947266, + "learning_rate": 5e-05, + "loss": 1.2884, + "num_input_tokens_seen": 165862160, + "step": 2479 + }, + { + "epoch": 0.2813049645390071, + "loss": 1.2307053804397583, + "loss_ce": 0.0052415295504033566, + "loss_iou": 0.515625, + "loss_num": 0.0390625, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 165862160, + "step": 2479 + }, + { + "epoch": 0.28141843971631203, + "grad_norm": 25.116975784301758, + "learning_rate": 5e-05, + "loss": 1.409, + "num_input_tokens_seen": 165927876, + "step": 2480 + }, + { + "epoch": 0.28141843971631203, + "loss": 1.411677598953247, + "loss_ce": 0.005427509546279907, + "loss_iou": 0.59765625, + "loss_num": 0.042724609375, + "loss_xval": 1.40625, + "num_input_tokens_seen": 165927876, + "step": 2480 + }, + { + "epoch": 0.281531914893617, + "grad_norm": 10.379990577697754, + "learning_rate": 5e-05, + "loss": 1.4148, + "num_input_tokens_seen": 165994684, + "step": 2481 + }, + { + "epoch": 0.281531914893617, + "loss": 1.538538932800293, + "loss_ce": 0.0063122897408902645, + "loss_iou": 0.64453125, + "loss_num": 0.048095703125, + "loss_xval": 1.53125, + "num_input_tokens_seen": 165994684, + "step": 2481 + }, + { + "epoch": 0.281645390070922, + "grad_norm": 18.410375595092773, + "learning_rate": 5e-05, + "loss": 1.054, + "num_input_tokens_seen": 166061784, + "step": 2482 + }, + { + "epoch": 0.281645390070922, + "loss": 0.9680804014205933, + "loss_ce": 0.002260112203657627, + "loss_iou": 0.431640625, + "loss_num": 0.0208740234375, + "loss_xval": 0.96484375, + "num_input_tokens_seen": 166061784, + "step": 2482 + }, + { + "epoch": 0.28175886524822696, + "grad_norm": 19.606979370117188, + "learning_rate": 5e-05, + "loss": 1.2727, + "num_input_tokens_seen": 166128764, + "step": 2483 + }, + { + "epoch": 0.28175886524822696, + "loss": 1.3903861045837402, + "loss_ce": 0.005132170394062996, + "loss_iou": 0.57421875, + "loss_num": 0.04736328125, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 166128764, + "step": 2483 + }, + { + "epoch": 0.28187234042553194, + "grad_norm": 27.718753814697266, + "learning_rate": 5e-05, + "loss": 1.3472, + "num_input_tokens_seen": 166195884, + "step": 2484 + }, + { + "epoch": 0.28187234042553194, + "loss": 1.3628077507019043, + "loss_ce": 0.005385741591453552, + "loss_iou": 0.58984375, + "loss_num": 0.035400390625, + "loss_xval": 1.359375, + "num_input_tokens_seen": 166195884, + "step": 2484 + }, + { + "epoch": 0.28198581560283686, + "grad_norm": 26.179847717285156, + "learning_rate": 5e-05, + "loss": 1.4662, + "num_input_tokens_seen": 166262552, + "step": 2485 + }, + { + "epoch": 0.28198581560283686, + "loss": 1.43125319480896, + "loss_ce": 0.0069367303512990475, + "loss_iou": 0.6171875, + "loss_num": 0.03857421875, + "loss_xval": 1.421875, + "num_input_tokens_seen": 166262552, + "step": 2485 + }, + { + "epoch": 0.28209929078014184, + "grad_norm": 31.46497917175293, + "learning_rate": 5e-05, + "loss": 1.3935, + "num_input_tokens_seen": 166328732, + "step": 2486 + }, + { + "epoch": 0.28209929078014184, + "loss": 1.4725120067596436, + "loss_ce": 0.008644754998385906, + "loss_iou": 0.56640625, + "loss_num": 0.06689453125, + "loss_xval": 1.4609375, + "num_input_tokens_seen": 166328732, + "step": 2486 + }, + { + "epoch": 0.2822127659574468, + "grad_norm": 31.079538345336914, + "learning_rate": 5e-05, + "loss": 1.3592, + "num_input_tokens_seen": 166396096, + "step": 2487 + }, + { + "epoch": 0.2822127659574468, + "loss": 1.3796894550323486, + "loss_ce": 0.014455143362283707, + "loss_iou": 0.5546875, + "loss_num": 0.051025390625, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 166396096, + "step": 2487 + }, + { + "epoch": 0.2823262411347518, + "grad_norm": 38.919898986816406, + "learning_rate": 5e-05, + "loss": 1.4213, + "num_input_tokens_seen": 166462836, + "step": 2488 + }, + { + "epoch": 0.2823262411347518, + "loss": 1.5063272714614868, + "loss_ce": 0.005350656807422638, + "loss_iou": 0.6640625, + "loss_num": 0.035400390625, + "loss_xval": 1.5, + "num_input_tokens_seen": 166462836, + "step": 2488 + }, + { + "epoch": 0.2824397163120567, + "grad_norm": 27.711233139038086, + "learning_rate": 5e-05, + "loss": 1.6412, + "num_input_tokens_seen": 166530724, + "step": 2489 + }, + { + "epoch": 0.2824397163120567, + "loss": 1.4478020668029785, + "loss_ce": 0.005419245921075344, + "loss_iou": 0.640625, + "loss_num": 0.033203125, + "loss_xval": 1.4453125, + "num_input_tokens_seen": 166530724, + "step": 2489 + }, + { + "epoch": 0.2825531914893617, + "grad_norm": 17.564882278442383, + "learning_rate": 5e-05, + "loss": 1.0824, + "num_input_tokens_seen": 166598376, + "step": 2490 + }, + { + "epoch": 0.2825531914893617, + "loss": 1.2250782251358032, + "loss_ce": 0.007793086115270853, + "loss_iou": 0.494140625, + "loss_num": 0.046142578125, + "loss_xval": 1.21875, + "num_input_tokens_seen": 166598376, + "step": 2490 + }, + { + "epoch": 0.2826666666666667, + "grad_norm": 19.568262100219727, + "learning_rate": 5e-05, + "loss": 1.3646, + "num_input_tokens_seen": 166665964, + "step": 2491 + }, + { + "epoch": 0.2826666666666667, + "loss": 1.4602513313293457, + "loss_ce": 0.005173189099878073, + "loss_iou": 0.609375, + "loss_num": 0.046630859375, + "loss_xval": 1.453125, + "num_input_tokens_seen": 166665964, + "step": 2491 + }, + { + "epoch": 0.28278014184397166, + "grad_norm": 32.236122131347656, + "learning_rate": 5e-05, + "loss": 1.2496, + "num_input_tokens_seen": 166732508, + "step": 2492 + }, + { + "epoch": 0.28278014184397166, + "loss": 1.2626844644546509, + "loss_ce": 0.0058485399931669235, + "loss_iou": 0.54296875, + "loss_num": 0.034423828125, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 166732508, + "step": 2492 + }, + { + "epoch": 0.2828936170212766, + "grad_norm": 24.35364532470703, + "learning_rate": 5e-05, + "loss": 1.4714, + "num_input_tokens_seen": 166798788, + "step": 2493 + }, + { + "epoch": 0.2828936170212766, + "loss": 1.4616575241088867, + "loss_ce": 0.0036498145200312138, + "loss_iou": 0.6015625, + "loss_num": 0.051025390625, + "loss_xval": 1.4609375, + "num_input_tokens_seen": 166798788, + "step": 2493 + }, + { + "epoch": 0.28300709219858156, + "grad_norm": 15.855512619018555, + "learning_rate": 5e-05, + "loss": 1.0825, + "num_input_tokens_seen": 166866376, + "step": 2494 + }, + { + "epoch": 0.28300709219858156, + "loss": 0.9845428466796875, + "loss_ce": 0.003585743485018611, + "loss_iou": 0.43359375, + "loss_num": 0.0230712890625, + "loss_xval": 0.98046875, + "num_input_tokens_seen": 166866376, + "step": 2494 + }, + { + "epoch": 0.28312056737588653, + "grad_norm": 19.954256057739258, + "learning_rate": 5e-05, + "loss": 1.1309, + "num_input_tokens_seen": 166934076, + "step": 2495 + }, + { + "epoch": 0.28312056737588653, + "loss": 1.0938222408294678, + "loss_ce": 0.008373035117983818, + "loss_iou": 0.41796875, + "loss_num": 0.05029296875, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 166934076, + "step": 2495 + }, + { + "epoch": 0.2832340425531915, + "grad_norm": 39.6898193359375, + "learning_rate": 5e-05, + "loss": 1.127, + "num_input_tokens_seen": 167000940, + "step": 2496 + }, + { + "epoch": 0.2832340425531915, + "loss": 1.1289396286010742, + "loss_ce": 0.004916211124509573, + "loss_iou": 0.5, + "loss_num": 0.0252685546875, + "loss_xval": 1.125, + "num_input_tokens_seen": 167000940, + "step": 2496 + }, + { + "epoch": 0.28334751773049643, + "grad_norm": 26.96700668334961, + "learning_rate": 5e-05, + "loss": 1.487, + "num_input_tokens_seen": 167069244, + "step": 2497 + }, + { + "epoch": 0.28334751773049643, + "loss": 1.5445442199707031, + "loss_ce": 0.007434837985783815, + "loss_iou": 0.65625, + "loss_num": 0.046142578125, + "loss_xval": 1.5390625, + "num_input_tokens_seen": 167069244, + "step": 2497 + }, + { + "epoch": 0.2834609929078014, + "grad_norm": 29.7996883392334, + "learning_rate": 5e-05, + "loss": 1.2893, + "num_input_tokens_seen": 167136852, + "step": 2498 + }, + { + "epoch": 0.2834609929078014, + "loss": 1.137412667274475, + "loss_ce": 0.0026469812728464603, + "loss_iou": 0.47265625, + "loss_num": 0.0380859375, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 167136852, + "step": 2498 + }, + { + "epoch": 0.2835744680851064, + "grad_norm": 20.61800765991211, + "learning_rate": 5e-05, + "loss": 1.3854, + "num_input_tokens_seen": 167203104, + "step": 2499 + }, + { + "epoch": 0.2835744680851064, + "loss": 1.4257150888442993, + "loss_ce": 0.005304884631186724, + "loss_iou": 0.58203125, + "loss_num": 0.0517578125, + "loss_xval": 1.421875, + "num_input_tokens_seen": 167203104, + "step": 2499 + }, + { + "epoch": 0.28368794326241137, + "grad_norm": 34.08607864379883, + "learning_rate": 5e-05, + "loss": 1.4152, + "num_input_tokens_seen": 167269292, + "step": 2500 + }, + { + "epoch": 0.28368794326241137, + "eval_seeclick_CIoU": 0.3923880308866501, + "eval_seeclick_GIoU": 0.38324685394763947, + "eval_seeclick_IoU": 0.47440074384212494, + "eval_seeclick_MAE_all": 0.1574685424566269, + "eval_seeclick_MAE_h": 0.08097045868635178, + "eval_seeclick_MAE_w": 0.15931436419487, + "eval_seeclick_MAE_x_boxes": 0.24473823606967926, + "eval_seeclick_MAE_y_boxes": 0.09903767704963684, + "eval_seeclick_NUM_probability": 0.999625027179718, + "eval_seeclick_inside_bbox": 0.6770833432674408, + "eval_seeclick_loss": 2.5419704914093018, + "eval_seeclick_loss_ce": 0.015168075449764729, + "eval_seeclick_loss_iou": 0.89459228515625, + "eval_seeclick_loss_num": 0.15945816040039062, + "eval_seeclick_loss_xval": 2.5865478515625, + "eval_seeclick_runtime": 68.152, + "eval_seeclick_samples_per_second": 0.69, + "eval_seeclick_steps_per_second": 0.029, + "num_input_tokens_seen": 167269292, + "step": 2500 + }, + { + "epoch": 0.28368794326241137, + "eval_icons_CIoU": 0.4874896705150604, + "eval_icons_GIoU": 0.4747273772954941, + "eval_icons_IoU": 0.538964182138443, + "eval_icons_MAE_all": 0.1321338713169098, + "eval_icons_MAE_h": 0.08779094368219376, + "eval_icons_MAE_w": 0.09168993681669235, + "eval_icons_MAE_x_boxes": 0.12553509697318077, + "eval_icons_MAE_y_boxes": 0.11765136942267418, + "eval_icons_NUM_probability": 0.9999085664749146, + "eval_icons_inside_bbox": 0.65625, + "eval_icons_loss": 2.3772120475769043, + "eval_icons_loss_ce": 2.760270217549987e-05, + "eval_icons_loss_iou": 0.878662109375, + "eval_icons_loss_num": 0.11781692504882812, + "eval_icons_loss_xval": 2.34716796875, + "eval_icons_runtime": 72.028, + "eval_icons_samples_per_second": 0.694, + "eval_icons_steps_per_second": 0.028, + "num_input_tokens_seen": 167269292, + "step": 2500 + }, + { + "epoch": 0.28368794326241137, + "eval_screenspot_CIoU": 0.35845688978830975, + "eval_screenspot_GIoU": 0.3358543614546458, + "eval_screenspot_IoU": 0.42609166105588275, + "eval_screenspot_MAE_all": 0.19146422545115152, + "eval_screenspot_MAE_h": 0.14601909617582956, + "eval_screenspot_MAE_w": 0.19367433587710062, + "eval_screenspot_MAE_x_boxes": 0.2195958892504374, + "eval_screenspot_MAE_y_boxes": 0.11536581565936406, + "eval_screenspot_NUM_probability": 0.9998723467191061, + "eval_screenspot_inside_bbox": 0.6329166690508524, + "eval_screenspot_loss": 2.8910207748413086, + "eval_screenspot_loss_ce": 0.017607105274995167, + "eval_screenspot_loss_iou": 0.96630859375, + "eval_screenspot_loss_num": 0.20067342122395834, + "eval_screenspot_loss_xval": 2.935546875, + "eval_screenspot_runtime": 120.8263, + "eval_screenspot_samples_per_second": 0.737, + "eval_screenspot_steps_per_second": 0.025, + "num_input_tokens_seen": 167269292, + "step": 2500 + }, + { + "epoch": 0.28368794326241137, + "eval_compot_CIoU": 0.3427489995956421, + "eval_compot_GIoU": 0.3189292401075363, + "eval_compot_IoU": 0.42021502554416656, + "eval_compot_MAE_all": 0.1957329660654068, + "eval_compot_MAE_h": 0.11256306990981102, + "eval_compot_MAE_w": 0.2304139882326126, + "eval_compot_MAE_x_boxes": 0.1966693475842476, + "eval_compot_MAE_y_boxes": 0.09484490752220154, + "eval_compot_NUM_probability": 0.9998696744441986, + "eval_compot_inside_bbox": 0.6145833432674408, + "eval_compot_loss": 2.975559711456299, + "eval_compot_loss_ce": 0.005683279596269131, + "eval_compot_loss_iou": 0.986328125, + "eval_compot_loss_num": 0.21435546875, + "eval_compot_loss_xval": 3.04296875, + "eval_compot_runtime": 69.8417, + "eval_compot_samples_per_second": 0.716, + "eval_compot_steps_per_second": 0.029, + "num_input_tokens_seen": 167269292, + "step": 2500 + }, + { + "epoch": 0.28368794326241137, + "loss": 2.892819404602051, + "loss_ce": 0.006100646685808897, + "loss_iou": 0.94921875, + "loss_num": 0.197265625, + "loss_xval": 2.890625, + "num_input_tokens_seen": 167269292, + "step": 2500 + }, + { + "epoch": 0.2838014184397163, + "grad_norm": 29.91407012939453, + "learning_rate": 5e-05, + "loss": 1.499, + "num_input_tokens_seen": 167336044, + "step": 2501 + }, + { + "epoch": 0.2838014184397163, + "loss": 1.5150647163391113, + "loss_ce": 0.005299033131450415, + "loss_iou": 0.6015625, + "loss_num": 0.06103515625, + "loss_xval": 1.5078125, + "num_input_tokens_seen": 167336044, + "step": 2501 + }, + { + "epoch": 0.28391489361702127, + "grad_norm": 39.20782470703125, + "learning_rate": 5e-05, + "loss": 1.4981, + "num_input_tokens_seen": 167403024, + "step": 2502 + }, + { + "epoch": 0.28391489361702127, + "loss": 1.392333745956421, + "loss_ce": 0.006103334482759237, + "loss_iou": 0.55859375, + "loss_num": 0.05322265625, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 167403024, + "step": 2502 + }, + { + "epoch": 0.28402836879432625, + "grad_norm": 20.571237564086914, + "learning_rate": 5e-05, + "loss": 1.4764, + "num_input_tokens_seen": 167470580, + "step": 2503 + }, + { + "epoch": 0.28402836879432625, + "loss": 1.4923412799835205, + "loss_ce": 0.008942870423197746, + "loss_iou": 0.63671875, + "loss_num": 0.041748046875, + "loss_xval": 1.484375, + "num_input_tokens_seen": 167470580, + "step": 2503 + }, + { + "epoch": 0.2841418439716312, + "grad_norm": 24.57952308654785, + "learning_rate": 5e-05, + "loss": 1.1404, + "num_input_tokens_seen": 167536800, + "step": 2504 + }, + { + "epoch": 0.2841418439716312, + "loss": 1.2287688255310059, + "loss_ce": 0.009042183868587017, + "loss_iou": 0.478515625, + "loss_num": 0.05224609375, + "loss_xval": 1.21875, + "num_input_tokens_seen": 167536800, + "step": 2504 + }, + { + "epoch": 0.28425531914893615, + "grad_norm": 35.00624084472656, + "learning_rate": 5e-05, + "loss": 1.4147, + "num_input_tokens_seen": 167603604, + "step": 2505 + }, + { + "epoch": 0.28425531914893615, + "loss": 1.5855166912078857, + "loss_ce": 0.007391669321805239, + "loss_iou": 0.63671875, + "loss_num": 0.060302734375, + "loss_xval": 1.578125, + "num_input_tokens_seen": 167603604, + "step": 2505 + }, + { + "epoch": 0.2843687943262411, + "grad_norm": 19.434799194335938, + "learning_rate": 5e-05, + "loss": 1.5285, + "num_input_tokens_seen": 167670296, + "step": 2506 + }, + { + "epoch": 0.2843687943262411, + "loss": 1.4172146320343018, + "loss_ce": 0.004616939462721348, + "loss_iou": 0.59765625, + "loss_num": 0.044189453125, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 167670296, + "step": 2506 + }, + { + "epoch": 0.2844822695035461, + "grad_norm": 41.30204772949219, + "learning_rate": 5e-05, + "loss": 1.4496, + "num_input_tokens_seen": 167737644, + "step": 2507 + }, + { + "epoch": 0.2844822695035461, + "loss": 1.7241100072860718, + "loss_ce": 0.008289707824587822, + "loss_iou": 0.6953125, + "loss_num": 0.0654296875, + "loss_xval": 1.71875, + "num_input_tokens_seen": 167737644, + "step": 2507 + }, + { + "epoch": 0.2845957446808511, + "grad_norm": 29.301671981811523, + "learning_rate": 5e-05, + "loss": 1.3293, + "num_input_tokens_seen": 167804712, + "step": 2508 + }, + { + "epoch": 0.2845957446808511, + "loss": 1.226935625076294, + "loss_ce": 0.005255932919681072, + "loss_iou": 0.52734375, + "loss_num": 0.03271484375, + "loss_xval": 1.21875, + "num_input_tokens_seen": 167804712, + "step": 2508 + }, + { + "epoch": 0.284709219858156, + "grad_norm": 23.8344669342041, + "learning_rate": 5e-05, + "loss": 1.374, + "num_input_tokens_seen": 167871252, + "step": 2509 + }, + { + "epoch": 0.284709219858156, + "loss": 1.5252187252044678, + "loss_ce": 0.006175762973725796, + "loss_iou": 0.640625, + "loss_num": 0.047119140625, + "loss_xval": 1.515625, + "num_input_tokens_seen": 167871252, + "step": 2509 + }, + { + "epoch": 0.284822695035461, + "grad_norm": 21.547306060791016, + "learning_rate": 5e-05, + "loss": 1.1502, + "num_input_tokens_seen": 167937896, + "step": 2510 + }, + { + "epoch": 0.284822695035461, + "loss": 0.979196310043335, + "loss_ce": 0.0033662260975688696, + "loss_iou": 0.416015625, + "loss_num": 0.0289306640625, + "loss_xval": 0.9765625, + "num_input_tokens_seen": 167937896, + "step": 2510 + }, + { + "epoch": 0.28493617021276596, + "grad_norm": 29.693397521972656, + "learning_rate": 5e-05, + "loss": 1.4201, + "num_input_tokens_seen": 168005860, + "step": 2511 + }, + { + "epoch": 0.28493617021276596, + "loss": 1.452519178390503, + "loss_ce": 0.0028120046481490135, + "loss_iou": 0.63671875, + "loss_num": 0.035888671875, + "loss_xval": 1.453125, + "num_input_tokens_seen": 168005860, + "step": 2511 + }, + { + "epoch": 0.28504964539007094, + "grad_norm": 31.398263931274414, + "learning_rate": 5e-05, + "loss": 1.1109, + "num_input_tokens_seen": 168072472, + "step": 2512 + }, + { + "epoch": 0.28504964539007094, + "loss": 1.0373643636703491, + "loss_ce": 0.007579217664897442, + "loss_iou": 0.4453125, + "loss_num": 0.0281982421875, + "loss_xval": 1.03125, + "num_input_tokens_seen": 168072472, + "step": 2512 + }, + { + "epoch": 0.28516312056737586, + "grad_norm": 28.63449478149414, + "learning_rate": 5e-05, + "loss": 1.4498, + "num_input_tokens_seen": 168139476, + "step": 2513 + }, + { + "epoch": 0.28516312056737586, + "loss": 1.2886741161346436, + "loss_ce": 0.004143485799431801, + "loss_iou": 0.52734375, + "loss_num": 0.046142578125, + "loss_xval": 1.28125, + "num_input_tokens_seen": 168139476, + "step": 2513 + }, + { + "epoch": 0.28527659574468084, + "grad_norm": 18.09467315673828, + "learning_rate": 5e-05, + "loss": 1.4406, + "num_input_tokens_seen": 168206612, + "step": 2514 + }, + { + "epoch": 0.28527659574468084, + "loss": 1.2970714569091797, + "loss_ce": 0.004591017495840788, + "loss_iou": 0.5390625, + "loss_num": 0.0419921875, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 168206612, + "step": 2514 + }, + { + "epoch": 0.2853900709219858, + "grad_norm": 45.484153747558594, + "learning_rate": 5e-05, + "loss": 1.3017, + "num_input_tokens_seen": 168273028, + "step": 2515 + }, + { + "epoch": 0.2853900709219858, + "loss": 1.1669553518295288, + "loss_ce": 0.0027708057314157486, + "loss_iou": 0.48046875, + "loss_num": 0.041259765625, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 168273028, + "step": 2515 + }, + { + "epoch": 0.2855035460992908, + "grad_norm": 24.655336380004883, + "learning_rate": 5e-05, + "loss": 1.2573, + "num_input_tokens_seen": 168339572, + "step": 2516 + }, + { + "epoch": 0.2855035460992908, + "loss": 1.279505729675293, + "loss_ce": 0.004115086514502764, + "loss_iou": 0.5390625, + "loss_num": 0.040283203125, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 168339572, + "step": 2516 + }, + { + "epoch": 0.2856170212765957, + "grad_norm": 35.180782318115234, + "learning_rate": 5e-05, + "loss": 1.3274, + "num_input_tokens_seen": 168407452, + "step": 2517 + }, + { + "epoch": 0.2856170212765957, + "loss": 1.338407039642334, + "loss_ce": 0.0053991940803825855, + "loss_iou": 0.5703125, + "loss_num": 0.038818359375, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 168407452, + "step": 2517 + }, + { + "epoch": 0.2857304964539007, + "grad_norm": 35.05885696411133, + "learning_rate": 5e-05, + "loss": 1.4647, + "num_input_tokens_seen": 168473944, + "step": 2518 + }, + { + "epoch": 0.2857304964539007, + "loss": 1.493227481842041, + "loss_ce": 0.005922830663621426, + "loss_iou": 0.62890625, + "loss_num": 0.04638671875, + "loss_xval": 1.484375, + "num_input_tokens_seen": 168473944, + "step": 2518 + }, + { + "epoch": 0.2858439716312057, + "grad_norm": 28.590145111083984, + "learning_rate": 5e-05, + "loss": 1.2625, + "num_input_tokens_seen": 168540556, + "step": 2519 + }, + { + "epoch": 0.2858439716312057, + "loss": 1.4666879177093506, + "loss_ce": 0.0057504610158503056, + "loss_iou": 0.6171875, + "loss_num": 0.04541015625, + "loss_xval": 1.4609375, + "num_input_tokens_seen": 168540556, + "step": 2519 + }, + { + "epoch": 0.28595744680851065, + "grad_norm": 35.791053771972656, + "learning_rate": 5e-05, + "loss": 1.2466, + "num_input_tokens_seen": 168607872, + "step": 2520 + }, + { + "epoch": 0.28595744680851065, + "loss": 1.2350144386291504, + "loss_ce": 0.003080788068473339, + "loss_iou": 0.55078125, + "loss_num": 0.0255126953125, + "loss_xval": 1.234375, + "num_input_tokens_seen": 168607872, + "step": 2520 + }, + { + "epoch": 0.28607092198581563, + "grad_norm": 28.12000274658203, + "learning_rate": 5e-05, + "loss": 1.5942, + "num_input_tokens_seen": 168675160, + "step": 2521 + }, + { + "epoch": 0.28607092198581563, + "loss": 1.650639295578003, + "loss_ce": 0.00806125346571207, + "loss_iou": 0.66796875, + "loss_num": 0.06201171875, + "loss_xval": 1.640625, + "num_input_tokens_seen": 168675160, + "step": 2521 + }, + { + "epoch": 0.28618439716312055, + "grad_norm": 13.474660873413086, + "learning_rate": 5e-05, + "loss": 1.0913, + "num_input_tokens_seen": 168741340, + "step": 2522 + }, + { + "epoch": 0.28618439716312055, + "loss": 1.2509021759033203, + "loss_ce": 0.004808403551578522, + "loss_iou": 0.5234375, + "loss_num": 0.040771484375, + "loss_xval": 1.25, + "num_input_tokens_seen": 168741340, + "step": 2522 + }, + { + "epoch": 0.28629787234042553, + "grad_norm": 16.473997116088867, + "learning_rate": 5e-05, + "loss": 1.0425, + "num_input_tokens_seen": 168807404, + "step": 2523 + }, + { + "epoch": 0.28629787234042553, + "loss": 1.2162392139434814, + "loss_ce": 0.007254797965288162, + "loss_iou": 0.54296875, + "loss_num": 0.025146484375, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 168807404, + "step": 2523 + }, + { + "epoch": 0.2864113475177305, + "grad_norm": 24.237180709838867, + "learning_rate": 5e-05, + "loss": 1.2333, + "num_input_tokens_seen": 168873492, + "step": 2524 + }, + { + "epoch": 0.2864113475177305, + "loss": 1.1485493183135986, + "loss_ce": 0.007191818207502365, + "loss_iou": 0.4609375, + "loss_num": 0.043701171875, + "loss_xval": 1.140625, + "num_input_tokens_seen": 168873492, + "step": 2524 + }, + { + "epoch": 0.2865248226950355, + "grad_norm": 31.263051986694336, + "learning_rate": 5e-05, + "loss": 1.3453, + "num_input_tokens_seen": 168939496, + "step": 2525 + }, + { + "epoch": 0.2865248226950355, + "loss": 1.3526532649993896, + "loss_ce": 0.004020444117486477, + "loss_iou": 0.5703125, + "loss_num": 0.0419921875, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 168939496, + "step": 2525 + }, + { + "epoch": 0.2866382978723404, + "grad_norm": 35.9997444152832, + "learning_rate": 5e-05, + "loss": 1.249, + "num_input_tokens_seen": 169007008, + "step": 2526 + }, + { + "epoch": 0.2866382978723404, + "loss": 1.3178317546844482, + "loss_ce": 0.007284901104867458, + "loss_iou": 0.54296875, + "loss_num": 0.04541015625, + "loss_xval": 1.3125, + "num_input_tokens_seen": 169007008, + "step": 2526 + }, + { + "epoch": 0.2867517730496454, + "grad_norm": 21.77702522277832, + "learning_rate": 5e-05, + "loss": 1.5948, + "num_input_tokens_seen": 169073548, + "step": 2527 + }, + { + "epoch": 0.2867517730496454, + "loss": 1.6762652397155762, + "loss_ce": 0.005366722121834755, + "loss_iou": 0.6953125, + "loss_num": 0.056884765625, + "loss_xval": 1.671875, + "num_input_tokens_seen": 169073548, + "step": 2527 + }, + { + "epoch": 0.28686524822695036, + "grad_norm": 20.642290115356445, + "learning_rate": 5e-05, + "loss": 1.2684, + "num_input_tokens_seen": 169140264, + "step": 2528 + }, + { + "epoch": 0.28686524822695036, + "loss": 1.2531688213348389, + "loss_ce": 0.0070750415325164795, + "loss_iou": 0.51953125, + "loss_num": 0.041748046875, + "loss_xval": 1.25, + "num_input_tokens_seen": 169140264, + "step": 2528 + }, + { + "epoch": 0.28697872340425534, + "grad_norm": 38.950252532958984, + "learning_rate": 5e-05, + "loss": 1.488, + "num_input_tokens_seen": 169206648, + "step": 2529 + }, + { + "epoch": 0.28697872340425534, + "loss": 1.5061357021331787, + "loss_ce": 0.01004206296056509, + "loss_iou": 0.59765625, + "loss_num": 0.060791015625, + "loss_xval": 1.5, + "num_input_tokens_seen": 169206648, + "step": 2529 + }, + { + "epoch": 0.28709219858156027, + "grad_norm": 23.692747116088867, + "learning_rate": 5e-05, + "loss": 1.5975, + "num_input_tokens_seen": 169273720, + "step": 2530 + }, + { + "epoch": 0.28709219858156027, + "loss": 1.6544723510742188, + "loss_ce": 0.005058306269347668, + "loss_iou": 0.7109375, + "loss_num": 0.044921875, + "loss_xval": 1.6484375, + "num_input_tokens_seen": 169273720, + "step": 2530 + }, + { + "epoch": 0.28720567375886524, + "grad_norm": 29.05386734008789, + "learning_rate": 5e-05, + "loss": 1.4669, + "num_input_tokens_seen": 169341392, + "step": 2531 + }, + { + "epoch": 0.28720567375886524, + "loss": 1.4097423553466797, + "loss_ce": 0.008375111036002636, + "loss_iou": 0.62109375, + "loss_num": 0.03125, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 169341392, + "step": 2531 + }, + { + "epoch": 0.2873191489361702, + "grad_norm": 23.09993553161621, + "learning_rate": 5e-05, + "loss": 1.2034, + "num_input_tokens_seen": 169408432, + "step": 2532 + }, + { + "epoch": 0.2873191489361702, + "loss": 1.293877124786377, + "loss_ce": 0.0033497815020382404, + "loss_iou": 0.5625, + "loss_num": 0.033203125, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 169408432, + "step": 2532 + }, + { + "epoch": 0.2874326241134752, + "grad_norm": 31.962270736694336, + "learning_rate": 5e-05, + "loss": 1.2026, + "num_input_tokens_seen": 169474332, + "step": 2533 + }, + { + "epoch": 0.2874326241134752, + "loss": 1.1068347692489624, + "loss_ce": 0.0038074650801718235, + "loss_iou": 0.5078125, + "loss_num": 0.01708984375, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 169474332, + "step": 2533 + }, + { + "epoch": 0.2875460992907801, + "grad_norm": 24.161209106445312, + "learning_rate": 5e-05, + "loss": 1.5168, + "num_input_tokens_seen": 169541112, + "step": 2534 + }, + { + "epoch": 0.2875460992907801, + "loss": 1.5964908599853516, + "loss_ce": 0.007623786106705666, + "loss_iou": 0.671875, + "loss_num": 0.04833984375, + "loss_xval": 1.5859375, + "num_input_tokens_seen": 169541112, + "step": 2534 + }, + { + "epoch": 0.2876595744680851, + "grad_norm": 11.513399124145508, + "learning_rate": 5e-05, + "loss": 1.073, + "num_input_tokens_seen": 169608572, + "step": 2535 + }, + { + "epoch": 0.2876595744680851, + "loss": 1.1013842821121216, + "loss_ce": 0.0027515264227986336, + "loss_iou": 0.45703125, + "loss_num": 0.036865234375, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 169608572, + "step": 2535 + }, + { + "epoch": 0.2877730496453901, + "grad_norm": 25.699899673461914, + "learning_rate": 5e-05, + "loss": 1.2018, + "num_input_tokens_seen": 169674812, + "step": 2536 + }, + { + "epoch": 0.2877730496453901, + "loss": 1.075996994972229, + "loss_ce": 0.004829976242035627, + "loss_iou": 0.43359375, + "loss_num": 0.041015625, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 169674812, + "step": 2536 + }, + { + "epoch": 0.28788652482269506, + "grad_norm": 46.157371520996094, + "learning_rate": 5e-05, + "loss": 1.4381, + "num_input_tokens_seen": 169741416, + "step": 2537 + }, + { + "epoch": 0.28788652482269506, + "loss": 1.529468297958374, + "loss_ce": 0.004077668767422438, + "loss_iou": 0.703125, + "loss_num": 0.0242919921875, + "loss_xval": 1.5234375, + "num_input_tokens_seen": 169741416, + "step": 2537 + }, + { + "epoch": 0.288, + "grad_norm": 22.521121978759766, + "learning_rate": 5e-05, + "loss": 1.6007, + "num_input_tokens_seen": 169808172, + "step": 2538 + }, + { + "epoch": 0.288, + "loss": 1.5440642833709717, + "loss_ce": 0.005001789424568415, + "loss_iou": 0.6484375, + "loss_num": 0.04833984375, + "loss_xval": 1.5390625, + "num_input_tokens_seen": 169808172, + "step": 2538 + }, + { + "epoch": 0.28811347517730496, + "grad_norm": 15.938436508178711, + "learning_rate": 5e-05, + "loss": 1.229, + "num_input_tokens_seen": 169874484, + "step": 2539 + }, + { + "epoch": 0.28811347517730496, + "loss": 1.295250654220581, + "loss_ce": 0.008629636839032173, + "loss_iou": 0.53125, + "loss_num": 0.045166015625, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 169874484, + "step": 2539 + }, + { + "epoch": 0.28822695035460993, + "grad_norm": 15.257706642150879, + "learning_rate": 5e-05, + "loss": 1.07, + "num_input_tokens_seen": 169941536, + "step": 2540 + }, + { + "epoch": 0.28822695035460993, + "loss": 1.152029037475586, + "loss_ce": 0.008474346250295639, + "loss_iou": 0.45703125, + "loss_num": 0.0458984375, + "loss_xval": 1.140625, + "num_input_tokens_seen": 169941536, + "step": 2540 + }, + { + "epoch": 0.2883404255319149, + "grad_norm": 17.38727569580078, + "learning_rate": 5e-05, + "loss": 1.1796, + "num_input_tokens_seen": 170008268, + "step": 2541 + }, + { + "epoch": 0.2883404255319149, + "loss": 0.9035289883613586, + "loss_ce": 0.006556366104632616, + "loss_iou": 0.40625, + "loss_num": 0.016845703125, + "loss_xval": 0.8984375, + "num_input_tokens_seen": 170008268, + "step": 2541 + }, + { + "epoch": 0.28845390070921983, + "grad_norm": 17.054656982421875, + "learning_rate": 5e-05, + "loss": 1.2443, + "num_input_tokens_seen": 170075584, + "step": 2542 + }, + { + "epoch": 0.28845390070921983, + "loss": 1.0870895385742188, + "loss_ce": 0.0055466205812990665, + "loss_iou": 0.48046875, + "loss_num": 0.0244140625, + "loss_xval": 1.078125, + "num_input_tokens_seen": 170075584, + "step": 2542 + }, + { + "epoch": 0.2885673758865248, + "grad_norm": 22.428722381591797, + "learning_rate": 5e-05, + "loss": 1.0906, + "num_input_tokens_seen": 170142188, + "step": 2543 + }, + { + "epoch": 0.2885673758865248, + "loss": 1.2624843120574951, + "loss_ce": 0.004671801812946796, + "loss_iou": 0.50390625, + "loss_num": 0.05078125, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 170142188, + "step": 2543 + }, + { + "epoch": 0.2886808510638298, + "grad_norm": 26.559579849243164, + "learning_rate": 5e-05, + "loss": 1.2315, + "num_input_tokens_seen": 170207904, + "step": 2544 + }, + { + "epoch": 0.2886808510638298, + "loss": 1.4411497116088867, + "loss_ce": 0.006091153249144554, + "loss_iou": 0.625, + "loss_num": 0.03759765625, + "loss_xval": 1.4375, + "num_input_tokens_seen": 170207904, + "step": 2544 + }, + { + "epoch": 0.28879432624113477, + "grad_norm": 29.18117904663086, + "learning_rate": 5e-05, + "loss": 1.4674, + "num_input_tokens_seen": 170275068, + "step": 2545 + }, + { + "epoch": 0.28879432624113477, + "loss": 1.5620553493499756, + "loss_ce": 0.007367913145571947, + "loss_iou": 0.65625, + "loss_num": 0.04833984375, + "loss_xval": 1.5546875, + "num_input_tokens_seen": 170275068, + "step": 2545 + }, + { + "epoch": 0.2889078014184397, + "grad_norm": 316.42730712890625, + "learning_rate": 5e-05, + "loss": 1.3047, + "num_input_tokens_seen": 170341656, + "step": 2546 + }, + { + "epoch": 0.2889078014184397, + "loss": 1.1513965129852295, + "loss_ce": 0.004454333335161209, + "loss_iou": 0.5, + "loss_num": 0.0284423828125, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 170341656, + "step": 2546 + }, + { + "epoch": 0.28902127659574467, + "grad_norm": 29.77237319946289, + "learning_rate": 5e-05, + "loss": 1.372, + "num_input_tokens_seen": 170408376, + "step": 2547 + }, + { + "epoch": 0.28902127659574467, + "loss": 1.3119909763336182, + "loss_ce": 0.0033971171360462904, + "loss_iou": 0.55859375, + "loss_num": 0.038818359375, + "loss_xval": 1.3125, + "num_input_tokens_seen": 170408376, + "step": 2547 + }, + { + "epoch": 0.28913475177304965, + "grad_norm": 35.236270904541016, + "learning_rate": 5e-05, + "loss": 1.3329, + "num_input_tokens_seen": 170475180, + "step": 2548 + }, + { + "epoch": 0.28913475177304965, + "loss": 1.3614858388900757, + "loss_ce": 0.0035756228026002645, + "loss_iou": 0.578125, + "loss_num": 0.03955078125, + "loss_xval": 1.359375, + "num_input_tokens_seen": 170475180, + "step": 2548 + }, + { + "epoch": 0.2892482269503546, + "grad_norm": 28.28318214416504, + "learning_rate": 5e-05, + "loss": 1.3858, + "num_input_tokens_seen": 170542016, + "step": 2549 + }, + { + "epoch": 0.2892482269503546, + "loss": 1.4134399890899658, + "loss_ce": 0.0018189455149695277, + "loss_iou": 0.58203125, + "loss_num": 0.048828125, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 170542016, + "step": 2549 + }, + { + "epoch": 0.28936170212765955, + "grad_norm": 25.689844131469727, + "learning_rate": 5e-05, + "loss": 1.3341, + "num_input_tokens_seen": 170608432, + "step": 2550 + }, + { + "epoch": 0.28936170212765955, + "loss": 1.3432279825210571, + "loss_ce": 0.008267085999250412, + "loss_iou": 0.5546875, + "loss_num": 0.0458984375, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 170608432, + "step": 2550 + }, + { + "epoch": 0.2894751773049645, + "grad_norm": 44.376708984375, + "learning_rate": 5e-05, + "loss": 1.3062, + "num_input_tokens_seen": 170674460, + "step": 2551 + }, + { + "epoch": 0.2894751773049645, + "loss": 1.2409043312072754, + "loss_ce": 0.007017700932919979, + "loss_iou": 0.56640625, + "loss_num": 0.019775390625, + "loss_xval": 1.234375, + "num_input_tokens_seen": 170674460, + "step": 2551 + }, + { + "epoch": 0.2895886524822695, + "grad_norm": 46.225318908691406, + "learning_rate": 5e-05, + "loss": 1.6212, + "num_input_tokens_seen": 170741532, + "step": 2552 + }, + { + "epoch": 0.2895886524822695, + "loss": 1.6768231391906738, + "loss_ce": 0.004948170389980078, + "loss_iou": 0.6796875, + "loss_num": 0.06298828125, + "loss_xval": 1.671875, + "num_input_tokens_seen": 170741532, + "step": 2552 + }, + { + "epoch": 0.2897021276595745, + "grad_norm": 24.382814407348633, + "learning_rate": 5e-05, + "loss": 1.2881, + "num_input_tokens_seen": 170807552, + "step": 2553 + }, + { + "epoch": 0.2897021276595745, + "loss": 1.3228977918624878, + "loss_ce": 0.010886061005294323, + "loss_iou": 0.51953125, + "loss_num": 0.054443359375, + "loss_xval": 1.3125, + "num_input_tokens_seen": 170807552, + "step": 2553 + }, + { + "epoch": 0.28981560283687946, + "grad_norm": 29.85235023498535, + "learning_rate": 5e-05, + "loss": 1.4232, + "num_input_tokens_seen": 170873196, + "step": 2554 + }, + { + "epoch": 0.28981560283687946, + "loss": 1.638537883758545, + "loss_ce": 0.005725321359932423, + "loss_iou": 0.66796875, + "loss_num": 0.05908203125, + "loss_xval": 1.6328125, + "num_input_tokens_seen": 170873196, + "step": 2554 + }, + { + "epoch": 0.2899290780141844, + "grad_norm": 34.436824798583984, + "learning_rate": 5e-05, + "loss": 1.2213, + "num_input_tokens_seen": 170940536, + "step": 2555 + }, + { + "epoch": 0.2899290780141844, + "loss": 1.1165025234222412, + "loss_ce": 0.0017564864829182625, + "loss_iou": 0.486328125, + "loss_num": 0.0279541015625, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 170940536, + "step": 2555 + }, + { + "epoch": 0.29004255319148936, + "grad_norm": 25.734752655029297, + "learning_rate": 5e-05, + "loss": 1.3462, + "num_input_tokens_seen": 171007856, + "step": 2556 + }, + { + "epoch": 0.29004255319148936, + "loss": 1.3173980712890625, + "loss_ce": 0.005386323668062687, + "loss_iou": 0.57421875, + "loss_num": 0.03271484375, + "loss_xval": 1.3125, + "num_input_tokens_seen": 171007856, + "step": 2556 + }, + { + "epoch": 0.29015602836879434, + "grad_norm": 28.7805233001709, + "learning_rate": 5e-05, + "loss": 1.302, + "num_input_tokens_seen": 171075312, + "step": 2557 + }, + { + "epoch": 0.29015602836879434, + "loss": 1.3495100736618042, + "loss_ce": 0.00551589485257864, + "loss_iou": 0.5625, + "loss_num": 0.044189453125, + "loss_xval": 1.34375, + "num_input_tokens_seen": 171075312, + "step": 2557 + }, + { + "epoch": 0.2902695035460993, + "grad_norm": 25.717437744140625, + "learning_rate": 5e-05, + "loss": 1.1475, + "num_input_tokens_seen": 171142984, + "step": 2558 + }, + { + "epoch": 0.2902695035460993, + "loss": 1.2199316024780273, + "loss_ce": 0.00899413414299488, + "loss_iou": 0.5078125, + "loss_num": 0.03857421875, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 171142984, + "step": 2558 + }, + { + "epoch": 0.29038297872340424, + "grad_norm": 29.41307830810547, + "learning_rate": 5e-05, + "loss": 1.3397, + "num_input_tokens_seen": 171210556, + "step": 2559 + }, + { + "epoch": 0.29038297872340424, + "loss": 1.417318344116211, + "loss_ce": 0.0032558091916143894, + "loss_iou": 0.5859375, + "loss_num": 0.048583984375, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 171210556, + "step": 2559 + }, + { + "epoch": 0.2904964539007092, + "grad_norm": 21.867422103881836, + "learning_rate": 5e-05, + "loss": 1.4848, + "num_input_tokens_seen": 171277536, + "step": 2560 + }, + { + "epoch": 0.2904964539007092, + "loss": 1.4428026676177979, + "loss_ce": 0.005302645731717348, + "loss_iou": 0.609375, + "loss_num": 0.043212890625, + "loss_xval": 1.4375, + "num_input_tokens_seen": 171277536, + "step": 2560 + }, + { + "epoch": 0.2906099290780142, + "grad_norm": 31.813411712646484, + "learning_rate": 5e-05, + "loss": 1.2371, + "num_input_tokens_seen": 171344268, + "step": 2561 + }, + { + "epoch": 0.2906099290780142, + "loss": 1.086500644683838, + "loss_ce": 0.006422621663659811, + "loss_iou": 0.46875, + "loss_num": 0.028564453125, + "loss_xval": 1.078125, + "num_input_tokens_seen": 171344268, + "step": 2561 + }, + { + "epoch": 0.2907234042553192, + "grad_norm": 28.347869873046875, + "learning_rate": 5e-05, + "loss": 1.4156, + "num_input_tokens_seen": 171411712, + "step": 2562 + }, + { + "epoch": 0.2907234042553192, + "loss": 1.3515994548797607, + "loss_ce": 0.003943298012018204, + "loss_iou": 0.57421875, + "loss_num": 0.03955078125, + "loss_xval": 1.34375, + "num_input_tokens_seen": 171411712, + "step": 2562 + }, + { + "epoch": 0.2908368794326241, + "grad_norm": 24.556846618652344, + "learning_rate": 5e-05, + "loss": 1.4103, + "num_input_tokens_seen": 171478924, + "step": 2563 + }, + { + "epoch": 0.2908368794326241, + "loss": 1.4065604209899902, + "loss_ce": 0.004216691944748163, + "loss_iou": 0.5859375, + "loss_num": 0.046142578125, + "loss_xval": 1.40625, + "num_input_tokens_seen": 171478924, + "step": 2563 + }, + { + "epoch": 0.2909503546099291, + "grad_norm": 30.01646614074707, + "learning_rate": 5e-05, + "loss": 1.3144, + "num_input_tokens_seen": 171546096, + "step": 2564 + }, + { + "epoch": 0.2909503546099291, + "loss": 1.4448269605636597, + "loss_ce": 0.005862082354724407, + "loss_iou": 0.578125, + "loss_num": 0.056884765625, + "loss_xval": 1.4375, + "num_input_tokens_seen": 171546096, + "step": 2564 + }, + { + "epoch": 0.29106382978723405, + "grad_norm": 22.367769241333008, + "learning_rate": 5e-05, + "loss": 1.4839, + "num_input_tokens_seen": 171613100, + "step": 2565 + }, + { + "epoch": 0.29106382978723405, + "loss": 1.3687587976455688, + "loss_ce": 0.005477536469697952, + "loss_iou": 0.578125, + "loss_num": 0.040771484375, + "loss_xval": 1.359375, + "num_input_tokens_seen": 171613100, + "step": 2565 + }, + { + "epoch": 0.29117730496453903, + "grad_norm": 24.638011932373047, + "learning_rate": 5e-05, + "loss": 1.4233, + "num_input_tokens_seen": 171679932, + "step": 2566 + }, + { + "epoch": 0.29117730496453903, + "loss": 1.384494662284851, + "loss_ce": 0.004611863289028406, + "loss_iou": 0.55859375, + "loss_num": 0.052978515625, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 171679932, + "step": 2566 + }, + { + "epoch": 0.29129078014184395, + "grad_norm": 29.905826568603516, + "learning_rate": 5e-05, + "loss": 1.2663, + "num_input_tokens_seen": 171746724, + "step": 2567 + }, + { + "epoch": 0.29129078014184395, + "loss": 1.3691508769989014, + "loss_ce": 0.004404768347740173, + "loss_iou": 0.6015625, + "loss_num": 0.03271484375, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 171746724, + "step": 2567 + }, + { + "epoch": 0.29140425531914893, + "grad_norm": 43.83869171142578, + "learning_rate": 5e-05, + "loss": 1.3319, + "num_input_tokens_seen": 171813428, + "step": 2568 + }, + { + "epoch": 0.29140425531914893, + "loss": 1.409742832183838, + "loss_ce": 0.007399078458547592, + "loss_iou": 0.5859375, + "loss_num": 0.046630859375, + "loss_xval": 1.40625, + "num_input_tokens_seen": 171813428, + "step": 2568 + }, + { + "epoch": 0.2915177304964539, + "grad_norm": 20.137454986572266, + "learning_rate": 5e-05, + "loss": 1.6541, + "num_input_tokens_seen": 171880680, + "step": 2569 + }, + { + "epoch": 0.2915177304964539, + "loss": 1.6576929092407227, + "loss_ce": 0.004372549243271351, + "loss_iou": 0.71484375, + "loss_num": 0.044677734375, + "loss_xval": 1.65625, + "num_input_tokens_seen": 171880680, + "step": 2569 + }, + { + "epoch": 0.2916312056737589, + "grad_norm": 25.678009033203125, + "learning_rate": 5e-05, + "loss": 1.2349, + "num_input_tokens_seen": 171947140, + "step": 2570 + }, + { + "epoch": 0.2916312056737589, + "loss": 1.1118767261505127, + "loss_ce": 0.0059196436777710915, + "loss_iou": 0.484375, + "loss_num": 0.0274658203125, + "loss_xval": 1.109375, + "num_input_tokens_seen": 171947140, + "step": 2570 + }, + { + "epoch": 0.2917446808510638, + "grad_norm": 33.79247283935547, + "learning_rate": 5e-05, + "loss": 1.313, + "num_input_tokens_seen": 172014428, + "step": 2571 + }, + { + "epoch": 0.2917446808510638, + "loss": 1.150189757347107, + "loss_ce": 0.007611612789332867, + "loss_iou": 0.51953125, + "loss_num": 0.02099609375, + "loss_xval": 1.140625, + "num_input_tokens_seen": 172014428, + "step": 2571 + }, + { + "epoch": 0.2918581560283688, + "grad_norm": 24.73785972595215, + "learning_rate": 5e-05, + "loss": 1.5629, + "num_input_tokens_seen": 172081200, + "step": 2572 + }, + { + "epoch": 0.2918581560283688, + "loss": 1.6169214248657227, + "loss_ce": 0.004616811405867338, + "loss_iou": 0.67578125, + "loss_num": 0.051513671875, + "loss_xval": 1.609375, + "num_input_tokens_seen": 172081200, + "step": 2572 + }, + { + "epoch": 0.29197163120567377, + "grad_norm": 17.4343204498291, + "learning_rate": 5e-05, + "loss": 1.1606, + "num_input_tokens_seen": 172147584, + "step": 2573 + }, + { + "epoch": 0.29197163120567377, + "loss": 1.1122424602508545, + "loss_ce": 0.009215041995048523, + "loss_iou": 0.484375, + "loss_num": 0.0264892578125, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 172147584, + "step": 2573 + }, + { + "epoch": 0.29208510638297874, + "grad_norm": 25.85651206970215, + "learning_rate": 5e-05, + "loss": 1.2398, + "num_input_tokens_seen": 172213872, + "step": 2574 + }, + { + "epoch": 0.29208510638297874, + "loss": 1.0795984268188477, + "loss_ce": 0.0024499516002833843, + "loss_iou": 0.5, + "loss_num": 0.01556396484375, + "loss_xval": 1.078125, + "num_input_tokens_seen": 172213872, + "step": 2574 + }, + { + "epoch": 0.29219858156028367, + "grad_norm": 37.117218017578125, + "learning_rate": 5e-05, + "loss": 1.3955, + "num_input_tokens_seen": 172281004, + "step": 2575 + }, + { + "epoch": 0.29219858156028367, + "loss": 1.3392531871795654, + "loss_ce": 0.005268720909953117, + "loss_iou": 0.5625, + "loss_num": 0.041015625, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 172281004, + "step": 2575 + }, + { + "epoch": 0.29231205673758864, + "grad_norm": 22.901836395263672, + "learning_rate": 5e-05, + "loss": 1.2645, + "num_input_tokens_seen": 172348344, + "step": 2576 + }, + { + "epoch": 0.29231205673758864, + "loss": 1.3740043640136719, + "loss_ce": 0.008281721733510494, + "loss_iou": 0.5859375, + "loss_num": 0.03955078125, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 172348344, + "step": 2576 + }, + { + "epoch": 0.2924255319148936, + "grad_norm": 26.57488250732422, + "learning_rate": 5e-05, + "loss": 1.2246, + "num_input_tokens_seen": 172414792, + "step": 2577 + }, + { + "epoch": 0.2924255319148936, + "loss": 1.2093819379806519, + "loss_ce": 0.005280362442135811, + "loss_iou": 0.470703125, + "loss_num": 0.05224609375, + "loss_xval": 1.203125, + "num_input_tokens_seen": 172414792, + "step": 2577 + }, + { + "epoch": 0.2925390070921986, + "grad_norm": 21.80481719970703, + "learning_rate": 5e-05, + "loss": 1.1906, + "num_input_tokens_seen": 172481752, + "step": 2578 + }, + { + "epoch": 0.2925390070921986, + "loss": 1.1530357599258423, + "loss_ce": 0.007527951151132584, + "loss_iou": 0.52734375, + "loss_num": 0.0179443359375, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 172481752, + "step": 2578 + }, + { + "epoch": 0.2926524822695035, + "grad_norm": 30.98833465576172, + "learning_rate": 5e-05, + "loss": 1.5465, + "num_input_tokens_seen": 172549336, + "step": 2579 + }, + { + "epoch": 0.2926524822695035, + "loss": 1.691512107849121, + "loss_ce": 0.006941803265362978, + "loss_iou": 0.6640625, + "loss_num": 0.0703125, + "loss_xval": 1.6875, + "num_input_tokens_seen": 172549336, + "step": 2579 + }, + { + "epoch": 0.2927659574468085, + "grad_norm": 19.996578216552734, + "learning_rate": 5e-05, + "loss": 1.4713, + "num_input_tokens_seen": 172616124, + "step": 2580 + }, + { + "epoch": 0.2927659574468085, + "loss": 1.3457257747650146, + "loss_ce": 0.004905432462692261, + "loss_iou": 0.546875, + "loss_num": 0.050048828125, + "loss_xval": 1.34375, + "num_input_tokens_seen": 172616124, + "step": 2580 + }, + { + "epoch": 0.2928794326241135, + "grad_norm": 14.333996772766113, + "learning_rate": 5e-05, + "loss": 1.2731, + "num_input_tokens_seen": 172683132, + "step": 2581 + }, + { + "epoch": 0.2928794326241135, + "loss": 1.3558690547943115, + "loss_ce": 0.0033299617934972048, + "loss_iou": 0.546875, + "loss_num": 0.052001953125, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 172683132, + "step": 2581 + }, + { + "epoch": 0.29299290780141846, + "grad_norm": 39.52670669555664, + "learning_rate": 5e-05, + "loss": 1.2037, + "num_input_tokens_seen": 172749752, + "step": 2582 + }, + { + "epoch": 0.29299290780141846, + "loss": 1.2285112142562866, + "loss_ce": 0.005366651341319084, + "loss_iou": 0.5078125, + "loss_num": 0.041259765625, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 172749752, + "step": 2582 + }, + { + "epoch": 0.2931063829787234, + "grad_norm": 23.558462142944336, + "learning_rate": 5e-05, + "loss": 1.2438, + "num_input_tokens_seen": 172816072, + "step": 2583 + }, + { + "epoch": 0.2931063829787234, + "loss": 1.1901798248291016, + "loss_ce": 0.0034122050274163485, + "loss_iou": 0.515625, + "loss_num": 0.03125, + "loss_xval": 1.1875, + "num_input_tokens_seen": 172816072, + "step": 2583 + }, + { + "epoch": 0.29321985815602836, + "grad_norm": 26.090219497680664, + "learning_rate": 5e-05, + "loss": 1.4097, + "num_input_tokens_seen": 172882800, + "step": 2584 + }, + { + "epoch": 0.29321985815602836, + "loss": 1.6638679504394531, + "loss_ce": 0.006153111811727285, + "loss_iou": 0.6484375, + "loss_num": 0.07275390625, + "loss_xval": 1.65625, + "num_input_tokens_seen": 172882800, + "step": 2584 + }, + { + "epoch": 0.29333333333333333, + "grad_norm": 18.2885799407959, + "learning_rate": 5e-05, + "loss": 1.1363, + "num_input_tokens_seen": 172950052, + "step": 2585 + }, + { + "epoch": 0.29333333333333333, + "loss": 1.2268482446670532, + "loss_ce": 0.009074836038053036, + "loss_iou": 0.466796875, + "loss_num": 0.056640625, + "loss_xval": 1.21875, + "num_input_tokens_seen": 172950052, + "step": 2585 + }, + { + "epoch": 0.2934468085106383, + "grad_norm": 22.007640838623047, + "learning_rate": 5e-05, + "loss": 1.3731, + "num_input_tokens_seen": 173016776, + "step": 2586 + }, + { + "epoch": 0.2934468085106383, + "loss": 1.3858386278152466, + "loss_ce": 0.004490950610488653, + "loss_iou": 0.54296875, + "loss_num": 0.05859375, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 173016776, + "step": 2586 + }, + { + "epoch": 0.29356028368794324, + "grad_norm": 32.39723587036133, + "learning_rate": 5e-05, + "loss": 1.1176, + "num_input_tokens_seen": 173084232, + "step": 2587 + }, + { + "epoch": 0.29356028368794324, + "loss": 1.1499454975128174, + "loss_ce": 0.00687909871339798, + "loss_iou": 0.50390625, + "loss_num": 0.027099609375, + "loss_xval": 1.140625, + "num_input_tokens_seen": 173084232, + "step": 2587 + }, + { + "epoch": 0.2936737588652482, + "grad_norm": 27.38047981262207, + "learning_rate": 5e-05, + "loss": 1.194, + "num_input_tokens_seen": 173151344, + "step": 2588 + }, + { + "epoch": 0.2936737588652482, + "loss": 1.1278278827667236, + "loss_ce": 0.0033161467872560024, + "loss_iou": 0.46484375, + "loss_num": 0.039306640625, + "loss_xval": 1.125, + "num_input_tokens_seen": 173151344, + "step": 2588 + }, + { + "epoch": 0.2937872340425532, + "grad_norm": 25.50086212158203, + "learning_rate": 5e-05, + "loss": 1.252, + "num_input_tokens_seen": 173217408, + "step": 2589 + }, + { + "epoch": 0.2937872340425532, + "loss": 1.043565273284912, + "loss_ce": 0.0025496259331703186, + "loss_iou": 0.427734375, + "loss_num": 0.036865234375, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 173217408, + "step": 2589 + }, + { + "epoch": 0.29390070921985817, + "grad_norm": 11.494815826416016, + "learning_rate": 5e-05, + "loss": 1.2765, + "num_input_tokens_seen": 173284664, + "step": 2590 + }, + { + "epoch": 0.29390070921985817, + "loss": 1.2211570739746094, + "loss_ce": 0.007778096944093704, + "loss_iou": 0.4765625, + "loss_num": 0.05224609375, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 173284664, + "step": 2590 + }, + { + "epoch": 0.29401418439716315, + "grad_norm": 23.210283279418945, + "learning_rate": 5e-05, + "loss": 1.3619, + "num_input_tokens_seen": 173351012, + "step": 2591 + }, + { + "epoch": 0.29401418439716315, + "loss": 1.2166669368743896, + "loss_ce": 0.0027998103760182858, + "loss_iou": 0.515625, + "loss_num": 0.036376953125, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 173351012, + "step": 2591 + }, + { + "epoch": 0.29412765957446807, + "grad_norm": 18.439220428466797, + "learning_rate": 5e-05, + "loss": 1.2717, + "num_input_tokens_seen": 173418236, + "step": 2592 + }, + { + "epoch": 0.29412765957446807, + "loss": 1.2841134071350098, + "loss_ce": 0.010675894096493721, + "loss_iou": 0.53515625, + "loss_num": 0.039794921875, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 173418236, + "step": 2592 + }, + { + "epoch": 0.29424113475177305, + "grad_norm": 127.39413452148438, + "learning_rate": 5e-05, + "loss": 1.3435, + "num_input_tokens_seen": 173485760, + "step": 2593 + }, + { + "epoch": 0.29424113475177305, + "loss": 1.2333883047103882, + "loss_ce": 0.011708622798323631, + "loss_iou": 0.484375, + "loss_num": 0.050537109375, + "loss_xval": 1.21875, + "num_input_tokens_seen": 173485760, + "step": 2593 + }, + { + "epoch": 0.294354609929078, + "grad_norm": 25.598041534423828, + "learning_rate": 5e-05, + "loss": 1.2491, + "num_input_tokens_seen": 173552332, + "step": 2594 + }, + { + "epoch": 0.294354609929078, + "loss": 1.3135677576065063, + "loss_ce": 0.006438891403377056, + "loss_iou": 0.53515625, + "loss_num": 0.046875, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 173552332, + "step": 2594 + }, + { + "epoch": 0.294468085106383, + "grad_norm": 35.03260040283203, + "learning_rate": 5e-05, + "loss": 1.2901, + "num_input_tokens_seen": 173619496, + "step": 2595 + }, + { + "epoch": 0.294468085106383, + "loss": 1.3832743167877197, + "loss_ce": 0.004856294021010399, + "loss_iou": 0.578125, + "loss_num": 0.0439453125, + "loss_xval": 1.375, + "num_input_tokens_seen": 173619496, + "step": 2595 + }, + { + "epoch": 0.2945815602836879, + "grad_norm": 35.62126541137695, + "learning_rate": 5e-05, + "loss": 1.482, + "num_input_tokens_seen": 173685724, + "step": 2596 + }, + { + "epoch": 0.2945815602836879, + "loss": 1.3688039779663086, + "loss_ce": 0.005888981278985739, + "loss_iou": 0.5546875, + "loss_num": 0.05029296875, + "loss_xval": 1.359375, + "num_input_tokens_seen": 173685724, + "step": 2596 + }, + { + "epoch": 0.2946950354609929, + "grad_norm": 80.91787719726562, + "learning_rate": 5e-05, + "loss": 1.1173, + "num_input_tokens_seen": 173752596, + "step": 2597 + }, + { + "epoch": 0.2946950354609929, + "loss": 1.038255214691162, + "loss_ce": 0.004563807509839535, + "loss_iou": 0.44921875, + "loss_num": 0.026611328125, + "loss_xval": 1.03125, + "num_input_tokens_seen": 173752596, + "step": 2597 + }, + { + "epoch": 0.2948085106382979, + "grad_norm": 35.77443313598633, + "learning_rate": 5e-05, + "loss": 1.2129, + "num_input_tokens_seen": 173818912, + "step": 2598 + }, + { + "epoch": 0.2948085106382979, + "loss": 1.0563175678253174, + "loss_ce": 0.005902576260268688, + "loss_iou": 0.44140625, + "loss_num": 0.033935546875, + "loss_xval": 1.046875, + "num_input_tokens_seen": 173818912, + "step": 2598 + }, + { + "epoch": 0.29492198581560286, + "grad_norm": 18.729907989501953, + "learning_rate": 5e-05, + "loss": 1.1359, + "num_input_tokens_seen": 173885896, + "step": 2599 + }, + { + "epoch": 0.29492198581560286, + "loss": 1.0943446159362793, + "loss_ce": 0.004500940442085266, + "loss_iou": 0.4765625, + "loss_num": 0.02783203125, + "loss_xval": 1.09375, + "num_input_tokens_seen": 173885896, + "step": 2599 + }, + { + "epoch": 0.2950354609929078, + "grad_norm": 24.867408752441406, + "learning_rate": 5e-05, + "loss": 1.1258, + "num_input_tokens_seen": 173952628, + "step": 2600 + }, + { + "epoch": 0.2950354609929078, + "loss": 1.3481884002685547, + "loss_ce": 0.006879833526909351, + "loss_iou": 0.5625, + "loss_num": 0.043212890625, + "loss_xval": 1.34375, + "num_input_tokens_seen": 173952628, + "step": 2600 + }, + { + "epoch": 0.29514893617021276, + "grad_norm": 21.603879928588867, + "learning_rate": 5e-05, + "loss": 1.1823, + "num_input_tokens_seen": 174019424, + "step": 2601 + }, + { + "epoch": 0.29514893617021276, + "loss": 1.1556751728057861, + "loss_ce": 0.007237696088850498, + "loss_iou": 0.4375, + "loss_num": 0.05517578125, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 174019424, + "step": 2601 + }, + { + "epoch": 0.29526241134751774, + "grad_norm": 19.144309997558594, + "learning_rate": 5e-05, + "loss": 1.2796, + "num_input_tokens_seen": 174085748, + "step": 2602 + }, + { + "epoch": 0.29526241134751774, + "loss": 1.4312307834625244, + "loss_ce": 0.006426029838621616, + "loss_iou": 0.5625, + "loss_num": 0.059814453125, + "loss_xval": 1.421875, + "num_input_tokens_seen": 174085748, + "step": 2602 + }, + { + "epoch": 0.2953758865248227, + "grad_norm": 30.501911163330078, + "learning_rate": 5e-05, + "loss": 1.4309, + "num_input_tokens_seen": 174152892, + "step": 2603 + }, + { + "epoch": 0.2953758865248227, + "loss": 1.4742170572280884, + "loss_ce": 0.005467066541314125, + "loss_iou": 0.58984375, + "loss_num": 0.056884765625, + "loss_xval": 1.46875, + "num_input_tokens_seen": 174152892, + "step": 2603 + }, + { + "epoch": 0.29548936170212764, + "grad_norm": 19.374038696289062, + "learning_rate": 5e-05, + "loss": 1.2965, + "num_input_tokens_seen": 174219508, + "step": 2604 + }, + { + "epoch": 0.29548936170212764, + "loss": 1.2479228973388672, + "loss_ce": 0.00182917935308069, + "loss_iou": 0.49609375, + "loss_num": 0.051025390625, + "loss_xval": 1.25, + "num_input_tokens_seen": 174219508, + "step": 2604 + }, + { + "epoch": 0.2956028368794326, + "grad_norm": 18.189590454101562, + "learning_rate": 5e-05, + "loss": 1.086, + "num_input_tokens_seen": 174286552, + "step": 2605 + }, + { + "epoch": 0.2956028368794326, + "loss": 0.9718592166900635, + "loss_ce": 0.003109184093773365, + "loss_iou": 0.41015625, + "loss_num": 0.029296875, + "loss_xval": 0.96875, + "num_input_tokens_seen": 174286552, + "step": 2605 + }, + { + "epoch": 0.2957163120567376, + "grad_norm": 34.574737548828125, + "learning_rate": 5e-05, + "loss": 1.5112, + "num_input_tokens_seen": 174352168, + "step": 2606 + }, + { + "epoch": 0.2957163120567376, + "loss": 1.5438109636306763, + "loss_ce": 0.006701570004224777, + "loss_iou": 0.66015625, + "loss_num": 0.043212890625, + "loss_xval": 1.5390625, + "num_input_tokens_seen": 174352168, + "step": 2606 + }, + { + "epoch": 0.2958297872340426, + "grad_norm": 22.837398529052734, + "learning_rate": 5e-05, + "loss": 1.2427, + "num_input_tokens_seen": 174418764, + "step": 2607 + }, + { + "epoch": 0.2958297872340426, + "loss": 1.1224498748779297, + "loss_ce": 0.0033092061057686806, + "loss_iou": 0.51953125, + "loss_num": 0.016357421875, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 174418764, + "step": 2607 + }, + { + "epoch": 0.2959432624113475, + "grad_norm": 30.04142189025879, + "learning_rate": 5e-05, + "loss": 1.2495, + "num_input_tokens_seen": 174485652, + "step": 2608 + }, + { + "epoch": 0.2959432624113475, + "loss": 1.2286510467529297, + "loss_ce": 0.005994755774736404, + "loss_iou": 0.51953125, + "loss_num": 0.037109375, + "loss_xval": 1.21875, + "num_input_tokens_seen": 174485652, + "step": 2608 + }, + { + "epoch": 0.2960567375886525, + "grad_norm": 23.514423370361328, + "learning_rate": 5e-05, + "loss": 1.4502, + "num_input_tokens_seen": 174551816, + "step": 2609 + }, + { + "epoch": 0.2960567375886525, + "loss": 1.3551921844482422, + "loss_ce": 0.007535975892096758, + "loss_iou": 0.6015625, + "loss_num": 0.0291748046875, + "loss_xval": 1.34375, + "num_input_tokens_seen": 174551816, + "step": 2609 + }, + { + "epoch": 0.29617021276595745, + "grad_norm": 25.947782516479492, + "learning_rate": 5e-05, + "loss": 1.2787, + "num_input_tokens_seen": 174619044, + "step": 2610 + }, + { + "epoch": 0.29617021276595745, + "loss": 1.2434399127960205, + "loss_ce": 0.005158554762601852, + "loss_iou": 0.51953125, + "loss_num": 0.039794921875, + "loss_xval": 1.234375, + "num_input_tokens_seen": 174619044, + "step": 2610 + }, + { + "epoch": 0.29628368794326243, + "grad_norm": 33.809112548828125, + "learning_rate": 5e-05, + "loss": 1.6202, + "num_input_tokens_seen": 174686416, + "step": 2611 + }, + { + "epoch": 0.29628368794326243, + "loss": 1.8147081136703491, + "loss_ce": 0.0066026863642036915, + "loss_iou": 0.73828125, + "loss_num": 0.06689453125, + "loss_xval": 1.8046875, + "num_input_tokens_seen": 174686416, + "step": 2611 + }, + { + "epoch": 0.29639716312056735, + "grad_norm": 45.27812576293945, + "learning_rate": 5e-05, + "loss": 1.4938, + "num_input_tokens_seen": 174753396, + "step": 2612 + }, + { + "epoch": 0.29639716312056735, + "loss": 1.5129783153533936, + "loss_ce": 0.002724472898989916, + "loss_iou": 0.66015625, + "loss_num": 0.03759765625, + "loss_xval": 1.5078125, + "num_input_tokens_seen": 174753396, + "step": 2612 + }, + { + "epoch": 0.29651063829787233, + "grad_norm": 21.373889923095703, + "learning_rate": 5e-05, + "loss": 1.4383, + "num_input_tokens_seen": 174820712, + "step": 2613 + }, + { + "epoch": 0.29651063829787233, + "loss": 1.2916169166564941, + "loss_ce": 0.005972340703010559, + "loss_iou": 0.546875, + "loss_num": 0.039306640625, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 174820712, + "step": 2613 + }, + { + "epoch": 0.2966241134751773, + "grad_norm": 17.02994155883789, + "learning_rate": 5e-05, + "loss": 1.3181, + "num_input_tokens_seen": 174887776, + "step": 2614 + }, + { + "epoch": 0.2966241134751773, + "loss": 1.2382643222808838, + "loss_ce": 0.002912732772529125, + "loss_iou": 0.53125, + "loss_num": 0.034423828125, + "loss_xval": 1.234375, + "num_input_tokens_seen": 174887776, + "step": 2614 + }, + { + "epoch": 0.2967375886524823, + "grad_norm": 32.82294845581055, + "learning_rate": 5e-05, + "loss": 1.2588, + "num_input_tokens_seen": 174955012, + "step": 2615 + }, + { + "epoch": 0.2967375886524823, + "loss": 1.2541757822036743, + "loss_ce": 0.008570311591029167, + "loss_iou": 0.48046875, + "loss_num": 0.056640625, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 174955012, + "step": 2615 + }, + { + "epoch": 0.2968510638297872, + "grad_norm": 26.739253997802734, + "learning_rate": 5e-05, + "loss": 1.5429, + "num_input_tokens_seen": 175021800, + "step": 2616 + }, + { + "epoch": 0.2968510638297872, + "loss": 1.5890532732009888, + "loss_ce": 0.006533696316182613, + "loss_iou": 0.65625, + "loss_num": 0.054443359375, + "loss_xval": 1.5859375, + "num_input_tokens_seen": 175021800, + "step": 2616 + }, + { + "epoch": 0.2969645390070922, + "grad_norm": 16.801973342895508, + "learning_rate": 5e-05, + "loss": 1.1043, + "num_input_tokens_seen": 175088136, + "step": 2617 + }, + { + "epoch": 0.2969645390070922, + "loss": 1.0122504234313965, + "loss_ce": 0.004437943454831839, + "loss_iou": 0.439453125, + "loss_num": 0.0255126953125, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 175088136, + "step": 2617 + }, + { + "epoch": 0.29707801418439717, + "grad_norm": 27.441417694091797, + "learning_rate": 5e-05, + "loss": 1.1789, + "num_input_tokens_seen": 175154908, + "step": 2618 + }, + { + "epoch": 0.29707801418439717, + "loss": 1.2882440090179443, + "loss_ce": 0.0034844260662794113, + "loss_iou": 0.5234375, + "loss_num": 0.047607421875, + "loss_xval": 1.28125, + "num_input_tokens_seen": 175154908, + "step": 2618 + }, + { + "epoch": 0.29719148936170214, + "grad_norm": 31.78968620300293, + "learning_rate": 5e-05, + "loss": 1.3264, + "num_input_tokens_seen": 175222280, + "step": 2619 + }, + { + "epoch": 0.29719148936170214, + "loss": 1.3786168098449707, + "loss_ce": 0.007523118983954191, + "loss_iou": 0.55859375, + "loss_num": 0.05078125, + "loss_xval": 1.375, + "num_input_tokens_seen": 175222280, + "step": 2619 + }, + { + "epoch": 0.29730496453900707, + "grad_norm": 25.138450622558594, + "learning_rate": 5e-05, + "loss": 1.593, + "num_input_tokens_seen": 175289756, + "step": 2620 + }, + { + "epoch": 0.29730496453900707, + "loss": 1.5335829257965088, + "loss_ce": 0.0037978484760969877, + "loss_iou": 0.63671875, + "loss_num": 0.051513671875, + "loss_xval": 1.53125, + "num_input_tokens_seen": 175289756, + "step": 2620 + }, + { + "epoch": 0.29741843971631204, + "grad_norm": 37.95814895629883, + "learning_rate": 5e-05, + "loss": 1.2296, + "num_input_tokens_seen": 175357236, + "step": 2621 + }, + { + "epoch": 0.29741843971631204, + "loss": 1.2020375728607178, + "loss_ce": 0.00526030920445919, + "loss_iou": 0.515625, + "loss_num": 0.032958984375, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 175357236, + "step": 2621 + }, + { + "epoch": 0.297531914893617, + "grad_norm": 26.0775203704834, + "learning_rate": 5e-05, + "loss": 1.4089, + "num_input_tokens_seen": 175424032, + "step": 2622 + }, + { + "epoch": 0.297531914893617, + "loss": 1.508231520652771, + "loss_ce": 0.009208086878061295, + "loss_iou": 0.62109375, + "loss_num": 0.052001953125, + "loss_xval": 1.5, + "num_input_tokens_seen": 175424032, + "step": 2622 + }, + { + "epoch": 0.297645390070922, + "grad_norm": 16.798601150512695, + "learning_rate": 5e-05, + "loss": 1.206, + "num_input_tokens_seen": 175492212, + "step": 2623 + }, + { + "epoch": 0.297645390070922, + "loss": 1.2224745750427246, + "loss_ce": 0.003724591340869665, + "loss_iou": 0.4921875, + "loss_num": 0.046142578125, + "loss_xval": 1.21875, + "num_input_tokens_seen": 175492212, + "step": 2623 + }, + { + "epoch": 0.2977588652482269, + "grad_norm": 74.13755798339844, + "learning_rate": 5e-05, + "loss": 1.2868, + "num_input_tokens_seen": 175558536, + "step": 2624 + }, + { + "epoch": 0.2977588652482269, + "loss": 1.315314769744873, + "loss_ce": 0.01209211628884077, + "loss_iou": 0.5078125, + "loss_num": 0.05712890625, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 175558536, + "step": 2624 + }, + { + "epoch": 0.2978723404255319, + "grad_norm": 25.711029052734375, + "learning_rate": 5e-05, + "loss": 1.2435, + "num_input_tokens_seen": 175625580, + "step": 2625 + }, + { + "epoch": 0.2978723404255319, + "loss": 1.3562452793121338, + "loss_ce": 0.006147514563053846, + "loss_iou": 0.58203125, + "loss_num": 0.037841796875, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 175625580, + "step": 2625 + }, + { + "epoch": 0.2979858156028369, + "grad_norm": 52.172950744628906, + "learning_rate": 5e-05, + "loss": 1.2196, + "num_input_tokens_seen": 175691288, + "step": 2626 + }, + { + "epoch": 0.2979858156028369, + "loss": 1.4446884393692017, + "loss_ce": 0.00914154015481472, + "loss_iou": 0.62109375, + "loss_num": 0.0390625, + "loss_xval": 1.4375, + "num_input_tokens_seen": 175691288, + "step": 2626 + }, + { + "epoch": 0.29809929078014186, + "grad_norm": 22.464641571044922, + "learning_rate": 5e-05, + "loss": 1.5018, + "num_input_tokens_seen": 175758168, + "step": 2627 + }, + { + "epoch": 0.29809929078014186, + "loss": 1.3387837409973145, + "loss_ce": 0.00547067541629076, + "loss_iou": 0.578125, + "loss_num": 0.03564453125, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 175758168, + "step": 2627 + }, + { + "epoch": 0.29821276595744683, + "grad_norm": 45.83067321777344, + "learning_rate": 5e-05, + "loss": 1.1273, + "num_input_tokens_seen": 175824500, + "step": 2628 + }, + { + "epoch": 0.29821276595744683, + "loss": 1.0375568866729736, + "loss_ce": 0.0024006073363125324, + "loss_iou": 0.44921875, + "loss_num": 0.0277099609375, + "loss_xval": 1.03125, + "num_input_tokens_seen": 175824500, + "step": 2628 + }, + { + "epoch": 0.29832624113475176, + "grad_norm": 40.6843147277832, + "learning_rate": 5e-05, + "loss": 1.3344, + "num_input_tokens_seen": 175891876, + "step": 2629 + }, + { + "epoch": 0.29832624113475176, + "loss": 1.447104811668396, + "loss_ce": 0.008628175593912601, + "loss_iou": 0.578125, + "loss_num": 0.056884765625, + "loss_xval": 1.4375, + "num_input_tokens_seen": 175891876, + "step": 2629 + }, + { + "epoch": 0.29843971631205674, + "grad_norm": 26.88433837890625, + "learning_rate": 5e-05, + "loss": 1.1198, + "num_input_tokens_seen": 175957900, + "step": 2630 + }, + { + "epoch": 0.29843971631205674, + "loss": 1.1231285333633423, + "loss_ce": 0.003987896256148815, + "loss_iou": 0.46875, + "loss_num": 0.0361328125, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 175957900, + "step": 2630 + }, + { + "epoch": 0.2985531914893617, + "grad_norm": 30.747690200805664, + "learning_rate": 5e-05, + "loss": 1.2424, + "num_input_tokens_seen": 176023716, + "step": 2631 + }, + { + "epoch": 0.2985531914893617, + "loss": 1.4837288856506348, + "loss_ce": 0.00618975143879652, + "loss_iou": 0.5859375, + "loss_num": 0.061767578125, + "loss_xval": 1.4765625, + "num_input_tokens_seen": 176023716, + "step": 2631 + }, + { + "epoch": 0.2986666666666667, + "grad_norm": 30.295833587646484, + "learning_rate": 5e-05, + "loss": 1.2726, + "num_input_tokens_seen": 176090124, + "step": 2632 + }, + { + "epoch": 0.2986666666666667, + "loss": 1.319242238998413, + "loss_ce": 0.004300741013139486, + "loss_iou": 0.55859375, + "loss_num": 0.03857421875, + "loss_xval": 1.3125, + "num_input_tokens_seen": 176090124, + "step": 2632 + }, + { + "epoch": 0.2987801418439716, + "grad_norm": 22.12700843811035, + "learning_rate": 5e-05, + "loss": 1.0917, + "num_input_tokens_seen": 176155880, + "step": 2633 + }, + { + "epoch": 0.2987801418439716, + "loss": 1.0642850399017334, + "loss_ce": 0.004226445686072111, + "loss_iou": 0.439453125, + "loss_num": 0.035888671875, + "loss_xval": 1.0625, + "num_input_tokens_seen": 176155880, + "step": 2633 + }, + { + "epoch": 0.2988936170212766, + "grad_norm": 16.45393180847168, + "learning_rate": 5e-05, + "loss": 1.2095, + "num_input_tokens_seen": 176222204, + "step": 2634 + }, + { + "epoch": 0.2988936170212766, + "loss": 1.317723035812378, + "loss_ce": 0.0066877808421850204, + "loss_iou": 0.5546875, + "loss_num": 0.041259765625, + "loss_xval": 1.3125, + "num_input_tokens_seen": 176222204, + "step": 2634 + }, + { + "epoch": 0.29900709219858157, + "grad_norm": 14.185986518859863, + "learning_rate": 5e-05, + "loss": 1.1902, + "num_input_tokens_seen": 176289884, + "step": 2635 + }, + { + "epoch": 0.29900709219858157, + "loss": 1.2045857906341553, + "loss_ce": 0.0048787943087518215, + "loss_iou": 0.51171875, + "loss_num": 0.034912109375, + "loss_xval": 1.203125, + "num_input_tokens_seen": 176289884, + "step": 2635 + }, + { + "epoch": 0.29912056737588655, + "grad_norm": 19.564624786376953, + "learning_rate": 5e-05, + "loss": 1.1836, + "num_input_tokens_seen": 176357424, + "step": 2636 + }, + { + "epoch": 0.29912056737588655, + "loss": 1.2314194440841675, + "loss_ce": 0.0043686386197805405, + "loss_iou": 0.515625, + "loss_num": 0.039306640625, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 176357424, + "step": 2636 + }, + { + "epoch": 0.29923404255319147, + "grad_norm": 46.84368896484375, + "learning_rate": 5e-05, + "loss": 1.3741, + "num_input_tokens_seen": 176425144, + "step": 2637 + }, + { + "epoch": 0.29923404255319147, + "loss": 1.350968360900879, + "loss_ce": 0.004776919726282358, + "loss_iou": 0.578125, + "loss_num": 0.03759765625, + "loss_xval": 1.34375, + "num_input_tokens_seen": 176425144, + "step": 2637 + }, + { + "epoch": 0.29934751773049645, + "grad_norm": 27.046619415283203, + "learning_rate": 5e-05, + "loss": 1.666, + "num_input_tokens_seen": 176492556, + "step": 2638 + }, + { + "epoch": 0.29934751773049645, + "loss": 1.689856767654419, + "loss_ce": 0.004309843759983778, + "loss_iou": 0.67578125, + "loss_num": 0.06591796875, + "loss_xval": 1.6875, + "num_input_tokens_seen": 176492556, + "step": 2638 + }, + { + "epoch": 0.2994609929078014, + "grad_norm": 26.729843139648438, + "learning_rate": 5e-05, + "loss": 1.3523, + "num_input_tokens_seen": 176559488, + "step": 2639 + }, + { + "epoch": 0.2994609929078014, + "loss": 1.1658461093902588, + "loss_ce": 0.0037366957403719425, + "loss_iou": 0.50390625, + "loss_num": 0.030029296875, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 176559488, + "step": 2639 + }, + { + "epoch": 0.2995744680851064, + "grad_norm": 45.69850540161133, + "learning_rate": 5e-05, + "loss": 1.2654, + "num_input_tokens_seen": 176626948, + "step": 2640 + }, + { + "epoch": 0.2995744680851064, + "loss": 1.3066699504852295, + "loss_ce": 0.00784178078174591, + "loss_iou": 0.546875, + "loss_num": 0.04052734375, + "loss_xval": 1.296875, + "num_input_tokens_seen": 176626948, + "step": 2640 + }, + { + "epoch": 0.2996879432624113, + "grad_norm": 22.539188385009766, + "learning_rate": 5e-05, + "loss": 1.5623, + "num_input_tokens_seen": 176693876, + "step": 2641 + }, + { + "epoch": 0.2996879432624113, + "loss": 1.665853500366211, + "loss_ce": 0.0037440345622599125, + "loss_iou": 0.67578125, + "loss_num": 0.062255859375, + "loss_xval": 1.6640625, + "num_input_tokens_seen": 176693876, + "step": 2641 + }, + { + "epoch": 0.2998014184397163, + "grad_norm": 680.4638061523438, + "learning_rate": 5e-05, + "loss": 1.1266, + "num_input_tokens_seen": 176761116, + "step": 2642 + }, + { + "epoch": 0.2998014184397163, + "loss": 1.1065924167633057, + "loss_ce": 0.0045417072251439095, + "loss_iou": 0.482421875, + "loss_num": 0.027587890625, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 176761116, + "step": 2642 + }, + { + "epoch": 0.2999148936170213, + "grad_norm": 38.13938522338867, + "learning_rate": 5e-05, + "loss": 1.1482, + "num_input_tokens_seen": 176828592, + "step": 2643 + }, + { + "epoch": 0.2999148936170213, + "loss": 1.2299275398254395, + "loss_ce": 0.0028766992036253214, + "loss_iou": 0.5390625, + "loss_num": 0.029052734375, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 176828592, + "step": 2643 + }, + { + "epoch": 0.30002836879432626, + "grad_norm": 25.44341468811035, + "learning_rate": 5e-05, + "loss": 1.4406, + "num_input_tokens_seen": 176895012, + "step": 2644 + }, + { + "epoch": 0.30002836879432626, + "loss": 1.5557128190994263, + "loss_ce": 0.002490132348611951, + "loss_iou": 0.625, + "loss_num": 0.060546875, + "loss_xval": 1.5546875, + "num_input_tokens_seen": 176895012, + "step": 2644 + }, + { + "epoch": 0.3001418439716312, + "grad_norm": 19.625289916992188, + "learning_rate": 5e-05, + "loss": 1.1133, + "num_input_tokens_seen": 176962188, + "step": 2645 + }, + { + "epoch": 0.3001418439716312, + "loss": 1.185588002204895, + "loss_ce": 0.006388772279024124, + "loss_iou": 0.5234375, + "loss_num": 0.02734375, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 176962188, + "step": 2645 + }, + { + "epoch": 0.30025531914893616, + "grad_norm": 21.483545303344727, + "learning_rate": 5e-05, + "loss": 1.3732, + "num_input_tokens_seen": 177029492, + "step": 2646 + }, + { + "epoch": 0.30025531914893616, + "loss": 1.4661386013031006, + "loss_ce": 0.006177621893584728, + "loss_iou": 0.6171875, + "loss_num": 0.046142578125, + "loss_xval": 1.4609375, + "num_input_tokens_seen": 177029492, + "step": 2646 + }, + { + "epoch": 0.30036879432624114, + "grad_norm": 32.92458724975586, + "learning_rate": 5e-05, + "loss": 1.4639, + "num_input_tokens_seen": 177097496, + "step": 2647 + }, + { + "epoch": 0.30036879432624114, + "loss": 1.5117557048797607, + "loss_ce": 0.005407984368503094, + "loss_iou": 0.609375, + "loss_num": 0.0576171875, + "loss_xval": 1.5078125, + "num_input_tokens_seen": 177097496, + "step": 2647 + }, + { + "epoch": 0.3004822695035461, + "grad_norm": 25.762235641479492, + "learning_rate": 5e-05, + "loss": 1.4874, + "num_input_tokens_seen": 177165152, + "step": 2648 + }, + { + "epoch": 0.3004822695035461, + "loss": 1.4901947975158691, + "loss_ce": 0.004110797308385372, + "loss_iou": 0.58984375, + "loss_num": 0.060791015625, + "loss_xval": 1.484375, + "num_input_tokens_seen": 177165152, + "step": 2648 + }, + { + "epoch": 0.30059574468085104, + "grad_norm": 19.885700225830078, + "learning_rate": 5e-05, + "loss": 1.3133, + "num_input_tokens_seen": 177232200, + "step": 2649 + }, + { + "epoch": 0.30059574468085104, + "loss": 1.411379337310791, + "loss_ce": 0.003176238853484392, + "loss_iou": 0.55859375, + "loss_num": 0.05810546875, + "loss_xval": 1.40625, + "num_input_tokens_seen": 177232200, + "step": 2649 + }, + { + "epoch": 0.300709219858156, + "grad_norm": 38.11457443237305, + "learning_rate": 5e-05, + "loss": 1.4164, + "num_input_tokens_seen": 177299304, + "step": 2650 + }, + { + "epoch": 0.300709219858156, + "loss": 1.600390911102295, + "loss_ce": 0.01005889568477869, + "loss_iou": 0.62890625, + "loss_num": 0.06591796875, + "loss_xval": 1.59375, + "num_input_tokens_seen": 177299304, + "step": 2650 + }, + { + "epoch": 0.300822695035461, + "grad_norm": 35.611427307128906, + "learning_rate": 5e-05, + "loss": 1.3472, + "num_input_tokens_seen": 177365312, + "step": 2651 + }, + { + "epoch": 0.300822695035461, + "loss": 1.229622483253479, + "loss_ce": 0.008431129157543182, + "loss_iou": 0.48046875, + "loss_num": 0.05224609375, + "loss_xval": 1.21875, + "num_input_tokens_seen": 177365312, + "step": 2651 + }, + { + "epoch": 0.300936170212766, + "grad_norm": 28.393117904663086, + "learning_rate": 5e-05, + "loss": 1.2826, + "num_input_tokens_seen": 177430816, + "step": 2652 + }, + { + "epoch": 0.300936170212766, + "loss": 1.2516753673553467, + "loss_ce": 0.0031401505693793297, + "loss_iou": 0.55859375, + "loss_num": 0.027099609375, + "loss_xval": 1.25, + "num_input_tokens_seen": 177430816, + "step": 2652 + }, + { + "epoch": 0.3010496453900709, + "grad_norm": 21.396371841430664, + "learning_rate": 5e-05, + "loss": 1.2628, + "num_input_tokens_seen": 177497500, + "step": 2653 + }, + { + "epoch": 0.3010496453900709, + "loss": 1.277141809463501, + "loss_ce": 0.0037043674383312464, + "loss_iou": 0.5546875, + "loss_num": 0.031982421875, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 177497500, + "step": 2653 + }, + { + "epoch": 0.3011631205673759, + "grad_norm": 39.593727111816406, + "learning_rate": 5e-05, + "loss": 1.3096, + "num_input_tokens_seen": 177563680, + "step": 2654 + }, + { + "epoch": 0.3011631205673759, + "loss": 1.393943190574646, + "loss_ce": 0.0072244079783558846, + "loss_iou": 0.578125, + "loss_num": 0.046630859375, + "loss_xval": 1.390625, + "num_input_tokens_seen": 177563680, + "step": 2654 + }, + { + "epoch": 0.30127659574468085, + "grad_norm": 34.786834716796875, + "learning_rate": 5e-05, + "loss": 1.315, + "num_input_tokens_seen": 177630904, + "step": 2655 + }, + { + "epoch": 0.30127659574468085, + "loss": 1.282517671585083, + "loss_ce": 0.003220772137865424, + "loss_iou": 0.5546875, + "loss_num": 0.03466796875, + "loss_xval": 1.28125, + "num_input_tokens_seen": 177630904, + "step": 2655 + }, + { + "epoch": 0.30139007092198583, + "grad_norm": 26.13268280029297, + "learning_rate": 5e-05, + "loss": 1.5031, + "num_input_tokens_seen": 177697544, + "step": 2656 + }, + { + "epoch": 0.30139007092198583, + "loss": 1.568203091621399, + "loss_ce": 0.00472655612975359, + "loss_iou": 0.65625, + "loss_num": 0.05029296875, + "loss_xval": 1.5625, + "num_input_tokens_seen": 177697544, + "step": 2656 + }, + { + "epoch": 0.30150354609929075, + "grad_norm": 17.286697387695312, + "learning_rate": 5e-05, + "loss": 1.341, + "num_input_tokens_seen": 177765280, + "step": 2657 + }, + { + "epoch": 0.30150354609929075, + "loss": 1.3363327980041504, + "loss_ce": 0.003325060708448291, + "loss_iou": 0.55078125, + "loss_num": 0.046142578125, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 177765280, + "step": 2657 + }, + { + "epoch": 0.30161702127659573, + "grad_norm": 35.4777717590332, + "learning_rate": 5e-05, + "loss": 1.2801, + "num_input_tokens_seen": 177832016, + "step": 2658 + }, + { + "epoch": 0.30161702127659573, + "loss": 1.352673888206482, + "loss_ce": 0.005994205363094807, + "loss_iou": 0.5078125, + "loss_num": 0.06640625, + "loss_xval": 1.34375, + "num_input_tokens_seen": 177832016, + "step": 2658 + }, + { + "epoch": 0.3017304964539007, + "grad_norm": 16.896085739135742, + "learning_rate": 5e-05, + "loss": 1.2083, + "num_input_tokens_seen": 177899256, + "step": 2659 + }, + { + "epoch": 0.3017304964539007, + "loss": 1.1696598529815674, + "loss_ce": 0.0075504663400352, + "loss_iou": 0.5, + "loss_num": 0.03271484375, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 177899256, + "step": 2659 + }, + { + "epoch": 0.3018439716312057, + "grad_norm": 15.23859977722168, + "learning_rate": 5e-05, + "loss": 1.0684, + "num_input_tokens_seen": 177966676, + "step": 2660 + }, + { + "epoch": 0.3018439716312057, + "loss": 0.9654497504234314, + "loss_ce": 0.002070853253826499, + "loss_iou": 0.375, + "loss_num": 0.04248046875, + "loss_xval": 0.96484375, + "num_input_tokens_seen": 177966676, + "step": 2660 + }, + { + "epoch": 0.30195744680851067, + "grad_norm": 22.21193504333496, + "learning_rate": 5e-05, + "loss": 1.359, + "num_input_tokens_seen": 178033620, + "step": 2661 + }, + { + "epoch": 0.30195744680851067, + "loss": 1.2534968852996826, + "loss_ce": 0.003985115326941013, + "loss_iou": 0.51953125, + "loss_num": 0.04248046875, + "loss_xval": 1.25, + "num_input_tokens_seen": 178033620, + "step": 2661 + }, + { + "epoch": 0.3020709219858156, + "grad_norm": 25.026050567626953, + "learning_rate": 5e-05, + "loss": 1.1674, + "num_input_tokens_seen": 178100716, + "step": 2662 + }, + { + "epoch": 0.3020709219858156, + "loss": 1.3278512954711914, + "loss_ce": 0.008027000352740288, + "loss_iou": 0.55859375, + "loss_num": 0.041015625, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 178100716, + "step": 2662 + }, + { + "epoch": 0.30218439716312057, + "grad_norm": 37.94057083129883, + "learning_rate": 5e-05, + "loss": 1.4149, + "num_input_tokens_seen": 178168404, + "step": 2663 + }, + { + "epoch": 0.30218439716312057, + "loss": 1.5532423257827759, + "loss_ce": 0.007832195609807968, + "loss_iou": 0.62109375, + "loss_num": 0.06005859375, + "loss_xval": 1.546875, + "num_input_tokens_seen": 178168404, + "step": 2663 + }, + { + "epoch": 0.30229787234042554, + "grad_norm": 24.429101943969727, + "learning_rate": 5e-05, + "loss": 1.6823, + "num_input_tokens_seen": 178234496, + "step": 2664 + }, + { + "epoch": 0.30229787234042554, + "loss": 1.717545509338379, + "loss_ce": 0.00709636602550745, + "loss_iou": 0.6953125, + "loss_num": 0.0634765625, + "loss_xval": 1.7109375, + "num_input_tokens_seen": 178234496, + "step": 2664 + }, + { + "epoch": 0.3024113475177305, + "grad_norm": 23.184194564819336, + "learning_rate": 5e-05, + "loss": 1.4293, + "num_input_tokens_seen": 178301976, + "step": 2665 + }, + { + "epoch": 0.3024113475177305, + "loss": 1.3569800853729248, + "loss_ce": 0.005417523439973593, + "loss_iou": 0.55859375, + "loss_num": 0.0458984375, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 178301976, + "step": 2665 + }, + { + "epoch": 0.30252482269503544, + "grad_norm": 19.776763916015625, + "learning_rate": 5e-05, + "loss": 1.4231, + "num_input_tokens_seen": 178368308, + "step": 2666 + }, + { + "epoch": 0.30252482269503544, + "loss": 1.298218011856079, + "loss_ce": 0.003296222537755966, + "loss_iou": 0.55078125, + "loss_num": 0.039306640625, + "loss_xval": 1.296875, + "num_input_tokens_seen": 178368308, + "step": 2666 + }, + { + "epoch": 0.3026382978723404, + "grad_norm": 35.32950210571289, + "learning_rate": 5e-05, + "loss": 1.1721, + "num_input_tokens_seen": 178435936, + "step": 2667 + }, + { + "epoch": 0.3026382978723404, + "loss": 1.165785789489746, + "loss_ce": 0.005141174886375666, + "loss_iou": 0.51171875, + "loss_num": 0.0281982421875, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 178435936, + "step": 2667 + }, + { + "epoch": 0.3027517730496454, + "grad_norm": 39.054264068603516, + "learning_rate": 5e-05, + "loss": 1.3548, + "num_input_tokens_seen": 178502576, + "step": 2668 + }, + { + "epoch": 0.3027517730496454, + "loss": 1.338155746459961, + "loss_ce": 0.0107631366699934, + "loss_iou": 0.53515625, + "loss_num": 0.0517578125, + "loss_xval": 1.328125, + "num_input_tokens_seen": 178502576, + "step": 2668 + }, + { + "epoch": 0.3028652482269504, + "grad_norm": 24.19675064086914, + "learning_rate": 5e-05, + "loss": 1.3695, + "num_input_tokens_seen": 178569592, + "step": 2669 + }, + { + "epoch": 0.3028652482269504, + "loss": 1.1114987134933472, + "loss_ce": 0.004565096460282803, + "loss_iou": 0.48828125, + "loss_num": 0.0260009765625, + "loss_xval": 1.109375, + "num_input_tokens_seen": 178569592, + "step": 2669 + }, + { + "epoch": 0.3029787234042553, + "grad_norm": 29.022552490234375, + "learning_rate": 5e-05, + "loss": 1.1674, + "num_input_tokens_seen": 178635548, + "step": 2670 + }, + { + "epoch": 0.3029787234042553, + "loss": 1.215608835220337, + "loss_ce": 0.004671204835176468, + "loss_iou": 0.515625, + "loss_num": 0.035400390625, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 178635548, + "step": 2670 + }, + { + "epoch": 0.3030921985815603, + "grad_norm": 45.0393180847168, + "learning_rate": 5e-05, + "loss": 1.2063, + "num_input_tokens_seen": 178703356, + "step": 2671 + }, + { + "epoch": 0.3030921985815603, + "loss": 1.3251597881317139, + "loss_ce": 0.004847303032875061, + "loss_iou": 0.5625, + "loss_num": 0.039794921875, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 178703356, + "step": 2671 + }, + { + "epoch": 0.30320567375886526, + "grad_norm": 26.93796730041504, + "learning_rate": 5e-05, + "loss": 1.3417, + "num_input_tokens_seen": 178769280, + "step": 2672 + }, + { + "epoch": 0.30320567375886526, + "loss": 1.4241408109664917, + "loss_ce": 0.004951393231749535, + "loss_iou": 0.56640625, + "loss_num": 0.056640625, + "loss_xval": 1.421875, + "num_input_tokens_seen": 178769280, + "step": 2672 + }, + { + "epoch": 0.30331914893617024, + "grad_norm": 16.245086669921875, + "learning_rate": 5e-05, + "loss": 1.2668, + "num_input_tokens_seen": 178837200, + "step": 2673 + }, + { + "epoch": 0.30331914893617024, + "loss": 1.338793396949768, + "loss_ce": 0.005297351162880659, + "loss_iou": 0.54296875, + "loss_num": 0.049072265625, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 178837200, + "step": 2673 + }, + { + "epoch": 0.30343262411347516, + "grad_norm": 31.428895950317383, + "learning_rate": 5e-05, + "loss": 1.1177, + "num_input_tokens_seen": 178903864, + "step": 2674 + }, + { + "epoch": 0.30343262411347516, + "loss": 1.1506943702697754, + "loss_ce": 0.008116262033581734, + "loss_iou": 0.462890625, + "loss_num": 0.043212890625, + "loss_xval": 1.140625, + "num_input_tokens_seen": 178903864, + "step": 2674 + }, + { + "epoch": 0.30354609929078014, + "grad_norm": 34.2166633605957, + "learning_rate": 5e-05, + "loss": 1.3851, + "num_input_tokens_seen": 178970504, + "step": 2675 + }, + { + "epoch": 0.30354609929078014, + "loss": 1.3858332633972168, + "loss_ce": 0.0059505063109099865, + "loss_iou": 0.546875, + "loss_num": 0.05712890625, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 178970504, + "step": 2675 + }, + { + "epoch": 0.3036595744680851, + "grad_norm": 26.482152938842773, + "learning_rate": 5e-05, + "loss": 1.5629, + "num_input_tokens_seen": 179037152, + "step": 2676 + }, + { + "epoch": 0.3036595744680851, + "loss": 1.7012784481048584, + "loss_ce": 0.004012854769825935, + "loss_iou": 0.67578125, + "loss_num": 0.0693359375, + "loss_xval": 1.6953125, + "num_input_tokens_seen": 179037152, + "step": 2676 + }, + { + "epoch": 0.3037730496453901, + "grad_norm": 21.079910278320312, + "learning_rate": 5e-05, + "loss": 1.1759, + "num_input_tokens_seen": 179104104, + "step": 2677 + }, + { + "epoch": 0.3037730496453901, + "loss": 1.1887588500976562, + "loss_ce": 0.006629968993365765, + "loss_iou": 0.5078125, + "loss_num": 0.033447265625, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 179104104, + "step": 2677 + }, + { + "epoch": 0.303886524822695, + "grad_norm": 22.286542892456055, + "learning_rate": 5e-05, + "loss": 1.4163, + "num_input_tokens_seen": 179172212, + "step": 2678 + }, + { + "epoch": 0.303886524822695, + "loss": 1.3859238624572754, + "loss_ce": 0.004087906796485186, + "loss_iou": 0.56640625, + "loss_num": 0.049560546875, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 179172212, + "step": 2678 + }, + { + "epoch": 0.304, + "grad_norm": 24.972455978393555, + "learning_rate": 5e-05, + "loss": 1.3508, + "num_input_tokens_seen": 179239340, + "step": 2679 + }, + { + "epoch": 0.304, + "loss": 1.3641879558563232, + "loss_ce": 0.0038363588973879814, + "loss_iou": 0.58203125, + "loss_num": 0.03857421875, + "loss_xval": 1.359375, + "num_input_tokens_seen": 179239340, + "step": 2679 + }, + { + "epoch": 0.30411347517730497, + "grad_norm": 22.59580421447754, + "learning_rate": 5e-05, + "loss": 1.451, + "num_input_tokens_seen": 179306632, + "step": 2680 + }, + { + "epoch": 0.30411347517730497, + "loss": 1.6243342161178589, + "loss_ce": 0.005193615797907114, + "loss_iou": 0.65234375, + "loss_num": 0.0634765625, + "loss_xval": 1.6171875, + "num_input_tokens_seen": 179306632, + "step": 2680 + }, + { + "epoch": 0.30422695035460995, + "grad_norm": 27.24404525756836, + "learning_rate": 5e-05, + "loss": 1.2822, + "num_input_tokens_seen": 179374020, + "step": 2681 + }, + { + "epoch": 0.30422695035460995, + "loss": 1.2660191059112549, + "loss_ce": 0.0033237694296985865, + "loss_iou": 0.5390625, + "loss_num": 0.0361328125, + "loss_xval": 1.265625, + "num_input_tokens_seen": 179374020, + "step": 2681 + }, + { + "epoch": 0.30434042553191487, + "grad_norm": 30.031269073486328, + "learning_rate": 5e-05, + "loss": 1.3971, + "num_input_tokens_seen": 179440992, + "step": 2682 + }, + { + "epoch": 0.30434042553191487, + "loss": 1.5112076997756958, + "loss_ce": 0.00827803649008274, + "loss_iou": 0.63671875, + "loss_num": 0.0458984375, + "loss_xval": 1.5, + "num_input_tokens_seen": 179440992, + "step": 2682 + }, + { + "epoch": 0.30445390070921985, + "grad_norm": 24.2764835357666, + "learning_rate": 5e-05, + "loss": 1.2706, + "num_input_tokens_seen": 179508004, + "step": 2683 + }, + { + "epoch": 0.30445390070921985, + "loss": 1.1887550354003906, + "loss_ce": 0.006137985736131668, + "loss_iou": 0.451171875, + "loss_num": 0.05615234375, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 179508004, + "step": 2683 + }, + { + "epoch": 0.3045673758865248, + "grad_norm": 31.089004516601562, + "learning_rate": 5e-05, + "loss": 1.4426, + "num_input_tokens_seen": 179575492, + "step": 2684 + }, + { + "epoch": 0.3045673758865248, + "loss": 1.5478885173797607, + "loss_ce": 0.00394325889647007, + "loss_iou": 0.6328125, + "loss_num": 0.055419921875, + "loss_xval": 1.546875, + "num_input_tokens_seen": 179575492, + "step": 2684 + }, + { + "epoch": 0.3046808510638298, + "grad_norm": 25.607053756713867, + "learning_rate": 5e-05, + "loss": 1.4711, + "num_input_tokens_seen": 179642528, + "step": 2685 + }, + { + "epoch": 0.3046808510638298, + "loss": 1.5116065740585327, + "loss_ce": 0.01014169491827488, + "loss_iou": 0.58984375, + "loss_num": 0.06494140625, + "loss_xval": 1.5, + "num_input_tokens_seen": 179642528, + "step": 2685 + }, + { + "epoch": 0.3047943262411347, + "grad_norm": 22.49375343322754, + "learning_rate": 5e-05, + "loss": 1.1664, + "num_input_tokens_seen": 179709276, + "step": 2686 + }, + { + "epoch": 0.3047943262411347, + "loss": 1.1870150566101074, + "loss_ce": 0.005374380387365818, + "loss_iou": 0.53515625, + "loss_num": 0.0224609375, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 179709276, + "step": 2686 + }, + { + "epoch": 0.3049078014184397, + "grad_norm": 32.28202819824219, + "learning_rate": 5e-05, + "loss": 1.1233, + "num_input_tokens_seen": 179777032, + "step": 2687 + }, + { + "epoch": 0.3049078014184397, + "loss": 1.0363037586212158, + "loss_ce": 0.012378048151731491, + "loss_iou": 0.4140625, + "loss_num": 0.039306640625, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 179777032, + "step": 2687 + }, + { + "epoch": 0.3050212765957447, + "grad_norm": 21.319942474365234, + "learning_rate": 5e-05, + "loss": 1.4228, + "num_input_tokens_seen": 179843752, + "step": 2688 + }, + { + "epoch": 0.3050212765957447, + "loss": 1.554673194885254, + "loss_ce": 0.010727968066930771, + "loss_iou": 0.6171875, + "loss_num": 0.0615234375, + "loss_xval": 1.546875, + "num_input_tokens_seen": 179843752, + "step": 2688 + }, + { + "epoch": 0.30513475177304966, + "grad_norm": 18.951107025146484, + "learning_rate": 5e-05, + "loss": 1.2812, + "num_input_tokens_seen": 179911460, + "step": 2689 + }, + { + "epoch": 0.30513475177304966, + "loss": 1.3560824394226074, + "loss_ce": 0.004519937559962273, + "loss_iou": 0.54296875, + "loss_num": 0.053466796875, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 179911460, + "step": 2689 + }, + { + "epoch": 0.3052482269503546, + "grad_norm": 17.994792938232422, + "learning_rate": 5e-05, + "loss": 1.2689, + "num_input_tokens_seen": 179979332, + "step": 2690 + }, + { + "epoch": 0.3052482269503546, + "loss": 1.232216715812683, + "loss_ce": 0.0027245208621025085, + "loss_iou": 0.5078125, + "loss_num": 0.04248046875, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 179979332, + "step": 2690 + }, + { + "epoch": 0.30536170212765956, + "grad_norm": 22.250707626342773, + "learning_rate": 5e-05, + "loss": 1.2787, + "num_input_tokens_seen": 180045764, + "step": 2691 + }, + { + "epoch": 0.30536170212765956, + "loss": 1.320879340171814, + "loss_ce": 0.004961409606039524, + "loss_iou": 0.50390625, + "loss_num": 0.06103515625, + "loss_xval": 1.3125, + "num_input_tokens_seen": 180045764, + "step": 2691 + }, + { + "epoch": 0.30547517730496454, + "grad_norm": 24.395824432373047, + "learning_rate": 5e-05, + "loss": 1.3418, + "num_input_tokens_seen": 180111404, + "step": 2692 + }, + { + "epoch": 0.30547517730496454, + "loss": 1.531723976135254, + "loss_ce": 0.006333251483738422, + "loss_iou": 0.61328125, + "loss_num": 0.059326171875, + "loss_xval": 1.5234375, + "num_input_tokens_seen": 180111404, + "step": 2692 + }, + { + "epoch": 0.3055886524822695, + "grad_norm": 25.43539810180664, + "learning_rate": 5e-05, + "loss": 1.3272, + "num_input_tokens_seen": 180178228, + "step": 2693 + }, + { + "epoch": 0.3055886524822695, + "loss": 1.3929557800292969, + "loss_ce": 0.008190072141587734, + "loss_iou": 0.54296875, + "loss_num": 0.0595703125, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 180178228, + "step": 2693 + }, + { + "epoch": 0.30570212765957444, + "grad_norm": 28.06820297241211, + "learning_rate": 5e-05, + "loss": 1.2376, + "num_input_tokens_seen": 180245660, + "step": 2694 + }, + { + "epoch": 0.30570212765957444, + "loss": 1.157502293586731, + "loss_ce": 0.005646854639053345, + "loss_iou": 0.51171875, + "loss_num": 0.0262451171875, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 180245660, + "step": 2694 + }, + { + "epoch": 0.3058156028368794, + "grad_norm": 41.55942153930664, + "learning_rate": 5e-05, + "loss": 1.3119, + "num_input_tokens_seen": 180312640, + "step": 2695 + }, + { + "epoch": 0.3058156028368794, + "loss": 1.485375165939331, + "loss_ce": 0.007347751408815384, + "loss_iou": 0.60546875, + "loss_num": 0.052978515625, + "loss_xval": 1.4765625, + "num_input_tokens_seen": 180312640, + "step": 2695 + }, + { + "epoch": 0.3059290780141844, + "grad_norm": 22.582120895385742, + "learning_rate": 5e-05, + "loss": 1.6811, + "num_input_tokens_seen": 180380160, + "step": 2696 + }, + { + "epoch": 0.3059290780141844, + "loss": 1.7436728477478027, + "loss_ce": 0.005391571670770645, + "loss_iou": 0.7109375, + "loss_num": 0.06298828125, + "loss_xval": 1.734375, + "num_input_tokens_seen": 180380160, + "step": 2696 + }, + { + "epoch": 0.3060425531914894, + "grad_norm": 21.70810317993164, + "learning_rate": 5e-05, + "loss": 1.2279, + "num_input_tokens_seen": 180447152, + "step": 2697 + }, + { + "epoch": 0.3060425531914894, + "loss": 1.2219561338424683, + "loss_ce": 0.0036943908780813217, + "loss_iou": 0.5, + "loss_num": 0.04345703125, + "loss_xval": 1.21875, + "num_input_tokens_seen": 180447152, + "step": 2697 + }, + { + "epoch": 0.30615602836879435, + "grad_norm": 31.691316604614258, + "learning_rate": 5e-05, + "loss": 1.2836, + "num_input_tokens_seen": 180514488, + "step": 2698 + }, + { + "epoch": 0.30615602836879435, + "loss": 1.4037349224090576, + "loss_ce": 0.006274103187024593, + "loss_iou": 0.5625, + "loss_num": 0.054931640625, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 180514488, + "step": 2698 + }, + { + "epoch": 0.3062695035460993, + "grad_norm": 41.16382598876953, + "learning_rate": 5e-05, + "loss": 1.3415, + "num_input_tokens_seen": 180581524, + "step": 2699 + }, + { + "epoch": 0.3062695035460993, + "loss": 1.3319270610809326, + "loss_ce": 0.004290267825126648, + "loss_iou": 0.56640625, + "loss_num": 0.0380859375, + "loss_xval": 1.328125, + "num_input_tokens_seen": 180581524, + "step": 2699 + }, + { + "epoch": 0.30638297872340425, + "grad_norm": 29.20331382751465, + "learning_rate": 5e-05, + "loss": 1.1192, + "num_input_tokens_seen": 180648776, + "step": 2700 + }, + { + "epoch": 0.30638297872340425, + "loss": 1.0968098640441895, + "loss_ce": 0.005989514756947756, + "loss_iou": 0.46484375, + "loss_num": 0.031982421875, + "loss_xval": 1.09375, + "num_input_tokens_seen": 180648776, + "step": 2700 + }, + { + "epoch": 0.30649645390070923, + "grad_norm": 26.605541229248047, + "learning_rate": 5e-05, + "loss": 1.3855, + "num_input_tokens_seen": 180715420, + "step": 2701 + }, + { + "epoch": 0.30649645390070923, + "loss": 1.3656851053237915, + "loss_ce": 0.006310150492936373, + "loss_iou": 0.6171875, + "loss_num": 0.0244140625, + "loss_xval": 1.359375, + "num_input_tokens_seen": 180715420, + "step": 2701 + }, + { + "epoch": 0.3066099290780142, + "grad_norm": 24.27524185180664, + "learning_rate": 5e-05, + "loss": 1.0352, + "num_input_tokens_seen": 180783616, + "step": 2702 + }, + { + "epoch": 0.3066099290780142, + "loss": 0.998779296875, + "loss_ce": 0.0036620746832340956, + "loss_iou": 0.443359375, + "loss_num": 0.0216064453125, + "loss_xval": 0.99609375, + "num_input_tokens_seen": 180783616, + "step": 2702 + }, + { + "epoch": 0.30672340425531913, + "grad_norm": 23.19660758972168, + "learning_rate": 5e-05, + "loss": 1.3254, + "num_input_tokens_seen": 180850824, + "step": 2703 + }, + { + "epoch": 0.30672340425531913, + "loss": 1.2772983312606812, + "loss_ce": 0.004349123686552048, + "loss_iou": 0.55078125, + "loss_num": 0.03369140625, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 180850824, + "step": 2703 + }, + { + "epoch": 0.3068368794326241, + "grad_norm": 34.02732467651367, + "learning_rate": 5e-05, + "loss": 1.4493, + "num_input_tokens_seen": 180918604, + "step": 2704 + }, + { + "epoch": 0.3068368794326241, + "loss": 1.4090790748596191, + "loss_ce": 0.0062470464035868645, + "loss_iou": 0.5546875, + "loss_num": 0.058349609375, + "loss_xval": 1.40625, + "num_input_tokens_seen": 180918604, + "step": 2704 + }, + { + "epoch": 0.3069503546099291, + "grad_norm": 24.0518798828125, + "learning_rate": 5e-05, + "loss": 1.6343, + "num_input_tokens_seen": 180985292, + "step": 2705 + }, + { + "epoch": 0.3069503546099291, + "loss": 1.6609770059585571, + "loss_ce": 0.004727034829556942, + "loss_iou": 0.69140625, + "loss_num": 0.0546875, + "loss_xval": 1.65625, + "num_input_tokens_seen": 180985292, + "step": 2705 + }, + { + "epoch": 0.30706382978723407, + "grad_norm": 45.95094299316406, + "learning_rate": 5e-05, + "loss": 1.1639, + "num_input_tokens_seen": 181052044, + "step": 2706 + }, + { + "epoch": 0.30706382978723407, + "loss": 1.2548383474349976, + "loss_ce": 0.009232942014932632, + "loss_iou": 0.5390625, + "loss_num": 0.034423828125, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 181052044, + "step": 2706 + }, + { + "epoch": 0.307177304964539, + "grad_norm": 27.363998413085938, + "learning_rate": 5e-05, + "loss": 1.1347, + "num_input_tokens_seen": 181119104, + "step": 2707 + }, + { + "epoch": 0.307177304964539, + "loss": 1.1213475465774536, + "loss_ce": 0.0022069676779210567, + "loss_iou": 0.5078125, + "loss_num": 0.019775390625, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 181119104, + "step": 2707 + }, + { + "epoch": 0.30729078014184397, + "grad_norm": 39.82313919067383, + "learning_rate": 5e-05, + "loss": 1.2587, + "num_input_tokens_seen": 181186856, + "step": 2708 + }, + { + "epoch": 0.30729078014184397, + "loss": 1.3944981098175049, + "loss_ce": 0.003873080713674426, + "loss_iou": 0.58203125, + "loss_num": 0.045166015625, + "loss_xval": 1.390625, + "num_input_tokens_seen": 181186856, + "step": 2708 + }, + { + "epoch": 0.30740425531914894, + "grad_norm": 31.74763298034668, + "learning_rate": 5e-05, + "loss": 1.4615, + "num_input_tokens_seen": 181253796, + "step": 2709 + }, + { + "epoch": 0.30740425531914894, + "loss": 1.4119014739990234, + "loss_ce": 0.007604529615491629, + "loss_iou": 0.57421875, + "loss_num": 0.05126953125, + "loss_xval": 1.40625, + "num_input_tokens_seen": 181253796, + "step": 2709 + }, + { + "epoch": 0.3075177304964539, + "grad_norm": 31.057729721069336, + "learning_rate": 5e-05, + "loss": 1.0087, + "num_input_tokens_seen": 181320292, + "step": 2710 + }, + { + "epoch": 0.3075177304964539, + "loss": 0.9798555374145508, + "loss_ce": 0.005734462756663561, + "loss_iou": 0.408203125, + "loss_num": 0.03173828125, + "loss_xval": 0.97265625, + "num_input_tokens_seen": 181320292, + "step": 2710 + }, + { + "epoch": 0.30763120567375885, + "grad_norm": 28.076244354248047, + "learning_rate": 5e-05, + "loss": 1.2945, + "num_input_tokens_seen": 181387972, + "step": 2711 + }, + { + "epoch": 0.30763120567375885, + "loss": 1.2622654438018799, + "loss_ce": 0.004941198043525219, + "loss_iou": 0.5234375, + "loss_num": 0.04296875, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 181387972, + "step": 2711 + }, + { + "epoch": 0.3077446808510638, + "grad_norm": 26.109539031982422, + "learning_rate": 5e-05, + "loss": 1.092, + "num_input_tokens_seen": 181455152, + "step": 2712 + }, + { + "epoch": 0.3077446808510638, + "loss": 0.9771748781204224, + "loss_ce": 0.007692448794841766, + "loss_iou": 0.408203125, + "loss_num": 0.03076171875, + "loss_xval": 0.96875, + "num_input_tokens_seen": 181455152, + "step": 2712 + }, + { + "epoch": 0.3078581560283688, + "grad_norm": 21.571619033813477, + "learning_rate": 5e-05, + "loss": 1.2927, + "num_input_tokens_seen": 181522512, + "step": 2713 + }, + { + "epoch": 0.3078581560283688, + "loss": 1.5452593564987183, + "loss_ce": 0.00766171095892787, + "loss_iou": 0.61328125, + "loss_num": 0.0625, + "loss_xval": 1.5390625, + "num_input_tokens_seen": 181522512, + "step": 2713 + }, + { + "epoch": 0.3079716312056738, + "grad_norm": 31.355609893798828, + "learning_rate": 5e-05, + "loss": 1.2996, + "num_input_tokens_seen": 181589064, + "step": 2714 + }, + { + "epoch": 0.3079716312056738, + "loss": 1.1453139781951904, + "loss_ce": 0.004688957240432501, + "loss_iou": 0.5078125, + "loss_num": 0.0255126953125, + "loss_xval": 1.140625, + "num_input_tokens_seen": 181589064, + "step": 2714 + }, + { + "epoch": 0.3080851063829787, + "grad_norm": 23.10286521911621, + "learning_rate": 5e-05, + "loss": 1.3966, + "num_input_tokens_seen": 181655944, + "step": 2715 + }, + { + "epoch": 0.3080851063829787, + "loss": 1.4157531261444092, + "loss_ce": 0.005596928298473358, + "loss_iou": 0.59765625, + "loss_num": 0.042236328125, + "loss_xval": 1.40625, + "num_input_tokens_seen": 181655944, + "step": 2715 + }, + { + "epoch": 0.3081985815602837, + "grad_norm": 36.33033752441406, + "learning_rate": 5e-05, + "loss": 1.1411, + "num_input_tokens_seen": 181723048, + "step": 2716 + }, + { + "epoch": 0.3081985815602837, + "loss": 1.3288384675979614, + "loss_ce": 0.010967452079057693, + "loss_iou": 0.51953125, + "loss_num": 0.056396484375, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 181723048, + "step": 2716 + }, + { + "epoch": 0.30831205673758866, + "grad_norm": 31.371681213378906, + "learning_rate": 5e-05, + "loss": 1.3459, + "num_input_tokens_seen": 181791592, + "step": 2717 + }, + { + "epoch": 0.30831205673758866, + "loss": 1.3774513006210327, + "loss_ce": 0.002451288513839245, + "loss_iou": 0.58984375, + "loss_num": 0.0390625, + "loss_xval": 1.375, + "num_input_tokens_seen": 181791592, + "step": 2717 + }, + { + "epoch": 0.30842553191489364, + "grad_norm": 28.38546371459961, + "learning_rate": 5e-05, + "loss": 1.1801, + "num_input_tokens_seen": 181858992, + "step": 2718 + }, + { + "epoch": 0.30842553191489364, + "loss": 1.2566304206848145, + "loss_ce": 0.004189012572169304, + "loss_iou": 0.54296875, + "loss_num": 0.032958984375, + "loss_xval": 1.25, + "num_input_tokens_seen": 181858992, + "step": 2718 + }, + { + "epoch": 0.30853900709219856, + "grad_norm": 31.066509246826172, + "learning_rate": 5e-05, + "loss": 1.5061, + "num_input_tokens_seen": 181926632, + "step": 2719 + }, + { + "epoch": 0.30853900709219856, + "loss": 1.536868929862976, + "loss_ce": 0.010257573798298836, + "loss_iou": 0.6015625, + "loss_num": 0.06494140625, + "loss_xval": 1.5234375, + "num_input_tokens_seen": 181926632, + "step": 2719 + }, + { + "epoch": 0.30865248226950354, + "grad_norm": 40.081512451171875, + "learning_rate": 5e-05, + "loss": 1.2842, + "num_input_tokens_seen": 181994188, + "step": 2720 + }, + { + "epoch": 0.30865248226950354, + "loss": 1.279343843460083, + "loss_ce": 0.005906373728066683, + "loss_iou": 0.53125, + "loss_num": 0.041748046875, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 181994188, + "step": 2720 + }, + { + "epoch": 0.3087659574468085, + "grad_norm": 26.992385864257812, + "learning_rate": 5e-05, + "loss": 1.3903, + "num_input_tokens_seen": 182061616, + "step": 2721 + }, + { + "epoch": 0.3087659574468085, + "loss": 1.5182228088378906, + "loss_ce": 0.00259787286631763, + "loss_iou": 0.6328125, + "loss_num": 0.05029296875, + "loss_xval": 1.515625, + "num_input_tokens_seen": 182061616, + "step": 2721 + }, + { + "epoch": 0.3088794326241135, + "grad_norm": 27.810894012451172, + "learning_rate": 5e-05, + "loss": 1.0719, + "num_input_tokens_seen": 182128576, + "step": 2722 + }, + { + "epoch": 0.3088794326241135, + "loss": 0.951679527759552, + "loss_ce": 0.005878753494471312, + "loss_iou": 0.41015625, + "loss_num": 0.025634765625, + "loss_xval": 0.9453125, + "num_input_tokens_seen": 182128576, + "step": 2722 + }, + { + "epoch": 0.3089929078014184, + "grad_norm": 26.8863582611084, + "learning_rate": 5e-05, + "loss": 1.1235, + "num_input_tokens_seen": 182195976, + "step": 2723 + }, + { + "epoch": 0.3089929078014184, + "loss": 1.197014570236206, + "loss_ce": 0.018303601071238518, + "loss_iou": 0.478515625, + "loss_num": 0.04443359375, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 182195976, + "step": 2723 + }, + { + "epoch": 0.3091063829787234, + "grad_norm": 27.68186378479004, + "learning_rate": 5e-05, + "loss": 1.3774, + "num_input_tokens_seen": 182261660, + "step": 2724 + }, + { + "epoch": 0.3091063829787234, + "loss": 1.6045418977737427, + "loss_ce": 0.0059090880677104, + "loss_iou": 0.6171875, + "loss_num": 0.0732421875, + "loss_xval": 1.6015625, + "num_input_tokens_seen": 182261660, + "step": 2724 + }, + { + "epoch": 0.30921985815602837, + "grad_norm": 34.787410736083984, + "learning_rate": 5e-05, + "loss": 1.3226, + "num_input_tokens_seen": 182328832, + "step": 2725 + }, + { + "epoch": 0.30921985815602837, + "loss": 1.3290482759475708, + "loss_ce": 0.0077592055313289165, + "loss_iou": 0.53125, + "loss_num": 0.052490234375, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 182328832, + "step": 2725 + }, + { + "epoch": 0.30933333333333335, + "grad_norm": 25.10344886779785, + "learning_rate": 5e-05, + "loss": 1.2559, + "num_input_tokens_seen": 182394704, + "step": 2726 + }, + { + "epoch": 0.30933333333333335, + "loss": 1.306875228881836, + "loss_ce": 0.004995440132915974, + "loss_iou": 0.5234375, + "loss_num": 0.051025390625, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 182394704, + "step": 2726 + }, + { + "epoch": 0.30944680851063827, + "grad_norm": 14.69003677368164, + "learning_rate": 5e-05, + "loss": 1.1468, + "num_input_tokens_seen": 182461488, + "step": 2727 + }, + { + "epoch": 0.30944680851063827, + "loss": 1.156150460243225, + "loss_ce": 0.01015439722687006, + "loss_iou": 0.45703125, + "loss_num": 0.046142578125, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 182461488, + "step": 2727 + }, + { + "epoch": 0.30956028368794325, + "grad_norm": 22.823970794677734, + "learning_rate": 5e-05, + "loss": 1.249, + "num_input_tokens_seen": 182528256, + "step": 2728 + }, + { + "epoch": 0.30956028368794325, + "loss": 1.2819604873657227, + "loss_ce": 0.006569781340658665, + "loss_iou": 0.498046875, + "loss_num": 0.056396484375, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 182528256, + "step": 2728 + }, + { + "epoch": 0.3096737588652482, + "grad_norm": 23.64857292175293, + "learning_rate": 5e-05, + "loss": 1.1871, + "num_input_tokens_seen": 182595124, + "step": 2729 + }, + { + "epoch": 0.3096737588652482, + "loss": 1.1720490455627441, + "loss_ce": 0.00408035097643733, + "loss_iou": 0.49609375, + "loss_num": 0.034912109375, + "loss_xval": 1.171875, + "num_input_tokens_seen": 182595124, + "step": 2729 + }, + { + "epoch": 0.3097872340425532, + "grad_norm": 35.57906723022461, + "learning_rate": 5e-05, + "loss": 1.2962, + "num_input_tokens_seen": 182662348, + "step": 2730 + }, + { + "epoch": 0.3097872340425532, + "loss": 1.3518683910369873, + "loss_ce": 0.004212142899632454, + "loss_iou": 0.578125, + "loss_num": 0.038330078125, + "loss_xval": 1.34375, + "num_input_tokens_seen": 182662348, + "step": 2730 + }, + { + "epoch": 0.3099007092198582, + "grad_norm": 25.272323608398438, + "learning_rate": 5e-05, + "loss": 1.4691, + "num_input_tokens_seen": 182729808, + "step": 2731 + }, + { + "epoch": 0.3099007092198582, + "loss": 1.4580543041229248, + "loss_ce": 0.005905903875827789, + "loss_iou": 0.6484375, + "loss_num": 0.031005859375, + "loss_xval": 1.453125, + "num_input_tokens_seen": 182729808, + "step": 2731 + }, + { + "epoch": 0.3100141843971631, + "grad_norm": 14.060539245605469, + "learning_rate": 5e-05, + "loss": 1.2503, + "num_input_tokens_seen": 182796832, + "step": 2732 + }, + { + "epoch": 0.3100141843971631, + "loss": 1.271544337272644, + "loss_ce": 0.0044544776901602745, + "loss_iou": 0.51171875, + "loss_num": 0.04833984375, + "loss_xval": 1.265625, + "num_input_tokens_seen": 182796832, + "step": 2732 + }, + { + "epoch": 0.3101276595744681, + "grad_norm": 21.413795471191406, + "learning_rate": 5e-05, + "loss": 1.1417, + "num_input_tokens_seen": 182863584, + "step": 2733 + }, + { + "epoch": 0.3101276595744681, + "loss": 1.1477811336517334, + "loss_ce": 0.005203053820878267, + "loss_iou": 0.474609375, + "loss_num": 0.0390625, + "loss_xval": 1.140625, + "num_input_tokens_seen": 182863584, + "step": 2733 + }, + { + "epoch": 0.31024113475177306, + "grad_norm": 28.778514862060547, + "learning_rate": 5e-05, + "loss": 1.2671, + "num_input_tokens_seen": 182930592, + "step": 2734 + }, + { + "epoch": 0.31024113475177306, + "loss": 1.2000722885131836, + "loss_ce": 0.002806608099490404, + "loss_iou": 0.54296875, + "loss_num": 0.022705078125, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 182930592, + "step": 2734 + }, + { + "epoch": 0.31035460992907804, + "grad_norm": 262.37518310546875, + "learning_rate": 5e-05, + "loss": 1.3543, + "num_input_tokens_seen": 182998168, + "step": 2735 + }, + { + "epoch": 0.31035460992907804, + "loss": 1.3421196937561035, + "loss_ce": 0.0047172969207167625, + "loss_iou": 0.59375, + "loss_num": 0.0299072265625, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 182998168, + "step": 2735 + }, + { + "epoch": 0.31046808510638296, + "grad_norm": 38.94682312011719, + "learning_rate": 5e-05, + "loss": 1.3878, + "num_input_tokens_seen": 183065064, + "step": 2736 + }, + { + "epoch": 0.31046808510638296, + "loss": 1.4559272527694702, + "loss_ce": 0.0018256669864058495, + "loss_iou": 0.59375, + "loss_num": 0.05322265625, + "loss_xval": 1.453125, + "num_input_tokens_seen": 183065064, + "step": 2736 + }, + { + "epoch": 0.31058156028368794, + "grad_norm": 29.12810707092285, + "learning_rate": 5e-05, + "loss": 1.3052, + "num_input_tokens_seen": 183131964, + "step": 2737 + }, + { + "epoch": 0.31058156028368794, + "loss": 1.1745070219039917, + "loss_ce": 0.00507344538345933, + "loss_iou": 0.4921875, + "loss_num": 0.036865234375, + "loss_xval": 1.171875, + "num_input_tokens_seen": 183131964, + "step": 2737 + }, + { + "epoch": 0.3106950354609929, + "grad_norm": 16.66714096069336, + "learning_rate": 5e-05, + "loss": 1.0281, + "num_input_tokens_seen": 183198708, + "step": 2738 + }, + { + "epoch": 0.3106950354609929, + "loss": 1.1031873226165771, + "loss_ce": 0.0026014025788754225, + "loss_iou": 0.486328125, + "loss_num": 0.0255126953125, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 183198708, + "step": 2738 + }, + { + "epoch": 0.3108085106382979, + "grad_norm": 21.206287384033203, + "learning_rate": 5e-05, + "loss": 1.1809, + "num_input_tokens_seen": 183266452, + "step": 2739 + }, + { + "epoch": 0.3108085106382979, + "loss": 1.1053006649017334, + "loss_ce": 0.006179629825055599, + "loss_iou": 0.48046875, + "loss_num": 0.027587890625, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 183266452, + "step": 2739 + }, + { + "epoch": 0.3109219858156028, + "grad_norm": 38.37468719482422, + "learning_rate": 5e-05, + "loss": 1.3047, + "num_input_tokens_seen": 183333220, + "step": 2740 + }, + { + "epoch": 0.3109219858156028, + "loss": 1.320117712020874, + "loss_ce": 0.0037114769220352173, + "loss_iou": 0.55859375, + "loss_num": 0.040283203125, + "loss_xval": 1.3125, + "num_input_tokens_seen": 183333220, + "step": 2740 + }, + { + "epoch": 0.3110354609929078, + "grad_norm": 22.088056564331055, + "learning_rate": 5e-05, + "loss": 1.6201, + "num_input_tokens_seen": 183400772, + "step": 2741 + }, + { + "epoch": 0.3110354609929078, + "loss": 1.4989666938781738, + "loss_ce": 0.0018964395858347416, + "loss_iou": 0.6484375, + "loss_num": 0.039794921875, + "loss_xval": 1.5, + "num_input_tokens_seen": 183400772, + "step": 2741 + }, + { + "epoch": 0.3111489361702128, + "grad_norm": 41.014949798583984, + "learning_rate": 5e-05, + "loss": 1.3101, + "num_input_tokens_seen": 183467188, + "step": 2742 + }, + { + "epoch": 0.3111489361702128, + "loss": 1.3524260520935059, + "loss_ce": 0.00623463187366724, + "loss_iou": 0.4921875, + "loss_num": 0.072265625, + "loss_xval": 1.34375, + "num_input_tokens_seen": 183467188, + "step": 2742 + }, + { + "epoch": 0.31126241134751775, + "grad_norm": 38.7069091796875, + "learning_rate": 5e-05, + "loss": 1.1529, + "num_input_tokens_seen": 183533436, + "step": 2743 + }, + { + "epoch": 0.31126241134751775, + "loss": 1.1293691396713257, + "loss_ce": 0.003392582293599844, + "loss_iou": 0.4765625, + "loss_num": 0.0341796875, + "loss_xval": 1.125, + "num_input_tokens_seen": 183533436, + "step": 2743 + }, + { + "epoch": 0.3113758865248227, + "grad_norm": 281.3885498046875, + "learning_rate": 5e-05, + "loss": 1.4712, + "num_input_tokens_seen": 183600484, + "step": 2744 + }, + { + "epoch": 0.3113758865248227, + "loss": 1.5211045742034912, + "loss_ce": 0.003526383312419057, + "loss_iou": 0.6484375, + "loss_num": 0.04443359375, + "loss_xval": 1.515625, + "num_input_tokens_seen": 183600484, + "step": 2744 + }, + { + "epoch": 0.31148936170212765, + "grad_norm": 12.082181930541992, + "learning_rate": 5e-05, + "loss": 1.1389, + "num_input_tokens_seen": 183667808, + "step": 2745 + }, + { + "epoch": 0.31148936170212765, + "loss": 1.2321696281433105, + "loss_ce": 0.0031656580977141857, + "loss_iou": 0.482421875, + "loss_num": 0.05322265625, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 183667808, + "step": 2745 + }, + { + "epoch": 0.31160283687943263, + "grad_norm": 17.70760154724121, + "learning_rate": 5e-05, + "loss": 1.1054, + "num_input_tokens_seen": 183735812, + "step": 2746 + }, + { + "epoch": 0.31160283687943263, + "loss": 1.1425162553787231, + "loss_ce": 0.008238828741014004, + "loss_iou": 0.484375, + "loss_num": 0.033203125, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 183735812, + "step": 2746 + }, + { + "epoch": 0.3117163120567376, + "grad_norm": 36.15927505493164, + "learning_rate": 5e-05, + "loss": 1.4163, + "num_input_tokens_seen": 183801652, + "step": 2747 + }, + { + "epoch": 0.3117163120567376, + "loss": 1.3737976551055908, + "loss_ce": 0.004657007288187742, + "loss_iou": 0.5625, + "loss_num": 0.04833984375, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 183801652, + "step": 2747 + }, + { + "epoch": 0.31182978723404253, + "grad_norm": 26.825803756713867, + "learning_rate": 5e-05, + "loss": 1.3854, + "num_input_tokens_seen": 183868012, + "step": 2748 + }, + { + "epoch": 0.31182978723404253, + "loss": 1.263174295425415, + "loss_ce": 0.004873425699770451, + "loss_iou": 0.5390625, + "loss_num": 0.036865234375, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 183868012, + "step": 2748 + }, + { + "epoch": 0.3119432624113475, + "grad_norm": 25.587684631347656, + "learning_rate": 5e-05, + "loss": 1.3014, + "num_input_tokens_seen": 183935012, + "step": 2749 + }, + { + "epoch": 0.3119432624113475, + "loss": 1.3114228248596191, + "loss_ce": 0.006247000768780708, + "loss_iou": 0.55859375, + "loss_num": 0.0380859375, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 183935012, + "step": 2749 + }, + { + "epoch": 0.3120567375886525, + "grad_norm": 29.996543884277344, + "learning_rate": 5e-05, + "loss": 1.3236, + "num_input_tokens_seen": 184002140, + "step": 2750 + }, + { + "epoch": 0.3120567375886525, + "eval_seeclick_CIoU": 0.4075109511613846, + "eval_seeclick_GIoU": 0.4036225974559784, + "eval_seeclick_IoU": 0.4797309339046478, + "eval_seeclick_MAE_all": 0.14267563074827194, + "eval_seeclick_MAE_h": 0.06654501147568226, + "eval_seeclick_MAE_w": 0.10932167246937752, + "eval_seeclick_MAE_x_boxes": 0.2500278726220131, + "eval_seeclick_MAE_y_boxes": 0.060515133664011955, + "eval_seeclick_NUM_probability": 0.9996320307254791, + "eval_seeclick_inside_bbox": 0.6791666746139526, + "eval_seeclick_loss": 2.4098832607269287, + "eval_seeclick_loss_ce": 0.014474129769951105, + "eval_seeclick_loss_iou": 0.8468017578125, + "eval_seeclick_loss_num": 0.15228271484375, + "eval_seeclick_loss_xval": 2.4580078125, + "eval_seeclick_runtime": 74.0241, + "eval_seeclick_samples_per_second": 0.635, + "eval_seeclick_steps_per_second": 0.027, + "num_input_tokens_seen": 184002140, + "step": 2750 + }, + { + "epoch": 0.3120567375886525, + "eval_icons_CIoU": 0.4912347346544266, + "eval_icons_GIoU": 0.4759764075279236, + "eval_icons_IoU": 0.5354693681001663, + "eval_icons_MAE_all": 0.11955218762159348, + "eval_icons_MAE_h": 0.0804053395986557, + "eval_icons_MAE_w": 0.10638562962412834, + "eval_icons_MAE_x_boxes": 0.14929157495498657, + "eval_icons_MAE_y_boxes": 0.04444071464240551, + "eval_icons_NUM_probability": 0.9996346533298492, + "eval_icons_inside_bbox": 0.7361111044883728, + "eval_icons_loss": 2.376861810684204, + "eval_icons_loss_ce": 0.0004135592716920655, + "eval_icons_loss_iou": 0.89404296875, + "eval_icons_loss_num": 0.11290359497070312, + "eval_icons_loss_xval": 2.35302734375, + "eval_icons_runtime": 74.9607, + "eval_icons_samples_per_second": 0.667, + "eval_icons_steps_per_second": 0.027, + "num_input_tokens_seen": 184002140, + "step": 2750 + }, + { + "epoch": 0.3120567375886525, + "eval_screenspot_CIoU": 0.28345997134844464, + "eval_screenspot_GIoU": 0.24227159221967062, + "eval_screenspot_IoU": 0.37268055478731793, + "eval_screenspot_MAE_all": 0.2216197301944097, + "eval_screenspot_MAE_h": 0.17973189055919647, + "eval_screenspot_MAE_w": 0.1821718563636144, + "eval_screenspot_MAE_x_boxes": 0.2746278742949168, + "eval_screenspot_MAE_y_boxes": 0.1439644048611323, + "eval_screenspot_NUM_probability": 0.9998110930124918, + "eval_screenspot_inside_bbox": 0.6074999968210856, + "eval_screenspot_loss": 3.092728614807129, + "eval_screenspot_loss_ce": 0.013230047499140104, + "eval_screenspot_loss_iou": 0.978515625, + "eval_screenspot_loss_num": 0.23282877604166666, + "eval_screenspot_loss_xval": 3.1204427083333335, + "eval_screenspot_runtime": 124.6915, + "eval_screenspot_samples_per_second": 0.714, + "eval_screenspot_steps_per_second": 0.024, + "num_input_tokens_seen": 184002140, + "step": 2750 + }, + { + "epoch": 0.3120567375886525, + "eval_compot_CIoU": 0.3083077669143677, + "eval_compot_GIoU": 0.24129639565944672, + "eval_compot_IoU": 0.40925590693950653, + "eval_compot_MAE_all": 0.19621331244707108, + "eval_compot_MAE_h": 0.11777764186263084, + "eval_compot_MAE_w": 0.14110591262578964, + "eval_compot_MAE_x_boxes": 0.2025647610425949, + "eval_compot_MAE_y_boxes": 0.16187934577465057, + "eval_compot_NUM_probability": 0.9998566210269928, + "eval_compot_inside_bbox": 0.5434027910232544, + "eval_compot_loss": 2.900742769241333, + "eval_compot_loss_ce": 0.0031965558882802725, + "eval_compot_loss_iou": 0.96240234375, + "eval_compot_loss_num": 0.20391845703125, + "eval_compot_loss_xval": 2.943359375, + "eval_compot_runtime": 71.5826, + "eval_compot_samples_per_second": 0.698, + "eval_compot_steps_per_second": 0.028, + "num_input_tokens_seen": 184002140, + "step": 2750 + }, + { + "epoch": 0.3120567375886525, + "loss": 2.858072280883789, + "loss_ce": 0.0026033443864434958, + "loss_iou": 0.92578125, + "loss_num": 0.2001953125, + "loss_xval": 2.859375, + "num_input_tokens_seen": 184002140, + "step": 2750 + }, + { + "epoch": 0.31217021276595747, + "grad_norm": 36.65522384643555, + "learning_rate": 5e-05, + "loss": 1.2252, + "num_input_tokens_seen": 184069216, + "step": 2751 + }, + { + "epoch": 0.31217021276595747, + "loss": 1.3525090217590332, + "loss_ce": 0.007294218987226486, + "loss_iou": 0.5703125, + "loss_num": 0.040771484375, + "loss_xval": 1.34375, + "num_input_tokens_seen": 184069216, + "step": 2751 + }, + { + "epoch": 0.3122836879432624, + "grad_norm": 34.4079704284668, + "learning_rate": 5e-05, + "loss": 1.2792, + "num_input_tokens_seen": 184135760, + "step": 2752 + }, + { + "epoch": 0.3122836879432624, + "loss": 1.193387508392334, + "loss_ce": 0.005124552641063929, + "loss_iou": 0.455078125, + "loss_num": 0.0556640625, + "loss_xval": 1.1875, + "num_input_tokens_seen": 184135760, + "step": 2752 + }, + { + "epoch": 0.31239716312056737, + "grad_norm": 41.481021881103516, + "learning_rate": 5e-05, + "loss": 1.1635, + "num_input_tokens_seen": 184204012, + "step": 2753 + }, + { + "epoch": 0.31239716312056737, + "loss": 1.0212039947509766, + "loss_ce": 0.008020378649234772, + "loss_iou": 0.4375, + "loss_num": 0.02734375, + "loss_xval": 1.015625, + "num_input_tokens_seen": 184204012, + "step": 2753 + }, + { + "epoch": 0.31251063829787235, + "grad_norm": 21.895788192749023, + "learning_rate": 5e-05, + "loss": 1.1133, + "num_input_tokens_seen": 184269600, + "step": 2754 + }, + { + "epoch": 0.31251063829787235, + "loss": 1.103895902633667, + "loss_ce": 0.008970912545919418, + "loss_iou": 0.4140625, + "loss_num": 0.053466796875, + "loss_xval": 1.09375, + "num_input_tokens_seen": 184269600, + "step": 2754 + }, + { + "epoch": 0.3126241134751773, + "grad_norm": 32.486351013183594, + "learning_rate": 5e-05, + "loss": 1.2914, + "num_input_tokens_seen": 184335924, + "step": 2755 + }, + { + "epoch": 0.3126241134751773, + "loss": 1.3114969730377197, + "loss_ce": 0.006321198306977749, + "loss_iou": 0.5234375, + "loss_num": 0.052001953125, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 184335924, + "step": 2755 + }, + { + "epoch": 0.31273758865248225, + "grad_norm": 28.15127944946289, + "learning_rate": 5e-05, + "loss": 1.2552, + "num_input_tokens_seen": 184403992, + "step": 2756 + }, + { + "epoch": 0.31273758865248225, + "loss": 1.2212690114974976, + "loss_ce": 0.003983843605965376, + "loss_iou": 0.49609375, + "loss_num": 0.04541015625, + "loss_xval": 1.21875, + "num_input_tokens_seen": 184403992, + "step": 2756 + }, + { + "epoch": 0.3128510638297872, + "grad_norm": 23.890058517456055, + "learning_rate": 5e-05, + "loss": 1.2691, + "num_input_tokens_seen": 184471820, + "step": 2757 + }, + { + "epoch": 0.3128510638297872, + "loss": 1.2397170066833496, + "loss_ce": 0.0082717789337039, + "loss_iou": 0.54296875, + "loss_num": 0.028564453125, + "loss_xval": 1.234375, + "num_input_tokens_seen": 184471820, + "step": 2757 + }, + { + "epoch": 0.3129645390070922, + "grad_norm": 24.88927459716797, + "learning_rate": 5e-05, + "loss": 1.1688, + "num_input_tokens_seen": 184539020, + "step": 2758 + }, + { + "epoch": 0.3129645390070922, + "loss": 1.13687264919281, + "loss_ce": 0.003083582501858473, + "loss_iou": 0.4921875, + "loss_num": 0.0299072265625, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 184539020, + "step": 2758 + }, + { + "epoch": 0.3130780141843972, + "grad_norm": 26.12917137145996, + "learning_rate": 5e-05, + "loss": 1.3413, + "num_input_tokens_seen": 184606976, + "step": 2759 + }, + { + "epoch": 0.3130780141843972, + "loss": 1.3429181575775146, + "loss_ce": 0.0050274962559342384, + "loss_iou": 0.55859375, + "loss_num": 0.04345703125, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 184606976, + "step": 2759 + }, + { + "epoch": 0.3131914893617021, + "grad_norm": 33.37192153930664, + "learning_rate": 5e-05, + "loss": 1.2676, + "num_input_tokens_seen": 184674372, + "step": 2760 + }, + { + "epoch": 0.3131914893617021, + "loss": 1.1568045616149902, + "loss_ce": 0.004949092864990234, + "loss_iou": 0.5, + "loss_num": 0.0301513671875, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 184674372, + "step": 2760 + }, + { + "epoch": 0.3133049645390071, + "grad_norm": 28.05744743347168, + "learning_rate": 5e-05, + "loss": 1.5658, + "num_input_tokens_seen": 184741468, + "step": 2761 + }, + { + "epoch": 0.3133049645390071, + "loss": 1.6777856349945068, + "loss_ce": 0.002980953548103571, + "loss_iou": 0.671875, + "loss_num": 0.06640625, + "loss_xval": 1.671875, + "num_input_tokens_seen": 184741468, + "step": 2761 + }, + { + "epoch": 0.31341843971631206, + "grad_norm": 26.361082077026367, + "learning_rate": 5e-05, + "loss": 1.2627, + "num_input_tokens_seen": 184807796, + "step": 2762 + }, + { + "epoch": 0.31341843971631206, + "loss": 1.373702883720398, + "loss_ce": 0.003585700411349535, + "loss_iou": 0.609375, + "loss_num": 0.0306396484375, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 184807796, + "step": 2762 + }, + { + "epoch": 0.31353191489361704, + "grad_norm": 29.475004196166992, + "learning_rate": 5e-05, + "loss": 1.453, + "num_input_tokens_seen": 184874664, + "step": 2763 + }, + { + "epoch": 0.31353191489361704, + "loss": 1.4128210544586182, + "loss_ce": 0.009500746615231037, + "loss_iou": 0.58203125, + "loss_num": 0.0478515625, + "loss_xval": 1.40625, + "num_input_tokens_seen": 184874664, + "step": 2763 + }, + { + "epoch": 0.31364539007092196, + "grad_norm": 52.475181579589844, + "learning_rate": 5e-05, + "loss": 1.2106, + "num_input_tokens_seen": 184941804, + "step": 2764 + }, + { + "epoch": 0.31364539007092196, + "loss": 1.211962342262268, + "loss_ce": 0.005907576065510511, + "loss_iou": 0.4921875, + "loss_num": 0.04443359375, + "loss_xval": 1.203125, + "num_input_tokens_seen": 184941804, + "step": 2764 + }, + { + "epoch": 0.31375886524822694, + "grad_norm": 29.164405822753906, + "learning_rate": 5e-05, + "loss": 1.3029, + "num_input_tokens_seen": 185008620, + "step": 2765 + }, + { + "epoch": 0.31375886524822694, + "loss": 1.1472758054733276, + "loss_ce": 0.007871454581618309, + "loss_iou": 0.50390625, + "loss_num": 0.0262451171875, + "loss_xval": 1.140625, + "num_input_tokens_seen": 185008620, + "step": 2765 + }, + { + "epoch": 0.3138723404255319, + "grad_norm": 39.410030364990234, + "learning_rate": 5e-05, + "loss": 1.2497, + "num_input_tokens_seen": 185075392, + "step": 2766 + }, + { + "epoch": 0.3138723404255319, + "loss": 1.308342695236206, + "loss_ce": 0.0056082517839968204, + "loss_iou": 0.52734375, + "loss_num": 0.05029296875, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 185075392, + "step": 2766 + }, + { + "epoch": 0.3139858156028369, + "grad_norm": 26.416423797607422, + "learning_rate": 5e-05, + "loss": 1.3051, + "num_input_tokens_seen": 185141184, + "step": 2767 + }, + { + "epoch": 0.3139858156028369, + "loss": 1.233455777168274, + "loss_ce": 0.0044518765062093735, + "loss_iou": 0.546875, + "loss_num": 0.02685546875, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 185141184, + "step": 2767 + }, + { + "epoch": 0.31409929078014187, + "grad_norm": 25.07044219970703, + "learning_rate": 5e-05, + "loss": 1.1792, + "num_input_tokens_seen": 185205444, + "step": 2768 + }, + { + "epoch": 0.31409929078014187, + "loss": 1.1020565032958984, + "loss_ce": 0.005376897752285004, + "loss_iou": 0.474609375, + "loss_num": 0.0296630859375, + "loss_xval": 1.09375, + "num_input_tokens_seen": 185205444, + "step": 2768 + }, + { + "epoch": 0.3142127659574468, + "grad_norm": 25.75804328918457, + "learning_rate": 5e-05, + "loss": 1.221, + "num_input_tokens_seen": 185271948, + "step": 2769 + }, + { + "epoch": 0.3142127659574468, + "loss": 1.1494680643081665, + "loss_ce": 0.007378172129392624, + "loss_iou": 0.484375, + "loss_num": 0.034912109375, + "loss_xval": 1.140625, + "num_input_tokens_seen": 185271948, + "step": 2769 + }, + { + "epoch": 0.31432624113475177, + "grad_norm": 23.879520416259766, + "learning_rate": 5e-05, + "loss": 1.2515, + "num_input_tokens_seen": 185339028, + "step": 2770 + }, + { + "epoch": 0.31432624113475177, + "loss": 1.2682456970214844, + "loss_ce": 0.006038585677742958, + "loss_iou": 0.515625, + "loss_num": 0.046142578125, + "loss_xval": 1.265625, + "num_input_tokens_seen": 185339028, + "step": 2770 + }, + { + "epoch": 0.31443971631205675, + "grad_norm": 26.320804595947266, + "learning_rate": 5e-05, + "loss": 1.3691, + "num_input_tokens_seen": 185406252, + "step": 2771 + }, + { + "epoch": 0.31443971631205675, + "loss": 1.4338204860687256, + "loss_ce": 0.0041329823434352875, + "loss_iou": 0.59765625, + "loss_num": 0.04736328125, + "loss_xval": 1.4296875, + "num_input_tokens_seen": 185406252, + "step": 2771 + }, + { + "epoch": 0.3145531914893617, + "grad_norm": 28.500568389892578, + "learning_rate": 5e-05, + "loss": 1.2166, + "num_input_tokens_seen": 185472896, + "step": 2772 + }, + { + "epoch": 0.3145531914893617, + "loss": 1.3302264213562012, + "loss_ce": 0.004542794544249773, + "loss_iou": 0.53515625, + "loss_num": 0.05078125, + "loss_xval": 1.328125, + "num_input_tokens_seen": 185472896, + "step": 2772 + }, + { + "epoch": 0.31466666666666665, + "grad_norm": 49.50577163696289, + "learning_rate": 5e-05, + "loss": 1.4429, + "num_input_tokens_seen": 185540684, + "step": 2773 + }, + { + "epoch": 0.31466666666666665, + "loss": 1.3883154392242432, + "loss_ce": 0.003549843095242977, + "loss_iou": 0.609375, + "loss_num": 0.03369140625, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 185540684, + "step": 2773 + }, + { + "epoch": 0.31478014184397163, + "grad_norm": 36.010498046875, + "learning_rate": 5e-05, + "loss": 1.3044, + "num_input_tokens_seen": 185608032, + "step": 2774 + }, + { + "epoch": 0.31478014184397163, + "loss": 1.2484828233718872, + "loss_ce": 0.003853933420032263, + "loss_iou": 0.53515625, + "loss_num": 0.03564453125, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 185608032, + "step": 2774 + }, + { + "epoch": 0.3148936170212766, + "grad_norm": 32.14525604248047, + "learning_rate": 5e-05, + "loss": 1.38, + "num_input_tokens_seen": 185675300, + "step": 2775 + }, + { + "epoch": 0.3148936170212766, + "loss": 1.4389369487762451, + "loss_ce": 0.004366542212665081, + "loss_iou": 0.609375, + "loss_num": 0.042724609375, + "loss_xval": 1.4375, + "num_input_tokens_seen": 185675300, + "step": 2775 + }, + { + "epoch": 0.3150070921985816, + "grad_norm": 12.857096672058105, + "learning_rate": 5e-05, + "loss": 1.2599, + "num_input_tokens_seen": 185742244, + "step": 2776 + }, + { + "epoch": 0.3150070921985816, + "loss": 1.308903455734253, + "loss_ce": 0.005680765025317669, + "loss_iou": 0.515625, + "loss_num": 0.053955078125, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 185742244, + "step": 2776 + }, + { + "epoch": 0.3151205673758865, + "grad_norm": 13.328703880310059, + "learning_rate": 5e-05, + "loss": 1.1189, + "num_input_tokens_seen": 185810248, + "step": 2777 + }, + { + "epoch": 0.3151205673758865, + "loss": 1.2041929960250854, + "loss_ce": 0.0059508513659238815, + "loss_iou": 0.5078125, + "loss_num": 0.0361328125, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 185810248, + "step": 2777 + }, + { + "epoch": 0.3152340425531915, + "grad_norm": 19.139686584472656, + "learning_rate": 5e-05, + "loss": 1.2408, + "num_input_tokens_seen": 185877632, + "step": 2778 + }, + { + "epoch": 0.3152340425531915, + "loss": 1.3066151142120361, + "loss_ce": 0.0038806702941656113, + "loss_iou": 0.5703125, + "loss_num": 0.032470703125, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 185877632, + "step": 2778 + }, + { + "epoch": 0.31534751773049646, + "grad_norm": 28.80589485168457, + "learning_rate": 5e-05, + "loss": 1.232, + "num_input_tokens_seen": 185944164, + "step": 2779 + }, + { + "epoch": 0.31534751773049646, + "loss": 1.0605261325836182, + "loss_ce": 0.005106277763843536, + "loss_iou": 0.45703125, + "loss_num": 0.028564453125, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 185944164, + "step": 2779 + }, + { + "epoch": 0.31546099290780144, + "grad_norm": 41.18254852294922, + "learning_rate": 5e-05, + "loss": 1.2865, + "num_input_tokens_seen": 186011052, + "step": 2780 + }, + { + "epoch": 0.31546099290780144, + "loss": 1.2970972061157227, + "loss_ce": 0.007546345703303814, + "loss_iou": 0.53125, + "loss_num": 0.045654296875, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 186011052, + "step": 2780 + }, + { + "epoch": 0.31557446808510636, + "grad_norm": 24.490406036376953, + "learning_rate": 5e-05, + "loss": 1.2127, + "num_input_tokens_seen": 186076068, + "step": 2781 + }, + { + "epoch": 0.31557446808510636, + "loss": 1.4197885990142822, + "loss_ce": 0.004749578423798084, + "loss_iou": 0.56640625, + "loss_num": 0.056640625, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 186076068, + "step": 2781 + }, + { + "epoch": 0.31568794326241134, + "grad_norm": 14.872690200805664, + "learning_rate": 5e-05, + "loss": 1.1827, + "num_input_tokens_seen": 186143140, + "step": 2782 + }, + { + "epoch": 0.31568794326241134, + "loss": 1.083127737045288, + "loss_ce": 0.006955910474061966, + "loss_iou": 0.458984375, + "loss_num": 0.03125, + "loss_xval": 1.078125, + "num_input_tokens_seen": 186143140, + "step": 2782 + }, + { + "epoch": 0.3158014184397163, + "grad_norm": 28.023653030395508, + "learning_rate": 5e-05, + "loss": 1.3322, + "num_input_tokens_seen": 186210424, + "step": 2783 + }, + { + "epoch": 0.3158014184397163, + "loss": 1.6195036172866821, + "loss_ce": 0.005245807580649853, + "loss_iou": 0.66015625, + "loss_num": 0.058349609375, + "loss_xval": 1.6171875, + "num_input_tokens_seen": 186210424, + "step": 2783 + }, + { + "epoch": 0.3159148936170213, + "grad_norm": 31.137182235717773, + "learning_rate": 5e-05, + "loss": 1.4266, + "num_input_tokens_seen": 186277472, + "step": 2784 + }, + { + "epoch": 0.3159148936170213, + "loss": 1.412358283996582, + "loss_ce": 0.0075729843229055405, + "loss_iou": 0.59375, + "loss_num": 0.04345703125, + "loss_xval": 1.40625, + "num_input_tokens_seen": 186277472, + "step": 2784 + }, + { + "epoch": 0.3160283687943262, + "grad_norm": 22.724031448364258, + "learning_rate": 5e-05, + "loss": 1.3282, + "num_input_tokens_seen": 186343508, + "step": 2785 + }, + { + "epoch": 0.3160283687943262, + "loss": 1.2965610027313232, + "loss_ce": 0.005057059694081545, + "loss_iou": 0.56640625, + "loss_num": 0.0308837890625, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 186343508, + "step": 2785 + }, + { + "epoch": 0.3161418439716312, + "grad_norm": 19.647634506225586, + "learning_rate": 5e-05, + "loss": 1.3908, + "num_input_tokens_seen": 186410512, + "step": 2786 + }, + { + "epoch": 0.3161418439716312, + "loss": 1.2565304040908813, + "loss_ce": 0.007415459956973791, + "loss_iou": 0.515625, + "loss_num": 0.043212890625, + "loss_xval": 1.25, + "num_input_tokens_seen": 186410512, + "step": 2786 + }, + { + "epoch": 0.3162553191489362, + "grad_norm": 17.820159912109375, + "learning_rate": 5e-05, + "loss": 1.2755, + "num_input_tokens_seen": 186477700, + "step": 2787 + }, + { + "epoch": 0.3162553191489362, + "loss": 1.3094217777252197, + "loss_ce": 0.007175619713962078, + "loss_iou": 0.546875, + "loss_num": 0.042236328125, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 186477700, + "step": 2787 + }, + { + "epoch": 0.31636879432624115, + "grad_norm": 41.289730072021484, + "learning_rate": 5e-05, + "loss": 1.2626, + "num_input_tokens_seen": 186545664, + "step": 2788 + }, + { + "epoch": 0.31636879432624115, + "loss": 1.2292009592056274, + "loss_ce": 0.0045916251838207245, + "loss_iou": 0.494140625, + "loss_num": 0.04736328125, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 186545664, + "step": 2788 + }, + { + "epoch": 0.3164822695035461, + "grad_norm": 25.116165161132812, + "learning_rate": 5e-05, + "loss": 1.2763, + "num_input_tokens_seen": 186612480, + "step": 2789 + }, + { + "epoch": 0.3164822695035461, + "loss": 1.34181809425354, + "loss_ce": 0.007345365360379219, + "loss_iou": 0.55859375, + "loss_num": 0.04296875, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 186612480, + "step": 2789 + }, + { + "epoch": 0.31659574468085105, + "grad_norm": 38.95749282836914, + "learning_rate": 5e-05, + "loss": 1.4618, + "num_input_tokens_seen": 186679808, + "step": 2790 + }, + { + "epoch": 0.31659574468085105, + "loss": 1.3329997062683105, + "loss_ce": 0.005362980999052525, + "loss_iou": 0.5546875, + "loss_num": 0.042724609375, + "loss_xval": 1.328125, + "num_input_tokens_seen": 186679808, + "step": 2790 + }, + { + "epoch": 0.31670921985815603, + "grad_norm": 42.4193115234375, + "learning_rate": 5e-05, + "loss": 1.3163, + "num_input_tokens_seen": 186746504, + "step": 2791 + }, + { + "epoch": 0.31670921985815603, + "loss": 1.3669664859771729, + "loss_ce": 0.006614913232624531, + "loss_iou": 0.58984375, + "loss_num": 0.03662109375, + "loss_xval": 1.359375, + "num_input_tokens_seen": 186746504, + "step": 2791 + }, + { + "epoch": 0.316822695035461, + "grad_norm": 28.00661849975586, + "learning_rate": 5e-05, + "loss": 1.5508, + "num_input_tokens_seen": 186813024, + "step": 2792 + }, + { + "epoch": 0.316822695035461, + "loss": 1.6918871402740479, + "loss_ce": 0.005363769363611937, + "loss_iou": 0.6796875, + "loss_num": 0.0654296875, + "loss_xval": 1.6875, + "num_input_tokens_seen": 186813024, + "step": 2792 + }, + { + "epoch": 0.31693617021276593, + "grad_norm": 27.59627342224121, + "learning_rate": 5e-05, + "loss": 1.1597, + "num_input_tokens_seen": 186879648, + "step": 2793 + }, + { + "epoch": 0.31693617021276593, + "loss": 1.2593438625335693, + "loss_ce": 0.00922183133661747, + "loss_iou": 0.50390625, + "loss_num": 0.048583984375, + "loss_xval": 1.25, + "num_input_tokens_seen": 186879648, + "step": 2793 + }, + { + "epoch": 0.3170496453900709, + "grad_norm": 36.19554138183594, + "learning_rate": 5e-05, + "loss": 1.3162, + "num_input_tokens_seen": 186946964, + "step": 2794 + }, + { + "epoch": 0.3170496453900709, + "loss": 1.3339557647705078, + "loss_ce": 0.004854260012507439, + "loss_iou": 0.578125, + "loss_num": 0.03564453125, + "loss_xval": 1.328125, + "num_input_tokens_seen": 186946964, + "step": 2794 + }, + { + "epoch": 0.3171631205673759, + "grad_norm": 59.58390808105469, + "learning_rate": 5e-05, + "loss": 1.5263, + "num_input_tokens_seen": 187013916, + "step": 2795 + }, + { + "epoch": 0.3171631205673759, + "loss": 1.4905167818069458, + "loss_ce": 0.006141710095107555, + "loss_iou": 0.66015625, + "loss_num": 0.033203125, + "loss_xval": 1.484375, + "num_input_tokens_seen": 187013916, + "step": 2795 + }, + { + "epoch": 0.31727659574468087, + "grad_norm": 23.47439193725586, + "learning_rate": 5e-05, + "loss": 1.2297, + "num_input_tokens_seen": 187080284, + "step": 2796 + }, + { + "epoch": 0.31727659574468087, + "loss": 0.9220765233039856, + "loss_ce": 0.005084313452243805, + "loss_iou": 0.388671875, + "loss_num": 0.02783203125, + "loss_xval": 0.91796875, + "num_input_tokens_seen": 187080284, + "step": 2796 + }, + { + "epoch": 0.3173900709219858, + "grad_norm": 25.553823471069336, + "learning_rate": 5e-05, + "loss": 1.2783, + "num_input_tokens_seen": 187145932, + "step": 2797 + }, + { + "epoch": 0.3173900709219858, + "loss": 1.5812273025512695, + "loss_ce": 0.007008594460785389, + "loss_iou": 0.6640625, + "loss_num": 0.0498046875, + "loss_xval": 1.578125, + "num_input_tokens_seen": 187145932, + "step": 2797 + }, + { + "epoch": 0.31750354609929077, + "grad_norm": 33.85667419433594, + "learning_rate": 5e-05, + "loss": 1.22, + "num_input_tokens_seen": 187211908, + "step": 2798 + }, + { + "epoch": 0.31750354609929077, + "loss": 1.1177805662155151, + "loss_ce": 0.0052317408844828606, + "loss_iou": 0.458984375, + "loss_num": 0.038818359375, + "loss_xval": 1.109375, + "num_input_tokens_seen": 187211908, + "step": 2798 + }, + { + "epoch": 0.31761702127659575, + "grad_norm": 27.14866828918457, + "learning_rate": 5e-05, + "loss": 1.4002, + "num_input_tokens_seen": 187278684, + "step": 2799 + }, + { + "epoch": 0.31761702127659575, + "loss": 1.6964250802993774, + "loss_ce": 0.005995411425828934, + "loss_iou": 0.71484375, + "loss_num": 0.05224609375, + "loss_xval": 1.6875, + "num_input_tokens_seen": 187278684, + "step": 2799 + }, + { + "epoch": 0.3177304964539007, + "grad_norm": 15.786540985107422, + "learning_rate": 5e-05, + "loss": 1.1786, + "num_input_tokens_seen": 187344516, + "step": 2800 + }, + { + "epoch": 0.3177304964539007, + "loss": 1.1190354824066162, + "loss_ce": 0.004777653142809868, + "loss_iou": 0.408203125, + "loss_num": 0.06005859375, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 187344516, + "step": 2800 + }, + { + "epoch": 0.31784397163120565, + "grad_norm": 26.795000076293945, + "learning_rate": 5e-05, + "loss": 1.4248, + "num_input_tokens_seen": 187412188, + "step": 2801 + }, + { + "epoch": 0.31784397163120565, + "loss": 1.5255917310714722, + "loss_ce": 0.00947847031056881, + "loss_iou": 0.6015625, + "loss_num": 0.06298828125, + "loss_xval": 1.515625, + "num_input_tokens_seen": 187412188, + "step": 2801 + }, + { + "epoch": 0.3179574468085106, + "grad_norm": 34.37614440917969, + "learning_rate": 5e-05, + "loss": 1.3812, + "num_input_tokens_seen": 187479180, + "step": 2802 + }, + { + "epoch": 0.3179574468085106, + "loss": 1.3097538948059082, + "loss_ce": 0.007996013388037682, + "loss_iou": 0.54296875, + "loss_num": 0.043701171875, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 187479180, + "step": 2802 + }, + { + "epoch": 0.3180709219858156, + "grad_norm": 24.46014404296875, + "learning_rate": 5e-05, + "loss": 1.2768, + "num_input_tokens_seen": 187546368, + "step": 2803 + }, + { + "epoch": 0.3180709219858156, + "loss": 1.3269680738449097, + "loss_ce": 0.005679001100361347, + "loss_iou": 0.5703125, + "loss_num": 0.0361328125, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 187546368, + "step": 2803 + }, + { + "epoch": 0.3181843971631206, + "grad_norm": 21.585012435913086, + "learning_rate": 5e-05, + "loss": 1.1386, + "num_input_tokens_seen": 187612760, + "step": 2804 + }, + { + "epoch": 0.3181843971631206, + "loss": 1.0974717140197754, + "loss_ce": 0.009962518699467182, + "loss_iou": 0.423828125, + "loss_num": 0.047607421875, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 187612760, + "step": 2804 + }, + { + "epoch": 0.31829787234042556, + "grad_norm": 22.291311264038086, + "learning_rate": 5e-05, + "loss": 1.2242, + "num_input_tokens_seen": 187679340, + "step": 2805 + }, + { + "epoch": 0.31829787234042556, + "loss": 1.1330628395080566, + "loss_ce": 0.005133150611072779, + "loss_iou": 0.466796875, + "loss_num": 0.038818359375, + "loss_xval": 1.125, + "num_input_tokens_seen": 187679340, + "step": 2805 + }, + { + "epoch": 0.3184113475177305, + "grad_norm": 28.752452850341797, + "learning_rate": 5e-05, + "loss": 1.4124, + "num_input_tokens_seen": 187746264, + "step": 2806 + }, + { + "epoch": 0.3184113475177305, + "loss": 1.3888781070709229, + "loss_ce": 0.0046007512137293816, + "loss_iou": 0.56640625, + "loss_num": 0.05078125, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 187746264, + "step": 2806 + }, + { + "epoch": 0.31852482269503546, + "grad_norm": 26.721607208251953, + "learning_rate": 5e-05, + "loss": 1.4274, + "num_input_tokens_seen": 187812900, + "step": 2807 + }, + { + "epoch": 0.31852482269503546, + "loss": 1.4106378555297852, + "loss_ce": 0.006829267833381891, + "loss_iou": 0.57421875, + "loss_num": 0.050537109375, + "loss_xval": 1.40625, + "num_input_tokens_seen": 187812900, + "step": 2807 + }, + { + "epoch": 0.31863829787234044, + "grad_norm": 28.64656639099121, + "learning_rate": 5e-05, + "loss": 1.2417, + "num_input_tokens_seen": 187878916, + "step": 2808 + }, + { + "epoch": 0.31863829787234044, + "loss": 1.4687070846557617, + "loss_ce": 0.0038632857613265514, + "loss_iou": 0.58203125, + "loss_num": 0.059814453125, + "loss_xval": 1.46875, + "num_input_tokens_seen": 187878916, + "step": 2808 + }, + { + "epoch": 0.3187517730496454, + "grad_norm": 25.16413116455078, + "learning_rate": 5e-05, + "loss": 1.2131, + "num_input_tokens_seen": 187943668, + "step": 2809 + }, + { + "epoch": 0.3187517730496454, + "loss": 1.072630524635315, + "loss_ce": 0.0014635459519922733, + "loss_iou": 0.453125, + "loss_num": 0.03271484375, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 187943668, + "step": 2809 + }, + { + "epoch": 0.31886524822695034, + "grad_norm": 34.453372955322266, + "learning_rate": 5e-05, + "loss": 1.3408, + "num_input_tokens_seen": 188010220, + "step": 2810 + }, + { + "epoch": 0.31886524822695034, + "loss": 1.4093334674835205, + "loss_ce": 0.006501451134681702, + "loss_iou": 0.546875, + "loss_num": 0.062255859375, + "loss_xval": 1.40625, + "num_input_tokens_seen": 188010220, + "step": 2810 + }, + { + "epoch": 0.3189787234042553, + "grad_norm": 31.081159591674805, + "learning_rate": 5e-05, + "loss": 1.4122, + "num_input_tokens_seen": 188077944, + "step": 2811 + }, + { + "epoch": 0.3189787234042553, + "loss": 1.34330415725708, + "loss_ce": 0.005901921074837446, + "loss_iou": 0.5703125, + "loss_num": 0.038818359375, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 188077944, + "step": 2811 + }, + { + "epoch": 0.3190921985815603, + "grad_norm": 28.178783416748047, + "learning_rate": 5e-05, + "loss": 1.2483, + "num_input_tokens_seen": 188144440, + "step": 2812 + }, + { + "epoch": 0.3190921985815603, + "loss": 1.298952341079712, + "loss_ce": 0.006227695383131504, + "loss_iou": 0.55078125, + "loss_num": 0.03759765625, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 188144440, + "step": 2812 + }, + { + "epoch": 0.31920567375886527, + "grad_norm": 23.917713165283203, + "learning_rate": 5e-05, + "loss": 1.3437, + "num_input_tokens_seen": 188211680, + "step": 2813 + }, + { + "epoch": 0.31920567375886527, + "loss": 1.3853511810302734, + "loss_ce": 0.004491786472499371, + "loss_iou": 0.5859375, + "loss_num": 0.04150390625, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 188211680, + "step": 2813 + }, + { + "epoch": 0.3193191489361702, + "grad_norm": 22.89446258544922, + "learning_rate": 5e-05, + "loss": 1.1725, + "num_input_tokens_seen": 188278988, + "step": 2814 + }, + { + "epoch": 0.3193191489361702, + "loss": 1.2154731750488281, + "loss_ce": 0.004535617306828499, + "loss_iou": 0.49609375, + "loss_num": 0.0439453125, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 188278988, + "step": 2814 + }, + { + "epoch": 0.31943262411347517, + "grad_norm": 28.480640411376953, + "learning_rate": 5e-05, + "loss": 1.4724, + "num_input_tokens_seen": 188347368, + "step": 2815 + }, + { + "epoch": 0.31943262411347517, + "loss": 1.303206205368042, + "loss_ce": 0.0029132040217518806, + "loss_iou": 0.55078125, + "loss_num": 0.03955078125, + "loss_xval": 1.296875, + "num_input_tokens_seen": 188347368, + "step": 2815 + }, + { + "epoch": 0.31954609929078015, + "grad_norm": 27.247724533081055, + "learning_rate": 5e-05, + "loss": 1.478, + "num_input_tokens_seen": 188414308, + "step": 2816 + }, + { + "epoch": 0.31954609929078015, + "loss": 1.5388522148132324, + "loss_ce": 0.007113860920071602, + "loss_iou": 0.6171875, + "loss_num": 0.060302734375, + "loss_xval": 1.53125, + "num_input_tokens_seen": 188414308, + "step": 2816 + }, + { + "epoch": 0.31965957446808513, + "grad_norm": 24.876951217651367, + "learning_rate": 5e-05, + "loss": 1.2367, + "num_input_tokens_seen": 188481292, + "step": 2817 + }, + { + "epoch": 0.31965957446808513, + "loss": 1.0301989316940308, + "loss_ce": 0.01091180182993412, + "loss_iou": 0.443359375, + "loss_num": 0.026611328125, + "loss_xval": 1.015625, + "num_input_tokens_seen": 188481292, + "step": 2817 + }, + { + "epoch": 0.31977304964539005, + "grad_norm": 30.703243255615234, + "learning_rate": 5e-05, + "loss": 1.3714, + "num_input_tokens_seen": 188548300, + "step": 2818 + }, + { + "epoch": 0.31977304964539005, + "loss": 1.353420376777649, + "loss_ce": 0.003811019007116556, + "loss_iou": 0.58203125, + "loss_num": 0.037353515625, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 188548300, + "step": 2818 + }, + { + "epoch": 0.31988652482269503, + "grad_norm": 21.512460708618164, + "learning_rate": 5e-05, + "loss": 1.2174, + "num_input_tokens_seen": 188615268, + "step": 2819 + }, + { + "epoch": 0.31988652482269503, + "loss": 1.351033329963684, + "loss_ce": 0.0028887931257486343, + "loss_iou": 0.54296875, + "loss_num": 0.052490234375, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 188615268, + "step": 2819 + }, + { + "epoch": 0.32, + "grad_norm": 10.335382461547852, + "learning_rate": 5e-05, + "loss": 1.2152, + "num_input_tokens_seen": 188683024, + "step": 2820 + }, + { + "epoch": 0.32, + "loss": 1.0872602462768555, + "loss_ce": 0.006205505691468716, + "loss_iou": 0.4375, + "loss_num": 0.04150390625, + "loss_xval": 1.078125, + "num_input_tokens_seen": 188683024, + "step": 2820 + }, + { + "epoch": 0.320113475177305, + "grad_norm": 12.382668495178223, + "learning_rate": 5e-05, + "loss": 1.0956, + "num_input_tokens_seen": 188750344, + "step": 2821 + }, + { + "epoch": 0.320113475177305, + "loss": 1.161231279373169, + "loss_ce": 0.006446193438023329, + "loss_iou": 0.486328125, + "loss_num": 0.036376953125, + "loss_xval": 1.15625, + "num_input_tokens_seen": 188750344, + "step": 2821 + }, + { + "epoch": 0.3202269503546099, + "grad_norm": 21.34845542907715, + "learning_rate": 5e-05, + "loss": 1.2485, + "num_input_tokens_seen": 188817028, + "step": 2822 + }, + { + "epoch": 0.3202269503546099, + "loss": 1.1792786121368408, + "loss_ce": 0.006915337406098843, + "loss_iou": 0.47265625, + "loss_num": 0.045166015625, + "loss_xval": 1.171875, + "num_input_tokens_seen": 188817028, + "step": 2822 + }, + { + "epoch": 0.3203404255319149, + "grad_norm": 25.250255584716797, + "learning_rate": 5e-05, + "loss": 1.1039, + "num_input_tokens_seen": 188883924, + "step": 2823 + }, + { + "epoch": 0.3203404255319149, + "loss": 1.0116097927093506, + "loss_ce": 0.003308966290205717, + "loss_iou": 0.44140625, + "loss_num": 0.024658203125, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 188883924, + "step": 2823 + }, + { + "epoch": 0.32045390070921986, + "grad_norm": 40.61396026611328, + "learning_rate": 5e-05, + "loss": 1.4404, + "num_input_tokens_seen": 188950932, + "step": 2824 + }, + { + "epoch": 0.32045390070921986, + "loss": 1.443070411682129, + "loss_ce": 0.008500168099999428, + "loss_iou": 0.5703125, + "loss_num": 0.05810546875, + "loss_xval": 1.4375, + "num_input_tokens_seen": 188950932, + "step": 2824 + }, + { + "epoch": 0.32056737588652484, + "grad_norm": 29.839601516723633, + "learning_rate": 5e-05, + "loss": 1.4938, + "num_input_tokens_seen": 189017624, + "step": 2825 + }, + { + "epoch": 0.32056737588652484, + "loss": 1.3769413232803345, + "loss_ce": 0.010730369947850704, + "loss_iou": 0.59765625, + "loss_num": 0.033447265625, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 189017624, + "step": 2825 + }, + { + "epoch": 0.32068085106382976, + "grad_norm": 33.388938903808594, + "learning_rate": 5e-05, + "loss": 1.2617, + "num_input_tokens_seen": 189084412, + "step": 2826 + }, + { + "epoch": 0.32068085106382976, + "loss": 1.2468980550765991, + "loss_ce": 0.002269106451421976, + "loss_iou": 0.50390625, + "loss_num": 0.048095703125, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 189084412, + "step": 2826 + }, + { + "epoch": 0.32079432624113474, + "grad_norm": 62.28837966918945, + "learning_rate": 5e-05, + "loss": 1.2904, + "num_input_tokens_seen": 189151416, + "step": 2827 + }, + { + "epoch": 0.32079432624113474, + "loss": 1.2663966417312622, + "loss_ce": 0.01200208067893982, + "loss_iou": 0.50390625, + "loss_num": 0.04931640625, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 189151416, + "step": 2827 + }, + { + "epoch": 0.3209078014184397, + "grad_norm": 22.05023193359375, + "learning_rate": 5e-05, + "loss": 1.4041, + "num_input_tokens_seen": 189218240, + "step": 2828 + }, + { + "epoch": 0.3209078014184397, + "loss": 1.5145001411437988, + "loss_ce": 0.007664239965379238, + "loss_iou": 0.66015625, + "loss_num": 0.037353515625, + "loss_xval": 1.5078125, + "num_input_tokens_seen": 189218240, + "step": 2828 + }, + { + "epoch": 0.3210212765957447, + "grad_norm": 25.58769989013672, + "learning_rate": 5e-05, + "loss": 1.325, + "num_input_tokens_seen": 189285516, + "step": 2829 + }, + { + "epoch": 0.3210212765957447, + "loss": 1.3658146858215332, + "loss_ce": 0.004974832758307457, + "loss_iou": 0.52734375, + "loss_num": 0.060546875, + "loss_xval": 1.359375, + "num_input_tokens_seen": 189285516, + "step": 2829 + }, + { + "epoch": 0.3211347517730496, + "grad_norm": 18.20060157775879, + "learning_rate": 5e-05, + "loss": 1.1368, + "num_input_tokens_seen": 189352396, + "step": 2830 + }, + { + "epoch": 0.3211347517730496, + "loss": 1.1956708431243896, + "loss_ce": 0.007194269448518753, + "loss_iou": 0.49609375, + "loss_num": 0.0390625, + "loss_xval": 1.1875, + "num_input_tokens_seen": 189352396, + "step": 2830 + }, + { + "epoch": 0.3212482269503546, + "grad_norm": 19.56775665283203, + "learning_rate": 5e-05, + "loss": 1.1926, + "num_input_tokens_seen": 189419868, + "step": 2831 + }, + { + "epoch": 0.3212482269503546, + "loss": 1.194925308227539, + "loss_ce": 0.004983830265700817, + "loss_iou": 0.515625, + "loss_num": 0.031005859375, + "loss_xval": 1.1875, + "num_input_tokens_seen": 189419868, + "step": 2831 + }, + { + "epoch": 0.3213617021276596, + "grad_norm": 53.732940673828125, + "learning_rate": 5e-05, + "loss": 1.3426, + "num_input_tokens_seen": 189486448, + "step": 2832 + }, + { + "epoch": 0.3213617021276596, + "loss": 1.3895074129104614, + "loss_ce": 0.008159801363945007, + "loss_iou": 0.5625, + "loss_num": 0.051513671875, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 189486448, + "step": 2832 + }, + { + "epoch": 0.32147517730496455, + "grad_norm": 25.320329666137695, + "learning_rate": 5e-05, + "loss": 1.5449, + "num_input_tokens_seen": 189553180, + "step": 2833 + }, + { + "epoch": 0.32147517730496455, + "loss": 1.5195848941802979, + "loss_ce": 0.005913025699555874, + "loss_iou": 0.65234375, + "loss_num": 0.042724609375, + "loss_xval": 1.515625, + "num_input_tokens_seen": 189553180, + "step": 2833 + }, + { + "epoch": 0.3215886524822695, + "grad_norm": 27.116539001464844, + "learning_rate": 5e-05, + "loss": 1.3968, + "num_input_tokens_seen": 189621764, + "step": 2834 + }, + { + "epoch": 0.3215886524822695, + "loss": 1.3950059413909912, + "loss_ce": 0.0029161362908780575, + "loss_iou": 0.56640625, + "loss_num": 0.05126953125, + "loss_xval": 1.390625, + "num_input_tokens_seen": 189621764, + "step": 2834 + }, + { + "epoch": 0.32170212765957445, + "grad_norm": 27.843162536621094, + "learning_rate": 5e-05, + "loss": 1.3512, + "num_input_tokens_seen": 189688368, + "step": 2835 + }, + { + "epoch": 0.32170212765957445, + "loss": 1.3436408042907715, + "loss_ce": 0.0018438664264976978, + "loss_iou": 0.53125, + "loss_num": 0.054931640625, + "loss_xval": 1.34375, + "num_input_tokens_seen": 189688368, + "step": 2835 + }, + { + "epoch": 0.32181560283687943, + "grad_norm": 23.130207061767578, + "learning_rate": 5e-05, + "loss": 1.2948, + "num_input_tokens_seen": 189754764, + "step": 2836 + }, + { + "epoch": 0.32181560283687943, + "loss": 1.0104832649230957, + "loss_ce": 0.007126368582248688, + "loss_iou": 0.4140625, + "loss_num": 0.03466796875, + "loss_xval": 1.0, + "num_input_tokens_seen": 189754764, + "step": 2836 + }, + { + "epoch": 0.3219290780141844, + "grad_norm": 44.199920654296875, + "learning_rate": 5e-05, + "loss": 1.3729, + "num_input_tokens_seen": 189821812, + "step": 2837 + }, + { + "epoch": 0.3219290780141844, + "loss": 1.4341508150100708, + "loss_ce": 0.008369633927941322, + "loss_iou": 0.5859375, + "loss_num": 0.05126953125, + "loss_xval": 1.421875, + "num_input_tokens_seen": 189821812, + "step": 2837 + }, + { + "epoch": 0.3220425531914894, + "grad_norm": 20.39291763305664, + "learning_rate": 5e-05, + "loss": 1.4607, + "num_input_tokens_seen": 189888392, + "step": 2838 + }, + { + "epoch": 0.3220425531914894, + "loss": 1.532233476638794, + "loss_ce": 0.005866289604455233, + "loss_iou": 0.6875, + "loss_num": 0.0303955078125, + "loss_xval": 1.5234375, + "num_input_tokens_seen": 189888392, + "step": 2838 + }, + { + "epoch": 0.3221560283687943, + "grad_norm": 62.35999298095703, + "learning_rate": 5e-05, + "loss": 1.1175, + "num_input_tokens_seen": 189955020, + "step": 2839 + }, + { + "epoch": 0.3221560283687943, + "loss": 1.22159743309021, + "loss_ce": 0.0033357858192175627, + "loss_iou": 0.52734375, + "loss_num": 0.032470703125, + "loss_xval": 1.21875, + "num_input_tokens_seen": 189955020, + "step": 2839 + }, + { + "epoch": 0.3222695035460993, + "grad_norm": 41.00619888305664, + "learning_rate": 5e-05, + "loss": 1.1912, + "num_input_tokens_seen": 190021600, + "step": 2840 + }, + { + "epoch": 0.3222695035460993, + "loss": 1.1548521518707275, + "loss_ce": 0.004217399284243584, + "loss_iou": 0.462890625, + "loss_num": 0.045166015625, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 190021600, + "step": 2840 + }, + { + "epoch": 0.32238297872340427, + "grad_norm": 27.76328468322754, + "learning_rate": 5e-05, + "loss": 1.3142, + "num_input_tokens_seen": 190088100, + "step": 2841 + }, + { + "epoch": 0.32238297872340427, + "loss": 1.3404709100723267, + "loss_ce": 0.005998223554342985, + "loss_iou": 0.5625, + "loss_num": 0.04150390625, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 190088100, + "step": 2841 + }, + { + "epoch": 0.32249645390070925, + "grad_norm": 15.0712308883667, + "learning_rate": 5e-05, + "loss": 1.2016, + "num_input_tokens_seen": 190154984, + "step": 2842 + }, + { + "epoch": 0.32249645390070925, + "loss": 1.1608872413635254, + "loss_ce": 0.006590479519218206, + "loss_iou": 0.47265625, + "loss_num": 0.0419921875, + "loss_xval": 1.15625, + "num_input_tokens_seen": 190154984, + "step": 2842 + }, + { + "epoch": 0.32260992907801417, + "grad_norm": 16.673120498657227, + "learning_rate": 5e-05, + "loss": 1.0501, + "num_input_tokens_seen": 190221064, + "step": 2843 + }, + { + "epoch": 0.32260992907801417, + "loss": 0.972770094871521, + "loss_ce": 0.006888701114803553, + "loss_iou": 0.384765625, + "loss_num": 0.039306640625, + "loss_xval": 0.96484375, + "num_input_tokens_seen": 190221064, + "step": 2843 + }, + { + "epoch": 0.32272340425531915, + "grad_norm": 18.710037231445312, + "learning_rate": 5e-05, + "loss": 1.1828, + "num_input_tokens_seen": 190287668, + "step": 2844 + }, + { + "epoch": 0.32272340425531915, + "loss": 1.1344103813171387, + "loss_ce": 0.00355107756331563, + "loss_iou": 0.51171875, + "loss_num": 0.0220947265625, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 190287668, + "step": 2844 + }, + { + "epoch": 0.3228368794326241, + "grad_norm": 16.703147888183594, + "learning_rate": 5e-05, + "loss": 1.2022, + "num_input_tokens_seen": 190353736, + "step": 2845 + }, + { + "epoch": 0.3228368794326241, + "loss": 1.2057287693023682, + "loss_ce": 0.0021155807189643383, + "loss_iou": 0.5, + "loss_num": 0.0400390625, + "loss_xval": 1.203125, + "num_input_tokens_seen": 190353736, + "step": 2845 + }, + { + "epoch": 0.3229503546099291, + "grad_norm": 21.826091766357422, + "learning_rate": 5e-05, + "loss": 1.241, + "num_input_tokens_seen": 190420392, + "step": 2846 + }, + { + "epoch": 0.3229503546099291, + "loss": 1.2529124021530151, + "loss_ce": 0.004865502007305622, + "loss_iou": 0.5078125, + "loss_num": 0.046630859375, + "loss_xval": 1.25, + "num_input_tokens_seen": 190420392, + "step": 2846 + }, + { + "epoch": 0.323063829787234, + "grad_norm": 30.11870765686035, + "learning_rate": 5e-05, + "loss": 1.4168, + "num_input_tokens_seen": 190488168, + "step": 2847 + }, + { + "epoch": 0.323063829787234, + "loss": 1.310617208480835, + "loss_ce": 0.004953229799866676, + "loss_iou": 0.55078125, + "loss_num": 0.041015625, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 190488168, + "step": 2847 + }, + { + "epoch": 0.323177304964539, + "grad_norm": 21.68326759338379, + "learning_rate": 5e-05, + "loss": 1.4519, + "num_input_tokens_seen": 190555228, + "step": 2848 + }, + { + "epoch": 0.323177304964539, + "loss": 1.4783198833465576, + "loss_ce": 0.0027339854277670383, + "loss_iou": 0.64453125, + "loss_num": 0.03759765625, + "loss_xval": 1.4765625, + "num_input_tokens_seen": 190555228, + "step": 2848 + }, + { + "epoch": 0.323290780141844, + "grad_norm": 77.71858215332031, + "learning_rate": 5e-05, + "loss": 1.0838, + "num_input_tokens_seen": 190621544, + "step": 2849 + }, + { + "epoch": 0.323290780141844, + "loss": 0.8726780414581299, + "loss_ce": 0.004269851371645927, + "loss_iou": 0.365234375, + "loss_num": 0.027587890625, + "loss_xval": 0.8671875, + "num_input_tokens_seen": 190621544, + "step": 2849 + }, + { + "epoch": 0.32340425531914896, + "grad_norm": 24.25518226623535, + "learning_rate": 5e-05, + "loss": 1.1117, + "num_input_tokens_seen": 190688872, + "step": 2850 + }, + { + "epoch": 0.32340425531914896, + "loss": 1.0525187253952026, + "loss_ce": 0.005155495833605528, + "loss_iou": 0.46875, + "loss_num": 0.022216796875, + "loss_xval": 1.046875, + "num_input_tokens_seen": 190688872, + "step": 2850 + }, + { + "epoch": 0.3235177304964539, + "grad_norm": 21.754201889038086, + "learning_rate": 5e-05, + "loss": 1.294, + "num_input_tokens_seen": 190755620, + "step": 2851 + }, + { + "epoch": 0.3235177304964539, + "loss": 1.2672417163848877, + "loss_ce": 0.005034707486629486, + "loss_iou": 0.52734375, + "loss_num": 0.041015625, + "loss_xval": 1.265625, + "num_input_tokens_seen": 190755620, + "step": 2851 + }, + { + "epoch": 0.32363120567375886, + "grad_norm": 42.83580780029297, + "learning_rate": 5e-05, + "loss": 1.3932, + "num_input_tokens_seen": 190822560, + "step": 2852 + }, + { + "epoch": 0.32363120567375886, + "loss": 1.4311857223510742, + "loss_ce": 0.005404467228800058, + "loss_iou": 0.6015625, + "loss_num": 0.0439453125, + "loss_xval": 1.421875, + "num_input_tokens_seen": 190822560, + "step": 2852 + }, + { + "epoch": 0.32374468085106384, + "grad_norm": 29.857940673828125, + "learning_rate": 5e-05, + "loss": 1.4839, + "num_input_tokens_seen": 190889424, + "step": 2853 + }, + { + "epoch": 0.32374468085106384, + "loss": 1.3589749336242676, + "loss_ce": 0.005459372419863939, + "loss_iou": 0.6015625, + "loss_num": 0.030517578125, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 190889424, + "step": 2853 + }, + { + "epoch": 0.3238581560283688, + "grad_norm": 20.613906860351562, + "learning_rate": 5e-05, + "loss": 1.1821, + "num_input_tokens_seen": 190956464, + "step": 2854 + }, + { + "epoch": 0.3238581560283688, + "loss": 1.3929660320281982, + "loss_ce": 0.012106719426810741, + "loss_iou": 0.57421875, + "loss_num": 0.047119140625, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 190956464, + "step": 2854 + }, + { + "epoch": 0.32397163120567374, + "grad_norm": 21.88469886779785, + "learning_rate": 5e-05, + "loss": 1.1943, + "num_input_tokens_seen": 191023600, + "step": 2855 + }, + { + "epoch": 0.32397163120567374, + "loss": 1.116499423980713, + "loss_ce": 0.0066362107172608376, + "loss_iou": 0.435546875, + "loss_num": 0.0478515625, + "loss_xval": 1.109375, + "num_input_tokens_seen": 191023600, + "step": 2855 + }, + { + "epoch": 0.3240851063829787, + "grad_norm": 52.06282043457031, + "learning_rate": 5e-05, + "loss": 1.3215, + "num_input_tokens_seen": 191090344, + "step": 2856 + }, + { + "epoch": 0.3240851063829787, + "loss": 1.4861388206481934, + "loss_ce": 0.008599778637290001, + "loss_iou": 0.61328125, + "loss_num": 0.050048828125, + "loss_xval": 1.4765625, + "num_input_tokens_seen": 191090344, + "step": 2856 + }, + { + "epoch": 0.3241985815602837, + "grad_norm": 40.4210090637207, + "learning_rate": 5e-05, + "loss": 1.3243, + "num_input_tokens_seen": 191157612, + "step": 2857 + }, + { + "epoch": 0.3241985815602837, + "loss": 1.2165343761444092, + "loss_ce": 0.004131997469812632, + "loss_iou": 0.53125, + "loss_num": 0.0306396484375, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 191157612, + "step": 2857 + }, + { + "epoch": 0.32431205673758867, + "grad_norm": 28.11907386779785, + "learning_rate": 5e-05, + "loss": 1.4335, + "num_input_tokens_seen": 191224544, + "step": 2858 + }, + { + "epoch": 0.32431205673758867, + "loss": 1.1938186883926392, + "loss_ce": 0.003877216950058937, + "loss_iou": 0.51953125, + "loss_num": 0.030029296875, + "loss_xval": 1.1875, + "num_input_tokens_seen": 191224544, + "step": 2858 + }, + { + "epoch": 0.3244255319148936, + "grad_norm": 22.686962127685547, + "learning_rate": 5e-05, + "loss": 1.3014, + "num_input_tokens_seen": 191292104, + "step": 2859 + }, + { + "epoch": 0.3244255319148936, + "loss": 1.1972410678863525, + "loss_ce": 0.004369907081127167, + "loss_iou": 0.53125, + "loss_num": 0.026611328125, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 191292104, + "step": 2859 + }, + { + "epoch": 0.3245390070921986, + "grad_norm": 17.33407211303711, + "learning_rate": 5e-05, + "loss": 1.033, + "num_input_tokens_seen": 191359036, + "step": 2860 + }, + { + "epoch": 0.3245390070921986, + "loss": 1.263730525970459, + "loss_ce": 0.0025000653695315123, + "loss_iou": 0.546875, + "loss_num": 0.0341796875, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 191359036, + "step": 2860 + }, + { + "epoch": 0.32465248226950355, + "grad_norm": 21.780736923217773, + "learning_rate": 5e-05, + "loss": 1.2187, + "num_input_tokens_seen": 191426008, + "step": 2861 + }, + { + "epoch": 0.32465248226950355, + "loss": 1.0121731758117676, + "loss_ce": 0.00533723272383213, + "loss_iou": 0.41015625, + "loss_num": 0.037841796875, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 191426008, + "step": 2861 + }, + { + "epoch": 0.32476595744680853, + "grad_norm": 39.580074310302734, + "learning_rate": 5e-05, + "loss": 1.2972, + "num_input_tokens_seen": 191493540, + "step": 2862 + }, + { + "epoch": 0.32476595744680853, + "loss": 1.4486923217773438, + "loss_ce": 0.004356399178504944, + "loss_iou": 0.6015625, + "loss_num": 0.0478515625, + "loss_xval": 1.4453125, + "num_input_tokens_seen": 191493540, + "step": 2862 + }, + { + "epoch": 0.32487943262411345, + "grad_norm": 102.66207122802734, + "learning_rate": 5e-05, + "loss": 1.3193, + "num_input_tokens_seen": 191560132, + "step": 2863 + }, + { + "epoch": 0.32487943262411345, + "loss": 1.3658208847045898, + "loss_ce": 0.007788702845573425, + "loss_iou": 0.5703125, + "loss_num": 0.042724609375, + "loss_xval": 1.359375, + "num_input_tokens_seen": 191560132, + "step": 2863 + }, + { + "epoch": 0.32499290780141843, + "grad_norm": 23.96129608154297, + "learning_rate": 5e-05, + "loss": 1.4057, + "num_input_tokens_seen": 191627196, + "step": 2864 + }, + { + "epoch": 0.32499290780141843, + "loss": 1.5980546474456787, + "loss_ce": 0.004304508678615093, + "loss_iou": 0.62890625, + "loss_num": 0.06787109375, + "loss_xval": 1.59375, + "num_input_tokens_seen": 191627196, + "step": 2864 + }, + { + "epoch": 0.3251063829787234, + "grad_norm": 40.71651077270508, + "learning_rate": 5e-05, + "loss": 1.3291, + "num_input_tokens_seen": 191693888, + "step": 2865 + }, + { + "epoch": 0.3251063829787234, + "loss": 1.4174612760543823, + "loss_ce": 0.008281620219349861, + "loss_iou": 0.55859375, + "loss_num": 0.058837890625, + "loss_xval": 1.40625, + "num_input_tokens_seen": 191693888, + "step": 2865 + }, + { + "epoch": 0.3252198581560284, + "grad_norm": 26.273391723632812, + "learning_rate": 5e-05, + "loss": 1.2182, + "num_input_tokens_seen": 191761780, + "step": 2866 + }, + { + "epoch": 0.3252198581560284, + "loss": 1.2295634746551514, + "loss_ce": 0.006418989971280098, + "loss_iou": 0.5078125, + "loss_num": 0.04150390625, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 191761780, + "step": 2866 + }, + { + "epoch": 0.3253333333333333, + "grad_norm": 18.44083595275879, + "learning_rate": 5e-05, + "loss": 1.1007, + "num_input_tokens_seen": 191829124, + "step": 2867 + }, + { + "epoch": 0.3253333333333333, + "loss": 1.119567632675171, + "loss_ce": 0.005798080936074257, + "loss_iou": 0.4375, + "loss_num": 0.047119140625, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 191829124, + "step": 2867 + }, + { + "epoch": 0.3254468085106383, + "grad_norm": 25.953956604003906, + "learning_rate": 5e-05, + "loss": 1.366, + "num_input_tokens_seen": 191895916, + "step": 2868 + }, + { + "epoch": 0.3254468085106383, + "loss": 1.4674674272537231, + "loss_ce": 0.005065152887254953, + "loss_iou": 0.62890625, + "loss_num": 0.040283203125, + "loss_xval": 1.4609375, + "num_input_tokens_seen": 191895916, + "step": 2868 + }, + { + "epoch": 0.32556028368794326, + "grad_norm": 47.64985656738281, + "learning_rate": 5e-05, + "loss": 1.159, + "num_input_tokens_seen": 191961884, + "step": 2869 + }, + { + "epoch": 0.32556028368794326, + "loss": 1.2762014865875244, + "loss_ce": 0.004716997034847736, + "loss_iou": 0.515625, + "loss_num": 0.04736328125, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 191961884, + "step": 2869 + }, + { + "epoch": 0.32567375886524824, + "grad_norm": 22.88501739501953, + "learning_rate": 5e-05, + "loss": 1.5705, + "num_input_tokens_seen": 192029584, + "step": 2870 + }, + { + "epoch": 0.32567375886524824, + "loss": 1.7030912637710571, + "loss_ce": 0.0028959051705896854, + "loss_iou": 0.69140625, + "loss_num": 0.0634765625, + "loss_xval": 1.703125, + "num_input_tokens_seen": 192029584, + "step": 2870 + }, + { + "epoch": 0.32578723404255316, + "grad_norm": 17.518789291381836, + "learning_rate": 5e-05, + "loss": 1.4154, + "num_input_tokens_seen": 192097016, + "step": 2871 + }, + { + "epoch": 0.32578723404255316, + "loss": 1.1595032215118408, + "loss_ce": 0.00520634138956666, + "loss_iou": 0.5234375, + "loss_num": 0.022216796875, + "loss_xval": 1.15625, + "num_input_tokens_seen": 192097016, + "step": 2871 + }, + { + "epoch": 0.32590070921985814, + "grad_norm": 19.84493637084961, + "learning_rate": 5e-05, + "loss": 1.3156, + "num_input_tokens_seen": 192163940, + "step": 2872 + }, + { + "epoch": 0.32590070921985814, + "loss": 1.2157235145568848, + "loss_ce": 0.006495061796158552, + "loss_iou": 0.5078125, + "loss_num": 0.038330078125, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 192163940, + "step": 2872 + }, + { + "epoch": 0.3260141843971631, + "grad_norm": 18.32696533203125, + "learning_rate": 5e-05, + "loss": 0.9946, + "num_input_tokens_seen": 192230800, + "step": 2873 + }, + { + "epoch": 0.3260141843971631, + "loss": 1.3284823894500732, + "loss_ce": 0.008169911801815033, + "loss_iou": 0.53125, + "loss_num": 0.05126953125, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 192230800, + "step": 2873 + }, + { + "epoch": 0.3261276595744681, + "grad_norm": 23.47850227355957, + "learning_rate": 5e-05, + "loss": 1.1675, + "num_input_tokens_seen": 192298324, + "step": 2874 + }, + { + "epoch": 0.3261276595744681, + "loss": 1.1586813926696777, + "loss_ce": 0.009755558334290981, + "loss_iou": 0.470703125, + "loss_num": 0.041748046875, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 192298324, + "step": 2874 + }, + { + "epoch": 0.3262411347517731, + "grad_norm": 25.37845802307129, + "learning_rate": 5e-05, + "loss": 1.2966, + "num_input_tokens_seen": 192365316, + "step": 2875 + }, + { + "epoch": 0.3262411347517731, + "loss": 1.3379887342453003, + "loss_ce": 0.0010746889747679234, + "loss_iou": 0.546875, + "loss_num": 0.048583984375, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 192365316, + "step": 2875 + }, + { + "epoch": 0.326354609929078, + "grad_norm": 40.75306701660156, + "learning_rate": 5e-05, + "loss": 1.0707, + "num_input_tokens_seen": 192432000, + "step": 2876 + }, + { + "epoch": 0.326354609929078, + "loss": 0.9090614318847656, + "loss_ce": 0.004276266787201166, + "loss_iou": 0.39453125, + "loss_num": 0.0230712890625, + "loss_xval": 0.90625, + "num_input_tokens_seen": 192432000, + "step": 2876 + }, + { + "epoch": 0.326468085106383, + "grad_norm": 28.498228073120117, + "learning_rate": 5e-05, + "loss": 1.4574, + "num_input_tokens_seen": 192499296, + "step": 2877 + }, + { + "epoch": 0.326468085106383, + "loss": 1.442228078842163, + "loss_ce": 0.0066812713630497456, + "loss_iou": 0.609375, + "loss_num": 0.0439453125, + "loss_xval": 1.4375, + "num_input_tokens_seen": 192499296, + "step": 2877 + }, + { + "epoch": 0.32658156028368795, + "grad_norm": 19.989013671875, + "learning_rate": 5e-05, + "loss": 1.0435, + "num_input_tokens_seen": 192566964, + "step": 2878 + }, + { + "epoch": 0.32658156028368795, + "loss": 1.003804087638855, + "loss_ce": 0.007710401900112629, + "loss_iou": 0.423828125, + "loss_num": 0.0299072265625, + "loss_xval": 0.99609375, + "num_input_tokens_seen": 192566964, + "step": 2878 + }, + { + "epoch": 0.32669503546099293, + "grad_norm": 31.408342361450195, + "learning_rate": 5e-05, + "loss": 1.2807, + "num_input_tokens_seen": 192633668, + "step": 2879 + }, + { + "epoch": 0.32669503546099293, + "loss": 1.3664028644561768, + "loss_ce": 0.0031215695198625326, + "loss_iou": 0.55859375, + "loss_num": 0.048828125, + "loss_xval": 1.359375, + "num_input_tokens_seen": 192633668, + "step": 2879 + }, + { + "epoch": 0.32680851063829786, + "grad_norm": 43.19892120361328, + "learning_rate": 5e-05, + "loss": 1.4372, + "num_input_tokens_seen": 192700328, + "step": 2880 + }, + { + "epoch": 0.32680851063829786, + "loss": 1.45831298828125, + "loss_ce": 0.008117652498185635, + "loss_iou": 0.61328125, + "loss_num": 0.044921875, + "loss_xval": 1.453125, + "num_input_tokens_seen": 192700328, + "step": 2880 + }, + { + "epoch": 0.32692198581560283, + "grad_norm": 25.95997428894043, + "learning_rate": 5e-05, + "loss": 1.3497, + "num_input_tokens_seen": 192767824, + "step": 2881 + }, + { + "epoch": 0.32692198581560283, + "loss": 1.3858790397644043, + "loss_ce": 0.010390791110694408, + "loss_iou": 0.62109375, + "loss_num": 0.0264892578125, + "loss_xval": 1.375, + "num_input_tokens_seen": 192767824, + "step": 2881 + }, + { + "epoch": 0.3270354609929078, + "grad_norm": 23.52306365966797, + "learning_rate": 5e-05, + "loss": 1.141, + "num_input_tokens_seen": 192834104, + "step": 2882 + }, + { + "epoch": 0.3270354609929078, + "loss": 1.0571138858795166, + "loss_ce": 0.0029756641015410423, + "loss_iou": 0.44140625, + "loss_num": 0.034423828125, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 192834104, + "step": 2882 + }, + { + "epoch": 0.3271489361702128, + "grad_norm": 18.780502319335938, + "learning_rate": 5e-05, + "loss": 1.1553, + "num_input_tokens_seen": 192900780, + "step": 2883 + }, + { + "epoch": 0.3271489361702128, + "loss": 1.0176057815551758, + "loss_ce": 0.006863633636385202, + "loss_iou": 0.439453125, + "loss_num": 0.026611328125, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 192900780, + "step": 2883 + }, + { + "epoch": 0.3272624113475177, + "grad_norm": 73.4733657836914, + "learning_rate": 5e-05, + "loss": 1.0789, + "num_input_tokens_seen": 192967356, + "step": 2884 + }, + { + "epoch": 0.3272624113475177, + "loss": 1.0302069187164307, + "loss_ce": 0.0067694466561079025, + "loss_iou": 0.42578125, + "loss_num": 0.0341796875, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 192967356, + "step": 2884 + }, + { + "epoch": 0.3273758865248227, + "grad_norm": 30.799978256225586, + "learning_rate": 5e-05, + "loss": 1.3601, + "num_input_tokens_seen": 193033796, + "step": 2885 + }, + { + "epoch": 0.3273758865248227, + "loss": 1.4168224334716797, + "loss_ce": 0.004713076166808605, + "loss_iou": 0.6015625, + "loss_num": 0.041748046875, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 193033796, + "step": 2885 + }, + { + "epoch": 0.32748936170212767, + "grad_norm": 27.406999588012695, + "learning_rate": 5e-05, + "loss": 1.0721, + "num_input_tokens_seen": 193100660, + "step": 2886 + }, + { + "epoch": 0.32748936170212767, + "loss": 1.0460987091064453, + "loss_ce": 0.00331312557682395, + "loss_iou": 0.412109375, + "loss_num": 0.04345703125, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 193100660, + "step": 2886 + }, + { + "epoch": 0.32760283687943265, + "grad_norm": 29.913297653198242, + "learning_rate": 5e-05, + "loss": 1.2716, + "num_input_tokens_seen": 193167388, + "step": 2887 + }, + { + "epoch": 0.32760283687943265, + "loss": 1.3913217782974243, + "loss_ce": 0.007044398691505194, + "loss_iou": 0.5625, + "loss_num": 0.05126953125, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 193167388, + "step": 2887 + }, + { + "epoch": 0.32771631205673757, + "grad_norm": 38.89368438720703, + "learning_rate": 5e-05, + "loss": 1.337, + "num_input_tokens_seen": 193234412, + "step": 2888 + }, + { + "epoch": 0.32771631205673757, + "loss": 1.5128462314605713, + "loss_ce": 0.006986937019973993, + "loss_iou": 0.6171875, + "loss_num": 0.054931640625, + "loss_xval": 1.5078125, + "num_input_tokens_seen": 193234412, + "step": 2888 + }, + { + "epoch": 0.32782978723404255, + "grad_norm": 25.122085571289062, + "learning_rate": 5e-05, + "loss": 1.3936, + "num_input_tokens_seen": 193301140, + "step": 2889 + }, + { + "epoch": 0.32782978723404255, + "loss": 1.5180200338363647, + "loss_ce": 0.009719288907945156, + "loss_iou": 0.57421875, + "loss_num": 0.07177734375, + "loss_xval": 1.5078125, + "num_input_tokens_seen": 193301140, + "step": 2889 + }, + { + "epoch": 0.3279432624113475, + "grad_norm": 16.219755172729492, + "learning_rate": 5e-05, + "loss": 1.1875, + "num_input_tokens_seen": 193367492, + "step": 2890 + }, + { + "epoch": 0.3279432624113475, + "loss": 1.2945728302001953, + "loss_ce": 0.006975145079195499, + "loss_iou": 0.5390625, + "loss_num": 0.041259765625, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 193367492, + "step": 2890 + }, + { + "epoch": 0.3280567375886525, + "grad_norm": 38.355506896972656, + "learning_rate": 5e-05, + "loss": 1.0343, + "num_input_tokens_seen": 193435180, + "step": 2891 + }, + { + "epoch": 0.3280567375886525, + "loss": 0.9322594404220581, + "loss_ce": 0.0055016642436385155, + "loss_iou": 0.396484375, + "loss_num": 0.02685546875, + "loss_xval": 0.92578125, + "num_input_tokens_seen": 193435180, + "step": 2891 + }, + { + "epoch": 0.3281702127659574, + "grad_norm": 15.588542938232422, + "learning_rate": 5e-05, + "loss": 1.1882, + "num_input_tokens_seen": 193501536, + "step": 2892 + }, + { + "epoch": 0.3281702127659574, + "loss": 1.224928379058838, + "loss_ce": 0.00617837393656373, + "loss_iou": 0.5234375, + "loss_num": 0.03515625, + "loss_xval": 1.21875, + "num_input_tokens_seen": 193501536, + "step": 2892 + }, + { + "epoch": 0.3282836879432624, + "grad_norm": 28.020357131958008, + "learning_rate": 5e-05, + "loss": 1.0653, + "num_input_tokens_seen": 193567916, + "step": 2893 + }, + { + "epoch": 0.3282836879432624, + "loss": 0.9787887334823608, + "loss_ce": 0.00759732723236084, + "loss_iou": 0.435546875, + "loss_num": 0.02001953125, + "loss_xval": 0.97265625, + "num_input_tokens_seen": 193567916, + "step": 2893 + }, + { + "epoch": 0.3283971631205674, + "grad_norm": 39.91033172607422, + "learning_rate": 5e-05, + "loss": 1.1812, + "num_input_tokens_seen": 193634728, + "step": 2894 + }, + { + "epoch": 0.3283971631205674, + "loss": 1.1485424041748047, + "loss_ce": 0.010847192257642746, + "loss_iou": 0.48828125, + "loss_num": 0.03271484375, + "loss_xval": 1.140625, + "num_input_tokens_seen": 193634728, + "step": 2894 + }, + { + "epoch": 0.32851063829787236, + "grad_norm": 26.073421478271484, + "learning_rate": 5e-05, + "loss": 1.4401, + "num_input_tokens_seen": 193701888, + "step": 2895 + }, + { + "epoch": 0.32851063829787236, + "loss": 1.5213230848312378, + "loss_ce": 0.008627786301076412, + "loss_iou": 0.6015625, + "loss_num": 0.0625, + "loss_xval": 1.515625, + "num_input_tokens_seen": 193701888, + "step": 2895 + }, + { + "epoch": 0.3286241134751773, + "grad_norm": 15.285545349121094, + "learning_rate": 5e-05, + "loss": 1.1237, + "num_input_tokens_seen": 193769252, + "step": 2896 + }, + { + "epoch": 0.3286241134751773, + "loss": 1.154411792755127, + "loss_ce": 0.003532819449901581, + "loss_iou": 0.470703125, + "loss_num": 0.041259765625, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 193769252, + "step": 2896 + }, + { + "epoch": 0.32873758865248226, + "grad_norm": 17.18181037902832, + "learning_rate": 5e-05, + "loss": 1.1154, + "num_input_tokens_seen": 193835856, + "step": 2897 + }, + { + "epoch": 0.32873758865248226, + "loss": 1.2365942001342773, + "loss_ce": 0.005148963071405888, + "loss_iou": 0.5234375, + "loss_num": 0.037353515625, + "loss_xval": 1.234375, + "num_input_tokens_seen": 193835856, + "step": 2897 + }, + { + "epoch": 0.32885106382978724, + "grad_norm": 19.96324348449707, + "learning_rate": 5e-05, + "loss": 1.165, + "num_input_tokens_seen": 193903300, + "step": 2898 + }, + { + "epoch": 0.32885106382978724, + "loss": 1.346086025238037, + "loss_ce": 0.007218918763101101, + "loss_iou": 0.53125, + "loss_num": 0.0546875, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 193903300, + "step": 2898 + }, + { + "epoch": 0.3289645390070922, + "grad_norm": 25.329389572143555, + "learning_rate": 5e-05, + "loss": 1.1482, + "num_input_tokens_seen": 193970144, + "step": 2899 + }, + { + "epoch": 0.3289645390070922, + "loss": 1.2340023517608643, + "loss_ce": 0.005975090898573399, + "loss_iou": 0.5, + "loss_num": 0.045166015625, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 193970144, + "step": 2899 + }, + { + "epoch": 0.32907801418439714, + "grad_norm": 27.030820846557617, + "learning_rate": 5e-05, + "loss": 1.5361, + "num_input_tokens_seen": 194037932, + "step": 2900 + }, + { + "epoch": 0.32907801418439714, + "loss": 1.378434181213379, + "loss_ce": 0.007828710600733757, + "loss_iou": 0.609375, + "loss_num": 0.030517578125, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 194037932, + "step": 2900 + }, + { + "epoch": 0.3291914893617021, + "grad_norm": 24.8087158203125, + "learning_rate": 5e-05, + "loss": 1.2743, + "num_input_tokens_seen": 194104680, + "step": 2901 + }, + { + "epoch": 0.3291914893617021, + "loss": 1.3140209913253784, + "loss_ce": 0.005427254363894463, + "loss_iou": 0.5390625, + "loss_num": 0.04638671875, + "loss_xval": 1.3125, + "num_input_tokens_seen": 194104680, + "step": 2901 + }, + { + "epoch": 0.3293049645390071, + "grad_norm": 23.686704635620117, + "learning_rate": 5e-05, + "loss": 1.4866, + "num_input_tokens_seen": 194172724, + "step": 2902 + }, + { + "epoch": 0.3293049645390071, + "loss": 1.4177570343017578, + "loss_ce": 0.007112463936209679, + "loss_iou": 0.625, + "loss_num": 0.03271484375, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 194172724, + "step": 2902 + }, + { + "epoch": 0.3294184397163121, + "grad_norm": 86.59416961669922, + "learning_rate": 5e-05, + "loss": 1.2285, + "num_input_tokens_seen": 194239076, + "step": 2903 + }, + { + "epoch": 0.3294184397163121, + "loss": 1.3527393341064453, + "loss_ce": 0.004106540232896805, + "loss_iou": 0.5703125, + "loss_num": 0.041748046875, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 194239076, + "step": 2903 + }, + { + "epoch": 0.329531914893617, + "grad_norm": 23.051883697509766, + "learning_rate": 5e-05, + "loss": 1.3284, + "num_input_tokens_seen": 194306004, + "step": 2904 + }, + { + "epoch": 0.329531914893617, + "loss": 1.3062758445739746, + "loss_ce": 0.0025648202281445265, + "loss_iou": 0.5390625, + "loss_num": 0.044677734375, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 194306004, + "step": 2904 + }, + { + "epoch": 0.329645390070922, + "grad_norm": 25.274818420410156, + "learning_rate": 5e-05, + "loss": 1.2938, + "num_input_tokens_seen": 194371908, + "step": 2905 + }, + { + "epoch": 0.329645390070922, + "loss": 1.2179343700408936, + "loss_ce": 0.009194063022732735, + "loss_iou": 0.486328125, + "loss_num": 0.047119140625, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 194371908, + "step": 2905 + }, + { + "epoch": 0.32975886524822695, + "grad_norm": 27.016183853149414, + "learning_rate": 5e-05, + "loss": 1.4115, + "num_input_tokens_seen": 194438464, + "step": 2906 + }, + { + "epoch": 0.32975886524822695, + "loss": 1.5436062812805176, + "loss_ce": 0.009426599368453026, + "loss_iou": 0.640625, + "loss_num": 0.050537109375, + "loss_xval": 1.53125, + "num_input_tokens_seen": 194438464, + "step": 2906 + }, + { + "epoch": 0.32987234042553193, + "grad_norm": 21.492595672607422, + "learning_rate": 5e-05, + "loss": 0.9993, + "num_input_tokens_seen": 194506404, + "step": 2907 + }, + { + "epoch": 0.32987234042553193, + "loss": 1.0186426639556885, + "loss_ce": 0.008877177722752094, + "loss_iou": 0.447265625, + "loss_num": 0.022705078125, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 194506404, + "step": 2907 + }, + { + "epoch": 0.3299858156028369, + "grad_norm": 25.60944366455078, + "learning_rate": 5e-05, + "loss": 1.3375, + "num_input_tokens_seen": 194573660, + "step": 2908 + }, + { + "epoch": 0.3299858156028369, + "loss": 1.4815020561218262, + "loss_ce": 0.004939473234117031, + "loss_iou": 0.58984375, + "loss_num": 0.0595703125, + "loss_xval": 1.4765625, + "num_input_tokens_seen": 194573660, + "step": 2908 + }, + { + "epoch": 0.33009929078014183, + "grad_norm": 34.77912139892578, + "learning_rate": 5e-05, + "loss": 1.1127, + "num_input_tokens_seen": 194639660, + "step": 2909 + }, + { + "epoch": 0.33009929078014183, + "loss": 1.2241982221603394, + "loss_ce": 0.008377887308597565, + "loss_iou": 0.5078125, + "loss_num": 0.0400390625, + "loss_xval": 1.21875, + "num_input_tokens_seen": 194639660, + "step": 2909 + }, + { + "epoch": 0.3302127659574468, + "grad_norm": 27.86456298828125, + "learning_rate": 5e-05, + "loss": 1.4196, + "num_input_tokens_seen": 194706280, + "step": 2910 + }, + { + "epoch": 0.3302127659574468, + "loss": 1.3770864009857178, + "loss_ce": 0.005016111768782139, + "loss_iou": 0.5703125, + "loss_num": 0.04638671875, + "loss_xval": 1.375, + "num_input_tokens_seen": 194706280, + "step": 2910 + }, + { + "epoch": 0.3303262411347518, + "grad_norm": 18.202741622924805, + "learning_rate": 5e-05, + "loss": 0.9294, + "num_input_tokens_seen": 194773648, + "step": 2911 + }, + { + "epoch": 0.3303262411347518, + "loss": 0.7810925245285034, + "loss_ce": 0.011561279185116291, + "loss_iou": 0.32421875, + "loss_num": 0.0240478515625, + "loss_xval": 0.76953125, + "num_input_tokens_seen": 194773648, + "step": 2911 + }, + { + "epoch": 0.33043971631205676, + "grad_norm": 33.66046905517578, + "learning_rate": 5e-05, + "loss": 1.2571, + "num_input_tokens_seen": 194839592, + "step": 2912 + }, + { + "epoch": 0.33043971631205676, + "loss": 1.1112356185913086, + "loss_ce": 0.0018605939112603664, + "loss_iou": 0.49609375, + "loss_num": 0.0235595703125, + "loss_xval": 1.109375, + "num_input_tokens_seen": 194839592, + "step": 2912 + }, + { + "epoch": 0.3305531914893617, + "grad_norm": 30.84503173828125, + "learning_rate": 5e-05, + "loss": 1.6313, + "num_input_tokens_seen": 194905512, + "step": 2913 + }, + { + "epoch": 0.3305531914893617, + "loss": 1.5729293823242188, + "loss_ce": 0.005546570289880037, + "loss_iou": 0.6484375, + "loss_num": 0.0546875, + "loss_xval": 1.5703125, + "num_input_tokens_seen": 194905512, + "step": 2913 + }, + { + "epoch": 0.33066666666666666, + "grad_norm": 29.696956634521484, + "learning_rate": 5e-05, + "loss": 1.1915, + "num_input_tokens_seen": 194972072, + "step": 2914 + }, + { + "epoch": 0.33066666666666666, + "loss": 1.3040512800216675, + "loss_ce": 0.003270040499046445, + "loss_iou": 0.53125, + "loss_num": 0.047119140625, + "loss_xval": 1.296875, + "num_input_tokens_seen": 194972072, + "step": 2914 + }, + { + "epoch": 0.33078014184397164, + "grad_norm": 26.003345489501953, + "learning_rate": 5e-05, + "loss": 1.4286, + "num_input_tokens_seen": 195039732, + "step": 2915 + }, + { + "epoch": 0.33078014184397164, + "loss": 1.4592349529266357, + "loss_ce": 0.004156794864684343, + "loss_iou": 0.59765625, + "loss_num": 0.0517578125, + "loss_xval": 1.453125, + "num_input_tokens_seen": 195039732, + "step": 2915 + }, + { + "epoch": 0.3308936170212766, + "grad_norm": 32.06193542480469, + "learning_rate": 5e-05, + "loss": 1.2227, + "num_input_tokens_seen": 195106524, + "step": 2916 + }, + { + "epoch": 0.3308936170212766, + "loss": 1.3001331090927124, + "loss_ce": 0.005699521861970425, + "loss_iou": 0.55859375, + "loss_num": 0.035400390625, + "loss_xval": 1.296875, + "num_input_tokens_seen": 195106524, + "step": 2916 + }, + { + "epoch": 0.33100709219858154, + "grad_norm": 26.174516677856445, + "learning_rate": 5e-05, + "loss": 1.224, + "num_input_tokens_seen": 195173584, + "step": 2917 + }, + { + "epoch": 0.33100709219858154, + "loss": 1.303419828414917, + "loss_ce": 0.0038592792116105556, + "loss_iou": 0.52734375, + "loss_num": 0.049072265625, + "loss_xval": 1.296875, + "num_input_tokens_seen": 195173584, + "step": 2917 + }, + { + "epoch": 0.3311205673758865, + "grad_norm": 34.968544006347656, + "learning_rate": 5e-05, + "loss": 1.2942, + "num_input_tokens_seen": 195239816, + "step": 2918 + }, + { + "epoch": 0.3311205673758865, + "loss": 1.1845160722732544, + "loss_ce": 0.003363728290423751, + "loss_iou": 0.5078125, + "loss_num": 0.033447265625, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 195239816, + "step": 2918 + }, + { + "epoch": 0.3312340425531915, + "grad_norm": 29.474063873291016, + "learning_rate": 5e-05, + "loss": 1.4197, + "num_input_tokens_seen": 195306728, + "step": 2919 + }, + { + "epoch": 0.3312340425531915, + "loss": 1.4142041206359863, + "loss_ce": 0.006550302729010582, + "loss_iou": 0.59765625, + "loss_num": 0.042724609375, + "loss_xval": 1.40625, + "num_input_tokens_seen": 195306728, + "step": 2919 + }, + { + "epoch": 0.3313475177304965, + "grad_norm": 20.727806091308594, + "learning_rate": 5e-05, + "loss": 1.17, + "num_input_tokens_seen": 195373864, + "step": 2920 + }, + { + "epoch": 0.3313475177304965, + "loss": 1.0435549020767212, + "loss_ce": 0.004980713129043579, + "loss_iou": 0.453125, + "loss_num": 0.02587890625, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 195373864, + "step": 2920 + }, + { + "epoch": 0.3314609929078014, + "grad_norm": 24.93427848815918, + "learning_rate": 5e-05, + "loss": 1.3581, + "num_input_tokens_seen": 195440812, + "step": 2921 + }, + { + "epoch": 0.3314609929078014, + "loss": 1.472943663597107, + "loss_ce": 0.007123372983187437, + "loss_iou": 0.58984375, + "loss_num": 0.056884765625, + "loss_xval": 1.46875, + "num_input_tokens_seen": 195440812, + "step": 2921 + }, + { + "epoch": 0.3315744680851064, + "grad_norm": 29.469982147216797, + "learning_rate": 5e-05, + "loss": 1.2908, + "num_input_tokens_seen": 195507520, + "step": 2922 + }, + { + "epoch": 0.3315744680851064, + "loss": 1.4884493350982666, + "loss_ce": 0.007492209319025278, + "loss_iou": 0.625, + "loss_num": 0.0458984375, + "loss_xval": 1.484375, + "num_input_tokens_seen": 195507520, + "step": 2922 + }, + { + "epoch": 0.33168794326241136, + "grad_norm": 26.175691604614258, + "learning_rate": 5e-05, + "loss": 1.3029, + "num_input_tokens_seen": 195574296, + "step": 2923 + }, + { + "epoch": 0.33168794326241136, + "loss": 1.4472365379333496, + "loss_ce": 0.004853773862123489, + "loss_iou": 0.6171875, + "loss_num": 0.04150390625, + "loss_xval": 1.4453125, + "num_input_tokens_seen": 195574296, + "step": 2923 + }, + { + "epoch": 0.33180141843971633, + "grad_norm": 17.535327911376953, + "learning_rate": 5e-05, + "loss": 1.217, + "num_input_tokens_seen": 195641796, + "step": 2924 + }, + { + "epoch": 0.33180141843971633, + "loss": 1.0909278392791748, + "loss_ce": 0.005966847762465477, + "loss_iou": 0.478515625, + "loss_num": 0.025390625, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 195641796, + "step": 2924 + }, + { + "epoch": 0.33191489361702126, + "grad_norm": 19.628644943237305, + "learning_rate": 5e-05, + "loss": 1.4999, + "num_input_tokens_seen": 195709124, + "step": 2925 + }, + { + "epoch": 0.33191489361702126, + "loss": 1.4345217943191528, + "loss_ce": 0.004834298975765705, + "loss_iou": 0.58203125, + "loss_num": 0.05322265625, + "loss_xval": 1.4296875, + "num_input_tokens_seen": 195709124, + "step": 2925 + }, + { + "epoch": 0.33202836879432623, + "grad_norm": 28.538503646850586, + "learning_rate": 5e-05, + "loss": 1.1733, + "num_input_tokens_seen": 195776132, + "step": 2926 + }, + { + "epoch": 0.33202836879432623, + "loss": 1.2333168983459473, + "loss_ce": 0.005289502441883087, + "loss_iou": 0.55078125, + "loss_num": 0.02587890625, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 195776132, + "step": 2926 + }, + { + "epoch": 0.3321418439716312, + "grad_norm": 19.34160041809082, + "learning_rate": 5e-05, + "loss": 1.2899, + "num_input_tokens_seen": 195843572, + "step": 2927 + }, + { + "epoch": 0.3321418439716312, + "loss": 1.352628469467163, + "loss_ce": 0.0054605938494205475, + "loss_iou": 0.5390625, + "loss_num": 0.0537109375, + "loss_xval": 1.34375, + "num_input_tokens_seen": 195843572, + "step": 2927 + }, + { + "epoch": 0.3322553191489362, + "grad_norm": 20.761154174804688, + "learning_rate": 5e-05, + "loss": 1.209, + "num_input_tokens_seen": 195909488, + "step": 2928 + }, + { + "epoch": 0.3322553191489362, + "loss": 1.2203954458236694, + "loss_ce": 0.004269926808774471, + "loss_iou": 0.474609375, + "loss_num": 0.052978515625, + "loss_xval": 1.21875, + "num_input_tokens_seen": 195909488, + "step": 2928 + }, + { + "epoch": 0.3323687943262411, + "grad_norm": 20.759037017822266, + "learning_rate": 5e-05, + "loss": 1.2714, + "num_input_tokens_seen": 195975636, + "step": 2929 + }, + { + "epoch": 0.3323687943262411, + "loss": 1.2918460369110107, + "loss_ce": 0.008154556155204773, + "loss_iou": 0.51953125, + "loss_num": 0.04833984375, + "loss_xval": 1.28125, + "num_input_tokens_seen": 195975636, + "step": 2929 + }, + { + "epoch": 0.3324822695035461, + "grad_norm": 21.076168060302734, + "learning_rate": 5e-05, + "loss": 0.9357, + "num_input_tokens_seen": 196043104, + "step": 2930 + }, + { + "epoch": 0.3324822695035461, + "loss": 0.8022745847702026, + "loss_ce": 0.0039957850240170956, + "loss_iou": 0.3203125, + "loss_num": 0.03173828125, + "loss_xval": 0.796875, + "num_input_tokens_seen": 196043104, + "step": 2930 + }, + { + "epoch": 0.33259574468085107, + "grad_norm": 23.435468673706055, + "learning_rate": 5e-05, + "loss": 1.3165, + "num_input_tokens_seen": 196109932, + "step": 2931 + }, + { + "epoch": 0.33259574468085107, + "loss": 1.2700481414794922, + "loss_ce": 0.004423089325428009, + "loss_iou": 0.51171875, + "loss_num": 0.048583984375, + "loss_xval": 1.265625, + "num_input_tokens_seen": 196109932, + "step": 2931 + }, + { + "epoch": 0.33270921985815605, + "grad_norm": 27.80138397216797, + "learning_rate": 5e-05, + "loss": 1.2643, + "num_input_tokens_seen": 196176104, + "step": 2932 + }, + { + "epoch": 0.33270921985815605, + "loss": 1.2200570106506348, + "loss_ce": 0.004725010134279728, + "loss_iou": 0.515625, + "loss_num": 0.03662109375, + "loss_xval": 1.21875, + "num_input_tokens_seen": 196176104, + "step": 2932 + }, + { + "epoch": 0.33282269503546097, + "grad_norm": 31.047752380371094, + "learning_rate": 5e-05, + "loss": 1.2297, + "num_input_tokens_seen": 196242868, + "step": 2933 + }, + { + "epoch": 0.33282269503546097, + "loss": 1.3707659244537354, + "loss_ce": 0.004066750407218933, + "loss_iou": 0.58984375, + "loss_num": 0.036865234375, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 196242868, + "step": 2933 + }, + { + "epoch": 0.33293617021276595, + "grad_norm": 45.895450592041016, + "learning_rate": 5e-05, + "loss": 1.4042, + "num_input_tokens_seen": 196309808, + "step": 2934 + }, + { + "epoch": 0.33293617021276595, + "loss": 1.5660544633865356, + "loss_ce": 0.005507584661245346, + "loss_iou": 0.65234375, + "loss_num": 0.051513671875, + "loss_xval": 1.5625, + "num_input_tokens_seen": 196309808, + "step": 2934 + }, + { + "epoch": 0.3330496453900709, + "grad_norm": 64.80780792236328, + "learning_rate": 5e-05, + "loss": 1.3227, + "num_input_tokens_seen": 196376436, + "step": 2935 + }, + { + "epoch": 0.3330496453900709, + "loss": 1.4438225030899048, + "loss_ce": 0.004613618832081556, + "loss_iou": 0.609375, + "loss_num": 0.04345703125, + "loss_xval": 1.4375, + "num_input_tokens_seen": 196376436, + "step": 2935 + }, + { + "epoch": 0.3331631205673759, + "grad_norm": 35.9901237487793, + "learning_rate": 5e-05, + "loss": 1.3523, + "num_input_tokens_seen": 196443532, + "step": 2936 + }, + { + "epoch": 0.3331631205673759, + "loss": 1.2783480882644653, + "loss_ce": 0.003933969419449568, + "loss_iou": 0.515625, + "loss_num": 0.048095703125, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 196443532, + "step": 2936 + }, + { + "epoch": 0.3332765957446808, + "grad_norm": 28.268577575683594, + "learning_rate": 5e-05, + "loss": 1.4746, + "num_input_tokens_seen": 196509716, + "step": 2937 + }, + { + "epoch": 0.3332765957446808, + "loss": 1.3649663925170898, + "loss_ce": 0.008032857440412045, + "loss_iou": 0.56640625, + "loss_num": 0.04541015625, + "loss_xval": 1.359375, + "num_input_tokens_seen": 196509716, + "step": 2937 + }, + { + "epoch": 0.3333900709219858, + "grad_norm": 28.907386779785156, + "learning_rate": 5e-05, + "loss": 1.4276, + "num_input_tokens_seen": 196575688, + "step": 2938 + }, + { + "epoch": 0.3333900709219858, + "loss": 1.2797386646270752, + "loss_ce": 0.001906606717966497, + "loss_iou": 0.54296875, + "loss_num": 0.038330078125, + "loss_xval": 1.28125, + "num_input_tokens_seen": 196575688, + "step": 2938 + }, + { + "epoch": 0.3335035460992908, + "grad_norm": 13.259909629821777, + "learning_rate": 5e-05, + "loss": 1.071, + "num_input_tokens_seen": 196642612, + "step": 2939 + }, + { + "epoch": 0.3335035460992908, + "loss": 1.1044636964797974, + "loss_ce": 0.006319242995232344, + "loss_iou": 0.4453125, + "loss_num": 0.0419921875, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 196642612, + "step": 2939 + }, + { + "epoch": 0.33361702127659576, + "grad_norm": 21.60189437866211, + "learning_rate": 5e-05, + "loss": 1.2711, + "num_input_tokens_seen": 196709988, + "step": 2940 + }, + { + "epoch": 0.33361702127659576, + "loss": 1.039925456047058, + "loss_ce": 0.008797526359558105, + "loss_iou": 0.453125, + "loss_num": 0.024658203125, + "loss_xval": 1.03125, + "num_input_tokens_seen": 196709988, + "step": 2940 + }, + { + "epoch": 0.3337304964539007, + "grad_norm": 22.772716522216797, + "learning_rate": 5e-05, + "loss": 1.1606, + "num_input_tokens_seen": 196776616, + "step": 2941 + }, + { + "epoch": 0.3337304964539007, + "loss": 1.256264328956604, + "loss_ce": 0.006752651184797287, + "loss_iou": 0.5390625, + "loss_num": 0.03466796875, + "loss_xval": 1.25, + "num_input_tokens_seen": 196776616, + "step": 2941 + }, + { + "epoch": 0.33384397163120566, + "grad_norm": 28.539701461791992, + "learning_rate": 5e-05, + "loss": 1.2966, + "num_input_tokens_seen": 196843424, + "step": 2942 + }, + { + "epoch": 0.33384397163120566, + "loss": 0.9829597473144531, + "loss_ce": 0.002979336306452751, + "loss_iou": 0.419921875, + "loss_num": 0.0279541015625, + "loss_xval": 0.98046875, + "num_input_tokens_seen": 196843424, + "step": 2942 + }, + { + "epoch": 0.33395744680851064, + "grad_norm": 17.278913497924805, + "learning_rate": 5e-05, + "loss": 0.9257, + "num_input_tokens_seen": 196909996, + "step": 2943 + }, + { + "epoch": 0.33395744680851064, + "loss": 0.905280590057373, + "loss_ce": 0.005622347351163626, + "loss_iou": 0.37890625, + "loss_num": 0.028564453125, + "loss_xval": 0.8984375, + "num_input_tokens_seen": 196909996, + "step": 2943 + }, + { + "epoch": 0.3340709219858156, + "grad_norm": 44.686126708984375, + "learning_rate": 5e-05, + "loss": 1.2757, + "num_input_tokens_seen": 196976948, + "step": 2944 + }, + { + "epoch": 0.3340709219858156, + "loss": 1.1439507007598877, + "loss_ce": 0.0027153356932103634, + "loss_iou": 0.466796875, + "loss_num": 0.041015625, + "loss_xval": 1.140625, + "num_input_tokens_seen": 196976948, + "step": 2944 + }, + { + "epoch": 0.3341843971631206, + "grad_norm": 25.310407638549805, + "learning_rate": 5e-05, + "loss": 1.6188, + "num_input_tokens_seen": 197043980, + "step": 2945 + }, + { + "epoch": 0.3341843971631206, + "loss": 1.431128978729248, + "loss_ce": 0.0024181059561669827, + "loss_iou": 0.61328125, + "loss_num": 0.04052734375, + "loss_xval": 1.4296875, + "num_input_tokens_seen": 197043980, + "step": 2945 + }, + { + "epoch": 0.3342978723404255, + "grad_norm": 32.86634063720703, + "learning_rate": 5e-05, + "loss": 1.408, + "num_input_tokens_seen": 197111024, + "step": 2946 + }, + { + "epoch": 0.3342978723404255, + "loss": 1.4728169441223145, + "loss_ce": 0.006019997876137495, + "loss_iou": 0.60546875, + "loss_num": 0.05126953125, + "loss_xval": 1.46875, + "num_input_tokens_seen": 197111024, + "step": 2946 + }, + { + "epoch": 0.3344113475177305, + "grad_norm": 40.74956130981445, + "learning_rate": 5e-05, + "loss": 1.3754, + "num_input_tokens_seen": 197178556, + "step": 2947 + }, + { + "epoch": 0.3344113475177305, + "loss": 1.1755045652389526, + "loss_ce": 0.003141197608783841, + "loss_iou": 0.48828125, + "loss_num": 0.03857421875, + "loss_xval": 1.171875, + "num_input_tokens_seen": 197178556, + "step": 2947 + }, + { + "epoch": 0.3345248226950355, + "grad_norm": 28.884654998779297, + "learning_rate": 5e-05, + "loss": 1.4139, + "num_input_tokens_seen": 197246088, + "step": 2948 + }, + { + "epoch": 0.3345248226950355, + "loss": 1.416831374168396, + "loss_ce": 0.006675142794847488, + "loss_iou": 0.62890625, + "loss_num": 0.0308837890625, + "loss_xval": 1.40625, + "num_input_tokens_seen": 197246088, + "step": 2948 + }, + { + "epoch": 0.33463829787234045, + "grad_norm": 15.215523719787598, + "learning_rate": 5e-05, + "loss": 1.1339, + "num_input_tokens_seen": 197312628, + "step": 2949 + }, + { + "epoch": 0.33463829787234045, + "loss": 1.1864333152770996, + "loss_ce": 0.004548550117760897, + "loss_iou": 0.44921875, + "loss_num": 0.05712890625, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 197312628, + "step": 2949 + }, + { + "epoch": 0.3347517730496454, + "grad_norm": 28.477291107177734, + "learning_rate": 5e-05, + "loss": 1.1285, + "num_input_tokens_seen": 197378888, + "step": 2950 + }, + { + "epoch": 0.3347517730496454, + "loss": 1.178385853767395, + "loss_ce": 0.003092862432822585, + "loss_iou": 0.53125, + "loss_num": 0.023193359375, + "loss_xval": 1.171875, + "num_input_tokens_seen": 197378888, + "step": 2950 + }, + { + "epoch": 0.33486524822695035, + "grad_norm": 22.17989158630371, + "learning_rate": 5e-05, + "loss": 1.1896, + "num_input_tokens_seen": 197445560, + "step": 2951 + }, + { + "epoch": 0.33486524822695035, + "loss": 1.3820010423660278, + "loss_ce": 0.013837005943059921, + "loss_iou": 0.53515625, + "loss_num": 0.06005859375, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 197445560, + "step": 2951 + }, + { + "epoch": 0.33497872340425533, + "grad_norm": 76.00291442871094, + "learning_rate": 5e-05, + "loss": 1.3564, + "num_input_tokens_seen": 197512504, + "step": 2952 + }, + { + "epoch": 0.33497872340425533, + "loss": 1.1663689613342285, + "loss_ce": 0.005724412854760885, + "loss_iou": 0.46875, + "loss_num": 0.044921875, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 197512504, + "step": 2952 + }, + { + "epoch": 0.3350921985815603, + "grad_norm": 21.551591873168945, + "learning_rate": 5e-05, + "loss": 1.2207, + "num_input_tokens_seen": 197579584, + "step": 2953 + }, + { + "epoch": 0.3350921985815603, + "loss": 1.211984395980835, + "loss_ce": 0.0059296973049640656, + "loss_iou": 0.5078125, + "loss_num": 0.038330078125, + "loss_xval": 1.203125, + "num_input_tokens_seen": 197579584, + "step": 2953 + }, + { + "epoch": 0.33520567375886523, + "grad_norm": 21.538095474243164, + "learning_rate": 5e-05, + "loss": 1.3353, + "num_input_tokens_seen": 197647036, + "step": 2954 + }, + { + "epoch": 0.33520567375886523, + "loss": 1.3923280239105225, + "loss_ce": 0.0036562453024089336, + "loss_iou": 0.5625, + "loss_num": 0.052490234375, + "loss_xval": 1.390625, + "num_input_tokens_seen": 197647036, + "step": 2954 + }, + { + "epoch": 0.3353191489361702, + "grad_norm": 35.01033401489258, + "learning_rate": 5e-05, + "loss": 1.1574, + "num_input_tokens_seen": 197714076, + "step": 2955 + }, + { + "epoch": 0.3353191489361702, + "loss": 1.3294785022735596, + "loss_ce": 0.009654334746301174, + "loss_iou": 0.515625, + "loss_num": 0.057373046875, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 197714076, + "step": 2955 + }, + { + "epoch": 0.3354326241134752, + "grad_norm": 33.85853958129883, + "learning_rate": 5e-05, + "loss": 1.4781, + "num_input_tokens_seen": 197781528, + "step": 2956 + }, + { + "epoch": 0.3354326241134752, + "loss": 1.3917853832244873, + "loss_ce": 0.003113493090495467, + "loss_iou": 0.59375, + "loss_num": 0.03955078125, + "loss_xval": 1.390625, + "num_input_tokens_seen": 197781528, + "step": 2956 + }, + { + "epoch": 0.33554609929078016, + "grad_norm": 16.289291381835938, + "learning_rate": 5e-05, + "loss": 0.9777, + "num_input_tokens_seen": 197849496, + "step": 2957 + }, + { + "epoch": 0.33554609929078016, + "loss": 1.1167362928390503, + "loss_ce": 0.005408146418631077, + "loss_iou": 0.49609375, + "loss_num": 0.0240478515625, + "loss_xval": 1.109375, + "num_input_tokens_seen": 197849496, + "step": 2957 + }, + { + "epoch": 0.3356595744680851, + "grad_norm": 29.649452209472656, + "learning_rate": 5e-05, + "loss": 1.2144, + "num_input_tokens_seen": 197916532, + "step": 2958 + }, + { + "epoch": 0.3356595744680851, + "loss": 1.1081571578979492, + "loss_ce": 0.004641558509320021, + "loss_iou": 0.48046875, + "loss_num": 0.02880859375, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 197916532, + "step": 2958 + }, + { + "epoch": 0.33577304964539006, + "grad_norm": 36.96073913574219, + "learning_rate": 5e-05, + "loss": 1.3881, + "num_input_tokens_seen": 197983256, + "step": 2959 + }, + { + "epoch": 0.33577304964539006, + "loss": 1.5297282934188843, + "loss_ce": 0.008243914693593979, + "loss_iou": 0.59765625, + "loss_num": 0.06591796875, + "loss_xval": 1.5234375, + "num_input_tokens_seen": 197983256, + "step": 2959 + }, + { + "epoch": 0.33588652482269504, + "grad_norm": 26.08258628845215, + "learning_rate": 5e-05, + "loss": 1.3581, + "num_input_tokens_seen": 198051008, + "step": 2960 + }, + { + "epoch": 0.33588652482269504, + "loss": 1.1743085384368896, + "loss_ce": 0.008292886428534985, + "loss_iou": 0.490234375, + "loss_num": 0.036865234375, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 198051008, + "step": 2960 + }, + { + "epoch": 0.336, + "grad_norm": 28.505678176879883, + "learning_rate": 5e-05, + "loss": 1.4245, + "num_input_tokens_seen": 198117780, + "step": 2961 + }, + { + "epoch": 0.336, + "loss": 1.4248231649398804, + "loss_ce": 0.006854427512735128, + "loss_iou": 0.5859375, + "loss_num": 0.0498046875, + "loss_xval": 1.421875, + "num_input_tokens_seen": 198117780, + "step": 2961 + }, + { + "epoch": 0.33611347517730494, + "grad_norm": 27.716175079345703, + "learning_rate": 5e-05, + "loss": 1.4164, + "num_input_tokens_seen": 198185536, + "step": 2962 + }, + { + "epoch": 0.33611347517730494, + "loss": 1.4914462566375732, + "loss_ce": 0.007071200758218765, + "loss_iou": 0.609375, + "loss_num": 0.052734375, + "loss_xval": 1.484375, + "num_input_tokens_seen": 198185536, + "step": 2962 + }, + { + "epoch": 0.3362269503546099, + "grad_norm": 55.32089614868164, + "learning_rate": 5e-05, + "loss": 1.3371, + "num_input_tokens_seen": 198252668, + "step": 2963 + }, + { + "epoch": 0.3362269503546099, + "loss": 1.4380497932434082, + "loss_ce": 0.002991204150021076, + "loss_iou": 0.61328125, + "loss_num": 0.041748046875, + "loss_xval": 1.4375, + "num_input_tokens_seen": 198252668, + "step": 2963 + }, + { + "epoch": 0.3363404255319149, + "grad_norm": 23.84047508239746, + "learning_rate": 5e-05, + "loss": 1.2392, + "num_input_tokens_seen": 198320204, + "step": 2964 + }, + { + "epoch": 0.3363404255319149, + "loss": 1.3784716129302979, + "loss_ce": 0.0024950196966528893, + "loss_iou": 0.5625, + "loss_num": 0.051025390625, + "loss_xval": 1.375, + "num_input_tokens_seen": 198320204, + "step": 2964 + }, + { + "epoch": 0.3364539007092199, + "grad_norm": 28.84464454650879, + "learning_rate": 5e-05, + "loss": 1.2489, + "num_input_tokens_seen": 198387356, + "step": 2965 + }, + { + "epoch": 0.3364539007092199, + "loss": 1.3052852153778076, + "loss_ce": 0.0025508073158562183, + "loss_iou": 0.54296875, + "loss_num": 0.042724609375, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 198387356, + "step": 2965 + }, + { + "epoch": 0.3365673758865248, + "grad_norm": 32.097618103027344, + "learning_rate": 5e-05, + "loss": 1.2597, + "num_input_tokens_seen": 198454152, + "step": 2966 + }, + { + "epoch": 0.3365673758865248, + "loss": 1.2834608554840088, + "loss_ce": 0.014417910017073154, + "loss_iou": 0.51953125, + "loss_num": 0.045654296875, + "loss_xval": 1.265625, + "num_input_tokens_seen": 198454152, + "step": 2966 + }, + { + "epoch": 0.3366808510638298, + "grad_norm": 24.99037742614746, + "learning_rate": 5e-05, + "loss": 1.5937, + "num_input_tokens_seen": 198521204, + "step": 2967 + }, + { + "epoch": 0.3366808510638298, + "loss": 1.6925885677337646, + "loss_ce": 0.008018318563699722, + "loss_iou": 0.69921875, + "loss_num": 0.057861328125, + "loss_xval": 1.6875, + "num_input_tokens_seen": 198521204, + "step": 2967 + }, + { + "epoch": 0.33679432624113476, + "grad_norm": 14.492483139038086, + "learning_rate": 5e-05, + "loss": 1.2373, + "num_input_tokens_seen": 198588088, + "step": 2968 + }, + { + "epoch": 0.33679432624113476, + "loss": 1.0767502784729004, + "loss_ce": 0.0035081696696579456, + "loss_iou": 0.45703125, + "loss_num": 0.031494140625, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 198588088, + "step": 2968 + }, + { + "epoch": 0.33690780141843973, + "grad_norm": 143.1591796875, + "learning_rate": 5e-05, + "loss": 1.4405, + "num_input_tokens_seen": 198654188, + "step": 2969 + }, + { + "epoch": 0.33690780141843973, + "loss": 1.3464324474334717, + "loss_ce": 0.008297673426568508, + "loss_iou": 0.55859375, + "loss_num": 0.045166015625, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 198654188, + "step": 2969 + }, + { + "epoch": 0.33702127659574466, + "grad_norm": 203.21151733398438, + "learning_rate": 5e-05, + "loss": 1.4303, + "num_input_tokens_seen": 198720700, + "step": 2970 + }, + { + "epoch": 0.33702127659574466, + "loss": 1.4587750434875488, + "loss_ce": 0.009800480678677559, + "loss_iou": 0.578125, + "loss_num": 0.05810546875, + "loss_xval": 1.4453125, + "num_input_tokens_seen": 198720700, + "step": 2970 + }, + { + "epoch": 0.33713475177304963, + "grad_norm": 88.56715393066406, + "learning_rate": 5e-05, + "loss": 1.4839, + "num_input_tokens_seen": 198787552, + "step": 2971 + }, + { + "epoch": 0.33713475177304963, + "loss": 1.2687265872955322, + "loss_ce": 0.005054733715951443, + "loss_iou": 0.51953125, + "loss_num": 0.04443359375, + "loss_xval": 1.265625, + "num_input_tokens_seen": 198787552, + "step": 2971 + }, + { + "epoch": 0.3372482269503546, + "grad_norm": 33.722469329833984, + "learning_rate": 5e-05, + "loss": 1.2254, + "num_input_tokens_seen": 198855232, + "step": 2972 + }, + { + "epoch": 0.3372482269503546, + "loss": 1.3216038942337036, + "loss_ce": 0.008127408102154732, + "loss_iou": 0.53125, + "loss_num": 0.05029296875, + "loss_xval": 1.3125, + "num_input_tokens_seen": 198855232, + "step": 2972 + }, + { + "epoch": 0.3373617021276596, + "grad_norm": 28.915817260742188, + "learning_rate": 5e-05, + "loss": 1.1239, + "num_input_tokens_seen": 198921836, + "step": 2973 + }, + { + "epoch": 0.3373617021276596, + "loss": 1.0982649326324463, + "loss_ce": 0.004026732407510281, + "loss_iou": 0.474609375, + "loss_num": 0.02880859375, + "loss_xval": 1.09375, + "num_input_tokens_seen": 198921836, + "step": 2973 + }, + { + "epoch": 0.3374751773049645, + "grad_norm": 34.13107681274414, + "learning_rate": 5e-05, + "loss": 1.2516, + "num_input_tokens_seen": 198988828, + "step": 2974 + }, + { + "epoch": 0.3374751773049645, + "loss": 1.2632813453674316, + "loss_ce": 0.006445379927754402, + "loss_iou": 0.52734375, + "loss_num": 0.0400390625, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 198988828, + "step": 2974 + }, + { + "epoch": 0.3375886524822695, + "grad_norm": 29.45862579345703, + "learning_rate": 5e-05, + "loss": 1.1446, + "num_input_tokens_seen": 199054988, + "step": 2975 + }, + { + "epoch": 0.3375886524822695, + "loss": 1.2580273151397705, + "loss_ce": 0.009492229670286179, + "loss_iou": 0.53515625, + "loss_num": 0.035888671875, + "loss_xval": 1.25, + "num_input_tokens_seen": 199054988, + "step": 2975 + }, + { + "epoch": 0.33770212765957447, + "grad_norm": 27.242841720581055, + "learning_rate": 5e-05, + "loss": 1.2848, + "num_input_tokens_seen": 199121144, + "step": 2976 + }, + { + "epoch": 0.33770212765957447, + "loss": 1.2206456661224365, + "loss_ce": 0.003848799504339695, + "loss_iou": 0.51171875, + "loss_num": 0.038330078125, + "loss_xval": 1.21875, + "num_input_tokens_seen": 199121144, + "step": 2976 + }, + { + "epoch": 0.33781560283687945, + "grad_norm": 22.73362922668457, + "learning_rate": 5e-05, + "loss": 1.0561, + "num_input_tokens_seen": 199186904, + "step": 2977 + }, + { + "epoch": 0.33781560283687945, + "loss": 1.077774167060852, + "loss_ce": 0.006622446700930595, + "loss_iou": 0.439453125, + "loss_num": 0.03857421875, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 199186904, + "step": 2977 + }, + { + "epoch": 0.33792907801418437, + "grad_norm": 21.711023330688477, + "learning_rate": 5e-05, + "loss": 1.2025, + "num_input_tokens_seen": 199252652, + "step": 2978 + }, + { + "epoch": 0.33792907801418437, + "loss": 1.11457359790802, + "loss_ce": 0.006663442589342594, + "loss_iou": 0.46875, + "loss_num": 0.034423828125, + "loss_xval": 1.109375, + "num_input_tokens_seen": 199252652, + "step": 2978 + }, + { + "epoch": 0.33804255319148935, + "grad_norm": 20.791534423828125, + "learning_rate": 5e-05, + "loss": 1.0952, + "num_input_tokens_seen": 199318140, + "step": 2979 + }, + { + "epoch": 0.33804255319148935, + "loss": 1.316035509109497, + "loss_ce": 0.004512136802077293, + "loss_iou": 0.55078125, + "loss_num": 0.042724609375, + "loss_xval": 1.3125, + "num_input_tokens_seen": 199318140, + "step": 2979 + }, + { + "epoch": 0.3381560283687943, + "grad_norm": 16.299043655395508, + "learning_rate": 5e-05, + "loss": 1.1495, + "num_input_tokens_seen": 199386296, + "step": 2980 + }, + { + "epoch": 0.3381560283687943, + "loss": 0.9425130486488342, + "loss_ce": 0.0035481909289956093, + "loss_iou": 0.419921875, + "loss_num": 0.0196533203125, + "loss_xval": 0.9375, + "num_input_tokens_seen": 199386296, + "step": 2980 + }, + { + "epoch": 0.3382695035460993, + "grad_norm": 27.543535232543945, + "learning_rate": 5e-05, + "loss": 1.3971, + "num_input_tokens_seen": 199452540, + "step": 2981 + }, + { + "epoch": 0.3382695035460993, + "loss": 1.1281845569610596, + "loss_ce": 0.005137590691447258, + "loss_iou": 0.451171875, + "loss_num": 0.04443359375, + "loss_xval": 1.125, + "num_input_tokens_seen": 199452540, + "step": 2981 + }, + { + "epoch": 0.3383829787234043, + "grad_norm": 31.39117431640625, + "learning_rate": 5e-05, + "loss": 1.5298, + "num_input_tokens_seen": 199519260, + "step": 2982 + }, + { + "epoch": 0.3383829787234043, + "loss": 1.5423130989074707, + "loss_ce": 0.006180215626955032, + "loss_iou": 0.61328125, + "loss_num": 0.0625, + "loss_xval": 1.5390625, + "num_input_tokens_seen": 199519260, + "step": 2982 + }, + { + "epoch": 0.3384964539007092, + "grad_norm": 27.880931854248047, + "learning_rate": 5e-05, + "loss": 1.3788, + "num_input_tokens_seen": 199586496, + "step": 2983 + }, + { + "epoch": 0.3384964539007092, + "loss": 1.3105523586273193, + "loss_ce": 0.004888321738690138, + "loss_iou": 0.5625, + "loss_num": 0.03564453125, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 199586496, + "step": 2983 + }, + { + "epoch": 0.3386099290780142, + "grad_norm": 33.41088104248047, + "learning_rate": 5e-05, + "loss": 1.267, + "num_input_tokens_seen": 199653216, + "step": 2984 + }, + { + "epoch": 0.3386099290780142, + "loss": 1.2072813510894775, + "loss_ce": 0.009039098396897316, + "loss_iou": 0.4609375, + "loss_num": 0.054931640625, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 199653216, + "step": 2984 + }, + { + "epoch": 0.33872340425531916, + "grad_norm": 28.75536346435547, + "learning_rate": 5e-05, + "loss": 1.3954, + "num_input_tokens_seen": 199719224, + "step": 2985 + }, + { + "epoch": 0.33872340425531916, + "loss": 1.579371452331543, + "loss_ce": 0.0051526776514947414, + "loss_iou": 0.66796875, + "loss_num": 0.047119140625, + "loss_xval": 1.578125, + "num_input_tokens_seen": 199719224, + "step": 2985 + }, + { + "epoch": 0.33883687943262414, + "grad_norm": 23.878877639770508, + "learning_rate": 5e-05, + "loss": 1.3302, + "num_input_tokens_seen": 199786104, + "step": 2986 + }, + { + "epoch": 0.33883687943262414, + "loss": 1.574267864227295, + "loss_ce": 0.003955420572310686, + "loss_iou": 0.60546875, + "loss_num": 0.07177734375, + "loss_xval": 1.5703125, + "num_input_tokens_seen": 199786104, + "step": 2986 + }, + { + "epoch": 0.33895035460992906, + "grad_norm": 21.521921157836914, + "learning_rate": 5e-05, + "loss": 1.1905, + "num_input_tokens_seen": 199853084, + "step": 2987 + }, + { + "epoch": 0.33895035460992906, + "loss": 1.1962517499923706, + "loss_ce": 0.0033806334249675274, + "loss_iou": 0.51171875, + "loss_num": 0.03369140625, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 199853084, + "step": 2987 + }, + { + "epoch": 0.33906382978723404, + "grad_norm": 32.5815544128418, + "learning_rate": 5e-05, + "loss": 1.337, + "num_input_tokens_seen": 199919520, + "step": 2988 + }, + { + "epoch": 0.33906382978723404, + "loss": 1.431912899017334, + "loss_ce": 0.00564324576407671, + "loss_iou": 0.609375, + "loss_num": 0.04150390625, + "loss_xval": 1.4296875, + "num_input_tokens_seen": 199919520, + "step": 2988 + }, + { + "epoch": 0.339177304964539, + "grad_norm": 33.34239959716797, + "learning_rate": 5e-05, + "loss": 1.2673, + "num_input_tokens_seen": 199986568, + "step": 2989 + }, + { + "epoch": 0.339177304964539, + "loss": 1.158626675605774, + "loss_ce": 0.0028649293817579746, + "loss_iou": 0.515625, + "loss_num": 0.02490234375, + "loss_xval": 1.15625, + "num_input_tokens_seen": 199986568, + "step": 2989 + }, + { + "epoch": 0.339290780141844, + "grad_norm": 18.89088249206543, + "learning_rate": 5e-05, + "loss": 1.2013, + "num_input_tokens_seen": 200052864, + "step": 2990 + }, + { + "epoch": 0.339290780141844, + "loss": 1.2120311260223389, + "loss_ce": 0.00841792393475771, + "loss_iou": 0.46484375, + "loss_num": 0.05517578125, + "loss_xval": 1.203125, + "num_input_tokens_seen": 200052864, + "step": 2990 + }, + { + "epoch": 0.3394042553191489, + "grad_norm": 26.313915252685547, + "learning_rate": 5e-05, + "loss": 1.3162, + "num_input_tokens_seen": 200119100, + "step": 2991 + }, + { + "epoch": 0.3394042553191489, + "loss": 1.1959044933319092, + "loss_ce": 0.005963212810456753, + "loss_iou": 0.443359375, + "loss_num": 0.060791015625, + "loss_xval": 1.1875, + "num_input_tokens_seen": 200119100, + "step": 2991 + }, + { + "epoch": 0.3395177304964539, + "grad_norm": 154.52398681640625, + "learning_rate": 5e-05, + "loss": 1.5048, + "num_input_tokens_seen": 200185336, + "step": 2992 + }, + { + "epoch": 0.3395177304964539, + "loss": 1.438680648803711, + "loss_ce": 0.006551767699420452, + "loss_iou": 0.58203125, + "loss_num": 0.05419921875, + "loss_xval": 1.4296875, + "num_input_tokens_seen": 200185336, + "step": 2992 + }, + { + "epoch": 0.3396312056737589, + "grad_norm": 37.2259407043457, + "learning_rate": 5e-05, + "loss": 1.3719, + "num_input_tokens_seen": 200253148, + "step": 2993 + }, + { + "epoch": 0.3396312056737589, + "loss": 1.3358080387115479, + "loss_ce": 0.0023120057303458452, + "loss_iou": 0.54296875, + "loss_num": 0.049072265625, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 200253148, + "step": 2993 + }, + { + "epoch": 0.33974468085106385, + "grad_norm": 23.29169273376465, + "learning_rate": 5e-05, + "loss": 1.3426, + "num_input_tokens_seen": 200319048, + "step": 2994 + }, + { + "epoch": 0.33974468085106385, + "loss": 1.4057981967926025, + "loss_ce": 0.010290296748280525, + "loss_iou": 0.609375, + "loss_num": 0.034912109375, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 200319048, + "step": 2994 + }, + { + "epoch": 0.3398581560283688, + "grad_norm": 20.786956787109375, + "learning_rate": 5e-05, + "loss": 1.0672, + "num_input_tokens_seen": 200386676, + "step": 2995 + }, + { + "epoch": 0.3398581560283688, + "loss": 1.1632598638534546, + "loss_ce": 0.0033477861434221268, + "loss_iou": 0.47265625, + "loss_num": 0.042724609375, + "loss_xval": 1.15625, + "num_input_tokens_seen": 200386676, + "step": 2995 + }, + { + "epoch": 0.33997163120567375, + "grad_norm": 24.291353225708008, + "learning_rate": 5e-05, + "loss": 1.3479, + "num_input_tokens_seen": 200453632, + "step": 2996 + }, + { + "epoch": 0.33997163120567375, + "loss": 1.3276833295822144, + "loss_ce": 0.006394294090569019, + "loss_iou": 0.5546875, + "loss_num": 0.041748046875, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 200453632, + "step": 2996 + }, + { + "epoch": 0.34008510638297873, + "grad_norm": 28.978130340576172, + "learning_rate": 5e-05, + "loss": 1.0346, + "num_input_tokens_seen": 200520200, + "step": 2997 + }, + { + "epoch": 0.34008510638297873, + "loss": 0.9231925010681152, + "loss_ce": 0.004735463298857212, + "loss_iou": 0.396484375, + "loss_num": 0.0250244140625, + "loss_xval": 0.91796875, + "num_input_tokens_seen": 200520200, + "step": 2997 + }, + { + "epoch": 0.3401985815602837, + "grad_norm": 20.857616424560547, + "learning_rate": 5e-05, + "loss": 1.3518, + "num_input_tokens_seen": 200587240, + "step": 2998 + }, + { + "epoch": 0.3401985815602837, + "loss": 1.4627025127410889, + "loss_ce": 0.010554106906056404, + "loss_iou": 0.578125, + "loss_num": 0.058837890625, + "loss_xval": 1.453125, + "num_input_tokens_seen": 200587240, + "step": 2998 + }, + { + "epoch": 0.34031205673758863, + "grad_norm": 20.321096420288086, + "learning_rate": 5e-05, + "loss": 1.0192, + "num_input_tokens_seen": 200654676, + "step": 2999 + }, + { + "epoch": 0.34031205673758863, + "loss": 1.0292623043060303, + "loss_ce": 0.0033223473001271486, + "loss_iou": 0.427734375, + "loss_num": 0.0341796875, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 200654676, + "step": 2999 + }, + { + "epoch": 0.3404255319148936, + "grad_norm": 22.52290916442871, + "learning_rate": 5e-05, + "loss": 1.3813, + "num_input_tokens_seen": 200721872, + "step": 3000 + }, + { + "epoch": 0.3404255319148936, + "eval_seeclick_CIoU": 0.3793492615222931, + "eval_seeclick_GIoU": 0.3638128936290741, + "eval_seeclick_IoU": 0.46952326595783234, + "eval_seeclick_MAE_all": 0.1614639014005661, + "eval_seeclick_MAE_h": 0.06523976102471352, + "eval_seeclick_MAE_w": 0.11152889207005501, + "eval_seeclick_MAE_x_boxes": 0.24362291395664215, + "eval_seeclick_MAE_y_boxes": 0.13326731324195862, + "eval_seeclick_NUM_probability": 0.9988889098167419, + "eval_seeclick_inside_bbox": 0.6458333432674408, + "eval_seeclick_loss": 2.5379223823547363, + "eval_seeclick_loss_ce": 0.013924223370850086, + "eval_seeclick_loss_iou": 0.8876953125, + "eval_seeclick_loss_num": 0.16458892822265625, + "eval_seeclick_loss_xval": 2.5966796875, + "eval_seeclick_runtime": 68.7695, + "eval_seeclick_samples_per_second": 0.683, + "eval_seeclick_steps_per_second": 0.029, + "num_input_tokens_seen": 200721872, + "step": 3000 + }, + { + "epoch": 0.3404255319148936, + "eval_icons_CIoU": 0.4160472899675369, + "eval_icons_GIoU": 0.3970119506120682, + "eval_icons_IoU": 0.4843824803829193, + "eval_icons_MAE_all": 0.14203189313411713, + "eval_icons_MAE_h": 0.049959173426032066, + "eval_icons_MAE_w": 0.0644185058772564, + "eval_icons_MAE_x_boxes": 0.164824016392231, + "eval_icons_MAE_y_boxes": 0.1387418620288372, + "eval_icons_NUM_probability": 0.9998688697814941, + "eval_icons_inside_bbox": 0.7083333432674408, + "eval_icons_loss": 2.333512306213379, + "eval_icons_loss_ce": 7.4054638389498e-05, + "eval_icons_loss_iou": 0.8193359375, + "eval_icons_loss_num": 0.13240432739257812, + "eval_icons_loss_xval": 2.298583984375, + "eval_icons_runtime": 82.8308, + "eval_icons_samples_per_second": 0.604, + "eval_icons_steps_per_second": 0.024, + "num_input_tokens_seen": 200721872, + "step": 3000 + }, + { + "epoch": 0.3404255319148936, + "eval_screenspot_CIoU": 0.33921002348264057, + "eval_screenspot_GIoU": 0.3190649300813675, + "eval_screenspot_IoU": 0.41962480545043945, + "eval_screenspot_MAE_all": 0.1894338329633077, + "eval_screenspot_MAE_h": 0.1287918488184611, + "eval_screenspot_MAE_w": 0.19160888095696768, + "eval_screenspot_MAE_x_boxes": 0.25674809018770856, + "eval_screenspot_MAE_y_boxes": 0.09983008230725925, + "eval_screenspot_NUM_probability": 0.9994677305221558, + "eval_screenspot_inside_bbox": 0.6150000095367432, + "eval_screenspot_loss": 2.88435959815979, + "eval_screenspot_loss_ce": 0.017143954212466877, + "eval_screenspot_loss_iou": 0.9599609375, + "eval_screenspot_loss_num": 0.20298258463541666, + "eval_screenspot_loss_xval": 2.9339192708333335, + "eval_screenspot_runtime": 122.8835, + "eval_screenspot_samples_per_second": 0.724, + "eval_screenspot_steps_per_second": 0.024, + "num_input_tokens_seen": 200721872, + "step": 3000 + }, + { + "epoch": 0.3404255319148936, + "eval_compot_CIoU": 0.40983234345912933, + "eval_compot_GIoU": 0.38453309237957, + "eval_compot_IoU": 0.4879655987024307, + "eval_compot_MAE_all": 0.15548492968082428, + "eval_compot_MAE_h": 0.057424929924309254, + "eval_compot_MAE_w": 0.165506973862648, + "eval_compot_MAE_x_boxes": 0.16740216314792633, + "eval_compot_MAE_y_boxes": 0.10803579166531563, + "eval_compot_NUM_probability": 0.9997179210186005, + "eval_compot_inside_bbox": 0.6145833432674408, + "eval_compot_loss": 2.6516716480255127, + "eval_compot_loss_ce": 0.004432738525792956, + "eval_compot_loss_iou": 0.911865234375, + "eval_compot_loss_num": 0.17632675170898438, + "eval_compot_loss_xval": 2.7060546875, + "eval_compot_runtime": 66.8669, + "eval_compot_samples_per_second": 0.748, + "eval_compot_steps_per_second": 0.03, + "num_input_tokens_seen": 200721872, + "step": 3000 + }, + { + "epoch": 0.3404255319148936, + "loss": 2.5178966522216797, + "loss_ce": 0.004224574193358421, + "loss_iou": 0.875, + "loss_num": 0.15234375, + "loss_xval": 2.515625, + "num_input_tokens_seen": 200721872, + "step": 3000 + }, + { + "epoch": 0.3405390070921986, + "grad_norm": 28.027379989624023, + "learning_rate": 5e-05, + "loss": 1.0055, + "num_input_tokens_seen": 200789108, + "step": 3001 + }, + { + "epoch": 0.3405390070921986, + "loss": 1.187981367111206, + "loss_ce": 0.0063406857661902905, + "loss_iou": 0.515625, + "loss_num": 0.0299072265625, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 200789108, + "step": 3001 + }, + { + "epoch": 0.34065248226950356, + "grad_norm": 38.65955352783203, + "learning_rate": 5e-05, + "loss": 1.4119, + "num_input_tokens_seen": 200856552, + "step": 3002 + }, + { + "epoch": 0.34065248226950356, + "loss": 1.4268381595611572, + "loss_ce": 0.006916303653270006, + "loss_iou": 0.58984375, + "loss_num": 0.048583984375, + "loss_xval": 1.421875, + "num_input_tokens_seen": 200856552, + "step": 3002 + }, + { + "epoch": 0.3407659574468085, + "grad_norm": 18.344022750854492, + "learning_rate": 5e-05, + "loss": 1.085, + "num_input_tokens_seen": 200923240, + "step": 3003 + }, + { + "epoch": 0.3407659574468085, + "loss": 1.2428256273269653, + "loss_ce": 0.008450619876384735, + "loss_iou": 0.482421875, + "loss_num": 0.05419921875, + "loss_xval": 1.234375, + "num_input_tokens_seen": 200923240, + "step": 3003 + }, + { + "epoch": 0.34087943262411347, + "grad_norm": 22.122650146484375, + "learning_rate": 5e-05, + "loss": 1.2628, + "num_input_tokens_seen": 200989524, + "step": 3004 + }, + { + "epoch": 0.34087943262411347, + "loss": 1.2361950874328613, + "loss_ce": 0.0027967053465545177, + "loss_iou": 0.515625, + "loss_num": 0.0400390625, + "loss_xval": 1.234375, + "num_input_tokens_seen": 200989524, + "step": 3004 + }, + { + "epoch": 0.34099290780141844, + "grad_norm": 39.888221740722656, + "learning_rate": 5e-05, + "loss": 1.1537, + "num_input_tokens_seen": 201055740, + "step": 3005 + }, + { + "epoch": 0.34099290780141844, + "loss": 1.2134487628936768, + "loss_ce": 0.012276943773031235, + "loss_iou": 0.51953125, + "loss_num": 0.03173828125, + "loss_xval": 1.203125, + "num_input_tokens_seen": 201055740, + "step": 3005 + }, + { + "epoch": 0.3411063829787234, + "grad_norm": 29.678791046142578, + "learning_rate": 5e-05, + "loss": 1.4333, + "num_input_tokens_seen": 201122612, + "step": 3006 + }, + { + "epoch": 0.3411063829787234, + "loss": 1.4021329879760742, + "loss_ce": 0.003695548279210925, + "loss_iou": 0.625, + "loss_num": 0.0301513671875, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 201122612, + "step": 3006 + }, + { + "epoch": 0.34121985815602834, + "grad_norm": 17.866178512573242, + "learning_rate": 5e-05, + "loss": 1.2432, + "num_input_tokens_seen": 201189588, + "step": 3007 + }, + { + "epoch": 0.34121985815602834, + "loss": 1.0546663999557495, + "loss_ce": 0.005349957384169102, + "loss_iou": 0.46484375, + "loss_num": 0.0238037109375, + "loss_xval": 1.046875, + "num_input_tokens_seen": 201189588, + "step": 3007 + }, + { + "epoch": 0.3413333333333333, + "grad_norm": 15.524761199951172, + "learning_rate": 5e-05, + "loss": 1.0831, + "num_input_tokens_seen": 201256792, + "step": 3008 + }, + { + "epoch": 0.3413333333333333, + "loss": 1.0805461406707764, + "loss_ce": 0.002421214710921049, + "loss_iou": 0.486328125, + "loss_num": 0.02099609375, + "loss_xval": 1.078125, + "num_input_tokens_seen": 201256792, + "step": 3008 + }, + { + "epoch": 0.3414468085106383, + "grad_norm": 36.35088348388672, + "learning_rate": 5e-05, + "loss": 1.178, + "num_input_tokens_seen": 201323128, + "step": 3009 + }, + { + "epoch": 0.3414468085106383, + "loss": 1.1564648151397705, + "loss_ce": 0.010957046411931515, + "loss_iou": 0.48046875, + "loss_num": 0.03662109375, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 201323128, + "step": 3009 + }, + { + "epoch": 0.3415602836879433, + "grad_norm": 19.599735260009766, + "learning_rate": 5e-05, + "loss": 1.2131, + "num_input_tokens_seen": 201389492, + "step": 3010 + }, + { + "epoch": 0.3415602836879433, + "loss": 1.2647507190704346, + "loss_ce": 0.0030320486985147, + "loss_iou": 0.53125, + "loss_num": 0.0400390625, + "loss_xval": 1.265625, + "num_input_tokens_seen": 201389492, + "step": 3010 + }, + { + "epoch": 0.3416737588652482, + "grad_norm": 17.21547508239746, + "learning_rate": 5e-05, + "loss": 1.1061, + "num_input_tokens_seen": 201455768, + "step": 3011 + }, + { + "epoch": 0.3416737588652482, + "loss": 0.9300028681755066, + "loss_ce": 0.005686452612280846, + "loss_iou": 0.40625, + "loss_num": 0.0224609375, + "loss_xval": 0.92578125, + "num_input_tokens_seen": 201455768, + "step": 3011 + }, + { + "epoch": 0.3417872340425532, + "grad_norm": 23.723222732543945, + "learning_rate": 5e-05, + "loss": 1.3414, + "num_input_tokens_seen": 201523088, + "step": 3012 + }, + { + "epoch": 0.3417872340425532, + "loss": 1.1776158809661865, + "loss_ce": 0.006229129619896412, + "loss_iou": 0.515625, + "loss_num": 0.0279541015625, + "loss_xval": 1.171875, + "num_input_tokens_seen": 201523088, + "step": 3012 + }, + { + "epoch": 0.34190070921985816, + "grad_norm": 25.66570281982422, + "learning_rate": 5e-05, + "loss": 1.3006, + "num_input_tokens_seen": 201590976, + "step": 3013 + }, + { + "epoch": 0.34190070921985816, + "loss": 1.3020380735397339, + "loss_ce": 0.005651400424540043, + "loss_iou": 0.484375, + "loss_num": 0.0654296875, + "loss_xval": 1.296875, + "num_input_tokens_seen": 201590976, + "step": 3013 + }, + { + "epoch": 0.34201418439716313, + "grad_norm": 29.805654525756836, + "learning_rate": 5e-05, + "loss": 1.283, + "num_input_tokens_seen": 201658596, + "step": 3014 + }, + { + "epoch": 0.34201418439716313, + "loss": 1.475944995880127, + "loss_ce": 0.007194926962256432, + "loss_iou": 0.6171875, + "loss_num": 0.04638671875, + "loss_xval": 1.46875, + "num_input_tokens_seen": 201658596, + "step": 3014 + }, + { + "epoch": 0.3421276595744681, + "grad_norm": 51.356502532958984, + "learning_rate": 5e-05, + "loss": 1.4752, + "num_input_tokens_seen": 201724856, + "step": 3015 + }, + { + "epoch": 0.3421276595744681, + "loss": 1.3933372497558594, + "loss_ce": 0.006618523970246315, + "loss_iou": 0.58984375, + "loss_num": 0.04150390625, + "loss_xval": 1.390625, + "num_input_tokens_seen": 201724856, + "step": 3015 + }, + { + "epoch": 0.34224113475177304, + "grad_norm": 23.61267852783203, + "learning_rate": 5e-05, + "loss": 1.51, + "num_input_tokens_seen": 201792260, + "step": 3016 + }, + { + "epoch": 0.34224113475177304, + "loss": 1.6243109703063965, + "loss_ce": 0.006635249592363834, + "loss_iou": 0.6796875, + "loss_num": 0.05126953125, + "loss_xval": 1.6171875, + "num_input_tokens_seen": 201792260, + "step": 3016 + }, + { + "epoch": 0.342354609929078, + "grad_norm": 39.73361587524414, + "learning_rate": 5e-05, + "loss": 1.2494, + "num_input_tokens_seen": 201859440, + "step": 3017 + }, + { + "epoch": 0.342354609929078, + "loss": 1.1062920093536377, + "loss_ce": 0.006682570558041334, + "loss_iou": 0.431640625, + "loss_num": 0.046875, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 201859440, + "step": 3017 + }, + { + "epoch": 0.342468085106383, + "grad_norm": 17.529081344604492, + "learning_rate": 5e-05, + "loss": 1.1841, + "num_input_tokens_seen": 201926108, + "step": 3018 + }, + { + "epoch": 0.342468085106383, + "loss": 1.0550552606582642, + "loss_ce": 0.007203730754554272, + "loss_iou": 0.44140625, + "loss_num": 0.033203125, + "loss_xval": 1.046875, + "num_input_tokens_seen": 201926108, + "step": 3018 + }, + { + "epoch": 0.34258156028368797, + "grad_norm": 16.765966415405273, + "learning_rate": 5e-05, + "loss": 1.0842, + "num_input_tokens_seen": 201993036, + "step": 3019 + }, + { + "epoch": 0.34258156028368797, + "loss": 1.038224458694458, + "loss_ce": 0.00648622028529644, + "loss_iou": 0.455078125, + "loss_num": 0.0244140625, + "loss_xval": 1.03125, + "num_input_tokens_seen": 201993036, + "step": 3019 + }, + { + "epoch": 0.3426950354609929, + "grad_norm": 24.90684700012207, + "learning_rate": 5e-05, + "loss": 1.329, + "num_input_tokens_seen": 202059920, + "step": 3020 + }, + { + "epoch": 0.3426950354609929, + "loss": 1.2804569005966187, + "loss_ce": 0.0055546327494084835, + "loss_iou": 0.51171875, + "loss_num": 0.0498046875, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 202059920, + "step": 3020 + }, + { + "epoch": 0.34280851063829787, + "grad_norm": 26.438861846923828, + "learning_rate": 5e-05, + "loss": 1.4673, + "num_input_tokens_seen": 202126184, + "step": 3021 + }, + { + "epoch": 0.34280851063829787, + "loss": 1.344552755355835, + "loss_ce": 0.004342751577496529, + "loss_iou": 0.53515625, + "loss_num": 0.0546875, + "loss_xval": 1.34375, + "num_input_tokens_seen": 202126184, + "step": 3021 + }, + { + "epoch": 0.34292198581560285, + "grad_norm": 26.54157066345215, + "learning_rate": 5e-05, + "loss": 1.2759, + "num_input_tokens_seen": 202193052, + "step": 3022 + }, + { + "epoch": 0.34292198581560285, + "loss": 1.2384357452392578, + "loss_ce": 0.0035723575856536627, + "loss_iou": 0.5078125, + "loss_num": 0.043701171875, + "loss_xval": 1.234375, + "num_input_tokens_seen": 202193052, + "step": 3022 + }, + { + "epoch": 0.3430354609929078, + "grad_norm": 23.45083236694336, + "learning_rate": 5e-05, + "loss": 1.2988, + "num_input_tokens_seen": 202260476, + "step": 3023 + }, + { + "epoch": 0.3430354609929078, + "loss": 1.237138271331787, + "loss_ce": 0.006669491063803434, + "loss_iou": 0.4921875, + "loss_num": 0.04931640625, + "loss_xval": 1.234375, + "num_input_tokens_seen": 202260476, + "step": 3023 + }, + { + "epoch": 0.34314893617021275, + "grad_norm": 21.570287704467773, + "learning_rate": 5e-05, + "loss": 1.0456, + "num_input_tokens_seen": 202326328, + "step": 3024 + }, + { + "epoch": 0.34314893617021275, + "loss": 1.1283353567123413, + "loss_ce": 0.00455606821924448, + "loss_iou": 0.474609375, + "loss_num": 0.03466796875, + "loss_xval": 1.125, + "num_input_tokens_seen": 202326328, + "step": 3024 + }, + { + "epoch": 0.3432624113475177, + "grad_norm": 38.64506149291992, + "learning_rate": 5e-05, + "loss": 1.7068, + "num_input_tokens_seen": 202393432, + "step": 3025 + }, + { + "epoch": 0.3432624113475177, + "loss": 1.6337846517562866, + "loss_ce": 0.009272843599319458, + "loss_iou": 0.64453125, + "loss_num": 0.06640625, + "loss_xval": 1.625, + "num_input_tokens_seen": 202393432, + "step": 3025 + }, + { + "epoch": 0.3433758865248227, + "grad_norm": 33.45963668823242, + "learning_rate": 5e-05, + "loss": 1.2477, + "num_input_tokens_seen": 202460508, + "step": 3026 + }, + { + "epoch": 0.3433758865248227, + "loss": 1.1193873882293701, + "loss_ce": 0.007265820167958736, + "loss_iou": 0.474609375, + "loss_num": 0.0322265625, + "loss_xval": 1.109375, + "num_input_tokens_seen": 202460508, + "step": 3026 + }, + { + "epoch": 0.3434893617021277, + "grad_norm": 68.07694244384766, + "learning_rate": 5e-05, + "loss": 1.3835, + "num_input_tokens_seen": 202527328, + "step": 3027 + }, + { + "epoch": 0.3434893617021277, + "loss": 1.3699512481689453, + "loss_ce": 0.004716834984719753, + "loss_iou": 0.6015625, + "loss_num": 0.03173828125, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 202527328, + "step": 3027 + }, + { + "epoch": 0.3436028368794326, + "grad_norm": 21.087697982788086, + "learning_rate": 5e-05, + "loss": 1.3244, + "num_input_tokens_seen": 202594392, + "step": 3028 + }, + { + "epoch": 0.3436028368794326, + "loss": 1.2922215461730957, + "loss_ce": 0.0051122065633535385, + "loss_iou": 0.51953125, + "loss_num": 0.049072265625, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 202594392, + "step": 3028 + }, + { + "epoch": 0.3437163120567376, + "grad_norm": 20.696245193481445, + "learning_rate": 5e-05, + "loss": 1.1842, + "num_input_tokens_seen": 202660900, + "step": 3029 + }, + { + "epoch": 0.3437163120567376, + "loss": 1.2556030750274658, + "loss_ce": 0.007556240539997816, + "loss_iou": 0.546875, + "loss_num": 0.031005859375, + "loss_xval": 1.25, + "num_input_tokens_seen": 202660900, + "step": 3029 + }, + { + "epoch": 0.34382978723404256, + "grad_norm": 34.545448303222656, + "learning_rate": 5e-05, + "loss": 1.3863, + "num_input_tokens_seen": 202727572, + "step": 3030 + }, + { + "epoch": 0.34382978723404256, + "loss": 1.4239609241485596, + "loss_ce": 0.004039098974317312, + "loss_iou": 0.57421875, + "loss_num": 0.0546875, + "loss_xval": 1.421875, + "num_input_tokens_seen": 202727572, + "step": 3030 + }, + { + "epoch": 0.34394326241134754, + "grad_norm": 25.24431037902832, + "learning_rate": 5e-05, + "loss": 1.252, + "num_input_tokens_seen": 202794328, + "step": 3031 + }, + { + "epoch": 0.34394326241134754, + "loss": 1.2374967336654663, + "loss_ce": 0.0031217276118695736, + "loss_iou": 0.515625, + "loss_num": 0.0400390625, + "loss_xval": 1.234375, + "num_input_tokens_seen": 202794328, + "step": 3031 + }, + { + "epoch": 0.34405673758865246, + "grad_norm": 14.629514694213867, + "learning_rate": 5e-05, + "loss": 1.3793, + "num_input_tokens_seen": 202861168, + "step": 3032 + }, + { + "epoch": 0.34405673758865246, + "loss": 1.3612042665481567, + "loss_ce": 0.00866520032286644, + "loss_iou": 0.51953125, + "loss_num": 0.06298828125, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 202861168, + "step": 3032 + }, + { + "epoch": 0.34417021276595744, + "grad_norm": 23.110872268676758, + "learning_rate": 5e-05, + "loss": 1.281, + "num_input_tokens_seen": 202927888, + "step": 3033 + }, + { + "epoch": 0.34417021276595744, + "loss": 1.3152868747711182, + "loss_ce": 0.005716449581086636, + "loss_iou": 0.5546875, + "loss_num": 0.040283203125, + "loss_xval": 1.3125, + "num_input_tokens_seen": 202927888, + "step": 3033 + }, + { + "epoch": 0.3442836879432624, + "grad_norm": 19.521944046020508, + "learning_rate": 5e-05, + "loss": 1.3479, + "num_input_tokens_seen": 202995676, + "step": 3034 + }, + { + "epoch": 0.3442836879432624, + "loss": 1.462666630744934, + "loss_ce": 0.0027057018596678972, + "loss_iou": 0.5859375, + "loss_num": 0.057861328125, + "loss_xval": 1.4609375, + "num_input_tokens_seen": 202995676, + "step": 3034 + }, + { + "epoch": 0.3443971631205674, + "grad_norm": 17.576671600341797, + "learning_rate": 5e-05, + "loss": 1.1625, + "num_input_tokens_seen": 203061432, + "step": 3035 + }, + { + "epoch": 0.3443971631205674, + "loss": 1.134432315826416, + "loss_ce": 0.015291694551706314, + "loss_iou": 0.46484375, + "loss_num": 0.0380859375, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 203061432, + "step": 3035 + }, + { + "epoch": 0.3445106382978723, + "grad_norm": 14.398576736450195, + "learning_rate": 5e-05, + "loss": 1.2415, + "num_input_tokens_seen": 203128368, + "step": 3036 + }, + { + "epoch": 0.3445106382978723, + "loss": 1.2107734680175781, + "loss_ce": 0.0056952983140945435, + "loss_iou": 0.51171875, + "loss_num": 0.036865234375, + "loss_xval": 1.203125, + "num_input_tokens_seen": 203128368, + "step": 3036 + }, + { + "epoch": 0.3446241134751773, + "grad_norm": 22.216812133789062, + "learning_rate": 5e-05, + "loss": 1.1605, + "num_input_tokens_seen": 203193472, + "step": 3037 + }, + { + "epoch": 0.3446241134751773, + "loss": 1.4259395599365234, + "loss_ce": 0.005529340356588364, + "loss_iou": 0.55859375, + "loss_num": 0.060546875, + "loss_xval": 1.421875, + "num_input_tokens_seen": 203193472, + "step": 3037 + }, + { + "epoch": 0.3447375886524823, + "grad_norm": 44.120018005371094, + "learning_rate": 5e-05, + "loss": 1.3164, + "num_input_tokens_seen": 203260888, + "step": 3038 + }, + { + "epoch": 0.3447375886524823, + "loss": 1.457543969154358, + "loss_ce": 0.0053955488838255405, + "loss_iou": 0.59375, + "loss_num": 0.05322265625, + "loss_xval": 1.453125, + "num_input_tokens_seen": 203260888, + "step": 3038 + }, + { + "epoch": 0.34485106382978725, + "grad_norm": 26.342403411865234, + "learning_rate": 5e-05, + "loss": 1.4431, + "num_input_tokens_seen": 203328084, + "step": 3039 + }, + { + "epoch": 0.34485106382978725, + "loss": 1.4906526803970337, + "loss_ce": 0.006277658976614475, + "loss_iou": 0.6328125, + "loss_num": 0.04345703125, + "loss_xval": 1.484375, + "num_input_tokens_seen": 203328084, + "step": 3039 + }, + { + "epoch": 0.3449645390070922, + "grad_norm": 26.055063247680664, + "learning_rate": 5e-05, + "loss": 1.2631, + "num_input_tokens_seen": 203395124, + "step": 3040 + }, + { + "epoch": 0.3449645390070922, + "loss": 1.242133378982544, + "loss_ce": 0.006293573882430792, + "loss_iou": 0.498046875, + "loss_num": 0.048095703125, + "loss_xval": 1.234375, + "num_input_tokens_seen": 203395124, + "step": 3040 + }, + { + "epoch": 0.34507801418439715, + "grad_norm": 16.44096565246582, + "learning_rate": 5e-05, + "loss": 1.015, + "num_input_tokens_seen": 203462188, + "step": 3041 + }, + { + "epoch": 0.34507801418439715, + "loss": 1.047668695449829, + "loss_ce": 0.00469998549669981, + "loss_iou": 0.43359375, + "loss_num": 0.035400390625, + "loss_xval": 1.046875, + "num_input_tokens_seen": 203462188, + "step": 3041 + }, + { + "epoch": 0.34519148936170213, + "grad_norm": 32.14612579345703, + "learning_rate": 5e-05, + "loss": 1.3775, + "num_input_tokens_seen": 203528932, + "step": 3042 + }, + { + "epoch": 0.34519148936170213, + "loss": 1.3714842796325684, + "loss_ce": 0.004785002674907446, + "loss_iou": 0.56640625, + "loss_num": 0.046630859375, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 203528932, + "step": 3042 + }, + { + "epoch": 0.3453049645390071, + "grad_norm": 39.873783111572266, + "learning_rate": 5e-05, + "loss": 1.2665, + "num_input_tokens_seen": 203596100, + "step": 3043 + }, + { + "epoch": 0.3453049645390071, + "loss": 1.234605312347412, + "loss_ce": 0.005601497367024422, + "loss_iou": 0.56640625, + "loss_num": 0.018798828125, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 203596100, + "step": 3043 + }, + { + "epoch": 0.34541843971631203, + "grad_norm": 25.906042098999023, + "learning_rate": 5e-05, + "loss": 1.3025, + "num_input_tokens_seen": 203662672, + "step": 3044 + }, + { + "epoch": 0.34541843971631203, + "loss": 1.1331520080566406, + "loss_ce": 0.0024605526123195887, + "loss_iou": 0.515625, + "loss_num": 0.019775390625, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 203662672, + "step": 3044 + }, + { + "epoch": 0.345531914893617, + "grad_norm": 10.001694679260254, + "learning_rate": 5e-05, + "loss": 1.0165, + "num_input_tokens_seen": 203729024, + "step": 3045 + }, + { + "epoch": 0.345531914893617, + "loss": 1.0407917499542236, + "loss_ce": 0.002705843187868595, + "loss_iou": 0.447265625, + "loss_num": 0.029052734375, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 203729024, + "step": 3045 + }, + { + "epoch": 0.345645390070922, + "grad_norm": 21.39406967163086, + "learning_rate": 5e-05, + "loss": 1.1556, + "num_input_tokens_seen": 203796272, + "step": 3046 + }, + { + "epoch": 0.345645390070922, + "loss": 1.083256721496582, + "loss_ce": 0.006108269095420837, + "loss_iou": 0.443359375, + "loss_num": 0.038330078125, + "loss_xval": 1.078125, + "num_input_tokens_seen": 203796272, + "step": 3046 + }, + { + "epoch": 0.34575886524822697, + "grad_norm": 35.07485580444336, + "learning_rate": 5e-05, + "loss": 1.3424, + "num_input_tokens_seen": 203862404, + "step": 3047 + }, + { + "epoch": 0.34575886524822697, + "loss": 1.2523765563964844, + "loss_ce": 0.006282792426645756, + "loss_iou": 0.4921875, + "loss_num": 0.052734375, + "loss_xval": 1.25, + "num_input_tokens_seen": 203862404, + "step": 3047 + }, + { + "epoch": 0.3458723404255319, + "grad_norm": 35.48810577392578, + "learning_rate": 5e-05, + "loss": 1.177, + "num_input_tokens_seen": 203929096, + "step": 3048 + }, + { + "epoch": 0.3458723404255319, + "loss": 1.2453527450561523, + "loss_ce": 0.005118348635733128, + "loss_iou": 0.4921875, + "loss_num": 0.051513671875, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 203929096, + "step": 3048 + }, + { + "epoch": 0.34598581560283687, + "grad_norm": 19.827234268188477, + "learning_rate": 5e-05, + "loss": 1.1522, + "num_input_tokens_seen": 203996852, + "step": 3049 + }, + { + "epoch": 0.34598581560283687, + "loss": 1.2800211906433105, + "loss_ce": 0.006095470394939184, + "loss_iou": 0.56640625, + "loss_num": 0.02783203125, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 203996852, + "step": 3049 + }, + { + "epoch": 0.34609929078014184, + "grad_norm": 28.365968704223633, + "learning_rate": 5e-05, + "loss": 1.2458, + "num_input_tokens_seen": 204063336, + "step": 3050 + }, + { + "epoch": 0.34609929078014184, + "loss": 1.3251880407333374, + "loss_ce": 0.004875541664659977, + "loss_iou": 0.57421875, + "loss_num": 0.03515625, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 204063336, + "step": 3050 + }, + { + "epoch": 0.3462127659574468, + "grad_norm": 24.787118911743164, + "learning_rate": 5e-05, + "loss": 1.3943, + "num_input_tokens_seen": 204129912, + "step": 3051 + }, + { + "epoch": 0.3462127659574468, + "loss": 1.6005091667175293, + "loss_ce": 0.007735834456980228, + "loss_iou": 0.64453125, + "loss_num": 0.061279296875, + "loss_xval": 1.59375, + "num_input_tokens_seen": 204129912, + "step": 3051 + }, + { + "epoch": 0.3463262411347518, + "grad_norm": 37.46104049682617, + "learning_rate": 5e-05, + "loss": 1.1503, + "num_input_tokens_seen": 204196964, + "step": 3052 + }, + { + "epoch": 0.3463262411347518, + "loss": 1.2076523303985596, + "loss_ce": 0.004527395125478506, + "loss_iou": 0.51953125, + "loss_num": 0.033203125, + "loss_xval": 1.203125, + "num_input_tokens_seen": 204196964, + "step": 3052 + }, + { + "epoch": 0.3464397163120567, + "grad_norm": 24.270822525024414, + "learning_rate": 5e-05, + "loss": 1.5866, + "num_input_tokens_seen": 204263980, + "step": 3053 + }, + { + "epoch": 0.3464397163120567, + "loss": 1.3043780326843262, + "loss_ce": 0.005549980327486992, + "loss_iou": 0.57421875, + "loss_num": 0.030517578125, + "loss_xval": 1.296875, + "num_input_tokens_seen": 204263980, + "step": 3053 + }, + { + "epoch": 0.3465531914893617, + "grad_norm": 18.50400161743164, + "learning_rate": 5e-05, + "loss": 1.1267, + "num_input_tokens_seen": 204330716, + "step": 3054 + }, + { + "epoch": 0.3465531914893617, + "loss": 1.138429045677185, + "loss_ce": 0.005616520065814257, + "loss_iou": 0.482421875, + "loss_num": 0.033203125, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 204330716, + "step": 3054 + }, + { + "epoch": 0.3466666666666667, + "grad_norm": 39.13465881347656, + "learning_rate": 5e-05, + "loss": 1.4199, + "num_input_tokens_seen": 204397956, + "step": 3055 + }, + { + "epoch": 0.3466666666666667, + "loss": 1.3749544620513916, + "loss_ce": 0.009231874719262123, + "loss_iou": 0.5234375, + "loss_num": 0.06298828125, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 204397956, + "step": 3055 + }, + { + "epoch": 0.34678014184397166, + "grad_norm": 26.68291664123535, + "learning_rate": 5e-05, + "loss": 1.5535, + "num_input_tokens_seen": 204463768, + "step": 3056 + }, + { + "epoch": 0.34678014184397166, + "loss": 1.6251540184020996, + "loss_ce": 0.0030836365185678005, + "loss_iou": 0.66015625, + "loss_num": 0.0595703125, + "loss_xval": 1.625, + "num_input_tokens_seen": 204463768, + "step": 3056 + }, + { + "epoch": 0.3468936170212766, + "grad_norm": 31.61162757873535, + "learning_rate": 5e-05, + "loss": 1.0107, + "num_input_tokens_seen": 204530468, + "step": 3057 + }, + { + "epoch": 0.3468936170212766, + "loss": 1.0500919818878174, + "loss_ce": 0.005658403970301151, + "loss_iou": 0.44921875, + "loss_num": 0.029296875, + "loss_xval": 1.046875, + "num_input_tokens_seen": 204530468, + "step": 3057 + }, + { + "epoch": 0.34700709219858156, + "grad_norm": 28.51485252380371, + "learning_rate": 5e-05, + "loss": 1.1842, + "num_input_tokens_seen": 204597272, + "step": 3058 + }, + { + "epoch": 0.34700709219858156, + "loss": 1.0270273685455322, + "loss_ce": 0.0052989209070801735, + "loss_iou": 0.447265625, + "loss_num": 0.0250244140625, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 204597272, + "step": 3058 + }, + { + "epoch": 0.34712056737588654, + "grad_norm": 45.36738586425781, + "learning_rate": 5e-05, + "loss": 1.5041, + "num_input_tokens_seen": 204665328, + "step": 3059 + }, + { + "epoch": 0.34712056737588654, + "loss": 1.6069321632385254, + "loss_ce": 0.004393164999783039, + "loss_iou": 0.65234375, + "loss_num": 0.060302734375, + "loss_xval": 1.6015625, + "num_input_tokens_seen": 204665328, + "step": 3059 + }, + { + "epoch": 0.3472340425531915, + "grad_norm": 22.128795623779297, + "learning_rate": 5e-05, + "loss": 1.6857, + "num_input_tokens_seen": 204732120, + "step": 3060 + }, + { + "epoch": 0.3472340425531915, + "loss": 1.5905349254608154, + "loss_ce": 0.0026442562229931355, + "loss_iou": 0.6796875, + "loss_num": 0.04541015625, + "loss_xval": 1.5859375, + "num_input_tokens_seen": 204732120, + "step": 3060 + }, + { + "epoch": 0.34734751773049644, + "grad_norm": 20.558719635009766, + "learning_rate": 5e-05, + "loss": 1.2222, + "num_input_tokens_seen": 204797892, + "step": 3061 + }, + { + "epoch": 0.34734751773049644, + "loss": 1.270999789237976, + "loss_ce": 0.007816263474524021, + "loss_iou": 0.46875, + "loss_num": 0.06494140625, + "loss_xval": 1.265625, + "num_input_tokens_seen": 204797892, + "step": 3061 + }, + { + "epoch": 0.3474609929078014, + "grad_norm": 17.72226905822754, + "learning_rate": 5e-05, + "loss": 1.0322, + "num_input_tokens_seen": 204863740, + "step": 3062 + }, + { + "epoch": 0.3474609929078014, + "loss": 0.9886425733566284, + "loss_ce": 0.0039013889618217945, + "loss_iou": 0.39453125, + "loss_num": 0.0390625, + "loss_xval": 0.984375, + "num_input_tokens_seen": 204863740, + "step": 3062 + }, + { + "epoch": 0.3475744680851064, + "grad_norm": 22.519838333129883, + "learning_rate": 5e-05, + "loss": 1.1877, + "num_input_tokens_seen": 204931688, + "step": 3063 + }, + { + "epoch": 0.3475744680851064, + "loss": 1.0642187595367432, + "loss_ce": 0.004160224925726652, + "loss_iou": 0.435546875, + "loss_num": 0.037841796875, + "loss_xval": 1.0625, + "num_input_tokens_seen": 204931688, + "step": 3063 + }, + { + "epoch": 0.34768794326241137, + "grad_norm": 48.31447219848633, + "learning_rate": 5e-05, + "loss": 1.2599, + "num_input_tokens_seen": 204999748, + "step": 3064 + }, + { + "epoch": 0.34768794326241137, + "loss": 1.3717166185379028, + "loss_ce": 0.007947033271193504, + "loss_iou": 0.57421875, + "loss_num": 0.043212890625, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 204999748, + "step": 3064 + }, + { + "epoch": 0.3478014184397163, + "grad_norm": 26.640987396240234, + "learning_rate": 5e-05, + "loss": 1.4292, + "num_input_tokens_seen": 205067892, + "step": 3065 + }, + { + "epoch": 0.3478014184397163, + "loss": 1.5202170610427856, + "loss_ce": 0.005568698979914188, + "loss_iou": 0.6640625, + "loss_num": 0.037109375, + "loss_xval": 1.515625, + "num_input_tokens_seen": 205067892, + "step": 3065 + }, + { + "epoch": 0.34791489361702127, + "grad_norm": 16.41693687438965, + "learning_rate": 5e-05, + "loss": 1.3755, + "num_input_tokens_seen": 205133608, + "step": 3066 + }, + { + "epoch": 0.34791489361702127, + "loss": 1.4332621097564697, + "loss_ce": 0.004795233719050884, + "loss_iou": 0.5625, + "loss_num": 0.059326171875, + "loss_xval": 1.4296875, + "num_input_tokens_seen": 205133608, + "step": 3066 + }, + { + "epoch": 0.34802836879432625, + "grad_norm": 53.894344329833984, + "learning_rate": 5e-05, + "loss": 1.2168, + "num_input_tokens_seen": 205201752, + "step": 3067 + }, + { + "epoch": 0.34802836879432625, + "loss": 1.2217426300048828, + "loss_ce": 0.0034809017088264227, + "loss_iou": 0.53125, + "loss_num": 0.0306396484375, + "loss_xval": 1.21875, + "num_input_tokens_seen": 205201752, + "step": 3067 + }, + { + "epoch": 0.3481418439716312, + "grad_norm": 33.965728759765625, + "learning_rate": 5e-05, + "loss": 1.2889, + "num_input_tokens_seen": 205267644, + "step": 3068 + }, + { + "epoch": 0.3481418439716312, + "loss": 1.1964237689971924, + "loss_ce": 0.004040936939418316, + "loss_iou": 0.5234375, + "loss_num": 0.029052734375, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 205267644, + "step": 3068 + }, + { + "epoch": 0.34825531914893615, + "grad_norm": 31.776124954223633, + "learning_rate": 5e-05, + "loss": 1.2239, + "num_input_tokens_seen": 205333932, + "step": 3069 + }, + { + "epoch": 0.34825531914893615, + "loss": 1.0537822246551514, + "loss_ce": 0.005442418158054352, + "loss_iou": 0.4453125, + "loss_num": 0.031982421875, + "loss_xval": 1.046875, + "num_input_tokens_seen": 205333932, + "step": 3069 + }, + { + "epoch": 0.3483687943262411, + "grad_norm": 22.16116714477539, + "learning_rate": 5e-05, + "loss": 1.235, + "num_input_tokens_seen": 205401152, + "step": 3070 + }, + { + "epoch": 0.3483687943262411, + "loss": 1.1639695167541504, + "loss_ce": 0.0023484337143599987, + "loss_iou": 0.490234375, + "loss_num": 0.03662109375, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 205401152, + "step": 3070 + }, + { + "epoch": 0.3484822695035461, + "grad_norm": 46.185096740722656, + "learning_rate": 5e-05, + "loss": 1.3531, + "num_input_tokens_seen": 205468008, + "step": 3071 + }, + { + "epoch": 0.3484822695035461, + "loss": 1.5678776502609253, + "loss_ce": 0.009772241115570068, + "loss_iou": 0.578125, + "loss_num": 0.07958984375, + "loss_xval": 1.5546875, + "num_input_tokens_seen": 205468008, + "step": 3071 + }, + { + "epoch": 0.3485957446808511, + "grad_norm": 25.523765563964844, + "learning_rate": 5e-05, + "loss": 1.4314, + "num_input_tokens_seen": 205534700, + "step": 3072 + }, + { + "epoch": 0.3485957446808511, + "loss": 1.2688010931015015, + "loss_ce": 0.0025656907819211483, + "loss_iou": 0.5390625, + "loss_num": 0.038330078125, + "loss_xval": 1.265625, + "num_input_tokens_seen": 205534700, + "step": 3072 + }, + { + "epoch": 0.348709219858156, + "grad_norm": 38.98274230957031, + "learning_rate": 5e-05, + "loss": 1.1929, + "num_input_tokens_seen": 205602000, + "step": 3073 + }, + { + "epoch": 0.348709219858156, + "loss": 1.1963233947753906, + "loss_ce": 0.004428847227245569, + "loss_iou": 0.490234375, + "loss_num": 0.042236328125, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 205602000, + "step": 3073 + }, + { + "epoch": 0.348822695035461, + "grad_norm": 29.147310256958008, + "learning_rate": 5e-05, + "loss": 1.2937, + "num_input_tokens_seen": 205669788, + "step": 3074 + }, + { + "epoch": 0.348822695035461, + "loss": 1.423830509185791, + "loss_ce": 0.006838309578597546, + "loss_iou": 0.57421875, + "loss_num": 0.0537109375, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 205669788, + "step": 3074 + }, + { + "epoch": 0.34893617021276596, + "grad_norm": 16.708223342895508, + "learning_rate": 5e-05, + "loss": 1.1701, + "num_input_tokens_seen": 205737348, + "step": 3075 + }, + { + "epoch": 0.34893617021276596, + "loss": 1.0585209131240845, + "loss_ce": 0.009204497560858727, + "loss_iou": 0.466796875, + "loss_num": 0.0234375, + "loss_xval": 1.046875, + "num_input_tokens_seen": 205737348, + "step": 3075 + }, + { + "epoch": 0.34904964539007094, + "grad_norm": 29.09507179260254, + "learning_rate": 5e-05, + "loss": 1.2715, + "num_input_tokens_seen": 205804072, + "step": 3076 + }, + { + "epoch": 0.34904964539007094, + "loss": 1.1246085166931152, + "loss_ce": 0.003026543650776148, + "loss_iou": 0.46484375, + "loss_num": 0.0390625, + "loss_xval": 1.125, + "num_input_tokens_seen": 205804072, + "step": 3076 + }, + { + "epoch": 0.34916312056737586, + "grad_norm": 25.60533905029297, + "learning_rate": 5e-05, + "loss": 1.0915, + "num_input_tokens_seen": 205871116, + "step": 3077 + }, + { + "epoch": 0.34916312056737586, + "loss": 1.0664525032043457, + "loss_ce": 0.010544242337346077, + "loss_iou": 0.44921875, + "loss_num": 0.03125, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 205871116, + "step": 3077 + }, + { + "epoch": 0.34927659574468084, + "grad_norm": 36.35441589355469, + "learning_rate": 5e-05, + "loss": 1.0944, + "num_input_tokens_seen": 205938336, + "step": 3078 + }, + { + "epoch": 0.34927659574468084, + "loss": 0.9897160530090332, + "loss_ce": 0.00582928117364645, + "loss_iou": 0.40625, + "loss_num": 0.034423828125, + "loss_xval": 0.984375, + "num_input_tokens_seen": 205938336, + "step": 3078 + }, + { + "epoch": 0.3493900709219858, + "grad_norm": 27.584579467773438, + "learning_rate": 5e-05, + "loss": 1.4773, + "num_input_tokens_seen": 206004856, + "step": 3079 + }, + { + "epoch": 0.3493900709219858, + "loss": 1.5301278829574585, + "loss_ce": 0.010108353570103645, + "loss_iou": 0.578125, + "loss_num": 0.07275390625, + "loss_xval": 1.5234375, + "num_input_tokens_seen": 206004856, + "step": 3079 + }, + { + "epoch": 0.3495035460992908, + "grad_norm": 98.37042999267578, + "learning_rate": 5e-05, + "loss": 1.0594, + "num_input_tokens_seen": 206072060, + "step": 3080 + }, + { + "epoch": 0.3495035460992908, + "loss": 1.0014194250106812, + "loss_ce": 0.006790535524487495, + "loss_iou": 0.451171875, + "loss_num": 0.018798828125, + "loss_xval": 0.99609375, + "num_input_tokens_seen": 206072060, + "step": 3080 + }, + { + "epoch": 0.3496170212765957, + "grad_norm": 23.475587844848633, + "learning_rate": 5e-05, + "loss": 1.3907, + "num_input_tokens_seen": 206139088, + "step": 3081 + }, + { + "epoch": 0.3496170212765957, + "loss": 1.3443069458007812, + "loss_ce": 0.005928048864006996, + "loss_iou": 0.5703125, + "loss_num": 0.04052734375, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 206139088, + "step": 3081 + }, + { + "epoch": 0.3497304964539007, + "grad_norm": 84.47042083740234, + "learning_rate": 5e-05, + "loss": 1.1164, + "num_input_tokens_seen": 206206124, + "step": 3082 + }, + { + "epoch": 0.3497304964539007, + "loss": 1.111972689628601, + "loss_ce": 0.0060156527906656265, + "loss_iou": 0.431640625, + "loss_num": 0.048095703125, + "loss_xval": 1.109375, + "num_input_tokens_seen": 206206124, + "step": 3082 + }, + { + "epoch": 0.3498439716312057, + "grad_norm": 26.665943145751953, + "learning_rate": 5e-05, + "loss": 1.3742, + "num_input_tokens_seen": 206273288, + "step": 3083 + }, + { + "epoch": 0.3498439716312057, + "loss": 1.4593613147735596, + "loss_ce": 0.005259787198156118, + "loss_iou": 0.6015625, + "loss_num": 0.049560546875, + "loss_xval": 1.453125, + "num_input_tokens_seen": 206273288, + "step": 3083 + }, + { + "epoch": 0.34995744680851065, + "grad_norm": 33.032493591308594, + "learning_rate": 5e-05, + "loss": 1.3498, + "num_input_tokens_seen": 206340276, + "step": 3084 + }, + { + "epoch": 0.34995744680851065, + "loss": 1.4364395141601562, + "loss_ce": 0.008216876536607742, + "loss_iou": 0.5625, + "loss_num": 0.060546875, + "loss_xval": 1.4296875, + "num_input_tokens_seen": 206340276, + "step": 3084 + }, + { + "epoch": 0.3500709219858156, + "grad_norm": 23.87954330444336, + "learning_rate": 5e-05, + "loss": 1.5108, + "num_input_tokens_seen": 206406620, + "step": 3085 + }, + { + "epoch": 0.3500709219858156, + "loss": 1.4045207500457764, + "loss_ce": 0.003153556026518345, + "loss_iou": 0.63671875, + "loss_num": 0.02587890625, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 206406620, + "step": 3085 + }, + { + "epoch": 0.35018439716312055, + "grad_norm": 18.021663665771484, + "learning_rate": 5e-05, + "loss": 1.1801, + "num_input_tokens_seen": 206472916, + "step": 3086 + }, + { + "epoch": 0.35018439716312055, + "loss": 1.392836570739746, + "loss_ce": 0.006117869634181261, + "loss_iou": 0.5625, + "loss_num": 0.05224609375, + "loss_xval": 1.390625, + "num_input_tokens_seen": 206472916, + "step": 3086 + }, + { + "epoch": 0.35029787234042553, + "grad_norm": 17.8908634185791, + "learning_rate": 5e-05, + "loss": 1.0089, + "num_input_tokens_seen": 206539928, + "step": 3087 + }, + { + "epoch": 0.35029787234042553, + "loss": 0.9478181600570679, + "loss_ce": 0.008365030400454998, + "loss_iou": 0.359375, + "loss_num": 0.044189453125, + "loss_xval": 0.9375, + "num_input_tokens_seen": 206539928, + "step": 3087 + }, + { + "epoch": 0.3504113475177305, + "grad_norm": 33.245941162109375, + "learning_rate": 5e-05, + "loss": 1.1648, + "num_input_tokens_seen": 206606964, + "step": 3088 + }, + { + "epoch": 0.3504113475177305, + "loss": 1.3087880611419678, + "loss_ce": 0.0050771525129675865, + "loss_iou": 0.5234375, + "loss_num": 0.05078125, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 206606964, + "step": 3088 + }, + { + "epoch": 0.3505248226950355, + "grad_norm": 57.49820327758789, + "learning_rate": 5e-05, + "loss": 1.5019, + "num_input_tokens_seen": 206673664, + "step": 3089 + }, + { + "epoch": 0.3505248226950355, + "loss": 1.4780166149139404, + "loss_ce": 0.0073135411366820335, + "loss_iou": 0.66015625, + "loss_num": 0.03076171875, + "loss_xval": 1.46875, + "num_input_tokens_seen": 206673664, + "step": 3089 + }, + { + "epoch": 0.3506382978723404, + "grad_norm": 21.191186904907227, + "learning_rate": 5e-05, + "loss": 1.4678, + "num_input_tokens_seen": 206739616, + "step": 3090 + }, + { + "epoch": 0.3506382978723404, + "loss": 1.5474704504013062, + "loss_ce": 0.007431297563016415, + "loss_iou": 0.6328125, + "loss_num": 0.05419921875, + "loss_xval": 1.5390625, + "num_input_tokens_seen": 206739616, + "step": 3090 + }, + { + "epoch": 0.3507517730496454, + "grad_norm": 16.714397430419922, + "learning_rate": 5e-05, + "loss": 1.2318, + "num_input_tokens_seen": 206805852, + "step": 3091 + }, + { + "epoch": 0.3507517730496454, + "loss": 1.0401906967163086, + "loss_ce": 0.010893858969211578, + "loss_iou": 0.435546875, + "loss_num": 0.031494140625, + "loss_xval": 1.03125, + "num_input_tokens_seen": 206805852, + "step": 3091 + }, + { + "epoch": 0.35086524822695037, + "grad_norm": 14.92155647277832, + "learning_rate": 5e-05, + "loss": 1.2236, + "num_input_tokens_seen": 206873172, + "step": 3092 + }, + { + "epoch": 0.35086524822695037, + "loss": 1.2855709791183472, + "loss_ce": 0.006762370932847261, + "loss_iou": 0.50390625, + "loss_num": 0.053955078125, + "loss_xval": 1.28125, + "num_input_tokens_seen": 206873172, + "step": 3092 + }, + { + "epoch": 0.35097872340425534, + "grad_norm": 19.173667907714844, + "learning_rate": 5e-05, + "loss": 1.128, + "num_input_tokens_seen": 206940852, + "step": 3093 + }, + { + "epoch": 0.35097872340425534, + "loss": 1.1214548349380493, + "loss_ce": 0.005243897438049316, + "loss_iou": 0.486328125, + "loss_num": 0.028564453125, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 206940852, + "step": 3093 + }, + { + "epoch": 0.35109219858156027, + "grad_norm": 32.16895294189453, + "learning_rate": 5e-05, + "loss": 1.1517, + "num_input_tokens_seen": 207008456, + "step": 3094 + }, + { + "epoch": 0.35109219858156027, + "loss": 1.3285222053527832, + "loss_ce": 0.007233086507767439, + "loss_iou": 0.54296875, + "loss_num": 0.04736328125, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 207008456, + "step": 3094 + }, + { + "epoch": 0.35120567375886524, + "grad_norm": 23.63302230834961, + "learning_rate": 5e-05, + "loss": 1.1811, + "num_input_tokens_seen": 207075508, + "step": 3095 + }, + { + "epoch": 0.35120567375886524, + "loss": 1.33005690574646, + "loss_ce": 0.004373366478830576, + "loss_iou": 0.53515625, + "loss_num": 0.05078125, + "loss_xval": 1.328125, + "num_input_tokens_seen": 207075508, + "step": 3095 + }, + { + "epoch": 0.3513191489361702, + "grad_norm": 17.192873001098633, + "learning_rate": 5e-05, + "loss": 1.0785, + "num_input_tokens_seen": 207143400, + "step": 3096 + }, + { + "epoch": 0.3513191489361702, + "loss": 1.0287851095199585, + "loss_ce": 0.002906175795942545, + "loss_iou": 0.443359375, + "loss_num": 0.028076171875, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 207143400, + "step": 3096 + }, + { + "epoch": 0.3514326241134752, + "grad_norm": 43.407955169677734, + "learning_rate": 5e-05, + "loss": 1.2461, + "num_input_tokens_seen": 207210240, + "step": 3097 + }, + { + "epoch": 0.3514326241134752, + "loss": 1.2671713829040527, + "loss_ce": 0.00252294703386724, + "loss_iou": 0.52734375, + "loss_num": 0.04150390625, + "loss_xval": 1.265625, + "num_input_tokens_seen": 207210240, + "step": 3097 + }, + { + "epoch": 0.3515460992907801, + "grad_norm": 26.118608474731445, + "learning_rate": 5e-05, + "loss": 1.1245, + "num_input_tokens_seen": 207276092, + "step": 3098 + }, + { + "epoch": 0.3515460992907801, + "loss": 1.1861600875854492, + "loss_ce": 0.008425610139966011, + "loss_iou": 0.49609375, + "loss_num": 0.03759765625, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 207276092, + "step": 3098 + }, + { + "epoch": 0.3516595744680851, + "grad_norm": 32.118316650390625, + "learning_rate": 5e-05, + "loss": 1.4378, + "num_input_tokens_seen": 207343568, + "step": 3099 + }, + { + "epoch": 0.3516595744680851, + "loss": 1.4346565008163452, + "loss_ce": 0.005945492535829544, + "loss_iou": 0.5390625, + "loss_num": 0.0703125, + "loss_xval": 1.4296875, + "num_input_tokens_seen": 207343568, + "step": 3099 + }, + { + "epoch": 0.3517730496453901, + "grad_norm": 63.009742736816406, + "learning_rate": 5e-05, + "loss": 1.5706, + "num_input_tokens_seen": 207408972, + "step": 3100 + }, + { + "epoch": 0.3517730496453901, + "loss": 1.7289443016052246, + "loss_ce": 0.0072646914049983025, + "loss_iou": 0.734375, + "loss_num": 0.05078125, + "loss_xval": 1.71875, + "num_input_tokens_seen": 207408972, + "step": 3100 + }, + { + "epoch": 0.35188652482269506, + "grad_norm": 28.349468231201172, + "learning_rate": 5e-05, + "loss": 1.2867, + "num_input_tokens_seen": 207476612, + "step": 3101 + }, + { + "epoch": 0.35188652482269506, + "loss": 1.371462106704712, + "loss_ce": 0.004274615086615086, + "loss_iou": 0.5625, + "loss_num": 0.048095703125, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 207476612, + "step": 3101 + }, + { + "epoch": 0.352, + "grad_norm": 24.999771118164062, + "learning_rate": 5e-05, + "loss": 1.2902, + "num_input_tokens_seen": 207542996, + "step": 3102 + }, + { + "epoch": 0.352, + "loss": 1.4302586317062378, + "loss_ce": 0.006430503912270069, + "loss_iou": 0.57421875, + "loss_num": 0.0546875, + "loss_xval": 1.421875, + "num_input_tokens_seen": 207542996, + "step": 3102 + }, + { + "epoch": 0.35211347517730496, + "grad_norm": 40.61882019042969, + "learning_rate": 5e-05, + "loss": 1.1678, + "num_input_tokens_seen": 207611560, + "step": 3103 + }, + { + "epoch": 0.35211347517730496, + "loss": 1.3125429153442383, + "loss_ce": 0.005902374628931284, + "loss_iou": 0.53125, + "loss_num": 0.048583984375, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 207611560, + "step": 3103 + }, + { + "epoch": 0.35222695035460994, + "grad_norm": 22.501401901245117, + "learning_rate": 5e-05, + "loss": 1.4597, + "num_input_tokens_seen": 207676860, + "step": 3104 + }, + { + "epoch": 0.35222695035460994, + "loss": 1.4007610082626343, + "loss_ce": 0.0030558789148926735, + "loss_iou": 0.5703125, + "loss_num": 0.051025390625, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 207676860, + "step": 3104 + }, + { + "epoch": 0.3523404255319149, + "grad_norm": 16.241621017456055, + "learning_rate": 5e-05, + "loss": 1.1495, + "num_input_tokens_seen": 207743200, + "step": 3105 + }, + { + "epoch": 0.3523404255319149, + "loss": 1.2519822120666504, + "loss_ce": 0.005888409446924925, + "loss_iou": 0.5078125, + "loss_num": 0.046875, + "loss_xval": 1.25, + "num_input_tokens_seen": 207743200, + "step": 3105 + }, + { + "epoch": 0.35245390070921984, + "grad_norm": 19.381805419921875, + "learning_rate": 5e-05, + "loss": 1.3785, + "num_input_tokens_seen": 207810812, + "step": 3106 + }, + { + "epoch": 0.35245390070921984, + "loss": 1.4135425090789795, + "loss_ce": 0.00729244202375412, + "loss_iou": 0.578125, + "loss_num": 0.050048828125, + "loss_xval": 1.40625, + "num_input_tokens_seen": 207810812, + "step": 3106 + }, + { + "epoch": 0.3525673758865248, + "grad_norm": 37.069305419921875, + "learning_rate": 5e-05, + "loss": 1.2275, + "num_input_tokens_seen": 207877744, + "step": 3107 + }, + { + "epoch": 0.3525673758865248, + "loss": 1.0187244415283203, + "loss_ce": 0.0035877127666026354, + "loss_iou": 0.447265625, + "loss_num": 0.0244140625, + "loss_xval": 1.015625, + "num_input_tokens_seen": 207877744, + "step": 3107 + }, + { + "epoch": 0.3526808510638298, + "grad_norm": 23.725757598876953, + "learning_rate": 5e-05, + "loss": 1.5056, + "num_input_tokens_seen": 207943608, + "step": 3108 + }, + { + "epoch": 0.3526808510638298, + "loss": 1.5237023830413818, + "loss_ce": 0.0036829079035669565, + "loss_iou": 0.64453125, + "loss_num": 0.04638671875, + "loss_xval": 1.5234375, + "num_input_tokens_seen": 207943608, + "step": 3108 + }, + { + "epoch": 0.35279432624113477, + "grad_norm": 17.09134292602539, + "learning_rate": 5e-05, + "loss": 1.2129, + "num_input_tokens_seen": 208009812, + "step": 3109 + }, + { + "epoch": 0.35279432624113477, + "loss": 1.2617195844650269, + "loss_ce": 0.008789951913058758, + "loss_iou": 0.486328125, + "loss_num": 0.055908203125, + "loss_xval": 1.25, + "num_input_tokens_seen": 208009812, + "step": 3109 + }, + { + "epoch": 0.3529078014184397, + "grad_norm": 30.65178108215332, + "learning_rate": 5e-05, + "loss": 1.0939, + "num_input_tokens_seen": 208075736, + "step": 3110 + }, + { + "epoch": 0.3529078014184397, + "loss": 1.0622490644454956, + "loss_ce": 0.003899487666785717, + "loss_iou": 0.416015625, + "loss_num": 0.045166015625, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 208075736, + "step": 3110 + }, + { + "epoch": 0.35302127659574467, + "grad_norm": 27.147544860839844, + "learning_rate": 5e-05, + "loss": 1.2608, + "num_input_tokens_seen": 208141712, + "step": 3111 + }, + { + "epoch": 0.35302127659574467, + "loss": 1.2625935077667236, + "loss_ce": 0.006367824040353298, + "loss_iou": 0.5078125, + "loss_num": 0.0478515625, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 208141712, + "step": 3111 + }, + { + "epoch": 0.35313475177304965, + "grad_norm": 64.80696105957031, + "learning_rate": 5e-05, + "loss": 1.3274, + "num_input_tokens_seen": 208208280, + "step": 3112 + }, + { + "epoch": 0.35313475177304965, + "loss": 1.0651638507843018, + "loss_ce": 0.005837731063365936, + "loss_iou": 0.44921875, + "loss_num": 0.0322265625, + "loss_xval": 1.0625, + "num_input_tokens_seen": 208208280, + "step": 3112 + }, + { + "epoch": 0.3532482269503546, + "grad_norm": 26.726951599121094, + "learning_rate": 5e-05, + "loss": 1.1444, + "num_input_tokens_seen": 208275648, + "step": 3113 + }, + { + "epoch": 0.3532482269503546, + "loss": 1.3031401634216309, + "loss_ce": 0.0043119932524859905, + "loss_iou": 0.53125, + "loss_num": 0.04736328125, + "loss_xval": 1.296875, + "num_input_tokens_seen": 208275648, + "step": 3113 + }, + { + "epoch": 0.35336170212765955, + "grad_norm": 46.95985412597656, + "learning_rate": 5e-05, + "loss": 1.4146, + "num_input_tokens_seen": 208342496, + "step": 3114 + }, + { + "epoch": 0.35336170212765955, + "loss": 1.4612858295440674, + "loss_ce": 0.006207684054970741, + "loss_iou": 0.62109375, + "loss_num": 0.042724609375, + "loss_xval": 1.453125, + "num_input_tokens_seen": 208342496, + "step": 3114 + }, + { + "epoch": 0.3534751773049645, + "grad_norm": 22.46433448791504, + "learning_rate": 5e-05, + "loss": 1.2727, + "num_input_tokens_seen": 208408284, + "step": 3115 + }, + { + "epoch": 0.3534751773049645, + "loss": 1.2397124767303467, + "loss_ce": 0.007900843396782875, + "loss_iou": 0.51953125, + "loss_num": 0.0390625, + "loss_xval": 1.234375, + "num_input_tokens_seen": 208408284, + "step": 3115 + }, + { + "epoch": 0.3535886524822695, + "grad_norm": 30.469167709350586, + "learning_rate": 5e-05, + "loss": 1.2566, + "num_input_tokens_seen": 208474852, + "step": 3116 + }, + { + "epoch": 0.3535886524822695, + "loss": 1.243051528930664, + "loss_ce": 0.007699977140873671, + "loss_iou": 0.498046875, + "loss_num": 0.04833984375, + "loss_xval": 1.234375, + "num_input_tokens_seen": 208474852, + "step": 3116 + }, + { + "epoch": 0.3537021276595745, + "grad_norm": 33.19437026977539, + "learning_rate": 5e-05, + "loss": 1.4707, + "num_input_tokens_seen": 208541944, + "step": 3117 + }, + { + "epoch": 0.3537021276595745, + "loss": 1.480332374572754, + "loss_ce": 0.004258132539689541, + "loss_iou": 0.63671875, + "loss_num": 0.040771484375, + "loss_xval": 1.4765625, + "num_input_tokens_seen": 208541944, + "step": 3117 + }, + { + "epoch": 0.3538156028368794, + "grad_norm": 25.8477840423584, + "learning_rate": 5e-05, + "loss": 1.2979, + "num_input_tokens_seen": 208607708, + "step": 3118 + }, + { + "epoch": 0.3538156028368794, + "loss": 1.2446389198303223, + "loss_ce": 0.006357711739838123, + "loss_iou": 0.49609375, + "loss_num": 0.049072265625, + "loss_xval": 1.234375, + "num_input_tokens_seen": 208607708, + "step": 3118 + }, + { + "epoch": 0.3539290780141844, + "grad_norm": 15.604764938354492, + "learning_rate": 5e-05, + "loss": 1.0385, + "num_input_tokens_seen": 208673936, + "step": 3119 + }, + { + "epoch": 0.3539290780141844, + "loss": 1.1506720781326294, + "loss_ce": 0.005652587860822678, + "loss_iou": 0.4765625, + "loss_num": 0.0380859375, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 208673936, + "step": 3119 + }, + { + "epoch": 0.35404255319148936, + "grad_norm": 20.643587112426758, + "learning_rate": 5e-05, + "loss": 1.2179, + "num_input_tokens_seen": 208740212, + "step": 3120 + }, + { + "epoch": 0.35404255319148936, + "loss": 1.470407485961914, + "loss_ce": 0.006052004173398018, + "loss_iou": 0.6328125, + "loss_num": 0.0390625, + "loss_xval": 1.4609375, + "num_input_tokens_seen": 208740212, + "step": 3120 + }, + { + "epoch": 0.35415602836879434, + "grad_norm": 30.508766174316406, + "learning_rate": 5e-05, + "loss": 1.3311, + "num_input_tokens_seen": 208807804, + "step": 3121 + }, + { + "epoch": 0.35415602836879434, + "loss": 1.2463617324829102, + "loss_ce": 0.004174291621893644, + "loss_iou": 0.50390625, + "loss_num": 0.046142578125, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 208807804, + "step": 3121 + }, + { + "epoch": 0.3542695035460993, + "grad_norm": 27.106151580810547, + "learning_rate": 5e-05, + "loss": 1.474, + "num_input_tokens_seen": 208875816, + "step": 3122 + }, + { + "epoch": 0.3542695035460993, + "loss": 1.4458322525024414, + "loss_ce": 0.004425926133990288, + "loss_iou": 0.57421875, + "loss_num": 0.057861328125, + "loss_xval": 1.4375, + "num_input_tokens_seen": 208875816, + "step": 3122 + }, + { + "epoch": 0.35438297872340424, + "grad_norm": 24.375905990600586, + "learning_rate": 5e-05, + "loss": 1.1194, + "num_input_tokens_seen": 208942360, + "step": 3123 + }, + { + "epoch": 0.35438297872340424, + "loss": 1.213590145111084, + "loss_ce": 0.004605728201568127, + "loss_iou": 0.51953125, + "loss_num": 0.03369140625, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 208942360, + "step": 3123 + }, + { + "epoch": 0.3544964539007092, + "grad_norm": 29.383703231811523, + "learning_rate": 5e-05, + "loss": 1.2762, + "num_input_tokens_seen": 209009288, + "step": 3124 + }, + { + "epoch": 0.3544964539007092, + "loss": 1.2645542621612549, + "loss_ce": 0.004788605496287346, + "loss_iou": 0.54296875, + "loss_num": 0.035400390625, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 209009288, + "step": 3124 + }, + { + "epoch": 0.3546099290780142, + "grad_norm": 36.271148681640625, + "learning_rate": 5e-05, + "loss": 1.2174, + "num_input_tokens_seen": 209076084, + "step": 3125 + }, + { + "epoch": 0.3546099290780142, + "loss": 1.2604835033416748, + "loss_ce": 0.005600674077868462, + "loss_iou": 0.54296875, + "loss_num": 0.034423828125, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 209076084, + "step": 3125 + }, + { + "epoch": 0.3547234042553192, + "grad_norm": 23.586368560791016, + "learning_rate": 5e-05, + "loss": 1.5684, + "num_input_tokens_seen": 209144052, + "step": 3126 + }, + { + "epoch": 0.3547234042553192, + "loss": 1.7205556631088257, + "loss_ce": 0.005711900070309639, + "loss_iou": 0.73046875, + "loss_num": 0.051025390625, + "loss_xval": 1.71875, + "num_input_tokens_seen": 209144052, + "step": 3126 + }, + { + "epoch": 0.3548368794326241, + "grad_norm": 11.745843887329102, + "learning_rate": 5e-05, + "loss": 1.0724, + "num_input_tokens_seen": 209210748, + "step": 3127 + }, + { + "epoch": 0.3548368794326241, + "loss": 1.1917736530303955, + "loss_ce": 0.007691549137234688, + "loss_iou": 0.48828125, + "loss_num": 0.041748046875, + "loss_xval": 1.1875, + "num_input_tokens_seen": 209210748, + "step": 3127 + }, + { + "epoch": 0.3549503546099291, + "grad_norm": 21.60396385192871, + "learning_rate": 5e-05, + "loss": 1.0221, + "num_input_tokens_seen": 209277768, + "step": 3128 + }, + { + "epoch": 0.3549503546099291, + "loss": 0.9637152552604675, + "loss_ce": 0.005707466974854469, + "loss_iou": 0.412109375, + "loss_num": 0.027099609375, + "loss_xval": 0.95703125, + "num_input_tokens_seen": 209277768, + "step": 3128 + }, + { + "epoch": 0.35506382978723405, + "grad_norm": 14.74501895904541, + "learning_rate": 5e-05, + "loss": 1.1314, + "num_input_tokens_seen": 209344024, + "step": 3129 + }, + { + "epoch": 0.35506382978723405, + "loss": 0.9882721900939941, + "loss_ce": 0.010733095929026604, + "loss_iou": 0.380859375, + "loss_num": 0.04296875, + "loss_xval": 0.9765625, + "num_input_tokens_seen": 209344024, + "step": 3129 + }, + { + "epoch": 0.35517730496453903, + "grad_norm": 16.56069564819336, + "learning_rate": 5e-05, + "loss": 1.245, + "num_input_tokens_seen": 209411096, + "step": 3130 + }, + { + "epoch": 0.35517730496453903, + "loss": 1.1977448463439941, + "loss_ce": 0.008566400036215782, + "loss_iou": 0.486328125, + "loss_num": 0.04345703125, + "loss_xval": 1.1875, + "num_input_tokens_seen": 209411096, + "step": 3130 + }, + { + "epoch": 0.35529078014184395, + "grad_norm": 26.314531326293945, + "learning_rate": 5e-05, + "loss": 1.2726, + "num_input_tokens_seen": 209476864, + "step": 3131 + }, + { + "epoch": 0.35529078014184395, + "loss": 1.178656816482544, + "loss_ce": 0.005805302876979113, + "loss_iou": 0.494140625, + "loss_num": 0.03662109375, + "loss_xval": 1.171875, + "num_input_tokens_seen": 209476864, + "step": 3131 + }, + { + "epoch": 0.35540425531914893, + "grad_norm": 28.661184310913086, + "learning_rate": 5e-05, + "loss": 1.5037, + "num_input_tokens_seen": 209544176, + "step": 3132 + }, + { + "epoch": 0.35540425531914893, + "loss": 1.3385627269744873, + "loss_ce": 0.005554909817874432, + "loss_iou": 0.59375, + "loss_num": 0.0284423828125, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 209544176, + "step": 3132 + }, + { + "epoch": 0.3555177304964539, + "grad_norm": 34.35911560058594, + "learning_rate": 5e-05, + "loss": 1.1401, + "num_input_tokens_seen": 209610852, + "step": 3133 + }, + { + "epoch": 0.3555177304964539, + "loss": 1.0352908372879028, + "loss_ce": 0.004651213996112347, + "loss_iou": 0.439453125, + "loss_num": 0.030517578125, + "loss_xval": 1.03125, + "num_input_tokens_seen": 209610852, + "step": 3133 + }, + { + "epoch": 0.3556312056737589, + "grad_norm": 28.31713104248047, + "learning_rate": 5e-05, + "loss": 1.4619, + "num_input_tokens_seen": 209677512, + "step": 3134 + }, + { + "epoch": 0.3556312056737589, + "loss": 1.456712007522583, + "loss_ce": 0.004563447088003159, + "loss_iou": 0.61328125, + "loss_num": 0.044921875, + "loss_xval": 1.453125, + "num_input_tokens_seen": 209677512, + "step": 3134 + }, + { + "epoch": 0.3557446808510638, + "grad_norm": 25.208948135375977, + "learning_rate": 5e-05, + "loss": 1.1308, + "num_input_tokens_seen": 209745196, + "step": 3135 + }, + { + "epoch": 0.3557446808510638, + "loss": 1.0509467124938965, + "loss_ce": 0.006635104306042194, + "loss_iou": 0.419921875, + "loss_num": 0.041015625, + "loss_xval": 1.046875, + "num_input_tokens_seen": 209745196, + "step": 3135 + }, + { + "epoch": 0.3558581560283688, + "grad_norm": 32.31318283081055, + "learning_rate": 5e-05, + "loss": 1.3732, + "num_input_tokens_seen": 209811396, + "step": 3136 + }, + { + "epoch": 0.3558581560283688, + "loss": 1.3667716979980469, + "loss_ce": 0.004466944374144077, + "loss_iou": 0.56640625, + "loss_num": 0.0458984375, + "loss_xval": 1.359375, + "num_input_tokens_seen": 209811396, + "step": 3136 + }, + { + "epoch": 0.35597163120567377, + "grad_norm": 26.883607864379883, + "learning_rate": 5e-05, + "loss": 1.2482, + "num_input_tokens_seen": 209878348, + "step": 3137 + }, + { + "epoch": 0.35597163120567377, + "loss": 1.385557770729065, + "loss_ce": 0.006651528179645538, + "loss_iou": 0.53515625, + "loss_num": 0.06103515625, + "loss_xval": 1.375, + "num_input_tokens_seen": 209878348, + "step": 3137 + }, + { + "epoch": 0.35608510638297874, + "grad_norm": 26.017131805419922, + "learning_rate": 5e-05, + "loss": 1.2885, + "num_input_tokens_seen": 209945196, + "step": 3138 + }, + { + "epoch": 0.35608510638297874, + "loss": 1.4003580808639526, + "loss_ce": 0.00436193123459816, + "loss_iou": 0.58203125, + "loss_num": 0.0458984375, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 209945196, + "step": 3138 + }, + { + "epoch": 0.35619858156028367, + "grad_norm": 31.474254608154297, + "learning_rate": 5e-05, + "loss": 1.1588, + "num_input_tokens_seen": 210010988, + "step": 3139 + }, + { + "epoch": 0.35619858156028367, + "loss": 1.1994078159332275, + "loss_ce": 0.006292553152889013, + "loss_iou": 0.515625, + "loss_num": 0.032470703125, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 210010988, + "step": 3139 + }, + { + "epoch": 0.35631205673758864, + "grad_norm": 21.61728858947754, + "learning_rate": 5e-05, + "loss": 1.5297, + "num_input_tokens_seen": 210077864, + "step": 3140 + }, + { + "epoch": 0.35631205673758864, + "loss": 1.4656885862350464, + "loss_ce": 0.0037745567969977856, + "loss_iou": 0.63671875, + "loss_num": 0.037841796875, + "loss_xval": 1.4609375, + "num_input_tokens_seen": 210077864, + "step": 3140 + }, + { + "epoch": 0.3564255319148936, + "grad_norm": 15.573614120483398, + "learning_rate": 5e-05, + "loss": 1.2113, + "num_input_tokens_seen": 210144016, + "step": 3141 + }, + { + "epoch": 0.3564255319148936, + "loss": 1.3946099281311035, + "loss_ce": 0.007891176268458366, + "loss_iou": 0.578125, + "loss_num": 0.04541015625, + "loss_xval": 1.390625, + "num_input_tokens_seen": 210144016, + "step": 3141 + }, + { + "epoch": 0.3565390070921986, + "grad_norm": 20.955106735229492, + "learning_rate": 5e-05, + "loss": 1.2273, + "num_input_tokens_seen": 210211200, + "step": 3142 + }, + { + "epoch": 0.3565390070921986, + "loss": 1.1842809915542603, + "loss_ce": 0.0041052172891795635, + "loss_iou": 0.48046875, + "loss_num": 0.044189453125, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 210211200, + "step": 3142 + }, + { + "epoch": 0.3566524822695035, + "grad_norm": 23.930931091308594, + "learning_rate": 5e-05, + "loss": 1.2828, + "num_input_tokens_seen": 210278644, + "step": 3143 + }, + { + "epoch": 0.3566524822695035, + "loss": 1.2503341436386108, + "loss_ce": 0.004637075588107109, + "loss_iou": 0.52734375, + "loss_num": 0.038818359375, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 210278644, + "step": 3143 + }, + { + "epoch": 0.3567659574468085, + "grad_norm": 22.424205780029297, + "learning_rate": 5e-05, + "loss": 1.1836, + "num_input_tokens_seen": 210345272, + "step": 3144 + }, + { + "epoch": 0.3567659574468085, + "loss": 1.1811435222625732, + "loss_ce": 0.0068270377814769745, + "loss_iou": 0.5234375, + "loss_num": 0.025390625, + "loss_xval": 1.171875, + "num_input_tokens_seen": 210345272, + "step": 3144 + }, + { + "epoch": 0.3568794326241135, + "grad_norm": 28.7054443359375, + "learning_rate": 5e-05, + "loss": 1.1106, + "num_input_tokens_seen": 210411564, + "step": 3145 + }, + { + "epoch": 0.3568794326241135, + "loss": 1.044061541557312, + "loss_ce": 0.006036643870174885, + "loss_iou": 0.4375, + "loss_num": 0.0322265625, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 210411564, + "step": 3145 + }, + { + "epoch": 0.35699290780141846, + "grad_norm": 44.818511962890625, + "learning_rate": 5e-05, + "loss": 1.2668, + "num_input_tokens_seen": 210478136, + "step": 3146 + }, + { + "epoch": 0.35699290780141846, + "loss": 1.2004718780517578, + "loss_ce": 0.004671151749789715, + "loss_iou": 0.51171875, + "loss_num": 0.034912109375, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 210478136, + "step": 3146 + }, + { + "epoch": 0.3571063829787234, + "grad_norm": 20.560476303100586, + "learning_rate": 5e-05, + "loss": 1.5953, + "num_input_tokens_seen": 210544724, + "step": 3147 + }, + { + "epoch": 0.3571063829787234, + "loss": 1.4649064540863037, + "loss_ce": 0.006898595951497555, + "loss_iou": 0.59375, + "loss_num": 0.054931640625, + "loss_xval": 1.4609375, + "num_input_tokens_seen": 210544724, + "step": 3147 + }, + { + "epoch": 0.35721985815602836, + "grad_norm": 18.659570693969727, + "learning_rate": 5e-05, + "loss": 1.1926, + "num_input_tokens_seen": 210611948, + "step": 3148 + }, + { + "epoch": 0.35721985815602836, + "loss": 1.2584900856018066, + "loss_ce": 0.005072093568742275, + "loss_iou": 0.5234375, + "loss_num": 0.04150390625, + "loss_xval": 1.25, + "num_input_tokens_seen": 210611948, + "step": 3148 + }, + { + "epoch": 0.35733333333333334, + "grad_norm": 28.718984603881836, + "learning_rate": 5e-05, + "loss": 1.1425, + "num_input_tokens_seen": 210679572, + "step": 3149 + }, + { + "epoch": 0.35733333333333334, + "loss": 1.237381935119629, + "loss_ce": 0.005448414944112301, + "loss_iou": 0.5078125, + "loss_num": 0.04296875, + "loss_xval": 1.234375, + "num_input_tokens_seen": 210679572, + "step": 3149 + }, + { + "epoch": 0.3574468085106383, + "grad_norm": 27.6782169342041, + "learning_rate": 5e-05, + "loss": 1.5546, + "num_input_tokens_seen": 210747024, + "step": 3150 + }, + { + "epoch": 0.3574468085106383, + "loss": 1.4498354196548462, + "loss_ce": 0.005011225119233131, + "loss_iou": 0.62109375, + "loss_num": 0.04052734375, + "loss_xval": 1.4453125, + "num_input_tokens_seen": 210747024, + "step": 3150 + }, + { + "epoch": 0.35756028368794324, + "grad_norm": 17.966331481933594, + "learning_rate": 5e-05, + "loss": 1.1611, + "num_input_tokens_seen": 210813956, + "step": 3151 + }, + { + "epoch": 0.35756028368794324, + "loss": 1.1993980407714844, + "loss_ce": 0.00603871326893568, + "loss_iou": 0.5078125, + "loss_num": 0.03564453125, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 210813956, + "step": 3151 + }, + { + "epoch": 0.3576737588652482, + "grad_norm": 21.472335815429688, + "learning_rate": 5e-05, + "loss": 1.3808, + "num_input_tokens_seen": 210880196, + "step": 3152 + }, + { + "epoch": 0.3576737588652482, + "loss": 1.4513789415359497, + "loss_ce": 0.004601588938385248, + "loss_iou": 0.55859375, + "loss_num": 0.0654296875, + "loss_xval": 1.4453125, + "num_input_tokens_seen": 210880196, + "step": 3152 + }, + { + "epoch": 0.3577872340425532, + "grad_norm": 29.666824340820312, + "learning_rate": 5e-05, + "loss": 1.3077, + "num_input_tokens_seen": 210946956, + "step": 3153 + }, + { + "epoch": 0.3577872340425532, + "loss": 1.5479094982147217, + "loss_ce": 0.003964181523770094, + "loss_iou": 0.66015625, + "loss_num": 0.044677734375, + "loss_xval": 1.546875, + "num_input_tokens_seen": 210946956, + "step": 3153 + }, + { + "epoch": 0.35790070921985817, + "grad_norm": 25.64878273010254, + "learning_rate": 5e-05, + "loss": 1.2284, + "num_input_tokens_seen": 211013548, + "step": 3154 + }, + { + "epoch": 0.35790070921985817, + "loss": 1.171358346939087, + "loss_ce": 0.002901345957070589, + "loss_iou": 0.5390625, + "loss_num": 0.0186767578125, + "loss_xval": 1.171875, + "num_input_tokens_seen": 211013548, + "step": 3154 + }, + { + "epoch": 0.3580141843971631, + "grad_norm": 24.066476821899414, + "learning_rate": 5e-05, + "loss": 1.1878, + "num_input_tokens_seen": 211080228, + "step": 3155 + }, + { + "epoch": 0.3580141843971631, + "loss": 1.2163305282592773, + "loss_ce": 0.004416411742568016, + "loss_iou": 0.5234375, + "loss_num": 0.032470703125, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 211080228, + "step": 3155 + }, + { + "epoch": 0.35812765957446807, + "grad_norm": 20.511642456054688, + "learning_rate": 5e-05, + "loss": 1.0516, + "num_input_tokens_seen": 211147320, + "step": 3156 + }, + { + "epoch": 0.35812765957446807, + "loss": 0.9914476275444031, + "loss_ce": 0.005607792176306248, + "loss_iou": 0.40234375, + "loss_num": 0.03662109375, + "loss_xval": 0.984375, + "num_input_tokens_seen": 211147320, + "step": 3156 + }, + { + "epoch": 0.35824113475177305, + "grad_norm": 14.581916809082031, + "learning_rate": 5e-05, + "loss": 1.275, + "num_input_tokens_seen": 211213284, + "step": 3157 + }, + { + "epoch": 0.35824113475177305, + "loss": 1.2650039196014404, + "loss_ce": 0.005238319747149944, + "loss_iou": 0.53515625, + "loss_num": 0.037109375, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 211213284, + "step": 3157 + }, + { + "epoch": 0.358354609929078, + "grad_norm": 48.72414779663086, + "learning_rate": 5e-05, + "loss": 1.2742, + "num_input_tokens_seen": 211280296, + "step": 3158 + }, + { + "epoch": 0.358354609929078, + "loss": 1.5696978569030762, + "loss_ce": 0.007197877392172813, + "loss_iou": 0.62109375, + "loss_num": 0.0634765625, + "loss_xval": 1.5625, + "num_input_tokens_seen": 211280296, + "step": 3158 + }, + { + "epoch": 0.358468085106383, + "grad_norm": 55.261627197265625, + "learning_rate": 5e-05, + "loss": 1.3545, + "num_input_tokens_seen": 211346776, + "step": 3159 + }, + { + "epoch": 0.358468085106383, + "loss": 1.5657129287719727, + "loss_ce": 0.005166041664779186, + "loss_iou": 0.6171875, + "loss_num": 0.0654296875, + "loss_xval": 1.5625, + "num_input_tokens_seen": 211346776, + "step": 3159 + }, + { + "epoch": 0.35858156028368793, + "grad_norm": 27.297494888305664, + "learning_rate": 5e-05, + "loss": 1.5183, + "num_input_tokens_seen": 211413440, + "step": 3160 + }, + { + "epoch": 0.35858156028368793, + "loss": 1.5336666107177734, + "loss_ce": 0.007787744514644146, + "loss_iou": 0.6171875, + "loss_num": 0.057861328125, + "loss_xval": 1.5234375, + "num_input_tokens_seen": 211413440, + "step": 3160 + }, + { + "epoch": 0.3586950354609929, + "grad_norm": 22.278013229370117, + "learning_rate": 5e-05, + "loss": 1.1273, + "num_input_tokens_seen": 211480840, + "step": 3161 + }, + { + "epoch": 0.3586950354609929, + "loss": 1.114207148551941, + "loss_ce": 0.0028790123760700226, + "loss_iou": 0.486328125, + "loss_num": 0.027587890625, + "loss_xval": 1.109375, + "num_input_tokens_seen": 211480840, + "step": 3161 + }, + { + "epoch": 0.3588085106382979, + "grad_norm": 25.792224884033203, + "learning_rate": 5e-05, + "loss": 1.1098, + "num_input_tokens_seen": 211548668, + "step": 3162 + }, + { + "epoch": 0.3588085106382979, + "loss": 1.0957074165344238, + "loss_ce": 0.00488703977316618, + "loss_iou": 0.490234375, + "loss_num": 0.021728515625, + "loss_xval": 1.09375, + "num_input_tokens_seen": 211548668, + "step": 3162 + }, + { + "epoch": 0.35892198581560286, + "grad_norm": 29.856182098388672, + "learning_rate": 5e-05, + "loss": 1.2819, + "num_input_tokens_seen": 211615764, + "step": 3163 + }, + { + "epoch": 0.35892198581560286, + "loss": 1.3139678239822388, + "loss_ce": 0.004885799717158079, + "loss_iou": 0.5703125, + "loss_num": 0.032958984375, + "loss_xval": 1.3125, + "num_input_tokens_seen": 211615764, + "step": 3163 + }, + { + "epoch": 0.3590354609929078, + "grad_norm": 33.752174377441406, + "learning_rate": 5e-05, + "loss": 1.1065, + "num_input_tokens_seen": 211681820, + "step": 3164 + }, + { + "epoch": 0.3590354609929078, + "loss": 1.128412127494812, + "loss_ce": 0.006341856904327869, + "loss_iou": 0.498046875, + "loss_num": 0.0250244140625, + "loss_xval": 1.125, + "num_input_tokens_seen": 211681820, + "step": 3164 + }, + { + "epoch": 0.35914893617021276, + "grad_norm": 33.826663970947266, + "learning_rate": 5e-05, + "loss": 1.3979, + "num_input_tokens_seen": 211749728, + "step": 3165 + }, + { + "epoch": 0.35914893617021276, + "loss": 1.2607892751693726, + "loss_ce": 0.002976752584800124, + "loss_iou": 0.546875, + "loss_num": 0.033447265625, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 211749728, + "step": 3165 + }, + { + "epoch": 0.35926241134751774, + "grad_norm": 29.589570999145508, + "learning_rate": 5e-05, + "loss": 1.2177, + "num_input_tokens_seen": 211815924, + "step": 3166 + }, + { + "epoch": 0.35926241134751774, + "loss": 1.3500397205352783, + "loss_ce": 0.005313201807439327, + "loss_iou": 0.51953125, + "loss_num": 0.06103515625, + "loss_xval": 1.34375, + "num_input_tokens_seen": 211815924, + "step": 3166 + }, + { + "epoch": 0.3593758865248227, + "grad_norm": 23.642993927001953, + "learning_rate": 5e-05, + "loss": 1.1937, + "num_input_tokens_seen": 211881860, + "step": 3167 + }, + { + "epoch": 0.3593758865248227, + "loss": 1.2422575950622559, + "loss_ce": 0.003976353909820318, + "loss_iou": 0.51171875, + "loss_num": 0.043212890625, + "loss_xval": 1.234375, + "num_input_tokens_seen": 211881860, + "step": 3167 + }, + { + "epoch": 0.35948936170212764, + "grad_norm": 22.093069076538086, + "learning_rate": 5e-05, + "loss": 0.9914, + "num_input_tokens_seen": 211948744, + "step": 3168 + }, + { + "epoch": 0.35948936170212764, + "loss": 0.9358556270599365, + "loss_ce": 0.005191503092646599, + "loss_iou": 0.380859375, + "loss_num": 0.033935546875, + "loss_xval": 0.9296875, + "num_input_tokens_seen": 211948744, + "step": 3168 + }, + { + "epoch": 0.3596028368794326, + "grad_norm": 23.653867721557617, + "learning_rate": 5e-05, + "loss": 1.0651, + "num_input_tokens_seen": 212016028, + "step": 3169 + }, + { + "epoch": 0.3596028368794326, + "loss": 1.0488712787628174, + "loss_ce": 0.00687905540689826, + "loss_iou": 0.458984375, + "loss_num": 0.0250244140625, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 212016028, + "step": 3169 + }, + { + "epoch": 0.3597163120567376, + "grad_norm": 21.995031356811523, + "learning_rate": 5e-05, + "loss": 1.0765, + "num_input_tokens_seen": 212083520, + "step": 3170 + }, + { + "epoch": 0.3597163120567376, + "loss": 1.1086080074310303, + "loss_ce": 0.008022088557481766, + "loss_iou": 0.48828125, + "loss_num": 0.0250244140625, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 212083520, + "step": 3170 + }, + { + "epoch": 0.3598297872340426, + "grad_norm": 20.934349060058594, + "learning_rate": 5e-05, + "loss": 1.3717, + "num_input_tokens_seen": 212150948, + "step": 3171 + }, + { + "epoch": 0.3598297872340426, + "loss": 1.374504804611206, + "loss_ce": 0.006340701133012772, + "loss_iou": 0.58984375, + "loss_num": 0.037353515625, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 212150948, + "step": 3171 + }, + { + "epoch": 0.3599432624113475, + "grad_norm": 62.7325553894043, + "learning_rate": 5e-05, + "loss": 1.2656, + "num_input_tokens_seen": 212217848, + "step": 3172 + }, + { + "epoch": 0.3599432624113475, + "loss": 1.2468552589416504, + "loss_ce": 0.005156052764505148, + "loss_iou": 0.53515625, + "loss_num": 0.033935546875, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 212217848, + "step": 3172 + }, + { + "epoch": 0.3600567375886525, + "grad_norm": 28.389083862304688, + "learning_rate": 5e-05, + "loss": 1.5009, + "num_input_tokens_seen": 212283928, + "step": 3173 + }, + { + "epoch": 0.3600567375886525, + "loss": 1.6605854034423828, + "loss_ce": 0.004335397854447365, + "loss_iou": 0.6875, + "loss_num": 0.056396484375, + "loss_xval": 1.65625, + "num_input_tokens_seen": 212283928, + "step": 3173 + }, + { + "epoch": 0.36017021276595745, + "grad_norm": 26.950363159179688, + "learning_rate": 5e-05, + "loss": 1.1786, + "num_input_tokens_seen": 212351100, + "step": 3174 + }, + { + "epoch": 0.36017021276595745, + "loss": 1.2448720932006836, + "loss_ce": 0.00927647016942501, + "loss_iou": 0.486328125, + "loss_num": 0.052001953125, + "loss_xval": 1.234375, + "num_input_tokens_seen": 212351100, + "step": 3174 + }, + { + "epoch": 0.36028368794326243, + "grad_norm": 31.69976806640625, + "learning_rate": 5e-05, + "loss": 1.2567, + "num_input_tokens_seen": 212417836, + "step": 3175 + }, + { + "epoch": 0.36028368794326243, + "loss": 1.4295811653137207, + "loss_ce": 0.010147511959075928, + "loss_iou": 0.58984375, + "loss_num": 0.04736328125, + "loss_xval": 1.421875, + "num_input_tokens_seen": 212417836, + "step": 3175 + }, + { + "epoch": 0.36039716312056735, + "grad_norm": 42.99263381958008, + "learning_rate": 5e-05, + "loss": 1.5024, + "num_input_tokens_seen": 212485424, + "step": 3176 + }, + { + "epoch": 0.36039716312056735, + "loss": 1.527337670326233, + "loss_ce": 0.0039002166595309973, + "loss_iou": 0.62890625, + "loss_num": 0.052978515625, + "loss_xval": 1.5234375, + "num_input_tokens_seen": 212485424, + "step": 3176 + }, + { + "epoch": 0.36051063829787233, + "grad_norm": 21.900150299072266, + "learning_rate": 5e-05, + "loss": 1.4456, + "num_input_tokens_seen": 212553848, + "step": 3177 + }, + { + "epoch": 0.36051063829787233, + "loss": 1.5178619623184204, + "loss_ce": 0.005166614893823862, + "loss_iou": 0.62890625, + "loss_num": 0.05078125, + "loss_xval": 1.515625, + "num_input_tokens_seen": 212553848, + "step": 3177 + }, + { + "epoch": 0.3606241134751773, + "grad_norm": 14.78594970703125, + "learning_rate": 5e-05, + "loss": 1.371, + "num_input_tokens_seen": 212621096, + "step": 3178 + }, + { + "epoch": 0.3606241134751773, + "loss": 1.2945277690887451, + "loss_ce": 0.004976995289325714, + "loss_iou": 0.5234375, + "loss_num": 0.04931640625, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 212621096, + "step": 3178 + }, + { + "epoch": 0.3607375886524823, + "grad_norm": 23.005332946777344, + "learning_rate": 5e-05, + "loss": 1.27, + "num_input_tokens_seen": 212688888, + "step": 3179 + }, + { + "epoch": 0.3607375886524823, + "loss": 1.3429336547851562, + "loss_ce": 0.008461028337478638, + "loss_iou": 0.51953125, + "loss_num": 0.05908203125, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 212688888, + "step": 3179 + }, + { + "epoch": 0.3608510638297872, + "grad_norm": 23.81438636779785, + "learning_rate": 5e-05, + "loss": 1.2497, + "num_input_tokens_seen": 212755912, + "step": 3180 + }, + { + "epoch": 0.3608510638297872, + "loss": 1.3266239166259766, + "loss_ce": 0.005823152139782906, + "loss_iou": 0.5078125, + "loss_num": 0.061279296875, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 212755912, + "step": 3180 + }, + { + "epoch": 0.3609645390070922, + "grad_norm": 23.885601043701172, + "learning_rate": 5e-05, + "loss": 1.2618, + "num_input_tokens_seen": 212823560, + "step": 3181 + }, + { + "epoch": 0.3609645390070922, + "loss": 1.21333646774292, + "loss_ce": 0.008502431213855743, + "loss_iou": 0.48046875, + "loss_num": 0.048095703125, + "loss_xval": 1.203125, + "num_input_tokens_seen": 212823560, + "step": 3181 + }, + { + "epoch": 0.36107801418439717, + "grad_norm": 15.280482292175293, + "learning_rate": 5e-05, + "loss": 1.1462, + "num_input_tokens_seen": 212890848, + "step": 3182 + }, + { + "epoch": 0.36107801418439717, + "loss": 1.2079577445983887, + "loss_ce": 0.002879680134356022, + "loss_iou": 0.462890625, + "loss_num": 0.05615234375, + "loss_xval": 1.203125, + "num_input_tokens_seen": 212890848, + "step": 3182 + }, + { + "epoch": 0.36119148936170214, + "grad_norm": 21.681753158569336, + "learning_rate": 5e-05, + "loss": 1.2528, + "num_input_tokens_seen": 212958472, + "step": 3183 + }, + { + "epoch": 0.36119148936170214, + "loss": 1.0537681579589844, + "loss_ce": 0.008357938379049301, + "loss_iou": 0.451171875, + "loss_num": 0.0281982421875, + "loss_xval": 1.046875, + "num_input_tokens_seen": 212958472, + "step": 3183 + }, + { + "epoch": 0.36130496453900707, + "grad_norm": 34.162113189697266, + "learning_rate": 5e-05, + "loss": 1.2931, + "num_input_tokens_seen": 213023976, + "step": 3184 + }, + { + "epoch": 0.36130496453900707, + "loss": 1.26265287399292, + "loss_ce": 0.004352000541985035, + "loss_iou": 0.546875, + "loss_num": 0.03369140625, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 213023976, + "step": 3184 + }, + { + "epoch": 0.36141843971631205, + "grad_norm": 28.6635684967041, + "learning_rate": 5e-05, + "loss": 1.3837, + "num_input_tokens_seen": 213091380, + "step": 3185 + }, + { + "epoch": 0.36141843971631205, + "loss": 1.3494935035705566, + "loss_ce": 0.006231758277863264, + "loss_iou": 0.56640625, + "loss_num": 0.041748046875, + "loss_xval": 1.34375, + "num_input_tokens_seen": 213091380, + "step": 3185 + }, + { + "epoch": 0.361531914893617, + "grad_norm": 18.403594970703125, + "learning_rate": 5e-05, + "loss": 1.1389, + "num_input_tokens_seen": 213158488, + "step": 3186 + }, + { + "epoch": 0.361531914893617, + "loss": 1.202412486076355, + "loss_ce": 0.004170289728790522, + "loss_iou": 0.49609375, + "loss_num": 0.040771484375, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 213158488, + "step": 3186 + }, + { + "epoch": 0.361645390070922, + "grad_norm": 21.789794921875, + "learning_rate": 5e-05, + "loss": 1.1597, + "num_input_tokens_seen": 213225520, + "step": 3187 + }, + { + "epoch": 0.361645390070922, + "loss": 1.3203890323638916, + "loss_ce": 0.006424308754503727, + "loss_iou": 0.53515625, + "loss_num": 0.048583984375, + "loss_xval": 1.3125, + "num_input_tokens_seen": 213225520, + "step": 3187 + }, + { + "epoch": 0.3617588652482269, + "grad_norm": 44.29216003417969, + "learning_rate": 5e-05, + "loss": 1.2293, + "num_input_tokens_seen": 213291504, + "step": 3188 + }, + { + "epoch": 0.3617588652482269, + "loss": 1.1301944255828857, + "loss_ce": 0.006598223932087421, + "loss_iou": 0.50390625, + "loss_num": 0.0235595703125, + "loss_xval": 1.125, + "num_input_tokens_seen": 213291504, + "step": 3188 + }, + { + "epoch": 0.3618723404255319, + "grad_norm": 60.814083099365234, + "learning_rate": 5e-05, + "loss": 1.7086, + "num_input_tokens_seen": 213358508, + "step": 3189 + }, + { + "epoch": 0.3618723404255319, + "loss": 1.7836658954620361, + "loss_ce": 0.00925182830542326, + "loss_iou": 0.76171875, + "loss_num": 0.049560546875, + "loss_xval": 1.7734375, + "num_input_tokens_seen": 213358508, + "step": 3189 + }, + { + "epoch": 0.3619858156028369, + "grad_norm": 20.146163940429688, + "learning_rate": 5e-05, + "loss": 1.1097, + "num_input_tokens_seen": 213424880, + "step": 3190 + }, + { + "epoch": 0.3619858156028369, + "loss": 1.2130303382873535, + "loss_ce": 0.004778414964675903, + "loss_iou": 0.46484375, + "loss_num": 0.05615234375, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 213424880, + "step": 3190 + }, + { + "epoch": 0.36209929078014186, + "grad_norm": 34.047264099121094, + "learning_rate": 5e-05, + "loss": 1.376, + "num_input_tokens_seen": 213491200, + "step": 3191 + }, + { + "epoch": 0.36209929078014186, + "loss": 1.2917065620422363, + "loss_ce": 0.003132321173325181, + "loss_iou": 0.5546875, + "loss_num": 0.035400390625, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 213491200, + "step": 3191 + }, + { + "epoch": 0.36221276595744684, + "grad_norm": 29.448701858520508, + "learning_rate": 5e-05, + "loss": 1.5102, + "num_input_tokens_seen": 213558760, + "step": 3192 + }, + { + "epoch": 0.36221276595744684, + "loss": 1.6198821067810059, + "loss_ce": 0.00855402834713459, + "loss_iou": 0.66796875, + "loss_num": 0.054931640625, + "loss_xval": 1.609375, + "num_input_tokens_seen": 213558760, + "step": 3192 + }, + { + "epoch": 0.36232624113475176, + "grad_norm": 22.51900863647461, + "learning_rate": 5e-05, + "loss": 1.3319, + "num_input_tokens_seen": 213626064, + "step": 3193 + }, + { + "epoch": 0.36232624113475176, + "loss": 1.2775449752807617, + "loss_ce": 0.00606070039793849, + "loss_iou": 0.5546875, + "loss_num": 0.0322265625, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 213626064, + "step": 3193 + }, + { + "epoch": 0.36243971631205674, + "grad_norm": 27.4692325592041, + "learning_rate": 5e-05, + "loss": 1.188, + "num_input_tokens_seen": 213693044, + "step": 3194 + }, + { + "epoch": 0.36243971631205674, + "loss": 1.0367861986160278, + "loss_ce": 0.005536217242479324, + "loss_iou": 0.447265625, + "loss_num": 0.027587890625, + "loss_xval": 1.03125, + "num_input_tokens_seen": 213693044, + "step": 3194 + }, + { + "epoch": 0.3625531914893617, + "grad_norm": 26.5058536529541, + "learning_rate": 5e-05, + "loss": 1.345, + "num_input_tokens_seen": 213759820, + "step": 3195 + }, + { + "epoch": 0.3625531914893617, + "loss": 1.3370805978775024, + "loss_ce": 0.008223176002502441, + "loss_iou": 0.51171875, + "loss_num": 0.0615234375, + "loss_xval": 1.328125, + "num_input_tokens_seen": 213759820, + "step": 3195 + }, + { + "epoch": 0.3626666666666667, + "grad_norm": 22.249217987060547, + "learning_rate": 5e-05, + "loss": 0.9459, + "num_input_tokens_seen": 213825700, + "step": 3196 + }, + { + "epoch": 0.3626666666666667, + "loss": 0.9522532224655151, + "loss_ce": 0.005964202806353569, + "loss_iou": 0.408203125, + "loss_num": 0.0257568359375, + "loss_xval": 0.9453125, + "num_input_tokens_seen": 213825700, + "step": 3196 + }, + { + "epoch": 0.3627801418439716, + "grad_norm": 31.974979400634766, + "learning_rate": 5e-05, + "loss": 1.1385, + "num_input_tokens_seen": 213892716, + "step": 3197 + }, + { + "epoch": 0.3627801418439716, + "loss": 1.1773279905319214, + "loss_ce": 0.007161938585340977, + "loss_iou": 0.4765625, + "loss_num": 0.04296875, + "loss_xval": 1.171875, + "num_input_tokens_seen": 213892716, + "step": 3197 + }, + { + "epoch": 0.3628936170212766, + "grad_norm": 24.611900329589844, + "learning_rate": 5e-05, + "loss": 1.3395, + "num_input_tokens_seen": 213960480, + "step": 3198 + }, + { + "epoch": 0.3628936170212766, + "loss": 1.2644097805023193, + "loss_ce": 0.003179363440722227, + "loss_iou": 0.5625, + "loss_num": 0.0269775390625, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 213960480, + "step": 3198 + }, + { + "epoch": 0.36300709219858157, + "grad_norm": 54.0391845703125, + "learning_rate": 5e-05, + "loss": 1.2117, + "num_input_tokens_seen": 214027068, + "step": 3199 + }, + { + "epoch": 0.36300709219858157, + "loss": 1.055281639099121, + "loss_ce": 0.005965277552604675, + "loss_iou": 0.490234375, + "loss_num": 0.01385498046875, + "loss_xval": 1.046875, + "num_input_tokens_seen": 214027068, + "step": 3199 + }, + { + "epoch": 0.36312056737588655, + "grad_norm": 44.29044723510742, + "learning_rate": 5e-05, + "loss": 1.4653, + "num_input_tokens_seen": 214094892, + "step": 3200 + }, + { + "epoch": 0.36312056737588655, + "loss": 1.2737919092178345, + "loss_ce": 0.0037724191788583994, + "loss_iou": 0.546875, + "loss_num": 0.0361328125, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 214094892, + "step": 3200 + }, + { + "epoch": 0.36323404255319147, + "grad_norm": 15.908844947814941, + "learning_rate": 5e-05, + "loss": 1.2018, + "num_input_tokens_seen": 214162136, + "step": 3201 + }, + { + "epoch": 0.36323404255319147, + "loss": 1.288679838180542, + "loss_ce": 0.007429778575897217, + "loss_iou": 0.515625, + "loss_num": 0.049072265625, + "loss_xval": 1.28125, + "num_input_tokens_seen": 214162136, + "step": 3201 + }, + { + "epoch": 0.36334751773049645, + "grad_norm": 21.39947509765625, + "learning_rate": 5e-05, + "loss": 1.3421, + "num_input_tokens_seen": 214228772, + "step": 3202 + }, + { + "epoch": 0.36334751773049645, + "loss": 1.2817444801330566, + "loss_ce": 0.0058655026368796825, + "loss_iou": 0.53515625, + "loss_num": 0.04150390625, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 214228772, + "step": 3202 + }, + { + "epoch": 0.36346099290780143, + "grad_norm": 31.142675399780273, + "learning_rate": 5e-05, + "loss": 1.2242, + "num_input_tokens_seen": 214296736, + "step": 3203 + }, + { + "epoch": 0.36346099290780143, + "loss": 1.16749906539917, + "loss_ce": 0.009784253314137459, + "loss_iou": 0.478515625, + "loss_num": 0.040283203125, + "loss_xval": 1.15625, + "num_input_tokens_seen": 214296736, + "step": 3203 + }, + { + "epoch": 0.3635744680851064, + "grad_norm": 28.999523162841797, + "learning_rate": 5e-05, + "loss": 1.1653, + "num_input_tokens_seen": 214362964, + "step": 3204 + }, + { + "epoch": 0.3635744680851064, + "loss": 1.2030014991760254, + "loss_ce": 0.011107034981250763, + "loss_iou": 0.5, + "loss_num": 0.038330078125, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 214362964, + "step": 3204 + }, + { + "epoch": 0.36368794326241133, + "grad_norm": 20.045129776000977, + "learning_rate": 5e-05, + "loss": 1.1995, + "num_input_tokens_seen": 214429652, + "step": 3205 + }, + { + "epoch": 0.36368794326241133, + "loss": 1.3533527851104736, + "loss_ce": 0.008137976750731468, + "loss_iou": 0.53125, + "loss_num": 0.056884765625, + "loss_xval": 1.34375, + "num_input_tokens_seen": 214429652, + "step": 3205 + }, + { + "epoch": 0.3638014184397163, + "grad_norm": 30.7613582611084, + "learning_rate": 5e-05, + "loss": 1.3804, + "num_input_tokens_seen": 214495652, + "step": 3206 + }, + { + "epoch": 0.3638014184397163, + "loss": 1.4256789684295654, + "loss_ce": 0.009663313627243042, + "loss_iou": 0.6015625, + "loss_num": 0.043212890625, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 214495652, + "step": 3206 + }, + { + "epoch": 0.3639148936170213, + "grad_norm": 25.976228713989258, + "learning_rate": 5e-05, + "loss": 1.2395, + "num_input_tokens_seen": 214562600, + "step": 3207 + }, + { + "epoch": 0.3639148936170213, + "loss": 1.225529432296753, + "loss_ce": 0.00580283347517252, + "loss_iou": 0.54296875, + "loss_num": 0.0264892578125, + "loss_xval": 1.21875, + "num_input_tokens_seen": 214562600, + "step": 3207 + }, + { + "epoch": 0.36402836879432626, + "grad_norm": 13.439358711242676, + "learning_rate": 5e-05, + "loss": 1.1674, + "num_input_tokens_seen": 214629956, + "step": 3208 + }, + { + "epoch": 0.36402836879432626, + "loss": 1.375262975692749, + "loss_ce": 0.008075473830103874, + "loss_iou": 0.5703125, + "loss_num": 0.045654296875, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 214629956, + "step": 3208 + }, + { + "epoch": 0.3641418439716312, + "grad_norm": 17.518692016601562, + "learning_rate": 5e-05, + "loss": 1.0466, + "num_input_tokens_seen": 214697320, + "step": 3209 + }, + { + "epoch": 0.3641418439716312, + "loss": 1.2693650722503662, + "loss_ce": 0.0037401027511805296, + "loss_iou": 0.515625, + "loss_num": 0.0458984375, + "loss_xval": 1.265625, + "num_input_tokens_seen": 214697320, + "step": 3209 + }, + { + "epoch": 0.36425531914893616, + "grad_norm": 24.08272933959961, + "learning_rate": 5e-05, + "loss": 1.0676, + "num_input_tokens_seen": 214764760, + "step": 3210 + }, + { + "epoch": 0.36425531914893616, + "loss": 1.171233057975769, + "loss_ce": 0.005705730989575386, + "loss_iou": 0.478515625, + "loss_num": 0.041748046875, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 214764760, + "step": 3210 + }, + { + "epoch": 0.36436879432624114, + "grad_norm": 30.745656967163086, + "learning_rate": 5e-05, + "loss": 1.3049, + "num_input_tokens_seen": 214831836, + "step": 3211 + }, + { + "epoch": 0.36436879432624114, + "loss": 1.3453588485717773, + "loss_ce": 0.005515092983841896, + "loss_iou": 0.6015625, + "loss_num": 0.0274658203125, + "loss_xval": 1.34375, + "num_input_tokens_seen": 214831836, + "step": 3211 + }, + { + "epoch": 0.3644822695035461, + "grad_norm": 31.547073364257812, + "learning_rate": 5e-05, + "loss": 1.2606, + "num_input_tokens_seen": 214898864, + "step": 3212 + }, + { + "epoch": 0.3644822695035461, + "loss": 1.3253282308578491, + "loss_ce": 0.0045274728909134865, + "loss_iou": 0.5859375, + "loss_num": 0.030029296875, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 214898864, + "step": 3212 + }, + { + "epoch": 0.36459574468085104, + "grad_norm": 34.247535705566406, + "learning_rate": 5e-05, + "loss": 1.2298, + "num_input_tokens_seen": 214964944, + "step": 3213 + }, + { + "epoch": 0.36459574468085104, + "loss": 1.1145738363265991, + "loss_ce": 0.006785758771002293, + "loss_iou": 0.498046875, + "loss_num": 0.0224609375, + "loss_xval": 1.109375, + "num_input_tokens_seen": 214964944, + "step": 3213 + }, + { + "epoch": 0.364709219858156, + "grad_norm": 24.068500518798828, + "learning_rate": 5e-05, + "loss": 1.1373, + "num_input_tokens_seen": 215031556, + "step": 3214 + }, + { + "epoch": 0.364709219858156, + "loss": 1.308344841003418, + "loss_ce": 0.006098673678934574, + "loss_iou": 0.546875, + "loss_num": 0.041259765625, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 215031556, + "step": 3214 + }, + { + "epoch": 0.364822695035461, + "grad_norm": 13.108895301818848, + "learning_rate": 5e-05, + "loss": 1.0374, + "num_input_tokens_seen": 215098668, + "step": 3215 + }, + { + "epoch": 0.364822695035461, + "loss": 1.1766674518585205, + "loss_ce": 0.003815951757133007, + "loss_iou": 0.51171875, + "loss_num": 0.030029296875, + "loss_xval": 1.171875, + "num_input_tokens_seen": 215098668, + "step": 3215 + }, + { + "epoch": 0.364936170212766, + "grad_norm": 18.795406341552734, + "learning_rate": 5e-05, + "loss": 1.1633, + "num_input_tokens_seen": 215165092, + "step": 3216 + }, + { + "epoch": 0.364936170212766, + "loss": 0.958239734172821, + "loss_ce": 0.004473825916647911, + "loss_iou": 0.400390625, + "loss_num": 0.031005859375, + "loss_xval": 0.953125, + "num_input_tokens_seen": 215165092, + "step": 3216 + }, + { + "epoch": 0.3650496453900709, + "grad_norm": 23.35940170288086, + "learning_rate": 5e-05, + "loss": 1.0252, + "num_input_tokens_seen": 215231276, + "step": 3217 + }, + { + "epoch": 0.3650496453900709, + "loss": 1.1065150499343872, + "loss_ce": 0.0039148880168795586, + "loss_iou": 0.470703125, + "loss_num": 0.032470703125, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 215231276, + "step": 3217 + }, + { + "epoch": 0.3651631205673759, + "grad_norm": 27.73565101623535, + "learning_rate": 5e-05, + "loss": 1.3135, + "num_input_tokens_seen": 215298960, + "step": 3218 + }, + { + "epoch": 0.3651631205673759, + "loss": 1.4953263998031616, + "loss_ce": 0.006556892767548561, + "loss_iou": 0.625, + "loss_num": 0.0478515625, + "loss_xval": 1.4921875, + "num_input_tokens_seen": 215298960, + "step": 3218 + }, + { + "epoch": 0.36527659574468085, + "grad_norm": 47.39767074584961, + "learning_rate": 5e-05, + "loss": 1.3445, + "num_input_tokens_seen": 215365968, + "step": 3219 + }, + { + "epoch": 0.36527659574468085, + "loss": 1.3298770189285278, + "loss_ce": 0.00468170503154397, + "loss_iou": 0.56640625, + "loss_num": 0.0390625, + "loss_xval": 1.328125, + "num_input_tokens_seen": 215365968, + "step": 3219 + }, + { + "epoch": 0.36539007092198583, + "grad_norm": 23.596176147460938, + "learning_rate": 5e-05, + "loss": 1.2007, + "num_input_tokens_seen": 215432260, + "step": 3220 + }, + { + "epoch": 0.36539007092198583, + "loss": 1.0506236553192139, + "loss_ce": 0.005610167048871517, + "loss_iou": 0.46875, + "loss_num": 0.021240234375, + "loss_xval": 1.046875, + "num_input_tokens_seen": 215432260, + "step": 3220 + }, + { + "epoch": 0.36550354609929075, + "grad_norm": 20.583280563354492, + "learning_rate": 5e-05, + "loss": 1.1836, + "num_input_tokens_seen": 215499732, + "step": 3221 + }, + { + "epoch": 0.36550354609929075, + "loss": 1.1153173446655273, + "loss_ce": 0.00887210387736559, + "loss_iou": 0.46875, + "loss_num": 0.033935546875, + "loss_xval": 1.109375, + "num_input_tokens_seen": 215499732, + "step": 3221 + }, + { + "epoch": 0.36561702127659573, + "grad_norm": 25.376325607299805, + "learning_rate": 5e-05, + "loss": 1.1466, + "num_input_tokens_seen": 215567392, + "step": 3222 + }, + { + "epoch": 0.36561702127659573, + "loss": 1.1441233158111572, + "loss_ce": 0.0025218490045517683, + "loss_iou": 0.5234375, + "loss_num": 0.0191650390625, + "loss_xval": 1.140625, + "num_input_tokens_seen": 215567392, + "step": 3222 + }, + { + "epoch": 0.3657304964539007, + "grad_norm": 35.96242904663086, + "learning_rate": 5e-05, + "loss": 1.3752, + "num_input_tokens_seen": 215633908, + "step": 3223 + }, + { + "epoch": 0.3657304964539007, + "loss": 1.3306622505187988, + "loss_ce": 0.005466931499540806, + "loss_iou": 0.56640625, + "loss_num": 0.0380859375, + "loss_xval": 1.328125, + "num_input_tokens_seen": 215633908, + "step": 3223 + }, + { + "epoch": 0.3658439716312057, + "grad_norm": 28.3781681060791, + "learning_rate": 5e-05, + "loss": 1.444, + "num_input_tokens_seen": 215701264, + "step": 3224 + }, + { + "epoch": 0.3658439716312057, + "loss": 1.5003137588500977, + "loss_ce": 0.0032434463500976562, + "loss_iou": 0.6640625, + "loss_num": 0.033935546875, + "loss_xval": 1.5, + "num_input_tokens_seen": 215701264, + "step": 3224 + }, + { + "epoch": 0.3659574468085106, + "grad_norm": 12.765093803405762, + "learning_rate": 5e-05, + "loss": 1.1328, + "num_input_tokens_seen": 215767708, + "step": 3225 + }, + { + "epoch": 0.3659574468085106, + "loss": 1.1054250001907349, + "loss_ce": 0.0063038975931704044, + "loss_iou": 0.451171875, + "loss_num": 0.03955078125, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 215767708, + "step": 3225 + }, + { + "epoch": 0.3660709219858156, + "grad_norm": 23.870922088623047, + "learning_rate": 5e-05, + "loss": 1.0654, + "num_input_tokens_seen": 215834484, + "step": 3226 + }, + { + "epoch": 0.3660709219858156, + "loss": 1.0310635566711426, + "loss_ce": 0.005184647627174854, + "loss_iou": 0.4296875, + "loss_num": 0.03369140625, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 215834484, + "step": 3226 + }, + { + "epoch": 0.36618439716312057, + "grad_norm": 23.558265686035156, + "learning_rate": 5e-05, + "loss": 1.4251, + "num_input_tokens_seen": 215900668, + "step": 3227 + }, + { + "epoch": 0.36618439716312057, + "loss": 1.3272970914840698, + "loss_ce": 0.008937718346714973, + "loss_iou": 0.52734375, + "loss_num": 0.05224609375, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 215900668, + "step": 3227 + }, + { + "epoch": 0.36629787234042555, + "grad_norm": 16.932764053344727, + "learning_rate": 5e-05, + "loss": 1.0935, + "num_input_tokens_seen": 215967192, + "step": 3228 + }, + { + "epoch": 0.36629787234042555, + "loss": 1.1780511140823364, + "loss_ce": 0.004711290821433067, + "loss_iou": 0.482421875, + "loss_num": 0.041748046875, + "loss_xval": 1.171875, + "num_input_tokens_seen": 215967192, + "step": 3228 + }, + { + "epoch": 0.3664113475177305, + "grad_norm": 30.35987091064453, + "learning_rate": 5e-05, + "loss": 1.2171, + "num_input_tokens_seen": 216034812, + "step": 3229 + }, + { + "epoch": 0.3664113475177305, + "loss": 1.2819392681121826, + "loss_ce": 0.006548614241182804, + "loss_iou": 0.5390625, + "loss_num": 0.039794921875, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 216034812, + "step": 3229 + }, + { + "epoch": 0.36652482269503545, + "grad_norm": 41.49618148803711, + "learning_rate": 5e-05, + "loss": 1.3742, + "num_input_tokens_seen": 216102264, + "step": 3230 + }, + { + "epoch": 0.36652482269503545, + "loss": 1.377427101135254, + "loss_ce": 0.0029154368676245213, + "loss_iou": 0.5546875, + "loss_num": 0.052490234375, + "loss_xval": 1.375, + "num_input_tokens_seen": 216102264, + "step": 3230 + }, + { + "epoch": 0.3666382978723404, + "grad_norm": 29.32648277282715, + "learning_rate": 5e-05, + "loss": 1.2673, + "num_input_tokens_seen": 216168352, + "step": 3231 + }, + { + "epoch": 0.3666382978723404, + "loss": 1.3182711601257324, + "loss_ce": 0.0062593999318778515, + "loss_iou": 0.5234375, + "loss_num": 0.053955078125, + "loss_xval": 1.3125, + "num_input_tokens_seen": 216168352, + "step": 3231 + }, + { + "epoch": 0.3667517730496454, + "grad_norm": 25.12345314025879, + "learning_rate": 5e-05, + "loss": 1.409, + "num_input_tokens_seen": 216234952, + "step": 3232 + }, + { + "epoch": 0.3667517730496454, + "loss": 1.6999417543411255, + "loss_ce": 0.005605765618383884, + "loss_iou": 0.6796875, + "loss_num": 0.068359375, + "loss_xval": 1.6953125, + "num_input_tokens_seen": 216234952, + "step": 3232 + }, + { + "epoch": 0.3668652482269504, + "grad_norm": 27.30108642578125, + "learning_rate": 5e-05, + "loss": 1.1826, + "num_input_tokens_seen": 216302116, + "step": 3233 + }, + { + "epoch": 0.3668652482269504, + "loss": 1.2862625122070312, + "loss_ce": 0.027473516762256622, + "loss_iou": 0.5, + "loss_num": 0.051025390625, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 216302116, + "step": 3233 + }, + { + "epoch": 0.3669787234042553, + "grad_norm": 22.625022888183594, + "learning_rate": 5e-05, + "loss": 1.2694, + "num_input_tokens_seen": 216368688, + "step": 3234 + }, + { + "epoch": 0.3669787234042553, + "loss": 1.3310425281524658, + "loss_ce": 0.005114763509482145, + "loss_iou": 0.5234375, + "loss_num": 0.055908203125, + "loss_xval": 1.328125, + "num_input_tokens_seen": 216368688, + "step": 3234 + }, + { + "epoch": 0.3670921985815603, + "grad_norm": 71.65615844726562, + "learning_rate": 5e-05, + "loss": 1.2258, + "num_input_tokens_seen": 216435664, + "step": 3235 + }, + { + "epoch": 0.3670921985815603, + "loss": 1.2712007761001587, + "loss_ce": 0.005575790069997311, + "loss_iou": 0.55078125, + "loss_num": 0.03271484375, + "loss_xval": 1.265625, + "num_input_tokens_seen": 216435664, + "step": 3235 + }, + { + "epoch": 0.36720567375886526, + "grad_norm": 25.146663665771484, + "learning_rate": 5e-05, + "loss": 1.2129, + "num_input_tokens_seen": 216502404, + "step": 3236 + }, + { + "epoch": 0.36720567375886526, + "loss": 1.1988426446914673, + "loss_ce": 0.009389542043209076, + "loss_iou": 0.48828125, + "loss_num": 0.04248046875, + "loss_xval": 1.1875, + "num_input_tokens_seen": 216502404, + "step": 3236 + }, + { + "epoch": 0.36731914893617024, + "grad_norm": 23.217594146728516, + "learning_rate": 5e-05, + "loss": 1.1748, + "num_input_tokens_seen": 216569236, + "step": 3237 + }, + { + "epoch": 0.36731914893617024, + "loss": 1.1713603734970093, + "loss_ce": 0.0036357813514769077, + "loss_iou": 0.486328125, + "loss_num": 0.0390625, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 216569236, + "step": 3237 + }, + { + "epoch": 0.36743262411347516, + "grad_norm": 22.043336868286133, + "learning_rate": 5e-05, + "loss": 1.3527, + "num_input_tokens_seen": 216637196, + "step": 3238 + }, + { + "epoch": 0.36743262411347516, + "loss": 1.2806315422058105, + "loss_ce": 0.007193927653133869, + "loss_iou": 0.55078125, + "loss_num": 0.033935546875, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 216637196, + "step": 3238 + }, + { + "epoch": 0.36754609929078014, + "grad_norm": 27.74353790283203, + "learning_rate": 5e-05, + "loss": 1.3408, + "num_input_tokens_seen": 216703812, + "step": 3239 + }, + { + "epoch": 0.36754609929078014, + "loss": 1.2489643096923828, + "loss_ce": 0.006288527511060238, + "loss_iou": 0.5078125, + "loss_num": 0.0458984375, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 216703812, + "step": 3239 + }, + { + "epoch": 0.3676595744680851, + "grad_norm": 24.407100677490234, + "learning_rate": 5e-05, + "loss": 1.4141, + "num_input_tokens_seen": 216772472, + "step": 3240 + }, + { + "epoch": 0.3676595744680851, + "loss": 1.4717416763305664, + "loss_ce": 0.007386092096567154, + "loss_iou": 0.625, + "loss_num": 0.042236328125, + "loss_xval": 1.4609375, + "num_input_tokens_seen": 216772472, + "step": 3240 + }, + { + "epoch": 0.3677730496453901, + "grad_norm": 36.66350555419922, + "learning_rate": 5e-05, + "loss": 1.2303, + "num_input_tokens_seen": 216840472, + "step": 3241 + }, + { + "epoch": 0.3677730496453901, + "loss": 1.095409631729126, + "loss_ce": 0.00312450947239995, + "loss_iou": 0.4609375, + "loss_num": 0.03466796875, + "loss_xval": 1.09375, + "num_input_tokens_seen": 216840472, + "step": 3241 + }, + { + "epoch": 0.367886524822695, + "grad_norm": 20.370756149291992, + "learning_rate": 5e-05, + "loss": 1.6301, + "num_input_tokens_seen": 216907516, + "step": 3242 + }, + { + "epoch": 0.367886524822695, + "loss": 1.8100051879882812, + "loss_ce": 0.006294318474829197, + "loss_iou": 0.7421875, + "loss_num": 0.06298828125, + "loss_xval": 1.8046875, + "num_input_tokens_seen": 216907516, + "step": 3242 + }, + { + "epoch": 0.368, + "grad_norm": 30.578378677368164, + "learning_rate": 5e-05, + "loss": 1.2176, + "num_input_tokens_seen": 216974476, + "step": 3243 + }, + { + "epoch": 0.368, + "loss": 1.1134564876556396, + "loss_ce": 0.003104962408542633, + "loss_iou": 0.49609375, + "loss_num": 0.0238037109375, + "loss_xval": 1.109375, + "num_input_tokens_seen": 216974476, + "step": 3243 + }, + { + "epoch": 0.36811347517730497, + "grad_norm": 28.39762306213379, + "learning_rate": 5e-05, + "loss": 1.3068, + "num_input_tokens_seen": 217042320, + "step": 3244 + }, + { + "epoch": 0.36811347517730497, + "loss": 1.3016433715820312, + "loss_ce": 0.004768322221934795, + "loss_iou": 0.5859375, + "loss_num": 0.024658203125, + "loss_xval": 1.296875, + "num_input_tokens_seen": 217042320, + "step": 3244 + }, + { + "epoch": 0.36822695035460995, + "grad_norm": 27.341880798339844, + "learning_rate": 5e-05, + "loss": 1.4932, + "num_input_tokens_seen": 217108996, + "step": 3245 + }, + { + "epoch": 0.36822695035460995, + "loss": 1.3645386695861816, + "loss_ce": 0.006140204146504402, + "loss_iou": 0.546875, + "loss_num": 0.052734375, + "loss_xval": 1.359375, + "num_input_tokens_seen": 217108996, + "step": 3245 + }, + { + "epoch": 0.3683404255319149, + "grad_norm": 25.94162368774414, + "learning_rate": 5e-05, + "loss": 1.4364, + "num_input_tokens_seen": 217175268, + "step": 3246 + }, + { + "epoch": 0.3683404255319149, + "loss": 1.6442445516586304, + "loss_ce": 0.008014085702598095, + "loss_iou": 0.671875, + "loss_num": 0.058349609375, + "loss_xval": 1.6328125, + "num_input_tokens_seen": 217175268, + "step": 3246 + }, + { + "epoch": 0.36845390070921985, + "grad_norm": 31.113449096679688, + "learning_rate": 5e-05, + "loss": 1.4648, + "num_input_tokens_seen": 217242976, + "step": 3247 + }, + { + "epoch": 0.36845390070921985, + "loss": 1.302340030670166, + "loss_ce": 0.004000186920166016, + "loss_iou": 0.59765625, + "loss_num": 0.020263671875, + "loss_xval": 1.296875, + "num_input_tokens_seen": 217242976, + "step": 3247 + }, + { + "epoch": 0.36856737588652483, + "grad_norm": 11.927726745605469, + "learning_rate": 5e-05, + "loss": 1.126, + "num_input_tokens_seen": 217310220, + "step": 3248 + }, + { + "epoch": 0.36856737588652483, + "loss": 1.3743977546691895, + "loss_ce": 0.004768902435898781, + "loss_iou": 0.5546875, + "loss_num": 0.0517578125, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 217310220, + "step": 3248 + }, + { + "epoch": 0.3686808510638298, + "grad_norm": 80.71435546875, + "learning_rate": 5e-05, + "loss": 1.1927, + "num_input_tokens_seen": 217377252, + "step": 3249 + }, + { + "epoch": 0.3686808510638298, + "loss": 1.2351690530776978, + "loss_ce": 0.006653440184891224, + "loss_iou": 0.5078125, + "loss_num": 0.04248046875, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 217377252, + "step": 3249 + }, + { + "epoch": 0.36879432624113473, + "grad_norm": 22.285778045654297, + "learning_rate": 5e-05, + "loss": 1.2642, + "num_input_tokens_seen": 217445120, + "step": 3250 + }, + { + "epoch": 0.36879432624113473, + "eval_seeclick_CIoU": 0.40060803294181824, + "eval_seeclick_GIoU": 0.38109369575977325, + "eval_seeclick_IoU": 0.4836716651916504, + "eval_seeclick_MAE_all": 0.15267430990934372, + "eval_seeclick_MAE_h": 0.05875071883201599, + "eval_seeclick_MAE_w": 0.09611580893397331, + "eval_seeclick_MAE_x_boxes": 0.22905012965202332, + "eval_seeclick_MAE_y_boxes": 0.11931376531720161, + "eval_seeclick_NUM_probability": 0.9998897314071655, + "eval_seeclick_inside_bbox": 0.643750011920929, + "eval_seeclick_loss": 2.401559829711914, + "eval_seeclick_loss_ce": 0.015524983871728182, + "eval_seeclick_loss_iou": 0.833984375, + "eval_seeclick_loss_num": 0.1488494873046875, + "eval_seeclick_loss_xval": 2.410400390625, + "eval_seeclick_runtime": 68.6542, + "eval_seeclick_samples_per_second": 0.685, + "eval_seeclick_steps_per_second": 0.029, + "num_input_tokens_seen": 217445120, + "step": 3250 + }, + { + "epoch": 0.36879432624113473, + "eval_icons_CIoU": 0.5148889422416687, + "eval_icons_GIoU": 0.5096205174922943, + "eval_icons_IoU": 0.5554593503475189, + "eval_icons_MAE_all": 0.13712047412991524, + "eval_icons_MAE_h": 0.08917549252510071, + "eval_icons_MAE_w": 0.14309988170862198, + "eval_icons_MAE_x_boxes": 0.10090167820453644, + "eval_icons_MAE_y_boxes": 0.09290000051259995, + "eval_icons_NUM_probability": 0.9998701810836792, + "eval_icons_inside_bbox": 0.7795138955116272, + "eval_icons_loss": 2.3334522247314453, + "eval_icons_loss_ce": 0.00027263931406196207, + "eval_icons_loss_iou": 0.838134765625, + "eval_icons_loss_num": 0.13117027282714844, + "eval_icons_loss_xval": 2.33203125, + "eval_icons_runtime": 67.4053, + "eval_icons_samples_per_second": 0.742, + "eval_icons_steps_per_second": 0.03, + "num_input_tokens_seen": 217445120, + "step": 3250 + }, + { + "epoch": 0.36879432624113473, + "eval_screenspot_CIoU": 0.379441777865092, + "eval_screenspot_GIoU": 0.3600425223509471, + "eval_screenspot_IoU": 0.4518655339876811, + "eval_screenspot_MAE_all": 0.18298419813315073, + "eval_screenspot_MAE_h": 0.16700831055641174, + "eval_screenspot_MAE_w": 0.15390720466772714, + "eval_screenspot_MAE_x_boxes": 0.2448193977276484, + "eval_screenspot_MAE_y_boxes": 0.07759405300021172, + "eval_screenspot_NUM_probability": 0.9999005794525146, + "eval_screenspot_inside_bbox": 0.662500003973643, + "eval_screenspot_loss": 2.7993860244750977, + "eval_screenspot_loss_ce": 0.01805821682016055, + "eval_screenspot_loss_iou": 0.9309895833333334, + "eval_screenspot_loss_num": 0.18996175130208334, + "eval_screenspot_loss_xval": 2.8125, + "eval_screenspot_runtime": 129.4347, + "eval_screenspot_samples_per_second": 0.688, + "eval_screenspot_steps_per_second": 0.023, + "num_input_tokens_seen": 217445120, + "step": 3250 + }, + { + "epoch": 0.36879432624113473, + "eval_compot_CIoU": 0.4201883524656296, + "eval_compot_GIoU": 0.3820558041334152, + "eval_compot_IoU": 0.48527899384498596, + "eval_compot_MAE_all": 0.17298943549394608, + "eval_compot_MAE_h": 0.10627938061952591, + "eval_compot_MAE_w": 0.19055261090397835, + "eval_compot_MAE_x_boxes": 0.13789181411266327, + "eval_compot_MAE_y_boxes": 0.12091796845197678, + "eval_compot_NUM_probability": 0.9999137818813324, + "eval_compot_inside_bbox": 0.6302083432674408, + "eval_compot_loss": 2.7004828453063965, + "eval_compot_loss_ce": 0.00414286321029067, + "eval_compot_loss_iou": 0.90673828125, + "eval_compot_loss_num": 0.18836212158203125, + "eval_compot_loss_xval": 2.75537109375, + "eval_compot_runtime": 70.6383, + "eval_compot_samples_per_second": 0.708, + "eval_compot_steps_per_second": 0.028, + "num_input_tokens_seen": 217445120, + "step": 3250 + }, + { + "epoch": 0.36879432624113473, + "loss": 2.6531131267547607, + "loss_ce": 0.0036989101208746433, + "loss_iou": 0.890625, + "loss_num": 0.173828125, + "loss_xval": 2.65625, + "num_input_tokens_seen": 217445120, + "step": 3250 + }, + { + "epoch": 0.3689078014184397, + "grad_norm": 30.304794311523438, + "learning_rate": 5e-05, + "loss": 1.4345, + "num_input_tokens_seen": 217512568, + "step": 3251 + }, + { + "epoch": 0.3689078014184397, + "loss": 1.4169806241989136, + "loss_ce": 0.003894682275131345, + "loss_iou": 0.59375, + "loss_num": 0.04541015625, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 217512568, + "step": 3251 + }, + { + "epoch": 0.3690212765957447, + "grad_norm": 29.363811492919922, + "learning_rate": 5e-05, + "loss": 1.486, + "num_input_tokens_seen": 217579368, + "step": 3252 + }, + { + "epoch": 0.3690212765957447, + "loss": 1.366232991218567, + "loss_ce": 0.007834547199308872, + "loss_iou": 0.5546875, + "loss_num": 0.049072265625, + "loss_xval": 1.359375, + "num_input_tokens_seen": 217579368, + "step": 3252 + }, + { + "epoch": 0.36913475177304966, + "grad_norm": 18.353355407714844, + "learning_rate": 5e-05, + "loss": 0.9833, + "num_input_tokens_seen": 217645820, + "step": 3253 + }, + { + "epoch": 0.36913475177304966, + "loss": 1.0936939716339111, + "loss_ce": 0.010197822004556656, + "loss_iou": 0.392578125, + "loss_num": 0.059326171875, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 217645820, + "step": 3253 + }, + { + "epoch": 0.3692482269503546, + "grad_norm": 27.679311752319336, + "learning_rate": 5e-05, + "loss": 1.272, + "num_input_tokens_seen": 217711760, + "step": 3254 + }, + { + "epoch": 0.3692482269503546, + "loss": 1.4030251502990723, + "loss_ce": 0.0055642686784267426, + "loss_iou": 0.546875, + "loss_num": 0.06103515625, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 217711760, + "step": 3254 + }, + { + "epoch": 0.36936170212765956, + "grad_norm": 48.85744857788086, + "learning_rate": 5e-05, + "loss": 1.3359, + "num_input_tokens_seen": 217779096, + "step": 3255 + }, + { + "epoch": 0.36936170212765956, + "loss": 1.299462080001831, + "loss_ce": 0.0035636702086776495, + "loss_iou": 0.53125, + "loss_num": 0.0458984375, + "loss_xval": 1.296875, + "num_input_tokens_seen": 217779096, + "step": 3255 + }, + { + "epoch": 0.36947517730496454, + "grad_norm": 22.656875610351562, + "learning_rate": 5e-05, + "loss": 1.5501, + "num_input_tokens_seen": 217845672, + "step": 3256 + }, + { + "epoch": 0.36947517730496454, + "loss": 1.5259480476379395, + "loss_ce": 0.004463578574359417, + "loss_iou": 0.64453125, + "loss_num": 0.0458984375, + "loss_xval": 1.5234375, + "num_input_tokens_seen": 217845672, + "step": 3256 + }, + { + "epoch": 0.3695886524822695, + "grad_norm": 16.524845123291016, + "learning_rate": 5e-05, + "loss": 1.2873, + "num_input_tokens_seen": 217913084, + "step": 3257 + }, + { + "epoch": 0.3695886524822695, + "loss": 1.3733972311019897, + "loss_ce": 0.007186284288764, + "loss_iou": 0.5390625, + "loss_num": 0.05859375, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 217913084, + "step": 3257 + }, + { + "epoch": 0.36970212765957444, + "grad_norm": 51.957759857177734, + "learning_rate": 5e-05, + "loss": 1.3748, + "num_input_tokens_seen": 217981000, + "step": 3258 + }, + { + "epoch": 0.36970212765957444, + "loss": 1.278260350227356, + "loss_ce": 0.0033579992596060038, + "loss_iou": 0.515625, + "loss_num": 0.048828125, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 217981000, + "step": 3258 + }, + { + "epoch": 0.3698156028368794, + "grad_norm": 24.110204696655273, + "learning_rate": 5e-05, + "loss": 1.3758, + "num_input_tokens_seen": 218047608, + "step": 3259 + }, + { + "epoch": 0.3698156028368794, + "loss": 1.2137844562530518, + "loss_ce": 0.003396296640858054, + "loss_iou": 0.5078125, + "loss_num": 0.03857421875, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 218047608, + "step": 3259 + }, + { + "epoch": 0.3699290780141844, + "grad_norm": 20.323978424072266, + "learning_rate": 5e-05, + "loss": 1.0424, + "num_input_tokens_seen": 218115724, + "step": 3260 + }, + { + "epoch": 0.3699290780141844, + "loss": 1.0024864673614502, + "loss_ce": 0.0034630228765308857, + "loss_iou": 0.419921875, + "loss_num": 0.0322265625, + "loss_xval": 1.0, + "num_input_tokens_seen": 218115724, + "step": 3260 + }, + { + "epoch": 0.3700425531914894, + "grad_norm": 25.769838333129883, + "learning_rate": 5e-05, + "loss": 1.3418, + "num_input_tokens_seen": 218183480, + "step": 3261 + }, + { + "epoch": 0.3700425531914894, + "loss": 1.2591469287872314, + "loss_ce": 0.006217248737812042, + "loss_iou": 0.53515625, + "loss_num": 0.03662109375, + "loss_xval": 1.25, + "num_input_tokens_seen": 218183480, + "step": 3261 + }, + { + "epoch": 0.3701560283687943, + "grad_norm": 43.111366271972656, + "learning_rate": 5e-05, + "loss": 1.2327, + "num_input_tokens_seen": 218250336, + "step": 3262 + }, + { + "epoch": 0.3701560283687943, + "loss": 1.1840027570724487, + "loss_ce": 0.00480350386351347, + "loss_iou": 0.5234375, + "loss_num": 0.0260009765625, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 218250336, + "step": 3262 + }, + { + "epoch": 0.3702695035460993, + "grad_norm": 25.657119750976562, + "learning_rate": 5e-05, + "loss": 1.603, + "num_input_tokens_seen": 218317620, + "step": 3263 + }, + { + "epoch": 0.3702695035460993, + "loss": 1.5002933740615845, + "loss_ce": 0.0037113833241164684, + "loss_iou": 0.63671875, + "loss_num": 0.044189453125, + "loss_xval": 1.5, + "num_input_tokens_seen": 218317620, + "step": 3263 + }, + { + "epoch": 0.37038297872340425, + "grad_norm": 20.373132705688477, + "learning_rate": 5e-05, + "loss": 1.164, + "num_input_tokens_seen": 218384048, + "step": 3264 + }, + { + "epoch": 0.37038297872340425, + "loss": 0.8946362733840942, + "loss_ce": 0.006239033304154873, + "loss_iou": 0.3984375, + "loss_num": 0.0186767578125, + "loss_xval": 0.88671875, + "num_input_tokens_seen": 218384048, + "step": 3264 + }, + { + "epoch": 0.37049645390070923, + "grad_norm": 32.707115173339844, + "learning_rate": 5e-05, + "loss": 1.3425, + "num_input_tokens_seen": 218450952, + "step": 3265 + }, + { + "epoch": 0.37049645390070923, + "loss": 1.3684629201889038, + "loss_ce": 0.0071347602643072605, + "loss_iou": 0.54296875, + "loss_num": 0.055419921875, + "loss_xval": 1.359375, + "num_input_tokens_seen": 218450952, + "step": 3265 + }, + { + "epoch": 0.3706099290780142, + "grad_norm": 21.571191787719727, + "learning_rate": 5e-05, + "loss": 1.2251, + "num_input_tokens_seen": 218517476, + "step": 3266 + }, + { + "epoch": 0.3706099290780142, + "loss": 1.2147250175476074, + "loss_ce": 0.002322604414075613, + "loss_iou": 0.43359375, + "loss_num": 0.06884765625, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 218517476, + "step": 3266 + }, + { + "epoch": 0.37072340425531913, + "grad_norm": 14.100569725036621, + "learning_rate": 5e-05, + "loss": 1.156, + "num_input_tokens_seen": 218585216, + "step": 3267 + }, + { + "epoch": 0.37072340425531913, + "loss": 1.3551912307739258, + "loss_ce": 0.005581959150731564, + "loss_iou": 0.5234375, + "loss_num": 0.060302734375, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 218585216, + "step": 3267 + }, + { + "epoch": 0.3708368794326241, + "grad_norm": 171.14749145507812, + "learning_rate": 5e-05, + "loss": 1.0444, + "num_input_tokens_seen": 218651896, + "step": 3268 + }, + { + "epoch": 0.3708368794326241, + "loss": 1.1172270774841309, + "loss_ce": 0.0034575772006064653, + "loss_iou": 0.494140625, + "loss_num": 0.025146484375, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 218651896, + "step": 3268 + }, + { + "epoch": 0.3709503546099291, + "grad_norm": 32.13038635253906, + "learning_rate": 5e-05, + "loss": 1.1506, + "num_input_tokens_seen": 218718948, + "step": 3269 + }, + { + "epoch": 0.3709503546099291, + "loss": 1.236799955368042, + "loss_ce": 0.006331227254122496, + "loss_iou": 0.494140625, + "loss_num": 0.048828125, + "loss_xval": 1.234375, + "num_input_tokens_seen": 218718948, + "step": 3269 + }, + { + "epoch": 0.37106382978723407, + "grad_norm": 26.000558853149414, + "learning_rate": 5e-05, + "loss": 1.2963, + "num_input_tokens_seen": 218785848, + "step": 3270 + }, + { + "epoch": 0.37106382978723407, + "loss": 1.4734700918197632, + "loss_ce": 0.005208299495279789, + "loss_iou": 0.5703125, + "loss_num": 0.06494140625, + "loss_xval": 1.46875, + "num_input_tokens_seen": 218785848, + "step": 3270 + }, + { + "epoch": 0.371177304964539, + "grad_norm": 28.71162986755371, + "learning_rate": 5e-05, + "loss": 1.4352, + "num_input_tokens_seen": 218854012, + "step": 3271 + }, + { + "epoch": 0.371177304964539, + "loss": 1.4940989017486572, + "loss_ce": 0.005817593075335026, + "loss_iou": 0.62890625, + "loss_num": 0.046875, + "loss_xval": 1.484375, + "num_input_tokens_seen": 218854012, + "step": 3271 + }, + { + "epoch": 0.37129078014184397, + "grad_norm": 30.38423728942871, + "learning_rate": 5e-05, + "loss": 1.2997, + "num_input_tokens_seen": 218920868, + "step": 3272 + }, + { + "epoch": 0.37129078014184397, + "loss": 1.3830649852752686, + "loss_ce": 0.002205637516453862, + "loss_iou": 0.54296875, + "loss_num": 0.058349609375, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 218920868, + "step": 3272 + }, + { + "epoch": 0.37140425531914895, + "grad_norm": 20.085323333740234, + "learning_rate": 5e-05, + "loss": 1.4006, + "num_input_tokens_seen": 218987840, + "step": 3273 + }, + { + "epoch": 0.37140425531914895, + "loss": 1.413448452949524, + "loss_ce": 0.00768677331507206, + "loss_iou": 0.62890625, + "loss_num": 0.0301513671875, + "loss_xval": 1.40625, + "num_input_tokens_seen": 218987840, + "step": 3273 + }, + { + "epoch": 0.3715177304964539, + "grad_norm": 35.71054458618164, + "learning_rate": 5e-05, + "loss": 1.2709, + "num_input_tokens_seen": 219055584, + "step": 3274 + }, + { + "epoch": 0.3715177304964539, + "loss": 1.2447307109832764, + "loss_ce": 0.0044962684623897076, + "loss_iou": 0.51953125, + "loss_num": 0.0400390625, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 219055584, + "step": 3274 + }, + { + "epoch": 0.37163120567375885, + "grad_norm": 21.140460968017578, + "learning_rate": 5e-05, + "loss": 1.1472, + "num_input_tokens_seen": 219120292, + "step": 3275 + }, + { + "epoch": 0.37163120567375885, + "loss": 1.1135340929031372, + "loss_ce": 0.004159093368798494, + "loss_iou": 0.5, + "loss_num": 0.02197265625, + "loss_xval": 1.109375, + "num_input_tokens_seen": 219120292, + "step": 3275 + }, + { + "epoch": 0.3717446808510638, + "grad_norm": 30.86375617980957, + "learning_rate": 5e-05, + "loss": 1.3914, + "num_input_tokens_seen": 219187496, + "step": 3276 + }, + { + "epoch": 0.3717446808510638, + "loss": 1.3354575634002686, + "loss_ce": 0.007820804603397846, + "loss_iou": 0.54296875, + "loss_num": 0.0478515625, + "loss_xval": 1.328125, + "num_input_tokens_seen": 219187496, + "step": 3276 + }, + { + "epoch": 0.3718581560283688, + "grad_norm": 28.519046783447266, + "learning_rate": 5e-05, + "loss": 1.2628, + "num_input_tokens_seen": 219253580, + "step": 3277 + }, + { + "epoch": 0.3718581560283688, + "loss": 1.2195008993148804, + "loss_ce": 0.01100478321313858, + "loss_iou": 0.54296875, + "loss_num": 0.0242919921875, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 219253580, + "step": 3277 + }, + { + "epoch": 0.3719716312056738, + "grad_norm": 31.15489959716797, + "learning_rate": 5e-05, + "loss": 1.1414, + "num_input_tokens_seen": 219320380, + "step": 3278 + }, + { + "epoch": 0.3719716312056738, + "loss": 1.1523547172546387, + "loss_ce": 0.00538209592923522, + "loss_iou": 0.4921875, + "loss_num": 0.032958984375, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 219320380, + "step": 3278 + }, + { + "epoch": 0.3720851063829787, + "grad_norm": 18.934097290039062, + "learning_rate": 5e-05, + "loss": 1.1681, + "num_input_tokens_seen": 219387060, + "step": 3279 + }, + { + "epoch": 0.3720851063829787, + "loss": 1.2093925476074219, + "loss_ce": 0.008220665156841278, + "loss_iou": 0.50390625, + "loss_num": 0.0390625, + "loss_xval": 1.203125, + "num_input_tokens_seen": 219387060, + "step": 3279 + }, + { + "epoch": 0.3721985815602837, + "grad_norm": 31.881887435913086, + "learning_rate": 5e-05, + "loss": 1.1314, + "num_input_tokens_seen": 219454856, + "step": 3280 + }, + { + "epoch": 0.3721985815602837, + "loss": 1.0521092414855957, + "loss_ce": 0.004257666878402233, + "loss_iou": 0.453125, + "loss_num": 0.028076171875, + "loss_xval": 1.046875, + "num_input_tokens_seen": 219454856, + "step": 3280 + }, + { + "epoch": 0.37231205673758866, + "grad_norm": 33.44252395629883, + "learning_rate": 5e-05, + "loss": 1.5168, + "num_input_tokens_seen": 219522208, + "step": 3281 + }, + { + "epoch": 0.37231205673758866, + "loss": 1.545689344406128, + "loss_ce": 0.00418551079928875, + "loss_iou": 0.63671875, + "loss_num": 0.054443359375, + "loss_xval": 1.5390625, + "num_input_tokens_seen": 219522208, + "step": 3281 + }, + { + "epoch": 0.37242553191489364, + "grad_norm": 16.838539123535156, + "learning_rate": 5e-05, + "loss": 1.0206, + "num_input_tokens_seen": 219588512, + "step": 3282 + }, + { + "epoch": 0.37242553191489364, + "loss": 1.149899959564209, + "loss_ce": 0.008054344914853573, + "loss_iou": 0.474609375, + "loss_num": 0.038330078125, + "loss_xval": 1.140625, + "num_input_tokens_seen": 219588512, + "step": 3282 + }, + { + "epoch": 0.37253900709219856, + "grad_norm": 18.22523307800293, + "learning_rate": 5e-05, + "loss": 1.2286, + "num_input_tokens_seen": 219654912, + "step": 3283 + }, + { + "epoch": 0.37253900709219856, + "loss": 1.003081202507019, + "loss_ce": 0.005827777087688446, + "loss_iou": 0.388671875, + "loss_num": 0.044189453125, + "loss_xval": 0.99609375, + "num_input_tokens_seen": 219654912, + "step": 3283 + }, + { + "epoch": 0.37265248226950354, + "grad_norm": 22.7337589263916, + "learning_rate": 5e-05, + "loss": 1.2379, + "num_input_tokens_seen": 219722220, + "step": 3284 + }, + { + "epoch": 0.37265248226950354, + "loss": 1.272045612335205, + "loss_ce": 0.00544405821710825, + "loss_iou": 0.51171875, + "loss_num": 0.049072265625, + "loss_xval": 1.265625, + "num_input_tokens_seen": 219722220, + "step": 3284 + }, + { + "epoch": 0.3727659574468085, + "grad_norm": 25.46648597717285, + "learning_rate": 5e-05, + "loss": 1.4158, + "num_input_tokens_seen": 219788664, + "step": 3285 + }, + { + "epoch": 0.3727659574468085, + "loss": 1.4576036930084229, + "loss_ce": 0.01033802516758442, + "loss_iou": 0.57421875, + "loss_num": 0.059326171875, + "loss_xval": 1.4453125, + "num_input_tokens_seen": 219788664, + "step": 3285 + }, + { + "epoch": 0.3728794326241135, + "grad_norm": 15.828330993652344, + "learning_rate": 5e-05, + "loss": 1.0931, + "num_input_tokens_seen": 219853840, + "step": 3286 + }, + { + "epoch": 0.3728794326241135, + "loss": 1.0675898790359497, + "loss_ce": 0.0033198564779013395, + "loss_iou": 0.4453125, + "loss_num": 0.03466796875, + "loss_xval": 1.0625, + "num_input_tokens_seen": 219853840, + "step": 3286 + }, + { + "epoch": 0.3729929078014184, + "grad_norm": 29.06595802307129, + "learning_rate": 5e-05, + "loss": 1.1234, + "num_input_tokens_seen": 219920188, + "step": 3287 + }, + { + "epoch": 0.3729929078014184, + "loss": 1.0351265668869019, + "loss_ce": 0.010468367487192154, + "loss_iou": 0.439453125, + "loss_num": 0.0291748046875, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 219920188, + "step": 3287 + }, + { + "epoch": 0.3731063829787234, + "grad_norm": 38.787147521972656, + "learning_rate": 5e-05, + "loss": 1.4332, + "num_input_tokens_seen": 219986876, + "step": 3288 + }, + { + "epoch": 0.3731063829787234, + "loss": 1.3870651721954346, + "loss_ce": 0.007182449102401733, + "loss_iou": 0.56640625, + "loss_num": 0.049560546875, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 219986876, + "step": 3288 + }, + { + "epoch": 0.3732198581560284, + "grad_norm": 120.8529052734375, + "learning_rate": 5e-05, + "loss": 1.3658, + "num_input_tokens_seen": 220054612, + "step": 3289 + }, + { + "epoch": 0.3732198581560284, + "loss": 1.3126153945922852, + "loss_ce": 0.009392691776156425, + "loss_iou": 0.54296875, + "loss_num": 0.043212890625, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 220054612, + "step": 3289 + }, + { + "epoch": 0.37333333333333335, + "grad_norm": 21.74187469482422, + "learning_rate": 5e-05, + "loss": 1.157, + "num_input_tokens_seen": 220121608, + "step": 3290 + }, + { + "epoch": 0.37333333333333335, + "loss": 1.2503046989440918, + "loss_ce": 0.005187485367059708, + "loss_iou": 0.51953125, + "loss_num": 0.041748046875, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 220121608, + "step": 3290 + }, + { + "epoch": 0.3734468085106383, + "grad_norm": 19.91753387451172, + "learning_rate": 5e-05, + "loss": 1.1671, + "num_input_tokens_seen": 220188420, + "step": 3291 + }, + { + "epoch": 0.3734468085106383, + "loss": 1.2328122854232788, + "loss_ce": 0.0028318744152784348, + "loss_iou": 0.5390625, + "loss_num": 0.0303955078125, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 220188420, + "step": 3291 + }, + { + "epoch": 0.37356028368794325, + "grad_norm": 30.94002914428711, + "learning_rate": 5e-05, + "loss": 1.2348, + "num_input_tokens_seen": 220254836, + "step": 3292 + }, + { + "epoch": 0.37356028368794325, + "loss": 1.3354289531707764, + "loss_ce": 0.002909401198849082, + "loss_iou": 0.5703125, + "loss_num": 0.0390625, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 220254836, + "step": 3292 + }, + { + "epoch": 0.37367375886524823, + "grad_norm": 27.752809524536133, + "learning_rate": 5e-05, + "loss": 1.3471, + "num_input_tokens_seen": 220322560, + "step": 3293 + }, + { + "epoch": 0.37367375886524823, + "loss": 1.3134181499481201, + "loss_ce": 0.003847735933959484, + "loss_iou": 0.55859375, + "loss_num": 0.0390625, + "loss_xval": 1.3125, + "num_input_tokens_seen": 220322560, + "step": 3293 + }, + { + "epoch": 0.3737872340425532, + "grad_norm": 24.589426040649414, + "learning_rate": 5e-05, + "loss": 1.144, + "num_input_tokens_seen": 220388860, + "step": 3294 + }, + { + "epoch": 0.3737872340425532, + "loss": 1.1360044479370117, + "loss_ce": 0.009173381142318249, + "loss_iou": 0.447265625, + "loss_num": 0.04638671875, + "loss_xval": 1.125, + "num_input_tokens_seen": 220388860, + "step": 3294 + }, + { + "epoch": 0.37390070921985813, + "grad_norm": 17.005796432495117, + "learning_rate": 5e-05, + "loss": 1.1903, + "num_input_tokens_seen": 220456012, + "step": 3295 + }, + { + "epoch": 0.37390070921985813, + "loss": 1.1406984329223633, + "loss_ce": 0.004712176509201527, + "loss_iou": 0.47265625, + "loss_num": 0.0380859375, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 220456012, + "step": 3295 + }, + { + "epoch": 0.3740141843971631, + "grad_norm": 27.25898551940918, + "learning_rate": 5e-05, + "loss": 1.0875, + "num_input_tokens_seen": 220523328, + "step": 3296 + }, + { + "epoch": 0.3740141843971631, + "loss": 0.9300693273544312, + "loss_ce": 0.0062411692924797535, + "loss_iou": 0.373046875, + "loss_num": 0.035400390625, + "loss_xval": 0.921875, + "num_input_tokens_seen": 220523328, + "step": 3296 + }, + { + "epoch": 0.3741276595744681, + "grad_norm": 32.00631332397461, + "learning_rate": 5e-05, + "loss": 1.4686, + "num_input_tokens_seen": 220590072, + "step": 3297 + }, + { + "epoch": 0.3741276595744681, + "loss": 1.5121175050735474, + "loss_ce": 0.002840148750692606, + "loss_iou": 0.625, + "loss_num": 0.051513671875, + "loss_xval": 1.5078125, + "num_input_tokens_seen": 220590072, + "step": 3297 + }, + { + "epoch": 0.37424113475177306, + "grad_norm": 79.22298431396484, + "learning_rate": 5e-05, + "loss": 1.2465, + "num_input_tokens_seen": 220657572, + "step": 3298 + }, + { + "epoch": 0.37424113475177306, + "loss": 1.2150063514709473, + "loss_ce": 0.00406893203034997, + "loss_iou": 0.55859375, + "loss_num": 0.01953125, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 220657572, + "step": 3298 + }, + { + "epoch": 0.37435460992907804, + "grad_norm": 27.780818939208984, + "learning_rate": 5e-05, + "loss": 1.5288, + "num_input_tokens_seen": 220724568, + "step": 3299 + }, + { + "epoch": 0.37435460992907804, + "loss": 1.6386756896972656, + "loss_ce": 0.0019570121075958014, + "loss_iou": 0.6953125, + "loss_num": 0.048828125, + "loss_xval": 1.640625, + "num_input_tokens_seen": 220724568, + "step": 3299 + }, + { + "epoch": 0.37446808510638296, + "grad_norm": 15.33681869506836, + "learning_rate": 5e-05, + "loss": 1.0391, + "num_input_tokens_seen": 220790152, + "step": 3300 + }, + { + "epoch": 0.37446808510638296, + "loss": 1.0854356288909912, + "loss_ce": 0.00486919516697526, + "loss_iou": 0.4765625, + "loss_num": 0.0260009765625, + "loss_xval": 1.078125, + "num_input_tokens_seen": 220790152, + "step": 3300 + }, + { + "epoch": 0.37458156028368794, + "grad_norm": 18.539411544799805, + "learning_rate": 5e-05, + "loss": 1.0361, + "num_input_tokens_seen": 220857404, + "step": 3301 + }, + { + "epoch": 0.37458156028368794, + "loss": 1.0382211208343506, + "loss_ce": 0.007459341548383236, + "loss_iou": 0.43359375, + "loss_num": 0.032958984375, + "loss_xval": 1.03125, + "num_input_tokens_seen": 220857404, + "step": 3301 + }, + { + "epoch": 0.3746950354609929, + "grad_norm": 21.841508865356445, + "learning_rate": 5e-05, + "loss": 1.099, + "num_input_tokens_seen": 220923660, + "step": 3302 + }, + { + "epoch": 0.3746950354609929, + "loss": 1.1365855932235718, + "loss_ce": 0.001331657636910677, + "loss_iou": 0.484375, + "loss_num": 0.033447265625, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 220923660, + "step": 3302 + }, + { + "epoch": 0.3748085106382979, + "grad_norm": 25.7033634185791, + "learning_rate": 5e-05, + "loss": 1.4101, + "num_input_tokens_seen": 220990872, + "step": 3303 + }, + { + "epoch": 0.3748085106382979, + "loss": 1.349363923072815, + "loss_ce": 0.0031724791042506695, + "loss_iou": 0.5703125, + "loss_num": 0.041259765625, + "loss_xval": 1.34375, + "num_input_tokens_seen": 220990872, + "step": 3303 + }, + { + "epoch": 0.3749219858156028, + "grad_norm": 43.50148391723633, + "learning_rate": 5e-05, + "loss": 1.3142, + "num_input_tokens_seen": 221055484, + "step": 3304 + }, + { + "epoch": 0.3749219858156028, + "loss": 1.4521567821502686, + "loss_ce": 0.013192012906074524, + "loss_iou": 0.6015625, + "loss_num": 0.04638671875, + "loss_xval": 1.4375, + "num_input_tokens_seen": 221055484, + "step": 3304 + }, + { + "epoch": 0.3750354609929078, + "grad_norm": 28.176713943481445, + "learning_rate": 5e-05, + "loss": 1.6059, + "num_input_tokens_seen": 221121812, + "step": 3305 + }, + { + "epoch": 0.3750354609929078, + "loss": 1.6779563426971436, + "loss_ce": 0.007057972252368927, + "loss_iou": 0.703125, + "loss_num": 0.0537109375, + "loss_xval": 1.671875, + "num_input_tokens_seen": 221121812, + "step": 3305 + }, + { + "epoch": 0.3751489361702128, + "grad_norm": 33.12329864501953, + "learning_rate": 5e-05, + "loss": 1.3083, + "num_input_tokens_seen": 221188032, + "step": 3306 + }, + { + "epoch": 0.3751489361702128, + "loss": 1.284397840499878, + "loss_ce": 0.006565829738974571, + "loss_iou": 0.53125, + "loss_num": 0.04248046875, + "loss_xval": 1.28125, + "num_input_tokens_seen": 221188032, + "step": 3306 + }, + { + "epoch": 0.37526241134751775, + "grad_norm": 42.39912033081055, + "learning_rate": 5e-05, + "loss": 1.329, + "num_input_tokens_seen": 221255208, + "step": 3307 + }, + { + "epoch": 0.37526241134751775, + "loss": 1.1827532052993774, + "loss_ce": 0.005507135763764381, + "loss_iou": 0.53515625, + "loss_num": 0.0206298828125, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 221255208, + "step": 3307 + }, + { + "epoch": 0.3753758865248227, + "grad_norm": 26.64386749267578, + "learning_rate": 5e-05, + "loss": 1.4878, + "num_input_tokens_seen": 221322660, + "step": 3308 + }, + { + "epoch": 0.3753758865248227, + "loss": 1.5546942949295044, + "loss_ce": 0.002936522476375103, + "loss_iou": 0.69140625, + "loss_num": 0.034423828125, + "loss_xval": 1.5546875, + "num_input_tokens_seen": 221322660, + "step": 3308 + }, + { + "epoch": 0.37548936170212766, + "grad_norm": 21.651769638061523, + "learning_rate": 5e-05, + "loss": 1.2153, + "num_input_tokens_seen": 221388808, + "step": 3309 + }, + { + "epoch": 0.37548936170212766, + "loss": 1.1746125221252441, + "loss_ce": 0.0027375458739697933, + "loss_iou": 0.462890625, + "loss_num": 0.049072265625, + "loss_xval": 1.171875, + "num_input_tokens_seen": 221388808, + "step": 3309 + }, + { + "epoch": 0.37560283687943263, + "grad_norm": 42.793277740478516, + "learning_rate": 5e-05, + "loss": 1.1907, + "num_input_tokens_seen": 221456476, + "step": 3310 + }, + { + "epoch": 0.37560283687943263, + "loss": 1.2309997081756592, + "loss_ce": 0.005902144126594067, + "loss_iou": 0.48828125, + "loss_num": 0.050048828125, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 221456476, + "step": 3310 + }, + { + "epoch": 0.3757163120567376, + "grad_norm": 44.02124786376953, + "learning_rate": 5e-05, + "loss": 1.2296, + "num_input_tokens_seen": 221523768, + "step": 3311 + }, + { + "epoch": 0.3757163120567376, + "loss": 1.1443644762039185, + "loss_ce": 0.008622312918305397, + "loss_iou": 0.48828125, + "loss_num": 0.031982421875, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 221523768, + "step": 3311 + }, + { + "epoch": 0.37582978723404253, + "grad_norm": 27.99067497253418, + "learning_rate": 5e-05, + "loss": 1.1173, + "num_input_tokens_seen": 221590404, + "step": 3312 + }, + { + "epoch": 0.37582978723404253, + "loss": 1.1731328964233398, + "loss_ce": 0.004675894044339657, + "loss_iou": 0.515625, + "loss_num": 0.0274658203125, + "loss_xval": 1.171875, + "num_input_tokens_seen": 221590404, + "step": 3312 + }, + { + "epoch": 0.3759432624113475, + "grad_norm": 27.23967170715332, + "learning_rate": 5e-05, + "loss": 1.3817, + "num_input_tokens_seen": 221656432, + "step": 3313 + }, + { + "epoch": 0.3759432624113475, + "loss": 1.3405849933624268, + "loss_ce": 0.007577195763587952, + "loss_iou": 0.54296875, + "loss_num": 0.0498046875, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 221656432, + "step": 3313 + }, + { + "epoch": 0.3760567375886525, + "grad_norm": 54.85825729370117, + "learning_rate": 5e-05, + "loss": 1.1576, + "num_input_tokens_seen": 221723884, + "step": 3314 + }, + { + "epoch": 0.3760567375886525, + "loss": 1.2885122299194336, + "loss_ce": 0.0060414960607886314, + "loss_iou": 0.5390625, + "loss_num": 0.041259765625, + "loss_xval": 1.28125, + "num_input_tokens_seen": 221723884, + "step": 3314 + }, + { + "epoch": 0.37617021276595747, + "grad_norm": 26.57561683654785, + "learning_rate": 5e-05, + "loss": 1.3971, + "num_input_tokens_seen": 221790824, + "step": 3315 + }, + { + "epoch": 0.37617021276595747, + "loss": 1.4740521907806396, + "loss_ce": 0.00823192484676838, + "loss_iou": 0.59765625, + "loss_num": 0.053955078125, + "loss_xval": 1.46875, + "num_input_tokens_seen": 221790824, + "step": 3315 + }, + { + "epoch": 0.3762836879432624, + "grad_norm": 38.939762115478516, + "learning_rate": 5e-05, + "loss": 1.0541, + "num_input_tokens_seen": 221857772, + "step": 3316 + }, + { + "epoch": 0.3762836879432624, + "loss": 1.0680124759674072, + "loss_ce": 0.003071068786084652, + "loss_iou": 0.47265625, + "loss_num": 0.024169921875, + "loss_xval": 1.0625, + "num_input_tokens_seen": 221857772, + "step": 3316 + }, + { + "epoch": 0.37639716312056737, + "grad_norm": 25.577699661254883, + "learning_rate": 5e-05, + "loss": 1.5622, + "num_input_tokens_seen": 221924172, + "step": 3317 + }, + { + "epoch": 0.37639716312056737, + "loss": 1.4627742767333984, + "loss_ce": 0.008184448815882206, + "loss_iou": 0.62109375, + "loss_num": 0.042724609375, + "loss_xval": 1.453125, + "num_input_tokens_seen": 221924172, + "step": 3317 + }, + { + "epoch": 0.37651063829787235, + "grad_norm": 13.159913063049316, + "learning_rate": 5e-05, + "loss": 1.2273, + "num_input_tokens_seen": 221992040, + "step": 3318 + }, + { + "epoch": 0.37651063829787235, + "loss": 1.1159813404083252, + "loss_ce": 0.00562971830368042, + "loss_iou": 0.466796875, + "loss_num": 0.03515625, + "loss_xval": 1.109375, + "num_input_tokens_seen": 221992040, + "step": 3318 + }, + { + "epoch": 0.3766241134751773, + "grad_norm": 32.138648986816406, + "learning_rate": 5e-05, + "loss": 1.1915, + "num_input_tokens_seen": 222058364, + "step": 3319 + }, + { + "epoch": 0.3766241134751773, + "loss": 1.1536216735839844, + "loss_ce": 0.005184173583984375, + "loss_iou": 0.48828125, + "loss_num": 0.034423828125, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 222058364, + "step": 3319 + }, + { + "epoch": 0.37673758865248225, + "grad_norm": 27.462406158447266, + "learning_rate": 5e-05, + "loss": 1.044, + "num_input_tokens_seen": 222125148, + "step": 3320 + }, + { + "epoch": 0.37673758865248225, + "loss": 1.0826846361160278, + "loss_ce": 0.006512743420898914, + "loss_iou": 0.44140625, + "loss_num": 0.0390625, + "loss_xval": 1.078125, + "num_input_tokens_seen": 222125148, + "step": 3320 + }, + { + "epoch": 0.3768510638297872, + "grad_norm": 29.538143157958984, + "learning_rate": 5e-05, + "loss": 1.2674, + "num_input_tokens_seen": 222193364, + "step": 3321 + }, + { + "epoch": 0.3768510638297872, + "loss": 1.115936279296875, + "loss_ce": 0.004608128219842911, + "loss_iou": 0.48046875, + "loss_num": 0.030029296875, + "loss_xval": 1.109375, + "num_input_tokens_seen": 222193364, + "step": 3321 + }, + { + "epoch": 0.3769645390070922, + "grad_norm": 34.31596374511719, + "learning_rate": 5e-05, + "loss": 1.2088, + "num_input_tokens_seen": 222260276, + "step": 3322 + }, + { + "epoch": 0.3769645390070922, + "loss": 1.3029872179031372, + "loss_ce": 0.007088774815201759, + "loss_iou": 0.58984375, + "loss_num": 0.022705078125, + "loss_xval": 1.296875, + "num_input_tokens_seen": 222260276, + "step": 3322 + }, + { + "epoch": 0.3770780141843972, + "grad_norm": 37.521488189697266, + "learning_rate": 5e-05, + "loss": 1.4632, + "num_input_tokens_seen": 222327420, + "step": 3323 + }, + { + "epoch": 0.3770780141843972, + "loss": 1.3214954137802124, + "loss_ce": 0.005089161917567253, + "loss_iou": 0.546875, + "loss_num": 0.043701171875, + "loss_xval": 1.3125, + "num_input_tokens_seen": 222327420, + "step": 3323 + }, + { + "epoch": 0.3771914893617021, + "grad_norm": 48.89350509643555, + "learning_rate": 5e-05, + "loss": 1.2551, + "num_input_tokens_seen": 222394772, + "step": 3324 + }, + { + "epoch": 0.3771914893617021, + "loss": 1.2361395359039307, + "loss_ce": 0.003717755898833275, + "loss_iou": 0.5234375, + "loss_num": 0.036376953125, + "loss_xval": 1.234375, + "num_input_tokens_seen": 222394772, + "step": 3324 + }, + { + "epoch": 0.3773049645390071, + "grad_norm": 26.681148529052734, + "learning_rate": 5e-05, + "loss": 1.1244, + "num_input_tokens_seen": 222462168, + "step": 3325 + }, + { + "epoch": 0.3773049645390071, + "loss": 1.1023074388504028, + "loss_ce": 0.008069172501564026, + "loss_iou": 0.462890625, + "loss_num": 0.033935546875, + "loss_xval": 1.09375, + "num_input_tokens_seen": 222462168, + "step": 3325 + }, + { + "epoch": 0.37741843971631206, + "grad_norm": 26.844539642333984, + "learning_rate": 5e-05, + "loss": 1.5114, + "num_input_tokens_seen": 222528684, + "step": 3326 + }, + { + "epoch": 0.37741843971631206, + "loss": 1.4445281028747559, + "loss_ce": 0.0055632516741752625, + "loss_iou": 0.59375, + "loss_num": 0.049560546875, + "loss_xval": 1.4375, + "num_input_tokens_seen": 222528684, + "step": 3326 + }, + { + "epoch": 0.37753191489361704, + "grad_norm": 21.084835052490234, + "learning_rate": 5e-05, + "loss": 1.1179, + "num_input_tokens_seen": 222595624, + "step": 3327 + }, + { + "epoch": 0.37753191489361704, + "loss": 1.1724423170089722, + "loss_ce": 0.00593841727823019, + "loss_iou": 0.494140625, + "loss_num": 0.03564453125, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 222595624, + "step": 3327 + }, + { + "epoch": 0.37764539007092196, + "grad_norm": 25.554407119750977, + "learning_rate": 5e-05, + "loss": 1.3354, + "num_input_tokens_seen": 222662512, + "step": 3328 + }, + { + "epoch": 0.37764539007092196, + "loss": 1.3306939601898193, + "loss_ce": 0.007451807148754597, + "loss_iou": 0.5546875, + "loss_num": 0.04296875, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 222662512, + "step": 3328 + }, + { + "epoch": 0.37775886524822694, + "grad_norm": 24.57375717163086, + "learning_rate": 5e-05, + "loss": 1.3508, + "num_input_tokens_seen": 222731036, + "step": 3329 + }, + { + "epoch": 0.37775886524822694, + "loss": 1.3889784812927246, + "loss_ce": 0.003724508685991168, + "loss_iou": 0.57421875, + "loss_num": 0.046875, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 222731036, + "step": 3329 + }, + { + "epoch": 0.3778723404255319, + "grad_norm": 24.285804748535156, + "learning_rate": 5e-05, + "loss": 1.5513, + "num_input_tokens_seen": 222796720, + "step": 3330 + }, + { + "epoch": 0.3778723404255319, + "loss": 1.4405431747436523, + "loss_ce": 0.004019759129732847, + "loss_iou": 0.59375, + "loss_num": 0.049072265625, + "loss_xval": 1.4375, + "num_input_tokens_seen": 222796720, + "step": 3330 + }, + { + "epoch": 0.3779858156028369, + "grad_norm": 13.212430953979492, + "learning_rate": 5e-05, + "loss": 0.9771, + "num_input_tokens_seen": 222862724, + "step": 3331 + }, + { + "epoch": 0.3779858156028369, + "loss": 1.0058681964874268, + "loss_ce": 0.008309648372232914, + "loss_iou": 0.40234375, + "loss_num": 0.038818359375, + "loss_xval": 0.99609375, + "num_input_tokens_seen": 222862724, + "step": 3331 + }, + { + "epoch": 0.3780992907801418, + "grad_norm": 27.64640998840332, + "learning_rate": 5e-05, + "loss": 1.0307, + "num_input_tokens_seen": 222928964, + "step": 3332 + }, + { + "epoch": 0.3780992907801418, + "loss": 0.9457686543464661, + "loss_ce": 0.0014326899545267224, + "loss_iou": 0.38671875, + "loss_num": 0.03369140625, + "loss_xval": 0.9453125, + "num_input_tokens_seen": 222928964, + "step": 3332 + }, + { + "epoch": 0.3782127659574468, + "grad_norm": 40.90646743774414, + "learning_rate": 5e-05, + "loss": 1.4012, + "num_input_tokens_seen": 222996356, + "step": 3333 + }, + { + "epoch": 0.3782127659574468, + "loss": 1.4873225688934326, + "loss_ce": 0.007830392569303513, + "loss_iou": 0.60546875, + "loss_num": 0.05419921875, + "loss_xval": 1.4765625, + "num_input_tokens_seen": 222996356, + "step": 3333 + }, + { + "epoch": 0.3783262411347518, + "grad_norm": 55.41337585449219, + "learning_rate": 5e-05, + "loss": 1.5745, + "num_input_tokens_seen": 223062784, + "step": 3334 + }, + { + "epoch": 0.3783262411347518, + "loss": 1.566131830215454, + "loss_ce": 0.004120162222534418, + "loss_iou": 0.640625, + "loss_num": 0.056396484375, + "loss_xval": 1.5625, + "num_input_tokens_seen": 223062784, + "step": 3334 + }, + { + "epoch": 0.37843971631205675, + "grad_norm": 12.470696449279785, + "learning_rate": 5e-05, + "loss": 1.2967, + "num_input_tokens_seen": 223129468, + "step": 3335 + }, + { + "epoch": 0.37843971631205675, + "loss": 1.3063613176345825, + "loss_ce": 0.008509833365678787, + "loss_iou": 0.5390625, + "loss_num": 0.04443359375, + "loss_xval": 1.296875, + "num_input_tokens_seen": 223129468, + "step": 3335 + }, + { + "epoch": 0.37855319148936173, + "grad_norm": 15.65739917755127, + "learning_rate": 5e-05, + "loss": 1.0586, + "num_input_tokens_seen": 223196940, + "step": 3336 + }, + { + "epoch": 0.37855319148936173, + "loss": 1.0673612356185913, + "loss_ce": 0.0073026083409786224, + "loss_iou": 0.435546875, + "loss_num": 0.037841796875, + "loss_xval": 1.0625, + "num_input_tokens_seen": 223196940, + "step": 3336 + }, + { + "epoch": 0.37866666666666665, + "grad_norm": 22.367191314697266, + "learning_rate": 5e-05, + "loss": 1.1357, + "num_input_tokens_seen": 223262872, + "step": 3337 + }, + { + "epoch": 0.37866666666666665, + "loss": 1.0489269495010376, + "loss_ce": 0.005469878204166889, + "loss_iou": 0.435546875, + "loss_num": 0.034423828125, + "loss_xval": 1.046875, + "num_input_tokens_seen": 223262872, + "step": 3337 + }, + { + "epoch": 0.37878014184397163, + "grad_norm": 39.624656677246094, + "learning_rate": 5e-05, + "loss": 1.2928, + "num_input_tokens_seen": 223329936, + "step": 3338 + }, + { + "epoch": 0.37878014184397163, + "loss": 1.153259038925171, + "loss_ce": 0.00823950208723545, + "loss_iou": 0.490234375, + "loss_num": 0.032958984375, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 223329936, + "step": 3338 + }, + { + "epoch": 0.3788936170212766, + "grad_norm": 37.33665466308594, + "learning_rate": 5e-05, + "loss": 1.3728, + "num_input_tokens_seen": 223397040, + "step": 3339 + }, + { + "epoch": 0.3788936170212766, + "loss": 1.414336919784546, + "loss_ce": 0.005645559635013342, + "loss_iou": 0.6171875, + "loss_num": 0.03515625, + "loss_xval": 1.40625, + "num_input_tokens_seen": 223397040, + "step": 3339 + }, + { + "epoch": 0.3790070921985816, + "grad_norm": 27.809036254882812, + "learning_rate": 5e-05, + "loss": 1.1148, + "num_input_tokens_seen": 223464456, + "step": 3340 + }, + { + "epoch": 0.3790070921985816, + "loss": 1.0032670497894287, + "loss_ce": 0.0071732765063643456, + "loss_iou": 0.4453125, + "loss_num": 0.0211181640625, + "loss_xval": 0.99609375, + "num_input_tokens_seen": 223464456, + "step": 3340 + }, + { + "epoch": 0.3791205673758865, + "grad_norm": 21.71916389465332, + "learning_rate": 5e-05, + "loss": 1.3673, + "num_input_tokens_seen": 223530992, + "step": 3341 + }, + { + "epoch": 0.3791205673758865, + "loss": 1.514188528060913, + "loss_ce": 0.004422798287123442, + "loss_iou": 0.6484375, + "loss_num": 0.042724609375, + "loss_xval": 1.5078125, + "num_input_tokens_seen": 223530992, + "step": 3341 + }, + { + "epoch": 0.3792340425531915, + "grad_norm": 19.111038208007812, + "learning_rate": 5e-05, + "loss": 1.1586, + "num_input_tokens_seen": 223597932, + "step": 3342 + }, + { + "epoch": 0.3792340425531915, + "loss": 1.2394347190856934, + "loss_ce": 0.0035948900040239096, + "loss_iou": 0.51171875, + "loss_num": 0.04296875, + "loss_xval": 1.234375, + "num_input_tokens_seen": 223597932, + "step": 3342 + }, + { + "epoch": 0.37934751773049646, + "grad_norm": 27.372013092041016, + "learning_rate": 5e-05, + "loss": 1.3945, + "num_input_tokens_seen": 223664912, + "step": 3343 + }, + { + "epoch": 0.37934751773049646, + "loss": 1.4158992767333984, + "loss_ce": 0.010137509554624557, + "loss_iou": 0.5703125, + "loss_num": 0.053466796875, + "loss_xval": 1.40625, + "num_input_tokens_seen": 223664912, + "step": 3343 + }, + { + "epoch": 0.37946099290780144, + "grad_norm": 35.92925262451172, + "learning_rate": 5e-05, + "loss": 1.38, + "num_input_tokens_seen": 223732284, + "step": 3344 + }, + { + "epoch": 0.37946099290780144, + "loss": 1.125256061553955, + "loss_ce": 0.0036741173826158047, + "loss_iou": 0.48828125, + "loss_num": 0.02880859375, + "loss_xval": 1.125, + "num_input_tokens_seen": 223732284, + "step": 3344 + }, + { + "epoch": 0.37957446808510636, + "grad_norm": 26.590930938720703, + "learning_rate": 5e-05, + "loss": 1.4057, + "num_input_tokens_seen": 223800696, + "step": 3345 + }, + { + "epoch": 0.37957446808510636, + "loss": 1.345066785812378, + "loss_ce": 0.003758174367249012, + "loss_iou": 0.578125, + "loss_num": 0.036865234375, + "loss_xval": 1.34375, + "num_input_tokens_seen": 223800696, + "step": 3345 + }, + { + "epoch": 0.37968794326241134, + "grad_norm": 16.60592269897461, + "learning_rate": 5e-05, + "loss": 1.1989, + "num_input_tokens_seen": 223866752, + "step": 3346 + }, + { + "epoch": 0.37968794326241134, + "loss": 1.2876150608062744, + "loss_ce": 0.004900200758129358, + "loss_iou": 0.51953125, + "loss_num": 0.048583984375, + "loss_xval": 1.28125, + "num_input_tokens_seen": 223866752, + "step": 3346 + }, + { + "epoch": 0.3798014184397163, + "grad_norm": 30.16049575805664, + "learning_rate": 5e-05, + "loss": 1.1272, + "num_input_tokens_seen": 223934232, + "step": 3347 + }, + { + "epoch": 0.3798014184397163, + "loss": 1.2235333919525146, + "loss_ce": 0.006003980524837971, + "loss_iou": 0.490234375, + "loss_num": 0.04736328125, + "loss_xval": 1.21875, + "num_input_tokens_seen": 223934232, + "step": 3347 + }, + { + "epoch": 0.3799148936170213, + "grad_norm": 26.0128173828125, + "learning_rate": 5e-05, + "loss": 1.1502, + "num_input_tokens_seen": 224000064, + "step": 3348 + }, + { + "epoch": 0.3799148936170213, + "loss": 1.3619974851608276, + "loss_ce": 0.007017000112682581, + "loss_iou": 0.56640625, + "loss_num": 0.04443359375, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 224000064, + "step": 3348 + }, + { + "epoch": 0.3800283687943262, + "grad_norm": 20.104419708251953, + "learning_rate": 5e-05, + "loss": 1.1844, + "num_input_tokens_seen": 224067200, + "step": 3349 + }, + { + "epoch": 0.3800283687943262, + "loss": 1.315737247467041, + "loss_ce": 0.00421368982642889, + "loss_iou": 0.53515625, + "loss_num": 0.04833984375, + "loss_xval": 1.3125, + "num_input_tokens_seen": 224067200, + "step": 3349 + }, + { + "epoch": 0.3801418439716312, + "grad_norm": 21.79138946533203, + "learning_rate": 5e-05, + "loss": 1.4033, + "num_input_tokens_seen": 224134260, + "step": 3350 + }, + { + "epoch": 0.3801418439716312, + "loss": 1.4872978925704956, + "loss_ce": 0.005852558650076389, + "loss_iou": 0.56640625, + "loss_num": 0.0703125, + "loss_xval": 1.484375, + "num_input_tokens_seen": 224134260, + "step": 3350 + }, + { + "epoch": 0.3802553191489362, + "grad_norm": 26.02128028869629, + "learning_rate": 5e-05, + "loss": 1.157, + "num_input_tokens_seen": 224200460, + "step": 3351 + }, + { + "epoch": 0.3802553191489362, + "loss": 1.1499075889587402, + "loss_ce": 0.004888007417321205, + "loss_iou": 0.4921875, + "loss_num": 0.032470703125, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 224200460, + "step": 3351 + }, + { + "epoch": 0.38036879432624116, + "grad_norm": 27.13918113708496, + "learning_rate": 5e-05, + "loss": 1.4044, + "num_input_tokens_seen": 224267452, + "step": 3352 + }, + { + "epoch": 0.38036879432624116, + "loss": 1.6516934633255005, + "loss_ce": 0.00569737795740366, + "loss_iou": 0.65625, + "loss_num": 0.06640625, + "loss_xval": 1.6484375, + "num_input_tokens_seen": 224267452, + "step": 3352 + }, + { + "epoch": 0.3804822695035461, + "grad_norm": 28.69064712524414, + "learning_rate": 5e-05, + "loss": 1.0605, + "num_input_tokens_seen": 224333560, + "step": 3353 + }, + { + "epoch": 0.3804822695035461, + "loss": 1.1297065019607544, + "loss_ce": 0.0071479156613349915, + "loss_iou": 0.478515625, + "loss_num": 0.032958984375, + "loss_xval": 1.125, + "num_input_tokens_seen": 224333560, + "step": 3353 + }, + { + "epoch": 0.38059574468085106, + "grad_norm": 39.724308013916016, + "learning_rate": 5e-05, + "loss": 1.2382, + "num_input_tokens_seen": 224400688, + "step": 3354 + }, + { + "epoch": 0.38059574468085106, + "loss": 1.3536638021469116, + "loss_ce": 0.010890372097492218, + "loss_iou": 0.5859375, + "loss_num": 0.034423828125, + "loss_xval": 1.34375, + "num_input_tokens_seen": 224400688, + "step": 3354 + }, + { + "epoch": 0.38070921985815603, + "grad_norm": 38.66010665893555, + "learning_rate": 5e-05, + "loss": 1.2413, + "num_input_tokens_seen": 224467892, + "step": 3355 + }, + { + "epoch": 0.38070921985815603, + "loss": 1.0182757377624512, + "loss_ce": 0.004848005250096321, + "loss_iou": 0.419921875, + "loss_num": 0.03466796875, + "loss_xval": 1.015625, + "num_input_tokens_seen": 224467892, + "step": 3355 + }, + { + "epoch": 0.380822695035461, + "grad_norm": 30.595684051513672, + "learning_rate": 5e-05, + "loss": 1.1624, + "num_input_tokens_seen": 224535424, + "step": 3356 + }, + { + "epoch": 0.380822695035461, + "loss": 1.2597755193710327, + "loss_ce": 0.0039161439053714275, + "loss_iou": 0.5625, + "loss_num": 0.0263671875, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 224535424, + "step": 3356 + }, + { + "epoch": 0.38093617021276593, + "grad_norm": 84.62626647949219, + "learning_rate": 5e-05, + "loss": 1.0838, + "num_input_tokens_seen": 224602264, + "step": 3357 + }, + { + "epoch": 0.38093617021276593, + "loss": 1.006521224975586, + "loss_ce": 0.004842702299356461, + "loss_iou": 0.447265625, + "loss_num": 0.021484375, + "loss_xval": 1.0, + "num_input_tokens_seen": 224602264, + "step": 3357 + }, + { + "epoch": 0.3810496453900709, + "grad_norm": 22.511863708496094, + "learning_rate": 5e-05, + "loss": 1.4434, + "num_input_tokens_seen": 224669152, + "step": 3358 + }, + { + "epoch": 0.3810496453900709, + "loss": 1.4686436653137207, + "loss_ce": 0.002823358867317438, + "loss_iou": 0.64453125, + "loss_num": 0.035400390625, + "loss_xval": 1.46875, + "num_input_tokens_seen": 224669152, + "step": 3358 + }, + { + "epoch": 0.3811631205673759, + "grad_norm": 16.00748634338379, + "learning_rate": 5e-05, + "loss": 1.3287, + "num_input_tokens_seen": 224736072, + "step": 3359 + }, + { + "epoch": 0.3811631205673759, + "loss": 1.4258774518966675, + "loss_ce": 0.010838440619409084, + "loss_iou": 0.5703125, + "loss_num": 0.0546875, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 224736072, + "step": 3359 + }, + { + "epoch": 0.38127659574468087, + "grad_norm": 20.88277244567871, + "learning_rate": 5e-05, + "loss": 1.0649, + "num_input_tokens_seen": 224803580, + "step": 3360 + }, + { + "epoch": 0.38127659574468087, + "loss": 0.8982692956924438, + "loss_ce": 0.006423537153750658, + "loss_iou": 0.35546875, + "loss_num": 0.035888671875, + "loss_xval": 0.890625, + "num_input_tokens_seen": 224803580, + "step": 3360 + }, + { + "epoch": 0.3813900709219858, + "grad_norm": 22.815765380859375, + "learning_rate": 5e-05, + "loss": 1.2724, + "num_input_tokens_seen": 224870664, + "step": 3361 + }, + { + "epoch": 0.3813900709219858, + "loss": 1.3799811601638794, + "loss_ce": 0.0035164086148142815, + "loss_iou": 0.5703125, + "loss_num": 0.0478515625, + "loss_xval": 1.375, + "num_input_tokens_seen": 224870664, + "step": 3361 + }, + { + "epoch": 0.38150354609929077, + "grad_norm": 26.52817726135254, + "learning_rate": 5e-05, + "loss": 1.3512, + "num_input_tokens_seen": 224937456, + "step": 3362 + }, + { + "epoch": 0.38150354609929077, + "loss": 1.3198671340942383, + "loss_ce": 0.0024842158891260624, + "loss_iou": 0.5390625, + "loss_num": 0.048583984375, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 224937456, + "step": 3362 + }, + { + "epoch": 0.38161702127659575, + "grad_norm": 37.31229782104492, + "learning_rate": 5e-05, + "loss": 1.1776, + "num_input_tokens_seen": 225003884, + "step": 3363 + }, + { + "epoch": 0.38161702127659575, + "loss": 1.1343984603881836, + "loss_ce": 0.004759710747748613, + "loss_iou": 0.474609375, + "loss_num": 0.0361328125, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 225003884, + "step": 3363 + }, + { + "epoch": 0.3817304964539007, + "grad_norm": 22.36355972290039, + "learning_rate": 5e-05, + "loss": 1.3428, + "num_input_tokens_seen": 225071264, + "step": 3364 + }, + { + "epoch": 0.3817304964539007, + "loss": 1.2324771881103516, + "loss_ce": 0.005914741195738316, + "loss_iou": 0.51953125, + "loss_num": 0.036865234375, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 225071264, + "step": 3364 + }, + { + "epoch": 0.38184397163120565, + "grad_norm": 20.851964950561523, + "learning_rate": 5e-05, + "loss": 1.343, + "num_input_tokens_seen": 225139096, + "step": 3365 + }, + { + "epoch": 0.38184397163120565, + "loss": 1.447009563446045, + "loss_ce": 0.005603324621915817, + "loss_iou": 0.57421875, + "loss_num": 0.058349609375, + "loss_xval": 1.4375, + "num_input_tokens_seen": 225139096, + "step": 3365 + }, + { + "epoch": 0.3819574468085106, + "grad_norm": 26.804946899414062, + "learning_rate": 5e-05, + "loss": 1.2409, + "num_input_tokens_seen": 225206888, + "step": 3366 + }, + { + "epoch": 0.3819574468085106, + "loss": 1.443039894104004, + "loss_ce": 0.007004746235907078, + "loss_iou": 0.59375, + "loss_num": 0.04931640625, + "loss_xval": 1.4375, + "num_input_tokens_seen": 225206888, + "step": 3366 + }, + { + "epoch": 0.3820709219858156, + "grad_norm": 37.03543472290039, + "learning_rate": 5e-05, + "loss": 1.0198, + "num_input_tokens_seen": 225273536, + "step": 3367 + }, + { + "epoch": 0.3820709219858156, + "loss": 0.9697384834289551, + "loss_ce": 0.009167223237454891, + "loss_iou": 0.39453125, + "loss_num": 0.03466796875, + "loss_xval": 0.9609375, + "num_input_tokens_seen": 225273536, + "step": 3367 + }, + { + "epoch": 0.3821843971631206, + "grad_norm": 65.90558624267578, + "learning_rate": 5e-05, + "loss": 1.7482, + "num_input_tokens_seen": 225339340, + "step": 3368 + }, + { + "epoch": 0.3821843971631206, + "loss": 1.6946330070495605, + "loss_ce": 0.00810965895652771, + "loss_iou": 0.703125, + "loss_num": 0.055419921875, + "loss_xval": 1.6875, + "num_input_tokens_seen": 225339340, + "step": 3368 + }, + { + "epoch": 0.38229787234042556, + "grad_norm": 21.411346435546875, + "learning_rate": 5e-05, + "loss": 1.1971, + "num_input_tokens_seen": 225406308, + "step": 3369 + }, + { + "epoch": 0.38229787234042556, + "loss": 1.1649017333984375, + "loss_ce": 0.005844132974743843, + "loss_iou": 0.458984375, + "loss_num": 0.04833984375, + "loss_xval": 1.15625, + "num_input_tokens_seen": 225406308, + "step": 3369 + }, + { + "epoch": 0.3824113475177305, + "grad_norm": 62.51517105102539, + "learning_rate": 5e-05, + "loss": 1.2744, + "num_input_tokens_seen": 225473712, + "step": 3370 + }, + { + "epoch": 0.3824113475177305, + "loss": 1.3224506378173828, + "loss_ce": 0.002138178562745452, + "loss_iou": 0.5625, + "loss_num": 0.038330078125, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 225473712, + "step": 3370 + }, + { + "epoch": 0.38252482269503546, + "grad_norm": 38.3774528503418, + "learning_rate": 5e-05, + "loss": 1.3428, + "num_input_tokens_seen": 225540532, + "step": 3371 + }, + { + "epoch": 0.38252482269503546, + "loss": 1.6212546825408936, + "loss_ce": 0.007973436266183853, + "loss_iou": 0.62109375, + "loss_num": 0.0751953125, + "loss_xval": 1.609375, + "num_input_tokens_seen": 225540532, + "step": 3371 + }, + { + "epoch": 0.38263829787234044, + "grad_norm": 29.716981887817383, + "learning_rate": 5e-05, + "loss": 1.3064, + "num_input_tokens_seen": 225607848, + "step": 3372 + }, + { + "epoch": 0.38263829787234044, + "loss": 1.3779737949371338, + "loss_ce": 0.002973766764625907, + "loss_iou": 0.53515625, + "loss_num": 0.06005859375, + "loss_xval": 1.375, + "num_input_tokens_seen": 225607848, + "step": 3372 + }, + { + "epoch": 0.3827517730496454, + "grad_norm": 33.07007598876953, + "learning_rate": 5e-05, + "loss": 1.3419, + "num_input_tokens_seen": 225674780, + "step": 3373 + }, + { + "epoch": 0.3827517730496454, + "loss": 1.4619429111480713, + "loss_ce": 0.004911739379167557, + "loss_iou": 0.640625, + "loss_num": 0.03466796875, + "loss_xval": 1.453125, + "num_input_tokens_seen": 225674780, + "step": 3373 + }, + { + "epoch": 0.38286524822695034, + "grad_norm": 24.97610855102539, + "learning_rate": 5e-05, + "loss": 1.4451, + "num_input_tokens_seen": 225741824, + "step": 3374 + }, + { + "epoch": 0.38286524822695034, + "loss": 1.3577680587768555, + "loss_ce": 0.011088301427662373, + "loss_iou": 0.56640625, + "loss_num": 0.043212890625, + "loss_xval": 1.34375, + "num_input_tokens_seen": 225741824, + "step": 3374 + }, + { + "epoch": 0.3829787234042553, + "grad_norm": 16.207672119140625, + "learning_rate": 5e-05, + "loss": 1.2855, + "num_input_tokens_seen": 225808528, + "step": 3375 + }, + { + "epoch": 0.3829787234042553, + "loss": 1.137776494026184, + "loss_ce": 0.005208142101764679, + "loss_iou": 0.48046875, + "loss_num": 0.03466796875, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 225808528, + "step": 3375 + }, + { + "epoch": 0.3830921985815603, + "grad_norm": 14.176820755004883, + "learning_rate": 5e-05, + "loss": 1.2493, + "num_input_tokens_seen": 225875400, + "step": 3376 + }, + { + "epoch": 0.3830921985815603, + "loss": 1.3661141395568848, + "loss_ce": 0.006739089265465736, + "loss_iou": 0.55859375, + "loss_num": 0.048583984375, + "loss_xval": 1.359375, + "num_input_tokens_seen": 225875400, + "step": 3376 + }, + { + "epoch": 0.3832056737588653, + "grad_norm": 28.082744598388672, + "learning_rate": 5e-05, + "loss": 1.0614, + "num_input_tokens_seen": 225942000, + "step": 3377 + }, + { + "epoch": 0.3832056737588653, + "loss": 0.8885849118232727, + "loss_ce": 0.0065047889947891235, + "loss_iou": 0.41015625, + "loss_num": 0.0128173828125, + "loss_xval": 0.8828125, + "num_input_tokens_seen": 225942000, + "step": 3377 + }, + { + "epoch": 0.3833191489361702, + "grad_norm": 24.688983917236328, + "learning_rate": 5e-05, + "loss": 1.1661, + "num_input_tokens_seen": 226009588, + "step": 3378 + }, + { + "epoch": 0.3833191489361702, + "loss": 1.2727041244506836, + "loss_ce": 0.004149391315877438, + "loss_iou": 0.53515625, + "loss_num": 0.0390625, + "loss_xval": 1.265625, + "num_input_tokens_seen": 226009588, + "step": 3378 + }, + { + "epoch": 0.3834326241134752, + "grad_norm": 37.82018280029297, + "learning_rate": 5e-05, + "loss": 1.2937, + "num_input_tokens_seen": 226076032, + "step": 3379 + }, + { + "epoch": 0.3834326241134752, + "loss": 1.3535529375076294, + "loss_ce": 0.004920065402984619, + "loss_iou": 0.56640625, + "loss_num": 0.04248046875, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 226076032, + "step": 3379 + }, + { + "epoch": 0.38354609929078015, + "grad_norm": 24.3555850982666, + "learning_rate": 5e-05, + "loss": 1.3807, + "num_input_tokens_seen": 226142204, + "step": 3380 + }, + { + "epoch": 0.38354609929078015, + "loss": 1.2616465091705322, + "loss_ce": 0.004078064113855362, + "loss_iou": 0.54296875, + "loss_num": 0.034912109375, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 226142204, + "step": 3380 + }, + { + "epoch": 0.38365957446808513, + "grad_norm": 143.63677978515625, + "learning_rate": 5e-05, + "loss": 1.2721, + "num_input_tokens_seen": 226209332, + "step": 3381 + }, + { + "epoch": 0.38365957446808513, + "loss": 1.14890718460083, + "loss_ce": 0.0038875755853950977, + "loss_iou": 0.5, + "loss_num": 0.0289306640625, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 226209332, + "step": 3381 + }, + { + "epoch": 0.38377304964539005, + "grad_norm": 18.061172485351562, + "learning_rate": 5e-05, + "loss": 1.2731, + "num_input_tokens_seen": 226274988, + "step": 3382 + }, + { + "epoch": 0.38377304964539005, + "loss": 1.2948909997940063, + "loss_ce": 0.007293383590877056, + "loss_iou": 0.52734375, + "loss_num": 0.046875, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 226274988, + "step": 3382 + }, + { + "epoch": 0.38388652482269503, + "grad_norm": 66.65072631835938, + "learning_rate": 5e-05, + "loss": 1.1255, + "num_input_tokens_seen": 226341656, + "step": 3383 + }, + { + "epoch": 0.38388652482269503, + "loss": 1.195166826248169, + "loss_ce": 0.004737109411507845, + "loss_iou": 0.486328125, + "loss_num": 0.0439453125, + "loss_xval": 1.1875, + "num_input_tokens_seen": 226341656, + "step": 3383 + }, + { + "epoch": 0.384, + "grad_norm": 31.460988998413086, + "learning_rate": 5e-05, + "loss": 1.116, + "num_input_tokens_seen": 226408508, + "step": 3384 + }, + { + "epoch": 0.384, + "loss": 1.0934733152389526, + "loss_ce": 0.0046061365865170956, + "loss_iou": 0.47265625, + "loss_num": 0.0284423828125, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 226408508, + "step": 3384 + }, + { + "epoch": 0.384113475177305, + "grad_norm": 26.557710647583008, + "learning_rate": 5e-05, + "loss": 1.1309, + "num_input_tokens_seen": 226475852, + "step": 3385 + }, + { + "epoch": 0.384113475177305, + "loss": 1.0500242710113525, + "loss_ce": 0.007787878159433603, + "loss_iou": 0.427734375, + "loss_num": 0.03759765625, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 226475852, + "step": 3385 + }, + { + "epoch": 0.3842269503546099, + "grad_norm": 29.179506301879883, + "learning_rate": 5e-05, + "loss": 1.2708, + "num_input_tokens_seen": 226541684, + "step": 3386 + }, + { + "epoch": 0.3842269503546099, + "loss": 1.1474623680114746, + "loss_ce": 0.004151868633925915, + "loss_iou": 0.4921875, + "loss_num": 0.03125, + "loss_xval": 1.140625, + "num_input_tokens_seen": 226541684, + "step": 3386 + }, + { + "epoch": 0.3843404255319149, + "grad_norm": 26.723995208740234, + "learning_rate": 5e-05, + "loss": 1.4048, + "num_input_tokens_seen": 226609296, + "step": 3387 + }, + { + "epoch": 0.3843404255319149, + "loss": 1.2895145416259766, + "loss_ce": 0.0033817365765571594, + "loss_iou": 0.5390625, + "loss_num": 0.0419921875, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 226609296, + "step": 3387 + }, + { + "epoch": 0.38445390070921986, + "grad_norm": 31.908428192138672, + "learning_rate": 5e-05, + "loss": 1.1303, + "num_input_tokens_seen": 226675500, + "step": 3388 + }, + { + "epoch": 0.38445390070921986, + "loss": 1.0681140422821045, + "loss_ce": 0.005125680007040501, + "loss_iou": 0.443359375, + "loss_num": 0.03515625, + "loss_xval": 1.0625, + "num_input_tokens_seen": 226675500, + "step": 3388 + }, + { + "epoch": 0.38456737588652484, + "grad_norm": 33.59274673461914, + "learning_rate": 5e-05, + "loss": 1.3613, + "num_input_tokens_seen": 226742504, + "step": 3389 + }, + { + "epoch": 0.38456737588652484, + "loss": 1.5442699193954468, + "loss_ce": 0.005207412876188755, + "loss_iou": 0.6640625, + "loss_num": 0.042236328125, + "loss_xval": 1.5390625, + "num_input_tokens_seen": 226742504, + "step": 3389 + }, + { + "epoch": 0.38468085106382977, + "grad_norm": 613.1690673828125, + "learning_rate": 5e-05, + "loss": 1.366, + "num_input_tokens_seen": 226808480, + "step": 3390 + }, + { + "epoch": 0.38468085106382977, + "loss": 1.3811883926391602, + "loss_ce": 0.0057001556269824505, + "loss_iou": 0.5078125, + "loss_num": 0.072265625, + "loss_xval": 1.375, + "num_input_tokens_seen": 226808480, + "step": 3390 + }, + { + "epoch": 0.38479432624113474, + "grad_norm": 55.412315368652344, + "learning_rate": 5e-05, + "loss": 1.214, + "num_input_tokens_seen": 226875360, + "step": 3391 + }, + { + "epoch": 0.38479432624113474, + "loss": 1.2112438678741455, + "loss_ce": 0.007630621083080769, + "loss_iou": 0.486328125, + "loss_num": 0.046142578125, + "loss_xval": 1.203125, + "num_input_tokens_seen": 226875360, + "step": 3391 + }, + { + "epoch": 0.3849078014184397, + "grad_norm": 26.90813636779785, + "learning_rate": 5e-05, + "loss": 1.1132, + "num_input_tokens_seen": 226941460, + "step": 3392 + }, + { + "epoch": 0.3849078014184397, + "loss": 1.3825359344482422, + "loss_ce": 0.008512529544532299, + "loss_iou": 0.54296875, + "loss_num": 0.057373046875, + "loss_xval": 1.375, + "num_input_tokens_seen": 226941460, + "step": 3392 + }, + { + "epoch": 0.3850212765957447, + "grad_norm": 24.310894012451172, + "learning_rate": 5e-05, + "loss": 1.2082, + "num_input_tokens_seen": 227007284, + "step": 3393 + }, + { + "epoch": 0.3850212765957447, + "loss": 1.258016586303711, + "loss_ce": 0.007040016818791628, + "loss_iou": 0.494140625, + "loss_num": 0.052978515625, + "loss_xval": 1.25, + "num_input_tokens_seen": 227007284, + "step": 3393 + }, + { + "epoch": 0.3851347517730496, + "grad_norm": 28.74772834777832, + "learning_rate": 5e-05, + "loss": 1.2851, + "num_input_tokens_seen": 227074256, + "step": 3394 + }, + { + "epoch": 0.3851347517730496, + "loss": 1.1614105701446533, + "loss_ce": 0.004672175273299217, + "loss_iou": 0.49609375, + "loss_num": 0.032470703125, + "loss_xval": 1.15625, + "num_input_tokens_seen": 227074256, + "step": 3394 + }, + { + "epoch": 0.3852482269503546, + "grad_norm": 23.422313690185547, + "learning_rate": 5e-05, + "loss": 1.282, + "num_input_tokens_seen": 227141412, + "step": 3395 + }, + { + "epoch": 0.3852482269503546, + "loss": 1.0960137844085693, + "loss_ce": 0.004705187864601612, + "loss_iou": 0.470703125, + "loss_num": 0.0301513671875, + "loss_xval": 1.09375, + "num_input_tokens_seen": 227141412, + "step": 3395 + }, + { + "epoch": 0.3853617021276596, + "grad_norm": 19.94443702697754, + "learning_rate": 5e-05, + "loss": 1.1071, + "num_input_tokens_seen": 227208620, + "step": 3396 + }, + { + "epoch": 0.3853617021276596, + "loss": 1.0158319473266602, + "loss_ce": 0.006554649211466312, + "loss_iou": 0.3984375, + "loss_num": 0.042236328125, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 227208620, + "step": 3396 + }, + { + "epoch": 0.38547517730496456, + "grad_norm": 23.939781188964844, + "learning_rate": 5e-05, + "loss": 1.235, + "num_input_tokens_seen": 227275520, + "step": 3397 + }, + { + "epoch": 0.38547517730496456, + "loss": 1.0616021156311035, + "loss_ce": 0.008867757394909859, + "loss_iou": 0.46484375, + "loss_num": 0.0244140625, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 227275520, + "step": 3397 + }, + { + "epoch": 0.3855886524822695, + "grad_norm": 39.244178771972656, + "learning_rate": 5e-05, + "loss": 1.1564, + "num_input_tokens_seen": 227342800, + "step": 3398 + }, + { + "epoch": 0.3855886524822695, + "loss": 1.2188197374343872, + "loss_ce": 0.006417395547032356, + "loss_iou": 0.51171875, + "loss_num": 0.03759765625, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 227342800, + "step": 3398 + }, + { + "epoch": 0.38570212765957446, + "grad_norm": 67.97711944580078, + "learning_rate": 5e-05, + "loss": 1.275, + "num_input_tokens_seen": 227409064, + "step": 3399 + }, + { + "epoch": 0.38570212765957446, + "loss": 1.234712839126587, + "loss_ce": 0.0052207461558282375, + "loss_iou": 0.515625, + "loss_num": 0.0400390625, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 227409064, + "step": 3399 + }, + { + "epoch": 0.38581560283687943, + "grad_norm": 16.80172348022461, + "learning_rate": 5e-05, + "loss": 1.2717, + "num_input_tokens_seen": 227477096, + "step": 3400 + }, + { + "epoch": 0.38581560283687943, + "loss": 1.4482907056808472, + "loss_ce": 0.003954773303121328, + "loss_iou": 0.609375, + "loss_num": 0.04541015625, + "loss_xval": 1.4453125, + "num_input_tokens_seen": 227477096, + "step": 3400 + }, + { + "epoch": 0.3859290780141844, + "grad_norm": 21.408687591552734, + "learning_rate": 5e-05, + "loss": 1.0895, + "num_input_tokens_seen": 227544520, + "step": 3401 + }, + { + "epoch": 0.3859290780141844, + "loss": 0.9484153389930725, + "loss_ce": 0.0045676399022340775, + "loss_iou": 0.412109375, + "loss_num": 0.02392578125, + "loss_xval": 0.9453125, + "num_input_tokens_seen": 227544520, + "step": 3401 + }, + { + "epoch": 0.38604255319148933, + "grad_norm": 38.86284637451172, + "learning_rate": 5e-05, + "loss": 1.412, + "num_input_tokens_seen": 227610352, + "step": 3402 + }, + { + "epoch": 0.38604255319148933, + "loss": 1.210290551185608, + "loss_ce": 0.00917968899011612, + "loss_iou": 0.490234375, + "loss_num": 0.04443359375, + "loss_xval": 1.203125, + "num_input_tokens_seen": 227610352, + "step": 3402 + }, + { + "epoch": 0.3861560283687943, + "grad_norm": 20.131816864013672, + "learning_rate": 5e-05, + "loss": 1.6071, + "num_input_tokens_seen": 227677764, + "step": 3403 + }, + { + "epoch": 0.3861560283687943, + "loss": 1.5693026781082153, + "loss_ce": 0.005337907001376152, + "loss_iou": 0.671875, + "loss_num": 0.044189453125, + "loss_xval": 1.5625, + "num_input_tokens_seen": 227677764, + "step": 3403 + }, + { + "epoch": 0.3862695035460993, + "grad_norm": 15.513402938842773, + "learning_rate": 5e-05, + "loss": 1.258, + "num_input_tokens_seen": 227743472, + "step": 3404 + }, + { + "epoch": 0.3862695035460993, + "loss": 1.1365658044815063, + "loss_ce": 0.007171220611780882, + "loss_iou": 0.490234375, + "loss_num": 0.0303955078125, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 227743472, + "step": 3404 + }, + { + "epoch": 0.38638297872340427, + "grad_norm": 24.4223690032959, + "learning_rate": 5e-05, + "loss": 1.2315, + "num_input_tokens_seen": 227810480, + "step": 3405 + }, + { + "epoch": 0.38638297872340427, + "loss": 1.1788427829742432, + "loss_ce": 0.005258764140307903, + "loss_iou": 0.474609375, + "loss_num": 0.044677734375, + "loss_xval": 1.171875, + "num_input_tokens_seen": 227810480, + "step": 3405 + }, + { + "epoch": 0.38649645390070925, + "grad_norm": 29.23533058166504, + "learning_rate": 5e-05, + "loss": 1.2616, + "num_input_tokens_seen": 227876804, + "step": 3406 + }, + { + "epoch": 0.38649645390070925, + "loss": 1.2987549304962158, + "loss_ce": 0.008227549493312836, + "loss_iou": 0.55078125, + "loss_num": 0.0380859375, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 227876804, + "step": 3406 + }, + { + "epoch": 0.38660992907801417, + "grad_norm": 70.591796875, + "learning_rate": 5e-05, + "loss": 1.1523, + "num_input_tokens_seen": 227943400, + "step": 3407 + }, + { + "epoch": 0.38660992907801417, + "loss": 1.1667543649673462, + "loss_ce": 0.0044008479453623295, + "loss_iou": 0.44921875, + "loss_num": 0.052490234375, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 227943400, + "step": 3407 + }, + { + "epoch": 0.38672340425531915, + "grad_norm": 37.336544036865234, + "learning_rate": 5e-05, + "loss": 1.1605, + "num_input_tokens_seen": 228011488, + "step": 3408 + }, + { + "epoch": 0.38672340425531915, + "loss": 1.1919777393341064, + "loss_ce": 0.005454345140606165, + "loss_iou": 0.5078125, + "loss_num": 0.03369140625, + "loss_xval": 1.1875, + "num_input_tokens_seen": 228011488, + "step": 3408 + }, + { + "epoch": 0.3868368794326241, + "grad_norm": 24.173925399780273, + "learning_rate": 5e-05, + "loss": 0.8706, + "num_input_tokens_seen": 228078044, + "step": 3409 + }, + { + "epoch": 0.3868368794326241, + "loss": 0.9175159931182861, + "loss_ce": 0.0029652551747858524, + "loss_iou": 0.404296875, + "loss_num": 0.0211181640625, + "loss_xval": 0.9140625, + "num_input_tokens_seen": 228078044, + "step": 3409 + }, + { + "epoch": 0.3869503546099291, + "grad_norm": 24.529155731201172, + "learning_rate": 5e-05, + "loss": 1.0863, + "num_input_tokens_seen": 228143928, + "step": 3410 + }, + { + "epoch": 0.3869503546099291, + "loss": 1.3516223430633545, + "loss_ce": 0.010802153497934341, + "loss_iou": 0.45703125, + "loss_num": 0.0849609375, + "loss_xval": 1.34375, + "num_input_tokens_seen": 228143928, + "step": 3410 + }, + { + "epoch": 0.387063829787234, + "grad_norm": 30.771873474121094, + "learning_rate": 5e-05, + "loss": 1.1302, + "num_input_tokens_seen": 228210880, + "step": 3411 + }, + { + "epoch": 0.387063829787234, + "loss": 1.241262674331665, + "loss_ce": 0.00591106666252017, + "loss_iou": 0.53515625, + "loss_num": 0.033203125, + "loss_xval": 1.234375, + "num_input_tokens_seen": 228210880, + "step": 3411 + }, + { + "epoch": 0.387177304964539, + "grad_norm": 26.970500946044922, + "learning_rate": 5e-05, + "loss": 1.2332, + "num_input_tokens_seen": 228277572, + "step": 3412 + }, + { + "epoch": 0.387177304964539, + "loss": 1.2060182094573975, + "loss_ce": 0.002893205499276519, + "loss_iou": 0.52734375, + "loss_num": 0.0301513671875, + "loss_xval": 1.203125, + "num_input_tokens_seen": 228277572, + "step": 3412 + }, + { + "epoch": 0.387290780141844, + "grad_norm": 28.179962158203125, + "learning_rate": 5e-05, + "loss": 1.3008, + "num_input_tokens_seen": 228344244, + "step": 3413 + }, + { + "epoch": 0.387290780141844, + "loss": 1.4839797019958496, + "loss_ce": 0.0015578935854136944, + "loss_iou": 0.5625, + "loss_num": 0.07177734375, + "loss_xval": 1.484375, + "num_input_tokens_seen": 228344244, + "step": 3413 + }, + { + "epoch": 0.38740425531914896, + "grad_norm": 21.045337677001953, + "learning_rate": 5e-05, + "loss": 1.2275, + "num_input_tokens_seen": 228410572, + "step": 3414 + }, + { + "epoch": 0.38740425531914896, + "loss": 1.303765892982483, + "loss_ce": 0.0029846555553376675, + "loss_iou": 0.5859375, + "loss_num": 0.025634765625, + "loss_xval": 1.296875, + "num_input_tokens_seen": 228410572, + "step": 3414 + }, + { + "epoch": 0.3875177304964539, + "grad_norm": 21.22640037536621, + "learning_rate": 5e-05, + "loss": 1.2447, + "num_input_tokens_seen": 228478836, + "step": 3415 + }, + { + "epoch": 0.3875177304964539, + "loss": 1.4920399188995361, + "loss_ce": 0.0037586658727377653, + "loss_iou": 0.5859375, + "loss_num": 0.06396484375, + "loss_xval": 1.484375, + "num_input_tokens_seen": 228478836, + "step": 3415 + }, + { + "epoch": 0.38763120567375886, + "grad_norm": 26.169960021972656, + "learning_rate": 5e-05, + "loss": 1.1144, + "num_input_tokens_seen": 228545296, + "step": 3416 + }, + { + "epoch": 0.38763120567375886, + "loss": 0.9281066656112671, + "loss_ce": 0.0047668395563960075, + "loss_iou": 0.43359375, + "loss_num": 0.0113525390625, + "loss_xval": 0.921875, + "num_input_tokens_seen": 228545296, + "step": 3416 + }, + { + "epoch": 0.38774468085106384, + "grad_norm": 38.397579193115234, + "learning_rate": 5e-05, + "loss": 1.1775, + "num_input_tokens_seen": 228613164, + "step": 3417 + }, + { + "epoch": 0.38774468085106384, + "loss": 1.1020722389221191, + "loss_ce": 0.0014862497337162495, + "loss_iou": 0.482421875, + "loss_num": 0.0272216796875, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 228613164, + "step": 3417 + }, + { + "epoch": 0.3878581560283688, + "grad_norm": 25.066801071166992, + "learning_rate": 5e-05, + "loss": 1.2692, + "num_input_tokens_seen": 228678940, + "step": 3418 + }, + { + "epoch": 0.3878581560283688, + "loss": 1.280526876449585, + "loss_ce": 0.003427285933867097, + "loss_iou": 0.5390625, + "loss_num": 0.039306640625, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 228678940, + "step": 3418 + }, + { + "epoch": 0.38797163120567374, + "grad_norm": 19.677587509155273, + "learning_rate": 5e-05, + "loss": 0.9683, + "num_input_tokens_seen": 228746476, + "step": 3419 + }, + { + "epoch": 0.38797163120567374, + "loss": 1.1049859523773193, + "loss_ce": 0.0048883832059800625, + "loss_iou": 0.47265625, + "loss_num": 0.0306396484375, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 228746476, + "step": 3419 + }, + { + "epoch": 0.3880851063829787, + "grad_norm": 31.927635192871094, + "learning_rate": 5e-05, + "loss": 1.1237, + "num_input_tokens_seen": 228813016, + "step": 3420 + }, + { + "epoch": 0.3880851063829787, + "loss": 1.171750783920288, + "loss_ce": 0.003782030660659075, + "loss_iou": 0.49609375, + "loss_num": 0.03466796875, + "loss_xval": 1.171875, + "num_input_tokens_seen": 228813016, + "step": 3420 + }, + { + "epoch": 0.3881985815602837, + "grad_norm": 43.48148727416992, + "learning_rate": 5e-05, + "loss": 1.4427, + "num_input_tokens_seen": 228881232, + "step": 3421 + }, + { + "epoch": 0.3881985815602837, + "loss": 1.4272589683532715, + "loss_ce": 0.002454269677400589, + "loss_iou": 0.6171875, + "loss_num": 0.037841796875, + "loss_xval": 1.421875, + "num_input_tokens_seen": 228881232, + "step": 3421 + }, + { + "epoch": 0.3883120567375887, + "grad_norm": 25.909420013427734, + "learning_rate": 5e-05, + "loss": 1.3388, + "num_input_tokens_seen": 228949444, + "step": 3422 + }, + { + "epoch": 0.3883120567375887, + "loss": 1.3892946243286133, + "loss_ce": 0.008435173891484737, + "loss_iou": 0.56640625, + "loss_num": 0.049560546875, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 228949444, + "step": 3422 + }, + { + "epoch": 0.3884255319148936, + "grad_norm": 11.211869239807129, + "learning_rate": 5e-05, + "loss": 0.9951, + "num_input_tokens_seen": 229015488, + "step": 3423 + }, + { + "epoch": 0.3884255319148936, + "loss": 1.03031587600708, + "loss_ce": 0.005901741329580545, + "loss_iou": 0.427734375, + "loss_num": 0.033935546875, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 229015488, + "step": 3423 + }, + { + "epoch": 0.3885390070921986, + "grad_norm": 14.665229797363281, + "learning_rate": 5e-05, + "loss": 1.1093, + "num_input_tokens_seen": 229082352, + "step": 3424 + }, + { + "epoch": 0.3885390070921986, + "loss": 0.9931819438934326, + "loss_ce": 0.00392414815723896, + "loss_iou": 0.43359375, + "loss_num": 0.02392578125, + "loss_xval": 0.98828125, + "num_input_tokens_seen": 229082352, + "step": 3424 + }, + { + "epoch": 0.38865248226950355, + "grad_norm": 24.56360626220703, + "learning_rate": 5e-05, + "loss": 1.2215, + "num_input_tokens_seen": 229148808, + "step": 3425 + }, + { + "epoch": 0.38865248226950355, + "loss": 1.383383870124817, + "loss_ce": 0.0035010322462767363, + "loss_iou": 0.55078125, + "loss_num": 0.055908203125, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 229148808, + "step": 3425 + }, + { + "epoch": 0.38876595744680853, + "grad_norm": 25.45563316345215, + "learning_rate": 5e-05, + "loss": 1.428, + "num_input_tokens_seen": 229215848, + "step": 3426 + }, + { + "epoch": 0.38876595744680853, + "loss": 1.4130306243896484, + "loss_ce": 0.011663485318422318, + "loss_iou": 0.61328125, + "loss_num": 0.03564453125, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 229215848, + "step": 3426 + }, + { + "epoch": 0.38887943262411345, + "grad_norm": 26.538549423217773, + "learning_rate": 5e-05, + "loss": 1.1799, + "num_input_tokens_seen": 229282876, + "step": 3427 + }, + { + "epoch": 0.38887943262411345, + "loss": 1.056354284286499, + "loss_ce": 0.0026433179154992104, + "loss_iou": 0.447265625, + "loss_num": 0.031982421875, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 229282876, + "step": 3427 + }, + { + "epoch": 0.38899290780141843, + "grad_norm": 22.58030128479004, + "learning_rate": 5e-05, + "loss": 1.0626, + "num_input_tokens_seen": 229349388, + "step": 3428 + }, + { + "epoch": 0.38899290780141843, + "loss": 0.902190625667572, + "loss_ce": 0.006682823412120342, + "loss_iou": 0.369140625, + "loss_num": 0.03125, + "loss_xval": 0.89453125, + "num_input_tokens_seen": 229349388, + "step": 3428 + }, + { + "epoch": 0.3891063829787234, + "grad_norm": 14.839865684509277, + "learning_rate": 5e-05, + "loss": 1.0427, + "num_input_tokens_seen": 229416232, + "step": 3429 + }, + { + "epoch": 0.3891063829787234, + "loss": 0.9753662347793579, + "loss_ce": 0.0041748229414224625, + "loss_iou": 0.419921875, + "loss_num": 0.0260009765625, + "loss_xval": 0.97265625, + "num_input_tokens_seen": 229416232, + "step": 3429 + }, + { + "epoch": 0.3892198581560284, + "grad_norm": 13.039563179016113, + "learning_rate": 5e-05, + "loss": 1.1208, + "num_input_tokens_seen": 229484156, + "step": 3430 + }, + { + "epoch": 0.3892198581560284, + "loss": 1.1976035833358765, + "loss_ce": 0.0025351981166750193, + "loss_iou": 0.48828125, + "loss_num": 0.0439453125, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 229484156, + "step": 3430 + }, + { + "epoch": 0.3893333333333333, + "grad_norm": 12.480695724487305, + "learning_rate": 5e-05, + "loss": 1.1723, + "num_input_tokens_seen": 229551428, + "step": 3431 + }, + { + "epoch": 0.3893333333333333, + "loss": 1.0484800338745117, + "loss_ce": 0.005999501794576645, + "loss_iou": 0.439453125, + "loss_num": 0.032958984375, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 229551428, + "step": 3431 + }, + { + "epoch": 0.3894468085106383, + "grad_norm": 34.8492546081543, + "learning_rate": 5e-05, + "loss": 1.1802, + "num_input_tokens_seen": 229618196, + "step": 3432 + }, + { + "epoch": 0.3894468085106383, + "loss": 1.1774466037750244, + "loss_ce": 0.010942677035927773, + "loss_iou": 0.5, + "loss_num": 0.032958984375, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 229618196, + "step": 3432 + }, + { + "epoch": 0.38956028368794327, + "grad_norm": 28.298166275024414, + "learning_rate": 5e-05, + "loss": 1.3207, + "num_input_tokens_seen": 229684372, + "step": 3433 + }, + { + "epoch": 0.38956028368794327, + "loss": 1.4605591297149658, + "loss_ce": 0.003527894150465727, + "loss_iou": 0.61328125, + "loss_num": 0.046630859375, + "loss_xval": 1.453125, + "num_input_tokens_seen": 229684372, + "step": 3433 + }, + { + "epoch": 0.38967375886524824, + "grad_norm": 24.875362396240234, + "learning_rate": 5e-05, + "loss": 1.1539, + "num_input_tokens_seen": 229751224, + "step": 3434 + }, + { + "epoch": 0.38967375886524824, + "loss": 0.9872658848762512, + "loss_ce": 0.00533226877450943, + "loss_iou": 0.408203125, + "loss_num": 0.033203125, + "loss_xval": 0.98046875, + "num_input_tokens_seen": 229751224, + "step": 3434 + }, + { + "epoch": 0.38978723404255317, + "grad_norm": 64.1444320678711, + "learning_rate": 5e-05, + "loss": 1.1269, + "num_input_tokens_seen": 229818320, + "step": 3435 + }, + { + "epoch": 0.38978723404255317, + "loss": 1.258838176727295, + "loss_ce": 0.003955364227294922, + "loss_iou": 0.5234375, + "loss_num": 0.041748046875, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 229818320, + "step": 3435 + }, + { + "epoch": 0.38990070921985814, + "grad_norm": 53.26341247558594, + "learning_rate": 5e-05, + "loss": 1.3167, + "num_input_tokens_seen": 229884668, + "step": 3436 + }, + { + "epoch": 0.38990070921985814, + "loss": 1.4453074932098389, + "loss_ce": 0.005854352377355099, + "loss_iou": 0.58984375, + "loss_num": 0.05224609375, + "loss_xval": 1.4375, + "num_input_tokens_seen": 229884668, + "step": 3436 + }, + { + "epoch": 0.3900141843971631, + "grad_norm": 46.23554229736328, + "learning_rate": 5e-05, + "loss": 1.5181, + "num_input_tokens_seen": 229951784, + "step": 3437 + }, + { + "epoch": 0.3900141843971631, + "loss": 1.4343678951263428, + "loss_ce": 0.007610174361616373, + "loss_iou": 0.578125, + "loss_num": 0.05517578125, + "loss_xval": 1.4296875, + "num_input_tokens_seen": 229951784, + "step": 3437 + }, + { + "epoch": 0.3901276595744681, + "grad_norm": 25.437206268310547, + "learning_rate": 5e-05, + "loss": 1.3753, + "num_input_tokens_seen": 230018296, + "step": 3438 + }, + { + "epoch": 0.3901276595744681, + "loss": 1.3472585678100586, + "loss_ce": 0.00887974165380001, + "loss_iou": 0.5859375, + "loss_num": 0.033447265625, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 230018296, + "step": 3438 + }, + { + "epoch": 0.390241134751773, + "grad_norm": 44.80255126953125, + "learning_rate": 5e-05, + "loss": 1.2486, + "num_input_tokens_seen": 230085740, + "step": 3439 + }, + { + "epoch": 0.390241134751773, + "loss": 1.35242760181427, + "loss_ce": 0.007212796248495579, + "loss_iou": 0.5546875, + "loss_num": 0.04736328125, + "loss_xval": 1.34375, + "num_input_tokens_seen": 230085740, + "step": 3439 + }, + { + "epoch": 0.390354609929078, + "grad_norm": 32.56498718261719, + "learning_rate": 5e-05, + "loss": 1.1034, + "num_input_tokens_seen": 230152580, + "step": 3440 + }, + { + "epoch": 0.390354609929078, + "loss": 1.0379751920700073, + "loss_ce": 0.002544248476624489, + "loss_iou": 0.439453125, + "loss_num": 0.031494140625, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 230152580, + "step": 3440 + }, + { + "epoch": 0.390468085106383, + "grad_norm": 40.48455810546875, + "learning_rate": 5e-05, + "loss": 1.5469, + "num_input_tokens_seen": 230218584, + "step": 3441 + }, + { + "epoch": 0.390468085106383, + "loss": 1.6329929828643799, + "loss_ce": 0.004086756147444248, + "loss_iou": 0.69140625, + "loss_num": 0.05029296875, + "loss_xval": 1.625, + "num_input_tokens_seen": 230218584, + "step": 3441 + }, + { + "epoch": 0.39058156028368796, + "grad_norm": 30.40910530090332, + "learning_rate": 5e-05, + "loss": 1.1254, + "num_input_tokens_seen": 230285172, + "step": 3442 + }, + { + "epoch": 0.39058156028368796, + "loss": 0.9670929908752441, + "loss_ce": 0.005423117429018021, + "loss_iou": 0.40234375, + "loss_num": 0.031494140625, + "loss_xval": 0.9609375, + "num_input_tokens_seen": 230285172, + "step": 3442 + }, + { + "epoch": 0.39069503546099293, + "grad_norm": 28.20383644104004, + "learning_rate": 5e-05, + "loss": 1.0977, + "num_input_tokens_seen": 230351304, + "step": 3443 + }, + { + "epoch": 0.39069503546099293, + "loss": 0.9967844486236572, + "loss_ce": 0.00362038379535079, + "loss_iou": 0.412109375, + "loss_num": 0.033935546875, + "loss_xval": 0.9921875, + "num_input_tokens_seen": 230351304, + "step": 3443 + }, + { + "epoch": 0.39080851063829786, + "grad_norm": 58.35902404785156, + "learning_rate": 5e-05, + "loss": 1.3748, + "num_input_tokens_seen": 230418064, + "step": 3444 + }, + { + "epoch": 0.39080851063829786, + "loss": 1.2032957077026367, + "loss_ce": 0.00505349226295948, + "loss_iou": 0.5390625, + "loss_num": 0.024658203125, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 230418064, + "step": 3444 + }, + { + "epoch": 0.39092198581560283, + "grad_norm": 47.159244537353516, + "learning_rate": 5e-05, + "loss": 1.5732, + "num_input_tokens_seen": 230484728, + "step": 3445 + }, + { + "epoch": 0.39092198581560283, + "loss": 1.5778322219848633, + "loss_ce": 0.005566595587879419, + "loss_iou": 0.65625, + "loss_num": 0.0517578125, + "loss_xval": 1.5703125, + "num_input_tokens_seen": 230484728, + "step": 3445 + }, + { + "epoch": 0.3910354609929078, + "grad_norm": 19.14703941345215, + "learning_rate": 5e-05, + "loss": 1.143, + "num_input_tokens_seen": 230550580, + "step": 3446 + }, + { + "epoch": 0.3910354609929078, + "loss": 1.0849369764328003, + "loss_ce": 0.00925338827073574, + "loss_iou": 0.478515625, + "loss_num": 0.0238037109375, + "loss_xval": 1.078125, + "num_input_tokens_seen": 230550580, + "step": 3446 + }, + { + "epoch": 0.3911489361702128, + "grad_norm": 25.914337158203125, + "learning_rate": 5e-05, + "loss": 1.0359, + "num_input_tokens_seen": 230616208, + "step": 3447 + }, + { + "epoch": 0.3911489361702128, + "loss": 1.1453471183776855, + "loss_ce": 0.0044779605232179165, + "loss_iou": 0.48828125, + "loss_num": 0.032958984375, + "loss_xval": 1.140625, + "num_input_tokens_seen": 230616208, + "step": 3447 + }, + { + "epoch": 0.3912624113475177, + "grad_norm": 19.188432693481445, + "learning_rate": 5e-05, + "loss": 1.1612, + "num_input_tokens_seen": 230683152, + "step": 3448 + }, + { + "epoch": 0.3912624113475177, + "loss": 1.256508469581604, + "loss_ce": 0.004555363208055496, + "loss_iou": 0.51171875, + "loss_num": 0.04541015625, + "loss_xval": 1.25, + "num_input_tokens_seen": 230683152, + "step": 3448 + }, + { + "epoch": 0.3913758865248227, + "grad_norm": 29.7945613861084, + "learning_rate": 5e-05, + "loss": 1.2898, + "num_input_tokens_seen": 230750012, + "step": 3449 + }, + { + "epoch": 0.3913758865248227, + "loss": 1.5885975360870361, + "loss_ce": 0.003636580891907215, + "loss_iou": 0.62109375, + "loss_num": 0.06884765625, + "loss_xval": 1.5859375, + "num_input_tokens_seen": 230750012, + "step": 3449 + }, + { + "epoch": 0.39148936170212767, + "grad_norm": 30.30754280090332, + "learning_rate": 5e-05, + "loss": 1.425, + "num_input_tokens_seen": 230817792, + "step": 3450 + }, + { + "epoch": 0.39148936170212767, + "loss": 1.3420299291610718, + "loss_ce": 0.004627557471394539, + "loss_iou": 0.56640625, + "loss_num": 0.0400390625, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 230817792, + "step": 3450 + }, + { + "epoch": 0.39160283687943265, + "grad_norm": 25.706064224243164, + "learning_rate": 5e-05, + "loss": 1.0631, + "num_input_tokens_seen": 230884128, + "step": 3451 + }, + { + "epoch": 0.39160283687943265, + "loss": 1.0031626224517822, + "loss_ce": 0.008899981155991554, + "loss_iou": 0.44140625, + "loss_num": 0.022216796875, + "loss_xval": 0.99609375, + "num_input_tokens_seen": 230884128, + "step": 3451 + }, + { + "epoch": 0.39171631205673757, + "grad_norm": 26.512062072753906, + "learning_rate": 5e-05, + "loss": 1.4908, + "num_input_tokens_seen": 230952032, + "step": 3452 + }, + { + "epoch": 0.39171631205673757, + "loss": 1.5469563007354736, + "loss_ce": 0.004475854337215424, + "loss_iou": 0.63671875, + "loss_num": 0.0537109375, + "loss_xval": 1.5390625, + "num_input_tokens_seen": 230952032, + "step": 3452 + }, + { + "epoch": 0.39182978723404255, + "grad_norm": 29.334016799926758, + "learning_rate": 5e-05, + "loss": 1.006, + "num_input_tokens_seen": 231018800, + "step": 3453 + }, + { + "epoch": 0.39182978723404255, + "loss": 1.174908995628357, + "loss_ce": 0.005963605362921953, + "loss_iou": 0.46484375, + "loss_num": 0.047607421875, + "loss_xval": 1.171875, + "num_input_tokens_seen": 231018800, + "step": 3453 + }, + { + "epoch": 0.3919432624113475, + "grad_norm": 29.87752342224121, + "learning_rate": 5e-05, + "loss": 1.1907, + "num_input_tokens_seen": 231084744, + "step": 3454 + }, + { + "epoch": 0.3919432624113475, + "loss": 1.271141767501831, + "loss_ce": 0.006493373773992062, + "loss_iou": 0.55859375, + "loss_num": 0.029296875, + "loss_xval": 1.265625, + "num_input_tokens_seen": 231084744, + "step": 3454 + }, + { + "epoch": 0.3920567375886525, + "grad_norm": 30.28527069091797, + "learning_rate": 5e-05, + "loss": 1.2627, + "num_input_tokens_seen": 231150744, + "step": 3455 + }, + { + "epoch": 0.3920567375886525, + "loss": 1.3392860889434814, + "loss_ce": 0.008231507614254951, + "loss_iou": 0.5, + "loss_num": 0.06591796875, + "loss_xval": 1.328125, + "num_input_tokens_seen": 231150744, + "step": 3455 + }, + { + "epoch": 0.3921702127659574, + "grad_norm": 22.653705596923828, + "learning_rate": 5e-05, + "loss": 1.4176, + "num_input_tokens_seen": 231218676, + "step": 3456 + }, + { + "epoch": 0.3921702127659574, + "loss": 1.5382219552993774, + "loss_ce": 0.0040422468446195126, + "loss_iou": 0.64453125, + "loss_num": 0.048828125, + "loss_xval": 1.53125, + "num_input_tokens_seen": 231218676, + "step": 3456 + }, + { + "epoch": 0.3922836879432624, + "grad_norm": 23.95098876953125, + "learning_rate": 5e-05, + "loss": 1.1161, + "num_input_tokens_seen": 231284700, + "step": 3457 + }, + { + "epoch": 0.3922836879432624, + "loss": 1.0436851978302002, + "loss_ce": 0.006575810723006725, + "loss_iou": 0.4453125, + "loss_num": 0.029541015625, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 231284700, + "step": 3457 + }, + { + "epoch": 0.3923971631205674, + "grad_norm": 34.64886474609375, + "learning_rate": 5e-05, + "loss": 1.1647, + "num_input_tokens_seen": 231351348, + "step": 3458 + }, + { + "epoch": 0.3923971631205674, + "loss": 1.1460121870040894, + "loss_ce": 0.007828611880540848, + "loss_iou": 0.443359375, + "loss_num": 0.050048828125, + "loss_xval": 1.140625, + "num_input_tokens_seen": 231351348, + "step": 3458 + }, + { + "epoch": 0.39251063829787236, + "grad_norm": 31.15568733215332, + "learning_rate": 5e-05, + "loss": 1.0732, + "num_input_tokens_seen": 231418588, + "step": 3459 + }, + { + "epoch": 0.39251063829787236, + "loss": 1.038590431213379, + "loss_ce": 0.006363862659782171, + "loss_iou": 0.46484375, + "loss_num": 0.0203857421875, + "loss_xval": 1.03125, + "num_input_tokens_seen": 231418588, + "step": 3459 + }, + { + "epoch": 0.3926241134751773, + "grad_norm": 22.66976547241211, + "learning_rate": 5e-05, + "loss": 1.3576, + "num_input_tokens_seen": 231485868, + "step": 3460 + }, + { + "epoch": 0.3926241134751773, + "loss": 1.1887837648391724, + "loss_ce": 0.006654791533946991, + "loss_iou": 0.5234375, + "loss_num": 0.02734375, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 231485868, + "step": 3460 + }, + { + "epoch": 0.39273758865248226, + "grad_norm": 32.20545196533203, + "learning_rate": 5e-05, + "loss": 1.1353, + "num_input_tokens_seen": 231553448, + "step": 3461 + }, + { + "epoch": 0.39273758865248226, + "loss": 0.9140375256538391, + "loss_ce": 0.0038813177961856127, + "loss_iou": 0.376953125, + "loss_num": 0.031005859375, + "loss_xval": 0.91015625, + "num_input_tokens_seen": 231553448, + "step": 3461 + }, + { + "epoch": 0.39285106382978724, + "grad_norm": 24.693931579589844, + "learning_rate": 5e-05, + "loss": 1.4853, + "num_input_tokens_seen": 231619624, + "step": 3462 + }, + { + "epoch": 0.39285106382978724, + "loss": 1.5743153095245361, + "loss_ce": 0.005955889821052551, + "loss_iou": 0.62109375, + "loss_num": 0.064453125, + "loss_xval": 1.5703125, + "num_input_tokens_seen": 231619624, + "step": 3462 + }, + { + "epoch": 0.3929645390070922, + "grad_norm": 35.78971481323242, + "learning_rate": 5e-05, + "loss": 1.1738, + "num_input_tokens_seen": 231687132, + "step": 3463 + }, + { + "epoch": 0.3929645390070922, + "loss": 1.0781127214431763, + "loss_ce": 0.004382280167192221, + "loss_iou": 0.482421875, + "loss_num": 0.022216796875, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 231687132, + "step": 3463 + }, + { + "epoch": 0.39307801418439714, + "grad_norm": 26.132108688354492, + "learning_rate": 5e-05, + "loss": 1.3879, + "num_input_tokens_seen": 231754612, + "step": 3464 + }, + { + "epoch": 0.39307801418439714, + "loss": 1.2984856367111206, + "loss_ce": 0.0069817244075238705, + "loss_iou": 0.5546875, + "loss_num": 0.035888671875, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 231754612, + "step": 3464 + }, + { + "epoch": 0.3931914893617021, + "grad_norm": 26.209335327148438, + "learning_rate": 5e-05, + "loss": 1.1503, + "num_input_tokens_seen": 231821236, + "step": 3465 + }, + { + "epoch": 0.3931914893617021, + "loss": 1.0039582252502441, + "loss_ce": 0.005911325104534626, + "loss_iou": 0.40625, + "loss_num": 0.036865234375, + "loss_xval": 1.0, + "num_input_tokens_seen": 231821236, + "step": 3465 + }, + { + "epoch": 0.3933049645390071, + "grad_norm": 21.71442985534668, + "learning_rate": 5e-05, + "loss": 1.0547, + "num_input_tokens_seen": 231887924, + "step": 3466 + }, + { + "epoch": 0.3933049645390071, + "loss": 1.0318329334259033, + "loss_ce": 0.00448907446116209, + "loss_iou": 0.451171875, + "loss_num": 0.025146484375, + "loss_xval": 1.03125, + "num_input_tokens_seen": 231887924, + "step": 3466 + }, + { + "epoch": 0.3934184397163121, + "grad_norm": 35.267887115478516, + "learning_rate": 5e-05, + "loss": 1.3353, + "num_input_tokens_seen": 231954964, + "step": 3467 + }, + { + "epoch": 0.3934184397163121, + "loss": 1.3277287483215332, + "loss_ce": 0.00497481832280755, + "loss_iou": 0.5546875, + "loss_num": 0.04345703125, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 231954964, + "step": 3467 + }, + { + "epoch": 0.393531914893617, + "grad_norm": 47.6064453125, + "learning_rate": 5e-05, + "loss": 1.3635, + "num_input_tokens_seen": 232021868, + "step": 3468 + }, + { + "epoch": 0.393531914893617, + "loss": 1.3049595355987549, + "loss_ce": 0.0032017033081501722, + "loss_iou": 0.55078125, + "loss_num": 0.039794921875, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 232021868, + "step": 3468 + }, + { + "epoch": 0.393645390070922, + "grad_norm": 21.286537170410156, + "learning_rate": 5e-05, + "loss": 1.1104, + "num_input_tokens_seen": 232089128, + "step": 3469 + }, + { + "epoch": 0.393645390070922, + "loss": 1.0722415447235107, + "loss_ce": 0.0038821997586637735, + "loss_iou": 0.478515625, + "loss_num": 0.0224609375, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 232089128, + "step": 3469 + }, + { + "epoch": 0.39375886524822695, + "grad_norm": 25.152610778808594, + "learning_rate": 5e-05, + "loss": 1.2749, + "num_input_tokens_seen": 232156128, + "step": 3470 + }, + { + "epoch": 0.39375886524822695, + "loss": 1.2185182571411133, + "loss_ce": 0.004651118069887161, + "loss_iou": 0.515625, + "loss_num": 0.036865234375, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 232156128, + "step": 3470 + }, + { + "epoch": 0.39387234042553193, + "grad_norm": 32.277076721191406, + "learning_rate": 5e-05, + "loss": 1.5017, + "num_input_tokens_seen": 232223488, + "step": 3471 + }, + { + "epoch": 0.39387234042553193, + "loss": 1.4640353918075562, + "loss_ce": 0.006515851244330406, + "loss_iou": 0.65234375, + "loss_num": 0.0308837890625, + "loss_xval": 1.4609375, + "num_input_tokens_seen": 232223488, + "step": 3471 + }, + { + "epoch": 0.39398581560283685, + "grad_norm": 76.92194366455078, + "learning_rate": 5e-05, + "loss": 1.5268, + "num_input_tokens_seen": 232290928, + "step": 3472 + }, + { + "epoch": 0.39398581560283685, + "loss": 1.6025234460830688, + "loss_ce": 0.007796903606504202, + "loss_iou": 0.6875, + "loss_num": 0.04443359375, + "loss_xval": 1.59375, + "num_input_tokens_seen": 232290928, + "step": 3472 + }, + { + "epoch": 0.39409929078014183, + "grad_norm": 21.154918670654297, + "learning_rate": 5e-05, + "loss": 1.1377, + "num_input_tokens_seen": 232358188, + "step": 3473 + }, + { + "epoch": 0.39409929078014183, + "loss": 1.2154048681259155, + "loss_ce": 0.008861895650625229, + "loss_iou": 0.50390625, + "loss_num": 0.039306640625, + "loss_xval": 1.203125, + "num_input_tokens_seen": 232358188, + "step": 3473 + }, + { + "epoch": 0.3942127659574468, + "grad_norm": 27.553316116333008, + "learning_rate": 5e-05, + "loss": 1.2866, + "num_input_tokens_seen": 232424732, + "step": 3474 + }, + { + "epoch": 0.3942127659574468, + "loss": 1.1371413469314575, + "loss_ce": 0.005305444356054068, + "loss_iou": 0.482421875, + "loss_num": 0.033203125, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 232424732, + "step": 3474 + }, + { + "epoch": 0.3943262411347518, + "grad_norm": 38.09130859375, + "learning_rate": 5e-05, + "loss": 1.0733, + "num_input_tokens_seen": 232492032, + "step": 3475 + }, + { + "epoch": 0.3943262411347518, + "loss": 1.006328821182251, + "loss_ce": 0.002910793060436845, + "loss_iou": 0.423828125, + "loss_num": 0.031005859375, + "loss_xval": 1.0, + "num_input_tokens_seen": 232492032, + "step": 3475 + }, + { + "epoch": 0.39443971631205677, + "grad_norm": 25.438507080078125, + "learning_rate": 5e-05, + "loss": 1.3706, + "num_input_tokens_seen": 232558120, + "step": 3476 + }, + { + "epoch": 0.39443971631205677, + "loss": 1.5469386577606201, + "loss_ce": 0.005923077464103699, + "loss_iou": 0.66015625, + "loss_num": 0.043701171875, + "loss_xval": 1.5390625, + "num_input_tokens_seen": 232558120, + "step": 3476 + }, + { + "epoch": 0.3945531914893617, + "grad_norm": 19.32138442993164, + "learning_rate": 5e-05, + "loss": 1.1639, + "num_input_tokens_seen": 232624996, + "step": 3477 + }, + { + "epoch": 0.3945531914893617, + "loss": 1.1743805408477783, + "loss_ce": 0.006899992935359478, + "loss_iou": 0.451171875, + "loss_num": 0.052978515625, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 232624996, + "step": 3477 + }, + { + "epoch": 0.39466666666666667, + "grad_norm": 23.389644622802734, + "learning_rate": 5e-05, + "loss": 1.2274, + "num_input_tokens_seen": 232691740, + "step": 3478 + }, + { + "epoch": 0.39466666666666667, + "loss": 1.224210262298584, + "loss_ce": 0.007413352839648724, + "loss_iou": 0.46484375, + "loss_num": 0.0576171875, + "loss_xval": 1.21875, + "num_input_tokens_seen": 232691740, + "step": 3478 + }, + { + "epoch": 0.39478014184397164, + "grad_norm": 31.54852867126465, + "learning_rate": 5e-05, + "loss": 1.2893, + "num_input_tokens_seen": 232758744, + "step": 3479 + }, + { + "epoch": 0.39478014184397164, + "loss": 1.1507163047790527, + "loss_ce": 0.0027671577408909798, + "loss_iou": 0.49609375, + "loss_num": 0.031494140625, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 232758744, + "step": 3479 + }, + { + "epoch": 0.3948936170212766, + "grad_norm": 70.9140853881836, + "learning_rate": 5e-05, + "loss": 1.5633, + "num_input_tokens_seen": 232826336, + "step": 3480 + }, + { + "epoch": 0.3948936170212766, + "loss": 1.5803773403167725, + "loss_ce": 0.007623415440320969, + "loss_iou": 0.64453125, + "loss_num": 0.05712890625, + "loss_xval": 1.5703125, + "num_input_tokens_seen": 232826336, + "step": 3480 + }, + { + "epoch": 0.39500709219858154, + "grad_norm": 18.728710174560547, + "learning_rate": 5e-05, + "loss": 0.9751, + "num_input_tokens_seen": 232892668, + "step": 3481 + }, + { + "epoch": 0.39500709219858154, + "loss": 0.9600925445556641, + "loss_ce": 0.005014444701373577, + "loss_iou": 0.412109375, + "loss_num": 0.0262451171875, + "loss_xval": 0.953125, + "num_input_tokens_seen": 232892668, + "step": 3481 + }, + { + "epoch": 0.3951205673758865, + "grad_norm": 33.72964096069336, + "learning_rate": 5e-05, + "loss": 1.2755, + "num_input_tokens_seen": 232959468, + "step": 3482 + }, + { + "epoch": 0.3951205673758865, + "loss": 1.3484759330749512, + "loss_ce": 0.0032610949128866196, + "loss_iou": 0.5546875, + "loss_num": 0.047119140625, + "loss_xval": 1.34375, + "num_input_tokens_seen": 232959468, + "step": 3482 + }, + { + "epoch": 0.3952340425531915, + "grad_norm": 74.19390869140625, + "learning_rate": 5e-05, + "loss": 1.2383, + "num_input_tokens_seen": 233026304, + "step": 3483 + }, + { + "epoch": 0.3952340425531915, + "loss": 1.2998806238174438, + "loss_ce": 0.004958729725331068, + "loss_iou": 0.5546875, + "loss_num": 0.036865234375, + "loss_xval": 1.296875, + "num_input_tokens_seen": 233026304, + "step": 3483 + }, + { + "epoch": 0.3953475177304965, + "grad_norm": 28.78199577331543, + "learning_rate": 5e-05, + "loss": 1.2996, + "num_input_tokens_seen": 233093208, + "step": 3484 + }, + { + "epoch": 0.3953475177304965, + "loss": 1.2609636783599854, + "loss_ce": 0.002174614928662777, + "loss_iou": 0.5625, + "loss_num": 0.026611328125, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 233093208, + "step": 3484 + }, + { + "epoch": 0.3954609929078014, + "grad_norm": 22.857519149780273, + "learning_rate": 5e-05, + "loss": 1.1916, + "num_input_tokens_seen": 233160812, + "step": 3485 + }, + { + "epoch": 0.3954609929078014, + "loss": 1.2008591890335083, + "loss_ce": 0.0040818629786372185, + "loss_iou": 0.51171875, + "loss_num": 0.035400390625, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 233160812, + "step": 3485 + }, + { + "epoch": 0.3955744680851064, + "grad_norm": 19.151853561401367, + "learning_rate": 5e-05, + "loss": 1.2796, + "num_input_tokens_seen": 233227440, + "step": 3486 + }, + { + "epoch": 0.3955744680851064, + "loss": 1.3409000635147095, + "loss_ce": 0.003986035473644733, + "loss_iou": 0.51953125, + "loss_num": 0.058837890625, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 233227440, + "step": 3486 + }, + { + "epoch": 0.39568794326241136, + "grad_norm": 422.7994384765625, + "learning_rate": 5e-05, + "loss": 1.1544, + "num_input_tokens_seen": 233294504, + "step": 3487 + }, + { + "epoch": 0.39568794326241136, + "loss": 1.2754268646240234, + "loss_ce": 0.008825374767184258, + "loss_iou": 0.498046875, + "loss_num": 0.053955078125, + "loss_xval": 1.265625, + "num_input_tokens_seen": 233294504, + "step": 3487 + }, + { + "epoch": 0.39580141843971633, + "grad_norm": 22.393190383911133, + "learning_rate": 5e-05, + "loss": 1.2087, + "num_input_tokens_seen": 233362224, + "step": 3488 + }, + { + "epoch": 0.39580141843971633, + "loss": 1.3061952590942383, + "loss_ce": 0.003460770472884178, + "loss_iou": 0.55859375, + "loss_num": 0.037353515625, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 233362224, + "step": 3488 + }, + { + "epoch": 0.39591489361702126, + "grad_norm": 33.666011810302734, + "learning_rate": 5e-05, + "loss": 1.2879, + "num_input_tokens_seen": 233429488, + "step": 3489 + }, + { + "epoch": 0.39591489361702126, + "loss": 1.3675484657287598, + "loss_ce": 0.009150032885372639, + "loss_iou": 0.55078125, + "loss_num": 0.051025390625, + "loss_xval": 1.359375, + "num_input_tokens_seen": 233429488, + "step": 3489 + }, + { + "epoch": 0.39602836879432624, + "grad_norm": 34.1074333190918, + "learning_rate": 5e-05, + "loss": 1.4245, + "num_input_tokens_seen": 233496456, + "step": 3490 + }, + { + "epoch": 0.39602836879432624, + "loss": 1.354030966758728, + "loss_ce": 0.010280933231115341, + "loss_iou": 0.5703125, + "loss_num": 0.0400390625, + "loss_xval": 1.34375, + "num_input_tokens_seen": 233496456, + "step": 3490 + }, + { + "epoch": 0.3961418439716312, + "grad_norm": 25.802743911743164, + "learning_rate": 5e-05, + "loss": 1.015, + "num_input_tokens_seen": 233561868, + "step": 3491 + }, + { + "epoch": 0.3961418439716312, + "loss": 1.2948482036590576, + "loss_ce": 0.004320905543863773, + "loss_iou": 0.5390625, + "loss_num": 0.04296875, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 233561868, + "step": 3491 + }, + { + "epoch": 0.3962553191489362, + "grad_norm": 33.5124626159668, + "learning_rate": 5e-05, + "loss": 1.2774, + "num_input_tokens_seen": 233628860, + "step": 3492 + }, + { + "epoch": 0.3962553191489362, + "loss": 1.0597399473190308, + "loss_ce": 0.007493862882256508, + "loss_iou": 0.404296875, + "loss_num": 0.048828125, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 233628860, + "step": 3492 + }, + { + "epoch": 0.3963687943262411, + "grad_norm": 27.936323165893555, + "learning_rate": 5e-05, + "loss": 1.1265, + "num_input_tokens_seen": 233695936, + "step": 3493 + }, + { + "epoch": 0.3963687943262411, + "loss": 1.0017365217208862, + "loss_ce": 0.002713085152208805, + "loss_iou": 0.4453125, + "loss_num": 0.0220947265625, + "loss_xval": 1.0, + "num_input_tokens_seen": 233695936, + "step": 3493 + }, + { + "epoch": 0.3964822695035461, + "grad_norm": 38.0588264465332, + "learning_rate": 5e-05, + "loss": 1.3145, + "num_input_tokens_seen": 233763432, + "step": 3494 + }, + { + "epoch": 0.3964822695035461, + "loss": 1.1918222904205322, + "loss_ce": 0.006275464780628681, + "loss_iou": 0.4765625, + "loss_num": 0.046142578125, + "loss_xval": 1.1875, + "num_input_tokens_seen": 233763432, + "step": 3494 + }, + { + "epoch": 0.39659574468085107, + "grad_norm": 28.70833969116211, + "learning_rate": 5e-05, + "loss": 1.0877, + "num_input_tokens_seen": 233830384, + "step": 3495 + }, + { + "epoch": 0.39659574468085107, + "loss": 1.1393078565597534, + "loss_ce": 0.0021008290350437164, + "loss_iou": 0.5078125, + "loss_num": 0.02490234375, + "loss_xval": 1.140625, + "num_input_tokens_seen": 233830384, + "step": 3495 + }, + { + "epoch": 0.39670921985815605, + "grad_norm": 24.982868194580078, + "learning_rate": 5e-05, + "loss": 1.3543, + "num_input_tokens_seen": 233898204, + "step": 3496 + }, + { + "epoch": 0.39670921985815605, + "loss": 1.463196039199829, + "loss_ce": 0.004211696330457926, + "loss_iou": 0.640625, + "loss_num": 0.0361328125, + "loss_xval": 1.4609375, + "num_input_tokens_seen": 233898204, + "step": 3496 + }, + { + "epoch": 0.39682269503546097, + "grad_norm": 30.866601943969727, + "learning_rate": 5e-05, + "loss": 1.3237, + "num_input_tokens_seen": 233965912, + "step": 3497 + }, + { + "epoch": 0.39682269503546097, + "loss": 1.4863240718841553, + "loss_ce": 0.0058552855625748634, + "loss_iou": 0.60546875, + "loss_num": 0.053955078125, + "loss_xval": 1.484375, + "num_input_tokens_seen": 233965912, + "step": 3497 + }, + { + "epoch": 0.39693617021276595, + "grad_norm": 33.04235076904297, + "learning_rate": 5e-05, + "loss": 1.0972, + "num_input_tokens_seen": 234032848, + "step": 3498 + }, + { + "epoch": 0.39693617021276595, + "loss": 1.1054202318191528, + "loss_ce": 0.006299071479588747, + "loss_iou": 0.48046875, + "loss_num": 0.0277099609375, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 234032848, + "step": 3498 + }, + { + "epoch": 0.3970496453900709, + "grad_norm": 41.034385681152344, + "learning_rate": 5e-05, + "loss": 1.5238, + "num_input_tokens_seen": 234099852, + "step": 3499 + }, + { + "epoch": 0.3970496453900709, + "loss": 1.3332200050354004, + "loss_ce": 0.0060715219005942345, + "loss_iou": 0.52734375, + "loss_num": 0.0546875, + "loss_xval": 1.328125, + "num_input_tokens_seen": 234099852, + "step": 3499 + }, + { + "epoch": 0.3971631205673759, + "grad_norm": 26.27503776550293, + "learning_rate": 5e-05, + "loss": 1.4556, + "num_input_tokens_seen": 234166452, + "step": 3500 + }, + { + "epoch": 0.3971631205673759, + "eval_seeclick_CIoU": 0.3828083276748657, + "eval_seeclick_GIoU": 0.3673580437898636, + "eval_seeclick_IoU": 0.4724496304988861, + "eval_seeclick_MAE_all": 0.15279089659452438, + "eval_seeclick_MAE_h": 0.06342875957489014, + "eval_seeclick_MAE_w": 0.08839592710137367, + "eval_seeclick_MAE_x_boxes": 0.23941389471292496, + "eval_seeclick_MAE_y_boxes": 0.13173523545265198, + "eval_seeclick_NUM_probability": 0.9998590052127838, + "eval_seeclick_inside_bbox": 0.612500011920929, + "eval_seeclick_loss": 2.4411842823028564, + "eval_seeclick_loss_ce": 0.013861890882253647, + "eval_seeclick_loss_iou": 0.85888671875, + "eval_seeclick_loss_num": 0.1485443115234375, + "eval_seeclick_loss_xval": 2.45849609375, + "eval_seeclick_runtime": 65.2269, + "eval_seeclick_samples_per_second": 0.721, + "eval_seeclick_steps_per_second": 0.031, + "num_input_tokens_seen": 234166452, + "step": 3500 + }, + { + "epoch": 0.3971631205673759, + "eval_icons_CIoU": 0.49523258209228516, + "eval_icons_GIoU": 0.49590863287448883, + "eval_icons_IoU": 0.5311029553413391, + "eval_icons_MAE_all": 0.1428934708237648, + "eval_icons_MAE_h": 0.12516894564032555, + "eval_icons_MAE_w": 0.1452162116765976, + "eval_icons_MAE_x_boxes": 0.09285186976194382, + "eval_icons_MAE_y_boxes": 0.07766019552946091, + "eval_icons_NUM_probability": 0.9999511241912842, + "eval_icons_inside_bbox": 0.7638888955116272, + "eval_icons_loss": 2.4981696605682373, + "eval_icons_loss_ce": 4.569304655888118e-05, + "eval_icons_loss_iou": 0.922119140625, + "eval_icons_loss_num": 0.144287109375, + "eval_icons_loss_xval": 2.56494140625, + "eval_icons_runtime": 77.9448, + "eval_icons_samples_per_second": 0.641, + "eval_icons_steps_per_second": 0.026, + "num_input_tokens_seen": 234166452, + "step": 3500 + }, + { + "epoch": 0.3971631205673759, + "eval_screenspot_CIoU": 0.28773043553034466, + "eval_screenspot_GIoU": 0.26793476939201355, + "eval_screenspot_IoU": 0.3978353838125865, + "eval_screenspot_MAE_all": 0.20118204255898794, + "eval_screenspot_MAE_h": 0.09572045505046844, + "eval_screenspot_MAE_w": 0.15273178865512213, + "eval_screenspot_MAE_x_boxes": 0.31308706601460773, + "eval_screenspot_MAE_y_boxes": 0.13868616273005804, + "eval_screenspot_NUM_probability": 0.9999252557754517, + "eval_screenspot_inside_bbox": 0.5704166690508524, + "eval_screenspot_loss": 2.9200150966644287, + "eval_screenspot_loss_ce": 0.01661480280260245, + "eval_screenspot_loss_iou": 0.9474283854166666, + "eval_screenspot_loss_num": 0.20556640625, + "eval_screenspot_loss_xval": 2.9251302083333335, + "eval_screenspot_runtime": 118.2722, + "eval_screenspot_samples_per_second": 0.753, + "eval_screenspot_steps_per_second": 0.025, + "num_input_tokens_seen": 234166452, + "step": 3500 + }, + { + "epoch": 0.3971631205673759, + "eval_compot_CIoU": 0.34264305979013443, + "eval_compot_GIoU": 0.2976820059120655, + "eval_compot_IoU": 0.4239781051874161, + "eval_compot_MAE_all": 0.19991843774914742, + "eval_compot_MAE_h": 0.10325559414923191, + "eval_compot_MAE_w": 0.19959226250648499, + "eval_compot_MAE_x_boxes": 0.19732292741537094, + "eval_compot_MAE_y_boxes": 0.1327148824930191, + "eval_compot_NUM_probability": 0.9999364912509918, + "eval_compot_inside_bbox": 0.5364583432674408, + "eval_compot_loss": 2.980023145675659, + "eval_compot_loss_ce": 0.005893677240237594, + "eval_compot_loss_iou": 0.947509765625, + "eval_compot_loss_num": 0.23949432373046875, + "eval_compot_loss_xval": 3.0927734375, + "eval_compot_runtime": 70.9788, + "eval_compot_samples_per_second": 0.704, + "eval_compot_steps_per_second": 0.028, + "num_input_tokens_seen": 234166452, + "step": 3500 + }, + { + "epoch": 0.3971631205673759, + "loss": 2.769190788269043, + "loss_ce": 0.005518749356269836, + "loss_iou": 0.88671875, + "loss_num": 0.1982421875, + "loss_xval": 2.765625, + "num_input_tokens_seen": 234166452, + "step": 3500 + }, + { + "epoch": 0.3972765957446808, + "grad_norm": 37.518455505371094, + "learning_rate": 5e-05, + "loss": 1.1518, + "num_input_tokens_seen": 234233172, + "step": 3501 + }, + { + "epoch": 0.3972765957446808, + "loss": 1.116152286529541, + "loss_ce": 0.00775383785367012, + "loss_iou": 0.46875, + "loss_num": 0.0341796875, + "loss_xval": 1.109375, + "num_input_tokens_seen": 234233172, + "step": 3501 + }, + { + "epoch": 0.3973900709219858, + "grad_norm": 33.05767059326172, + "learning_rate": 5e-05, + "loss": 1.1405, + "num_input_tokens_seen": 234300080, + "step": 3502 + }, + { + "epoch": 0.3973900709219858, + "loss": 1.1081159114837646, + "loss_ce": 0.006065139546990395, + "loss_iou": 0.45703125, + "loss_num": 0.037841796875, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 234300080, + "step": 3502 + }, + { + "epoch": 0.3975035460992908, + "grad_norm": 30.863174438476562, + "learning_rate": 5e-05, + "loss": 1.2056, + "num_input_tokens_seen": 234367600, + "step": 3503 + }, + { + "epoch": 0.3975035460992908, + "loss": 1.2413583993911743, + "loss_ce": 0.007471662014722824, + "loss_iou": 0.494140625, + "loss_num": 0.049072265625, + "loss_xval": 1.234375, + "num_input_tokens_seen": 234367600, + "step": 3503 + }, + { + "epoch": 0.39761702127659576, + "grad_norm": 35.30138397216797, + "learning_rate": 5e-05, + "loss": 1.1579, + "num_input_tokens_seen": 234434284, + "step": 3504 + }, + { + "epoch": 0.39761702127659576, + "loss": 1.2577183246612549, + "loss_ce": 0.008694866672158241, + "loss_iou": 0.490234375, + "loss_num": 0.053466796875, + "loss_xval": 1.25, + "num_input_tokens_seen": 234434284, + "step": 3504 + }, + { + "epoch": 0.3977304964539007, + "grad_norm": 24.798555374145508, + "learning_rate": 5e-05, + "loss": 1.4654, + "num_input_tokens_seen": 234501212, + "step": 3505 + }, + { + "epoch": 0.3977304964539007, + "loss": 1.5239920616149902, + "loss_ce": 0.004460799973458052, + "loss_iou": 0.6171875, + "loss_num": 0.057373046875, + "loss_xval": 1.515625, + "num_input_tokens_seen": 234501212, + "step": 3505 + }, + { + "epoch": 0.39784397163120566, + "grad_norm": 15.76697063446045, + "learning_rate": 5e-05, + "loss": 1.345, + "num_input_tokens_seen": 234567632, + "step": 3506 + }, + { + "epoch": 0.39784397163120566, + "loss": 1.4547853469848633, + "loss_ce": 0.004101792350411415, + "loss_iou": 0.58984375, + "loss_num": 0.053466796875, + "loss_xval": 1.453125, + "num_input_tokens_seen": 234567632, + "step": 3506 + }, + { + "epoch": 0.39795744680851064, + "grad_norm": 19.389081954956055, + "learning_rate": 5e-05, + "loss": 1.2036, + "num_input_tokens_seen": 234634092, + "step": 3507 + }, + { + "epoch": 0.39795744680851064, + "loss": 1.41050124168396, + "loss_ce": 0.0069367289543151855, + "loss_iou": 0.5703125, + "loss_num": 0.053466796875, + "loss_xval": 1.40625, + "num_input_tokens_seen": 234634092, + "step": 3507 + }, + { + "epoch": 0.3980709219858156, + "grad_norm": 42.15480041503906, + "learning_rate": 5e-05, + "loss": 1.4479, + "num_input_tokens_seen": 234701020, + "step": 3508 + }, + { + "epoch": 0.3980709219858156, + "loss": 1.5008060932159424, + "loss_ce": 0.002271008677780628, + "loss_iou": 0.6015625, + "loss_num": 0.058837890625, + "loss_xval": 1.5, + "num_input_tokens_seen": 234701020, + "step": 3508 + }, + { + "epoch": 0.39818439716312054, + "grad_norm": 21.38660430908203, + "learning_rate": 5e-05, + "loss": 1.4812, + "num_input_tokens_seen": 234766532, + "step": 3509 + }, + { + "epoch": 0.39818439716312054, + "loss": 1.1766963005065918, + "loss_ce": 0.005096063949167728, + "loss_iou": 0.5, + "loss_num": 0.03369140625, + "loss_xval": 1.171875, + "num_input_tokens_seen": 234766532, + "step": 3509 + }, + { + "epoch": 0.3982978723404255, + "grad_norm": 17.39634132385254, + "learning_rate": 5e-05, + "loss": 1.2233, + "num_input_tokens_seen": 234833324, + "step": 3510 + }, + { + "epoch": 0.3982978723404255, + "loss": 1.204797625541687, + "loss_ce": 0.007043754681944847, + "loss_iou": 0.486328125, + "loss_num": 0.045166015625, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 234833324, + "step": 3510 + }, + { + "epoch": 0.3984113475177305, + "grad_norm": 22.966541290283203, + "learning_rate": 5e-05, + "loss": 1.2051, + "num_input_tokens_seen": 234898836, + "step": 3511 + }, + { + "epoch": 0.3984113475177305, + "loss": 1.3384308815002441, + "loss_ce": 0.003469967283308506, + "loss_iou": 0.5703125, + "loss_num": 0.0390625, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 234898836, + "step": 3511 + }, + { + "epoch": 0.3985248226950355, + "grad_norm": 27.30621910095215, + "learning_rate": 5e-05, + "loss": 1.3966, + "num_input_tokens_seen": 234965720, + "step": 3512 + }, + { + "epoch": 0.3985248226950355, + "loss": 1.6285288333892822, + "loss_ce": 0.007435109466314316, + "loss_iou": 0.6796875, + "loss_num": 0.052734375, + "loss_xval": 1.625, + "num_input_tokens_seen": 234965720, + "step": 3512 + }, + { + "epoch": 0.39863829787234045, + "grad_norm": 42.04314041137695, + "learning_rate": 5e-05, + "loss": 1.2496, + "num_input_tokens_seen": 235033532, + "step": 3513 + }, + { + "epoch": 0.39863829787234045, + "loss": 1.0896127223968506, + "loss_ce": 0.003186932299286127, + "loss_iou": 0.484375, + "loss_num": 0.0238037109375, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 235033532, + "step": 3513 + }, + { + "epoch": 0.3987517730496454, + "grad_norm": 20.516225814819336, + "learning_rate": 5e-05, + "loss": 1.2943, + "num_input_tokens_seen": 235100804, + "step": 3514 + }, + { + "epoch": 0.3987517730496454, + "loss": 1.5250914096832275, + "loss_ce": 0.004583672154694796, + "loss_iou": 0.62109375, + "loss_num": 0.056396484375, + "loss_xval": 1.5234375, + "num_input_tokens_seen": 235100804, + "step": 3514 + }, + { + "epoch": 0.39886524822695035, + "grad_norm": 49.514122009277344, + "learning_rate": 5e-05, + "loss": 1.1542, + "num_input_tokens_seen": 235167120, + "step": 3515 + }, + { + "epoch": 0.39886524822695035, + "loss": 1.1032328605651855, + "loss_ce": 0.01143600419163704, + "loss_iou": 0.498046875, + "loss_num": 0.01904296875, + "loss_xval": 1.09375, + "num_input_tokens_seen": 235167120, + "step": 3515 + }, + { + "epoch": 0.39897872340425533, + "grad_norm": 29.354022979736328, + "learning_rate": 5e-05, + "loss": 1.4153, + "num_input_tokens_seen": 235235600, + "step": 3516 + }, + { + "epoch": 0.39897872340425533, + "loss": 1.551264762878418, + "loss_ce": 0.005366330500692129, + "loss_iou": 0.66796875, + "loss_num": 0.041748046875, + "loss_xval": 1.546875, + "num_input_tokens_seen": 235235600, + "step": 3516 + }, + { + "epoch": 0.3990921985815603, + "grad_norm": 19.68077850341797, + "learning_rate": 5e-05, + "loss": 1.185, + "num_input_tokens_seen": 235303108, + "step": 3517 + }, + { + "epoch": 0.3990921985815603, + "loss": 1.1777793169021606, + "loss_ce": 0.008345715701580048, + "loss_iou": 0.490234375, + "loss_num": 0.038330078125, + "loss_xval": 1.171875, + "num_input_tokens_seen": 235303108, + "step": 3517 + }, + { + "epoch": 0.39920567375886523, + "grad_norm": 25.14664649963379, + "learning_rate": 5e-05, + "loss": 1.2606, + "num_input_tokens_seen": 235370748, + "step": 3518 + }, + { + "epoch": 0.39920567375886523, + "loss": 1.1468007564544678, + "loss_ce": 0.005199265666306019, + "loss_iou": 0.4609375, + "loss_num": 0.04443359375, + "loss_xval": 1.140625, + "num_input_tokens_seen": 235370748, + "step": 3518 + }, + { + "epoch": 0.3993191489361702, + "grad_norm": 87.7977066040039, + "learning_rate": 5e-05, + "loss": 1.0689, + "num_input_tokens_seen": 235437048, + "step": 3519 + }, + { + "epoch": 0.3993191489361702, + "loss": 1.1596531867980957, + "loss_ce": 0.007797667756676674, + "loss_iou": 0.5, + "loss_num": 0.030517578125, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 235437048, + "step": 3519 + }, + { + "epoch": 0.3994326241134752, + "grad_norm": 26.093265533447266, + "learning_rate": 5e-05, + "loss": 1.3398, + "num_input_tokens_seen": 235503164, + "step": 3520 + }, + { + "epoch": 0.3994326241134752, + "loss": 1.356345295906067, + "loss_ce": 0.004294475540518761, + "loss_iou": 0.609375, + "loss_num": 0.02587890625, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 235503164, + "step": 3520 + }, + { + "epoch": 0.39954609929078017, + "grad_norm": 11.990647315979004, + "learning_rate": 5e-05, + "loss": 1.1995, + "num_input_tokens_seen": 235570296, + "step": 3521 + }, + { + "epoch": 0.39954609929078017, + "loss": 1.1945507526397705, + "loss_ce": 0.0026561575941741467, + "loss_iou": 0.443359375, + "loss_num": 0.06103515625, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 235570296, + "step": 3521 + }, + { + "epoch": 0.3996595744680851, + "grad_norm": 17.36941909790039, + "learning_rate": 5e-05, + "loss": 0.8936, + "num_input_tokens_seen": 235638072, + "step": 3522 + }, + { + "epoch": 0.3996595744680851, + "loss": 1.022852897644043, + "loss_ce": 0.008204544894397259, + "loss_iou": 0.416015625, + "loss_num": 0.03662109375, + "loss_xval": 1.015625, + "num_input_tokens_seen": 235638072, + "step": 3522 + }, + { + "epoch": 0.39977304964539007, + "grad_norm": 20.0167236328125, + "learning_rate": 5e-05, + "loss": 1.2244, + "num_input_tokens_seen": 235705616, + "step": 3523 + }, + { + "epoch": 0.39977304964539007, + "loss": 1.152099370956421, + "loss_ce": 0.005614914000034332, + "loss_iou": 0.490234375, + "loss_num": 0.033203125, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 235705616, + "step": 3523 + }, + { + "epoch": 0.39988652482269504, + "grad_norm": 32.658329010009766, + "learning_rate": 5e-05, + "loss": 1.1074, + "num_input_tokens_seen": 235772664, + "step": 3524 + }, + { + "epoch": 0.39988652482269504, + "loss": 1.1255723237991333, + "loss_ce": 0.003990269266068935, + "loss_iou": 0.47265625, + "loss_num": 0.035400390625, + "loss_xval": 1.125, + "num_input_tokens_seen": 235772664, + "step": 3524 + }, + { + "epoch": 0.4, + "grad_norm": 71.82789611816406, + "learning_rate": 5e-05, + "loss": 1.4178, + "num_input_tokens_seen": 235839560, + "step": 3525 + }, + { + "epoch": 0.4, + "loss": 1.3571975231170654, + "loss_ce": 0.0036819621454924345, + "loss_iou": 0.58984375, + "loss_num": 0.03515625, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 235839560, + "step": 3525 + }, + { + "epoch": 0.40011347517730494, + "grad_norm": 110.2022476196289, + "learning_rate": 5e-05, + "loss": 1.3526, + "num_input_tokens_seen": 235906208, + "step": 3526 + }, + { + "epoch": 0.40011347517730494, + "loss": 1.464845895767212, + "loss_ce": 0.008303040638566017, + "loss_iou": 0.55078125, + "loss_num": 0.07080078125, + "loss_xval": 1.453125, + "num_input_tokens_seen": 235906208, + "step": 3526 + }, + { + "epoch": 0.4002269503546099, + "grad_norm": 26.68907928466797, + "learning_rate": 5e-05, + "loss": 1.1723, + "num_input_tokens_seen": 235973308, + "step": 3527 + }, + { + "epoch": 0.4002269503546099, + "loss": 1.1739726066589355, + "loss_ce": 0.008292607963085175, + "loss_iou": 0.52734375, + "loss_num": 0.0225830078125, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 235973308, + "step": 3527 + }, + { + "epoch": 0.4003404255319149, + "grad_norm": 34.716651916503906, + "learning_rate": 5e-05, + "loss": 1.1991, + "num_input_tokens_seen": 236040172, + "step": 3528 + }, + { + "epoch": 0.4003404255319149, + "loss": 1.1961114406585693, + "loss_ce": 0.0037285909056663513, + "loss_iou": 0.52734375, + "loss_num": 0.0281982421875, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 236040172, + "step": 3528 + }, + { + "epoch": 0.4004539007092199, + "grad_norm": 27.25352668762207, + "learning_rate": 5e-05, + "loss": 1.5727, + "num_input_tokens_seen": 236106212, + "step": 3529 + }, + { + "epoch": 0.4004539007092199, + "loss": 1.5862317085266113, + "loss_ce": 0.010059811174869537, + "loss_iou": 0.66015625, + "loss_num": 0.0517578125, + "loss_xval": 1.578125, + "num_input_tokens_seen": 236106212, + "step": 3529 + }, + { + "epoch": 0.4005673758865248, + "grad_norm": 13.553520202636719, + "learning_rate": 5e-05, + "loss": 1.1728, + "num_input_tokens_seen": 236173928, + "step": 3530 + }, + { + "epoch": 0.4005673758865248, + "loss": 1.2912700176239014, + "loss_ce": 0.008066865615546703, + "loss_iou": 0.51171875, + "loss_num": 0.0517578125, + "loss_xval": 1.28125, + "num_input_tokens_seen": 236173928, + "step": 3530 + }, + { + "epoch": 0.4006808510638298, + "grad_norm": 24.090877532958984, + "learning_rate": 5e-05, + "loss": 1.1641, + "num_input_tokens_seen": 236241144, + "step": 3531 + }, + { + "epoch": 0.4006808510638298, + "loss": 1.0967373847961426, + "loss_ce": 0.00664955098181963, + "loss_iou": 0.431640625, + "loss_num": 0.045166015625, + "loss_xval": 1.09375, + "num_input_tokens_seen": 236241144, + "step": 3531 + }, + { + "epoch": 0.40079432624113476, + "grad_norm": 42.241432189941406, + "learning_rate": 5e-05, + "loss": 1.3063, + "num_input_tokens_seen": 236308356, + "step": 3532 + }, + { + "epoch": 0.40079432624113476, + "loss": 1.1326243877410889, + "loss_ce": 0.0029856632463634014, + "loss_iou": 0.46875, + "loss_num": 0.038818359375, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 236308356, + "step": 3532 + }, + { + "epoch": 0.40090780141843974, + "grad_norm": 21.08864974975586, + "learning_rate": 5e-05, + "loss": 1.3184, + "num_input_tokens_seen": 236375900, + "step": 3533 + }, + { + "epoch": 0.40090780141843974, + "loss": 1.2138171195983887, + "loss_ce": 0.004466519691050053, + "loss_iou": 0.55078125, + "loss_num": 0.02197265625, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 236375900, + "step": 3533 + }, + { + "epoch": 0.40102127659574466, + "grad_norm": 17.48419952392578, + "learning_rate": 5e-05, + "loss": 1.2431, + "num_input_tokens_seen": 236442328, + "step": 3534 + }, + { + "epoch": 0.40102127659574466, + "loss": 1.3643200397491455, + "loss_ce": 0.007386333774775267, + "loss_iou": 0.53125, + "loss_num": 0.059814453125, + "loss_xval": 1.359375, + "num_input_tokens_seen": 236442328, + "step": 3534 + }, + { + "epoch": 0.40113475177304964, + "grad_norm": 28.896257400512695, + "learning_rate": 5e-05, + "loss": 1.1815, + "num_input_tokens_seen": 236508628, + "step": 3535 + }, + { + "epoch": 0.40113475177304964, + "loss": 1.2177634239196777, + "loss_ce": 0.004872878082096577, + "loss_iou": 0.5390625, + "loss_num": 0.0277099609375, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 236508628, + "step": 3535 + }, + { + "epoch": 0.4012482269503546, + "grad_norm": 24.246976852416992, + "learning_rate": 5e-05, + "loss": 1.334, + "num_input_tokens_seen": 236575560, + "step": 3536 + }, + { + "epoch": 0.4012482269503546, + "loss": 1.5766284465789795, + "loss_ce": 0.007780726067721844, + "loss_iou": 0.63671875, + "loss_num": 0.05908203125, + "loss_xval": 1.5703125, + "num_input_tokens_seen": 236575560, + "step": 3536 + }, + { + "epoch": 0.4013617021276596, + "grad_norm": 36.59306716918945, + "learning_rate": 5e-05, + "loss": 1.2165, + "num_input_tokens_seen": 236643276, + "step": 3537 + }, + { + "epoch": 0.4013617021276596, + "loss": 1.3661067485809326, + "loss_ce": 0.006243427284061909, + "loss_iou": 0.58984375, + "loss_num": 0.03662109375, + "loss_xval": 1.359375, + "num_input_tokens_seen": 236643276, + "step": 3537 + }, + { + "epoch": 0.4014751773049645, + "grad_norm": 25.16383934020996, + "learning_rate": 5e-05, + "loss": 1.2951, + "num_input_tokens_seen": 236709112, + "step": 3538 + }, + { + "epoch": 0.4014751773049645, + "loss": 0.9902499318122864, + "loss_ce": 0.0049593886360526085, + "loss_iou": 0.4296875, + "loss_num": 0.0252685546875, + "loss_xval": 0.984375, + "num_input_tokens_seen": 236709112, + "step": 3538 + }, + { + "epoch": 0.4015886524822695, + "grad_norm": 20.847265243530273, + "learning_rate": 5e-05, + "loss": 1.2229, + "num_input_tokens_seen": 236775732, + "step": 3539 + }, + { + "epoch": 0.4015886524822695, + "loss": 1.3388986587524414, + "loss_ce": 0.00882047601044178, + "loss_iou": 0.5390625, + "loss_num": 0.049560546875, + "loss_xval": 1.328125, + "num_input_tokens_seen": 236775732, + "step": 3539 + }, + { + "epoch": 0.40170212765957447, + "grad_norm": 40.277984619140625, + "learning_rate": 5e-05, + "loss": 1.2331, + "num_input_tokens_seen": 236842520, + "step": 3540 + }, + { + "epoch": 0.40170212765957447, + "loss": 1.2389287948608398, + "loss_ce": 0.0035771955735981464, + "loss_iou": 0.546875, + "loss_num": 0.0286865234375, + "loss_xval": 1.234375, + "num_input_tokens_seen": 236842520, + "step": 3540 + }, + { + "epoch": 0.40181560283687945, + "grad_norm": 40.86174774169922, + "learning_rate": 5e-05, + "loss": 1.1486, + "num_input_tokens_seen": 236909944, + "step": 3541 + }, + { + "epoch": 0.40181560283687945, + "loss": 1.2787859439849854, + "loss_ce": 0.0038837033789604902, + "loss_iou": 0.5390625, + "loss_num": 0.0400390625, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 236909944, + "step": 3541 + }, + { + "epoch": 0.40192907801418437, + "grad_norm": 22.009620666503906, + "learning_rate": 5e-05, + "loss": 1.341, + "num_input_tokens_seen": 236975272, + "step": 3542 + }, + { + "epoch": 0.40192907801418437, + "loss": 1.3227509260177612, + "loss_ce": 0.00414740014821291, + "loss_iou": 0.5546875, + "loss_num": 0.041259765625, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 236975272, + "step": 3542 + }, + { + "epoch": 0.40204255319148935, + "grad_norm": 16.16021156311035, + "learning_rate": 5e-05, + "loss": 1.1318, + "num_input_tokens_seen": 237042636, + "step": 3543 + }, + { + "epoch": 0.40204255319148935, + "loss": 1.2171745300292969, + "loss_ce": 0.0037957062013447285, + "loss_iou": 0.49609375, + "loss_num": 0.0439453125, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 237042636, + "step": 3543 + }, + { + "epoch": 0.4021560283687943, + "grad_norm": 17.773670196533203, + "learning_rate": 5e-05, + "loss": 1.0872, + "num_input_tokens_seen": 237110548, + "step": 3544 + }, + { + "epoch": 0.4021560283687943, + "loss": 1.2585034370422363, + "loss_ce": 0.008503451943397522, + "loss_iou": 0.5234375, + "loss_num": 0.040771484375, + "loss_xval": 1.25, + "num_input_tokens_seen": 237110548, + "step": 3544 + }, + { + "epoch": 0.4022695035460993, + "grad_norm": 23.55775260925293, + "learning_rate": 5e-05, + "loss": 1.0597, + "num_input_tokens_seen": 237176692, + "step": 3545 + }, + { + "epoch": 0.4022695035460993, + "loss": 0.8851802349090576, + "loss_ce": 0.005297386087477207, + "loss_iou": 0.373046875, + "loss_num": 0.0263671875, + "loss_xval": 0.87890625, + "num_input_tokens_seen": 237176692, + "step": 3545 + }, + { + "epoch": 0.4023829787234042, + "grad_norm": 23.056055068969727, + "learning_rate": 5e-05, + "loss": 1.1817, + "num_input_tokens_seen": 237244120, + "step": 3546 + }, + { + "epoch": 0.4023829787234042, + "loss": 1.2724359035491943, + "loss_ce": 0.008275833912193775, + "loss_iou": 0.57421875, + "loss_num": 0.0228271484375, + "loss_xval": 1.265625, + "num_input_tokens_seen": 237244120, + "step": 3546 + }, + { + "epoch": 0.4024964539007092, + "grad_norm": 32.972904205322266, + "learning_rate": 5e-05, + "loss": 1.3378, + "num_input_tokens_seen": 237311416, + "step": 3547 + }, + { + "epoch": 0.4024964539007092, + "loss": 1.3459280729293823, + "loss_ce": 0.008525723591446877, + "loss_iou": 0.5390625, + "loss_num": 0.052734375, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 237311416, + "step": 3547 + }, + { + "epoch": 0.4026099290780142, + "grad_norm": 55.05682373046875, + "learning_rate": 5e-05, + "loss": 1.6351, + "num_input_tokens_seen": 237378576, + "step": 3548 + }, + { + "epoch": 0.4026099290780142, + "loss": 1.666985273361206, + "loss_ce": 0.011223589070141315, + "loss_iou": 0.68359375, + "loss_num": 0.05810546875, + "loss_xval": 1.65625, + "num_input_tokens_seen": 237378576, + "step": 3548 + }, + { + "epoch": 0.40272340425531916, + "grad_norm": 22.967342376708984, + "learning_rate": 5e-05, + "loss": 1.166, + "num_input_tokens_seen": 237445028, + "step": 3549 + }, + { + "epoch": 0.40272340425531916, + "loss": 1.1692757606506348, + "loss_ce": 0.006678079254925251, + "loss_iou": 0.4609375, + "loss_num": 0.048095703125, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 237445028, + "step": 3549 + }, + { + "epoch": 0.40283687943262414, + "grad_norm": 30.739789962768555, + "learning_rate": 5e-05, + "loss": 1.2511, + "num_input_tokens_seen": 237511892, + "step": 3550 + }, + { + "epoch": 0.40283687943262414, + "loss": 1.157974123954773, + "loss_ce": 0.006118663586676121, + "loss_iou": 0.474609375, + "loss_num": 0.04052734375, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 237511892, + "step": 3550 + }, + { + "epoch": 0.40295035460992906, + "grad_norm": 31.42778968811035, + "learning_rate": 5e-05, + "loss": 1.4216, + "num_input_tokens_seen": 237579552, + "step": 3551 + }, + { + "epoch": 0.40295035460992906, + "loss": 1.3628865480422974, + "loss_ce": 0.005464648362249136, + "loss_iou": 0.5859375, + "loss_num": 0.037353515625, + "loss_xval": 1.359375, + "num_input_tokens_seen": 237579552, + "step": 3551 + }, + { + "epoch": 0.40306382978723404, + "grad_norm": 92.42414855957031, + "learning_rate": 5e-05, + "loss": 1.3908, + "num_input_tokens_seen": 237645624, + "step": 3552 + }, + { + "epoch": 0.40306382978723404, + "loss": 1.3899158239364624, + "loss_ce": 0.007591618224978447, + "loss_iou": 0.5234375, + "loss_num": 0.06689453125, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 237645624, + "step": 3552 + }, + { + "epoch": 0.403177304964539, + "grad_norm": 45.43193817138672, + "learning_rate": 5e-05, + "loss": 1.2949, + "num_input_tokens_seen": 237712712, + "step": 3553 + }, + { + "epoch": 0.403177304964539, + "loss": 1.3384599685668945, + "loss_ce": 0.00398736447095871, + "loss_iou": 0.5625, + "loss_num": 0.042236328125, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 237712712, + "step": 3553 + }, + { + "epoch": 0.403290780141844, + "grad_norm": 18.247224807739258, + "learning_rate": 5e-05, + "loss": 1.3252, + "num_input_tokens_seen": 237779576, + "step": 3554 + }, + { + "epoch": 0.403290780141844, + "loss": 1.2007648944854736, + "loss_ce": 0.003499238984659314, + "loss_iou": 0.498046875, + "loss_num": 0.040283203125, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 237779576, + "step": 3554 + }, + { + "epoch": 0.4034042553191489, + "grad_norm": 33.09628677368164, + "learning_rate": 5e-05, + "loss": 0.9964, + "num_input_tokens_seen": 237847072, + "step": 3555 + }, + { + "epoch": 0.4034042553191489, + "loss": 1.0224350690841675, + "loss_ce": 0.004368642345070839, + "loss_iou": 0.44921875, + "loss_num": 0.0245361328125, + "loss_xval": 1.015625, + "num_input_tokens_seen": 237847072, + "step": 3555 + }, + { + "epoch": 0.4035177304964539, + "grad_norm": 66.78299713134766, + "learning_rate": 5e-05, + "loss": 1.1717, + "num_input_tokens_seen": 237913916, + "step": 3556 + }, + { + "epoch": 0.4035177304964539, + "loss": 1.1709802150726318, + "loss_ce": 0.003499687183648348, + "loss_iou": 0.52734375, + "loss_num": 0.0228271484375, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 237913916, + "step": 3556 + }, + { + "epoch": 0.4036312056737589, + "grad_norm": 24.497699737548828, + "learning_rate": 5e-05, + "loss": 1.1385, + "num_input_tokens_seen": 237981776, + "step": 3557 + }, + { + "epoch": 0.4036312056737589, + "loss": 1.2033708095550537, + "loss_ce": 0.009034850634634495, + "loss_iou": 0.50390625, + "loss_num": 0.037109375, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 237981776, + "step": 3557 + }, + { + "epoch": 0.40374468085106385, + "grad_norm": 51.42173767089844, + "learning_rate": 5e-05, + "loss": 1.2423, + "num_input_tokens_seen": 238048996, + "step": 3558 + }, + { + "epoch": 0.40374468085106385, + "loss": 1.3026212453842163, + "loss_ce": 0.00867593102157116, + "loss_iou": 0.5078125, + "loss_num": 0.055419921875, + "loss_xval": 1.296875, + "num_input_tokens_seen": 238048996, + "step": 3558 + }, + { + "epoch": 0.4038581560283688, + "grad_norm": 35.11296463012695, + "learning_rate": 5e-05, + "loss": 1.1574, + "num_input_tokens_seen": 238115292, + "step": 3559 + }, + { + "epoch": 0.4038581560283688, + "loss": 1.1615896224975586, + "loss_ce": 0.0033864504657685757, + "loss_iou": 0.4921875, + "loss_num": 0.03466796875, + "loss_xval": 1.15625, + "num_input_tokens_seen": 238115292, + "step": 3559 + }, + { + "epoch": 0.40397163120567375, + "grad_norm": 25.766508102416992, + "learning_rate": 5e-05, + "loss": 1.4299, + "num_input_tokens_seen": 238182764, + "step": 3560 + }, + { + "epoch": 0.40397163120567375, + "loss": 1.4216675758361816, + "loss_ce": 0.011511348187923431, + "loss_iou": 0.578125, + "loss_num": 0.05078125, + "loss_xval": 1.40625, + "num_input_tokens_seen": 238182764, + "step": 3560 + }, + { + "epoch": 0.40408510638297873, + "grad_norm": 73.59732818603516, + "learning_rate": 5e-05, + "loss": 1.1915, + "num_input_tokens_seen": 238249728, + "step": 3561 + }, + { + "epoch": 0.40408510638297873, + "loss": 1.206667423248291, + "loss_ce": 0.0074486518278717995, + "loss_iou": 0.47265625, + "loss_num": 0.05078125, + "loss_xval": 1.203125, + "num_input_tokens_seen": 238249728, + "step": 3561 + }, + { + "epoch": 0.4041985815602837, + "grad_norm": 32.88972854614258, + "learning_rate": 5e-05, + "loss": 1.1447, + "num_input_tokens_seen": 238316184, + "step": 3562 + }, + { + "epoch": 0.4041985815602837, + "loss": 1.172642469406128, + "loss_ce": 0.005161905195564032, + "loss_iou": 0.470703125, + "loss_num": 0.045166015625, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 238316184, + "step": 3562 + }, + { + "epoch": 0.40431205673758863, + "grad_norm": 20.39299201965332, + "learning_rate": 5e-05, + "loss": 1.1032, + "num_input_tokens_seen": 238382956, + "step": 3563 + }, + { + "epoch": 0.40431205673758863, + "loss": 1.0630438327789307, + "loss_ce": 0.004144995007663965, + "loss_iou": 0.40625, + "loss_num": 0.049072265625, + "loss_xval": 1.0625, + "num_input_tokens_seen": 238382956, + "step": 3563 + }, + { + "epoch": 0.4044255319148936, + "grad_norm": 32.708709716796875, + "learning_rate": 5e-05, + "loss": 1.0876, + "num_input_tokens_seen": 238449592, + "step": 3564 + }, + { + "epoch": 0.4044255319148936, + "loss": 1.0935728549957275, + "loss_ce": 0.0047057573683559895, + "loss_iou": 0.447265625, + "loss_num": 0.0390625, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 238449592, + "step": 3564 + }, + { + "epoch": 0.4045390070921986, + "grad_norm": 31.441585540771484, + "learning_rate": 5e-05, + "loss": 1.3867, + "num_input_tokens_seen": 238517240, + "step": 3565 + }, + { + "epoch": 0.4045390070921986, + "loss": 1.6036186218261719, + "loss_ce": 0.00449746660888195, + "loss_iou": 0.62109375, + "loss_num": 0.0712890625, + "loss_xval": 1.6015625, + "num_input_tokens_seen": 238517240, + "step": 3565 + }, + { + "epoch": 0.40465248226950357, + "grad_norm": 42.46713638305664, + "learning_rate": 5e-05, + "loss": 1.2031, + "num_input_tokens_seen": 238584128, + "step": 3566 + }, + { + "epoch": 0.40465248226950357, + "loss": 1.2794932126998901, + "loss_ce": 0.0031260151881724596, + "loss_iou": 0.55078125, + "loss_num": 0.035400390625, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 238584128, + "step": 3566 + }, + { + "epoch": 0.4047659574468085, + "grad_norm": 32.231815338134766, + "learning_rate": 5e-05, + "loss": 1.3333, + "num_input_tokens_seen": 238651444, + "step": 3567 + }, + { + "epoch": 0.4047659574468085, + "loss": 1.3664175271987915, + "loss_ce": 0.0021597875747829676, + "loss_iou": 0.5703125, + "loss_num": 0.044921875, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 238651444, + "step": 3567 + }, + { + "epoch": 0.40487943262411347, + "grad_norm": 31.92702865600586, + "learning_rate": 5e-05, + "loss": 1.3762, + "num_input_tokens_seen": 238720124, + "step": 3568 + }, + { + "epoch": 0.40487943262411347, + "loss": 1.4946506023406982, + "loss_ce": 0.006857630796730518, + "loss_iou": 0.60546875, + "loss_num": 0.0556640625, + "loss_xval": 1.484375, + "num_input_tokens_seen": 238720124, + "step": 3568 + }, + { + "epoch": 0.40499290780141844, + "grad_norm": 28.018268585205078, + "learning_rate": 5e-05, + "loss": 1.3, + "num_input_tokens_seen": 238787488, + "step": 3569 + }, + { + "epoch": 0.40499290780141844, + "loss": 1.2295377254486084, + "loss_ce": 0.00688148895278573, + "loss_iou": 0.53125, + "loss_num": 0.0322265625, + "loss_xval": 1.21875, + "num_input_tokens_seen": 238787488, + "step": 3569 + }, + { + "epoch": 0.4051063829787234, + "grad_norm": 29.26284408569336, + "learning_rate": 5e-05, + "loss": 1.3604, + "num_input_tokens_seen": 238854784, + "step": 3570 + }, + { + "epoch": 0.4051063829787234, + "loss": 1.3989393711090088, + "loss_ce": 0.005384619347751141, + "loss_iou": 0.56640625, + "loss_num": 0.052490234375, + "loss_xval": 1.390625, + "num_input_tokens_seen": 238854784, + "step": 3570 + }, + { + "epoch": 0.40521985815602835, + "grad_norm": 28.752635955810547, + "learning_rate": 5e-05, + "loss": 1.3593, + "num_input_tokens_seen": 238921796, + "step": 3571 + }, + { + "epoch": 0.40521985815602835, + "loss": 1.30820894241333, + "loss_ce": 0.0074276975356042385, + "loss_iou": 0.546875, + "loss_num": 0.04150390625, + "loss_xval": 1.296875, + "num_input_tokens_seen": 238921796, + "step": 3571 + }, + { + "epoch": 0.4053333333333333, + "grad_norm": 16.67066764831543, + "learning_rate": 5e-05, + "loss": 1.2538, + "num_input_tokens_seen": 238988592, + "step": 3572 + }, + { + "epoch": 0.4053333333333333, + "loss": 1.2092781066894531, + "loss_ce": 0.005939577240496874, + "loss_iou": 0.49609375, + "loss_num": 0.0419921875, + "loss_xval": 1.203125, + "num_input_tokens_seen": 238988592, + "step": 3572 + }, + { + "epoch": 0.4054468085106383, + "grad_norm": 19.330015182495117, + "learning_rate": 5e-05, + "loss": 1.4215, + "num_input_tokens_seen": 239055140, + "step": 3573 + }, + { + "epoch": 0.4054468085106383, + "loss": 1.501136302947998, + "loss_ce": 0.0060192132368683815, + "loss_iou": 0.578125, + "loss_num": 0.06787109375, + "loss_xval": 1.4921875, + "num_input_tokens_seen": 239055140, + "step": 3573 + }, + { + "epoch": 0.4055602836879433, + "grad_norm": 16.181198120117188, + "learning_rate": 5e-05, + "loss": 1.0356, + "num_input_tokens_seen": 239121512, + "step": 3574 + }, + { + "epoch": 0.4055602836879433, + "loss": 1.0550981760025024, + "loss_ce": 0.005537655670195818, + "loss_iou": 0.458984375, + "loss_num": 0.0263671875, + "loss_xval": 1.046875, + "num_input_tokens_seen": 239121512, + "step": 3574 + }, + { + "epoch": 0.4056737588652482, + "grad_norm": 24.190521240234375, + "learning_rate": 5e-05, + "loss": 1.2676, + "num_input_tokens_seen": 239188328, + "step": 3575 + }, + { + "epoch": 0.4056737588652482, + "loss": 1.4239697456359863, + "loss_ce": 0.006000969093292952, + "loss_iou": 0.546875, + "loss_num": 0.06494140625, + "loss_xval": 1.421875, + "num_input_tokens_seen": 239188328, + "step": 3575 + }, + { + "epoch": 0.4057872340425532, + "grad_norm": 34.847469329833984, + "learning_rate": 5e-05, + "loss": 1.3852, + "num_input_tokens_seen": 239254880, + "step": 3576 + }, + { + "epoch": 0.4057872340425532, + "loss": 1.34921133518219, + "loss_ce": 0.011076570488512516, + "loss_iou": 0.51171875, + "loss_num": 0.06298828125, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 239254880, + "step": 3576 + }, + { + "epoch": 0.40590070921985816, + "grad_norm": 32.209617614746094, + "learning_rate": 5e-05, + "loss": 1.3995, + "num_input_tokens_seen": 239321240, + "step": 3577 + }, + { + "epoch": 0.40590070921985816, + "loss": 1.093125581741333, + "loss_ce": 0.005723138339817524, + "loss_iou": 0.46484375, + "loss_num": 0.03173828125, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 239321240, + "step": 3577 + }, + { + "epoch": 0.40601418439716314, + "grad_norm": 58.3940544128418, + "learning_rate": 5e-05, + "loss": 1.3121, + "num_input_tokens_seen": 239387940, + "step": 3578 + }, + { + "epoch": 0.40601418439716314, + "loss": 1.0494565963745117, + "loss_ce": 0.005023108795285225, + "loss_iou": 0.453125, + "loss_num": 0.027587890625, + "loss_xval": 1.046875, + "num_input_tokens_seen": 239387940, + "step": 3578 + }, + { + "epoch": 0.40612765957446806, + "grad_norm": 26.4588680267334, + "learning_rate": 5e-05, + "loss": 1.2623, + "num_input_tokens_seen": 239454932, + "step": 3579 + }, + { + "epoch": 0.40612765957446806, + "loss": 1.3463743925094604, + "loss_ce": 0.003112691454589367, + "loss_iou": 0.5078125, + "loss_num": 0.0654296875, + "loss_xval": 1.34375, + "num_input_tokens_seen": 239454932, + "step": 3579 + }, + { + "epoch": 0.40624113475177304, + "grad_norm": 30.790889739990234, + "learning_rate": 5e-05, + "loss": 1.2087, + "num_input_tokens_seen": 239521376, + "step": 3580 + }, + { + "epoch": 0.40624113475177304, + "loss": 1.0799518823623657, + "loss_ce": 0.006709692068397999, + "loss_iou": 0.474609375, + "loss_num": 0.0252685546875, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 239521376, + "step": 3580 + }, + { + "epoch": 0.406354609929078, + "grad_norm": 24.430131912231445, + "learning_rate": 5e-05, + "loss": 1.3816, + "num_input_tokens_seen": 239588640, + "step": 3581 + }, + { + "epoch": 0.406354609929078, + "loss": 1.4685648679733276, + "loss_ce": 0.008848004974424839, + "loss_iou": 0.5625, + "loss_num": 0.06591796875, + "loss_xval": 1.4609375, + "num_input_tokens_seen": 239588640, + "step": 3581 + }, + { + "epoch": 0.406468085106383, + "grad_norm": 30.65288543701172, + "learning_rate": 5e-05, + "loss": 1.2063, + "num_input_tokens_seen": 239656264, + "step": 3582 + }, + { + "epoch": 0.406468085106383, + "loss": 1.178104043006897, + "loss_ce": 0.0028110677376389503, + "loss_iou": 0.515625, + "loss_num": 0.02978515625, + "loss_xval": 1.171875, + "num_input_tokens_seen": 239656264, + "step": 3582 + }, + { + "epoch": 0.40658156028368797, + "grad_norm": 36.98930740356445, + "learning_rate": 5e-05, + "loss": 1.079, + "num_input_tokens_seen": 239724008, + "step": 3583 + }, + { + "epoch": 0.40658156028368797, + "loss": 0.9335687160491943, + "loss_ce": 0.002416327130049467, + "loss_iou": 0.40234375, + "loss_num": 0.024658203125, + "loss_xval": 0.9296875, + "num_input_tokens_seen": 239724008, + "step": 3583 + }, + { + "epoch": 0.4066950354609929, + "grad_norm": 16.004060745239258, + "learning_rate": 5e-05, + "loss": 1.1784, + "num_input_tokens_seen": 239790980, + "step": 3584 + }, + { + "epoch": 0.4066950354609929, + "loss": 1.2602629661560059, + "loss_ce": 0.006845066789537668, + "loss_iou": 0.498046875, + "loss_num": 0.05126953125, + "loss_xval": 1.25, + "num_input_tokens_seen": 239790980, + "step": 3584 + }, + { + "epoch": 0.40680851063829787, + "grad_norm": 35.524261474609375, + "learning_rate": 5e-05, + "loss": 1.1984, + "num_input_tokens_seen": 239858048, + "step": 3585 + }, + { + "epoch": 0.40680851063829787, + "loss": 1.0769225358963013, + "loss_ce": 0.005145189352333546, + "loss_iou": 0.4296875, + "loss_num": 0.042724609375, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 239858048, + "step": 3585 + }, + { + "epoch": 0.40692198581560285, + "grad_norm": 21.4593448638916, + "learning_rate": 5e-05, + "loss": 1.1928, + "num_input_tokens_seen": 239923988, + "step": 3586 + }, + { + "epoch": 0.40692198581560285, + "loss": 1.1333272457122803, + "loss_ce": 0.006129923276603222, + "loss_iou": 0.498046875, + "loss_num": 0.0262451171875, + "loss_xval": 1.125, + "num_input_tokens_seen": 239923988, + "step": 3586 + }, + { + "epoch": 0.4070354609929078, + "grad_norm": 32.365081787109375, + "learning_rate": 5e-05, + "loss": 1.2006, + "num_input_tokens_seen": 239990520, + "step": 3587 + }, + { + "epoch": 0.4070354609929078, + "loss": 1.0157090425491333, + "loss_ce": 0.004478611517697573, + "loss_iou": 0.451171875, + "loss_num": 0.0220947265625, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 239990520, + "step": 3587 + }, + { + "epoch": 0.40714893617021275, + "grad_norm": 29.81865692138672, + "learning_rate": 5e-05, + "loss": 1.2027, + "num_input_tokens_seen": 240057276, + "step": 3588 + }, + { + "epoch": 0.40714893617021275, + "loss": 1.0043058395385742, + "loss_ce": 0.0072354963049292564, + "loss_iou": 0.4375, + "loss_num": 0.0245361328125, + "loss_xval": 0.99609375, + "num_input_tokens_seen": 240057276, + "step": 3588 + }, + { + "epoch": 0.4072624113475177, + "grad_norm": 75.88202667236328, + "learning_rate": 5e-05, + "loss": 1.1976, + "num_input_tokens_seen": 240124740, + "step": 3589 + }, + { + "epoch": 0.4072624113475177, + "loss": 1.2474520206451416, + "loss_ce": 0.0057528577744960785, + "loss_iou": 0.51171875, + "loss_num": 0.0439453125, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 240124740, + "step": 3589 + }, + { + "epoch": 0.4073758865248227, + "grad_norm": 35.75727462768555, + "learning_rate": 5e-05, + "loss": 1.236, + "num_input_tokens_seen": 240191880, + "step": 3590 + }, + { + "epoch": 0.4073758865248227, + "loss": 1.3447327613830566, + "loss_ce": 0.006353958509862423, + "loss_iou": 0.56640625, + "loss_num": 0.041748046875, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 240191880, + "step": 3590 + }, + { + "epoch": 0.4074893617021277, + "grad_norm": 31.994903564453125, + "learning_rate": 5e-05, + "loss": 1.3071, + "num_input_tokens_seen": 240259372, + "step": 3591 + }, + { + "epoch": 0.4074893617021277, + "loss": 1.362094759941101, + "loss_ce": 0.005649458151310682, + "loss_iou": 0.5390625, + "loss_num": 0.054931640625, + "loss_xval": 1.359375, + "num_input_tokens_seen": 240259372, + "step": 3591 + }, + { + "epoch": 0.4076028368794326, + "grad_norm": 25.04945945739746, + "learning_rate": 5e-05, + "loss": 1.1097, + "num_input_tokens_seen": 240326248, + "step": 3592 + }, + { + "epoch": 0.4076028368794326, + "loss": 1.2137439250946045, + "loss_ce": 0.0032947841100394726, + "loss_iou": 0.515625, + "loss_num": 0.03564453125, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 240326248, + "step": 3592 + }, + { + "epoch": 0.4077163120567376, + "grad_norm": 15.547513961791992, + "learning_rate": 5e-05, + "loss": 1.2235, + "num_input_tokens_seen": 240393268, + "step": 3593 + }, + { + "epoch": 0.4077163120567376, + "loss": 1.1644673347473145, + "loss_ce": 0.004799338057637215, + "loss_iou": 0.5, + "loss_num": 0.0322265625, + "loss_xval": 1.15625, + "num_input_tokens_seen": 240393268, + "step": 3593 + }, + { + "epoch": 0.40782978723404256, + "grad_norm": 37.730899810791016, + "learning_rate": 5e-05, + "loss": 1.2212, + "num_input_tokens_seen": 240459284, + "step": 3594 + }, + { + "epoch": 0.40782978723404256, + "loss": 1.1650667190551758, + "loss_ce": 0.0034455247223377228, + "loss_iou": 0.466796875, + "loss_num": 0.045654296875, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 240459284, + "step": 3594 + }, + { + "epoch": 0.40794326241134754, + "grad_norm": 24.970691680908203, + "learning_rate": 5e-05, + "loss": 1.5179, + "num_input_tokens_seen": 240527072, + "step": 3595 + }, + { + "epoch": 0.40794326241134754, + "loss": 1.5323305130004883, + "loss_ce": 0.004986739717423916, + "loss_iou": 0.6328125, + "loss_num": 0.0517578125, + "loss_xval": 1.53125, + "num_input_tokens_seen": 240527072, + "step": 3595 + }, + { + "epoch": 0.40805673758865246, + "grad_norm": 148.8797149658203, + "learning_rate": 5e-05, + "loss": 0.9647, + "num_input_tokens_seen": 240593024, + "step": 3596 + }, + { + "epoch": 0.40805673758865246, + "loss": 0.9563778042793274, + "loss_ce": 0.005938339978456497, + "loss_iou": 0.390625, + "loss_num": 0.033935546875, + "loss_xval": 0.94921875, + "num_input_tokens_seen": 240593024, + "step": 3596 + }, + { + "epoch": 0.40817021276595744, + "grad_norm": 33.13808059692383, + "learning_rate": 5e-05, + "loss": 1.2825, + "num_input_tokens_seen": 240658688, + "step": 3597 + }, + { + "epoch": 0.40817021276595744, + "loss": 1.504515290260315, + "loss_ce": 0.010374683886766434, + "loss_iou": 0.53125, + "loss_num": 0.0849609375, + "loss_xval": 1.4921875, + "num_input_tokens_seen": 240658688, + "step": 3597 + }, + { + "epoch": 0.4082836879432624, + "grad_norm": 27.01314926147461, + "learning_rate": 5e-05, + "loss": 1.2612, + "num_input_tokens_seen": 240725672, + "step": 3598 + }, + { + "epoch": 0.4082836879432624, + "loss": 1.2225996255874634, + "loss_ce": 0.006168946158140898, + "loss_iou": 0.478515625, + "loss_num": 0.05224609375, + "loss_xval": 1.21875, + "num_input_tokens_seen": 240725672, + "step": 3598 + }, + { + "epoch": 0.4083971631205674, + "grad_norm": 63.87114334106445, + "learning_rate": 5e-05, + "loss": 1.5164, + "num_input_tokens_seen": 240793244, + "step": 3599 + }, + { + "epoch": 0.4083971631205674, + "loss": 1.4606068134307861, + "loss_ce": 0.005528618581593037, + "loss_iou": 0.58984375, + "loss_num": 0.05517578125, + "loss_xval": 1.453125, + "num_input_tokens_seen": 240793244, + "step": 3599 + }, + { + "epoch": 0.4085106382978723, + "grad_norm": 22.425050735473633, + "learning_rate": 5e-05, + "loss": 1.2844, + "num_input_tokens_seen": 240859804, + "step": 3600 + }, + { + "epoch": 0.4085106382978723, + "loss": 1.1118849515914917, + "loss_ce": 0.004951383452862501, + "loss_iou": 0.48828125, + "loss_num": 0.0255126953125, + "loss_xval": 1.109375, + "num_input_tokens_seen": 240859804, + "step": 3600 + }, + { + "epoch": 0.4086241134751773, + "grad_norm": 24.131183624267578, + "learning_rate": 5e-05, + "loss": 1.335, + "num_input_tokens_seen": 240926432, + "step": 3601 + }, + { + "epoch": 0.4086241134751773, + "loss": 1.4357013702392578, + "loss_ce": 0.005525572225451469, + "loss_iou": 0.5703125, + "loss_num": 0.05859375, + "loss_xval": 1.4296875, + "num_input_tokens_seen": 240926432, + "step": 3601 + }, + { + "epoch": 0.4087375886524823, + "grad_norm": 28.59232521057129, + "learning_rate": 5e-05, + "loss": 1.0056, + "num_input_tokens_seen": 240993476, + "step": 3602 + }, + { + "epoch": 0.4087375886524823, + "loss": 0.8237601518630981, + "loss_ce": 0.012358753010630608, + "loss_iou": 0.369140625, + "loss_num": 0.0147705078125, + "loss_xval": 0.8125, + "num_input_tokens_seen": 240993476, + "step": 3602 + }, + { + "epoch": 0.40885106382978725, + "grad_norm": 42.96214294433594, + "learning_rate": 5e-05, + "loss": 1.2286, + "num_input_tokens_seen": 241060152, + "step": 3603 + }, + { + "epoch": 0.40885106382978725, + "loss": 1.3188563585281372, + "loss_ce": 0.00391492061316967, + "loss_iou": 0.55859375, + "loss_num": 0.03955078125, + "loss_xval": 1.3125, + "num_input_tokens_seen": 241060152, + "step": 3603 + }, + { + "epoch": 0.4089645390070922, + "grad_norm": 40.68187713623047, + "learning_rate": 5e-05, + "loss": 1.1697, + "num_input_tokens_seen": 241125668, + "step": 3604 + }, + { + "epoch": 0.4089645390070922, + "loss": 1.241736888885498, + "loss_ce": 0.003455741098150611, + "loss_iou": 0.5546875, + "loss_num": 0.0263671875, + "loss_xval": 1.234375, + "num_input_tokens_seen": 241125668, + "step": 3604 + }, + { + "epoch": 0.40907801418439715, + "grad_norm": 67.18024444580078, + "learning_rate": 5e-05, + "loss": 1.6615, + "num_input_tokens_seen": 241192840, + "step": 3605 + }, + { + "epoch": 0.40907801418439715, + "loss": 1.450260877609253, + "loss_ce": 0.0039717936888337135, + "loss_iou": 0.62109375, + "loss_num": 0.041015625, + "loss_xval": 1.4453125, + "num_input_tokens_seen": 241192840, + "step": 3605 + }, + { + "epoch": 0.40919148936170213, + "grad_norm": 59.269840240478516, + "learning_rate": 5e-05, + "loss": 1.4111, + "num_input_tokens_seen": 241260540, + "step": 3606 + }, + { + "epoch": 0.40919148936170213, + "loss": 1.492042064666748, + "loss_ce": 0.009620205499231815, + "loss_iou": 0.59765625, + "loss_num": 0.056884765625, + "loss_xval": 1.484375, + "num_input_tokens_seen": 241260540, + "step": 3606 + }, + { + "epoch": 0.4093049645390071, + "grad_norm": 25.947118759155273, + "learning_rate": 5e-05, + "loss": 1.3034, + "num_input_tokens_seen": 241328156, + "step": 3607 + }, + { + "epoch": 0.4093049645390071, + "loss": 1.394667387008667, + "loss_ce": 0.005507284309715033, + "loss_iou": 0.57421875, + "loss_num": 0.047119140625, + "loss_xval": 1.390625, + "num_input_tokens_seen": 241328156, + "step": 3607 + }, + { + "epoch": 0.40941843971631203, + "grad_norm": 26.08195686340332, + "learning_rate": 5e-05, + "loss": 1.3567, + "num_input_tokens_seen": 241395156, + "step": 3608 + }, + { + "epoch": 0.40941843971631203, + "loss": 1.2970547676086426, + "loss_ce": 0.007504085078835487, + "loss_iou": 0.4921875, + "loss_num": 0.06103515625, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 241395156, + "step": 3608 + }, + { + "epoch": 0.409531914893617, + "grad_norm": 844.8603515625, + "learning_rate": 5e-05, + "loss": 1.3819, + "num_input_tokens_seen": 241461264, + "step": 3609 + }, + { + "epoch": 0.409531914893617, + "loss": 1.3629751205444336, + "loss_ce": 0.005064926575869322, + "loss_iou": 0.53515625, + "loss_num": 0.05810546875, + "loss_xval": 1.359375, + "num_input_tokens_seen": 241461264, + "step": 3609 + }, + { + "epoch": 0.409645390070922, + "grad_norm": 32.55344009399414, + "learning_rate": 5e-05, + "loss": 1.2378, + "num_input_tokens_seen": 241527776, + "step": 3610 + }, + { + "epoch": 0.409645390070922, + "loss": 1.301077127456665, + "loss_ce": 0.003957951441407204, + "loss_iou": 0.56640625, + "loss_num": 0.032958984375, + "loss_xval": 1.296875, + "num_input_tokens_seen": 241527776, + "step": 3610 + }, + { + "epoch": 0.40975886524822697, + "grad_norm": 32.37969207763672, + "learning_rate": 5e-05, + "loss": 1.2931, + "num_input_tokens_seen": 241595056, + "step": 3611 + }, + { + "epoch": 0.40975886524822697, + "loss": 1.0465431213378906, + "loss_ce": 0.002597758313640952, + "loss_iou": 0.4609375, + "loss_num": 0.0244140625, + "loss_xval": 1.046875, + "num_input_tokens_seen": 241595056, + "step": 3611 + }, + { + "epoch": 0.4098723404255319, + "grad_norm": 32.50665283203125, + "learning_rate": 5e-05, + "loss": 1.2115, + "num_input_tokens_seen": 241661932, + "step": 3612 + }, + { + "epoch": 0.4098723404255319, + "loss": 1.0936944484710693, + "loss_ce": 0.007268694229424, + "loss_iou": 0.4375, + "loss_num": 0.041748046875, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 241661932, + "step": 3612 + }, + { + "epoch": 0.40998581560283687, + "grad_norm": 425.3451232910156, + "learning_rate": 5e-05, + "loss": 1.4769, + "num_input_tokens_seen": 241728744, + "step": 3613 + }, + { + "epoch": 0.40998581560283687, + "loss": 1.485255479812622, + "loss_ce": 0.013575742021203041, + "loss_iou": 0.578125, + "loss_num": 0.0625, + "loss_xval": 1.46875, + "num_input_tokens_seen": 241728744, + "step": 3613 + }, + { + "epoch": 0.41009929078014185, + "grad_norm": 33.570709228515625, + "learning_rate": 5e-05, + "loss": 1.1609, + "num_input_tokens_seen": 241796648, + "step": 3614 + }, + { + "epoch": 0.41009929078014185, + "loss": 1.1318705081939697, + "loss_ce": 0.004429114982485771, + "loss_iou": 0.470703125, + "loss_num": 0.037109375, + "loss_xval": 1.125, + "num_input_tokens_seen": 241796648, + "step": 3614 + }, + { + "epoch": 0.4102127659574468, + "grad_norm": 26.937108993530273, + "learning_rate": 5e-05, + "loss": 1.297, + "num_input_tokens_seen": 241863180, + "step": 3615 + }, + { + "epoch": 0.4102127659574468, + "loss": 1.2524100542068481, + "loss_ce": 0.006865595001727343, + "loss_iou": 0.52734375, + "loss_num": 0.03857421875, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 241863180, + "step": 3615 + }, + { + "epoch": 0.41032624113475175, + "grad_norm": 17.109153747558594, + "learning_rate": 5e-05, + "loss": 1.4019, + "num_input_tokens_seen": 241930156, + "step": 3616 + }, + { + "epoch": 0.41032624113475175, + "loss": 1.380568265914917, + "loss_ce": 0.0036151930689811707, + "loss_iou": 0.53515625, + "loss_num": 0.061279296875, + "loss_xval": 1.375, + "num_input_tokens_seen": 241930156, + "step": 3616 + }, + { + "epoch": 0.4104397163120567, + "grad_norm": 16.455154418945312, + "learning_rate": 5e-05, + "loss": 1.1934, + "num_input_tokens_seen": 241996672, + "step": 3617 + }, + { + "epoch": 0.4104397163120567, + "loss": 1.144505262374878, + "loss_ce": 0.006809964310377836, + "loss_iou": 0.46484375, + "loss_num": 0.04150390625, + "loss_xval": 1.140625, + "num_input_tokens_seen": 241996672, + "step": 3617 + }, + { + "epoch": 0.4105531914893617, + "grad_norm": 39.51834487915039, + "learning_rate": 5e-05, + "loss": 1.1926, + "num_input_tokens_seen": 242063428, + "step": 3618 + }, + { + "epoch": 0.4105531914893617, + "loss": 1.1891323328018188, + "loss_ce": 0.005538592580705881, + "loss_iou": 0.484375, + "loss_num": 0.042724609375, + "loss_xval": 1.1875, + "num_input_tokens_seen": 242063428, + "step": 3618 + }, + { + "epoch": 0.4106666666666667, + "grad_norm": 40.02389907836914, + "learning_rate": 5e-05, + "loss": 1.6292, + "num_input_tokens_seen": 242130900, + "step": 3619 + }, + { + "epoch": 0.4106666666666667, + "loss": 1.6581201553344727, + "loss_ce": 0.005776275880634785, + "loss_iou": 0.64453125, + "loss_num": 0.07275390625, + "loss_xval": 1.65625, + "num_input_tokens_seen": 242130900, + "step": 3619 + }, + { + "epoch": 0.41078014184397166, + "grad_norm": 16.81020736694336, + "learning_rate": 5e-05, + "loss": 1.2255, + "num_input_tokens_seen": 242198028, + "step": 3620 + }, + { + "epoch": 0.41078014184397166, + "loss": 1.1402884721755981, + "loss_ce": 0.0030813938938081264, + "loss_iou": 0.49609375, + "loss_num": 0.029541015625, + "loss_xval": 1.140625, + "num_input_tokens_seen": 242198028, + "step": 3620 + }, + { + "epoch": 0.4108936170212766, + "grad_norm": 19.64426040649414, + "learning_rate": 5e-05, + "loss": 1.2261, + "num_input_tokens_seen": 242264348, + "step": 3621 + }, + { + "epoch": 0.4108936170212766, + "loss": 1.3272440433502197, + "loss_ce": 0.0030252663418650627, + "loss_iou": 0.54296875, + "loss_num": 0.0478515625, + "loss_xval": 1.328125, + "num_input_tokens_seen": 242264348, + "step": 3621 + }, + { + "epoch": 0.41100709219858156, + "grad_norm": 52.6038818359375, + "learning_rate": 5e-05, + "loss": 1.2155, + "num_input_tokens_seen": 242332728, + "step": 3622 + }, + { + "epoch": 0.41100709219858156, + "loss": 1.13837730884552, + "loss_ce": 0.0055647678673267365, + "loss_iou": 0.49609375, + "loss_num": 0.0281982421875, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 242332728, + "step": 3622 + }, + { + "epoch": 0.41112056737588654, + "grad_norm": 44.35395050048828, + "learning_rate": 5e-05, + "loss": 1.4295, + "num_input_tokens_seen": 242400184, + "step": 3623 + }, + { + "epoch": 0.41112056737588654, + "loss": 1.5352399349212646, + "loss_ce": 0.010825826786458492, + "loss_iou": 0.6015625, + "loss_num": 0.0634765625, + "loss_xval": 1.5234375, + "num_input_tokens_seen": 242400184, + "step": 3623 + }, + { + "epoch": 0.4112340425531915, + "grad_norm": 21.463809967041016, + "learning_rate": 5e-05, + "loss": 1.4816, + "num_input_tokens_seen": 242466992, + "step": 3624 + }, + { + "epoch": 0.4112340425531915, + "loss": 1.483283519744873, + "loss_ce": 0.005256203934550285, + "loss_iou": 0.640625, + "loss_num": 0.038818359375, + "loss_xval": 1.4765625, + "num_input_tokens_seen": 242466992, + "step": 3624 + }, + { + "epoch": 0.41134751773049644, + "grad_norm": 35.35702133178711, + "learning_rate": 5e-05, + "loss": 1.3177, + "num_input_tokens_seen": 242533644, + "step": 3625 + }, + { + "epoch": 0.41134751773049644, + "loss": 1.315652847290039, + "loss_ce": 0.005594127811491489, + "loss_iou": 0.5703125, + "loss_num": 0.0341796875, + "loss_xval": 1.3125, + "num_input_tokens_seen": 242533644, + "step": 3625 + }, + { + "epoch": 0.4114609929078014, + "grad_norm": 39.66505813598633, + "learning_rate": 5e-05, + "loss": 1.382, + "num_input_tokens_seen": 242600256, + "step": 3626 + }, + { + "epoch": 0.4114609929078014, + "loss": 1.482403039932251, + "loss_ce": 0.006817043758928776, + "loss_iou": 0.57421875, + "loss_num": 0.06591796875, + "loss_xval": 1.4765625, + "num_input_tokens_seen": 242600256, + "step": 3626 + }, + { + "epoch": 0.4115744680851064, + "grad_norm": 28.114377975463867, + "learning_rate": 5e-05, + "loss": 1.2242, + "num_input_tokens_seen": 242666664, + "step": 3627 + }, + { + "epoch": 0.4115744680851064, + "loss": 1.0933935642242432, + "loss_ce": 0.004038003273308277, + "loss_iou": 0.484375, + "loss_num": 0.024658203125, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 242666664, + "step": 3627 + }, + { + "epoch": 0.41168794326241137, + "grad_norm": 32.11578369140625, + "learning_rate": 5e-05, + "loss": 1.315, + "num_input_tokens_seen": 242732744, + "step": 3628 + }, + { + "epoch": 0.41168794326241137, + "loss": 1.2709224224090576, + "loss_ce": 0.004320794716477394, + "loss_iou": 0.498046875, + "loss_num": 0.0546875, + "loss_xval": 1.265625, + "num_input_tokens_seen": 242732744, + "step": 3628 + }, + { + "epoch": 0.4118014184397163, + "grad_norm": 30.01591682434082, + "learning_rate": 5e-05, + "loss": 1.4525, + "num_input_tokens_seen": 242800636, + "step": 3629 + }, + { + "epoch": 0.4118014184397163, + "loss": 1.4713490009307861, + "loss_ce": 0.005528676323592663, + "loss_iou": 0.5625, + "loss_num": 0.06787109375, + "loss_xval": 1.46875, + "num_input_tokens_seen": 242800636, + "step": 3629 + }, + { + "epoch": 0.41191489361702127, + "grad_norm": 36.97578430175781, + "learning_rate": 5e-05, + "loss": 1.2294, + "num_input_tokens_seen": 242867772, + "step": 3630 + }, + { + "epoch": 0.41191489361702127, + "loss": 1.132497787475586, + "loss_ce": 0.007497882470488548, + "loss_iou": 0.486328125, + "loss_num": 0.0306396484375, + "loss_xval": 1.125, + "num_input_tokens_seen": 242867772, + "step": 3630 + }, + { + "epoch": 0.41202836879432625, + "grad_norm": 28.92682456970215, + "learning_rate": 5e-05, + "loss": 1.2425, + "num_input_tokens_seen": 242934716, + "step": 3631 + }, + { + "epoch": 0.41202836879432625, + "loss": 1.319493293762207, + "loss_ce": 0.00259873503819108, + "loss_iou": 0.56640625, + "loss_num": 0.037353515625, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 242934716, + "step": 3631 + }, + { + "epoch": 0.4121418439716312, + "grad_norm": 24.650390625, + "learning_rate": 5e-05, + "loss": 1.1244, + "num_input_tokens_seen": 243001320, + "step": 3632 + }, + { + "epoch": 0.4121418439716312, + "loss": 0.9435156583786011, + "loss_ce": 0.00552739854902029, + "loss_iou": 0.37109375, + "loss_num": 0.03955078125, + "loss_xval": 0.9375, + "num_input_tokens_seen": 243001320, + "step": 3632 + }, + { + "epoch": 0.41225531914893615, + "grad_norm": 31.05750846862793, + "learning_rate": 5e-05, + "loss": 1.3131, + "num_input_tokens_seen": 243069276, + "step": 3633 + }, + { + "epoch": 0.41225531914893615, + "loss": 1.1766674518585205, + "loss_ce": 0.007233812008053064, + "loss_iou": 0.48046875, + "loss_num": 0.041748046875, + "loss_xval": 1.171875, + "num_input_tokens_seen": 243069276, + "step": 3633 + }, + { + "epoch": 0.41236879432624113, + "grad_norm": 32.28243637084961, + "learning_rate": 5e-05, + "loss": 1.0643, + "num_input_tokens_seen": 243135916, + "step": 3634 + }, + { + "epoch": 0.41236879432624113, + "loss": 1.107203722000122, + "loss_ce": 0.005641252268105745, + "loss_iou": 0.482421875, + "loss_num": 0.02783203125, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 243135916, + "step": 3634 + }, + { + "epoch": 0.4124822695035461, + "grad_norm": 32.018096923828125, + "learning_rate": 5e-05, + "loss": 1.1549, + "num_input_tokens_seen": 243203896, + "step": 3635 + }, + { + "epoch": 0.4124822695035461, + "loss": 1.134103536605835, + "loss_ce": 0.004220707342028618, + "loss_iou": 0.4921875, + "loss_num": 0.02880859375, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 243203896, + "step": 3635 + }, + { + "epoch": 0.4125957446808511, + "grad_norm": 157.8903045654297, + "learning_rate": 5e-05, + "loss": 1.3201, + "num_input_tokens_seen": 243270616, + "step": 3636 + }, + { + "epoch": 0.4125957446808511, + "loss": 1.3260868787765503, + "loss_ce": 0.009192319586873055, + "loss_iou": 0.55078125, + "loss_num": 0.04345703125, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 243270616, + "step": 3636 + }, + { + "epoch": 0.412709219858156, + "grad_norm": 28.228044509887695, + "learning_rate": 5e-05, + "loss": 1.2808, + "num_input_tokens_seen": 243337904, + "step": 3637 + }, + { + "epoch": 0.412709219858156, + "loss": 1.2780518531799316, + "loss_ce": 0.0031495431903749704, + "loss_iou": 0.57421875, + "loss_num": 0.025146484375, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 243337904, + "step": 3637 + }, + { + "epoch": 0.412822695035461, + "grad_norm": 14.198616027832031, + "learning_rate": 5e-05, + "loss": 1.0955, + "num_input_tokens_seen": 243404368, + "step": 3638 + }, + { + "epoch": 0.412822695035461, + "loss": 0.9635648131370544, + "loss_ce": 0.003848088439553976, + "loss_iou": 0.4140625, + "loss_num": 0.0267333984375, + "loss_xval": 0.9609375, + "num_input_tokens_seen": 243404368, + "step": 3638 + }, + { + "epoch": 0.41293617021276596, + "grad_norm": 27.82749366760254, + "learning_rate": 5e-05, + "loss": 0.9441, + "num_input_tokens_seen": 243469968, + "step": 3639 + }, + { + "epoch": 0.41293617021276596, + "loss": 1.020429015159607, + "loss_ce": 0.001874394714832306, + "loss_iou": 0.427734375, + "loss_num": 0.032470703125, + "loss_xval": 1.015625, + "num_input_tokens_seen": 243469968, + "step": 3639 + }, + { + "epoch": 0.41304964539007094, + "grad_norm": 28.517425537109375, + "learning_rate": 5e-05, + "loss": 1.3388, + "num_input_tokens_seen": 243536572, + "step": 3640 + }, + { + "epoch": 0.41304964539007094, + "loss": 1.3572267293930054, + "loss_ce": 0.0041993772611021996, + "loss_iou": 0.55078125, + "loss_num": 0.049560546875, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 243536572, + "step": 3640 + }, + { + "epoch": 0.41316312056737586, + "grad_norm": 43.658714294433594, + "learning_rate": 5e-05, + "loss": 1.2259, + "num_input_tokens_seen": 243603928, + "step": 3641 + }, + { + "epoch": 0.41316312056737586, + "loss": 1.317453145980835, + "loss_ce": 0.004953077994287014, + "loss_iou": 0.52734375, + "loss_num": 0.052001953125, + "loss_xval": 1.3125, + "num_input_tokens_seen": 243603928, + "step": 3641 + }, + { + "epoch": 0.41327659574468084, + "grad_norm": 31.165313720703125, + "learning_rate": 5e-05, + "loss": 1.4589, + "num_input_tokens_seen": 243670408, + "step": 3642 + }, + { + "epoch": 0.41327659574468084, + "loss": 1.5232696533203125, + "loss_ce": 0.004715018905699253, + "loss_iou": 0.62109375, + "loss_num": 0.054443359375, + "loss_xval": 1.515625, + "num_input_tokens_seen": 243670408, + "step": 3642 + }, + { + "epoch": 0.4133900709219858, + "grad_norm": 25.071325302124023, + "learning_rate": 5e-05, + "loss": 1.0695, + "num_input_tokens_seen": 243737724, + "step": 3643 + }, + { + "epoch": 0.4133900709219858, + "loss": 1.1847786903381348, + "loss_ce": 0.008753323927521706, + "loss_iou": 0.4375, + "loss_num": 0.06005859375, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 243737724, + "step": 3643 + }, + { + "epoch": 0.4135035460992908, + "grad_norm": 17.334178924560547, + "learning_rate": 5e-05, + "loss": 1.3911, + "num_input_tokens_seen": 243804588, + "step": 3644 + }, + { + "epoch": 0.4135035460992908, + "loss": 1.14728844165802, + "loss_ce": 0.003916925750672817, + "loss_iou": 0.45703125, + "loss_num": 0.04541015625, + "loss_xval": 1.140625, + "num_input_tokens_seen": 243804588, + "step": 3644 + }, + { + "epoch": 0.4136170212765957, + "grad_norm": 20.748489379882812, + "learning_rate": 5e-05, + "loss": 1.0896, + "num_input_tokens_seen": 243870988, + "step": 3645 + }, + { + "epoch": 0.4136170212765957, + "loss": 1.1953840255737305, + "loss_ce": 0.008860467001795769, + "loss_iou": 0.4921875, + "loss_num": 0.040283203125, + "loss_xval": 1.1875, + "num_input_tokens_seen": 243870988, + "step": 3645 + }, + { + "epoch": 0.4137304964539007, + "grad_norm": 19.422290802001953, + "learning_rate": 5e-05, + "loss": 1.1819, + "num_input_tokens_seen": 243937968, + "step": 3646 + }, + { + "epoch": 0.4137304964539007, + "loss": 1.020146369934082, + "loss_ce": 0.007451056968420744, + "loss_iou": 0.40234375, + "loss_num": 0.041259765625, + "loss_xval": 1.015625, + "num_input_tokens_seen": 243937968, + "step": 3646 + }, + { + "epoch": 0.4138439716312057, + "grad_norm": 25.406845092773438, + "learning_rate": 5e-05, + "loss": 1.0826, + "num_input_tokens_seen": 244004960, + "step": 3647 + }, + { + "epoch": 0.4138439716312057, + "loss": 1.2003458738327026, + "loss_ce": 0.006498188246041536, + "loss_iou": 0.494140625, + "loss_num": 0.041015625, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 244004960, + "step": 3647 + }, + { + "epoch": 0.41395744680851065, + "grad_norm": 32.91703796386719, + "learning_rate": 5e-05, + "loss": 1.1126, + "num_input_tokens_seen": 244071860, + "step": 3648 + }, + { + "epoch": 0.41395744680851065, + "loss": 1.2340530157089233, + "loss_ce": 0.007978743873536587, + "loss_iou": 0.515625, + "loss_num": 0.039306640625, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 244071860, + "step": 3648 + }, + { + "epoch": 0.4140709219858156, + "grad_norm": 27.350929260253906, + "learning_rate": 5e-05, + "loss": 1.288, + "num_input_tokens_seen": 244138212, + "step": 3649 + }, + { + "epoch": 0.4140709219858156, + "loss": 1.1313059329986572, + "loss_ce": 0.00825900211930275, + "loss_iou": 0.435546875, + "loss_num": 0.050537109375, + "loss_xval": 1.125, + "num_input_tokens_seen": 244138212, + "step": 3649 + }, + { + "epoch": 0.41418439716312055, + "grad_norm": 24.340938568115234, + "learning_rate": 5e-05, + "loss": 1.2514, + "num_input_tokens_seen": 244204648, + "step": 3650 + }, + { + "epoch": 0.41418439716312055, + "loss": 1.2058484554290771, + "loss_ce": 0.005653127562254667, + "loss_iou": 0.482421875, + "loss_num": 0.047119140625, + "loss_xval": 1.203125, + "num_input_tokens_seen": 244204648, + "step": 3650 + }, + { + "epoch": 0.41429787234042553, + "grad_norm": 34.17368698120117, + "learning_rate": 5e-05, + "loss": 1.3243, + "num_input_tokens_seen": 244271076, + "step": 3651 + }, + { + "epoch": 0.41429787234042553, + "loss": 1.2673754692077637, + "loss_ce": 0.0046800910495221615, + "loss_iou": 0.5, + "loss_num": 0.05322265625, + "loss_xval": 1.265625, + "num_input_tokens_seen": 244271076, + "step": 3651 + }, + { + "epoch": 0.4144113475177305, + "grad_norm": 25.994918823242188, + "learning_rate": 5e-05, + "loss": 1.4926, + "num_input_tokens_seen": 244338320, + "step": 3652 + }, + { + "epoch": 0.4144113475177305, + "loss": 1.4090769290924072, + "loss_ce": 0.005756522994488478, + "loss_iou": 0.59375, + "loss_num": 0.04345703125, + "loss_xval": 1.40625, + "num_input_tokens_seen": 244338320, + "step": 3652 + }, + { + "epoch": 0.4145248226950355, + "grad_norm": 30.63885498046875, + "learning_rate": 5e-05, + "loss": 1.1774, + "num_input_tokens_seen": 244405652, + "step": 3653 + }, + { + "epoch": 0.4145248226950355, + "loss": 1.2083473205566406, + "loss_ce": 0.003757443279027939, + "loss_iou": 0.4921875, + "loss_num": 0.043701171875, + "loss_xval": 1.203125, + "num_input_tokens_seen": 244405652, + "step": 3653 + }, + { + "epoch": 0.4146382978723404, + "grad_norm": 28.253400802612305, + "learning_rate": 5e-05, + "loss": 1.2684, + "num_input_tokens_seen": 244471992, + "step": 3654 + }, + { + "epoch": 0.4146382978723404, + "loss": 1.2553608417510986, + "loss_ce": 0.002675340510904789, + "loss_iou": 0.515625, + "loss_num": 0.044677734375, + "loss_xval": 1.25, + "num_input_tokens_seen": 244471992, + "step": 3654 + }, + { + "epoch": 0.4147517730496454, + "grad_norm": 31.912004470825195, + "learning_rate": 5e-05, + "loss": 1.1344, + "num_input_tokens_seen": 244538744, + "step": 3655 + }, + { + "epoch": 0.4147517730496454, + "loss": 1.164458155632019, + "loss_ce": 0.005278502590954304, + "loss_iou": 0.474609375, + "loss_num": 0.042236328125, + "loss_xval": 1.15625, + "num_input_tokens_seen": 244538744, + "step": 3655 + }, + { + "epoch": 0.41486524822695037, + "grad_norm": 30.353652954101562, + "learning_rate": 5e-05, + "loss": 1.4507, + "num_input_tokens_seen": 244604740, + "step": 3656 + }, + { + "epoch": 0.41486524822695037, + "loss": 1.3939779996871948, + "loss_ce": 0.0023764977231621742, + "loss_iou": 0.55859375, + "loss_num": 0.0546875, + "loss_xval": 1.390625, + "num_input_tokens_seen": 244604740, + "step": 3656 + }, + { + "epoch": 0.41497872340425535, + "grad_norm": 13.68918228149414, + "learning_rate": 5e-05, + "loss": 1.3252, + "num_input_tokens_seen": 244673124, + "step": 3657 + }, + { + "epoch": 0.41497872340425535, + "loss": 1.374145746231079, + "loss_ce": 0.009399697184562683, + "loss_iou": 0.494140625, + "loss_num": 0.07470703125, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 244673124, + "step": 3657 + }, + { + "epoch": 0.41509219858156027, + "grad_norm": 19.201553344726562, + "learning_rate": 5e-05, + "loss": 1.2547, + "num_input_tokens_seen": 244739984, + "step": 3658 + }, + { + "epoch": 0.41509219858156027, + "loss": 1.186464548110962, + "loss_ce": 0.008241779170930386, + "loss_iou": 0.498046875, + "loss_num": 0.036865234375, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 244739984, + "step": 3658 + }, + { + "epoch": 0.41520567375886525, + "grad_norm": 32.16255187988281, + "learning_rate": 5e-05, + "loss": 1.3267, + "num_input_tokens_seen": 244806344, + "step": 3659 + }, + { + "epoch": 0.41520567375886525, + "loss": 1.5538208484649658, + "loss_ce": 0.006945927161723375, + "loss_iou": 0.61328125, + "loss_num": 0.064453125, + "loss_xval": 1.546875, + "num_input_tokens_seen": 244806344, + "step": 3659 + }, + { + "epoch": 0.4153191489361702, + "grad_norm": 43.32359313964844, + "learning_rate": 5e-05, + "loss": 1.2957, + "num_input_tokens_seen": 244872640, + "step": 3660 + }, + { + "epoch": 0.4153191489361702, + "loss": 1.1320788860321045, + "loss_ce": 0.007139980792999268, + "loss_iou": 0.4296875, + "loss_num": 0.052978515625, + "loss_xval": 1.125, + "num_input_tokens_seen": 244872640, + "step": 3660 + }, + { + "epoch": 0.4154326241134752, + "grad_norm": 39.81546401977539, + "learning_rate": 5e-05, + "loss": 1.2835, + "num_input_tokens_seen": 244939876, + "step": 3661 + }, + { + "epoch": 0.4154326241134752, + "loss": 1.3685224056243896, + "loss_ce": 0.004264629911631346, + "loss_iou": 0.58984375, + "loss_num": 0.0380859375, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 244939876, + "step": 3661 + }, + { + "epoch": 0.4155460992907801, + "grad_norm": 29.480525970458984, + "learning_rate": 5e-05, + "loss": 1.4794, + "num_input_tokens_seen": 245006992, + "step": 3662 + }, + { + "epoch": 0.4155460992907801, + "loss": 1.400665283203125, + "loss_ce": 0.003692608093842864, + "loss_iou": 0.5625, + "loss_num": 0.05517578125, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 245006992, + "step": 3662 + }, + { + "epoch": 0.4156595744680851, + "grad_norm": 30.717987060546875, + "learning_rate": 5e-05, + "loss": 1.1138, + "num_input_tokens_seen": 245074316, + "step": 3663 + }, + { + "epoch": 0.4156595744680851, + "loss": 1.1026041507720947, + "loss_ce": 0.00592450937256217, + "loss_iou": 0.4453125, + "loss_num": 0.041259765625, + "loss_xval": 1.09375, + "num_input_tokens_seen": 245074316, + "step": 3663 + }, + { + "epoch": 0.4157730496453901, + "grad_norm": 65.97978210449219, + "learning_rate": 5e-05, + "loss": 1.2858, + "num_input_tokens_seen": 245141304, + "step": 3664 + }, + { + "epoch": 0.4157730496453901, + "loss": 1.4257612228393555, + "loss_ce": 0.0043744854629039764, + "loss_iou": 0.625, + "loss_num": 0.034912109375, + "loss_xval": 1.421875, + "num_input_tokens_seen": 245141304, + "step": 3664 + }, + { + "epoch": 0.41588652482269506, + "grad_norm": 16.621274948120117, + "learning_rate": 5e-05, + "loss": 0.886, + "num_input_tokens_seen": 245208276, + "step": 3665 + }, + { + "epoch": 0.41588652482269506, + "loss": 0.9014815092086792, + "loss_ce": 0.005973692052066326, + "loss_iou": 0.37890625, + "loss_num": 0.0277099609375, + "loss_xval": 0.89453125, + "num_input_tokens_seen": 245208276, + "step": 3665 + }, + { + "epoch": 0.416, + "grad_norm": 12.28150463104248, + "learning_rate": 5e-05, + "loss": 1.1736, + "num_input_tokens_seen": 245274772, + "step": 3666 + }, + { + "epoch": 0.416, + "loss": 1.4303898811340332, + "loss_ce": 0.007050022482872009, + "loss_iou": 0.52734375, + "loss_num": 0.07470703125, + "loss_xval": 1.421875, + "num_input_tokens_seen": 245274772, + "step": 3666 + }, + { + "epoch": 0.41611347517730496, + "grad_norm": 15.692136764526367, + "learning_rate": 5e-05, + "loss": 1.0678, + "num_input_tokens_seen": 245341632, + "step": 3667 + }, + { + "epoch": 0.41611347517730496, + "loss": 1.0075621604919434, + "loss_ce": 0.006097308825701475, + "loss_iou": 0.419921875, + "loss_num": 0.031982421875, + "loss_xval": 1.0, + "num_input_tokens_seen": 245341632, + "step": 3667 + }, + { + "epoch": 0.41622695035460994, + "grad_norm": 49.50270462036133, + "learning_rate": 5e-05, + "loss": 1.214, + "num_input_tokens_seen": 245409048, + "step": 3668 + }, + { + "epoch": 0.41622695035460994, + "loss": 1.1183782815933228, + "loss_ce": 0.007538411300629377, + "loss_iou": 0.4921875, + "loss_num": 0.025146484375, + "loss_xval": 1.109375, + "num_input_tokens_seen": 245409048, + "step": 3668 + }, + { + "epoch": 0.4163404255319149, + "grad_norm": 24.755212783813477, + "learning_rate": 5e-05, + "loss": 1.0913, + "num_input_tokens_seen": 245474512, + "step": 3669 + }, + { + "epoch": 0.4163404255319149, + "loss": 1.1907881498336792, + "loss_ce": 0.006828173995018005, + "loss_iou": 0.46875, + "loss_num": 0.049072265625, + "loss_xval": 1.1875, + "num_input_tokens_seen": 245474512, + "step": 3669 + }, + { + "epoch": 0.41645390070921984, + "grad_norm": 22.014148712158203, + "learning_rate": 5e-05, + "loss": 1.2011, + "num_input_tokens_seen": 245541608, + "step": 3670 + }, + { + "epoch": 0.41645390070921984, + "loss": 1.2228590250015259, + "loss_ce": 0.008015299215912819, + "loss_iou": 0.48046875, + "loss_num": 0.051025390625, + "loss_xval": 1.21875, + "num_input_tokens_seen": 245541608, + "step": 3670 + }, + { + "epoch": 0.4165673758865248, + "grad_norm": 31.98136329650879, + "learning_rate": 5e-05, + "loss": 1.1987, + "num_input_tokens_seen": 245608568, + "step": 3671 + }, + { + "epoch": 0.4165673758865248, + "loss": 1.220295786857605, + "loss_ce": 0.006916837301105261, + "loss_iou": 0.486328125, + "loss_num": 0.04833984375, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 245608568, + "step": 3671 + }, + { + "epoch": 0.4166808510638298, + "grad_norm": 32.36111831665039, + "learning_rate": 5e-05, + "loss": 1.2808, + "num_input_tokens_seen": 245675344, + "step": 3672 + }, + { + "epoch": 0.4166808510638298, + "loss": 1.4122556447982788, + "loss_ce": 0.006494004279375076, + "loss_iou": 0.578125, + "loss_num": 0.05029296875, + "loss_xval": 1.40625, + "num_input_tokens_seen": 245675344, + "step": 3672 + }, + { + "epoch": 0.41679432624113477, + "grad_norm": 23.854248046875, + "learning_rate": 5e-05, + "loss": 1.3293, + "num_input_tokens_seen": 245742852, + "step": 3673 + }, + { + "epoch": 0.41679432624113477, + "loss": 1.1884280443191528, + "loss_ce": 0.0062992083840072155, + "loss_iou": 0.51953125, + "loss_num": 0.0286865234375, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 245742852, + "step": 3673 + }, + { + "epoch": 0.4169078014184397, + "grad_norm": 36.357330322265625, + "learning_rate": 5e-05, + "loss": 1.3301, + "num_input_tokens_seen": 245809852, + "step": 3674 + }, + { + "epoch": 0.4169078014184397, + "loss": 1.3729311227798462, + "loss_ce": 0.010626411065459251, + "loss_iou": 0.5390625, + "loss_num": 0.056884765625, + "loss_xval": 1.359375, + "num_input_tokens_seen": 245809852, + "step": 3674 + }, + { + "epoch": 0.41702127659574467, + "grad_norm": 26.800756454467773, + "learning_rate": 5e-05, + "loss": 1.6371, + "num_input_tokens_seen": 245876796, + "step": 3675 + }, + { + "epoch": 0.41702127659574467, + "loss": 1.6856975555419922, + "loss_ce": 0.009427933022379875, + "loss_iou": 0.65234375, + "loss_num": 0.0751953125, + "loss_xval": 1.6796875, + "num_input_tokens_seen": 245876796, + "step": 3675 + }, + { + "epoch": 0.41713475177304965, + "grad_norm": 14.12352466583252, + "learning_rate": 5e-05, + "loss": 1.059, + "num_input_tokens_seen": 245944296, + "step": 3676 + }, + { + "epoch": 0.41713475177304965, + "loss": 1.172513484954834, + "loss_ce": 0.010404079221189022, + "loss_iou": 0.46875, + "loss_num": 0.045166015625, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 245944296, + "step": 3676 + }, + { + "epoch": 0.41724822695035463, + "grad_norm": 34.31031036376953, + "learning_rate": 5e-05, + "loss": 1.1018, + "num_input_tokens_seen": 246010804, + "step": 3677 + }, + { + "epoch": 0.41724822695035463, + "loss": 0.9866012334823608, + "loss_ce": 0.005644220393151045, + "loss_iou": 0.41796875, + "loss_num": 0.029052734375, + "loss_xval": 0.98046875, + "num_input_tokens_seen": 246010804, + "step": 3677 + }, + { + "epoch": 0.41736170212765955, + "grad_norm": 15.055088996887207, + "learning_rate": 5e-05, + "loss": 1.274, + "num_input_tokens_seen": 246078584, + "step": 3678 + }, + { + "epoch": 0.41736170212765955, + "loss": 1.2344210147857666, + "loss_ce": 0.006393706426024437, + "loss_iou": 0.478515625, + "loss_num": 0.0537109375, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 246078584, + "step": 3678 + }, + { + "epoch": 0.41747517730496453, + "grad_norm": 32.024497985839844, + "learning_rate": 5e-05, + "loss": 1.2603, + "num_input_tokens_seen": 246145556, + "step": 3679 + }, + { + "epoch": 0.41747517730496453, + "loss": 1.336264967918396, + "loss_ce": 0.007407572586089373, + "loss_iou": 0.474609375, + "loss_num": 0.0751953125, + "loss_xval": 1.328125, + "num_input_tokens_seen": 246145556, + "step": 3679 + }, + { + "epoch": 0.4175886524822695, + "grad_norm": 25.320999145507812, + "learning_rate": 5e-05, + "loss": 1.3315, + "num_input_tokens_seen": 246212212, + "step": 3680 + }, + { + "epoch": 0.4175886524822695, + "loss": 1.4243723154067993, + "loss_ce": 0.007380109280347824, + "loss_iou": 0.5390625, + "loss_num": 0.06689453125, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 246212212, + "step": 3680 + }, + { + "epoch": 0.4177021276595745, + "grad_norm": 41.366668701171875, + "learning_rate": 5e-05, + "loss": 1.1345, + "num_input_tokens_seen": 246279052, + "step": 3681 + }, + { + "epoch": 0.4177021276595745, + "loss": 1.162351369857788, + "loss_ce": 0.004636482335627079, + "loss_iou": 0.498046875, + "loss_num": 0.03271484375, + "loss_xval": 1.15625, + "num_input_tokens_seen": 246279052, + "step": 3681 + }, + { + "epoch": 0.4178156028368794, + "grad_norm": 24.004732131958008, + "learning_rate": 5e-05, + "loss": 1.3667, + "num_input_tokens_seen": 246346360, + "step": 3682 + }, + { + "epoch": 0.4178156028368794, + "loss": 1.5055290460586548, + "loss_ce": 0.006017347797751427, + "loss_iou": 0.60546875, + "loss_num": 0.05810546875, + "loss_xval": 1.5, + "num_input_tokens_seen": 246346360, + "step": 3682 + }, + { + "epoch": 0.4179290780141844, + "grad_norm": 19.81182098388672, + "learning_rate": 5e-05, + "loss": 1.257, + "num_input_tokens_seen": 246412868, + "step": 3683 + }, + { + "epoch": 0.4179290780141844, + "loss": 1.3250970840454102, + "loss_ce": 0.006249452009797096, + "loss_iou": 0.546875, + "loss_num": 0.0458984375, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 246412868, + "step": 3683 + }, + { + "epoch": 0.41804255319148936, + "grad_norm": 19.607702255249023, + "learning_rate": 5e-05, + "loss": 1.3338, + "num_input_tokens_seen": 246480076, + "step": 3684 + }, + { + "epoch": 0.41804255319148936, + "loss": 1.2589097023010254, + "loss_ce": 0.008177278563380241, + "loss_iou": 0.455078125, + "loss_num": 0.068359375, + "loss_xval": 1.25, + "num_input_tokens_seen": 246480076, + "step": 3684 + }, + { + "epoch": 0.41815602836879434, + "grad_norm": 20.704694747924805, + "learning_rate": 5e-05, + "loss": 1.3156, + "num_input_tokens_seen": 246547760, + "step": 3685 + }, + { + "epoch": 0.41815602836879434, + "loss": 1.5950441360473633, + "loss_ce": 0.007153478916734457, + "loss_iou": 0.59765625, + "loss_num": 0.07763671875, + "loss_xval": 1.5859375, + "num_input_tokens_seen": 246547760, + "step": 3685 + }, + { + "epoch": 0.41826950354609926, + "grad_norm": 27.760042190551758, + "learning_rate": 5e-05, + "loss": 1.3979, + "num_input_tokens_seen": 246614652, + "step": 3686 + }, + { + "epoch": 0.41826950354609926, + "loss": 1.3714252710342407, + "loss_ce": 0.007167463190853596, + "loss_iou": 0.50390625, + "loss_num": 0.0712890625, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 246614652, + "step": 3686 + }, + { + "epoch": 0.41838297872340424, + "grad_norm": 29.560117721557617, + "learning_rate": 5e-05, + "loss": 1.1741, + "num_input_tokens_seen": 246681392, + "step": 3687 + }, + { + "epoch": 0.41838297872340424, + "loss": 1.3271846771240234, + "loss_ce": 0.005407391116023064, + "loss_iou": 0.51953125, + "loss_num": 0.056640625, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 246681392, + "step": 3687 + }, + { + "epoch": 0.4184964539007092, + "grad_norm": 32.6290168762207, + "learning_rate": 5e-05, + "loss": 1.5199, + "num_input_tokens_seen": 246748576, + "step": 3688 + }, + { + "epoch": 0.4184964539007092, + "loss": 1.7188044786453247, + "loss_ce": 0.002984155435115099, + "loss_iou": 0.6640625, + "loss_num": 0.0771484375, + "loss_xval": 1.71875, + "num_input_tokens_seen": 246748576, + "step": 3688 + }, + { + "epoch": 0.4186099290780142, + "grad_norm": 26.706544876098633, + "learning_rate": 5e-05, + "loss": 1.1278, + "num_input_tokens_seen": 246816032, + "step": 3689 + }, + { + "epoch": 0.4186099290780142, + "loss": 1.0905941724777222, + "loss_ce": 0.005633210763335228, + "loss_iou": 0.4765625, + "loss_num": 0.0260009765625, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 246816032, + "step": 3689 + }, + { + "epoch": 0.4187234042553192, + "grad_norm": 29.66267204284668, + "learning_rate": 5e-05, + "loss": 1.4208, + "num_input_tokens_seen": 246881932, + "step": 3690 + }, + { + "epoch": 0.4187234042553192, + "loss": 1.5751557350158691, + "loss_ce": 0.003866620361804962, + "loss_iou": 0.6171875, + "loss_num": 0.0673828125, + "loss_xval": 1.5703125, + "num_input_tokens_seen": 246881932, + "step": 3690 + }, + { + "epoch": 0.4188368794326241, + "grad_norm": 40.6899528503418, + "learning_rate": 5e-05, + "loss": 1.2896, + "num_input_tokens_seen": 246949028, + "step": 3691 + }, + { + "epoch": 0.4188368794326241, + "loss": 1.3940625190734863, + "loss_ce": 0.005390603095293045, + "loss_iou": 0.58984375, + "loss_num": 0.04150390625, + "loss_xval": 1.390625, + "num_input_tokens_seen": 246949028, + "step": 3691 + }, + { + "epoch": 0.4189503546099291, + "grad_norm": 34.614009857177734, + "learning_rate": 5e-05, + "loss": 1.5758, + "num_input_tokens_seen": 247015776, + "step": 3692 + }, + { + "epoch": 0.4189503546099291, + "loss": 1.5896375179290771, + "loss_ce": 0.005653250962495804, + "loss_iou": 0.6640625, + "loss_num": 0.05078125, + "loss_xval": 1.5859375, + "num_input_tokens_seen": 247015776, + "step": 3692 + }, + { + "epoch": 0.41906382978723405, + "grad_norm": 34.36503219604492, + "learning_rate": 5e-05, + "loss": 1.2799, + "num_input_tokens_seen": 247082496, + "step": 3693 + }, + { + "epoch": 0.41906382978723405, + "loss": 1.1613237857818604, + "loss_ce": 0.00605030357837677, + "loss_iou": 0.484375, + "loss_num": 0.03759765625, + "loss_xval": 1.15625, + "num_input_tokens_seen": 247082496, + "step": 3693 + }, + { + "epoch": 0.41917730496453903, + "grad_norm": 26.7771053314209, + "learning_rate": 5e-05, + "loss": 1.4016, + "num_input_tokens_seen": 247148896, + "step": 3694 + }, + { + "epoch": 0.41917730496453903, + "loss": 1.5495752096176147, + "loss_ce": 0.009536245837807655, + "loss_iou": 0.62890625, + "loss_num": 0.05615234375, + "loss_xval": 1.5390625, + "num_input_tokens_seen": 247148896, + "step": 3694 + }, + { + "epoch": 0.41929078014184396, + "grad_norm": 378.200927734375, + "learning_rate": 5e-05, + "loss": 1.0936, + "num_input_tokens_seen": 247214700, + "step": 3695 + }, + { + "epoch": 0.41929078014184396, + "loss": 1.1184715032577515, + "loss_ce": 0.0071434397250413895, + "loss_iou": 0.466796875, + "loss_num": 0.03564453125, + "loss_xval": 1.109375, + "num_input_tokens_seen": 247214700, + "step": 3695 + }, + { + "epoch": 0.41940425531914893, + "grad_norm": 20.727937698364258, + "learning_rate": 5e-05, + "loss": 1.0941, + "num_input_tokens_seen": 247282268, + "step": 3696 + }, + { + "epoch": 0.41940425531914893, + "loss": 1.0941691398620605, + "loss_ce": 0.003837136086076498, + "loss_iou": 0.419921875, + "loss_num": 0.050537109375, + "loss_xval": 1.09375, + "num_input_tokens_seen": 247282268, + "step": 3696 + }, + { + "epoch": 0.4195177304964539, + "grad_norm": 56.03878402709961, + "learning_rate": 5e-05, + "loss": 1.1915, + "num_input_tokens_seen": 247349112, + "step": 3697 + }, + { + "epoch": 0.4195177304964539, + "loss": 1.2301981449127197, + "loss_ce": 0.004123893566429615, + "loss_iou": 0.498046875, + "loss_num": 0.045654296875, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 247349112, + "step": 3697 + }, + { + "epoch": 0.4196312056737589, + "grad_norm": 22.534671783447266, + "learning_rate": 5e-05, + "loss": 1.0775, + "num_input_tokens_seen": 247416132, + "step": 3698 + }, + { + "epoch": 0.4196312056737589, + "loss": 1.1417169570922852, + "loss_ce": 0.008599262684583664, + "loss_iou": 0.44921875, + "loss_num": 0.046630859375, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 247416132, + "step": 3698 + }, + { + "epoch": 0.4197446808510638, + "grad_norm": 27.206117630004883, + "learning_rate": 5e-05, + "loss": 1.4295, + "num_input_tokens_seen": 247482508, + "step": 3699 + }, + { + "epoch": 0.4197446808510638, + "loss": 1.3274562358856201, + "loss_ce": 0.007143697235733271, + "loss_iou": 0.53125, + "loss_num": 0.052001953125, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 247482508, + "step": 3699 + }, + { + "epoch": 0.4198581560283688, + "grad_norm": 45.544395446777344, + "learning_rate": 5e-05, + "loss": 1.2672, + "num_input_tokens_seen": 247549844, + "step": 3700 + }, + { + "epoch": 0.4198581560283688, + "loss": 1.2367931604385376, + "loss_ce": 0.005347882397472858, + "loss_iou": 0.48828125, + "loss_num": 0.05126953125, + "loss_xval": 1.234375, + "num_input_tokens_seen": 247549844, + "step": 3700 + }, + { + "epoch": 0.41997163120567377, + "grad_norm": 38.71300506591797, + "learning_rate": 5e-05, + "loss": 1.5579, + "num_input_tokens_seen": 247616056, + "step": 3701 + }, + { + "epoch": 0.41997163120567377, + "loss": 1.3984490633010864, + "loss_ce": 0.004406072199344635, + "loss_iou": 0.58203125, + "loss_num": 0.0458984375, + "loss_xval": 1.390625, + "num_input_tokens_seen": 247616056, + "step": 3701 + }, + { + "epoch": 0.42008510638297875, + "grad_norm": 27.87663459777832, + "learning_rate": 5e-05, + "loss": 1.33, + "num_input_tokens_seen": 247682344, + "step": 3702 + }, + { + "epoch": 0.42008510638297875, + "loss": 1.3287599086761475, + "loss_ce": 0.009423972107470036, + "loss_iou": 0.5625, + "loss_num": 0.038330078125, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 247682344, + "step": 3702 + }, + { + "epoch": 0.42019858156028367, + "grad_norm": 31.250404357910156, + "learning_rate": 5e-05, + "loss": 1.2849, + "num_input_tokens_seen": 247749112, + "step": 3703 + }, + { + "epoch": 0.42019858156028367, + "loss": 1.258285641670227, + "loss_ce": 0.006820746697485447, + "loss_iou": 0.515625, + "loss_num": 0.043701171875, + "loss_xval": 1.25, + "num_input_tokens_seen": 247749112, + "step": 3703 + }, + { + "epoch": 0.42031205673758865, + "grad_norm": 32.18536376953125, + "learning_rate": 5e-05, + "loss": 1.4112, + "num_input_tokens_seen": 247816120, + "step": 3704 + }, + { + "epoch": 0.42031205673758865, + "loss": 1.3348851203918457, + "loss_ce": 0.009201491251587868, + "loss_iou": 0.5546875, + "loss_num": 0.0439453125, + "loss_xval": 1.328125, + "num_input_tokens_seen": 247816120, + "step": 3704 + }, + { + "epoch": 0.4204255319148936, + "grad_norm": 27.582469940185547, + "learning_rate": 5e-05, + "loss": 1.0897, + "num_input_tokens_seen": 247882592, + "step": 3705 + }, + { + "epoch": 0.4204255319148936, + "loss": 0.8913125991821289, + "loss_ce": 0.005936594679951668, + "loss_iou": 0.365234375, + "loss_num": 0.0311279296875, + "loss_xval": 0.88671875, + "num_input_tokens_seen": 247882592, + "step": 3705 + }, + { + "epoch": 0.4205390070921986, + "grad_norm": 21.29157066345215, + "learning_rate": 5e-05, + "loss": 1.0129, + "num_input_tokens_seen": 247948952, + "step": 3706 + }, + { + "epoch": 0.4205390070921986, + "loss": 0.9784420728683472, + "loss_ce": 0.007220185827463865, + "loss_iou": 0.40625, + "loss_num": 0.03125, + "loss_xval": 0.97265625, + "num_input_tokens_seen": 247948952, + "step": 3706 + }, + { + "epoch": 0.4206524822695035, + "grad_norm": 50.76822280883789, + "learning_rate": 5e-05, + "loss": 1.3608, + "num_input_tokens_seen": 248015772, + "step": 3707 + }, + { + "epoch": 0.4206524822695035, + "loss": 1.351981520652771, + "loss_ce": 0.003348708851262927, + "loss_iou": 0.58203125, + "loss_num": 0.03759765625, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 248015772, + "step": 3707 + }, + { + "epoch": 0.4207659574468085, + "grad_norm": 21.77083969116211, + "learning_rate": 5e-05, + "loss": 1.3906, + "num_input_tokens_seen": 248082396, + "step": 3708 + }, + { + "epoch": 0.4207659574468085, + "loss": 1.4521348476409912, + "loss_ce": 0.0043809618800878525, + "loss_iou": 0.5625, + "loss_num": 0.064453125, + "loss_xval": 1.4453125, + "num_input_tokens_seen": 248082396, + "step": 3708 + }, + { + "epoch": 0.4208794326241135, + "grad_norm": 147.21298217773438, + "learning_rate": 5e-05, + "loss": 1.2183, + "num_input_tokens_seen": 248150832, + "step": 3709 + }, + { + "epoch": 0.4208794326241135, + "loss": 1.1581742763519287, + "loss_ce": 0.01022501103579998, + "loss_iou": 0.482421875, + "loss_num": 0.036865234375, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 248150832, + "step": 3709 + }, + { + "epoch": 0.42099290780141846, + "grad_norm": 33.564170837402344, + "learning_rate": 5e-05, + "loss": 1.3662, + "num_input_tokens_seen": 248218296, + "step": 3710 + }, + { + "epoch": 0.42099290780141846, + "loss": 1.4451160430908203, + "loss_ce": 0.001756685902364552, + "loss_iou": 0.55078125, + "loss_num": 0.068359375, + "loss_xval": 1.4453125, + "num_input_tokens_seen": 248218296, + "step": 3710 + }, + { + "epoch": 0.4211063829787234, + "grad_norm": 29.19655990600586, + "learning_rate": 5e-05, + "loss": 1.4733, + "num_input_tokens_seen": 248285168, + "step": 3711 + }, + { + "epoch": 0.4211063829787234, + "loss": 1.330430507659912, + "loss_ce": 0.00425877096131444, + "loss_iou": 0.54296875, + "loss_num": 0.04736328125, + "loss_xval": 1.328125, + "num_input_tokens_seen": 248285168, + "step": 3711 + }, + { + "epoch": 0.42121985815602836, + "grad_norm": 36.74614715576172, + "learning_rate": 5e-05, + "loss": 1.3727, + "num_input_tokens_seen": 248351736, + "step": 3712 + }, + { + "epoch": 0.42121985815602836, + "loss": 1.4068217277526855, + "loss_ce": 0.006431143265217543, + "loss_iou": 0.578125, + "loss_num": 0.049560546875, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 248351736, + "step": 3712 + }, + { + "epoch": 0.42133333333333334, + "grad_norm": 27.862791061401367, + "learning_rate": 5e-05, + "loss": 1.4354, + "num_input_tokens_seen": 248418572, + "step": 3713 + }, + { + "epoch": 0.42133333333333334, + "loss": 1.3692240715026855, + "loss_ce": 0.0030131309758871794, + "loss_iou": 0.5546875, + "loss_num": 0.051513671875, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 248418572, + "step": 3713 + }, + { + "epoch": 0.4214468085106383, + "grad_norm": 19.75771141052246, + "learning_rate": 5e-05, + "loss": 1.129, + "num_input_tokens_seen": 248484480, + "step": 3714 + }, + { + "epoch": 0.4214468085106383, + "loss": 1.1102503538131714, + "loss_ce": 0.007467165123671293, + "loss_iou": 0.4453125, + "loss_num": 0.04296875, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 248484480, + "step": 3714 + }, + { + "epoch": 0.42156028368794324, + "grad_norm": 101.50293731689453, + "learning_rate": 5e-05, + "loss": 1.3708, + "num_input_tokens_seen": 248551000, + "step": 3715 + }, + { + "epoch": 0.42156028368794324, + "loss": 1.4374594688415527, + "loss_ce": 0.004353970289230347, + "loss_iou": 0.5859375, + "loss_num": 0.052001953125, + "loss_xval": 1.4296875, + "num_input_tokens_seen": 248551000, + "step": 3715 + }, + { + "epoch": 0.4216737588652482, + "grad_norm": 26.8526668548584, + "learning_rate": 5e-05, + "loss": 1.2691, + "num_input_tokens_seen": 248616912, + "step": 3716 + }, + { + "epoch": 0.4216737588652482, + "loss": 1.183821439743042, + "loss_ce": 0.0036456361413002014, + "loss_iou": 0.5, + "loss_num": 0.03564453125, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 248616912, + "step": 3716 + }, + { + "epoch": 0.4217872340425532, + "grad_norm": 26.018108367919922, + "learning_rate": 5e-05, + "loss": 1.2598, + "num_input_tokens_seen": 248684696, + "step": 3717 + }, + { + "epoch": 0.4217872340425532, + "loss": 1.4133245944976807, + "loss_ce": 0.007074578665196896, + "loss_iou": 0.546875, + "loss_num": 0.06298828125, + "loss_xval": 1.40625, + "num_input_tokens_seen": 248684696, + "step": 3717 + }, + { + "epoch": 0.42190070921985817, + "grad_norm": 41.48230743408203, + "learning_rate": 5e-05, + "loss": 1.1191, + "num_input_tokens_seen": 248751256, + "step": 3718 + }, + { + "epoch": 0.42190070921985817, + "loss": 1.1741515398025513, + "loss_ce": 0.0066711390390992165, + "loss_iou": 0.51171875, + "loss_num": 0.0284423828125, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 248751256, + "step": 3718 + }, + { + "epoch": 0.4220141843971631, + "grad_norm": 25.53894805908203, + "learning_rate": 5e-05, + "loss": 1.3655, + "num_input_tokens_seen": 248818512, + "step": 3719 + }, + { + "epoch": 0.4220141843971631, + "loss": 1.2975339889526367, + "loss_ce": 0.008715675212442875, + "loss_iou": 0.5390625, + "loss_num": 0.042236328125, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 248818512, + "step": 3719 + }, + { + "epoch": 0.4221276595744681, + "grad_norm": 16.37216567993164, + "learning_rate": 5e-05, + "loss": 1.1478, + "num_input_tokens_seen": 248885756, + "step": 3720 + }, + { + "epoch": 0.4221276595744681, + "loss": 1.2473869323730469, + "loss_ce": 0.0042229099199175835, + "loss_iou": 0.48828125, + "loss_num": 0.0537109375, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 248885756, + "step": 3720 + }, + { + "epoch": 0.42224113475177305, + "grad_norm": 35.889198303222656, + "learning_rate": 5e-05, + "loss": 1.2295, + "num_input_tokens_seen": 248952876, + "step": 3721 + }, + { + "epoch": 0.42224113475177305, + "loss": 1.4443130493164062, + "loss_ce": 0.0038833627477288246, + "loss_iou": 0.5390625, + "loss_num": 0.0712890625, + "loss_xval": 1.4375, + "num_input_tokens_seen": 248952876, + "step": 3721 + }, + { + "epoch": 0.42235460992907803, + "grad_norm": 24.646120071411133, + "learning_rate": 5e-05, + "loss": 1.2013, + "num_input_tokens_seen": 249019200, + "step": 3722 + }, + { + "epoch": 0.42235460992907803, + "loss": 1.2740919589996338, + "loss_ce": 0.005048954393714666, + "loss_iou": 0.515625, + "loss_num": 0.048095703125, + "loss_xval": 1.265625, + "num_input_tokens_seen": 249019200, + "step": 3722 + }, + { + "epoch": 0.42246808510638295, + "grad_norm": 26.231687545776367, + "learning_rate": 5e-05, + "loss": 1.1544, + "num_input_tokens_seen": 249086080, + "step": 3723 + }, + { + "epoch": 0.42246808510638295, + "loss": 1.240591049194336, + "loss_ce": 0.0032863724045455456, + "loss_iou": 0.5234375, + "loss_num": 0.0390625, + "loss_xval": 1.234375, + "num_input_tokens_seen": 249086080, + "step": 3723 + }, + { + "epoch": 0.42258156028368793, + "grad_norm": 23.329740524291992, + "learning_rate": 5e-05, + "loss": 1.4507, + "num_input_tokens_seen": 249153492, + "step": 3724 + }, + { + "epoch": 0.42258156028368793, + "loss": 1.3747680187225342, + "loss_ce": 0.005627368576824665, + "loss_iou": 0.53515625, + "loss_num": 0.05908203125, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 249153492, + "step": 3724 + }, + { + "epoch": 0.4226950354609929, + "grad_norm": 39.694854736328125, + "learning_rate": 5e-05, + "loss": 0.9694, + "num_input_tokens_seen": 249219560, + "step": 3725 + }, + { + "epoch": 0.4226950354609929, + "loss": 0.9694892168045044, + "loss_ce": 0.005866187624633312, + "loss_iou": 0.373046875, + "loss_num": 0.043701171875, + "loss_xval": 0.96484375, + "num_input_tokens_seen": 249219560, + "step": 3725 + }, + { + "epoch": 0.4228085106382979, + "grad_norm": 37.69912338256836, + "learning_rate": 5e-05, + "loss": 1.462, + "num_input_tokens_seen": 249287320, + "step": 3726 + }, + { + "epoch": 0.4228085106382979, + "loss": 1.4785374402999878, + "loss_ce": 0.005392926745116711, + "loss_iou": 0.62109375, + "loss_num": 0.046142578125, + "loss_xval": 1.4765625, + "num_input_tokens_seen": 249287320, + "step": 3726 + }, + { + "epoch": 0.42292198581560286, + "grad_norm": 15.434120178222656, + "learning_rate": 5e-05, + "loss": 1.0438, + "num_input_tokens_seen": 249354056, + "step": 3727 + }, + { + "epoch": 0.42292198581560286, + "loss": 1.0023903846740723, + "loss_ce": 0.009226381778717041, + "loss_iou": 0.388671875, + "loss_num": 0.043701171875, + "loss_xval": 0.9921875, + "num_input_tokens_seen": 249354056, + "step": 3727 + }, + { + "epoch": 0.4230354609929078, + "grad_norm": 27.198867797851562, + "learning_rate": 5e-05, + "loss": 1.3373, + "num_input_tokens_seen": 249420496, + "step": 3728 + }, + { + "epoch": 0.4230354609929078, + "loss": 1.2537498474121094, + "loss_ce": 0.008144441992044449, + "loss_iou": 0.5078125, + "loss_num": 0.04638671875, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 249420496, + "step": 3728 + }, + { + "epoch": 0.42314893617021276, + "grad_norm": 26.11910057067871, + "learning_rate": 5e-05, + "loss": 1.3127, + "num_input_tokens_seen": 249486184, + "step": 3729 + }, + { + "epoch": 0.42314893617021276, + "loss": 1.259822130203247, + "loss_ce": 0.005915907211601734, + "loss_iou": 0.55078125, + "loss_num": 0.03076171875, + "loss_xval": 1.25, + "num_input_tokens_seen": 249486184, + "step": 3729 + }, + { + "epoch": 0.42326241134751774, + "grad_norm": 28.12274169921875, + "learning_rate": 5e-05, + "loss": 1.1318, + "num_input_tokens_seen": 249553132, + "step": 3730 + }, + { + "epoch": 0.42326241134751774, + "loss": 0.928669810295105, + "loss_ce": 0.002888609655201435, + "loss_iou": 0.390625, + "loss_num": 0.0289306640625, + "loss_xval": 0.92578125, + "num_input_tokens_seen": 249553132, + "step": 3730 + }, + { + "epoch": 0.4233758865248227, + "grad_norm": 22.420148849487305, + "learning_rate": 5e-05, + "loss": 1.0902, + "num_input_tokens_seen": 249619916, + "step": 3731 + }, + { + "epoch": 0.4233758865248227, + "loss": 0.9625710248947144, + "loss_ce": 0.003586691804230213, + "loss_iou": 0.396484375, + "loss_num": 0.033203125, + "loss_xval": 0.9609375, + "num_input_tokens_seen": 249619916, + "step": 3731 + }, + { + "epoch": 0.42348936170212764, + "grad_norm": 26.7435359954834, + "learning_rate": 5e-05, + "loss": 1.3342, + "num_input_tokens_seen": 249686820, + "step": 3732 + }, + { + "epoch": 0.42348936170212764, + "loss": 1.4254206418991089, + "loss_ce": 0.011358197778463364, + "loss_iou": 0.515625, + "loss_num": 0.0771484375, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 249686820, + "step": 3732 + }, + { + "epoch": 0.4236028368794326, + "grad_norm": 29.37957191467285, + "learning_rate": 5e-05, + "loss": 1.3491, + "num_input_tokens_seen": 249753904, + "step": 3733 + }, + { + "epoch": 0.4236028368794326, + "loss": 1.2134464979171753, + "loss_ce": 0.00397386122494936, + "loss_iou": 0.5, + "loss_num": 0.0419921875, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 249753904, + "step": 3733 + }, + { + "epoch": 0.4237163120567376, + "grad_norm": 35.35844802856445, + "learning_rate": 5e-05, + "loss": 1.0372, + "num_input_tokens_seen": 249820788, + "step": 3734 + }, + { + "epoch": 0.4237163120567376, + "loss": 1.087091326713562, + "loss_ce": 0.001642058021388948, + "loss_iou": 0.421875, + "loss_num": 0.048095703125, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 249820788, + "step": 3734 + }, + { + "epoch": 0.4238297872340426, + "grad_norm": 29.03750228881836, + "learning_rate": 5e-05, + "loss": 1.3631, + "num_input_tokens_seen": 249887600, + "step": 3735 + }, + { + "epoch": 0.4238297872340426, + "loss": 1.4760165214538574, + "loss_ce": 0.005313451401889324, + "loss_iou": 0.6171875, + "loss_num": 0.046875, + "loss_xval": 1.46875, + "num_input_tokens_seen": 249887600, + "step": 3735 + }, + { + "epoch": 0.4239432624113475, + "grad_norm": 25.34063148498535, + "learning_rate": 5e-05, + "loss": 1.2785, + "num_input_tokens_seen": 249953940, + "step": 3736 + }, + { + "epoch": 0.4239432624113475, + "loss": 1.1382861137390137, + "loss_ce": 0.005778909660875797, + "loss_iou": 0.44140625, + "loss_num": 0.05029296875, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 249953940, + "step": 3736 + }, + { + "epoch": 0.4240567375886525, + "grad_norm": 25.12356948852539, + "learning_rate": 5e-05, + "loss": 1.1215, + "num_input_tokens_seen": 250020800, + "step": 3737 + }, + { + "epoch": 0.4240567375886525, + "loss": 1.2137901782989502, + "loss_ce": 0.003829207504168153, + "loss_iou": 0.48046875, + "loss_num": 0.049560546875, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 250020800, + "step": 3737 + }, + { + "epoch": 0.42417021276595746, + "grad_norm": 50.972190856933594, + "learning_rate": 5e-05, + "loss": 1.3273, + "num_input_tokens_seen": 250087452, + "step": 3738 + }, + { + "epoch": 0.42417021276595746, + "loss": 1.3575067520141602, + "loss_ce": 0.007897449657320976, + "loss_iou": 0.55859375, + "loss_num": 0.045654296875, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 250087452, + "step": 3738 + }, + { + "epoch": 0.42428368794326243, + "grad_norm": 32.29669189453125, + "learning_rate": 5e-05, + "loss": 1.3564, + "num_input_tokens_seen": 250155080, + "step": 3739 + }, + { + "epoch": 0.42428368794326243, + "loss": 1.2287490367889404, + "loss_ce": 0.004627914167940617, + "loss_iou": 0.55078125, + "loss_num": 0.024169921875, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 250155080, + "step": 3739 + }, + { + "epoch": 0.42439716312056736, + "grad_norm": 28.863706588745117, + "learning_rate": 5e-05, + "loss": 1.1394, + "num_input_tokens_seen": 250221648, + "step": 3740 + }, + { + "epoch": 0.42439716312056736, + "loss": 1.078518271446228, + "loss_ce": 0.00820576585829258, + "loss_iou": 0.40625, + "loss_num": 0.051513671875, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 250221648, + "step": 3740 + }, + { + "epoch": 0.42451063829787233, + "grad_norm": 31.49152183532715, + "learning_rate": 5e-05, + "loss": 1.2472, + "num_input_tokens_seen": 250289444, + "step": 3741 + }, + { + "epoch": 0.42451063829787233, + "loss": 1.2634055614471436, + "loss_ce": 0.004128210246562958, + "loss_iou": 0.53515625, + "loss_num": 0.037109375, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 250289444, + "step": 3741 + }, + { + "epoch": 0.4246241134751773, + "grad_norm": 86.28459930419922, + "learning_rate": 5e-05, + "loss": 1.4227, + "num_input_tokens_seen": 250356976, + "step": 3742 + }, + { + "epoch": 0.4246241134751773, + "loss": 1.374215841293335, + "loss_ce": 0.003122089896351099, + "loss_iou": 0.59375, + "loss_num": 0.037109375, + "loss_xval": 1.375, + "num_input_tokens_seen": 250356976, + "step": 3742 + }, + { + "epoch": 0.4247375886524823, + "grad_norm": 23.75172233581543, + "learning_rate": 5e-05, + "loss": 1.4281, + "num_input_tokens_seen": 250423384, + "step": 3743 + }, + { + "epoch": 0.4247375886524823, + "loss": 1.155463457107544, + "loss_ce": 0.0050728218629956245, + "loss_iou": 0.5078125, + "loss_num": 0.02783203125, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 250423384, + "step": 3743 + }, + { + "epoch": 0.4248510638297872, + "grad_norm": 20.75948143005371, + "learning_rate": 5e-05, + "loss": 1.1406, + "num_input_tokens_seen": 250490072, + "step": 3744 + }, + { + "epoch": 0.4248510638297872, + "loss": 1.2656089067459106, + "loss_ce": 0.0043784379959106445, + "loss_iou": 0.5234375, + "loss_num": 0.04296875, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 250490072, + "step": 3744 + }, + { + "epoch": 0.4249645390070922, + "grad_norm": 35.63673782348633, + "learning_rate": 5e-05, + "loss": 1.3652, + "num_input_tokens_seen": 250557368, + "step": 3745 + }, + { + "epoch": 0.4249645390070922, + "loss": 1.3642957210540771, + "loss_ce": 0.01663956418633461, + "loss_iou": 0.52734375, + "loss_num": 0.058837890625, + "loss_xval": 1.34375, + "num_input_tokens_seen": 250557368, + "step": 3745 + }, + { + "epoch": 0.42507801418439717, + "grad_norm": 31.57618522644043, + "learning_rate": 5e-05, + "loss": 1.3155, + "num_input_tokens_seen": 250623480, + "step": 3746 + }, + { + "epoch": 0.42507801418439717, + "loss": 1.256431221961975, + "loss_ce": 0.008994700387120247, + "loss_iou": 0.5078125, + "loss_num": 0.045654296875, + "loss_xval": 1.25, + "num_input_tokens_seen": 250623480, + "step": 3746 + }, + { + "epoch": 0.42519148936170215, + "grad_norm": 19.677967071533203, + "learning_rate": 5e-05, + "loss": 1.0202, + "num_input_tokens_seen": 250689668, + "step": 3747 + }, + { + "epoch": 0.42519148936170215, + "loss": 0.8773292303085327, + "loss_ce": 0.004282362759113312, + "loss_iou": 0.39453125, + "loss_num": 0.01708984375, + "loss_xval": 0.875, + "num_input_tokens_seen": 250689668, + "step": 3747 + }, + { + "epoch": 0.42530496453900707, + "grad_norm": 22.91827392578125, + "learning_rate": 5e-05, + "loss": 1.1767, + "num_input_tokens_seen": 250757320, + "step": 3748 + }, + { + "epoch": 0.42530496453900707, + "loss": 1.2833894491195679, + "loss_ce": 0.0075105587020516396, + "loss_iou": 0.51171875, + "loss_num": 0.05078125, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 250757320, + "step": 3748 + }, + { + "epoch": 0.42541843971631205, + "grad_norm": 125.3395767211914, + "learning_rate": 5e-05, + "loss": 1.1663, + "num_input_tokens_seen": 250823296, + "step": 3749 + }, + { + "epoch": 0.42541843971631205, + "loss": 1.2449257373809814, + "loss_ce": 0.004203106742352247, + "loss_iou": 0.498046875, + "loss_num": 0.048583984375, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 250823296, + "step": 3749 + }, + { + "epoch": 0.425531914893617, + "grad_norm": 28.624832153320312, + "learning_rate": 5e-05, + "loss": 1.5169, + "num_input_tokens_seen": 250889760, + "step": 3750 + }, + { + "epoch": 0.425531914893617, + "eval_seeclick_CIoU": 0.3768230080604553, + "eval_seeclick_GIoU": 0.35249297320842743, + "eval_seeclick_IoU": 0.4618520736694336, + "eval_seeclick_MAE_all": 0.1566527932882309, + "eval_seeclick_MAE_h": 0.06243489496409893, + "eval_seeclick_MAE_w": 0.11364665627479553, + "eval_seeclick_MAE_x_boxes": 0.23593676090240479, + "eval_seeclick_MAE_y_boxes": 0.1277388483285904, + "eval_seeclick_NUM_probability": 0.9999420046806335, + "eval_seeclick_inside_bbox": 0.6458333432674408, + "eval_seeclick_loss": 2.4655957221984863, + "eval_seeclick_loss_ce": 0.014444830361753702, + "eval_seeclick_loss_iou": 0.8541259765625, + "eval_seeclick_loss_num": 0.15345001220703125, + "eval_seeclick_loss_xval": 2.4755859375, + "eval_seeclick_runtime": 69.4647, + "eval_seeclick_samples_per_second": 0.677, + "eval_seeclick_steps_per_second": 0.029, + "num_input_tokens_seen": 250889760, + "step": 3750 + }, + { + "epoch": 0.425531914893617, + "eval_icons_CIoU": 0.524435892701149, + "eval_icons_GIoU": 0.5172564685344696, + "eval_icons_IoU": 0.5502668470144272, + "eval_icons_MAE_all": 0.1296873800456524, + "eval_icons_MAE_h": 0.09152811020612717, + "eval_icons_MAE_w": 0.17928803712129593, + "eval_icons_MAE_x_boxes": 0.07228491455316544, + "eval_icons_MAE_y_boxes": 0.05806456692516804, + "eval_icons_NUM_probability": 0.9999792575836182, + "eval_icons_inside_bbox": 0.8663194477558136, + "eval_icons_loss": 2.3561394214630127, + "eval_icons_loss_ce": 0.000224250354222022, + "eval_icons_loss_iou": 0.8367919921875, + "eval_icons_loss_num": 0.12252426147460938, + "eval_icons_loss_xval": 2.2861328125, + "eval_icons_runtime": 68.18, + "eval_icons_samples_per_second": 0.733, + "eval_icons_steps_per_second": 0.029, + "num_input_tokens_seen": 250889760, + "step": 3750 + }, + { + "epoch": 0.425531914893617, + "eval_screenspot_CIoU": 0.3344839910666148, + "eval_screenspot_GIoU": 0.3120170533657074, + "eval_screenspot_IoU": 0.41609054803848267, + "eval_screenspot_MAE_all": 0.1842394322156906, + "eval_screenspot_MAE_h": 0.1346618210275968, + "eval_screenspot_MAE_w": 0.12805359562238058, + "eval_screenspot_MAE_x_boxes": 0.2632874796787898, + "eval_screenspot_MAE_y_boxes": 0.11344280342260997, + "eval_screenspot_NUM_probability": 0.9999666412671407, + "eval_screenspot_inside_bbox": 0.6804166634877523, + "eval_screenspot_loss": 2.828378677368164, + "eval_screenspot_loss_ce": 0.015234525936345259, + "eval_screenspot_loss_iou": 0.9392903645833334, + "eval_screenspot_loss_num": 0.19051106770833334, + "eval_screenspot_loss_xval": 2.8304036458333335, + "eval_screenspot_runtime": 119.793, + "eval_screenspot_samples_per_second": 0.743, + "eval_screenspot_steps_per_second": 0.025, + "num_input_tokens_seen": 250889760, + "step": 3750 + }, + { + "epoch": 0.425531914893617, + "eval_compot_CIoU": 0.2964083030819893, + "eval_compot_GIoU": 0.24534140527248383, + "eval_compot_IoU": 0.3773292899131775, + "eval_compot_MAE_all": 0.23540061712265015, + "eval_compot_MAE_h": 0.16733094304800034, + "eval_compot_MAE_w": 0.26810725778341293, + "eval_compot_MAE_x_boxes": 0.20040027797222137, + "eval_compot_MAE_y_boxes": 0.13199836015701294, + "eval_compot_NUM_probability": 0.9999605119228363, + "eval_compot_inside_bbox": 0.5399305522441864, + "eval_compot_loss": 3.273641347885132, + "eval_compot_loss_ce": 0.00555493775755167, + "eval_compot_loss_iou": 1.04833984375, + "eval_compot_loss_num": 0.2506103515625, + "eval_compot_loss_xval": 3.35107421875, + "eval_compot_runtime": 70.6666, + "eval_compot_samples_per_second": 0.708, + "eval_compot_steps_per_second": 0.028, + "num_input_tokens_seen": 250889760, + "step": 3750 + }, + { + "epoch": 0.425531914893617, + "loss": 3.2859482765197754, + "loss_ce": 0.0037215505726635456, + "loss_iou": 1.0390625, + "loss_num": 0.240234375, + "loss_xval": 3.28125, + "num_input_tokens_seen": 250889760, + "step": 3750 + }, + { + "epoch": 0.425645390070922, + "grad_norm": 36.51622772216797, + "learning_rate": 5e-05, + "loss": 0.9775, + "num_input_tokens_seen": 250956496, + "step": 3751 + }, + { + "epoch": 0.425645390070922, + "loss": 0.9602559804916382, + "loss_ce": 0.005940798670053482, + "loss_iou": 0.408203125, + "loss_num": 0.0277099609375, + "loss_xval": 0.953125, + "num_input_tokens_seen": 250956496, + "step": 3751 + }, + { + "epoch": 0.4257588652482269, + "grad_norm": 34.126914978027344, + "learning_rate": 5e-05, + "loss": 1.4025, + "num_input_tokens_seen": 251022552, + "step": 3752 + }, + { + "epoch": 0.4257588652482269, + "loss": 1.1699782609939575, + "loss_ce": 0.004450923763215542, + "loss_iou": 0.4921875, + "loss_num": 0.0361328125, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 251022552, + "step": 3752 + }, + { + "epoch": 0.4258723404255319, + "grad_norm": 35.47367477416992, + "learning_rate": 5e-05, + "loss": 1.133, + "num_input_tokens_seen": 251089680, + "step": 3753 + }, + { + "epoch": 0.4258723404255319, + "loss": 1.080306053161621, + "loss_ce": 0.004622538108378649, + "loss_iou": 0.45703125, + "loss_num": 0.031982421875, + "loss_xval": 1.078125, + "num_input_tokens_seen": 251089680, + "step": 3753 + }, + { + "epoch": 0.4259858156028369, + "grad_norm": 30.477584838867188, + "learning_rate": 5e-05, + "loss": 1.4458, + "num_input_tokens_seen": 251157280, + "step": 3754 + }, + { + "epoch": 0.4259858156028369, + "loss": 1.5329742431640625, + "loss_ce": 0.0027008671313524246, + "loss_iou": 0.609375, + "loss_num": 0.06201171875, + "loss_xval": 1.53125, + "num_input_tokens_seen": 251157280, + "step": 3754 + }, + { + "epoch": 0.42609929078014186, + "grad_norm": 23.1093692779541, + "learning_rate": 5e-05, + "loss": 1.1975, + "num_input_tokens_seen": 251224956, + "step": 3755 + }, + { + "epoch": 0.42609929078014186, + "loss": 1.1306170225143433, + "loss_ce": 0.010011496022343636, + "loss_iou": 0.458984375, + "loss_num": 0.040771484375, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 251224956, + "step": 3755 + }, + { + "epoch": 0.4262127659574468, + "grad_norm": 24.508325576782227, + "learning_rate": 5e-05, + "loss": 1.3345, + "num_input_tokens_seen": 251291784, + "step": 3756 + }, + { + "epoch": 0.4262127659574468, + "loss": 1.1515969038009644, + "loss_ce": 0.0070656780153512955, + "loss_iou": 0.44140625, + "loss_num": 0.052490234375, + "loss_xval": 1.140625, + "num_input_tokens_seen": 251291784, + "step": 3756 + }, + { + "epoch": 0.42632624113475176, + "grad_norm": 20.04400062561035, + "learning_rate": 5e-05, + "loss": 1.1256, + "num_input_tokens_seen": 251358524, + "step": 3757 + }, + { + "epoch": 0.42632624113475176, + "loss": 1.1228450536727905, + "loss_ce": 0.005657580681145191, + "loss_iou": 0.4375, + "loss_num": 0.04833984375, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 251358524, + "step": 3757 + }, + { + "epoch": 0.42643971631205674, + "grad_norm": 34.01975631713867, + "learning_rate": 5e-05, + "loss": 1.621, + "num_input_tokens_seen": 251425572, + "step": 3758 + }, + { + "epoch": 0.42643971631205674, + "loss": 1.6502799987792969, + "loss_ce": 0.008678432554006577, + "loss_iou": 0.63671875, + "loss_num": 0.07373046875, + "loss_xval": 1.640625, + "num_input_tokens_seen": 251425572, + "step": 3758 + }, + { + "epoch": 0.4265531914893617, + "grad_norm": 33.731910705566406, + "learning_rate": 5e-05, + "loss": 1.2817, + "num_input_tokens_seen": 251492244, + "step": 3759 + }, + { + "epoch": 0.4265531914893617, + "loss": 1.2147836685180664, + "loss_ce": 0.004822771996259689, + "loss_iou": 0.515625, + "loss_num": 0.0361328125, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 251492244, + "step": 3759 + }, + { + "epoch": 0.4266666666666667, + "grad_norm": 26.371538162231445, + "learning_rate": 5e-05, + "loss": 1.187, + "num_input_tokens_seen": 251559436, + "step": 3760 + }, + { + "epoch": 0.4266666666666667, + "loss": 1.071873426437378, + "loss_ce": 0.00986173003911972, + "loss_iou": 0.4296875, + "loss_num": 0.04052734375, + "loss_xval": 1.0625, + "num_input_tokens_seen": 251559436, + "step": 3760 + }, + { + "epoch": 0.4267801418439716, + "grad_norm": 38.19269561767578, + "learning_rate": 5e-05, + "loss": 1.2676, + "num_input_tokens_seen": 251626868, + "step": 3761 + }, + { + "epoch": 0.4267801418439716, + "loss": 1.4529246091842651, + "loss_ce": 0.005659022368490696, + "loss_iou": 0.62109375, + "loss_num": 0.04052734375, + "loss_xval": 1.4453125, + "num_input_tokens_seen": 251626868, + "step": 3761 + }, + { + "epoch": 0.4268936170212766, + "grad_norm": 35.738887786865234, + "learning_rate": 5e-05, + "loss": 1.4187, + "num_input_tokens_seen": 251693272, + "step": 3762 + }, + { + "epoch": 0.4268936170212766, + "loss": 1.3341517448425293, + "loss_ce": 0.00602676859125495, + "loss_iou": 0.5390625, + "loss_num": 0.05078125, + "loss_xval": 1.328125, + "num_input_tokens_seen": 251693272, + "step": 3762 + }, + { + "epoch": 0.4270070921985816, + "grad_norm": 40.04500961303711, + "learning_rate": 5e-05, + "loss": 1.4652, + "num_input_tokens_seen": 251760640, + "step": 3763 + }, + { + "epoch": 0.4270070921985816, + "loss": 1.370509386062622, + "loss_ce": 0.004298422951251268, + "loss_iou": 0.56640625, + "loss_num": 0.04736328125, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 251760640, + "step": 3763 + }, + { + "epoch": 0.42712056737588655, + "grad_norm": 43.519351959228516, + "learning_rate": 5e-05, + "loss": 1.1681, + "num_input_tokens_seen": 251827980, + "step": 3764 + }, + { + "epoch": 0.42712056737588655, + "loss": 1.1242852210998535, + "loss_ce": 0.006121196784079075, + "loss_iou": 0.48046875, + "loss_num": 0.03173828125, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 251827980, + "step": 3764 + }, + { + "epoch": 0.4272340425531915, + "grad_norm": 65.92765045166016, + "learning_rate": 5e-05, + "loss": 1.3502, + "num_input_tokens_seen": 251894536, + "step": 3765 + }, + { + "epoch": 0.4272340425531915, + "loss": 1.4157332181930542, + "loss_ce": 0.0094832181930542, + "loss_iou": 0.58984375, + "loss_num": 0.044677734375, + "loss_xval": 1.40625, + "num_input_tokens_seen": 251894536, + "step": 3765 + }, + { + "epoch": 0.42734751773049645, + "grad_norm": 21.350496292114258, + "learning_rate": 5e-05, + "loss": 1.3992, + "num_input_tokens_seen": 251961520, + "step": 3766 + }, + { + "epoch": 0.42734751773049645, + "loss": 1.2876120805740356, + "loss_ce": 0.0024557816796004772, + "loss_iou": 0.5234375, + "loss_num": 0.04736328125, + "loss_xval": 1.28125, + "num_input_tokens_seen": 251961520, + "step": 3766 + }, + { + "epoch": 0.42746099290780143, + "grad_norm": 22.853635787963867, + "learning_rate": 5e-05, + "loss": 1.1478, + "num_input_tokens_seen": 252028608, + "step": 3767 + }, + { + "epoch": 0.42746099290780143, + "loss": 1.0214778184890747, + "loss_ce": 0.00585283525288105, + "loss_iou": 0.41015625, + "loss_num": 0.039306640625, + "loss_xval": 1.015625, + "num_input_tokens_seen": 252028608, + "step": 3767 + }, + { + "epoch": 0.4275744680851064, + "grad_norm": 21.97739028930664, + "learning_rate": 5e-05, + "loss": 1.2366, + "num_input_tokens_seen": 252095220, + "step": 3768 + }, + { + "epoch": 0.4275744680851064, + "loss": 1.1552040576934814, + "loss_ce": 0.005301617085933685, + "loss_iou": 0.5078125, + "loss_num": 0.0264892578125, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 252095220, + "step": 3768 + }, + { + "epoch": 0.42768794326241133, + "grad_norm": 23.515138626098633, + "learning_rate": 5e-05, + "loss": 1.137, + "num_input_tokens_seen": 252162548, + "step": 3769 + }, + { + "epoch": 0.42768794326241133, + "loss": 1.0494804382324219, + "loss_ce": 0.0045585352927446365, + "loss_iou": 0.408203125, + "loss_num": 0.045654296875, + "loss_xval": 1.046875, + "num_input_tokens_seen": 252162548, + "step": 3769 + }, + { + "epoch": 0.4278014184397163, + "grad_norm": 34.561378479003906, + "learning_rate": 5e-05, + "loss": 1.5131, + "num_input_tokens_seen": 252229608, + "step": 3770 + }, + { + "epoch": 0.4278014184397163, + "loss": 1.4178197383880615, + "loss_ce": 0.008151734247803688, + "loss_iou": 0.55859375, + "loss_num": 0.05908203125, + "loss_xval": 1.40625, + "num_input_tokens_seen": 252229608, + "step": 3770 + }, + { + "epoch": 0.4279148936170213, + "grad_norm": 32.58829116821289, + "learning_rate": 5e-05, + "loss": 1.4522, + "num_input_tokens_seen": 252295688, + "step": 3771 + }, + { + "epoch": 0.4279148936170213, + "loss": 1.4949326515197754, + "loss_ce": 0.012510782107710838, + "loss_iou": 0.5703125, + "loss_num": 0.068359375, + "loss_xval": 1.484375, + "num_input_tokens_seen": 252295688, + "step": 3771 + }, + { + "epoch": 0.42802836879432626, + "grad_norm": 29.99901008605957, + "learning_rate": 5e-05, + "loss": 1.386, + "num_input_tokens_seen": 252363364, + "step": 3772 + }, + { + "epoch": 0.42802836879432626, + "loss": 1.32668137550354, + "loss_ce": 0.00881026592105627, + "loss_iou": 0.546875, + "loss_num": 0.0439453125, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 252363364, + "step": 3772 + }, + { + "epoch": 0.4281418439716312, + "grad_norm": 31.70166015625, + "learning_rate": 5e-05, + "loss": 1.1781, + "num_input_tokens_seen": 252430092, + "step": 3773 + }, + { + "epoch": 0.4281418439716312, + "loss": 1.0290260314941406, + "loss_ce": 0.009708507917821407, + "loss_iou": 0.43359375, + "loss_num": 0.030517578125, + "loss_xval": 1.015625, + "num_input_tokens_seen": 252430092, + "step": 3773 + }, + { + "epoch": 0.42825531914893616, + "grad_norm": 22.699214935302734, + "learning_rate": 5e-05, + "loss": 1.2802, + "num_input_tokens_seen": 252496796, + "step": 3774 + }, + { + "epoch": 0.42825531914893616, + "loss": 1.268432378768921, + "loss_ce": 0.007690260652452707, + "loss_iou": 0.5, + "loss_num": 0.0517578125, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 252496796, + "step": 3774 + }, + { + "epoch": 0.42836879432624114, + "grad_norm": 20.312021255493164, + "learning_rate": 5e-05, + "loss": 1.1162, + "num_input_tokens_seen": 252563392, + "step": 3775 + }, + { + "epoch": 0.42836879432624114, + "loss": 1.1571993827819824, + "loss_ce": 0.010959114879369736, + "loss_iou": 0.49609375, + "loss_num": 0.031005859375, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 252563392, + "step": 3775 + }, + { + "epoch": 0.4284822695035461, + "grad_norm": 43.126522064208984, + "learning_rate": 5e-05, + "loss": 1.0593, + "num_input_tokens_seen": 252629316, + "step": 3776 + }, + { + "epoch": 0.4284822695035461, + "loss": 1.1990981101989746, + "loss_ce": 0.005006404593586922, + "loss_iou": 0.51171875, + "loss_num": 0.03466796875, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 252629316, + "step": 3776 + }, + { + "epoch": 0.42859574468085104, + "grad_norm": 23.998714447021484, + "learning_rate": 5e-05, + "loss": 1.1572, + "num_input_tokens_seen": 252696408, + "step": 3777 + }, + { + "epoch": 0.42859574468085104, + "loss": 1.169569730758667, + "loss_ce": 0.00648373831063509, + "loss_iou": 0.46484375, + "loss_num": 0.046630859375, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 252696408, + "step": 3777 + }, + { + "epoch": 0.428709219858156, + "grad_norm": 28.363435745239258, + "learning_rate": 5e-05, + "loss": 1.3833, + "num_input_tokens_seen": 252762580, + "step": 3778 + }, + { + "epoch": 0.428709219858156, + "loss": 1.296118974685669, + "loss_ce": 0.0075447093695402145, + "loss_iou": 0.5546875, + "loss_num": 0.0361328125, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 252762580, + "step": 3778 + }, + { + "epoch": 0.428822695035461, + "grad_norm": 39.91801834106445, + "learning_rate": 5e-05, + "loss": 1.18, + "num_input_tokens_seen": 252830508, + "step": 3779 + }, + { + "epoch": 0.428822695035461, + "loss": 1.0109152793884277, + "loss_ce": 0.007985523901879787, + "loss_iou": 0.400390625, + "loss_num": 0.04052734375, + "loss_xval": 1.0, + "num_input_tokens_seen": 252830508, + "step": 3779 + }, + { + "epoch": 0.428936170212766, + "grad_norm": 38.355411529541016, + "learning_rate": 5e-05, + "loss": 1.2984, + "num_input_tokens_seen": 252897276, + "step": 3780 + }, + { + "epoch": 0.428936170212766, + "loss": 1.277346134185791, + "loss_ce": 0.0058617671020329, + "loss_iou": 0.5, + "loss_num": 0.053466796875, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 252897276, + "step": 3780 + }, + { + "epoch": 0.4290496453900709, + "grad_norm": 34.87319564819336, + "learning_rate": 5e-05, + "loss": 1.4517, + "num_input_tokens_seen": 252963696, + "step": 3781 + }, + { + "epoch": 0.4290496453900709, + "loss": 1.2652512788772583, + "loss_ce": 0.0059739528223872185, + "loss_iou": 0.462890625, + "loss_num": 0.06689453125, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 252963696, + "step": 3781 + }, + { + "epoch": 0.4291631205673759, + "grad_norm": 55.507022857666016, + "learning_rate": 5e-05, + "loss": 1.5704, + "num_input_tokens_seen": 253030596, + "step": 3782 + }, + { + "epoch": 0.4291631205673759, + "loss": 1.6310760974884033, + "loss_ce": 0.0065643684938549995, + "loss_iou": 0.65234375, + "loss_num": 0.06298828125, + "loss_xval": 1.625, + "num_input_tokens_seen": 253030596, + "step": 3782 + }, + { + "epoch": 0.42927659574468086, + "grad_norm": 24.933069229125977, + "learning_rate": 5e-05, + "loss": 1.0701, + "num_input_tokens_seen": 253097672, + "step": 3783 + }, + { + "epoch": 0.42927659574468086, + "loss": 1.2321432828903198, + "loss_ce": 0.005092425271868706, + "loss_iou": 0.5234375, + "loss_num": 0.035400390625, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 253097672, + "step": 3783 + }, + { + "epoch": 0.42939007092198583, + "grad_norm": 30.427288055419922, + "learning_rate": 5e-05, + "loss": 1.2458, + "num_input_tokens_seen": 253164760, + "step": 3784 + }, + { + "epoch": 0.42939007092198583, + "loss": 1.2772605419158936, + "loss_ce": 0.009682448580861092, + "loss_iou": 0.546875, + "loss_num": 0.03466796875, + "loss_xval": 1.265625, + "num_input_tokens_seen": 253164760, + "step": 3784 + }, + { + "epoch": 0.42950354609929076, + "grad_norm": 45.162166595458984, + "learning_rate": 5e-05, + "loss": 1.4083, + "num_input_tokens_seen": 253232652, + "step": 3785 + }, + { + "epoch": 0.42950354609929076, + "loss": 1.443390965461731, + "loss_ce": 0.006379242986440659, + "loss_iou": 0.55078125, + "loss_num": 0.0673828125, + "loss_xval": 1.4375, + "num_input_tokens_seen": 253232652, + "step": 3785 + }, + { + "epoch": 0.42961702127659573, + "grad_norm": 27.946718215942383, + "learning_rate": 5e-05, + "loss": 1.3003, + "num_input_tokens_seen": 253298812, + "step": 3786 + }, + { + "epoch": 0.42961702127659573, + "loss": 1.227020502090454, + "loss_ce": 0.007293845992535353, + "loss_iou": 0.50390625, + "loss_num": 0.04248046875, + "loss_xval": 1.21875, + "num_input_tokens_seen": 253298812, + "step": 3786 + }, + { + "epoch": 0.4297304964539007, + "grad_norm": 12.841599464416504, + "learning_rate": 5e-05, + "loss": 1.2651, + "num_input_tokens_seen": 253366120, + "step": 3787 + }, + { + "epoch": 0.4297304964539007, + "loss": 1.2645108699798584, + "loss_ce": 0.0057218619622290134, + "loss_iou": 0.4375, + "loss_num": 0.07666015625, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 253366120, + "step": 3787 + }, + { + "epoch": 0.4298439716312057, + "grad_norm": 17.149137496948242, + "learning_rate": 5e-05, + "loss": 1.0436, + "num_input_tokens_seen": 253433208, + "step": 3788 + }, + { + "epoch": 0.4298439716312057, + "loss": 1.105372428894043, + "loss_ce": 0.00429818220436573, + "loss_iou": 0.474609375, + "loss_num": 0.0301513671875, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 253433208, + "step": 3788 + }, + { + "epoch": 0.4299574468085106, + "grad_norm": 24.377912521362305, + "learning_rate": 5e-05, + "loss": 1.4069, + "num_input_tokens_seen": 253499380, + "step": 3789 + }, + { + "epoch": 0.4299574468085106, + "loss": 1.4168752431869507, + "loss_ce": 0.006719033233821392, + "loss_iou": 0.59375, + "loss_num": 0.044189453125, + "loss_xval": 1.40625, + "num_input_tokens_seen": 253499380, + "step": 3789 + }, + { + "epoch": 0.4300709219858156, + "grad_norm": 36.81869125366211, + "learning_rate": 5e-05, + "loss": 1.3488, + "num_input_tokens_seen": 253566536, + "step": 3790 + }, + { + "epoch": 0.4300709219858156, + "loss": 1.4593424797058105, + "loss_ce": 0.004264264367520809, + "loss_iou": 0.578125, + "loss_num": 0.059814453125, + "loss_xval": 1.453125, + "num_input_tokens_seen": 253566536, + "step": 3790 + }, + { + "epoch": 0.43018439716312057, + "grad_norm": 29.33944320678711, + "learning_rate": 5e-05, + "loss": 1.2547, + "num_input_tokens_seen": 253633324, + "step": 3791 + }, + { + "epoch": 0.43018439716312057, + "loss": 1.2892600297927856, + "loss_ce": 0.0050803618505597115, + "loss_iou": 0.5390625, + "loss_num": 0.040771484375, + "loss_xval": 1.28125, + "num_input_tokens_seen": 253633324, + "step": 3791 + }, + { + "epoch": 0.43029787234042555, + "grad_norm": 24.055444717407227, + "learning_rate": 5e-05, + "loss": 1.1899, + "num_input_tokens_seen": 253701420, + "step": 3792 + }, + { + "epoch": 0.43029787234042555, + "loss": 1.1920640468597412, + "loss_ce": 0.009446847252547741, + "loss_iou": 0.50390625, + "loss_num": 0.03466796875, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 253701420, + "step": 3792 + }, + { + "epoch": 0.43041134751773047, + "grad_norm": 66.61148834228516, + "learning_rate": 5e-05, + "loss": 1.105, + "num_input_tokens_seen": 253768000, + "step": 3793 + }, + { + "epoch": 0.43041134751773047, + "loss": 1.1812758445739746, + "loss_ce": 0.011353992857038975, + "loss_iou": 0.482421875, + "loss_num": 0.041259765625, + "loss_xval": 1.171875, + "num_input_tokens_seen": 253768000, + "step": 3793 + }, + { + "epoch": 0.43052482269503545, + "grad_norm": 30.853139877319336, + "learning_rate": 5e-05, + "loss": 1.2524, + "num_input_tokens_seen": 253834408, + "step": 3794 + }, + { + "epoch": 0.43052482269503545, + "loss": 1.0289697647094727, + "loss_ce": 0.004067438188940287, + "loss_iou": 0.375, + "loss_num": 0.0546875, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 253834408, + "step": 3794 + }, + { + "epoch": 0.4306382978723404, + "grad_norm": 42.88096618652344, + "learning_rate": 5e-05, + "loss": 1.4931, + "num_input_tokens_seen": 253901520, + "step": 3795 + }, + { + "epoch": 0.4306382978723404, + "loss": 1.527862548828125, + "loss_ce": 0.008331302553415298, + "loss_iou": 0.60546875, + "loss_num": 0.061279296875, + "loss_xval": 1.515625, + "num_input_tokens_seen": 253901520, + "step": 3795 + }, + { + "epoch": 0.4307517730496454, + "grad_norm": 26.542844772338867, + "learning_rate": 5e-05, + "loss": 1.4184, + "num_input_tokens_seen": 253968208, + "step": 3796 + }, + { + "epoch": 0.4307517730496454, + "loss": 1.3839777708053589, + "loss_ce": 0.002141838427633047, + "loss_iou": 0.56640625, + "loss_num": 0.05029296875, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 253968208, + "step": 3796 + }, + { + "epoch": 0.4308652482269504, + "grad_norm": 44.703086853027344, + "learning_rate": 5e-05, + "loss": 1.312, + "num_input_tokens_seen": 254035100, + "step": 3797 + }, + { + "epoch": 0.4308652482269504, + "loss": 1.3277913331985474, + "loss_ce": 0.011385043151676655, + "loss_iou": 0.53125, + "loss_num": 0.051025390625, + "loss_xval": 1.3125, + "num_input_tokens_seen": 254035100, + "step": 3797 + }, + { + "epoch": 0.4309787234042553, + "grad_norm": 14.78239917755127, + "learning_rate": 5e-05, + "loss": 1.1735, + "num_input_tokens_seen": 254101944, + "step": 3798 + }, + { + "epoch": 0.4309787234042553, + "loss": 1.0201102495193481, + "loss_ce": 0.008879754692316055, + "loss_iou": 0.376953125, + "loss_num": 0.051513671875, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 254101944, + "step": 3798 + }, + { + "epoch": 0.4310921985815603, + "grad_norm": 36.35893249511719, + "learning_rate": 5e-05, + "loss": 1.329, + "num_input_tokens_seen": 254168976, + "step": 3799 + }, + { + "epoch": 0.4310921985815603, + "loss": 1.3391852378845215, + "loss_ce": 0.009107047691941261, + "loss_iou": 0.498046875, + "loss_num": 0.06640625, + "loss_xval": 1.328125, + "num_input_tokens_seen": 254168976, + "step": 3799 + }, + { + "epoch": 0.43120567375886526, + "grad_norm": 28.958995819091797, + "learning_rate": 5e-05, + "loss": 1.2016, + "num_input_tokens_seen": 254236232, + "step": 3800 + }, + { + "epoch": 0.43120567375886526, + "loss": 1.3518487215042114, + "loss_ce": 0.0066338879987597466, + "loss_iou": 0.55078125, + "loss_num": 0.04931640625, + "loss_xval": 1.34375, + "num_input_tokens_seen": 254236232, + "step": 3800 + }, + { + "epoch": 0.43131914893617024, + "grad_norm": 38.12914276123047, + "learning_rate": 5e-05, + "loss": 1.3166, + "num_input_tokens_seen": 254302588, + "step": 3801 + }, + { + "epoch": 0.43131914893617024, + "loss": 1.2044380903244019, + "loss_ce": 0.00521937757730484, + "loss_iou": 0.51171875, + "loss_num": 0.03564453125, + "loss_xval": 1.203125, + "num_input_tokens_seen": 254302588, + "step": 3801 + }, + { + "epoch": 0.43143262411347516, + "grad_norm": 29.920217514038086, + "learning_rate": 5e-05, + "loss": 1.45, + "num_input_tokens_seen": 254370332, + "step": 3802 + }, + { + "epoch": 0.43143262411347516, + "loss": 1.4326703548431396, + "loss_ce": 0.007865693420171738, + "loss_iou": 0.59375, + "loss_num": 0.04736328125, + "loss_xval": 1.421875, + "num_input_tokens_seen": 254370332, + "step": 3802 + }, + { + "epoch": 0.43154609929078014, + "grad_norm": 23.289710998535156, + "learning_rate": 5e-05, + "loss": 1.3125, + "num_input_tokens_seen": 254437456, + "step": 3803 + }, + { + "epoch": 0.43154609929078014, + "loss": 1.188354730606079, + "loss_ce": 0.007202415727078915, + "loss_iou": 0.458984375, + "loss_num": 0.05224609375, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 254437456, + "step": 3803 + }, + { + "epoch": 0.4316595744680851, + "grad_norm": 148.1215057373047, + "learning_rate": 5e-05, + "loss": 1.1696, + "num_input_tokens_seen": 254504008, + "step": 3804 + }, + { + "epoch": 0.4316595744680851, + "loss": 1.277350664138794, + "loss_ce": 0.009284283965826035, + "loss_iou": 0.4921875, + "loss_num": 0.056884765625, + "loss_xval": 1.265625, + "num_input_tokens_seen": 254504008, + "step": 3804 + }, + { + "epoch": 0.4317730496453901, + "grad_norm": 27.188344955444336, + "learning_rate": 5e-05, + "loss": 1.2461, + "num_input_tokens_seen": 254570080, + "step": 3805 + }, + { + "epoch": 0.4317730496453901, + "loss": 1.2206181287765503, + "loss_ce": 0.005408232565969229, + "loss_iou": 0.498046875, + "loss_num": 0.04443359375, + "loss_xval": 1.21875, + "num_input_tokens_seen": 254570080, + "step": 3805 + }, + { + "epoch": 0.431886524822695, + "grad_norm": 70.03002166748047, + "learning_rate": 5e-05, + "loss": 1.4272, + "num_input_tokens_seen": 254636024, + "step": 3806 + }, + { + "epoch": 0.431886524822695, + "loss": 1.3780720233917236, + "loss_ce": 0.006978406570851803, + "loss_iou": 0.5546875, + "loss_num": 0.0517578125, + "loss_xval": 1.375, + "num_input_tokens_seen": 254636024, + "step": 3806 + }, + { + "epoch": 0.432, + "grad_norm": 33.15486526489258, + "learning_rate": 5e-05, + "loss": 1.2335, + "num_input_tokens_seen": 254703616, + "step": 3807 + }, + { + "epoch": 0.432, + "loss": 1.2153270244598389, + "loss_ce": 0.003901258111000061, + "loss_iou": 0.53515625, + "loss_num": 0.02783203125, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 254703616, + "step": 3807 + }, + { + "epoch": 0.432113475177305, + "grad_norm": 24.163671493530273, + "learning_rate": 5e-05, + "loss": 1.5118, + "num_input_tokens_seen": 254771000, + "step": 3808 + }, + { + "epoch": 0.432113475177305, + "loss": 1.2765960693359375, + "loss_ce": 0.0046234093606472015, + "loss_iou": 0.55078125, + "loss_num": 0.033447265625, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 254771000, + "step": 3808 + }, + { + "epoch": 0.43222695035460995, + "grad_norm": 13.600176811218262, + "learning_rate": 5e-05, + "loss": 1.276, + "num_input_tokens_seen": 254837052, + "step": 3809 + }, + { + "epoch": 0.43222695035460995, + "loss": 1.3914899826049805, + "loss_ce": 0.002329910174012184, + "loss_iou": 0.53125, + "loss_num": 0.0654296875, + "loss_xval": 1.390625, + "num_input_tokens_seen": 254837052, + "step": 3809 + }, + { + "epoch": 0.4323404255319149, + "grad_norm": 18.428266525268555, + "learning_rate": 5e-05, + "loss": 1.036, + "num_input_tokens_seen": 254902852, + "step": 3810 + }, + { + "epoch": 0.4323404255319149, + "loss": 1.1155685186386108, + "loss_ce": 0.010343842208385468, + "loss_iou": 0.44921875, + "loss_num": 0.04150390625, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 254902852, + "step": 3810 + }, + { + "epoch": 0.43245390070921985, + "grad_norm": 40.96482467651367, + "learning_rate": 5e-05, + "loss": 1.2675, + "num_input_tokens_seen": 254970280, + "step": 3811 + }, + { + "epoch": 0.43245390070921985, + "loss": 1.3232505321502686, + "loss_ce": 0.005867733154445887, + "loss_iou": 0.5625, + "loss_num": 0.038330078125, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 254970280, + "step": 3811 + }, + { + "epoch": 0.43256737588652483, + "grad_norm": 26.47947883605957, + "learning_rate": 5e-05, + "loss": 1.1013, + "num_input_tokens_seen": 255036732, + "step": 3812 + }, + { + "epoch": 0.43256737588652483, + "loss": 1.0440008640289307, + "loss_ce": 0.005914956796914339, + "loss_iou": 0.455078125, + "loss_num": 0.0257568359375, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 255036732, + "step": 3812 + }, + { + "epoch": 0.4326808510638298, + "grad_norm": 28.593338012695312, + "learning_rate": 5e-05, + "loss": 1.4236, + "num_input_tokens_seen": 255103484, + "step": 3813 + }, + { + "epoch": 0.4326808510638298, + "loss": 1.4164373874664307, + "loss_ce": 0.006769439205527306, + "loss_iou": 0.5625, + "loss_num": 0.0576171875, + "loss_xval": 1.40625, + "num_input_tokens_seen": 255103484, + "step": 3813 + }, + { + "epoch": 0.43279432624113473, + "grad_norm": 36.37594223022461, + "learning_rate": 5e-05, + "loss": 1.4242, + "num_input_tokens_seen": 255170268, + "step": 3814 + }, + { + "epoch": 0.43279432624113473, + "loss": 1.3271986246109009, + "loss_ce": 0.007374361157417297, + "loss_iou": 0.5625, + "loss_num": 0.03857421875, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 255170268, + "step": 3814 + }, + { + "epoch": 0.4329078014184397, + "grad_norm": 27.920949935913086, + "learning_rate": 5e-05, + "loss": 1.328, + "num_input_tokens_seen": 255237184, + "step": 3815 + }, + { + "epoch": 0.4329078014184397, + "loss": 1.3604297637939453, + "loss_ce": 0.003984389360994101, + "loss_iou": 0.546875, + "loss_num": 0.0517578125, + "loss_xval": 1.359375, + "num_input_tokens_seen": 255237184, + "step": 3815 + }, + { + "epoch": 0.4330212765957447, + "grad_norm": 22.215845108032227, + "learning_rate": 5e-05, + "loss": 1.2519, + "num_input_tokens_seen": 255303384, + "step": 3816 + }, + { + "epoch": 0.4330212765957447, + "loss": 1.5156242847442627, + "loss_ce": 0.009276550263166428, + "loss_iou": 0.58984375, + "loss_num": 0.064453125, + "loss_xval": 1.5078125, + "num_input_tokens_seen": 255303384, + "step": 3816 + }, + { + "epoch": 0.43313475177304966, + "grad_norm": 21.091278076171875, + "learning_rate": 5e-05, + "loss": 1.2265, + "num_input_tokens_seen": 255369468, + "step": 3817 + }, + { + "epoch": 0.43313475177304966, + "loss": 1.2346303462982178, + "loss_ce": 0.005138056818395853, + "loss_iou": 0.53515625, + "loss_num": 0.032470703125, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 255369468, + "step": 3817 + }, + { + "epoch": 0.4332482269503546, + "grad_norm": 29.813398361206055, + "learning_rate": 5e-05, + "loss": 1.2664, + "num_input_tokens_seen": 255437548, + "step": 3818 + }, + { + "epoch": 0.4332482269503546, + "loss": 1.4213957786560059, + "loss_ce": 0.005380153656005859, + "loss_iou": 0.5546875, + "loss_num": 0.061767578125, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 255437548, + "step": 3818 + }, + { + "epoch": 0.43336170212765956, + "grad_norm": 61.34482192993164, + "learning_rate": 5e-05, + "loss": 1.4044, + "num_input_tokens_seen": 255503332, + "step": 3819 + }, + { + "epoch": 0.43336170212765956, + "loss": 1.3655389547348022, + "loss_ce": 0.007140447851270437, + "loss_iou": 0.55859375, + "loss_num": 0.04833984375, + "loss_xval": 1.359375, + "num_input_tokens_seen": 255503332, + "step": 3819 + }, + { + "epoch": 0.43347517730496454, + "grad_norm": 30.54789924621582, + "learning_rate": 5e-05, + "loss": 1.051, + "num_input_tokens_seen": 255569552, + "step": 3820 + }, + { + "epoch": 0.43347517730496454, + "loss": 1.0250506401062012, + "loss_ce": 0.0049700019881129265, + "loss_iou": 0.390625, + "loss_num": 0.048095703125, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 255569552, + "step": 3820 + }, + { + "epoch": 0.4335886524822695, + "grad_norm": 59.11077880859375, + "learning_rate": 5e-05, + "loss": 1.2106, + "num_input_tokens_seen": 255635668, + "step": 3821 + }, + { + "epoch": 0.4335886524822695, + "loss": 0.9498270750045776, + "loss_ce": 0.006833929568529129, + "loss_iou": 0.392578125, + "loss_num": 0.03173828125, + "loss_xval": 0.94140625, + "num_input_tokens_seen": 255635668, + "step": 3821 + }, + { + "epoch": 0.43370212765957444, + "grad_norm": 23.345762252807617, + "learning_rate": 5e-05, + "loss": 1.3997, + "num_input_tokens_seen": 255703440, + "step": 3822 + }, + { + "epoch": 0.43370212765957444, + "loss": 1.4094382524490356, + "loss_ce": 0.00807106588035822, + "loss_iou": 0.57421875, + "loss_num": 0.051025390625, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 255703440, + "step": 3822 + }, + { + "epoch": 0.4338156028368794, + "grad_norm": 14.336852073669434, + "learning_rate": 5e-05, + "loss": 1.267, + "num_input_tokens_seen": 255770284, + "step": 3823 + }, + { + "epoch": 0.4338156028368794, + "loss": 1.1518361568450928, + "loss_ce": 0.00730486586689949, + "loss_iou": 0.451171875, + "loss_num": 0.048828125, + "loss_xval": 1.140625, + "num_input_tokens_seen": 255770284, + "step": 3823 + }, + { + "epoch": 0.4339290780141844, + "grad_norm": 36.47365951538086, + "learning_rate": 5e-05, + "loss": 1.0009, + "num_input_tokens_seen": 255835988, + "step": 3824 + }, + { + "epoch": 0.4339290780141844, + "loss": 0.9012048244476318, + "loss_ce": 0.009847399778664112, + "loss_iou": 0.3515625, + "loss_num": 0.037109375, + "loss_xval": 0.890625, + "num_input_tokens_seen": 255835988, + "step": 3824 + }, + { + "epoch": 0.4340425531914894, + "grad_norm": 34.300331115722656, + "learning_rate": 5e-05, + "loss": 1.1639, + "num_input_tokens_seen": 255902816, + "step": 3825 + }, + { + "epoch": 0.4340425531914894, + "loss": 1.117577314376831, + "loss_ce": 0.0062492224387824535, + "loss_iou": 0.474609375, + "loss_num": 0.031982421875, + "loss_xval": 1.109375, + "num_input_tokens_seen": 255902816, + "step": 3825 + }, + { + "epoch": 0.4341560283687943, + "grad_norm": 40.28498077392578, + "learning_rate": 5e-05, + "loss": 1.3335, + "num_input_tokens_seen": 255969420, + "step": 3826 + }, + { + "epoch": 0.4341560283687943, + "loss": 1.254817247390747, + "loss_ce": 0.010585086420178413, + "loss_iou": 0.52734375, + "loss_num": 0.03857421875, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 255969420, + "step": 3826 + }, + { + "epoch": 0.4342695035460993, + "grad_norm": 18.017597198486328, + "learning_rate": 5e-05, + "loss": 1.1266, + "num_input_tokens_seen": 256036304, + "step": 3827 + }, + { + "epoch": 0.4342695035460993, + "loss": 1.361757516860962, + "loss_ce": 0.005800453945994377, + "loss_iou": 0.5546875, + "loss_num": 0.048583984375, + "loss_xval": 1.359375, + "num_input_tokens_seen": 256036304, + "step": 3827 + }, + { + "epoch": 0.43438297872340426, + "grad_norm": 20.07405662536621, + "learning_rate": 5e-05, + "loss": 1.1407, + "num_input_tokens_seen": 256102104, + "step": 3828 + }, + { + "epoch": 0.43438297872340426, + "loss": 1.2398431301116943, + "loss_ce": 0.011327467858791351, + "loss_iou": 0.5, + "loss_num": 0.044677734375, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 256102104, + "step": 3828 + }, + { + "epoch": 0.43449645390070923, + "grad_norm": 36.64396667480469, + "learning_rate": 5e-05, + "loss": 1.1931, + "num_input_tokens_seen": 256168980, + "step": 3829 + }, + { + "epoch": 0.43449645390070923, + "loss": 1.161494493484497, + "loss_ce": 0.00426788255572319, + "loss_iou": 0.5078125, + "loss_num": 0.0283203125, + "loss_xval": 1.15625, + "num_input_tokens_seen": 256168980, + "step": 3829 + }, + { + "epoch": 0.4346099290780142, + "grad_norm": 28.098682403564453, + "learning_rate": 5e-05, + "loss": 1.5445, + "num_input_tokens_seen": 256234932, + "step": 3830 + }, + { + "epoch": 0.4346099290780142, + "loss": 1.42311429977417, + "loss_ce": 0.004168973304331303, + "loss_iou": 0.6328125, + "loss_num": 0.02978515625, + "loss_xval": 1.421875, + "num_input_tokens_seen": 256234932, + "step": 3830 + }, + { + "epoch": 0.43472340425531913, + "grad_norm": 29.986921310424805, + "learning_rate": 5e-05, + "loss": 1.1568, + "num_input_tokens_seen": 256302340, + "step": 3831 + }, + { + "epoch": 0.43472340425531913, + "loss": 1.2387926578521729, + "loss_ce": 0.00441768579185009, + "loss_iou": 0.51171875, + "loss_num": 0.04150390625, + "loss_xval": 1.234375, + "num_input_tokens_seen": 256302340, + "step": 3831 + }, + { + "epoch": 0.4348368794326241, + "grad_norm": 34.457725524902344, + "learning_rate": 5e-05, + "loss": 1.2515, + "num_input_tokens_seen": 256370524, + "step": 3832 + }, + { + "epoch": 0.4348368794326241, + "loss": 1.1486051082611084, + "loss_ce": 0.006027025170624256, + "loss_iou": 0.48828125, + "loss_num": 0.033203125, + "loss_xval": 1.140625, + "num_input_tokens_seen": 256370524, + "step": 3832 + }, + { + "epoch": 0.4349503546099291, + "grad_norm": 33.598880767822266, + "learning_rate": 5e-05, + "loss": 1.1618, + "num_input_tokens_seen": 256437840, + "step": 3833 + }, + { + "epoch": 0.4349503546099291, + "loss": 1.145570993423462, + "loss_ce": 0.008363994769752026, + "loss_iou": 0.46875, + "loss_num": 0.03955078125, + "loss_xval": 1.140625, + "num_input_tokens_seen": 256437840, + "step": 3833 + }, + { + "epoch": 0.43506382978723407, + "grad_norm": 22.530364990234375, + "learning_rate": 5e-05, + "loss": 1.3354, + "num_input_tokens_seen": 256504308, + "step": 3834 + }, + { + "epoch": 0.43506382978723407, + "loss": 1.122908353805542, + "loss_ce": 0.004744287114590406, + "loss_iou": 0.453125, + "loss_num": 0.042724609375, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 256504308, + "step": 3834 + }, + { + "epoch": 0.435177304964539, + "grad_norm": 18.479642868041992, + "learning_rate": 5e-05, + "loss": 1.2835, + "num_input_tokens_seen": 256570664, + "step": 3835 + }, + { + "epoch": 0.435177304964539, + "loss": 1.3204542398452759, + "loss_ce": 0.008930834010243416, + "loss_iou": 0.5, + "loss_num": 0.0615234375, + "loss_xval": 1.3125, + "num_input_tokens_seen": 256570664, + "step": 3835 + }, + { + "epoch": 0.43529078014184397, + "grad_norm": 26.85984230041504, + "learning_rate": 5e-05, + "loss": 1.1623, + "num_input_tokens_seen": 256638016, + "step": 3836 + }, + { + "epoch": 0.43529078014184397, + "loss": 1.0455515384674072, + "loss_ce": 0.003803457599133253, + "loss_iou": 0.439453125, + "loss_num": 0.032470703125, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 256638016, + "step": 3836 + }, + { + "epoch": 0.43540425531914895, + "grad_norm": 26.121829986572266, + "learning_rate": 5e-05, + "loss": 1.3181, + "num_input_tokens_seen": 256705820, + "step": 3837 + }, + { + "epoch": 0.43540425531914895, + "loss": 1.4648408889770508, + "loss_ce": 0.007809656672179699, + "loss_iou": 0.62890625, + "loss_num": 0.03955078125, + "loss_xval": 1.453125, + "num_input_tokens_seen": 256705820, + "step": 3837 + }, + { + "epoch": 0.4355177304964539, + "grad_norm": 45.77682876586914, + "learning_rate": 5e-05, + "loss": 1.2386, + "num_input_tokens_seen": 256772156, + "step": 3838 + }, + { + "epoch": 0.4355177304964539, + "loss": 1.1253348588943481, + "loss_ce": 0.007048747502267361, + "loss_iou": 0.44140625, + "loss_num": 0.04736328125, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 256772156, + "step": 3838 + }, + { + "epoch": 0.43563120567375885, + "grad_norm": 64.75957489013672, + "learning_rate": 5e-05, + "loss": 1.3511, + "num_input_tokens_seen": 256840268, + "step": 3839 + }, + { + "epoch": 0.43563120567375885, + "loss": 1.3519057035446167, + "loss_ce": 0.00962050911039114, + "loss_iou": 0.58203125, + "loss_num": 0.03515625, + "loss_xval": 1.34375, + "num_input_tokens_seen": 256840268, + "step": 3839 + }, + { + "epoch": 0.4357446808510638, + "grad_norm": 21.027786254882812, + "learning_rate": 5e-05, + "loss": 1.1496, + "num_input_tokens_seen": 256907120, + "step": 3840 + }, + { + "epoch": 0.4357446808510638, + "loss": 1.1209092140197754, + "loss_ce": 0.005674826912581921, + "loss_iou": 0.466796875, + "loss_num": 0.03662109375, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 256907120, + "step": 3840 + }, + { + "epoch": 0.4358581560283688, + "grad_norm": 39.536434173583984, + "learning_rate": 5e-05, + "loss": 1.3186, + "num_input_tokens_seen": 256974140, + "step": 3841 + }, + { + "epoch": 0.4358581560283688, + "loss": 1.4435704946517944, + "loss_ce": 0.006070507690310478, + "loss_iou": 0.5703125, + "loss_num": 0.0595703125, + "loss_xval": 1.4375, + "num_input_tokens_seen": 256974140, + "step": 3841 + }, + { + "epoch": 0.4359716312056738, + "grad_norm": 30.318775177001953, + "learning_rate": 5e-05, + "loss": 1.3633, + "num_input_tokens_seen": 257040164, + "step": 3842 + }, + { + "epoch": 0.4359716312056738, + "loss": 1.3855148553848267, + "loss_ce": 0.01173555850982666, + "loss_iou": 0.51953125, + "loss_num": 0.06640625, + "loss_xval": 1.375, + "num_input_tokens_seen": 257040164, + "step": 3842 + }, + { + "epoch": 0.4360851063829787, + "grad_norm": 35.91849899291992, + "learning_rate": 5e-05, + "loss": 1.2624, + "num_input_tokens_seen": 257107500, + "step": 3843 + }, + { + "epoch": 0.4360851063829787, + "loss": 1.1640605926513672, + "loss_ce": 0.0063457489013671875, + "loss_iou": 0.498046875, + "loss_num": 0.031982421875, + "loss_xval": 1.15625, + "num_input_tokens_seen": 257107500, + "step": 3843 + }, + { + "epoch": 0.4361985815602837, + "grad_norm": 41.93077850341797, + "learning_rate": 5e-05, + "loss": 1.2377, + "num_input_tokens_seen": 257174580, + "step": 3844 + }, + { + "epoch": 0.4361985815602837, + "loss": 1.3527319431304932, + "loss_ce": 0.0070288763381540775, + "loss_iou": 0.53515625, + "loss_num": 0.055419921875, + "loss_xval": 1.34375, + "num_input_tokens_seen": 257174580, + "step": 3844 + }, + { + "epoch": 0.43631205673758866, + "grad_norm": 37.35511016845703, + "learning_rate": 5e-05, + "loss": 1.4187, + "num_input_tokens_seen": 257241472, + "step": 3845 + }, + { + "epoch": 0.43631205673758866, + "loss": 1.444635033607483, + "loss_ce": 0.012506108731031418, + "loss_iou": 0.578125, + "loss_num": 0.055419921875, + "loss_xval": 1.4296875, + "num_input_tokens_seen": 257241472, + "step": 3845 + }, + { + "epoch": 0.43642553191489364, + "grad_norm": 18.949560165405273, + "learning_rate": 5e-05, + "loss": 1.3505, + "num_input_tokens_seen": 257309236, + "step": 3846 + }, + { + "epoch": 0.43642553191489364, + "loss": 1.4523018598556519, + "loss_ce": 0.010895566083490849, + "loss_iou": 0.578125, + "loss_num": 0.056396484375, + "loss_xval": 1.4375, + "num_input_tokens_seen": 257309236, + "step": 3846 + }, + { + "epoch": 0.43653900709219856, + "grad_norm": 33.95450973510742, + "learning_rate": 5e-05, + "loss": 1.3172, + "num_input_tokens_seen": 257376492, + "step": 3847 + }, + { + "epoch": 0.43653900709219856, + "loss": 1.2918281555175781, + "loss_ce": 0.0052070957608520985, + "loss_iou": 0.5390625, + "loss_num": 0.041259765625, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 257376492, + "step": 3847 + }, + { + "epoch": 0.43665248226950354, + "grad_norm": 26.96889877319336, + "learning_rate": 5e-05, + "loss": 1.4995, + "num_input_tokens_seen": 257444700, + "step": 3848 + }, + { + "epoch": 0.43665248226950354, + "loss": 1.6362617015838623, + "loss_ce": 0.009308517910540104, + "loss_iou": 0.6953125, + "loss_num": 0.047119140625, + "loss_xval": 1.625, + "num_input_tokens_seen": 257444700, + "step": 3848 + }, + { + "epoch": 0.4367659574468085, + "grad_norm": 60.80446243286133, + "learning_rate": 5e-05, + "loss": 1.208, + "num_input_tokens_seen": 257512180, + "step": 3849 + }, + { + "epoch": 0.4367659574468085, + "loss": 1.3342783451080322, + "loss_ce": 0.006641621235758066, + "loss_iou": 0.57421875, + "loss_num": 0.03564453125, + "loss_xval": 1.328125, + "num_input_tokens_seen": 257512180, + "step": 3849 + }, + { + "epoch": 0.4368794326241135, + "grad_norm": 22.410673141479492, + "learning_rate": 5e-05, + "loss": 1.2348, + "num_input_tokens_seen": 257578664, + "step": 3850 + }, + { + "epoch": 0.4368794326241135, + "loss": 1.169844388961792, + "loss_ce": 0.0030963777098804712, + "loss_iou": 0.48046875, + "loss_num": 0.041259765625, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 257578664, + "step": 3850 + }, + { + "epoch": 0.4369929078014184, + "grad_norm": 30.296701431274414, + "learning_rate": 5e-05, + "loss": 1.3846, + "num_input_tokens_seen": 257646904, + "step": 3851 + }, + { + "epoch": 0.4369929078014184, + "loss": 1.2926759719848633, + "loss_ce": 0.009961094707250595, + "loss_iou": 0.56640625, + "loss_num": 0.0303955078125, + "loss_xval": 1.28125, + "num_input_tokens_seen": 257646904, + "step": 3851 + }, + { + "epoch": 0.4371063829787234, + "grad_norm": 27.065982818603516, + "learning_rate": 5e-05, + "loss": 1.348, + "num_input_tokens_seen": 257713204, + "step": 3852 + }, + { + "epoch": 0.4371063829787234, + "loss": 1.175128698348999, + "loss_ce": 0.00764821283519268, + "loss_iou": 0.470703125, + "loss_num": 0.04541015625, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 257713204, + "step": 3852 + }, + { + "epoch": 0.4372198581560284, + "grad_norm": 32.70939254760742, + "learning_rate": 5e-05, + "loss": 1.2117, + "num_input_tokens_seen": 257780604, + "step": 3853 + }, + { + "epoch": 0.4372198581560284, + "loss": 1.0009257793426514, + "loss_ce": 0.005808554589748383, + "loss_iou": 0.43359375, + "loss_num": 0.0252685546875, + "loss_xval": 0.99609375, + "num_input_tokens_seen": 257780604, + "step": 3853 + }, + { + "epoch": 0.43733333333333335, + "grad_norm": 117.43338012695312, + "learning_rate": 5e-05, + "loss": 1.3566, + "num_input_tokens_seen": 257847976, + "step": 3854 + }, + { + "epoch": 0.43733333333333335, + "loss": 1.3041298389434814, + "loss_ce": 0.003348603378981352, + "loss_iou": 0.5546875, + "loss_num": 0.037353515625, + "loss_xval": 1.296875, + "num_input_tokens_seen": 257847976, + "step": 3854 + }, + { + "epoch": 0.4374468085106383, + "grad_norm": 28.05640411376953, + "learning_rate": 5e-05, + "loss": 1.2256, + "num_input_tokens_seen": 257914856, + "step": 3855 + }, + { + "epoch": 0.4374468085106383, + "loss": 1.2550201416015625, + "loss_ce": 0.00599676463752985, + "loss_iou": 0.53125, + "loss_num": 0.037841796875, + "loss_xval": 1.25, + "num_input_tokens_seen": 257914856, + "step": 3855 + }, + { + "epoch": 0.43756028368794325, + "grad_norm": 32.503387451171875, + "learning_rate": 5e-05, + "loss": 1.3312, + "num_input_tokens_seen": 257982064, + "step": 3856 + }, + { + "epoch": 0.43756028368794325, + "loss": 1.3869421482086182, + "loss_ce": 0.007059242110699415, + "loss_iou": 0.5546875, + "loss_num": 0.054931640625, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 257982064, + "step": 3856 + }, + { + "epoch": 0.43767375886524823, + "grad_norm": 42.8702507019043, + "learning_rate": 5e-05, + "loss": 1.332, + "num_input_tokens_seen": 258049996, + "step": 3857 + }, + { + "epoch": 0.43767375886524823, + "loss": 1.1388177871704102, + "loss_ce": 0.008446619845926762, + "loss_iou": 0.50390625, + "loss_num": 0.024658203125, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 258049996, + "step": 3857 + }, + { + "epoch": 0.4377872340425532, + "grad_norm": 28.241531372070312, + "learning_rate": 5e-05, + "loss": 1.2169, + "num_input_tokens_seen": 258115960, + "step": 3858 + }, + { + "epoch": 0.4377872340425532, + "loss": 1.0574207305908203, + "loss_ce": 0.002885854337364435, + "loss_iou": 0.41015625, + "loss_num": 0.046875, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 258115960, + "step": 3858 + }, + { + "epoch": 0.43790070921985813, + "grad_norm": 19.595619201660156, + "learning_rate": 5e-05, + "loss": 1.3407, + "num_input_tokens_seen": 258182904, + "step": 3859 + }, + { + "epoch": 0.43790070921985813, + "loss": 1.477217197418213, + "loss_ce": 0.00700240395963192, + "loss_iou": 0.5546875, + "loss_num": 0.07177734375, + "loss_xval": 1.46875, + "num_input_tokens_seen": 258182904, + "step": 3859 + }, + { + "epoch": 0.4380141843971631, + "grad_norm": 21.289159774780273, + "learning_rate": 5e-05, + "loss": 1.2624, + "num_input_tokens_seen": 258250956, + "step": 3860 + }, + { + "epoch": 0.4380141843971631, + "loss": 1.18040132522583, + "loss_ce": 0.004620145075023174, + "loss_iou": 0.478515625, + "loss_num": 0.043701171875, + "loss_xval": 1.171875, + "num_input_tokens_seen": 258250956, + "step": 3860 + }, + { + "epoch": 0.4381276595744681, + "grad_norm": 21.693790435791016, + "learning_rate": 5e-05, + "loss": 1.2666, + "num_input_tokens_seen": 258317304, + "step": 3861 + }, + { + "epoch": 0.4381276595744681, + "loss": 1.310861349105835, + "loss_ce": 0.0061738756485283375, + "loss_iou": 0.4921875, + "loss_num": 0.06396484375, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 258317304, + "step": 3861 + }, + { + "epoch": 0.43824113475177306, + "grad_norm": 23.071001052856445, + "learning_rate": 5e-05, + "loss": 1.1676, + "num_input_tokens_seen": 258384232, + "step": 3862 + }, + { + "epoch": 0.43824113475177306, + "loss": 1.1576499938964844, + "loss_ce": 0.00481786672025919, + "loss_iou": 0.4765625, + "loss_num": 0.039794921875, + "loss_xval": 1.15625, + "num_input_tokens_seen": 258384232, + "step": 3862 + }, + { + "epoch": 0.438354609929078, + "grad_norm": 25.843984603881836, + "learning_rate": 5e-05, + "loss": 1.0889, + "num_input_tokens_seen": 258450760, + "step": 3863 + }, + { + "epoch": 0.438354609929078, + "loss": 1.045968770980835, + "loss_ce": 0.009347689338028431, + "loss_iou": 0.4140625, + "loss_num": 0.04248046875, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 258450760, + "step": 3863 + }, + { + "epoch": 0.43846808510638297, + "grad_norm": 41.49958038330078, + "learning_rate": 5e-05, + "loss": 1.4006, + "num_input_tokens_seen": 258518752, + "step": 3864 + }, + { + "epoch": 0.43846808510638297, + "loss": 1.4797000885009766, + "loss_ce": 0.006067218258976936, + "loss_iou": 0.61328125, + "loss_num": 0.0498046875, + "loss_xval": 1.4765625, + "num_input_tokens_seen": 258518752, + "step": 3864 + }, + { + "epoch": 0.43858156028368794, + "grad_norm": 31.464998245239258, + "learning_rate": 5e-05, + "loss": 1.5282, + "num_input_tokens_seen": 258585108, + "step": 3865 + }, + { + "epoch": 0.43858156028368794, + "loss": 1.511899709701538, + "loss_ce": 0.008481747470796108, + "loss_iou": 0.6015625, + "loss_num": 0.0595703125, + "loss_xval": 1.5, + "num_input_tokens_seen": 258585108, + "step": 3865 + }, + { + "epoch": 0.4386950354609929, + "grad_norm": 20.428632736206055, + "learning_rate": 5e-05, + "loss": 1.2015, + "num_input_tokens_seen": 258652072, + "step": 3866 + }, + { + "epoch": 0.4386950354609929, + "loss": 1.080025315284729, + "loss_ce": 0.009224563837051392, + "loss_iou": 0.44921875, + "loss_num": 0.0341796875, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 258652072, + "step": 3866 + }, + { + "epoch": 0.4388085106382979, + "grad_norm": 36.09545135498047, + "learning_rate": 5e-05, + "loss": 1.3748, + "num_input_tokens_seen": 258718860, + "step": 3867 + }, + { + "epoch": 0.4388085106382979, + "loss": 1.6134631633758545, + "loss_ce": 0.007994452491402626, + "loss_iou": 0.640625, + "loss_num": 0.0654296875, + "loss_xval": 1.609375, + "num_input_tokens_seen": 258718860, + "step": 3867 + }, + { + "epoch": 0.4389219858156028, + "grad_norm": 32.49566650390625, + "learning_rate": 5e-05, + "loss": 1.4021, + "num_input_tokens_seen": 258785252, + "step": 3868 + }, + { + "epoch": 0.4389219858156028, + "loss": 1.3918604850769043, + "loss_ce": 0.006606653798371553, + "loss_iou": 0.5859375, + "loss_num": 0.043212890625, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 258785252, + "step": 3868 + }, + { + "epoch": 0.4390354609929078, + "grad_norm": 21.076377868652344, + "learning_rate": 5e-05, + "loss": 1.1734, + "num_input_tokens_seen": 258852092, + "step": 3869 + }, + { + "epoch": 0.4390354609929078, + "loss": 1.201064109802246, + "loss_ce": 0.005751668941229582, + "loss_iou": 0.4609375, + "loss_num": 0.0546875, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 258852092, + "step": 3869 + }, + { + "epoch": 0.4391489361702128, + "grad_norm": 30.917530059814453, + "learning_rate": 5e-05, + "loss": 1.2062, + "num_input_tokens_seen": 258918668, + "step": 3870 + }, + { + "epoch": 0.4391489361702128, + "loss": 1.3915523290634155, + "loss_ce": 0.011669454164803028, + "loss_iou": 0.546875, + "loss_num": 0.057373046875, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 258918668, + "step": 3870 + }, + { + "epoch": 0.43926241134751776, + "grad_norm": 38.16911697387695, + "learning_rate": 5e-05, + "loss": 1.0534, + "num_input_tokens_seen": 258983312, + "step": 3871 + }, + { + "epoch": 0.43926241134751776, + "loss": 1.087688684463501, + "loss_ce": 0.00712227588519454, + "loss_iou": 0.44921875, + "loss_num": 0.0361328125, + "loss_xval": 1.078125, + "num_input_tokens_seen": 258983312, + "step": 3871 + }, + { + "epoch": 0.4393758865248227, + "grad_norm": 56.2728157043457, + "learning_rate": 5e-05, + "loss": 1.232, + "num_input_tokens_seen": 259050304, + "step": 3872 + }, + { + "epoch": 0.4393758865248227, + "loss": 1.2813082933425903, + "loss_ce": 0.0056735435500741005, + "loss_iou": 0.5390625, + "loss_num": 0.039794921875, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 259050304, + "step": 3872 + }, + { + "epoch": 0.43948936170212766, + "grad_norm": 28.458871841430664, + "learning_rate": 5e-05, + "loss": 1.3475, + "num_input_tokens_seen": 259117408, + "step": 3873 + }, + { + "epoch": 0.43948936170212766, + "loss": 1.1535805463790894, + "loss_ce": 0.007584454491734505, + "loss_iou": 0.4921875, + "loss_num": 0.031982421875, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 259117408, + "step": 3873 + }, + { + "epoch": 0.43960283687943263, + "grad_norm": 33.63649368286133, + "learning_rate": 5e-05, + "loss": 1.3587, + "num_input_tokens_seen": 259182480, + "step": 3874 + }, + { + "epoch": 0.43960283687943263, + "loss": 1.6605347394943237, + "loss_ce": 0.011120709590613842, + "loss_iou": 0.62890625, + "loss_num": 0.0791015625, + "loss_xval": 1.6484375, + "num_input_tokens_seen": 259182480, + "step": 3874 + }, + { + "epoch": 0.4397163120567376, + "grad_norm": 33.33298873901367, + "learning_rate": 5e-05, + "loss": 1.5008, + "num_input_tokens_seen": 259250112, + "step": 3875 + }, + { + "epoch": 0.4397163120567376, + "loss": 1.428746223449707, + "loss_ce": 0.010777588933706284, + "loss_iou": 0.58203125, + "loss_num": 0.050537109375, + "loss_xval": 1.421875, + "num_input_tokens_seen": 259250112, + "step": 3875 + }, + { + "epoch": 0.43982978723404254, + "grad_norm": 88.04798889160156, + "learning_rate": 5e-05, + "loss": 1.1949, + "num_input_tokens_seen": 259316704, + "step": 3876 + }, + { + "epoch": 0.43982978723404254, + "loss": 1.1112685203552246, + "loss_ce": 0.0043349736370146275, + "loss_iou": 0.455078125, + "loss_num": 0.0390625, + "loss_xval": 1.109375, + "num_input_tokens_seen": 259316704, + "step": 3876 + }, + { + "epoch": 0.4399432624113475, + "grad_norm": 27.701147079467773, + "learning_rate": 5e-05, + "loss": 1.2048, + "num_input_tokens_seen": 259383632, + "step": 3877 + }, + { + "epoch": 0.4399432624113475, + "loss": 1.286081314086914, + "loss_ce": 0.0033665441442281008, + "loss_iou": 0.53515625, + "loss_num": 0.0419921875, + "loss_xval": 1.28125, + "num_input_tokens_seen": 259383632, + "step": 3877 + }, + { + "epoch": 0.4400567375886525, + "grad_norm": 29.425207138061523, + "learning_rate": 5e-05, + "loss": 1.2888, + "num_input_tokens_seen": 259450228, + "step": 3878 + }, + { + "epoch": 0.4400567375886525, + "loss": 1.2260019779205322, + "loss_ce": 0.004322216846048832, + "loss_iou": 0.5234375, + "loss_num": 0.034912109375, + "loss_xval": 1.21875, + "num_input_tokens_seen": 259450228, + "step": 3878 + }, + { + "epoch": 0.44017021276595747, + "grad_norm": 28.22897720336914, + "learning_rate": 5e-05, + "loss": 1.2152, + "num_input_tokens_seen": 259517728, + "step": 3879 + }, + { + "epoch": 0.44017021276595747, + "loss": 1.2417809963226318, + "loss_ce": 0.003011531662195921, + "loss_iou": 0.56640625, + "loss_num": 0.021240234375, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 259517728, + "step": 3879 + }, + { + "epoch": 0.4402836879432624, + "grad_norm": 35.65264892578125, + "learning_rate": 5e-05, + "loss": 1.5216, + "num_input_tokens_seen": 259583752, + "step": 3880 + }, + { + "epoch": 0.4402836879432624, + "loss": 1.5079989433288574, + "loss_ce": 0.009952097199857235, + "loss_iou": 0.6328125, + "loss_num": 0.04638671875, + "loss_xval": 1.5, + "num_input_tokens_seen": 259583752, + "step": 3880 + }, + { + "epoch": 0.44039716312056737, + "grad_norm": 41.82237243652344, + "learning_rate": 5e-05, + "loss": 1.3869, + "num_input_tokens_seen": 259650224, + "step": 3881 + }, + { + "epoch": 0.44039716312056737, + "loss": 1.6088690757751465, + "loss_ce": 0.004376936703920364, + "loss_iou": 0.625, + "loss_num": 0.0712890625, + "loss_xval": 1.6015625, + "num_input_tokens_seen": 259650224, + "step": 3881 + }, + { + "epoch": 0.44051063829787235, + "grad_norm": 37.61747360229492, + "learning_rate": 5e-05, + "loss": 1.0865, + "num_input_tokens_seen": 259717128, + "step": 3882 + }, + { + "epoch": 0.44051063829787235, + "loss": 1.068966269493103, + "loss_ce": 0.00451316311955452, + "loss_iou": 0.43359375, + "loss_num": 0.0390625, + "loss_xval": 1.0625, + "num_input_tokens_seen": 259717128, + "step": 3882 + }, + { + "epoch": 0.4406241134751773, + "grad_norm": 42.161865234375, + "learning_rate": 5e-05, + "loss": 1.1108, + "num_input_tokens_seen": 259783508, + "step": 3883 + }, + { + "epoch": 0.4406241134751773, + "loss": 1.068213701248169, + "loss_ce": 0.0047371843829751015, + "loss_iou": 0.4375, + "loss_num": 0.037841796875, + "loss_xval": 1.0625, + "num_input_tokens_seen": 259783508, + "step": 3883 + }, + { + "epoch": 0.44073758865248225, + "grad_norm": 43.577476501464844, + "learning_rate": 5e-05, + "loss": 1.2445, + "num_input_tokens_seen": 259851524, + "step": 3884 + }, + { + "epoch": 0.44073758865248225, + "loss": 1.1032073497772217, + "loss_ce": 0.00750430254265666, + "loss_iou": 0.470703125, + "loss_num": 0.0303955078125, + "loss_xval": 1.09375, + "num_input_tokens_seen": 259851524, + "step": 3884 + }, + { + "epoch": 0.4408510638297872, + "grad_norm": 58.6839485168457, + "learning_rate": 5e-05, + "loss": 1.2508, + "num_input_tokens_seen": 259918424, + "step": 3885 + }, + { + "epoch": 0.4408510638297872, + "loss": 1.177564263343811, + "loss_ce": 0.004712686408311129, + "loss_iou": 0.515625, + "loss_num": 0.02880859375, + "loss_xval": 1.171875, + "num_input_tokens_seen": 259918424, + "step": 3885 + }, + { + "epoch": 0.4409645390070922, + "grad_norm": 23.57305145263672, + "learning_rate": 5e-05, + "loss": 1.1132, + "num_input_tokens_seen": 259984700, + "step": 3886 + }, + { + "epoch": 0.4409645390070922, + "loss": 1.2495996952056885, + "loss_ce": 0.00790043082088232, + "loss_iou": 0.5078125, + "loss_num": 0.04443359375, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 259984700, + "step": 3886 + }, + { + "epoch": 0.4410780141843972, + "grad_norm": 19.032936096191406, + "learning_rate": 5e-05, + "loss": 1.2126, + "num_input_tokens_seen": 260051764, + "step": 3887 + }, + { + "epoch": 0.4410780141843972, + "loss": 1.4534697532653809, + "loss_ce": 0.01108706183731556, + "loss_iou": 0.57421875, + "loss_num": 0.058349609375, + "loss_xval": 1.4453125, + "num_input_tokens_seen": 260051764, + "step": 3887 + }, + { + "epoch": 0.4411914893617021, + "grad_norm": 22.44428253173828, + "learning_rate": 5e-05, + "loss": 1.2176, + "num_input_tokens_seen": 260117584, + "step": 3888 + }, + { + "epoch": 0.4411914893617021, + "loss": 1.1240055561065674, + "loss_ce": 0.0024235080927610397, + "loss_iou": 0.470703125, + "loss_num": 0.03564453125, + "loss_xval": 1.125, + "num_input_tokens_seen": 260117584, + "step": 3888 + }, + { + "epoch": 0.4413049645390071, + "grad_norm": 16.997968673706055, + "learning_rate": 5e-05, + "loss": 1.2731, + "num_input_tokens_seen": 260183256, + "step": 3889 + }, + { + "epoch": 0.4413049645390071, + "loss": 1.3859132528305054, + "loss_ce": 0.0033448683097958565, + "loss_iou": 0.5390625, + "loss_num": 0.060302734375, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 260183256, + "step": 3889 + }, + { + "epoch": 0.44141843971631206, + "grad_norm": 16.892847061157227, + "learning_rate": 5e-05, + "loss": 1.1388, + "num_input_tokens_seen": 260249760, + "step": 3890 + }, + { + "epoch": 0.44141843971631206, + "loss": 1.113063097000122, + "loss_ce": 0.00710616260766983, + "loss_iou": 0.423828125, + "loss_num": 0.0517578125, + "loss_xval": 1.109375, + "num_input_tokens_seen": 260249760, + "step": 3890 + }, + { + "epoch": 0.44153191489361704, + "grad_norm": 34.66706466674805, + "learning_rate": 5e-05, + "loss": 1.1851, + "num_input_tokens_seen": 260317080, + "step": 3891 + }, + { + "epoch": 0.44153191489361704, + "loss": 1.0286352634429932, + "loss_ce": 0.002268049167469144, + "loss_iou": 0.4453125, + "loss_num": 0.0274658203125, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 260317080, + "step": 3891 + }, + { + "epoch": 0.44164539007092196, + "grad_norm": 39.149471282958984, + "learning_rate": 5e-05, + "loss": 1.43, + "num_input_tokens_seen": 260382688, + "step": 3892 + }, + { + "epoch": 0.44164539007092196, + "loss": 1.1851385831832886, + "loss_ce": 0.00789248663932085, + "loss_iou": 0.48828125, + "loss_num": 0.04052734375, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 260382688, + "step": 3892 + }, + { + "epoch": 0.44175886524822694, + "grad_norm": 43.82826614379883, + "learning_rate": 5e-05, + "loss": 1.2325, + "num_input_tokens_seen": 260449016, + "step": 3893 + }, + { + "epoch": 0.44175886524822694, + "loss": 1.2587671279907227, + "loss_ce": 0.008278768509626389, + "loss_iou": 0.52734375, + "loss_num": 0.039306640625, + "loss_xval": 1.25, + "num_input_tokens_seen": 260449016, + "step": 3893 + }, + { + "epoch": 0.4418723404255319, + "grad_norm": 26.825973510742188, + "learning_rate": 5e-05, + "loss": 1.2517, + "num_input_tokens_seen": 260515576, + "step": 3894 + }, + { + "epoch": 0.4418723404255319, + "loss": 1.2758848667144775, + "loss_ce": 0.004644542001187801, + "loss_iou": 0.5390625, + "loss_num": 0.03857421875, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 260515576, + "step": 3894 + }, + { + "epoch": 0.4419858156028369, + "grad_norm": 29.360198974609375, + "learning_rate": 5e-05, + "loss": 1.1828, + "num_input_tokens_seen": 260581540, + "step": 3895 + }, + { + "epoch": 0.4419858156028369, + "loss": 1.158257007598877, + "loss_ce": 0.007866403087973595, + "loss_iou": 0.486328125, + "loss_num": 0.03564453125, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 260581540, + "step": 3895 + }, + { + "epoch": 0.4420992907801418, + "grad_norm": 27.326507568359375, + "learning_rate": 5e-05, + "loss": 1.2773, + "num_input_tokens_seen": 260650068, + "step": 3896 + }, + { + "epoch": 0.4420992907801418, + "loss": 1.3966755867004395, + "loss_ce": 0.005074058193713427, + "loss_iou": 0.578125, + "loss_num": 0.04638671875, + "loss_xval": 1.390625, + "num_input_tokens_seen": 260650068, + "step": 3896 + }, + { + "epoch": 0.4422127659574468, + "grad_norm": 34.41632843017578, + "learning_rate": 5e-05, + "loss": 1.2397, + "num_input_tokens_seen": 260715904, + "step": 3897 + }, + { + "epoch": 0.4422127659574468, + "loss": 1.1872708797454834, + "loss_ce": 0.0032498412765562534, + "loss_iou": 0.52734375, + "loss_num": 0.0255126953125, + "loss_xval": 1.1875, + "num_input_tokens_seen": 260715904, + "step": 3897 + }, + { + "epoch": 0.4423262411347518, + "grad_norm": 20.33496856689453, + "learning_rate": 5e-05, + "loss": 1.4101, + "num_input_tokens_seen": 260782664, + "step": 3898 + }, + { + "epoch": 0.4423262411347518, + "loss": 1.4366666078567505, + "loss_ce": 0.00795565452426672, + "loss_iou": 0.5625, + "loss_num": 0.060302734375, + "loss_xval": 1.4296875, + "num_input_tokens_seen": 260782664, + "step": 3898 + }, + { + "epoch": 0.44243971631205675, + "grad_norm": 19.059375762939453, + "learning_rate": 5e-05, + "loss": 1.2917, + "num_input_tokens_seen": 260849852, + "step": 3899 + }, + { + "epoch": 0.44243971631205675, + "loss": 1.2540934085845947, + "loss_ce": 0.003605128498747945, + "loss_iou": 0.54296875, + "loss_num": 0.032470703125, + "loss_xval": 1.25, + "num_input_tokens_seen": 260849852, + "step": 3899 + }, + { + "epoch": 0.4425531914893617, + "grad_norm": 37.58598327636719, + "learning_rate": 5e-05, + "loss": 1.2544, + "num_input_tokens_seen": 260915084, + "step": 3900 + }, + { + "epoch": 0.4425531914893617, + "loss": 1.2184313535690308, + "loss_ce": 0.00798217486590147, + "loss_iou": 0.515625, + "loss_num": 0.0361328125, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 260915084, + "step": 3900 + }, + { + "epoch": 0.44266666666666665, + "grad_norm": 52.31794357299805, + "learning_rate": 5e-05, + "loss": 1.6008, + "num_input_tokens_seen": 260982524, + "step": 3901 + }, + { + "epoch": 0.44266666666666665, + "loss": 1.6353964805603027, + "loss_ce": 0.0055136531591415405, + "loss_iou": 0.69921875, + "loss_num": 0.04638671875, + "loss_xval": 1.6328125, + "num_input_tokens_seen": 260982524, + "step": 3901 + }, + { + "epoch": 0.44278014184397163, + "grad_norm": 20.51738929748535, + "learning_rate": 5e-05, + "loss": 1.0011, + "num_input_tokens_seen": 261049224, + "step": 3902 + }, + { + "epoch": 0.44278014184397163, + "loss": 1.0593552589416504, + "loss_ce": 0.0066208429634571075, + "loss_iou": 0.38671875, + "loss_num": 0.055419921875, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 261049224, + "step": 3902 + }, + { + "epoch": 0.4428936170212766, + "grad_norm": 26.748165130615234, + "learning_rate": 5e-05, + "loss": 1.1734, + "num_input_tokens_seen": 261116136, + "step": 3903 + }, + { + "epoch": 0.4428936170212766, + "loss": 1.0884974002838135, + "loss_ce": 0.008419295772910118, + "loss_iou": 0.46484375, + "loss_num": 0.0306396484375, + "loss_xval": 1.078125, + "num_input_tokens_seen": 261116136, + "step": 3903 + }, + { + "epoch": 0.4430070921985816, + "grad_norm": 27.995986938476562, + "learning_rate": 5e-05, + "loss": 1.3797, + "num_input_tokens_seen": 261183728, + "step": 3904 + }, + { + "epoch": 0.4430070921985816, + "loss": 1.1965086460113525, + "loss_ce": 0.009496946819126606, + "loss_iou": 0.5078125, + "loss_num": 0.03515625, + "loss_xval": 1.1875, + "num_input_tokens_seen": 261183728, + "step": 3904 + }, + { + "epoch": 0.4431205673758865, + "grad_norm": 35.67471694946289, + "learning_rate": 5e-05, + "loss": 1.2443, + "num_input_tokens_seen": 261250796, + "step": 3905 + }, + { + "epoch": 0.4431205673758865, + "loss": 1.1714961528778076, + "loss_ce": 0.00889856368303299, + "loss_iou": 0.498046875, + "loss_num": 0.033447265625, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 261250796, + "step": 3905 + }, + { + "epoch": 0.4432340425531915, + "grad_norm": 22.626663208007812, + "learning_rate": 5e-05, + "loss": 1.2845, + "num_input_tokens_seen": 261317364, + "step": 3906 + }, + { + "epoch": 0.4432340425531915, + "loss": 1.4061524868011475, + "loss_ce": 0.0052735875360667706, + "loss_iou": 0.578125, + "loss_num": 0.048095703125, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 261317364, + "step": 3906 + }, + { + "epoch": 0.44334751773049647, + "grad_norm": 21.97503662109375, + "learning_rate": 5e-05, + "loss": 1.3181, + "num_input_tokens_seen": 261384972, + "step": 3907 + }, + { + "epoch": 0.44334751773049647, + "loss": 1.207067608833313, + "loss_ce": 0.004919182509183884, + "loss_iou": 0.51171875, + "loss_num": 0.03564453125, + "loss_xval": 1.203125, + "num_input_tokens_seen": 261384972, + "step": 3907 + }, + { + "epoch": 0.44346099290780144, + "grad_norm": 26.02085304260254, + "learning_rate": 5e-05, + "loss": 1.0954, + "num_input_tokens_seen": 261451180, + "step": 3908 + }, + { + "epoch": 0.44346099290780144, + "loss": 0.9926005005836487, + "loss_ce": 0.006516550201922655, + "loss_iou": 0.3828125, + "loss_num": 0.044677734375, + "loss_xval": 0.984375, + "num_input_tokens_seen": 261451180, + "step": 3908 + }, + { + "epoch": 0.44357446808510637, + "grad_norm": 34.16218948364258, + "learning_rate": 5e-05, + "loss": 1.1212, + "num_input_tokens_seen": 261518980, + "step": 3909 + }, + { + "epoch": 0.44357446808510637, + "loss": 1.0252816677093506, + "loss_ce": 0.006238704081624746, + "loss_iou": 0.43359375, + "loss_num": 0.0306396484375, + "loss_xval": 1.015625, + "num_input_tokens_seen": 261518980, + "step": 3909 + }, + { + "epoch": 0.44368794326241134, + "grad_norm": 38.942562103271484, + "learning_rate": 5e-05, + "loss": 1.2821, + "num_input_tokens_seen": 261586672, + "step": 3910 + }, + { + "epoch": 0.44368794326241134, + "loss": 1.2917239665985107, + "loss_ce": 0.006079454440623522, + "loss_iou": 0.5546875, + "loss_num": 0.03564453125, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 261586672, + "step": 3910 + }, + { + "epoch": 0.4438014184397163, + "grad_norm": 32.477272033691406, + "learning_rate": 5e-05, + "loss": 1.3281, + "num_input_tokens_seen": 261653492, + "step": 3911 + }, + { + "epoch": 0.4438014184397163, + "loss": 1.3995002508163452, + "loss_ce": 0.0030158325098454952, + "loss_iou": 0.578125, + "loss_num": 0.048828125, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 261653492, + "step": 3911 + }, + { + "epoch": 0.4439148936170213, + "grad_norm": 36.55487060546875, + "learning_rate": 5e-05, + "loss": 1.3519, + "num_input_tokens_seen": 261719172, + "step": 3912 + }, + { + "epoch": 0.4439148936170213, + "loss": 1.4267208576202393, + "loss_ce": 0.0038692664820700884, + "loss_iou": 0.5546875, + "loss_num": 0.062255859375, + "loss_xval": 1.421875, + "num_input_tokens_seen": 261719172, + "step": 3912 + }, + { + "epoch": 0.4440283687943262, + "grad_norm": 27.35784912109375, + "learning_rate": 5e-05, + "loss": 1.4762, + "num_input_tokens_seen": 261786472, + "step": 3913 + }, + { + "epoch": 0.4440283687943262, + "loss": 1.5464837551116943, + "loss_ce": 0.007909508422017097, + "loss_iou": 0.62890625, + "loss_num": 0.055908203125, + "loss_xval": 1.5390625, + "num_input_tokens_seen": 261786472, + "step": 3913 + }, + { + "epoch": 0.4441418439716312, + "grad_norm": 21.390310287475586, + "learning_rate": 5e-05, + "loss": 1.1393, + "num_input_tokens_seen": 261852808, + "step": 3914 + }, + { + "epoch": 0.4441418439716312, + "loss": 1.2243014574050903, + "loss_ce": 0.0068942527286708355, + "loss_iou": 0.490234375, + "loss_num": 0.047607421875, + "loss_xval": 1.21875, + "num_input_tokens_seen": 261852808, + "step": 3914 + }, + { + "epoch": 0.4442553191489362, + "grad_norm": 26.841310501098633, + "learning_rate": 5e-05, + "loss": 1.3002, + "num_input_tokens_seen": 261919984, + "step": 3915 + }, + { + "epoch": 0.4442553191489362, + "loss": 1.3581740856170654, + "loss_ce": 0.0036818054504692554, + "loss_iou": 0.51953125, + "loss_num": 0.0634765625, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 261919984, + "step": 3915 + }, + { + "epoch": 0.44436879432624116, + "grad_norm": 30.06488609313965, + "learning_rate": 5e-05, + "loss": 1.3302, + "num_input_tokens_seen": 261987076, + "step": 3916 + }, + { + "epoch": 0.44436879432624116, + "loss": 1.104669213294983, + "loss_ce": 0.008966159075498581, + "loss_iou": 0.451171875, + "loss_num": 0.0380859375, + "loss_xval": 1.09375, + "num_input_tokens_seen": 261987076, + "step": 3916 + }, + { + "epoch": 0.4444822695035461, + "grad_norm": 33.99651336669922, + "learning_rate": 5e-05, + "loss": 1.3156, + "num_input_tokens_seen": 262055176, + "step": 3917 + }, + { + "epoch": 0.4444822695035461, + "loss": 1.2999459505081177, + "loss_ce": 0.004047509282827377, + "loss_iou": 0.5546875, + "loss_num": 0.037841796875, + "loss_xval": 1.296875, + "num_input_tokens_seen": 262055176, + "step": 3917 + }, + { + "epoch": 0.44459574468085106, + "grad_norm": 48.32487106323242, + "learning_rate": 5e-05, + "loss": 1.2315, + "num_input_tokens_seen": 262122088, + "step": 3918 + }, + { + "epoch": 0.44459574468085106, + "loss": 1.1176648139953613, + "loss_ce": 0.003407070180401206, + "loss_iou": 0.447265625, + "loss_num": 0.043701171875, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 262122088, + "step": 3918 + }, + { + "epoch": 0.44470921985815604, + "grad_norm": 27.13886260986328, + "learning_rate": 5e-05, + "loss": 1.1813, + "num_input_tokens_seen": 262189264, + "step": 3919 + }, + { + "epoch": 0.44470921985815604, + "loss": 1.246977686882019, + "loss_ce": 0.011137869209051132, + "loss_iou": 0.5, + "loss_num": 0.046875, + "loss_xval": 1.234375, + "num_input_tokens_seen": 262189264, + "step": 3919 + }, + { + "epoch": 0.444822695035461, + "grad_norm": 44.113033294677734, + "learning_rate": 5e-05, + "loss": 1.386, + "num_input_tokens_seen": 262256512, + "step": 3920 + }, + { + "epoch": 0.444822695035461, + "loss": 1.6299564838409424, + "loss_ce": 0.00495650339871645, + "loss_iou": 0.640625, + "loss_num": 0.06884765625, + "loss_xval": 1.625, + "num_input_tokens_seen": 262256512, + "step": 3920 + }, + { + "epoch": 0.44493617021276594, + "grad_norm": 51.827369689941406, + "learning_rate": 5e-05, + "loss": 1.1948, + "num_input_tokens_seen": 262323264, + "step": 3921 + }, + { + "epoch": 0.44493617021276594, + "loss": 1.3722565174102783, + "loss_ce": 0.003115883097052574, + "loss_iou": 0.609375, + "loss_num": 0.030517578125, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 262323264, + "step": 3921 + }, + { + "epoch": 0.4450496453900709, + "grad_norm": 20.627784729003906, + "learning_rate": 5e-05, + "loss": 1.38, + "num_input_tokens_seen": 262390664, + "step": 3922 + }, + { + "epoch": 0.4450496453900709, + "loss": 1.3037807941436768, + "loss_ce": 0.007882319390773773, + "loss_iou": 0.5390625, + "loss_num": 0.043212890625, + "loss_xval": 1.296875, + "num_input_tokens_seen": 262390664, + "step": 3922 + }, + { + "epoch": 0.4451631205673759, + "grad_norm": 14.86811637878418, + "learning_rate": 5e-05, + "loss": 0.8685, + "num_input_tokens_seen": 262457572, + "step": 3923 + }, + { + "epoch": 0.4451631205673759, + "loss": 0.9546880722045898, + "loss_ce": 0.006445880979299545, + "loss_iou": 0.392578125, + "loss_num": 0.033203125, + "loss_xval": 0.94921875, + "num_input_tokens_seen": 262457572, + "step": 3923 + }, + { + "epoch": 0.44527659574468087, + "grad_norm": 41.02214813232422, + "learning_rate": 5e-05, + "loss": 1.2905, + "num_input_tokens_seen": 262524256, + "step": 3924 + }, + { + "epoch": 0.44527659574468087, + "loss": 1.4215439558029175, + "loss_ce": 0.0055283294059336185, + "loss_iou": 0.55859375, + "loss_num": 0.058837890625, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 262524256, + "step": 3924 + }, + { + "epoch": 0.4453900709219858, + "grad_norm": 25.519853591918945, + "learning_rate": 5e-05, + "loss": 1.4968, + "num_input_tokens_seen": 262591348, + "step": 3925 + }, + { + "epoch": 0.4453900709219858, + "loss": 1.500037670135498, + "loss_ce": 0.006873684003949165, + "loss_iou": 0.6328125, + "loss_num": 0.044677734375, + "loss_xval": 1.4921875, + "num_input_tokens_seen": 262591348, + "step": 3925 + }, + { + "epoch": 0.44550354609929077, + "grad_norm": 39.63321304321289, + "learning_rate": 5e-05, + "loss": 1.2604, + "num_input_tokens_seen": 262657800, + "step": 3926 + }, + { + "epoch": 0.44550354609929077, + "loss": 1.1997225284576416, + "loss_ce": 0.0019685812294483185, + "loss_iou": 0.5, + "loss_num": 0.03955078125, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 262657800, + "step": 3926 + }, + { + "epoch": 0.44561702127659575, + "grad_norm": 25.972087860107422, + "learning_rate": 5e-05, + "loss": 1.262, + "num_input_tokens_seen": 262724772, + "step": 3927 + }, + { + "epoch": 0.44561702127659575, + "loss": 1.2162785530090332, + "loss_ce": 0.007294214330613613, + "loss_iou": 0.5, + "loss_num": 0.0419921875, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 262724772, + "step": 3927 + }, + { + "epoch": 0.4457304964539007, + "grad_norm": 34.69384002685547, + "learning_rate": 5e-05, + "loss": 1.2079, + "num_input_tokens_seen": 262791012, + "step": 3928 + }, + { + "epoch": 0.4457304964539007, + "loss": 1.1340599060058594, + "loss_ce": 0.004177072085440159, + "loss_iou": 0.48828125, + "loss_num": 0.0311279296875, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 262791012, + "step": 3928 + }, + { + "epoch": 0.44584397163120565, + "grad_norm": 25.88766860961914, + "learning_rate": 5e-05, + "loss": 1.3471, + "num_input_tokens_seen": 262857644, + "step": 3929 + }, + { + "epoch": 0.44584397163120565, + "loss": 1.2958953380584717, + "loss_ce": 0.0063446881249547005, + "loss_iou": 0.52734375, + "loss_num": 0.04638671875, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 262857644, + "step": 3929 + }, + { + "epoch": 0.4459574468085106, + "grad_norm": 26.706735610961914, + "learning_rate": 5e-05, + "loss": 1.2354, + "num_input_tokens_seen": 262924504, + "step": 3930 + }, + { + "epoch": 0.4459574468085106, + "loss": 1.1004695892333984, + "loss_ce": 0.006963755935430527, + "loss_iou": 0.431640625, + "loss_num": 0.046142578125, + "loss_xval": 1.09375, + "num_input_tokens_seen": 262924504, + "step": 3930 + }, + { + "epoch": 0.4460709219858156, + "grad_norm": 36.568782806396484, + "learning_rate": 5e-05, + "loss": 1.3436, + "num_input_tokens_seen": 262992064, + "step": 3931 + }, + { + "epoch": 0.4460709219858156, + "loss": 1.378645658493042, + "loss_ce": 0.004622169770300388, + "loss_iou": 0.56640625, + "loss_num": 0.048095703125, + "loss_xval": 1.375, + "num_input_tokens_seen": 262992064, + "step": 3931 + }, + { + "epoch": 0.4461843971631206, + "grad_norm": 20.363170623779297, + "learning_rate": 5e-05, + "loss": 1.3871, + "num_input_tokens_seen": 263059028, + "step": 3932 + }, + { + "epoch": 0.4461843971631206, + "loss": 1.2608885765075684, + "loss_ce": 0.0030760387890040874, + "loss_iou": 0.55078125, + "loss_num": 0.031005859375, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 263059028, + "step": 3932 + }, + { + "epoch": 0.4462978723404255, + "grad_norm": 17.959423065185547, + "learning_rate": 5e-05, + "loss": 1.16, + "num_input_tokens_seen": 263125640, + "step": 3933 + }, + { + "epoch": 0.4462978723404255, + "loss": 1.0657273530960083, + "loss_ce": 0.011039928533136845, + "loss_iou": 0.44921875, + "loss_num": 0.031005859375, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 263125640, + "step": 3933 + }, + { + "epoch": 0.4464113475177305, + "grad_norm": 31.709972381591797, + "learning_rate": 5e-05, + "loss": 1.3768, + "num_input_tokens_seen": 263192976, + "step": 3934 + }, + { + "epoch": 0.4464113475177305, + "loss": 1.2004621028900146, + "loss_ce": 0.009055878967046738, + "loss_iou": 0.50390625, + "loss_num": 0.0361328125, + "loss_xval": 1.1875, + "num_input_tokens_seen": 263192976, + "step": 3934 + }, + { + "epoch": 0.44652482269503546, + "grad_norm": 23.368194580078125, + "learning_rate": 5e-05, + "loss": 1.6087, + "num_input_tokens_seen": 263259796, + "step": 3935 + }, + { + "epoch": 0.44652482269503546, + "loss": 1.762556791305542, + "loss_ce": 0.01060377899557352, + "loss_iou": 0.70703125, + "loss_num": 0.0673828125, + "loss_xval": 1.75, + "num_input_tokens_seen": 263259796, + "step": 3935 + }, + { + "epoch": 0.44663829787234044, + "grad_norm": 15.982648849487305, + "learning_rate": 5e-05, + "loss": 1.0855, + "num_input_tokens_seen": 263327256, + "step": 3936 + }, + { + "epoch": 0.44663829787234044, + "loss": 1.123452067375183, + "loss_ce": 0.007241154555231333, + "loss_iou": 0.447265625, + "loss_num": 0.04443359375, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 263327256, + "step": 3936 + }, + { + "epoch": 0.4467517730496454, + "grad_norm": 16.993650436401367, + "learning_rate": 5e-05, + "loss": 1.1728, + "num_input_tokens_seen": 263395212, + "step": 3937 + }, + { + "epoch": 0.4467517730496454, + "loss": 1.0825941562652588, + "loss_ce": 0.003004369791597128, + "loss_iou": 0.455078125, + "loss_num": 0.03369140625, + "loss_xval": 1.078125, + "num_input_tokens_seen": 263395212, + "step": 3937 + }, + { + "epoch": 0.44686524822695034, + "grad_norm": 30.535512924194336, + "learning_rate": 5e-05, + "loss": 0.9618, + "num_input_tokens_seen": 263461968, + "step": 3938 + }, + { + "epoch": 0.44686524822695034, + "loss": 0.8661698698997498, + "loss_ce": 0.006062468513846397, + "loss_iou": 0.349609375, + "loss_num": 0.0322265625, + "loss_xval": 0.859375, + "num_input_tokens_seen": 263461968, + "step": 3938 + }, + { + "epoch": 0.4469787234042553, + "grad_norm": 43.445987701416016, + "learning_rate": 5e-05, + "loss": 1.4122, + "num_input_tokens_seen": 263528316, + "step": 3939 + }, + { + "epoch": 0.4469787234042553, + "loss": 1.1855406761169434, + "loss_ce": 0.0034117083996534348, + "loss_iou": 0.51171875, + "loss_num": 0.031494140625, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 263528316, + "step": 3939 + }, + { + "epoch": 0.4470921985815603, + "grad_norm": 26.77507209777832, + "learning_rate": 5e-05, + "loss": 1.6738, + "num_input_tokens_seen": 263595352, + "step": 3940 + }, + { + "epoch": 0.4470921985815603, + "loss": 1.4935301542282104, + "loss_ce": 0.00427229143679142, + "loss_iou": 0.66015625, + "loss_num": 0.033935546875, + "loss_xval": 1.4921875, + "num_input_tokens_seen": 263595352, + "step": 3940 + }, + { + "epoch": 0.4472056737588653, + "grad_norm": 22.094829559326172, + "learning_rate": 5e-05, + "loss": 1.1469, + "num_input_tokens_seen": 263662544, + "step": 3941 + }, + { + "epoch": 0.4472056737588653, + "loss": 1.27013099193573, + "loss_ce": 0.005970850586891174, + "loss_iou": 0.49609375, + "loss_num": 0.0546875, + "loss_xval": 1.265625, + "num_input_tokens_seen": 263662544, + "step": 3941 + }, + { + "epoch": 0.4473191489361702, + "grad_norm": 22.792686462402344, + "learning_rate": 5e-05, + "loss": 1.1504, + "num_input_tokens_seen": 263729028, + "step": 3942 + }, + { + "epoch": 0.4473191489361702, + "loss": 1.199885606765747, + "loss_ce": 0.005061347968876362, + "loss_iou": 0.5, + "loss_num": 0.038330078125, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 263729028, + "step": 3942 + }, + { + "epoch": 0.4474326241134752, + "grad_norm": 27.85095977783203, + "learning_rate": 5e-05, + "loss": 1.2396, + "num_input_tokens_seen": 263795728, + "step": 3943 + }, + { + "epoch": 0.4474326241134752, + "loss": 1.3747353553771973, + "loss_ce": 0.0075478339567780495, + "loss_iou": 0.5625, + "loss_num": 0.049072265625, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 263795728, + "step": 3943 + }, + { + "epoch": 0.44754609929078015, + "grad_norm": 31.019012451171875, + "learning_rate": 5e-05, + "loss": 1.2934, + "num_input_tokens_seen": 263862896, + "step": 3944 + }, + { + "epoch": 0.44754609929078015, + "loss": 1.389816164970398, + "loss_ce": 0.006027103401720524, + "loss_iou": 0.56640625, + "loss_num": 0.050537109375, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 263862896, + "step": 3944 + }, + { + "epoch": 0.44765957446808513, + "grad_norm": 36.20641326904297, + "learning_rate": 5e-05, + "loss": 1.2374, + "num_input_tokens_seen": 263929976, + "step": 3945 + }, + { + "epoch": 0.44765957446808513, + "loss": 1.1073267459869385, + "loss_ce": 0.004787665791809559, + "loss_iou": 0.486328125, + "loss_num": 0.02587890625, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 263929976, + "step": 3945 + }, + { + "epoch": 0.44777304964539005, + "grad_norm": 38.156986236572266, + "learning_rate": 5e-05, + "loss": 1.1072, + "num_input_tokens_seen": 263997100, + "step": 3946 + }, + { + "epoch": 0.44777304964539005, + "loss": 1.3064830303192139, + "loss_ce": 0.004725263919681311, + "loss_iou": 0.546875, + "loss_num": 0.041748046875, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 263997100, + "step": 3946 + }, + { + "epoch": 0.44788652482269503, + "grad_norm": 28.51410484313965, + "learning_rate": 5e-05, + "loss": 1.3565, + "num_input_tokens_seen": 264063808, + "step": 3947 + }, + { + "epoch": 0.44788652482269503, + "loss": 1.5866658687591553, + "loss_ce": 0.007564200554043055, + "loss_iou": 0.65234375, + "loss_num": 0.055419921875, + "loss_xval": 1.578125, + "num_input_tokens_seen": 264063808, + "step": 3947 + }, + { + "epoch": 0.448, + "grad_norm": 70.0388412475586, + "learning_rate": 5e-05, + "loss": 1.2649, + "num_input_tokens_seen": 264130020, + "step": 3948 + }, + { + "epoch": 0.448, + "loss": 1.4313483238220215, + "loss_ce": 0.005567001178860664, + "loss_iou": 0.5234375, + "loss_num": 0.076171875, + "loss_xval": 1.421875, + "num_input_tokens_seen": 264130020, + "step": 3948 + }, + { + "epoch": 0.448113475177305, + "grad_norm": 24.61136245727539, + "learning_rate": 5e-05, + "loss": 1.415, + "num_input_tokens_seen": 264197696, + "step": 3949 + }, + { + "epoch": 0.448113475177305, + "loss": 1.2656409740447998, + "loss_ce": 0.005386957898736, + "loss_iou": 0.515625, + "loss_num": 0.04638671875, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 264197696, + "step": 3949 + }, + { + "epoch": 0.4482269503546099, + "grad_norm": 21.895158767700195, + "learning_rate": 5e-05, + "loss": 1.1614, + "num_input_tokens_seen": 264264328, + "step": 3950 + }, + { + "epoch": 0.4482269503546099, + "loss": 1.1333658695220947, + "loss_ce": 0.006412745453417301, + "loss_iou": 0.47265625, + "loss_num": 0.03662109375, + "loss_xval": 1.125, + "num_input_tokens_seen": 264264328, + "step": 3950 + }, + { + "epoch": 0.4483404255319149, + "grad_norm": 22.928733825683594, + "learning_rate": 5e-05, + "loss": 1.0603, + "num_input_tokens_seen": 264331084, + "step": 3951 + }, + { + "epoch": 0.4483404255319149, + "loss": 1.0364831686019897, + "loss_ce": 0.008162867277860641, + "loss_iou": 0.43359375, + "loss_num": 0.032470703125, + "loss_xval": 1.03125, + "num_input_tokens_seen": 264331084, + "step": 3951 + }, + { + "epoch": 0.44845390070921987, + "grad_norm": 38.010467529296875, + "learning_rate": 5e-05, + "loss": 1.164, + "num_input_tokens_seen": 264397992, + "step": 3952 + }, + { + "epoch": 0.44845390070921987, + "loss": 1.195197582244873, + "loss_ce": 0.007697663269937038, + "loss_iou": 0.490234375, + "loss_num": 0.0419921875, + "loss_xval": 1.1875, + "num_input_tokens_seen": 264397992, + "step": 3952 + }, + { + "epoch": 0.44856737588652484, + "grad_norm": 28.037748336791992, + "learning_rate": 5e-05, + "loss": 1.5187, + "num_input_tokens_seen": 264465116, + "step": 3953 + }, + { + "epoch": 0.44856737588652484, + "loss": 1.554029941558838, + "loss_ce": 0.005201810505241156, + "loss_iou": 0.6484375, + "loss_num": 0.0498046875, + "loss_xval": 1.546875, + "num_input_tokens_seen": 264465116, + "step": 3953 + }, + { + "epoch": 0.44868085106382977, + "grad_norm": 11.676199913024902, + "learning_rate": 5e-05, + "loss": 1.2024, + "num_input_tokens_seen": 264531712, + "step": 3954 + }, + { + "epoch": 0.44868085106382977, + "loss": 1.1371700763702393, + "loss_ce": 0.0033811056055128574, + "loss_iou": 0.482421875, + "loss_num": 0.0341796875, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 264531712, + "step": 3954 + }, + { + "epoch": 0.44879432624113474, + "grad_norm": 22.500112533569336, + "learning_rate": 5e-05, + "loss": 1.2158, + "num_input_tokens_seen": 264598748, + "step": 3955 + }, + { + "epoch": 0.44879432624113474, + "loss": 1.322205901145935, + "loss_ce": 0.005311394110321999, + "loss_iou": 0.52734375, + "loss_num": 0.0517578125, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 264598748, + "step": 3955 + }, + { + "epoch": 0.4489078014184397, + "grad_norm": 43.271785736083984, + "learning_rate": 5e-05, + "loss": 1.2234, + "num_input_tokens_seen": 264665944, + "step": 3956 + }, + { + "epoch": 0.4489078014184397, + "loss": 1.2653599977493286, + "loss_ce": 0.008035771548748016, + "loss_iou": 0.490234375, + "loss_num": 0.05517578125, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 264665944, + "step": 3956 + }, + { + "epoch": 0.4490212765957447, + "grad_norm": 46.66841125488281, + "learning_rate": 5e-05, + "loss": 1.3601, + "num_input_tokens_seen": 264733828, + "step": 3957 + }, + { + "epoch": 0.4490212765957447, + "loss": 1.2557377815246582, + "loss_ce": 0.0069584595039486885, + "loss_iou": 0.478515625, + "loss_num": 0.05810546875, + "loss_xval": 1.25, + "num_input_tokens_seen": 264733828, + "step": 3957 + }, + { + "epoch": 0.4491347517730496, + "grad_norm": 37.405120849609375, + "learning_rate": 5e-05, + "loss": 1.2037, + "num_input_tokens_seen": 264800516, + "step": 3958 + }, + { + "epoch": 0.4491347517730496, + "loss": 1.194948673248291, + "loss_ce": 0.00793697964400053, + "loss_iou": 0.494140625, + "loss_num": 0.0400390625, + "loss_xval": 1.1875, + "num_input_tokens_seen": 264800516, + "step": 3958 + }, + { + "epoch": 0.4492482269503546, + "grad_norm": 23.529691696166992, + "learning_rate": 5e-05, + "loss": 1.4713, + "num_input_tokens_seen": 264867524, + "step": 3959 + }, + { + "epoch": 0.4492482269503546, + "loss": 1.2460665702819824, + "loss_ce": 0.007297100033611059, + "loss_iou": 0.52734375, + "loss_num": 0.03759765625, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 264867524, + "step": 3959 + }, + { + "epoch": 0.4493617021276596, + "grad_norm": 22.873592376708984, + "learning_rate": 5e-05, + "loss": 1.2599, + "num_input_tokens_seen": 264934376, + "step": 3960 + }, + { + "epoch": 0.4493617021276596, + "loss": 1.4032576084136963, + "loss_ce": 0.006773203145712614, + "loss_iou": 0.56640625, + "loss_num": 0.05322265625, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 264934376, + "step": 3960 + }, + { + "epoch": 0.44947517730496456, + "grad_norm": 31.737096786499023, + "learning_rate": 5e-05, + "loss": 1.2533, + "num_input_tokens_seen": 265001776, + "step": 3961 + }, + { + "epoch": 0.44947517730496456, + "loss": 1.2417937517166138, + "loss_ce": 0.005953874904662371, + "loss_iou": 0.4921875, + "loss_num": 0.050537109375, + "loss_xval": 1.234375, + "num_input_tokens_seen": 265001776, + "step": 3961 + }, + { + "epoch": 0.4495886524822695, + "grad_norm": 28.956512451171875, + "learning_rate": 5e-05, + "loss": 1.3914, + "num_input_tokens_seen": 265068228, + "step": 3962 + }, + { + "epoch": 0.4495886524822695, + "loss": 1.318879246711731, + "loss_ce": 0.005890933331102133, + "loss_iou": 0.48828125, + "loss_num": 0.06689453125, + "loss_xval": 1.3125, + "num_input_tokens_seen": 265068228, + "step": 3962 + }, + { + "epoch": 0.44970212765957446, + "grad_norm": 27.841144561767578, + "learning_rate": 5e-05, + "loss": 1.3433, + "num_input_tokens_seen": 265134644, + "step": 3963 + }, + { + "epoch": 0.44970212765957446, + "loss": 1.0937156677246094, + "loss_ce": 0.004848474636673927, + "loss_iou": 0.44921875, + "loss_num": 0.038330078125, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 265134644, + "step": 3963 + }, + { + "epoch": 0.44981560283687944, + "grad_norm": 22.93748664855957, + "learning_rate": 5e-05, + "loss": 1.3213, + "num_input_tokens_seen": 265201608, + "step": 3964 + }, + { + "epoch": 0.44981560283687944, + "loss": 1.3375048637390137, + "loss_ce": 0.002544023096561432, + "loss_iou": 0.56640625, + "loss_num": 0.03955078125, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 265201608, + "step": 3964 + }, + { + "epoch": 0.4499290780141844, + "grad_norm": 14.928388595581055, + "learning_rate": 5e-05, + "loss": 1.2922, + "num_input_tokens_seen": 265269404, + "step": 3965 + }, + { + "epoch": 0.4499290780141844, + "loss": 1.2188442945480347, + "loss_ce": 0.006441938690841198, + "loss_iou": 0.50390625, + "loss_num": 0.040771484375, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 265269404, + "step": 3965 + }, + { + "epoch": 0.45004255319148934, + "grad_norm": 15.395776748657227, + "learning_rate": 5e-05, + "loss": 1.2562, + "num_input_tokens_seen": 265336624, + "step": 3966 + }, + { + "epoch": 0.45004255319148934, + "loss": 1.2266435623168945, + "loss_ce": 0.006917031016200781, + "loss_iou": 0.515625, + "loss_num": 0.0380859375, + "loss_xval": 1.21875, + "num_input_tokens_seen": 265336624, + "step": 3966 + }, + { + "epoch": 0.4501560283687943, + "grad_norm": 17.22894859313965, + "learning_rate": 5e-05, + "loss": 1.0609, + "num_input_tokens_seen": 265404080, + "step": 3967 + }, + { + "epoch": 0.4501560283687943, + "loss": 0.9811216592788696, + "loss_ce": 0.003094319486990571, + "loss_iou": 0.423828125, + "loss_num": 0.0262451171875, + "loss_xval": 0.9765625, + "num_input_tokens_seen": 265404080, + "step": 3967 + }, + { + "epoch": 0.4502695035460993, + "grad_norm": 28.528091430664062, + "learning_rate": 5e-05, + "loss": 1.2598, + "num_input_tokens_seen": 265470052, + "step": 3968 + }, + { + "epoch": 0.4502695035460993, + "loss": 1.1823835372924805, + "loss_ce": 0.007090515457093716, + "loss_iou": 0.484375, + "loss_num": 0.040771484375, + "loss_xval": 1.171875, + "num_input_tokens_seen": 265470052, + "step": 3968 + }, + { + "epoch": 0.45038297872340427, + "grad_norm": 29.160390853881836, + "learning_rate": 5e-05, + "loss": 1.4179, + "num_input_tokens_seen": 265537152, + "step": 3969 + }, + { + "epoch": 0.45038297872340427, + "loss": 1.3681161403656006, + "loss_ce": 0.003858390264213085, + "loss_iou": 0.55859375, + "loss_num": 0.048828125, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 265537152, + "step": 3969 + }, + { + "epoch": 0.4504964539007092, + "grad_norm": 35.01380157470703, + "learning_rate": 5e-05, + "loss": 1.386, + "num_input_tokens_seen": 265604088, + "step": 3970 + }, + { + "epoch": 0.4504964539007092, + "loss": 1.3545353412628174, + "loss_ce": 0.0078555503860116, + "loss_iou": 0.53515625, + "loss_num": 0.05517578125, + "loss_xval": 1.34375, + "num_input_tokens_seen": 265604088, + "step": 3970 + }, + { + "epoch": 0.45060992907801417, + "grad_norm": 47.05083084106445, + "learning_rate": 5e-05, + "loss": 1.5652, + "num_input_tokens_seen": 265670268, + "step": 3971 + }, + { + "epoch": 0.45060992907801417, + "loss": 1.5878045558929443, + "loss_ce": 0.009679503738880157, + "loss_iou": 0.66015625, + "loss_num": 0.052001953125, + "loss_xval": 1.578125, + "num_input_tokens_seen": 265670268, + "step": 3971 + }, + { + "epoch": 0.45072340425531915, + "grad_norm": 18.31070899963379, + "learning_rate": 5e-05, + "loss": 1.0176, + "num_input_tokens_seen": 265736624, + "step": 3972 + }, + { + "epoch": 0.45072340425531915, + "loss": 1.029491901397705, + "loss_ce": 0.004589492455124855, + "loss_iou": 0.44140625, + "loss_num": 0.0283203125, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 265736624, + "step": 3972 + }, + { + "epoch": 0.4508368794326241, + "grad_norm": 23.517831802368164, + "learning_rate": 5e-05, + "loss": 1.1892, + "num_input_tokens_seen": 265803588, + "step": 3973 + }, + { + "epoch": 0.4508368794326241, + "loss": 1.3193047046661377, + "loss_ce": 0.00485166534781456, + "loss_iou": 0.5546875, + "loss_num": 0.040771484375, + "loss_xval": 1.3125, + "num_input_tokens_seen": 265803588, + "step": 3973 + }, + { + "epoch": 0.4509503546099291, + "grad_norm": 37.1669807434082, + "learning_rate": 5e-05, + "loss": 1.4576, + "num_input_tokens_seen": 265869228, + "step": 3974 + }, + { + "epoch": 0.4509503546099291, + "loss": 1.5240345001220703, + "loss_ce": 0.006456312257796526, + "loss_iou": 0.59375, + "loss_num": 0.06640625, + "loss_xval": 1.515625, + "num_input_tokens_seen": 265869228, + "step": 3974 + }, + { + "epoch": 0.451063829787234, + "grad_norm": 23.058677673339844, + "learning_rate": 5e-05, + "loss": 1.8154, + "num_input_tokens_seen": 265937008, + "step": 3975 + }, + { + "epoch": 0.451063829787234, + "loss": 1.6970279216766357, + "loss_ce": 0.007574864663183689, + "loss_iou": 0.7109375, + "loss_num": 0.053466796875, + "loss_xval": 1.6875, + "num_input_tokens_seen": 265937008, + "step": 3975 + }, + { + "epoch": 0.451177304964539, + "grad_norm": 14.27718734741211, + "learning_rate": 5e-05, + "loss": 1.156, + "num_input_tokens_seen": 266002532, + "step": 3976 + }, + { + "epoch": 0.451177304964539, + "loss": 1.0750603675842285, + "loss_ce": 0.0030388233717530966, + "loss_iou": 0.419921875, + "loss_num": 0.046630859375, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 266002532, + "step": 3976 + }, + { + "epoch": 0.451290780141844, + "grad_norm": 27.317806243896484, + "learning_rate": 5e-05, + "loss": 0.9758, + "num_input_tokens_seen": 266070032, + "step": 3977 + }, + { + "epoch": 0.451290780141844, + "loss": 0.8948764801025391, + "loss_ce": 0.008157732896506786, + "loss_iou": 0.365234375, + "loss_num": 0.031494140625, + "loss_xval": 0.88671875, + "num_input_tokens_seen": 266070032, + "step": 3977 + }, + { + "epoch": 0.45140425531914896, + "grad_norm": 20.42237091064453, + "learning_rate": 5e-05, + "loss": 1.2899, + "num_input_tokens_seen": 266136316, + "step": 3978 + }, + { + "epoch": 0.45140425531914896, + "loss": 1.2093086242675781, + "loss_ce": 0.01076123584061861, + "loss_iou": 0.466796875, + "loss_num": 0.052978515625, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 266136316, + "step": 3978 + }, + { + "epoch": 0.4515177304964539, + "grad_norm": 40.331329345703125, + "learning_rate": 5e-05, + "loss": 1.1054, + "num_input_tokens_seen": 266203012, + "step": 3979 + }, + { + "epoch": 0.4515177304964539, + "loss": 1.0336203575134277, + "loss_ce": 0.008718025870621204, + "loss_iou": 0.36328125, + "loss_num": 0.0595703125, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 266203012, + "step": 3979 + }, + { + "epoch": 0.45163120567375886, + "grad_norm": 25.080961227416992, + "learning_rate": 5e-05, + "loss": 1.3046, + "num_input_tokens_seen": 266268636, + "step": 3980 + }, + { + "epoch": 0.45163120567375886, + "loss": 1.2813622951507568, + "loss_ce": 0.007924706675112247, + "loss_iou": 0.53515625, + "loss_num": 0.040771484375, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 266268636, + "step": 3980 + }, + { + "epoch": 0.45174468085106384, + "grad_norm": 25.330398559570312, + "learning_rate": 5e-05, + "loss": 1.1006, + "num_input_tokens_seen": 266335452, + "step": 3981 + }, + { + "epoch": 0.45174468085106384, + "loss": 1.1692241430282593, + "loss_ce": 0.0022319601848721504, + "loss_iou": 0.51171875, + "loss_num": 0.0284423828125, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 266335452, + "step": 3981 + }, + { + "epoch": 0.4518581560283688, + "grad_norm": 57.71709442138672, + "learning_rate": 5e-05, + "loss": 1.2907, + "num_input_tokens_seen": 266401504, + "step": 3982 + }, + { + "epoch": 0.4518581560283688, + "loss": 1.2030425071716309, + "loss_ce": 0.002374220173805952, + "loss_iou": 0.455078125, + "loss_num": 0.058349609375, + "loss_xval": 1.203125, + "num_input_tokens_seen": 266401504, + "step": 3982 + }, + { + "epoch": 0.45197163120567374, + "grad_norm": 23.140512466430664, + "learning_rate": 5e-05, + "loss": 1.1352, + "num_input_tokens_seen": 266469720, + "step": 3983 + }, + { + "epoch": 0.45197163120567374, + "loss": 1.1877516508102417, + "loss_ce": 0.002204843331128359, + "loss_iou": 0.49609375, + "loss_num": 0.038818359375, + "loss_xval": 1.1875, + "num_input_tokens_seen": 266469720, + "step": 3983 + }, + { + "epoch": 0.4520851063829787, + "grad_norm": 65.07368469238281, + "learning_rate": 5e-05, + "loss": 1.1757, + "num_input_tokens_seen": 266536760, + "step": 3984 + }, + { + "epoch": 0.4520851063829787, + "loss": 1.2559013366699219, + "loss_ce": 0.0029716971330344677, + "loss_iou": 0.5390625, + "loss_num": 0.035400390625, + "loss_xval": 1.25, + "num_input_tokens_seen": 266536760, + "step": 3984 + }, + { + "epoch": 0.4521985815602837, + "grad_norm": 48.29365921020508, + "learning_rate": 5e-05, + "loss": 1.334, + "num_input_tokens_seen": 266603928, + "step": 3985 + }, + { + "epoch": 0.4521985815602837, + "loss": 1.311265230178833, + "loss_ce": 0.00462458236142993, + "loss_iou": 0.5546875, + "loss_num": 0.038818359375, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 266603928, + "step": 3985 + }, + { + "epoch": 0.4523120567375887, + "grad_norm": 20.3747501373291, + "learning_rate": 5e-05, + "loss": 1.1605, + "num_input_tokens_seen": 266669272, + "step": 3986 + }, + { + "epoch": 0.4523120567375887, + "loss": 1.268519401550293, + "loss_ce": 0.0048475004732608795, + "loss_iou": 0.52734375, + "loss_num": 0.04248046875, + "loss_xval": 1.265625, + "num_input_tokens_seen": 266669272, + "step": 3986 + }, + { + "epoch": 0.4524255319148936, + "grad_norm": 25.4898681640625, + "learning_rate": 5e-05, + "loss": 1.0923, + "num_input_tokens_seen": 266735340, + "step": 3987 + }, + { + "epoch": 0.4524255319148936, + "loss": 1.0987272262573242, + "loss_ce": 0.006930400617420673, + "loss_iou": 0.4453125, + "loss_num": 0.0400390625, + "loss_xval": 1.09375, + "num_input_tokens_seen": 266735340, + "step": 3987 + }, + { + "epoch": 0.4525390070921986, + "grad_norm": 33.78923034667969, + "learning_rate": 5e-05, + "loss": 1.0202, + "num_input_tokens_seen": 266802708, + "step": 3988 + }, + { + "epoch": 0.4525390070921986, + "loss": 1.076154112815857, + "loss_ce": 0.0053533632308244705, + "loss_iou": 0.4453125, + "loss_num": 0.036376953125, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 266802708, + "step": 3988 + }, + { + "epoch": 0.45265248226950355, + "grad_norm": 30.1629695892334, + "learning_rate": 5e-05, + "loss": 1.3165, + "num_input_tokens_seen": 266869496, + "step": 3989 + }, + { + "epoch": 0.45265248226950355, + "loss": 1.2349885702133179, + "loss_ce": 0.006350880488753319, + "loss_iou": 0.458984375, + "loss_num": 0.061767578125, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 266869496, + "step": 3989 + }, + { + "epoch": 0.45276595744680853, + "grad_norm": 49.88169860839844, + "learning_rate": 5e-05, + "loss": 1.3392, + "num_input_tokens_seen": 266936008, + "step": 3990 + }, + { + "epoch": 0.45276595744680853, + "loss": 1.166137456893921, + "loss_ce": 0.00970437005162239, + "loss_iou": 0.4765625, + "loss_num": 0.04052734375, + "loss_xval": 1.15625, + "num_input_tokens_seen": 266936008, + "step": 3990 + }, + { + "epoch": 0.45287943262411345, + "grad_norm": 20.027814865112305, + "learning_rate": 5e-05, + "loss": 1.2876, + "num_input_tokens_seen": 267002120, + "step": 3991 + }, + { + "epoch": 0.45287943262411345, + "loss": 1.290771722793579, + "loss_ce": 0.0046388679184019566, + "loss_iou": 0.494140625, + "loss_num": 0.059814453125, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 267002120, + "step": 3991 + }, + { + "epoch": 0.45299290780141843, + "grad_norm": 20.326398849487305, + "learning_rate": 5e-05, + "loss": 1.2665, + "num_input_tokens_seen": 267069324, + "step": 3992 + }, + { + "epoch": 0.45299290780141843, + "loss": 1.188624620437622, + "loss_ce": 0.00844887737184763, + "loss_iou": 0.482421875, + "loss_num": 0.04345703125, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 267069324, + "step": 3992 + }, + { + "epoch": 0.4531063829787234, + "grad_norm": 24.514963150024414, + "learning_rate": 5e-05, + "loss": 1.1772, + "num_input_tokens_seen": 267135564, + "step": 3993 + }, + { + "epoch": 0.4531063829787234, + "loss": 1.1874992847442627, + "loss_ce": 0.004393856972455978, + "loss_iou": 0.46875, + "loss_num": 0.049560546875, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 267135564, + "step": 3993 + }, + { + "epoch": 0.4532198581560284, + "grad_norm": 31.72841453552246, + "learning_rate": 5e-05, + "loss": 1.5324, + "num_input_tokens_seen": 267201876, + "step": 3994 + }, + { + "epoch": 0.4532198581560284, + "loss": 1.5358178615570068, + "loss_ce": 0.010915612801909447, + "loss_iou": 0.6328125, + "loss_num": 0.05224609375, + "loss_xval": 1.5234375, + "num_input_tokens_seen": 267201876, + "step": 3994 + }, + { + "epoch": 0.4533333333333333, + "grad_norm": 37.050079345703125, + "learning_rate": 5e-05, + "loss": 1.3297, + "num_input_tokens_seen": 267268824, + "step": 3995 + }, + { + "epoch": 0.4533333333333333, + "loss": 1.2929099798202515, + "loss_ce": 0.008241998963057995, + "loss_iou": 0.578125, + "loss_num": 0.0257568359375, + "loss_xval": 1.28125, + "num_input_tokens_seen": 267268824, + "step": 3995 + }, + { + "epoch": 0.4534468085106383, + "grad_norm": 25.55604362487793, + "learning_rate": 5e-05, + "loss": 1.3309, + "num_input_tokens_seen": 267335308, + "step": 3996 + }, + { + "epoch": 0.4534468085106383, + "loss": 1.3370139598846436, + "loss_ce": 0.007424104493111372, + "loss_iou": 0.5703125, + "loss_num": 0.0380859375, + "loss_xval": 1.328125, + "num_input_tokens_seen": 267335308, + "step": 3996 + }, + { + "epoch": 0.45356028368794327, + "grad_norm": 44.91867446899414, + "learning_rate": 5e-05, + "loss": 1.1443, + "num_input_tokens_seen": 267402416, + "step": 3997 + }, + { + "epoch": 0.45356028368794327, + "loss": 1.1906543970108032, + "loss_ce": 0.005107440985739231, + "loss_iou": 0.515625, + "loss_num": 0.031494140625, + "loss_xval": 1.1875, + "num_input_tokens_seen": 267402416, + "step": 3997 + }, + { + "epoch": 0.45367375886524824, + "grad_norm": 70.6112289428711, + "learning_rate": 5e-05, + "loss": 1.0181, + "num_input_tokens_seen": 267468832, + "step": 3998 + }, + { + "epoch": 0.45367375886524824, + "loss": 1.1243751049041748, + "loss_ce": 0.006699301302433014, + "loss_iou": 0.5078125, + "loss_num": 0.020751953125, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 267468832, + "step": 3998 + }, + { + "epoch": 0.45378723404255317, + "grad_norm": 30.50783920288086, + "learning_rate": 5e-05, + "loss": 1.2861, + "num_input_tokens_seen": 267535896, + "step": 3999 + }, + { + "epoch": 0.45378723404255317, + "loss": 1.2761125564575195, + "loss_ce": 0.005116515792906284, + "loss_iou": 0.53125, + "loss_num": 0.041748046875, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 267535896, + "step": 3999 + }, + { + "epoch": 0.45390070921985815, + "grad_norm": 22.262807846069336, + "learning_rate": 5e-05, + "loss": 1.1101, + "num_input_tokens_seen": 267602320, + "step": 4000 + }, + { + "epoch": 0.45390070921985815, + "eval_seeclick_CIoU": 0.3861577659845352, + "eval_seeclick_GIoU": 0.3719574064016342, + "eval_seeclick_IoU": 0.47716131806373596, + "eval_seeclick_MAE_all": 0.15014147013425827, + "eval_seeclick_MAE_h": 0.05138102360069752, + "eval_seeclick_MAE_w": 0.10272932425141335, + "eval_seeclick_MAE_x_boxes": 0.2479938119649887, + "eval_seeclick_MAE_y_boxes": 0.12701904401183128, + "eval_seeclick_NUM_probability": 0.9999577403068542, + "eval_seeclick_inside_bbox": 0.6770833432674408, + "eval_seeclick_loss": 2.3840577602386475, + "eval_seeclick_loss_ce": 0.014085615985095501, + "eval_seeclick_loss_iou": 0.8402099609375, + "eval_seeclick_loss_num": 0.14776992797851562, + "eval_seeclick_loss_xval": 2.419189453125, + "eval_seeclick_runtime": 69.1547, + "eval_seeclick_samples_per_second": 0.68, + "eval_seeclick_steps_per_second": 0.029, + "num_input_tokens_seen": 267602320, + "step": 4000 + }, + { + "epoch": 0.45390070921985815, + "eval_icons_CIoU": 0.5045610666275024, + "eval_icons_GIoU": 0.4975966513156891, + "eval_icons_IoU": 0.537529245018959, + "eval_icons_MAE_all": 0.13836603611707687, + "eval_icons_MAE_h": 0.0996682271361351, + "eval_icons_MAE_w": 0.15849052369594574, + "eval_icons_MAE_x_boxes": 0.099407859146595, + "eval_icons_MAE_y_boxes": 0.06123197823762894, + "eval_icons_NUM_probability": 0.9999710619449615, + "eval_icons_inside_bbox": 0.8263888955116272, + "eval_icons_loss": 2.348884344100952, + "eval_icons_loss_ce": 0.00011979043847532012, + "eval_icons_loss_iou": 0.819091796875, + "eval_icons_loss_num": 0.13355255126953125, + "eval_icons_loss_xval": 2.306640625, + "eval_icons_runtime": 69.1089, + "eval_icons_samples_per_second": 0.723, + "eval_icons_steps_per_second": 0.029, + "num_input_tokens_seen": 267602320, + "step": 4000 + }, + { + "epoch": 0.45390070921985815, + "eval_screenspot_CIoU": 0.29416482150554657, + "eval_screenspot_GIoU": 0.2694934805234273, + "eval_screenspot_IoU": 0.39218910535176593, + "eval_screenspot_MAE_all": 0.204283540447553, + "eval_screenspot_MAE_h": 0.11700431754191716, + "eval_screenspot_MAE_w": 0.15140802164872488, + "eval_screenspot_MAE_x_boxes": 0.32048242290814716, + "eval_screenspot_MAE_y_boxes": 0.10343605776627858, + "eval_screenspot_NUM_probability": 0.9999204874038696, + "eval_screenspot_inside_bbox": 0.6120833357175192, + "eval_screenspot_loss": 2.9047248363494873, + "eval_screenspot_loss_ce": 0.018920354545116425, + "eval_screenspot_loss_iou": 0.9401041666666666, + "eval_screenspot_loss_num": 0.20920817057291666, + "eval_screenspot_loss_xval": 2.9248046875, + "eval_screenspot_runtime": 120.195, + "eval_screenspot_samples_per_second": 0.74, + "eval_screenspot_steps_per_second": 0.025, + "num_input_tokens_seen": 267602320, + "step": 4000 + }, + { + "epoch": 0.45390070921985815, + "eval_compot_CIoU": 0.2918728291988373, + "eval_compot_GIoU": 0.2740505263209343, + "eval_compot_IoU": 0.36475537717342377, + "eval_compot_MAE_all": 0.22796060889959335, + "eval_compot_MAE_h": 0.2079058736562729, + "eval_compot_MAE_w": 0.227742001414299, + "eval_compot_MAE_x_boxes": 0.17945845425128937, + "eval_compot_MAE_y_boxes": 0.11012450605630875, + "eval_compot_NUM_probability": 0.9999523460865021, + "eval_compot_inside_bbox": 0.5798611044883728, + "eval_compot_loss": 3.099078416824341, + "eval_compot_loss_ce": 0.005438861204311252, + "eval_compot_loss_iou": 1.001708984375, + "eval_compot_loss_num": 0.23980712890625, + "eval_compot_loss_xval": 3.2021484375, + "eval_compot_runtime": 70.5065, + "eval_compot_samples_per_second": 0.709, + "eval_compot_steps_per_second": 0.028, + "num_input_tokens_seen": 267602320, + "step": 4000 + }, + { + "epoch": 0.45390070921985815, + "loss": 3.2501273155212402, + "loss_ce": 0.004033475648611784, + "loss_iou": 0.9921875, + "loss_num": 0.251953125, + "loss_xval": 3.25, + "num_input_tokens_seen": 267602320, + "step": 4000 + }, + { + "epoch": 0.4540141843971631, + "grad_norm": 41.28748321533203, + "learning_rate": 5e-05, + "loss": 1.2887, + "num_input_tokens_seen": 267670288, + "step": 4001 + }, + { + "epoch": 0.4540141843971631, + "loss": 1.4388360977172852, + "loss_ce": 0.0057306354865431786, + "loss_iou": 0.546875, + "loss_num": 0.068359375, + "loss_xval": 1.4296875, + "num_input_tokens_seen": 267670288, + "step": 4001 + }, + { + "epoch": 0.4541276595744681, + "grad_norm": 32.1502685546875, + "learning_rate": 5e-05, + "loss": 1.2477, + "num_input_tokens_seen": 267737348, + "step": 4002 + }, + { + "epoch": 0.4541276595744681, + "loss": 1.406829833984375, + "loss_ce": 0.004974382929503918, + "loss_iou": 0.578125, + "loss_num": 0.04931640625, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 267737348, + "step": 4002 + }, + { + "epoch": 0.454241134751773, + "grad_norm": 22.795907974243164, + "learning_rate": 5e-05, + "loss": 1.1465, + "num_input_tokens_seen": 267803232, + "step": 4003 + }, + { + "epoch": 0.454241134751773, + "loss": 1.2924981117248535, + "loss_ce": 0.005388785153627396, + "loss_iou": 0.52734375, + "loss_num": 0.046875, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 267803232, + "step": 4003 + }, + { + "epoch": 0.454354609929078, + "grad_norm": 68.9832992553711, + "learning_rate": 5e-05, + "loss": 1.1309, + "num_input_tokens_seen": 267870164, + "step": 4004 + }, + { + "epoch": 0.454354609929078, + "loss": 1.1247918605804443, + "loss_ce": 0.0066278367303311825, + "loss_iou": 0.466796875, + "loss_num": 0.037109375, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 267870164, + "step": 4004 + }, + { + "epoch": 0.454468085106383, + "grad_norm": 40.917171478271484, + "learning_rate": 5e-05, + "loss": 1.0566, + "num_input_tokens_seen": 267937468, + "step": 4005 + }, + { + "epoch": 0.454468085106383, + "loss": 1.1523098945617676, + "loss_ce": 0.0038724930491298437, + "loss_iou": 0.41796875, + "loss_num": 0.0625, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 267937468, + "step": 4005 + }, + { + "epoch": 0.45458156028368796, + "grad_norm": 28.398988723754883, + "learning_rate": 5e-05, + "loss": 1.2949, + "num_input_tokens_seen": 268004276, + "step": 4006 + }, + { + "epoch": 0.45458156028368796, + "loss": 1.2874243259429932, + "loss_ce": 0.0027562982868403196, + "loss_iou": 0.5390625, + "loss_num": 0.042236328125, + "loss_xval": 1.28125, + "num_input_tokens_seen": 268004276, + "step": 4006 + }, + { + "epoch": 0.4546950354609929, + "grad_norm": 24.204008102416992, + "learning_rate": 5e-05, + "loss": 1.2846, + "num_input_tokens_seen": 268071360, + "step": 4007 + }, + { + "epoch": 0.4546950354609929, + "loss": 1.3038997650146484, + "loss_ce": 0.011419232934713364, + "loss_iou": 0.51953125, + "loss_num": 0.05029296875, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 268071360, + "step": 4007 + }, + { + "epoch": 0.45480851063829786, + "grad_norm": 28.38566017150879, + "learning_rate": 5e-05, + "loss": 1.4415, + "num_input_tokens_seen": 268138600, + "step": 4008 + }, + { + "epoch": 0.45480851063829786, + "loss": 1.3594064712524414, + "loss_ce": 0.003937702160328627, + "loss_iou": 0.57421875, + "loss_num": 0.040771484375, + "loss_xval": 1.359375, + "num_input_tokens_seen": 268138600, + "step": 4008 + }, + { + "epoch": 0.45492198581560284, + "grad_norm": 24.470821380615234, + "learning_rate": 5e-05, + "loss": 1.4853, + "num_input_tokens_seen": 268204476, + "step": 4009 + }, + { + "epoch": 0.45492198581560284, + "loss": 1.5145964622497559, + "loss_ce": 0.007760519161820412, + "loss_iou": 0.5859375, + "loss_num": 0.06787109375, + "loss_xval": 1.5078125, + "num_input_tokens_seen": 268204476, + "step": 4009 + }, + { + "epoch": 0.4550354609929078, + "grad_norm": 24.091373443603516, + "learning_rate": 5e-05, + "loss": 1.352, + "num_input_tokens_seen": 268271024, + "step": 4010 + }, + { + "epoch": 0.4550354609929078, + "loss": 1.3019554615020752, + "loss_ce": 0.00459220539778471, + "loss_iou": 0.55078125, + "loss_num": 0.039306640625, + "loss_xval": 1.296875, + "num_input_tokens_seen": 268271024, + "step": 4010 + }, + { + "epoch": 0.4551489361702128, + "grad_norm": 24.440412521362305, + "learning_rate": 5e-05, + "loss": 1.2432, + "num_input_tokens_seen": 268338136, + "step": 4011 + }, + { + "epoch": 0.4551489361702128, + "loss": 1.1804425716400146, + "loss_ce": 0.007102716248482466, + "loss_iou": 0.498046875, + "loss_num": 0.035400390625, + "loss_xval": 1.171875, + "num_input_tokens_seen": 268338136, + "step": 4011 + }, + { + "epoch": 0.4552624113475177, + "grad_norm": 22.3687801361084, + "learning_rate": 5e-05, + "loss": 1.3075, + "num_input_tokens_seen": 268404896, + "step": 4012 + }, + { + "epoch": 0.4552624113475177, + "loss": 1.3718141317367554, + "loss_ce": 0.007556311786174774, + "loss_iou": 0.55078125, + "loss_num": 0.05322265625, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 268404896, + "step": 4012 + }, + { + "epoch": 0.4553758865248227, + "grad_norm": 26.711294174194336, + "learning_rate": 5e-05, + "loss": 1.312, + "num_input_tokens_seen": 268472120, + "step": 4013 + }, + { + "epoch": 0.4553758865248227, + "loss": 1.3329081535339355, + "loss_ce": 0.004294881597161293, + "loss_iou": 0.5546875, + "loss_num": 0.04443359375, + "loss_xval": 1.328125, + "num_input_tokens_seen": 268472120, + "step": 4013 + }, + { + "epoch": 0.45548936170212767, + "grad_norm": 55.22983932495117, + "learning_rate": 5e-05, + "loss": 1.5489, + "num_input_tokens_seen": 268540108, + "step": 4014 + }, + { + "epoch": 0.45548936170212767, + "loss": 1.5134241580963135, + "loss_ce": 0.010494494810700417, + "loss_iou": 0.578125, + "loss_num": 0.068359375, + "loss_xval": 1.5, + "num_input_tokens_seen": 268540108, + "step": 4014 + }, + { + "epoch": 0.45560283687943265, + "grad_norm": 36.640380859375, + "learning_rate": 5e-05, + "loss": 1.2023, + "num_input_tokens_seen": 268607644, + "step": 4015 + }, + { + "epoch": 0.45560283687943265, + "loss": 1.1000241041183472, + "loss_ce": 0.006274166516959667, + "loss_iou": 0.482421875, + "loss_num": 0.0260009765625, + "loss_xval": 1.09375, + "num_input_tokens_seen": 268607644, + "step": 4015 + }, + { + "epoch": 0.45571631205673757, + "grad_norm": 35.433067321777344, + "learning_rate": 5e-05, + "loss": 1.2518, + "num_input_tokens_seen": 268675296, + "step": 4016 + }, + { + "epoch": 0.45571631205673757, + "loss": 1.2095667123794556, + "loss_ce": 0.003023667261004448, + "loss_iou": 0.53515625, + "loss_num": 0.0269775390625, + "loss_xval": 1.203125, + "num_input_tokens_seen": 268675296, + "step": 4016 + }, + { + "epoch": 0.45582978723404255, + "grad_norm": 27.163259506225586, + "learning_rate": 5e-05, + "loss": 1.304, + "num_input_tokens_seen": 268742088, + "step": 4017 + }, + { + "epoch": 0.45582978723404255, + "loss": 1.0856212377548218, + "loss_ce": 0.01067007053643465, + "loss_iou": 0.43359375, + "loss_num": 0.0419921875, + "loss_xval": 1.078125, + "num_input_tokens_seen": 268742088, + "step": 4017 + }, + { + "epoch": 0.4559432624113475, + "grad_norm": 28.282894134521484, + "learning_rate": 5e-05, + "loss": 1.3266, + "num_input_tokens_seen": 268808888, + "step": 4018 + }, + { + "epoch": 0.4559432624113475, + "loss": 1.2835344076156616, + "loss_ce": 0.00814381055533886, + "loss_iou": 0.5078125, + "loss_num": 0.052001953125, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 268808888, + "step": 4018 + }, + { + "epoch": 0.4560567375886525, + "grad_norm": 30.046329498291016, + "learning_rate": 5e-05, + "loss": 1.2949, + "num_input_tokens_seen": 268875404, + "step": 4019 + }, + { + "epoch": 0.4560567375886525, + "loss": 1.2758122682571411, + "loss_ce": 0.006769356317818165, + "loss_iou": 0.5390625, + "loss_num": 0.038330078125, + "loss_xval": 1.265625, + "num_input_tokens_seen": 268875404, + "step": 4019 + }, + { + "epoch": 0.45617021276595743, + "grad_norm": 24.812175750732422, + "learning_rate": 5e-05, + "loss": 1.4461, + "num_input_tokens_seen": 268942520, + "step": 4020 + }, + { + "epoch": 0.45617021276595743, + "loss": 1.4196116924285889, + "loss_ce": 0.011164514347910881, + "loss_iou": 0.57421875, + "loss_num": 0.0517578125, + "loss_xval": 1.40625, + "num_input_tokens_seen": 268942520, + "step": 4020 + }, + { + "epoch": 0.4562836879432624, + "grad_norm": 13.625162124633789, + "learning_rate": 5e-05, + "loss": 1.0906, + "num_input_tokens_seen": 269010484, + "step": 4021 + }, + { + "epoch": 0.4562836879432624, + "loss": 0.9341651201248169, + "loss_ce": 0.002951746340841055, + "loss_iou": 0.408203125, + "loss_num": 0.0230712890625, + "loss_xval": 0.9296875, + "num_input_tokens_seen": 269010484, + "step": 4021 + }, + { + "epoch": 0.4563971631205674, + "grad_norm": 36.7103157043457, + "learning_rate": 5e-05, + "loss": 1.2307, + "num_input_tokens_seen": 269076732, + "step": 4022 + }, + { + "epoch": 0.4563971631205674, + "loss": 1.2338106632232666, + "loss_ce": 0.00334182963706553, + "loss_iou": 0.52734375, + "loss_num": 0.03564453125, + "loss_xval": 1.234375, + "num_input_tokens_seen": 269076732, + "step": 4022 + }, + { + "epoch": 0.45651063829787236, + "grad_norm": 92.6868667602539, + "learning_rate": 5e-05, + "loss": 1.3361, + "num_input_tokens_seen": 269143812, + "step": 4023 + }, + { + "epoch": 0.45651063829787236, + "loss": 1.1804176568984985, + "loss_ce": 0.003659783164039254, + "loss_iou": 0.5, + "loss_num": 0.03515625, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 269143812, + "step": 4023 + }, + { + "epoch": 0.4566241134751773, + "grad_norm": 28.96554183959961, + "learning_rate": 5e-05, + "loss": 1.398, + "num_input_tokens_seen": 269210588, + "step": 4024 + }, + { + "epoch": 0.4566241134751773, + "loss": 1.4178167581558228, + "loss_ce": 0.0042425538413226604, + "loss_iou": 0.5703125, + "loss_num": 0.05517578125, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 269210588, + "step": 4024 + }, + { + "epoch": 0.45673758865248226, + "grad_norm": 26.519437789916992, + "learning_rate": 5e-05, + "loss": 1.148, + "num_input_tokens_seen": 269277984, + "step": 4025 + }, + { + "epoch": 0.45673758865248226, + "loss": 1.2287631034851074, + "loss_ce": 0.008059990592300892, + "loss_iou": 0.515625, + "loss_num": 0.037841796875, + "loss_xval": 1.21875, + "num_input_tokens_seen": 269277984, + "step": 4025 + }, + { + "epoch": 0.45685106382978724, + "grad_norm": 32.012203216552734, + "learning_rate": 5e-05, + "loss": 1.3448, + "num_input_tokens_seen": 269344248, + "step": 4026 + }, + { + "epoch": 0.45685106382978724, + "loss": 1.391566514968872, + "loss_ce": 0.006312641315162182, + "loss_iou": 0.5703125, + "loss_num": 0.049072265625, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 269344248, + "step": 4026 + }, + { + "epoch": 0.4569645390070922, + "grad_norm": 25.354175567626953, + "learning_rate": 5e-05, + "loss": 1.2446, + "num_input_tokens_seen": 269411308, + "step": 4027 + }, + { + "epoch": 0.4569645390070922, + "loss": 1.3909238576889038, + "loss_ce": 0.0066465213894844055, + "loss_iou": 0.58984375, + "loss_num": 0.04052734375, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 269411308, + "step": 4027 + }, + { + "epoch": 0.45707801418439714, + "grad_norm": 27.077184677124023, + "learning_rate": 5e-05, + "loss": 1.1963, + "num_input_tokens_seen": 269477932, + "step": 4028 + }, + { + "epoch": 0.45707801418439714, + "loss": 1.290672779083252, + "loss_ce": 0.0045400457456707954, + "loss_iou": 0.54296875, + "loss_num": 0.040283203125, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 269477932, + "step": 4028 + }, + { + "epoch": 0.4571914893617021, + "grad_norm": 25.52065658569336, + "learning_rate": 5e-05, + "loss": 1.2755, + "num_input_tokens_seen": 269544648, + "step": 4029 + }, + { + "epoch": 0.4571914893617021, + "loss": 1.2241711616516113, + "loss_ce": 0.004932872019708157, + "loss_iou": 0.5, + "loss_num": 0.0439453125, + "loss_xval": 1.21875, + "num_input_tokens_seen": 269544648, + "step": 4029 + }, + { + "epoch": 0.4573049645390071, + "grad_norm": 21.74411964416504, + "learning_rate": 5e-05, + "loss": 1.376, + "num_input_tokens_seen": 269611180, + "step": 4030 + }, + { + "epoch": 0.4573049645390071, + "loss": 1.4248660802841187, + "loss_ce": 0.005920803174376488, + "loss_iou": 0.5859375, + "loss_num": 0.04931640625, + "loss_xval": 1.421875, + "num_input_tokens_seen": 269611180, + "step": 4030 + }, + { + "epoch": 0.4574184397163121, + "grad_norm": 26.62788200378418, + "learning_rate": 5e-05, + "loss": 1.0889, + "num_input_tokens_seen": 269678496, + "step": 4031 + }, + { + "epoch": 0.4574184397163121, + "loss": 1.1488984823226929, + "loss_ce": 0.00216995389200747, + "loss_iou": 0.482421875, + "loss_num": 0.036376953125, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 269678496, + "step": 4031 + }, + { + "epoch": 0.457531914893617, + "grad_norm": 26.726985931396484, + "learning_rate": 5e-05, + "loss": 1.1739, + "num_input_tokens_seen": 269745596, + "step": 4032 + }, + { + "epoch": 0.457531914893617, + "loss": 1.153306007385254, + "loss_ce": 0.005845041014254093, + "loss_iou": 0.51171875, + "loss_num": 0.025146484375, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 269745596, + "step": 4032 + }, + { + "epoch": 0.457645390070922, + "grad_norm": 29.7810001373291, + "learning_rate": 5e-05, + "loss": 1.4765, + "num_input_tokens_seen": 269812676, + "step": 4033 + }, + { + "epoch": 0.457645390070922, + "loss": 1.5096756219863892, + "loss_ce": 0.004792753607034683, + "loss_iou": 0.609375, + "loss_num": 0.057373046875, + "loss_xval": 1.5078125, + "num_input_tokens_seen": 269812676, + "step": 4033 + }, + { + "epoch": 0.45775886524822695, + "grad_norm": 40.97587203979492, + "learning_rate": 5e-05, + "loss": 1.4726, + "num_input_tokens_seen": 269879028, + "step": 4034 + }, + { + "epoch": 0.45775886524822695, + "loss": 1.4895638227462769, + "loss_ce": 0.0051888064481318, + "loss_iou": 0.61328125, + "loss_num": 0.0517578125, + "loss_xval": 1.484375, + "num_input_tokens_seen": 269879028, + "step": 4034 + }, + { + "epoch": 0.45787234042553193, + "grad_norm": 34.75934982299805, + "learning_rate": 5e-05, + "loss": 1.3908, + "num_input_tokens_seen": 269947016, + "step": 4035 + }, + { + "epoch": 0.45787234042553193, + "loss": 1.597087025642395, + "loss_ce": 0.004801860544830561, + "loss_iou": 0.625, + "loss_num": 0.06884765625, + "loss_xval": 1.59375, + "num_input_tokens_seen": 269947016, + "step": 4035 + }, + { + "epoch": 0.45798581560283685, + "grad_norm": 30.013032913208008, + "learning_rate": 5e-05, + "loss": 1.4581, + "num_input_tokens_seen": 270013444, + "step": 4036 + }, + { + "epoch": 0.45798581560283685, + "loss": 1.581665277481079, + "loss_ce": 0.009399740025401115, + "loss_iou": 0.6640625, + "loss_num": 0.0478515625, + "loss_xval": 1.5703125, + "num_input_tokens_seen": 270013444, + "step": 4036 + }, + { + "epoch": 0.45809929078014183, + "grad_norm": 21.580890655517578, + "learning_rate": 5e-05, + "loss": 1.3247, + "num_input_tokens_seen": 270080952, + "step": 4037 + }, + { + "epoch": 0.45809929078014183, + "loss": 1.2539926767349243, + "loss_ce": 0.004969193134456873, + "loss_iou": 0.5390625, + "loss_num": 0.033203125, + "loss_xval": 1.25, + "num_input_tokens_seen": 270080952, + "step": 4037 + }, + { + "epoch": 0.4582127659574468, + "grad_norm": 31.507701873779297, + "learning_rate": 5e-05, + "loss": 1.3165, + "num_input_tokens_seen": 270149024, + "step": 4038 + }, + { + "epoch": 0.4582127659574468, + "loss": 1.4703044891357422, + "loss_ce": 0.008390495553612709, + "loss_iou": 0.6015625, + "loss_num": 0.052490234375, + "loss_xval": 1.4609375, + "num_input_tokens_seen": 270149024, + "step": 4038 + }, + { + "epoch": 0.4583262411347518, + "grad_norm": 26.023910522460938, + "learning_rate": 5e-05, + "loss": 1.2508, + "num_input_tokens_seen": 270215876, + "step": 4039 + }, + { + "epoch": 0.4583262411347518, + "loss": 1.0076394081115723, + "loss_ce": 0.00886010006070137, + "loss_iou": 0.40625, + "loss_num": 0.037353515625, + "loss_xval": 1.0, + "num_input_tokens_seen": 270215876, + "step": 4039 + }, + { + "epoch": 0.4584397163120567, + "grad_norm": 28.95343589782715, + "learning_rate": 5e-05, + "loss": 1.2747, + "num_input_tokens_seen": 270283220, + "step": 4040 + }, + { + "epoch": 0.4584397163120567, + "loss": 1.2360446453094482, + "loss_ce": 0.00557589391246438, + "loss_iou": 0.4765625, + "loss_num": 0.0556640625, + "loss_xval": 1.234375, + "num_input_tokens_seen": 270283220, + "step": 4040 + }, + { + "epoch": 0.4585531914893617, + "grad_norm": 53.88239669799805, + "learning_rate": 5e-05, + "loss": 1.1588, + "num_input_tokens_seen": 270350060, + "step": 4041 + }, + { + "epoch": 0.4585531914893617, + "loss": 1.2402421236038208, + "loss_ce": 0.00879687163978815, + "loss_iou": 0.5, + "loss_num": 0.046142578125, + "loss_xval": 1.234375, + "num_input_tokens_seen": 270350060, + "step": 4041 + }, + { + "epoch": 0.45866666666666667, + "grad_norm": 26.397493362426758, + "learning_rate": 5e-05, + "loss": 1.1818, + "num_input_tokens_seen": 270416012, + "step": 4042 + }, + { + "epoch": 0.45866666666666667, + "loss": 1.0467203855514526, + "loss_ce": 0.005460631567984819, + "loss_iou": 0.43359375, + "loss_num": 0.03466796875, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 270416012, + "step": 4042 + }, + { + "epoch": 0.45878014184397164, + "grad_norm": 22.08910369873047, + "learning_rate": 5e-05, + "loss": 1.2166, + "num_input_tokens_seen": 270482636, + "step": 4043 + }, + { + "epoch": 0.45878014184397164, + "loss": 1.2263262271881104, + "loss_ce": 0.007576304022222757, + "loss_iou": 0.5234375, + "loss_num": 0.03369140625, + "loss_xval": 1.21875, + "num_input_tokens_seen": 270482636, + "step": 4043 + }, + { + "epoch": 0.4588936170212766, + "grad_norm": 26.611331939697266, + "learning_rate": 5e-05, + "loss": 1.2981, + "num_input_tokens_seen": 270550524, + "step": 4044 + }, + { + "epoch": 0.4588936170212766, + "loss": 1.2661998271942139, + "loss_ce": 0.006922547239810228, + "loss_iou": 0.515625, + "loss_num": 0.045166015625, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 270550524, + "step": 4044 + }, + { + "epoch": 0.45900709219858155, + "grad_norm": 21.03318214416504, + "learning_rate": 5e-05, + "loss": 1.1064, + "num_input_tokens_seen": 270617128, + "step": 4045 + }, + { + "epoch": 0.45900709219858155, + "loss": 1.1833105087280273, + "loss_ce": 0.006552664563059807, + "loss_iou": 0.50390625, + "loss_num": 0.033447265625, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 270617128, + "step": 4045 + }, + { + "epoch": 0.4591205673758865, + "grad_norm": 18.550209045410156, + "learning_rate": 5e-05, + "loss": 1.1655, + "num_input_tokens_seen": 270683280, + "step": 4046 + }, + { + "epoch": 0.4591205673758865, + "loss": 1.2287228107452393, + "loss_ce": 0.005089951679110527, + "loss_iou": 0.48046875, + "loss_num": 0.052490234375, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 270683280, + "step": 4046 + }, + { + "epoch": 0.4592340425531915, + "grad_norm": 21.027761459350586, + "learning_rate": 5e-05, + "loss": 1.3419, + "num_input_tokens_seen": 270749724, + "step": 4047 + }, + { + "epoch": 0.4592340425531915, + "loss": 1.290027141571045, + "loss_ce": 0.00926540419459343, + "loss_iou": 0.51953125, + "loss_num": 0.0478515625, + "loss_xval": 1.28125, + "num_input_tokens_seen": 270749724, + "step": 4047 + }, + { + "epoch": 0.4593475177304965, + "grad_norm": 30.73868751525879, + "learning_rate": 5e-05, + "loss": 1.3666, + "num_input_tokens_seen": 270815804, + "step": 4048 + }, + { + "epoch": 0.4593475177304965, + "loss": 1.2662897109985352, + "loss_ce": 0.007988996803760529, + "loss_iou": 0.53515625, + "loss_num": 0.037353515625, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 270815804, + "step": 4048 + }, + { + "epoch": 0.4594609929078014, + "grad_norm": 59.48126983642578, + "learning_rate": 5e-05, + "loss": 1.4942, + "num_input_tokens_seen": 270883008, + "step": 4049 + }, + { + "epoch": 0.4594609929078014, + "loss": 1.6129093170166016, + "loss_ce": 0.007440617773681879, + "loss_iou": 0.6953125, + "loss_num": 0.04248046875, + "loss_xval": 1.609375, + "num_input_tokens_seen": 270883008, + "step": 4049 + }, + { + "epoch": 0.4595744680851064, + "grad_norm": 24.763750076293945, + "learning_rate": 5e-05, + "loss": 1.1129, + "num_input_tokens_seen": 270949856, + "step": 4050 + }, + { + "epoch": 0.4595744680851064, + "loss": 0.9265837669372559, + "loss_ce": 0.003243934828788042, + "loss_iou": 0.400390625, + "loss_num": 0.024658203125, + "loss_xval": 0.921875, + "num_input_tokens_seen": 270949856, + "step": 4050 + }, + { + "epoch": 0.45968794326241136, + "grad_norm": 33.522090911865234, + "learning_rate": 5e-05, + "loss": 1.3731, + "num_input_tokens_seen": 271016672, + "step": 4051 + }, + { + "epoch": 0.45968794326241136, + "loss": 1.3869907855987549, + "loss_ce": 0.0027134050615131855, + "loss_iou": 0.55078125, + "loss_num": 0.055908203125, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 271016672, + "step": 4051 + }, + { + "epoch": 0.45980141843971634, + "grad_norm": 33.11986541748047, + "learning_rate": 5e-05, + "loss": 1.2717, + "num_input_tokens_seen": 271083544, + "step": 4052 + }, + { + "epoch": 0.45980141843971634, + "loss": 1.0975366830825806, + "loss_ce": 0.004763238597661257, + "loss_iou": 0.4453125, + "loss_num": 0.0400390625, + "loss_xval": 1.09375, + "num_input_tokens_seen": 271083544, + "step": 4052 + }, + { + "epoch": 0.45991489361702126, + "grad_norm": 28.280176162719727, + "learning_rate": 5e-05, + "loss": 1.3928, + "num_input_tokens_seen": 271150628, + "step": 4053 + }, + { + "epoch": 0.45991489361702126, + "loss": 1.2493016719818115, + "loss_ce": 0.004672844894230366, + "loss_iou": 0.5390625, + "loss_num": 0.033935546875, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 271150628, + "step": 4053 + }, + { + "epoch": 0.46002836879432624, + "grad_norm": 23.645994186401367, + "learning_rate": 5e-05, + "loss": 1.247, + "num_input_tokens_seen": 271217240, + "step": 4054 + }, + { + "epoch": 0.46002836879432624, + "loss": 1.0710015296936035, + "loss_ce": 0.003618766786530614, + "loss_iou": 0.4453125, + "loss_num": 0.03564453125, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 271217240, + "step": 4054 + }, + { + "epoch": 0.4601418439716312, + "grad_norm": 28.146467208862305, + "learning_rate": 5e-05, + "loss": 1.0243, + "num_input_tokens_seen": 271284324, + "step": 4055 + }, + { + "epoch": 0.4601418439716312, + "loss": 1.0616602897644043, + "loss_ce": 0.003310779109597206, + "loss_iou": 0.427734375, + "loss_num": 0.04052734375, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 271284324, + "step": 4055 + }, + { + "epoch": 0.4602553191489362, + "grad_norm": 30.795185089111328, + "learning_rate": 5e-05, + "loss": 1.2185, + "num_input_tokens_seen": 271350592, + "step": 4056 + }, + { + "epoch": 0.4602553191489362, + "loss": 1.277483344078064, + "loss_ce": 0.0031302536372095346, + "loss_iou": 0.5, + "loss_num": 0.05517578125, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 271350592, + "step": 4056 + }, + { + "epoch": 0.4603687943262411, + "grad_norm": 91.35368347167969, + "learning_rate": 5e-05, + "loss": 1.3522, + "num_input_tokens_seen": 271417052, + "step": 4057 + }, + { + "epoch": 0.4603687943262411, + "loss": 1.606946349143982, + "loss_ce": 0.0063603725284338, + "loss_iou": 0.60546875, + "loss_num": 0.07763671875, + "loss_xval": 1.6015625, + "num_input_tokens_seen": 271417052, + "step": 4057 + }, + { + "epoch": 0.4604822695035461, + "grad_norm": 30.62421989440918, + "learning_rate": 5e-05, + "loss": 1.2306, + "num_input_tokens_seen": 271483652, + "step": 4058 + }, + { + "epoch": 0.4604822695035461, + "loss": 1.312777042388916, + "loss_ce": 0.009554367512464523, + "loss_iou": 0.53515625, + "loss_num": 0.046630859375, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 271483652, + "step": 4058 + }, + { + "epoch": 0.46059574468085107, + "grad_norm": 28.08611488342285, + "learning_rate": 5e-05, + "loss": 1.3757, + "num_input_tokens_seen": 271550392, + "step": 4059 + }, + { + "epoch": 0.46059574468085107, + "loss": 1.5494489669799805, + "loss_ce": 0.009898154065012932, + "loss_iou": 0.6015625, + "loss_num": 0.0673828125, + "loss_xval": 1.5390625, + "num_input_tokens_seen": 271550392, + "step": 4059 + }, + { + "epoch": 0.46070921985815605, + "grad_norm": 34.69454574584961, + "learning_rate": 5e-05, + "loss": 1.0776, + "num_input_tokens_seen": 271617336, + "step": 4060 + }, + { + "epoch": 0.46070921985815605, + "loss": 0.9986217617988586, + "loss_ce": 0.003992854617536068, + "loss_iou": 0.40625, + "loss_num": 0.0361328125, + "loss_xval": 0.99609375, + "num_input_tokens_seen": 271617336, + "step": 4060 + }, + { + "epoch": 0.46082269503546097, + "grad_norm": 27.6487979888916, + "learning_rate": 5e-05, + "loss": 1.2034, + "num_input_tokens_seen": 271683204, + "step": 4061 + }, + { + "epoch": 0.46082269503546097, + "loss": 1.3394508361816406, + "loss_ce": 0.004489899147301912, + "loss_iou": 0.5703125, + "loss_num": 0.0390625, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 271683204, + "step": 4061 + }, + { + "epoch": 0.46093617021276595, + "grad_norm": 31.033349990844727, + "learning_rate": 5e-05, + "loss": 1.2683, + "num_input_tokens_seen": 271750216, + "step": 4062 + }, + { + "epoch": 0.46093617021276595, + "loss": 1.2661283016204834, + "loss_ce": 0.004897888284176588, + "loss_iou": 0.56640625, + "loss_num": 0.0262451171875, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 271750216, + "step": 4062 + }, + { + "epoch": 0.46104964539007093, + "grad_norm": 27.249887466430664, + "learning_rate": 5e-05, + "loss": 1.2893, + "num_input_tokens_seen": 271817444, + "step": 4063 + }, + { + "epoch": 0.46104964539007093, + "loss": 1.2579725980758667, + "loss_ce": 0.004066328518092632, + "loss_iou": 0.546875, + "loss_num": 0.03271484375, + "loss_xval": 1.25, + "num_input_tokens_seen": 271817444, + "step": 4063 + }, + { + "epoch": 0.4611631205673759, + "grad_norm": 12.99606704711914, + "learning_rate": 5e-05, + "loss": 1.1662, + "num_input_tokens_seen": 271884000, + "step": 4064 + }, + { + "epoch": 0.4611631205673759, + "loss": 1.1611132621765137, + "loss_ce": 0.00437502097338438, + "loss_iou": 0.490234375, + "loss_num": 0.034912109375, + "loss_xval": 1.15625, + "num_input_tokens_seen": 271884000, + "step": 4064 + }, + { + "epoch": 0.46127659574468083, + "grad_norm": 46.52296829223633, + "learning_rate": 5e-05, + "loss": 1.1942, + "num_input_tokens_seen": 271950352, + "step": 4065 + }, + { + "epoch": 0.46127659574468083, + "loss": 1.2044768333435059, + "loss_ce": 0.005258125718683004, + "loss_iou": 0.51171875, + "loss_num": 0.034912109375, + "loss_xval": 1.203125, + "num_input_tokens_seen": 271950352, + "step": 4065 + }, + { + "epoch": 0.4613900709219858, + "grad_norm": 39.753517150878906, + "learning_rate": 5e-05, + "loss": 1.3366, + "num_input_tokens_seen": 272017168, + "step": 4066 + }, + { + "epoch": 0.4613900709219858, + "loss": 1.2834841012954712, + "loss_ce": 0.0027224132791161537, + "loss_iou": 0.54296875, + "loss_num": 0.038330078125, + "loss_xval": 1.28125, + "num_input_tokens_seen": 272017168, + "step": 4066 + }, + { + "epoch": 0.4615035460992908, + "grad_norm": 24.709232330322266, + "learning_rate": 5e-05, + "loss": 1.3489, + "num_input_tokens_seen": 272083804, + "step": 4067 + }, + { + "epoch": 0.4615035460992908, + "loss": 1.3781089782714844, + "loss_ce": 0.03338247165083885, + "loss_iou": 0.54296875, + "loss_num": 0.052490234375, + "loss_xval": 1.34375, + "num_input_tokens_seen": 272083804, + "step": 4067 + }, + { + "epoch": 0.46161702127659576, + "grad_norm": 19.322294235229492, + "learning_rate": 5e-05, + "loss": 1.0931, + "num_input_tokens_seen": 272152032, + "step": 4068 + }, + { + "epoch": 0.46161702127659576, + "loss": 0.9768104553222656, + "loss_ce": 0.0026893734466284513, + "loss_iou": 0.416015625, + "loss_num": 0.0283203125, + "loss_xval": 0.97265625, + "num_input_tokens_seen": 272152032, + "step": 4068 + }, + { + "epoch": 0.4617304964539007, + "grad_norm": 32.57515335083008, + "learning_rate": 5e-05, + "loss": 1.2105, + "num_input_tokens_seen": 272218676, + "step": 4069 + }, + { + "epoch": 0.4617304964539007, + "loss": 1.360945701599121, + "loss_ce": 0.008406629785895348, + "loss_iou": 0.55859375, + "loss_num": 0.047119140625, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 272218676, + "step": 4069 + }, + { + "epoch": 0.46184397163120566, + "grad_norm": 36.09178161621094, + "learning_rate": 5e-05, + "loss": 1.3767, + "num_input_tokens_seen": 272285660, + "step": 4070 + }, + { + "epoch": 0.46184397163120566, + "loss": 1.3965824842453003, + "loss_ce": 0.004980886820703745, + "loss_iou": 0.57421875, + "loss_num": 0.048095703125, + "loss_xval": 1.390625, + "num_input_tokens_seen": 272285660, + "step": 4070 + }, + { + "epoch": 0.46195744680851064, + "grad_norm": 28.07522201538086, + "learning_rate": 5e-05, + "loss": 1.4052, + "num_input_tokens_seen": 272352656, + "step": 4071 + }, + { + "epoch": 0.46195744680851064, + "loss": 1.3222568035125732, + "loss_ce": 0.011587836779654026, + "loss_iou": 0.546875, + "loss_num": 0.043701171875, + "loss_xval": 1.3125, + "num_input_tokens_seen": 272352656, + "step": 4071 + }, + { + "epoch": 0.4620709219858156, + "grad_norm": 35.95526885986328, + "learning_rate": 5e-05, + "loss": 1.2349, + "num_input_tokens_seen": 272419420, + "step": 4072 + }, + { + "epoch": 0.4620709219858156, + "loss": 1.0171478986740112, + "loss_ce": 0.00787058100104332, + "loss_iou": 0.41796875, + "loss_num": 0.03515625, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 272419420, + "step": 4072 + }, + { + "epoch": 0.46218439716312054, + "grad_norm": 30.683923721313477, + "learning_rate": 5e-05, + "loss": 1.4929, + "num_input_tokens_seen": 272486732, + "step": 4073 + }, + { + "epoch": 0.46218439716312054, + "loss": 1.355893611907959, + "loss_ce": 0.010678868740797043, + "loss_iou": 0.58984375, + "loss_num": 0.033203125, + "loss_xval": 1.34375, + "num_input_tokens_seen": 272486732, + "step": 4073 + }, + { + "epoch": 0.4622978723404255, + "grad_norm": 23.2539119720459, + "learning_rate": 5e-05, + "loss": 1.2086, + "num_input_tokens_seen": 272553084, + "step": 4074 + }, + { + "epoch": 0.4622978723404255, + "loss": 1.0455880165100098, + "loss_ce": 0.0072578550316393375, + "loss_iou": 0.42578125, + "loss_num": 0.037841796875, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 272553084, + "step": 4074 + }, + { + "epoch": 0.4624113475177305, + "grad_norm": 22.944137573242188, + "learning_rate": 5e-05, + "loss": 1.1532, + "num_input_tokens_seen": 272619724, + "step": 4075 + }, + { + "epoch": 0.4624113475177305, + "loss": 1.0604920387268066, + "loss_ce": 0.004339755512773991, + "loss_iou": 0.46875, + "loss_num": 0.02392578125, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 272619724, + "step": 4075 + }, + { + "epoch": 0.4625248226950355, + "grad_norm": 22.09409523010254, + "learning_rate": 5e-05, + "loss": 1.0989, + "num_input_tokens_seen": 272685788, + "step": 4076 + }, + { + "epoch": 0.4625248226950355, + "loss": 1.2686275243759155, + "loss_ce": 0.004467370919883251, + "loss_iou": 0.52734375, + "loss_num": 0.041748046875, + "loss_xval": 1.265625, + "num_input_tokens_seen": 272685788, + "step": 4076 + }, + { + "epoch": 0.4626382978723404, + "grad_norm": 17.235736846923828, + "learning_rate": 5e-05, + "loss": 0.9054, + "num_input_tokens_seen": 272753476, + "step": 4077 + }, + { + "epoch": 0.4626382978723404, + "loss": 0.9642890095710754, + "loss_ce": 0.004328079521656036, + "loss_iou": 0.4296875, + "loss_num": 0.020263671875, + "loss_xval": 0.9609375, + "num_input_tokens_seen": 272753476, + "step": 4077 + }, + { + "epoch": 0.4627517730496454, + "grad_norm": 28.26335906982422, + "learning_rate": 5e-05, + "loss": 1.1068, + "num_input_tokens_seen": 272819972, + "step": 4078 + }, + { + "epoch": 0.4627517730496454, + "loss": 1.0593308210372925, + "loss_ce": 0.006108212284743786, + "loss_iou": 0.423828125, + "loss_num": 0.041015625, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 272819972, + "step": 4078 + }, + { + "epoch": 0.46286524822695035, + "grad_norm": 24.419002532958984, + "learning_rate": 5e-05, + "loss": 1.1381, + "num_input_tokens_seen": 272886588, + "step": 4079 + }, + { + "epoch": 0.46286524822695035, + "loss": 1.0179811716079712, + "loss_ce": 0.004309279844164848, + "loss_iou": 0.46875, + "loss_num": 0.0157470703125, + "loss_xval": 1.015625, + "num_input_tokens_seen": 272886588, + "step": 4079 + }, + { + "epoch": 0.46297872340425533, + "grad_norm": 28.700420379638672, + "learning_rate": 5e-05, + "loss": 1.3173, + "num_input_tokens_seen": 272954560, + "step": 4080 + }, + { + "epoch": 0.46297872340425533, + "loss": 1.3840370178222656, + "loss_ce": 0.0061073023825883865, + "loss_iou": 0.58203125, + "loss_num": 0.0419921875, + "loss_xval": 1.375, + "num_input_tokens_seen": 272954560, + "step": 4080 + }, + { + "epoch": 0.4630921985815603, + "grad_norm": 34.60419464111328, + "learning_rate": 5e-05, + "loss": 1.4483, + "num_input_tokens_seen": 273022276, + "step": 4081 + }, + { + "epoch": 0.4630921985815603, + "loss": 1.5664582252502441, + "loss_ce": 0.005911408457905054, + "loss_iou": 0.6640625, + "loss_num": 0.04638671875, + "loss_xval": 1.5625, + "num_input_tokens_seen": 273022276, + "step": 4081 + }, + { + "epoch": 0.46320567375886523, + "grad_norm": 23.84861183166504, + "learning_rate": 5e-05, + "loss": 1.633, + "num_input_tokens_seen": 273088280, + "step": 4082 + }, + { + "epoch": 0.46320567375886523, + "loss": 1.4512419700622559, + "loss_ce": 0.008859079331159592, + "loss_iou": 0.6015625, + "loss_num": 0.04736328125, + "loss_xval": 1.4453125, + "num_input_tokens_seen": 273088280, + "step": 4082 + }, + { + "epoch": 0.4633191489361702, + "grad_norm": 18.790910720825195, + "learning_rate": 5e-05, + "loss": 1.1737, + "num_input_tokens_seen": 273155268, + "step": 4083 + }, + { + "epoch": 0.4633191489361702, + "loss": 1.0926082134246826, + "loss_ce": 0.00825769267976284, + "loss_iou": 0.466796875, + "loss_num": 0.0301513671875, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 273155268, + "step": 4083 + }, + { + "epoch": 0.4634326241134752, + "grad_norm": 19.206079483032227, + "learning_rate": 5e-05, + "loss": 1.0487, + "num_input_tokens_seen": 273222380, + "step": 4084 + }, + { + "epoch": 0.4634326241134752, + "loss": 1.2261240482330322, + "loss_ce": 0.005909159779548645, + "loss_iou": 0.49609375, + "loss_num": 0.04541015625, + "loss_xval": 1.21875, + "num_input_tokens_seen": 273222380, + "step": 4084 + }, + { + "epoch": 0.46354609929078017, + "grad_norm": 20.131467819213867, + "learning_rate": 5e-05, + "loss": 1.1598, + "num_input_tokens_seen": 273288796, + "step": 4085 + }, + { + "epoch": 0.46354609929078017, + "loss": 1.0574923753738403, + "loss_ce": 0.007199333515018225, + "loss_iou": 0.4375, + "loss_num": 0.034912109375, + "loss_xval": 1.046875, + "num_input_tokens_seen": 273288796, + "step": 4085 + }, + { + "epoch": 0.4636595744680851, + "grad_norm": 21.020326614379883, + "learning_rate": 5e-05, + "loss": 1.2063, + "num_input_tokens_seen": 273356584, + "step": 4086 + }, + { + "epoch": 0.4636595744680851, + "loss": 1.290401816368103, + "loss_ce": 0.012081491760909557, + "loss_iou": 0.51953125, + "loss_num": 0.047119140625, + "loss_xval": 1.28125, + "num_input_tokens_seen": 273356584, + "step": 4086 + }, + { + "epoch": 0.46377304964539007, + "grad_norm": 20.254268646240234, + "learning_rate": 5e-05, + "loss": 0.9415, + "num_input_tokens_seen": 273422824, + "step": 4087 + }, + { + "epoch": 0.46377304964539007, + "loss": 0.9717677235603333, + "loss_ce": 0.006191561929881573, + "loss_iou": 0.3984375, + "loss_num": 0.033935546875, + "loss_xval": 0.96484375, + "num_input_tokens_seen": 273422824, + "step": 4087 + }, + { + "epoch": 0.46388652482269505, + "grad_norm": 297.9291076660156, + "learning_rate": 5e-05, + "loss": 1.2541, + "num_input_tokens_seen": 273489804, + "step": 4088 + }, + { + "epoch": 0.46388652482269505, + "loss": 1.324453592300415, + "loss_ce": 0.007559106685221195, + "loss_iou": 0.5703125, + "loss_num": 0.03466796875, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 273489804, + "step": 4088 + }, + { + "epoch": 0.464, + "grad_norm": 24.482833862304688, + "learning_rate": 5e-05, + "loss": 1.2913, + "num_input_tokens_seen": 273557204, + "step": 4089 + }, + { + "epoch": 0.464, + "loss": 1.194167137145996, + "loss_ce": 0.00764372618868947, + "loss_iou": 0.49609375, + "loss_num": 0.039306640625, + "loss_xval": 1.1875, + "num_input_tokens_seen": 273557204, + "step": 4089 + }, + { + "epoch": 0.46411347517730495, + "grad_norm": 30.301786422729492, + "learning_rate": 5e-05, + "loss": 1.253, + "num_input_tokens_seen": 273623364, + "step": 4090 + }, + { + "epoch": 0.46411347517730495, + "loss": 1.3735941648483276, + "loss_ce": 0.008054656907916069, + "loss_iou": 0.51953125, + "loss_num": 0.064453125, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 273623364, + "step": 4090 + }, + { + "epoch": 0.4642269503546099, + "grad_norm": 36.195892333984375, + "learning_rate": 5e-05, + "loss": 1.2594, + "num_input_tokens_seen": 273689844, + "step": 4091 + }, + { + "epoch": 0.4642269503546099, + "loss": 1.242626667022705, + "loss_ce": 0.008495882153511047, + "loss_iou": 0.50390625, + "loss_num": 0.045166015625, + "loss_xval": 1.234375, + "num_input_tokens_seen": 273689844, + "step": 4091 + }, + { + "epoch": 0.4643404255319149, + "grad_norm": 29.24736976623535, + "learning_rate": 5e-05, + "loss": 1.3423, + "num_input_tokens_seen": 273756424, + "step": 4092 + }, + { + "epoch": 0.4643404255319149, + "loss": 1.2885832786560059, + "loss_ce": 0.002938689896836877, + "loss_iou": 0.546875, + "loss_num": 0.03857421875, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 273756424, + "step": 4092 + }, + { + "epoch": 0.4644539007092199, + "grad_norm": 30.261350631713867, + "learning_rate": 5e-05, + "loss": 1.2029, + "num_input_tokens_seen": 273823108, + "step": 4093 + }, + { + "epoch": 0.4644539007092199, + "loss": 1.1865618228912354, + "loss_ce": 0.006141954101622105, + "loss_iou": 0.51171875, + "loss_num": 0.0322265625, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 273823108, + "step": 4093 + }, + { + "epoch": 0.4645673758865248, + "grad_norm": 29.429407119750977, + "learning_rate": 5e-05, + "loss": 1.3587, + "num_input_tokens_seen": 273890116, + "step": 4094 + }, + { + "epoch": 0.4645673758865248, + "loss": 1.2234971523284912, + "loss_ce": 0.005723653361201286, + "loss_iou": 0.4765625, + "loss_num": 0.052734375, + "loss_xval": 1.21875, + "num_input_tokens_seen": 273890116, + "step": 4094 + }, + { + "epoch": 0.4646808510638298, + "grad_norm": 29.130403518676758, + "learning_rate": 5e-05, + "loss": 1.5289, + "num_input_tokens_seen": 273957460, + "step": 4095 + }, + { + "epoch": 0.4646808510638298, + "loss": 1.4781147241592407, + "loss_ce": 0.006923330947756767, + "loss_iou": 0.58203125, + "loss_num": 0.060791015625, + "loss_xval": 1.46875, + "num_input_tokens_seen": 273957460, + "step": 4095 + }, + { + "epoch": 0.46479432624113476, + "grad_norm": 26.221965789794922, + "learning_rate": 5e-05, + "loss": 1.5642, + "num_input_tokens_seen": 274022968, + "step": 4096 + }, + { + "epoch": 0.46479432624113476, + "loss": 1.7877981662750244, + "loss_ce": 0.007524746935814619, + "loss_iou": 0.703125, + "loss_num": 0.0751953125, + "loss_xval": 1.78125, + "num_input_tokens_seen": 274022968, + "step": 4096 + }, + { + "epoch": 0.46490780141843974, + "grad_norm": 37.2325439453125, + "learning_rate": 5e-05, + "loss": 1.181, + "num_input_tokens_seen": 274089876, + "step": 4097 + }, + { + "epoch": 0.46490780141843974, + "loss": 1.2996833324432373, + "loss_ce": 0.004273166414350271, + "loss_iou": 0.52734375, + "loss_num": 0.048828125, + "loss_xval": 1.296875, + "num_input_tokens_seen": 274089876, + "step": 4097 + }, + { + "epoch": 0.46502127659574466, + "grad_norm": 25.866424560546875, + "learning_rate": 5e-05, + "loss": 1.3734, + "num_input_tokens_seen": 274158168, + "step": 4098 + }, + { + "epoch": 0.46502127659574466, + "loss": 1.4773056507110596, + "loss_ce": 0.004649454262107611, + "loss_iou": 0.62890625, + "loss_num": 0.04296875, + "loss_xval": 1.46875, + "num_input_tokens_seen": 274158168, + "step": 4098 + }, + { + "epoch": 0.46513475177304964, + "grad_norm": 28.958913803100586, + "learning_rate": 5e-05, + "loss": 1.2702, + "num_input_tokens_seen": 274224336, + "step": 4099 + }, + { + "epoch": 0.46513475177304964, + "loss": 1.277779221534729, + "loss_ce": 0.007027272135019302, + "loss_iou": 0.47265625, + "loss_num": 0.06494140625, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 274224336, + "step": 4099 + }, + { + "epoch": 0.4652482269503546, + "grad_norm": 19.561424255371094, + "learning_rate": 5e-05, + "loss": 1.4583, + "num_input_tokens_seen": 274291128, + "step": 4100 + }, + { + "epoch": 0.4652482269503546, + "loss": 1.4610974788665771, + "loss_ce": 0.005042693577706814, + "loss_iou": 0.6328125, + "loss_num": 0.037841796875, + "loss_xval": 1.453125, + "num_input_tokens_seen": 274291128, + "step": 4100 + }, + { + "epoch": 0.4653617021276596, + "grad_norm": 16.69084358215332, + "learning_rate": 5e-05, + "loss": 1.135, + "num_input_tokens_seen": 274357188, + "step": 4101 + }, + { + "epoch": 0.4653617021276596, + "loss": 1.1783989667892456, + "loss_ce": 0.005791572853922844, + "loss_iou": 0.482421875, + "loss_num": 0.041748046875, + "loss_xval": 1.171875, + "num_input_tokens_seen": 274357188, + "step": 4101 + }, + { + "epoch": 0.4654751773049645, + "grad_norm": 31.049409866333008, + "learning_rate": 5e-05, + "loss": 1.3815, + "num_input_tokens_seen": 274424848, + "step": 4102 + }, + { + "epoch": 0.4654751773049645, + "loss": 1.4538671970367432, + "loss_ce": 0.005624906625598669, + "loss_iou": 0.56640625, + "loss_num": 0.0634765625, + "loss_xval": 1.4453125, + "num_input_tokens_seen": 274424848, + "step": 4102 + }, + { + "epoch": 0.4655886524822695, + "grad_norm": 25.0222110748291, + "learning_rate": 5e-05, + "loss": 1.3286, + "num_input_tokens_seen": 274491852, + "step": 4103 + }, + { + "epoch": 0.4655886524822695, + "loss": 1.3634182214736938, + "loss_ce": 0.005019715055823326, + "loss_iou": 0.55859375, + "loss_num": 0.0478515625, + "loss_xval": 1.359375, + "num_input_tokens_seen": 274491852, + "step": 4103 + }, + { + "epoch": 0.46570212765957447, + "grad_norm": 26.606271743774414, + "learning_rate": 5e-05, + "loss": 1.2884, + "num_input_tokens_seen": 274559028, + "step": 4104 + }, + { + "epoch": 0.46570212765957447, + "loss": 1.1247589588165283, + "loss_ce": 0.0036651361733675003, + "loss_iou": 0.470703125, + "loss_num": 0.0361328125, + "loss_xval": 1.125, + "num_input_tokens_seen": 274559028, + "step": 4104 + }, + { + "epoch": 0.46581560283687945, + "grad_norm": 33.399879455566406, + "learning_rate": 5e-05, + "loss": 1.2144, + "num_input_tokens_seen": 274625276, + "step": 4105 + }, + { + "epoch": 0.46581560283687945, + "loss": 1.4093761444091797, + "loss_ce": 0.005079240072518587, + "loss_iou": 0.56640625, + "loss_num": 0.054931640625, + "loss_xval": 1.40625, + "num_input_tokens_seen": 274625276, + "step": 4105 + }, + { + "epoch": 0.4659290780141844, + "grad_norm": 24.42327117919922, + "learning_rate": 5e-05, + "loss": 1.5026, + "num_input_tokens_seen": 274691344, + "step": 4106 + }, + { + "epoch": 0.4659290780141844, + "loss": 1.603269338607788, + "loss_ce": 0.010495959781110287, + "loss_iou": 0.6640625, + "loss_num": 0.052490234375, + "loss_xval": 1.59375, + "num_input_tokens_seen": 274691344, + "step": 4106 + }, + { + "epoch": 0.46604255319148935, + "grad_norm": 38.777828216552734, + "learning_rate": 5e-05, + "loss": 1.0435, + "num_input_tokens_seen": 274758672, + "step": 4107 + }, + { + "epoch": 0.46604255319148935, + "loss": 1.1165508031845093, + "loss_ce": 0.004246128723025322, + "loss_iou": 0.443359375, + "loss_num": 0.045166015625, + "loss_xval": 1.109375, + "num_input_tokens_seen": 274758672, + "step": 4107 + }, + { + "epoch": 0.46615602836879433, + "grad_norm": 25.172475814819336, + "learning_rate": 5e-05, + "loss": 1.2866, + "num_input_tokens_seen": 274825392, + "step": 4108 + }, + { + "epoch": 0.46615602836879433, + "loss": 1.3791258335113525, + "loss_ce": 0.0060789198614656925, + "loss_iou": 0.59765625, + "loss_num": 0.03564453125, + "loss_xval": 1.375, + "num_input_tokens_seen": 274825392, + "step": 4108 + }, + { + "epoch": 0.4662695035460993, + "grad_norm": 25.244394302368164, + "learning_rate": 5e-05, + "loss": 1.1768, + "num_input_tokens_seen": 274891912, + "step": 4109 + }, + { + "epoch": 0.4662695035460993, + "loss": 1.23469877243042, + "loss_ce": 0.008624636568129063, + "loss_iou": 0.51953125, + "loss_num": 0.036865234375, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 274891912, + "step": 4109 + }, + { + "epoch": 0.46638297872340423, + "grad_norm": 29.668886184692383, + "learning_rate": 5e-05, + "loss": 1.2219, + "num_input_tokens_seen": 274958404, + "step": 4110 + }, + { + "epoch": 0.46638297872340423, + "loss": 1.2000375986099243, + "loss_ce": 0.0032602017745375633, + "loss_iou": 0.5, + "loss_num": 0.039306640625, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 274958404, + "step": 4110 + }, + { + "epoch": 0.4664964539007092, + "grad_norm": 37.585750579833984, + "learning_rate": 5e-05, + "loss": 1.1815, + "num_input_tokens_seen": 275026424, + "step": 4111 + }, + { + "epoch": 0.4664964539007092, + "loss": 1.0805991888046265, + "loss_ce": 0.00540389958769083, + "loss_iou": 0.4765625, + "loss_num": 0.0250244140625, + "loss_xval": 1.078125, + "num_input_tokens_seen": 275026424, + "step": 4111 + }, + { + "epoch": 0.4666099290780142, + "grad_norm": 28.923349380493164, + "learning_rate": 5e-05, + "loss": 1.3302, + "num_input_tokens_seen": 275092124, + "step": 4112 + }, + { + "epoch": 0.4666099290780142, + "loss": 1.3817387819290161, + "loss_ce": 0.009668467566370964, + "loss_iou": 0.54296875, + "loss_num": 0.057861328125, + "loss_xval": 1.375, + "num_input_tokens_seen": 275092124, + "step": 4112 + }, + { + "epoch": 0.46672340425531916, + "grad_norm": 27.46706199645996, + "learning_rate": 5e-05, + "loss": 1.2212, + "num_input_tokens_seen": 275158768, + "step": 4113 + }, + { + "epoch": 0.46672340425531916, + "loss": 1.1713414192199707, + "loss_ce": 0.007278924807906151, + "loss_iou": 0.51171875, + "loss_num": 0.02880859375, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 275158768, + "step": 4113 + }, + { + "epoch": 0.46683687943262414, + "grad_norm": 32.50676345825195, + "learning_rate": 5e-05, + "loss": 1.3802, + "num_input_tokens_seen": 275225500, + "step": 4114 + }, + { + "epoch": 0.46683687943262414, + "loss": 1.381192684173584, + "loss_ce": 0.004727913532406092, + "loss_iou": 0.59765625, + "loss_num": 0.03662109375, + "loss_xval": 1.375, + "num_input_tokens_seen": 275225500, + "step": 4114 + }, + { + "epoch": 0.46695035460992906, + "grad_norm": 24.88094139099121, + "learning_rate": 5e-05, + "loss": 1.3856, + "num_input_tokens_seen": 275293212, + "step": 4115 + }, + { + "epoch": 0.46695035460992906, + "loss": 1.4853415489196777, + "loss_ce": 0.008290820755064487, + "loss_iou": 0.57421875, + "loss_num": 0.0654296875, + "loss_xval": 1.4765625, + "num_input_tokens_seen": 275293212, + "step": 4115 + }, + { + "epoch": 0.46706382978723404, + "grad_norm": 29.348724365234375, + "learning_rate": 5e-05, + "loss": 1.3733, + "num_input_tokens_seen": 275360176, + "step": 4116 + }, + { + "epoch": 0.46706382978723404, + "loss": 1.426600456237793, + "loss_ce": 0.006190227344632149, + "loss_iou": 0.55859375, + "loss_num": 0.06103515625, + "loss_xval": 1.421875, + "num_input_tokens_seen": 275360176, + "step": 4116 + }, + { + "epoch": 0.467177304964539, + "grad_norm": 25.558204650878906, + "learning_rate": 5e-05, + "loss": 1.0571, + "num_input_tokens_seen": 275426096, + "step": 4117 + }, + { + "epoch": 0.467177304964539, + "loss": 1.2505136728286743, + "loss_ce": 0.006861294619739056, + "loss_iou": 0.54296875, + "loss_num": 0.031982421875, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 275426096, + "step": 4117 + }, + { + "epoch": 0.467290780141844, + "grad_norm": 27.679014205932617, + "learning_rate": 5e-05, + "loss": 0.9969, + "num_input_tokens_seen": 275492072, + "step": 4118 + }, + { + "epoch": 0.467290780141844, + "loss": 0.9549717903137207, + "loss_ce": 0.0050206175073981285, + "loss_iou": 0.365234375, + "loss_num": 0.043701171875, + "loss_xval": 0.94921875, + "num_input_tokens_seen": 275492072, + "step": 4118 + }, + { + "epoch": 0.4674042553191489, + "grad_norm": 43.869163513183594, + "learning_rate": 5e-05, + "loss": 1.2732, + "num_input_tokens_seen": 275558284, + "step": 4119 + }, + { + "epoch": 0.4674042553191489, + "loss": 1.2115882635116577, + "loss_ce": 0.0118812695145607, + "loss_iou": 0.4921875, + "loss_num": 0.04296875, + "loss_xval": 1.203125, + "num_input_tokens_seen": 275558284, + "step": 4119 + }, + { + "epoch": 0.4675177304964539, + "grad_norm": 23.623598098754883, + "learning_rate": 5e-05, + "loss": 1.162, + "num_input_tokens_seen": 275624192, + "step": 4120 + }, + { + "epoch": 0.4675177304964539, + "loss": 1.1271096467971802, + "loss_ce": 0.005039307288825512, + "loss_iou": 0.455078125, + "loss_num": 0.042236328125, + "loss_xval": 1.125, + "num_input_tokens_seen": 275624192, + "step": 4120 + }, + { + "epoch": 0.4676312056737589, + "grad_norm": 16.601604461669922, + "learning_rate": 5e-05, + "loss": 1.1345, + "num_input_tokens_seen": 275691224, + "step": 4121 + }, + { + "epoch": 0.4676312056737589, + "loss": 1.0279536247253418, + "loss_ce": 0.008178319782018661, + "loss_iou": 0.443359375, + "loss_num": 0.026611328125, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 275691224, + "step": 4121 + }, + { + "epoch": 0.46774468085106385, + "grad_norm": 127.6872787475586, + "learning_rate": 5e-05, + "loss": 1.3944, + "num_input_tokens_seen": 275757716, + "step": 4122 + }, + { + "epoch": 0.46774468085106385, + "loss": 1.5550200939178467, + "loss_ce": 0.006192040164023638, + "loss_iou": 0.625, + "loss_num": 0.059326171875, + "loss_xval": 1.546875, + "num_input_tokens_seen": 275757716, + "step": 4122 + }, + { + "epoch": 0.4678581560283688, + "grad_norm": 35.00843048095703, + "learning_rate": 5e-05, + "loss": 1.3979, + "num_input_tokens_seen": 275823328, + "step": 4123 + }, + { + "epoch": 0.4678581560283688, + "loss": 1.388049840927124, + "loss_ce": 0.006213965825736523, + "loss_iou": 0.58984375, + "loss_num": 0.0400390625, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 275823328, + "step": 4123 + }, + { + "epoch": 0.46797163120567375, + "grad_norm": 19.349668502807617, + "learning_rate": 5e-05, + "loss": 1.1622, + "num_input_tokens_seen": 275890628, + "step": 4124 + }, + { + "epoch": 0.46797163120567375, + "loss": 1.2843742370605469, + "loss_ce": 0.003612388391047716, + "loss_iou": 0.53515625, + "loss_num": 0.04248046875, + "loss_xval": 1.28125, + "num_input_tokens_seen": 275890628, + "step": 4124 + }, + { + "epoch": 0.46808510638297873, + "grad_norm": 13.766879081726074, + "learning_rate": 5e-05, + "loss": 1.3714, + "num_input_tokens_seen": 275957408, + "step": 4125 + }, + { + "epoch": 0.46808510638297873, + "loss": 1.5177977085113525, + "loss_ce": 0.002172679640352726, + "loss_iou": 0.58984375, + "loss_num": 0.06689453125, + "loss_xval": 1.515625, + "num_input_tokens_seen": 275957408, + "step": 4125 + }, + { + "epoch": 0.4681985815602837, + "grad_norm": 16.99340057373047, + "learning_rate": 5e-05, + "loss": 1.3249, + "num_input_tokens_seen": 276024856, + "step": 4126 + }, + { + "epoch": 0.4681985815602837, + "loss": 1.4137804508209229, + "loss_ce": 0.005089062266051769, + "loss_iou": 0.55859375, + "loss_num": 0.05859375, + "loss_xval": 1.40625, + "num_input_tokens_seen": 276024856, + "step": 4126 + }, + { + "epoch": 0.46831205673758863, + "grad_norm": 33.19005584716797, + "learning_rate": 5e-05, + "loss": 1.3056, + "num_input_tokens_seen": 276091488, + "step": 4127 + }, + { + "epoch": 0.46831205673758863, + "loss": 1.4254264831542969, + "loss_ce": 0.005016271024942398, + "loss_iou": 0.546875, + "loss_num": 0.06591796875, + "loss_xval": 1.421875, + "num_input_tokens_seen": 276091488, + "step": 4127 + }, + { + "epoch": 0.4684255319148936, + "grad_norm": 42.82107162475586, + "learning_rate": 5e-05, + "loss": 1.1767, + "num_input_tokens_seen": 276158132, + "step": 4128 + }, + { + "epoch": 0.4684255319148936, + "loss": 1.1045615673065186, + "loss_ce": 0.0044638062827289104, + "loss_iou": 0.482421875, + "loss_num": 0.0267333984375, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 276158132, + "step": 4128 + }, + { + "epoch": 0.4685390070921986, + "grad_norm": 28.126876831054688, + "learning_rate": 5e-05, + "loss": 1.4832, + "num_input_tokens_seen": 276225044, + "step": 4129 + }, + { + "epoch": 0.4685390070921986, + "loss": 1.7333810329437256, + "loss_ce": 0.008771685883402824, + "loss_iou": 0.66796875, + "loss_num": 0.0771484375, + "loss_xval": 1.7265625, + "num_input_tokens_seen": 276225044, + "step": 4129 + }, + { + "epoch": 0.46865248226950357, + "grad_norm": 17.205320358276367, + "learning_rate": 5e-05, + "loss": 1.2491, + "num_input_tokens_seen": 276292340, + "step": 4130 + }, + { + "epoch": 0.46865248226950357, + "loss": 1.2589205503463745, + "loss_ce": 0.005990813486278057, + "loss_iou": 0.53125, + "loss_num": 0.037841796875, + "loss_xval": 1.25, + "num_input_tokens_seen": 276292340, + "step": 4130 + }, + { + "epoch": 0.4687659574468085, + "grad_norm": 21.768810272216797, + "learning_rate": 5e-05, + "loss": 1.3041, + "num_input_tokens_seen": 276359280, + "step": 4131 + }, + { + "epoch": 0.4687659574468085, + "loss": 1.2683801651000977, + "loss_ce": 0.003731763456016779, + "loss_iou": 0.50390625, + "loss_num": 0.0517578125, + "loss_xval": 1.265625, + "num_input_tokens_seen": 276359280, + "step": 4131 + }, + { + "epoch": 0.46887943262411347, + "grad_norm": 34.69977951049805, + "learning_rate": 5e-05, + "loss": 1.4095, + "num_input_tokens_seen": 276426988, + "step": 4132 + }, + { + "epoch": 0.46887943262411347, + "loss": 1.352888822555542, + "loss_ce": 0.006453224457800388, + "loss_iou": 0.5546875, + "loss_num": 0.046875, + "loss_xval": 1.34375, + "num_input_tokens_seen": 276426988, + "step": 4132 + }, + { + "epoch": 0.46899290780141845, + "grad_norm": 27.90000343322754, + "learning_rate": 5e-05, + "loss": 1.1701, + "num_input_tokens_seen": 276494408, + "step": 4133 + }, + { + "epoch": 0.46899290780141845, + "loss": 1.121314287185669, + "loss_ce": 0.010474351234734058, + "loss_iou": 0.490234375, + "loss_num": 0.026123046875, + "loss_xval": 1.109375, + "num_input_tokens_seen": 276494408, + "step": 4133 + }, + { + "epoch": 0.4691063829787234, + "grad_norm": 34.39746856689453, + "learning_rate": 5e-05, + "loss": 1.4243, + "num_input_tokens_seen": 276560568, + "step": 4134 + }, + { + "epoch": 0.4691063829787234, + "loss": 1.3584809303283691, + "loss_ce": 0.005453621037304401, + "loss_iou": 0.546875, + "loss_num": 0.05126953125, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 276560568, + "step": 4134 + }, + { + "epoch": 0.46921985815602835, + "grad_norm": 21.67863655090332, + "learning_rate": 5e-05, + "loss": 1.2115, + "num_input_tokens_seen": 276627476, + "step": 4135 + }, + { + "epoch": 0.46921985815602835, + "loss": 1.2341442108154297, + "loss_ce": 0.0046520475298166275, + "loss_iou": 0.490234375, + "loss_num": 0.0498046875, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 276627476, + "step": 4135 + }, + { + "epoch": 0.4693333333333333, + "grad_norm": 32.50197982788086, + "learning_rate": 5e-05, + "loss": 1.2173, + "num_input_tokens_seen": 276694380, + "step": 4136 + }, + { + "epoch": 0.4693333333333333, + "loss": 1.1224257946014404, + "loss_ce": 0.0057266587391495705, + "loss_iou": 0.478515625, + "loss_num": 0.031494140625, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 276694380, + "step": 4136 + }, + { + "epoch": 0.4694468085106383, + "grad_norm": 31.32550811767578, + "learning_rate": 5e-05, + "loss": 1.28, + "num_input_tokens_seen": 276760800, + "step": 4137 + }, + { + "epoch": 0.4694468085106383, + "loss": 1.3000365495681763, + "loss_ce": 0.008532638661563396, + "loss_iou": 0.52734375, + "loss_num": 0.046875, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 276760800, + "step": 4137 + }, + { + "epoch": 0.4695602836879433, + "grad_norm": 71.58734130859375, + "learning_rate": 5e-05, + "loss": 1.3148, + "num_input_tokens_seen": 276828252, + "step": 4138 + }, + { + "epoch": 0.4695602836879433, + "loss": 1.120336890220642, + "loss_ce": 0.005117819644510746, + "loss_iou": 0.4765625, + "loss_num": 0.032470703125, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 276828252, + "step": 4138 + }, + { + "epoch": 0.4696737588652482, + "grad_norm": 15.29085922241211, + "learning_rate": 5e-05, + "loss": 1.1951, + "num_input_tokens_seen": 276896084, + "step": 4139 + }, + { + "epoch": 0.4696737588652482, + "loss": 1.1911876201629639, + "loss_ce": 0.004664144013077021, + "loss_iou": 0.498046875, + "loss_num": 0.03759765625, + "loss_xval": 1.1875, + "num_input_tokens_seen": 276896084, + "step": 4139 + }, + { + "epoch": 0.4697872340425532, + "grad_norm": 24.75590705871582, + "learning_rate": 5e-05, + "loss": 1.2194, + "num_input_tokens_seen": 276962756, + "step": 4140 + }, + { + "epoch": 0.4697872340425532, + "loss": 1.1431512832641602, + "loss_ce": 0.0035027984995394945, + "loss_iou": 0.48828125, + "loss_num": 0.032470703125, + "loss_xval": 1.140625, + "num_input_tokens_seen": 276962756, + "step": 4140 + }, + { + "epoch": 0.46990070921985816, + "grad_norm": 40.458736419677734, + "learning_rate": 5e-05, + "loss": 1.3238, + "num_input_tokens_seen": 277029028, + "step": 4141 + }, + { + "epoch": 0.46990070921985816, + "loss": 1.1946673393249512, + "loss_ce": 0.00307805510237813, + "loss_iou": 0.51953125, + "loss_num": 0.0303955078125, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 277029028, + "step": 4141 + }, + { + "epoch": 0.47001418439716314, + "grad_norm": 42.02995681762695, + "learning_rate": 5e-05, + "loss": 1.3328, + "num_input_tokens_seen": 277097512, + "step": 4142 + }, + { + "epoch": 0.47001418439716314, + "loss": 1.298785924911499, + "loss_ce": 0.002887503243982792, + "loss_iou": 0.5390625, + "loss_num": 0.043701171875, + "loss_xval": 1.296875, + "num_input_tokens_seen": 277097512, + "step": 4142 + }, + { + "epoch": 0.47012765957446806, + "grad_norm": 18.41635513305664, + "learning_rate": 5e-05, + "loss": 1.1503, + "num_input_tokens_seen": 277163820, + "step": 4143 + }, + { + "epoch": 0.47012765957446806, + "loss": 1.2341145277023315, + "loss_ce": 0.013655487447977066, + "loss_iou": 0.470703125, + "loss_num": 0.0556640625, + "loss_xval": 1.21875, + "num_input_tokens_seen": 277163820, + "step": 4143 + }, + { + "epoch": 0.47024113475177304, + "grad_norm": 23.179935455322266, + "learning_rate": 5e-05, + "loss": 1.105, + "num_input_tokens_seen": 277231064, + "step": 4144 + }, + { + "epoch": 0.47024113475177304, + "loss": 1.068935751914978, + "loss_ce": 0.00545920617878437, + "loss_iou": 0.4375, + "loss_num": 0.0380859375, + "loss_xval": 1.0625, + "num_input_tokens_seen": 277231064, + "step": 4144 + }, + { + "epoch": 0.470354609929078, + "grad_norm": 34.474098205566406, + "learning_rate": 5e-05, + "loss": 1.4213, + "num_input_tokens_seen": 277297680, + "step": 4145 + }, + { + "epoch": 0.470354609929078, + "loss": 1.3497459888458252, + "loss_ce": 0.0030662603676319122, + "loss_iou": 0.5234375, + "loss_num": 0.059814453125, + "loss_xval": 1.34375, + "num_input_tokens_seen": 277297680, + "step": 4145 + }, + { + "epoch": 0.470468085106383, + "grad_norm": 27.49770164489746, + "learning_rate": 5e-05, + "loss": 1.4802, + "num_input_tokens_seen": 277364740, + "step": 4146 + }, + { + "epoch": 0.470468085106383, + "loss": 1.3272819519042969, + "loss_ce": 0.0020865658298134804, + "loss_iou": 0.55859375, + "loss_num": 0.041259765625, + "loss_xval": 1.328125, + "num_input_tokens_seen": 277364740, + "step": 4146 + }, + { + "epoch": 0.4705815602836879, + "grad_norm": 71.28256225585938, + "learning_rate": 5e-05, + "loss": 1.13, + "num_input_tokens_seen": 277432468, + "step": 4147 + }, + { + "epoch": 0.4705815602836879, + "loss": 1.1031285524368286, + "loss_ce": 0.006937153171747923, + "loss_iou": 0.462890625, + "loss_num": 0.034423828125, + "loss_xval": 1.09375, + "num_input_tokens_seen": 277432468, + "step": 4147 + }, + { + "epoch": 0.4706950354609929, + "grad_norm": 25.310077667236328, + "learning_rate": 5e-05, + "loss": 1.092, + "num_input_tokens_seen": 277499692, + "step": 4148 + }, + { + "epoch": 0.4706950354609929, + "loss": 1.0247461795806885, + "loss_ce": 0.005214953329414129, + "loss_iou": 0.400390625, + "loss_num": 0.04345703125, + "loss_xval": 1.015625, + "num_input_tokens_seen": 277499692, + "step": 4148 + }, + { + "epoch": 0.4708085106382979, + "grad_norm": 40.55874252319336, + "learning_rate": 5e-05, + "loss": 1.3517, + "num_input_tokens_seen": 277567004, + "step": 4149 + }, + { + "epoch": 0.4708085106382979, + "loss": 1.3687894344329834, + "loss_ce": 0.0016020156908780336, + "loss_iou": 0.58203125, + "loss_num": 0.04052734375, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 277567004, + "step": 4149 + }, + { + "epoch": 0.47092198581560285, + "grad_norm": 27.548145294189453, + "learning_rate": 5e-05, + "loss": 1.3906, + "num_input_tokens_seen": 277634216, + "step": 4150 + }, + { + "epoch": 0.47092198581560285, + "loss": 1.517421007156372, + "loss_ce": 0.0057021742686629295, + "loss_iou": 0.6484375, + "loss_num": 0.042724609375, + "loss_xval": 1.515625, + "num_input_tokens_seen": 277634216, + "step": 4150 + }, + { + "epoch": 0.47103546099290783, + "grad_norm": 61.73857116699219, + "learning_rate": 5e-05, + "loss": 1.2763, + "num_input_tokens_seen": 277701456, + "step": 4151 + }, + { + "epoch": 0.47103546099290783, + "loss": 1.4666616916656494, + "loss_ce": 0.009630445390939713, + "loss_iou": 0.5625, + "loss_num": 0.06591796875, + "loss_xval": 1.453125, + "num_input_tokens_seen": 277701456, + "step": 4151 + }, + { + "epoch": 0.47114893617021275, + "grad_norm": 13.802217483520508, + "learning_rate": 5e-05, + "loss": 1.0545, + "num_input_tokens_seen": 277769292, + "step": 4152 + }, + { + "epoch": 0.47114893617021275, + "loss": 0.8999587893486023, + "loss_ce": 0.007380632683634758, + "loss_iou": 0.400390625, + "loss_num": 0.018798828125, + "loss_xval": 0.890625, + "num_input_tokens_seen": 277769292, + "step": 4152 + }, + { + "epoch": 0.47126241134751773, + "grad_norm": 10.921869277954102, + "learning_rate": 5e-05, + "loss": 1.0732, + "num_input_tokens_seen": 277834532, + "step": 4153 + }, + { + "epoch": 0.47126241134751773, + "loss": 0.9435267448425293, + "loss_ce": 0.008468124084174633, + "loss_iou": 0.357421875, + "loss_num": 0.044189453125, + "loss_xval": 0.93359375, + "num_input_tokens_seen": 277834532, + "step": 4153 + }, + { + "epoch": 0.4713758865248227, + "grad_norm": 15.830458641052246, + "learning_rate": 5e-05, + "loss": 1.1819, + "num_input_tokens_seen": 277900832, + "step": 4154 + }, + { + "epoch": 0.4713758865248227, + "loss": 1.0266659259796143, + "loss_ce": 0.0027401004917919636, + "loss_iou": 0.439453125, + "loss_num": 0.029296875, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 277900832, + "step": 4154 + }, + { + "epoch": 0.4714893617021277, + "grad_norm": 36.2877311706543, + "learning_rate": 5e-05, + "loss": 1.1725, + "num_input_tokens_seen": 277968044, + "step": 4155 + }, + { + "epoch": 0.4714893617021277, + "loss": 1.399031639099121, + "loss_ce": 0.0059651900082826614, + "loss_iou": 0.53515625, + "loss_num": 0.06396484375, + "loss_xval": 1.390625, + "num_input_tokens_seen": 277968044, + "step": 4155 + }, + { + "epoch": 0.4716028368794326, + "grad_norm": 47.59819793701172, + "learning_rate": 5e-05, + "loss": 1.5676, + "num_input_tokens_seen": 278034832, + "step": 4156 + }, + { + "epoch": 0.4716028368794326, + "loss": 1.7196083068847656, + "loss_ce": 0.010623801499605179, + "loss_iou": 0.69921875, + "loss_num": 0.062255859375, + "loss_xval": 1.7109375, + "num_input_tokens_seen": 278034832, + "step": 4156 + }, + { + "epoch": 0.4717163120567376, + "grad_norm": 28.59760284423828, + "learning_rate": 5e-05, + "loss": 1.4744, + "num_input_tokens_seen": 278102020, + "step": 4157 + }, + { + "epoch": 0.4717163120567376, + "loss": 1.4266271591186523, + "loss_ce": 0.004263911861926317, + "loss_iou": 0.58984375, + "loss_num": 0.04833984375, + "loss_xval": 1.421875, + "num_input_tokens_seen": 278102020, + "step": 4157 + }, + { + "epoch": 0.47182978723404256, + "grad_norm": 31.170833587646484, + "learning_rate": 5e-05, + "loss": 1.345, + "num_input_tokens_seen": 278169220, + "step": 4158 + }, + { + "epoch": 0.47182978723404256, + "loss": 1.4923419952392578, + "loss_ce": 0.004549089819192886, + "loss_iou": 0.62109375, + "loss_num": 0.049560546875, + "loss_xval": 1.484375, + "num_input_tokens_seen": 278169220, + "step": 4158 + }, + { + "epoch": 0.47194326241134754, + "grad_norm": 23.517976760864258, + "learning_rate": 5e-05, + "loss": 1.4681, + "num_input_tokens_seen": 278234972, + "step": 4159 + }, + { + "epoch": 0.47194326241134754, + "loss": 1.4579095840454102, + "loss_ce": 0.005730704870074987, + "loss_iou": 0.57421875, + "loss_num": 0.060546875, + "loss_xval": 1.453125, + "num_input_tokens_seen": 278234972, + "step": 4159 + }, + { + "epoch": 0.47205673758865246, + "grad_norm": 14.285892486572266, + "learning_rate": 5e-05, + "loss": 1.1605, + "num_input_tokens_seen": 278301428, + "step": 4160 + }, + { + "epoch": 0.47205673758865246, + "loss": 1.0305272340774536, + "loss_ce": 0.00708972942084074, + "loss_iou": 0.404296875, + "loss_num": 0.043212890625, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 278301428, + "step": 4160 + }, + { + "epoch": 0.47217021276595744, + "grad_norm": 18.438827514648438, + "learning_rate": 5e-05, + "loss": 1.0897, + "num_input_tokens_seen": 278367300, + "step": 4161 + }, + { + "epoch": 0.47217021276595744, + "loss": 1.1466078758239746, + "loss_ce": 0.004517995286732912, + "loss_iou": 0.44921875, + "loss_num": 0.048828125, + "loss_xval": 1.140625, + "num_input_tokens_seen": 278367300, + "step": 4161 + }, + { + "epoch": 0.4722836879432624, + "grad_norm": 24.100107192993164, + "learning_rate": 5e-05, + "loss": 1.009, + "num_input_tokens_seen": 278434880, + "step": 4162 + }, + { + "epoch": 0.4722836879432624, + "loss": 1.065349817276001, + "loss_ce": 0.005779589992016554, + "loss_iou": 0.427734375, + "loss_num": 0.041015625, + "loss_xval": 1.0625, + "num_input_tokens_seen": 278434880, + "step": 4162 + }, + { + "epoch": 0.4723971631205674, + "grad_norm": 23.476938247680664, + "learning_rate": 5e-05, + "loss": 1.3771, + "num_input_tokens_seen": 278501512, + "step": 4163 + }, + { + "epoch": 0.4723971631205674, + "loss": 1.51619291305542, + "loss_ce": 0.005938932299613953, + "loss_iou": 0.6484375, + "loss_num": 0.042724609375, + "loss_xval": 1.5078125, + "num_input_tokens_seen": 278501512, + "step": 4163 + }, + { + "epoch": 0.4725106382978723, + "grad_norm": 21.686567306518555, + "learning_rate": 5e-05, + "loss": 1.194, + "num_input_tokens_seen": 278569720, + "step": 4164 + }, + { + "epoch": 0.4725106382978723, + "loss": 1.0826867818832397, + "loss_ce": 0.008193383924663067, + "loss_iou": 0.4765625, + "loss_num": 0.024169921875, + "loss_xval": 1.078125, + "num_input_tokens_seen": 278569720, + "step": 4164 + }, + { + "epoch": 0.4726241134751773, + "grad_norm": 31.40447998046875, + "learning_rate": 5e-05, + "loss": 1.1583, + "num_input_tokens_seen": 278636468, + "step": 4165 + }, + { + "epoch": 0.4726241134751773, + "loss": 1.3013677597045898, + "loss_ce": 0.011817025020718575, + "loss_iou": 0.5234375, + "loss_num": 0.049072265625, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 278636468, + "step": 4165 + }, + { + "epoch": 0.4727375886524823, + "grad_norm": 28.88954734802246, + "learning_rate": 5e-05, + "loss": 1.2936, + "num_input_tokens_seen": 278703556, + "step": 4166 + }, + { + "epoch": 0.4727375886524823, + "loss": 1.1807302236557007, + "loss_ce": 0.004704761318862438, + "loss_iou": 0.498046875, + "loss_num": 0.035888671875, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 278703556, + "step": 4166 + }, + { + "epoch": 0.47285106382978725, + "grad_norm": 187.43626403808594, + "learning_rate": 5e-05, + "loss": 1.2791, + "num_input_tokens_seen": 278771184, + "step": 4167 + }, + { + "epoch": 0.47285106382978725, + "loss": 1.2622321844100952, + "loss_ce": 0.005396221298724413, + "loss_iou": 0.54296875, + "loss_num": 0.03466796875, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 278771184, + "step": 4167 + }, + { + "epoch": 0.4729645390070922, + "grad_norm": 13.719305992126465, + "learning_rate": 5e-05, + "loss": 1.2469, + "num_input_tokens_seen": 278837436, + "step": 4168 + }, + { + "epoch": 0.4729645390070922, + "loss": 1.1708166599273682, + "loss_ce": 0.007486548274755478, + "loss_iou": 0.435546875, + "loss_num": 0.058349609375, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 278837436, + "step": 4168 + }, + { + "epoch": 0.47307801418439716, + "grad_norm": 77.91871643066406, + "learning_rate": 5e-05, + "loss": 1.2055, + "num_input_tokens_seen": 278905820, + "step": 4169 + }, + { + "epoch": 0.47307801418439716, + "loss": 1.168322205543518, + "loss_ce": 0.004747950471937656, + "loss_iou": 0.486328125, + "loss_num": 0.0380859375, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 278905820, + "step": 4169 + }, + { + "epoch": 0.47319148936170213, + "grad_norm": 28.183061599731445, + "learning_rate": 5e-05, + "loss": 1.1484, + "num_input_tokens_seen": 278973036, + "step": 4170 + }, + { + "epoch": 0.47319148936170213, + "loss": 1.0727388858795166, + "loss_ce": 0.006332720629870892, + "loss_iou": 0.447265625, + "loss_num": 0.0341796875, + "loss_xval": 1.0625, + "num_input_tokens_seen": 278973036, + "step": 4170 + }, + { + "epoch": 0.4733049645390071, + "grad_norm": 20.058053970336914, + "learning_rate": 5e-05, + "loss": 1.3187, + "num_input_tokens_seen": 279039872, + "step": 4171 + }, + { + "epoch": 0.4733049645390071, + "loss": 1.4423305988311768, + "loss_ce": 0.0028774593956768513, + "loss_iou": 0.57421875, + "loss_num": 0.05810546875, + "loss_xval": 1.4375, + "num_input_tokens_seen": 279039872, + "step": 4171 + }, + { + "epoch": 0.47341843971631203, + "grad_norm": 35.570556640625, + "learning_rate": 5e-05, + "loss": 1.1305, + "num_input_tokens_seen": 279106832, + "step": 4172 + }, + { + "epoch": 0.47341843971631203, + "loss": 0.9220348000526428, + "loss_ce": 0.0052867671474814415, + "loss_iou": 0.384765625, + "loss_num": 0.0296630859375, + "loss_xval": 0.91796875, + "num_input_tokens_seen": 279106832, + "step": 4172 + }, + { + "epoch": 0.473531914893617, + "grad_norm": 24.60494041442871, + "learning_rate": 5e-05, + "loss": 1.2339, + "num_input_tokens_seen": 279173252, + "step": 4173 + }, + { + "epoch": 0.473531914893617, + "loss": 1.0304917097091675, + "loss_ce": 0.0031479133758693933, + "loss_iou": 0.43359375, + "loss_num": 0.031982421875, + "loss_xval": 1.03125, + "num_input_tokens_seen": 279173252, + "step": 4173 + }, + { + "epoch": 0.473645390070922, + "grad_norm": 27.987834930419922, + "learning_rate": 5e-05, + "loss": 1.2106, + "num_input_tokens_seen": 279240080, + "step": 4174 + }, + { + "epoch": 0.473645390070922, + "loss": 1.1519198417663574, + "loss_ce": 0.006656220182776451, + "loss_iou": 0.490234375, + "loss_num": 0.03271484375, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 279240080, + "step": 4174 + }, + { + "epoch": 0.47375886524822697, + "grad_norm": 29.30059051513672, + "learning_rate": 5e-05, + "loss": 1.1185, + "num_input_tokens_seen": 279306216, + "step": 4175 + }, + { + "epoch": 0.47375886524822697, + "loss": 1.1304373741149902, + "loss_ce": 0.006902228109538555, + "loss_iou": 0.5, + "loss_num": 0.0247802734375, + "loss_xval": 1.125, + "num_input_tokens_seen": 279306216, + "step": 4175 + }, + { + "epoch": 0.4738723404255319, + "grad_norm": 26.842491149902344, + "learning_rate": 5e-05, + "loss": 1.0262, + "num_input_tokens_seen": 279372836, + "step": 4176 + }, + { + "epoch": 0.4738723404255319, + "loss": 0.8452000021934509, + "loss_ce": 0.006821123883128166, + "loss_iou": 0.326171875, + "loss_num": 0.03759765625, + "loss_xval": 0.83984375, + "num_input_tokens_seen": 279372836, + "step": 4176 + }, + { + "epoch": 0.47398581560283687, + "grad_norm": 26.354093551635742, + "learning_rate": 5e-05, + "loss": 1.3549, + "num_input_tokens_seen": 279440332, + "step": 4177 + }, + { + "epoch": 0.47398581560283687, + "loss": 1.3153889179229736, + "loss_ce": 0.001912456937134266, + "loss_iou": 0.55859375, + "loss_num": 0.03857421875, + "loss_xval": 1.3125, + "num_input_tokens_seen": 279440332, + "step": 4177 + }, + { + "epoch": 0.47409929078014185, + "grad_norm": 37.961063385009766, + "learning_rate": 5e-05, + "loss": 1.1318, + "num_input_tokens_seen": 279506780, + "step": 4178 + }, + { + "epoch": 0.47409929078014185, + "loss": 1.266591191291809, + "loss_ce": 0.006337329745292664, + "loss_iou": 0.498046875, + "loss_num": 0.052490234375, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 279506780, + "step": 4178 + }, + { + "epoch": 0.4742127659574468, + "grad_norm": 134.6585235595703, + "learning_rate": 5e-05, + "loss": 1.4128, + "num_input_tokens_seen": 279573788, + "step": 4179 + }, + { + "epoch": 0.4742127659574468, + "loss": 1.3863012790679932, + "loss_ce": 0.0073950523510575294, + "loss_iou": 0.5703125, + "loss_num": 0.04638671875, + "loss_xval": 1.375, + "num_input_tokens_seen": 279573788, + "step": 4179 + }, + { + "epoch": 0.47432624113475175, + "grad_norm": 23.70966911315918, + "learning_rate": 5e-05, + "loss": 1.2113, + "num_input_tokens_seen": 279642320, + "step": 4180 + }, + { + "epoch": 0.47432624113475175, + "loss": 1.2620880603790283, + "loss_ce": 0.014041106216609478, + "loss_iou": 0.50390625, + "loss_num": 0.047607421875, + "loss_xval": 1.25, + "num_input_tokens_seen": 279642320, + "step": 4180 + }, + { + "epoch": 0.4744397163120567, + "grad_norm": 28.007959365844727, + "learning_rate": 5e-05, + "loss": 1.0906, + "num_input_tokens_seen": 279709188, + "step": 4181 + }, + { + "epoch": 0.4744397163120567, + "loss": 1.0318384170532227, + "loss_ce": 0.004982878919690847, + "loss_iou": 0.42578125, + "loss_num": 0.035400390625, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 279709188, + "step": 4181 + }, + { + "epoch": 0.4745531914893617, + "grad_norm": 30.77260398864746, + "learning_rate": 5e-05, + "loss": 1.5544, + "num_input_tokens_seen": 279776148, + "step": 4182 + }, + { + "epoch": 0.4745531914893617, + "loss": 1.5480542182922363, + "loss_ce": 0.007038567215204239, + "loss_iou": 0.62890625, + "loss_num": 0.05712890625, + "loss_xval": 1.5390625, + "num_input_tokens_seen": 279776148, + "step": 4182 + }, + { + "epoch": 0.4746666666666667, + "grad_norm": 28.866989135742188, + "learning_rate": 5e-05, + "loss": 1.1878, + "num_input_tokens_seen": 279843008, + "step": 4183 + }, + { + "epoch": 0.4746666666666667, + "loss": 0.9964739084243774, + "loss_ce": 0.003798130201175809, + "loss_iou": 0.419921875, + "loss_num": 0.030517578125, + "loss_xval": 0.9921875, + "num_input_tokens_seen": 279843008, + "step": 4183 + }, + { + "epoch": 0.4747801418439716, + "grad_norm": 26.54865074157715, + "learning_rate": 5e-05, + "loss": 1.2736, + "num_input_tokens_seen": 279910544, + "step": 4184 + }, + { + "epoch": 0.4747801418439716, + "loss": 1.3512449264526367, + "loss_ce": 0.007494865916669369, + "loss_iou": 0.57421875, + "loss_num": 0.039306640625, + "loss_xval": 1.34375, + "num_input_tokens_seen": 279910544, + "step": 4184 + }, + { + "epoch": 0.4748936170212766, + "grad_norm": 23.039836883544922, + "learning_rate": 5e-05, + "loss": 1.128, + "num_input_tokens_seen": 279977388, + "step": 4185 + }, + { + "epoch": 0.4748936170212766, + "loss": 1.0816996097564697, + "loss_ce": 0.0069925738498568535, + "loss_iou": 0.43359375, + "loss_num": 0.041748046875, + "loss_xval": 1.078125, + "num_input_tokens_seen": 279977388, + "step": 4185 + }, + { + "epoch": 0.47500709219858156, + "grad_norm": 22.732336044311523, + "learning_rate": 5e-05, + "loss": 1.1691, + "num_input_tokens_seen": 280043404, + "step": 4186 + }, + { + "epoch": 0.47500709219858156, + "loss": 1.252638578414917, + "loss_ce": 0.005568268708884716, + "loss_iou": 0.5078125, + "loss_num": 0.0458984375, + "loss_xval": 1.25, + "num_input_tokens_seen": 280043404, + "step": 4186 + }, + { + "epoch": 0.47512056737588654, + "grad_norm": 41.29817581176758, + "learning_rate": 5e-05, + "loss": 1.5273, + "num_input_tokens_seen": 280110824, + "step": 4187 + }, + { + "epoch": 0.47512056737588654, + "loss": 1.4895994663238525, + "loss_ce": 0.00766581017524004, + "loss_iou": 0.62109375, + "loss_num": 0.046875, + "loss_xval": 1.484375, + "num_input_tokens_seen": 280110824, + "step": 4187 + }, + { + "epoch": 0.4752340425531915, + "grad_norm": 29.61109733581543, + "learning_rate": 5e-05, + "loss": 1.4675, + "num_input_tokens_seen": 280178568, + "step": 4188 + }, + { + "epoch": 0.4752340425531915, + "loss": 1.347121000289917, + "loss_ce": 0.0038592382334172726, + "loss_iou": 0.5859375, + "loss_num": 0.0341796875, + "loss_xval": 1.34375, + "num_input_tokens_seen": 280178568, + "step": 4188 + }, + { + "epoch": 0.47534751773049644, + "grad_norm": 29.661237716674805, + "learning_rate": 5e-05, + "loss": 1.1951, + "num_input_tokens_seen": 280244788, + "step": 4189 + }, + { + "epoch": 0.47534751773049644, + "loss": 1.151482343673706, + "loss_ce": 0.005486217327415943, + "loss_iou": 0.46875, + "loss_num": 0.041748046875, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 280244788, + "step": 4189 + }, + { + "epoch": 0.4754609929078014, + "grad_norm": 18.95663070678711, + "learning_rate": 5e-05, + "loss": 1.2096, + "num_input_tokens_seen": 280310848, + "step": 4190 + }, + { + "epoch": 0.4754609929078014, + "loss": 1.1238160133361816, + "loss_ce": 0.004919456318020821, + "loss_iou": 0.4453125, + "loss_num": 0.04541015625, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 280310848, + "step": 4190 + }, + { + "epoch": 0.4755744680851064, + "grad_norm": 36.35870361328125, + "learning_rate": 5e-05, + "loss": 1.0899, + "num_input_tokens_seen": 280377784, + "step": 4191 + }, + { + "epoch": 0.4755744680851064, + "loss": 1.1684749126434326, + "loss_ce": 0.008562853559851646, + "loss_iou": 0.486328125, + "loss_num": 0.03759765625, + "loss_xval": 1.15625, + "num_input_tokens_seen": 280377784, + "step": 4191 + }, + { + "epoch": 0.4756879432624114, + "grad_norm": 29.975751876831055, + "learning_rate": 5e-05, + "loss": 1.1661, + "num_input_tokens_seen": 280445136, + "step": 4192 + }, + { + "epoch": 0.4756879432624114, + "loss": 1.0290369987487793, + "loss_ce": 0.003768389578908682, + "loss_iou": 0.41796875, + "loss_num": 0.03759765625, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 280445136, + "step": 4192 + }, + { + "epoch": 0.4758014184397163, + "grad_norm": 35.638797760009766, + "learning_rate": 5e-05, + "loss": 1.0525, + "num_input_tokens_seen": 280512244, + "step": 4193 + }, + { + "epoch": 0.4758014184397163, + "loss": 1.0577267408370972, + "loss_ce": 0.0057247234508395195, + "loss_iou": 0.4140625, + "loss_num": 0.045166015625, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 280512244, + "step": 4193 + }, + { + "epoch": 0.4759148936170213, + "grad_norm": 14.205679893493652, + "learning_rate": 5e-05, + "loss": 1.0124, + "num_input_tokens_seen": 280579600, + "step": 4194 + }, + { + "epoch": 0.4759148936170213, + "loss": 0.9776159524917603, + "loss_ce": 0.003983113914728165, + "loss_iou": 0.4453125, + "loss_num": 0.016357421875, + "loss_xval": 0.97265625, + "num_input_tokens_seen": 280579600, + "step": 4194 + }, + { + "epoch": 0.47602836879432625, + "grad_norm": 22.480531692504883, + "learning_rate": 5e-05, + "loss": 1.3106, + "num_input_tokens_seen": 280645188, + "step": 4195 + }, + { + "epoch": 0.47602836879432625, + "loss": 1.4877381324768066, + "loss_ce": 0.0038513243198394775, + "loss_iou": 0.5703125, + "loss_num": 0.06884765625, + "loss_xval": 1.484375, + "num_input_tokens_seen": 280645188, + "step": 4195 + }, + { + "epoch": 0.47614184397163123, + "grad_norm": 24.10562515258789, + "learning_rate": 5e-05, + "loss": 1.4812, + "num_input_tokens_seen": 280712024, + "step": 4196 + }, + { + "epoch": 0.47614184397163123, + "loss": 1.3569461107254028, + "loss_ce": 0.008801579475402832, + "loss_iou": 0.56640625, + "loss_num": 0.042724609375, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 280712024, + "step": 4196 + }, + { + "epoch": 0.47625531914893615, + "grad_norm": 23.912826538085938, + "learning_rate": 5e-05, + "loss": 1.0553, + "num_input_tokens_seen": 280779008, + "step": 4197 + }, + { + "epoch": 0.47625531914893615, + "loss": 1.133727788925171, + "loss_ce": 0.0043333121575415134, + "loss_iou": 0.48828125, + "loss_num": 0.0302734375, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 280779008, + "step": 4197 + }, + { + "epoch": 0.47636879432624113, + "grad_norm": 58.910457611083984, + "learning_rate": 5e-05, + "loss": 1.2169, + "num_input_tokens_seen": 280846452, + "step": 4198 + }, + { + "epoch": 0.47636879432624113, + "loss": 1.0922937393188477, + "loss_ce": 0.0039148153737187386, + "loss_iou": 0.4609375, + "loss_num": 0.033447265625, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 280846452, + "step": 4198 + }, + { + "epoch": 0.4764822695035461, + "grad_norm": 36.779048919677734, + "learning_rate": 5e-05, + "loss": 1.2919, + "num_input_tokens_seen": 280913424, + "step": 4199 + }, + { + "epoch": 0.4764822695035461, + "loss": 1.3243439197540283, + "loss_ce": 0.00744938850402832, + "loss_iou": 0.5390625, + "loss_num": 0.047607421875, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 280913424, + "step": 4199 + }, + { + "epoch": 0.4765957446808511, + "grad_norm": 32.77827072143555, + "learning_rate": 5e-05, + "loss": 1.281, + "num_input_tokens_seen": 280979872, + "step": 4200 + }, + { + "epoch": 0.4765957446808511, + "loss": 1.116650104522705, + "loss_ce": 0.011669538915157318, + "loss_iou": 0.515625, + "loss_num": 0.01513671875, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 280979872, + "step": 4200 + }, + { + "epoch": 0.476709219858156, + "grad_norm": 20.748451232910156, + "learning_rate": 5e-05, + "loss": 1.0607, + "num_input_tokens_seen": 281047464, + "step": 4201 + }, + { + "epoch": 0.476709219858156, + "loss": 0.9188538789749146, + "loss_ce": 0.004791381768882275, + "loss_iou": 0.39453125, + "loss_num": 0.025146484375, + "loss_xval": 0.9140625, + "num_input_tokens_seen": 281047464, + "step": 4201 + }, + { + "epoch": 0.476822695035461, + "grad_norm": 31.16201400756836, + "learning_rate": 5e-05, + "loss": 1.2524, + "num_input_tokens_seen": 281114908, + "step": 4202 + }, + { + "epoch": 0.476822695035461, + "loss": 1.3310409784317017, + "loss_ce": 0.009751922450959682, + "loss_iou": 0.53125, + "loss_num": 0.052490234375, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 281114908, + "step": 4202 + }, + { + "epoch": 0.47693617021276596, + "grad_norm": 29.06413459777832, + "learning_rate": 5e-05, + "loss": 1.1916, + "num_input_tokens_seen": 281182208, + "step": 4203 + }, + { + "epoch": 0.47693617021276596, + "loss": 1.2184414863586426, + "loss_ce": 0.004574350081384182, + "loss_iou": 0.51171875, + "loss_num": 0.037841796875, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 281182208, + "step": 4203 + }, + { + "epoch": 0.47704964539007094, + "grad_norm": 28.19075584411621, + "learning_rate": 5e-05, + "loss": 1.5351, + "num_input_tokens_seen": 281247916, + "step": 4204 + }, + { + "epoch": 0.47704964539007094, + "loss": 1.4370641708374023, + "loss_ce": 0.0054235453717410564, + "loss_iou": 0.53125, + "loss_num": 0.0732421875, + "loss_xval": 1.4296875, + "num_input_tokens_seen": 281247916, + "step": 4204 + }, + { + "epoch": 0.47716312056737586, + "grad_norm": 22.872671127319336, + "learning_rate": 5e-05, + "loss": 1.0716, + "num_input_tokens_seen": 281315776, + "step": 4205 + }, + { + "epoch": 0.47716312056737586, + "loss": 1.0947699546813965, + "loss_ce": 0.003949685022234917, + "loss_iou": 0.490234375, + "loss_num": 0.0223388671875, + "loss_xval": 1.09375, + "num_input_tokens_seen": 281315776, + "step": 4205 + }, + { + "epoch": 0.47727659574468084, + "grad_norm": 26.99329948425293, + "learning_rate": 5e-05, + "loss": 1.2417, + "num_input_tokens_seen": 281382504, + "step": 4206 + }, + { + "epoch": 0.47727659574468084, + "loss": 1.2844630479812622, + "loss_ce": 0.004189579747617245, + "loss_iou": 0.5390625, + "loss_num": 0.03955078125, + "loss_xval": 1.28125, + "num_input_tokens_seen": 281382504, + "step": 4206 + }, + { + "epoch": 0.4773900709219858, + "grad_norm": 26.794967651367188, + "learning_rate": 5e-05, + "loss": 1.3639, + "num_input_tokens_seen": 281449384, + "step": 4207 + }, + { + "epoch": 0.4773900709219858, + "loss": 1.2585095167160034, + "loss_ce": 0.004603194538503885, + "loss_iou": 0.484375, + "loss_num": 0.056640625, + "loss_xval": 1.25, + "num_input_tokens_seen": 281449384, + "step": 4207 + }, + { + "epoch": 0.4775035460992908, + "grad_norm": 26.264942169189453, + "learning_rate": 5e-05, + "loss": 1.2383, + "num_input_tokens_seen": 281516448, + "step": 4208 + }, + { + "epoch": 0.4775035460992908, + "loss": 1.1095614433288574, + "loss_ce": 0.005069319158792496, + "loss_iou": 0.4609375, + "loss_num": 0.03662109375, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 281516448, + "step": 4208 + }, + { + "epoch": 0.4776170212765957, + "grad_norm": 27.67340660095215, + "learning_rate": 5e-05, + "loss": 1.1853, + "num_input_tokens_seen": 281583820, + "step": 4209 + }, + { + "epoch": 0.4776170212765957, + "loss": 1.4150409698486328, + "loss_ce": 0.0043964264914393425, + "loss_iou": 0.58203125, + "loss_num": 0.049072265625, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 281583820, + "step": 4209 + }, + { + "epoch": 0.4777304964539007, + "grad_norm": 32.47854232788086, + "learning_rate": 5e-05, + "loss": 1.1783, + "num_input_tokens_seen": 281649784, + "step": 4210 + }, + { + "epoch": 0.4777304964539007, + "loss": 1.238439917564392, + "loss_ce": 0.0050414809957146645, + "loss_iou": 0.52734375, + "loss_num": 0.035888671875, + "loss_xval": 1.234375, + "num_input_tokens_seen": 281649784, + "step": 4210 + }, + { + "epoch": 0.4778439716312057, + "grad_norm": 35.139381408691406, + "learning_rate": 5e-05, + "loss": 1.3684, + "num_input_tokens_seen": 281715984, + "step": 4211 + }, + { + "epoch": 0.4778439716312057, + "loss": 1.3394485712051392, + "loss_ce": 0.006623891182243824, + "loss_iou": 0.494140625, + "loss_num": 0.06884765625, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 281715984, + "step": 4211 + }, + { + "epoch": 0.47795744680851066, + "grad_norm": 23.902132034301758, + "learning_rate": 5e-05, + "loss": 1.2164, + "num_input_tokens_seen": 281782208, + "step": 4212 + }, + { + "epoch": 0.47795744680851066, + "loss": 1.2733958959579468, + "loss_ce": 0.006306055933237076, + "loss_iou": 0.515625, + "loss_num": 0.047607421875, + "loss_xval": 1.265625, + "num_input_tokens_seen": 281782208, + "step": 4212 + }, + { + "epoch": 0.4780709219858156, + "grad_norm": 25.568952560424805, + "learning_rate": 5e-05, + "loss": 1.1031, + "num_input_tokens_seen": 281848224, + "step": 4213 + }, + { + "epoch": 0.4780709219858156, + "loss": 1.1275606155395508, + "loss_ce": 0.0050021023489534855, + "loss_iou": 0.470703125, + "loss_num": 0.036376953125, + "loss_xval": 1.125, + "num_input_tokens_seen": 281848224, + "step": 4213 + }, + { + "epoch": 0.47818439716312056, + "grad_norm": 34.52469253540039, + "learning_rate": 5e-05, + "loss": 1.2519, + "num_input_tokens_seen": 281915896, + "step": 4214 + }, + { + "epoch": 0.47818439716312056, + "loss": 1.377610683441162, + "loss_ce": 0.007005202583968639, + "loss_iou": 0.58984375, + "loss_num": 0.038330078125, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 281915896, + "step": 4214 + }, + { + "epoch": 0.47829787234042553, + "grad_norm": 20.781362533569336, + "learning_rate": 5e-05, + "loss": 1.3161, + "num_input_tokens_seen": 281981676, + "step": 4215 + }, + { + "epoch": 0.47829787234042553, + "loss": 1.5217595100402832, + "loss_ce": 0.006622835993766785, + "loss_iou": 0.5625, + "loss_num": 0.0771484375, + "loss_xval": 1.515625, + "num_input_tokens_seen": 281981676, + "step": 4215 + }, + { + "epoch": 0.4784113475177305, + "grad_norm": 15.022256851196289, + "learning_rate": 5e-05, + "loss": 1.1566, + "num_input_tokens_seen": 282048556, + "step": 4216 + }, + { + "epoch": 0.4784113475177305, + "loss": 1.0356674194335938, + "loss_ce": 0.0034409379586577415, + "loss_iou": 0.4375, + "loss_num": 0.03125, + "loss_xval": 1.03125, + "num_input_tokens_seen": 282048556, + "step": 4216 + }, + { + "epoch": 0.47852482269503543, + "grad_norm": 17.053504943847656, + "learning_rate": 5e-05, + "loss": 1.1328, + "num_input_tokens_seen": 282114512, + "step": 4217 + }, + { + "epoch": 0.47852482269503543, + "loss": 0.9825176000595093, + "loss_ce": 0.006931641139090061, + "loss_iou": 0.392578125, + "loss_num": 0.038330078125, + "loss_xval": 0.9765625, + "num_input_tokens_seen": 282114512, + "step": 4217 + }, + { + "epoch": 0.4786382978723404, + "grad_norm": 26.371273040771484, + "learning_rate": 5e-05, + "loss": 1.1662, + "num_input_tokens_seen": 282181964, + "step": 4218 + }, + { + "epoch": 0.4786382978723404, + "loss": 1.1543093919754028, + "loss_ce": 0.008313337340950966, + "loss_iou": 0.482421875, + "loss_num": 0.036376953125, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 282181964, + "step": 4218 + }, + { + "epoch": 0.4787517730496454, + "grad_norm": 35.83197021484375, + "learning_rate": 5e-05, + "loss": 1.4645, + "num_input_tokens_seen": 282248432, + "step": 4219 + }, + { + "epoch": 0.4787517730496454, + "loss": 1.5380096435546875, + "loss_ce": 0.0033416461665183306, + "loss_iou": 0.59765625, + "loss_num": 0.06787109375, + "loss_xval": 1.53125, + "num_input_tokens_seen": 282248432, + "step": 4219 + }, + { + "epoch": 0.47886524822695037, + "grad_norm": 23.73973274230957, + "learning_rate": 5e-05, + "loss": 1.166, + "num_input_tokens_seen": 282315172, + "step": 4220 + }, + { + "epoch": 0.47886524822695037, + "loss": 1.240012764930725, + "loss_ce": 0.004050948657095432, + "loss_iou": 0.55078125, + "loss_num": 0.02734375, + "loss_xval": 1.234375, + "num_input_tokens_seen": 282315172, + "step": 4220 + }, + { + "epoch": 0.47897872340425535, + "grad_norm": 20.458345413208008, + "learning_rate": 5e-05, + "loss": 1.208, + "num_input_tokens_seen": 282381628, + "step": 4221 + }, + { + "epoch": 0.47897872340425535, + "loss": 1.3336235284805298, + "loss_ce": 0.00598682789131999, + "loss_iou": 0.55859375, + "loss_num": 0.041748046875, + "loss_xval": 1.328125, + "num_input_tokens_seen": 282381628, + "step": 4221 + }, + { + "epoch": 0.47909219858156027, + "grad_norm": 29.46512794494629, + "learning_rate": 5e-05, + "loss": 1.3688, + "num_input_tokens_seen": 282448448, + "step": 4222 + }, + { + "epoch": 0.47909219858156027, + "loss": 1.485614538192749, + "loss_ce": 0.0051457760855555534, + "loss_iou": 0.60546875, + "loss_num": 0.05419921875, + "loss_xval": 1.484375, + "num_input_tokens_seen": 282448448, + "step": 4222 + }, + { + "epoch": 0.47920567375886525, + "grad_norm": 32.235008239746094, + "learning_rate": 5e-05, + "loss": 1.2484, + "num_input_tokens_seen": 282515100, + "step": 4223 + }, + { + "epoch": 0.47920567375886525, + "loss": 1.1622402667999268, + "loss_ce": 0.010628891177475452, + "loss_iou": 0.45703125, + "loss_num": 0.04736328125, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 282515100, + "step": 4223 + }, + { + "epoch": 0.4793191489361702, + "grad_norm": 33.64689254760742, + "learning_rate": 5e-05, + "loss": 1.3637, + "num_input_tokens_seen": 282582080, + "step": 4224 + }, + { + "epoch": 0.4793191489361702, + "loss": 1.3755018711090088, + "loss_ce": 0.002454924862831831, + "loss_iou": 0.5859375, + "loss_num": 0.040771484375, + "loss_xval": 1.375, + "num_input_tokens_seen": 282582080, + "step": 4224 + }, + { + "epoch": 0.4794326241134752, + "grad_norm": 30.340967178344727, + "learning_rate": 5e-05, + "loss": 1.1796, + "num_input_tokens_seen": 282648684, + "step": 4225 + }, + { + "epoch": 0.4794326241134752, + "loss": 1.137489914894104, + "loss_ce": 0.004677378106862307, + "loss_iou": 0.455078125, + "loss_num": 0.044921875, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 282648684, + "step": 4225 + }, + { + "epoch": 0.4795460992907801, + "grad_norm": 28.915430068969727, + "learning_rate": 5e-05, + "loss": 1.447, + "num_input_tokens_seen": 282715544, + "step": 4226 + }, + { + "epoch": 0.4795460992907801, + "loss": 1.494776725769043, + "loss_ce": 0.00454228650778532, + "loss_iou": 0.6484375, + "loss_num": 0.038818359375, + "loss_xval": 1.4921875, + "num_input_tokens_seen": 282715544, + "step": 4226 + }, + { + "epoch": 0.4796595744680851, + "grad_norm": 21.45115852355957, + "learning_rate": 5e-05, + "loss": 1.2765, + "num_input_tokens_seen": 282781760, + "step": 4227 + }, + { + "epoch": 0.4796595744680851, + "loss": 1.1200170516967773, + "loss_ce": 0.007224020082503557, + "loss_iou": 0.490234375, + "loss_num": 0.02685546875, + "loss_xval": 1.109375, + "num_input_tokens_seen": 282781760, + "step": 4227 + }, + { + "epoch": 0.4797730496453901, + "grad_norm": 15.960476875305176, + "learning_rate": 5e-05, + "loss": 1.0076, + "num_input_tokens_seen": 282847784, + "step": 4228 + }, + { + "epoch": 0.4797730496453901, + "loss": 1.30951726436615, + "loss_ce": 0.005318013019859791, + "loss_iou": 0.57421875, + "loss_num": 0.031494140625, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 282847784, + "step": 4228 + }, + { + "epoch": 0.47988652482269506, + "grad_norm": 15.94096565246582, + "learning_rate": 5e-05, + "loss": 1.1527, + "num_input_tokens_seen": 282914524, + "step": 4229 + }, + { + "epoch": 0.47988652482269506, + "loss": 1.1662839651107788, + "loss_ce": 0.007592551875859499, + "loss_iou": 0.447265625, + "loss_num": 0.05322265625, + "loss_xval": 1.15625, + "num_input_tokens_seen": 282914524, + "step": 4229 + }, + { + "epoch": 0.48, + "grad_norm": 19.1079158782959, + "learning_rate": 5e-05, + "loss": 1.2457, + "num_input_tokens_seen": 282981356, + "step": 4230 + }, + { + "epoch": 0.48, + "loss": 1.195495843887329, + "loss_ce": 0.006775196176022291, + "loss_iou": 0.50390625, + "loss_num": 0.035888671875, + "loss_xval": 1.1875, + "num_input_tokens_seen": 282981356, + "step": 4230 + }, + { + "epoch": 0.48011347517730496, + "grad_norm": 33.9454345703125, + "learning_rate": 5e-05, + "loss": 1.2491, + "num_input_tokens_seen": 283048892, + "step": 4231 + }, + { + "epoch": 0.48011347517730496, + "loss": 1.370988368988037, + "loss_ce": 0.005753928795456886, + "loss_iou": 0.546875, + "loss_num": 0.053466796875, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 283048892, + "step": 4231 + }, + { + "epoch": 0.48022695035460994, + "grad_norm": 33.884002685546875, + "learning_rate": 5e-05, + "loss": 1.3507, + "num_input_tokens_seen": 283115960, + "step": 4232 + }, + { + "epoch": 0.48022695035460994, + "loss": 1.399782657623291, + "loss_ce": 0.0052513377740979195, + "loss_iou": 0.59765625, + "loss_num": 0.0400390625, + "loss_xval": 1.390625, + "num_input_tokens_seen": 283115960, + "step": 4232 + }, + { + "epoch": 0.4803404255319149, + "grad_norm": 19.925464630126953, + "learning_rate": 5e-05, + "loss": 1.1319, + "num_input_tokens_seen": 283181992, + "step": 4233 + }, + { + "epoch": 0.4803404255319149, + "loss": 0.8758466839790344, + "loss_ce": 0.004997067619115114, + "loss_iou": 0.310546875, + "loss_num": 0.0498046875, + "loss_xval": 0.87109375, + "num_input_tokens_seen": 283181992, + "step": 4233 + }, + { + "epoch": 0.48045390070921984, + "grad_norm": 25.10178565979004, + "learning_rate": 5e-05, + "loss": 1.182, + "num_input_tokens_seen": 283249280, + "step": 4234 + }, + { + "epoch": 0.48045390070921984, + "loss": 1.1508874893188477, + "loss_ce": 0.004891378339380026, + "loss_iou": 0.46484375, + "loss_num": 0.043701171875, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 283249280, + "step": 4234 + }, + { + "epoch": 0.4805673758865248, + "grad_norm": 30.064939498901367, + "learning_rate": 5e-05, + "loss": 0.9499, + "num_input_tokens_seen": 283315840, + "step": 4235 + }, + { + "epoch": 0.4805673758865248, + "loss": 0.9326390027999878, + "loss_ce": 0.004416340962052345, + "loss_iou": 0.375, + "loss_num": 0.034912109375, + "loss_xval": 0.9296875, + "num_input_tokens_seen": 283315840, + "step": 4235 + }, + { + "epoch": 0.4806808510638298, + "grad_norm": 26.098739624023438, + "learning_rate": 5e-05, + "loss": 1.4239, + "num_input_tokens_seen": 283382196, + "step": 4236 + }, + { + "epoch": 0.4806808510638298, + "loss": 1.4105421304702759, + "loss_ce": 0.004292120225727558, + "loss_iou": 0.57421875, + "loss_num": 0.05224609375, + "loss_xval": 1.40625, + "num_input_tokens_seen": 283382196, + "step": 4236 + }, + { + "epoch": 0.4807943262411348, + "grad_norm": 28.81708526611328, + "learning_rate": 5e-05, + "loss": 1.1705, + "num_input_tokens_seen": 283449372, + "step": 4237 + }, + { + "epoch": 0.4807943262411348, + "loss": 1.2667229175567627, + "loss_ce": 0.005004175938665867, + "loss_iou": 0.52734375, + "loss_num": 0.041748046875, + "loss_xval": 1.265625, + "num_input_tokens_seen": 283449372, + "step": 4237 + }, + { + "epoch": 0.4809078014184397, + "grad_norm": 26.752195358276367, + "learning_rate": 5e-05, + "loss": 1.1829, + "num_input_tokens_seen": 283516384, + "step": 4238 + }, + { + "epoch": 0.4809078014184397, + "loss": 1.144392728805542, + "loss_ce": 0.00279117189347744, + "loss_iou": 0.49609375, + "loss_num": 0.0296630859375, + "loss_xval": 1.140625, + "num_input_tokens_seen": 283516384, + "step": 4238 + }, + { + "epoch": 0.4810212765957447, + "grad_norm": 26.28949546813965, + "learning_rate": 5e-05, + "loss": 1.025, + "num_input_tokens_seen": 283583592, + "step": 4239 + }, + { + "epoch": 0.4810212765957447, + "loss": 1.1188108921051025, + "loss_ce": 0.006750414147973061, + "loss_iou": 0.4453125, + "loss_num": 0.0439453125, + "loss_xval": 1.109375, + "num_input_tokens_seen": 283583592, + "step": 4239 + }, + { + "epoch": 0.48113475177304965, + "grad_norm": 109.1910400390625, + "learning_rate": 5e-05, + "loss": 1.3616, + "num_input_tokens_seen": 283650552, + "step": 4240 + }, + { + "epoch": 0.48113475177304965, + "loss": 1.3435403108596802, + "loss_ce": 0.006626244634389877, + "loss_iou": 0.47265625, + "loss_num": 0.07861328125, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 283650552, + "step": 4240 + }, + { + "epoch": 0.48124822695035463, + "grad_norm": 25.393508911132812, + "learning_rate": 5e-05, + "loss": 1.109, + "num_input_tokens_seen": 283717848, + "step": 4241 + }, + { + "epoch": 0.48124822695035463, + "loss": 1.0947043895721436, + "loss_ce": 0.006325509864836931, + "loss_iou": 0.47265625, + "loss_num": 0.0283203125, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 283717848, + "step": 4241 + }, + { + "epoch": 0.48136170212765955, + "grad_norm": 29.206092834472656, + "learning_rate": 5e-05, + "loss": 1.1672, + "num_input_tokens_seen": 283783912, + "step": 4242 + }, + { + "epoch": 0.48136170212765955, + "loss": 1.4463226795196533, + "loss_ce": 0.004916508682072163, + "loss_iou": 0.5703125, + "loss_num": 0.0595703125, + "loss_xval": 1.4375, + "num_input_tokens_seen": 283783912, + "step": 4242 + }, + { + "epoch": 0.48147517730496453, + "grad_norm": 47.9396858215332, + "learning_rate": 5e-05, + "loss": 1.4811, + "num_input_tokens_seen": 283850792, + "step": 4243 + }, + { + "epoch": 0.48147517730496453, + "loss": 1.5609972476959229, + "loss_ce": 0.0033801025710999966, + "loss_iou": 0.609375, + "loss_num": 0.068359375, + "loss_xval": 1.5546875, + "num_input_tokens_seen": 283850792, + "step": 4243 + }, + { + "epoch": 0.4815886524822695, + "grad_norm": 22.763704299926758, + "learning_rate": 5e-05, + "loss": 1.263, + "num_input_tokens_seen": 283917564, + "step": 4244 + }, + { + "epoch": 0.4815886524822695, + "loss": 1.0971834659576416, + "loss_ce": 0.004898404236882925, + "loss_iou": 0.4765625, + "loss_num": 0.0277099609375, + "loss_xval": 1.09375, + "num_input_tokens_seen": 283917564, + "step": 4244 + }, + { + "epoch": 0.4817021276595745, + "grad_norm": 23.305253982543945, + "learning_rate": 5e-05, + "loss": 1.1387, + "num_input_tokens_seen": 283984836, + "step": 4245 + }, + { + "epoch": 0.4817021276595745, + "loss": 1.1642999649047852, + "loss_ce": 0.0058527193032205105, + "loss_iou": 0.46875, + "loss_num": 0.0439453125, + "loss_xval": 1.15625, + "num_input_tokens_seen": 283984836, + "step": 4245 + }, + { + "epoch": 0.4818156028368794, + "grad_norm": 37.793766021728516, + "learning_rate": 5e-05, + "loss": 1.3889, + "num_input_tokens_seen": 284051884, + "step": 4246 + }, + { + "epoch": 0.4818156028368794, + "loss": 1.4617576599121094, + "loss_ce": 0.007656117435544729, + "loss_iou": 0.59375, + "loss_num": 0.052490234375, + "loss_xval": 1.453125, + "num_input_tokens_seen": 284051884, + "step": 4246 + }, + { + "epoch": 0.4819290780141844, + "grad_norm": 25.192180633544922, + "learning_rate": 5e-05, + "loss": 1.4667, + "num_input_tokens_seen": 284118416, + "step": 4247 + }, + { + "epoch": 0.4819290780141844, + "loss": 1.4008350372314453, + "loss_ce": 0.012163037434220314, + "loss_iou": 0.625, + "loss_num": 0.0274658203125, + "loss_xval": 1.390625, + "num_input_tokens_seen": 284118416, + "step": 4247 + }, + { + "epoch": 0.48204255319148936, + "grad_norm": 18.195697784423828, + "learning_rate": 5e-05, + "loss": 1.1902, + "num_input_tokens_seen": 284185508, + "step": 4248 + }, + { + "epoch": 0.48204255319148936, + "loss": 1.0433263778686523, + "loss_ce": 0.005728753749281168, + "loss_iou": 0.400390625, + "loss_num": 0.046630859375, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 284185508, + "step": 4248 + }, + { + "epoch": 0.48215602836879434, + "grad_norm": 46.79270553588867, + "learning_rate": 5e-05, + "loss": 1.222, + "num_input_tokens_seen": 284252908, + "step": 4249 + }, + { + "epoch": 0.48215602836879434, + "loss": 1.0310901403427124, + "loss_ce": 0.00374641758389771, + "loss_iou": 0.455078125, + "loss_num": 0.0230712890625, + "loss_xval": 1.03125, + "num_input_tokens_seen": 284252908, + "step": 4249 + }, + { + "epoch": 0.48226950354609927, + "grad_norm": 29.374067306518555, + "learning_rate": 5e-05, + "loss": 1.4158, + "num_input_tokens_seen": 284319696, + "step": 4250 + }, + { + "epoch": 0.48226950354609927, + "eval_seeclick_CIoU": 0.3321220427751541, + "eval_seeclick_GIoU": 0.3155318945646286, + "eval_seeclick_IoU": 0.4363700747489929, + "eval_seeclick_MAE_all": 0.16828463971614838, + "eval_seeclick_MAE_h": 0.06466059572994709, + "eval_seeclick_MAE_w": 0.1265251263976097, + "eval_seeclick_MAE_x_boxes": 0.28697536885738373, + "eval_seeclick_MAE_y_boxes": 0.1285783387720585, + "eval_seeclick_NUM_probability": 0.9999496936798096, + "eval_seeclick_inside_bbox": 0.581250011920929, + "eval_seeclick_loss": 2.643577814102173, + "eval_seeclick_loss_ce": 0.01454479107633233, + "eval_seeclick_loss_iou": 0.88775634765625, + "eval_seeclick_loss_num": 0.1656036376953125, + "eval_seeclick_loss_xval": 2.6044921875, + "eval_seeclick_runtime": 69.7655, + "eval_seeclick_samples_per_second": 0.674, + "eval_seeclick_steps_per_second": 0.029, + "num_input_tokens_seen": 284319696, + "step": 4250 + }, + { + "epoch": 0.48226950354609927, + "eval_icons_CIoU": 0.5003200024366379, + "eval_icons_GIoU": 0.49423976242542267, + "eval_icons_IoU": 0.5436735451221466, + "eval_icons_MAE_all": 0.12231143191456795, + "eval_icons_MAE_h": 0.06414977088570595, + "eval_icons_MAE_w": 0.11843692883849144, + "eval_icons_MAE_x_boxes": 0.11095371097326279, + "eval_icons_MAE_y_boxes": 0.07830289378762245, + "eval_icons_NUM_probability": 0.9999605715274811, + "eval_icons_inside_bbox": 0.7517361044883728, + "eval_icons_loss": 2.295686960220337, + "eval_icons_loss_ce": 5.628684630210046e-05, + "eval_icons_loss_iou": 0.837646484375, + "eval_icons_loss_num": 0.12469863891601562, + "eval_icons_loss_xval": 2.29931640625, + "eval_icons_runtime": 75.2342, + "eval_icons_samples_per_second": 0.665, + "eval_icons_steps_per_second": 0.027, + "num_input_tokens_seen": 284319696, + "step": 4250 + }, + { + "epoch": 0.48226950354609927, + "eval_screenspot_CIoU": 0.27183669805526733, + "eval_screenspot_GIoU": 0.24517181515693665, + "eval_screenspot_IoU": 0.3797083596388499, + "eval_screenspot_MAE_all": 0.21442816654841104, + "eval_screenspot_MAE_h": 0.1396976113319397, + "eval_screenspot_MAE_w": 0.16656397779782614, + "eval_screenspot_MAE_x_boxes": 0.33481132984161377, + "eval_screenspot_MAE_y_boxes": 0.10401782393455505, + "eval_screenspot_NUM_probability": 0.9991532564163208, + "eval_screenspot_inside_bbox": 0.5600000023841858, + "eval_screenspot_loss": 2.981729507446289, + "eval_screenspot_loss_ce": 0.01582820589343707, + "eval_screenspot_loss_iou": 0.9578450520833334, + "eval_screenspot_loss_num": 0.22316487630208334, + "eval_screenspot_loss_xval": 3.0322265625, + "eval_screenspot_runtime": 133.9877, + "eval_screenspot_samples_per_second": 0.664, + "eval_screenspot_steps_per_second": 0.022, + "num_input_tokens_seen": 284319696, + "step": 4250 + }, + { + "epoch": 0.48226950354609927, + "eval_compot_CIoU": 0.2988656312227249, + "eval_compot_GIoU": 0.2410954087972641, + "eval_compot_IoU": 0.38198618590831757, + "eval_compot_MAE_all": 0.23630134761333466, + "eval_compot_MAE_h": 0.16733763366937637, + "eval_compot_MAE_w": 0.25012172758579254, + "eval_compot_MAE_x_boxes": 0.1870269849896431, + "eval_compot_MAE_y_boxes": 0.15294435620307922, + "eval_compot_NUM_probability": 0.9995971918106079, + "eval_compot_inside_bbox": 0.5121527910232544, + "eval_compot_loss": 3.1192941665649414, + "eval_compot_loss_ce": 0.003999393666163087, + "eval_compot_loss_iou": 0.96435546875, + "eval_compot_loss_num": 0.2408447265625, + "eval_compot_loss_xval": 3.134765625, + "eval_compot_runtime": 70.7727, + "eval_compot_samples_per_second": 0.706, + "eval_compot_steps_per_second": 0.028, + "num_input_tokens_seen": 284319696, + "step": 4250 + }, + { + "epoch": 0.48226950354609927, + "loss": 3.0049357414245605, + "loss_ce": 0.0029824799858033657, + "loss_iou": 0.9453125, + "loss_num": 0.2216796875, + "loss_xval": 3.0, + "num_input_tokens_seen": 284319696, + "step": 4250 + }, + { + "epoch": 0.48238297872340424, + "grad_norm": 18.983362197875977, + "learning_rate": 5e-05, + "loss": 1.0211, + "num_input_tokens_seen": 284386384, + "step": 4251 + }, + { + "epoch": 0.48238297872340424, + "loss": 1.1059489250183105, + "loss_ce": 0.007316152565181255, + "loss_iou": 0.455078125, + "loss_num": 0.037841796875, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 284386384, + "step": 4251 + }, + { + "epoch": 0.4824964539007092, + "grad_norm": 24.889606475830078, + "learning_rate": 5e-05, + "loss": 1.1247, + "num_input_tokens_seen": 284453240, + "step": 4252 + }, + { + "epoch": 0.4824964539007092, + "loss": 1.404988169670105, + "loss_ce": 0.0060623399913311005, + "loss_iou": 0.5546875, + "loss_num": 0.058349609375, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 284453240, + "step": 4252 + }, + { + "epoch": 0.4826099290780142, + "grad_norm": 28.882896423339844, + "learning_rate": 5e-05, + "loss": 1.1342, + "num_input_tokens_seen": 284520796, + "step": 4253 + }, + { + "epoch": 0.4826099290780142, + "loss": 1.1493289470672607, + "loss_ce": 0.006750741042196751, + "loss_iou": 0.466796875, + "loss_num": 0.041748046875, + "loss_xval": 1.140625, + "num_input_tokens_seen": 284520796, + "step": 4253 + }, + { + "epoch": 0.4827234042553191, + "grad_norm": 22.31285285949707, + "learning_rate": 5e-05, + "loss": 1.2682, + "num_input_tokens_seen": 284587748, + "step": 4254 + }, + { + "epoch": 0.4827234042553191, + "loss": 1.1992073059082031, + "loss_ce": 0.004383033141493797, + "loss_iou": 0.494140625, + "loss_num": 0.041259765625, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 284587748, + "step": 4254 + }, + { + "epoch": 0.4828368794326241, + "grad_norm": 28.921493530273438, + "learning_rate": 5e-05, + "loss": 1.0564, + "num_input_tokens_seen": 284653788, + "step": 4255 + }, + { + "epoch": 0.4828368794326241, + "loss": 0.9860894083976746, + "loss_ce": 0.0041558146476745605, + "loss_iou": 0.404296875, + "loss_num": 0.034912109375, + "loss_xval": 0.98046875, + "num_input_tokens_seen": 284653788, + "step": 4255 + }, + { + "epoch": 0.4829503546099291, + "grad_norm": 38.72172927856445, + "learning_rate": 5e-05, + "loss": 1.1691, + "num_input_tokens_seen": 284720340, + "step": 4256 + }, + { + "epoch": 0.4829503546099291, + "loss": 1.0537710189819336, + "loss_ce": 0.007872480899095535, + "loss_iou": 0.447265625, + "loss_num": 0.030517578125, + "loss_xval": 1.046875, + "num_input_tokens_seen": 284720340, + "step": 4256 + }, + { + "epoch": 0.48306382978723406, + "grad_norm": 32.10224151611328, + "learning_rate": 5e-05, + "loss": 1.1641, + "num_input_tokens_seen": 284786532, + "step": 4257 + }, + { + "epoch": 0.48306382978723406, + "loss": 0.8688154816627502, + "loss_ce": 0.005000178702175617, + "loss_iou": 0.375, + "loss_num": 0.02294921875, + "loss_xval": 0.86328125, + "num_input_tokens_seen": 284786532, + "step": 4257 + }, + { + "epoch": 0.48317730496453903, + "grad_norm": 25.954999923706055, + "learning_rate": 5e-05, + "loss": 1.4453, + "num_input_tokens_seen": 284853816, + "step": 4258 + }, + { + "epoch": 0.48317730496453903, + "loss": 1.4401419162750244, + "loss_ce": 0.006548096891492605, + "loss_iou": 0.59765625, + "loss_num": 0.048095703125, + "loss_xval": 1.4375, + "num_input_tokens_seen": 284853816, + "step": 4258 + }, + { + "epoch": 0.48329078014184396, + "grad_norm": 27.332958221435547, + "learning_rate": 5e-05, + "loss": 0.9943, + "num_input_tokens_seen": 284920856, + "step": 4259 + }, + { + "epoch": 0.48329078014184396, + "loss": 0.8069685697555542, + "loss_ce": 0.005576929077506065, + "loss_iou": 0.337890625, + "loss_num": 0.0252685546875, + "loss_xval": 0.80078125, + "num_input_tokens_seen": 284920856, + "step": 4259 + }, + { + "epoch": 0.48340425531914893, + "grad_norm": 38.501556396484375, + "learning_rate": 5e-05, + "loss": 1.2933, + "num_input_tokens_seen": 284988312, + "step": 4260 + }, + { + "epoch": 0.48340425531914893, + "loss": 1.2559196949005127, + "loss_ce": 0.006896221078932285, + "loss_iou": 0.5, + "loss_num": 0.049560546875, + "loss_xval": 1.25, + "num_input_tokens_seen": 284988312, + "step": 4260 + }, + { + "epoch": 0.4835177304964539, + "grad_norm": 33.297340393066406, + "learning_rate": 5e-05, + "loss": 1.188, + "num_input_tokens_seen": 285054256, + "step": 4261 + }, + { + "epoch": 0.4835177304964539, + "loss": 1.0591020584106445, + "loss_ce": 0.004902936983853579, + "loss_iou": 0.443359375, + "loss_num": 0.032958984375, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 285054256, + "step": 4261 + }, + { + "epoch": 0.4836312056737589, + "grad_norm": 18.729427337646484, + "learning_rate": 5e-05, + "loss": 1.209, + "num_input_tokens_seen": 285121656, + "step": 4262 + }, + { + "epoch": 0.4836312056737589, + "loss": 1.1454250812530518, + "loss_ce": 0.004800037015229464, + "loss_iou": 0.4921875, + "loss_num": 0.03125, + "loss_xval": 1.140625, + "num_input_tokens_seen": 285121656, + "step": 4262 + }, + { + "epoch": 0.4837446808510638, + "grad_norm": 31.484180450439453, + "learning_rate": 5e-05, + "loss": 1.3212, + "num_input_tokens_seen": 285189144, + "step": 4263 + }, + { + "epoch": 0.4837446808510638, + "loss": 1.2566792964935303, + "loss_ce": 0.005702691618353128, + "loss_iou": 0.50390625, + "loss_num": 0.04931640625, + "loss_xval": 1.25, + "num_input_tokens_seen": 285189144, + "step": 4263 + }, + { + "epoch": 0.4838581560283688, + "grad_norm": 16.370235443115234, + "learning_rate": 5e-05, + "loss": 1.2534, + "num_input_tokens_seen": 285255776, + "step": 4264 + }, + { + "epoch": 0.4838581560283688, + "loss": 1.1001032590866089, + "loss_ce": 0.012212665751576424, + "loss_iou": 0.4921875, + "loss_num": 0.020751953125, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 285255776, + "step": 4264 + }, + { + "epoch": 0.48397163120567377, + "grad_norm": 14.999828338623047, + "learning_rate": 5e-05, + "loss": 1.0976, + "num_input_tokens_seen": 285321552, + "step": 4265 + }, + { + "epoch": 0.48397163120567377, + "loss": 1.2729785442352295, + "loss_ce": 0.0046679358929395676, + "loss_iou": 0.478515625, + "loss_num": 0.0625, + "loss_xval": 1.265625, + "num_input_tokens_seen": 285321552, + "step": 4265 + }, + { + "epoch": 0.48408510638297875, + "grad_norm": 44.00773620605469, + "learning_rate": 5e-05, + "loss": 1.3266, + "num_input_tokens_seen": 285388500, + "step": 4266 + }, + { + "epoch": 0.48408510638297875, + "loss": 1.534632682800293, + "loss_ce": 0.007288920693099499, + "loss_iou": 0.59375, + "loss_num": 0.06787109375, + "loss_xval": 1.53125, + "num_input_tokens_seen": 285388500, + "step": 4266 + }, + { + "epoch": 0.48419858156028367, + "grad_norm": 21.02521324157715, + "learning_rate": 5e-05, + "loss": 1.2991, + "num_input_tokens_seen": 285455012, + "step": 4267 + }, + { + "epoch": 0.48419858156028367, + "loss": 1.3322745561599731, + "loss_ce": 0.005126086063683033, + "loss_iou": 0.57421875, + "loss_num": 0.0361328125, + "loss_xval": 1.328125, + "num_input_tokens_seen": 285455012, + "step": 4267 + }, + { + "epoch": 0.48431205673758865, + "grad_norm": 23.81041145324707, + "learning_rate": 5e-05, + "loss": 1.1254, + "num_input_tokens_seen": 285522236, + "step": 4268 + }, + { + "epoch": 0.48431205673758865, + "loss": 0.907573938369751, + "loss_ce": 0.0020563851576298475, + "loss_iou": 0.416015625, + "loss_num": 0.01458740234375, + "loss_xval": 0.90625, + "num_input_tokens_seen": 285522236, + "step": 4268 + }, + { + "epoch": 0.4844255319148936, + "grad_norm": 35.2734375, + "learning_rate": 5e-05, + "loss": 1.2785, + "num_input_tokens_seen": 285589344, + "step": 4269 + }, + { + "epoch": 0.4844255319148936, + "loss": 1.0515124797821045, + "loss_ce": 0.005369868129491806, + "loss_iou": 0.427734375, + "loss_num": 0.037841796875, + "loss_xval": 1.046875, + "num_input_tokens_seen": 285589344, + "step": 4269 + }, + { + "epoch": 0.4845390070921986, + "grad_norm": 43.730491638183594, + "learning_rate": 5e-05, + "loss": 1.2491, + "num_input_tokens_seen": 285655656, + "step": 4270 + }, + { + "epoch": 0.4845390070921986, + "loss": 1.2773876190185547, + "loss_ce": 0.007856281474232674, + "loss_iou": 0.51171875, + "loss_num": 0.050048828125, + "loss_xval": 1.265625, + "num_input_tokens_seen": 285655656, + "step": 4270 + }, + { + "epoch": 0.4846524822695035, + "grad_norm": 17.307323455810547, + "learning_rate": 5e-05, + "loss": 1.4117, + "num_input_tokens_seen": 285722604, + "step": 4271 + }, + { + "epoch": 0.4846524822695035, + "loss": 1.255041241645813, + "loss_ce": 0.006506090518087149, + "loss_iou": 0.5, + "loss_num": 0.049072265625, + "loss_xval": 1.25, + "num_input_tokens_seen": 285722604, + "step": 4271 + }, + { + "epoch": 0.4847659574468085, + "grad_norm": 18.566781997680664, + "learning_rate": 5e-05, + "loss": 0.8399, + "num_input_tokens_seen": 285788120, + "step": 4272 + }, + { + "epoch": 0.4847659574468085, + "loss": 0.9281014204025269, + "loss_ce": 0.005005734506994486, + "loss_iou": 0.40234375, + "loss_num": 0.02392578125, + "loss_xval": 0.921875, + "num_input_tokens_seen": 285788120, + "step": 4272 + }, + { + "epoch": 0.4848794326241135, + "grad_norm": 20.312557220458984, + "learning_rate": 5e-05, + "loss": 1.0564, + "num_input_tokens_seen": 285855044, + "step": 4273 + }, + { + "epoch": 0.4848794326241135, + "loss": 0.9772689938545227, + "loss_ce": 0.0046127126552164555, + "loss_iou": 0.41796875, + "loss_num": 0.0272216796875, + "loss_xval": 0.97265625, + "num_input_tokens_seen": 285855044, + "step": 4273 + }, + { + "epoch": 0.48499290780141846, + "grad_norm": 16.505022048950195, + "learning_rate": 5e-05, + "loss": 1.1335, + "num_input_tokens_seen": 285919956, + "step": 4274 + }, + { + "epoch": 0.48499290780141846, + "loss": 0.9381042718887329, + "loss_ce": 0.007440220098942518, + "loss_iou": 0.380859375, + "loss_num": 0.03369140625, + "loss_xval": 0.9296875, + "num_input_tokens_seen": 285919956, + "step": 4274 + }, + { + "epoch": 0.4851063829787234, + "grad_norm": 13.53099250793457, + "learning_rate": 5e-05, + "loss": 1.0865, + "num_input_tokens_seen": 285986756, + "step": 4275 + }, + { + "epoch": 0.4851063829787234, + "loss": 1.0259168148040771, + "loss_ce": 0.005897384136915207, + "loss_iou": 0.3671875, + "loss_num": 0.0576171875, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 285986756, + "step": 4275 + }, + { + "epoch": 0.48521985815602836, + "grad_norm": 19.36068344116211, + "learning_rate": 5e-05, + "loss": 1.306, + "num_input_tokens_seen": 286054328, + "step": 4276 + }, + { + "epoch": 0.48521985815602836, + "loss": 1.579927921295166, + "loss_ce": 0.00473267026245594, + "loss_iou": 0.6328125, + "loss_num": 0.0625, + "loss_xval": 1.578125, + "num_input_tokens_seen": 286054328, + "step": 4276 + }, + { + "epoch": 0.48533333333333334, + "grad_norm": 23.781370162963867, + "learning_rate": 5e-05, + "loss": 1.1919, + "num_input_tokens_seen": 286121280, + "step": 4277 + }, + { + "epoch": 0.48533333333333334, + "loss": 1.240849256515503, + "loss_ce": 0.02209930308163166, + "loss_iou": 0.458984375, + "loss_num": 0.060302734375, + "loss_xval": 1.21875, + "num_input_tokens_seen": 286121280, + "step": 4277 + }, + { + "epoch": 0.4854468085106383, + "grad_norm": 27.022789001464844, + "learning_rate": 5e-05, + "loss": 1.362, + "num_input_tokens_seen": 286188676, + "step": 4278 + }, + { + "epoch": 0.4854468085106383, + "loss": 1.1386492252349854, + "loss_ce": 0.006325049325823784, + "loss_iou": 0.453125, + "loss_num": 0.045654296875, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 286188676, + "step": 4278 + }, + { + "epoch": 0.48556028368794324, + "grad_norm": 19.29509735107422, + "learning_rate": 5e-05, + "loss": 0.9355, + "num_input_tokens_seen": 286255224, + "step": 4279 + }, + { + "epoch": 0.48556028368794324, + "loss": 0.8976218700408936, + "loss_ce": 0.004799575544893742, + "loss_iou": 0.34765625, + "loss_num": 0.039794921875, + "loss_xval": 0.89453125, + "num_input_tokens_seen": 286255224, + "step": 4279 + }, + { + "epoch": 0.4856737588652482, + "grad_norm": 24.315519332885742, + "learning_rate": 5e-05, + "loss": 1.1082, + "num_input_tokens_seen": 286321952, + "step": 4280 + }, + { + "epoch": 0.4856737588652482, + "loss": 1.1864936351776123, + "loss_ce": 0.004852993413805962, + "loss_iou": 0.498046875, + "loss_num": 0.037109375, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 286321952, + "step": 4280 + }, + { + "epoch": 0.4857872340425532, + "grad_norm": 32.58725357055664, + "learning_rate": 5e-05, + "loss": 1.1739, + "num_input_tokens_seen": 286388420, + "step": 4281 + }, + { + "epoch": 0.4857872340425532, + "loss": 1.0135012865066528, + "loss_ce": 0.008130253292620182, + "loss_iou": 0.4140625, + "loss_num": 0.03515625, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 286388420, + "step": 4281 + }, + { + "epoch": 0.4859007092198582, + "grad_norm": 33.800167083740234, + "learning_rate": 5e-05, + "loss": 1.051, + "num_input_tokens_seen": 286455120, + "step": 4282 + }, + { + "epoch": 0.4859007092198582, + "loss": 1.0365924835205078, + "loss_ce": 0.006807289086282253, + "loss_iou": 0.421875, + "loss_num": 0.037109375, + "loss_xval": 1.03125, + "num_input_tokens_seen": 286455120, + "step": 4282 + }, + { + "epoch": 0.4860141843971631, + "grad_norm": 35.228153228759766, + "learning_rate": 5e-05, + "loss": 1.138, + "num_input_tokens_seen": 286522272, + "step": 4283 + }, + { + "epoch": 0.4860141843971631, + "loss": 1.028578519821167, + "loss_ce": 0.006117490120232105, + "loss_iou": 0.43359375, + "loss_num": 0.031494140625, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 286522272, + "step": 4283 + }, + { + "epoch": 0.4861276595744681, + "grad_norm": 22.40202522277832, + "learning_rate": 5e-05, + "loss": 1.276, + "num_input_tokens_seen": 286589160, + "step": 4284 + }, + { + "epoch": 0.4861276595744681, + "loss": 1.2872755527496338, + "loss_ce": 0.006513867527246475, + "loss_iou": 0.52734375, + "loss_num": 0.0458984375, + "loss_xval": 1.28125, + "num_input_tokens_seen": 286589160, + "step": 4284 + }, + { + "epoch": 0.48624113475177305, + "grad_norm": 23.907447814941406, + "learning_rate": 5e-05, + "loss": 1.0723, + "num_input_tokens_seen": 286656832, + "step": 4285 + }, + { + "epoch": 0.48624113475177305, + "loss": 1.037381887435913, + "loss_ce": 0.002713962458074093, + "loss_iou": 0.45703125, + "loss_num": 0.024169921875, + "loss_xval": 1.03125, + "num_input_tokens_seen": 286656832, + "step": 4285 + }, + { + "epoch": 0.48635460992907803, + "grad_norm": 27.449317932128906, + "learning_rate": 5e-05, + "loss": 1.2437, + "num_input_tokens_seen": 286724164, + "step": 4286 + }, + { + "epoch": 0.48635460992907803, + "loss": 1.4704258441925049, + "loss_ce": 0.007535151671618223, + "loss_iou": 0.52734375, + "loss_num": 0.08203125, + "loss_xval": 1.4609375, + "num_input_tokens_seen": 286724164, + "step": 4286 + }, + { + "epoch": 0.48646808510638295, + "grad_norm": 26.075572967529297, + "learning_rate": 5e-05, + "loss": 1.2958, + "num_input_tokens_seen": 286790316, + "step": 4287 + }, + { + "epoch": 0.48646808510638295, + "loss": 0.9754296541213989, + "loss_ce": 0.004604454152286053, + "loss_iou": 0.384765625, + "loss_num": 0.040283203125, + "loss_xval": 0.97265625, + "num_input_tokens_seen": 286790316, + "step": 4287 + }, + { + "epoch": 0.48658156028368793, + "grad_norm": 39.317466735839844, + "learning_rate": 5e-05, + "loss": 1.278, + "num_input_tokens_seen": 286856796, + "step": 4288 + }, + { + "epoch": 0.48658156028368793, + "loss": 1.3103638887405396, + "loss_ce": 0.004699823446571827, + "loss_iou": 0.5078125, + "loss_num": 0.05712890625, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 286856796, + "step": 4288 + }, + { + "epoch": 0.4866950354609929, + "grad_norm": 29.171873092651367, + "learning_rate": 5e-05, + "loss": 1.3707, + "num_input_tokens_seen": 286923508, + "step": 4289 + }, + { + "epoch": 0.4866950354609929, + "loss": 1.0781816244125366, + "loss_ce": 0.005595540627837181, + "loss_iou": 0.46484375, + "loss_num": 0.0284423828125, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 286923508, + "step": 4289 + }, + { + "epoch": 0.4868085106382979, + "grad_norm": 13.940112113952637, + "learning_rate": 5e-05, + "loss": 1.1319, + "num_input_tokens_seen": 286990492, + "step": 4290 + }, + { + "epoch": 0.4868085106382979, + "loss": 1.11873197555542, + "loss_ce": 0.00545062031596899, + "loss_iou": 0.447265625, + "loss_num": 0.0439453125, + "loss_xval": 1.109375, + "num_input_tokens_seen": 286990492, + "step": 4290 + }, + { + "epoch": 0.48692198581560286, + "grad_norm": 20.146665573120117, + "learning_rate": 5e-05, + "loss": 1.1701, + "num_input_tokens_seen": 287057676, + "step": 4291 + }, + { + "epoch": 0.48692198581560286, + "loss": 1.1506328582763672, + "loss_ce": 0.006223694421350956, + "loss_iou": 0.46875, + "loss_num": 0.04150390625, + "loss_xval": 1.140625, + "num_input_tokens_seen": 287057676, + "step": 4291 + }, + { + "epoch": 0.4870354609929078, + "grad_norm": 27.98824119567871, + "learning_rate": 5e-05, + "loss": 1.1774, + "num_input_tokens_seen": 287124596, + "step": 4292 + }, + { + "epoch": 0.4870354609929078, + "loss": 1.0259902477264404, + "loss_ce": 0.0020643980242311954, + "loss_iou": 0.4375, + "loss_num": 0.02978515625, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 287124596, + "step": 4292 + }, + { + "epoch": 0.48714893617021277, + "grad_norm": 23.8153133392334, + "learning_rate": 5e-05, + "loss": 1.2372, + "num_input_tokens_seen": 287192228, + "step": 4293 + }, + { + "epoch": 0.48714893617021277, + "loss": 1.204078197479248, + "loss_ce": 0.004508519545197487, + "loss_iou": 0.5078125, + "loss_num": 0.0361328125, + "loss_xval": 1.203125, + "num_input_tokens_seen": 287192228, + "step": 4293 + }, + { + "epoch": 0.48726241134751774, + "grad_norm": 32.173667907714844, + "learning_rate": 5e-05, + "loss": 1.2227, + "num_input_tokens_seen": 287258508, + "step": 4294 + }, + { + "epoch": 0.48726241134751774, + "loss": 1.4714455604553223, + "loss_ce": 0.005625337362289429, + "loss_iou": 0.58203125, + "loss_num": 0.06005859375, + "loss_xval": 1.46875, + "num_input_tokens_seen": 287258508, + "step": 4294 + }, + { + "epoch": 0.4873758865248227, + "grad_norm": 27.807056427001953, + "learning_rate": 5e-05, + "loss": 1.5055, + "num_input_tokens_seen": 287323288, + "step": 4295 + }, + { + "epoch": 0.4873758865248227, + "loss": 1.4223177433013916, + "loss_ce": 0.005325579550117254, + "loss_iou": 0.5859375, + "loss_num": 0.048828125, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 287323288, + "step": 4295 + }, + { + "epoch": 0.48748936170212764, + "grad_norm": 21.6838321685791, + "learning_rate": 5e-05, + "loss": 0.9216, + "num_input_tokens_seen": 287390060, + "step": 4296 + }, + { + "epoch": 0.48748936170212764, + "loss": 0.8896936178207397, + "loss_ce": 0.0023034741170704365, + "loss_iou": 0.380859375, + "loss_num": 0.0255126953125, + "loss_xval": 0.88671875, + "num_input_tokens_seen": 287390060, + "step": 4296 + }, + { + "epoch": 0.4876028368794326, + "grad_norm": 23.019105911254883, + "learning_rate": 5e-05, + "loss": 1.2826, + "num_input_tokens_seen": 287455492, + "step": 4297 + }, + { + "epoch": 0.4876028368794326, + "loss": 1.440692663192749, + "loss_ce": 0.008075462654232979, + "loss_iou": 0.58203125, + "loss_num": 0.053466796875, + "loss_xval": 1.4296875, + "num_input_tokens_seen": 287455492, + "step": 4297 + }, + { + "epoch": 0.4877163120567376, + "grad_norm": 68.45902252197266, + "learning_rate": 5e-05, + "loss": 1.4853, + "num_input_tokens_seen": 287523008, + "step": 4298 + }, + { + "epoch": 0.4877163120567376, + "loss": 1.5332659482955933, + "loss_ce": 0.004945684690028429, + "loss_iou": 0.6171875, + "loss_num": 0.058349609375, + "loss_xval": 1.53125, + "num_input_tokens_seen": 287523008, + "step": 4298 + }, + { + "epoch": 0.4878297872340426, + "grad_norm": 18.30952262878418, + "learning_rate": 5e-05, + "loss": 1.1351, + "num_input_tokens_seen": 287590044, + "step": 4299 + }, + { + "epoch": 0.4878297872340426, + "loss": 0.9631259441375732, + "loss_ce": 0.003729564603418112, + "loss_iou": 0.400390625, + "loss_num": 0.03173828125, + "loss_xval": 0.9609375, + "num_input_tokens_seen": 287590044, + "step": 4299 + }, + { + "epoch": 0.4879432624113475, + "grad_norm": 19.10336685180664, + "learning_rate": 5e-05, + "loss": 1.3065, + "num_input_tokens_seen": 287656888, + "step": 4300 + }, + { + "epoch": 0.4879432624113475, + "loss": 1.3505154848098755, + "loss_ce": 0.006277188658714294, + "loss_iou": 0.50390625, + "loss_num": 0.0673828125, + "loss_xval": 1.34375, + "num_input_tokens_seen": 287656888, + "step": 4300 + }, + { + "epoch": 0.4880567375886525, + "grad_norm": 25.264833450317383, + "learning_rate": 5e-05, + "loss": 1.07, + "num_input_tokens_seen": 287723200, + "step": 4301 + }, + { + "epoch": 0.4880567375886525, + "loss": 0.9603840708732605, + "loss_ce": 0.004573564976453781, + "loss_iou": 0.3828125, + "loss_num": 0.0380859375, + "loss_xval": 0.95703125, + "num_input_tokens_seen": 287723200, + "step": 4301 + }, + { + "epoch": 0.48817021276595746, + "grad_norm": 34.78756332397461, + "learning_rate": 5e-05, + "loss": 1.1042, + "num_input_tokens_seen": 287789632, + "step": 4302 + }, + { + "epoch": 0.48817021276595746, + "loss": 1.115036964416504, + "loss_ce": 0.005661921575665474, + "loss_iou": 0.48046875, + "loss_num": 0.0301513671875, + "loss_xval": 1.109375, + "num_input_tokens_seen": 287789632, + "step": 4302 + }, + { + "epoch": 0.48828368794326243, + "grad_norm": 23.31968116760254, + "learning_rate": 5e-05, + "loss": 1.4794, + "num_input_tokens_seen": 287856440, + "step": 4303 + }, + { + "epoch": 0.48828368794326243, + "loss": 1.440730333328247, + "loss_ce": 0.00664830207824707, + "loss_iou": 0.56640625, + "loss_num": 0.059326171875, + "loss_xval": 1.4375, + "num_input_tokens_seen": 287856440, + "step": 4303 + }, + { + "epoch": 0.48839716312056736, + "grad_norm": 15.546703338623047, + "learning_rate": 5e-05, + "loss": 1.2522, + "num_input_tokens_seen": 287923148, + "step": 4304 + }, + { + "epoch": 0.48839716312056736, + "loss": 1.4014899730682373, + "loss_ce": 0.007935374975204468, + "loss_iou": 0.51953125, + "loss_num": 0.0712890625, + "loss_xval": 1.390625, + "num_input_tokens_seen": 287923148, + "step": 4304 + }, + { + "epoch": 0.48851063829787233, + "grad_norm": 29.58335304260254, + "learning_rate": 5e-05, + "loss": 1.0408, + "num_input_tokens_seen": 287990592, + "step": 4305 + }, + { + "epoch": 0.48851063829787233, + "loss": 1.0480345487594604, + "loss_ce": 0.006286493502557278, + "loss_iou": 0.400390625, + "loss_num": 0.048095703125, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 287990592, + "step": 4305 + }, + { + "epoch": 0.4886241134751773, + "grad_norm": 16.107358932495117, + "learning_rate": 5e-05, + "loss": 1.008, + "num_input_tokens_seen": 288058364, + "step": 4306 + }, + { + "epoch": 0.4886241134751773, + "loss": 1.0133939981460571, + "loss_ce": 0.0031400606967508793, + "loss_iou": 0.421875, + "loss_num": 0.03369140625, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 288058364, + "step": 4306 + }, + { + "epoch": 0.4887375886524823, + "grad_norm": 25.739465713500977, + "learning_rate": 5e-05, + "loss": 1.2075, + "num_input_tokens_seen": 288124996, + "step": 4307 + }, + { + "epoch": 0.4887375886524823, + "loss": 1.3490848541259766, + "loss_ce": 0.007043870165944099, + "loss_iou": 0.5234375, + "loss_num": 0.059326171875, + "loss_xval": 1.34375, + "num_input_tokens_seen": 288124996, + "step": 4307 + }, + { + "epoch": 0.4888510638297872, + "grad_norm": 33.522361755371094, + "learning_rate": 5e-05, + "loss": 1.1428, + "num_input_tokens_seen": 288191652, + "step": 4308 + }, + { + "epoch": 0.4888510638297872, + "loss": 0.9740229845046997, + "loss_ce": 0.005944376345723867, + "loss_iou": 0.4375, + "loss_num": 0.018310546875, + "loss_xval": 0.96875, + "num_input_tokens_seen": 288191652, + "step": 4308 + }, + { + "epoch": 0.4889645390070922, + "grad_norm": 29.62031364440918, + "learning_rate": 5e-05, + "loss": 1.3756, + "num_input_tokens_seen": 288258560, + "step": 4309 + }, + { + "epoch": 0.4889645390070922, + "loss": 1.1983425617218018, + "loss_ce": 0.0041287848725914955, + "loss_iou": 0.51171875, + "loss_num": 0.034423828125, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 288258560, + "step": 4309 + }, + { + "epoch": 0.48907801418439717, + "grad_norm": 22.51309585571289, + "learning_rate": 5e-05, + "loss": 1.0372, + "num_input_tokens_seen": 288325128, + "step": 4310 + }, + { + "epoch": 0.48907801418439717, + "loss": 0.9681452512741089, + "loss_ce": 0.006749933585524559, + "loss_iou": 0.36328125, + "loss_num": 0.04736328125, + "loss_xval": 0.9609375, + "num_input_tokens_seen": 288325128, + "step": 4310 + }, + { + "epoch": 0.48919148936170215, + "grad_norm": 20.083581924438477, + "learning_rate": 5e-05, + "loss": 1.2333, + "num_input_tokens_seen": 288392748, + "step": 4311 + }, + { + "epoch": 0.48919148936170215, + "loss": 1.0787086486816406, + "loss_ce": 0.00546643789857626, + "loss_iou": 0.48046875, + "loss_num": 0.0225830078125, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 288392748, + "step": 4311 + }, + { + "epoch": 0.48930496453900707, + "grad_norm": 34.863365173339844, + "learning_rate": 5e-05, + "loss": 1.3722, + "num_input_tokens_seen": 288459044, + "step": 4312 + }, + { + "epoch": 0.48930496453900707, + "loss": 1.392508625984192, + "loss_ce": 0.007743064779788256, + "loss_iou": 0.546875, + "loss_num": 0.058837890625, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 288459044, + "step": 4312 + }, + { + "epoch": 0.48941843971631205, + "grad_norm": 28.60311508178711, + "learning_rate": 5e-05, + "loss": 1.2492, + "num_input_tokens_seen": 288526652, + "step": 4313 + }, + { + "epoch": 0.48941843971631205, + "loss": 1.2319200038909912, + "loss_ce": 0.010850653052330017, + "loss_iou": 0.5234375, + "loss_num": 0.034912109375, + "loss_xval": 1.21875, + "num_input_tokens_seen": 288526652, + "step": 4313 + }, + { + "epoch": 0.489531914893617, + "grad_norm": 19.08444595336914, + "learning_rate": 5e-05, + "loss": 1.0323, + "num_input_tokens_seen": 288593804, + "step": 4314 + }, + { + "epoch": 0.489531914893617, + "loss": 0.9820538759231567, + "loss_ce": 0.006345859728753567, + "loss_iou": 0.38671875, + "loss_num": 0.0400390625, + "loss_xval": 0.9765625, + "num_input_tokens_seen": 288593804, + "step": 4314 + }, + { + "epoch": 0.489645390070922, + "grad_norm": 18.714982986450195, + "learning_rate": 5e-05, + "loss": 1.2104, + "num_input_tokens_seen": 288660100, + "step": 4315 + }, + { + "epoch": 0.489645390070922, + "loss": 1.049050211906433, + "loss_ce": 0.0053795622661709785, + "loss_iou": 0.412109375, + "loss_num": 0.04345703125, + "loss_xval": 1.046875, + "num_input_tokens_seen": 288660100, + "step": 4315 + }, + { + "epoch": 0.4897588652482269, + "grad_norm": 34.65945053100586, + "learning_rate": 5e-05, + "loss": 1.199, + "num_input_tokens_seen": 288727156, + "step": 4316 + }, + { + "epoch": 0.4897588652482269, + "loss": 1.2105423212051392, + "loss_ce": 0.013276636600494385, + "loss_iou": 0.44140625, + "loss_num": 0.06298828125, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 288727156, + "step": 4316 + }, + { + "epoch": 0.4898723404255319, + "grad_norm": 35.355506896972656, + "learning_rate": 5e-05, + "loss": 1.3636, + "num_input_tokens_seen": 288794352, + "step": 4317 + }, + { + "epoch": 0.4898723404255319, + "loss": 1.3979967832565308, + "loss_ce": 0.007371684070676565, + "loss_iou": 0.578125, + "loss_num": 0.04736328125, + "loss_xval": 1.390625, + "num_input_tokens_seen": 288794352, + "step": 4317 + }, + { + "epoch": 0.4899858156028369, + "grad_norm": 14.701171875, + "learning_rate": 5e-05, + "loss": 1.1332, + "num_input_tokens_seen": 288860696, + "step": 4318 + }, + { + "epoch": 0.4899858156028369, + "loss": 1.1040395498275757, + "loss_ce": 0.0063833920285105705, + "loss_iou": 0.427734375, + "loss_num": 0.048583984375, + "loss_xval": 1.09375, + "num_input_tokens_seen": 288860696, + "step": 4318 + }, + { + "epoch": 0.49009929078014186, + "grad_norm": 15.36877727508545, + "learning_rate": 5e-05, + "loss": 1.0091, + "num_input_tokens_seen": 288927696, + "step": 4319 + }, + { + "epoch": 0.49009929078014186, + "loss": 1.0285940170288086, + "loss_ce": 0.005156551953405142, + "loss_iou": 0.43359375, + "loss_num": 0.0311279296875, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 288927696, + "step": 4319 + }, + { + "epoch": 0.4902127659574468, + "grad_norm": 17.054141998291016, + "learning_rate": 5e-05, + "loss": 1.0918, + "num_input_tokens_seen": 288993876, + "step": 4320 + }, + { + "epoch": 0.4902127659574468, + "loss": 1.2871400117874146, + "loss_ce": 0.006866630632430315, + "loss_iou": 0.51953125, + "loss_num": 0.0478515625, + "loss_xval": 1.28125, + "num_input_tokens_seen": 288993876, + "step": 4320 + }, + { + "epoch": 0.49032624113475176, + "grad_norm": 20.446487426757812, + "learning_rate": 5e-05, + "loss": 1.1693, + "num_input_tokens_seen": 289061504, + "step": 4321 + }, + { + "epoch": 0.49032624113475176, + "loss": 1.0701916217803955, + "loss_ce": 0.005494298413395882, + "loss_iou": 0.453125, + "loss_num": 0.03173828125, + "loss_xval": 1.0625, + "num_input_tokens_seen": 289061504, + "step": 4321 + }, + { + "epoch": 0.49043971631205674, + "grad_norm": 31.12103843688965, + "learning_rate": 5e-05, + "loss": 1.2327, + "num_input_tokens_seen": 289127860, + "step": 4322 + }, + { + "epoch": 0.49043971631205674, + "loss": 1.1811014413833618, + "loss_ce": 0.004831950645893812, + "loss_iou": 0.482421875, + "loss_num": 0.04248046875, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 289127860, + "step": 4322 + }, + { + "epoch": 0.4905531914893617, + "grad_norm": 48.15183639526367, + "learning_rate": 5e-05, + "loss": 1.176, + "num_input_tokens_seen": 289194144, + "step": 4323 + }, + { + "epoch": 0.4905531914893617, + "loss": 0.9604634046554565, + "loss_ce": 0.0061177220195531845, + "loss_iou": 0.380859375, + "loss_num": 0.03857421875, + "loss_xval": 0.953125, + "num_input_tokens_seen": 289194144, + "step": 4323 + }, + { + "epoch": 0.49066666666666664, + "grad_norm": 25.095882415771484, + "learning_rate": 5e-05, + "loss": 1.1181, + "num_input_tokens_seen": 289261784, + "step": 4324 + }, + { + "epoch": 0.49066666666666664, + "loss": 1.0678234100341797, + "loss_ce": 0.0023937602527439594, + "loss_iou": 0.458984375, + "loss_num": 0.029052734375, + "loss_xval": 1.0625, + "num_input_tokens_seen": 289261784, + "step": 4324 + }, + { + "epoch": 0.4907801418439716, + "grad_norm": 14.807059288024902, + "learning_rate": 5e-05, + "loss": 1.2133, + "num_input_tokens_seen": 289329152, + "step": 4325 + }, + { + "epoch": 0.4907801418439716, + "loss": 1.178581953048706, + "loss_ce": 0.004997916519641876, + "loss_iou": 0.478515625, + "loss_num": 0.04296875, + "loss_xval": 1.171875, + "num_input_tokens_seen": 289329152, + "step": 4325 + }, + { + "epoch": 0.4908936170212766, + "grad_norm": 10.963665962219238, + "learning_rate": 5e-05, + "loss": 1.0615, + "num_input_tokens_seen": 289396808, + "step": 4326 + }, + { + "epoch": 0.4908936170212766, + "loss": 1.0946584939956665, + "loss_ce": 0.00334990955889225, + "loss_iou": 0.455078125, + "loss_num": 0.0361328125, + "loss_xval": 1.09375, + "num_input_tokens_seen": 289396808, + "step": 4326 + }, + { + "epoch": 0.4910070921985816, + "grad_norm": 12.562355041503906, + "learning_rate": 5e-05, + "loss": 1.0697, + "num_input_tokens_seen": 289464516, + "step": 4327 + }, + { + "epoch": 0.4910070921985816, + "loss": 1.0587661266326904, + "loss_ce": 0.008473258465528488, + "loss_iou": 0.447265625, + "loss_num": 0.031005859375, + "loss_xval": 1.046875, + "num_input_tokens_seen": 289464516, + "step": 4327 + }, + { + "epoch": 0.49112056737588655, + "grad_norm": 27.426979064941406, + "learning_rate": 5e-05, + "loss": 0.9455, + "num_input_tokens_seen": 289531036, + "step": 4328 + }, + { + "epoch": 0.49112056737588655, + "loss": 1.0945175886154175, + "loss_ce": 0.007115256041288376, + "loss_iou": 0.427734375, + "loss_num": 0.046630859375, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 289531036, + "step": 4328 + }, + { + "epoch": 0.4912340425531915, + "grad_norm": 37.822654724121094, + "learning_rate": 5e-05, + "loss": 1.076, + "num_input_tokens_seen": 289598052, + "step": 4329 + }, + { + "epoch": 0.4912340425531915, + "loss": 1.1055818796157837, + "loss_ce": 0.004995936527848244, + "loss_iou": 0.4921875, + "loss_num": 0.02392578125, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 289598052, + "step": 4329 + }, + { + "epoch": 0.49134751773049645, + "grad_norm": 45.258506774902344, + "learning_rate": 5e-05, + "loss": 1.4677, + "num_input_tokens_seen": 289663744, + "step": 4330 + }, + { + "epoch": 0.49134751773049645, + "loss": 1.6507412195205688, + "loss_ce": 0.005233406089246273, + "loss_iou": 0.69140625, + "loss_num": 0.052734375, + "loss_xval": 1.6484375, + "num_input_tokens_seen": 289663744, + "step": 4330 + }, + { + "epoch": 0.49146099290780143, + "grad_norm": 25.199413299560547, + "learning_rate": 5e-05, + "loss": 1.1958, + "num_input_tokens_seen": 289730260, + "step": 4331 + }, + { + "epoch": 0.49146099290780143, + "loss": 1.0628609657287598, + "loss_ce": 0.004267178010195494, + "loss_iou": 0.453125, + "loss_num": 0.0303955078125, + "loss_xval": 1.0625, + "num_input_tokens_seen": 289730260, + "step": 4331 + }, + { + "epoch": 0.4915744680851064, + "grad_norm": 18.43089485168457, + "learning_rate": 5e-05, + "loss": 1.1347, + "num_input_tokens_seen": 289797020, + "step": 4332 + }, + { + "epoch": 0.4915744680851064, + "loss": 1.1777658462524414, + "loss_ce": 0.0027170954272150993, + "loss_iou": 0.50390625, + "loss_num": 0.033447265625, + "loss_xval": 1.171875, + "num_input_tokens_seen": 289797020, + "step": 4332 + }, + { + "epoch": 0.49168794326241133, + "grad_norm": 35.16505813598633, + "learning_rate": 5e-05, + "loss": 1.3898, + "num_input_tokens_seen": 289864036, + "step": 4333 + }, + { + "epoch": 0.49168794326241133, + "loss": 1.5758118629455566, + "loss_ce": 0.010382235050201416, + "loss_iou": 0.6015625, + "loss_num": 0.07275390625, + "loss_xval": 1.5625, + "num_input_tokens_seen": 289864036, + "step": 4333 + }, + { + "epoch": 0.4918014184397163, + "grad_norm": 12.820944786071777, + "learning_rate": 5e-05, + "loss": 1.0059, + "num_input_tokens_seen": 289931064, + "step": 4334 + }, + { + "epoch": 0.4918014184397163, + "loss": 1.137856125831604, + "loss_ce": 0.004799520131200552, + "loss_iou": 0.462890625, + "loss_num": 0.041259765625, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 289931064, + "step": 4334 + }, + { + "epoch": 0.4919148936170213, + "grad_norm": 23.036205291748047, + "learning_rate": 5e-05, + "loss": 1.1235, + "num_input_tokens_seen": 289997744, + "step": 4335 + }, + { + "epoch": 0.4919148936170213, + "loss": 0.929218053817749, + "loss_ce": 0.007343023084104061, + "loss_iou": 0.3828125, + "loss_num": 0.0311279296875, + "loss_xval": 0.921875, + "num_input_tokens_seen": 289997744, + "step": 4335 + }, + { + "epoch": 0.49202836879432627, + "grad_norm": 36.165382385253906, + "learning_rate": 5e-05, + "loss": 1.4103, + "num_input_tokens_seen": 290064768, + "step": 4336 + }, + { + "epoch": 0.49202836879432627, + "loss": 1.6500167846679688, + "loss_ce": 0.006462186109274626, + "loss_iou": 0.6953125, + "loss_num": 0.050048828125, + "loss_xval": 1.640625, + "num_input_tokens_seen": 290064768, + "step": 4336 + }, + { + "epoch": 0.4921418439716312, + "grad_norm": 26.238168716430664, + "learning_rate": 5e-05, + "loss": 1.5558, + "num_input_tokens_seen": 290131672, + "step": 4337 + }, + { + "epoch": 0.4921418439716312, + "loss": 1.6116973161697388, + "loss_ce": 0.004275390412658453, + "loss_iou": 0.66796875, + "loss_num": 0.05419921875, + "loss_xval": 1.609375, + "num_input_tokens_seen": 290131672, + "step": 4337 + }, + { + "epoch": 0.49225531914893617, + "grad_norm": 14.669328689575195, + "learning_rate": 5e-05, + "loss": 0.8825, + "num_input_tokens_seen": 290197988, + "step": 4338 + }, + { + "epoch": 0.49225531914893617, + "loss": 0.8676000833511353, + "loss_ce": 0.0054785097017884254, + "loss_iou": 0.392578125, + "loss_num": 0.0157470703125, + "loss_xval": 0.86328125, + "num_input_tokens_seen": 290197988, + "step": 4338 + }, + { + "epoch": 0.49236879432624114, + "grad_norm": 50.792301177978516, + "learning_rate": 5e-05, + "loss": 1.3958, + "num_input_tokens_seen": 290265528, + "step": 4339 + }, + { + "epoch": 0.49236879432624114, + "loss": 1.3285714387893677, + "loss_ce": 0.004840925335884094, + "loss_iou": 0.52734375, + "loss_num": 0.05322265625, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 290265528, + "step": 4339 + }, + { + "epoch": 0.4924822695035461, + "grad_norm": 39.98979949951172, + "learning_rate": 5e-05, + "loss": 1.1612, + "num_input_tokens_seen": 290332492, + "step": 4340 + }, + { + "epoch": 0.4924822695035461, + "loss": 1.2686783075332642, + "loss_ce": 0.0053269341588020325, + "loss_iou": 0.51953125, + "loss_num": 0.04541015625, + "loss_xval": 1.265625, + "num_input_tokens_seen": 290332492, + "step": 4340 + }, + { + "epoch": 0.49259574468085104, + "grad_norm": 21.476499557495117, + "learning_rate": 5e-05, + "loss": 1.4561, + "num_input_tokens_seen": 290399412, + "step": 4341 + }, + { + "epoch": 0.49259574468085104, + "loss": 1.2341742515563965, + "loss_ce": 0.006635097786784172, + "loss_iou": 0.4765625, + "loss_num": 0.055419921875, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 290399412, + "step": 4341 + }, + { + "epoch": 0.492709219858156, + "grad_norm": 31.590595245361328, + "learning_rate": 5e-05, + "loss": 1.3773, + "num_input_tokens_seen": 290464760, + "step": 4342 + }, + { + "epoch": 0.492709219858156, + "loss": 1.2763972282409668, + "loss_ce": 0.005401101894676685, + "loss_iou": 0.51171875, + "loss_num": 0.049072265625, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 290464760, + "step": 4342 + }, + { + "epoch": 0.492822695035461, + "grad_norm": 31.43642234802246, + "learning_rate": 5e-05, + "loss": 1.2512, + "num_input_tokens_seen": 290532452, + "step": 4343 + }, + { + "epoch": 0.492822695035461, + "loss": 1.3013439178466797, + "loss_ce": 0.006422044709324837, + "loss_iou": 0.5546875, + "loss_num": 0.0361328125, + "loss_xval": 1.296875, + "num_input_tokens_seen": 290532452, + "step": 4343 + }, + { + "epoch": 0.492936170212766, + "grad_norm": 32.30893325805664, + "learning_rate": 5e-05, + "loss": 1.3378, + "num_input_tokens_seen": 290600112, + "step": 4344 + }, + { + "epoch": 0.492936170212766, + "loss": 1.3383533954620361, + "loss_ce": 0.005345663987100124, + "loss_iou": 0.5546875, + "loss_num": 0.044921875, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 290600112, + "step": 4344 + }, + { + "epoch": 0.4930496453900709, + "grad_norm": 29.590343475341797, + "learning_rate": 5e-05, + "loss": 1.3757, + "num_input_tokens_seen": 290666196, + "step": 4345 + }, + { + "epoch": 0.4930496453900709, + "loss": 1.3120808601379395, + "loss_ce": 0.00397543516010046, + "loss_iou": 0.5625, + "loss_num": 0.036376953125, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 290666196, + "step": 4345 + }, + { + "epoch": 0.4931631205673759, + "grad_norm": 30.988126754760742, + "learning_rate": 5e-05, + "loss": 1.2041, + "num_input_tokens_seen": 290733156, + "step": 4346 + }, + { + "epoch": 0.4931631205673759, + "loss": 1.3227548599243164, + "loss_ce": 0.009278316050767899, + "loss_iou": 0.515625, + "loss_num": 0.056640625, + "loss_xval": 1.3125, + "num_input_tokens_seen": 290733156, + "step": 4346 + }, + { + "epoch": 0.49327659574468086, + "grad_norm": 18.69916534423828, + "learning_rate": 5e-05, + "loss": 1.1363, + "num_input_tokens_seen": 290799472, + "step": 4347 + }, + { + "epoch": 0.49327659574468086, + "loss": 1.1131978034973145, + "loss_ce": 0.004799364134669304, + "loss_iou": 0.453125, + "loss_num": 0.04052734375, + "loss_xval": 1.109375, + "num_input_tokens_seen": 290799472, + "step": 4347 + }, + { + "epoch": 0.49339007092198583, + "grad_norm": 85.51101684570312, + "learning_rate": 5e-05, + "loss": 1.3096, + "num_input_tokens_seen": 290866344, + "step": 4348 + }, + { + "epoch": 0.49339007092198583, + "loss": 1.328491449356079, + "loss_ce": 0.009643730707466602, + "loss_iou": 0.5546875, + "loss_num": 0.041259765625, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 290866344, + "step": 4348 + }, + { + "epoch": 0.49350354609929076, + "grad_norm": 27.343379974365234, + "learning_rate": 5e-05, + "loss": 1.2232, + "num_input_tokens_seen": 290932304, + "step": 4349 + }, + { + "epoch": 0.49350354609929076, + "loss": 1.2929407358169556, + "loss_ce": 0.00827277172356844, + "loss_iou": 0.53515625, + "loss_num": 0.043212890625, + "loss_xval": 1.28125, + "num_input_tokens_seen": 290932304, + "step": 4349 + }, + { + "epoch": 0.49361702127659574, + "grad_norm": 19.883609771728516, + "learning_rate": 5e-05, + "loss": 1.1064, + "num_input_tokens_seen": 290999064, + "step": 4350 + }, + { + "epoch": 0.49361702127659574, + "loss": 1.166426658630371, + "loss_ce": 0.005293800495564938, + "loss_iou": 0.47265625, + "loss_num": 0.04345703125, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 290999064, + "step": 4350 + }, + { + "epoch": 0.4937304964539007, + "grad_norm": 20.83009910583496, + "learning_rate": 5e-05, + "loss": 1.063, + "num_input_tokens_seen": 291067152, + "step": 4351 + }, + { + "epoch": 0.4937304964539007, + "loss": 1.1083742380142212, + "loss_ce": 0.007788278628140688, + "loss_iou": 0.478515625, + "loss_num": 0.02880859375, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 291067152, + "step": 4351 + }, + { + "epoch": 0.4938439716312057, + "grad_norm": 28.71405029296875, + "learning_rate": 5e-05, + "loss": 1.059, + "num_input_tokens_seen": 291133796, + "step": 4352 + }, + { + "epoch": 0.4938439716312057, + "loss": 0.9916826486587524, + "loss_ce": 0.0034624869003891945, + "loss_iou": 0.396484375, + "loss_num": 0.039306640625, + "loss_xval": 0.98828125, + "num_input_tokens_seen": 291133796, + "step": 4352 + }, + { + "epoch": 0.4939574468085106, + "grad_norm": 35.4056396484375, + "learning_rate": 5e-05, + "loss": 1.1163, + "num_input_tokens_seen": 291200624, + "step": 4353 + }, + { + "epoch": 0.4939574468085106, + "loss": 1.1721619367599487, + "loss_ce": 0.006146262399852276, + "loss_iou": 0.4765625, + "loss_num": 0.04248046875, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 291200624, + "step": 4353 + }, + { + "epoch": 0.4940709219858156, + "grad_norm": 41.12137222290039, + "learning_rate": 5e-05, + "loss": 1.3935, + "num_input_tokens_seen": 291268136, + "step": 4354 + }, + { + "epoch": 0.4940709219858156, + "loss": 1.2308650016784668, + "loss_ce": 0.0028376185800880194, + "loss_iou": 0.51171875, + "loss_num": 0.041015625, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 291268136, + "step": 4354 + }, + { + "epoch": 0.49418439716312057, + "grad_norm": 22.420007705688477, + "learning_rate": 5e-05, + "loss": 1.1361, + "num_input_tokens_seen": 291334408, + "step": 4355 + }, + { + "epoch": 0.49418439716312057, + "loss": 1.0733816623687744, + "loss_ce": 0.00532744824886322, + "loss_iou": 0.4453125, + "loss_num": 0.03515625, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 291334408, + "step": 4355 + }, + { + "epoch": 0.49429787234042555, + "grad_norm": 15.662532806396484, + "learning_rate": 5e-05, + "loss": 1.3257, + "num_input_tokens_seen": 291400652, + "step": 4356 + }, + { + "epoch": 0.49429787234042555, + "loss": 1.251537799835205, + "loss_ce": 0.008862064220011234, + "loss_iou": 0.52734375, + "loss_num": 0.0380859375, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 291400652, + "step": 4356 + }, + { + "epoch": 0.49441134751773047, + "grad_norm": 29.651704788208008, + "learning_rate": 5e-05, + "loss": 1.2423, + "num_input_tokens_seen": 291467204, + "step": 4357 + }, + { + "epoch": 0.49441134751773047, + "loss": 1.2437503337860107, + "loss_ce": 0.009863688610494137, + "loss_iou": 0.53125, + "loss_num": 0.034912109375, + "loss_xval": 1.234375, + "num_input_tokens_seen": 291467204, + "step": 4357 + }, + { + "epoch": 0.49452482269503545, + "grad_norm": 32.31563949584961, + "learning_rate": 5e-05, + "loss": 1.2413, + "num_input_tokens_seen": 291533728, + "step": 4358 + }, + { + "epoch": 0.49452482269503545, + "loss": 1.374359369277954, + "loss_ce": 0.008636746555566788, + "loss_iou": 0.55859375, + "loss_num": 0.049072265625, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 291533728, + "step": 4358 + }, + { + "epoch": 0.4946382978723404, + "grad_norm": 23.74713134765625, + "learning_rate": 5e-05, + "loss": 1.2704, + "num_input_tokens_seen": 291600584, + "step": 4359 + }, + { + "epoch": 0.4946382978723404, + "loss": 1.1422219276428223, + "loss_ce": 0.006235647015273571, + "loss_iou": 0.455078125, + "loss_num": 0.045166015625, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 291600584, + "step": 4359 + }, + { + "epoch": 0.4947517730496454, + "grad_norm": 17.48752784729004, + "learning_rate": 5e-05, + "loss": 1.1645, + "num_input_tokens_seen": 291668508, + "step": 4360 + }, + { + "epoch": 0.4947517730496454, + "loss": 1.215262770652771, + "loss_ce": 0.0062783644534647465, + "loss_iou": 0.5078125, + "loss_num": 0.0390625, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 291668508, + "step": 4360 + }, + { + "epoch": 0.4948652482269503, + "grad_norm": 26.582101821899414, + "learning_rate": 5e-05, + "loss": 1.235, + "num_input_tokens_seen": 291734580, + "step": 4361 + }, + { + "epoch": 0.4948652482269503, + "loss": 1.2147440910339355, + "loss_ce": 0.007224546745419502, + "loss_iou": 0.490234375, + "loss_num": 0.045166015625, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 291734580, + "step": 4361 + }, + { + "epoch": 0.4949787234042553, + "grad_norm": 21.179731369018555, + "learning_rate": 5e-05, + "loss": 1.1763, + "num_input_tokens_seen": 291801248, + "step": 4362 + }, + { + "epoch": 0.4949787234042553, + "loss": 1.1417670249938965, + "loss_ce": 0.0060248058289289474, + "loss_iou": 0.44921875, + "loss_num": 0.047607421875, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 291801248, + "step": 4362 + }, + { + "epoch": 0.4950921985815603, + "grad_norm": 32.65346145629883, + "learning_rate": 5e-05, + "loss": 1.385, + "num_input_tokens_seen": 291867000, + "step": 4363 + }, + { + "epoch": 0.4950921985815603, + "loss": 1.5266358852386475, + "loss_ce": 0.0066163260489702225, + "loss_iou": 0.6015625, + "loss_num": 0.06298828125, + "loss_xval": 1.5234375, + "num_input_tokens_seen": 291867000, + "step": 4363 + }, + { + "epoch": 0.49520567375886526, + "grad_norm": 40.19027328491211, + "learning_rate": 5e-05, + "loss": 1.1452, + "num_input_tokens_seen": 291933888, + "step": 4364 + }, + { + "epoch": 0.49520567375886526, + "loss": 1.129671335220337, + "loss_ce": 0.010530735366046429, + "loss_iou": 0.5, + "loss_num": 0.0238037109375, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 291933888, + "step": 4364 + }, + { + "epoch": 0.49531914893617024, + "grad_norm": 28.796504974365234, + "learning_rate": 5e-05, + "loss": 1.1919, + "num_input_tokens_seen": 292000044, + "step": 4365 + }, + { + "epoch": 0.49531914893617024, + "loss": 1.1249802112579346, + "loss_ce": 0.00730450265109539, + "loss_iou": 0.48828125, + "loss_num": 0.0284423828125, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 292000044, + "step": 4365 + }, + { + "epoch": 0.49543262411347516, + "grad_norm": 17.922624588012695, + "learning_rate": 5e-05, + "loss": 1.0077, + "num_input_tokens_seen": 292066044, + "step": 4366 + }, + { + "epoch": 0.49543262411347516, + "loss": 0.7527952194213867, + "loss_ce": 0.005724942311644554, + "loss_iou": 0.328125, + "loss_num": 0.01806640625, + "loss_xval": 0.74609375, + "num_input_tokens_seen": 292066044, + "step": 4366 + }, + { + "epoch": 0.49554609929078014, + "grad_norm": 15.417282104492188, + "learning_rate": 5e-05, + "loss": 0.9769, + "num_input_tokens_seen": 292134148, + "step": 4367 + }, + { + "epoch": 0.49554609929078014, + "loss": 0.9587616324424744, + "loss_ce": 0.004660049453377724, + "loss_iou": 0.3828125, + "loss_num": 0.03759765625, + "loss_xval": 0.953125, + "num_input_tokens_seen": 292134148, + "step": 4367 + }, + { + "epoch": 0.4956595744680851, + "grad_norm": 18.990537643432617, + "learning_rate": 5e-05, + "loss": 0.9619, + "num_input_tokens_seen": 292200756, + "step": 4368 + }, + { + "epoch": 0.4956595744680851, + "loss": 0.9789929389953613, + "loss_ce": 0.004871876444667578, + "loss_iou": 0.4453125, + "loss_num": 0.01708984375, + "loss_xval": 0.97265625, + "num_input_tokens_seen": 292200756, + "step": 4368 + }, + { + "epoch": 0.4957730496453901, + "grad_norm": 19.47882652282715, + "learning_rate": 5e-05, + "loss": 1.0976, + "num_input_tokens_seen": 292267528, + "step": 4369 + }, + { + "epoch": 0.4957730496453901, + "loss": 1.0198307037353516, + "loss_ce": 0.0037174453027546406, + "loss_iou": 0.37890625, + "loss_num": 0.051513671875, + "loss_xval": 1.015625, + "num_input_tokens_seen": 292267528, + "step": 4369 + }, + { + "epoch": 0.495886524822695, + "grad_norm": 22.017290115356445, + "learning_rate": 5e-05, + "loss": 1.402, + "num_input_tokens_seen": 292334832, + "step": 4370 + }, + { + "epoch": 0.495886524822695, + "loss": 1.4701688289642334, + "loss_ce": 0.005325013771653175, + "loss_iou": 0.578125, + "loss_num": 0.061767578125, + "loss_xval": 1.46875, + "num_input_tokens_seen": 292334832, + "step": 4370 + }, + { + "epoch": 0.496, + "grad_norm": 41.76329803466797, + "learning_rate": 5e-05, + "loss": 1.2298, + "num_input_tokens_seen": 292401596, + "step": 4371 + }, + { + "epoch": 0.496, + "loss": 1.1259348392486572, + "loss_ce": 0.011188727803528309, + "loss_iou": 0.453125, + "loss_num": 0.041748046875, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 292401596, + "step": 4371 + }, + { + "epoch": 0.496113475177305, + "grad_norm": 30.154550552368164, + "learning_rate": 5e-05, + "loss": 1.215, + "num_input_tokens_seen": 292468740, + "step": 4372 + }, + { + "epoch": 0.496113475177305, + "loss": 1.2382127046585083, + "loss_ce": 0.003837697207927704, + "loss_iou": 0.54296875, + "loss_num": 0.029296875, + "loss_xval": 1.234375, + "num_input_tokens_seen": 292468740, + "step": 4372 + }, + { + "epoch": 0.49622695035460995, + "grad_norm": 33.23747253417969, + "learning_rate": 5e-05, + "loss": 1.0119, + "num_input_tokens_seen": 292534608, + "step": 4373 + }, + { + "epoch": 0.49622695035460995, + "loss": 1.0441601276397705, + "loss_ce": 0.007050845772027969, + "loss_iou": 0.44140625, + "loss_num": 0.0311279296875, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 292534608, + "step": 4373 + }, + { + "epoch": 0.4963404255319149, + "grad_norm": 29.068038940429688, + "learning_rate": 5e-05, + "loss": 1.2936, + "num_input_tokens_seen": 292600552, + "step": 4374 + }, + { + "epoch": 0.4963404255319149, + "loss": 1.3755619525909424, + "loss_ce": 0.008862665854394436, + "loss_iou": 0.546875, + "loss_num": 0.0546875, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 292600552, + "step": 4374 + }, + { + "epoch": 0.49645390070921985, + "grad_norm": 26.315229415893555, + "learning_rate": 5e-05, + "loss": 1.2125, + "num_input_tokens_seen": 292667500, + "step": 4375 + }, + { + "epoch": 0.49645390070921985, + "loss": 1.1753489971160889, + "loss_ce": 0.007380212657153606, + "loss_iou": 0.484375, + "loss_num": 0.040283203125, + "loss_xval": 1.171875, + "num_input_tokens_seen": 292667500, + "step": 4375 + }, + { + "epoch": 0.49656737588652483, + "grad_norm": 19.780759811401367, + "learning_rate": 5e-05, + "loss": 1.249, + "num_input_tokens_seen": 292734708, + "step": 4376 + }, + { + "epoch": 0.49656737588652483, + "loss": 1.0831708908081055, + "loss_ce": 0.0030927688349038363, + "loss_iou": 0.46484375, + "loss_num": 0.02978515625, + "loss_xval": 1.078125, + "num_input_tokens_seen": 292734708, + "step": 4376 + }, + { + "epoch": 0.4966808510638298, + "grad_norm": 29.620227813720703, + "learning_rate": 5e-05, + "loss": 1.1673, + "num_input_tokens_seen": 292801540, + "step": 4377 + }, + { + "epoch": 0.4966808510638298, + "loss": 0.9966753721237183, + "loss_ce": 0.002534799976274371, + "loss_iou": 0.423828125, + "loss_num": 0.02978515625, + "loss_xval": 0.9921875, + "num_input_tokens_seen": 292801540, + "step": 4377 + }, + { + "epoch": 0.49679432624113473, + "grad_norm": 17.70981216430664, + "learning_rate": 5e-05, + "loss": 1.2277, + "num_input_tokens_seen": 292867764, + "step": 4378 + }, + { + "epoch": 0.49679432624113473, + "loss": 1.141101598739624, + "loss_ce": 0.007800900377333164, + "loss_iou": 0.44921875, + "loss_num": 0.046875, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 292867764, + "step": 4378 + }, + { + "epoch": 0.4969078014184397, + "grad_norm": 22.880828857421875, + "learning_rate": 5e-05, + "loss": 1.2199, + "num_input_tokens_seen": 292934368, + "step": 4379 + }, + { + "epoch": 0.4969078014184397, + "loss": 1.0898007154464722, + "loss_ce": 0.0033749013673514128, + "loss_iou": 0.451171875, + "loss_num": 0.036865234375, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 292934368, + "step": 4379 + }, + { + "epoch": 0.4970212765957447, + "grad_norm": 42.463966369628906, + "learning_rate": 5e-05, + "loss": 1.2264, + "num_input_tokens_seen": 293001612, + "step": 4380 + }, + { + "epoch": 0.4970212765957447, + "loss": 1.018467664718628, + "loss_ce": 0.0028426586650311947, + "loss_iou": 0.439453125, + "loss_num": 0.0272216796875, + "loss_xval": 1.015625, + "num_input_tokens_seen": 293001612, + "step": 4380 + }, + { + "epoch": 0.49713475177304967, + "grad_norm": 34.668907165527344, + "learning_rate": 5e-05, + "loss": 1.2496, + "num_input_tokens_seen": 293068772, + "step": 4381 + }, + { + "epoch": 0.49713475177304967, + "loss": 1.1674782037734985, + "loss_ce": 0.0034157128538936377, + "loss_iou": 0.5078125, + "loss_num": 0.029541015625, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 293068772, + "step": 4381 + }, + { + "epoch": 0.4972482269503546, + "grad_norm": 28.360197067260742, + "learning_rate": 5e-05, + "loss": 1.19, + "num_input_tokens_seen": 293135320, + "step": 4382 + }, + { + "epoch": 0.4972482269503546, + "loss": 1.1917327642440796, + "loss_ce": 0.0037445281632244587, + "loss_iou": 0.47265625, + "loss_num": 0.04833984375, + "loss_xval": 1.1875, + "num_input_tokens_seen": 293135320, + "step": 4382 + }, + { + "epoch": 0.49736170212765957, + "grad_norm": 33.80134963989258, + "learning_rate": 5e-05, + "loss": 1.1101, + "num_input_tokens_seen": 293201992, + "step": 4383 + }, + { + "epoch": 0.49736170212765957, + "loss": 1.1875110864639282, + "loss_ce": 0.006114575080573559, + "loss_iou": 0.44140625, + "loss_num": 0.06005859375, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 293201992, + "step": 4383 + }, + { + "epoch": 0.49747517730496454, + "grad_norm": 22.174598693847656, + "learning_rate": 5e-05, + "loss": 1.2322, + "num_input_tokens_seen": 293268028, + "step": 4384 + }, + { + "epoch": 0.49747517730496454, + "loss": 1.3012884855270386, + "loss_ce": 0.007343187928199768, + "loss_iou": 0.546875, + "loss_num": 0.03955078125, + "loss_xval": 1.296875, + "num_input_tokens_seen": 293268028, + "step": 4384 + }, + { + "epoch": 0.4975886524822695, + "grad_norm": 30.495925903320312, + "learning_rate": 5e-05, + "loss": 1.1472, + "num_input_tokens_seen": 293334656, + "step": 4385 + }, + { + "epoch": 0.4975886524822695, + "loss": 1.1573874950408936, + "loss_ce": 0.005532006733119488, + "loss_iou": 0.498046875, + "loss_num": 0.031494140625, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 293334656, + "step": 4385 + }, + { + "epoch": 0.49770212765957444, + "grad_norm": 13.907894134521484, + "learning_rate": 5e-05, + "loss": 1.0737, + "num_input_tokens_seen": 293401360, + "step": 4386 + }, + { + "epoch": 0.49770212765957444, + "loss": 1.0226821899414062, + "loss_ce": 0.004219031427055597, + "loss_iou": 0.404296875, + "loss_num": 0.042236328125, + "loss_xval": 1.015625, + "num_input_tokens_seen": 293401360, + "step": 4386 + }, + { + "epoch": 0.4978156028368794, + "grad_norm": 19.273143768310547, + "learning_rate": 5e-05, + "loss": 1.0938, + "num_input_tokens_seen": 293467568, + "step": 4387 + }, + { + "epoch": 0.4978156028368794, + "loss": 1.0827949047088623, + "loss_ce": 0.007111261133104563, + "loss_iou": 0.44921875, + "loss_num": 0.03515625, + "loss_xval": 1.078125, + "num_input_tokens_seen": 293467568, + "step": 4387 + }, + { + "epoch": 0.4979290780141844, + "grad_norm": 52.602970123291016, + "learning_rate": 5e-05, + "loss": 1.2769, + "num_input_tokens_seen": 293534740, + "step": 4388 + }, + { + "epoch": 0.4979290780141844, + "loss": 1.0828709602355957, + "loss_ce": 0.0032810939010232687, + "loss_iou": 0.4609375, + "loss_num": 0.031494140625, + "loss_xval": 1.078125, + "num_input_tokens_seen": 293534740, + "step": 4388 + }, + { + "epoch": 0.4980425531914894, + "grad_norm": 21.9454345703125, + "learning_rate": 5e-05, + "loss": 1.1837, + "num_input_tokens_seen": 293602144, + "step": 4389 + }, + { + "epoch": 0.4980425531914894, + "loss": 1.1889303922653198, + "loss_ce": 0.0033835263457149267, + "loss_iou": 0.494140625, + "loss_num": 0.03955078125, + "loss_xval": 1.1875, + "num_input_tokens_seen": 293602144, + "step": 4389 + }, + { + "epoch": 0.4981560283687943, + "grad_norm": 23.884838104248047, + "learning_rate": 5e-05, + "loss": 1.1829, + "num_input_tokens_seen": 293669436, + "step": 4390 + }, + { + "epoch": 0.4981560283687943, + "loss": 1.107560157775879, + "loss_ce": 0.005997621454298496, + "loss_iou": 0.431640625, + "loss_num": 0.0478515625, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 293669436, + "step": 4390 + }, + { + "epoch": 0.4982695035460993, + "grad_norm": 27.197298049926758, + "learning_rate": 5e-05, + "loss": 1.0637, + "num_input_tokens_seen": 293735484, + "step": 4391 + }, + { + "epoch": 0.4982695035460993, + "loss": 1.2159600257873535, + "loss_ce": 0.0020928462035954, + "loss_iou": 0.51171875, + "loss_num": 0.038330078125, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 293735484, + "step": 4391 + }, + { + "epoch": 0.49838297872340426, + "grad_norm": 28.119869232177734, + "learning_rate": 5e-05, + "loss": 1.2915, + "num_input_tokens_seen": 293801664, + "step": 4392 + }, + { + "epoch": 0.49838297872340426, + "loss": 1.268721103668213, + "loss_ce": 0.050459347665309906, + "loss_iou": 0.48046875, + "loss_num": 0.05224609375, + "loss_xval": 1.21875, + "num_input_tokens_seen": 293801664, + "step": 4392 + }, + { + "epoch": 0.49849645390070924, + "grad_norm": 26.990413665771484, + "learning_rate": 5e-05, + "loss": 1.3111, + "num_input_tokens_seen": 293868188, + "step": 4393 + }, + { + "epoch": 0.49849645390070924, + "loss": 1.3258590698242188, + "loss_ce": 0.005058283917605877, + "loss_iou": 0.55859375, + "loss_num": 0.04150390625, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 293868188, + "step": 4393 + }, + { + "epoch": 0.49860992907801416, + "grad_norm": 27.604938507080078, + "learning_rate": 5e-05, + "loss": 0.983, + "num_input_tokens_seen": 293933980, + "step": 4394 + }, + { + "epoch": 0.49860992907801416, + "loss": 0.958060622215271, + "loss_ce": 0.0032266329508274794, + "loss_iou": 0.38671875, + "loss_num": 0.035888671875, + "loss_xval": 0.953125, + "num_input_tokens_seen": 293933980, + "step": 4394 + }, + { + "epoch": 0.49872340425531914, + "grad_norm": 49.60350036621094, + "learning_rate": 5e-05, + "loss": 1.3611, + "num_input_tokens_seen": 294001568, + "step": 4395 + }, + { + "epoch": 0.49872340425531914, + "loss": 1.3844964504241943, + "loss_ce": 0.005590141750872135, + "loss_iou": 0.58203125, + "loss_num": 0.04345703125, + "loss_xval": 1.375, + "num_input_tokens_seen": 294001568, + "step": 4395 + }, + { + "epoch": 0.4988368794326241, + "grad_norm": 47.70814514160156, + "learning_rate": 5e-05, + "loss": 1.0528, + "num_input_tokens_seen": 294069000, + "step": 4396 + }, + { + "epoch": 0.4988368794326241, + "loss": 1.1692287921905518, + "loss_ce": 0.007119334768503904, + "loss_iou": 0.5234375, + "loss_num": 0.0225830078125, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 294069000, + "step": 4396 + }, + { + "epoch": 0.4989503546099291, + "grad_norm": 30.797714233398438, + "learning_rate": 5e-05, + "loss": 1.1272, + "num_input_tokens_seen": 294135000, + "step": 4397 + }, + { + "epoch": 0.4989503546099291, + "loss": 1.0200408697128296, + "loss_ce": 0.008322034031152725, + "loss_iou": 0.40234375, + "loss_num": 0.041259765625, + "loss_xval": 1.015625, + "num_input_tokens_seen": 294135000, + "step": 4397 + }, + { + "epoch": 0.49906382978723407, + "grad_norm": 25.608623504638672, + "learning_rate": 5e-05, + "loss": 1.2254, + "num_input_tokens_seen": 294202348, + "step": 4398 + }, + { + "epoch": 0.49906382978723407, + "loss": 1.3659048080444336, + "loss_ce": 0.007994669489562511, + "loss_iou": 0.546875, + "loss_num": 0.0537109375, + "loss_xval": 1.359375, + "num_input_tokens_seen": 294202348, + "step": 4398 + }, + { + "epoch": 0.499177304964539, + "grad_norm": 28.39324951171875, + "learning_rate": 5e-05, + "loss": 1.2216, + "num_input_tokens_seen": 294268888, + "step": 4399 + }, + { + "epoch": 0.499177304964539, + "loss": 1.2318849563598633, + "loss_ce": 0.008252204395830631, + "loss_iou": 0.50390625, + "loss_num": 0.043701171875, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 294268888, + "step": 4399 + }, + { + "epoch": 0.49929078014184397, + "grad_norm": 32.39619827270508, + "learning_rate": 5e-05, + "loss": 1.3821, + "num_input_tokens_seen": 294335984, + "step": 4400 + }, + { + "epoch": 0.49929078014184397, + "loss": 1.479459524154663, + "loss_ce": 0.01022123172879219, + "loss_iou": 0.5625, + "loss_num": 0.068359375, + "loss_xval": 1.46875, + "num_input_tokens_seen": 294335984, + "step": 4400 + }, + { + "epoch": 0.49940425531914895, + "grad_norm": 28.030376434326172, + "learning_rate": 5e-05, + "loss": 1.3299, + "num_input_tokens_seen": 294404272, + "step": 4401 + }, + { + "epoch": 0.49940425531914895, + "loss": 1.3551825284957886, + "loss_ce": 0.00496279913932085, + "loss_iou": 0.54296875, + "loss_num": 0.052734375, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 294404272, + "step": 4401 + }, + { + "epoch": 0.4995177304964539, + "grad_norm": 12.66758918762207, + "learning_rate": 5e-05, + "loss": 1.3643, + "num_input_tokens_seen": 294471792, + "step": 4402 + }, + { + "epoch": 0.4995177304964539, + "loss": 1.2277162075042725, + "loss_ce": 0.004571620374917984, + "loss_iou": 0.51171875, + "loss_num": 0.040771484375, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 294471792, + "step": 4402 + }, + { + "epoch": 0.49963120567375885, + "grad_norm": 20.063932418823242, + "learning_rate": 5e-05, + "loss": 1.0451, + "num_input_tokens_seen": 294538912, + "step": 4403 + }, + { + "epoch": 0.49963120567375885, + "loss": 0.8648717403411865, + "loss_ce": 0.0034978147596120834, + "loss_iou": 0.3671875, + "loss_num": 0.025634765625, + "loss_xval": 0.86328125, + "num_input_tokens_seen": 294538912, + "step": 4403 + }, + { + "epoch": 0.4997446808510638, + "grad_norm": 32.21944808959961, + "learning_rate": 5e-05, + "loss": 1.2857, + "num_input_tokens_seen": 294605492, + "step": 4404 + }, + { + "epoch": 0.4997446808510638, + "loss": 1.3251699209213257, + "loss_ce": 0.00632224278524518, + "loss_iou": 0.53125, + "loss_num": 0.05078125, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 294605492, + "step": 4404 + }, + { + "epoch": 0.4998581560283688, + "grad_norm": 31.888431549072266, + "learning_rate": 5e-05, + "loss": 1.4273, + "num_input_tokens_seen": 294671312, + "step": 4405 + }, + { + "epoch": 0.4998581560283688, + "loss": 1.4591283798217773, + "loss_ce": 0.0040503209456801414, + "loss_iou": 0.640625, + "loss_num": 0.0341796875, + "loss_xval": 1.453125, + "num_input_tokens_seen": 294671312, + "step": 4405 + }, + { + "epoch": 0.4999716312056738, + "grad_norm": 29.189817428588867, + "learning_rate": 5e-05, + "loss": 1.1556, + "num_input_tokens_seen": 294738256, + "step": 4406 + }, + { + "epoch": 0.4999716312056738, + "loss": 1.0498679876327515, + "loss_ce": 0.008364122360944748, + "loss_iou": 0.47265625, + "loss_num": 0.0196533203125, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 294738256, + "step": 4406 + }, + { + "epoch": 0.5000851063829788, + "grad_norm": 34.0712776184082, + "learning_rate": 5e-05, + "loss": 1.4625, + "num_input_tokens_seen": 294805468, + "step": 4407 + }, + { + "epoch": 0.5000851063829788, + "loss": 1.444930911064148, + "loss_ce": 0.006454349961131811, + "loss_iou": 0.609375, + "loss_num": 0.043701171875, + "loss_xval": 1.4375, + "num_input_tokens_seen": 294805468, + "step": 4407 + }, + { + "epoch": 0.5001985815602837, + "grad_norm": 29.92819595336914, + "learning_rate": 5e-05, + "loss": 1.3608, + "num_input_tokens_seen": 294872228, + "step": 4408 + }, + { + "epoch": 0.5001985815602837, + "loss": 1.3805603981018066, + "loss_ce": 0.010443191044032574, + "loss_iou": 0.546875, + "loss_num": 0.0556640625, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 294872228, + "step": 4408 + }, + { + "epoch": 0.5003120567375886, + "grad_norm": 21.14029884338379, + "learning_rate": 5e-05, + "loss": 1.176, + "num_input_tokens_seen": 294938164, + "step": 4409 + }, + { + "epoch": 0.5003120567375886, + "loss": 1.1758053302764893, + "loss_ce": 0.00429653562605381, + "loss_iou": 0.49609375, + "loss_num": 0.03662109375, + "loss_xval": 1.171875, + "num_input_tokens_seen": 294938164, + "step": 4409 + }, + { + "epoch": 0.5004255319148936, + "grad_norm": 13.819413185119629, + "learning_rate": 5e-05, + "loss": 1.0371, + "num_input_tokens_seen": 295004536, + "step": 4410 + }, + { + "epoch": 0.5004255319148936, + "loss": 0.9525666832923889, + "loss_ce": 0.005056913010776043, + "loss_iou": 0.3828125, + "loss_num": 0.03662109375, + "loss_xval": 0.94921875, + "num_input_tokens_seen": 295004536, + "step": 4410 + }, + { + "epoch": 0.5005390070921986, + "grad_norm": 28.862953186035156, + "learning_rate": 5e-05, + "loss": 1.1634, + "num_input_tokens_seen": 295071220, + "step": 4411 + }, + { + "epoch": 0.5005390070921986, + "loss": 1.107107162475586, + "loss_ce": 0.006155012175440788, + "loss_iou": 0.439453125, + "loss_num": 0.04443359375, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 295071220, + "step": 4411 + }, + { + "epoch": 0.5006524822695035, + "grad_norm": 25.454256057739258, + "learning_rate": 5e-05, + "loss": 1.3043, + "num_input_tokens_seen": 295137904, + "step": 4412 + }, + { + "epoch": 0.5006524822695035, + "loss": 1.3916610479354858, + "loss_ce": 0.005918844137340784, + "loss_iou": 0.5625, + "loss_num": 0.052490234375, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 295137904, + "step": 4412 + }, + { + "epoch": 0.5007659574468085, + "grad_norm": 33.542728424072266, + "learning_rate": 5e-05, + "loss": 1.3128, + "num_input_tokens_seen": 295204768, + "step": 4413 + }, + { + "epoch": 0.5007659574468085, + "loss": 1.0873900651931763, + "loss_ce": 0.006335370242595673, + "loss_iou": 0.4609375, + "loss_num": 0.03173828125, + "loss_xval": 1.078125, + "num_input_tokens_seen": 295204768, + "step": 4413 + }, + { + "epoch": 0.5008794326241135, + "grad_norm": 29.637475967407227, + "learning_rate": 5e-05, + "loss": 1.1353, + "num_input_tokens_seen": 295270712, + "step": 4414 + }, + { + "epoch": 0.5008794326241135, + "loss": 0.9446581602096558, + "loss_ce": 0.005525462329387665, + "loss_iou": 0.41796875, + "loss_num": 0.0203857421875, + "loss_xval": 0.9375, + "num_input_tokens_seen": 295270712, + "step": 4414 + }, + { + "epoch": 0.5009929078014185, + "grad_norm": 26.507349014282227, + "learning_rate": 5e-05, + "loss": 0.9763, + "num_input_tokens_seen": 295338160, + "step": 4415 + }, + { + "epoch": 0.5009929078014185, + "loss": 0.8651962280273438, + "loss_ce": 0.0038681079167872667, + "loss_iou": 0.353515625, + "loss_num": 0.0308837890625, + "loss_xval": 0.859375, + "num_input_tokens_seen": 295338160, + "step": 4415 + }, + { + "epoch": 0.5011063829787235, + "grad_norm": 36.96433639526367, + "learning_rate": 5e-05, + "loss": 1.4139, + "num_input_tokens_seen": 295404456, + "step": 4416 + }, + { + "epoch": 0.5011063829787235, + "loss": 1.2840676307678223, + "loss_ce": 0.0037941550835967064, + "loss_iou": 0.52734375, + "loss_num": 0.04443359375, + "loss_xval": 1.28125, + "num_input_tokens_seen": 295404456, + "step": 4416 + }, + { + "epoch": 0.5012198581560283, + "grad_norm": 28.50588607788086, + "learning_rate": 5e-05, + "loss": 1.1108, + "num_input_tokens_seen": 295469984, + "step": 4417 + }, + { + "epoch": 0.5012198581560283, + "loss": 1.1730313301086426, + "loss_ce": 0.005550920031964779, + "loss_iou": 0.5078125, + "loss_num": 0.0308837890625, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 295469984, + "step": 4417 + }, + { + "epoch": 0.5013333333333333, + "grad_norm": 31.031831741333008, + "learning_rate": 5e-05, + "loss": 1.1551, + "num_input_tokens_seen": 295535368, + "step": 4418 + }, + { + "epoch": 0.5013333333333333, + "loss": 1.109748125076294, + "loss_ce": 0.005500029772520065, + "loss_iou": 0.408203125, + "loss_num": 0.0576171875, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 295535368, + "step": 4418 + }, + { + "epoch": 0.5014468085106383, + "grad_norm": 41.77434539794922, + "learning_rate": 5e-05, + "loss": 1.3361, + "num_input_tokens_seen": 295603028, + "step": 4419 + }, + { + "epoch": 0.5014468085106383, + "loss": 1.4319676160812378, + "loss_ce": 0.01204571034759283, + "loss_iou": 0.5625, + "loss_num": 0.059326171875, + "loss_xval": 1.421875, + "num_input_tokens_seen": 295603028, + "step": 4419 + }, + { + "epoch": 0.5015602836879433, + "grad_norm": 32.176666259765625, + "learning_rate": 5e-05, + "loss": 1.305, + "num_input_tokens_seen": 295670156, + "step": 4420 + }, + { + "epoch": 0.5015602836879433, + "loss": 1.130713701248169, + "loss_ce": 0.002784100826829672, + "loss_iou": 0.4921875, + "loss_num": 0.029052734375, + "loss_xval": 1.125, + "num_input_tokens_seen": 295670156, + "step": 4420 + }, + { + "epoch": 0.5016737588652482, + "grad_norm": 51.41695785522461, + "learning_rate": 5e-05, + "loss": 1.1517, + "num_input_tokens_seen": 295738244, + "step": 4421 + }, + { + "epoch": 0.5016737588652482, + "loss": 1.1439540386199951, + "loss_ce": 0.008211856707930565, + "loss_iou": 0.47265625, + "loss_num": 0.03857421875, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 295738244, + "step": 4421 + }, + { + "epoch": 0.5017872340425532, + "grad_norm": 16.516454696655273, + "learning_rate": 5e-05, + "loss": 1.1393, + "num_input_tokens_seen": 295805028, + "step": 4422 + }, + { + "epoch": 0.5017872340425532, + "loss": 1.086359977722168, + "loss_ce": 0.0038403919897973537, + "loss_iou": 0.453125, + "loss_num": 0.03515625, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 295805028, + "step": 4422 + }, + { + "epoch": 0.5019007092198582, + "grad_norm": 21.18300437927246, + "learning_rate": 5e-05, + "loss": 1.264, + "num_input_tokens_seen": 295872524, + "step": 4423 + }, + { + "epoch": 0.5019007092198582, + "loss": 1.3730665445327759, + "loss_ce": 0.005390759091824293, + "loss_iou": 0.51171875, + "loss_num": 0.068359375, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 295872524, + "step": 4423 + }, + { + "epoch": 0.5020141843971632, + "grad_norm": 43.40048599243164, + "learning_rate": 5e-05, + "loss": 1.0819, + "num_input_tokens_seen": 295938676, + "step": 4424 + }, + { + "epoch": 0.5020141843971632, + "loss": 1.0324209928512573, + "loss_ce": 0.005565500818192959, + "loss_iou": 0.46484375, + "loss_num": 0.019775390625, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 295938676, + "step": 4424 + }, + { + "epoch": 0.502127659574468, + "grad_norm": 38.534976959228516, + "learning_rate": 5e-05, + "loss": 1.3686, + "num_input_tokens_seen": 296006560, + "step": 4425 + }, + { + "epoch": 0.502127659574468, + "loss": 1.2776801586151123, + "loss_ce": 0.008637098595499992, + "loss_iou": 0.55078125, + "loss_num": 0.033203125, + "loss_xval": 1.265625, + "num_input_tokens_seen": 296006560, + "step": 4425 + }, + { + "epoch": 0.502241134751773, + "grad_norm": 14.968403816223145, + "learning_rate": 5e-05, + "loss": 1.2162, + "num_input_tokens_seen": 296073516, + "step": 4426 + }, + { + "epoch": 0.502241134751773, + "loss": 1.2356373071670532, + "loss_ce": 0.006633389741182327, + "loss_iou": 0.5234375, + "loss_num": 0.036376953125, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 296073516, + "step": 4426 + }, + { + "epoch": 0.502354609929078, + "grad_norm": 11.155159950256348, + "learning_rate": 5e-05, + "loss": 1.1209, + "num_input_tokens_seen": 296140164, + "step": 4427 + }, + { + "epoch": 0.502354609929078, + "loss": 1.1081650257110596, + "loss_ce": 0.0017196648987010121, + "loss_iou": 0.47265625, + "loss_num": 0.0322265625, + "loss_xval": 1.109375, + "num_input_tokens_seen": 296140164, + "step": 4427 + }, + { + "epoch": 0.502468085106383, + "grad_norm": 17.66961669921875, + "learning_rate": 5e-05, + "loss": 1.12, + "num_input_tokens_seen": 296207668, + "step": 4428 + }, + { + "epoch": 0.502468085106383, + "loss": 1.0575594902038574, + "loss_ce": 0.006290004588663578, + "loss_iou": 0.4453125, + "loss_num": 0.031982421875, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 296207668, + "step": 4428 + }, + { + "epoch": 0.502581560283688, + "grad_norm": 25.804731369018555, + "learning_rate": 5e-05, + "loss": 0.964, + "num_input_tokens_seen": 296274740, + "step": 4429 + }, + { + "epoch": 0.502581560283688, + "loss": 0.9290034174919128, + "loss_ce": 0.004198743030428886, + "loss_iou": 0.40234375, + "loss_num": 0.0240478515625, + "loss_xval": 0.92578125, + "num_input_tokens_seen": 296274740, + "step": 4429 + }, + { + "epoch": 0.5026950354609929, + "grad_norm": 36.44380569458008, + "learning_rate": 5e-05, + "loss": 1.0636, + "num_input_tokens_seen": 296341088, + "step": 4430 + }, + { + "epoch": 0.5026950354609929, + "loss": 1.003129005432129, + "loss_ce": 0.005204214248806238, + "loss_iou": 0.40234375, + "loss_num": 0.038818359375, + "loss_xval": 0.99609375, + "num_input_tokens_seen": 296341088, + "step": 4430 + }, + { + "epoch": 0.5028085106382979, + "grad_norm": 23.668066024780273, + "learning_rate": 5e-05, + "loss": 1.3642, + "num_input_tokens_seen": 296407668, + "step": 4431 + }, + { + "epoch": 0.5028085106382979, + "loss": 1.1592998504638672, + "loss_ce": 0.00549119058996439, + "loss_iou": 0.5, + "loss_num": 0.03076171875, + "loss_xval": 1.15625, + "num_input_tokens_seen": 296407668, + "step": 4431 + }, + { + "epoch": 0.5029219858156029, + "grad_norm": 20.091609954833984, + "learning_rate": 5e-05, + "loss": 1.1897, + "num_input_tokens_seen": 296474740, + "step": 4432 + }, + { + "epoch": 0.5029219858156029, + "loss": 1.1509559154510498, + "loss_ce": 0.005936330184340477, + "loss_iou": 0.455078125, + "loss_num": 0.046630859375, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 296474740, + "step": 4432 + }, + { + "epoch": 0.5030354609929079, + "grad_norm": 13.030801773071289, + "learning_rate": 5e-05, + "loss": 1.1075, + "num_input_tokens_seen": 296543032, + "step": 4433 + }, + { + "epoch": 0.5030354609929079, + "loss": 1.0536755323410034, + "loss_ce": 0.003626723075285554, + "loss_iou": 0.44140625, + "loss_num": 0.033447265625, + "loss_xval": 1.046875, + "num_input_tokens_seen": 296543032, + "step": 4433 + }, + { + "epoch": 0.5031489361702127, + "grad_norm": 20.607505798339844, + "learning_rate": 5e-05, + "loss": 1.186, + "num_input_tokens_seen": 296609640, + "step": 4434 + }, + { + "epoch": 0.5031489361702127, + "loss": 1.3808058500289917, + "loss_ce": 0.005805823020637035, + "loss_iou": 0.498046875, + "loss_num": 0.07568359375, + "loss_xval": 1.375, + "num_input_tokens_seen": 296609640, + "step": 4434 + }, + { + "epoch": 0.5032624113475177, + "grad_norm": 26.385147094726562, + "learning_rate": 5e-05, + "loss": 1.2712, + "num_input_tokens_seen": 296676516, + "step": 4435 + }, + { + "epoch": 0.5032624113475177, + "loss": 1.2506356239318848, + "loss_ce": 0.00820402055978775, + "loss_iou": 0.451171875, + "loss_num": 0.068359375, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 296676516, + "step": 4435 + }, + { + "epoch": 0.5033758865248227, + "grad_norm": 28.747695922851562, + "learning_rate": 5e-05, + "loss": 1.2671, + "num_input_tokens_seen": 296742696, + "step": 4436 + }, + { + "epoch": 0.5033758865248227, + "loss": 1.3059284687042236, + "loss_ce": 0.00368237541988492, + "loss_iou": 0.515625, + "loss_num": 0.053955078125, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 296742696, + "step": 4436 + }, + { + "epoch": 0.5034893617021277, + "grad_norm": 55.73405838012695, + "learning_rate": 5e-05, + "loss": 1.2696, + "num_input_tokens_seen": 296808448, + "step": 4437 + }, + { + "epoch": 0.5034893617021277, + "loss": 1.364471435546875, + "loss_ce": 0.004119896795600653, + "loss_iou": 0.5546875, + "loss_num": 0.050537109375, + "loss_xval": 1.359375, + "num_input_tokens_seen": 296808448, + "step": 4437 + }, + { + "epoch": 0.5036028368794326, + "grad_norm": 29.87570571899414, + "learning_rate": 5e-05, + "loss": 1.4573, + "num_input_tokens_seen": 296875200, + "step": 4438 + }, + { + "epoch": 0.5036028368794326, + "loss": 1.2525253295898438, + "loss_ce": 0.005943290423601866, + "loss_iou": 0.50390625, + "loss_num": 0.047607421875, + "loss_xval": 1.25, + "num_input_tokens_seen": 296875200, + "step": 4438 + }, + { + "epoch": 0.5037163120567376, + "grad_norm": 16.566570281982422, + "learning_rate": 5e-05, + "loss": 1.218, + "num_input_tokens_seen": 296942076, + "step": 4439 + }, + { + "epoch": 0.5037163120567376, + "loss": 1.032721757888794, + "loss_ce": 0.004401367157697678, + "loss_iou": 0.443359375, + "loss_num": 0.0281982421875, + "loss_xval": 1.03125, + "num_input_tokens_seen": 296942076, + "step": 4439 + }, + { + "epoch": 0.5038297872340426, + "grad_norm": 13.361339569091797, + "learning_rate": 5e-05, + "loss": 1.069, + "num_input_tokens_seen": 297008712, + "step": 4440 + }, + { + "epoch": 0.5038297872340426, + "loss": 0.9863734245300293, + "loss_ce": 0.0015101314056664705, + "loss_iou": 0.390625, + "loss_num": 0.040771484375, + "loss_xval": 0.984375, + "num_input_tokens_seen": 297008712, + "step": 4440 + }, + { + "epoch": 0.5039432624113476, + "grad_norm": 17.311294555664062, + "learning_rate": 5e-05, + "loss": 1.0465, + "num_input_tokens_seen": 297074688, + "step": 4441 + }, + { + "epoch": 0.5039432624113476, + "loss": 1.1290203332901, + "loss_ce": 0.005485094152390957, + "loss_iou": 0.45703125, + "loss_num": 0.041748046875, + "loss_xval": 1.125, + "num_input_tokens_seen": 297074688, + "step": 4441 + }, + { + "epoch": 0.5040567375886524, + "grad_norm": 18.378660202026367, + "learning_rate": 5e-05, + "loss": 1.0392, + "num_input_tokens_seen": 297141136, + "step": 4442 + }, + { + "epoch": 0.5040567375886524, + "loss": 0.9729212522506714, + "loss_ce": 0.005086755380034447, + "loss_iou": 0.41796875, + "loss_num": 0.02685546875, + "loss_xval": 0.96875, + "num_input_tokens_seen": 297141136, + "step": 4442 + }, + { + "epoch": 0.5041702127659574, + "grad_norm": 26.34514617919922, + "learning_rate": 5e-05, + "loss": 1.1844, + "num_input_tokens_seen": 297209092, + "step": 4443 + }, + { + "epoch": 0.5041702127659574, + "loss": 1.2085869312286377, + "loss_ce": 0.005461943335831165, + "loss_iou": 0.53515625, + "loss_num": 0.027099609375, + "loss_xval": 1.203125, + "num_input_tokens_seen": 297209092, + "step": 4443 + }, + { + "epoch": 0.5042836879432624, + "grad_norm": 22.686607360839844, + "learning_rate": 5e-05, + "loss": 1.2325, + "num_input_tokens_seen": 297275412, + "step": 4444 + }, + { + "epoch": 0.5042836879432624, + "loss": 1.1630215644836426, + "loss_ce": 0.0038418746553361416, + "loss_iou": 0.478515625, + "loss_num": 0.04052734375, + "loss_xval": 1.15625, + "num_input_tokens_seen": 297275412, + "step": 4444 + }, + { + "epoch": 0.5043971631205674, + "grad_norm": 51.124210357666016, + "learning_rate": 5e-05, + "loss": 1.1121, + "num_input_tokens_seen": 297340936, + "step": 4445 + }, + { + "epoch": 0.5043971631205674, + "loss": 1.1107522249221802, + "loss_ce": 0.004306861665099859, + "loss_iou": 0.470703125, + "loss_num": 0.033203125, + "loss_xval": 1.109375, + "num_input_tokens_seen": 297340936, + "step": 4445 + }, + { + "epoch": 0.5045106382978723, + "grad_norm": 30.387563705444336, + "learning_rate": 5e-05, + "loss": 1.2389, + "num_input_tokens_seen": 297407636, + "step": 4446 + }, + { + "epoch": 0.5045106382978723, + "loss": 1.3545167446136475, + "loss_ce": 0.008569475263357162, + "loss_iou": 0.5625, + "loss_num": 0.0439453125, + "loss_xval": 1.34375, + "num_input_tokens_seen": 297407636, + "step": 4446 + }, + { + "epoch": 0.5046241134751773, + "grad_norm": 32.60053634643555, + "learning_rate": 5e-05, + "loss": 1.2956, + "num_input_tokens_seen": 297474848, + "step": 4447 + }, + { + "epoch": 0.5046241134751773, + "loss": 1.464522123336792, + "loss_ce": 0.005537695251405239, + "loss_iou": 0.56640625, + "loss_num": 0.0654296875, + "loss_xval": 1.4609375, + "num_input_tokens_seen": 297474848, + "step": 4447 + }, + { + "epoch": 0.5047375886524823, + "grad_norm": 36.896480560302734, + "learning_rate": 5e-05, + "loss": 1.2537, + "num_input_tokens_seen": 297542392, + "step": 4448 + }, + { + "epoch": 0.5047375886524823, + "loss": 1.3381578922271729, + "loss_ce": 0.0051499707624316216, + "loss_iou": 0.55859375, + "loss_num": 0.043701171875, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 297542392, + "step": 4448 + }, + { + "epoch": 0.5048510638297873, + "grad_norm": 22.109739303588867, + "learning_rate": 5e-05, + "loss": 1.0717, + "num_input_tokens_seen": 297609608, + "step": 4449 + }, + { + "epoch": 0.5048510638297873, + "loss": 1.2347633838653564, + "loss_ce": 0.004782920703291893, + "loss_iou": 0.478515625, + "loss_num": 0.054931640625, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 297609608, + "step": 4449 + }, + { + "epoch": 0.5049645390070922, + "grad_norm": 19.278900146484375, + "learning_rate": 5e-05, + "loss": 1.2375, + "num_input_tokens_seen": 297677152, + "step": 4450 + }, + { + "epoch": 0.5049645390070922, + "loss": 1.2906063795089722, + "loss_ce": 0.005450085736811161, + "loss_iou": 0.546875, + "loss_num": 0.037841796875, + "loss_xval": 1.28125, + "num_input_tokens_seen": 297677152, + "step": 4450 + }, + { + "epoch": 0.5050780141843971, + "grad_norm": 41.5727653503418, + "learning_rate": 5e-05, + "loss": 1.518, + "num_input_tokens_seen": 297744324, + "step": 4451 + }, + { + "epoch": 0.5050780141843971, + "loss": 1.4768315553665161, + "loss_ce": 0.0041753435507416725, + "loss_iou": 0.6015625, + "loss_num": 0.0537109375, + "loss_xval": 1.46875, + "num_input_tokens_seen": 297744324, + "step": 4451 + }, + { + "epoch": 0.5051914893617021, + "grad_norm": 32.11625289916992, + "learning_rate": 5e-05, + "loss": 1.4223, + "num_input_tokens_seen": 297809992, + "step": 4452 + }, + { + "epoch": 0.5051914893617021, + "loss": 1.1764814853668213, + "loss_ce": 0.006071343086659908, + "loss_iou": 0.49609375, + "loss_num": 0.035400390625, + "loss_xval": 1.171875, + "num_input_tokens_seen": 297809992, + "step": 4452 + }, + { + "epoch": 0.5053049645390071, + "grad_norm": 23.03472328186035, + "learning_rate": 5e-05, + "loss": 1.1184, + "num_input_tokens_seen": 297876512, + "step": 4453 + }, + { + "epoch": 0.5053049645390071, + "loss": 1.2133004665374756, + "loss_ce": 0.004316107369959354, + "loss_iou": 0.48828125, + "loss_num": 0.047119140625, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 297876512, + "step": 4453 + }, + { + "epoch": 0.5054184397163121, + "grad_norm": 27.558568954467773, + "learning_rate": 5e-05, + "loss": 1.2349, + "num_input_tokens_seen": 297943464, + "step": 4454 + }, + { + "epoch": 0.5054184397163121, + "loss": 1.2266695499420166, + "loss_ce": 0.013290584087371826, + "loss_iou": 0.5234375, + "loss_num": 0.033447265625, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 297943464, + "step": 4454 + }, + { + "epoch": 0.505531914893617, + "grad_norm": 53.715370178222656, + "learning_rate": 5e-05, + "loss": 1.2304, + "num_input_tokens_seen": 298009888, + "step": 4455 + }, + { + "epoch": 0.505531914893617, + "loss": 1.266849398612976, + "loss_ce": 0.010013449937105179, + "loss_iou": 0.546875, + "loss_num": 0.0322265625, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 298009888, + "step": 4455 + }, + { + "epoch": 0.505645390070922, + "grad_norm": 22.365068435668945, + "learning_rate": 5e-05, + "loss": 1.5359, + "num_input_tokens_seen": 298075732, + "step": 4456 + }, + { + "epoch": 0.505645390070922, + "loss": 1.551580548286438, + "loss_ce": 0.0032406593672931194, + "loss_iou": 0.65625, + "loss_num": 0.047607421875, + "loss_xval": 1.546875, + "num_input_tokens_seen": 298075732, + "step": 4456 + }, + { + "epoch": 0.505758865248227, + "grad_norm": 18.112815856933594, + "learning_rate": 5e-05, + "loss": 1.2732, + "num_input_tokens_seen": 298142188, + "step": 4457 + }, + { + "epoch": 0.505758865248227, + "loss": 1.1471444368362427, + "loss_ce": 0.006031183525919914, + "loss_iou": 0.47265625, + "loss_num": 0.0390625, + "loss_xval": 1.140625, + "num_input_tokens_seen": 298142188, + "step": 4457 + }, + { + "epoch": 0.5058723404255319, + "grad_norm": 20.072317123413086, + "learning_rate": 5e-05, + "loss": 1.0983, + "num_input_tokens_seen": 298208092, + "step": 4458 + }, + { + "epoch": 0.5058723404255319, + "loss": 1.0491127967834473, + "loss_ce": 0.002481980249285698, + "loss_iou": 0.4453125, + "loss_num": 0.031005859375, + "loss_xval": 1.046875, + "num_input_tokens_seen": 298208092, + "step": 4458 + }, + { + "epoch": 0.5059858156028368, + "grad_norm": 58.0288200378418, + "learning_rate": 5e-05, + "loss": 1.3768, + "num_input_tokens_seen": 298275196, + "step": 4459 + }, + { + "epoch": 0.5059858156028368, + "loss": 1.4608516693115234, + "loss_ce": 0.005773469805717468, + "loss_iou": 0.5859375, + "loss_num": 0.05712890625, + "loss_xval": 1.453125, + "num_input_tokens_seen": 298275196, + "step": 4459 + }, + { + "epoch": 0.5060992907801418, + "grad_norm": 21.018354415893555, + "learning_rate": 5e-05, + "loss": 1.3751, + "num_input_tokens_seen": 298342204, + "step": 4460 + }, + { + "epoch": 0.5060992907801418, + "loss": 1.2287707328796387, + "loss_ce": 0.01295035146176815, + "loss_iou": 0.51953125, + "loss_num": 0.035400390625, + "loss_xval": 1.21875, + "num_input_tokens_seen": 298342204, + "step": 4460 + }, + { + "epoch": 0.5062127659574468, + "grad_norm": 29.055551528930664, + "learning_rate": 5e-05, + "loss": 1.1524, + "num_input_tokens_seen": 298408828, + "step": 4461 + }, + { + "epoch": 0.5062127659574468, + "loss": 1.009412407875061, + "loss_ce": 0.0045296186581254005, + "loss_iou": 0.439453125, + "loss_num": 0.025634765625, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 298408828, + "step": 4461 + }, + { + "epoch": 0.5063262411347518, + "grad_norm": 34.526023864746094, + "learning_rate": 5e-05, + "loss": 1.0082, + "num_input_tokens_seen": 298475876, + "step": 4462 + }, + { + "epoch": 0.5063262411347518, + "loss": 1.0211611986160278, + "loss_ce": 0.004071346018463373, + "loss_iou": 0.447265625, + "loss_num": 0.0245361328125, + "loss_xval": 1.015625, + "num_input_tokens_seen": 298475876, + "step": 4462 + }, + { + "epoch": 0.5064397163120568, + "grad_norm": 27.258960723876953, + "learning_rate": 5e-05, + "loss": 1.3494, + "num_input_tokens_seen": 298543072, + "step": 4463 + }, + { + "epoch": 0.5064397163120568, + "loss": 1.1049857139587402, + "loss_ce": 0.008794382214546204, + "loss_iou": 0.44921875, + "loss_num": 0.039306640625, + "loss_xval": 1.09375, + "num_input_tokens_seen": 298543072, + "step": 4463 + }, + { + "epoch": 0.5065531914893617, + "grad_norm": 20.55097770690918, + "learning_rate": 5e-05, + "loss": 1.0573, + "num_input_tokens_seen": 298609836, + "step": 4464 + }, + { + "epoch": 0.5065531914893617, + "loss": 1.0981420278549194, + "loss_ce": 0.006100993603467941, + "loss_iou": 0.458984375, + "loss_num": 0.034912109375, + "loss_xval": 1.09375, + "num_input_tokens_seen": 298609836, + "step": 4464 + }, + { + "epoch": 0.5066666666666667, + "grad_norm": 28.488121032714844, + "learning_rate": 5e-05, + "loss": 1.2747, + "num_input_tokens_seen": 298676652, + "step": 4465 + }, + { + "epoch": 0.5066666666666667, + "loss": 1.4665861129760742, + "loss_ce": 0.007601711433380842, + "loss_iou": 0.58203125, + "loss_num": 0.05859375, + "loss_xval": 1.4609375, + "num_input_tokens_seen": 298676652, + "step": 4465 + }, + { + "epoch": 0.5067801418439716, + "grad_norm": 31.043569564819336, + "learning_rate": 5e-05, + "loss": 1.2826, + "num_input_tokens_seen": 298743620, + "step": 4466 + }, + { + "epoch": 0.5067801418439716, + "loss": 1.3417980670928955, + "loss_ce": 0.0058606406673789024, + "loss_iou": 0.58984375, + "loss_num": 0.03076171875, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 298743620, + "step": 4466 + }, + { + "epoch": 0.5068936170212766, + "grad_norm": 30.578340530395508, + "learning_rate": 5e-05, + "loss": 1.204, + "num_input_tokens_seen": 298809452, + "step": 4467 + }, + { + "epoch": 0.5068936170212766, + "loss": 1.2029751539230347, + "loss_ce": 0.005709528923034668, + "loss_iou": 0.498046875, + "loss_num": 0.04052734375, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 298809452, + "step": 4467 + }, + { + "epoch": 0.5070070921985815, + "grad_norm": 64.70799255371094, + "learning_rate": 5e-05, + "loss": 1.3763, + "num_input_tokens_seen": 298876524, + "step": 4468 + }, + { + "epoch": 0.5070070921985815, + "loss": 1.2027766704559326, + "loss_ce": 0.009905493818223476, + "loss_iou": 0.470703125, + "loss_num": 0.050537109375, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 298876524, + "step": 4468 + }, + { + "epoch": 0.5071205673758865, + "grad_norm": 48.18522644042969, + "learning_rate": 5e-05, + "loss": 1.2337, + "num_input_tokens_seen": 298944436, + "step": 4469 + }, + { + "epoch": 0.5071205673758865, + "loss": 1.1780881881713867, + "loss_ce": 0.004748415667563677, + "loss_iou": 0.4921875, + "loss_num": 0.03759765625, + "loss_xval": 1.171875, + "num_input_tokens_seen": 298944436, + "step": 4469 + }, + { + "epoch": 0.5072340425531915, + "grad_norm": 27.202850341796875, + "learning_rate": 5e-05, + "loss": 1.1912, + "num_input_tokens_seen": 299010872, + "step": 4470 + }, + { + "epoch": 0.5072340425531915, + "loss": 1.2888567447662354, + "loss_ce": 0.006630207411944866, + "loss_iou": 0.50390625, + "loss_num": 0.054931640625, + "loss_xval": 1.28125, + "num_input_tokens_seen": 299010872, + "step": 4470 + }, + { + "epoch": 0.5073475177304965, + "grad_norm": 70.11170959472656, + "learning_rate": 5e-05, + "loss": 1.2734, + "num_input_tokens_seen": 299077972, + "step": 4471 + }, + { + "epoch": 0.5073475177304965, + "loss": 1.1840050220489502, + "loss_ce": 0.006270651705563068, + "loss_iou": 0.5, + "loss_num": 0.034912109375, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 299077972, + "step": 4471 + }, + { + "epoch": 0.5074609929078014, + "grad_norm": 22.903818130493164, + "learning_rate": 5e-05, + "loss": 1.2309, + "num_input_tokens_seen": 299145280, + "step": 4472 + }, + { + "epoch": 0.5074609929078014, + "loss": 1.154911994934082, + "loss_ce": 0.007450979668647051, + "loss_iou": 0.46875, + "loss_num": 0.041259765625, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 299145280, + "step": 4472 + }, + { + "epoch": 0.5075744680851064, + "grad_norm": 24.798721313476562, + "learning_rate": 5e-05, + "loss": 1.085, + "num_input_tokens_seen": 299211528, + "step": 4473 + }, + { + "epoch": 0.5075744680851064, + "loss": 1.1617703437805176, + "loss_ce": 0.0023464481346309185, + "loss_iou": 0.45703125, + "loss_num": 0.04931640625, + "loss_xval": 1.15625, + "num_input_tokens_seen": 299211528, + "step": 4473 + }, + { + "epoch": 0.5076879432624114, + "grad_norm": 26.75017547607422, + "learning_rate": 5e-05, + "loss": 1.2829, + "num_input_tokens_seen": 299278600, + "step": 4474 + }, + { + "epoch": 0.5076879432624114, + "loss": 1.204673409461975, + "loss_ce": 0.004722235724329948, + "loss_iou": 0.48046875, + "loss_num": 0.048095703125, + "loss_xval": 1.203125, + "num_input_tokens_seen": 299278600, + "step": 4474 + }, + { + "epoch": 0.5078014184397163, + "grad_norm": 29.144235610961914, + "learning_rate": 5e-05, + "loss": 1.0589, + "num_input_tokens_seen": 299345968, + "step": 4475 + }, + { + "epoch": 0.5078014184397163, + "loss": 1.0499967336654663, + "loss_ce": 0.008004559203982353, + "loss_iou": 0.447265625, + "loss_num": 0.0291748046875, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 299345968, + "step": 4475 + }, + { + "epoch": 0.5079148936170212, + "grad_norm": 22.396804809570312, + "learning_rate": 5e-05, + "loss": 1.1456, + "num_input_tokens_seen": 299413052, + "step": 4476 + }, + { + "epoch": 0.5079148936170212, + "loss": 1.0608086585998535, + "loss_ce": 0.004656333010643721, + "loss_iou": 0.455078125, + "loss_num": 0.029052734375, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 299413052, + "step": 4476 + }, + { + "epoch": 0.5080283687943262, + "grad_norm": 18.936002731323242, + "learning_rate": 5e-05, + "loss": 1.2317, + "num_input_tokens_seen": 299480304, + "step": 4477 + }, + { + "epoch": 0.5080283687943262, + "loss": 1.2436580657958984, + "loss_ce": 0.00440016807988286, + "loss_iou": 0.52734375, + "loss_num": 0.0361328125, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 299480304, + "step": 4477 + }, + { + "epoch": 0.5081418439716312, + "grad_norm": 46.680938720703125, + "learning_rate": 5e-05, + "loss": 1.1302, + "num_input_tokens_seen": 299547156, + "step": 4478 + }, + { + "epoch": 0.5081418439716312, + "loss": 0.9615569114685059, + "loss_ce": 0.004525641445070505, + "loss_iou": 0.431640625, + "loss_num": 0.0189208984375, + "loss_xval": 0.95703125, + "num_input_tokens_seen": 299547156, + "step": 4478 + }, + { + "epoch": 0.5082553191489362, + "grad_norm": 47.214447021484375, + "learning_rate": 5e-05, + "loss": 1.5417, + "num_input_tokens_seen": 299613864, + "step": 4479 + }, + { + "epoch": 0.5082553191489362, + "loss": 1.3968504667282104, + "loss_ce": 0.00720201525837183, + "loss_iou": 0.5546875, + "loss_num": 0.056640625, + "loss_xval": 1.390625, + "num_input_tokens_seen": 299613864, + "step": 4479 + }, + { + "epoch": 0.5083687943262412, + "grad_norm": 26.805212020874023, + "learning_rate": 5e-05, + "loss": 1.3442, + "num_input_tokens_seen": 299681400, + "step": 4480 + }, + { + "epoch": 0.5083687943262412, + "loss": 1.311453938484192, + "loss_ce": 0.005301640368998051, + "loss_iou": 0.5546875, + "loss_num": 0.038818359375, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 299681400, + "step": 4480 + }, + { + "epoch": 0.5084822695035461, + "grad_norm": 24.762577056884766, + "learning_rate": 5e-05, + "loss": 1.0815, + "num_input_tokens_seen": 299749144, + "step": 4481 + }, + { + "epoch": 0.5084822695035461, + "loss": 1.1531431674957275, + "loss_ce": 0.0047056968323886395, + "loss_iou": 0.484375, + "loss_num": 0.036376953125, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 299749144, + "step": 4481 + }, + { + "epoch": 0.5085957446808511, + "grad_norm": 21.3387451171875, + "learning_rate": 5e-05, + "loss": 0.9835, + "num_input_tokens_seen": 299816576, + "step": 4482 + }, + { + "epoch": 0.5085957446808511, + "loss": 1.0273655652999878, + "loss_ce": 0.003928059712052345, + "loss_iou": 0.412109375, + "loss_num": 0.0400390625, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 299816576, + "step": 4482 + }, + { + "epoch": 0.508709219858156, + "grad_norm": 33.77482986450195, + "learning_rate": 5e-05, + "loss": 1.1945, + "num_input_tokens_seen": 299883232, + "step": 4483 + }, + { + "epoch": 0.508709219858156, + "loss": 1.224731206893921, + "loss_ce": 0.005981284659355879, + "loss_iou": 0.49609375, + "loss_num": 0.044921875, + "loss_xval": 1.21875, + "num_input_tokens_seen": 299883232, + "step": 4483 + }, + { + "epoch": 0.508822695035461, + "grad_norm": 28.17200469970703, + "learning_rate": 5e-05, + "loss": 1.392, + "num_input_tokens_seen": 299950572, + "step": 4484 + }, + { + "epoch": 0.508822695035461, + "loss": 1.4559062719345093, + "loss_ce": 0.006199273280799389, + "loss_iou": 0.61328125, + "loss_num": 0.044921875, + "loss_xval": 1.453125, + "num_input_tokens_seen": 299950572, + "step": 4484 + }, + { + "epoch": 0.5089361702127659, + "grad_norm": 23.336971282958984, + "learning_rate": 5e-05, + "loss": 1.0959, + "num_input_tokens_seen": 300017316, + "step": 4485 + }, + { + "epoch": 0.5089361702127659, + "loss": 1.0706756114959717, + "loss_ce": 0.006710788235068321, + "loss_iou": 0.46484375, + "loss_num": 0.026611328125, + "loss_xval": 1.0625, + "num_input_tokens_seen": 300017316, + "step": 4485 + }, + { + "epoch": 0.5090496453900709, + "grad_norm": 29.393733978271484, + "learning_rate": 5e-05, + "loss": 1.3682, + "num_input_tokens_seen": 300084928, + "step": 4486 + }, + { + "epoch": 0.5090496453900709, + "loss": 1.4776570796966553, + "loss_ce": 0.009883730672299862, + "loss_iou": 0.58203125, + "loss_num": 0.060791015625, + "loss_xval": 1.46875, + "num_input_tokens_seen": 300084928, + "step": 4486 + }, + { + "epoch": 0.5091631205673759, + "grad_norm": 30.40289306640625, + "learning_rate": 5e-05, + "loss": 1.2146, + "num_input_tokens_seen": 300151812, + "step": 4487 + }, + { + "epoch": 0.5091631205673759, + "loss": 0.9916737675666809, + "loss_ce": 0.0075428662821650505, + "loss_iou": 0.390625, + "loss_num": 0.04052734375, + "loss_xval": 0.984375, + "num_input_tokens_seen": 300151812, + "step": 4487 + }, + { + "epoch": 0.5092765957446809, + "grad_norm": 20.624042510986328, + "learning_rate": 5e-05, + "loss": 1.1033, + "num_input_tokens_seen": 300218424, + "step": 4488 + }, + { + "epoch": 0.5092765957446809, + "loss": 1.1427490711212158, + "loss_ce": 0.006030390039086342, + "loss_iou": 0.48828125, + "loss_num": 0.031494140625, + "loss_xval": 1.140625, + "num_input_tokens_seen": 300218424, + "step": 4488 + }, + { + "epoch": 0.5093900709219858, + "grad_norm": 30.390737533569336, + "learning_rate": 5e-05, + "loss": 1.4114, + "num_input_tokens_seen": 300286304, + "step": 4489 + }, + { + "epoch": 0.5093900709219858, + "loss": 1.4769287109375, + "loss_ce": 0.00476070074364543, + "loss_iou": 0.57421875, + "loss_num": 0.064453125, + "loss_xval": 1.46875, + "num_input_tokens_seen": 300286304, + "step": 4489 + }, + { + "epoch": 0.5095035460992908, + "grad_norm": 41.698219299316406, + "learning_rate": 5e-05, + "loss": 1.2752, + "num_input_tokens_seen": 300352460, + "step": 4490 + }, + { + "epoch": 0.5095035460992908, + "loss": 1.2743080854415894, + "loss_ce": 0.006241684779524803, + "loss_iou": 0.5546875, + "loss_num": 0.03271484375, + "loss_xval": 1.265625, + "num_input_tokens_seen": 300352460, + "step": 4490 + }, + { + "epoch": 0.5096170212765957, + "grad_norm": 40.128360748291016, + "learning_rate": 5e-05, + "loss": 1.3009, + "num_input_tokens_seen": 300419596, + "step": 4491 + }, + { + "epoch": 0.5096170212765957, + "loss": 1.2276978492736816, + "loss_ce": 0.004797419998794794, + "loss_iou": 0.5234375, + "loss_num": 0.0361328125, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 300419596, + "step": 4491 + }, + { + "epoch": 0.5097304964539007, + "grad_norm": 29.517215728759766, + "learning_rate": 5e-05, + "loss": 1.3572, + "num_input_tokens_seen": 300486360, + "step": 4492 + }, + { + "epoch": 0.5097304964539007, + "loss": 1.1189391613006592, + "loss_ce": 0.003704843809828162, + "loss_iou": 0.46875, + "loss_num": 0.03564453125, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 300486360, + "step": 4492 + }, + { + "epoch": 0.5098439716312057, + "grad_norm": 87.92117309570312, + "learning_rate": 5e-05, + "loss": 1.2599, + "num_input_tokens_seen": 300554224, + "step": 4493 + }, + { + "epoch": 0.5098439716312057, + "loss": 1.159174919128418, + "loss_ce": 0.006342866457998753, + "loss_iou": 0.447265625, + "loss_num": 0.052001953125, + "loss_xval": 1.15625, + "num_input_tokens_seen": 300554224, + "step": 4493 + }, + { + "epoch": 0.5099574468085106, + "grad_norm": 15.55949592590332, + "learning_rate": 5e-05, + "loss": 1.1362, + "num_input_tokens_seen": 300621932, + "step": 4494 + }, + { + "epoch": 0.5099574468085106, + "loss": 0.9274569749832153, + "loss_ce": 0.004117178730666637, + "loss_iou": 0.396484375, + "loss_num": 0.0260009765625, + "loss_xval": 0.921875, + "num_input_tokens_seen": 300621932, + "step": 4494 + }, + { + "epoch": 0.5100709219858156, + "grad_norm": 17.4780216217041, + "learning_rate": 5e-05, + "loss": 1.0216, + "num_input_tokens_seen": 300688276, + "step": 4495 + }, + { + "epoch": 0.5100709219858156, + "loss": 0.960739254951477, + "loss_ce": 0.006149411201477051, + "loss_iou": 0.390625, + "loss_num": 0.034912109375, + "loss_xval": 0.953125, + "num_input_tokens_seen": 300688276, + "step": 4495 + }, + { + "epoch": 0.5101843971631206, + "grad_norm": 45.15021896362305, + "learning_rate": 5e-05, + "loss": 1.0873, + "num_input_tokens_seen": 300754960, + "step": 4496 + }, + { + "epoch": 0.5101843971631206, + "loss": 0.9952809810638428, + "loss_ce": 0.00809834897518158, + "loss_iou": 0.41796875, + "loss_num": 0.030029296875, + "loss_xval": 0.98828125, + "num_input_tokens_seen": 300754960, + "step": 4496 + }, + { + "epoch": 0.5102978723404256, + "grad_norm": 29.622947692871094, + "learning_rate": 5e-05, + "loss": 1.1405, + "num_input_tokens_seen": 300821352, + "step": 4497 + }, + { + "epoch": 0.5102978723404256, + "loss": 1.0039620399475098, + "loss_ce": 0.004938598722219467, + "loss_iou": 0.4140625, + "loss_num": 0.033935546875, + "loss_xval": 1.0, + "num_input_tokens_seen": 300821352, + "step": 4497 + }, + { + "epoch": 0.5104113475177305, + "grad_norm": 29.080989837646484, + "learning_rate": 5e-05, + "loss": 1.1108, + "num_input_tokens_seen": 300889076, + "step": 4498 + }, + { + "epoch": 0.5104113475177305, + "loss": 1.1005859375, + "loss_ce": 0.005371073726564646, + "loss_iou": 0.478515625, + "loss_num": 0.02783203125, + "loss_xval": 1.09375, + "num_input_tokens_seen": 300889076, + "step": 4498 + }, + { + "epoch": 0.5105248226950354, + "grad_norm": 27.677947998046875, + "learning_rate": 5e-05, + "loss": 1.2195, + "num_input_tokens_seen": 300954876, + "step": 4499 + }, + { + "epoch": 0.5105248226950354, + "loss": 1.239464521408081, + "loss_ce": 0.00850752368569374, + "loss_iou": 0.51171875, + "loss_num": 0.042236328125, + "loss_xval": 1.234375, + "num_input_tokens_seen": 300954876, + "step": 4499 + }, + { + "epoch": 0.5106382978723404, + "grad_norm": 16.000967025756836, + "learning_rate": 5e-05, + "loss": 1.0879, + "num_input_tokens_seen": 301021060, + "step": 4500 + }, + { + "epoch": 0.5106382978723404, + "eval_seeclick_CIoU": 0.3607160598039627, + "eval_seeclick_GIoU": 0.3542136549949646, + "eval_seeclick_IoU": 0.45851264894008636, + "eval_seeclick_MAE_all": 0.1637643575668335, + "eval_seeclick_MAE_h": 0.057237911969423294, + "eval_seeclick_MAE_w": 0.11075621098279953, + "eval_seeclick_MAE_x_boxes": 0.27696236968040466, + "eval_seeclick_MAE_y_boxes": 0.1222473718225956, + "eval_seeclick_NUM_probability": 0.9999496042728424, + "eval_seeclick_inside_bbox": 0.6145833432674408, + "eval_seeclick_loss": 2.473033905029297, + "eval_seeclick_loss_ce": 0.013348984066396952, + "eval_seeclick_loss_iou": 0.81695556640625, + "eval_seeclick_loss_num": 0.16040611267089844, + "eval_seeclick_loss_xval": 2.4359130859375, + "eval_seeclick_runtime": 71.8478, + "eval_seeclick_samples_per_second": 0.654, + "eval_seeclick_steps_per_second": 0.028, + "num_input_tokens_seen": 301021060, + "step": 4500 + }, + { + "epoch": 0.5106382978723404, + "eval_icons_CIoU": 0.47580285370349884, + "eval_icons_GIoU": 0.47231681644916534, + "eval_icons_IoU": 0.5144772082567215, + "eval_icons_MAE_all": 0.12917348742485046, + "eval_icons_MAE_h": 0.08988338336348534, + "eval_icons_MAE_w": 0.11211714521050453, + "eval_icons_MAE_x_boxes": 0.12046309560537338, + "eval_icons_MAE_y_boxes": 0.057029851246625185, + "eval_icons_NUM_probability": 0.999958872795105, + "eval_icons_inside_bbox": 0.7083333432674408, + "eval_icons_loss": 2.2612860202789307, + "eval_icons_loss_ce": 3.320468204037752e-05, + "eval_icons_loss_iou": 0.8310546875, + "eval_icons_loss_num": 0.11475372314453125, + "eval_icons_loss_xval": 2.23486328125, + "eval_icons_runtime": 97.5742, + "eval_icons_samples_per_second": 0.512, + "eval_icons_steps_per_second": 0.02, + "num_input_tokens_seen": 301021060, + "step": 4500 + }, + { + "epoch": 0.5106382978723404, + "eval_screenspot_CIoU": 0.3183220475912094, + "eval_screenspot_GIoU": 0.29658274352550507, + "eval_screenspot_IoU": 0.4001234869162242, + "eval_screenspot_MAE_all": 0.2038637101650238, + "eval_screenspot_MAE_h": 0.16486621151367822, + "eval_screenspot_MAE_w": 0.16356289386749268, + "eval_screenspot_MAE_x_boxes": 0.2797440489133199, + "eval_screenspot_MAE_y_boxes": 0.09042703732848167, + "eval_screenspot_NUM_probability": 0.9996496836344401, + "eval_screenspot_inside_bbox": 0.6729166706403097, + "eval_screenspot_loss": 2.9540083408355713, + "eval_screenspot_loss_ce": 0.016937484964728355, + "eval_screenspot_loss_iou": 0.9537760416666666, + "eval_screenspot_loss_num": 0.21466064453125, + "eval_screenspot_loss_xval": 2.9801432291666665, + "eval_screenspot_runtime": 124.9427, + "eval_screenspot_samples_per_second": 0.712, + "eval_screenspot_steps_per_second": 0.024, + "num_input_tokens_seen": 301021060, + "step": 4500 + }, + { + "epoch": 0.5106382978723404, + "eval_compot_CIoU": 0.28298453986644745, + "eval_compot_GIoU": 0.23256700485944748, + "eval_compot_IoU": 0.3575533628463745, + "eval_compot_MAE_all": 0.22713574022054672, + "eval_compot_MAE_h": 0.18057097494602203, + "eval_compot_MAE_w": 0.20804991573095322, + "eval_compot_MAE_x_boxes": 0.15848147496581078, + "eval_compot_MAE_y_boxes": 0.14829135686159134, + "eval_compot_NUM_probability": 0.9996579587459564, + "eval_compot_inside_bbox": 0.5590277910232544, + "eval_compot_loss": 3.038466215133667, + "eval_compot_loss_ce": 0.003926245495676994, + "eval_compot_loss_iou": 0.9482421875, + "eval_compot_loss_num": 0.228118896484375, + "eval_compot_loss_xval": 3.0380859375, + "eval_compot_runtime": 69.3986, + "eval_compot_samples_per_second": 0.72, + "eval_compot_steps_per_second": 0.029, + "num_input_tokens_seen": 301021060, + "step": 4500 + }, + { + "epoch": 0.5106382978723404, + "loss": 3.0042614936828613, + "loss_ce": 0.004261584021151066, + "loss_iou": 0.94140625, + "loss_num": 0.22265625, + "loss_xval": 3.0, + "num_input_tokens_seen": 301021060, + "step": 4500 + }, + { + "epoch": 0.5107517730496454, + "grad_norm": 14.731332778930664, + "learning_rate": 5e-05, + "loss": 1.1443, + "num_input_tokens_seen": 301087908, + "step": 4501 + }, + { + "epoch": 0.5107517730496454, + "loss": 1.125917673110962, + "loss_ce": 0.0028707794845104218, + "loss_iou": 0.474609375, + "loss_num": 0.03466796875, + "loss_xval": 1.125, + "num_input_tokens_seen": 301087908, + "step": 4501 + }, + { + "epoch": 0.5108652482269503, + "grad_norm": 35.845157623291016, + "learning_rate": 5e-05, + "loss": 1.204, + "num_input_tokens_seen": 301154504, + "step": 4502 + }, + { + "epoch": 0.5108652482269503, + "loss": 1.0957565307617188, + "loss_ce": 0.0054244305938482285, + "loss_iou": 0.474609375, + "loss_num": 0.028076171875, + "loss_xval": 1.09375, + "num_input_tokens_seen": 301154504, + "step": 4502 + }, + { + "epoch": 0.5109787234042553, + "grad_norm": 44.8936653137207, + "learning_rate": 5e-05, + "loss": 1.4674, + "num_input_tokens_seen": 301221224, + "step": 4503 + }, + { + "epoch": 0.5109787234042553, + "loss": 1.520648717880249, + "loss_ce": 0.005023891571909189, + "loss_iou": 0.6328125, + "loss_num": 0.05126953125, + "loss_xval": 1.515625, + "num_input_tokens_seen": 301221224, + "step": 4503 + }, + { + "epoch": 0.5110921985815603, + "grad_norm": 31.78890609741211, + "learning_rate": 5e-05, + "loss": 1.5171, + "num_input_tokens_seen": 301288560, + "step": 4504 + }, + { + "epoch": 0.5110921985815603, + "loss": 1.523361325263977, + "loss_ce": 0.004318291321396828, + "loss_iou": 0.64453125, + "loss_num": 0.04638671875, + "loss_xval": 1.515625, + "num_input_tokens_seen": 301288560, + "step": 4504 + }, + { + "epoch": 0.5112056737588653, + "grad_norm": 25.0733585357666, + "learning_rate": 5e-05, + "loss": 1.2092, + "num_input_tokens_seen": 301354340, + "step": 4505 + }, + { + "epoch": 0.5112056737588653, + "loss": 1.2943711280822754, + "loss_ce": 0.004332148935645819, + "loss_iou": 0.51171875, + "loss_num": 0.052734375, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 301354340, + "step": 4505 + }, + { + "epoch": 0.5113191489361703, + "grad_norm": 25.556970596313477, + "learning_rate": 5e-05, + "loss": 1.3591, + "num_input_tokens_seen": 301420504, + "step": 4506 + }, + { + "epoch": 0.5113191489361703, + "loss": 1.3348511457443237, + "loss_ce": 0.004772968590259552, + "loss_iou": 0.55859375, + "loss_num": 0.04296875, + "loss_xval": 1.328125, + "num_input_tokens_seen": 301420504, + "step": 4506 + }, + { + "epoch": 0.5114326241134752, + "grad_norm": 29.681360244750977, + "learning_rate": 5e-05, + "loss": 1.3252, + "num_input_tokens_seen": 301488500, + "step": 4507 + }, + { + "epoch": 0.5114326241134752, + "loss": 1.2751045227050781, + "loss_ce": 0.0055732931941747665, + "loss_iou": 0.546875, + "loss_num": 0.03466796875, + "loss_xval": 1.265625, + "num_input_tokens_seen": 301488500, + "step": 4507 + }, + { + "epoch": 0.5115460992907801, + "grad_norm": 22.000289916992188, + "learning_rate": 5e-05, + "loss": 1.4578, + "num_input_tokens_seen": 301555120, + "step": 4508 + }, + { + "epoch": 0.5115460992907801, + "loss": 1.5025393962860107, + "loss_ce": 0.004492558538913727, + "loss_iou": 0.63671875, + "loss_num": 0.04443359375, + "loss_xval": 1.5, + "num_input_tokens_seen": 301555120, + "step": 4508 + }, + { + "epoch": 0.5116595744680851, + "grad_norm": 17.668535232543945, + "learning_rate": 5e-05, + "loss": 1.1154, + "num_input_tokens_seen": 301623012, + "step": 4509 + }, + { + "epoch": 0.5116595744680851, + "loss": 0.8545092940330505, + "loss_ce": 0.005449245683848858, + "loss_iou": 0.3671875, + "loss_num": 0.0230712890625, + "loss_xval": 0.84765625, + "num_input_tokens_seen": 301623012, + "step": 4509 + }, + { + "epoch": 0.51177304964539, + "grad_norm": 32.52492141723633, + "learning_rate": 5e-05, + "loss": 1.3129, + "num_input_tokens_seen": 301690364, + "step": 4510 + }, + { + "epoch": 0.51177304964539, + "loss": 1.217864990234375, + "loss_ce": 0.008880573324859142, + "loss_iou": 0.53125, + "loss_num": 0.02880859375, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 301690364, + "step": 4510 + }, + { + "epoch": 0.511886524822695, + "grad_norm": 53.17311096191406, + "learning_rate": 5e-05, + "loss": 1.2818, + "num_input_tokens_seen": 301758616, + "step": 4511 + }, + { + "epoch": 0.511886524822695, + "loss": 1.1942312717437744, + "loss_ce": 0.009172685444355011, + "loss_iou": 0.5078125, + "loss_num": 0.0341796875, + "loss_xval": 1.1875, + "num_input_tokens_seen": 301758616, + "step": 4511 + }, + { + "epoch": 0.512, + "grad_norm": 29.10331916809082, + "learning_rate": 5e-05, + "loss": 1.2702, + "num_input_tokens_seen": 301825128, + "step": 4512 + }, + { + "epoch": 0.512, + "loss": 1.2209982872009277, + "loss_ce": 0.004201370291411877, + "loss_iou": 0.515625, + "loss_num": 0.03759765625, + "loss_xval": 1.21875, + "num_input_tokens_seen": 301825128, + "step": 4512 + }, + { + "epoch": 0.512113475177305, + "grad_norm": 31.563140869140625, + "learning_rate": 5e-05, + "loss": 1.2272, + "num_input_tokens_seen": 301892260, + "step": 4513 + }, + { + "epoch": 0.512113475177305, + "loss": 1.2558543682098389, + "loss_ce": 0.00829570833593607, + "loss_iou": 0.453125, + "loss_num": 0.06884765625, + "loss_xval": 1.25, + "num_input_tokens_seen": 301892260, + "step": 4513 + }, + { + "epoch": 0.51222695035461, + "grad_norm": 21.09943199157715, + "learning_rate": 5e-05, + "loss": 1.1129, + "num_input_tokens_seen": 301959124, + "step": 4514 + }, + { + "epoch": 0.51222695035461, + "loss": 1.0511417388916016, + "loss_ce": 0.005731626879423857, + "loss_iou": 0.462890625, + "loss_num": 0.023681640625, + "loss_xval": 1.046875, + "num_input_tokens_seen": 301959124, + "step": 4514 + }, + { + "epoch": 0.512340425531915, + "grad_norm": 27.873741149902344, + "learning_rate": 5e-05, + "loss": 1.0823, + "num_input_tokens_seen": 302025600, + "step": 4515 + }, + { + "epoch": 0.512340425531915, + "loss": 1.0324435234069824, + "loss_ce": 0.006564573850482702, + "loss_iou": 0.447265625, + "loss_num": 0.0262451171875, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 302025600, + "step": 4515 + }, + { + "epoch": 0.5124539007092198, + "grad_norm": 24.502727508544922, + "learning_rate": 5e-05, + "loss": 1.4454, + "num_input_tokens_seen": 302093292, + "step": 4516 + }, + { + "epoch": 0.5124539007092198, + "loss": 1.468581199645996, + "loss_ce": 0.005690473131835461, + "loss_iou": 0.61328125, + "loss_num": 0.046630859375, + "loss_xval": 1.4609375, + "num_input_tokens_seen": 302093292, + "step": 4516 + }, + { + "epoch": 0.5125673758865248, + "grad_norm": 417.18072509765625, + "learning_rate": 5e-05, + "loss": 1.1446, + "num_input_tokens_seen": 302159984, + "step": 4517 + }, + { + "epoch": 0.5125673758865248, + "loss": 1.2471158504486084, + "loss_ce": 0.002975250594317913, + "loss_iou": 0.5390625, + "loss_num": 0.03369140625, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 302159984, + "step": 4517 + }, + { + "epoch": 0.5126808510638298, + "grad_norm": 257.6095886230469, + "learning_rate": 5e-05, + "loss": 1.1762, + "num_input_tokens_seen": 302227016, + "step": 4518 + }, + { + "epoch": 0.5126808510638298, + "loss": 1.1531105041503906, + "loss_ce": 0.0061379000544548035, + "loss_iou": 0.458984375, + "loss_num": 0.0458984375, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 302227016, + "step": 4518 + }, + { + "epoch": 0.5127943262411347, + "grad_norm": 30.650711059570312, + "learning_rate": 5e-05, + "loss": 1.096, + "num_input_tokens_seen": 302293932, + "step": 4519 + }, + { + "epoch": 0.5127943262411347, + "loss": 0.9504849910736084, + "loss_ce": 0.004195898771286011, + "loss_iou": 0.43359375, + "loss_num": 0.01611328125, + "loss_xval": 0.9453125, + "num_input_tokens_seen": 302293932, + "step": 4519 + }, + { + "epoch": 0.5129078014184397, + "grad_norm": 37.781620025634766, + "learning_rate": 5e-05, + "loss": 1.4792, + "num_input_tokens_seen": 302360908, + "step": 4520 + }, + { + "epoch": 0.5129078014184397, + "loss": 1.3770197629928589, + "loss_ce": 0.006902580615133047, + "loss_iou": 0.58203125, + "loss_num": 0.040771484375, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 302360908, + "step": 4520 + }, + { + "epoch": 0.5130212765957447, + "grad_norm": 28.535751342773438, + "learning_rate": 5e-05, + "loss": 1.1436, + "num_input_tokens_seen": 302427764, + "step": 4521 + }, + { + "epoch": 0.5130212765957447, + "loss": 1.1493628025054932, + "loss_ce": 0.012155888602137566, + "loss_iou": 0.484375, + "loss_num": 0.033935546875, + "loss_xval": 1.140625, + "num_input_tokens_seen": 302427764, + "step": 4521 + }, + { + "epoch": 0.5131347517730497, + "grad_norm": 25.00425148010254, + "learning_rate": 5e-05, + "loss": 1.3339, + "num_input_tokens_seen": 302494872, + "step": 4522 + }, + { + "epoch": 0.5131347517730497, + "loss": 1.3723294734954834, + "loss_ce": 0.005386108532547951, + "loss_iou": 0.55078125, + "loss_num": 0.052734375, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 302494872, + "step": 4522 + }, + { + "epoch": 0.5132482269503547, + "grad_norm": 19.592519760131836, + "learning_rate": 5e-05, + "loss": 1.0834, + "num_input_tokens_seen": 302562204, + "step": 4523 + }, + { + "epoch": 0.5132482269503547, + "loss": 1.2224422693252563, + "loss_ce": 0.007598524913191795, + "loss_iou": 0.52734375, + "loss_num": 0.031982421875, + "loss_xval": 1.21875, + "num_input_tokens_seen": 302562204, + "step": 4523 + }, + { + "epoch": 0.5133617021276595, + "grad_norm": 40.68189239501953, + "learning_rate": 5e-05, + "loss": 1.4114, + "num_input_tokens_seen": 302629612, + "step": 4524 + }, + { + "epoch": 0.5133617021276595, + "loss": 1.348994255065918, + "loss_ce": 0.0032910965383052826, + "loss_iou": 0.578125, + "loss_num": 0.03857421875, + "loss_xval": 1.34375, + "num_input_tokens_seen": 302629612, + "step": 4524 + }, + { + "epoch": 0.5134751773049645, + "grad_norm": 35.77262496948242, + "learning_rate": 5e-05, + "loss": 1.2683, + "num_input_tokens_seen": 302697200, + "step": 4525 + }, + { + "epoch": 0.5134751773049645, + "loss": 1.2582427263259888, + "loss_ce": 0.006289633922278881, + "loss_iou": 0.5078125, + "loss_num": 0.04638671875, + "loss_xval": 1.25, + "num_input_tokens_seen": 302697200, + "step": 4525 + }, + { + "epoch": 0.5135886524822695, + "grad_norm": 23.492565155029297, + "learning_rate": 5e-05, + "loss": 1.2293, + "num_input_tokens_seen": 302763484, + "step": 4526 + }, + { + "epoch": 0.5135886524822695, + "loss": 1.11588716506958, + "loss_ce": 0.0026058880612254143, + "loss_iou": 0.453125, + "loss_num": 0.041748046875, + "loss_xval": 1.109375, + "num_input_tokens_seen": 302763484, + "step": 4526 + }, + { + "epoch": 0.5137021276595745, + "grad_norm": 71.07178497314453, + "learning_rate": 5e-05, + "loss": 1.2164, + "num_input_tokens_seen": 302829348, + "step": 4527 + }, + { + "epoch": 0.5137021276595745, + "loss": 1.1622523069381714, + "loss_ce": 0.0050256941467523575, + "loss_iou": 0.486328125, + "loss_num": 0.036376953125, + "loss_xval": 1.15625, + "num_input_tokens_seen": 302829348, + "step": 4527 + }, + { + "epoch": 0.5138156028368794, + "grad_norm": 27.76082420349121, + "learning_rate": 5e-05, + "loss": 1.0553, + "num_input_tokens_seen": 302895108, + "step": 4528 + }, + { + "epoch": 0.5138156028368794, + "loss": 1.0004668235778809, + "loss_ce": 0.004861342720687389, + "loss_iou": 0.40625, + "loss_num": 0.037109375, + "loss_xval": 0.99609375, + "num_input_tokens_seen": 302895108, + "step": 4528 + }, + { + "epoch": 0.5139290780141844, + "grad_norm": 36.4941520690918, + "learning_rate": 5e-05, + "loss": 0.9908, + "num_input_tokens_seen": 302961824, + "step": 4529 + }, + { + "epoch": 0.5139290780141844, + "loss": 1.045936107635498, + "loss_ce": 0.00736184511333704, + "loss_iou": 0.451171875, + "loss_num": 0.02734375, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 302961824, + "step": 4529 + }, + { + "epoch": 0.5140425531914894, + "grad_norm": 28.78630256652832, + "learning_rate": 5e-05, + "loss": 1.1826, + "num_input_tokens_seen": 303028660, + "step": 4530 + }, + { + "epoch": 0.5140425531914894, + "loss": 1.2246472835540771, + "loss_ce": 0.005409048870205879, + "loss_iou": 0.52734375, + "loss_num": 0.033203125, + "loss_xval": 1.21875, + "num_input_tokens_seen": 303028660, + "step": 4530 + }, + { + "epoch": 0.5141560283687944, + "grad_norm": 79.7751693725586, + "learning_rate": 5e-05, + "loss": 1.1616, + "num_input_tokens_seen": 303095256, + "step": 4531 + }, + { + "epoch": 0.5141560283687944, + "loss": 1.0677008628845215, + "loss_ce": 0.007642312441021204, + "loss_iou": 0.404296875, + "loss_num": 0.051025390625, + "loss_xval": 1.0625, + "num_input_tokens_seen": 303095256, + "step": 4531 + }, + { + "epoch": 0.5142695035460992, + "grad_norm": 19.95189666748047, + "learning_rate": 5e-05, + "loss": 1.1675, + "num_input_tokens_seen": 303162560, + "step": 4532 + }, + { + "epoch": 0.5142695035460992, + "loss": 1.122222661972046, + "loss_ce": 0.024566415697336197, + "loss_iou": 0.4609375, + "loss_num": 0.03515625, + "loss_xval": 1.09375, + "num_input_tokens_seen": 303162560, + "step": 4532 + }, + { + "epoch": 0.5143829787234042, + "grad_norm": 30.32969093322754, + "learning_rate": 5e-05, + "loss": 1.1354, + "num_input_tokens_seen": 303229268, + "step": 4533 + }, + { + "epoch": 0.5143829787234042, + "loss": 1.015805959701538, + "loss_ce": 0.004422853700816631, + "loss_iou": 0.42578125, + "loss_num": 0.03173828125, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 303229268, + "step": 4533 + }, + { + "epoch": 0.5144964539007092, + "grad_norm": 26.702054977416992, + "learning_rate": 5e-05, + "loss": 1.2133, + "num_input_tokens_seen": 303296724, + "step": 4534 + }, + { + "epoch": 0.5144964539007092, + "loss": 1.21680748462677, + "loss_ce": 0.006846575066447258, + "loss_iou": 0.46484375, + "loss_num": 0.055908203125, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 303296724, + "step": 4534 + }, + { + "epoch": 0.5146099290780142, + "grad_norm": 18.346681594848633, + "learning_rate": 5e-05, + "loss": 1.2651, + "num_input_tokens_seen": 303363844, + "step": 4535 + }, + { + "epoch": 0.5146099290780142, + "loss": 1.1066009998321533, + "loss_ce": 0.004550201818346977, + "loss_iou": 0.423828125, + "loss_num": 0.05078125, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 303363844, + "step": 4535 + }, + { + "epoch": 0.5147234042553192, + "grad_norm": 24.152101516723633, + "learning_rate": 5e-05, + "loss": 0.9638, + "num_input_tokens_seen": 303430388, + "step": 4536 + }, + { + "epoch": 0.5147234042553192, + "loss": 1.0441814661026, + "loss_ce": 0.00756043242290616, + "loss_iou": 0.41796875, + "loss_num": 0.0400390625, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 303430388, + "step": 4536 + }, + { + "epoch": 0.5148368794326241, + "grad_norm": 33.82258224487305, + "learning_rate": 5e-05, + "loss": 1.2331, + "num_input_tokens_seen": 303498264, + "step": 4537 + }, + { + "epoch": 0.5148368794326241, + "loss": 1.2292752265930176, + "loss_ce": 0.00466584088280797, + "loss_iou": 0.52734375, + "loss_num": 0.033447265625, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 303498264, + "step": 4537 + }, + { + "epoch": 0.5149503546099291, + "grad_norm": 33.3572883605957, + "learning_rate": 5e-05, + "loss": 1.3963, + "num_input_tokens_seen": 303565204, + "step": 4538 + }, + { + "epoch": 0.5149503546099291, + "loss": 1.4674434661865234, + "loss_ce": 0.011388799175620079, + "loss_iou": 0.58203125, + "loss_num": 0.058349609375, + "loss_xval": 1.453125, + "num_input_tokens_seen": 303565204, + "step": 4538 + }, + { + "epoch": 0.5150638297872341, + "grad_norm": 9.952211380004883, + "learning_rate": 5e-05, + "loss": 1.1323, + "num_input_tokens_seen": 303632316, + "step": 4539 + }, + { + "epoch": 0.5150638297872341, + "loss": 1.1563172340393066, + "loss_ce": 0.005438307300209999, + "loss_iou": 0.4453125, + "loss_num": 0.052001953125, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 303632316, + "step": 4539 + }, + { + "epoch": 0.5151773049645391, + "grad_norm": 37.99178695678711, + "learning_rate": 5e-05, + "loss": 0.9697, + "num_input_tokens_seen": 303699804, + "step": 4540 + }, + { + "epoch": 0.5151773049645391, + "loss": 0.9682669043540955, + "loss_ce": 0.004155617207288742, + "loss_iou": 0.3984375, + "loss_num": 0.033203125, + "loss_xval": 0.96484375, + "num_input_tokens_seen": 303699804, + "step": 4540 + }, + { + "epoch": 0.5152907801418439, + "grad_norm": 28.603816986083984, + "learning_rate": 5e-05, + "loss": 1.2203, + "num_input_tokens_seen": 303767280, + "step": 4541 + }, + { + "epoch": 0.5152907801418439, + "loss": 1.1867631673812866, + "loss_ce": 0.0065873488783836365, + "loss_iou": 0.48046875, + "loss_num": 0.0439453125, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 303767280, + "step": 4541 + }, + { + "epoch": 0.5154042553191489, + "grad_norm": 51.87882995605469, + "learning_rate": 5e-05, + "loss": 1.3553, + "num_input_tokens_seen": 303833700, + "step": 4542 + }, + { + "epoch": 0.5154042553191489, + "loss": 1.2577714920043945, + "loss_ce": 0.0028886969666928053, + "loss_iou": 0.578125, + "loss_num": 0.018798828125, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 303833700, + "step": 4542 + }, + { + "epoch": 0.5155177304964539, + "grad_norm": 28.635711669921875, + "learning_rate": 5e-05, + "loss": 1.2607, + "num_input_tokens_seen": 303900772, + "step": 4543 + }, + { + "epoch": 0.5155177304964539, + "loss": 1.2557127475738525, + "loss_ce": 0.003759569488465786, + "loss_iou": 0.5078125, + "loss_num": 0.046142578125, + "loss_xval": 1.25, + "num_input_tokens_seen": 303900772, + "step": 4543 + }, + { + "epoch": 0.5156312056737589, + "grad_norm": 25.174022674560547, + "learning_rate": 5e-05, + "loss": 1.3353, + "num_input_tokens_seen": 303966920, + "step": 4544 + }, + { + "epoch": 0.5156312056737589, + "loss": 1.3741981983184814, + "loss_ce": 0.006034141406416893, + "loss_iou": 0.5390625, + "loss_num": 0.058349609375, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 303966920, + "step": 4544 + }, + { + "epoch": 0.5157446808510638, + "grad_norm": 20.437355041503906, + "learning_rate": 5e-05, + "loss": 1.141, + "num_input_tokens_seen": 304033636, + "step": 4545 + }, + { + "epoch": 0.5157446808510638, + "loss": 1.0151724815368652, + "loss_ce": 0.0063834479078650475, + "loss_iou": 0.4296875, + "loss_num": 0.0296630859375, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 304033636, + "step": 4545 + }, + { + "epoch": 0.5158581560283688, + "grad_norm": 16.333168029785156, + "learning_rate": 5e-05, + "loss": 1.1512, + "num_input_tokens_seen": 304101276, + "step": 4546 + }, + { + "epoch": 0.5158581560283688, + "loss": 1.1641788482666016, + "loss_ce": 0.004999133758246899, + "loss_iou": 0.462890625, + "loss_num": 0.046875, + "loss_xval": 1.15625, + "num_input_tokens_seen": 304101276, + "step": 4546 + }, + { + "epoch": 0.5159716312056738, + "grad_norm": 20.737058639526367, + "learning_rate": 5e-05, + "loss": 0.8735, + "num_input_tokens_seen": 304167840, + "step": 4547 + }, + { + "epoch": 0.5159716312056738, + "loss": 0.874687910079956, + "loss_ce": 0.00771409273147583, + "loss_iou": 0.337890625, + "loss_num": 0.038330078125, + "loss_xval": 0.8671875, + "num_input_tokens_seen": 304167840, + "step": 4547 + }, + { + "epoch": 0.5160851063829788, + "grad_norm": 52.60915756225586, + "learning_rate": 5e-05, + "loss": 1.1368, + "num_input_tokens_seen": 304234556, + "step": 4548 + }, + { + "epoch": 0.5160851063829788, + "loss": 1.0296554565429688, + "loss_ce": 0.0026168711483478546, + "loss_iou": 0.4296875, + "loss_num": 0.03369140625, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 304234556, + "step": 4548 + }, + { + "epoch": 0.5161985815602836, + "grad_norm": 21.331132888793945, + "learning_rate": 5e-05, + "loss": 1.1041, + "num_input_tokens_seen": 304300944, + "step": 4549 + }, + { + "epoch": 0.5161985815602836, + "loss": 1.0829524993896484, + "loss_ce": 0.004400178790092468, + "loss_iou": 0.421875, + "loss_num": 0.04736328125, + "loss_xval": 1.078125, + "num_input_tokens_seen": 304300944, + "step": 4549 + }, + { + "epoch": 0.5163120567375886, + "grad_norm": 13.25970458984375, + "learning_rate": 5e-05, + "loss": 1.0911, + "num_input_tokens_seen": 304367520, + "step": 4550 + }, + { + "epoch": 0.5163120567375886, + "loss": 1.025599479675293, + "loss_ce": 0.006190367043018341, + "loss_iou": 0.384765625, + "loss_num": 0.0498046875, + "loss_xval": 1.015625, + "num_input_tokens_seen": 304367520, + "step": 4550 + }, + { + "epoch": 0.5164255319148936, + "grad_norm": 17.7651309967041, + "learning_rate": 5e-05, + "loss": 0.9534, + "num_input_tokens_seen": 304434116, + "step": 4551 + }, + { + "epoch": 0.5164255319148936, + "loss": 0.9896246194839478, + "loss_ce": 0.008667618036270142, + "loss_iou": 0.4140625, + "loss_num": 0.03076171875, + "loss_xval": 0.98046875, + "num_input_tokens_seen": 304434116, + "step": 4551 + }, + { + "epoch": 0.5165390070921986, + "grad_norm": 47.133487701416016, + "learning_rate": 5e-05, + "loss": 1.1648, + "num_input_tokens_seen": 304500816, + "step": 4552 + }, + { + "epoch": 0.5165390070921986, + "loss": 1.0544211864471436, + "loss_ce": 0.004616554360836744, + "loss_iou": 0.443359375, + "loss_num": 0.03271484375, + "loss_xval": 1.046875, + "num_input_tokens_seen": 304500816, + "step": 4552 + }, + { + "epoch": 0.5166524822695036, + "grad_norm": 33.1954231262207, + "learning_rate": 5e-05, + "loss": 1.1008, + "num_input_tokens_seen": 304567184, + "step": 4553 + }, + { + "epoch": 0.5166524822695036, + "loss": 0.9979422092437744, + "loss_ce": 0.00709750410169363, + "loss_iou": 0.404296875, + "loss_num": 0.036376953125, + "loss_xval": 0.9921875, + "num_input_tokens_seen": 304567184, + "step": 4553 + }, + { + "epoch": 0.5167659574468085, + "grad_norm": 33.76845169067383, + "learning_rate": 5e-05, + "loss": 1.4781, + "num_input_tokens_seen": 304634204, + "step": 4554 + }, + { + "epoch": 0.5167659574468085, + "loss": 1.387836217880249, + "loss_ce": 0.003070555627346039, + "loss_iou": 0.6015625, + "loss_num": 0.035888671875, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 304634204, + "step": 4554 + }, + { + "epoch": 0.5168794326241135, + "grad_norm": 40.75388717651367, + "learning_rate": 5e-05, + "loss": 1.1649, + "num_input_tokens_seen": 304700904, + "step": 4555 + }, + { + "epoch": 0.5168794326241135, + "loss": 1.3193442821502686, + "loss_ce": 0.008309251628816128, + "loss_iou": 0.49609375, + "loss_num": 0.06396484375, + "loss_xval": 1.3125, + "num_input_tokens_seen": 304700904, + "step": 4555 + }, + { + "epoch": 0.5169929078014185, + "grad_norm": 30.006725311279297, + "learning_rate": 5e-05, + "loss": 1.1904, + "num_input_tokens_seen": 304767176, + "step": 4556 + }, + { + "epoch": 0.5169929078014185, + "loss": 1.1772382259368896, + "loss_ce": 0.00389834214001894, + "loss_iou": 0.5078125, + "loss_num": 0.031982421875, + "loss_xval": 1.171875, + "num_input_tokens_seen": 304767176, + "step": 4556 + }, + { + "epoch": 0.5171063829787234, + "grad_norm": 47.99741744995117, + "learning_rate": 5e-05, + "loss": 1.4768, + "num_input_tokens_seen": 304832964, + "step": 4557 + }, + { + "epoch": 0.5171063829787234, + "loss": 1.6613194942474365, + "loss_ce": 0.007022627629339695, + "loss_iou": 0.625, + "loss_num": 0.08056640625, + "loss_xval": 1.65625, + "num_input_tokens_seen": 304832964, + "step": 4557 + }, + { + "epoch": 0.5172198581560283, + "grad_norm": 23.91900062561035, + "learning_rate": 5e-05, + "loss": 1.5619, + "num_input_tokens_seen": 304899356, + "step": 4558 + }, + { + "epoch": 0.5172198581560283, + "loss": 1.4556055068969727, + "loss_ce": 0.0063866376876831055, + "loss_iou": 0.625, + "loss_num": 0.03955078125, + "loss_xval": 1.453125, + "num_input_tokens_seen": 304899356, + "step": 4558 + }, + { + "epoch": 0.5173333333333333, + "grad_norm": 19.10962677001953, + "learning_rate": 5e-05, + "loss": 1.2411, + "num_input_tokens_seen": 304966060, + "step": 4559 + }, + { + "epoch": 0.5173333333333333, + "loss": 1.2068980932235718, + "loss_ce": 0.0038952073082327843, + "loss_iou": 0.50390625, + "loss_num": 0.039306640625, + "loss_xval": 1.203125, + "num_input_tokens_seen": 304966060, + "step": 4559 + }, + { + "epoch": 0.5174468085106383, + "grad_norm": 28.0472469329834, + "learning_rate": 5e-05, + "loss": 1.1607, + "num_input_tokens_seen": 305033960, + "step": 4560 + }, + { + "epoch": 0.5174468085106383, + "loss": 1.1634488105773926, + "loss_ce": 0.00475729163736105, + "loss_iou": 0.46875, + "loss_num": 0.044189453125, + "loss_xval": 1.15625, + "num_input_tokens_seen": 305033960, + "step": 4560 + }, + { + "epoch": 0.5175602836879433, + "grad_norm": 32.86800765991211, + "learning_rate": 5e-05, + "loss": 1.1902, + "num_input_tokens_seen": 305101120, + "step": 4561 + }, + { + "epoch": 0.5175602836879433, + "loss": 1.2391948699951172, + "loss_ce": 0.0018900784198194742, + "loss_iou": 0.53125, + "loss_num": 0.034912109375, + "loss_xval": 1.234375, + "num_input_tokens_seen": 305101120, + "step": 4561 + }, + { + "epoch": 0.5176737588652482, + "grad_norm": 22.717954635620117, + "learning_rate": 5e-05, + "loss": 1.1834, + "num_input_tokens_seen": 305168704, + "step": 4562 + }, + { + "epoch": 0.5176737588652482, + "loss": 1.15621018409729, + "loss_ce": 0.0048430925235152245, + "loss_iou": 0.470703125, + "loss_num": 0.041748046875, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 305168704, + "step": 4562 + }, + { + "epoch": 0.5177872340425532, + "grad_norm": 19.40526008605957, + "learning_rate": 5e-05, + "loss": 1.0752, + "num_input_tokens_seen": 305235888, + "step": 4563 + }, + { + "epoch": 0.5177872340425532, + "loss": 0.9928811192512512, + "loss_ce": 0.006675031967461109, + "loss_iou": 0.396484375, + "loss_num": 0.03857421875, + "loss_xval": 0.984375, + "num_input_tokens_seen": 305235888, + "step": 4563 + }, + { + "epoch": 0.5179007092198582, + "grad_norm": 21.018840789794922, + "learning_rate": 5e-05, + "loss": 1.0623, + "num_input_tokens_seen": 305302388, + "step": 4564 + }, + { + "epoch": 0.5179007092198582, + "loss": 1.333016276359558, + "loss_ce": 0.008797571063041687, + "loss_iou": 0.53125, + "loss_num": 0.052734375, + "loss_xval": 1.328125, + "num_input_tokens_seen": 305302388, + "step": 4564 + }, + { + "epoch": 0.5180141843971631, + "grad_norm": 18.079431533813477, + "learning_rate": 5e-05, + "loss": 1.1, + "num_input_tokens_seen": 305369592, + "step": 4565 + }, + { + "epoch": 0.5180141843971631, + "loss": 0.985388457775116, + "loss_ce": 0.003317509312182665, + "loss_iou": 0.41015625, + "loss_num": 0.032470703125, + "loss_xval": 0.98046875, + "num_input_tokens_seen": 305369592, + "step": 4565 + }, + { + "epoch": 0.518127659574468, + "grad_norm": 26.816741943359375, + "learning_rate": 5e-05, + "loss": 1.1405, + "num_input_tokens_seen": 305436796, + "step": 4566 + }, + { + "epoch": 0.518127659574468, + "loss": 1.2571860551834106, + "loss_ce": 0.004744610749185085, + "loss_iou": 0.5, + "loss_num": 0.0498046875, + "loss_xval": 1.25, + "num_input_tokens_seen": 305436796, + "step": 4566 + }, + { + "epoch": 0.518241134751773, + "grad_norm": 25.83938217163086, + "learning_rate": 5e-05, + "loss": 1.3722, + "num_input_tokens_seen": 305503648, + "step": 4567 + }, + { + "epoch": 0.518241134751773, + "loss": 1.4442133903503418, + "loss_ce": 0.005736848339438438, + "loss_iou": 0.57421875, + "loss_num": 0.05810546875, + "loss_xval": 1.4375, + "num_input_tokens_seen": 305503648, + "step": 4567 + }, + { + "epoch": 0.518354609929078, + "grad_norm": 28.659757614135742, + "learning_rate": 5e-05, + "loss": 1.2278, + "num_input_tokens_seen": 305569904, + "step": 4568 + }, + { + "epoch": 0.518354609929078, + "loss": 1.1790941953659058, + "loss_ce": 0.004289525561034679, + "loss_iou": 0.48046875, + "loss_num": 0.043212890625, + "loss_xval": 1.171875, + "num_input_tokens_seen": 305569904, + "step": 4568 + }, + { + "epoch": 0.518468085106383, + "grad_norm": 33.734169006347656, + "learning_rate": 5e-05, + "loss": 1.204, + "num_input_tokens_seen": 305636792, + "step": 4569 + }, + { + "epoch": 0.518468085106383, + "loss": 1.0647497177124023, + "loss_ce": 0.005423625931143761, + "loss_iou": 0.419921875, + "loss_num": 0.0439453125, + "loss_xval": 1.0625, + "num_input_tokens_seen": 305636792, + "step": 4569 + }, + { + "epoch": 0.518581560283688, + "grad_norm": 13.70258617401123, + "learning_rate": 5e-05, + "loss": 0.9616, + "num_input_tokens_seen": 305703232, + "step": 4570 + }, + { + "epoch": 0.518581560283688, + "loss": 0.9825297594070435, + "loss_ce": 0.0045023756101727486, + "loss_iou": 0.4296875, + "loss_num": 0.0235595703125, + "loss_xval": 0.9765625, + "num_input_tokens_seen": 305703232, + "step": 4570 + }, + { + "epoch": 0.5186950354609929, + "grad_norm": 20.456087112426758, + "learning_rate": 5e-05, + "loss": 1.1029, + "num_input_tokens_seen": 305769296, + "step": 4571 + }, + { + "epoch": 0.5186950354609929, + "loss": 0.9799509644508362, + "loss_ce": 0.0024119459558278322, + "loss_iou": 0.41796875, + "loss_num": 0.0281982421875, + "loss_xval": 0.9765625, + "num_input_tokens_seen": 305769296, + "step": 4571 + }, + { + "epoch": 0.5188085106382979, + "grad_norm": 32.25375747680664, + "learning_rate": 5e-05, + "loss": 1.0638, + "num_input_tokens_seen": 305836164, + "step": 4572 + }, + { + "epoch": 0.5188085106382979, + "loss": 1.2132046222686768, + "loss_ce": 0.005196771118789911, + "loss_iou": 0.494140625, + "loss_num": 0.0439453125, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 305836164, + "step": 4572 + }, + { + "epoch": 0.5189219858156028, + "grad_norm": 47.303531646728516, + "learning_rate": 5e-05, + "loss": 1.2447, + "num_input_tokens_seen": 305903488, + "step": 4573 + }, + { + "epoch": 0.5189219858156028, + "loss": 1.336391806602478, + "loss_ce": 0.009731641039252281, + "loss_iou": 0.55078125, + "loss_num": 0.04443359375, + "loss_xval": 1.328125, + "num_input_tokens_seen": 305903488, + "step": 4573 + }, + { + "epoch": 0.5190354609929078, + "grad_norm": 18.712825775146484, + "learning_rate": 5e-05, + "loss": 1.2621, + "num_input_tokens_seen": 305970364, + "step": 4574 + }, + { + "epoch": 0.5190354609929078, + "loss": 1.3831809759140015, + "loss_ce": 0.012087236158549786, + "loss_iou": 0.50390625, + "loss_num": 0.072265625, + "loss_xval": 1.375, + "num_input_tokens_seen": 305970364, + "step": 4574 + }, + { + "epoch": 0.5191489361702127, + "grad_norm": 34.86029052734375, + "learning_rate": 5e-05, + "loss": 1.4021, + "num_input_tokens_seen": 306035800, + "step": 4575 + }, + { + "epoch": 0.5191489361702127, + "loss": 1.2229441404342651, + "loss_ce": 0.007123889401555061, + "loss_iou": 0.5078125, + "loss_num": 0.04052734375, + "loss_xval": 1.21875, + "num_input_tokens_seen": 306035800, + "step": 4575 + }, + { + "epoch": 0.5192624113475177, + "grad_norm": 21.425809860229492, + "learning_rate": 5e-05, + "loss": 1.2377, + "num_input_tokens_seen": 306103620, + "step": 4576 + }, + { + "epoch": 0.5192624113475177, + "loss": 1.0616118907928467, + "loss_ce": 0.003994707949459553, + "loss_iou": 0.462890625, + "loss_num": 0.0263671875, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 306103620, + "step": 4576 + }, + { + "epoch": 0.5193758865248227, + "grad_norm": 12.956791877746582, + "learning_rate": 5e-05, + "loss": 1.0964, + "num_input_tokens_seen": 306171280, + "step": 4577 + }, + { + "epoch": 0.5193758865248227, + "loss": 1.333408236503601, + "loss_ce": 0.005283229053020477, + "loss_iou": 0.5390625, + "loss_num": 0.05029296875, + "loss_xval": 1.328125, + "num_input_tokens_seen": 306171280, + "step": 4577 + }, + { + "epoch": 0.5194893617021277, + "grad_norm": 16.537464141845703, + "learning_rate": 5e-05, + "loss": 1.0984, + "num_input_tokens_seen": 306238396, + "step": 4578 + }, + { + "epoch": 0.5194893617021277, + "loss": 1.2414885759353638, + "loss_ce": 0.005648727063089609, + "loss_iou": 0.5, + "loss_num": 0.04638671875, + "loss_xval": 1.234375, + "num_input_tokens_seen": 306238396, + "step": 4578 + }, + { + "epoch": 0.5196028368794327, + "grad_norm": 32.65505599975586, + "learning_rate": 5e-05, + "loss": 0.9884, + "num_input_tokens_seen": 306305536, + "step": 4579 + }, + { + "epoch": 0.5196028368794327, + "loss": 0.9434462785720825, + "loss_ce": 0.004481427371501923, + "loss_iou": 0.416015625, + "loss_num": 0.021728515625, + "loss_xval": 0.9375, + "num_input_tokens_seen": 306305536, + "step": 4579 + }, + { + "epoch": 0.5197163120567376, + "grad_norm": 44.4818115234375, + "learning_rate": 5e-05, + "loss": 1.4501, + "num_input_tokens_seen": 306371980, + "step": 4580 + }, + { + "epoch": 0.5197163120567376, + "loss": 1.4359874725341797, + "loss_ce": 0.004346808418631554, + "loss_iou": 0.6171875, + "loss_num": 0.0400390625, + "loss_xval": 1.4296875, + "num_input_tokens_seen": 306371980, + "step": 4580 + }, + { + "epoch": 0.5198297872340426, + "grad_norm": 22.62458610534668, + "learning_rate": 5e-05, + "loss": 1.2457, + "num_input_tokens_seen": 306438264, + "step": 4581 + }, + { + "epoch": 0.5198297872340426, + "loss": 1.263188123703003, + "loss_ce": 0.004399009980261326, + "loss_iou": 0.5, + "loss_num": 0.051025390625, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 306438264, + "step": 4581 + }, + { + "epoch": 0.5199432624113475, + "grad_norm": 28.085689544677734, + "learning_rate": 5e-05, + "loss": 1.159, + "num_input_tokens_seen": 306504552, + "step": 4582 + }, + { + "epoch": 0.5199432624113475, + "loss": 1.0996150970458984, + "loss_ce": 0.0024471203796565533, + "loss_iou": 0.4609375, + "loss_num": 0.034423828125, + "loss_xval": 1.09375, + "num_input_tokens_seen": 306504552, + "step": 4582 + }, + { + "epoch": 0.5200567375886525, + "grad_norm": 34.39693832397461, + "learning_rate": 5e-05, + "loss": 1.4135, + "num_input_tokens_seen": 306571940, + "step": 4583 + }, + { + "epoch": 0.5200567375886525, + "loss": 1.363457441329956, + "loss_ce": 0.007500395178794861, + "loss_iou": 0.59765625, + "loss_num": 0.032470703125, + "loss_xval": 1.359375, + "num_input_tokens_seen": 306571940, + "step": 4583 + }, + { + "epoch": 0.5201702127659574, + "grad_norm": 24.777084350585938, + "learning_rate": 5e-05, + "loss": 0.9507, + "num_input_tokens_seen": 306638944, + "step": 4584 + }, + { + "epoch": 0.5201702127659574, + "loss": 0.9775568246841431, + "loss_ce": 0.002962700091302395, + "loss_iou": 0.4140625, + "loss_num": 0.0289306640625, + "loss_xval": 0.97265625, + "num_input_tokens_seen": 306638944, + "step": 4584 + }, + { + "epoch": 0.5202836879432624, + "grad_norm": 24.164731979370117, + "learning_rate": 5e-05, + "loss": 1.1492, + "num_input_tokens_seen": 306705988, + "step": 4585 + }, + { + "epoch": 0.5202836879432624, + "loss": 1.1393948793411255, + "loss_ce": 0.005605805199593306, + "loss_iou": 0.48828125, + "loss_num": 0.03173828125, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 306705988, + "step": 4585 + }, + { + "epoch": 0.5203971631205674, + "grad_norm": 23.747026443481445, + "learning_rate": 5e-05, + "loss": 1.0574, + "num_input_tokens_seen": 306772340, + "step": 4586 + }, + { + "epoch": 0.5203971631205674, + "loss": 1.0322515964508057, + "loss_ce": 0.006372705101966858, + "loss_iou": 0.44140625, + "loss_num": 0.0284423828125, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 306772340, + "step": 4586 + }, + { + "epoch": 0.5205106382978724, + "grad_norm": 34.17471694946289, + "learning_rate": 5e-05, + "loss": 1.232, + "num_input_tokens_seen": 306839276, + "step": 4587 + }, + { + "epoch": 0.5205106382978724, + "loss": 1.2403134107589722, + "loss_ce": 0.006426634266972542, + "loss_iou": 0.484375, + "loss_num": 0.052978515625, + "loss_xval": 1.234375, + "num_input_tokens_seen": 306839276, + "step": 4587 + }, + { + "epoch": 0.5206241134751773, + "grad_norm": 28.6202449798584, + "learning_rate": 5e-05, + "loss": 1.3306, + "num_input_tokens_seen": 306906108, + "step": 4588 + }, + { + "epoch": 0.5206241134751773, + "loss": 1.2259286642074585, + "loss_ce": 0.006690387614071369, + "loss_iou": 0.515625, + "loss_num": 0.037841796875, + "loss_xval": 1.21875, + "num_input_tokens_seen": 306906108, + "step": 4588 + }, + { + "epoch": 0.5207375886524823, + "grad_norm": 29.98627471923828, + "learning_rate": 5e-05, + "loss": 1.1565, + "num_input_tokens_seen": 306972188, + "step": 4589 + }, + { + "epoch": 0.5207375886524823, + "loss": 1.1862766742706299, + "loss_ce": 0.007321633398532867, + "loss_iou": 0.451171875, + "loss_num": 0.05517578125, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 306972188, + "step": 4589 + }, + { + "epoch": 0.5208510638297872, + "grad_norm": 26.56409454345703, + "learning_rate": 5e-05, + "loss": 1.3786, + "num_input_tokens_seen": 307038908, + "step": 4590 + }, + { + "epoch": 0.5208510638297872, + "loss": 1.304532766342163, + "loss_ce": 0.009122622199356556, + "loss_iou": 0.51953125, + "loss_num": 0.051025390625, + "loss_xval": 1.296875, + "num_input_tokens_seen": 307038908, + "step": 4590 + }, + { + "epoch": 0.5209645390070922, + "grad_norm": 27.5542049407959, + "learning_rate": 5e-05, + "loss": 1.2724, + "num_input_tokens_seen": 307105648, + "step": 4591 + }, + { + "epoch": 0.5209645390070922, + "loss": 1.2531371116638184, + "loss_ce": 0.005578479263931513, + "loss_iou": 0.5390625, + "loss_num": 0.033203125, + "loss_xval": 1.25, + "num_input_tokens_seen": 307105648, + "step": 4591 + }, + { + "epoch": 0.5210780141843971, + "grad_norm": 23.854795455932617, + "learning_rate": 5e-05, + "loss": 1.0951, + "num_input_tokens_seen": 307172508, + "step": 4592 + }, + { + "epoch": 0.5210780141843971, + "loss": 1.1429028511047363, + "loss_ce": 0.0032543609850108624, + "loss_iou": 0.4921875, + "loss_num": 0.031494140625, + "loss_xval": 1.140625, + "num_input_tokens_seen": 307172508, + "step": 4592 + }, + { + "epoch": 0.5211914893617021, + "grad_norm": 69.82303619384766, + "learning_rate": 5e-05, + "loss": 1.2152, + "num_input_tokens_seen": 307239140, + "step": 4593 + }, + { + "epoch": 0.5211914893617021, + "loss": 1.2087278366088867, + "loss_ce": 0.008532541804015636, + "loss_iou": 0.490234375, + "loss_num": 0.043701171875, + "loss_xval": 1.203125, + "num_input_tokens_seen": 307239140, + "step": 4593 + }, + { + "epoch": 0.5213049645390071, + "grad_norm": 29.504989624023438, + "learning_rate": 5e-05, + "loss": 1.1704, + "num_input_tokens_seen": 307306432, + "step": 4594 + }, + { + "epoch": 0.5213049645390071, + "loss": 1.111649513244629, + "loss_ce": 0.0032510426826775074, + "loss_iou": 0.486328125, + "loss_num": 0.0269775390625, + "loss_xval": 1.109375, + "num_input_tokens_seen": 307306432, + "step": 4594 + }, + { + "epoch": 0.5214184397163121, + "grad_norm": 26.67452621459961, + "learning_rate": 5e-05, + "loss": 1.2154, + "num_input_tokens_seen": 307372888, + "step": 4595 + }, + { + "epoch": 0.5214184397163121, + "loss": 1.2423057556152344, + "loss_ce": 0.003536226926371455, + "loss_iou": 0.5234375, + "loss_num": 0.03857421875, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 307372888, + "step": 4595 + }, + { + "epoch": 0.521531914893617, + "grad_norm": 29.51204490661621, + "learning_rate": 5e-05, + "loss": 1.1497, + "num_input_tokens_seen": 307440060, + "step": 4596 + }, + { + "epoch": 0.521531914893617, + "loss": 1.2093138694763184, + "loss_ce": 0.005700643174350262, + "loss_iou": 0.51171875, + "loss_num": 0.0361328125, + "loss_xval": 1.203125, + "num_input_tokens_seen": 307440060, + "step": 4596 + }, + { + "epoch": 0.521645390070922, + "grad_norm": 38.73259735107422, + "learning_rate": 5e-05, + "loss": 1.2132, + "num_input_tokens_seen": 307506908, + "step": 4597 + }, + { + "epoch": 0.521645390070922, + "loss": 1.1657793521881104, + "loss_ce": 0.0061114197596907616, + "loss_iou": 0.51953125, + "loss_num": 0.024658203125, + "loss_xval": 1.15625, + "num_input_tokens_seen": 307506908, + "step": 4597 + }, + { + "epoch": 0.5217588652482269, + "grad_norm": 25.660926818847656, + "learning_rate": 5e-05, + "loss": 1.4903, + "num_input_tokens_seen": 307574756, + "step": 4598 + }, + { + "epoch": 0.5217588652482269, + "loss": 1.5995006561279297, + "loss_ce": 0.006727295927703381, + "loss_iou": 0.6484375, + "loss_num": 0.05810546875, + "loss_xval": 1.59375, + "num_input_tokens_seen": 307574756, + "step": 4598 + }, + { + "epoch": 0.5218723404255319, + "grad_norm": 9.657840728759766, + "learning_rate": 5e-05, + "loss": 0.9626, + "num_input_tokens_seen": 307641976, + "step": 4599 + }, + { + "epoch": 0.5218723404255319, + "loss": 1.0201795101165771, + "loss_ce": 0.00553105678409338, + "loss_iou": 0.42578125, + "loss_num": 0.03271484375, + "loss_xval": 1.015625, + "num_input_tokens_seen": 307641976, + "step": 4599 + }, + { + "epoch": 0.5219858156028369, + "grad_norm": 17.360013961791992, + "learning_rate": 5e-05, + "loss": 1.1898, + "num_input_tokens_seen": 307708632, + "step": 4600 + }, + { + "epoch": 0.5219858156028369, + "loss": 1.2830827236175537, + "loss_ce": 0.0032975655049085617, + "loss_iou": 0.5078125, + "loss_num": 0.05224609375, + "loss_xval": 1.28125, + "num_input_tokens_seen": 307708632, + "step": 4600 + }, + { + "epoch": 0.5220992907801418, + "grad_norm": 23.962121963500977, + "learning_rate": 5e-05, + "loss": 0.9784, + "num_input_tokens_seen": 307774536, + "step": 4601 + }, + { + "epoch": 0.5220992907801418, + "loss": 0.8851863145828247, + "loss_ce": 0.0026180099230259657, + "loss_iou": 0.3984375, + "loss_num": 0.0172119140625, + "loss_xval": 0.8828125, + "num_input_tokens_seen": 307774536, + "step": 4601 + }, + { + "epoch": 0.5222127659574468, + "grad_norm": 34.84163284301758, + "learning_rate": 5e-05, + "loss": 0.9583, + "num_input_tokens_seen": 307840612, + "step": 4602 + }, + { + "epoch": 0.5222127659574468, + "loss": 0.8144572973251343, + "loss_ce": 0.010258042253553867, + "loss_iou": 0.271484375, + "loss_num": 0.052001953125, + "loss_xval": 0.8046875, + "num_input_tokens_seen": 307840612, + "step": 4602 + }, + { + "epoch": 0.5223262411347518, + "grad_norm": 18.345413208007812, + "learning_rate": 5e-05, + "loss": 1.1699, + "num_input_tokens_seen": 307906280, + "step": 4603 + }, + { + "epoch": 0.5223262411347518, + "loss": 1.2197201251983643, + "loss_ce": 0.00927088875323534, + "loss_iou": 0.43359375, + "loss_num": 0.06884765625, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 307906280, + "step": 4603 + }, + { + "epoch": 0.5224397163120568, + "grad_norm": 31.089601516723633, + "learning_rate": 5e-05, + "loss": 1.227, + "num_input_tokens_seen": 307973928, + "step": 4604 + }, + { + "epoch": 0.5224397163120568, + "loss": 1.2487576007843018, + "loss_ce": 0.005593456327915192, + "loss_iou": 0.54296875, + "loss_num": 0.03173828125, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 307973928, + "step": 4604 + }, + { + "epoch": 0.5225531914893617, + "grad_norm": 33.052833557128906, + "learning_rate": 5e-05, + "loss": 1.3484, + "num_input_tokens_seen": 308040268, + "step": 4605 + }, + { + "epoch": 0.5225531914893617, + "loss": 1.3776147365570068, + "loss_ce": 0.007497497368603945, + "loss_iou": 0.5703125, + "loss_num": 0.0458984375, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 308040268, + "step": 4605 + }, + { + "epoch": 0.5226666666666666, + "grad_norm": 25.595876693725586, + "learning_rate": 5e-05, + "loss": 0.9844, + "num_input_tokens_seen": 308106492, + "step": 4606 + }, + { + "epoch": 0.5226666666666666, + "loss": 0.9439400434494019, + "loss_ce": 0.005707684904336929, + "loss_iou": 0.423828125, + "loss_num": 0.0181884765625, + "loss_xval": 0.9375, + "num_input_tokens_seen": 308106492, + "step": 4606 + }, + { + "epoch": 0.5227801418439716, + "grad_norm": 27.469707489013672, + "learning_rate": 5e-05, + "loss": 1.4563, + "num_input_tokens_seen": 308173660, + "step": 4607 + }, + { + "epoch": 0.5227801418439716, + "loss": 1.3955368995666504, + "loss_ce": 0.00393537525087595, + "loss_iou": 0.578125, + "loss_num": 0.046875, + "loss_xval": 1.390625, + "num_input_tokens_seen": 308173660, + "step": 4607 + }, + { + "epoch": 0.5228936170212766, + "grad_norm": 27.92701530456543, + "learning_rate": 5e-05, + "loss": 1.1157, + "num_input_tokens_seen": 308240260, + "step": 4608 + }, + { + "epoch": 0.5228936170212766, + "loss": 1.0781469345092773, + "loss_ce": 0.004172337707132101, + "loss_iou": 0.41796875, + "loss_num": 0.0478515625, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 308240260, + "step": 4608 + }, + { + "epoch": 0.5230070921985815, + "grad_norm": 43.688663482666016, + "learning_rate": 5e-05, + "loss": 1.2022, + "num_input_tokens_seen": 308306776, + "step": 4609 + }, + { + "epoch": 0.5230070921985815, + "loss": 1.1177804470062256, + "loss_ce": 0.006452292203903198, + "loss_iou": 0.4609375, + "loss_num": 0.037841796875, + "loss_xval": 1.109375, + "num_input_tokens_seen": 308306776, + "step": 4609 + }, + { + "epoch": 0.5231205673758865, + "grad_norm": 24.28206443786621, + "learning_rate": 5e-05, + "loss": 1.1951, + "num_input_tokens_seen": 308373056, + "step": 4610 + }, + { + "epoch": 0.5231205673758865, + "loss": 1.245072841644287, + "loss_ce": 0.004350153263658285, + "loss_iou": 0.53125, + "loss_num": 0.03662109375, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 308373056, + "step": 4610 + }, + { + "epoch": 0.5232340425531915, + "grad_norm": 26.073776245117188, + "learning_rate": 5e-05, + "loss": 1.3283, + "num_input_tokens_seen": 308439152, + "step": 4611 + }, + { + "epoch": 0.5232340425531915, + "loss": 1.3661179542541504, + "loss_ce": 0.0052781058475375175, + "loss_iou": 0.5546875, + "loss_num": 0.050537109375, + "loss_xval": 1.359375, + "num_input_tokens_seen": 308439152, + "step": 4611 + }, + { + "epoch": 0.5233475177304965, + "grad_norm": 20.24939727783203, + "learning_rate": 5e-05, + "loss": 1.1145, + "num_input_tokens_seen": 308505936, + "step": 4612 + }, + { + "epoch": 0.5233475177304965, + "loss": 1.0900617837905884, + "loss_ce": 0.008274600841104984, + "loss_iou": 0.453125, + "loss_num": 0.03564453125, + "loss_xval": 1.078125, + "num_input_tokens_seen": 308505936, + "step": 4612 + }, + { + "epoch": 0.5234609929078015, + "grad_norm": 24.187259674072266, + "learning_rate": 5e-05, + "loss": 1.1974, + "num_input_tokens_seen": 308572492, + "step": 4613 + }, + { + "epoch": 0.5234609929078015, + "loss": 1.230918288230896, + "loss_ce": 0.010703404434025288, + "loss_iou": 0.5234375, + "loss_num": 0.03515625, + "loss_xval": 1.21875, + "num_input_tokens_seen": 308572492, + "step": 4613 + }, + { + "epoch": 0.5235744680851064, + "grad_norm": 38.65528869628906, + "learning_rate": 5e-05, + "loss": 1.0651, + "num_input_tokens_seen": 308638260, + "step": 4614 + }, + { + "epoch": 0.5235744680851064, + "loss": 1.1839951276779175, + "loss_ce": 0.0050400132313370705, + "loss_iou": 0.451171875, + "loss_num": 0.05517578125, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 308638260, + "step": 4614 + }, + { + "epoch": 0.5236879432624113, + "grad_norm": 31.377708435058594, + "learning_rate": 5e-05, + "loss": 1.2116, + "num_input_tokens_seen": 308705124, + "step": 4615 + }, + { + "epoch": 0.5236879432624113, + "loss": 1.3585504293441772, + "loss_ce": 0.008941052481532097, + "loss_iou": 0.55859375, + "loss_num": 0.0458984375, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 308705124, + "step": 4615 + }, + { + "epoch": 0.5238014184397163, + "grad_norm": 38.456146240234375, + "learning_rate": 5e-05, + "loss": 1.1894, + "num_input_tokens_seen": 308771816, + "step": 4616 + }, + { + "epoch": 0.5238014184397163, + "loss": 1.1061766147613525, + "loss_ce": 0.0055905906483531, + "loss_iou": 0.451171875, + "loss_num": 0.03955078125, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 308771816, + "step": 4616 + }, + { + "epoch": 0.5239148936170213, + "grad_norm": 26.372650146484375, + "learning_rate": 5e-05, + "loss": 1.2814, + "num_input_tokens_seen": 308838380, + "step": 4617 + }, + { + "epoch": 0.5239148936170213, + "loss": 1.1385477781295776, + "loss_ce": 0.004026304930448532, + "loss_iou": 0.451171875, + "loss_num": 0.046142578125, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 308838380, + "step": 4617 + }, + { + "epoch": 0.5240283687943262, + "grad_norm": 16.588844299316406, + "learning_rate": 5e-05, + "loss": 1.3015, + "num_input_tokens_seen": 308904768, + "step": 4618 + }, + { + "epoch": 0.5240283687943262, + "loss": 1.4672940969467163, + "loss_ce": 0.008309787139296532, + "loss_iou": 0.56640625, + "loss_num": 0.064453125, + "loss_xval": 1.4609375, + "num_input_tokens_seen": 308904768, + "step": 4618 + }, + { + "epoch": 0.5241418439716312, + "grad_norm": 24.631912231445312, + "learning_rate": 5e-05, + "loss": 1.2842, + "num_input_tokens_seen": 308972548, + "step": 4619 + }, + { + "epoch": 0.5241418439716312, + "loss": 1.3093035221099854, + "loss_ce": 0.004127768334001303, + "loss_iou": 0.515625, + "loss_num": 0.05419921875, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 308972548, + "step": 4619 + }, + { + "epoch": 0.5242553191489362, + "grad_norm": 24.947078704833984, + "learning_rate": 5e-05, + "loss": 1.3788, + "num_input_tokens_seen": 309039252, + "step": 4620 + }, + { + "epoch": 0.5242553191489362, + "loss": 1.3966678380966187, + "loss_ce": 0.005066236946731806, + "loss_iou": 0.53515625, + "loss_num": 0.0634765625, + "loss_xval": 1.390625, + "num_input_tokens_seen": 309039252, + "step": 4620 + }, + { + "epoch": 0.5243687943262412, + "grad_norm": 39.063209533691406, + "learning_rate": 5e-05, + "loss": 1.146, + "num_input_tokens_seen": 309106236, + "step": 4621 + }, + { + "epoch": 0.5243687943262412, + "loss": 0.9739499092102051, + "loss_ce": 0.0047116451896727085, + "loss_iou": 0.423828125, + "loss_num": 0.024169921875, + "loss_xval": 0.96875, + "num_input_tokens_seen": 309106236, + "step": 4621 + }, + { + "epoch": 0.5244822695035462, + "grad_norm": 37.67231369018555, + "learning_rate": 5e-05, + "loss": 1.3544, + "num_input_tokens_seen": 309173892, + "step": 4622 + }, + { + "epoch": 0.5244822695035462, + "loss": 1.2923188209533691, + "loss_ce": 0.0032562511041760445, + "loss_iou": 0.55859375, + "loss_num": 0.03369140625, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 309173892, + "step": 4622 + }, + { + "epoch": 0.524595744680851, + "grad_norm": 27.678607940673828, + "learning_rate": 5e-05, + "loss": 1.5361, + "num_input_tokens_seen": 309241332, + "step": 4623 + }, + { + "epoch": 0.524595744680851, + "loss": 1.517289161682129, + "loss_ce": 0.006546958349645138, + "loss_iou": 0.59765625, + "loss_num": 0.06396484375, + "loss_xval": 1.5078125, + "num_input_tokens_seen": 309241332, + "step": 4623 + }, + { + "epoch": 0.524709219858156, + "grad_norm": 14.849688529968262, + "learning_rate": 5e-05, + "loss": 1.1037, + "num_input_tokens_seen": 309308352, + "step": 4624 + }, + { + "epoch": 0.524709219858156, + "loss": 1.0714566707611084, + "loss_ce": 0.007247692905366421, + "loss_iou": 0.42578125, + "loss_num": 0.042236328125, + "loss_xval": 1.0625, + "num_input_tokens_seen": 309308352, + "step": 4624 + }, + { + "epoch": 0.524822695035461, + "grad_norm": 24.8405704498291, + "learning_rate": 5e-05, + "loss": 1.3425, + "num_input_tokens_seen": 309375336, + "step": 4625 + }, + { + "epoch": 0.524822695035461, + "loss": 1.4880485534667969, + "loss_ce": 0.008556285873055458, + "loss_iou": 0.59375, + "loss_num": 0.05859375, + "loss_xval": 1.4765625, + "num_input_tokens_seen": 309375336, + "step": 4625 + }, + { + "epoch": 0.524936170212766, + "grad_norm": 28.02850914001465, + "learning_rate": 5e-05, + "loss": 1.312, + "num_input_tokens_seen": 309442428, + "step": 4626 + }, + { + "epoch": 0.524936170212766, + "loss": 1.1193101406097412, + "loss_ce": 0.006517260801047087, + "loss_iou": 0.50390625, + "loss_num": 0.0208740234375, + "loss_xval": 1.109375, + "num_input_tokens_seen": 309442428, + "step": 4626 + }, + { + "epoch": 0.5250496453900709, + "grad_norm": 55.863407135009766, + "learning_rate": 5e-05, + "loss": 1.4646, + "num_input_tokens_seen": 309509028, + "step": 4627 + }, + { + "epoch": 0.5250496453900709, + "loss": 1.5463618040084839, + "loss_ce": 0.008275844156742096, + "loss_iou": 0.6484375, + "loss_num": 0.0478515625, + "loss_xval": 1.5390625, + "num_input_tokens_seen": 309509028, + "step": 4627 + }, + { + "epoch": 0.5251631205673759, + "grad_norm": 33.10390090942383, + "learning_rate": 5e-05, + "loss": 1.1965, + "num_input_tokens_seen": 309575332, + "step": 4628 + }, + { + "epoch": 0.5251631205673759, + "loss": 1.0750977993011475, + "loss_ce": 0.005273602902889252, + "loss_iou": 0.451171875, + "loss_num": 0.033447265625, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 309575332, + "step": 4628 + }, + { + "epoch": 0.5252765957446809, + "grad_norm": 30.699230194091797, + "learning_rate": 5e-05, + "loss": 1.171, + "num_input_tokens_seen": 309642792, + "step": 4629 + }, + { + "epoch": 0.5252765957446809, + "loss": 1.2475064992904663, + "loss_ce": 0.005807269364595413, + "loss_iou": 0.55859375, + "loss_num": 0.0244140625, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 309642792, + "step": 4629 + }, + { + "epoch": 0.5253900709219859, + "grad_norm": 29.364484786987305, + "learning_rate": 5e-05, + "loss": 1.3545, + "num_input_tokens_seen": 309710080, + "step": 4630 + }, + { + "epoch": 0.5253900709219859, + "loss": 1.287017583847046, + "loss_ce": 0.007232464384287596, + "loss_iou": 0.52734375, + "loss_num": 0.044921875, + "loss_xval": 1.28125, + "num_input_tokens_seen": 309710080, + "step": 4630 + }, + { + "epoch": 0.5255035460992907, + "grad_norm": 22.807735443115234, + "learning_rate": 5e-05, + "loss": 1.2365, + "num_input_tokens_seen": 309777284, + "step": 4631 + }, + { + "epoch": 0.5255035460992907, + "loss": 1.2614669799804688, + "loss_ce": 0.00487508624792099, + "loss_iou": 0.5, + "loss_num": 0.051513671875, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 309777284, + "step": 4631 + }, + { + "epoch": 0.5256170212765957, + "grad_norm": 27.17502212524414, + "learning_rate": 5e-05, + "loss": 1.1356, + "num_input_tokens_seen": 309844072, + "step": 4632 + }, + { + "epoch": 0.5256170212765957, + "loss": 1.2245264053344727, + "loss_ce": 0.007729645818471909, + "loss_iou": 0.5234375, + "loss_num": 0.0341796875, + "loss_xval": 1.21875, + "num_input_tokens_seen": 309844072, + "step": 4632 + }, + { + "epoch": 0.5257304964539007, + "grad_norm": 24.10354232788086, + "learning_rate": 5e-05, + "loss": 1.0453, + "num_input_tokens_seen": 309910848, + "step": 4633 + }, + { + "epoch": 0.5257304964539007, + "loss": 1.1652593612670898, + "loss_ce": 0.004858991131186485, + "loss_iou": 0.42578125, + "loss_num": 0.0625, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 309910848, + "step": 4633 + }, + { + "epoch": 0.5258439716312057, + "grad_norm": 14.365212440490723, + "learning_rate": 5e-05, + "loss": 1.1386, + "num_input_tokens_seen": 309977756, + "step": 4634 + }, + { + "epoch": 0.5258439716312057, + "loss": 1.0468273162841797, + "loss_ce": 0.00825304351747036, + "loss_iou": 0.392578125, + "loss_num": 0.050537109375, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 309977756, + "step": 4634 + }, + { + "epoch": 0.5259574468085106, + "grad_norm": 11.127184867858887, + "learning_rate": 5e-05, + "loss": 1.0072, + "num_input_tokens_seen": 310044320, + "step": 4635 + }, + { + "epoch": 0.5259574468085106, + "loss": 0.967349112033844, + "loss_ce": 0.0072050755843520164, + "loss_iou": 0.390625, + "loss_num": 0.03564453125, + "loss_xval": 0.9609375, + "num_input_tokens_seen": 310044320, + "step": 4635 + }, + { + "epoch": 0.5260709219858156, + "grad_norm": 27.862314224243164, + "learning_rate": 5e-05, + "loss": 1.191, + "num_input_tokens_seen": 310111732, + "step": 4636 + }, + { + "epoch": 0.5260709219858156, + "loss": 1.2898608446121216, + "loss_ce": 0.004216278903186321, + "loss_iou": 0.515625, + "loss_num": 0.05029296875, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 310111732, + "step": 4636 + }, + { + "epoch": 0.5261843971631206, + "grad_norm": 34.90890884399414, + "learning_rate": 5e-05, + "loss": 1.2335, + "num_input_tokens_seen": 310178788, + "step": 4637 + }, + { + "epoch": 0.5261843971631206, + "loss": 1.258131980895996, + "loss_ce": 0.005690603516995907, + "loss_iou": 0.5234375, + "loss_num": 0.041259765625, + "loss_xval": 1.25, + "num_input_tokens_seen": 310178788, + "step": 4637 + }, + { + "epoch": 0.5262978723404256, + "grad_norm": 31.038244247436523, + "learning_rate": 5e-05, + "loss": 1.344, + "num_input_tokens_seen": 310244720, + "step": 4638 + }, + { + "epoch": 0.5262978723404256, + "loss": 1.335702896118164, + "loss_ce": 0.006113120820373297, + "loss_iou": 0.53515625, + "loss_num": 0.051513671875, + "loss_xval": 1.328125, + "num_input_tokens_seen": 310244720, + "step": 4638 + }, + { + "epoch": 0.5264113475177304, + "grad_norm": 38.35239791870117, + "learning_rate": 5e-05, + "loss": 1.1844, + "num_input_tokens_seen": 310311288, + "step": 4639 + }, + { + "epoch": 0.5264113475177304, + "loss": 1.2013795375823975, + "loss_ce": 0.005578832700848579, + "loss_iou": 0.5, + "loss_num": 0.0390625, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 310311288, + "step": 4639 + }, + { + "epoch": 0.5265248226950354, + "grad_norm": 30.85897445678711, + "learning_rate": 5e-05, + "loss": 1.3321, + "num_input_tokens_seen": 310377876, + "step": 4640 + }, + { + "epoch": 0.5265248226950354, + "loss": 1.5406584739685059, + "loss_ce": 0.008126718923449516, + "loss_iou": 0.59375, + "loss_num": 0.068359375, + "loss_xval": 1.53125, + "num_input_tokens_seen": 310377876, + "step": 4640 + }, + { + "epoch": 0.5266382978723404, + "grad_norm": 12.05350112915039, + "learning_rate": 5e-05, + "loss": 1.1067, + "num_input_tokens_seen": 310445224, + "step": 4641 + }, + { + "epoch": 0.5266382978723404, + "loss": 1.1203186511993408, + "loss_ce": 0.006060821935534477, + "loss_iou": 0.451171875, + "loss_num": 0.042236328125, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 310445224, + "step": 4641 + }, + { + "epoch": 0.5267517730496454, + "grad_norm": 16.567670822143555, + "learning_rate": 5e-05, + "loss": 1.1243, + "num_input_tokens_seen": 310512068, + "step": 4642 + }, + { + "epoch": 0.5267517730496454, + "loss": 1.1895501613616943, + "loss_ce": 0.0054680826142430305, + "loss_iou": 0.49609375, + "loss_num": 0.03857421875, + "loss_xval": 1.1875, + "num_input_tokens_seen": 310512068, + "step": 4642 + }, + { + "epoch": 0.5268652482269504, + "grad_norm": 32.125118255615234, + "learning_rate": 5e-05, + "loss": 1.1708, + "num_input_tokens_seen": 310578628, + "step": 4643 + }, + { + "epoch": 0.5268652482269504, + "loss": 1.1661453247070312, + "loss_ce": 0.006233155727386475, + "loss_iou": 0.47265625, + "loss_num": 0.04296875, + "loss_xval": 1.15625, + "num_input_tokens_seen": 310578628, + "step": 4643 + }, + { + "epoch": 0.5269787234042553, + "grad_norm": 27.711380004882812, + "learning_rate": 5e-05, + "loss": 1.2142, + "num_input_tokens_seen": 310644228, + "step": 4644 + }, + { + "epoch": 0.5269787234042553, + "loss": 1.39715576171875, + "loss_ce": 0.007995533756911755, + "loss_iou": 0.546875, + "loss_num": 0.05810546875, + "loss_xval": 1.390625, + "num_input_tokens_seen": 310644228, + "step": 4644 + }, + { + "epoch": 0.5270921985815603, + "grad_norm": 45.81631088256836, + "learning_rate": 5e-05, + "loss": 1.2061, + "num_input_tokens_seen": 310711844, + "step": 4645 + }, + { + "epoch": 0.5270921985815603, + "loss": 1.336073398590088, + "loss_ce": 0.007948415353894234, + "loss_iou": 0.57421875, + "loss_num": 0.03662109375, + "loss_xval": 1.328125, + "num_input_tokens_seen": 310711844, + "step": 4645 + }, + { + "epoch": 0.5272056737588653, + "grad_norm": 26.340612411499023, + "learning_rate": 5e-05, + "loss": 1.3591, + "num_input_tokens_seen": 310778948, + "step": 4646 + }, + { + "epoch": 0.5272056737588653, + "loss": 1.5495851039886475, + "loss_ce": 0.008325386792421341, + "loss_iou": 0.6015625, + "loss_num": 0.06787109375, + "loss_xval": 1.5390625, + "num_input_tokens_seen": 310778948, + "step": 4646 + }, + { + "epoch": 0.5273191489361703, + "grad_norm": 22.41484832763672, + "learning_rate": 5e-05, + "loss": 1.2152, + "num_input_tokens_seen": 310846520, + "step": 4647 + }, + { + "epoch": 0.5273191489361703, + "loss": 1.278268575668335, + "loss_ce": 0.00678415410220623, + "loss_iou": 0.55078125, + "loss_num": 0.033447265625, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 310846520, + "step": 4647 + }, + { + "epoch": 0.5274326241134751, + "grad_norm": 30.35158348083496, + "learning_rate": 5e-05, + "loss": 1.3046, + "num_input_tokens_seen": 310913476, + "step": 4648 + }, + { + "epoch": 0.5274326241134751, + "loss": 1.3758829832077026, + "loss_ce": 0.004545093048363924, + "loss_iou": 0.5546875, + "loss_num": 0.052734375, + "loss_xval": 1.375, + "num_input_tokens_seen": 310913476, + "step": 4648 + }, + { + "epoch": 0.5275460992907801, + "grad_norm": 41.00208282470703, + "learning_rate": 5e-05, + "loss": 1.3388, + "num_input_tokens_seen": 310979388, + "step": 4649 + }, + { + "epoch": 0.5275460992907801, + "loss": 1.3806915283203125, + "loss_ce": 0.0037384224124252796, + "loss_iou": 0.55078125, + "loss_num": 0.05517578125, + "loss_xval": 1.375, + "num_input_tokens_seen": 310979388, + "step": 4649 + }, + { + "epoch": 0.5276595744680851, + "grad_norm": 30.697595596313477, + "learning_rate": 5e-05, + "loss": 1.4075, + "num_input_tokens_seen": 311045860, + "step": 4650 + }, + { + "epoch": 0.5276595744680851, + "loss": 1.3381447792053223, + "loss_ce": 0.007090025115758181, + "loss_iou": 0.55078125, + "loss_num": 0.04541015625, + "loss_xval": 1.328125, + "num_input_tokens_seen": 311045860, + "step": 4650 + }, + { + "epoch": 0.5277730496453901, + "grad_norm": 38.17683410644531, + "learning_rate": 5e-05, + "loss": 1.1087, + "num_input_tokens_seen": 311113588, + "step": 4651 + }, + { + "epoch": 0.5277730496453901, + "loss": 1.2345848083496094, + "loss_ce": 0.006069236434996128, + "loss_iou": 0.51953125, + "loss_num": 0.03857421875, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 311113588, + "step": 4651 + }, + { + "epoch": 0.527886524822695, + "grad_norm": 31.02022933959961, + "learning_rate": 5e-05, + "loss": 1.1335, + "num_input_tokens_seen": 311179128, + "step": 4652 + }, + { + "epoch": 0.527886524822695, + "loss": 1.0082893371582031, + "loss_ce": 0.008777542971074581, + "loss_iou": 0.37109375, + "loss_num": 0.0517578125, + "loss_xval": 1.0, + "num_input_tokens_seen": 311179128, + "step": 4652 + }, + { + "epoch": 0.528, + "grad_norm": 21.502138137817383, + "learning_rate": 5e-05, + "loss": 1.1803, + "num_input_tokens_seen": 311244744, + "step": 4653 + }, + { + "epoch": 0.528, + "loss": 1.1217608451843262, + "loss_ce": 0.0016436008736491203, + "loss_iou": 0.458984375, + "loss_num": 0.040771484375, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 311244744, + "step": 4653 + }, + { + "epoch": 0.528113475177305, + "grad_norm": 21.942340850830078, + "learning_rate": 5e-05, + "loss": 1.1327, + "num_input_tokens_seen": 311313148, + "step": 4654 + }, + { + "epoch": 0.528113475177305, + "loss": 1.2862398624420166, + "loss_ce": 0.009384337812662125, + "loss_iou": 0.50390625, + "loss_num": 0.052978515625, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 311313148, + "step": 4654 + }, + { + "epoch": 0.52822695035461, + "grad_norm": 32.58049392700195, + "learning_rate": 5e-05, + "loss": 1.0472, + "num_input_tokens_seen": 311379984, + "step": 4655 + }, + { + "epoch": 0.52822695035461, + "loss": 1.0356497764587402, + "loss_ce": 0.00586458295583725, + "loss_iou": 0.447265625, + "loss_num": 0.02734375, + "loss_xval": 1.03125, + "num_input_tokens_seen": 311379984, + "step": 4655 + }, + { + "epoch": 0.5283404255319148, + "grad_norm": 30.376636505126953, + "learning_rate": 5e-05, + "loss": 1.2411, + "num_input_tokens_seen": 311446988, + "step": 4656 + }, + { + "epoch": 0.5283404255319148, + "loss": 1.224623680114746, + "loss_ce": 0.00489705428481102, + "loss_iou": 0.5390625, + "loss_num": 0.0277099609375, + "loss_xval": 1.21875, + "num_input_tokens_seen": 311446988, + "step": 4656 + }, + { + "epoch": 0.5284539007092198, + "grad_norm": 10.693048477172852, + "learning_rate": 5e-05, + "loss": 0.8211, + "num_input_tokens_seen": 311513084, + "step": 4657 + }, + { + "epoch": 0.5284539007092198, + "loss": 0.7652876377105713, + "loss_ce": 0.0074751777574419975, + "loss_iou": 0.302734375, + "loss_num": 0.0303955078125, + "loss_xval": 0.7578125, + "num_input_tokens_seen": 311513084, + "step": 4657 + }, + { + "epoch": 0.5285673758865248, + "grad_norm": 13.682548522949219, + "learning_rate": 5e-05, + "loss": 1.038, + "num_input_tokens_seen": 311579864, + "step": 4658 + }, + { + "epoch": 0.5285673758865248, + "loss": 1.1565964221954346, + "loss_ce": 0.007670738734304905, + "loss_iou": 0.455078125, + "loss_num": 0.047607421875, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 311579864, + "step": 4658 + }, + { + "epoch": 0.5286808510638298, + "grad_norm": 46.180870056152344, + "learning_rate": 5e-05, + "loss": 1.2288, + "num_input_tokens_seen": 311646784, + "step": 4659 + }, + { + "epoch": 0.5286808510638298, + "loss": 1.193056344985962, + "loss_ce": 0.008486030623316765, + "loss_iou": 0.451171875, + "loss_num": 0.055908203125, + "loss_xval": 1.1875, + "num_input_tokens_seen": 311646784, + "step": 4659 + }, + { + "epoch": 0.5287943262411348, + "grad_norm": 21.4257869720459, + "learning_rate": 5e-05, + "loss": 1.1506, + "num_input_tokens_seen": 311713812, + "step": 4660 + }, + { + "epoch": 0.5287943262411348, + "loss": 1.0755343437194824, + "loss_ce": 0.005221847910434008, + "loss_iou": 0.44140625, + "loss_num": 0.037109375, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 311713812, + "step": 4660 + }, + { + "epoch": 0.5289078014184397, + "grad_norm": 62.838096618652344, + "learning_rate": 5e-05, + "loss": 1.2058, + "num_input_tokens_seen": 311779580, + "step": 4661 + }, + { + "epoch": 0.5289078014184397, + "loss": 1.1098612546920776, + "loss_ce": 0.0056742411106824875, + "loss_iou": 0.453125, + "loss_num": 0.039306640625, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 311779580, + "step": 4661 + }, + { + "epoch": 0.5290212765957447, + "grad_norm": 34.552696228027344, + "learning_rate": 5e-05, + "loss": 1.2524, + "num_input_tokens_seen": 311846084, + "step": 4662 + }, + { + "epoch": 0.5290212765957447, + "loss": 1.319307804107666, + "loss_ce": 0.005831305403262377, + "loss_iou": 0.546875, + "loss_num": 0.04345703125, + "loss_xval": 1.3125, + "num_input_tokens_seen": 311846084, + "step": 4662 + }, + { + "epoch": 0.5291347517730497, + "grad_norm": 31.46341323852539, + "learning_rate": 5e-05, + "loss": 1.3214, + "num_input_tokens_seen": 311913596, + "step": 4663 + }, + { + "epoch": 0.5291347517730497, + "loss": 1.2653447389602661, + "loss_ce": 0.008020522072911263, + "loss_iou": 0.51953125, + "loss_num": 0.043212890625, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 311913596, + "step": 4663 + }, + { + "epoch": 0.5292482269503546, + "grad_norm": 32.26939010620117, + "learning_rate": 5e-05, + "loss": 1.1446, + "num_input_tokens_seen": 311979980, + "step": 4664 + }, + { + "epoch": 0.5292482269503546, + "loss": 1.105199933052063, + "loss_ce": 0.005483726970851421, + "loss_iou": 0.455078125, + "loss_num": 0.0380859375, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 311979980, + "step": 4664 + }, + { + "epoch": 0.5293617021276595, + "grad_norm": 26.533721923828125, + "learning_rate": 5e-05, + "loss": 1.2505, + "num_input_tokens_seen": 312046300, + "step": 4665 + }, + { + "epoch": 0.5293617021276595, + "loss": 1.2280102968215942, + "loss_ce": 0.004377562087029219, + "loss_iou": 0.51953125, + "loss_num": 0.03662109375, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 312046300, + "step": 4665 + }, + { + "epoch": 0.5294751773049645, + "grad_norm": 18.25514793395996, + "learning_rate": 5e-05, + "loss": 1.1255, + "num_input_tokens_seen": 312114048, + "step": 4666 + }, + { + "epoch": 0.5294751773049645, + "loss": 1.0570873022079468, + "loss_ce": 0.009235700592398643, + "loss_iou": 0.455078125, + "loss_num": 0.0279541015625, + "loss_xval": 1.046875, + "num_input_tokens_seen": 312114048, + "step": 4666 + }, + { + "epoch": 0.5295886524822695, + "grad_norm": 152.15650939941406, + "learning_rate": 5e-05, + "loss": 1.3794, + "num_input_tokens_seen": 312180904, + "step": 4667 + }, + { + "epoch": 0.5295886524822695, + "loss": 1.3507810831069946, + "loss_ce": 0.008984271436929703, + "loss_iou": 0.54296875, + "loss_num": 0.050537109375, + "loss_xval": 1.34375, + "num_input_tokens_seen": 312180904, + "step": 4667 + }, + { + "epoch": 0.5297021276595745, + "grad_norm": 38.743614196777344, + "learning_rate": 5e-05, + "loss": 1.4342, + "num_input_tokens_seen": 312247628, + "step": 4668 + }, + { + "epoch": 0.5297021276595745, + "loss": 1.43217134475708, + "loss_ce": 0.007366720587015152, + "loss_iou": 0.5078125, + "loss_num": 0.08154296875, + "loss_xval": 1.421875, + "num_input_tokens_seen": 312247628, + "step": 4668 + }, + { + "epoch": 0.5298156028368795, + "grad_norm": 55.6072998046875, + "learning_rate": 5e-05, + "loss": 1.4311, + "num_input_tokens_seen": 312314188, + "step": 4669 + }, + { + "epoch": 0.5298156028368795, + "loss": 1.5004640817642212, + "loss_ce": 0.007300086319446564, + "loss_iou": 0.625, + "loss_num": 0.04931640625, + "loss_xval": 1.4921875, + "num_input_tokens_seen": 312314188, + "step": 4669 + }, + { + "epoch": 0.5299290780141844, + "grad_norm": 17.811426162719727, + "learning_rate": 5e-05, + "loss": 0.9277, + "num_input_tokens_seen": 312381152, + "step": 4670 + }, + { + "epoch": 0.5299290780141844, + "loss": 0.9708825349807739, + "loss_ce": 0.0031091044656932354, + "loss_iou": 0.423828125, + "loss_num": 0.02392578125, + "loss_xval": 0.96875, + "num_input_tokens_seen": 312381152, + "step": 4670 + }, + { + "epoch": 0.5300425531914894, + "grad_norm": 58.95274353027344, + "learning_rate": 5e-05, + "loss": 1.2571, + "num_input_tokens_seen": 312447208, + "step": 4671 + }, + { + "epoch": 0.5300425531914894, + "loss": 1.156907320022583, + "loss_ce": 0.0060285162180662155, + "loss_iou": 0.482421875, + "loss_num": 0.037353515625, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 312447208, + "step": 4671 + }, + { + "epoch": 0.5301560283687943, + "grad_norm": 25.92483901977539, + "learning_rate": 5e-05, + "loss": 1.0798, + "num_input_tokens_seen": 312514636, + "step": 4672 + }, + { + "epoch": 0.5301560283687943, + "loss": 1.0847114324569702, + "loss_ce": 0.005609821993857622, + "loss_iou": 0.4609375, + "loss_num": 0.03125, + "loss_xval": 1.078125, + "num_input_tokens_seen": 312514636, + "step": 4672 + }, + { + "epoch": 0.5302695035460993, + "grad_norm": 37.587711334228516, + "learning_rate": 5e-05, + "loss": 1.2253, + "num_input_tokens_seen": 312581436, + "step": 4673 + }, + { + "epoch": 0.5302695035460993, + "loss": 1.2647634744644165, + "loss_ce": 0.0045094965025782585, + "loss_iou": 0.54296875, + "loss_num": 0.034423828125, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 312581436, + "step": 4673 + }, + { + "epoch": 0.5303829787234042, + "grad_norm": 25.653291702270508, + "learning_rate": 5e-05, + "loss": 1.3493, + "num_input_tokens_seen": 312648364, + "step": 4674 + }, + { + "epoch": 0.5303829787234042, + "loss": 1.287463903427124, + "loss_ce": 0.0027959824074059725, + "loss_iou": 0.53515625, + "loss_num": 0.043212890625, + "loss_xval": 1.28125, + "num_input_tokens_seen": 312648364, + "step": 4674 + }, + { + "epoch": 0.5304964539007092, + "grad_norm": 25.223440170288086, + "learning_rate": 5e-05, + "loss": 1.2804, + "num_input_tokens_seen": 312715652, + "step": 4675 + }, + { + "epoch": 0.5304964539007092, + "loss": 1.211135745048523, + "loss_ce": 0.008010799065232277, + "loss_iou": 0.498046875, + "loss_num": 0.041259765625, + "loss_xval": 1.203125, + "num_input_tokens_seen": 312715652, + "step": 4675 + }, + { + "epoch": 0.5306099290780142, + "grad_norm": 24.934459686279297, + "learning_rate": 5e-05, + "loss": 1.2159, + "num_input_tokens_seen": 312782004, + "step": 4676 + }, + { + "epoch": 0.5306099290780142, + "loss": 1.3933742046356201, + "loss_ce": 0.007632028311491013, + "loss_iou": 0.55078125, + "loss_num": 0.05712890625, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 312782004, + "step": 4676 + }, + { + "epoch": 0.5307234042553192, + "grad_norm": 20.338687896728516, + "learning_rate": 5e-05, + "loss": 1.2467, + "num_input_tokens_seen": 312849500, + "step": 4677 + }, + { + "epoch": 0.5307234042553192, + "loss": 1.2428534030914307, + "loss_ce": 0.007990077137947083, + "loss_iou": 0.5078125, + "loss_num": 0.043212890625, + "loss_xval": 1.234375, + "num_input_tokens_seen": 312849500, + "step": 4677 + }, + { + "epoch": 0.5308368794326241, + "grad_norm": 105.20232391357422, + "learning_rate": 5e-05, + "loss": 1.2271, + "num_input_tokens_seen": 312917588, + "step": 4678 + }, + { + "epoch": 0.5308368794326241, + "loss": 1.1865661144256592, + "loss_ce": 0.0034606093540787697, + "loss_iou": 0.50390625, + "loss_num": 0.03466796875, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 312917588, + "step": 4678 + }, + { + "epoch": 0.5309503546099291, + "grad_norm": 16.133655548095703, + "learning_rate": 5e-05, + "loss": 1.0754, + "num_input_tokens_seen": 312983460, + "step": 4679 + }, + { + "epoch": 0.5309503546099291, + "loss": 1.1083731651306152, + "loss_ce": 0.009008023887872696, + "loss_iou": 0.453125, + "loss_num": 0.03857421875, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 312983460, + "step": 4679 + }, + { + "epoch": 0.531063829787234, + "grad_norm": 31.425762176513672, + "learning_rate": 5e-05, + "loss": 1.2749, + "num_input_tokens_seen": 313050364, + "step": 4680 + }, + { + "epoch": 0.531063829787234, + "loss": 1.3047724962234497, + "loss_ce": 0.007409205194562674, + "loss_iou": 0.51171875, + "loss_num": 0.05517578125, + "loss_xval": 1.296875, + "num_input_tokens_seen": 313050364, + "step": 4680 + }, + { + "epoch": 0.531177304964539, + "grad_norm": 32.50326156616211, + "learning_rate": 5e-05, + "loss": 1.3162, + "num_input_tokens_seen": 313117900, + "step": 4681 + }, + { + "epoch": 0.531177304964539, + "loss": 1.5829122066497803, + "loss_ce": 0.007228570990264416, + "loss_iou": 0.6484375, + "loss_num": 0.05517578125, + "loss_xval": 1.578125, + "num_input_tokens_seen": 313117900, + "step": 4681 + }, + { + "epoch": 0.5312907801418439, + "grad_norm": 45.606868743896484, + "learning_rate": 5e-05, + "loss": 1.3391, + "num_input_tokens_seen": 313184272, + "step": 4682 + }, + { + "epoch": 0.5312907801418439, + "loss": 1.3827836513519287, + "loss_ce": 0.0063188448548316956, + "loss_iou": 0.51953125, + "loss_num": 0.06689453125, + "loss_xval": 1.375, + "num_input_tokens_seen": 313184272, + "step": 4682 + }, + { + "epoch": 0.5314042553191489, + "grad_norm": 17.679515838623047, + "learning_rate": 5e-05, + "loss": 1.3443, + "num_input_tokens_seen": 313250104, + "step": 4683 + }, + { + "epoch": 0.5314042553191489, + "loss": 1.1428254842758179, + "loss_ce": 0.004153595305979252, + "loss_iou": 0.48828125, + "loss_num": 0.031982421875, + "loss_xval": 1.140625, + "num_input_tokens_seen": 313250104, + "step": 4683 + }, + { + "epoch": 0.5315177304964539, + "grad_norm": 11.92470932006836, + "learning_rate": 5e-05, + "loss": 1.1258, + "num_input_tokens_seen": 313316308, + "step": 4684 + }, + { + "epoch": 0.5315177304964539, + "loss": 1.2787479162216187, + "loss_ce": 0.008728396147489548, + "loss_iou": 0.48828125, + "loss_num": 0.058349609375, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 313316308, + "step": 4684 + }, + { + "epoch": 0.5316312056737589, + "grad_norm": 19.967761993408203, + "learning_rate": 5e-05, + "loss": 1.2694, + "num_input_tokens_seen": 313383832, + "step": 4685 + }, + { + "epoch": 0.5316312056737589, + "loss": 1.2809429168701172, + "loss_ce": 0.008481951430439949, + "loss_iou": 0.48828125, + "loss_num": 0.0595703125, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 313383832, + "step": 4685 + }, + { + "epoch": 0.5317446808510639, + "grad_norm": 43.32267761230469, + "learning_rate": 5e-05, + "loss": 1.2653, + "num_input_tokens_seen": 313452016, + "step": 4686 + }, + { + "epoch": 0.5317446808510639, + "loss": 1.1934542655944824, + "loss_ce": 0.00595430564135313, + "loss_iou": 0.51171875, + "loss_num": 0.032470703125, + "loss_xval": 1.1875, + "num_input_tokens_seen": 313452016, + "step": 4686 + }, + { + "epoch": 0.5318581560283688, + "grad_norm": 27.24169158935547, + "learning_rate": 5e-05, + "loss": 1.3077, + "num_input_tokens_seen": 313519404, + "step": 4687 + }, + { + "epoch": 0.5318581560283688, + "loss": 1.1891229152679443, + "loss_ce": 0.008458761498332024, + "loss_iou": 0.5078125, + "loss_num": 0.033935546875, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 313519404, + "step": 4687 + }, + { + "epoch": 0.5319716312056738, + "grad_norm": 25.238801956176758, + "learning_rate": 5e-05, + "loss": 1.13, + "num_input_tokens_seen": 313585604, + "step": 4688 + }, + { + "epoch": 0.5319716312056738, + "loss": 1.1129567623138428, + "loss_ce": 0.007488007191568613, + "loss_iou": 0.4375, + "loss_num": 0.0458984375, + "loss_xval": 1.109375, + "num_input_tokens_seen": 313585604, + "step": 4688 + }, + { + "epoch": 0.5320851063829787, + "grad_norm": 28.945274353027344, + "learning_rate": 5e-05, + "loss": 1.1952, + "num_input_tokens_seen": 313652796, + "step": 4689 + }, + { + "epoch": 0.5320851063829787, + "loss": 1.295708179473877, + "loss_ce": 0.004204313270747662, + "loss_iou": 0.515625, + "loss_num": 0.051513671875, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 313652796, + "step": 4689 + }, + { + "epoch": 0.5321985815602837, + "grad_norm": 24.5340576171875, + "learning_rate": 5e-05, + "loss": 1.3776, + "num_input_tokens_seen": 313720944, + "step": 4690 + }, + { + "epoch": 0.5321985815602837, + "loss": 1.5790117979049683, + "loss_ce": 0.007722758688032627, + "loss_iou": 0.62890625, + "loss_num": 0.062255859375, + "loss_xval": 1.5703125, + "num_input_tokens_seen": 313720944, + "step": 4690 + }, + { + "epoch": 0.5323120567375886, + "grad_norm": 20.74738121032715, + "learning_rate": 5e-05, + "loss": 1.1627, + "num_input_tokens_seen": 313786932, + "step": 4691 + }, + { + "epoch": 0.5323120567375886, + "loss": 1.128517985343933, + "loss_ce": 0.0022972687147557735, + "loss_iou": 0.4609375, + "loss_num": 0.040771484375, + "loss_xval": 1.125, + "num_input_tokens_seen": 313786932, + "step": 4691 + }, + { + "epoch": 0.5324255319148936, + "grad_norm": 22.021183013916016, + "learning_rate": 5e-05, + "loss": 0.9863, + "num_input_tokens_seen": 313854136, + "step": 4692 + }, + { + "epoch": 0.5324255319148936, + "loss": 1.0740491151809692, + "loss_ce": 0.008131083101034164, + "loss_iou": 0.40234375, + "loss_num": 0.052001953125, + "loss_xval": 1.0625, + "num_input_tokens_seen": 313854136, + "step": 4692 + }, + { + "epoch": 0.5325390070921986, + "grad_norm": 47.43151092529297, + "learning_rate": 5e-05, + "loss": 1.1794, + "num_input_tokens_seen": 313920584, + "step": 4693 + }, + { + "epoch": 0.5325390070921986, + "loss": 1.0861916542053223, + "loss_ce": 0.0026955886278301477, + "loss_iou": 0.46484375, + "loss_num": 0.031005859375, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 313920584, + "step": 4693 + }, + { + "epoch": 0.5326524822695036, + "grad_norm": 24.35834503173828, + "learning_rate": 5e-05, + "loss": 1.37, + "num_input_tokens_seen": 313986472, + "step": 4694 + }, + { + "epoch": 0.5326524822695036, + "loss": 1.4392571449279785, + "loss_ce": 0.0076164607889950275, + "loss_iou": 0.55859375, + "loss_num": 0.06201171875, + "loss_xval": 1.4296875, + "num_input_tokens_seen": 313986472, + "step": 4694 + }, + { + "epoch": 0.5327659574468085, + "grad_norm": 27.11945152282715, + "learning_rate": 5e-05, + "loss": 1.1306, + "num_input_tokens_seen": 314052684, + "step": 4695 + }, + { + "epoch": 0.5327659574468085, + "loss": 1.0071347951889038, + "loss_ce": 0.005669940263032913, + "loss_iou": 0.419921875, + "loss_num": 0.031982421875, + "loss_xval": 1.0, + "num_input_tokens_seen": 314052684, + "step": 4695 + }, + { + "epoch": 0.5328794326241135, + "grad_norm": 33.87855911254883, + "learning_rate": 5e-05, + "loss": 1.1916, + "num_input_tokens_seen": 314118632, + "step": 4696 + }, + { + "epoch": 0.5328794326241135, + "loss": 1.1187200546264648, + "loss_ce": 0.011298095807433128, + "loss_iou": 0.455078125, + "loss_num": 0.03955078125, + "loss_xval": 1.109375, + "num_input_tokens_seen": 314118632, + "step": 4696 + }, + { + "epoch": 0.5329929078014184, + "grad_norm": 31.341209411621094, + "learning_rate": 5e-05, + "loss": 1.1529, + "num_input_tokens_seen": 314185352, + "step": 4697 + }, + { + "epoch": 0.5329929078014184, + "loss": 1.0891289710998535, + "loss_ce": 0.006121098063886166, + "loss_iou": 0.45703125, + "loss_num": 0.033935546875, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 314185352, + "step": 4697 + }, + { + "epoch": 0.5331063829787234, + "grad_norm": 24.293134689331055, + "learning_rate": 5e-05, + "loss": 1.2168, + "num_input_tokens_seen": 314252700, + "step": 4698 + }, + { + "epoch": 0.5331063829787234, + "loss": 1.0676288604736328, + "loss_ce": 0.008058507926762104, + "loss_iou": 0.4453125, + "loss_num": 0.033447265625, + "loss_xval": 1.0625, + "num_input_tokens_seen": 314252700, + "step": 4698 + }, + { + "epoch": 0.5332198581560283, + "grad_norm": 16.035444259643555, + "learning_rate": 5e-05, + "loss": 1.1659, + "num_input_tokens_seen": 314319204, + "step": 4699 + }, + { + "epoch": 0.5332198581560283, + "loss": 1.1516387462615967, + "loss_ce": 0.0071074096485972404, + "loss_iou": 0.462890625, + "loss_num": 0.0439453125, + "loss_xval": 1.140625, + "num_input_tokens_seen": 314319204, + "step": 4699 + }, + { + "epoch": 0.5333333333333333, + "grad_norm": 34.838680267333984, + "learning_rate": 5e-05, + "loss": 1.1669, + "num_input_tokens_seen": 314385592, + "step": 4700 + }, + { + "epoch": 0.5333333333333333, + "loss": 1.2630599737167358, + "loss_ce": 0.007200608029961586, + "loss_iou": 0.53125, + "loss_num": 0.039306640625, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 314385592, + "step": 4700 + }, + { + "epoch": 0.5334468085106383, + "grad_norm": 33.4798583984375, + "learning_rate": 5e-05, + "loss": 1.6662, + "num_input_tokens_seen": 314452940, + "step": 4701 + }, + { + "epoch": 0.5334468085106383, + "loss": 1.6916515827178955, + "loss_ce": 0.005128131248056889, + "loss_iou": 0.67578125, + "loss_num": 0.06689453125, + "loss_xval": 1.6875, + "num_input_tokens_seen": 314452940, + "step": 4701 + }, + { + "epoch": 0.5335602836879433, + "grad_norm": 21.17515754699707, + "learning_rate": 5e-05, + "loss": 1.2759, + "num_input_tokens_seen": 314519464, + "step": 4702 + }, + { + "epoch": 0.5335602836879433, + "loss": 1.4868667125701904, + "loss_ce": 0.005421414505690336, + "loss_iou": 0.62109375, + "loss_num": 0.04833984375, + "loss_xval": 1.484375, + "num_input_tokens_seen": 314519464, + "step": 4702 + }, + { + "epoch": 0.5336737588652483, + "grad_norm": 20.644672393798828, + "learning_rate": 5e-05, + "loss": 1.1801, + "num_input_tokens_seen": 314586920, + "step": 4703 + }, + { + "epoch": 0.5336737588652483, + "loss": 1.1495078802108765, + "loss_ce": 0.005464930087327957, + "loss_iou": 0.48046875, + "loss_num": 0.036376953125, + "loss_xval": 1.140625, + "num_input_tokens_seen": 314586920, + "step": 4703 + }, + { + "epoch": 0.5337872340425532, + "grad_norm": 21.027236938476562, + "learning_rate": 5e-05, + "loss": 1.4468, + "num_input_tokens_seen": 314653064, + "step": 4704 + }, + { + "epoch": 0.5337872340425532, + "loss": 1.7286500930786133, + "loss_ce": 0.01527119055390358, + "loss_iou": 0.6484375, + "loss_num": 0.08349609375, + "loss_xval": 1.7109375, + "num_input_tokens_seen": 314653064, + "step": 4704 + }, + { + "epoch": 0.5339007092198581, + "grad_norm": 32.18444061279297, + "learning_rate": 5e-05, + "loss": 1.2327, + "num_input_tokens_seen": 314720156, + "step": 4705 + }, + { + "epoch": 0.5339007092198581, + "loss": 1.2815399169921875, + "loss_ce": 0.005660962779074907, + "loss_iou": 0.50390625, + "loss_num": 0.05419921875, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 314720156, + "step": 4705 + }, + { + "epoch": 0.5340141843971631, + "grad_norm": 29.985868453979492, + "learning_rate": 5e-05, + "loss": 1.5076, + "num_input_tokens_seen": 314787348, + "step": 4706 + }, + { + "epoch": 0.5340141843971631, + "loss": 1.5332813262939453, + "loss_ce": 0.009355594404041767, + "loss_iou": 0.640625, + "loss_num": 0.048583984375, + "loss_xval": 1.5234375, + "num_input_tokens_seen": 314787348, + "step": 4706 + }, + { + "epoch": 0.5341276595744681, + "grad_norm": 18.91018295288086, + "learning_rate": 5e-05, + "loss": 0.9769, + "num_input_tokens_seen": 314854680, + "step": 4707 + }, + { + "epoch": 0.5341276595744681, + "loss": 0.8532224893569946, + "loss_ce": 0.00410136254504323, + "loss_iou": 0.357421875, + "loss_num": 0.0269775390625, + "loss_xval": 0.84765625, + "num_input_tokens_seen": 314854680, + "step": 4707 + }, + { + "epoch": 0.534241134751773, + "grad_norm": 23.82096290588379, + "learning_rate": 5e-05, + "loss": 1.2727, + "num_input_tokens_seen": 314921620, + "step": 4708 + }, + { + "epoch": 0.534241134751773, + "loss": 1.242676019668579, + "loss_ce": 0.006347866728901863, + "loss_iou": 0.5078125, + "loss_num": 0.04443359375, + "loss_xval": 1.234375, + "num_input_tokens_seen": 314921620, + "step": 4708 + }, + { + "epoch": 0.534354609929078, + "grad_norm": 46.82461166381836, + "learning_rate": 5e-05, + "loss": 1.3954, + "num_input_tokens_seen": 314988104, + "step": 4709 + }, + { + "epoch": 0.534354609929078, + "loss": 1.36020827293396, + "loss_ce": 0.003274664282798767, + "loss_iou": 0.58203125, + "loss_num": 0.038330078125, + "loss_xval": 1.359375, + "num_input_tokens_seen": 314988104, + "step": 4709 + }, + { + "epoch": 0.534468085106383, + "grad_norm": 35.21803665161133, + "learning_rate": 5e-05, + "loss": 1.5266, + "num_input_tokens_seen": 315054668, + "step": 4710 + }, + { + "epoch": 0.534468085106383, + "loss": 1.6310980319976807, + "loss_ce": 0.005121449939906597, + "loss_iou": 0.69921875, + "loss_num": 0.046142578125, + "loss_xval": 1.625, + "num_input_tokens_seen": 315054668, + "step": 4710 + }, + { + "epoch": 0.534581560283688, + "grad_norm": 13.88823127746582, + "learning_rate": 5e-05, + "loss": 1.1113, + "num_input_tokens_seen": 315122140, + "step": 4711 + }, + { + "epoch": 0.534581560283688, + "loss": 1.1194136142730713, + "loss_ce": 0.010038651525974274, + "loss_iou": 0.466796875, + "loss_num": 0.034912109375, + "loss_xval": 1.109375, + "num_input_tokens_seen": 315122140, + "step": 4711 + }, + { + "epoch": 0.534695035460993, + "grad_norm": 22.350244522094727, + "learning_rate": 5e-05, + "loss": 1.3017, + "num_input_tokens_seen": 315188924, + "step": 4712 + }, + { + "epoch": 0.534695035460993, + "loss": 0.9937403798103333, + "loss_ce": 0.007900535129010677, + "loss_iou": 0.421875, + "loss_num": 0.0281982421875, + "loss_xval": 0.984375, + "num_input_tokens_seen": 315188924, + "step": 4712 + }, + { + "epoch": 0.5348085106382978, + "grad_norm": 27.30362319946289, + "learning_rate": 5e-05, + "loss": 1.0111, + "num_input_tokens_seen": 315256044, + "step": 4713 + }, + { + "epoch": 0.5348085106382978, + "loss": 0.9349254369735718, + "loss_ce": 0.00328485993668437, + "loss_iou": 0.384765625, + "loss_num": 0.0322265625, + "loss_xval": 0.9296875, + "num_input_tokens_seen": 315256044, + "step": 4713 + }, + { + "epoch": 0.5349219858156028, + "grad_norm": 27.423954010009766, + "learning_rate": 5e-05, + "loss": 1.2541, + "num_input_tokens_seen": 315322764, + "step": 4714 + }, + { + "epoch": 0.5349219858156028, + "loss": 1.2227017879486084, + "loss_ce": 0.007858037948608398, + "loss_iou": 0.515625, + "loss_num": 0.0361328125, + "loss_xval": 1.21875, + "num_input_tokens_seen": 315322764, + "step": 4714 + }, + { + "epoch": 0.5350354609929078, + "grad_norm": 28.695859909057617, + "learning_rate": 5e-05, + "loss": 1.1384, + "num_input_tokens_seen": 315389900, + "step": 4715 + }, + { + "epoch": 0.5350354609929078, + "loss": 1.1306686401367188, + "loss_ce": 0.006156827323138714, + "loss_iou": 0.478515625, + "loss_num": 0.03369140625, + "loss_xval": 1.125, + "num_input_tokens_seen": 315389900, + "step": 4715 + }, + { + "epoch": 0.5351489361702128, + "grad_norm": 36.33778762817383, + "learning_rate": 5e-05, + "loss": 1.1463, + "num_input_tokens_seen": 315456920, + "step": 4716 + }, + { + "epoch": 0.5351489361702128, + "loss": 1.2302217483520508, + "loss_ce": 0.010495135560631752, + "loss_iou": 0.51171875, + "loss_num": 0.0390625, + "loss_xval": 1.21875, + "num_input_tokens_seen": 315456920, + "step": 4716 + }, + { + "epoch": 0.5352624113475177, + "grad_norm": 24.46251678466797, + "learning_rate": 5e-05, + "loss": 1.1429, + "num_input_tokens_seen": 315522868, + "step": 4717 + }, + { + "epoch": 0.5352624113475177, + "loss": 1.309968113899231, + "loss_ce": 0.007233710959553719, + "loss_iou": 0.53515625, + "loss_num": 0.045654296875, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 315522868, + "step": 4717 + }, + { + "epoch": 0.5353758865248227, + "grad_norm": 56.11351776123047, + "learning_rate": 5e-05, + "loss": 1.4167, + "num_input_tokens_seen": 315590648, + "step": 4718 + }, + { + "epoch": 0.5353758865248227, + "loss": 1.44807767868042, + "loss_ce": 0.0047182124108076096, + "loss_iou": 0.5703125, + "loss_num": 0.06103515625, + "loss_xval": 1.4453125, + "num_input_tokens_seen": 315590648, + "step": 4718 + }, + { + "epoch": 0.5354893617021277, + "grad_norm": 41.664337158203125, + "learning_rate": 5e-05, + "loss": 1.4597, + "num_input_tokens_seen": 315656888, + "step": 4719 + }, + { + "epoch": 0.5354893617021277, + "loss": 1.2465524673461914, + "loss_ce": 0.007294557522982359, + "loss_iou": 0.494140625, + "loss_num": 0.05029296875, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 315656888, + "step": 4719 + }, + { + "epoch": 0.5356028368794327, + "grad_norm": 39.46324920654297, + "learning_rate": 5e-05, + "loss": 1.2042, + "num_input_tokens_seen": 315723476, + "step": 4720 + }, + { + "epoch": 0.5356028368794327, + "loss": 0.977219820022583, + "loss_ce": 0.005051825195550919, + "loss_iou": 0.412109375, + "loss_num": 0.0291748046875, + "loss_xval": 0.97265625, + "num_input_tokens_seen": 315723476, + "step": 4720 + }, + { + "epoch": 0.5357163120567376, + "grad_norm": 39.71000671386719, + "learning_rate": 5e-05, + "loss": 1.2364, + "num_input_tokens_seen": 315789660, + "step": 4721 + }, + { + "epoch": 0.5357163120567376, + "loss": 1.1738874912261963, + "loss_ce": 0.004942232742905617, + "loss_iou": 0.484375, + "loss_num": 0.03955078125, + "loss_xval": 1.171875, + "num_input_tokens_seen": 315789660, + "step": 4721 + }, + { + "epoch": 0.5358297872340425, + "grad_norm": 29.486955642700195, + "learning_rate": 5e-05, + "loss": 1.3712, + "num_input_tokens_seen": 315856852, + "step": 4722 + }, + { + "epoch": 0.5358297872340425, + "loss": 1.4096903800964355, + "loss_ce": 0.007346605882048607, + "loss_iou": 0.55859375, + "loss_num": 0.056396484375, + "loss_xval": 1.40625, + "num_input_tokens_seen": 315856852, + "step": 4722 + }, + { + "epoch": 0.5359432624113475, + "grad_norm": 43.22416687011719, + "learning_rate": 5e-05, + "loss": 1.0268, + "num_input_tokens_seen": 315924552, + "step": 4723 + }, + { + "epoch": 0.5359432624113475, + "loss": 0.8982588648796082, + "loss_ce": 0.00421591242775321, + "loss_iou": 0.38671875, + "loss_num": 0.0238037109375, + "loss_xval": 0.89453125, + "num_input_tokens_seen": 315924552, + "step": 4723 + }, + { + "epoch": 0.5360567375886525, + "grad_norm": 25.96942710876465, + "learning_rate": 5e-05, + "loss": 1.483, + "num_input_tokens_seen": 315991544, + "step": 4724 + }, + { + "epoch": 0.5360567375886525, + "loss": 1.4934260845184326, + "loss_ce": 0.007586241699755192, + "loss_iou": 0.62109375, + "loss_num": 0.049560546875, + "loss_xval": 1.484375, + "num_input_tokens_seen": 315991544, + "step": 4724 + }, + { + "epoch": 0.5361702127659574, + "grad_norm": 21.797975540161133, + "learning_rate": 5e-05, + "loss": 1.1858, + "num_input_tokens_seen": 316058324, + "step": 4725 + }, + { + "epoch": 0.5361702127659574, + "loss": 1.2208123207092285, + "loss_ce": 0.009386453777551651, + "loss_iou": 0.47265625, + "loss_num": 0.05322265625, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 316058324, + "step": 4725 + }, + { + "epoch": 0.5362836879432624, + "grad_norm": 33.50748825073242, + "learning_rate": 5e-05, + "loss": 1.2044, + "num_input_tokens_seen": 316125420, + "step": 4726 + }, + { + "epoch": 0.5362836879432624, + "loss": 1.3057481050491333, + "loss_ce": 0.004478588700294495, + "loss_iou": 0.515625, + "loss_num": 0.053955078125, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 316125420, + "step": 4726 + }, + { + "epoch": 0.5363971631205674, + "grad_norm": 109.28750610351562, + "learning_rate": 5e-05, + "loss": 1.4153, + "num_input_tokens_seen": 316191612, + "step": 4727 + }, + { + "epoch": 0.5363971631205674, + "loss": 1.3740582466125488, + "loss_ce": 0.013218428939580917, + "loss_iou": 0.5546875, + "loss_num": 0.05029296875, + "loss_xval": 1.359375, + "num_input_tokens_seen": 316191612, + "step": 4727 + }, + { + "epoch": 0.5365106382978724, + "grad_norm": 32.829505920410156, + "learning_rate": 5e-05, + "loss": 1.1631, + "num_input_tokens_seen": 316258488, + "step": 4728 + }, + { + "epoch": 0.5365106382978724, + "loss": 1.258071780204773, + "loss_ce": 0.005142104811966419, + "loss_iou": 0.5234375, + "loss_num": 0.04052734375, + "loss_xval": 1.25, + "num_input_tokens_seen": 316258488, + "step": 4728 + }, + { + "epoch": 0.5366241134751774, + "grad_norm": 29.99812889099121, + "learning_rate": 5e-05, + "loss": 1.1935, + "num_input_tokens_seen": 316324048, + "step": 4729 + }, + { + "epoch": 0.5366241134751774, + "loss": 0.9641522169113159, + "loss_ce": 0.0032147523015737534, + "loss_iou": 0.41796875, + "loss_num": 0.0245361328125, + "loss_xval": 0.9609375, + "num_input_tokens_seen": 316324048, + "step": 4729 + }, + { + "epoch": 0.5367375886524822, + "grad_norm": 28.381093978881836, + "learning_rate": 5e-05, + "loss": 1.2598, + "num_input_tokens_seen": 316390512, + "step": 4730 + }, + { + "epoch": 0.5367375886524822, + "loss": 1.2156283855438232, + "loss_ce": 0.009085370227694511, + "loss_iou": 0.52734375, + "loss_num": 0.0311279296875, + "loss_xval": 1.203125, + "num_input_tokens_seen": 316390512, + "step": 4730 + }, + { + "epoch": 0.5368510638297872, + "grad_norm": 35.588172912597656, + "learning_rate": 5e-05, + "loss": 1.1212, + "num_input_tokens_seen": 316457504, + "step": 4731 + }, + { + "epoch": 0.5368510638297872, + "loss": 0.9272127151489258, + "loss_ce": 0.00777909392490983, + "loss_iou": 0.353515625, + "loss_num": 0.042724609375, + "loss_xval": 0.91796875, + "num_input_tokens_seen": 316457504, + "step": 4731 + }, + { + "epoch": 0.5369645390070922, + "grad_norm": 28.05521583557129, + "learning_rate": 5e-05, + "loss": 1.2383, + "num_input_tokens_seen": 316525352, + "step": 4732 + }, + { + "epoch": 0.5369645390070922, + "loss": 1.5586949586868286, + "loss_ce": 0.0069372160360217094, + "loss_iou": 0.6328125, + "loss_num": 0.057861328125, + "loss_xval": 1.5546875, + "num_input_tokens_seen": 316525352, + "step": 4732 + }, + { + "epoch": 0.5370780141843972, + "grad_norm": 40.27638626098633, + "learning_rate": 5e-05, + "loss": 1.1161, + "num_input_tokens_seen": 316591616, + "step": 4733 + }, + { + "epoch": 0.5370780141843972, + "loss": 1.2261030673980713, + "loss_ce": 0.0029585366137325764, + "loss_iou": 0.50390625, + "loss_num": 0.043701171875, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 316591616, + "step": 4733 + }, + { + "epoch": 0.5371914893617021, + "grad_norm": 22.797218322753906, + "learning_rate": 5e-05, + "loss": 1.2325, + "num_input_tokens_seen": 316659324, + "step": 4734 + }, + { + "epoch": 0.5371914893617021, + "loss": 1.014230489730835, + "loss_ce": 0.002999980468302965, + "loss_iou": 0.4375, + "loss_num": 0.027587890625, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 316659324, + "step": 4734 + }, + { + "epoch": 0.5373049645390071, + "grad_norm": 28.539581298828125, + "learning_rate": 5e-05, + "loss": 1.3011, + "num_input_tokens_seen": 316726860, + "step": 4735 + }, + { + "epoch": 0.5373049645390071, + "loss": 1.296471118927002, + "loss_ce": 0.004478988237679005, + "loss_iou": 0.515625, + "loss_num": 0.0517578125, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 316726860, + "step": 4735 + }, + { + "epoch": 0.5374184397163121, + "grad_norm": 25.587221145629883, + "learning_rate": 5e-05, + "loss": 1.3646, + "num_input_tokens_seen": 316793288, + "step": 4736 + }, + { + "epoch": 0.5374184397163121, + "loss": 1.4425204992294312, + "loss_ce": 0.00941504631191492, + "loss_iou": 0.53515625, + "loss_num": 0.072265625, + "loss_xval": 1.4296875, + "num_input_tokens_seen": 316793288, + "step": 4736 + }, + { + "epoch": 0.5375319148936171, + "grad_norm": 30.91819953918457, + "learning_rate": 5e-05, + "loss": 1.0847, + "num_input_tokens_seen": 316859660, + "step": 4737 + }, + { + "epoch": 0.5375319148936171, + "loss": 1.2079482078552246, + "loss_ce": 0.003846630919724703, + "loss_iou": 0.5078125, + "loss_num": 0.037353515625, + "loss_xval": 1.203125, + "num_input_tokens_seen": 316859660, + "step": 4737 + }, + { + "epoch": 0.5376453900709219, + "grad_norm": 21.35914421081543, + "learning_rate": 5e-05, + "loss": 1.1818, + "num_input_tokens_seen": 316925680, + "step": 4738 + }, + { + "epoch": 0.5376453900709219, + "loss": 1.1259765625, + "loss_ce": 0.002929624868556857, + "loss_iou": 0.47265625, + "loss_num": 0.035400390625, + "loss_xval": 1.125, + "num_input_tokens_seen": 316925680, + "step": 4738 + }, + { + "epoch": 0.5377588652482269, + "grad_norm": 194.75177001953125, + "learning_rate": 5e-05, + "loss": 1.2059, + "num_input_tokens_seen": 316991852, + "step": 4739 + }, + { + "epoch": 0.5377588652482269, + "loss": 1.0814628601074219, + "loss_ce": 0.0088309645652771, + "loss_iou": 0.388671875, + "loss_num": 0.059326171875, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 316991852, + "step": 4739 + }, + { + "epoch": 0.5378723404255319, + "grad_norm": 23.828645706176758, + "learning_rate": 5e-05, + "loss": 0.9957, + "num_input_tokens_seen": 317058296, + "step": 4740 + }, + { + "epoch": 0.5378723404255319, + "loss": 0.8383215665817261, + "loss_ce": 0.008731723763048649, + "loss_iou": 0.33203125, + "loss_num": 0.033203125, + "loss_xval": 0.828125, + "num_input_tokens_seen": 317058296, + "step": 4740 + }, + { + "epoch": 0.5379858156028369, + "grad_norm": 28.525653839111328, + "learning_rate": 5e-05, + "loss": 1.3363, + "num_input_tokens_seen": 317125588, + "step": 4741 + }, + { + "epoch": 0.5379858156028369, + "loss": 1.47196626663208, + "loss_ce": 0.003216241020709276, + "loss_iou": 0.5625, + "loss_num": 0.068359375, + "loss_xval": 1.46875, + "num_input_tokens_seen": 317125588, + "step": 4741 + }, + { + "epoch": 0.5380992907801418, + "grad_norm": 32.95852279663086, + "learning_rate": 5e-05, + "loss": 1.3205, + "num_input_tokens_seen": 317193436, + "step": 4742 + }, + { + "epoch": 0.5380992907801418, + "loss": 1.2666563987731934, + "loss_ce": 0.004449395928531885, + "loss_iou": 0.51953125, + "loss_num": 0.044677734375, + "loss_xval": 1.265625, + "num_input_tokens_seen": 317193436, + "step": 4742 + }, + { + "epoch": 0.5382127659574468, + "grad_norm": 22.631864547729492, + "learning_rate": 5e-05, + "loss": 1.4226, + "num_input_tokens_seen": 317260184, + "step": 4743 + }, + { + "epoch": 0.5382127659574468, + "loss": 1.368044376373291, + "loss_ce": 0.0037864241749048233, + "loss_iou": 0.5625, + "loss_num": 0.048095703125, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 317260184, + "step": 4743 + }, + { + "epoch": 0.5383262411347518, + "grad_norm": 253.42813110351562, + "learning_rate": 5e-05, + "loss": 1.117, + "num_input_tokens_seen": 317327224, + "step": 4744 + }, + { + "epoch": 0.5383262411347518, + "loss": 1.0892603397369385, + "loss_ce": 0.006252440623939037, + "loss_iou": 0.46484375, + "loss_num": 0.0306396484375, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 317327224, + "step": 4744 + }, + { + "epoch": 0.5384397163120568, + "grad_norm": 29.171939849853516, + "learning_rate": 5e-05, + "loss": 1.133, + "num_input_tokens_seen": 317394340, + "step": 4745 + }, + { + "epoch": 0.5384397163120568, + "loss": 1.120445728302002, + "loss_ce": 0.006920256651937962, + "loss_iou": 0.462890625, + "loss_num": 0.037841796875, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 317394340, + "step": 4745 + }, + { + "epoch": 0.5385531914893616, + "grad_norm": 31.050214767456055, + "learning_rate": 5e-05, + "loss": 1.0788, + "num_input_tokens_seen": 317461652, + "step": 4746 + }, + { + "epoch": 0.5385531914893616, + "loss": 1.1165481805801392, + "loss_ce": 0.006684897467494011, + "loss_iou": 0.439453125, + "loss_num": 0.046142578125, + "loss_xval": 1.109375, + "num_input_tokens_seen": 317461652, + "step": 4746 + }, + { + "epoch": 0.5386666666666666, + "grad_norm": 39.28247833251953, + "learning_rate": 5e-05, + "loss": 1.3296, + "num_input_tokens_seen": 317527468, + "step": 4747 + }, + { + "epoch": 0.5386666666666666, + "loss": 1.333833932876587, + "loss_ce": 0.01059165969491005, + "loss_iou": 0.53515625, + "loss_num": 0.0498046875, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 317527468, + "step": 4747 + }, + { + "epoch": 0.5387801418439716, + "grad_norm": 32.90999221801758, + "learning_rate": 5e-05, + "loss": 1.4941, + "num_input_tokens_seen": 317595664, + "step": 4748 + }, + { + "epoch": 0.5387801418439716, + "loss": 1.410023808479309, + "loss_ce": 0.006703488528728485, + "loss_iou": 0.578125, + "loss_num": 0.049560546875, + "loss_xval": 1.40625, + "num_input_tokens_seen": 317595664, + "step": 4748 + }, + { + "epoch": 0.5388936170212766, + "grad_norm": 21.03764533996582, + "learning_rate": 5e-05, + "loss": 1.2037, + "num_input_tokens_seen": 317662840, + "step": 4749 + }, + { + "epoch": 0.5388936170212766, + "loss": 1.1193828582763672, + "loss_ce": 0.0036601810716092587, + "loss_iou": 0.455078125, + "loss_num": 0.040771484375, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 317662840, + "step": 4749 + }, + { + "epoch": 0.5390070921985816, + "grad_norm": 25.24809455871582, + "learning_rate": 5e-05, + "loss": 1.22, + "num_input_tokens_seen": 317729024, + "step": 4750 + }, + { + "epoch": 0.5390070921985816, + "eval_seeclick_CIoU": 0.42732392251491547, + "eval_seeclick_GIoU": 0.4114486426115036, + "eval_seeclick_IoU": 0.5048714280128479, + "eval_seeclick_MAE_all": 0.15077903866767883, + "eval_seeclick_MAE_h": 0.04959178064018488, + "eval_seeclick_MAE_w": 0.11872904747724533, + "eval_seeclick_MAE_x_boxes": 0.22414054721593857, + "eval_seeclick_MAE_y_boxes": 0.11304249987006187, + "eval_seeclick_NUM_probability": 0.9999578297138214, + "eval_seeclick_inside_bbox": 0.6614583432674408, + "eval_seeclick_loss": 2.377708673477173, + "eval_seeclick_loss_ce": 0.013549021445214748, + "eval_seeclick_loss_iou": 0.8218994140625, + "eval_seeclick_loss_num": 0.14437103271484375, + "eval_seeclick_loss_xval": 2.36669921875, + "eval_seeclick_runtime": 63.6678, + "eval_seeclick_samples_per_second": 0.738, + "eval_seeclick_steps_per_second": 0.031, + "num_input_tokens_seen": 317729024, + "step": 4750 + }, + { + "epoch": 0.5390070921985816, + "eval_icons_CIoU": 0.505115196108818, + "eval_icons_GIoU": 0.4973857253789902, + "eval_icons_IoU": 0.5418099462985992, + "eval_icons_MAE_all": 0.13056015968322754, + "eval_icons_MAE_h": 0.06915656849741936, + "eval_icons_MAE_w": 0.1591188609600067, + "eval_icons_MAE_x_boxes": 0.0938350148499012, + "eval_icons_MAE_y_boxes": 0.07840271294116974, + "eval_icons_NUM_probability": 0.9999582171440125, + "eval_icons_inside_bbox": 0.8038194477558136, + "eval_icons_loss": 2.3709754943847656, + "eval_icons_loss_ce": 3.699199078255333e-05, + "eval_icons_loss_iou": 0.868408203125, + "eval_icons_loss_num": 0.12847518920898438, + "eval_icons_loss_xval": 2.37744140625, + "eval_icons_runtime": 66.2547, + "eval_icons_samples_per_second": 0.755, + "eval_icons_steps_per_second": 0.03, + "num_input_tokens_seen": 317729024, + "step": 4750 + }, + { + "epoch": 0.5390070921985816, + "eval_screenspot_CIoU": 0.2986188630263011, + "eval_screenspot_GIoU": 0.28390354414780933, + "eval_screenspot_IoU": 0.3842420329650243, + "eval_screenspot_MAE_all": 0.1959703266620636, + "eval_screenspot_MAE_h": 0.12149114285906155, + "eval_screenspot_MAE_w": 0.16221629083156586, + "eval_screenspot_MAE_x_boxes": 0.2941537102063497, + "eval_screenspot_MAE_y_boxes": 0.07595719024538994, + "eval_screenspot_NUM_probability": 0.9995460907618204, + "eval_screenspot_inside_bbox": 0.6104166706403097, + "eval_screenspot_loss": 2.9370062351226807, + "eval_screenspot_loss_ce": 0.014405305497348309, + "eval_screenspot_loss_iou": 0.97216796875, + "eval_screenspot_loss_num": 0.21244303385416666, + "eval_screenspot_loss_xval": 3.0065104166666665, + "eval_screenspot_runtime": 118.1313, + "eval_screenspot_samples_per_second": 0.753, + "eval_screenspot_steps_per_second": 0.025, + "num_input_tokens_seen": 317729024, + "step": 4750 + }, + { + "epoch": 0.5390070921985816, + "eval_compot_CIoU": 0.2533332034945488, + "eval_compot_GIoU": 0.203989677131176, + "eval_compot_IoU": 0.33112432062625885, + "eval_compot_MAE_all": 0.2637382447719574, + "eval_compot_MAE_h": 0.19006607681512833, + "eval_compot_MAE_w": 0.30164866149425507, + "eval_compot_MAE_x_boxes": 0.2030579149723053, + "eval_compot_MAE_y_boxes": 0.12844938039779663, + "eval_compot_NUM_probability": 0.9999415576457977, + "eval_compot_inside_bbox": 0.5434027910232544, + "eval_compot_loss": 3.360861301422119, + "eval_compot_loss_ce": 0.0035617146641016006, + "eval_compot_loss_iou": 1.0234375, + "eval_compot_loss_num": 0.252288818359375, + "eval_compot_loss_xval": 3.30810546875, + "eval_compot_runtime": 70.1761, + "eval_compot_samples_per_second": 0.712, + "eval_compot_steps_per_second": 0.028, + "num_input_tokens_seen": 317729024, + "step": 4750 + }, + { + "epoch": 0.5390070921985816, + "loss": 3.2385945320129395, + "loss_ce": 0.0032430049031972885, + "loss_iou": 1.0078125, + "loss_num": 0.2451171875, + "loss_xval": 3.234375, + "num_input_tokens_seen": 317729024, + "step": 4750 + }, + { + "epoch": 0.5391205673758865, + "grad_norm": 43.8493537902832, + "learning_rate": 5e-05, + "loss": 1.1039, + "num_input_tokens_seen": 317795488, + "step": 4751 + }, + { + "epoch": 0.5391205673758865, + "loss": 0.8707873225212097, + "loss_ce": 0.00604125065729022, + "loss_iou": 0.376953125, + "loss_num": 0.0218505859375, + "loss_xval": 0.86328125, + "num_input_tokens_seen": 317795488, + "step": 4751 + }, + { + "epoch": 0.5392340425531915, + "grad_norm": 31.349580764770508, + "learning_rate": 5e-05, + "loss": 1.5293, + "num_input_tokens_seen": 317862736, + "step": 4752 + }, + { + "epoch": 0.5392340425531915, + "loss": 1.4120898246765137, + "loss_ce": 0.0024218664038926363, + "loss_iou": 0.59765625, + "loss_num": 0.043701171875, + "loss_xval": 1.40625, + "num_input_tokens_seen": 317862736, + "step": 4752 + }, + { + "epoch": 0.5393475177304965, + "grad_norm": 28.32089614868164, + "learning_rate": 5e-05, + "loss": 1.1491, + "num_input_tokens_seen": 317929392, + "step": 4753 + }, + { + "epoch": 0.5393475177304965, + "loss": 1.2427639961242676, + "loss_ce": 0.007900722324848175, + "loss_iou": 0.50390625, + "loss_num": 0.045166015625, + "loss_xval": 1.234375, + "num_input_tokens_seen": 317929392, + "step": 4753 + }, + { + "epoch": 0.5394609929078015, + "grad_norm": 52.23899841308594, + "learning_rate": 5e-05, + "loss": 1.1782, + "num_input_tokens_seen": 317997248, + "step": 4754 + }, + { + "epoch": 0.5394609929078015, + "loss": 1.166081190109253, + "loss_ce": 0.006901571527123451, + "loss_iou": 0.5, + "loss_num": 0.0322265625, + "loss_xval": 1.15625, + "num_input_tokens_seen": 317997248, + "step": 4754 + }, + { + "epoch": 0.5395744680851063, + "grad_norm": 40.27772903442383, + "learning_rate": 5e-05, + "loss": 1.0481, + "num_input_tokens_seen": 318065472, + "step": 4755 + }, + { + "epoch": 0.5395744680851063, + "loss": 1.0792291164398193, + "loss_ce": 0.0059869978576898575, + "loss_iou": 0.478515625, + "loss_num": 0.023681640625, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 318065472, + "step": 4755 + }, + { + "epoch": 0.5396879432624113, + "grad_norm": 28.56338119506836, + "learning_rate": 5e-05, + "loss": 1.2973, + "num_input_tokens_seen": 318133172, + "step": 4756 + }, + { + "epoch": 0.5396879432624113, + "loss": 1.3534513711929321, + "loss_ce": 0.005306829698383808, + "loss_iou": 0.56640625, + "loss_num": 0.0439453125, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 318133172, + "step": 4756 + }, + { + "epoch": 0.5398014184397163, + "grad_norm": 25.067153930664062, + "learning_rate": 5e-05, + "loss": 1.1503, + "num_input_tokens_seen": 318199776, + "step": 4757 + }, + { + "epoch": 0.5398014184397163, + "loss": 1.318969488143921, + "loss_ce": 0.005737002938985825, + "loss_iou": 0.5078125, + "loss_num": 0.058349609375, + "loss_xval": 1.3125, + "num_input_tokens_seen": 318199776, + "step": 4757 + }, + { + "epoch": 0.5399148936170213, + "grad_norm": 23.369165420532227, + "learning_rate": 5e-05, + "loss": 1.2824, + "num_input_tokens_seen": 318267136, + "step": 4758 + }, + { + "epoch": 0.5399148936170213, + "loss": 1.3455086946487427, + "loss_ce": 0.005176669918000698, + "loss_iou": 0.5625, + "loss_num": 0.043212890625, + "loss_xval": 1.34375, + "num_input_tokens_seen": 318267136, + "step": 4758 + }, + { + "epoch": 0.5400283687943263, + "grad_norm": 29.949331283569336, + "learning_rate": 5e-05, + "loss": 1.0481, + "num_input_tokens_seen": 318333972, + "step": 4759 + }, + { + "epoch": 0.5400283687943263, + "loss": 1.0529370307922363, + "loss_ce": 0.006061998195946217, + "loss_iou": 0.4296875, + "loss_num": 0.037841796875, + "loss_xval": 1.046875, + "num_input_tokens_seen": 318333972, + "step": 4759 + }, + { + "epoch": 0.5401418439716312, + "grad_norm": 42.93399429321289, + "learning_rate": 5e-05, + "loss": 1.2388, + "num_input_tokens_seen": 318401076, + "step": 4760 + }, + { + "epoch": 0.5401418439716312, + "loss": 1.1963868141174316, + "loss_ce": 0.0035157192032784224, + "loss_iou": 0.515625, + "loss_num": 0.03271484375, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 318401076, + "step": 4760 + }, + { + "epoch": 0.5402553191489362, + "grad_norm": 31.899459838867188, + "learning_rate": 5e-05, + "loss": 1.0937, + "num_input_tokens_seen": 318467568, + "step": 4761 + }, + { + "epoch": 0.5402553191489362, + "loss": 1.2203707695007324, + "loss_ce": 0.006259388290345669, + "loss_iou": 0.435546875, + "loss_num": 0.068359375, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 318467568, + "step": 4761 + }, + { + "epoch": 0.5403687943262412, + "grad_norm": 40.052757263183594, + "learning_rate": 5e-05, + "loss": 1.1412, + "num_input_tokens_seen": 318534848, + "step": 4762 + }, + { + "epoch": 0.5403687943262412, + "loss": 1.1573580503463745, + "loss_ce": 0.006479131057858467, + "loss_iou": 0.5078125, + "loss_num": 0.0267333984375, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 318534848, + "step": 4762 + }, + { + "epoch": 0.540482269503546, + "grad_norm": 59.529964447021484, + "learning_rate": 5e-05, + "loss": 1.1446, + "num_input_tokens_seen": 318601504, + "step": 4763 + }, + { + "epoch": 0.540482269503546, + "loss": 1.1080265045166016, + "loss_ce": 0.005975621752440929, + "loss_iou": 0.423828125, + "loss_num": 0.050537109375, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 318601504, + "step": 4763 + }, + { + "epoch": 0.540595744680851, + "grad_norm": 47.39345169067383, + "learning_rate": 5e-05, + "loss": 1.18, + "num_input_tokens_seen": 318665624, + "step": 4764 + }, + { + "epoch": 0.540595744680851, + "loss": 1.1498584747314453, + "loss_ce": 0.002397536765784025, + "loss_iou": 0.482421875, + "loss_num": 0.03662109375, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 318665624, + "step": 4764 + }, + { + "epoch": 0.540709219858156, + "grad_norm": 42.34235382080078, + "learning_rate": 5e-05, + "loss": 1.0345, + "num_input_tokens_seen": 318730552, + "step": 4765 + }, + { + "epoch": 0.540709219858156, + "loss": 1.0144606828689575, + "loss_ce": 0.004084696527570486, + "loss_iou": 0.416015625, + "loss_num": 0.03564453125, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 318730552, + "step": 4765 + }, + { + "epoch": 0.540822695035461, + "grad_norm": 36.079856872558594, + "learning_rate": 5e-05, + "loss": 1.4125, + "num_input_tokens_seen": 318797136, + "step": 4766 + }, + { + "epoch": 0.540822695035461, + "loss": 1.549157738685608, + "loss_ce": 0.005700807087123394, + "loss_iou": 0.62890625, + "loss_num": 0.056396484375, + "loss_xval": 1.546875, + "num_input_tokens_seen": 318797136, + "step": 4766 + }, + { + "epoch": 0.540936170212766, + "grad_norm": 40.745582580566406, + "learning_rate": 5e-05, + "loss": 1.1785, + "num_input_tokens_seen": 318864196, + "step": 4767 + }, + { + "epoch": 0.540936170212766, + "loss": 1.2785669565200806, + "loss_ce": 0.008059152401983738, + "loss_iou": 0.53515625, + "loss_num": 0.039794921875, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 318864196, + "step": 4767 + }, + { + "epoch": 0.5410496453900709, + "grad_norm": 27.14491081237793, + "learning_rate": 5e-05, + "loss": 1.427, + "num_input_tokens_seen": 318930168, + "step": 4768 + }, + { + "epoch": 0.5410496453900709, + "loss": 1.287668228149414, + "loss_ce": 0.008859727531671524, + "loss_iou": 0.546875, + "loss_num": 0.0380859375, + "loss_xval": 1.28125, + "num_input_tokens_seen": 318930168, + "step": 4768 + }, + { + "epoch": 0.5411631205673759, + "grad_norm": 17.665895462036133, + "learning_rate": 5e-05, + "loss": 1.2922, + "num_input_tokens_seen": 318996700, + "step": 4769 + }, + { + "epoch": 0.5411631205673759, + "loss": 1.169123888015747, + "loss_ce": 0.005549703259021044, + "loss_iou": 0.51953125, + "loss_num": 0.025634765625, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 318996700, + "step": 4769 + }, + { + "epoch": 0.5412765957446809, + "grad_norm": 28.314207077026367, + "learning_rate": 5e-05, + "loss": 1.379, + "num_input_tokens_seen": 319063500, + "step": 4770 + }, + { + "epoch": 0.5412765957446809, + "loss": 1.3528406620025635, + "loss_ce": 0.0051844678819179535, + "loss_iou": 0.51171875, + "loss_num": 0.0654296875, + "loss_xval": 1.34375, + "num_input_tokens_seen": 319063500, + "step": 4770 + }, + { + "epoch": 0.5413900709219858, + "grad_norm": 30.97231674194336, + "learning_rate": 5e-05, + "loss": 1.2738, + "num_input_tokens_seen": 319129440, + "step": 4771 + }, + { + "epoch": 0.5413900709219858, + "loss": 1.3288753032684326, + "loss_ce": 0.0051448289304971695, + "loss_iou": 0.546875, + "loss_num": 0.04541015625, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 319129440, + "step": 4771 + }, + { + "epoch": 0.5415035460992907, + "grad_norm": 33.8351936340332, + "learning_rate": 5e-05, + "loss": 1.0465, + "num_input_tokens_seen": 319196052, + "step": 4772 + }, + { + "epoch": 0.5415035460992907, + "loss": 1.1316523551940918, + "loss_ce": 0.004211037885397673, + "loss_iou": 0.46875, + "loss_num": 0.0380859375, + "loss_xval": 1.125, + "num_input_tokens_seen": 319196052, + "step": 4772 + }, + { + "epoch": 0.5416170212765957, + "grad_norm": 27.154733657836914, + "learning_rate": 5e-05, + "loss": 1.4965, + "num_input_tokens_seen": 319262912, + "step": 4773 + }, + { + "epoch": 0.5416170212765957, + "loss": 1.4303438663482666, + "loss_ce": 0.0065157124772667885, + "loss_iou": 0.61328125, + "loss_num": 0.039794921875, + "loss_xval": 1.421875, + "num_input_tokens_seen": 319262912, + "step": 4773 + }, + { + "epoch": 0.5417304964539007, + "grad_norm": 17.650325775146484, + "learning_rate": 5e-05, + "loss": 1.0537, + "num_input_tokens_seen": 319329612, + "step": 4774 + }, + { + "epoch": 0.5417304964539007, + "loss": 1.1048333644866943, + "loss_ce": 0.006200593896210194, + "loss_iou": 0.423828125, + "loss_num": 0.05029296875, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 319329612, + "step": 4774 + }, + { + "epoch": 0.5418439716312057, + "grad_norm": 12.094015121459961, + "learning_rate": 5e-05, + "loss": 1.0282, + "num_input_tokens_seen": 319396620, + "step": 4775 + }, + { + "epoch": 0.5418439716312057, + "loss": 0.9743000864982605, + "loss_ce": 0.006038333289325237, + "loss_iou": 0.419921875, + "loss_num": 0.0260009765625, + "loss_xval": 0.96875, + "num_input_tokens_seen": 319396620, + "step": 4775 + }, + { + "epoch": 0.5419574468085107, + "grad_norm": 26.699874877929688, + "learning_rate": 5e-05, + "loss": 1.1992, + "num_input_tokens_seen": 319463092, + "step": 4776 + }, + { + "epoch": 0.5419574468085107, + "loss": 1.00969660282135, + "loss_ce": 0.00505792535841465, + "loss_iou": 0.447265625, + "loss_num": 0.022216796875, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 319463092, + "step": 4776 + }, + { + "epoch": 0.5420709219858156, + "grad_norm": 61.52935028076172, + "learning_rate": 5e-05, + "loss": 1.2257, + "num_input_tokens_seen": 319530140, + "step": 4777 + }, + { + "epoch": 0.5420709219858156, + "loss": 1.2233878374099731, + "loss_ce": 0.008544044569134712, + "loss_iou": 0.5546875, + "loss_num": 0.0213623046875, + "loss_xval": 1.21875, + "num_input_tokens_seen": 319530140, + "step": 4777 + }, + { + "epoch": 0.5421843971631206, + "grad_norm": 43.11962127685547, + "learning_rate": 5e-05, + "loss": 1.4851, + "num_input_tokens_seen": 319596724, + "step": 4778 + }, + { + "epoch": 0.5421843971631206, + "loss": 1.4168355464935303, + "loss_ce": 0.0037496553268283606, + "loss_iou": 0.60546875, + "loss_num": 0.0400390625, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 319596724, + "step": 4778 + }, + { + "epoch": 0.5422978723404255, + "grad_norm": 32.53317642211914, + "learning_rate": 5e-05, + "loss": 1.4281, + "num_input_tokens_seen": 319663636, + "step": 4779 + }, + { + "epoch": 0.5422978723404255, + "loss": 1.3379653692245483, + "loss_ce": 0.008375540375709534, + "loss_iou": 0.494140625, + "loss_num": 0.068359375, + "loss_xval": 1.328125, + "num_input_tokens_seen": 319663636, + "step": 4779 + }, + { + "epoch": 0.5424113475177305, + "grad_norm": 19.599851608276367, + "learning_rate": 5e-05, + "loss": 1.0589, + "num_input_tokens_seen": 319730348, + "step": 4780 + }, + { + "epoch": 0.5424113475177305, + "loss": 1.1721935272216797, + "loss_ce": 0.004224700387567282, + "loss_iou": 0.515625, + "loss_num": 0.027587890625, + "loss_xval": 1.171875, + "num_input_tokens_seen": 319730348, + "step": 4780 + }, + { + "epoch": 0.5425248226950354, + "grad_norm": 22.999162673950195, + "learning_rate": 5e-05, + "loss": 1.1567, + "num_input_tokens_seen": 319796692, + "step": 4781 + }, + { + "epoch": 0.5425248226950354, + "loss": 1.0717878341674805, + "loss_ce": 0.008311202749609947, + "loss_iou": 0.435546875, + "loss_num": 0.0390625, + "loss_xval": 1.0625, + "num_input_tokens_seen": 319796692, + "step": 4781 + }, + { + "epoch": 0.5426382978723404, + "grad_norm": 24.916576385498047, + "learning_rate": 5e-05, + "loss": 1.1479, + "num_input_tokens_seen": 319864356, + "step": 4782 + }, + { + "epoch": 0.5426382978723404, + "loss": 1.1677650213241577, + "loss_ce": 0.008097052574157715, + "loss_iou": 0.486328125, + "loss_num": 0.037841796875, + "loss_xval": 1.15625, + "num_input_tokens_seen": 319864356, + "step": 4782 + }, + { + "epoch": 0.5427517730496454, + "grad_norm": 31.552501678466797, + "learning_rate": 5e-05, + "loss": 1.2686, + "num_input_tokens_seen": 319930440, + "step": 4783 + }, + { + "epoch": 0.5427517730496454, + "loss": 1.283358097076416, + "loss_ce": 0.0030846353620290756, + "loss_iou": 0.50390625, + "loss_num": 0.05517578125, + "loss_xval": 1.28125, + "num_input_tokens_seen": 319930440, + "step": 4783 + }, + { + "epoch": 0.5428652482269504, + "grad_norm": 32.1490364074707, + "learning_rate": 5e-05, + "loss": 1.265, + "num_input_tokens_seen": 319997064, + "step": 4784 + }, + { + "epoch": 0.5428652482269504, + "loss": 1.394004464149475, + "loss_ce": 0.005332565866410732, + "loss_iou": 0.57421875, + "loss_num": 0.048095703125, + "loss_xval": 1.390625, + "num_input_tokens_seen": 319997064, + "step": 4784 + }, + { + "epoch": 0.5429787234042553, + "grad_norm": 35.23151779174805, + "learning_rate": 5e-05, + "loss": 1.3518, + "num_input_tokens_seen": 320063496, + "step": 4785 + }, + { + "epoch": 0.5429787234042553, + "loss": 1.2117012739181519, + "loss_ce": 0.00467006117105484, + "loss_iou": 0.490234375, + "loss_num": 0.04541015625, + "loss_xval": 1.203125, + "num_input_tokens_seen": 320063496, + "step": 4785 + }, + { + "epoch": 0.5430921985815603, + "grad_norm": 29.424287796020508, + "learning_rate": 5e-05, + "loss": 1.3607, + "num_input_tokens_seen": 320131480, + "step": 4786 + }, + { + "epoch": 0.5430921985815603, + "loss": 1.433839201927185, + "loss_ce": 0.005128266289830208, + "loss_iou": 0.58203125, + "loss_num": 0.05322265625, + "loss_xval": 1.4296875, + "num_input_tokens_seen": 320131480, + "step": 4786 + }, + { + "epoch": 0.5432056737588652, + "grad_norm": 13.91395378112793, + "learning_rate": 5e-05, + "loss": 1.054, + "num_input_tokens_seen": 320197408, + "step": 4787 + }, + { + "epoch": 0.5432056737588652, + "loss": 0.8473160266876221, + "loss_ce": 0.0060074180364608765, + "loss_iou": 0.330078125, + "loss_num": 0.035888671875, + "loss_xval": 0.83984375, + "num_input_tokens_seen": 320197408, + "step": 4787 + }, + { + "epoch": 0.5433191489361702, + "grad_norm": 23.233936309814453, + "learning_rate": 5e-05, + "loss": 1.1455, + "num_input_tokens_seen": 320265504, + "step": 4788 + }, + { + "epoch": 0.5433191489361702, + "loss": 1.1149649620056152, + "loss_ce": 0.005590048618614674, + "loss_iou": 0.45703125, + "loss_num": 0.038818359375, + "loss_xval": 1.109375, + "num_input_tokens_seen": 320265504, + "step": 4788 + }, + { + "epoch": 0.5434326241134751, + "grad_norm": 13.465558052062988, + "learning_rate": 5e-05, + "loss": 0.9706, + "num_input_tokens_seen": 320332296, + "step": 4789 + }, + { + "epoch": 0.5434326241134751, + "loss": 1.041985273361206, + "loss_ce": 0.008293896913528442, + "loss_iou": 0.451171875, + "loss_num": 0.0263671875, + "loss_xval": 1.03125, + "num_input_tokens_seen": 320332296, + "step": 4789 + }, + { + "epoch": 0.5435460992907801, + "grad_norm": 17.427181243896484, + "learning_rate": 5e-05, + "loss": 1.1529, + "num_input_tokens_seen": 320399816, + "step": 4790 + }, + { + "epoch": 0.5435460992907801, + "loss": 1.0486743450164795, + "loss_ce": 0.007658702787011862, + "loss_iou": 0.43359375, + "loss_num": 0.034423828125, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 320399816, + "step": 4790 + }, + { + "epoch": 0.5436595744680851, + "grad_norm": 17.53374481201172, + "learning_rate": 5e-05, + "loss": 1.1719, + "num_input_tokens_seen": 320466900, + "step": 4791 + }, + { + "epoch": 0.5436595744680851, + "loss": 1.0568318367004395, + "loss_ce": 0.004341584630310535, + "loss_iou": 0.435546875, + "loss_num": 0.036376953125, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 320466900, + "step": 4791 + }, + { + "epoch": 0.5437730496453901, + "grad_norm": 22.531097412109375, + "learning_rate": 5e-05, + "loss": 1.1021, + "num_input_tokens_seen": 320534312, + "step": 4792 + }, + { + "epoch": 0.5437730496453901, + "loss": 1.1828796863555908, + "loss_ce": 0.0061218030750751495, + "loss_iou": 0.5, + "loss_num": 0.034912109375, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 320534312, + "step": 4792 + }, + { + "epoch": 0.5438865248226951, + "grad_norm": 63.45594024658203, + "learning_rate": 5e-05, + "loss": 1.1638, + "num_input_tokens_seen": 320600672, + "step": 4793 + }, + { + "epoch": 0.5438865248226951, + "loss": 1.0814155340194702, + "loss_ce": 0.005609860178083181, + "loss_iou": 0.4375, + "loss_num": 0.0400390625, + "loss_xval": 1.078125, + "num_input_tokens_seen": 320600672, + "step": 4793 + }, + { + "epoch": 0.544, + "grad_norm": 24.73275375366211, + "learning_rate": 5e-05, + "loss": 1.1737, + "num_input_tokens_seen": 320667500, + "step": 4794 + }, + { + "epoch": 0.544, + "loss": 1.1941627264022827, + "loss_ce": 0.004709615372121334, + "loss_iou": 0.439453125, + "loss_num": 0.0615234375, + "loss_xval": 1.1875, + "num_input_tokens_seen": 320667500, + "step": 4794 + }, + { + "epoch": 0.544113475177305, + "grad_norm": 21.766590118408203, + "learning_rate": 5e-05, + "loss": 1.1186, + "num_input_tokens_seen": 320733772, + "step": 4795 + }, + { + "epoch": 0.544113475177305, + "loss": 1.0612456798553467, + "loss_ce": 0.005093275103718042, + "loss_iou": 0.4296875, + "loss_num": 0.039306640625, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 320733772, + "step": 4795 + }, + { + "epoch": 0.5442269503546099, + "grad_norm": 30.901071548461914, + "learning_rate": 5e-05, + "loss": 1.1006, + "num_input_tokens_seen": 320801552, + "step": 4796 + }, + { + "epoch": 0.5442269503546099, + "loss": 1.0961477756500244, + "loss_ce": 0.008257141336798668, + "loss_iou": 0.443359375, + "loss_num": 0.0400390625, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 320801552, + "step": 4796 + }, + { + "epoch": 0.5443404255319149, + "grad_norm": 29.9576358795166, + "learning_rate": 5e-05, + "loss": 1.1536, + "num_input_tokens_seen": 320868400, + "step": 4797 + }, + { + "epoch": 0.5443404255319149, + "loss": 1.3131712675094604, + "loss_ce": 0.0031126467511057854, + "loss_iou": 0.5625, + "loss_num": 0.037353515625, + "loss_xval": 1.3125, + "num_input_tokens_seen": 320868400, + "step": 4797 + }, + { + "epoch": 0.5444539007092198, + "grad_norm": 27.961627960205078, + "learning_rate": 5e-05, + "loss": 1.2018, + "num_input_tokens_seen": 320935652, + "step": 4798 + }, + { + "epoch": 0.5444539007092198, + "loss": 1.0671381950378418, + "loss_ce": 0.003661584574729204, + "loss_iou": 0.4609375, + "loss_num": 0.0283203125, + "loss_xval": 1.0625, + "num_input_tokens_seen": 320935652, + "step": 4798 + }, + { + "epoch": 0.5445673758865248, + "grad_norm": 32.48509216308594, + "learning_rate": 5e-05, + "loss": 1.2074, + "num_input_tokens_seen": 321003300, + "step": 4799 + }, + { + "epoch": 0.5445673758865248, + "loss": 1.270161747932434, + "loss_ce": 0.009419519454240799, + "loss_iou": 0.458984375, + "loss_num": 0.068359375, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 321003300, + "step": 4799 + }, + { + "epoch": 0.5446808510638298, + "grad_norm": 30.156936645507812, + "learning_rate": 5e-05, + "loss": 1.2827, + "num_input_tokens_seen": 321070432, + "step": 4800 + }, + { + "epoch": 0.5446808510638298, + "loss": 1.152881145477295, + "loss_ce": 0.008594021201133728, + "loss_iou": 0.4609375, + "loss_num": 0.04443359375, + "loss_xval": 1.140625, + "num_input_tokens_seen": 321070432, + "step": 4800 + }, + { + "epoch": 0.5447943262411348, + "grad_norm": 20.77678871154785, + "learning_rate": 5e-05, + "loss": 1.456, + "num_input_tokens_seen": 321138220, + "step": 4801 + }, + { + "epoch": 0.5447943262411348, + "loss": 1.3256051540374756, + "loss_ce": 0.003827861277386546, + "loss_iou": 0.55078125, + "loss_num": 0.044189453125, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 321138220, + "step": 4801 + }, + { + "epoch": 0.5449078014184398, + "grad_norm": 25.925338745117188, + "learning_rate": 5e-05, + "loss": 0.9744, + "num_input_tokens_seen": 321205044, + "step": 4802 + }, + { + "epoch": 0.5449078014184398, + "loss": 0.9587661623954773, + "loss_ce": 0.0031997954938560724, + "loss_iou": 0.40625, + "loss_num": 0.0283203125, + "loss_xval": 0.95703125, + "num_input_tokens_seen": 321205044, + "step": 4802 + }, + { + "epoch": 0.5450212765957447, + "grad_norm": 28.157533645629883, + "learning_rate": 5e-05, + "loss": 1.3199, + "num_input_tokens_seen": 321271992, + "step": 4803 + }, + { + "epoch": 0.5450212765957447, + "loss": 1.322356939315796, + "loss_ce": 0.009856939315795898, + "loss_iou": 0.5546875, + "loss_num": 0.0400390625, + "loss_xval": 1.3125, + "num_input_tokens_seen": 321271992, + "step": 4803 + }, + { + "epoch": 0.5451347517730496, + "grad_norm": 35.68123245239258, + "learning_rate": 5e-05, + "loss": 1.2716, + "num_input_tokens_seen": 321338668, + "step": 4804 + }, + { + "epoch": 0.5451347517730496, + "loss": 1.4173552989959717, + "loss_ce": 0.004757626447826624, + "loss_iou": 0.5546875, + "loss_num": 0.060791015625, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 321338668, + "step": 4804 + }, + { + "epoch": 0.5452482269503546, + "grad_norm": 25.114702224731445, + "learning_rate": 5e-05, + "loss": 1.3988, + "num_input_tokens_seen": 321405716, + "step": 4805 + }, + { + "epoch": 0.5452482269503546, + "loss": 1.261199951171875, + "loss_ce": 0.006805360782891512, + "loss_iou": 0.51171875, + "loss_num": 0.0458984375, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 321405716, + "step": 4805 + }, + { + "epoch": 0.5453617021276596, + "grad_norm": 20.944398880004883, + "learning_rate": 5e-05, + "loss": 1.0596, + "num_input_tokens_seen": 321473860, + "step": 4806 + }, + { + "epoch": 0.5453617021276596, + "loss": 0.9892458915710449, + "loss_ce": 0.0034060401376336813, + "loss_iou": 0.4296875, + "loss_num": 0.0250244140625, + "loss_xval": 0.984375, + "num_input_tokens_seen": 321473860, + "step": 4806 + }, + { + "epoch": 0.5454751773049645, + "grad_norm": 49.264137268066406, + "learning_rate": 5e-05, + "loss": 1.18, + "num_input_tokens_seen": 321540996, + "step": 4807 + }, + { + "epoch": 0.5454751773049645, + "loss": 1.237731695175171, + "loss_ce": 0.008239462971687317, + "loss_iou": 0.5390625, + "loss_num": 0.030029296875, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 321540996, + "step": 4807 + }, + { + "epoch": 0.5455886524822695, + "grad_norm": 26.308940887451172, + "learning_rate": 5e-05, + "loss": 1.1138, + "num_input_tokens_seen": 321606808, + "step": 4808 + }, + { + "epoch": 0.5455886524822695, + "loss": 1.2628300189971924, + "loss_ce": 0.0035527576692402363, + "loss_iou": 0.515625, + "loss_num": 0.044921875, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 321606808, + "step": 4808 + }, + { + "epoch": 0.5457021276595745, + "grad_norm": 13.642491340637207, + "learning_rate": 5e-05, + "loss": 1.0398, + "num_input_tokens_seen": 321673680, + "step": 4809 + }, + { + "epoch": 0.5457021276595745, + "loss": 1.0385775566101074, + "loss_ce": 0.007327570579946041, + "loss_iou": 0.3984375, + "loss_num": 0.046630859375, + "loss_xval": 1.03125, + "num_input_tokens_seen": 321673680, + "step": 4809 + }, + { + "epoch": 0.5458156028368795, + "grad_norm": 21.915912628173828, + "learning_rate": 5e-05, + "loss": 1.2871, + "num_input_tokens_seen": 321740604, + "step": 4810 + }, + { + "epoch": 0.5458156028368795, + "loss": 1.4382647275924683, + "loss_ce": 0.005647524259984493, + "loss_iou": 0.5390625, + "loss_num": 0.07080078125, + "loss_xval": 1.4296875, + "num_input_tokens_seen": 321740604, + "step": 4810 + }, + { + "epoch": 0.5459290780141844, + "grad_norm": 25.97247314453125, + "learning_rate": 5e-05, + "loss": 1.0516, + "num_input_tokens_seen": 321806624, + "step": 4811 + }, + { + "epoch": 0.5459290780141844, + "loss": 1.0840359926223755, + "loss_ce": 0.0049344627186656, + "loss_iou": 0.44921875, + "loss_num": 0.035888671875, + "loss_xval": 1.078125, + "num_input_tokens_seen": 321806624, + "step": 4811 + }, + { + "epoch": 0.5460425531914893, + "grad_norm": 33.99527359008789, + "learning_rate": 5e-05, + "loss": 1.3217, + "num_input_tokens_seen": 321874512, + "step": 4812 + }, + { + "epoch": 0.5460425531914893, + "loss": 1.3057844638824463, + "loss_ce": 0.006468103267252445, + "loss_iou": 0.515625, + "loss_num": 0.05322265625, + "loss_xval": 1.296875, + "num_input_tokens_seen": 321874512, + "step": 4812 + }, + { + "epoch": 0.5461560283687943, + "grad_norm": 26.60349464416504, + "learning_rate": 5e-05, + "loss": 1.3173, + "num_input_tokens_seen": 321941672, + "step": 4813 + }, + { + "epoch": 0.5461560283687943, + "loss": 1.209740400314331, + "loss_ce": 0.003929832950234413, + "loss_iou": 0.49609375, + "loss_num": 0.04296875, + "loss_xval": 1.203125, + "num_input_tokens_seen": 321941672, + "step": 4813 + }, + { + "epoch": 0.5462695035460993, + "grad_norm": 30.765724182128906, + "learning_rate": 5e-05, + "loss": 0.9879, + "num_input_tokens_seen": 322008456, + "step": 4814 + }, + { + "epoch": 0.5462695035460993, + "loss": 1.0516462326049805, + "loss_ce": 0.008555435575544834, + "loss_iou": 0.44140625, + "loss_num": 0.031982421875, + "loss_xval": 1.046875, + "num_input_tokens_seen": 322008456, + "step": 4814 + }, + { + "epoch": 0.5463829787234042, + "grad_norm": 21.23627471923828, + "learning_rate": 5e-05, + "loss": 1.1279, + "num_input_tokens_seen": 322075308, + "step": 4815 + }, + { + "epoch": 0.5463829787234042, + "loss": 1.1264739036560059, + "loss_ce": 0.010263022035360336, + "loss_iou": 0.470703125, + "loss_num": 0.034912109375, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 322075308, + "step": 4815 + }, + { + "epoch": 0.5464964539007092, + "grad_norm": 19.494413375854492, + "learning_rate": 5e-05, + "loss": 1.3254, + "num_input_tokens_seen": 322142944, + "step": 4816 + }, + { + "epoch": 0.5464964539007092, + "loss": 1.1889936923980713, + "loss_ce": 0.005888238549232483, + "loss_iou": 0.4921875, + "loss_num": 0.03955078125, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 322142944, + "step": 4816 + }, + { + "epoch": 0.5466099290780142, + "grad_norm": 35.14594650268555, + "learning_rate": 5e-05, + "loss": 1.303, + "num_input_tokens_seen": 322208172, + "step": 4817 + }, + { + "epoch": 0.5466099290780142, + "loss": 1.2953433990478516, + "loss_ce": 0.004144725855439901, + "loss_iou": 0.5, + "loss_num": 0.057861328125, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 322208172, + "step": 4817 + }, + { + "epoch": 0.5467234042553192, + "grad_norm": 83.40958404541016, + "learning_rate": 5e-05, + "loss": 1.1234, + "num_input_tokens_seen": 322274040, + "step": 4818 + }, + { + "epoch": 0.5467234042553192, + "loss": 0.8796910047531128, + "loss_ce": 0.004935167729854584, + "loss_iou": 0.37109375, + "loss_num": 0.02685546875, + "loss_xval": 0.875, + "num_input_tokens_seen": 322274040, + "step": 4818 + }, + { + "epoch": 0.5468368794326242, + "grad_norm": 18.831268310546875, + "learning_rate": 5e-05, + "loss": 1.2235, + "num_input_tokens_seen": 322340068, + "step": 4819 + }, + { + "epoch": 0.5468368794326242, + "loss": 1.0877033472061157, + "loss_ce": 0.005183838307857513, + "loss_iou": 0.484375, + "loss_num": 0.0233154296875, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 322340068, + "step": 4819 + }, + { + "epoch": 0.546950354609929, + "grad_norm": 23.676517486572266, + "learning_rate": 5e-05, + "loss": 1.3235, + "num_input_tokens_seen": 322407016, + "step": 4820 + }, + { + "epoch": 0.546950354609929, + "loss": 1.2258317470550537, + "loss_ce": 0.005738936364650726, + "loss_iou": 0.48828125, + "loss_num": 0.048828125, + "loss_xval": 1.21875, + "num_input_tokens_seen": 322407016, + "step": 4820 + }, + { + "epoch": 0.547063829787234, + "grad_norm": 23.11874008178711, + "learning_rate": 5e-05, + "loss": 1.0697, + "num_input_tokens_seen": 322475256, + "step": 4821 + }, + { + "epoch": 0.547063829787234, + "loss": 0.968431293964386, + "loss_ce": 0.006517226342111826, + "loss_iou": 0.4140625, + "loss_num": 0.0269775390625, + "loss_xval": 0.9609375, + "num_input_tokens_seen": 322475256, + "step": 4821 + }, + { + "epoch": 0.547177304964539, + "grad_norm": 29.249387741088867, + "learning_rate": 5e-05, + "loss": 1.1854, + "num_input_tokens_seen": 322542584, + "step": 4822 + }, + { + "epoch": 0.547177304964539, + "loss": 1.023277997970581, + "loss_ce": 0.00533355912193656, + "loss_iou": 0.419921875, + "loss_num": 0.035400390625, + "loss_xval": 1.015625, + "num_input_tokens_seen": 322542584, + "step": 4822 + }, + { + "epoch": 0.547290780141844, + "grad_norm": 41.5644416809082, + "learning_rate": 5e-05, + "loss": 1.2933, + "num_input_tokens_seen": 322609596, + "step": 4823 + }, + { + "epoch": 0.547290780141844, + "loss": 1.3930449485778809, + "loss_ce": 0.004861284978687763, + "loss_iou": 0.5234375, + "loss_num": 0.06884765625, + "loss_xval": 1.390625, + "num_input_tokens_seen": 322609596, + "step": 4823 + }, + { + "epoch": 0.5474042553191489, + "grad_norm": 26.28759765625, + "learning_rate": 5e-05, + "loss": 1.2127, + "num_input_tokens_seen": 322675736, + "step": 4824 + }, + { + "epoch": 0.5474042553191489, + "loss": 1.298349380493164, + "loss_ce": 0.0034275148063898087, + "loss_iou": 0.546875, + "loss_num": 0.041015625, + "loss_xval": 1.296875, + "num_input_tokens_seen": 322675736, + "step": 4824 + }, + { + "epoch": 0.5475177304964539, + "grad_norm": 9.726093292236328, + "learning_rate": 5e-05, + "loss": 1.0947, + "num_input_tokens_seen": 322742640, + "step": 4825 + }, + { + "epoch": 0.5475177304964539, + "loss": 1.07294499874115, + "loss_ce": 0.02167549729347229, + "loss_iou": 0.39453125, + "loss_num": 0.052734375, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 322742640, + "step": 4825 + }, + { + "epoch": 0.5476312056737589, + "grad_norm": 18.00224494934082, + "learning_rate": 5e-05, + "loss": 1.1543, + "num_input_tokens_seen": 322808992, + "step": 4826 + }, + { + "epoch": 0.5476312056737589, + "loss": 1.1062560081481934, + "loss_ce": 0.009088078513741493, + "loss_iou": 0.41015625, + "loss_num": 0.055908203125, + "loss_xval": 1.09375, + "num_input_tokens_seen": 322808992, + "step": 4826 + }, + { + "epoch": 0.5477446808510639, + "grad_norm": 30.209806442260742, + "learning_rate": 5e-05, + "loss": 1.0776, + "num_input_tokens_seen": 322875452, + "step": 4827 + }, + { + "epoch": 0.5477446808510639, + "loss": 1.0525414943695068, + "loss_ce": 0.008229834958910942, + "loss_iou": 0.388671875, + "loss_num": 0.053466796875, + "loss_xval": 1.046875, + "num_input_tokens_seen": 322875452, + "step": 4827 + }, + { + "epoch": 0.5478581560283688, + "grad_norm": 30.228797912597656, + "learning_rate": 5e-05, + "loss": 1.452, + "num_input_tokens_seen": 322942956, + "step": 4828 + }, + { + "epoch": 0.5478581560283688, + "loss": 1.437187671661377, + "loss_ce": 0.006523615214973688, + "loss_iou": 0.578125, + "loss_num": 0.054931640625, + "loss_xval": 1.4296875, + "num_input_tokens_seen": 322942956, + "step": 4828 + }, + { + "epoch": 0.5479716312056737, + "grad_norm": 34.4157600402832, + "learning_rate": 5e-05, + "loss": 1.1366, + "num_input_tokens_seen": 323010204, + "step": 4829 + }, + { + "epoch": 0.5479716312056737, + "loss": 1.1074292659759521, + "loss_ce": 0.004890165291726589, + "loss_iou": 0.474609375, + "loss_num": 0.030517578125, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 323010204, + "step": 4829 + }, + { + "epoch": 0.5480851063829787, + "grad_norm": 32.129337310791016, + "learning_rate": 5e-05, + "loss": 1.323, + "num_input_tokens_seen": 323077480, + "step": 4830 + }, + { + "epoch": 0.5480851063829787, + "loss": 1.383744716644287, + "loss_ce": 0.005326786078512669, + "loss_iou": 0.57421875, + "loss_num": 0.04638671875, + "loss_xval": 1.375, + "num_input_tokens_seen": 323077480, + "step": 4830 + }, + { + "epoch": 0.5481985815602837, + "grad_norm": 25.851654052734375, + "learning_rate": 5e-05, + "loss": 1.1588, + "num_input_tokens_seen": 323143664, + "step": 4831 + }, + { + "epoch": 0.5481985815602837, + "loss": 1.2588473558425903, + "loss_ce": 0.0039645349606871605, + "loss_iou": 0.5390625, + "loss_num": 0.0361328125, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 323143664, + "step": 4831 + }, + { + "epoch": 0.5483120567375886, + "grad_norm": 27.280670166015625, + "learning_rate": 5e-05, + "loss": 1.2849, + "num_input_tokens_seen": 323210720, + "step": 4832 + }, + { + "epoch": 0.5483120567375886, + "loss": 1.2618792057037354, + "loss_ce": 0.004066607914865017, + "loss_iou": 0.5, + "loss_num": 0.05126953125, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 323210720, + "step": 4832 + }, + { + "epoch": 0.5484255319148936, + "grad_norm": 435.752197265625, + "learning_rate": 5e-05, + "loss": 1.1626, + "num_input_tokens_seen": 323277176, + "step": 4833 + }, + { + "epoch": 0.5484255319148936, + "loss": 1.3390836715698242, + "loss_ce": 0.0036344320978969336, + "loss_iou": 0.54296875, + "loss_num": 0.04931640625, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 323277176, + "step": 4833 + }, + { + "epoch": 0.5485390070921986, + "grad_norm": 27.521949768066406, + "learning_rate": 5e-05, + "loss": 1.1727, + "num_input_tokens_seen": 323343780, + "step": 4834 + }, + { + "epoch": 0.5485390070921986, + "loss": 1.0784566402435303, + "loss_ce": 0.006191074848175049, + "loss_iou": 0.45703125, + "loss_num": 0.031494140625, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 323343780, + "step": 4834 + }, + { + "epoch": 0.5486524822695036, + "grad_norm": 37.217838287353516, + "learning_rate": 5e-05, + "loss": 1.3283, + "num_input_tokens_seen": 323410504, + "step": 4835 + }, + { + "epoch": 0.5486524822695036, + "loss": 1.275057315826416, + "loss_ce": 0.0035728763323277235, + "loss_iou": 0.52734375, + "loss_num": 0.04345703125, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 323410504, + "step": 4835 + }, + { + "epoch": 0.5487659574468086, + "grad_norm": 22.4635066986084, + "learning_rate": 5e-05, + "loss": 1.2443, + "num_input_tokens_seen": 323478056, + "step": 4836 + }, + { + "epoch": 0.5487659574468086, + "loss": 1.1390366554260254, + "loss_ce": 0.006224189884960651, + "loss_iou": 0.462890625, + "loss_num": 0.041259765625, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 323478056, + "step": 4836 + }, + { + "epoch": 0.5488794326241134, + "grad_norm": 23.751585006713867, + "learning_rate": 5e-05, + "loss": 1.1078, + "num_input_tokens_seen": 323545020, + "step": 4837 + }, + { + "epoch": 0.5488794326241134, + "loss": 1.3783693313598633, + "loss_ce": 0.008740389719605446, + "loss_iou": 0.5546875, + "loss_num": 0.052734375, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 323545020, + "step": 4837 + }, + { + "epoch": 0.5489929078014184, + "grad_norm": 28.622669219970703, + "learning_rate": 5e-05, + "loss": 1.3197, + "num_input_tokens_seen": 323611900, + "step": 4838 + }, + { + "epoch": 0.5489929078014184, + "loss": 1.2537482976913452, + "loss_ce": 0.0061897882260382175, + "loss_iou": 0.51953125, + "loss_num": 0.0419921875, + "loss_xval": 1.25, + "num_input_tokens_seen": 323611900, + "step": 4838 + }, + { + "epoch": 0.5491063829787234, + "grad_norm": 28.02667999267578, + "learning_rate": 5e-05, + "loss": 0.8147, + "num_input_tokens_seen": 323678356, + "step": 4839 + }, + { + "epoch": 0.5491063829787234, + "loss": 0.639404296875, + "loss_ce": 0.005554249044507742, + "loss_iou": 0.2578125, + "loss_num": 0.0234375, + "loss_xval": 0.6328125, + "num_input_tokens_seen": 323678356, + "step": 4839 + }, + { + "epoch": 0.5492198581560284, + "grad_norm": 28.852413177490234, + "learning_rate": 5e-05, + "loss": 1.288, + "num_input_tokens_seen": 323745924, + "step": 4840 + }, + { + "epoch": 0.5492198581560284, + "loss": 1.266641616821289, + "loss_ce": 0.005411061458289623, + "loss_iou": 0.55078125, + "loss_num": 0.031982421875, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 323745924, + "step": 4840 + }, + { + "epoch": 0.5493333333333333, + "grad_norm": 32.95180892944336, + "learning_rate": 5e-05, + "loss": 1.2208, + "num_input_tokens_seen": 323812420, + "step": 4841 + }, + { + "epoch": 0.5493333333333333, + "loss": 1.026151418685913, + "loss_ce": 0.003690547775477171, + "loss_iou": 0.42578125, + "loss_num": 0.0341796875, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 323812420, + "step": 4841 + }, + { + "epoch": 0.5494468085106383, + "grad_norm": 44.16696548461914, + "learning_rate": 5e-05, + "loss": 1.1963, + "num_input_tokens_seen": 323878424, + "step": 4842 + }, + { + "epoch": 0.5494468085106383, + "loss": 1.2501498460769653, + "loss_ce": 0.003079609479755163, + "loss_iou": 0.5234375, + "loss_num": 0.039794921875, + "loss_xval": 1.25, + "num_input_tokens_seen": 323878424, + "step": 4842 + }, + { + "epoch": 0.5495602836879433, + "grad_norm": 21.549196243286133, + "learning_rate": 5e-05, + "loss": 1.2299, + "num_input_tokens_seen": 323945056, + "step": 4843 + }, + { + "epoch": 0.5495602836879433, + "loss": 1.1769170761108398, + "loss_ce": 0.004553844220936298, + "loss_iou": 0.455078125, + "loss_num": 0.05224609375, + "loss_xval": 1.171875, + "num_input_tokens_seen": 323945056, + "step": 4843 + }, + { + "epoch": 0.5496737588652483, + "grad_norm": 24.45783042907715, + "learning_rate": 5e-05, + "loss": 1.0495, + "num_input_tokens_seen": 324013396, + "step": 4844 + }, + { + "epoch": 0.5496737588652483, + "loss": 1.1600439548492432, + "loss_ce": 0.006235363893210888, + "loss_iou": 0.443359375, + "loss_num": 0.053466796875, + "loss_xval": 1.15625, + "num_input_tokens_seen": 324013396, + "step": 4844 + }, + { + "epoch": 0.5497872340425531, + "grad_norm": 38.91545104980469, + "learning_rate": 5e-05, + "loss": 1.174, + "num_input_tokens_seen": 324081284, + "step": 4845 + }, + { + "epoch": 0.5497872340425531, + "loss": 1.143932819366455, + "loss_ce": 0.006237521767616272, + "loss_iou": 0.48046875, + "loss_num": 0.03466796875, + "loss_xval": 1.140625, + "num_input_tokens_seen": 324081284, + "step": 4845 + }, + { + "epoch": 0.5499007092198581, + "grad_norm": 34.84029769897461, + "learning_rate": 5e-05, + "loss": 1.393, + "num_input_tokens_seen": 324148648, + "step": 4846 + }, + { + "epoch": 0.5499007092198581, + "loss": 1.5020519495010376, + "loss_ce": 0.008399629965424538, + "loss_iou": 0.62109375, + "loss_num": 0.050537109375, + "loss_xval": 1.4921875, + "num_input_tokens_seen": 324148648, + "step": 4846 + }, + { + "epoch": 0.5500141843971631, + "grad_norm": 20.976232528686523, + "learning_rate": 5e-05, + "loss": 1.1361, + "num_input_tokens_seen": 324215180, + "step": 4847 + }, + { + "epoch": 0.5500141843971631, + "loss": 0.9861505031585693, + "loss_ce": 0.0051934607326984406, + "loss_iou": 0.376953125, + "loss_num": 0.044921875, + "loss_xval": 0.98046875, + "num_input_tokens_seen": 324215180, + "step": 4847 + }, + { + "epoch": 0.5501276595744681, + "grad_norm": 32.298667907714844, + "learning_rate": 5e-05, + "loss": 1.1957, + "num_input_tokens_seen": 324281660, + "step": 4848 + }, + { + "epoch": 0.5501276595744681, + "loss": 1.3103723526000977, + "loss_ce": 0.004220087081193924, + "loss_iou": 0.546875, + "loss_num": 0.042236328125, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 324281660, + "step": 4848 + }, + { + "epoch": 0.550241134751773, + "grad_norm": 26.908367156982422, + "learning_rate": 5e-05, + "loss": 1.1333, + "num_input_tokens_seen": 324348068, + "step": 4849 + }, + { + "epoch": 0.550241134751773, + "loss": 0.9879462718963623, + "loss_ce": 0.006745155900716782, + "loss_iou": 0.361328125, + "loss_num": 0.051513671875, + "loss_xval": 0.98046875, + "num_input_tokens_seen": 324348068, + "step": 4849 + }, + { + "epoch": 0.550354609929078, + "grad_norm": 68.06443786621094, + "learning_rate": 5e-05, + "loss": 1.2428, + "num_input_tokens_seen": 324415208, + "step": 4850 + }, + { + "epoch": 0.550354609929078, + "loss": 1.1464838981628418, + "loss_ce": 0.0048822928220033646, + "loss_iou": 0.46875, + "loss_num": 0.040771484375, + "loss_xval": 1.140625, + "num_input_tokens_seen": 324415208, + "step": 4850 + }, + { + "epoch": 0.550468085106383, + "grad_norm": 17.377479553222656, + "learning_rate": 5e-05, + "loss": 1.2054, + "num_input_tokens_seen": 324482152, + "step": 4851 + }, + { + "epoch": 0.550468085106383, + "loss": 1.2302958965301514, + "loss_ce": 0.008127948269248009, + "loss_iou": 0.51953125, + "loss_num": 0.037109375, + "loss_xval": 1.21875, + "num_input_tokens_seen": 324482152, + "step": 4851 + }, + { + "epoch": 0.550581560283688, + "grad_norm": 13.390669822692871, + "learning_rate": 5e-05, + "loss": 1.1361, + "num_input_tokens_seen": 324548524, + "step": 4852 + }, + { + "epoch": 0.550581560283688, + "loss": 1.0742591619491577, + "loss_ce": 0.003214287105947733, + "loss_iou": 0.447265625, + "loss_num": 0.035400390625, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 324548524, + "step": 4852 + }, + { + "epoch": 0.5506950354609929, + "grad_norm": 15.736796379089355, + "learning_rate": 5e-05, + "loss": 1.2668, + "num_input_tokens_seen": 324616400, + "step": 4853 + }, + { + "epoch": 0.5506950354609929, + "loss": 1.460629940032959, + "loss_ce": 0.0075049614533782005, + "loss_iou": 0.58203125, + "loss_num": 0.05712890625, + "loss_xval": 1.453125, + "num_input_tokens_seen": 324616400, + "step": 4853 + }, + { + "epoch": 0.5508085106382978, + "grad_norm": 20.2037353515625, + "learning_rate": 5e-05, + "loss": 1.2403, + "num_input_tokens_seen": 324682964, + "step": 4854 + }, + { + "epoch": 0.5508085106382978, + "loss": 1.224177598953247, + "loss_ce": 0.007380753755569458, + "loss_iou": 0.478515625, + "loss_num": 0.052001953125, + "loss_xval": 1.21875, + "num_input_tokens_seen": 324682964, + "step": 4854 + }, + { + "epoch": 0.5509219858156028, + "grad_norm": 24.908597946166992, + "learning_rate": 5e-05, + "loss": 1.0327, + "num_input_tokens_seen": 324748988, + "step": 4855 + }, + { + "epoch": 0.5509219858156028, + "loss": 1.162646770477295, + "loss_ce": 0.004443714395165443, + "loss_iou": 0.5078125, + "loss_num": 0.0281982421875, + "loss_xval": 1.15625, + "num_input_tokens_seen": 324748988, + "step": 4855 + }, + { + "epoch": 0.5510354609929078, + "grad_norm": 30.54432487487793, + "learning_rate": 5e-05, + "loss": 1.3029, + "num_input_tokens_seen": 324815984, + "step": 4856 + }, + { + "epoch": 0.5510354609929078, + "loss": 1.3074531555175781, + "loss_ce": 0.012042983435094357, + "loss_iou": 0.53515625, + "loss_num": 0.04443359375, + "loss_xval": 1.296875, + "num_input_tokens_seen": 324815984, + "step": 4856 + }, + { + "epoch": 0.5511489361702128, + "grad_norm": 42.995079040527344, + "learning_rate": 5e-05, + "loss": 1.0813, + "num_input_tokens_seen": 324882692, + "step": 4857 + }, + { + "epoch": 0.5511489361702128, + "loss": 1.1504862308502197, + "loss_ce": 0.009006708860397339, + "loss_iou": 0.455078125, + "loss_num": 0.046630859375, + "loss_xval": 1.140625, + "num_input_tokens_seen": 324882692, + "step": 4857 + }, + { + "epoch": 0.5512624113475177, + "grad_norm": 28.267303466796875, + "learning_rate": 5e-05, + "loss": 1.3999, + "num_input_tokens_seen": 324950608, + "step": 4858 + }, + { + "epoch": 0.5512624113475177, + "loss": 1.4119441509246826, + "loss_ce": 0.007159017026424408, + "loss_iou": 0.55859375, + "loss_num": 0.05810546875, + "loss_xval": 1.40625, + "num_input_tokens_seen": 324950608, + "step": 4858 + }, + { + "epoch": 0.5513758865248227, + "grad_norm": 39.067813873291016, + "learning_rate": 5e-05, + "loss": 1.2528, + "num_input_tokens_seen": 325018624, + "step": 4859 + }, + { + "epoch": 0.5513758865248227, + "loss": 1.382075309753418, + "loss_ce": 0.00463394820690155, + "loss_iou": 0.5234375, + "loss_num": 0.06591796875, + "loss_xval": 1.375, + "num_input_tokens_seen": 325018624, + "step": 4859 + }, + { + "epoch": 0.5514893617021277, + "grad_norm": 24.57083511352539, + "learning_rate": 5e-05, + "loss": 1.5093, + "num_input_tokens_seen": 325085068, + "step": 4860 + }, + { + "epoch": 0.5514893617021277, + "loss": 1.7544817924499512, + "loss_ce": 0.006434886250644922, + "loss_iou": 0.7109375, + "loss_num": 0.0654296875, + "loss_xval": 1.75, + "num_input_tokens_seen": 325085068, + "step": 4860 + }, + { + "epoch": 0.5516028368794327, + "grad_norm": 45.100486755371094, + "learning_rate": 5e-05, + "loss": 1.0021, + "num_input_tokens_seen": 325151832, + "step": 4861 + }, + { + "epoch": 0.5516028368794327, + "loss": 0.9761312007904053, + "loss_ce": 0.00463462108746171, + "loss_iou": 0.40625, + "loss_num": 0.031494140625, + "loss_xval": 0.97265625, + "num_input_tokens_seen": 325151832, + "step": 4861 + }, + { + "epoch": 0.5517163120567375, + "grad_norm": 41.35369110107422, + "learning_rate": 5e-05, + "loss": 1.2665, + "num_input_tokens_seen": 325218912, + "step": 4862 + }, + { + "epoch": 0.5517163120567375, + "loss": 1.282834529876709, + "loss_ce": 0.007932104170322418, + "loss_iou": 0.4921875, + "loss_num": 0.0576171875, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 325218912, + "step": 4862 + }, + { + "epoch": 0.5518297872340425, + "grad_norm": 43.8740234375, + "learning_rate": 5e-05, + "loss": 1.2186, + "num_input_tokens_seen": 325286476, + "step": 4863 + }, + { + "epoch": 0.5518297872340425, + "loss": 1.1573357582092285, + "loss_ce": 0.004015483893454075, + "loss_iou": 0.5078125, + "loss_num": 0.0269775390625, + "loss_xval": 1.15625, + "num_input_tokens_seen": 325286476, + "step": 4863 + }, + { + "epoch": 0.5519432624113475, + "grad_norm": 31.615039825439453, + "learning_rate": 5e-05, + "loss": 1.3355, + "num_input_tokens_seen": 325354360, + "step": 4864 + }, + { + "epoch": 0.5519432624113475, + "loss": 1.2480576038360596, + "loss_ce": 0.007334965281188488, + "loss_iou": 0.53125, + "loss_num": 0.03515625, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 325354360, + "step": 4864 + }, + { + "epoch": 0.5520567375886525, + "grad_norm": 21.22825813293457, + "learning_rate": 5e-05, + "loss": 1.1649, + "num_input_tokens_seen": 325420924, + "step": 4865 + }, + { + "epoch": 0.5520567375886525, + "loss": 1.370399832725525, + "loss_ce": 0.008583429269492626, + "loss_iou": 0.546875, + "loss_num": 0.054443359375, + "loss_xval": 1.359375, + "num_input_tokens_seen": 325420924, + "step": 4865 + }, + { + "epoch": 0.5521702127659575, + "grad_norm": 27.424440383911133, + "learning_rate": 5e-05, + "loss": 1.0864, + "num_input_tokens_seen": 325488472, + "step": 4866 + }, + { + "epoch": 0.5521702127659575, + "loss": 0.9202736616134644, + "loss_ce": 0.0027931849472224712, + "loss_iou": 0.396484375, + "loss_num": 0.0247802734375, + "loss_xval": 0.91796875, + "num_input_tokens_seen": 325488472, + "step": 4866 + }, + { + "epoch": 0.5522836879432624, + "grad_norm": 31.827369689941406, + "learning_rate": 5e-05, + "loss": 1.4341, + "num_input_tokens_seen": 325554408, + "step": 4867 + }, + { + "epoch": 0.5522836879432624, + "loss": 1.3372728824615479, + "loss_ce": 0.004753367509692907, + "loss_iou": 0.5625, + "loss_num": 0.041748046875, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 325554408, + "step": 4867 + }, + { + "epoch": 0.5523971631205674, + "grad_norm": 31.6961727142334, + "learning_rate": 5e-05, + "loss": 1.4931, + "num_input_tokens_seen": 325621140, + "step": 4868 + }, + { + "epoch": 0.5523971631205674, + "loss": 1.3124006986618042, + "loss_ce": 0.005760133266448975, + "loss_iou": 0.53515625, + "loss_num": 0.04638671875, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 325621140, + "step": 4868 + }, + { + "epoch": 0.5525106382978724, + "grad_norm": 28.14188003540039, + "learning_rate": 5e-05, + "loss": 1.196, + "num_input_tokens_seen": 325688536, + "step": 4869 + }, + { + "epoch": 0.5525106382978724, + "loss": 1.0877350568771362, + "loss_ce": 0.009610082022845745, + "loss_iou": 0.478515625, + "loss_num": 0.024169921875, + "loss_xval": 1.078125, + "num_input_tokens_seen": 325688536, + "step": 4869 + }, + { + "epoch": 0.5526241134751773, + "grad_norm": 57.48733901977539, + "learning_rate": 5e-05, + "loss": 1.4382, + "num_input_tokens_seen": 325755928, + "step": 4870 + }, + { + "epoch": 0.5526241134751773, + "loss": 1.5930358171463013, + "loss_ce": 0.009051414206624031, + "loss_iou": 0.6171875, + "loss_num": 0.06982421875, + "loss_xval": 1.5859375, + "num_input_tokens_seen": 325755928, + "step": 4870 + }, + { + "epoch": 0.5527375886524822, + "grad_norm": 19.352127075195312, + "learning_rate": 5e-05, + "loss": 1.1368, + "num_input_tokens_seen": 325823684, + "step": 4871 + }, + { + "epoch": 0.5527375886524822, + "loss": 1.1804755926132202, + "loss_ce": 0.007624058984220028, + "loss_iou": 0.470703125, + "loss_num": 0.04638671875, + "loss_xval": 1.171875, + "num_input_tokens_seen": 325823684, + "step": 4871 + }, + { + "epoch": 0.5528510638297872, + "grad_norm": 19.38819694519043, + "learning_rate": 5e-05, + "loss": 1.1646, + "num_input_tokens_seen": 325890368, + "step": 4872 + }, + { + "epoch": 0.5528510638297872, + "loss": 1.24666428565979, + "loss_ce": 0.005621207877993584, + "loss_iou": 0.498046875, + "loss_num": 0.049072265625, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 325890368, + "step": 4872 + }, + { + "epoch": 0.5529645390070922, + "grad_norm": 20.40973663330078, + "learning_rate": 5e-05, + "loss": 0.935, + "num_input_tokens_seen": 325956440, + "step": 4873 + }, + { + "epoch": 0.5529645390070922, + "loss": 0.8346619606018066, + "loss_ce": 0.010565283708274364, + "loss_iou": 0.3671875, + "loss_num": 0.0181884765625, + "loss_xval": 0.82421875, + "num_input_tokens_seen": 325956440, + "step": 4873 + }, + { + "epoch": 0.5530780141843972, + "grad_norm": 32.04225540161133, + "learning_rate": 5e-05, + "loss": 1.2081, + "num_input_tokens_seen": 326023168, + "step": 4874 + }, + { + "epoch": 0.5530780141843972, + "loss": 1.148017406463623, + "loss_ce": 0.005927521735429764, + "loss_iou": 0.4921875, + "loss_num": 0.03125, + "loss_xval": 1.140625, + "num_input_tokens_seen": 326023168, + "step": 4874 + }, + { + "epoch": 0.5531914893617021, + "grad_norm": 30.888229370117188, + "learning_rate": 5e-05, + "loss": 1.2061, + "num_input_tokens_seen": 326089360, + "step": 4875 + }, + { + "epoch": 0.5531914893617021, + "loss": 1.3406128883361816, + "loss_ce": 0.0036988863721489906, + "loss_iou": 0.52734375, + "loss_num": 0.055908203125, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 326089360, + "step": 4875 + }, + { + "epoch": 0.5533049645390071, + "grad_norm": 25.757047653198242, + "learning_rate": 5e-05, + "loss": 1.2527, + "num_input_tokens_seen": 326156152, + "step": 4876 + }, + { + "epoch": 0.5533049645390071, + "loss": 1.1662644147872925, + "loss_ce": 0.005619896575808525, + "loss_iou": 0.46484375, + "loss_num": 0.0458984375, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 326156152, + "step": 4876 + }, + { + "epoch": 0.5534184397163121, + "grad_norm": 34.939361572265625, + "learning_rate": 5e-05, + "loss": 1.0359, + "num_input_tokens_seen": 326221740, + "step": 4877 + }, + { + "epoch": 0.5534184397163121, + "loss": 1.1753151416778564, + "loss_ce": 0.006613994017243385, + "loss_iou": 0.4453125, + "loss_num": 0.055419921875, + "loss_xval": 1.171875, + "num_input_tokens_seen": 326221740, + "step": 4877 + }, + { + "epoch": 0.553531914893617, + "grad_norm": 24.035966873168945, + "learning_rate": 5e-05, + "loss": 1.2722, + "num_input_tokens_seen": 326289684, + "step": 4878 + }, + { + "epoch": 0.553531914893617, + "loss": 1.1824181079864502, + "loss_ce": 0.011031469330191612, + "loss_iou": 0.453125, + "loss_num": 0.053466796875, + "loss_xval": 1.171875, + "num_input_tokens_seen": 326289684, + "step": 4878 + }, + { + "epoch": 0.553645390070922, + "grad_norm": 29.289020538330078, + "learning_rate": 5e-05, + "loss": 1.1465, + "num_input_tokens_seen": 326357184, + "step": 4879 + }, + { + "epoch": 0.553645390070922, + "loss": 1.101100206375122, + "loss_ce": 0.004420542158186436, + "loss_iou": 0.42578125, + "loss_num": 0.048583984375, + "loss_xval": 1.09375, + "num_input_tokens_seen": 326357184, + "step": 4879 + }, + { + "epoch": 0.5537588652482269, + "grad_norm": 304.9296569824219, + "learning_rate": 5e-05, + "loss": 1.4277, + "num_input_tokens_seen": 326424700, + "step": 4880 + }, + { + "epoch": 0.5537588652482269, + "loss": 1.3985345363616943, + "loss_ce": 0.007421337068080902, + "loss_iou": 0.5859375, + "loss_num": 0.0439453125, + "loss_xval": 1.390625, + "num_input_tokens_seen": 326424700, + "step": 4880 + }, + { + "epoch": 0.5538723404255319, + "grad_norm": 54.027591705322266, + "learning_rate": 5e-05, + "loss": 1.1895, + "num_input_tokens_seen": 326491764, + "step": 4881 + }, + { + "epoch": 0.5538723404255319, + "loss": 1.2615382671356201, + "loss_ce": 0.0076319631189107895, + "loss_iou": 0.51171875, + "loss_num": 0.046875, + "loss_xval": 1.25, + "num_input_tokens_seen": 326491764, + "step": 4881 + }, + { + "epoch": 0.5539858156028369, + "grad_norm": 25.84970474243164, + "learning_rate": 5e-05, + "loss": 1.2884, + "num_input_tokens_seen": 326559380, + "step": 4882 + }, + { + "epoch": 0.5539858156028369, + "loss": 1.2070941925048828, + "loss_ce": 0.004457524977624416, + "loss_iou": 0.5390625, + "loss_num": 0.0240478515625, + "loss_xval": 1.203125, + "num_input_tokens_seen": 326559380, + "step": 4882 + }, + { + "epoch": 0.5540992907801419, + "grad_norm": 49.481136322021484, + "learning_rate": 5e-05, + "loss": 1.1838, + "num_input_tokens_seen": 326625692, + "step": 4883 + }, + { + "epoch": 0.5540992907801419, + "loss": 1.1914341449737549, + "loss_ce": 0.006375588476657867, + "loss_iou": 0.50390625, + "loss_num": 0.03466796875, + "loss_xval": 1.1875, + "num_input_tokens_seen": 326625692, + "step": 4883 + }, + { + "epoch": 0.5542127659574468, + "grad_norm": 33.659786224365234, + "learning_rate": 5e-05, + "loss": 1.1492, + "num_input_tokens_seen": 326693648, + "step": 4884 + }, + { + "epoch": 0.5542127659574468, + "loss": 1.322335958480835, + "loss_ce": 0.0064180269837379456, + "loss_iou": 0.50390625, + "loss_num": 0.062255859375, + "loss_xval": 1.3125, + "num_input_tokens_seen": 326693648, + "step": 4884 + }, + { + "epoch": 0.5543262411347518, + "grad_norm": 30.668045043945312, + "learning_rate": 5e-05, + "loss": 1.146, + "num_input_tokens_seen": 326760620, + "step": 4885 + }, + { + "epoch": 0.5543262411347518, + "loss": 1.0356366634368896, + "loss_ce": 0.003898418741300702, + "loss_iou": 0.4453125, + "loss_num": 0.028564453125, + "loss_xval": 1.03125, + "num_input_tokens_seen": 326760620, + "step": 4885 + }, + { + "epoch": 0.5544397163120567, + "grad_norm": 16.91985511779785, + "learning_rate": 5e-05, + "loss": 1.2227, + "num_input_tokens_seen": 326827736, + "step": 4886 + }, + { + "epoch": 0.5544397163120567, + "loss": 1.3730034828186035, + "loss_ce": 0.003984983079135418, + "loss_iou": 0.515625, + "loss_num": 0.06689453125, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 326827736, + "step": 4886 + }, + { + "epoch": 0.5545531914893617, + "grad_norm": 20.3210391998291, + "learning_rate": 5e-05, + "loss": 1.1928, + "num_input_tokens_seen": 326894436, + "step": 4887 + }, + { + "epoch": 0.5545531914893617, + "loss": 1.352539300918579, + "loss_ce": 0.012207239866256714, + "loss_iou": 0.50390625, + "loss_num": 0.06689453125, + "loss_xval": 1.34375, + "num_input_tokens_seen": 326894436, + "step": 4887 + }, + { + "epoch": 0.5546666666666666, + "grad_norm": 28.004413604736328, + "learning_rate": 5e-05, + "loss": 1.3433, + "num_input_tokens_seen": 326961312, + "step": 4888 + }, + { + "epoch": 0.5546666666666666, + "loss": 1.3626012802124023, + "loss_ce": 0.008109075017273426, + "loss_iou": 0.51953125, + "loss_num": 0.06396484375, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 326961312, + "step": 4888 + }, + { + "epoch": 0.5547801418439716, + "grad_norm": 38.46113586425781, + "learning_rate": 5e-05, + "loss": 1.2844, + "num_input_tokens_seen": 327028084, + "step": 4889 + }, + { + "epoch": 0.5547801418439716, + "loss": 1.1243358850479126, + "loss_ce": 0.010566379874944687, + "loss_iou": 0.4453125, + "loss_num": 0.044189453125, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 327028084, + "step": 4889 + }, + { + "epoch": 0.5548936170212766, + "grad_norm": 22.410917282104492, + "learning_rate": 5e-05, + "loss": 1.2296, + "num_input_tokens_seen": 327094684, + "step": 4890 + }, + { + "epoch": 0.5548936170212766, + "loss": 1.244894027709961, + "loss_ce": 0.005147924646735191, + "loss_iou": 0.5078125, + "loss_num": 0.045166015625, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 327094684, + "step": 4890 + }, + { + "epoch": 0.5550070921985816, + "grad_norm": 35.71026611328125, + "learning_rate": 5e-05, + "loss": 1.1414, + "num_input_tokens_seen": 327161460, + "step": 4891 + }, + { + "epoch": 0.5550070921985816, + "loss": 1.3471589088439941, + "loss_ce": 0.005362105090171099, + "loss_iou": 0.55078125, + "loss_num": 0.048828125, + "loss_xval": 1.34375, + "num_input_tokens_seen": 327161460, + "step": 4891 + }, + { + "epoch": 0.5551205673758866, + "grad_norm": 30.544553756713867, + "learning_rate": 5e-05, + "loss": 1.2347, + "num_input_tokens_seen": 327227304, + "step": 4892 + }, + { + "epoch": 0.5551205673758866, + "loss": 1.2615305185317993, + "loss_ce": 0.00371801620349288, + "loss_iou": 0.53515625, + "loss_num": 0.037841796875, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 327227304, + "step": 4892 + }, + { + "epoch": 0.5552340425531915, + "grad_norm": 26.211193084716797, + "learning_rate": 5e-05, + "loss": 1.4728, + "num_input_tokens_seen": 327295132, + "step": 4893 + }, + { + "epoch": 0.5552340425531915, + "loss": 1.4961729049682617, + "loss_ce": 0.010821318253874779, + "loss_iou": 0.609375, + "loss_num": 0.05224609375, + "loss_xval": 1.484375, + "num_input_tokens_seen": 327295132, + "step": 4893 + }, + { + "epoch": 0.5553475177304965, + "grad_norm": 21.967811584472656, + "learning_rate": 5e-05, + "loss": 1.0835, + "num_input_tokens_seen": 327362052, + "step": 4894 + }, + { + "epoch": 0.5553475177304965, + "loss": 1.1525533199310303, + "loss_ce": 0.007045449689030647, + "loss_iou": 0.458984375, + "loss_num": 0.04541015625, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 327362052, + "step": 4894 + }, + { + "epoch": 0.5554609929078014, + "grad_norm": 15.629730224609375, + "learning_rate": 5e-05, + "loss": 1.0281, + "num_input_tokens_seen": 327428552, + "step": 4895 + }, + { + "epoch": 0.5554609929078014, + "loss": 1.0818918943405151, + "loss_ce": 0.005720058921724558, + "loss_iou": 0.447265625, + "loss_num": 0.036376953125, + "loss_xval": 1.078125, + "num_input_tokens_seen": 327428552, + "step": 4895 + }, + { + "epoch": 0.5555744680851064, + "grad_norm": 14.752900123596191, + "learning_rate": 5e-05, + "loss": 1.135, + "num_input_tokens_seen": 327495576, + "step": 4896 + }, + { + "epoch": 0.5555744680851064, + "loss": 1.1015275716781616, + "loss_ce": 0.00435954425483942, + "loss_iou": 0.482421875, + "loss_num": 0.0264892578125, + "loss_xval": 1.09375, + "num_input_tokens_seen": 327495576, + "step": 4896 + }, + { + "epoch": 0.5556879432624113, + "grad_norm": 34.58833312988281, + "learning_rate": 5e-05, + "loss": 1.2309, + "num_input_tokens_seen": 327562932, + "step": 4897 + }, + { + "epoch": 0.5556879432624113, + "loss": 1.193885326385498, + "loss_ce": 0.004432239104062319, + "loss_iou": 0.4609375, + "loss_num": 0.05322265625, + "loss_xval": 1.1875, + "num_input_tokens_seen": 327562932, + "step": 4897 + }, + { + "epoch": 0.5558014184397163, + "grad_norm": 47.41596984863281, + "learning_rate": 5e-05, + "loss": 1.1697, + "num_input_tokens_seen": 327630120, + "step": 4898 + }, + { + "epoch": 0.5558014184397163, + "loss": 1.125788927078247, + "loss_ce": 0.006160055752843618, + "loss_iou": 0.46875, + "loss_num": 0.0361328125, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 327630120, + "step": 4898 + }, + { + "epoch": 0.5559148936170213, + "grad_norm": 19.69085693359375, + "learning_rate": 5e-05, + "loss": 1.4684, + "num_input_tokens_seen": 327696292, + "step": 4899 + }, + { + "epoch": 0.5559148936170213, + "loss": 1.2032148838043213, + "loss_ce": 0.010343756526708603, + "loss_iou": 0.46875, + "loss_num": 0.051513671875, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 327696292, + "step": 4899 + }, + { + "epoch": 0.5560283687943263, + "grad_norm": 22.07318115234375, + "learning_rate": 5e-05, + "loss": 1.2895, + "num_input_tokens_seen": 327763200, + "step": 4900 + }, + { + "epoch": 0.5560283687943263, + "loss": 1.3268612623214722, + "loss_ce": 0.005572214722633362, + "loss_iou": 0.515625, + "loss_num": 0.057861328125, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 327763200, + "step": 4900 + }, + { + "epoch": 0.5561418439716312, + "grad_norm": 28.534927368164062, + "learning_rate": 5e-05, + "loss": 1.1031, + "num_input_tokens_seen": 327828944, + "step": 4901 + }, + { + "epoch": 0.5561418439716312, + "loss": 1.1230030059814453, + "loss_ce": 0.006792023312300444, + "loss_iou": 0.48046875, + "loss_num": 0.0311279296875, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 327828944, + "step": 4901 + }, + { + "epoch": 0.5562553191489362, + "grad_norm": 30.272197723388672, + "learning_rate": 5e-05, + "loss": 1.2531, + "num_input_tokens_seen": 327897200, + "step": 4902 + }, + { + "epoch": 0.5562553191489362, + "loss": 1.2519745826721191, + "loss_ce": 0.008322182111442089, + "loss_iou": 0.5078125, + "loss_num": 0.046142578125, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 327897200, + "step": 4902 + }, + { + "epoch": 0.5563687943262411, + "grad_norm": 34.938720703125, + "learning_rate": 5e-05, + "loss": 1.0396, + "num_input_tokens_seen": 327963708, + "step": 4903 + }, + { + "epoch": 0.5563687943262411, + "loss": 1.0810601711273193, + "loss_ce": 0.00842827744781971, + "loss_iou": 0.439453125, + "loss_num": 0.0390625, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 327963708, + "step": 4903 + }, + { + "epoch": 0.5564822695035461, + "grad_norm": 26.336694717407227, + "learning_rate": 5e-05, + "loss": 1.3724, + "num_input_tokens_seen": 328030404, + "step": 4904 + }, + { + "epoch": 0.5564822695035461, + "loss": 1.4500830173492432, + "loss_ce": 0.00672368798404932, + "loss_iou": 0.59765625, + "loss_num": 0.0498046875, + "loss_xval": 1.4453125, + "num_input_tokens_seen": 328030404, + "step": 4904 + }, + { + "epoch": 0.556595744680851, + "grad_norm": 19.156496047973633, + "learning_rate": 5e-05, + "loss": 1.0581, + "num_input_tokens_seen": 328097460, + "step": 4905 + }, + { + "epoch": 0.556595744680851, + "loss": 1.0044851303100586, + "loss_ce": 0.0064382050186395645, + "loss_iou": 0.41015625, + "loss_num": 0.03515625, + "loss_xval": 1.0, + "num_input_tokens_seen": 328097460, + "step": 4905 + }, + { + "epoch": 0.556709219858156, + "grad_norm": 27.953222274780273, + "learning_rate": 5e-05, + "loss": 1.1435, + "num_input_tokens_seen": 328165076, + "step": 4906 + }, + { + "epoch": 0.556709219858156, + "loss": 1.2165849208831787, + "loss_ce": 0.007112310267984867, + "loss_iou": 0.484375, + "loss_num": 0.0478515625, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 328165076, + "step": 4906 + }, + { + "epoch": 0.556822695035461, + "grad_norm": 35.69458770751953, + "learning_rate": 5e-05, + "loss": 1.1951, + "num_input_tokens_seen": 328232336, + "step": 4907 + }, + { + "epoch": 0.556822695035461, + "loss": 1.1366339921951294, + "loss_ce": 0.00382147915661335, + "loss_iou": 0.470703125, + "loss_num": 0.0380859375, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 328232336, + "step": 4907 + }, + { + "epoch": 0.556936170212766, + "grad_norm": 27.087739944458008, + "learning_rate": 5e-05, + "loss": 1.4182, + "num_input_tokens_seen": 328299464, + "step": 4908 + }, + { + "epoch": 0.556936170212766, + "loss": 1.3731343746185303, + "loss_ce": 0.004970278590917587, + "loss_iou": 0.5625, + "loss_num": 0.048095703125, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 328299464, + "step": 4908 + }, + { + "epoch": 0.557049645390071, + "grad_norm": 24.63889503479004, + "learning_rate": 5e-05, + "loss": 1.244, + "num_input_tokens_seen": 328366820, + "step": 4909 + }, + { + "epoch": 0.557049645390071, + "loss": 1.2794160842895508, + "loss_ce": 0.004513746127486229, + "loss_iou": 0.470703125, + "loss_num": 0.06640625, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 328366820, + "step": 4909 + }, + { + "epoch": 0.5571631205673759, + "grad_norm": 71.44771575927734, + "learning_rate": 5e-05, + "loss": 1.1352, + "num_input_tokens_seen": 328434320, + "step": 4910 + }, + { + "epoch": 0.5571631205673759, + "loss": 0.9830795526504517, + "loss_ce": 0.007066353224217892, + "loss_iou": 0.4140625, + "loss_num": 0.0296630859375, + "loss_xval": 0.9765625, + "num_input_tokens_seen": 328434320, + "step": 4910 + }, + { + "epoch": 0.5572765957446808, + "grad_norm": 40.15235900878906, + "learning_rate": 5e-05, + "loss": 1.1665, + "num_input_tokens_seen": 328500812, + "step": 4911 + }, + { + "epoch": 0.5572765957446808, + "loss": 1.2563934326171875, + "loss_ce": 0.0049285669811069965, + "loss_iou": 0.5, + "loss_num": 0.05029296875, + "loss_xval": 1.25, + "num_input_tokens_seen": 328500812, + "step": 4911 + }, + { + "epoch": 0.5573900709219858, + "grad_norm": 42.40393829345703, + "learning_rate": 5e-05, + "loss": 1.3266, + "num_input_tokens_seen": 328566844, + "step": 4912 + }, + { + "epoch": 0.5573900709219858, + "loss": 1.3809901475906372, + "loss_ce": 0.004525251220911741, + "loss_iou": 0.55859375, + "loss_num": 0.051513671875, + "loss_xval": 1.375, + "num_input_tokens_seen": 328566844, + "step": 4912 + }, + { + "epoch": 0.5575035460992908, + "grad_norm": 20.28927993774414, + "learning_rate": 5e-05, + "loss": 1.3407, + "num_input_tokens_seen": 328633988, + "step": 4913 + }, + { + "epoch": 0.5575035460992908, + "loss": 1.1137956380844116, + "loss_ce": 0.004908881615847349, + "loss_iou": 0.453125, + "loss_num": 0.040771484375, + "loss_xval": 1.109375, + "num_input_tokens_seen": 328633988, + "step": 4913 + }, + { + "epoch": 0.5576170212765957, + "grad_norm": 22.578060150146484, + "learning_rate": 5e-05, + "loss": 1.2439, + "num_input_tokens_seen": 328701280, + "step": 4914 + }, + { + "epoch": 0.5576170212765957, + "loss": 1.3040344715118408, + "loss_ce": 0.007647767197340727, + "loss_iou": 0.5625, + "loss_num": 0.034423828125, + "loss_xval": 1.296875, + "num_input_tokens_seen": 328701280, + "step": 4914 + }, + { + "epoch": 0.5577304964539007, + "grad_norm": 31.764896392822266, + "learning_rate": 5e-05, + "loss": 1.3421, + "num_input_tokens_seen": 328767768, + "step": 4915 + }, + { + "epoch": 0.5577304964539007, + "loss": 1.2779443264007568, + "loss_ce": 0.006459878291934729, + "loss_iou": 0.546875, + "loss_num": 0.035400390625, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 328767768, + "step": 4915 + }, + { + "epoch": 0.5578439716312057, + "grad_norm": 41.94508743286133, + "learning_rate": 5e-05, + "loss": 1.2077, + "num_input_tokens_seen": 328834812, + "step": 4916 + }, + { + "epoch": 0.5578439716312057, + "loss": 1.1098647117614746, + "loss_ce": 0.0048842946998775005, + "loss_iou": 0.494140625, + "loss_num": 0.0233154296875, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 328834812, + "step": 4916 + }, + { + "epoch": 0.5579574468085107, + "grad_norm": 22.989151000976562, + "learning_rate": 5e-05, + "loss": 1.2074, + "num_input_tokens_seen": 328901064, + "step": 4917 + }, + { + "epoch": 0.5579574468085107, + "loss": 1.2258522510528564, + "loss_ce": 0.006186760030686855, + "loss_iou": 0.4921875, + "loss_num": 0.04736328125, + "loss_xval": 1.21875, + "num_input_tokens_seen": 328901064, + "step": 4917 + }, + { + "epoch": 0.5580709219858156, + "grad_norm": 35.06822204589844, + "learning_rate": 5e-05, + "loss": 1.1569, + "num_input_tokens_seen": 328967608, + "step": 4918 + }, + { + "epoch": 0.5580709219858156, + "loss": 1.2829246520996094, + "loss_ce": 0.005092648323625326, + "loss_iou": 0.494140625, + "loss_num": 0.05810546875, + "loss_xval": 1.28125, + "num_input_tokens_seen": 328967608, + "step": 4918 + }, + { + "epoch": 0.5581843971631205, + "grad_norm": 21.757587432861328, + "learning_rate": 5e-05, + "loss": 1.2213, + "num_input_tokens_seen": 329034840, + "step": 4919 + }, + { + "epoch": 0.5581843971631205, + "loss": 1.239227533340454, + "loss_ce": 0.008758872747421265, + "loss_iou": 0.53125, + "loss_num": 0.033203125, + "loss_xval": 1.234375, + "num_input_tokens_seen": 329034840, + "step": 4919 + }, + { + "epoch": 0.5582978723404255, + "grad_norm": 32.247596740722656, + "learning_rate": 5e-05, + "loss": 1.3487, + "num_input_tokens_seen": 329102692, + "step": 4920 + }, + { + "epoch": 0.5582978723404255, + "loss": 1.471796989440918, + "loss_ce": 0.004023538436740637, + "loss_iou": 0.5859375, + "loss_num": 0.05859375, + "loss_xval": 1.46875, + "num_input_tokens_seen": 329102692, + "step": 4920 + }, + { + "epoch": 0.5584113475177305, + "grad_norm": 30.14851188659668, + "learning_rate": 5e-05, + "loss": 1.0924, + "num_input_tokens_seen": 329168876, + "step": 4921 + }, + { + "epoch": 0.5584113475177305, + "loss": 0.9283328056335449, + "loss_ce": 0.007922609336674213, + "loss_iou": 0.37890625, + "loss_num": 0.032470703125, + "loss_xval": 0.921875, + "num_input_tokens_seen": 329168876, + "step": 4921 + }, + { + "epoch": 0.5585248226950354, + "grad_norm": 29.027015686035156, + "learning_rate": 5e-05, + "loss": 1.2951, + "num_input_tokens_seen": 329235688, + "step": 4922 + }, + { + "epoch": 0.5585248226950354, + "loss": 1.313666820526123, + "loss_ce": 0.005073062609881163, + "loss_iou": 0.5390625, + "loss_num": 0.045654296875, + "loss_xval": 1.3125, + "num_input_tokens_seen": 329235688, + "step": 4922 + }, + { + "epoch": 0.5586382978723404, + "grad_norm": 38.775630950927734, + "learning_rate": 5e-05, + "loss": 1.1903, + "num_input_tokens_seen": 329301584, + "step": 4923 + }, + { + "epoch": 0.5586382978723404, + "loss": 1.2160162925720215, + "loss_ce": 0.0021490941289812326, + "loss_iou": 0.486328125, + "loss_num": 0.0478515625, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 329301584, + "step": 4923 + }, + { + "epoch": 0.5587517730496454, + "grad_norm": 37.9276237487793, + "learning_rate": 5e-05, + "loss": 1.4063, + "num_input_tokens_seen": 329368000, + "step": 4924 + }, + { + "epoch": 0.5587517730496454, + "loss": 1.1861025094985962, + "loss_ce": 0.006903289817273617, + "loss_iou": 0.515625, + "loss_num": 0.029296875, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 329368000, + "step": 4924 + }, + { + "epoch": 0.5588652482269504, + "grad_norm": 18.85363006591797, + "learning_rate": 5e-05, + "loss": 1.201, + "num_input_tokens_seen": 329434136, + "step": 4925 + }, + { + "epoch": 0.5588652482269504, + "loss": 1.386427879333496, + "loss_ce": 0.004103758372366428, + "loss_iou": 0.55078125, + "loss_num": 0.056640625, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 329434136, + "step": 4925 + }, + { + "epoch": 0.5589787234042554, + "grad_norm": 25.254364013671875, + "learning_rate": 5e-05, + "loss": 0.9745, + "num_input_tokens_seen": 329500056, + "step": 4926 + }, + { + "epoch": 0.5589787234042554, + "loss": 0.7796745896339417, + "loss_ce": 0.003185349516570568, + "loss_iou": 0.337890625, + "loss_num": 0.0196533203125, + "loss_xval": 0.77734375, + "num_input_tokens_seen": 329500056, + "step": 4926 + }, + { + "epoch": 0.5590921985815602, + "grad_norm": 28.172658920288086, + "learning_rate": 5e-05, + "loss": 1.2831, + "num_input_tokens_seen": 329568336, + "step": 4927 + }, + { + "epoch": 0.5590921985815602, + "loss": 1.0425609350204468, + "loss_ce": 0.006916423328220844, + "loss_iou": 0.443359375, + "loss_num": 0.0296630859375, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 329568336, + "step": 4927 + }, + { + "epoch": 0.5592056737588652, + "grad_norm": 40.7171630859375, + "learning_rate": 5e-05, + "loss": 1.1511, + "num_input_tokens_seen": 329635040, + "step": 4928 + }, + { + "epoch": 0.5592056737588652, + "loss": 1.0260343551635742, + "loss_ce": 0.008944569155573845, + "loss_iou": 0.392578125, + "loss_num": 0.046142578125, + "loss_xval": 1.015625, + "num_input_tokens_seen": 329635040, + "step": 4928 + }, + { + "epoch": 0.5593191489361702, + "grad_norm": 30.061847686767578, + "learning_rate": 5e-05, + "loss": 1.0572, + "num_input_tokens_seen": 329700752, + "step": 4929 + }, + { + "epoch": 0.5593191489361702, + "loss": 1.1650774478912354, + "loss_ce": 0.00467707309871912, + "loss_iou": 0.4765625, + "loss_num": 0.04150390625, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 329700752, + "step": 4929 + }, + { + "epoch": 0.5594326241134752, + "grad_norm": 24.917905807495117, + "learning_rate": 5e-05, + "loss": 1.1922, + "num_input_tokens_seen": 329767696, + "step": 4930 + }, + { + "epoch": 0.5594326241134752, + "loss": 1.2102532386779785, + "loss_ce": 0.004198647104203701, + "loss_iou": 0.515625, + "loss_num": 0.03515625, + "loss_xval": 1.203125, + "num_input_tokens_seen": 329767696, + "step": 4930 + }, + { + "epoch": 0.5595460992907801, + "grad_norm": 16.508800506591797, + "learning_rate": 5e-05, + "loss": 1.2022, + "num_input_tokens_seen": 329835832, + "step": 4931 + }, + { + "epoch": 0.5595460992907801, + "loss": 1.0897183418273926, + "loss_ce": 0.003780816914513707, + "loss_iou": 0.462890625, + "loss_num": 0.03173828125, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 329835832, + "step": 4931 + }, + { + "epoch": 0.5596595744680851, + "grad_norm": 25.30193328857422, + "learning_rate": 5e-05, + "loss": 1.2017, + "num_input_tokens_seen": 329902116, + "step": 4932 + }, + { + "epoch": 0.5596595744680851, + "loss": 1.2222872972488403, + "loss_ce": 0.005978715606033802, + "loss_iou": 0.48046875, + "loss_num": 0.05078125, + "loss_xval": 1.21875, + "num_input_tokens_seen": 329902116, + "step": 4932 + }, + { + "epoch": 0.5597730496453901, + "grad_norm": 33.986175537109375, + "learning_rate": 5e-05, + "loss": 1.2939, + "num_input_tokens_seen": 329969436, + "step": 4933 + }, + { + "epoch": 0.5597730496453901, + "loss": 1.5235711336135864, + "loss_ce": 0.012340633198618889, + "loss_iou": 0.58203125, + "loss_num": 0.06884765625, + "loss_xval": 1.5078125, + "num_input_tokens_seen": 329969436, + "step": 4933 + }, + { + "epoch": 0.5598865248226951, + "grad_norm": 26.823810577392578, + "learning_rate": 5e-05, + "loss": 1.317, + "num_input_tokens_seen": 330036028, + "step": 4934 + }, + { + "epoch": 0.5598865248226951, + "loss": 1.2902030944824219, + "loss_ce": 0.005535168573260307, + "loss_iou": 0.51171875, + "loss_num": 0.051513671875, + "loss_xval": 1.28125, + "num_input_tokens_seen": 330036028, + "step": 4934 + }, + { + "epoch": 0.56, + "grad_norm": 25.64900779724121, + "learning_rate": 5e-05, + "loss": 1.4006, + "num_input_tokens_seen": 330102868, + "step": 4935 + }, + { + "epoch": 0.56, + "loss": 1.0748720169067383, + "loss_ce": 0.00700088357552886, + "loss_iou": 0.4296875, + "loss_num": 0.041259765625, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 330102868, + "step": 4935 + }, + { + "epoch": 0.5601134751773049, + "grad_norm": 34.54188919067383, + "learning_rate": 5e-05, + "loss": 1.3188, + "num_input_tokens_seen": 330170384, + "step": 4936 + }, + { + "epoch": 0.5601134751773049, + "loss": 1.2870068550109863, + "loss_ce": 0.005268495995551348, + "loss_iou": 0.5390625, + "loss_num": 0.041015625, + "loss_xval": 1.28125, + "num_input_tokens_seen": 330170384, + "step": 4936 + }, + { + "epoch": 0.5602269503546099, + "grad_norm": 36.64124298095703, + "learning_rate": 5e-05, + "loss": 1.2387, + "num_input_tokens_seen": 330237640, + "step": 4937 + }, + { + "epoch": 0.5602269503546099, + "loss": 1.2341891527175903, + "loss_ce": 0.00665014423429966, + "loss_iou": 0.5234375, + "loss_num": 0.035888671875, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 330237640, + "step": 4937 + }, + { + "epoch": 0.5603404255319149, + "grad_norm": 28.40440559387207, + "learning_rate": 5e-05, + "loss": 1.6019, + "num_input_tokens_seen": 330304488, + "step": 4938 + }, + { + "epoch": 0.5603404255319149, + "loss": 1.542630672454834, + "loss_ce": 0.0055213551968336105, + "loss_iou": 0.61328125, + "loss_num": 0.0615234375, + "loss_xval": 1.5390625, + "num_input_tokens_seen": 330304488, + "step": 4938 + }, + { + "epoch": 0.5604539007092199, + "grad_norm": 27.66460609436035, + "learning_rate": 5e-05, + "loss": 1.0071, + "num_input_tokens_seen": 330370424, + "step": 4939 + }, + { + "epoch": 0.5604539007092199, + "loss": 0.9918259382247925, + "loss_ce": 0.009831327944993973, + "loss_iou": 0.39453125, + "loss_num": 0.038330078125, + "loss_xval": 0.98046875, + "num_input_tokens_seen": 330370424, + "step": 4939 + }, + { + "epoch": 0.5605673758865248, + "grad_norm": 31.102813720703125, + "learning_rate": 5e-05, + "loss": 1.1028, + "num_input_tokens_seen": 330437176, + "step": 4940 + }, + { + "epoch": 0.5605673758865248, + "loss": 1.0175763368606567, + "loss_ce": 0.0041486164554953575, + "loss_iou": 0.4140625, + "loss_num": 0.037109375, + "loss_xval": 1.015625, + "num_input_tokens_seen": 330437176, + "step": 4940 + }, + { + "epoch": 0.5606808510638298, + "grad_norm": 37.41473388671875, + "learning_rate": 5e-05, + "loss": 1.3703, + "num_input_tokens_seen": 330503516, + "step": 4941 + }, + { + "epoch": 0.5606808510638298, + "loss": 1.2656068801879883, + "loss_ce": 0.008282668888568878, + "loss_iou": 0.546875, + "loss_num": 0.033203125, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 330503516, + "step": 4941 + }, + { + "epoch": 0.5607943262411348, + "grad_norm": 33.703853607177734, + "learning_rate": 5e-05, + "loss": 1.2305, + "num_input_tokens_seen": 330570232, + "step": 4942 + }, + { + "epoch": 0.5607943262411348, + "loss": 1.4899898767471313, + "loss_ce": 0.008544562384486198, + "loss_iou": 0.59765625, + "loss_num": 0.0576171875, + "loss_xval": 1.484375, + "num_input_tokens_seen": 330570232, + "step": 4942 + }, + { + "epoch": 0.5609078014184398, + "grad_norm": 17.161972045898438, + "learning_rate": 5e-05, + "loss": 1.0873, + "num_input_tokens_seen": 330636520, + "step": 4943 + }, + { + "epoch": 0.5609078014184398, + "loss": 1.0980396270751953, + "loss_ce": 0.007707544602453709, + "loss_iou": 0.4375, + "loss_num": 0.042724609375, + "loss_xval": 1.09375, + "num_input_tokens_seen": 330636520, + "step": 4943 + }, + { + "epoch": 0.5610212765957446, + "grad_norm": 19.27989387512207, + "learning_rate": 5e-05, + "loss": 1.1428, + "num_input_tokens_seen": 330702456, + "step": 4944 + }, + { + "epoch": 0.5610212765957446, + "loss": 1.12716543674469, + "loss_ce": 0.007780636660754681, + "loss_iou": 0.427734375, + "loss_num": 0.052978515625, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 330702456, + "step": 4944 + }, + { + "epoch": 0.5611347517730496, + "grad_norm": 22.505197525024414, + "learning_rate": 5e-05, + "loss": 1.2204, + "num_input_tokens_seen": 330768668, + "step": 4945 + }, + { + "epoch": 0.5611347517730496, + "loss": 1.3937580585479736, + "loss_ce": 0.008504185825586319, + "loss_iou": 0.5546875, + "loss_num": 0.0546875, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 330768668, + "step": 4945 + }, + { + "epoch": 0.5612482269503546, + "grad_norm": 39.65045166015625, + "learning_rate": 5e-05, + "loss": 1.3905, + "num_input_tokens_seen": 330837020, + "step": 4946 + }, + { + "epoch": 0.5612482269503546, + "loss": 1.2254364490509033, + "loss_ce": 0.007174873724579811, + "loss_iou": 0.50390625, + "loss_num": 0.041259765625, + "loss_xval": 1.21875, + "num_input_tokens_seen": 330837020, + "step": 4946 + }, + { + "epoch": 0.5613617021276596, + "grad_norm": 33.53462600708008, + "learning_rate": 5e-05, + "loss": 1.2309, + "num_input_tokens_seen": 330902648, + "step": 4947 + }, + { + "epoch": 0.5613617021276596, + "loss": 1.3017182350158691, + "loss_ce": 0.006308054551482201, + "loss_iou": 0.49609375, + "loss_num": 0.060791015625, + "loss_xval": 1.296875, + "num_input_tokens_seen": 330902648, + "step": 4947 + }, + { + "epoch": 0.5614751773049645, + "grad_norm": 41.0725212097168, + "learning_rate": 5e-05, + "loss": 1.3976, + "num_input_tokens_seen": 330969832, + "step": 4948 + }, + { + "epoch": 0.5614751773049645, + "loss": 1.293075680732727, + "loss_ce": 0.008407686837017536, + "loss_iou": 0.55078125, + "loss_num": 0.0361328125, + "loss_xval": 1.28125, + "num_input_tokens_seen": 330969832, + "step": 4948 + }, + { + "epoch": 0.5615886524822695, + "grad_norm": 26.775068283081055, + "learning_rate": 5e-05, + "loss": 1.269, + "num_input_tokens_seen": 331036528, + "step": 4949 + }, + { + "epoch": 0.5615886524822695, + "loss": 1.3265962600708008, + "loss_ce": 0.006283783353865147, + "loss_iou": 0.578125, + "loss_num": 0.03271484375, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 331036528, + "step": 4949 + }, + { + "epoch": 0.5617021276595745, + "grad_norm": 16.511520385742188, + "learning_rate": 5e-05, + "loss": 1.0773, + "num_input_tokens_seen": 331103408, + "step": 4950 + }, + { + "epoch": 0.5617021276595745, + "loss": 1.1244699954986572, + "loss_ce": 0.007282499689608812, + "loss_iou": 0.466796875, + "loss_num": 0.036376953125, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 331103408, + "step": 4950 + }, + { + "epoch": 0.5618156028368795, + "grad_norm": 106.78459930419922, + "learning_rate": 5e-05, + "loss": 1.1651, + "num_input_tokens_seen": 331170968, + "step": 4951 + }, + { + "epoch": 0.5618156028368795, + "loss": 1.1846166849136353, + "loss_ce": 0.0063940053805708885, + "loss_iou": 0.462890625, + "loss_num": 0.05078125, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 331170968, + "step": 4951 + }, + { + "epoch": 0.5619290780141843, + "grad_norm": 29.864023208618164, + "learning_rate": 5e-05, + "loss": 1.2429, + "num_input_tokens_seen": 331238584, + "step": 4952 + }, + { + "epoch": 0.5619290780141843, + "loss": 1.1954371929168701, + "loss_ce": 0.005984053481370211, + "loss_iou": 0.5, + "loss_num": 0.03759765625, + "loss_xval": 1.1875, + "num_input_tokens_seen": 331238584, + "step": 4952 + }, + { + "epoch": 0.5620425531914893, + "grad_norm": 35.55009078979492, + "learning_rate": 5e-05, + "loss": 1.0917, + "num_input_tokens_seen": 331304924, + "step": 4953 + }, + { + "epoch": 0.5620425531914893, + "loss": 1.1070060729980469, + "loss_ce": 0.004466980695724487, + "loss_iou": 0.48046875, + "loss_num": 0.0286865234375, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 331304924, + "step": 4953 + }, + { + "epoch": 0.5621560283687943, + "grad_norm": 42.69620895385742, + "learning_rate": 5e-05, + "loss": 1.3076, + "num_input_tokens_seen": 331372820, + "step": 4954 + }, + { + "epoch": 0.5621560283687943, + "loss": 1.339268684387207, + "loss_ce": 0.004796033725142479, + "loss_iou": 0.5625, + "loss_num": 0.042236328125, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 331372820, + "step": 4954 + }, + { + "epoch": 0.5622695035460993, + "grad_norm": 29.22479248046875, + "learning_rate": 5e-05, + "loss": 1.4941, + "num_input_tokens_seen": 331439492, + "step": 4955 + }, + { + "epoch": 0.5622695035460993, + "loss": 1.451538324356079, + "loss_ce": 0.007202373817563057, + "loss_iou": 0.59375, + "loss_num": 0.05029296875, + "loss_xval": 1.4453125, + "num_input_tokens_seen": 331439492, + "step": 4955 + }, + { + "epoch": 0.5623829787234043, + "grad_norm": 32.71905517578125, + "learning_rate": 5e-05, + "loss": 1.1507, + "num_input_tokens_seen": 331506740, + "step": 4956 + }, + { + "epoch": 0.5623829787234043, + "loss": 1.0423325300216675, + "loss_ce": 0.0037582977674901485, + "loss_iou": 0.435546875, + "loss_num": 0.03369140625, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 331506740, + "step": 4956 + }, + { + "epoch": 0.5624964539007092, + "grad_norm": 17.48735809326172, + "learning_rate": 5e-05, + "loss": 1.0336, + "num_input_tokens_seen": 331573600, + "step": 4957 + }, + { + "epoch": 0.5624964539007092, + "loss": 0.8447220325469971, + "loss_ce": 0.00243689538910985, + "loss_iou": 0.326171875, + "loss_num": 0.0380859375, + "loss_xval": 0.84375, + "num_input_tokens_seen": 331573600, + "step": 4957 + }, + { + "epoch": 0.5626099290780142, + "grad_norm": 23.51801109313965, + "learning_rate": 5e-05, + "loss": 1.1466, + "num_input_tokens_seen": 331640100, + "step": 4958 + }, + { + "epoch": 0.5626099290780142, + "loss": 1.2463912963867188, + "loss_ce": 0.0037154622841626406, + "loss_iou": 0.48828125, + "loss_num": 0.05322265625, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 331640100, + "step": 4958 + }, + { + "epoch": 0.5627234042553192, + "grad_norm": 32.678096771240234, + "learning_rate": 5e-05, + "loss": 1.4146, + "num_input_tokens_seen": 331706740, + "step": 4959 + }, + { + "epoch": 0.5627234042553192, + "loss": 1.4207631349563599, + "loss_ce": 0.006212349981069565, + "loss_iou": 0.5859375, + "loss_num": 0.048095703125, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 331706740, + "step": 4959 + }, + { + "epoch": 0.5628368794326241, + "grad_norm": 49.65266036987305, + "learning_rate": 5e-05, + "loss": 1.209, + "num_input_tokens_seen": 331773308, + "step": 4960 + }, + { + "epoch": 0.5628368794326241, + "loss": 1.2399544715881348, + "loss_ce": 0.014368472620844841, + "loss_iou": 0.5, + "loss_num": 0.044921875, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 331773308, + "step": 4960 + }, + { + "epoch": 0.562950354609929, + "grad_norm": 58.08481216430664, + "learning_rate": 5e-05, + "loss": 1.3109, + "num_input_tokens_seen": 331840664, + "step": 4961 + }, + { + "epoch": 0.562950354609929, + "loss": 1.0889474153518677, + "loss_ce": 0.0039863986894488335, + "loss_iou": 0.482421875, + "loss_num": 0.024169921875, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 331840664, + "step": 4961 + }, + { + "epoch": 0.563063829787234, + "grad_norm": 44.62742233276367, + "learning_rate": 5e-05, + "loss": 1.2457, + "num_input_tokens_seen": 331907520, + "step": 4962 + }, + { + "epoch": 0.563063829787234, + "loss": 1.036478877067566, + "loss_ce": 0.002787410281598568, + "loss_iou": 0.431640625, + "loss_num": 0.034423828125, + "loss_xval": 1.03125, + "num_input_tokens_seen": 331907520, + "step": 4962 + }, + { + "epoch": 0.563177304964539, + "grad_norm": 20.2059383392334, + "learning_rate": 5e-05, + "loss": 1.0244, + "num_input_tokens_seen": 331975148, + "step": 4963 + }, + { + "epoch": 0.563177304964539, + "loss": 1.1129601001739502, + "loss_ce": 0.00895619299262762, + "loss_iou": 0.453125, + "loss_num": 0.03955078125, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 331975148, + "step": 4963 + }, + { + "epoch": 0.563290780141844, + "grad_norm": 20.14992332458496, + "learning_rate": 5e-05, + "loss": 1.2737, + "num_input_tokens_seen": 332043248, + "step": 4964 + }, + { + "epoch": 0.563290780141844, + "loss": 1.4291870594024658, + "loss_ce": 0.008288596756756306, + "loss_iou": 0.546875, + "loss_num": 0.06494140625, + "loss_xval": 1.421875, + "num_input_tokens_seen": 332043248, + "step": 4964 + }, + { + "epoch": 0.563404255319149, + "grad_norm": 35.19414138793945, + "learning_rate": 5e-05, + "loss": 1.4679, + "num_input_tokens_seen": 332110100, + "step": 4965 + }, + { + "epoch": 0.563404255319149, + "loss": 1.3867464065551758, + "loss_ce": 0.0068636126816272736, + "loss_iou": 0.54296875, + "loss_num": 0.059326171875, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 332110100, + "step": 4965 + }, + { + "epoch": 0.5635177304964539, + "grad_norm": 24.911476135253906, + "learning_rate": 5e-05, + "loss": 1.2935, + "num_input_tokens_seen": 332177356, + "step": 4966 + }, + { + "epoch": 0.5635177304964539, + "loss": 1.2056910991668701, + "loss_ce": 0.005007428117096424, + "loss_iou": 0.5234375, + "loss_num": 0.03173828125, + "loss_xval": 1.203125, + "num_input_tokens_seen": 332177356, + "step": 4966 + }, + { + "epoch": 0.5636312056737589, + "grad_norm": 25.485549926757812, + "learning_rate": 5e-05, + "loss": 1.1787, + "num_input_tokens_seen": 332245196, + "step": 4967 + }, + { + "epoch": 0.5636312056737589, + "loss": 0.9875553846359253, + "loss_ce": 0.005377617664635181, + "loss_iou": 0.412109375, + "loss_num": 0.03125, + "loss_xval": 0.98046875, + "num_input_tokens_seen": 332245196, + "step": 4967 + }, + { + "epoch": 0.5637446808510639, + "grad_norm": 14.673656463623047, + "learning_rate": 5e-05, + "loss": 1.1645, + "num_input_tokens_seen": 332312404, + "step": 4968 + }, + { + "epoch": 0.5637446808510639, + "loss": 1.2871978282928467, + "loss_ce": 0.006924474611878395, + "loss_iou": 0.494140625, + "loss_num": 0.058349609375, + "loss_xval": 1.28125, + "num_input_tokens_seen": 332312404, + "step": 4968 + }, + { + "epoch": 0.5638581560283688, + "grad_norm": 30.741161346435547, + "learning_rate": 5e-05, + "loss": 1.127, + "num_input_tokens_seen": 332379860, + "step": 4969 + }, + { + "epoch": 0.5638581560283688, + "loss": 1.1974912881851196, + "loss_ce": 0.00608500000089407, + "loss_iou": 0.4609375, + "loss_num": 0.053466796875, + "loss_xval": 1.1875, + "num_input_tokens_seen": 332379860, + "step": 4969 + }, + { + "epoch": 0.5639716312056737, + "grad_norm": 53.94024658203125, + "learning_rate": 5e-05, + "loss": 1.3737, + "num_input_tokens_seen": 332447048, + "step": 4970 + }, + { + "epoch": 0.5639716312056737, + "loss": 1.2859994173049927, + "loss_ce": 0.006702437996864319, + "loss_iou": 0.54296875, + "loss_num": 0.038818359375, + "loss_xval": 1.28125, + "num_input_tokens_seen": 332447048, + "step": 4970 + }, + { + "epoch": 0.5640851063829787, + "grad_norm": 30.77735137939453, + "learning_rate": 5e-05, + "loss": 1.2501, + "num_input_tokens_seen": 332513432, + "step": 4971 + }, + { + "epoch": 0.5640851063829787, + "loss": 1.3524436950683594, + "loss_ce": 0.008998965844511986, + "loss_iou": 0.53515625, + "loss_num": 0.05419921875, + "loss_xval": 1.34375, + "num_input_tokens_seen": 332513432, + "step": 4971 + }, + { + "epoch": 0.5641985815602837, + "grad_norm": 27.00779914855957, + "learning_rate": 5e-05, + "loss": 1.1529, + "num_input_tokens_seen": 332580280, + "step": 4972 + }, + { + "epoch": 0.5641985815602837, + "loss": 1.0008749961853027, + "loss_ce": 0.0038046585395932198, + "loss_iou": 0.3984375, + "loss_num": 0.040771484375, + "loss_xval": 0.99609375, + "num_input_tokens_seen": 332580280, + "step": 4972 + }, + { + "epoch": 0.5643120567375887, + "grad_norm": 36.050479888916016, + "learning_rate": 5e-05, + "loss": 1.3337, + "num_input_tokens_seen": 332646220, + "step": 4973 + }, + { + "epoch": 0.5643120567375887, + "loss": 1.2730733156204224, + "loss_ce": 0.005006985738873482, + "loss_iou": 0.51953125, + "loss_num": 0.046142578125, + "loss_xval": 1.265625, + "num_input_tokens_seen": 332646220, + "step": 4973 + }, + { + "epoch": 0.5644255319148936, + "grad_norm": 52.300933837890625, + "learning_rate": 5e-05, + "loss": 1.4747, + "num_input_tokens_seen": 332713444, + "step": 4974 + }, + { + "epoch": 0.5644255319148936, + "loss": 1.3918335437774658, + "loss_ce": 0.0041382573544979095, + "loss_iou": 0.61328125, + "loss_num": 0.032470703125, + "loss_xval": 1.390625, + "num_input_tokens_seen": 332713444, + "step": 4974 + }, + { + "epoch": 0.5645390070921986, + "grad_norm": 23.42714500427246, + "learning_rate": 5e-05, + "loss": 1.5133, + "num_input_tokens_seen": 332778880, + "step": 4975 + }, + { + "epoch": 0.5645390070921986, + "loss": 1.5806353092193604, + "loss_ce": 0.006416521966457367, + "loss_iou": 0.64453125, + "loss_num": 0.056884765625, + "loss_xval": 1.578125, + "num_input_tokens_seen": 332778880, + "step": 4975 + }, + { + "epoch": 0.5646524822695036, + "grad_norm": 19.037607192993164, + "learning_rate": 5e-05, + "loss": 1.2178, + "num_input_tokens_seen": 332846652, + "step": 4976 + }, + { + "epoch": 0.5646524822695036, + "loss": 1.1390628814697266, + "loss_ce": 0.002344246953725815, + "loss_iou": 0.486328125, + "loss_num": 0.03271484375, + "loss_xval": 1.140625, + "num_input_tokens_seen": 332846652, + "step": 4976 + }, + { + "epoch": 0.5647659574468085, + "grad_norm": 31.697553634643555, + "learning_rate": 5e-05, + "loss": 1.3778, + "num_input_tokens_seen": 332914272, + "step": 4977 + }, + { + "epoch": 0.5647659574468085, + "loss": 1.4060256481170654, + "loss_ce": 0.004170149099081755, + "loss_iou": 0.515625, + "loss_num": 0.07421875, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 332914272, + "step": 4977 + }, + { + "epoch": 0.5648794326241134, + "grad_norm": 26.87432098388672, + "learning_rate": 5e-05, + "loss": 1.3414, + "num_input_tokens_seen": 332980804, + "step": 4978 + }, + { + "epoch": 0.5648794326241134, + "loss": 1.246314287185669, + "loss_ce": 0.004614993464201689, + "loss_iou": 0.55859375, + "loss_num": 0.024658203125, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 332980804, + "step": 4978 + }, + { + "epoch": 0.5649929078014184, + "grad_norm": 18.663808822631836, + "learning_rate": 5e-05, + "loss": 1.2313, + "num_input_tokens_seen": 333047828, + "step": 4979 + }, + { + "epoch": 0.5649929078014184, + "loss": 1.2184958457946777, + "loss_ce": 0.010487953200936317, + "loss_iou": 0.484375, + "loss_num": 0.048095703125, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 333047828, + "step": 4979 + }, + { + "epoch": 0.5651063829787234, + "grad_norm": 25.65393829345703, + "learning_rate": 5e-05, + "loss": 1.1052, + "num_input_tokens_seen": 333116168, + "step": 4980 + }, + { + "epoch": 0.5651063829787234, + "loss": 1.1438062191009521, + "loss_ce": 0.007087458856403828, + "loss_iou": 0.49609375, + "loss_num": 0.029296875, + "loss_xval": 1.140625, + "num_input_tokens_seen": 333116168, + "step": 4980 + }, + { + "epoch": 0.5652198581560284, + "grad_norm": 40.18540954589844, + "learning_rate": 5e-05, + "loss": 1.1778, + "num_input_tokens_seen": 333182800, + "step": 4981 + }, + { + "epoch": 0.5652198581560284, + "loss": 1.073340654373169, + "loss_ce": 0.007910959422588348, + "loss_iou": 0.458984375, + "loss_num": 0.0291748046875, + "loss_xval": 1.0625, + "num_input_tokens_seen": 333182800, + "step": 4981 + }, + { + "epoch": 0.5653333333333334, + "grad_norm": 28.582666397094727, + "learning_rate": 5e-05, + "loss": 1.463, + "num_input_tokens_seen": 333248472, + "step": 4982 + }, + { + "epoch": 0.5653333333333334, + "loss": 1.34046471118927, + "loss_ce": 0.005503780674189329, + "loss_iou": 0.515625, + "loss_num": 0.061279296875, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 333248472, + "step": 4982 + }, + { + "epoch": 0.5654468085106383, + "grad_norm": 14.577739715576172, + "learning_rate": 5e-05, + "loss": 0.8917, + "num_input_tokens_seen": 333315048, + "step": 4983 + }, + { + "epoch": 0.5654468085106383, + "loss": 0.7556767463684082, + "loss_ce": 0.007141585927456617, + "loss_iou": 0.3125, + "loss_num": 0.0247802734375, + "loss_xval": 0.75, + "num_input_tokens_seen": 333315048, + "step": 4983 + }, + { + "epoch": 0.5655602836879433, + "grad_norm": 30.528789520263672, + "learning_rate": 5e-05, + "loss": 0.9425, + "num_input_tokens_seen": 333381764, + "step": 4984 + }, + { + "epoch": 0.5655602836879433, + "loss": 1.0762991905212402, + "loss_ce": 0.006963286083191633, + "loss_iou": 0.447265625, + "loss_num": 0.034912109375, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 333381764, + "step": 4984 + }, + { + "epoch": 0.5656737588652482, + "grad_norm": 23.635360717773438, + "learning_rate": 5e-05, + "loss": 1.3165, + "num_input_tokens_seen": 333449256, + "step": 4985 + }, + { + "epoch": 0.5656737588652482, + "loss": 1.3222527503967285, + "loss_ce": 0.008776326663792133, + "loss_iou": 0.50390625, + "loss_num": 0.06103515625, + "loss_xval": 1.3125, + "num_input_tokens_seen": 333449256, + "step": 4985 + }, + { + "epoch": 0.5657872340425532, + "grad_norm": 29.31076431274414, + "learning_rate": 5e-05, + "loss": 1.2875, + "num_input_tokens_seen": 333516044, + "step": 4986 + }, + { + "epoch": 0.5657872340425532, + "loss": 1.3089836835861206, + "loss_ce": 0.0013664952712133527, + "loss_iou": 0.55078125, + "loss_num": 0.041748046875, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 333516044, + "step": 4986 + }, + { + "epoch": 0.5659007092198581, + "grad_norm": 34.077945709228516, + "learning_rate": 5e-05, + "loss": 1.1725, + "num_input_tokens_seen": 333582420, + "step": 4987 + }, + { + "epoch": 0.5659007092198581, + "loss": 1.0740993022918701, + "loss_ce": 0.007693042978644371, + "loss_iou": 0.4609375, + "loss_num": 0.028564453125, + "loss_xval": 1.0625, + "num_input_tokens_seen": 333582420, + "step": 4987 + }, + { + "epoch": 0.5660141843971631, + "grad_norm": 32.22300338745117, + "learning_rate": 5e-05, + "loss": 1.1981, + "num_input_tokens_seen": 333647928, + "step": 4988 + }, + { + "epoch": 0.5660141843971631, + "loss": 1.2274070978164673, + "loss_ce": 0.004811827093362808, + "loss_iou": 0.484375, + "loss_num": 0.051025390625, + "loss_xval": 1.21875, + "num_input_tokens_seen": 333647928, + "step": 4988 + }, + { + "epoch": 0.5661276595744681, + "grad_norm": 20.959102630615234, + "learning_rate": 5e-05, + "loss": 1.0498, + "num_input_tokens_seen": 333714720, + "step": 4989 + }, + { + "epoch": 0.5661276595744681, + "loss": 1.037771224975586, + "loss_ce": 0.004079889971762896, + "loss_iou": 0.419921875, + "loss_num": 0.0390625, + "loss_xval": 1.03125, + "num_input_tokens_seen": 333714720, + "step": 4989 + }, + { + "epoch": 0.5662411347517731, + "grad_norm": 17.78959083557129, + "learning_rate": 5e-05, + "loss": 1.431, + "num_input_tokens_seen": 333780780, + "step": 4990 + }, + { + "epoch": 0.5662411347517731, + "loss": 1.3004670143127441, + "loss_ce": 0.00993967242538929, + "loss_iou": 0.5390625, + "loss_num": 0.04248046875, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 333780780, + "step": 4990 + }, + { + "epoch": 0.566354609929078, + "grad_norm": 21.06534767150879, + "learning_rate": 5e-05, + "loss": 1.1931, + "num_input_tokens_seen": 333847576, + "step": 4991 + }, + { + "epoch": 0.566354609929078, + "loss": 1.32917320728302, + "loss_ce": 0.0059310197830200195, + "loss_iou": 0.53125, + "loss_num": 0.052978515625, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 333847576, + "step": 4991 + }, + { + "epoch": 0.566468085106383, + "grad_norm": 21.62777328491211, + "learning_rate": 5e-05, + "loss": 1.3212, + "num_input_tokens_seen": 333914792, + "step": 4992 + }, + { + "epoch": 0.566468085106383, + "loss": 1.4893602132797241, + "loss_ce": 0.004008736461400986, + "loss_iou": 0.53515625, + "loss_num": 0.08251953125, + "loss_xval": 1.484375, + "num_input_tokens_seen": 333914792, + "step": 4992 + }, + { + "epoch": 0.5665815602836879, + "grad_norm": 26.856712341308594, + "learning_rate": 5e-05, + "loss": 1.2165, + "num_input_tokens_seen": 333980720, + "step": 4993 + }, + { + "epoch": 0.5665815602836879, + "loss": 1.2014415264129639, + "loss_ce": 0.009058699943125248, + "loss_iou": 0.482421875, + "loss_num": 0.0458984375, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 333980720, + "step": 4993 + }, + { + "epoch": 0.5666950354609929, + "grad_norm": 28.33243751525879, + "learning_rate": 5e-05, + "loss": 1.2786, + "num_input_tokens_seen": 334047948, + "step": 4994 + }, + { + "epoch": 0.5666950354609929, + "loss": 1.333511471748352, + "loss_ce": 0.0063629914075136185, + "loss_iou": 0.5390625, + "loss_num": 0.04931640625, + "loss_xval": 1.328125, + "num_input_tokens_seen": 334047948, + "step": 4994 + }, + { + "epoch": 0.5668085106382978, + "grad_norm": 40.04172897338867, + "learning_rate": 5e-05, + "loss": 1.1827, + "num_input_tokens_seen": 334115784, + "step": 4995 + }, + { + "epoch": 0.5668085106382978, + "loss": 1.176761507987976, + "loss_ce": 0.0048865689896047115, + "loss_iou": 0.5, + "loss_num": 0.0341796875, + "loss_xval": 1.171875, + "num_input_tokens_seen": 334115784, + "step": 4995 + }, + { + "epoch": 0.5669219858156028, + "grad_norm": 26.6574649810791, + "learning_rate": 5e-05, + "loss": 1.3682, + "num_input_tokens_seen": 334182192, + "step": 4996 + }, + { + "epoch": 0.5669219858156028, + "loss": 1.559899091720581, + "loss_ce": 0.0071646892465651035, + "loss_iou": 0.6328125, + "loss_num": 0.057373046875, + "loss_xval": 1.5546875, + "num_input_tokens_seen": 334182192, + "step": 4996 + }, + { + "epoch": 0.5670354609929078, + "grad_norm": 48.84589767456055, + "learning_rate": 5e-05, + "loss": 0.8864, + "num_input_tokens_seen": 334247944, + "step": 4997 + }, + { + "epoch": 0.5670354609929078, + "loss": 0.9476948976516724, + "loss_ce": 0.006776954513043165, + "loss_iou": 0.39453125, + "loss_num": 0.03076171875, + "loss_xval": 0.94140625, + "num_input_tokens_seen": 334247944, + "step": 4997 + }, + { + "epoch": 0.5671489361702128, + "grad_norm": 29.723936080932617, + "learning_rate": 5e-05, + "loss": 1.2355, + "num_input_tokens_seen": 334315744, + "step": 4998 + }, + { + "epoch": 0.5671489361702128, + "loss": 1.168210744857788, + "loss_ce": 0.006589635740965605, + "loss_iou": 0.48828125, + "loss_num": 0.037353515625, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 334315744, + "step": 4998 + }, + { + "epoch": 0.5672624113475178, + "grad_norm": 39.65248489379883, + "learning_rate": 5e-05, + "loss": 1.3266, + "num_input_tokens_seen": 334382540, + "step": 4999 + }, + { + "epoch": 0.5672624113475178, + "loss": 1.3807321786880493, + "loss_ce": 0.003779064165428281, + "loss_iou": 0.58984375, + "loss_num": 0.039306640625, + "loss_xval": 1.375, + "num_input_tokens_seen": 334382540, + "step": 4999 + }, + { + "epoch": 0.5673758865248227, + "grad_norm": 33.36021423339844, + "learning_rate": 5e-05, + "loss": 1.3402, + "num_input_tokens_seen": 334449600, + "step": 5000 + }, + { + "epoch": 0.5673758865248227, + "eval_seeclick_CIoU": 0.4098704010248184, + "eval_seeclick_GIoU": 0.3895348906517029, + "eval_seeclick_IoU": 0.49592164158821106, + "eval_seeclick_MAE_all": 0.1602296307682991, + "eval_seeclick_MAE_h": 0.0666903629899025, + "eval_seeclick_MAE_w": 0.1206083782017231, + "eval_seeclick_MAE_x_boxes": 0.22441641986370087, + "eval_seeclick_MAE_y_boxes": 0.14468321949243546, + "eval_seeclick_NUM_probability": 0.9999651610851288, + "eval_seeclick_inside_bbox": 0.6770833432674408, + "eval_seeclick_loss": 2.414332628250122, + "eval_seeclick_loss_ce": 0.014791741035878658, + "eval_seeclick_loss_iou": 0.816650390625, + "eval_seeclick_loss_num": 0.1602020263671875, + "eval_seeclick_loss_xval": 2.4345703125, + "eval_seeclick_runtime": 63.8026, + "eval_seeclick_samples_per_second": 0.737, + "eval_seeclick_steps_per_second": 0.031, + "num_input_tokens_seen": 334449600, + "step": 5000 + }, + { + "epoch": 0.5673758865248227, + "eval_icons_CIoU": 0.4615674316883087, + "eval_icons_GIoU": 0.44827182590961456, + "eval_icons_IoU": 0.5046789646148682, + "eval_icons_MAE_all": 0.14455892145633698, + "eval_icons_MAE_h": 0.0787493884563446, + "eval_icons_MAE_w": 0.14097297936677933, + "eval_icons_MAE_x_boxes": 0.1280944049358368, + "eval_icons_MAE_y_boxes": 0.0927026979625225, + "eval_icons_NUM_probability": 0.9999306499958038, + "eval_icons_inside_bbox": 0.7604166567325592, + "eval_icons_loss": 2.457536458969116, + "eval_icons_loss_ce": 3.570516673789825e-05, + "eval_icons_loss_iou": 0.875244140625, + "eval_icons_loss_num": 0.139617919921875, + "eval_icons_loss_xval": 2.44677734375, + "eval_icons_runtime": 76.0474, + "eval_icons_samples_per_second": 0.657, + "eval_icons_steps_per_second": 0.026, + "num_input_tokens_seen": 334449600, + "step": 5000 + }, + { + "epoch": 0.5673758865248227, + "eval_screenspot_CIoU": 0.19510827586054802, + "eval_screenspot_GIoU": 0.1896753485004107, + "eval_screenspot_IoU": 0.31575894355773926, + "eval_screenspot_MAE_all": 0.2274771730105082, + "eval_screenspot_MAE_h": 0.147756094733874, + "eval_screenspot_MAE_w": 0.13936516145865122, + "eval_screenspot_MAE_x_boxes": 0.37296698490778607, + "eval_screenspot_MAE_y_boxes": 0.10641739269097646, + "eval_screenspot_NUM_probability": 0.9999249577522278, + "eval_screenspot_inside_bbox": 0.4975000023841858, + "eval_screenspot_loss": 3.105910062789917, + "eval_screenspot_loss_ce": 0.0113033934806784, + "eval_screenspot_loss_iou": 0.98046875, + "eval_screenspot_loss_num": 0.2507832845052083, + "eval_screenspot_loss_xval": 3.2145182291666665, + "eval_screenspot_runtime": 115.9099, + "eval_screenspot_samples_per_second": 0.768, + "eval_screenspot_steps_per_second": 0.026, + "num_input_tokens_seen": 334449600, + "step": 5000 + }, + { + "epoch": 0.5673758865248227, + "eval_compot_CIoU": 0.26378098875284195, + "eval_compot_GIoU": 0.22648628056049347, + "eval_compot_IoU": 0.3463498204946518, + "eval_compot_MAE_all": 0.24898391216993332, + "eval_compot_MAE_h": 0.14850447326898575, + "eval_compot_MAE_w": 0.2812645137310028, + "eval_compot_MAE_x_boxes": 0.2297632023692131, + "eval_compot_MAE_y_boxes": 0.11468151211738586, + "eval_compot_NUM_probability": 0.9995900690555573, + "eval_compot_inside_bbox": 0.5833333432674408, + "eval_compot_loss": 3.2764270305633545, + "eval_compot_loss_ce": 0.00377165584359318, + "eval_compot_loss_iou": 1.0087890625, + "eval_compot_loss_num": 0.254119873046875, + "eval_compot_loss_xval": 3.2890625, + "eval_compot_runtime": 75.1871, + "eval_compot_samples_per_second": 0.665, + "eval_compot_steps_per_second": 0.027, + "num_input_tokens_seen": 334449600, + "step": 5000 + }, + { + "epoch": 0.5673758865248227, + "loss": 3.2458438873291016, + "loss_ce": 0.003656573360785842, + "loss_iou": 1.0078125, + "loss_num": 0.24609375, + "loss_xval": 3.25, + "num_input_tokens_seen": 334449600, + "step": 5000 + }, + { + "epoch": 0.5674893617021277, + "grad_norm": 23.649038314819336, + "learning_rate": 5e-05, + "loss": 1.222, + "num_input_tokens_seen": 334517196, + "step": 5001 + }, + { + "epoch": 0.5674893617021277, + "loss": 1.2228480577468872, + "loss_ce": 0.004586325027048588, + "loss_iou": 0.498046875, + "loss_num": 0.04443359375, + "loss_xval": 1.21875, + "num_input_tokens_seen": 334517196, + "step": 5001 + }, + { + "epoch": 0.5676028368794326, + "grad_norm": 11.482810974121094, + "learning_rate": 5e-05, + "loss": 1.2016, + "num_input_tokens_seen": 334584444, + "step": 5002 + }, + { + "epoch": 0.5676028368794326, + "loss": 1.350062608718872, + "loss_ce": 0.00826573558151722, + "loss_iou": 0.5234375, + "loss_num": 0.058349609375, + "loss_xval": 1.34375, + "num_input_tokens_seen": 334584444, + "step": 5002 + }, + { + "epoch": 0.5677163120567376, + "grad_norm": 17.606008529663086, + "learning_rate": 5e-05, + "loss": 1.0905, + "num_input_tokens_seen": 334650972, + "step": 5003 + }, + { + "epoch": 0.5677163120567376, + "loss": 1.0941393375396729, + "loss_ce": 0.007713494822382927, + "loss_iou": 0.439453125, + "loss_num": 0.041748046875, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 334650972, + "step": 5003 + }, + { + "epoch": 0.5678297872340425, + "grad_norm": 22.28093910217285, + "learning_rate": 5e-05, + "loss": 1.1455, + "num_input_tokens_seen": 334719056, + "step": 5004 + }, + { + "epoch": 0.5678297872340425, + "loss": 1.1399427652359009, + "loss_ce": 0.006641940679401159, + "loss_iou": 0.443359375, + "loss_num": 0.049560546875, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 334719056, + "step": 5004 + }, + { + "epoch": 0.5679432624113475, + "grad_norm": 31.057273864746094, + "learning_rate": 5e-05, + "loss": 1.3217, + "num_input_tokens_seen": 334786384, + "step": 5005 + }, + { + "epoch": 0.5679432624113475, + "loss": 1.4949188232421875, + "loss_ce": 0.005660926923155785, + "loss_iou": 0.5859375, + "loss_num": 0.06396484375, + "loss_xval": 1.4921875, + "num_input_tokens_seen": 334786384, + "step": 5005 + }, + { + "epoch": 0.5680567375886525, + "grad_norm": 39.75568389892578, + "learning_rate": 5e-05, + "loss": 1.5785, + "num_input_tokens_seen": 334853168, + "step": 5006 + }, + { + "epoch": 0.5680567375886525, + "loss": 1.551694631576538, + "loss_ce": 0.004819698166102171, + "loss_iou": 0.640625, + "loss_num": 0.052490234375, + "loss_xval": 1.546875, + "num_input_tokens_seen": 334853168, + "step": 5006 + }, + { + "epoch": 0.5681702127659575, + "grad_norm": 31.312381744384766, + "learning_rate": 5e-05, + "loss": 1.2048, + "num_input_tokens_seen": 334920348, + "step": 5007 + }, + { + "epoch": 0.5681702127659575, + "loss": 1.216429591178894, + "loss_ce": 0.0064686257392168045, + "loss_iou": 0.51953125, + "loss_num": 0.03369140625, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 334920348, + "step": 5007 + }, + { + "epoch": 0.5682836879432624, + "grad_norm": 33.00912857055664, + "learning_rate": 5e-05, + "loss": 1.2713, + "num_input_tokens_seen": 334987096, + "step": 5008 + }, + { + "epoch": 0.5682836879432624, + "loss": 1.433152198791504, + "loss_ce": 0.007371014915406704, + "loss_iou": 0.57421875, + "loss_num": 0.0556640625, + "loss_xval": 1.421875, + "num_input_tokens_seen": 334987096, + "step": 5008 + }, + { + "epoch": 0.5683971631205674, + "grad_norm": 25.53887367248535, + "learning_rate": 5e-05, + "loss": 1.3824, + "num_input_tokens_seen": 335054596, + "step": 5009 + }, + { + "epoch": 0.5683971631205674, + "loss": 1.3677207231521606, + "loss_ce": 0.007369226776063442, + "loss_iou": 0.5859375, + "loss_num": 0.03759765625, + "loss_xval": 1.359375, + "num_input_tokens_seen": 335054596, + "step": 5009 + }, + { + "epoch": 0.5685106382978723, + "grad_norm": 15.314245223999023, + "learning_rate": 5e-05, + "loss": 1.3038, + "num_input_tokens_seen": 335122456, + "step": 5010 + }, + { + "epoch": 0.5685106382978723, + "loss": 1.417092204093933, + "loss_ce": 0.009377333335578442, + "loss_iou": 0.52734375, + "loss_num": 0.0703125, + "loss_xval": 1.40625, + "num_input_tokens_seen": 335122456, + "step": 5010 + }, + { + "epoch": 0.5686241134751773, + "grad_norm": 24.587209701538086, + "learning_rate": 5e-05, + "loss": 1.0986, + "num_input_tokens_seen": 335187808, + "step": 5011 + }, + { + "epoch": 0.5686241134751773, + "loss": 1.0827443599700928, + "loss_ce": 0.006328411400318146, + "loss_iou": 0.455078125, + "loss_num": 0.03369140625, + "loss_xval": 1.078125, + "num_input_tokens_seen": 335187808, + "step": 5011 + }, + { + "epoch": 0.5687375886524823, + "grad_norm": 25.880226135253906, + "learning_rate": 5e-05, + "loss": 1.2062, + "num_input_tokens_seen": 335255248, + "step": 5012 + }, + { + "epoch": 0.5687375886524823, + "loss": 1.1685452461242676, + "loss_ce": 0.00594754796475172, + "loss_iou": 0.484375, + "loss_num": 0.03857421875, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 335255248, + "step": 5012 + }, + { + "epoch": 0.5688510638297872, + "grad_norm": 32.314029693603516, + "learning_rate": 5e-05, + "loss": 1.2957, + "num_input_tokens_seen": 335321856, + "step": 5013 + }, + { + "epoch": 0.5688510638297872, + "loss": 1.4063127040863037, + "loss_ce": 0.0034807182382792234, + "loss_iou": 0.52734375, + "loss_num": 0.06982421875, + "loss_xval": 1.40625, + "num_input_tokens_seen": 335321856, + "step": 5013 + }, + { + "epoch": 0.5689645390070922, + "grad_norm": 27.58001136779785, + "learning_rate": 5e-05, + "loss": 1.2563, + "num_input_tokens_seen": 335388244, + "step": 5014 + }, + { + "epoch": 0.5689645390070922, + "loss": 1.3370094299316406, + "loss_ce": 0.00448986142873764, + "loss_iou": 0.55859375, + "loss_num": 0.04345703125, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 335388244, + "step": 5014 + }, + { + "epoch": 0.5690780141843972, + "grad_norm": 21.543861389160156, + "learning_rate": 5e-05, + "loss": 1.2353, + "num_input_tokens_seen": 335453812, + "step": 5015 + }, + { + "epoch": 0.5690780141843972, + "loss": 1.352880597114563, + "loss_ce": 0.003759551327675581, + "loss_iou": 0.52734375, + "loss_num": 0.0595703125, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 335453812, + "step": 5015 + }, + { + "epoch": 0.5691914893617022, + "grad_norm": 18.836776733398438, + "learning_rate": 5e-05, + "loss": 1.1356, + "num_input_tokens_seen": 335520064, + "step": 5016 + }, + { + "epoch": 0.5691914893617022, + "loss": 1.183016300201416, + "loss_ce": 0.006746732629835606, + "loss_iou": 0.5078125, + "loss_num": 0.032470703125, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 335520064, + "step": 5016 + }, + { + "epoch": 0.5693049645390071, + "grad_norm": 24.833984375, + "learning_rate": 5e-05, + "loss": 1.0925, + "num_input_tokens_seen": 335587520, + "step": 5017 + }, + { + "epoch": 0.5693049645390071, + "loss": 1.2252942323684692, + "loss_ce": 0.008009041659533978, + "loss_iou": 0.470703125, + "loss_num": 0.055419921875, + "loss_xval": 1.21875, + "num_input_tokens_seen": 335587520, + "step": 5017 + }, + { + "epoch": 0.569418439716312, + "grad_norm": 27.118404388427734, + "learning_rate": 5e-05, + "loss": 1.1659, + "num_input_tokens_seen": 335654600, + "step": 5018 + }, + { + "epoch": 0.569418439716312, + "loss": 1.2626538276672363, + "loss_ce": 0.005817870609462261, + "loss_iou": 0.55078125, + "loss_num": 0.031005859375, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 335654600, + "step": 5018 + }, + { + "epoch": 0.569531914893617, + "grad_norm": 30.202138900756836, + "learning_rate": 5e-05, + "loss": 1.1748, + "num_input_tokens_seen": 335720708, + "step": 5019 + }, + { + "epoch": 0.569531914893617, + "loss": 1.1886329650878906, + "loss_ce": 0.012851683422923088, + "loss_iou": 0.466796875, + "loss_num": 0.04833984375, + "loss_xval": 1.171875, + "num_input_tokens_seen": 335720708, + "step": 5019 + }, + { + "epoch": 0.569645390070922, + "grad_norm": 32.663475036621094, + "learning_rate": 5e-05, + "loss": 1.2768, + "num_input_tokens_seen": 335786444, + "step": 5020 + }, + { + "epoch": 0.569645390070922, + "loss": 1.1113449335098267, + "loss_ce": 0.005143788643181324, + "loss_iou": 0.45703125, + "loss_num": 0.0380859375, + "loss_xval": 1.109375, + "num_input_tokens_seen": 335786444, + "step": 5020 + }, + { + "epoch": 0.5697588652482269, + "grad_norm": 24.513961791992188, + "learning_rate": 5e-05, + "loss": 1.0475, + "num_input_tokens_seen": 335853308, + "step": 5021 + }, + { + "epoch": 0.5697588652482269, + "loss": 1.0012035369873047, + "loss_ce": 0.006086287554353476, + "loss_iou": 0.42578125, + "loss_num": 0.0289306640625, + "loss_xval": 0.99609375, + "num_input_tokens_seen": 335853308, + "step": 5021 + }, + { + "epoch": 0.5698723404255319, + "grad_norm": 25.62059211730957, + "learning_rate": 5e-05, + "loss": 1.1731, + "num_input_tokens_seen": 335918708, + "step": 5022 + }, + { + "epoch": 0.5698723404255319, + "loss": 1.186645269393921, + "loss_ce": 0.005981195718050003, + "loss_iou": 0.5, + "loss_num": 0.0361328125, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 335918708, + "step": 5022 + }, + { + "epoch": 0.5699858156028369, + "grad_norm": 26.536231994628906, + "learning_rate": 5e-05, + "loss": 0.9944, + "num_input_tokens_seen": 335985700, + "step": 5023 + }, + { + "epoch": 0.5699858156028369, + "loss": 1.04551362991333, + "loss_ce": 0.004498048219829798, + "loss_iou": 0.453125, + "loss_num": 0.0269775390625, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 335985700, + "step": 5023 + }, + { + "epoch": 0.5700992907801419, + "grad_norm": 34.379539489746094, + "learning_rate": 5e-05, + "loss": 1.2002, + "num_input_tokens_seen": 336052248, + "step": 5024 + }, + { + "epoch": 0.5700992907801419, + "loss": 1.4107332229614258, + "loss_ce": 0.01034252718091011, + "loss_iou": 0.5859375, + "loss_num": 0.0458984375, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 336052248, + "step": 5024 + }, + { + "epoch": 0.5702127659574469, + "grad_norm": 33.46399688720703, + "learning_rate": 5e-05, + "loss": 1.4404, + "num_input_tokens_seen": 336119352, + "step": 5025 + }, + { + "epoch": 0.5702127659574469, + "loss": 1.2738479375839233, + "loss_ce": 0.009687760844826698, + "loss_iou": 0.54296875, + "loss_num": 0.0361328125, + "loss_xval": 1.265625, + "num_input_tokens_seen": 336119352, + "step": 5025 + }, + { + "epoch": 0.5703262411347517, + "grad_norm": 24.695556640625, + "learning_rate": 5e-05, + "loss": 1.1911, + "num_input_tokens_seen": 336186236, + "step": 5026 + }, + { + "epoch": 0.5703262411347517, + "loss": 1.0439951419830322, + "loss_ce": 0.006885756738483906, + "loss_iou": 0.439453125, + "loss_num": 0.031494140625, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 336186236, + "step": 5026 + }, + { + "epoch": 0.5704397163120567, + "grad_norm": 23.760238647460938, + "learning_rate": 5e-05, + "loss": 1.3246, + "num_input_tokens_seen": 336252492, + "step": 5027 + }, + { + "epoch": 0.5704397163120567, + "loss": 1.2703765630722046, + "loss_ce": 0.0067047420889139175, + "loss_iou": 0.515625, + "loss_num": 0.0458984375, + "loss_xval": 1.265625, + "num_input_tokens_seen": 336252492, + "step": 5027 + }, + { + "epoch": 0.5705531914893617, + "grad_norm": 31.48765754699707, + "learning_rate": 5e-05, + "loss": 1.3548, + "num_input_tokens_seen": 336318588, + "step": 5028 + }, + { + "epoch": 0.5705531914893617, + "loss": 1.2589194774627686, + "loss_ce": 0.007454616017639637, + "loss_iou": 0.48828125, + "loss_num": 0.0546875, + "loss_xval": 1.25, + "num_input_tokens_seen": 336318588, + "step": 5028 + }, + { + "epoch": 0.5706666666666667, + "grad_norm": 32.64128875732422, + "learning_rate": 5e-05, + "loss": 1.1689, + "num_input_tokens_seen": 336385300, + "step": 5029 + }, + { + "epoch": 0.5706666666666667, + "loss": 1.2453700304031372, + "loss_ce": 0.010018439963459969, + "loss_iou": 0.515625, + "loss_num": 0.040283203125, + "loss_xval": 1.234375, + "num_input_tokens_seen": 336385300, + "step": 5029 + }, + { + "epoch": 0.5707801418439716, + "grad_norm": 18.70163917541504, + "learning_rate": 5e-05, + "loss": 1.2455, + "num_input_tokens_seen": 336451632, + "step": 5030 + }, + { + "epoch": 0.5707801418439716, + "loss": 1.2986019849777222, + "loss_ce": 0.009539421647787094, + "loss_iou": 0.51953125, + "loss_num": 0.05029296875, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 336451632, + "step": 5030 + }, + { + "epoch": 0.5708936170212766, + "grad_norm": 21.38880729675293, + "learning_rate": 5e-05, + "loss": 1.1243, + "num_input_tokens_seen": 336519448, + "step": 5031 + }, + { + "epoch": 0.5708936170212766, + "loss": 1.2750425338745117, + "loss_ce": 0.0074644810520112514, + "loss_iou": 0.52734375, + "loss_num": 0.043212890625, + "loss_xval": 1.265625, + "num_input_tokens_seen": 336519448, + "step": 5031 + }, + { + "epoch": 0.5710070921985816, + "grad_norm": 22.46432113647461, + "learning_rate": 5e-05, + "loss": 1.2017, + "num_input_tokens_seen": 336586712, + "step": 5032 + }, + { + "epoch": 0.5710070921985816, + "loss": 1.3667542934417725, + "loss_ce": 0.004937851335853338, + "loss_iou": 0.5703125, + "loss_num": 0.04443359375, + "loss_xval": 1.359375, + "num_input_tokens_seen": 336586712, + "step": 5032 + }, + { + "epoch": 0.5711205673758866, + "grad_norm": 24.689546585083008, + "learning_rate": 5e-05, + "loss": 1.2736, + "num_input_tokens_seen": 336653672, + "step": 5033 + }, + { + "epoch": 0.5711205673758866, + "loss": 1.346178650856018, + "loss_ce": 0.0053582824766635895, + "loss_iou": 0.51953125, + "loss_num": 0.059814453125, + "loss_xval": 1.34375, + "num_input_tokens_seen": 336653672, + "step": 5033 + }, + { + "epoch": 0.5712340425531914, + "grad_norm": 26.41623878479004, + "learning_rate": 5e-05, + "loss": 1.1071, + "num_input_tokens_seen": 336720968, + "step": 5034 + }, + { + "epoch": 0.5712340425531914, + "loss": 0.9760836362838745, + "loss_ce": 0.005868761800229549, + "loss_iou": 0.404296875, + "loss_num": 0.0322265625, + "loss_xval": 0.96875, + "num_input_tokens_seen": 336720968, + "step": 5034 + }, + { + "epoch": 0.5713475177304964, + "grad_norm": 144.72808837890625, + "learning_rate": 5e-05, + "loss": 1.349, + "num_input_tokens_seen": 336788428, + "step": 5035 + }, + { + "epoch": 0.5713475177304964, + "loss": 1.342002511024475, + "loss_ce": 0.008506472222507, + "loss_iou": 0.5390625, + "loss_num": 0.051513671875, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 336788428, + "step": 5035 + }, + { + "epoch": 0.5714609929078014, + "grad_norm": 36.505165100097656, + "learning_rate": 5e-05, + "loss": 1.1518, + "num_input_tokens_seen": 336856176, + "step": 5036 + }, + { + "epoch": 0.5714609929078014, + "loss": 1.0762660503387451, + "loss_ce": 0.0025354528333991766, + "loss_iou": 0.478515625, + "loss_num": 0.0235595703125, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 336856176, + "step": 5036 + }, + { + "epoch": 0.5715744680851064, + "grad_norm": 38.0229606628418, + "learning_rate": 5e-05, + "loss": 1.3181, + "num_input_tokens_seen": 336923232, + "step": 5037 + }, + { + "epoch": 0.5715744680851064, + "loss": 1.2809324264526367, + "loss_ce": 0.006274321116507053, + "loss_iou": 0.53125, + "loss_num": 0.041748046875, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 336923232, + "step": 5037 + }, + { + "epoch": 0.5716879432624113, + "grad_norm": 38.65548324584961, + "learning_rate": 5e-05, + "loss": 1.1358, + "num_input_tokens_seen": 336991000, + "step": 5038 + }, + { + "epoch": 0.5716879432624113, + "loss": 1.1439622640609741, + "loss_ce": 0.004802095703780651, + "loss_iou": 0.47265625, + "loss_num": 0.0390625, + "loss_xval": 1.140625, + "num_input_tokens_seen": 336991000, + "step": 5038 + }, + { + "epoch": 0.5718014184397163, + "grad_norm": 32.6312141418457, + "learning_rate": 5e-05, + "loss": 1.4033, + "num_input_tokens_seen": 337058140, + "step": 5039 + }, + { + "epoch": 0.5718014184397163, + "loss": 1.3186750411987305, + "loss_ce": 0.003245322499424219, + "loss_iou": 0.53125, + "loss_num": 0.05126953125, + "loss_xval": 1.3125, + "num_input_tokens_seen": 337058140, + "step": 5039 + }, + { + "epoch": 0.5719148936170213, + "grad_norm": 21.790735244750977, + "learning_rate": 5e-05, + "loss": 1.2068, + "num_input_tokens_seen": 337125724, + "step": 5040 + }, + { + "epoch": 0.5719148936170213, + "loss": 1.1507291793823242, + "loss_ce": 0.0032683429308235645, + "loss_iou": 0.421875, + "loss_num": 0.06103515625, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 337125724, + "step": 5040 + }, + { + "epoch": 0.5720283687943263, + "grad_norm": 37.99151611328125, + "learning_rate": 5e-05, + "loss": 1.2276, + "num_input_tokens_seen": 337192456, + "step": 5041 + }, + { + "epoch": 0.5720283687943263, + "loss": 1.2059576511383057, + "loss_ce": 0.0023444639518857002, + "loss_iou": 0.50390625, + "loss_num": 0.039794921875, + "loss_xval": 1.203125, + "num_input_tokens_seen": 337192456, + "step": 5041 + }, + { + "epoch": 0.5721418439716313, + "grad_norm": 31.642610549926758, + "learning_rate": 5e-05, + "loss": 1.2638, + "num_input_tokens_seen": 337258844, + "step": 5042 + }, + { + "epoch": 0.5721418439716313, + "loss": 1.2667136192321777, + "loss_ce": 0.0069479248486459255, + "loss_iou": 0.51953125, + "loss_num": 0.04443359375, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 337258844, + "step": 5042 + }, + { + "epoch": 0.5722553191489361, + "grad_norm": 41.39643859863281, + "learning_rate": 5e-05, + "loss": 1.231, + "num_input_tokens_seen": 337326676, + "step": 5043 + }, + { + "epoch": 0.5722553191489361, + "loss": 1.369795322418213, + "loss_ce": 0.007978921756148338, + "loss_iou": 0.5390625, + "loss_num": 0.056640625, + "loss_xval": 1.359375, + "num_input_tokens_seen": 337326676, + "step": 5043 + }, + { + "epoch": 0.5723687943262411, + "grad_norm": 20.16838836669922, + "learning_rate": 5e-05, + "loss": 1.2166, + "num_input_tokens_seen": 337393752, + "step": 5044 + }, + { + "epoch": 0.5723687943262411, + "loss": 1.094724178314209, + "loss_ce": 0.005856972187757492, + "loss_iou": 0.439453125, + "loss_num": 0.041748046875, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 337393752, + "step": 5044 + }, + { + "epoch": 0.5724822695035461, + "grad_norm": 29.110620498657227, + "learning_rate": 5e-05, + "loss": 1.0944, + "num_input_tokens_seen": 337460252, + "step": 5045 + }, + { + "epoch": 0.5724822695035461, + "loss": 1.0476025342941284, + "loss_ce": 0.004633866250514984, + "loss_iou": 0.423828125, + "loss_num": 0.038818359375, + "loss_xval": 1.046875, + "num_input_tokens_seen": 337460252, + "step": 5045 + }, + { + "epoch": 0.5725957446808511, + "grad_norm": 18.897178649902344, + "learning_rate": 5e-05, + "loss": 1.2211, + "num_input_tokens_seen": 337527308, + "step": 5046 + }, + { + "epoch": 0.5725957446808511, + "loss": 1.15781569480896, + "loss_ce": 0.009866413660347462, + "loss_iou": 0.4296875, + "loss_num": 0.057373046875, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 337527308, + "step": 5046 + }, + { + "epoch": 0.572709219858156, + "grad_norm": 25.36446762084961, + "learning_rate": 5e-05, + "loss": 1.1589, + "num_input_tokens_seen": 337594032, + "step": 5047 + }, + { + "epoch": 0.572709219858156, + "loss": 1.2787246704101562, + "loss_ce": 0.005775531753897667, + "loss_iou": 0.5, + "loss_num": 0.053955078125, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 337594032, + "step": 5047 + }, + { + "epoch": 0.572822695035461, + "grad_norm": 14.267329216003418, + "learning_rate": 5e-05, + "loss": 0.9862, + "num_input_tokens_seen": 337660336, + "step": 5048 + }, + { + "epoch": 0.572822695035461, + "loss": 0.9783794283866882, + "loss_ce": 0.004746613092720509, + "loss_iou": 0.3828125, + "loss_num": 0.041259765625, + "loss_xval": 0.97265625, + "num_input_tokens_seen": 337660336, + "step": 5048 + }, + { + "epoch": 0.572936170212766, + "grad_norm": 20.49875831604004, + "learning_rate": 5e-05, + "loss": 1.1266, + "num_input_tokens_seen": 337727384, + "step": 5049 + }, + { + "epoch": 0.572936170212766, + "loss": 1.153806209564209, + "loss_ce": 0.004880346357822418, + "loss_iou": 0.51171875, + "loss_num": 0.0245361328125, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 337727384, + "step": 5049 + }, + { + "epoch": 0.573049645390071, + "grad_norm": 21.848106384277344, + "learning_rate": 5e-05, + "loss": 1.0993, + "num_input_tokens_seen": 337794408, + "step": 5050 + }, + { + "epoch": 0.573049645390071, + "loss": 1.035143494606018, + "loss_ce": 0.005846637301146984, + "loss_iou": 0.45703125, + "loss_num": 0.02294921875, + "loss_xval": 1.03125, + "num_input_tokens_seen": 337794408, + "step": 5050 + }, + { + "epoch": 0.5731631205673758, + "grad_norm": 33.18318176269531, + "learning_rate": 5e-05, + "loss": 1.1166, + "num_input_tokens_seen": 337861436, + "step": 5051 + }, + { + "epoch": 0.5731631205673758, + "loss": 1.1309620141983032, + "loss_ce": 0.006938609294593334, + "loss_iou": 0.474609375, + "loss_num": 0.03466796875, + "loss_xval": 1.125, + "num_input_tokens_seen": 337861436, + "step": 5051 + }, + { + "epoch": 0.5732765957446808, + "grad_norm": 23.747913360595703, + "learning_rate": 5e-05, + "loss": 1.3925, + "num_input_tokens_seen": 337928276, + "step": 5052 + }, + { + "epoch": 0.5732765957446808, + "loss": 1.3902668952941895, + "loss_ce": 0.005012880079448223, + "loss_iou": 0.58203125, + "loss_num": 0.0439453125, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 337928276, + "step": 5052 + }, + { + "epoch": 0.5733900709219858, + "grad_norm": 19.08242416381836, + "learning_rate": 5e-05, + "loss": 1.1373, + "num_input_tokens_seen": 337994824, + "step": 5053 + }, + { + "epoch": 0.5733900709219858, + "loss": 1.1174246072769165, + "loss_ce": 0.004143367521464825, + "loss_iou": 0.455078125, + "loss_num": 0.04052734375, + "loss_xval": 1.109375, + "num_input_tokens_seen": 337994824, + "step": 5053 + }, + { + "epoch": 0.5735035460992908, + "grad_norm": 18.687328338623047, + "learning_rate": 5e-05, + "loss": 1.1759, + "num_input_tokens_seen": 338060980, + "step": 5054 + }, + { + "epoch": 0.5735035460992908, + "loss": 1.02549409866333, + "loss_ce": 0.004803169518709183, + "loss_iou": 0.40625, + "loss_num": 0.041748046875, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 338060980, + "step": 5054 + }, + { + "epoch": 0.5736170212765958, + "grad_norm": 33.763816833496094, + "learning_rate": 5e-05, + "loss": 1.2939, + "num_input_tokens_seen": 338128276, + "step": 5055 + }, + { + "epoch": 0.5736170212765958, + "loss": 1.1972901821136475, + "loss_ce": 0.00539568904787302, + "loss_iou": 0.5, + "loss_num": 0.037841796875, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 338128276, + "step": 5055 + }, + { + "epoch": 0.5737304964539007, + "grad_norm": 33.60575485229492, + "learning_rate": 5e-05, + "loss": 1.2174, + "num_input_tokens_seen": 338194500, + "step": 5056 + }, + { + "epoch": 0.5737304964539007, + "loss": 1.3178735971450806, + "loss_ce": 0.015139206312596798, + "loss_iou": 0.57421875, + "loss_num": 0.0306396484375, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 338194500, + "step": 5056 + }, + { + "epoch": 0.5738439716312057, + "grad_norm": 93.7576675415039, + "learning_rate": 5e-05, + "loss": 1.1385, + "num_input_tokens_seen": 338261528, + "step": 5057 + }, + { + "epoch": 0.5738439716312057, + "loss": 1.3509770631790161, + "loss_ce": 0.008203687146306038, + "loss_iou": 0.5625, + "loss_num": 0.042724609375, + "loss_xval": 1.34375, + "num_input_tokens_seen": 338261528, + "step": 5057 + }, + { + "epoch": 0.5739574468085107, + "grad_norm": 18.745746612548828, + "learning_rate": 5e-05, + "loss": 1.0957, + "num_input_tokens_seen": 338328864, + "step": 5058 + }, + { + "epoch": 0.5739574468085107, + "loss": 1.1067249774932861, + "loss_ce": 0.006627380382269621, + "loss_iou": 0.45703125, + "loss_num": 0.03759765625, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 338328864, + "step": 5058 + }, + { + "epoch": 0.5740709219858156, + "grad_norm": 27.022451400756836, + "learning_rate": 5e-05, + "loss": 1.2208, + "num_input_tokens_seen": 338396240, + "step": 5059 + }, + { + "epoch": 0.5740709219858156, + "loss": 1.1681420803070068, + "loss_ce": 0.007009265944361687, + "loss_iou": 0.4609375, + "loss_num": 0.0478515625, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 338396240, + "step": 5059 + }, + { + "epoch": 0.5741843971631205, + "grad_norm": 19.526653289794922, + "learning_rate": 5e-05, + "loss": 1.0974, + "num_input_tokens_seen": 338463012, + "step": 5060 + }, + { + "epoch": 0.5741843971631205, + "loss": 1.1295710802078247, + "loss_ce": 0.005059369839727879, + "loss_iou": 0.4609375, + "loss_num": 0.040283203125, + "loss_xval": 1.125, + "num_input_tokens_seen": 338463012, + "step": 5060 + }, + { + "epoch": 0.5742978723404255, + "grad_norm": 17.200489044189453, + "learning_rate": 5e-05, + "loss": 1.1632, + "num_input_tokens_seen": 338530848, + "step": 5061 + }, + { + "epoch": 0.5742978723404255, + "loss": 1.1237616539001465, + "loss_ce": 0.007550659589469433, + "loss_iou": 0.46484375, + "loss_num": 0.037841796875, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 338530848, + "step": 5061 + }, + { + "epoch": 0.5744113475177305, + "grad_norm": 76.53041076660156, + "learning_rate": 5e-05, + "loss": 1.1763, + "num_input_tokens_seen": 338597840, + "step": 5062 + }, + { + "epoch": 0.5744113475177305, + "loss": 1.2165089845657349, + "loss_ce": 0.00996602326631546, + "loss_iou": 0.498046875, + "loss_num": 0.041748046875, + "loss_xval": 1.203125, + "num_input_tokens_seen": 338597840, + "step": 5062 + }, + { + "epoch": 0.5745248226950355, + "grad_norm": 31.008892059326172, + "learning_rate": 5e-05, + "loss": 1.2358, + "num_input_tokens_seen": 338665496, + "step": 5063 + }, + { + "epoch": 0.5745248226950355, + "loss": 1.0944569110870361, + "loss_ce": 0.007298623211681843, + "loss_iou": 0.458984375, + "loss_num": 0.0341796875, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 338665496, + "step": 5063 + }, + { + "epoch": 0.5746382978723404, + "grad_norm": 36.39582443237305, + "learning_rate": 5e-05, + "loss": 1.3511, + "num_input_tokens_seen": 338732228, + "step": 5064 + }, + { + "epoch": 0.5746382978723404, + "loss": 1.4313905239105225, + "loss_ce": 0.010492075234651566, + "loss_iou": 0.546875, + "loss_num": 0.06591796875, + "loss_xval": 1.421875, + "num_input_tokens_seen": 338732228, + "step": 5064 + }, + { + "epoch": 0.5747517730496454, + "grad_norm": 259.6797180175781, + "learning_rate": 5e-05, + "loss": 1.1529, + "num_input_tokens_seen": 338799212, + "step": 5065 + }, + { + "epoch": 0.5747517730496454, + "loss": 1.2294586896896362, + "loss_ce": 0.005825841799378395, + "loss_iou": 0.51953125, + "loss_num": 0.037353515625, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 338799212, + "step": 5065 + }, + { + "epoch": 0.5748652482269504, + "grad_norm": 33.98682403564453, + "learning_rate": 5e-05, + "loss": 1.4622, + "num_input_tokens_seen": 338866444, + "step": 5066 + }, + { + "epoch": 0.5748652482269504, + "loss": 1.4767038822174072, + "loss_ce": 0.009906947612762451, + "loss_iou": 0.56640625, + "loss_num": 0.06689453125, + "loss_xval": 1.46875, + "num_input_tokens_seen": 338866444, + "step": 5066 + }, + { + "epoch": 0.5749787234042553, + "grad_norm": 12.70759391784668, + "learning_rate": 5e-05, + "loss": 1.0132, + "num_input_tokens_seen": 338933808, + "step": 5067 + }, + { + "epoch": 0.5749787234042553, + "loss": 0.9547373056411743, + "loss_ce": 0.010004709474742413, + "loss_iou": 0.341796875, + "loss_num": 0.05224609375, + "loss_xval": 0.9453125, + "num_input_tokens_seen": 338933808, + "step": 5067 + }, + { + "epoch": 0.5750921985815602, + "grad_norm": 15.478302955627441, + "learning_rate": 5e-05, + "loss": 1.3054, + "num_input_tokens_seen": 339000744, + "step": 5068 + }, + { + "epoch": 0.5750921985815602, + "loss": 1.1906445026397705, + "loss_ce": 0.004609290510416031, + "loss_iou": 0.48828125, + "loss_num": 0.041748046875, + "loss_xval": 1.1875, + "num_input_tokens_seen": 339000744, + "step": 5068 + }, + { + "epoch": 0.5752056737588652, + "grad_norm": 21.463294982910156, + "learning_rate": 5e-05, + "loss": 0.9531, + "num_input_tokens_seen": 339067612, + "step": 5069 + }, + { + "epoch": 0.5752056737588652, + "loss": 0.8795952796936035, + "loss_ce": 0.006060126703232527, + "loss_iou": 0.369140625, + "loss_num": 0.0272216796875, + "loss_xval": 0.875, + "num_input_tokens_seen": 339067612, + "step": 5069 + }, + { + "epoch": 0.5753191489361702, + "grad_norm": 31.66077995300293, + "learning_rate": 5e-05, + "loss": 1.1559, + "num_input_tokens_seen": 339135584, + "step": 5070 + }, + { + "epoch": 0.5753191489361702, + "loss": 1.1401476860046387, + "loss_ce": 0.006847002077847719, + "loss_iou": 0.486328125, + "loss_num": 0.031982421875, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 339135584, + "step": 5070 + }, + { + "epoch": 0.5754326241134752, + "grad_norm": 26.986787796020508, + "learning_rate": 5e-05, + "loss": 1.1679, + "num_input_tokens_seen": 339202128, + "step": 5071 + }, + { + "epoch": 0.5754326241134752, + "loss": 1.3098201751708984, + "loss_ce": 0.005132653750479221, + "loss_iou": 0.55078125, + "loss_num": 0.041015625, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 339202128, + "step": 5071 + }, + { + "epoch": 0.5755460992907802, + "grad_norm": 32.93424987792969, + "learning_rate": 5e-05, + "loss": 1.127, + "num_input_tokens_seen": 339269188, + "step": 5072 + }, + { + "epoch": 0.5755460992907802, + "loss": 1.2240955829620361, + "loss_ce": 0.008763524703681469, + "loss_iou": 0.52734375, + "loss_num": 0.03173828125, + "loss_xval": 1.21875, + "num_input_tokens_seen": 339269188, + "step": 5072 + }, + { + "epoch": 0.5756595744680851, + "grad_norm": 28.249656677246094, + "learning_rate": 5e-05, + "loss": 1.5709, + "num_input_tokens_seen": 339336924, + "step": 5073 + }, + { + "epoch": 0.5756595744680851, + "loss": 1.515276551246643, + "loss_ce": 0.006487515289336443, + "loss_iou": 0.6484375, + "loss_num": 0.04248046875, + "loss_xval": 1.5078125, + "num_input_tokens_seen": 339336924, + "step": 5073 + }, + { + "epoch": 0.5757730496453901, + "grad_norm": 9.295825004577637, + "learning_rate": 5e-05, + "loss": 1.1027, + "num_input_tokens_seen": 339404492, + "step": 5074 + }, + { + "epoch": 0.5757730496453901, + "loss": 1.187873363494873, + "loss_ce": 0.00818585604429245, + "loss_iou": 0.47265625, + "loss_num": 0.046630859375, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 339404492, + "step": 5074 + }, + { + "epoch": 0.5758865248226951, + "grad_norm": 47.02360916137695, + "learning_rate": 5e-05, + "loss": 1.0494, + "num_input_tokens_seen": 339472148, + "step": 5075 + }, + { + "epoch": 0.5758865248226951, + "loss": 0.9591970443725586, + "loss_ce": 0.0036306409165263176, + "loss_iou": 0.408203125, + "loss_num": 0.028076171875, + "loss_xval": 0.95703125, + "num_input_tokens_seen": 339472148, + "step": 5075 + }, + { + "epoch": 0.576, + "grad_norm": 36.78461456298828, + "learning_rate": 5e-05, + "loss": 1.3025, + "num_input_tokens_seen": 339539544, + "step": 5076 + }, + { + "epoch": 0.576, + "loss": 1.2782108783721924, + "loss_ce": 0.005994059145450592, + "loss_iou": 0.51171875, + "loss_num": 0.049560546875, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 339539544, + "step": 5076 + }, + { + "epoch": 0.5761134751773049, + "grad_norm": 29.243398666381836, + "learning_rate": 5e-05, + "loss": 1.2009, + "num_input_tokens_seen": 339606460, + "step": 5077 + }, + { + "epoch": 0.5761134751773049, + "loss": 1.3017995357513428, + "loss_ce": 0.005901041440665722, + "loss_iou": 0.53125, + "loss_num": 0.046630859375, + "loss_xval": 1.296875, + "num_input_tokens_seen": 339606460, + "step": 5077 + }, + { + "epoch": 0.5762269503546099, + "grad_norm": 31.688499450683594, + "learning_rate": 5e-05, + "loss": 1.5326, + "num_input_tokens_seen": 339673872, + "step": 5078 + }, + { + "epoch": 0.5762269503546099, + "loss": 1.401785135269165, + "loss_ce": 0.00481246504932642, + "loss_iou": 0.53515625, + "loss_num": 0.06591796875, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 339673872, + "step": 5078 + }, + { + "epoch": 0.5763404255319149, + "grad_norm": 24.60127067565918, + "learning_rate": 5e-05, + "loss": 1.2983, + "num_input_tokens_seen": 339740144, + "step": 5079 + }, + { + "epoch": 0.5763404255319149, + "loss": 1.194787621498108, + "loss_ce": 0.010705595836043358, + "loss_iou": 0.44921875, + "loss_num": 0.0576171875, + "loss_xval": 1.1875, + "num_input_tokens_seen": 339740144, + "step": 5079 + }, + { + "epoch": 0.5764539007092199, + "grad_norm": 64.84151458740234, + "learning_rate": 5e-05, + "loss": 1.0739, + "num_input_tokens_seen": 339807148, + "step": 5080 + }, + { + "epoch": 0.5764539007092199, + "loss": 1.0402213335037231, + "loss_ce": 0.0040885526686906815, + "loss_iou": 0.458984375, + "loss_num": 0.02392578125, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 339807148, + "step": 5080 + }, + { + "epoch": 0.5765673758865248, + "grad_norm": 33.64522171020508, + "learning_rate": 5e-05, + "loss": 1.1944, + "num_input_tokens_seen": 339875440, + "step": 5081 + }, + { + "epoch": 0.5765673758865248, + "loss": 1.2197356224060059, + "loss_ce": 0.006844940595328808, + "loss_iou": 0.494140625, + "loss_num": 0.044677734375, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 339875440, + "step": 5081 + }, + { + "epoch": 0.5766808510638298, + "grad_norm": 24.0281925201416, + "learning_rate": 5e-05, + "loss": 1.1974, + "num_input_tokens_seen": 339942732, + "step": 5082 + }, + { + "epoch": 0.5766808510638298, + "loss": 1.3020493984222412, + "loss_ce": 0.007127492688596249, + "loss_iou": 0.57421875, + "loss_num": 0.02978515625, + "loss_xval": 1.296875, + "num_input_tokens_seen": 339942732, + "step": 5082 + }, + { + "epoch": 0.5767943262411348, + "grad_norm": 44.460453033447266, + "learning_rate": 5e-05, + "loss": 1.3925, + "num_input_tokens_seen": 340009172, + "step": 5083 + }, + { + "epoch": 0.5767943262411348, + "loss": 1.3576254844665527, + "loss_ce": 0.008016165345907211, + "loss_iou": 0.53515625, + "loss_num": 0.055908203125, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 340009172, + "step": 5083 + }, + { + "epoch": 0.5769078014184397, + "grad_norm": 25.65808868408203, + "learning_rate": 5e-05, + "loss": 1.4917, + "num_input_tokens_seen": 340075956, + "step": 5084 + }, + { + "epoch": 0.5769078014184397, + "loss": 1.6096348762512207, + "loss_ce": 0.010513785295188427, + "loss_iou": 0.6484375, + "loss_num": 0.059814453125, + "loss_xval": 1.6015625, + "num_input_tokens_seen": 340075956, + "step": 5084 + }, + { + "epoch": 0.5770212765957446, + "grad_norm": 20.424840927124023, + "learning_rate": 5e-05, + "loss": 0.9328, + "num_input_tokens_seen": 340142724, + "step": 5085 + }, + { + "epoch": 0.5770212765957446, + "loss": 0.7901753187179565, + "loss_ce": 0.002882785629481077, + "loss_iou": 0.296875, + "loss_num": 0.0390625, + "loss_xval": 0.7890625, + "num_input_tokens_seen": 340142724, + "step": 5085 + }, + { + "epoch": 0.5771347517730496, + "grad_norm": 30.316661834716797, + "learning_rate": 5e-05, + "loss": 1.1703, + "num_input_tokens_seen": 340208696, + "step": 5086 + }, + { + "epoch": 0.5771347517730496, + "loss": 1.1311379671096802, + "loss_ce": 0.006137899123132229, + "loss_iou": 0.453125, + "loss_num": 0.0439453125, + "loss_xval": 1.125, + "num_input_tokens_seen": 340208696, + "step": 5086 + }, + { + "epoch": 0.5772482269503546, + "grad_norm": 26.049564361572266, + "learning_rate": 5e-05, + "loss": 1.2911, + "num_input_tokens_seen": 340275100, + "step": 5087 + }, + { + "epoch": 0.5772482269503546, + "loss": 1.2606303691864014, + "loss_ce": 0.005991668440401554, + "loss_iou": 0.5, + "loss_num": 0.051513671875, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 340275100, + "step": 5087 + }, + { + "epoch": 0.5773617021276596, + "grad_norm": 29.02607536315918, + "learning_rate": 5e-05, + "loss": 1.4261, + "num_input_tokens_seen": 340343152, + "step": 5088 + }, + { + "epoch": 0.5773617021276596, + "loss": 1.506303071975708, + "loss_ce": 0.009232819080352783, + "loss_iou": 0.56640625, + "loss_num": 0.0732421875, + "loss_xval": 1.5, + "num_input_tokens_seen": 340343152, + "step": 5088 + }, + { + "epoch": 0.5774751773049646, + "grad_norm": 27.17856216430664, + "learning_rate": 5e-05, + "loss": 1.2017, + "num_input_tokens_seen": 340410224, + "step": 5089 + }, + { + "epoch": 0.5774751773049646, + "loss": 1.0908933877944946, + "loss_ce": 0.007885567843914032, + "loss_iou": 0.453125, + "loss_num": 0.035400390625, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 340410224, + "step": 5089 + }, + { + "epoch": 0.5775886524822695, + "grad_norm": 39.26023864746094, + "learning_rate": 5e-05, + "loss": 1.2387, + "num_input_tokens_seen": 340476536, + "step": 5090 + }, + { + "epoch": 0.5775886524822695, + "loss": 1.2368123531341553, + "loss_ce": 0.007808410096913576, + "loss_iou": 0.5078125, + "loss_num": 0.04345703125, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 340476536, + "step": 5090 + }, + { + "epoch": 0.5777021276595745, + "grad_norm": 40.43464279174805, + "learning_rate": 5e-05, + "loss": 1.3459, + "num_input_tokens_seen": 340543588, + "step": 5091 + }, + { + "epoch": 0.5777021276595745, + "loss": 1.4377870559692383, + "loss_ce": 0.005169974640011787, + "loss_iou": 0.6171875, + "loss_num": 0.039306640625, + "loss_xval": 1.4296875, + "num_input_tokens_seen": 340543588, + "step": 5091 + }, + { + "epoch": 0.5778156028368794, + "grad_norm": 132.29222106933594, + "learning_rate": 5e-05, + "loss": 1.2175, + "num_input_tokens_seen": 340609368, + "step": 5092 + }, + { + "epoch": 0.5778156028368794, + "loss": 1.3020155429840088, + "loss_ce": 0.006117078009992838, + "loss_iou": 0.515625, + "loss_num": 0.05322265625, + "loss_xval": 1.296875, + "num_input_tokens_seen": 340609368, + "step": 5092 + }, + { + "epoch": 0.5779290780141844, + "grad_norm": 70.03705596923828, + "learning_rate": 5e-05, + "loss": 1.3888, + "num_input_tokens_seen": 340675344, + "step": 5093 + }, + { + "epoch": 0.5779290780141844, + "loss": 1.5051991939544678, + "loss_ce": 0.006664017681032419, + "loss_iou": 0.5859375, + "loss_num": 0.064453125, + "loss_xval": 1.5, + "num_input_tokens_seen": 340675344, + "step": 5093 + }, + { + "epoch": 0.5780425531914893, + "grad_norm": 21.801422119140625, + "learning_rate": 5e-05, + "loss": 1.3633, + "num_input_tokens_seen": 340742216, + "step": 5094 + }, + { + "epoch": 0.5780425531914893, + "loss": 1.295203447341919, + "loss_ce": 0.009558970108628273, + "loss_iou": 0.5078125, + "loss_num": 0.0546875, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 340742216, + "step": 5094 + }, + { + "epoch": 0.5781560283687943, + "grad_norm": 12.797572135925293, + "learning_rate": 5e-05, + "loss": 1.0646, + "num_input_tokens_seen": 340809048, + "step": 5095 + }, + { + "epoch": 0.5781560283687943, + "loss": 1.213773250579834, + "loss_ce": 0.008695116266608238, + "loss_iou": 0.48828125, + "loss_num": 0.045654296875, + "loss_xval": 1.203125, + "num_input_tokens_seen": 340809048, + "step": 5095 + }, + { + "epoch": 0.5782695035460993, + "grad_norm": 34.6213493347168, + "learning_rate": 5e-05, + "loss": 1.1011, + "num_input_tokens_seen": 340876212, + "step": 5096 + }, + { + "epoch": 0.5782695035460993, + "loss": 1.1013855934143066, + "loss_ce": 0.006170761771500111, + "loss_iou": 0.470703125, + "loss_num": 0.03076171875, + "loss_xval": 1.09375, + "num_input_tokens_seen": 340876212, + "step": 5096 + }, + { + "epoch": 0.5783829787234043, + "grad_norm": 46.3421745300293, + "learning_rate": 5e-05, + "loss": 1.3951, + "num_input_tokens_seen": 340943104, + "step": 5097 + }, + { + "epoch": 0.5783829787234043, + "loss": 1.5165315866470337, + "loss_ce": 0.006277682259678841, + "loss_iou": 0.59765625, + "loss_num": 0.0625, + "loss_xval": 1.5078125, + "num_input_tokens_seen": 340943104, + "step": 5097 + }, + { + "epoch": 0.5784964539007093, + "grad_norm": 28.152992248535156, + "learning_rate": 5e-05, + "loss": 1.2584, + "num_input_tokens_seen": 341009692, + "step": 5098 + }, + { + "epoch": 0.5784964539007093, + "loss": 1.5382407903671265, + "loss_ce": 0.011873647570610046, + "loss_iou": 0.59765625, + "loss_num": 0.06591796875, + "loss_xval": 1.5234375, + "num_input_tokens_seen": 341009692, + "step": 5098 + }, + { + "epoch": 0.5786099290780142, + "grad_norm": 20.59693145751953, + "learning_rate": 5e-05, + "loss": 1.2187, + "num_input_tokens_seen": 341075656, + "step": 5099 + }, + { + "epoch": 0.5786099290780142, + "loss": 1.3177521228790283, + "loss_ce": 0.006716929376125336, + "loss_iou": 0.48828125, + "loss_num": 0.06689453125, + "loss_xval": 1.3125, + "num_input_tokens_seen": 341075656, + "step": 5099 + }, + { + "epoch": 0.5787234042553191, + "grad_norm": 26.325008392333984, + "learning_rate": 5e-05, + "loss": 1.3367, + "num_input_tokens_seen": 341142176, + "step": 5100 + }, + { + "epoch": 0.5787234042553191, + "loss": 1.2727789878845215, + "loss_ce": 0.010572010651230812, + "loss_iou": 0.51171875, + "loss_num": 0.048095703125, + "loss_xval": 1.265625, + "num_input_tokens_seen": 341142176, + "step": 5100 + }, + { + "epoch": 0.5788368794326241, + "grad_norm": 40.24207305908203, + "learning_rate": 5e-05, + "loss": 1.3935, + "num_input_tokens_seen": 341207336, + "step": 5101 + }, + { + "epoch": 0.5788368794326241, + "loss": 1.3465521335601807, + "loss_ce": 0.008905680850148201, + "loss_iou": 0.5234375, + "loss_num": 0.058349609375, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 341207336, + "step": 5101 + }, + { + "epoch": 0.578950354609929, + "grad_norm": 23.291399002075195, + "learning_rate": 5e-05, + "loss": 1.3246, + "num_input_tokens_seen": 341274184, + "step": 5102 + }, + { + "epoch": 0.578950354609929, + "loss": 1.3285260200500488, + "loss_ce": 0.0033307941630482674, + "loss_iou": 0.5625, + "loss_num": 0.040283203125, + "loss_xval": 1.328125, + "num_input_tokens_seen": 341274184, + "step": 5102 + }, + { + "epoch": 0.579063829787234, + "grad_norm": 16.071273803710938, + "learning_rate": 5e-05, + "loss": 0.9857, + "num_input_tokens_seen": 341340588, + "step": 5103 + }, + { + "epoch": 0.579063829787234, + "loss": 0.916499137878418, + "loss_ce": 0.0036573358811438084, + "loss_iou": 0.3984375, + "loss_num": 0.023681640625, + "loss_xval": 0.9140625, + "num_input_tokens_seen": 341340588, + "step": 5103 + }, + { + "epoch": 0.579177304964539, + "grad_norm": 16.43182373046875, + "learning_rate": 5e-05, + "loss": 1.0975, + "num_input_tokens_seen": 341407612, + "step": 5104 + }, + { + "epoch": 0.579177304964539, + "loss": 1.043475866317749, + "loss_ce": 0.004413324873894453, + "loss_iou": 0.455078125, + "loss_num": 0.02587890625, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 341407612, + "step": 5104 + }, + { + "epoch": 0.579290780141844, + "grad_norm": 30.37273406982422, + "learning_rate": 5e-05, + "loss": 1.3935, + "num_input_tokens_seen": 341474860, + "step": 5105 + }, + { + "epoch": 0.579290780141844, + "loss": 1.4676408767700195, + "loss_ce": 0.006215176545083523, + "loss_iou": 0.58984375, + "loss_num": 0.056884765625, + "loss_xval": 1.4609375, + "num_input_tokens_seen": 341474860, + "step": 5105 + }, + { + "epoch": 0.579404255319149, + "grad_norm": 32.061771392822266, + "learning_rate": 5e-05, + "loss": 1.3697, + "num_input_tokens_seen": 341542464, + "step": 5106 + }, + { + "epoch": 0.579404255319149, + "loss": 1.4676867723464966, + "loss_ce": 0.0057727196253836155, + "loss_iou": 0.59765625, + "loss_num": 0.0537109375, + "loss_xval": 1.4609375, + "num_input_tokens_seen": 341542464, + "step": 5106 + }, + { + "epoch": 0.5795177304964539, + "grad_norm": 25.232105255126953, + "learning_rate": 5e-05, + "loss": 1.1076, + "num_input_tokens_seen": 341609940, + "step": 5107 + }, + { + "epoch": 0.5795177304964539, + "loss": 1.2104694843292236, + "loss_ce": 0.006368016824126244, + "loss_iou": 0.48046875, + "loss_num": 0.048583984375, + "loss_xval": 1.203125, + "num_input_tokens_seen": 341609940, + "step": 5107 + }, + { + "epoch": 0.5796312056737589, + "grad_norm": 21.11666488647461, + "learning_rate": 5e-05, + "loss": 1.0328, + "num_input_tokens_seen": 341674984, + "step": 5108 + }, + { + "epoch": 0.5796312056737589, + "loss": 1.1310498714447021, + "loss_ce": 0.009711979888379574, + "loss_iou": 0.44140625, + "loss_num": 0.04736328125, + "loss_xval": 1.125, + "num_input_tokens_seen": 341674984, + "step": 5108 + }, + { + "epoch": 0.5797446808510638, + "grad_norm": 169.57769775390625, + "learning_rate": 5e-05, + "loss": 1.4398, + "num_input_tokens_seen": 341741908, + "step": 5109 + }, + { + "epoch": 0.5797446808510638, + "loss": 1.2432454824447632, + "loss_ce": 0.006917376071214676, + "loss_iou": 0.466796875, + "loss_num": 0.060302734375, + "loss_xval": 1.234375, + "num_input_tokens_seen": 341741908, + "step": 5109 + }, + { + "epoch": 0.5798581560283688, + "grad_norm": 33.932777404785156, + "learning_rate": 5e-05, + "loss": 1.1406, + "num_input_tokens_seen": 341808804, + "step": 5110 + }, + { + "epoch": 0.5798581560283688, + "loss": 1.1160364151000977, + "loss_ce": 0.006661504507064819, + "loss_iou": 0.4765625, + "loss_num": 0.03173828125, + "loss_xval": 1.109375, + "num_input_tokens_seen": 341808804, + "step": 5110 + }, + { + "epoch": 0.5799716312056737, + "grad_norm": 29.122047424316406, + "learning_rate": 5e-05, + "loss": 1.1713, + "num_input_tokens_seen": 341876796, + "step": 5111 + }, + { + "epoch": 0.5799716312056737, + "loss": 1.036750316619873, + "loss_ce": 0.00794167723506689, + "loss_iou": 0.45703125, + "loss_num": 0.023193359375, + "loss_xval": 1.03125, + "num_input_tokens_seen": 341876796, + "step": 5111 + }, + { + "epoch": 0.5800851063829787, + "grad_norm": 24.6907901763916, + "learning_rate": 5e-05, + "loss": 1.2478, + "num_input_tokens_seen": 341943452, + "step": 5112 + }, + { + "epoch": 0.5800851063829787, + "loss": 1.4039933681488037, + "loss_ce": 0.005555884912610054, + "loss_iou": 0.61328125, + "loss_num": 0.033447265625, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 341943452, + "step": 5112 + }, + { + "epoch": 0.5801985815602837, + "grad_norm": 25.51943588256836, + "learning_rate": 5e-05, + "loss": 1.1126, + "num_input_tokens_seen": 342011640, + "step": 5113 + }, + { + "epoch": 0.5801985815602837, + "loss": 1.165773868560791, + "loss_ce": 0.006594124250113964, + "loss_iou": 0.45703125, + "loss_num": 0.048828125, + "loss_xval": 1.15625, + "num_input_tokens_seen": 342011640, + "step": 5113 + }, + { + "epoch": 0.5803120567375887, + "grad_norm": 25.509836196899414, + "learning_rate": 5e-05, + "loss": 1.0366, + "num_input_tokens_seen": 342078012, + "step": 5114 + }, + { + "epoch": 0.5803120567375887, + "loss": 1.1180036067962646, + "loss_ce": 0.010276539251208305, + "loss_iou": 0.455078125, + "loss_num": 0.039794921875, + "loss_xval": 1.109375, + "num_input_tokens_seen": 342078012, + "step": 5114 + }, + { + "epoch": 0.5804255319148937, + "grad_norm": 32.80645751953125, + "learning_rate": 5e-05, + "loss": 1.1807, + "num_input_tokens_seen": 342144444, + "step": 5115 + }, + { + "epoch": 0.5804255319148937, + "loss": 1.087551236152649, + "loss_ce": 0.003322707023471594, + "loss_iou": 0.46484375, + "loss_num": 0.0306396484375, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 342144444, + "step": 5115 + }, + { + "epoch": 0.5805390070921986, + "grad_norm": 18.876693725585938, + "learning_rate": 5e-05, + "loss": 1.2591, + "num_input_tokens_seen": 342210108, + "step": 5116 + }, + { + "epoch": 0.5805390070921986, + "loss": 1.0346224308013916, + "loss_ce": 0.0026400843635201454, + "loss_iou": 0.43359375, + "loss_num": 0.033203125, + "loss_xval": 1.03125, + "num_input_tokens_seen": 342210108, + "step": 5116 + }, + { + "epoch": 0.5806524822695035, + "grad_norm": 29.491243362426758, + "learning_rate": 5e-05, + "loss": 1.0193, + "num_input_tokens_seen": 342276484, + "step": 5117 + }, + { + "epoch": 0.5806524822695035, + "loss": 0.8636393547058105, + "loss_ce": 0.006461568176746368, + "loss_iou": 0.3515625, + "loss_num": 0.030517578125, + "loss_xval": 0.85546875, + "num_input_tokens_seen": 342276484, + "step": 5117 + }, + { + "epoch": 0.5807659574468085, + "grad_norm": 34.618980407714844, + "learning_rate": 5e-05, + "loss": 1.185, + "num_input_tokens_seen": 342343464, + "step": 5118 + }, + { + "epoch": 0.5807659574468085, + "loss": 1.3231687545776367, + "loss_ce": 0.007739105727523565, + "loss_iou": 0.53515625, + "loss_num": 0.048583984375, + "loss_xval": 1.3125, + "num_input_tokens_seen": 342343464, + "step": 5118 + }, + { + "epoch": 0.5808794326241135, + "grad_norm": 29.214126586914062, + "learning_rate": 5e-05, + "loss": 1.3988, + "num_input_tokens_seen": 342409972, + "step": 5119 + }, + { + "epoch": 0.5808794326241135, + "loss": 1.5525448322296143, + "loss_ce": 0.004693263676017523, + "loss_iou": 0.6171875, + "loss_num": 0.062255859375, + "loss_xval": 1.546875, + "num_input_tokens_seen": 342409972, + "step": 5119 + }, + { + "epoch": 0.5809929078014184, + "grad_norm": 20.06621551513672, + "learning_rate": 5e-05, + "loss": 1.1776, + "num_input_tokens_seen": 342475940, + "step": 5120 + }, + { + "epoch": 0.5809929078014184, + "loss": 1.21096670627594, + "loss_ce": 0.008818315342068672, + "loss_iou": 0.4609375, + "loss_num": 0.056396484375, + "loss_xval": 1.203125, + "num_input_tokens_seen": 342475940, + "step": 5120 + }, + { + "epoch": 0.5811063829787234, + "grad_norm": 18.960147857666016, + "learning_rate": 5e-05, + "loss": 1.1778, + "num_input_tokens_seen": 342543132, + "step": 5121 + }, + { + "epoch": 0.5811063829787234, + "loss": 1.1177141666412354, + "loss_ce": 0.00785087514668703, + "loss_iou": 0.412109375, + "loss_num": 0.057373046875, + "loss_xval": 1.109375, + "num_input_tokens_seen": 342543132, + "step": 5121 + }, + { + "epoch": 0.5812198581560284, + "grad_norm": 24.422544479370117, + "learning_rate": 5e-05, + "loss": 1.3004, + "num_input_tokens_seen": 342611720, + "step": 5122 + }, + { + "epoch": 0.5812198581560284, + "loss": 1.2114168405532837, + "loss_ce": 0.005362107418477535, + "loss_iou": 0.51953125, + "loss_num": 0.033203125, + "loss_xval": 1.203125, + "num_input_tokens_seen": 342611720, + "step": 5122 + }, + { + "epoch": 0.5813333333333334, + "grad_norm": 34.4885139465332, + "learning_rate": 5e-05, + "loss": 1.3332, + "num_input_tokens_seen": 342679188, + "step": 5123 + }, + { + "epoch": 0.5813333333333334, + "loss": 1.1780221462249756, + "loss_ce": 0.00517057441174984, + "loss_iou": 0.48046875, + "loss_num": 0.042236328125, + "loss_xval": 1.171875, + "num_input_tokens_seen": 342679188, + "step": 5123 + }, + { + "epoch": 0.5814468085106383, + "grad_norm": 37.302913665771484, + "learning_rate": 5e-05, + "loss": 1.2324, + "num_input_tokens_seen": 342746276, + "step": 5124 + }, + { + "epoch": 0.5814468085106383, + "loss": 1.2269829511642456, + "loss_ce": 0.005791541188955307, + "loss_iou": 0.5234375, + "loss_num": 0.034423828125, + "loss_xval": 1.21875, + "num_input_tokens_seen": 342746276, + "step": 5124 + }, + { + "epoch": 0.5815602836879432, + "grad_norm": 27.589214324951172, + "learning_rate": 5e-05, + "loss": 1.3261, + "num_input_tokens_seen": 342812812, + "step": 5125 + }, + { + "epoch": 0.5815602836879432, + "loss": 1.4210717678070068, + "loss_ce": 0.007009241729974747, + "loss_iou": 0.59375, + "loss_num": 0.04443359375, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 342812812, + "step": 5125 + }, + { + "epoch": 0.5816737588652482, + "grad_norm": 22.33315086364746, + "learning_rate": 5e-05, + "loss": 1.0501, + "num_input_tokens_seen": 342879068, + "step": 5126 + }, + { + "epoch": 0.5816737588652482, + "loss": 1.0774288177490234, + "loss_ce": 0.003698385786265135, + "loss_iou": 0.443359375, + "loss_num": 0.037353515625, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 342879068, + "step": 5126 + }, + { + "epoch": 0.5817872340425532, + "grad_norm": 16.599445343017578, + "learning_rate": 5e-05, + "loss": 1.231, + "num_input_tokens_seen": 342945560, + "step": 5127 + }, + { + "epoch": 0.5817872340425532, + "loss": 1.1586276292800903, + "loss_ce": 0.0033541976008564234, + "loss_iou": 0.50390625, + "loss_num": 0.029052734375, + "loss_xval": 1.15625, + "num_input_tokens_seen": 342945560, + "step": 5127 + }, + { + "epoch": 0.5819007092198581, + "grad_norm": 23.588794708251953, + "learning_rate": 5e-05, + "loss": 1.1355, + "num_input_tokens_seen": 343011996, + "step": 5128 + }, + { + "epoch": 0.5819007092198581, + "loss": 1.1480133533477783, + "loss_ce": 0.005679388530552387, + "loss_iou": 0.490234375, + "loss_num": 0.0322265625, + "loss_xval": 1.140625, + "num_input_tokens_seen": 343011996, + "step": 5128 + }, + { + "epoch": 0.5820141843971631, + "grad_norm": 24.139114379882812, + "learning_rate": 5e-05, + "loss": 1.2988, + "num_input_tokens_seen": 343079816, + "step": 5129 + }, + { + "epoch": 0.5820141843971631, + "loss": 1.2110475301742554, + "loss_ce": 0.004016293212771416, + "loss_iou": 0.49609375, + "loss_num": 0.043212890625, + "loss_xval": 1.203125, + "num_input_tokens_seen": 343079816, + "step": 5129 + }, + { + "epoch": 0.5821276595744681, + "grad_norm": 17.279014587402344, + "learning_rate": 5e-05, + "loss": 1.0212, + "num_input_tokens_seen": 343146612, + "step": 5130 + }, + { + "epoch": 0.5821276595744681, + "loss": 1.0965394973754883, + "loss_ce": 0.0059023345820605755, + "loss_iou": 0.443359375, + "loss_num": 0.04052734375, + "loss_xval": 1.09375, + "num_input_tokens_seen": 343146612, + "step": 5130 + }, + { + "epoch": 0.5822411347517731, + "grad_norm": 16.78385353088379, + "learning_rate": 5e-05, + "loss": 1.0937, + "num_input_tokens_seen": 343213260, + "step": 5131 + }, + { + "epoch": 0.5822411347517731, + "loss": 1.144860029220581, + "loss_ce": 0.00618820870295167, + "loss_iou": 0.451171875, + "loss_num": 0.047119140625, + "loss_xval": 1.140625, + "num_input_tokens_seen": 343213260, + "step": 5131 + }, + { + "epoch": 0.5823546099290781, + "grad_norm": 83.50956726074219, + "learning_rate": 5e-05, + "loss": 1.0087, + "num_input_tokens_seen": 343278016, + "step": 5132 + }, + { + "epoch": 0.5823546099290781, + "loss": 0.9136486053466797, + "loss_ce": 0.008375154808163643, + "loss_iou": 0.359375, + "loss_num": 0.037109375, + "loss_xval": 0.90625, + "num_input_tokens_seen": 343278016, + "step": 5132 + }, + { + "epoch": 0.5824680851063829, + "grad_norm": 31.203258514404297, + "learning_rate": 5e-05, + "loss": 1.0391, + "num_input_tokens_seen": 343345516, + "step": 5133 + }, + { + "epoch": 0.5824680851063829, + "loss": 1.1403346061706543, + "loss_ce": 0.008498642593622208, + "loss_iou": 0.48046875, + "loss_num": 0.034423828125, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 343345516, + "step": 5133 + }, + { + "epoch": 0.5825815602836879, + "grad_norm": 35.59392547607422, + "learning_rate": 5e-05, + "loss": 0.987, + "num_input_tokens_seen": 343411788, + "step": 5134 + }, + { + "epoch": 0.5825815602836879, + "loss": 0.9451280832290649, + "loss_ce": 0.005674981512129307, + "loss_iou": 0.33203125, + "loss_num": 0.0546875, + "loss_xval": 0.9375, + "num_input_tokens_seen": 343411788, + "step": 5134 + }, + { + "epoch": 0.5826950354609929, + "grad_norm": 19.25301742553711, + "learning_rate": 5e-05, + "loss": 1.1698, + "num_input_tokens_seen": 343478020, + "step": 5135 + }, + { + "epoch": 0.5826950354609929, + "loss": 1.0374360084533691, + "loss_ce": 0.007650874555110931, + "loss_iou": 0.375, + "loss_num": 0.055419921875, + "loss_xval": 1.03125, + "num_input_tokens_seen": 343478020, + "step": 5135 + }, + { + "epoch": 0.5828085106382979, + "grad_norm": 33.14656448364258, + "learning_rate": 5e-05, + "loss": 0.9964, + "num_input_tokens_seen": 343545648, + "step": 5136 + }, + { + "epoch": 0.5828085106382979, + "loss": 0.9888591766357422, + "loss_ce": 0.003507622517645359, + "loss_iou": 0.423828125, + "loss_num": 0.028076171875, + "loss_xval": 0.984375, + "num_input_tokens_seen": 343545648, + "step": 5136 + }, + { + "epoch": 0.5829219858156028, + "grad_norm": 35.84299087524414, + "learning_rate": 5e-05, + "loss": 1.3607, + "num_input_tokens_seen": 343612588, + "step": 5137 + }, + { + "epoch": 0.5829219858156028, + "loss": 1.5245983600616455, + "loss_ce": 0.004090514499694109, + "loss_iou": 0.62109375, + "loss_num": 0.05517578125, + "loss_xval": 1.5234375, + "num_input_tokens_seen": 343612588, + "step": 5137 + }, + { + "epoch": 0.5830354609929078, + "grad_norm": 20.54759407043457, + "learning_rate": 5e-05, + "loss": 1.1156, + "num_input_tokens_seen": 343679700, + "step": 5138 + }, + { + "epoch": 0.5830354609929078, + "loss": 0.9215977787971497, + "loss_ce": 0.004117301199585199, + "loss_iou": 0.40625, + "loss_num": 0.0211181640625, + "loss_xval": 0.91796875, + "num_input_tokens_seen": 343679700, + "step": 5138 + }, + { + "epoch": 0.5831489361702128, + "grad_norm": 20.460554122924805, + "learning_rate": 5e-05, + "loss": 1.2557, + "num_input_tokens_seen": 343746928, + "step": 5139 + }, + { + "epoch": 0.5831489361702128, + "loss": 1.3338572978973389, + "loss_ce": 0.009150300174951553, + "loss_iou": 0.52734375, + "loss_num": 0.05419921875, + "loss_xval": 1.328125, + "num_input_tokens_seen": 343746928, + "step": 5139 + }, + { + "epoch": 0.5832624113475178, + "grad_norm": 29.911367416381836, + "learning_rate": 5e-05, + "loss": 1.1509, + "num_input_tokens_seen": 343814028, + "step": 5140 + }, + { + "epoch": 0.5832624113475178, + "loss": 1.0764515399932861, + "loss_ce": 0.005162552464753389, + "loss_iou": 0.45703125, + "loss_num": 0.03125, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 343814028, + "step": 5140 + }, + { + "epoch": 0.5833758865248226, + "grad_norm": 28.844829559326172, + "learning_rate": 5e-05, + "loss": 1.1522, + "num_input_tokens_seen": 343879460, + "step": 5141 + }, + { + "epoch": 0.5833758865248226, + "loss": 1.2653498649597168, + "loss_ce": 0.004607710056006908, + "loss_iou": 0.515625, + "loss_num": 0.04541015625, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 343879460, + "step": 5141 + }, + { + "epoch": 0.5834893617021276, + "grad_norm": 41.01547622680664, + "learning_rate": 5e-05, + "loss": 1.4503, + "num_input_tokens_seen": 343946324, + "step": 5142 + }, + { + "epoch": 0.5834893617021276, + "loss": 1.4915730953216553, + "loss_ce": 0.009151307865977287, + "loss_iou": 0.58984375, + "loss_num": 0.06103515625, + "loss_xval": 1.484375, + "num_input_tokens_seen": 343946324, + "step": 5142 + }, + { + "epoch": 0.5836028368794326, + "grad_norm": 22.30230712890625, + "learning_rate": 5e-05, + "loss": 1.3265, + "num_input_tokens_seen": 344013696, + "step": 5143 + }, + { + "epoch": 0.5836028368794326, + "loss": 1.4722400903701782, + "loss_ce": 0.006419775541871786, + "loss_iou": 0.60546875, + "loss_num": 0.05029296875, + "loss_xval": 1.46875, + "num_input_tokens_seen": 344013696, + "step": 5143 + }, + { + "epoch": 0.5837163120567376, + "grad_norm": 18.598451614379883, + "learning_rate": 5e-05, + "loss": 1.2181, + "num_input_tokens_seen": 344079812, + "step": 5144 + }, + { + "epoch": 0.5837163120567376, + "loss": 1.1261568069458008, + "loss_ce": 0.007992724888026714, + "loss_iou": 0.453125, + "loss_num": 0.042236328125, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 344079812, + "step": 5144 + }, + { + "epoch": 0.5838297872340426, + "grad_norm": 39.172325134277344, + "learning_rate": 5e-05, + "loss": 1.2349, + "num_input_tokens_seen": 344147300, + "step": 5145 + }, + { + "epoch": 0.5838297872340426, + "loss": 1.2131905555725098, + "loss_ce": 0.006647618021816015, + "loss_iou": 0.482421875, + "loss_num": 0.04833984375, + "loss_xval": 1.203125, + "num_input_tokens_seen": 344147300, + "step": 5145 + }, + { + "epoch": 0.5839432624113475, + "grad_norm": 33.312355041503906, + "learning_rate": 5e-05, + "loss": 1.2372, + "num_input_tokens_seen": 344213548, + "step": 5146 + }, + { + "epoch": 0.5839432624113475, + "loss": 1.2015223503112793, + "loss_ce": 0.004745029844343662, + "loss_iou": 0.49609375, + "loss_num": 0.041259765625, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 344213548, + "step": 5146 + }, + { + "epoch": 0.5840567375886525, + "grad_norm": 42.45109176635742, + "learning_rate": 5e-05, + "loss": 1.0974, + "num_input_tokens_seen": 344280024, + "step": 5147 + }, + { + "epoch": 0.5840567375886525, + "loss": 1.2068536281585693, + "loss_ce": 0.004216879140585661, + "loss_iou": 0.46484375, + "loss_num": 0.05517578125, + "loss_xval": 1.203125, + "num_input_tokens_seen": 344280024, + "step": 5147 + }, + { + "epoch": 0.5841702127659575, + "grad_norm": 20.726552963256836, + "learning_rate": 5e-05, + "loss": 1.0702, + "num_input_tokens_seen": 344346440, + "step": 5148 + }, + { + "epoch": 0.5841702127659575, + "loss": 1.149639368057251, + "loss_ce": 0.005596536211669445, + "loss_iou": 0.39453125, + "loss_num": 0.07080078125, + "loss_xval": 1.140625, + "num_input_tokens_seen": 344346440, + "step": 5148 + }, + { + "epoch": 0.5842836879432625, + "grad_norm": 35.70650863647461, + "learning_rate": 5e-05, + "loss": 1.0934, + "num_input_tokens_seen": 344413136, + "step": 5149 + }, + { + "epoch": 0.5842836879432625, + "loss": 1.0634201765060425, + "loss_ce": 0.007267793174833059, + "loss_iou": 0.4453125, + "loss_num": 0.032470703125, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 344413136, + "step": 5149 + }, + { + "epoch": 0.5843971631205673, + "grad_norm": 27.351804733276367, + "learning_rate": 5e-05, + "loss": 1.2703, + "num_input_tokens_seen": 344479988, + "step": 5150 + }, + { + "epoch": 0.5843971631205673, + "loss": 1.2660356760025024, + "loss_ce": 0.00901662278920412, + "loss_iou": 0.4765625, + "loss_num": 0.060302734375, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 344479988, + "step": 5150 + }, + { + "epoch": 0.5845106382978723, + "grad_norm": 13.234912872314453, + "learning_rate": 5e-05, + "loss": 1.1069, + "num_input_tokens_seen": 344547352, + "step": 5151 + }, + { + "epoch": 0.5845106382978723, + "loss": 1.016517162322998, + "loss_ce": 0.007483972702175379, + "loss_iou": 0.412109375, + "loss_num": 0.03662109375, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 344547352, + "step": 5151 + }, + { + "epoch": 0.5846241134751773, + "grad_norm": 22.451814651489258, + "learning_rate": 5e-05, + "loss": 1.0873, + "num_input_tokens_seen": 344614156, + "step": 5152 + }, + { + "epoch": 0.5846241134751773, + "loss": 1.0373374223709106, + "loss_ce": 0.005110797937959433, + "loss_iou": 0.4140625, + "loss_num": 0.041259765625, + "loss_xval": 1.03125, + "num_input_tokens_seen": 344614156, + "step": 5152 + }, + { + "epoch": 0.5847375886524823, + "grad_norm": 24.14647674560547, + "learning_rate": 5e-05, + "loss": 1.2548, + "num_input_tokens_seen": 344681592, + "step": 5153 + }, + { + "epoch": 0.5847375886524823, + "loss": 1.1431586742401123, + "loss_ce": 0.006134803872555494, + "loss_iou": 0.482421875, + "loss_num": 0.0341796875, + "loss_xval": 1.140625, + "num_input_tokens_seen": 344681592, + "step": 5153 + }, + { + "epoch": 0.5848510638297872, + "grad_norm": 26.003131866455078, + "learning_rate": 5e-05, + "loss": 1.1292, + "num_input_tokens_seen": 344748288, + "step": 5154 + }, + { + "epoch": 0.5848510638297872, + "loss": 1.1579316854476929, + "loss_ce": 0.006076209247112274, + "loss_iou": 0.5, + "loss_num": 0.0303955078125, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 344748288, + "step": 5154 + }, + { + "epoch": 0.5849645390070922, + "grad_norm": 40.886348724365234, + "learning_rate": 5e-05, + "loss": 1.1941, + "num_input_tokens_seen": 344815060, + "step": 5155 + }, + { + "epoch": 0.5849645390070922, + "loss": 1.149776816368103, + "loss_ce": 0.003780745668336749, + "loss_iou": 0.515625, + "loss_num": 0.0225830078125, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 344815060, + "step": 5155 + }, + { + "epoch": 0.5850780141843972, + "grad_norm": 33.43921661376953, + "learning_rate": 5e-05, + "loss": 1.3725, + "num_input_tokens_seen": 344882020, + "step": 5156 + }, + { + "epoch": 0.5850780141843972, + "loss": 1.2201545238494873, + "loss_ce": 0.003845923114567995, + "loss_iou": 0.51953125, + "loss_num": 0.035888671875, + "loss_xval": 1.21875, + "num_input_tokens_seen": 344882020, + "step": 5156 + }, + { + "epoch": 0.5851914893617022, + "grad_norm": 14.187970161437988, + "learning_rate": 5e-05, + "loss": 1.112, + "num_input_tokens_seen": 344948112, + "step": 5157 + }, + { + "epoch": 0.5851914893617022, + "loss": 1.0111148357391357, + "loss_ce": 0.004767235368490219, + "loss_iou": 0.40625, + "loss_num": 0.0390625, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 344948112, + "step": 5157 + }, + { + "epoch": 0.585304964539007, + "grad_norm": 32.15829849243164, + "learning_rate": 5e-05, + "loss": 1.4063, + "num_input_tokens_seen": 345014832, + "step": 5158 + }, + { + "epoch": 0.585304964539007, + "loss": 1.5496816635131836, + "loss_ce": 0.005248108878731728, + "loss_iou": 0.625, + "loss_num": 0.059326171875, + "loss_xval": 1.546875, + "num_input_tokens_seen": 345014832, + "step": 5158 + }, + { + "epoch": 0.585418439716312, + "grad_norm": 23.26718521118164, + "learning_rate": 5e-05, + "loss": 1.2165, + "num_input_tokens_seen": 345081880, + "step": 5159 + }, + { + "epoch": 0.585418439716312, + "loss": 1.1077251434326172, + "loss_ce": 0.0071391453966498375, + "loss_iou": 0.45703125, + "loss_num": 0.03759765625, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 345081880, + "step": 5159 + }, + { + "epoch": 0.585531914893617, + "grad_norm": 46.1168098449707, + "learning_rate": 5e-05, + "loss": 1.1162, + "num_input_tokens_seen": 345148760, + "step": 5160 + }, + { + "epoch": 0.585531914893617, + "loss": 1.1486986875534058, + "loss_ce": 0.0031908873934298754, + "loss_iou": 0.48828125, + "loss_num": 0.033203125, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 345148760, + "step": 5160 + }, + { + "epoch": 0.585645390070922, + "grad_norm": 26.536724090576172, + "learning_rate": 5e-05, + "loss": 1.2433, + "num_input_tokens_seen": 345215356, + "step": 5161 + }, + { + "epoch": 0.585645390070922, + "loss": 1.4106571674346924, + "loss_ce": 0.0053837476298213005, + "loss_iou": 0.57421875, + "loss_num": 0.05078125, + "loss_xval": 1.40625, + "num_input_tokens_seen": 345215356, + "step": 5161 + }, + { + "epoch": 0.585758865248227, + "grad_norm": 47.805908203125, + "learning_rate": 5e-05, + "loss": 1.2716, + "num_input_tokens_seen": 345282240, + "step": 5162 + }, + { + "epoch": 0.585758865248227, + "loss": 1.2132513523101807, + "loss_ce": 0.0042670308612287045, + "loss_iou": 0.53125, + "loss_num": 0.0289306640625, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 345282240, + "step": 5162 + }, + { + "epoch": 0.5858723404255319, + "grad_norm": 49.09259033203125, + "learning_rate": 5e-05, + "loss": 1.5124, + "num_input_tokens_seen": 345348704, + "step": 5163 + }, + { + "epoch": 0.5858723404255319, + "loss": 1.434255838394165, + "loss_ce": 0.010916020721197128, + "loss_iou": 0.57421875, + "loss_num": 0.054443359375, + "loss_xval": 1.421875, + "num_input_tokens_seen": 345348704, + "step": 5163 + }, + { + "epoch": 0.5859858156028369, + "grad_norm": 30.71399688720703, + "learning_rate": 5e-05, + "loss": 1.3252, + "num_input_tokens_seen": 345416152, + "step": 5164 + }, + { + "epoch": 0.5859858156028369, + "loss": 1.3904463052749634, + "loss_ce": 0.006657275836914778, + "loss_iou": 0.515625, + "loss_num": 0.0703125, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 345416152, + "step": 5164 + }, + { + "epoch": 0.5860992907801419, + "grad_norm": 37.732818603515625, + "learning_rate": 5e-05, + "loss": 1.3218, + "num_input_tokens_seen": 345483208, + "step": 5165 + }, + { + "epoch": 0.5860992907801419, + "loss": 1.3271770477294922, + "loss_ce": 0.004911464639008045, + "loss_iou": 0.5625, + "loss_num": 0.0390625, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 345483208, + "step": 5165 + }, + { + "epoch": 0.5862127659574468, + "grad_norm": 29.20330810546875, + "learning_rate": 5e-05, + "loss": 1.1169, + "num_input_tokens_seen": 345548472, + "step": 5166 + }, + { + "epoch": 0.5862127659574468, + "loss": 1.1214772462844849, + "loss_ce": 0.014665741473436356, + "loss_iou": 0.421875, + "loss_num": 0.052978515625, + "loss_xval": 1.109375, + "num_input_tokens_seen": 345548472, + "step": 5166 + }, + { + "epoch": 0.5863262411347517, + "grad_norm": 17.325342178344727, + "learning_rate": 5e-05, + "loss": 0.979, + "num_input_tokens_seen": 345616120, + "step": 5167 + }, + { + "epoch": 0.5863262411347517, + "loss": 1.0957905054092407, + "loss_ce": 0.003993603866547346, + "loss_iou": 0.447265625, + "loss_num": 0.0390625, + "loss_xval": 1.09375, + "num_input_tokens_seen": 345616120, + "step": 5167 + }, + { + "epoch": 0.5864397163120567, + "grad_norm": 22.9591064453125, + "learning_rate": 5e-05, + "loss": 1.2472, + "num_input_tokens_seen": 345683176, + "step": 5168 + }, + { + "epoch": 0.5864397163120567, + "loss": 1.4087812900543213, + "loss_ce": 0.007902301847934723, + "loss_iou": 0.515625, + "loss_num": 0.07470703125, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 345683176, + "step": 5168 + }, + { + "epoch": 0.5865531914893617, + "grad_norm": 36.58011245727539, + "learning_rate": 5e-05, + "loss": 1.303, + "num_input_tokens_seen": 345750468, + "step": 5169 + }, + { + "epoch": 0.5865531914893617, + "loss": 1.3659989833831787, + "loss_ce": 0.007600528188049793, + "loss_iou": 0.5703125, + "loss_num": 0.042724609375, + "loss_xval": 1.359375, + "num_input_tokens_seen": 345750468, + "step": 5169 + }, + { + "epoch": 0.5866666666666667, + "grad_norm": 33.825138092041016, + "learning_rate": 5e-05, + "loss": 1.2441, + "num_input_tokens_seen": 345816432, + "step": 5170 + }, + { + "epoch": 0.5866666666666667, + "loss": 1.2225844860076904, + "loss_ce": 0.007496517151594162, + "loss_iou": 0.494140625, + "loss_num": 0.04541015625, + "loss_xval": 1.21875, + "num_input_tokens_seen": 345816432, + "step": 5170 + }, + { + "epoch": 0.5867801418439716, + "grad_norm": 38.60681915283203, + "learning_rate": 5e-05, + "loss": 1.1674, + "num_input_tokens_seen": 345884084, + "step": 5171 + }, + { + "epoch": 0.5867801418439716, + "loss": 1.1378178596496582, + "loss_ce": 0.006225965451449156, + "loss_iou": 0.478515625, + "loss_num": 0.03466796875, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 345884084, + "step": 5171 + }, + { + "epoch": 0.5868936170212766, + "grad_norm": 24.86497688293457, + "learning_rate": 5e-05, + "loss": 1.333, + "num_input_tokens_seen": 345949560, + "step": 5172 + }, + { + "epoch": 0.5868936170212766, + "loss": 1.0712966918945312, + "loss_ce": 0.007576004136353731, + "loss_iou": 0.431640625, + "loss_num": 0.039794921875, + "loss_xval": 1.0625, + "num_input_tokens_seen": 345949560, + "step": 5172 + }, + { + "epoch": 0.5870070921985816, + "grad_norm": 22.709043502807617, + "learning_rate": 5e-05, + "loss": 1.2251, + "num_input_tokens_seen": 346016304, + "step": 5173 + }, + { + "epoch": 0.5870070921985816, + "loss": 1.031813144683838, + "loss_ce": 0.005445901770144701, + "loss_iou": 0.416015625, + "loss_num": 0.03857421875, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 346016304, + "step": 5173 + }, + { + "epoch": 0.5871205673758865, + "grad_norm": 27.027833938598633, + "learning_rate": 5e-05, + "loss": 1.2629, + "num_input_tokens_seen": 346083420, + "step": 5174 + }, + { + "epoch": 0.5871205673758865, + "loss": 1.1565933227539062, + "loss_ce": 0.008155873976647854, + "loss_iou": 0.50390625, + "loss_num": 0.0283203125, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 346083420, + "step": 5174 + }, + { + "epoch": 0.5872340425531914, + "grad_norm": 19.312644958496094, + "learning_rate": 5e-05, + "loss": 1.1109, + "num_input_tokens_seen": 346150800, + "step": 5175 + }, + { + "epoch": 0.5872340425531914, + "loss": 0.9680023193359375, + "loss_ce": 0.004135127644985914, + "loss_iou": 0.435546875, + "loss_num": 0.0189208984375, + "loss_xval": 0.96484375, + "num_input_tokens_seen": 346150800, + "step": 5175 + }, + { + "epoch": 0.5873475177304964, + "grad_norm": 96.95267486572266, + "learning_rate": 5e-05, + "loss": 1.1096, + "num_input_tokens_seen": 346217724, + "step": 5176 + }, + { + "epoch": 0.5873475177304964, + "loss": 1.1412079334259033, + "loss_ce": 0.009372005239129066, + "loss_iou": 0.455078125, + "loss_num": 0.04443359375, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 346217724, + "step": 5176 + }, + { + "epoch": 0.5874609929078014, + "grad_norm": 29.362930297851562, + "learning_rate": 5e-05, + "loss": 1.3586, + "num_input_tokens_seen": 346284808, + "step": 5177 + }, + { + "epoch": 0.5874609929078014, + "loss": 1.2447938919067383, + "loss_ce": 0.007977459579706192, + "loss_iou": 0.5, + "loss_num": 0.047607421875, + "loss_xval": 1.234375, + "num_input_tokens_seen": 346284808, + "step": 5177 + }, + { + "epoch": 0.5875744680851064, + "grad_norm": 23.050214767456055, + "learning_rate": 5e-05, + "loss": 1.3691, + "num_input_tokens_seen": 346351016, + "step": 5178 + }, + { + "epoch": 0.5875744680851064, + "loss": 1.4638313055038452, + "loss_ce": 0.0038703675381839275, + "loss_iou": 0.6015625, + "loss_num": 0.051025390625, + "loss_xval": 1.4609375, + "num_input_tokens_seen": 346351016, + "step": 5178 + }, + { + "epoch": 0.5876879432624114, + "grad_norm": 24.55034065246582, + "learning_rate": 5e-05, + "loss": 1.2376, + "num_input_tokens_seen": 346418316, + "step": 5179 + }, + { + "epoch": 0.5876879432624114, + "loss": 1.2308619022369385, + "loss_ce": 0.006740882992744446, + "loss_iou": 0.498046875, + "loss_num": 0.046142578125, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 346418316, + "step": 5179 + }, + { + "epoch": 0.5878014184397163, + "grad_norm": 18.118030548095703, + "learning_rate": 5e-05, + "loss": 1.1496, + "num_input_tokens_seen": 346483988, + "step": 5180 + }, + { + "epoch": 0.5878014184397163, + "loss": 1.2996655702590942, + "loss_ce": 0.00883309543132782, + "loss_iou": 0.46875, + "loss_num": 0.0712890625, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 346483988, + "step": 5180 + }, + { + "epoch": 0.5879148936170213, + "grad_norm": 19.62983512878418, + "learning_rate": 5e-05, + "loss": 1.1163, + "num_input_tokens_seen": 346550236, + "step": 5181 + }, + { + "epoch": 0.5879148936170213, + "loss": 1.1227606534957886, + "loss_ce": 0.007038078270852566, + "loss_iou": 0.48046875, + "loss_num": 0.0311279296875, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 346550236, + "step": 5181 + }, + { + "epoch": 0.5880283687943263, + "grad_norm": 32.57837677001953, + "learning_rate": 5e-05, + "loss": 1.384, + "num_input_tokens_seen": 346618100, + "step": 5182 + }, + { + "epoch": 0.5880283687943263, + "loss": 1.4879792928695679, + "loss_ce": 0.005557362921535969, + "loss_iou": 0.58203125, + "loss_num": 0.0634765625, + "loss_xval": 1.484375, + "num_input_tokens_seen": 346618100, + "step": 5182 + }, + { + "epoch": 0.5881418439716312, + "grad_norm": 50.94278335571289, + "learning_rate": 5e-05, + "loss": 1.2855, + "num_input_tokens_seen": 346685796, + "step": 5183 + }, + { + "epoch": 0.5881418439716312, + "loss": 1.3942968845367432, + "loss_ce": 0.004160217009484768, + "loss_iou": 0.57421875, + "loss_num": 0.048828125, + "loss_xval": 1.390625, + "num_input_tokens_seen": 346685796, + "step": 5183 + }, + { + "epoch": 0.5882553191489361, + "grad_norm": 34.118751525878906, + "learning_rate": 5e-05, + "loss": 1.3138, + "num_input_tokens_seen": 346753136, + "step": 5184 + }, + { + "epoch": 0.5882553191489361, + "loss": 1.3925622701644897, + "loss_ce": 0.006820109207183123, + "loss_iou": 0.54296875, + "loss_num": 0.0595703125, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 346753136, + "step": 5184 + }, + { + "epoch": 0.5883687943262411, + "grad_norm": 30.563182830810547, + "learning_rate": 5e-05, + "loss": 1.0463, + "num_input_tokens_seen": 346819952, + "step": 5185 + }, + { + "epoch": 0.5883687943262411, + "loss": 0.9820534586906433, + "loss_ce": 0.008786840364336967, + "loss_iou": 0.392578125, + "loss_num": 0.037841796875, + "loss_xval": 0.97265625, + "num_input_tokens_seen": 346819952, + "step": 5185 + }, + { + "epoch": 0.5884822695035461, + "grad_norm": 30.26340103149414, + "learning_rate": 5e-05, + "loss": 1.2572, + "num_input_tokens_seen": 346886556, + "step": 5186 + }, + { + "epoch": 0.5884822695035461, + "loss": 1.1582913398742676, + "loss_ce": 0.005459317937493324, + "loss_iou": 0.5, + "loss_num": 0.0308837890625, + "loss_xval": 1.15625, + "num_input_tokens_seen": 346886556, + "step": 5186 + }, + { + "epoch": 0.5885957446808511, + "grad_norm": 34.372013092041016, + "learning_rate": 5e-05, + "loss": 1.327, + "num_input_tokens_seen": 346954284, + "step": 5187 + }, + { + "epoch": 0.5885957446808511, + "loss": 1.4042339324951172, + "loss_ce": 0.005796508863568306, + "loss_iou": 0.58203125, + "loss_num": 0.046875, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 346954284, + "step": 5187 + }, + { + "epoch": 0.588709219858156, + "grad_norm": 39.17215347290039, + "learning_rate": 5e-05, + "loss": 1.3059, + "num_input_tokens_seen": 347021508, + "step": 5188 + }, + { + "epoch": 0.588709219858156, + "loss": 1.2932003736495972, + "loss_ce": 0.004626179113984108, + "loss_iou": 0.54296875, + "loss_num": 0.041259765625, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 347021508, + "step": 5188 + }, + { + "epoch": 0.588822695035461, + "grad_norm": 25.993560791015625, + "learning_rate": 5e-05, + "loss": 1.0597, + "num_input_tokens_seen": 347086912, + "step": 5189 + }, + { + "epoch": 0.588822695035461, + "loss": 1.030557632446289, + "loss_ce": 0.006876002997159958, + "loss_iou": 0.4375, + "loss_num": 0.0301513671875, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 347086912, + "step": 5189 + }, + { + "epoch": 0.588936170212766, + "grad_norm": 25.97187614440918, + "learning_rate": 5e-05, + "loss": 1.2226, + "num_input_tokens_seen": 347152832, + "step": 5190 + }, + { + "epoch": 0.588936170212766, + "loss": 1.2421133518218994, + "loss_ce": 0.006029333919286728, + "loss_iou": 0.478515625, + "loss_num": 0.055419921875, + "loss_xval": 1.234375, + "num_input_tokens_seen": 347152832, + "step": 5190 + }, + { + "epoch": 0.5890496453900709, + "grad_norm": 33.15626907348633, + "learning_rate": 5e-05, + "loss": 1.1263, + "num_input_tokens_seen": 347219236, + "step": 5191 + }, + { + "epoch": 0.5890496453900709, + "loss": 1.0864839553833008, + "loss_ce": 0.011532856151461601, + "loss_iou": 0.44140625, + "loss_num": 0.0380859375, + "loss_xval": 1.078125, + "num_input_tokens_seen": 347219236, + "step": 5191 + }, + { + "epoch": 0.5891631205673759, + "grad_norm": 28.007041931152344, + "learning_rate": 5e-05, + "loss": 1.3215, + "num_input_tokens_seen": 347287200, + "step": 5192 + }, + { + "epoch": 0.5891631205673759, + "loss": 1.3558998107910156, + "loss_ce": 0.00580217270180583, + "loss_iou": 0.58203125, + "loss_num": 0.03662109375, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 347287200, + "step": 5192 + }, + { + "epoch": 0.5892765957446808, + "grad_norm": 32.094783782958984, + "learning_rate": 5e-05, + "loss": 1.1812, + "num_input_tokens_seen": 347355656, + "step": 5193 + }, + { + "epoch": 0.5892765957446808, + "loss": 1.13847017288208, + "loss_ce": 0.00565769337117672, + "loss_iou": 0.458984375, + "loss_num": 0.042724609375, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 347355656, + "step": 5193 + }, + { + "epoch": 0.5893900709219858, + "grad_norm": 17.34196662902832, + "learning_rate": 5e-05, + "loss": 1.2058, + "num_input_tokens_seen": 347422276, + "step": 5194 + }, + { + "epoch": 0.5893900709219858, + "loss": 1.1923032999038696, + "loss_ce": 0.007244716864079237, + "loss_iou": 0.4921875, + "loss_num": 0.0400390625, + "loss_xval": 1.1875, + "num_input_tokens_seen": 347422276, + "step": 5194 + }, + { + "epoch": 0.5895035460992908, + "grad_norm": 20.483394622802734, + "learning_rate": 5e-05, + "loss": 1.2199, + "num_input_tokens_seen": 347489912, + "step": 5195 + }, + { + "epoch": 0.5895035460992908, + "loss": 1.143264651298523, + "loss_ce": 0.008499098010361195, + "loss_iou": 0.423828125, + "loss_num": 0.0576171875, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 347489912, + "step": 5195 + }, + { + "epoch": 0.5896170212765958, + "grad_norm": 27.18734359741211, + "learning_rate": 5e-05, + "loss": 1.1409, + "num_input_tokens_seen": 347556692, + "step": 5196 + }, + { + "epoch": 0.5896170212765958, + "loss": 1.169738531112671, + "loss_ce": 0.0046994686126708984, + "loss_iou": 0.49609375, + "loss_num": 0.03466796875, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 347556692, + "step": 5196 + }, + { + "epoch": 0.5897304964539007, + "grad_norm": 32.283103942871094, + "learning_rate": 5e-05, + "loss": 1.1555, + "num_input_tokens_seen": 347622792, + "step": 5197 + }, + { + "epoch": 0.5897304964539007, + "loss": 1.3396823406219482, + "loss_ce": 0.008139369077980518, + "loss_iou": 0.55859375, + "loss_num": 0.042236328125, + "loss_xval": 1.328125, + "num_input_tokens_seen": 347622792, + "step": 5197 + }, + { + "epoch": 0.5898439716312057, + "grad_norm": 22.99104118347168, + "learning_rate": 5e-05, + "loss": 1.2495, + "num_input_tokens_seen": 347689164, + "step": 5198 + }, + { + "epoch": 0.5898439716312057, + "loss": 1.4614657163619995, + "loss_ce": 0.007852433249354362, + "loss_iou": 0.6015625, + "loss_num": 0.049560546875, + "loss_xval": 1.453125, + "num_input_tokens_seen": 347689164, + "step": 5198 + }, + { + "epoch": 0.5899574468085106, + "grad_norm": 19.142772674560547, + "learning_rate": 5e-05, + "loss": 1.1403, + "num_input_tokens_seen": 347755248, + "step": 5199 + }, + { + "epoch": 0.5899574468085106, + "loss": 0.9642843008041382, + "loss_ce": 0.002858526771888137, + "loss_iou": 0.392578125, + "loss_num": 0.034912109375, + "loss_xval": 0.9609375, + "num_input_tokens_seen": 347755248, + "step": 5199 + }, + { + "epoch": 0.5900709219858156, + "grad_norm": 33.765621185302734, + "learning_rate": 5e-05, + "loss": 1.1461, + "num_input_tokens_seen": 347822628, + "step": 5200 + }, + { + "epoch": 0.5900709219858156, + "loss": 1.2719004154205322, + "loss_ce": 0.00920502096414566, + "loss_iou": 0.5234375, + "loss_num": 0.042724609375, + "loss_xval": 1.265625, + "num_input_tokens_seen": 347822628, + "step": 5200 + }, + { + "epoch": 0.5901843971631205, + "grad_norm": 29.245216369628906, + "learning_rate": 5e-05, + "loss": 1.4341, + "num_input_tokens_seen": 347889656, + "step": 5201 + }, + { + "epoch": 0.5901843971631205, + "loss": 1.3909192085266113, + "loss_ce": 0.007618376985192299, + "loss_iou": 0.5546875, + "loss_num": 0.05419921875, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 347889656, + "step": 5201 + }, + { + "epoch": 0.5902978723404255, + "grad_norm": 27.748598098754883, + "learning_rate": 5e-05, + "loss": 1.2586, + "num_input_tokens_seen": 347956304, + "step": 5202 + }, + { + "epoch": 0.5902978723404255, + "loss": 1.389332890510559, + "loss_ce": 0.007496892474591732, + "loss_iou": 0.5625, + "loss_num": 0.05078125, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 347956304, + "step": 5202 + }, + { + "epoch": 0.5904113475177305, + "grad_norm": 36.380619049072266, + "learning_rate": 5e-05, + "loss": 1.1501, + "num_input_tokens_seen": 348021956, + "step": 5203 + }, + { + "epoch": 0.5904113475177305, + "loss": 1.1558011770248413, + "loss_ce": 0.010293376632034779, + "loss_iou": 0.4453125, + "loss_num": 0.05078125, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 348021956, + "step": 5203 + }, + { + "epoch": 0.5905248226950355, + "grad_norm": 27.17952537536621, + "learning_rate": 5e-05, + "loss": 1.1797, + "num_input_tokens_seen": 348088880, + "step": 5204 + }, + { + "epoch": 0.5905248226950355, + "loss": 1.2650326490402222, + "loss_ce": 0.003313929308205843, + "loss_iou": 0.546875, + "loss_num": 0.033203125, + "loss_xval": 1.265625, + "num_input_tokens_seen": 348088880, + "step": 5204 + }, + { + "epoch": 0.5906382978723405, + "grad_norm": 11.629331588745117, + "learning_rate": 5e-05, + "loss": 0.9886, + "num_input_tokens_seen": 348154840, + "step": 5205 + }, + { + "epoch": 0.5906382978723405, + "loss": 1.108777403831482, + "loss_ce": 0.004773447290062904, + "loss_iou": 0.4453125, + "loss_num": 0.042724609375, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 348154840, + "step": 5205 + }, + { + "epoch": 0.5907517730496454, + "grad_norm": 19.198244094848633, + "learning_rate": 5e-05, + "loss": 1.09, + "num_input_tokens_seen": 348221312, + "step": 5206 + }, + { + "epoch": 0.5907517730496454, + "loss": 1.1784881353378296, + "loss_ce": 0.00417169788852334, + "loss_iou": 0.5078125, + "loss_num": 0.031494140625, + "loss_xval": 1.171875, + "num_input_tokens_seen": 348221312, + "step": 5206 + }, + { + "epoch": 0.5908652482269503, + "grad_norm": 37.13549041748047, + "learning_rate": 5e-05, + "loss": 1.0564, + "num_input_tokens_seen": 348288412, + "step": 5207 + }, + { + "epoch": 0.5908652482269503, + "loss": 1.0020818710327148, + "loss_ce": 0.005011516623198986, + "loss_iou": 0.431640625, + "loss_num": 0.02685546875, + "loss_xval": 0.99609375, + "num_input_tokens_seen": 348288412, + "step": 5207 + }, + { + "epoch": 0.5909787234042553, + "grad_norm": 31.689699172973633, + "learning_rate": 5e-05, + "loss": 1.275, + "num_input_tokens_seen": 348355544, + "step": 5208 + }, + { + "epoch": 0.5909787234042553, + "loss": 1.2424805164337158, + "loss_ce": 0.004199179820716381, + "loss_iou": 0.490234375, + "loss_num": 0.0517578125, + "loss_xval": 1.234375, + "num_input_tokens_seen": 348355544, + "step": 5208 + }, + { + "epoch": 0.5910921985815603, + "grad_norm": 17.354665756225586, + "learning_rate": 5e-05, + "loss": 1.0145, + "num_input_tokens_seen": 348422276, + "step": 5209 + }, + { + "epoch": 0.5910921985815603, + "loss": 0.9794162511825562, + "loss_ce": 0.003830286907032132, + "loss_iou": 0.416015625, + "loss_num": 0.0284423828125, + "loss_xval": 0.9765625, + "num_input_tokens_seen": 348422276, + "step": 5209 + }, + { + "epoch": 0.5912056737588652, + "grad_norm": 75.70966339111328, + "learning_rate": 5e-05, + "loss": 1.1413, + "num_input_tokens_seen": 348489568, + "step": 5210 + }, + { + "epoch": 0.5912056737588652, + "loss": 1.170790433883667, + "loss_ce": 0.010145902633666992, + "loss_iou": 0.486328125, + "loss_num": 0.037841796875, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 348489568, + "step": 5210 + }, + { + "epoch": 0.5913191489361702, + "grad_norm": 22.657968521118164, + "learning_rate": 5e-05, + "loss": 1.2168, + "num_input_tokens_seen": 348556136, + "step": 5211 + }, + { + "epoch": 0.5913191489361702, + "loss": 1.3372901678085327, + "loss_ce": 0.006723738741129637, + "loss_iou": 0.5546875, + "loss_num": 0.043701171875, + "loss_xval": 1.328125, + "num_input_tokens_seen": 348556136, + "step": 5211 + }, + { + "epoch": 0.5914326241134752, + "grad_norm": 23.95658302307129, + "learning_rate": 5e-05, + "loss": 1.2515, + "num_input_tokens_seen": 348622088, + "step": 5212 + }, + { + "epoch": 0.5914326241134752, + "loss": 1.2090520858764648, + "loss_ce": 0.006171218119561672, + "loss_iou": 0.515625, + "loss_num": 0.033447265625, + "loss_xval": 1.203125, + "num_input_tokens_seen": 348622088, + "step": 5212 + }, + { + "epoch": 0.5915460992907802, + "grad_norm": 31.39935302734375, + "learning_rate": 5e-05, + "loss": 1.1729, + "num_input_tokens_seen": 348689136, + "step": 5213 + }, + { + "epoch": 0.5915460992907802, + "loss": 1.1843162775039673, + "loss_ce": 0.009511634707450867, + "loss_iou": 0.51171875, + "loss_num": 0.030517578125, + "loss_xval": 1.171875, + "num_input_tokens_seen": 348689136, + "step": 5213 + }, + { + "epoch": 0.5916595744680851, + "grad_norm": 101.91954803466797, + "learning_rate": 5e-05, + "loss": 1.4173, + "num_input_tokens_seen": 348755736, + "step": 5214 + }, + { + "epoch": 0.5916595744680851, + "loss": 1.492008924484253, + "loss_ce": 0.007145700976252556, + "loss_iou": 0.59765625, + "loss_num": 0.057861328125, + "loss_xval": 1.484375, + "num_input_tokens_seen": 348755736, + "step": 5214 + }, + { + "epoch": 0.5917730496453901, + "grad_norm": 48.151248931884766, + "learning_rate": 5e-05, + "loss": 1.3416, + "num_input_tokens_seen": 348823288, + "step": 5215 + }, + { + "epoch": 0.5917730496453901, + "loss": 1.2965648174285889, + "loss_ce": 0.004084325395524502, + "loss_iou": 0.578125, + "loss_num": 0.02783203125, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 348823288, + "step": 5215 + }, + { + "epoch": 0.591886524822695, + "grad_norm": 24.022319793701172, + "learning_rate": 5e-05, + "loss": 1.2651, + "num_input_tokens_seen": 348889872, + "step": 5216 + }, + { + "epoch": 0.591886524822695, + "loss": 1.1921601295471191, + "loss_ce": 0.004660182632505894, + "loss_iou": 0.51171875, + "loss_num": 0.032958984375, + "loss_xval": 1.1875, + "num_input_tokens_seen": 348889872, + "step": 5216 + }, + { + "epoch": 0.592, + "grad_norm": 16.734521865844727, + "learning_rate": 5e-05, + "loss": 1.1438, + "num_input_tokens_seen": 348956320, + "step": 5217 + }, + { + "epoch": 0.592, + "loss": 1.0060656070709229, + "loss_ce": 0.004112553782761097, + "loss_iou": 0.416015625, + "loss_num": 0.0341796875, + "loss_xval": 1.0, + "num_input_tokens_seen": 348956320, + "step": 5217 + }, + { + "epoch": 0.592113475177305, + "grad_norm": 28.299556732177734, + "learning_rate": 5e-05, + "loss": 1.3133, + "num_input_tokens_seen": 349023352, + "step": 5218 + }, + { + "epoch": 0.592113475177305, + "loss": 1.2906287908554077, + "loss_ce": 0.00937887653708458, + "loss_iou": 0.5234375, + "loss_num": 0.0478515625, + "loss_xval": 1.28125, + "num_input_tokens_seen": 349023352, + "step": 5218 + }, + { + "epoch": 0.5922269503546099, + "grad_norm": 23.695688247680664, + "learning_rate": 5e-05, + "loss": 1.27, + "num_input_tokens_seen": 349089560, + "step": 5219 + }, + { + "epoch": 0.5922269503546099, + "loss": 1.2620747089385986, + "loss_ce": 0.004262283444404602, + "loss_iou": 0.5234375, + "loss_num": 0.0419921875, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 349089560, + "step": 5219 + }, + { + "epoch": 0.5923404255319149, + "grad_norm": 27.655658721923828, + "learning_rate": 5e-05, + "loss": 1.2068, + "num_input_tokens_seen": 349156424, + "step": 5220 + }, + { + "epoch": 0.5923404255319149, + "loss": 1.185323715209961, + "loss_ce": 0.004659601487219334, + "loss_iou": 0.498046875, + "loss_num": 0.036865234375, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 349156424, + "step": 5220 + }, + { + "epoch": 0.5924539007092199, + "grad_norm": 21.781829833984375, + "learning_rate": 5e-05, + "loss": 1.284, + "num_input_tokens_seen": 349224020, + "step": 5221 + }, + { + "epoch": 0.5924539007092199, + "loss": 1.2020254135131836, + "loss_ce": 0.00817771814763546, + "loss_iou": 0.51171875, + "loss_num": 0.03369140625, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 349224020, + "step": 5221 + }, + { + "epoch": 0.5925673758865249, + "grad_norm": 21.885631561279297, + "learning_rate": 5e-05, + "loss": 1.3547, + "num_input_tokens_seen": 349291960, + "step": 5222 + }, + { + "epoch": 0.5925673758865249, + "loss": 1.3671958446502686, + "loss_ce": 0.012215265072882175, + "loss_iou": 0.52734375, + "loss_num": 0.060546875, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 349291960, + "step": 5222 + }, + { + "epoch": 0.5926808510638298, + "grad_norm": 23.4332332611084, + "learning_rate": 5e-05, + "loss": 1.3384, + "num_input_tokens_seen": 349359880, + "step": 5223 + }, + { + "epoch": 0.5926808510638298, + "loss": 1.3763940334320068, + "loss_ce": 0.010183143429458141, + "loss_iou": 0.54296875, + "loss_num": 0.056640625, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 349359880, + "step": 5223 + }, + { + "epoch": 0.5927943262411347, + "grad_norm": 24.311168670654297, + "learning_rate": 5e-05, + "loss": 1.26, + "num_input_tokens_seen": 349425776, + "step": 5224 + }, + { + "epoch": 0.5927943262411347, + "loss": 1.255448341369629, + "loss_ce": 0.0030068473424762487, + "loss_iou": 0.482421875, + "loss_num": 0.057373046875, + "loss_xval": 1.25, + "num_input_tokens_seen": 349425776, + "step": 5224 + }, + { + "epoch": 0.5929078014184397, + "grad_norm": 29.48330307006836, + "learning_rate": 5e-05, + "loss": 1.2756, + "num_input_tokens_seen": 349492724, + "step": 5225 + }, + { + "epoch": 0.5929078014184397, + "loss": 1.2420161962509155, + "loss_ce": 0.010570872575044632, + "loss_iou": 0.5, + "loss_num": 0.04638671875, + "loss_xval": 1.234375, + "num_input_tokens_seen": 349492724, + "step": 5225 + }, + { + "epoch": 0.5930212765957447, + "grad_norm": 29.370153427124023, + "learning_rate": 5e-05, + "loss": 1.0434, + "num_input_tokens_seen": 349560096, + "step": 5226 + }, + { + "epoch": 0.5930212765957447, + "loss": 0.9194873571395874, + "loss_ce": 0.004936588928103447, + "loss_iou": 0.39453125, + "loss_num": 0.0252685546875, + "loss_xval": 0.9140625, + "num_input_tokens_seen": 349560096, + "step": 5226 + }, + { + "epoch": 0.5931347517730496, + "grad_norm": 30.9648380279541, + "learning_rate": 5e-05, + "loss": 1.1695, + "num_input_tokens_seen": 349627096, + "step": 5227 + }, + { + "epoch": 0.5931347517730496, + "loss": 1.217336654663086, + "loss_ce": 0.004690178204327822, + "loss_iou": 0.478515625, + "loss_num": 0.0517578125, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 349627096, + "step": 5227 + }, + { + "epoch": 0.5932482269503546, + "grad_norm": 165.9469757080078, + "learning_rate": 5e-05, + "loss": 1.0828, + "num_input_tokens_seen": 349693436, + "step": 5228 + }, + { + "epoch": 0.5932482269503546, + "loss": 1.0419470071792603, + "loss_ce": 0.005814217962324619, + "loss_iou": 0.44921875, + "loss_num": 0.028076171875, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 349693436, + "step": 5228 + }, + { + "epoch": 0.5933617021276596, + "grad_norm": 27.27887725830078, + "learning_rate": 5e-05, + "loss": 1.2038, + "num_input_tokens_seen": 349760524, + "step": 5229 + }, + { + "epoch": 0.5933617021276596, + "loss": 1.0709404945373535, + "loss_ce": 0.006487381644546986, + "loss_iou": 0.4453125, + "loss_num": 0.03515625, + "loss_xval": 1.0625, + "num_input_tokens_seen": 349760524, + "step": 5229 + }, + { + "epoch": 0.5934751773049646, + "grad_norm": 40.45064163208008, + "learning_rate": 5e-05, + "loss": 1.1284, + "num_input_tokens_seen": 349826688, + "step": 5230 + }, + { + "epoch": 0.5934751773049646, + "loss": 1.1890974044799805, + "loss_ce": 0.0020856158807873726, + "loss_iou": 0.45703125, + "loss_num": 0.053955078125, + "loss_xval": 1.1875, + "num_input_tokens_seen": 349826688, + "step": 5230 + }, + { + "epoch": 0.5935886524822696, + "grad_norm": 33.909873962402344, + "learning_rate": 5e-05, + "loss": 1.3127, + "num_input_tokens_seen": 349893524, + "step": 5231 + }, + { + "epoch": 0.5935886524822696, + "loss": 1.3910770416259766, + "loss_ce": 0.006311402656137943, + "loss_iou": 0.55859375, + "loss_num": 0.0537109375, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 349893524, + "step": 5231 + }, + { + "epoch": 0.5937021276595744, + "grad_norm": 22.530467987060547, + "learning_rate": 5e-05, + "loss": 1.234, + "num_input_tokens_seen": 349959992, + "step": 5232 + }, + { + "epoch": 0.5937021276595744, + "loss": 1.280309796333313, + "loss_ce": 0.006383963860571384, + "loss_iou": 0.50390625, + "loss_num": 0.052978515625, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 349959992, + "step": 5232 + }, + { + "epoch": 0.5938156028368794, + "grad_norm": 23.966333389282227, + "learning_rate": 5e-05, + "loss": 1.1741, + "num_input_tokens_seen": 350026632, + "step": 5233 + }, + { + "epoch": 0.5938156028368794, + "loss": 1.3777343034744263, + "loss_ce": 0.00908198393881321, + "loss_iou": 0.55859375, + "loss_num": 0.05078125, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 350026632, + "step": 5233 + }, + { + "epoch": 0.5939290780141844, + "grad_norm": 19.51811408996582, + "learning_rate": 5e-05, + "loss": 1.1324, + "num_input_tokens_seen": 350093168, + "step": 5234 + }, + { + "epoch": 0.5939290780141844, + "loss": 1.0928038358688354, + "loss_ce": 0.007354566361755133, + "loss_iou": 0.4453125, + "loss_num": 0.0390625, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 350093168, + "step": 5234 + }, + { + "epoch": 0.5940425531914894, + "grad_norm": 24.13877296447754, + "learning_rate": 5e-05, + "loss": 1.2018, + "num_input_tokens_seen": 350160344, + "step": 5235 + }, + { + "epoch": 0.5940425531914894, + "loss": 0.9190192818641663, + "loss_ce": 0.005445028655230999, + "loss_iou": 0.39453125, + "loss_num": 0.025146484375, + "loss_xval": 0.9140625, + "num_input_tokens_seen": 350160344, + "step": 5235 + }, + { + "epoch": 0.5941560283687943, + "grad_norm": 30.236377716064453, + "learning_rate": 5e-05, + "loss": 1.304, + "num_input_tokens_seen": 350226784, + "step": 5236 + }, + { + "epoch": 0.5941560283687943, + "loss": 1.251030683517456, + "loss_ce": 0.004937022924423218, + "loss_iou": 0.4921875, + "loss_num": 0.052490234375, + "loss_xval": 1.25, + "num_input_tokens_seen": 350226784, + "step": 5236 + }, + { + "epoch": 0.5942695035460993, + "grad_norm": 21.124713897705078, + "learning_rate": 5e-05, + "loss": 1.2785, + "num_input_tokens_seen": 350292636, + "step": 5237 + }, + { + "epoch": 0.5942695035460993, + "loss": 1.1347205638885498, + "loss_ce": 0.009628917090594769, + "loss_iou": 0.4765625, + "loss_num": 0.033935546875, + "loss_xval": 1.125, + "num_input_tokens_seen": 350292636, + "step": 5237 + }, + { + "epoch": 0.5943829787234043, + "grad_norm": 12.9218168258667, + "learning_rate": 5e-05, + "loss": 1.1011, + "num_input_tokens_seen": 350360072, + "step": 5238 + }, + { + "epoch": 0.5943829787234043, + "loss": 1.0986567735671997, + "loss_ce": 0.007348233833909035, + "loss_iou": 0.43359375, + "loss_num": 0.044677734375, + "loss_xval": 1.09375, + "num_input_tokens_seen": 350360072, + "step": 5238 + }, + { + "epoch": 0.5944964539007093, + "grad_norm": 21.91920280456543, + "learning_rate": 5e-05, + "loss": 1.2221, + "num_input_tokens_seen": 350425488, + "step": 5239 + }, + { + "epoch": 0.5944964539007093, + "loss": 1.151231288909912, + "loss_ce": 0.002793780295178294, + "loss_iou": 0.4375, + "loss_num": 0.054443359375, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 350425488, + "step": 5239 + }, + { + "epoch": 0.5946099290780141, + "grad_norm": 23.108295440673828, + "learning_rate": 5e-05, + "loss": 1.153, + "num_input_tokens_seen": 350492448, + "step": 5240 + }, + { + "epoch": 0.5946099290780141, + "loss": 0.960399329662323, + "loss_ce": 0.004344663117080927, + "loss_iou": 0.4140625, + "loss_num": 0.0252685546875, + "loss_xval": 0.95703125, + "num_input_tokens_seen": 350492448, + "step": 5240 + }, + { + "epoch": 0.5947234042553191, + "grad_norm": 29.201934814453125, + "learning_rate": 5e-05, + "loss": 1.2704, + "num_input_tokens_seen": 350559140, + "step": 5241 + }, + { + "epoch": 0.5947234042553191, + "loss": 1.2752777338027954, + "loss_ce": 0.008432049304246902, + "loss_iou": 0.515625, + "loss_num": 0.0478515625, + "loss_xval": 1.265625, + "num_input_tokens_seen": 350559140, + "step": 5241 + }, + { + "epoch": 0.5948368794326241, + "grad_norm": 37.03778076171875, + "learning_rate": 5e-05, + "loss": 1.2347, + "num_input_tokens_seen": 350625800, + "step": 5242 + }, + { + "epoch": 0.5948368794326241, + "loss": 1.2427105903625488, + "loss_ce": 0.009617246687412262, + "loss_iou": 0.48828125, + "loss_num": 0.0517578125, + "loss_xval": 1.234375, + "num_input_tokens_seen": 350625800, + "step": 5242 + }, + { + "epoch": 0.5949503546099291, + "grad_norm": 29.401029586791992, + "learning_rate": 5e-05, + "loss": 1.3002, + "num_input_tokens_seen": 350692736, + "step": 5243 + }, + { + "epoch": 0.5949503546099291, + "loss": 1.1837660074234009, + "loss_ce": 0.0033460762351751328, + "loss_iou": 0.5078125, + "loss_num": 0.033447265625, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 350692736, + "step": 5243 + }, + { + "epoch": 0.595063829787234, + "grad_norm": 23.147567749023438, + "learning_rate": 5e-05, + "loss": 1.2722, + "num_input_tokens_seen": 350760568, + "step": 5244 + }, + { + "epoch": 0.595063829787234, + "loss": 1.3746730089187622, + "loss_ce": 0.007973778992891312, + "loss_iou": 0.57421875, + "loss_num": 0.044189453125, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 350760568, + "step": 5244 + }, + { + "epoch": 0.595177304964539, + "grad_norm": 31.417898178100586, + "learning_rate": 5e-05, + "loss": 1.0851, + "num_input_tokens_seen": 350828032, + "step": 5245 + }, + { + "epoch": 0.595177304964539, + "loss": 1.0377397537231445, + "loss_ce": 0.005024883896112442, + "loss_iou": 0.46875, + "loss_num": 0.01904296875, + "loss_xval": 1.03125, + "num_input_tokens_seen": 350828032, + "step": 5245 + }, + { + "epoch": 0.595290780141844, + "grad_norm": 41.65482711791992, + "learning_rate": 5e-05, + "loss": 1.1477, + "num_input_tokens_seen": 350894192, + "step": 5246 + }, + { + "epoch": 0.595290780141844, + "loss": 1.1770085096359253, + "loss_ce": 0.005133511498570442, + "loss_iou": 0.5, + "loss_num": 0.03466796875, + "loss_xval": 1.171875, + "num_input_tokens_seen": 350894192, + "step": 5246 + }, + { + "epoch": 0.595404255319149, + "grad_norm": 33.67938232421875, + "learning_rate": 5e-05, + "loss": 1.3882, + "num_input_tokens_seen": 350962032, + "step": 5247 + }, + { + "epoch": 0.595404255319149, + "loss": 1.5525517463684082, + "loss_ce": 0.004700094927102327, + "loss_iou": 0.69921875, + "loss_num": 0.030029296875, + "loss_xval": 1.546875, + "num_input_tokens_seen": 350962032, + "step": 5247 + }, + { + "epoch": 0.5955177304964538, + "grad_norm": 16.306169509887695, + "learning_rate": 5e-05, + "loss": 1.0588, + "num_input_tokens_seen": 351029140, + "step": 5248 + }, + { + "epoch": 0.5955177304964538, + "loss": 1.2546013593673706, + "loss_ce": 0.010460719466209412, + "loss_iou": 0.53125, + "loss_num": 0.03662109375, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 351029140, + "step": 5248 + }, + { + "epoch": 0.5956312056737588, + "grad_norm": 16.503568649291992, + "learning_rate": 5e-05, + "loss": 1.2393, + "num_input_tokens_seen": 351095724, + "step": 5249 + }, + { + "epoch": 0.5956312056737588, + "loss": 1.3688971996307373, + "loss_ce": 0.010010547935962677, + "loss_iou": 0.486328125, + "loss_num": 0.0771484375, + "loss_xval": 1.359375, + "num_input_tokens_seen": 351095724, + "step": 5249 + }, + { + "epoch": 0.5957446808510638, + "grad_norm": 19.16020965576172, + "learning_rate": 5e-05, + "loss": 1.0667, + "num_input_tokens_seen": 351162844, + "step": 5250 + }, + { + "epoch": 0.5957446808510638, + "eval_seeclick_CIoU": 0.34001170098781586, + "eval_seeclick_GIoU": 0.31463269889354706, + "eval_seeclick_IoU": 0.4436201900243759, + "eval_seeclick_MAE_all": 0.17587018758058548, + "eval_seeclick_MAE_h": 0.0855863131582737, + "eval_seeclick_MAE_w": 0.1600918173789978, + "eval_seeclick_MAE_x_boxes": 0.27480240166187286, + "eval_seeclick_MAE_y_boxes": 0.139260433614254, + "eval_seeclick_NUM_probability": 0.9999546110630035, + "eval_seeclick_inside_bbox": 0.628125011920929, + "eval_seeclick_loss": 2.600639581680298, + "eval_seeclick_loss_ce": 0.014117536135017872, + "eval_seeclick_loss_iou": 0.87579345703125, + "eval_seeclick_loss_num": 0.17191314697265625, + "eval_seeclick_loss_xval": 2.610107421875, + "eval_seeclick_runtime": 68.0889, + "eval_seeclick_samples_per_second": 0.69, + "eval_seeclick_steps_per_second": 0.029, + "num_input_tokens_seen": 351162844, + "step": 5250 + }, + { + "epoch": 0.5957446808510638, + "eval_icons_CIoU": 0.5473143458366394, + "eval_icons_GIoU": 0.5348850190639496, + "eval_icons_IoU": 0.5767833292484283, + "eval_icons_MAE_all": 0.13403624668717384, + "eval_icons_MAE_h": 0.1082710362970829, + "eval_icons_MAE_w": 0.13304252177476883, + "eval_icons_MAE_x_boxes": 0.10269341245293617, + "eval_icons_MAE_y_boxes": 0.05193965137004852, + "eval_icons_NUM_probability": 0.9999643564224243, + "eval_icons_inside_bbox": 0.8385416567325592, + "eval_icons_loss": 2.2778375148773193, + "eval_icons_loss_ce": 2.2920062747289194e-05, + "eval_icons_loss_iou": 0.8271484375, + "eval_icons_loss_num": 0.126220703125, + "eval_icons_loss_xval": 2.2841796875, + "eval_icons_runtime": 70.0661, + "eval_icons_samples_per_second": 0.714, + "eval_icons_steps_per_second": 0.029, + "num_input_tokens_seen": 351162844, + "step": 5250 + }, + { + "epoch": 0.5957446808510638, + "eval_screenspot_CIoU": 0.2473964368303617, + "eval_screenspot_GIoU": 0.2208134854833285, + "eval_screenspot_IoU": 0.3540586829185486, + "eval_screenspot_MAE_all": 0.2183143893877665, + "eval_screenspot_MAE_h": 0.17171490689118704, + "eval_screenspot_MAE_w": 0.1639973446726799, + "eval_screenspot_MAE_x_boxes": 0.3437652637561162, + "eval_screenspot_MAE_y_boxes": 0.07808779800931613, + "eval_screenspot_NUM_probability": 0.9996145367622375, + "eval_screenspot_inside_bbox": 0.5570833285649618, + "eval_screenspot_loss": 3.1461424827575684, + "eval_screenspot_loss_ce": 0.01629393237332503, + "eval_screenspot_loss_iou": 1.0149739583333333, + "eval_screenspot_loss_num": 0.23872884114583334, + "eval_screenspot_loss_xval": 3.2229817708333335, + "eval_screenspot_runtime": 135.3965, + "eval_screenspot_samples_per_second": 0.657, + "eval_screenspot_steps_per_second": 0.022, + "num_input_tokens_seen": 351162844, + "step": 5250 + }, + { + "epoch": 0.5957446808510638, + "eval_compot_CIoU": 0.21928708255290985, + "eval_compot_GIoU": 0.16671060770750046, + "eval_compot_IoU": 0.32416799664497375, + "eval_compot_MAE_all": 0.2522463947534561, + "eval_compot_MAE_h": 0.14867711067199707, + "eval_compot_MAE_w": 0.25321274995803833, + "eval_compot_MAE_x_boxes": 0.2389518842101097, + "eval_compot_MAE_y_boxes": 0.15533104538917542, + "eval_compot_NUM_probability": 0.9998933374881744, + "eval_compot_inside_bbox": 0.4809027910232544, + "eval_compot_loss": 3.350013017654419, + "eval_compot_loss_ce": 0.0037825030740350485, + "eval_compot_loss_iou": 1.032958984375, + "eval_compot_loss_num": 0.2631988525390625, + "eval_compot_loss_xval": 3.3828125, + "eval_compot_runtime": 72.1687, + "eval_compot_samples_per_second": 0.693, + "eval_compot_steps_per_second": 0.028, + "num_input_tokens_seen": 351162844, + "step": 5250 + }, + { + "epoch": 0.5957446808510638, + "loss": 3.3186402320861816, + "loss_ce": 0.004187196027487516, + "loss_iou": 1.03125, + "loss_num": 0.2490234375, + "loss_xval": 3.3125, + "num_input_tokens_seen": 351162844, + "step": 5250 + }, + { + "epoch": 0.5958581560283688, + "grad_norm": 35.929481506347656, + "learning_rate": 5e-05, + "loss": 1.1384, + "num_input_tokens_seen": 351229984, + "step": 5251 + }, + { + "epoch": 0.5958581560283688, + "loss": 0.9098497629165649, + "loss_ce": 0.007261844351887703, + "loss_iou": 0.37109375, + "loss_num": 0.03173828125, + "loss_xval": 0.90234375, + "num_input_tokens_seen": 351229984, + "step": 5251 + }, + { + "epoch": 0.5959716312056738, + "grad_norm": 28.88573455810547, + "learning_rate": 5e-05, + "loss": 1.3306, + "num_input_tokens_seen": 351295820, + "step": 5252 + }, + { + "epoch": 0.5959716312056738, + "loss": 1.4797756671905518, + "loss_ce": 0.004189766943454742, + "loss_iou": 0.5859375, + "loss_num": 0.059814453125, + "loss_xval": 1.4765625, + "num_input_tokens_seen": 351295820, + "step": 5252 + }, + { + "epoch": 0.5960851063829787, + "grad_norm": 19.99615478515625, + "learning_rate": 5e-05, + "loss": 1.1597, + "num_input_tokens_seen": 351363616, + "step": 5253 + }, + { + "epoch": 0.5960851063829787, + "loss": 1.0367344617843628, + "loss_ce": 0.01027565635740757, + "loss_iou": 0.392578125, + "loss_num": 0.048095703125, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 351363616, + "step": 5253 + }, + { + "epoch": 0.5961985815602837, + "grad_norm": 75.92514038085938, + "learning_rate": 5e-05, + "loss": 1.2388, + "num_input_tokens_seen": 351430228, + "step": 5254 + }, + { + "epoch": 0.5961985815602837, + "loss": 1.1228127479553223, + "loss_ce": 0.0061135562136769295, + "loss_iou": 0.4765625, + "loss_num": 0.03271484375, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 351430228, + "step": 5254 + }, + { + "epoch": 0.5963120567375887, + "grad_norm": 37.35024642944336, + "learning_rate": 5e-05, + "loss": 1.3215, + "num_input_tokens_seen": 351498548, + "step": 5255 + }, + { + "epoch": 0.5963120567375887, + "loss": 1.4022204875946045, + "loss_ce": 0.00720101036131382, + "loss_iou": 0.546875, + "loss_num": 0.059814453125, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 351498548, + "step": 5255 + }, + { + "epoch": 0.5964255319148937, + "grad_norm": 27.86048698425293, + "learning_rate": 5e-05, + "loss": 1.3781, + "num_input_tokens_seen": 351565456, + "step": 5256 + }, + { + "epoch": 0.5964255319148937, + "loss": 1.3184590339660645, + "loss_ce": 0.008888755924999714, + "loss_iou": 0.5390625, + "loss_num": 0.046630859375, + "loss_xval": 1.3125, + "num_input_tokens_seen": 351565456, + "step": 5256 + }, + { + "epoch": 0.5965390070921985, + "grad_norm": 24.46096420288086, + "learning_rate": 5e-05, + "loss": 1.224, + "num_input_tokens_seen": 351632784, + "step": 5257 + }, + { + "epoch": 0.5965390070921985, + "loss": 1.2709474563598633, + "loss_ce": 0.0048342421650886536, + "loss_iou": 0.50390625, + "loss_num": 0.051513671875, + "loss_xval": 1.265625, + "num_input_tokens_seen": 351632784, + "step": 5257 + }, + { + "epoch": 0.5966524822695035, + "grad_norm": 36.251312255859375, + "learning_rate": 5e-05, + "loss": 1.1854, + "num_input_tokens_seen": 351699872, + "step": 5258 + }, + { + "epoch": 0.5966524822695035, + "loss": 1.2584996223449707, + "loss_ce": 0.004105192143470049, + "loss_iou": 0.51171875, + "loss_num": 0.046875, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 351699872, + "step": 5258 + }, + { + "epoch": 0.5967659574468085, + "grad_norm": 100.38462829589844, + "learning_rate": 5e-05, + "loss": 1.2532, + "num_input_tokens_seen": 351765944, + "step": 5259 + }, + { + "epoch": 0.5967659574468085, + "loss": 1.185866117477417, + "loss_ce": 0.007155153900384903, + "loss_iou": 0.474609375, + "loss_num": 0.046142578125, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 351765944, + "step": 5259 + }, + { + "epoch": 0.5968794326241135, + "grad_norm": 28.131113052368164, + "learning_rate": 5e-05, + "loss": 1.404, + "num_input_tokens_seen": 351833712, + "step": 5260 + }, + { + "epoch": 0.5968794326241135, + "loss": 1.3439247608184814, + "loss_ce": 0.0065223234705626965, + "loss_iou": 0.55859375, + "loss_num": 0.04345703125, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 351833712, + "step": 5260 + }, + { + "epoch": 0.5969929078014184, + "grad_norm": 33.67125701904297, + "learning_rate": 5e-05, + "loss": 0.9049, + "num_input_tokens_seen": 351900368, + "step": 5261 + }, + { + "epoch": 0.5969929078014184, + "loss": 0.9041411280632019, + "loss_ce": 0.00790092907845974, + "loss_iou": 0.375, + "loss_num": 0.029296875, + "loss_xval": 0.89453125, + "num_input_tokens_seen": 351900368, + "step": 5261 + }, + { + "epoch": 0.5971063829787234, + "grad_norm": 33.58571243286133, + "learning_rate": 5e-05, + "loss": 1.3986, + "num_input_tokens_seen": 351968212, + "step": 5262 + }, + { + "epoch": 0.5971063829787234, + "loss": 1.5006046295166016, + "loss_ce": 0.0054874238558113575, + "loss_iou": 0.5859375, + "loss_num": 0.064453125, + "loss_xval": 1.4921875, + "num_input_tokens_seen": 351968212, + "step": 5262 + }, + { + "epoch": 0.5972198581560284, + "grad_norm": 45.49646759033203, + "learning_rate": 5e-05, + "loss": 1.1191, + "num_input_tokens_seen": 352034276, + "step": 5263 + }, + { + "epoch": 0.5972198581560284, + "loss": 0.8962724208831787, + "loss_ce": 0.007905742153525352, + "loss_iou": 0.375, + "loss_num": 0.02783203125, + "loss_xval": 0.88671875, + "num_input_tokens_seen": 352034276, + "step": 5263 + }, + { + "epoch": 0.5973333333333334, + "grad_norm": 23.707876205444336, + "learning_rate": 5e-05, + "loss": 1.0367, + "num_input_tokens_seen": 352099764, + "step": 5264 + }, + { + "epoch": 0.5973333333333334, + "loss": 1.144113540649414, + "loss_ce": 0.005411161575466394, + "loss_iou": 0.47265625, + "loss_num": 0.0390625, + "loss_xval": 1.140625, + "num_input_tokens_seen": 352099764, + "step": 5264 + }, + { + "epoch": 0.5974468085106382, + "grad_norm": 27.718748092651367, + "learning_rate": 5e-05, + "loss": 1.1005, + "num_input_tokens_seen": 352166252, + "step": 5265 + }, + { + "epoch": 0.5974468085106382, + "loss": 1.2597264051437378, + "loss_ce": 0.007529111113399267, + "loss_iou": 0.494140625, + "loss_num": 0.052734375, + "loss_xval": 1.25, + "num_input_tokens_seen": 352166252, + "step": 5265 + }, + { + "epoch": 0.5975602836879432, + "grad_norm": 43.513763427734375, + "learning_rate": 5e-05, + "loss": 1.1054, + "num_input_tokens_seen": 352232568, + "step": 5266 + }, + { + "epoch": 0.5975602836879432, + "loss": 1.0908360481262207, + "loss_ce": 0.006363323889672756, + "loss_iou": 0.4375, + "loss_num": 0.041748046875, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 352232568, + "step": 5266 + }, + { + "epoch": 0.5976737588652482, + "grad_norm": 30.467403411865234, + "learning_rate": 5e-05, + "loss": 1.2447, + "num_input_tokens_seen": 352300004, + "step": 5267 + }, + { + "epoch": 0.5976737588652482, + "loss": 1.2496085166931152, + "loss_ce": 0.006932821590453386, + "loss_iou": 0.515625, + "loss_num": 0.041748046875, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 352300004, + "step": 5267 + }, + { + "epoch": 0.5977872340425532, + "grad_norm": 36.77969741821289, + "learning_rate": 5e-05, + "loss": 1.2412, + "num_input_tokens_seen": 352366448, + "step": 5268 + }, + { + "epoch": 0.5977872340425532, + "loss": 1.1999504566192627, + "loss_ce": 0.007079340051859617, + "loss_iou": 0.51171875, + "loss_num": 0.033935546875, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 352366448, + "step": 5268 + }, + { + "epoch": 0.5979007092198582, + "grad_norm": 33.221534729003906, + "learning_rate": 5e-05, + "loss": 1.2103, + "num_input_tokens_seen": 352433528, + "step": 5269 + }, + { + "epoch": 0.5979007092198582, + "loss": 1.4596260786056519, + "loss_ce": 0.004547966178506613, + "loss_iou": 0.5703125, + "loss_num": 0.06298828125, + "loss_xval": 1.453125, + "num_input_tokens_seen": 352433528, + "step": 5269 + }, + { + "epoch": 0.5980141843971631, + "grad_norm": 29.84182357788086, + "learning_rate": 5e-05, + "loss": 1.2348, + "num_input_tokens_seen": 352500864, + "step": 5270 + }, + { + "epoch": 0.5980141843971631, + "loss": 1.187389850616455, + "loss_ce": 0.009655498899519444, + "loss_iou": 0.51171875, + "loss_num": 0.03173828125, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 352500864, + "step": 5270 + }, + { + "epoch": 0.5981276595744681, + "grad_norm": 33.77482604980469, + "learning_rate": 5e-05, + "loss": 1.2481, + "num_input_tokens_seen": 352568576, + "step": 5271 + }, + { + "epoch": 0.5981276595744681, + "loss": 1.152221918106079, + "loss_ce": 0.00476094102486968, + "loss_iou": 0.44921875, + "loss_num": 0.050048828125, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 352568576, + "step": 5271 + }, + { + "epoch": 0.5982411347517731, + "grad_norm": 37.14781951904297, + "learning_rate": 5e-05, + "loss": 1.4145, + "num_input_tokens_seen": 352634696, + "step": 5272 + }, + { + "epoch": 0.5982411347517731, + "loss": 1.3107377290725708, + "loss_ce": 0.007515112869441509, + "loss_iou": 0.5234375, + "loss_num": 0.050537109375, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 352634696, + "step": 5272 + }, + { + "epoch": 0.598354609929078, + "grad_norm": 58.651092529296875, + "learning_rate": 5e-05, + "loss": 1.0578, + "num_input_tokens_seen": 352701672, + "step": 5273 + }, + { + "epoch": 0.598354609929078, + "loss": 1.0731067657470703, + "loss_ce": 0.004259021952748299, + "loss_iou": 0.4375, + "loss_num": 0.0390625, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 352701672, + "step": 5273 + }, + { + "epoch": 0.5984680851063829, + "grad_norm": 43.31918716430664, + "learning_rate": 5e-05, + "loss": 1.1688, + "num_input_tokens_seen": 352766696, + "step": 5274 + }, + { + "epoch": 0.5984680851063829, + "loss": 1.0684782266616821, + "loss_ce": 0.007320941425859928, + "loss_iou": 0.4453125, + "loss_num": 0.033935546875, + "loss_xval": 1.0625, + "num_input_tokens_seen": 352766696, + "step": 5274 + }, + { + "epoch": 0.5985815602836879, + "grad_norm": 1121.0880126953125, + "learning_rate": 5e-05, + "loss": 1.1833, + "num_input_tokens_seen": 352833620, + "step": 5275 + }, + { + "epoch": 0.5985815602836879, + "loss": 1.2886927127838135, + "loss_ce": 0.007930928841233253, + "loss_iou": 0.5390625, + "loss_num": 0.041259765625, + "loss_xval": 1.28125, + "num_input_tokens_seen": 352833620, + "step": 5275 + }, + { + "epoch": 0.5986950354609929, + "grad_norm": 18.16407012939453, + "learning_rate": 5e-05, + "loss": 1.2989, + "num_input_tokens_seen": 352900460, + "step": 5276 + }, + { + "epoch": 0.5986950354609929, + "loss": 1.3228998184204102, + "loss_ce": 0.00893491506576538, + "loss_iou": 0.46875, + "loss_num": 0.07568359375, + "loss_xval": 1.3125, + "num_input_tokens_seen": 352900460, + "step": 5276 + }, + { + "epoch": 0.5988085106382979, + "grad_norm": 13.759963035583496, + "learning_rate": 5e-05, + "loss": 1.146, + "num_input_tokens_seen": 352966508, + "step": 5277 + }, + { + "epoch": 0.5988085106382979, + "loss": 1.4429657459259033, + "loss_ce": 0.008883642964065075, + "loss_iou": 0.578125, + "loss_num": 0.0546875, + "loss_xval": 1.4375, + "num_input_tokens_seen": 352966508, + "step": 5277 + }, + { + "epoch": 0.5989219858156029, + "grad_norm": 31.149866104125977, + "learning_rate": 5e-05, + "loss": 1.2342, + "num_input_tokens_seen": 353033164, + "step": 5278 + }, + { + "epoch": 0.5989219858156029, + "loss": 1.366847276687622, + "loss_ce": 0.005763276480138302, + "loss_iou": 0.515625, + "loss_num": 0.0654296875, + "loss_xval": 1.359375, + "num_input_tokens_seen": 353033164, + "step": 5278 + }, + { + "epoch": 0.5990354609929078, + "grad_norm": 31.3432559967041, + "learning_rate": 5e-05, + "loss": 1.3712, + "num_input_tokens_seen": 353100664, + "step": 5279 + }, + { + "epoch": 0.5990354609929078, + "loss": 1.3423655033111572, + "loss_ce": 0.010334208607673645, + "loss_iou": 0.55078125, + "loss_num": 0.04638671875, + "loss_xval": 1.328125, + "num_input_tokens_seen": 353100664, + "step": 5279 + }, + { + "epoch": 0.5991489361702128, + "grad_norm": 26.86235237121582, + "learning_rate": 5e-05, + "loss": 1.2268, + "num_input_tokens_seen": 353167148, + "step": 5280 + }, + { + "epoch": 0.5991489361702128, + "loss": 1.2828997373580933, + "loss_ce": 0.00897400826215744, + "loss_iou": 0.53515625, + "loss_num": 0.041259765625, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 353167148, + "step": 5280 + }, + { + "epoch": 0.5992624113475177, + "grad_norm": 16.237979888916016, + "learning_rate": 5e-05, + "loss": 1.0115, + "num_input_tokens_seen": 353233980, + "step": 5281 + }, + { + "epoch": 0.5992624113475177, + "loss": 1.0891928672790527, + "loss_ce": 0.003255473915487528, + "loss_iou": 0.4609375, + "loss_num": 0.03271484375, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 353233980, + "step": 5281 + }, + { + "epoch": 0.5993758865248227, + "grad_norm": 45.48887252807617, + "learning_rate": 5e-05, + "loss": 1.0889, + "num_input_tokens_seen": 353301640, + "step": 5282 + }, + { + "epoch": 0.5993758865248227, + "loss": 1.1353929042816162, + "loss_ce": 0.01625225692987442, + "loss_iou": 0.45703125, + "loss_num": 0.041259765625, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 353301640, + "step": 5282 + }, + { + "epoch": 0.5994893617021276, + "grad_norm": 29.40032386779785, + "learning_rate": 5e-05, + "loss": 1.1974, + "num_input_tokens_seen": 353367452, + "step": 5283 + }, + { + "epoch": 0.5994893617021276, + "loss": 1.1822144985198975, + "loss_ce": 0.007653953041881323, + "loss_iou": 0.4453125, + "loss_num": 0.05712890625, + "loss_xval": 1.171875, + "num_input_tokens_seen": 353367452, + "step": 5283 + }, + { + "epoch": 0.5996028368794326, + "grad_norm": 36.00450897216797, + "learning_rate": 5e-05, + "loss": 1.2793, + "num_input_tokens_seen": 353434884, + "step": 5284 + }, + { + "epoch": 0.5996028368794326, + "loss": 1.2755239009857178, + "loss_ce": 0.007945764809846878, + "loss_iou": 0.53515625, + "loss_num": 0.039306640625, + "loss_xval": 1.265625, + "num_input_tokens_seen": 353434884, + "step": 5284 + }, + { + "epoch": 0.5997163120567376, + "grad_norm": 35.95554733276367, + "learning_rate": 5e-05, + "loss": 1.1234, + "num_input_tokens_seen": 353502692, + "step": 5285 + }, + { + "epoch": 0.5997163120567376, + "loss": 1.1463277339935303, + "loss_ce": 0.010585646145045757, + "loss_iou": 0.462890625, + "loss_num": 0.041748046875, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 353502692, + "step": 5285 + }, + { + "epoch": 0.5998297872340426, + "grad_norm": 27.019582748413086, + "learning_rate": 5e-05, + "loss": 1.3165, + "num_input_tokens_seen": 353569316, + "step": 5286 + }, + { + "epoch": 0.5998297872340426, + "loss": 1.2835288047790527, + "loss_ce": 0.007894076406955719, + "loss_iou": 0.51953125, + "loss_num": 0.046875, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 353569316, + "step": 5286 + }, + { + "epoch": 0.5999432624113475, + "grad_norm": 14.655068397521973, + "learning_rate": 5e-05, + "loss": 1.0034, + "num_input_tokens_seen": 353636116, + "step": 5287 + }, + { + "epoch": 0.5999432624113475, + "loss": 0.8417153358459473, + "loss_ce": 0.006266167387366295, + "loss_iou": 0.37109375, + "loss_num": 0.018798828125, + "loss_xval": 0.8359375, + "num_input_tokens_seen": 353636116, + "step": 5287 + }, + { + "epoch": 0.6000567375886525, + "grad_norm": 19.442800521850586, + "learning_rate": 5e-05, + "loss": 1.1059, + "num_input_tokens_seen": 353702780, + "step": 5288 + }, + { + "epoch": 0.6000567375886525, + "loss": 1.0811495780944824, + "loss_ce": 0.007907327264547348, + "loss_iou": 0.42578125, + "loss_num": 0.044677734375, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 353702780, + "step": 5288 + }, + { + "epoch": 0.6001702127659575, + "grad_norm": 27.634977340698242, + "learning_rate": 5e-05, + "loss": 1.1833, + "num_input_tokens_seen": 353770012, + "step": 5289 + }, + { + "epoch": 0.6001702127659575, + "loss": 1.1432678699493408, + "loss_ce": 0.005572536028921604, + "loss_iou": 0.458984375, + "loss_num": 0.044189453125, + "loss_xval": 1.140625, + "num_input_tokens_seen": 353770012, + "step": 5289 + }, + { + "epoch": 0.6002836879432624, + "grad_norm": 33.1256217956543, + "learning_rate": 5e-05, + "loss": 1.3477, + "num_input_tokens_seen": 353837544, + "step": 5290 + }, + { + "epoch": 0.6002836879432624, + "loss": 1.5322961807250977, + "loss_ce": 0.009835360571742058, + "loss_iou": 0.62109375, + "loss_num": 0.05615234375, + "loss_xval": 1.5234375, + "num_input_tokens_seen": 353837544, + "step": 5290 + }, + { + "epoch": 0.6003971631205673, + "grad_norm": 34.282474517822266, + "learning_rate": 5e-05, + "loss": 1.2931, + "num_input_tokens_seen": 353904416, + "step": 5291 + }, + { + "epoch": 0.6003971631205673, + "loss": 1.3371033668518066, + "loss_ce": 0.013861102983355522, + "loss_iou": 0.4921875, + "loss_num": 0.0673828125, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 353904416, + "step": 5291 + }, + { + "epoch": 0.6005106382978723, + "grad_norm": 18.903972625732422, + "learning_rate": 5e-05, + "loss": 1.1089, + "num_input_tokens_seen": 353971080, + "step": 5292 + }, + { + "epoch": 0.6005106382978723, + "loss": 1.1309856176376343, + "loss_ce": 0.006840097717940807, + "loss_iou": 0.435546875, + "loss_num": 0.050537109375, + "loss_xval": 1.125, + "num_input_tokens_seen": 353971080, + "step": 5292 + }, + { + "epoch": 0.6006241134751773, + "grad_norm": 55.30221939086914, + "learning_rate": 5e-05, + "loss": 1.0056, + "num_input_tokens_seen": 354037188, + "step": 5293 + }, + { + "epoch": 0.6006241134751773, + "loss": 0.9269206523895264, + "loss_ce": 0.005045612342655659, + "loss_iou": 0.396484375, + "loss_num": 0.0257568359375, + "loss_xval": 0.921875, + "num_input_tokens_seen": 354037188, + "step": 5293 + }, + { + "epoch": 0.6007375886524823, + "grad_norm": 32.66209411621094, + "learning_rate": 5e-05, + "loss": 1.2344, + "num_input_tokens_seen": 354103788, + "step": 5294 + }, + { + "epoch": 0.6007375886524823, + "loss": 1.1830064058303833, + "loss_ce": 0.01381697691977024, + "loss_iou": 0.44140625, + "loss_num": 0.05712890625, + "loss_xval": 1.171875, + "num_input_tokens_seen": 354103788, + "step": 5294 + }, + { + "epoch": 0.6008510638297873, + "grad_norm": 158.47567749023438, + "learning_rate": 5e-05, + "loss": 1.3345, + "num_input_tokens_seen": 354170984, + "step": 5295 + }, + { + "epoch": 0.6008510638297873, + "loss": 1.2128682136535645, + "loss_ce": 0.00486033596098423, + "loss_iou": 0.515625, + "loss_num": 0.0361328125, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 354170984, + "step": 5295 + }, + { + "epoch": 0.6009645390070922, + "grad_norm": 40.05718231201172, + "learning_rate": 5e-05, + "loss": 1.0466, + "num_input_tokens_seen": 354237976, + "step": 5296 + }, + { + "epoch": 0.6009645390070922, + "loss": 1.1471847295761108, + "loss_ce": 0.0036301291547715664, + "loss_iou": 0.48828125, + "loss_num": 0.033203125, + "loss_xval": 1.140625, + "num_input_tokens_seen": 354237976, + "step": 5296 + }, + { + "epoch": 0.6010780141843972, + "grad_norm": 35.57700729370117, + "learning_rate": 5e-05, + "loss": 1.1223, + "num_input_tokens_seen": 354304152, + "step": 5297 + }, + { + "epoch": 0.6010780141843972, + "loss": 1.2559512853622437, + "loss_ce": 0.010345838032662868, + "loss_iou": 0.46875, + "loss_num": 0.0615234375, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 354304152, + "step": 5297 + }, + { + "epoch": 0.6011914893617021, + "grad_norm": 33.052146911621094, + "learning_rate": 5e-05, + "loss": 1.4738, + "num_input_tokens_seen": 354371136, + "step": 5298 + }, + { + "epoch": 0.6011914893617021, + "loss": 1.597083568572998, + "loss_ce": 0.00919296219944954, + "loss_iou": 0.6171875, + "loss_num": 0.0712890625, + "loss_xval": 1.5859375, + "num_input_tokens_seen": 354371136, + "step": 5298 + }, + { + "epoch": 0.6013049645390071, + "grad_norm": 21.271240234375, + "learning_rate": 5e-05, + "loss": 0.9593, + "num_input_tokens_seen": 354437872, + "step": 5299 + }, + { + "epoch": 0.6013049645390071, + "loss": 0.906607449054718, + "loss_ce": 0.004263692069798708, + "loss_iou": 0.392578125, + "loss_num": 0.0235595703125, + "loss_xval": 0.90234375, + "num_input_tokens_seen": 354437872, + "step": 5299 + }, + { + "epoch": 0.601418439716312, + "grad_norm": 37.58198547363281, + "learning_rate": 5e-05, + "loss": 1.3146, + "num_input_tokens_seen": 354505256, + "step": 5300 + }, + { + "epoch": 0.601418439716312, + "loss": 1.2520067691802979, + "loss_ce": 0.004936519078910351, + "loss_iou": 0.5078125, + "loss_num": 0.045654296875, + "loss_xval": 1.25, + "num_input_tokens_seen": 354505256, + "step": 5300 + }, + { + "epoch": 0.601531914893617, + "grad_norm": 34.27412033081055, + "learning_rate": 5e-05, + "loss": 1.1694, + "num_input_tokens_seen": 354572248, + "step": 5301 + }, + { + "epoch": 0.601531914893617, + "loss": 1.2700824737548828, + "loss_ce": 0.004945714958012104, + "loss_iou": 0.5390625, + "loss_num": 0.037109375, + "loss_xval": 1.265625, + "num_input_tokens_seen": 354572248, + "step": 5301 + }, + { + "epoch": 0.601645390070922, + "grad_norm": 25.926542282104492, + "learning_rate": 5e-05, + "loss": 1.0941, + "num_input_tokens_seen": 354638660, + "step": 5302 + }, + { + "epoch": 0.601645390070922, + "loss": 1.3118637800216675, + "loss_ce": 0.008641086518764496, + "loss_iou": 0.54296875, + "loss_num": 0.043701171875, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 354638660, + "step": 5302 + }, + { + "epoch": 0.601758865248227, + "grad_norm": 18.632688522338867, + "learning_rate": 5e-05, + "loss": 1.0353, + "num_input_tokens_seen": 354705524, + "step": 5303 + }, + { + "epoch": 0.601758865248227, + "loss": 1.0528795719146729, + "loss_ce": 0.005516284145414829, + "loss_iou": 0.38671875, + "loss_num": 0.054443359375, + "loss_xval": 1.046875, + "num_input_tokens_seen": 354705524, + "step": 5303 + }, + { + "epoch": 0.601872340425532, + "grad_norm": 16.474863052368164, + "learning_rate": 5e-05, + "loss": 1.0728, + "num_input_tokens_seen": 354772484, + "step": 5304 + }, + { + "epoch": 0.601872340425532, + "loss": 1.0446057319641113, + "loss_ce": 0.008472885005176067, + "loss_iou": 0.423828125, + "loss_num": 0.037841796875, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 354772484, + "step": 5304 + }, + { + "epoch": 0.6019858156028369, + "grad_norm": 18.587099075317383, + "learning_rate": 5e-05, + "loss": 0.9974, + "num_input_tokens_seen": 354839100, + "step": 5305 + }, + { + "epoch": 0.6019858156028369, + "loss": 0.8926438093185425, + "loss_ce": 0.00568096898496151, + "loss_iou": 0.361328125, + "loss_num": 0.032470703125, + "loss_xval": 0.88671875, + "num_input_tokens_seen": 354839100, + "step": 5305 + }, + { + "epoch": 0.6020992907801418, + "grad_norm": 20.100208282470703, + "learning_rate": 5e-05, + "loss": 1.1377, + "num_input_tokens_seen": 354906256, + "step": 5306 + }, + { + "epoch": 0.6020992907801418, + "loss": 1.1403374671936035, + "loss_ce": 0.003618831979110837, + "loss_iou": 0.482421875, + "loss_num": 0.0341796875, + "loss_xval": 1.140625, + "num_input_tokens_seen": 354906256, + "step": 5306 + }, + { + "epoch": 0.6022127659574468, + "grad_norm": 19.359230041503906, + "learning_rate": 5e-05, + "loss": 0.9685, + "num_input_tokens_seen": 354972648, + "step": 5307 + }, + { + "epoch": 0.6022127659574468, + "loss": 0.9176068305969238, + "loss_ce": 0.007145420182496309, + "loss_iou": 0.392578125, + "loss_num": 0.0252685546875, + "loss_xval": 0.91015625, + "num_input_tokens_seen": 354972648, + "step": 5307 + }, + { + "epoch": 0.6023262411347517, + "grad_norm": 41.105960845947266, + "learning_rate": 5e-05, + "loss": 1.19, + "num_input_tokens_seen": 355039668, + "step": 5308 + }, + { + "epoch": 0.6023262411347517, + "loss": 1.3569406270980835, + "loss_ce": 0.005378146655857563, + "loss_iou": 0.52734375, + "loss_num": 0.05908203125, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 355039668, + "step": 5308 + }, + { + "epoch": 0.6024397163120567, + "grad_norm": 34.1581916809082, + "learning_rate": 5e-05, + "loss": 1.1112, + "num_input_tokens_seen": 355107464, + "step": 5309 + }, + { + "epoch": 0.6024397163120567, + "loss": 1.2216167449951172, + "loss_ce": 0.0048197973519563675, + "loss_iou": 0.51953125, + "loss_num": 0.035888671875, + "loss_xval": 1.21875, + "num_input_tokens_seen": 355107464, + "step": 5309 + }, + { + "epoch": 0.6025531914893617, + "grad_norm": 43.39498519897461, + "learning_rate": 5e-05, + "loss": 1.1227, + "num_input_tokens_seen": 355174580, + "step": 5310 + }, + { + "epoch": 0.6025531914893617, + "loss": 1.2099475860595703, + "loss_ce": 0.0048694913275539875, + "loss_iou": 0.50390625, + "loss_num": 0.0390625, + "loss_xval": 1.203125, + "num_input_tokens_seen": 355174580, + "step": 5310 + }, + { + "epoch": 0.6026666666666667, + "grad_norm": 26.89240837097168, + "learning_rate": 5e-05, + "loss": 1.4714, + "num_input_tokens_seen": 355241336, + "step": 5311 + }, + { + "epoch": 0.6026666666666667, + "loss": 1.2352180480957031, + "loss_ce": 0.004261057823896408, + "loss_iou": 0.51953125, + "loss_num": 0.038330078125, + "loss_xval": 1.234375, + "num_input_tokens_seen": 355241336, + "step": 5311 + }, + { + "epoch": 0.6027801418439717, + "grad_norm": 20.41364097595215, + "learning_rate": 5e-05, + "loss": 1.0581, + "num_input_tokens_seen": 355307716, + "step": 5312 + }, + { + "epoch": 0.6027801418439717, + "loss": 1.1996240615844727, + "loss_ce": 0.005776361562311649, + "loss_iou": 0.515625, + "loss_num": 0.032470703125, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 355307716, + "step": 5312 + }, + { + "epoch": 0.6028936170212766, + "grad_norm": 35.18798065185547, + "learning_rate": 5e-05, + "loss": 1.3889, + "num_input_tokens_seen": 355374700, + "step": 5313 + }, + { + "epoch": 0.6028936170212766, + "loss": 1.53971266746521, + "loss_ce": 0.0035798943135887384, + "loss_iou": 0.56640625, + "loss_num": 0.0810546875, + "loss_xval": 1.5390625, + "num_input_tokens_seen": 355374700, + "step": 5313 + }, + { + "epoch": 0.6030070921985815, + "grad_norm": 25.360612869262695, + "learning_rate": 5e-05, + "loss": 1.3611, + "num_input_tokens_seen": 355441960, + "step": 5314 + }, + { + "epoch": 0.6030070921985815, + "loss": 1.3563860654830933, + "loss_ce": 0.010042046196758747, + "loss_iou": 0.5703125, + "loss_num": 0.040771484375, + "loss_xval": 1.34375, + "num_input_tokens_seen": 355441960, + "step": 5314 + }, + { + "epoch": 0.6031205673758865, + "grad_norm": 15.956428527832031, + "learning_rate": 5e-05, + "loss": 1.2021, + "num_input_tokens_seen": 355508116, + "step": 5315 + }, + { + "epoch": 0.6031205673758865, + "loss": 1.02347993850708, + "loss_ce": 0.003704588394612074, + "loss_iou": 0.40625, + "loss_num": 0.04150390625, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 355508116, + "step": 5315 + }, + { + "epoch": 0.6032340425531915, + "grad_norm": 47.20480728149414, + "learning_rate": 5e-05, + "loss": 1.2647, + "num_input_tokens_seen": 355574564, + "step": 5316 + }, + { + "epoch": 0.6032340425531915, + "loss": 1.3253686428070068, + "loss_ce": 0.007985902950167656, + "loss_iou": 0.4765625, + "loss_num": 0.07275390625, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 355574564, + "step": 5316 + }, + { + "epoch": 0.6033475177304964, + "grad_norm": 21.896406173706055, + "learning_rate": 5e-05, + "loss": 1.0849, + "num_input_tokens_seen": 355640920, + "step": 5317 + }, + { + "epoch": 0.6033475177304964, + "loss": 1.1453698873519897, + "loss_ce": 0.008162843063473701, + "loss_iou": 0.439453125, + "loss_num": 0.052001953125, + "loss_xval": 1.140625, + "num_input_tokens_seen": 355640920, + "step": 5317 + }, + { + "epoch": 0.6034609929078014, + "grad_norm": 24.70574951171875, + "learning_rate": 5e-05, + "loss": 1.2841, + "num_input_tokens_seen": 355707008, + "step": 5318 + }, + { + "epoch": 0.6034609929078014, + "loss": 1.384772777557373, + "loss_ce": 0.007331343833357096, + "loss_iou": 0.5546875, + "loss_num": 0.0537109375, + "loss_xval": 1.375, + "num_input_tokens_seen": 355707008, + "step": 5318 + }, + { + "epoch": 0.6035744680851064, + "grad_norm": 38.65155792236328, + "learning_rate": 5e-05, + "loss": 1.0802, + "num_input_tokens_seen": 355773212, + "step": 5319 + }, + { + "epoch": 0.6035744680851064, + "loss": 1.1664413213729858, + "loss_ce": 0.009458906948566437, + "loss_iou": 0.427734375, + "loss_num": 0.06005859375, + "loss_xval": 1.15625, + "num_input_tokens_seen": 355773212, + "step": 5319 + }, + { + "epoch": 0.6036879432624114, + "grad_norm": 23.15352439880371, + "learning_rate": 5e-05, + "loss": 1.0297, + "num_input_tokens_seen": 355840212, + "step": 5320 + }, + { + "epoch": 0.6036879432624114, + "loss": 1.0515838861465454, + "loss_ce": 0.0047088684514164925, + "loss_iou": 0.439453125, + "loss_num": 0.03369140625, + "loss_xval": 1.046875, + "num_input_tokens_seen": 355840212, + "step": 5320 + }, + { + "epoch": 0.6038014184397164, + "grad_norm": 41.99504470825195, + "learning_rate": 5e-05, + "loss": 1.3006, + "num_input_tokens_seen": 355907148, + "step": 5321 + }, + { + "epoch": 0.6038014184397164, + "loss": 1.4756665229797363, + "loss_ce": 0.008869649842381477, + "loss_iou": 0.57421875, + "loss_num": 0.064453125, + "loss_xval": 1.46875, + "num_input_tokens_seen": 355907148, + "step": 5321 + }, + { + "epoch": 0.6039148936170213, + "grad_norm": 26.078554153442383, + "learning_rate": 5e-05, + "loss": 1.5779, + "num_input_tokens_seen": 355973884, + "step": 5322 + }, + { + "epoch": 0.6039148936170213, + "loss": 1.5865802764892578, + "loss_ce": 0.006502088159322739, + "loss_iou": 0.65234375, + "loss_num": 0.055419921875, + "loss_xval": 1.578125, + "num_input_tokens_seen": 355973884, + "step": 5322 + }, + { + "epoch": 0.6040283687943262, + "grad_norm": 73.82965850830078, + "learning_rate": 5e-05, + "loss": 1.0008, + "num_input_tokens_seen": 356040184, + "step": 5323 + }, + { + "epoch": 0.6040283687943262, + "loss": 0.9906253814697266, + "loss_ce": 0.004785480909049511, + "loss_iou": 0.416015625, + "loss_num": 0.031005859375, + "loss_xval": 0.984375, + "num_input_tokens_seen": 356040184, + "step": 5323 + }, + { + "epoch": 0.6041418439716312, + "grad_norm": 18.08827781677246, + "learning_rate": 5e-05, + "loss": 1.2133, + "num_input_tokens_seen": 356107136, + "step": 5324 + }, + { + "epoch": 0.6041418439716312, + "loss": 1.242748737335205, + "loss_ce": 0.007397117093205452, + "loss_iou": 0.46875, + "loss_num": 0.059326171875, + "loss_xval": 1.234375, + "num_input_tokens_seen": 356107136, + "step": 5324 + }, + { + "epoch": 0.6042553191489362, + "grad_norm": 24.22896385192871, + "learning_rate": 5e-05, + "loss": 1.1697, + "num_input_tokens_seen": 356175344, + "step": 5325 + }, + { + "epoch": 0.6042553191489362, + "loss": 1.2955493927001953, + "loss_ce": 0.009416592307388783, + "loss_iou": 0.51171875, + "loss_num": 0.0517578125, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 356175344, + "step": 5325 + }, + { + "epoch": 0.6043687943262411, + "grad_norm": 27.68448257446289, + "learning_rate": 5e-05, + "loss": 1.0954, + "num_input_tokens_seen": 356242696, + "step": 5326 + }, + { + "epoch": 0.6043687943262411, + "loss": 1.0273792743682861, + "loss_ce": 0.003941853530704975, + "loss_iou": 0.40234375, + "loss_num": 0.0439453125, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 356242696, + "step": 5326 + }, + { + "epoch": 0.6044822695035461, + "grad_norm": 35.16328811645508, + "learning_rate": 5e-05, + "loss": 1.2757, + "num_input_tokens_seen": 356310156, + "step": 5327 + }, + { + "epoch": 0.6044822695035461, + "loss": 1.2385025024414062, + "loss_ce": 0.005592379719018936, + "loss_iou": 0.490234375, + "loss_num": 0.05029296875, + "loss_xval": 1.234375, + "num_input_tokens_seen": 356310156, + "step": 5327 + }, + { + "epoch": 0.6045957446808511, + "grad_norm": 24.759117126464844, + "learning_rate": 5e-05, + "loss": 1.1876, + "num_input_tokens_seen": 356376328, + "step": 5328 + }, + { + "epoch": 0.6045957446808511, + "loss": 1.2687609195709229, + "loss_ce": 0.00937672145664692, + "loss_iou": 0.50390625, + "loss_num": 0.050537109375, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 356376328, + "step": 5328 + }, + { + "epoch": 0.6047092198581561, + "grad_norm": 27.77174186706543, + "learning_rate": 5e-05, + "loss": 1.2526, + "num_input_tokens_seen": 356443720, + "step": 5329 + }, + { + "epoch": 0.6047092198581561, + "loss": 1.1837639808654785, + "loss_ce": 0.006517939269542694, + "loss_iou": 0.5, + "loss_num": 0.03515625, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 356443720, + "step": 5329 + }, + { + "epoch": 0.604822695035461, + "grad_norm": 40.186100006103516, + "learning_rate": 5e-05, + "loss": 1.1667, + "num_input_tokens_seen": 356510548, + "step": 5330 + }, + { + "epoch": 0.604822695035461, + "loss": 1.056240200996399, + "loss_ce": 0.003994042985141277, + "loss_iou": 0.462890625, + "loss_num": 0.0250244140625, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 356510548, + "step": 5330 + }, + { + "epoch": 0.6049361702127659, + "grad_norm": 33.85940933227539, + "learning_rate": 5e-05, + "loss": 1.2841, + "num_input_tokens_seen": 356576568, + "step": 5331 + }, + { + "epoch": 0.6049361702127659, + "loss": 1.4017221927642822, + "loss_ce": 0.006214373745024204, + "loss_iou": 0.609375, + "loss_num": 0.034912109375, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 356576568, + "step": 5331 + }, + { + "epoch": 0.6050496453900709, + "grad_norm": 35.390357971191406, + "learning_rate": 5e-05, + "loss": 1.1633, + "num_input_tokens_seen": 356643512, + "step": 5332 + }, + { + "epoch": 0.6050496453900709, + "loss": 1.2358256578445435, + "loss_ce": 0.006333560682833195, + "loss_iou": 0.48828125, + "loss_num": 0.05029296875, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 356643512, + "step": 5332 + }, + { + "epoch": 0.6051631205673759, + "grad_norm": 31.970762252807617, + "learning_rate": 5e-05, + "loss": 1.4705, + "num_input_tokens_seen": 356711488, + "step": 5333 + }, + { + "epoch": 0.6051631205673759, + "loss": 1.474800705909729, + "loss_ce": 0.007515551522374153, + "loss_iou": 0.59765625, + "loss_num": 0.0546875, + "loss_xval": 1.46875, + "num_input_tokens_seen": 356711488, + "step": 5333 + }, + { + "epoch": 0.6052765957446808, + "grad_norm": 70.39784240722656, + "learning_rate": 5e-05, + "loss": 1.0674, + "num_input_tokens_seen": 356778956, + "step": 5334 + }, + { + "epoch": 0.6052765957446808, + "loss": 1.1149725914001465, + "loss_ce": 0.004132723901420832, + "loss_iou": 0.4453125, + "loss_num": 0.04443359375, + "loss_xval": 1.109375, + "num_input_tokens_seen": 356778956, + "step": 5334 + }, + { + "epoch": 0.6053900709219858, + "grad_norm": 209.70144653320312, + "learning_rate": 5e-05, + "loss": 1.0515, + "num_input_tokens_seen": 356844812, + "step": 5335 + }, + { + "epoch": 0.6053900709219858, + "loss": 1.1086081266403198, + "loss_ce": 0.007045636419206858, + "loss_iou": 0.44140625, + "loss_num": 0.043701171875, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 356844812, + "step": 5335 + }, + { + "epoch": 0.6055035460992908, + "grad_norm": 31.29113006591797, + "learning_rate": 5e-05, + "loss": 1.2271, + "num_input_tokens_seen": 356911060, + "step": 5336 + }, + { + "epoch": 0.6055035460992908, + "loss": 1.2715966701507568, + "loss_ce": 0.005971712060272694, + "loss_iou": 0.515625, + "loss_num": 0.04638671875, + "loss_xval": 1.265625, + "num_input_tokens_seen": 356911060, + "step": 5336 + }, + { + "epoch": 0.6056170212765958, + "grad_norm": 34.8885498046875, + "learning_rate": 5e-05, + "loss": 1.2441, + "num_input_tokens_seen": 356976856, + "step": 5337 + }, + { + "epoch": 0.6056170212765958, + "loss": 1.1139414310455322, + "loss_ce": 0.010914109647274017, + "loss_iou": 0.43359375, + "loss_num": 0.046875, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 356976856, + "step": 5337 + }, + { + "epoch": 0.6057304964539008, + "grad_norm": 34.781681060791016, + "learning_rate": 5e-05, + "loss": 1.1093, + "num_input_tokens_seen": 357044436, + "step": 5338 + }, + { + "epoch": 0.6057304964539008, + "loss": 0.9503651857376099, + "loss_ce": 0.004564452916383743, + "loss_iou": 0.38671875, + "loss_num": 0.033935546875, + "loss_xval": 0.9453125, + "num_input_tokens_seen": 357044436, + "step": 5338 + }, + { + "epoch": 0.6058439716312056, + "grad_norm": 29.983978271484375, + "learning_rate": 5e-05, + "loss": 1.2757, + "num_input_tokens_seen": 357110976, + "step": 5339 + }, + { + "epoch": 0.6058439716312056, + "loss": 1.3377106189727783, + "loss_ce": 0.005679399240761995, + "loss_iou": 0.5859375, + "loss_num": 0.031982421875, + "loss_xval": 1.328125, + "num_input_tokens_seen": 357110976, + "step": 5339 + }, + { + "epoch": 0.6059574468085106, + "grad_norm": 16.10724639892578, + "learning_rate": 5e-05, + "loss": 1.2816, + "num_input_tokens_seen": 357177376, + "step": 5340 + }, + { + "epoch": 0.6059574468085106, + "loss": 1.2389461994171143, + "loss_ce": 0.005059501621872187, + "loss_iou": 0.51953125, + "loss_num": 0.03955078125, + "loss_xval": 1.234375, + "num_input_tokens_seen": 357177376, + "step": 5340 + }, + { + "epoch": 0.6060709219858156, + "grad_norm": 14.842789649963379, + "learning_rate": 5e-05, + "loss": 0.9934, + "num_input_tokens_seen": 357242876, + "step": 5341 + }, + { + "epoch": 0.6060709219858156, + "loss": 0.9635041356086731, + "loss_ce": 0.008181851357221603, + "loss_iou": 0.39453125, + "loss_num": 0.033203125, + "loss_xval": 0.95703125, + "num_input_tokens_seen": 357242876, + "step": 5341 + }, + { + "epoch": 0.6061843971631206, + "grad_norm": 20.93911361694336, + "learning_rate": 5e-05, + "loss": 1.2218, + "num_input_tokens_seen": 357310220, + "step": 5342 + }, + { + "epoch": 0.6061843971631206, + "loss": 1.2316617965698242, + "loss_ce": 0.008029044605791569, + "loss_iou": 0.431640625, + "loss_num": 0.07177734375, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 357310220, + "step": 5342 + }, + { + "epoch": 0.6062978723404255, + "grad_norm": 38.52899932861328, + "learning_rate": 5e-05, + "loss": 1.3879, + "num_input_tokens_seen": 357377368, + "step": 5343 + }, + { + "epoch": 0.6062978723404255, + "loss": 1.291042685508728, + "loss_ce": 0.005398109555244446, + "loss_iou": 0.52734375, + "loss_num": 0.0458984375, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 357377368, + "step": 5343 + }, + { + "epoch": 0.6064113475177305, + "grad_norm": 44.78089904785156, + "learning_rate": 5e-05, + "loss": 1.4534, + "num_input_tokens_seen": 357442036, + "step": 5344 + }, + { + "epoch": 0.6064113475177305, + "loss": 1.6417807340621948, + "loss_ce": 0.0035970609169453382, + "loss_iou": 0.63671875, + "loss_num": 0.072265625, + "loss_xval": 1.640625, + "num_input_tokens_seen": 357442036, + "step": 5344 + }, + { + "epoch": 0.6065248226950355, + "grad_norm": 35.19029235839844, + "learning_rate": 5e-05, + "loss": 1.186, + "num_input_tokens_seen": 357509432, + "step": 5345 + }, + { + "epoch": 0.6065248226950355, + "loss": 1.1649320125579834, + "loss_ce": 0.00868201907724142, + "loss_iou": 0.470703125, + "loss_num": 0.04296875, + "loss_xval": 1.15625, + "num_input_tokens_seen": 357509432, + "step": 5345 + }, + { + "epoch": 0.6066382978723405, + "grad_norm": 36.63424301147461, + "learning_rate": 5e-05, + "loss": 1.2118, + "num_input_tokens_seen": 357576360, + "step": 5346 + }, + { + "epoch": 0.6066382978723405, + "loss": 1.3225135803222656, + "loss_ce": 0.006595676299184561, + "loss_iou": 0.51953125, + "loss_num": 0.05517578125, + "loss_xval": 1.3125, + "num_input_tokens_seen": 357576360, + "step": 5346 + }, + { + "epoch": 0.6067517730496453, + "grad_norm": 34.87723922729492, + "learning_rate": 5e-05, + "loss": 1.1319, + "num_input_tokens_seen": 357643448, + "step": 5347 + }, + { + "epoch": 0.6067517730496453, + "loss": 0.996687650680542, + "loss_ce": 0.008406479842960835, + "loss_iou": 0.4296875, + "loss_num": 0.02587890625, + "loss_xval": 0.98828125, + "num_input_tokens_seen": 357643448, + "step": 5347 + }, + { + "epoch": 0.6068652482269503, + "grad_norm": 33.83712387084961, + "learning_rate": 5e-05, + "loss": 1.3183, + "num_input_tokens_seen": 357709556, + "step": 5348 + }, + { + "epoch": 0.6068652482269503, + "loss": 1.236038327217102, + "loss_ce": 0.002639820333570242, + "loss_iou": 0.5078125, + "loss_num": 0.042724609375, + "loss_xval": 1.234375, + "num_input_tokens_seen": 357709556, + "step": 5348 + }, + { + "epoch": 0.6069787234042553, + "grad_norm": 39.591880798339844, + "learning_rate": 5e-05, + "loss": 1.0765, + "num_input_tokens_seen": 357776500, + "step": 5349 + }, + { + "epoch": 0.6069787234042553, + "loss": 1.0437345504760742, + "loss_ce": 0.006381037179380655, + "loss_iou": 0.4296875, + "loss_num": 0.035888671875, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 357776500, + "step": 5349 + }, + { + "epoch": 0.6070921985815603, + "grad_norm": 33.623443603515625, + "learning_rate": 5e-05, + "loss": 1.2318, + "num_input_tokens_seen": 357842752, + "step": 5350 + }, + { + "epoch": 0.6070921985815603, + "loss": 1.1367995738983154, + "loss_ce": 0.0059402440674602985, + "loss_iou": 0.478515625, + "loss_num": 0.03466796875, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 357842752, + "step": 5350 + }, + { + "epoch": 0.6072056737588652, + "grad_norm": 21.392404556274414, + "learning_rate": 5e-05, + "loss": 1.2685, + "num_input_tokens_seen": 357909436, + "step": 5351 + }, + { + "epoch": 0.6072056737588652, + "loss": 1.4403226375579834, + "loss_ce": 0.005508149042725563, + "loss_iou": 0.578125, + "loss_num": 0.056640625, + "loss_xval": 1.4375, + "num_input_tokens_seen": 357909436, + "step": 5351 + }, + { + "epoch": 0.6073191489361702, + "grad_norm": 20.608097076416016, + "learning_rate": 5e-05, + "loss": 1.115, + "num_input_tokens_seen": 357976508, + "step": 5352 + }, + { + "epoch": 0.6073191489361702, + "loss": 1.0366222858428955, + "loss_ce": 0.006837038788944483, + "loss_iou": 0.41015625, + "loss_num": 0.0419921875, + "loss_xval": 1.03125, + "num_input_tokens_seen": 357976508, + "step": 5352 + }, + { + "epoch": 0.6074326241134752, + "grad_norm": 19.62257957458496, + "learning_rate": 5e-05, + "loss": 1.0959, + "num_input_tokens_seen": 358043708, + "step": 5353 + }, + { + "epoch": 0.6074326241134752, + "loss": 1.0080797672271729, + "loss_ce": 0.004661690443754196, + "loss_iou": 0.40625, + "loss_num": 0.0380859375, + "loss_xval": 1.0, + "num_input_tokens_seen": 358043708, + "step": 5353 + }, + { + "epoch": 0.6075460992907802, + "grad_norm": 26.422481536865234, + "learning_rate": 5e-05, + "loss": 1.3055, + "num_input_tokens_seen": 358111172, + "step": 5354 + }, + { + "epoch": 0.6075460992907802, + "loss": 1.3276355266571045, + "loss_ce": 0.004393400624394417, + "loss_iou": 0.5234375, + "loss_num": 0.0556640625, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 358111172, + "step": 5354 + }, + { + "epoch": 0.6076595744680852, + "grad_norm": 48.646331787109375, + "learning_rate": 5e-05, + "loss": 1.0854, + "num_input_tokens_seen": 358177520, + "step": 5355 + }, + { + "epoch": 0.6076595744680852, + "loss": 1.043600082397461, + "loss_ce": 0.010396949015557766, + "loss_iou": 0.3984375, + "loss_num": 0.047607421875, + "loss_xval": 1.03125, + "num_input_tokens_seen": 358177520, + "step": 5355 + }, + { + "epoch": 0.60777304964539, + "grad_norm": 23.368122100830078, + "learning_rate": 5e-05, + "loss": 1.0886, + "num_input_tokens_seen": 358245088, + "step": 5356 + }, + { + "epoch": 0.60777304964539, + "loss": 1.1051013469696045, + "loss_ce": 0.002562308218330145, + "loss_iou": 0.474609375, + "loss_num": 0.0308837890625, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 358245088, + "step": 5356 + }, + { + "epoch": 0.607886524822695, + "grad_norm": 44.063907623291016, + "learning_rate": 5e-05, + "loss": 1.3122, + "num_input_tokens_seen": 358312348, + "step": 5357 + }, + { + "epoch": 0.607886524822695, + "loss": 1.4009888172149658, + "loss_ce": 0.010363858193159103, + "loss_iou": 0.578125, + "loss_num": 0.0478515625, + "loss_xval": 1.390625, + "num_input_tokens_seen": 358312348, + "step": 5357 + }, + { + "epoch": 0.608, + "grad_norm": 37.86326599121094, + "learning_rate": 5e-05, + "loss": 1.4478, + "num_input_tokens_seen": 358379072, + "step": 5358 + }, + { + "epoch": 0.608, + "loss": 1.4675610065460205, + "loss_ce": 0.008576718159019947, + "loss_iou": 0.56640625, + "loss_num": 0.06494140625, + "loss_xval": 1.4609375, + "num_input_tokens_seen": 358379072, + "step": 5358 + }, + { + "epoch": 0.608113475177305, + "grad_norm": 77.8921890258789, + "learning_rate": 5e-05, + "loss": 1.1101, + "num_input_tokens_seen": 358445636, + "step": 5359 + }, + { + "epoch": 0.608113475177305, + "loss": 1.08518648147583, + "loss_ce": 0.008526408113539219, + "loss_iou": 0.443359375, + "loss_num": 0.03759765625, + "loss_xval": 1.078125, + "num_input_tokens_seen": 358445636, + "step": 5359 + }, + { + "epoch": 0.6082269503546099, + "grad_norm": 28.63134765625, + "learning_rate": 5e-05, + "loss": 1.2429, + "num_input_tokens_seen": 358512972, + "step": 5360 + }, + { + "epoch": 0.6082269503546099, + "loss": 1.134238839149475, + "loss_ce": 0.00557670695707202, + "loss_iou": 0.49609375, + "loss_num": 0.0274658203125, + "loss_xval": 1.125, + "num_input_tokens_seen": 358512972, + "step": 5360 + }, + { + "epoch": 0.6083404255319149, + "grad_norm": 151.4228057861328, + "learning_rate": 5e-05, + "loss": 1.0812, + "num_input_tokens_seen": 358578964, + "step": 5361 + }, + { + "epoch": 0.6083404255319149, + "loss": 1.1782770156860352, + "loss_ce": 0.008843354880809784, + "loss_iou": 0.45703125, + "loss_num": 0.051025390625, + "loss_xval": 1.171875, + "num_input_tokens_seen": 358578964, + "step": 5361 + }, + { + "epoch": 0.6084539007092199, + "grad_norm": 26.867191314697266, + "learning_rate": 5e-05, + "loss": 1.2148, + "num_input_tokens_seen": 358645832, + "step": 5362 + }, + { + "epoch": 0.6084539007092199, + "loss": 1.1863638162612915, + "loss_ce": 0.0052114734426140785, + "loss_iou": 0.5, + "loss_num": 0.036865234375, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 358645832, + "step": 5362 + }, + { + "epoch": 0.6085673758865249, + "grad_norm": 35.53926086425781, + "learning_rate": 5e-05, + "loss": 1.0488, + "num_input_tokens_seen": 358713048, + "step": 5363 + }, + { + "epoch": 0.6085673758865249, + "loss": 1.1403664350509644, + "loss_ce": 0.006089032161980867, + "loss_iou": 0.455078125, + "loss_num": 0.045166015625, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 358713048, + "step": 5363 + }, + { + "epoch": 0.6086808510638297, + "grad_norm": 37.489437103271484, + "learning_rate": 5e-05, + "loss": 1.3119, + "num_input_tokens_seen": 358779544, + "step": 5364 + }, + { + "epoch": 0.6086808510638297, + "loss": 1.2945284843444824, + "loss_ce": 0.010348813608288765, + "loss_iou": 0.51171875, + "loss_num": 0.05224609375, + "loss_xval": 1.28125, + "num_input_tokens_seen": 358779544, + "step": 5364 + }, + { + "epoch": 0.6087943262411347, + "grad_norm": 37.1923942565918, + "learning_rate": 5e-05, + "loss": 1.4566, + "num_input_tokens_seen": 358846580, + "step": 5365 + }, + { + "epoch": 0.6087943262411347, + "loss": 1.251549482345581, + "loss_ce": 0.004967379383742809, + "loss_iou": 0.55078125, + "loss_num": 0.028564453125, + "loss_xval": 1.25, + "num_input_tokens_seen": 358846580, + "step": 5365 + }, + { + "epoch": 0.6089078014184397, + "grad_norm": 27.42197608947754, + "learning_rate": 5e-05, + "loss": 1.2612, + "num_input_tokens_seen": 358913084, + "step": 5366 + }, + { + "epoch": 0.6089078014184397, + "loss": 1.5113660097122192, + "loss_ce": 0.005994923412799835, + "loss_iou": 0.59375, + "loss_num": 0.06396484375, + "loss_xval": 1.5078125, + "num_input_tokens_seen": 358913084, + "step": 5366 + }, + { + "epoch": 0.6090212765957447, + "grad_norm": 44.02111053466797, + "learning_rate": 5e-05, + "loss": 1.0815, + "num_input_tokens_seen": 358980784, + "step": 5367 + }, + { + "epoch": 0.6090212765957447, + "loss": 1.0058575868606567, + "loss_ce": 0.006834214087575674, + "loss_iou": 0.400390625, + "loss_num": 0.039794921875, + "loss_xval": 1.0, + "num_input_tokens_seen": 358980784, + "step": 5367 + }, + { + "epoch": 0.6091347517730497, + "grad_norm": 58.3894157409668, + "learning_rate": 5e-05, + "loss": 1.3322, + "num_input_tokens_seen": 359047476, + "step": 5368 + }, + { + "epoch": 0.6091347517730497, + "loss": 1.528833031654358, + "loss_ce": 0.00979006290435791, + "loss_iou": 0.59375, + "loss_num": 0.06640625, + "loss_xval": 1.515625, + "num_input_tokens_seen": 359047476, + "step": 5368 + }, + { + "epoch": 0.6092482269503546, + "grad_norm": 27.387693405151367, + "learning_rate": 5e-05, + "loss": 1.2017, + "num_input_tokens_seen": 359114824, + "step": 5369 + }, + { + "epoch": 0.6092482269503546, + "loss": 1.287834644317627, + "loss_ce": 0.010979113169014454, + "loss_iou": 0.490234375, + "loss_num": 0.05908203125, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 359114824, + "step": 5369 + }, + { + "epoch": 0.6093617021276596, + "grad_norm": 46.71889877319336, + "learning_rate": 5e-05, + "loss": 1.3049, + "num_input_tokens_seen": 359181720, + "step": 5370 + }, + { + "epoch": 0.6093617021276596, + "loss": 1.2499186992645264, + "loss_ce": 0.009684349410235882, + "loss_iou": 0.47265625, + "loss_num": 0.059326171875, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 359181720, + "step": 5370 + }, + { + "epoch": 0.6094751773049646, + "grad_norm": 28.849103927612305, + "learning_rate": 5e-05, + "loss": 1.189, + "num_input_tokens_seen": 359248000, + "step": 5371 + }, + { + "epoch": 0.6094751773049646, + "loss": 1.3614896535873413, + "loss_ce": 0.004556032828986645, + "loss_iou": 0.55859375, + "loss_num": 0.04736328125, + "loss_xval": 1.359375, + "num_input_tokens_seen": 359248000, + "step": 5371 + }, + { + "epoch": 0.6095886524822695, + "grad_norm": 23.37443733215332, + "learning_rate": 5e-05, + "loss": 1.1246, + "num_input_tokens_seen": 359314004, + "step": 5372 + }, + { + "epoch": 0.6095886524822695, + "loss": 1.208357810974121, + "loss_ce": 0.009627390652894974, + "loss_iou": 0.44921875, + "loss_num": 0.06005859375, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 359314004, + "step": 5372 + }, + { + "epoch": 0.6097021276595744, + "grad_norm": 19.993440628051758, + "learning_rate": 5e-05, + "loss": 1.1693, + "num_input_tokens_seen": 359381300, + "step": 5373 + }, + { + "epoch": 0.6097021276595744, + "loss": 1.0433355569839478, + "loss_ce": 0.0076910085044801235, + "loss_iou": 0.44921875, + "loss_num": 0.027587890625, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 359381300, + "step": 5373 + }, + { + "epoch": 0.6098156028368794, + "grad_norm": 57.265411376953125, + "learning_rate": 5e-05, + "loss": 1.3521, + "num_input_tokens_seen": 359447468, + "step": 5374 + }, + { + "epoch": 0.6098156028368794, + "loss": 1.2539135217666626, + "loss_ce": 0.006843192968517542, + "loss_iou": 0.5390625, + "loss_num": 0.034423828125, + "loss_xval": 1.25, + "num_input_tokens_seen": 359447468, + "step": 5374 + }, + { + "epoch": 0.6099290780141844, + "grad_norm": 164.82778930664062, + "learning_rate": 5e-05, + "loss": 1.2228, + "num_input_tokens_seen": 359514552, + "step": 5375 + }, + { + "epoch": 0.6099290780141844, + "loss": 1.1029613018035889, + "loss_ce": 0.009211298078298569, + "loss_iou": 0.478515625, + "loss_num": 0.0269775390625, + "loss_xval": 1.09375, + "num_input_tokens_seen": 359514552, + "step": 5375 + }, + { + "epoch": 0.6100425531914894, + "grad_norm": 27.672801971435547, + "learning_rate": 5e-05, + "loss": 1.3924, + "num_input_tokens_seen": 359581000, + "step": 5376 + }, + { + "epoch": 0.6100425531914894, + "loss": 1.354212760925293, + "loss_ce": 0.008021445013582706, + "loss_iou": 0.5546875, + "loss_num": 0.0478515625, + "loss_xval": 1.34375, + "num_input_tokens_seen": 359581000, + "step": 5376 + }, + { + "epoch": 0.6101560283687943, + "grad_norm": 16.489370346069336, + "learning_rate": 5e-05, + "loss": 1.0586, + "num_input_tokens_seen": 359647468, + "step": 5377 + }, + { + "epoch": 0.6101560283687943, + "loss": 0.9712187051773071, + "loss_ce": 0.006863194517791271, + "loss_iou": 0.388671875, + "loss_num": 0.03759765625, + "loss_xval": 0.96484375, + "num_input_tokens_seen": 359647468, + "step": 5377 + }, + { + "epoch": 0.6102695035460993, + "grad_norm": 27.421998977661133, + "learning_rate": 5e-05, + "loss": 1.2996, + "num_input_tokens_seen": 359714828, + "step": 5378 + }, + { + "epoch": 0.6102695035460993, + "loss": 1.414722204208374, + "loss_ce": 0.003589384024962783, + "loss_iou": 0.56640625, + "loss_num": 0.05615234375, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 359714828, + "step": 5378 + }, + { + "epoch": 0.6103829787234043, + "grad_norm": 21.148509979248047, + "learning_rate": 5e-05, + "loss": 1.025, + "num_input_tokens_seen": 359781708, + "step": 5379 + }, + { + "epoch": 0.6103829787234043, + "loss": 0.905514121055603, + "loss_ce": 0.0029262080788612366, + "loss_iou": 0.388671875, + "loss_num": 0.0247802734375, + "loss_xval": 0.90234375, + "num_input_tokens_seen": 359781708, + "step": 5379 + }, + { + "epoch": 0.6104964539007092, + "grad_norm": 14.794662475585938, + "learning_rate": 5e-05, + "loss": 1.1513, + "num_input_tokens_seen": 359848836, + "step": 5380 + }, + { + "epoch": 0.6104964539007092, + "loss": 1.192957878112793, + "loss_ce": 0.006922842934727669, + "loss_iou": 0.47265625, + "loss_num": 0.048095703125, + "loss_xval": 1.1875, + "num_input_tokens_seen": 359848836, + "step": 5380 + }, + { + "epoch": 0.6106099290780141, + "grad_norm": 39.783329010009766, + "learning_rate": 5e-05, + "loss": 1.0835, + "num_input_tokens_seen": 359915472, + "step": 5381 + }, + { + "epoch": 0.6106099290780141, + "loss": 1.0523414611816406, + "loss_ce": 0.006443017162382603, + "loss_iou": 0.4453125, + "loss_num": 0.0311279296875, + "loss_xval": 1.046875, + "num_input_tokens_seen": 359915472, + "step": 5381 + }, + { + "epoch": 0.6107234042553191, + "grad_norm": 85.1600570678711, + "learning_rate": 5e-05, + "loss": 1.3013, + "num_input_tokens_seen": 359981716, + "step": 5382 + }, + { + "epoch": 0.6107234042553191, + "loss": 1.44675612449646, + "loss_ce": 0.00632651150226593, + "loss_iou": 0.5703125, + "loss_num": 0.060302734375, + "loss_xval": 1.4375, + "num_input_tokens_seen": 359981716, + "step": 5382 + }, + { + "epoch": 0.6108368794326241, + "grad_norm": 26.58732795715332, + "learning_rate": 5e-05, + "loss": 1.2632, + "num_input_tokens_seen": 360048712, + "step": 5383 + }, + { + "epoch": 0.6108368794326241, + "loss": 1.046086311340332, + "loss_ce": 0.008732849732041359, + "loss_iou": 0.412109375, + "loss_num": 0.042724609375, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 360048712, + "step": 5383 + }, + { + "epoch": 0.6109503546099291, + "grad_norm": 49.27089309692383, + "learning_rate": 5e-05, + "loss": 1.1691, + "num_input_tokens_seen": 360115240, + "step": 5384 + }, + { + "epoch": 0.6109503546099291, + "loss": 1.0090429782867432, + "loss_ce": 0.007089921273291111, + "loss_iou": 0.44921875, + "loss_num": 0.0208740234375, + "loss_xval": 1.0, + "num_input_tokens_seen": 360115240, + "step": 5384 + }, + { + "epoch": 0.6110638297872341, + "grad_norm": 20.044782638549805, + "learning_rate": 5e-05, + "loss": 1.1552, + "num_input_tokens_seen": 360182748, + "step": 5385 + }, + { + "epoch": 0.6110638297872341, + "loss": 1.0874855518341064, + "loss_ce": 0.005942555610090494, + "loss_iou": 0.447265625, + "loss_num": 0.037353515625, + "loss_xval": 1.078125, + "num_input_tokens_seen": 360182748, + "step": 5385 + }, + { + "epoch": 0.611177304964539, + "grad_norm": 40.19487762451172, + "learning_rate": 5e-05, + "loss": 1.1233, + "num_input_tokens_seen": 360249980, + "step": 5386 + }, + { + "epoch": 0.611177304964539, + "loss": 1.134110927581787, + "loss_ce": 0.006181322503834963, + "loss_iou": 0.462890625, + "loss_num": 0.0400390625, + "loss_xval": 1.125, + "num_input_tokens_seen": 360249980, + "step": 5386 + }, + { + "epoch": 0.611290780141844, + "grad_norm": 24.636884689331055, + "learning_rate": 5e-05, + "loss": 1.4837, + "num_input_tokens_seen": 360316044, + "step": 5387 + }, + { + "epoch": 0.611290780141844, + "loss": 1.574913501739502, + "loss_ce": 0.013389980420470238, + "loss_iou": 0.625, + "loss_num": 0.0625, + "loss_xval": 1.5625, + "num_input_tokens_seen": 360316044, + "step": 5387 + }, + { + "epoch": 0.6114042553191489, + "grad_norm": 15.070443153381348, + "learning_rate": 5e-05, + "loss": 0.9521, + "num_input_tokens_seen": 360382940, + "step": 5388 + }, + { + "epoch": 0.6114042553191489, + "loss": 0.9832912087440491, + "loss_ce": 0.004287308547645807, + "loss_iou": 0.412109375, + "loss_num": 0.03125, + "loss_xval": 0.98046875, + "num_input_tokens_seen": 360382940, + "step": 5388 + }, + { + "epoch": 0.6115177304964539, + "grad_norm": 24.782926559448242, + "learning_rate": 5e-05, + "loss": 1.1472, + "num_input_tokens_seen": 360449068, + "step": 5389 + }, + { + "epoch": 0.6115177304964539, + "loss": 1.385637879371643, + "loss_ce": 0.008196443319320679, + "loss_iou": 0.546875, + "loss_num": 0.05712890625, + "loss_xval": 1.375, + "num_input_tokens_seen": 360449068, + "step": 5389 + }, + { + "epoch": 0.6116312056737588, + "grad_norm": 20.549354553222656, + "learning_rate": 5e-05, + "loss": 1.091, + "num_input_tokens_seen": 360515628, + "step": 5390 + }, + { + "epoch": 0.6116312056737588, + "loss": 0.9955339431762695, + "loss_ce": 0.003834719303995371, + "loss_iou": 0.451171875, + "loss_num": 0.01806640625, + "loss_xval": 0.9921875, + "num_input_tokens_seen": 360515628, + "step": 5390 + }, + { + "epoch": 0.6117446808510638, + "grad_norm": 27.73138427734375, + "learning_rate": 5e-05, + "loss": 1.1687, + "num_input_tokens_seen": 360582992, + "step": 5391 + }, + { + "epoch": 0.6117446808510638, + "loss": 1.1595087051391602, + "loss_ce": 0.004723480902612209, + "loss_iou": 0.451171875, + "loss_num": 0.050537109375, + "loss_xval": 1.15625, + "num_input_tokens_seen": 360582992, + "step": 5391 + }, + { + "epoch": 0.6118581560283688, + "grad_norm": 33.37724304199219, + "learning_rate": 5e-05, + "loss": 1.2707, + "num_input_tokens_seen": 360650720, + "step": 5392 + }, + { + "epoch": 0.6118581560283688, + "loss": 1.1532721519470215, + "loss_ce": 0.008069497533142567, + "loss_iou": 0.45703125, + "loss_num": 0.04638671875, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 360650720, + "step": 5392 + }, + { + "epoch": 0.6119716312056738, + "grad_norm": 257.30242919921875, + "learning_rate": 5e-05, + "loss": 1.127, + "num_input_tokens_seen": 360717448, + "step": 5393 + }, + { + "epoch": 0.6119716312056738, + "loss": 1.249593734741211, + "loss_ce": 0.003499890211969614, + "loss_iou": 0.515625, + "loss_num": 0.04296875, + "loss_xval": 1.25, + "num_input_tokens_seen": 360717448, + "step": 5393 + }, + { + "epoch": 0.6120851063829787, + "grad_norm": 29.072891235351562, + "learning_rate": 5e-05, + "loss": 1.3125, + "num_input_tokens_seen": 360785112, + "step": 5394 + }, + { + "epoch": 0.6120851063829787, + "loss": 1.228670358657837, + "loss_ce": 0.002596196485683322, + "loss_iou": 0.53515625, + "loss_num": 0.03076171875, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 360785112, + "step": 5394 + }, + { + "epoch": 0.6121985815602837, + "grad_norm": 18.724023818969727, + "learning_rate": 5e-05, + "loss": 0.9824, + "num_input_tokens_seen": 360852216, + "step": 5395 + }, + { + "epoch": 0.6121985815602837, + "loss": 0.9766969680786133, + "loss_ce": 0.006176878698170185, + "loss_iou": 0.376953125, + "loss_num": 0.04345703125, + "loss_xval": 0.96875, + "num_input_tokens_seen": 360852216, + "step": 5395 + }, + { + "epoch": 0.6123120567375887, + "grad_norm": 31.551372528076172, + "learning_rate": 5e-05, + "loss": 1.0511, + "num_input_tokens_seen": 360919164, + "step": 5396 + }, + { + "epoch": 0.6123120567375887, + "loss": 1.1461856365203857, + "loss_ce": 0.008002103306353092, + "loss_iou": 0.455078125, + "loss_num": 0.0458984375, + "loss_xval": 1.140625, + "num_input_tokens_seen": 360919164, + "step": 5396 + }, + { + "epoch": 0.6124255319148936, + "grad_norm": 38.73286819458008, + "learning_rate": 5e-05, + "loss": 1.2686, + "num_input_tokens_seen": 360986320, + "step": 5397 + }, + { + "epoch": 0.6124255319148936, + "loss": 1.208899974822998, + "loss_ce": 0.0101695591583848, + "loss_iou": 0.439453125, + "loss_num": 0.0634765625, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 360986320, + "step": 5397 + }, + { + "epoch": 0.6125390070921986, + "grad_norm": 30.88497543334961, + "learning_rate": 5e-05, + "loss": 1.5197, + "num_input_tokens_seen": 361053628, + "step": 5398 + }, + { + "epoch": 0.6125390070921986, + "loss": 1.7377878427505493, + "loss_ce": 0.006342546083033085, + "loss_iou": 0.66796875, + "loss_num": 0.0791015625, + "loss_xval": 1.734375, + "num_input_tokens_seen": 361053628, + "step": 5398 + }, + { + "epoch": 0.6126524822695035, + "grad_norm": 11.931997299194336, + "learning_rate": 5e-05, + "loss": 1.0169, + "num_input_tokens_seen": 361119496, + "step": 5399 + }, + { + "epoch": 0.6126524822695035, + "loss": 1.1721044778823853, + "loss_ce": 0.006333008408546448, + "loss_iou": 0.44921875, + "loss_num": 0.05419921875, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 361119496, + "step": 5399 + }, + { + "epoch": 0.6127659574468085, + "grad_norm": 45.02085494995117, + "learning_rate": 5e-05, + "loss": 1.0298, + "num_input_tokens_seen": 361185500, + "step": 5400 + }, + { + "epoch": 0.6127659574468085, + "loss": 1.1564126014709473, + "loss_ce": 0.008951699361205101, + "loss_iou": 0.4765625, + "loss_num": 0.038818359375, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 361185500, + "step": 5400 + }, + { + "epoch": 0.6128794326241135, + "grad_norm": 37.44388961791992, + "learning_rate": 5e-05, + "loss": 1.2003, + "num_input_tokens_seen": 361252336, + "step": 5401 + }, + { + "epoch": 0.6128794326241135, + "loss": 1.1641143560409546, + "loss_ce": 0.004934662487357855, + "loss_iou": 0.48828125, + "loss_num": 0.0361328125, + "loss_xval": 1.15625, + "num_input_tokens_seen": 361252336, + "step": 5401 + }, + { + "epoch": 0.6129929078014185, + "grad_norm": 32.22121047973633, + "learning_rate": 5e-05, + "loss": 1.4294, + "num_input_tokens_seen": 361319100, + "step": 5402 + }, + { + "epoch": 0.6129929078014185, + "loss": 1.3025420904159546, + "loss_ce": 0.0046905167400836945, + "loss_iou": 0.5234375, + "loss_num": 0.05029296875, + "loss_xval": 1.296875, + "num_input_tokens_seen": 361319100, + "step": 5402 + }, + { + "epoch": 0.6131063829787234, + "grad_norm": 35.676307678222656, + "learning_rate": 5e-05, + "loss": 1.1902, + "num_input_tokens_seen": 361385208, + "step": 5403 + }, + { + "epoch": 0.6131063829787234, + "loss": 1.1507086753845215, + "loss_ce": 0.008374670520424843, + "loss_iou": 0.48046875, + "loss_num": 0.036376953125, + "loss_xval": 1.140625, + "num_input_tokens_seen": 361385208, + "step": 5403 + }, + { + "epoch": 0.6132198581560284, + "grad_norm": 28.7883243560791, + "learning_rate": 5e-05, + "loss": 1.3459, + "num_input_tokens_seen": 361452808, + "step": 5404 + }, + { + "epoch": 0.6132198581560284, + "loss": 1.3167285919189453, + "loss_ce": 0.006181749515235424, + "loss_iou": 0.5546875, + "loss_num": 0.040283203125, + "loss_xval": 1.3125, + "num_input_tokens_seen": 361452808, + "step": 5404 + }, + { + "epoch": 0.6133333333333333, + "grad_norm": 18.588850021362305, + "learning_rate": 5e-05, + "loss": 1.1549, + "num_input_tokens_seen": 361519500, + "step": 5405 + }, + { + "epoch": 0.6133333333333333, + "loss": 1.328473687171936, + "loss_ce": 0.009137713350355625, + "loss_iou": 0.52734375, + "loss_num": 0.052734375, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 361519500, + "step": 5405 + }, + { + "epoch": 0.6134468085106383, + "grad_norm": 32.33906173706055, + "learning_rate": 5e-05, + "loss": 1.0846, + "num_input_tokens_seen": 361586884, + "step": 5406 + }, + { + "epoch": 0.6134468085106383, + "loss": 1.1547642946243286, + "loss_ce": 0.00583849148824811, + "loss_iou": 0.443359375, + "loss_num": 0.05224609375, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 361586884, + "step": 5406 + }, + { + "epoch": 0.6135602836879432, + "grad_norm": 38.41267395019531, + "learning_rate": 5e-05, + "loss": 1.2059, + "num_input_tokens_seen": 361653928, + "step": 5407 + }, + { + "epoch": 0.6135602836879432, + "loss": 1.1981534957885742, + "loss_ce": 0.008700400590896606, + "loss_iou": 0.48046875, + "loss_num": 0.0458984375, + "loss_xval": 1.1875, + "num_input_tokens_seen": 361653928, + "step": 5407 + }, + { + "epoch": 0.6136737588652482, + "grad_norm": 32.29811096191406, + "learning_rate": 5e-05, + "loss": 1.2641, + "num_input_tokens_seen": 361721296, + "step": 5408 + }, + { + "epoch": 0.6136737588652482, + "loss": 1.3638628721237183, + "loss_ce": 0.0064409226179122925, + "loss_iou": 0.578125, + "loss_num": 0.03955078125, + "loss_xval": 1.359375, + "num_input_tokens_seen": 361721296, + "step": 5408 + }, + { + "epoch": 0.6137872340425532, + "grad_norm": 18.16703987121582, + "learning_rate": 5e-05, + "loss": 0.9457, + "num_input_tokens_seen": 361787784, + "step": 5409 + }, + { + "epoch": 0.6137872340425532, + "loss": 0.9774279594421387, + "loss_ce": 0.008677881211042404, + "loss_iou": 0.380859375, + "loss_num": 0.041259765625, + "loss_xval": 0.96875, + "num_input_tokens_seen": 361787784, + "step": 5409 + }, + { + "epoch": 0.6139007092198582, + "grad_norm": 21.92331886291504, + "learning_rate": 5e-05, + "loss": 1.2543, + "num_input_tokens_seen": 361854512, + "step": 5410 + }, + { + "epoch": 0.6139007092198582, + "loss": 1.1732372045516968, + "loss_ce": 0.006245024502277374, + "loss_iou": 0.46875, + "loss_num": 0.04541015625, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 361854512, + "step": 5410 + }, + { + "epoch": 0.6140141843971632, + "grad_norm": 32.598533630371094, + "learning_rate": 5e-05, + "loss": 1.2626, + "num_input_tokens_seen": 361921752, + "step": 5411 + }, + { + "epoch": 0.6140141843971632, + "loss": 1.4034391641616821, + "loss_ce": 0.007931306026875973, + "loss_iou": 0.58203125, + "loss_num": 0.04638671875, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 361921752, + "step": 5411 + }, + { + "epoch": 0.6141276595744681, + "grad_norm": 26.707317352294922, + "learning_rate": 5e-05, + "loss": 1.3314, + "num_input_tokens_seen": 361988992, + "step": 5412 + }, + { + "epoch": 0.6141276595744681, + "loss": 1.4341565370559692, + "loss_ce": 0.006422141566872597, + "loss_iou": 0.6171875, + "loss_num": 0.0380859375, + "loss_xval": 1.4296875, + "num_input_tokens_seen": 361988992, + "step": 5412 + }, + { + "epoch": 0.614241134751773, + "grad_norm": 12.934741020202637, + "learning_rate": 5e-05, + "loss": 0.9966, + "num_input_tokens_seen": 362055220, + "step": 5413 + }, + { + "epoch": 0.614241134751773, + "loss": 0.8943085670471191, + "loss_ce": 0.00490419939160347, + "loss_iou": 0.376953125, + "loss_num": 0.027587890625, + "loss_xval": 0.890625, + "num_input_tokens_seen": 362055220, + "step": 5413 + }, + { + "epoch": 0.614354609929078, + "grad_norm": 18.11347198486328, + "learning_rate": 5e-05, + "loss": 1.0448, + "num_input_tokens_seen": 362121648, + "step": 5414 + }, + { + "epoch": 0.614354609929078, + "loss": 1.0778992176055908, + "loss_ce": 0.005877736955881119, + "loss_iou": 0.419921875, + "loss_num": 0.0458984375, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 362121648, + "step": 5414 + }, + { + "epoch": 0.614468085106383, + "grad_norm": 24.9248104095459, + "learning_rate": 5e-05, + "loss": 1.1426, + "num_input_tokens_seen": 362188548, + "step": 5415 + }, + { + "epoch": 0.614468085106383, + "loss": 1.075348138809204, + "loss_ce": 0.004059082828462124, + "loss_iou": 0.4375, + "loss_num": 0.03955078125, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 362188548, + "step": 5415 + }, + { + "epoch": 0.6145815602836879, + "grad_norm": 26.77396583557129, + "learning_rate": 5e-05, + "loss": 1.2388, + "num_input_tokens_seen": 362254996, + "step": 5416 + }, + { + "epoch": 0.6145815602836879, + "loss": 1.2186079025268555, + "loss_ce": 0.005228984169661999, + "loss_iou": 0.5, + "loss_num": 0.04296875, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 362254996, + "step": 5416 + }, + { + "epoch": 0.6146950354609929, + "grad_norm": 44.48963165283203, + "learning_rate": 5e-05, + "loss": 1.3494, + "num_input_tokens_seen": 362320572, + "step": 5417 + }, + { + "epoch": 0.6146950354609929, + "loss": 1.2989492416381836, + "loss_ce": 0.005278675816953182, + "loss_iou": 0.482421875, + "loss_num": 0.0654296875, + "loss_xval": 1.296875, + "num_input_tokens_seen": 362320572, + "step": 5417 + }, + { + "epoch": 0.6148085106382979, + "grad_norm": 39.3917236328125, + "learning_rate": 5e-05, + "loss": 1.1383, + "num_input_tokens_seen": 362386680, + "step": 5418 + }, + { + "epoch": 0.6148085106382979, + "loss": 1.0632615089416504, + "loss_ce": 0.006864969618618488, + "loss_iou": 0.39453125, + "loss_num": 0.053466796875, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 362386680, + "step": 5418 + }, + { + "epoch": 0.6149219858156029, + "grad_norm": 19.105722427368164, + "learning_rate": 5e-05, + "loss": 1.024, + "num_input_tokens_seen": 362453340, + "step": 5419 + }, + { + "epoch": 0.6149219858156029, + "loss": 0.9946867227554321, + "loss_ce": 0.005917150527238846, + "loss_iou": 0.419921875, + "loss_num": 0.029541015625, + "loss_xval": 0.98828125, + "num_input_tokens_seen": 362453340, + "step": 5419 + }, + { + "epoch": 0.6150354609929078, + "grad_norm": 20.77765655517578, + "learning_rate": 5e-05, + "loss": 1.0993, + "num_input_tokens_seen": 362519028, + "step": 5420 + }, + { + "epoch": 0.6150354609929078, + "loss": 1.3470877408981323, + "loss_ce": 0.005779123865067959, + "loss_iou": 0.56640625, + "loss_num": 0.041015625, + "loss_xval": 1.34375, + "num_input_tokens_seen": 362519028, + "step": 5420 + }, + { + "epoch": 0.6151489361702127, + "grad_norm": 41.600032806396484, + "learning_rate": 5e-05, + "loss": 1.3718, + "num_input_tokens_seen": 362585132, + "step": 5421 + }, + { + "epoch": 0.6151489361702127, + "loss": 1.4634222984313965, + "loss_ce": 0.006390952970832586, + "loss_iou": 0.57421875, + "loss_num": 0.06103515625, + "loss_xval": 1.453125, + "num_input_tokens_seen": 362585132, + "step": 5421 + }, + { + "epoch": 0.6152624113475177, + "grad_norm": 28.111900329589844, + "learning_rate": 5e-05, + "loss": 1.3949, + "num_input_tokens_seen": 362651512, + "step": 5422 + }, + { + "epoch": 0.6152624113475177, + "loss": 1.413116216659546, + "loss_ce": 0.008086849935352802, + "loss_iou": 0.55078125, + "loss_num": 0.06103515625, + "loss_xval": 1.40625, + "num_input_tokens_seen": 362651512, + "step": 5422 + }, + { + "epoch": 0.6153758865248227, + "grad_norm": 12.81157112121582, + "learning_rate": 5e-05, + "loss": 1.2011, + "num_input_tokens_seen": 362717948, + "step": 5423 + }, + { + "epoch": 0.6153758865248227, + "loss": 1.201512098312378, + "loss_ce": 0.008885201066732407, + "loss_iou": 0.470703125, + "loss_num": 0.0498046875, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 362717948, + "step": 5423 + }, + { + "epoch": 0.6154893617021276, + "grad_norm": 23.264019012451172, + "learning_rate": 5e-05, + "loss": 1.2506, + "num_input_tokens_seen": 362784892, + "step": 5424 + }, + { + "epoch": 0.6154893617021276, + "loss": 1.341152548789978, + "loss_ce": 0.008144708350300789, + "loss_iou": 0.53125, + "loss_num": 0.053955078125, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 362784892, + "step": 5424 + }, + { + "epoch": 0.6156028368794326, + "grad_norm": 39.13882827758789, + "learning_rate": 5e-05, + "loss": 1.1482, + "num_input_tokens_seen": 362852204, + "step": 5425 + }, + { + "epoch": 0.6156028368794326, + "loss": 1.2519066333770752, + "loss_ce": 0.005812913179397583, + "loss_iou": 0.50390625, + "loss_num": 0.04833984375, + "loss_xval": 1.25, + "num_input_tokens_seen": 362852204, + "step": 5425 + }, + { + "epoch": 0.6157163120567376, + "grad_norm": 24.055225372314453, + "learning_rate": 5e-05, + "loss": 1.5175, + "num_input_tokens_seen": 362919796, + "step": 5426 + }, + { + "epoch": 0.6157163120567376, + "loss": 1.5053201913833618, + "loss_ce": 0.004831929691135883, + "loss_iou": 0.625, + "loss_num": 0.051025390625, + "loss_xval": 1.5, + "num_input_tokens_seen": 362919796, + "step": 5426 + }, + { + "epoch": 0.6158297872340426, + "grad_norm": 30.147672653198242, + "learning_rate": 5e-05, + "loss": 1.1096, + "num_input_tokens_seen": 362986212, + "step": 5427 + }, + { + "epoch": 0.6158297872340426, + "loss": 0.9760938882827759, + "loss_ce": 0.007832158356904984, + "loss_iou": 0.41015625, + "loss_num": 0.029541015625, + "loss_xval": 0.96875, + "num_input_tokens_seen": 362986212, + "step": 5427 + }, + { + "epoch": 0.6159432624113476, + "grad_norm": 20.15474510192871, + "learning_rate": 5e-05, + "loss": 1.3273, + "num_input_tokens_seen": 363053956, + "step": 5428 + }, + { + "epoch": 0.6159432624113476, + "loss": 1.2242323160171509, + "loss_ce": 0.0054823304526507854, + "loss_iou": 0.50390625, + "loss_num": 0.0419921875, + "loss_xval": 1.21875, + "num_input_tokens_seen": 363053956, + "step": 5428 + }, + { + "epoch": 0.6160567375886525, + "grad_norm": 22.243141174316406, + "learning_rate": 5e-05, + "loss": 1.1339, + "num_input_tokens_seen": 363119988, + "step": 5429 + }, + { + "epoch": 0.6160567375886525, + "loss": 1.1342873573303223, + "loss_ce": 0.008799036964774132, + "loss_iou": 0.48828125, + "loss_num": 0.0296630859375, + "loss_xval": 1.125, + "num_input_tokens_seen": 363119988, + "step": 5429 + }, + { + "epoch": 0.6161702127659574, + "grad_norm": 36.75324630737305, + "learning_rate": 5e-05, + "loss": 1.2509, + "num_input_tokens_seen": 363187832, + "step": 5430 + }, + { + "epoch": 0.6161702127659574, + "loss": 1.160115122795105, + "loss_ce": 0.004841663874685764, + "loss_iou": 0.4765625, + "loss_num": 0.040283203125, + "loss_xval": 1.15625, + "num_input_tokens_seen": 363187832, + "step": 5430 + }, + { + "epoch": 0.6162836879432624, + "grad_norm": 30.62025260925293, + "learning_rate": 5e-05, + "loss": 1.5345, + "num_input_tokens_seen": 363254428, + "step": 5431 + }, + { + "epoch": 0.6162836879432624, + "loss": 1.48964524269104, + "loss_ce": 0.005270276218652725, + "loss_iou": 0.6328125, + "loss_num": 0.04345703125, + "loss_xval": 1.484375, + "num_input_tokens_seen": 363254428, + "step": 5431 + }, + { + "epoch": 0.6163971631205674, + "grad_norm": 28.367568969726562, + "learning_rate": 5e-05, + "loss": 1.2275, + "num_input_tokens_seen": 363320704, + "step": 5432 + }, + { + "epoch": 0.6163971631205674, + "loss": 1.2449122667312622, + "loss_ce": 0.00614274712279439, + "loss_iou": 0.50390625, + "loss_num": 0.046630859375, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 363320704, + "step": 5432 + }, + { + "epoch": 0.6165106382978723, + "grad_norm": 38.46651840209961, + "learning_rate": 5e-05, + "loss": 1.1733, + "num_input_tokens_seen": 363387892, + "step": 5433 + }, + { + "epoch": 0.6165106382978723, + "loss": 1.0877643823623657, + "loss_ce": 0.007197997998446226, + "loss_iou": 0.453125, + "loss_num": 0.03466796875, + "loss_xval": 1.078125, + "num_input_tokens_seen": 363387892, + "step": 5433 + }, + { + "epoch": 0.6166241134751773, + "grad_norm": 32.11519241333008, + "learning_rate": 5e-05, + "loss": 1.1106, + "num_input_tokens_seen": 363455688, + "step": 5434 + }, + { + "epoch": 0.6166241134751773, + "loss": 1.0279512405395508, + "loss_ce": 0.005490320734679699, + "loss_iou": 0.435546875, + "loss_num": 0.030029296875, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 363455688, + "step": 5434 + }, + { + "epoch": 0.6167375886524823, + "grad_norm": 26.693923950195312, + "learning_rate": 5e-05, + "loss": 1.4311, + "num_input_tokens_seen": 363522836, + "step": 5435 + }, + { + "epoch": 0.6167375886524823, + "loss": 1.3318917751312256, + "loss_ce": 0.006208103150129318, + "loss_iou": 0.5546875, + "loss_num": 0.0419921875, + "loss_xval": 1.328125, + "num_input_tokens_seen": 363522836, + "step": 5435 + }, + { + "epoch": 0.6168510638297873, + "grad_norm": 27.996692657470703, + "learning_rate": 5e-05, + "loss": 1.137, + "num_input_tokens_seen": 363590016, + "step": 5436 + }, + { + "epoch": 0.6168510638297873, + "loss": 1.179323434829712, + "loss_ce": 0.004518694244325161, + "loss_iou": 0.451171875, + "loss_num": 0.0546875, + "loss_xval": 1.171875, + "num_input_tokens_seen": 363590016, + "step": 5436 + }, + { + "epoch": 0.6169645390070922, + "grad_norm": 26.51134490966797, + "learning_rate": 5e-05, + "loss": 1.2784, + "num_input_tokens_seen": 363656544, + "step": 5437 + }, + { + "epoch": 0.6169645390070922, + "loss": 1.4636602401733398, + "loss_ce": 0.005652374587953091, + "loss_iou": 0.58203125, + "loss_num": 0.058837890625, + "loss_xval": 1.4609375, + "num_input_tokens_seen": 363656544, + "step": 5437 + }, + { + "epoch": 0.6170780141843971, + "grad_norm": 22.06644630432129, + "learning_rate": 5e-05, + "loss": 1.3068, + "num_input_tokens_seen": 363722528, + "step": 5438 + }, + { + "epoch": 0.6170780141843971, + "loss": 1.1983554363250732, + "loss_ce": 0.010855487547814846, + "loss_iou": 0.494140625, + "loss_num": 0.0400390625, + "loss_xval": 1.1875, + "num_input_tokens_seen": 363722528, + "step": 5438 + }, + { + "epoch": 0.6171914893617021, + "grad_norm": 43.147056579589844, + "learning_rate": 5e-05, + "loss": 1.2037, + "num_input_tokens_seen": 363789112, + "step": 5439 + }, + { + "epoch": 0.6171914893617021, + "loss": 1.1100471019744873, + "loss_ce": 0.0036017834208905697, + "loss_iou": 0.46875, + "loss_num": 0.033447265625, + "loss_xval": 1.109375, + "num_input_tokens_seen": 363789112, + "step": 5439 + }, + { + "epoch": 0.6173049645390071, + "grad_norm": 35.60696029663086, + "learning_rate": 5e-05, + "loss": 1.17, + "num_input_tokens_seen": 363855496, + "step": 5440 + }, + { + "epoch": 0.6173049645390071, + "loss": 1.189126968383789, + "loss_ce": 0.009195367805659771, + "loss_iou": 0.4921875, + "loss_num": 0.039306640625, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 363855496, + "step": 5440 + }, + { + "epoch": 0.617418439716312, + "grad_norm": 35.864768981933594, + "learning_rate": 5e-05, + "loss": 1.568, + "num_input_tokens_seen": 363922428, + "step": 5441 + }, + { + "epoch": 0.617418439716312, + "loss": 1.8852202892303467, + "loss_ce": 0.006314092315733433, + "loss_iou": 0.7109375, + "loss_num": 0.091796875, + "loss_xval": 1.875, + "num_input_tokens_seen": 363922428, + "step": 5441 + }, + { + "epoch": 0.617531914893617, + "grad_norm": 36.27168273925781, + "learning_rate": 5e-05, + "loss": 1.2654, + "num_input_tokens_seen": 363989064, + "step": 5442 + }, + { + "epoch": 0.617531914893617, + "loss": 1.5116709470748901, + "loss_ce": 0.010694384574890137, + "loss_iou": 0.6484375, + "loss_num": 0.040283203125, + "loss_xval": 1.5, + "num_input_tokens_seen": 363989064, + "step": 5442 + }, + { + "epoch": 0.617645390070922, + "grad_norm": 26.82082176208496, + "learning_rate": 5e-05, + "loss": 1.4034, + "num_input_tokens_seen": 364053684, + "step": 5443 + }, + { + "epoch": 0.617645390070922, + "loss": 1.4721145629882812, + "loss_ce": 0.0067825922742486, + "loss_iou": 0.546875, + "loss_num": 0.07470703125, + "loss_xval": 1.46875, + "num_input_tokens_seen": 364053684, + "step": 5443 + }, + { + "epoch": 0.617758865248227, + "grad_norm": 19.101526260375977, + "learning_rate": 5e-05, + "loss": 1.2446, + "num_input_tokens_seen": 364120508, + "step": 5444 + }, + { + "epoch": 0.617758865248227, + "loss": 1.399274230003357, + "loss_ce": 0.008649258874356747, + "loss_iou": 0.5703125, + "loss_num": 0.050537109375, + "loss_xval": 1.390625, + "num_input_tokens_seen": 364120508, + "step": 5444 + }, + { + "epoch": 0.617872340425532, + "grad_norm": 16.460542678833008, + "learning_rate": 5e-05, + "loss": 0.9848, + "num_input_tokens_seen": 364187928, + "step": 5445 + }, + { + "epoch": 0.617872340425532, + "loss": 1.042243480682373, + "loss_ce": 0.0022044978104531765, + "loss_iou": 0.43359375, + "loss_num": 0.034912109375, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 364187928, + "step": 5445 + }, + { + "epoch": 0.6179858156028368, + "grad_norm": 23.36495018005371, + "learning_rate": 5e-05, + "loss": 1.2335, + "num_input_tokens_seen": 364255364, + "step": 5446 + }, + { + "epoch": 0.6179858156028368, + "loss": 1.0553312301635742, + "loss_ce": 0.00503831822425127, + "loss_iou": 0.4375, + "loss_num": 0.03515625, + "loss_xval": 1.046875, + "num_input_tokens_seen": 364255364, + "step": 5446 + }, + { + "epoch": 0.6180992907801418, + "grad_norm": 36.754783630371094, + "learning_rate": 5e-05, + "loss": 1.3562, + "num_input_tokens_seen": 364321952, + "step": 5447 + }, + { + "epoch": 0.6180992907801418, + "loss": 1.4093143939971924, + "loss_ce": 0.006482352502644062, + "loss_iou": 0.55078125, + "loss_num": 0.060791015625, + "loss_xval": 1.40625, + "num_input_tokens_seen": 364321952, + "step": 5447 + }, + { + "epoch": 0.6182127659574468, + "grad_norm": 41.6567497253418, + "learning_rate": 5e-05, + "loss": 1.5065, + "num_input_tokens_seen": 364387832, + "step": 5448 + }, + { + "epoch": 0.6182127659574468, + "loss": 1.5043065547943115, + "loss_ce": 0.005283140577375889, + "loss_iou": 0.60546875, + "loss_num": 0.05810546875, + "loss_xval": 1.5, + "num_input_tokens_seen": 364387832, + "step": 5448 + }, + { + "epoch": 0.6183262411347518, + "grad_norm": 32.22853088378906, + "learning_rate": 5e-05, + "loss": 1.132, + "num_input_tokens_seen": 364455344, + "step": 5449 + }, + { + "epoch": 0.6183262411347518, + "loss": 1.1150672435760498, + "loss_ce": 0.0076453471556305885, + "loss_iou": 0.44921875, + "loss_num": 0.04150390625, + "loss_xval": 1.109375, + "num_input_tokens_seen": 364455344, + "step": 5449 + }, + { + "epoch": 0.6184397163120567, + "grad_norm": 30.268810272216797, + "learning_rate": 5e-05, + "loss": 1.4247, + "num_input_tokens_seen": 364523336, + "step": 5450 + }, + { + "epoch": 0.6184397163120567, + "loss": 1.3722245693206787, + "loss_ce": 0.003572189249098301, + "loss_iou": 0.5625, + "loss_num": 0.04833984375, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 364523336, + "step": 5450 + }, + { + "epoch": 0.6185531914893617, + "grad_norm": 18.33560562133789, + "learning_rate": 5e-05, + "loss": 1.1761, + "num_input_tokens_seen": 364591068, + "step": 5451 + }, + { + "epoch": 0.6185531914893617, + "loss": 1.2746089696884155, + "loss_ce": 0.009472256526350975, + "loss_iou": 0.490234375, + "loss_num": 0.056396484375, + "loss_xval": 1.265625, + "num_input_tokens_seen": 364591068, + "step": 5451 + }, + { + "epoch": 0.6186666666666667, + "grad_norm": 19.683496475219727, + "learning_rate": 5e-05, + "loss": 1.2046, + "num_input_tokens_seen": 364658528, + "step": 5452 + }, + { + "epoch": 0.6186666666666667, + "loss": 1.2637211084365845, + "loss_ce": 0.005420280620455742, + "loss_iou": 0.51953125, + "loss_num": 0.043701171875, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 364658528, + "step": 5452 + }, + { + "epoch": 0.6187801418439717, + "grad_norm": 31.08062744140625, + "learning_rate": 5e-05, + "loss": 1.2023, + "num_input_tokens_seen": 364725424, + "step": 5453 + }, + { + "epoch": 0.6187801418439717, + "loss": 1.2669751644134521, + "loss_ce": 0.005256411153823137, + "loss_iou": 0.515625, + "loss_num": 0.04638671875, + "loss_xval": 1.265625, + "num_input_tokens_seen": 364725424, + "step": 5453 + }, + { + "epoch": 0.6188936170212765, + "grad_norm": 21.725282669067383, + "learning_rate": 5e-05, + "loss": 1.2366, + "num_input_tokens_seen": 364791112, + "step": 5454 + }, + { + "epoch": 0.6188936170212765, + "loss": 1.1531858444213867, + "loss_ce": 0.015368564985692501, + "loss_iou": 0.44140625, + "loss_num": 0.05078125, + "loss_xval": 1.140625, + "num_input_tokens_seen": 364791112, + "step": 5454 + }, + { + "epoch": 0.6190070921985815, + "grad_norm": 21.75276756286621, + "learning_rate": 5e-05, + "loss": 1.0776, + "num_input_tokens_seen": 364857648, + "step": 5455 + }, + { + "epoch": 0.6190070921985815, + "loss": 1.0416910648345947, + "loss_ce": 0.003361043054610491, + "loss_iou": 0.3984375, + "loss_num": 0.0478515625, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 364857648, + "step": 5455 + }, + { + "epoch": 0.6191205673758865, + "grad_norm": 29.791345596313477, + "learning_rate": 5e-05, + "loss": 1.2129, + "num_input_tokens_seen": 364924340, + "step": 5456 + }, + { + "epoch": 0.6191205673758865, + "loss": 1.328317403793335, + "loss_ce": 0.005075126886367798, + "loss_iou": 0.5703125, + "loss_num": 0.037353515625, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 364924340, + "step": 5456 + }, + { + "epoch": 0.6192340425531915, + "grad_norm": 51.80371856689453, + "learning_rate": 5e-05, + "loss": 1.2458, + "num_input_tokens_seen": 364992308, + "step": 5457 + }, + { + "epoch": 0.6192340425531915, + "loss": 1.0494060516357422, + "loss_ce": 0.009855261072516441, + "loss_iou": 0.4296875, + "loss_num": 0.036376953125, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 364992308, + "step": 5457 + }, + { + "epoch": 0.6193475177304965, + "grad_norm": 55.30954360961914, + "learning_rate": 5e-05, + "loss": 1.5807, + "num_input_tokens_seen": 365059288, + "step": 5458 + }, + { + "epoch": 0.6193475177304965, + "loss": 1.3469990491867065, + "loss_ce": 0.0017841632943600416, + "loss_iou": 0.54296875, + "loss_num": 0.05224609375, + "loss_xval": 1.34375, + "num_input_tokens_seen": 365059288, + "step": 5458 + }, + { + "epoch": 0.6194609929078014, + "grad_norm": 19.00155258178711, + "learning_rate": 5e-05, + "loss": 1.2297, + "num_input_tokens_seen": 365125736, + "step": 5459 + }, + { + "epoch": 0.6194609929078014, + "loss": 1.416556477546692, + "loss_ce": 0.008841590024530888, + "loss_iou": 0.5546875, + "loss_num": 0.059814453125, + "loss_xval": 1.40625, + "num_input_tokens_seen": 365125736, + "step": 5459 + }, + { + "epoch": 0.6195744680851064, + "grad_norm": 55.063926696777344, + "learning_rate": 5e-05, + "loss": 1.1405, + "num_input_tokens_seen": 365192100, + "step": 5460 + }, + { + "epoch": 0.6195744680851064, + "loss": 1.0738413333892822, + "loss_ce": 0.00816752016544342, + "loss_iou": 0.419921875, + "loss_num": 0.04541015625, + "loss_xval": 1.0625, + "num_input_tokens_seen": 365192100, + "step": 5460 + }, + { + "epoch": 0.6196879432624114, + "grad_norm": 118.20280456542969, + "learning_rate": 5e-05, + "loss": 1.2327, + "num_input_tokens_seen": 365259972, + "step": 5461 + }, + { + "epoch": 0.6196879432624114, + "loss": 1.3182694911956787, + "loss_ce": 0.0038164136931300163, + "loss_iou": 0.55078125, + "loss_num": 0.04248046875, + "loss_xval": 1.3125, + "num_input_tokens_seen": 365259972, + "step": 5461 + }, + { + "epoch": 0.6198014184397164, + "grad_norm": 31.820568084716797, + "learning_rate": 5e-05, + "loss": 1.2467, + "num_input_tokens_seen": 365326472, + "step": 5462 + }, + { + "epoch": 0.6198014184397164, + "loss": 1.1139442920684814, + "loss_ce": 0.0065224068239331245, + "loss_iou": 0.42578125, + "loss_num": 0.05126953125, + "loss_xval": 1.109375, + "num_input_tokens_seen": 365326472, + "step": 5462 + }, + { + "epoch": 0.6199148936170212, + "grad_norm": 33.47092819213867, + "learning_rate": 5e-05, + "loss": 1.232, + "num_input_tokens_seen": 365393744, + "step": 5463 + }, + { + "epoch": 0.6199148936170212, + "loss": 1.0086581707000732, + "loss_ce": 0.0037753453943878412, + "loss_iou": 0.4296875, + "loss_num": 0.029296875, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 365393744, + "step": 5463 + }, + { + "epoch": 0.6200283687943262, + "grad_norm": 24.448760986328125, + "learning_rate": 5e-05, + "loss": 1.2453, + "num_input_tokens_seen": 365461292, + "step": 5464 + }, + { + "epoch": 0.6200283687943262, + "loss": 1.182084560394287, + "loss_ce": 0.004106047563254833, + "loss_iou": 0.48046875, + "loss_num": 0.04345703125, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 365461292, + "step": 5464 + }, + { + "epoch": 0.6201418439716312, + "grad_norm": 22.69968032836914, + "learning_rate": 5e-05, + "loss": 1.2146, + "num_input_tokens_seen": 365526564, + "step": 5465 + }, + { + "epoch": 0.6201418439716312, + "loss": 1.3883689641952515, + "loss_ce": 0.010439297184348106, + "loss_iou": 0.55078125, + "loss_num": 0.0556640625, + "loss_xval": 1.375, + "num_input_tokens_seen": 365526564, + "step": 5465 + }, + { + "epoch": 0.6202553191489362, + "grad_norm": 32.230926513671875, + "learning_rate": 5e-05, + "loss": 1.3291, + "num_input_tokens_seen": 365592676, + "step": 5466 + }, + { + "epoch": 0.6202553191489362, + "loss": 1.3357884883880615, + "loss_ce": 0.00522203091531992, + "loss_iou": 0.490234375, + "loss_num": 0.06982421875, + "loss_xval": 1.328125, + "num_input_tokens_seen": 365592676, + "step": 5466 + }, + { + "epoch": 0.6203687943262411, + "grad_norm": 24.64505386352539, + "learning_rate": 5e-05, + "loss": 1.3792, + "num_input_tokens_seen": 365659568, + "step": 5467 + }, + { + "epoch": 0.6203687943262411, + "loss": 1.428372859954834, + "loss_ce": 0.005521305836737156, + "loss_iou": 0.58984375, + "loss_num": 0.048583984375, + "loss_xval": 1.421875, + "num_input_tokens_seen": 365659568, + "step": 5467 + }, + { + "epoch": 0.6204822695035461, + "grad_norm": 25.94478988647461, + "learning_rate": 5e-05, + "loss": 1.1899, + "num_input_tokens_seen": 365725984, + "step": 5468 + }, + { + "epoch": 0.6204822695035461, + "loss": 1.2756876945495605, + "loss_ce": 0.005668249446898699, + "loss_iou": 0.515625, + "loss_num": 0.047119140625, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 365725984, + "step": 5468 + }, + { + "epoch": 0.6205957446808511, + "grad_norm": 32.039573669433594, + "learning_rate": 5e-05, + "loss": 1.3324, + "num_input_tokens_seen": 365793328, + "step": 5469 + }, + { + "epoch": 0.6205957446808511, + "loss": 1.2070077657699585, + "loss_ce": 0.006324159912765026, + "loss_iou": 0.5078125, + "loss_num": 0.0380859375, + "loss_xval": 1.203125, + "num_input_tokens_seen": 365793328, + "step": 5469 + }, + { + "epoch": 0.6207092198581561, + "grad_norm": 29.053564071655273, + "learning_rate": 5e-05, + "loss": 1.2019, + "num_input_tokens_seen": 365860620, + "step": 5470 + }, + { + "epoch": 0.6207092198581561, + "loss": 1.1032156944274902, + "loss_ce": 0.009099503979086876, + "loss_iou": 0.4375, + "loss_num": 0.0439453125, + "loss_xval": 1.09375, + "num_input_tokens_seen": 365860620, + "step": 5470 + }, + { + "epoch": 0.620822695035461, + "grad_norm": 30.008691787719727, + "learning_rate": 5e-05, + "loss": 1.1831, + "num_input_tokens_seen": 365928580, + "step": 5471 + }, + { + "epoch": 0.620822695035461, + "loss": 1.3021581172943115, + "loss_ce": 0.009189467877149582, + "loss_iou": 0.54296875, + "loss_num": 0.04150390625, + "loss_xval": 1.296875, + "num_input_tokens_seen": 365928580, + "step": 5471 + }, + { + "epoch": 0.6209361702127659, + "grad_norm": 39.491146087646484, + "learning_rate": 5e-05, + "loss": 1.2151, + "num_input_tokens_seen": 365995816, + "step": 5472 + }, + { + "epoch": 0.6209361702127659, + "loss": 1.448158860206604, + "loss_ce": 0.004311246797442436, + "loss_iou": 0.5625, + "loss_num": 0.06396484375, + "loss_xval": 1.4453125, + "num_input_tokens_seen": 365995816, + "step": 5472 + }, + { + "epoch": 0.6210496453900709, + "grad_norm": 26.612838745117188, + "learning_rate": 5e-05, + "loss": 1.3254, + "num_input_tokens_seen": 366062360, + "step": 5473 + }, + { + "epoch": 0.6210496453900709, + "loss": 1.1625101566314697, + "loss_ce": 0.006748429033905268, + "loss_iou": 0.498046875, + "loss_num": 0.0322265625, + "loss_xval": 1.15625, + "num_input_tokens_seen": 366062360, + "step": 5473 + }, + { + "epoch": 0.6211631205673759, + "grad_norm": 24.420120239257812, + "learning_rate": 5e-05, + "loss": 1.0578, + "num_input_tokens_seen": 366129996, + "step": 5474 + }, + { + "epoch": 0.6211631205673759, + "loss": 1.017775297164917, + "loss_ce": 0.0031268487218767405, + "loss_iou": 0.451171875, + "loss_num": 0.022216796875, + "loss_xval": 1.015625, + "num_input_tokens_seen": 366129996, + "step": 5474 + }, + { + "epoch": 0.6212765957446809, + "grad_norm": 30.36160659790039, + "learning_rate": 5e-05, + "loss": 1.1468, + "num_input_tokens_seen": 366196136, + "step": 5475 + }, + { + "epoch": 0.6212765957446809, + "loss": 1.1539615392684937, + "loss_ce": 0.008941968902945518, + "loss_iou": 0.498046875, + "loss_num": 0.029541015625, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 366196136, + "step": 5475 + }, + { + "epoch": 0.6213900709219858, + "grad_norm": 35.029998779296875, + "learning_rate": 5e-05, + "loss": 1.4547, + "num_input_tokens_seen": 366262760, + "step": 5476 + }, + { + "epoch": 0.6213900709219858, + "loss": 1.4389519691467285, + "loss_ce": 0.010241016745567322, + "loss_iou": 0.5703125, + "loss_num": 0.05712890625, + "loss_xval": 1.4296875, + "num_input_tokens_seen": 366262760, + "step": 5476 + }, + { + "epoch": 0.6215035460992908, + "grad_norm": 32.1762809753418, + "learning_rate": 5e-05, + "loss": 1.2501, + "num_input_tokens_seen": 366329672, + "step": 5477 + }, + { + "epoch": 0.6215035460992908, + "loss": 1.35512375831604, + "loss_ce": 0.004049637354910374, + "loss_iou": 0.5078125, + "loss_num": 0.06591796875, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 366329672, + "step": 5477 + }, + { + "epoch": 0.6216170212765958, + "grad_norm": 37.15167236328125, + "learning_rate": 5e-05, + "loss": 1.0593, + "num_input_tokens_seen": 366395504, + "step": 5478 + }, + { + "epoch": 0.6216170212765958, + "loss": 1.1046745777130127, + "loss_ce": 0.006041822023689747, + "loss_iou": 0.46484375, + "loss_num": 0.033447265625, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 366395504, + "step": 5478 + }, + { + "epoch": 0.6217304964539007, + "grad_norm": 28.516862869262695, + "learning_rate": 5e-05, + "loss": 1.357, + "num_input_tokens_seen": 366461984, + "step": 5479 + }, + { + "epoch": 0.6217304964539007, + "loss": 1.3253698348999023, + "loss_ce": 0.008963601663708687, + "loss_iou": 0.53125, + "loss_num": 0.0498046875, + "loss_xval": 1.3125, + "num_input_tokens_seen": 366461984, + "step": 5479 + }, + { + "epoch": 0.6218439716312056, + "grad_norm": 24.06541633605957, + "learning_rate": 5e-05, + "loss": 1.1002, + "num_input_tokens_seen": 366529404, + "step": 5480 + }, + { + "epoch": 0.6218439716312056, + "loss": 1.2195956707000732, + "loss_ce": 0.0037752909120172262, + "loss_iou": 0.490234375, + "loss_num": 0.046875, + "loss_xval": 1.21875, + "num_input_tokens_seen": 366529404, + "step": 5480 + }, + { + "epoch": 0.6219574468085106, + "grad_norm": 27.15410804748535, + "learning_rate": 5e-05, + "loss": 1.1573, + "num_input_tokens_seen": 366596604, + "step": 5481 + }, + { + "epoch": 0.6219574468085106, + "loss": 1.1293952465057373, + "loss_ce": 0.005371762439608574, + "loss_iou": 0.462890625, + "loss_num": 0.0390625, + "loss_xval": 1.125, + "num_input_tokens_seen": 366596604, + "step": 5481 + }, + { + "epoch": 0.6220709219858156, + "grad_norm": 33.95677185058594, + "learning_rate": 5e-05, + "loss": 1.2904, + "num_input_tokens_seen": 366663880, + "step": 5482 + }, + { + "epoch": 0.6220709219858156, + "loss": 1.1325852870941162, + "loss_ce": 0.0051438757218420506, + "loss_iou": 0.462890625, + "loss_num": 0.040283203125, + "loss_xval": 1.125, + "num_input_tokens_seen": 366663880, + "step": 5482 + }, + { + "epoch": 0.6221843971631206, + "grad_norm": 36.33676528930664, + "learning_rate": 5e-05, + "loss": 1.2731, + "num_input_tokens_seen": 366730360, + "step": 5483 + }, + { + "epoch": 0.6221843971631206, + "loss": 1.3522212505340576, + "loss_ce": 0.006029783748090267, + "loss_iou": 0.55859375, + "loss_num": 0.044921875, + "loss_xval": 1.34375, + "num_input_tokens_seen": 366730360, + "step": 5483 + }, + { + "epoch": 0.6222978723404256, + "grad_norm": 27.489103317260742, + "learning_rate": 5e-05, + "loss": 0.9899, + "num_input_tokens_seen": 366797192, + "step": 5484 + }, + { + "epoch": 0.6222978723404256, + "loss": 0.9196835160255432, + "loss_ce": 0.003667894285172224, + "loss_iou": 0.400390625, + "loss_num": 0.0230712890625, + "loss_xval": 0.9140625, + "num_input_tokens_seen": 366797192, + "step": 5484 + }, + { + "epoch": 0.6224113475177305, + "grad_norm": 124.77582550048828, + "learning_rate": 5e-05, + "loss": 1.0702, + "num_input_tokens_seen": 366864100, + "step": 5485 + }, + { + "epoch": 0.6224113475177305, + "loss": 1.1278163194656372, + "loss_ce": 0.00379289616830647, + "loss_iou": 0.453125, + "loss_num": 0.04345703125, + "loss_xval": 1.125, + "num_input_tokens_seen": 366864100, + "step": 5485 + }, + { + "epoch": 0.6225248226950355, + "grad_norm": 41.89673614501953, + "learning_rate": 5e-05, + "loss": 1.1595, + "num_input_tokens_seen": 366932028, + "step": 5486 + }, + { + "epoch": 0.6225248226950355, + "loss": 1.1987595558166504, + "loss_ce": 0.0068649472668766975, + "loss_iou": 0.474609375, + "loss_num": 0.048583984375, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 366932028, + "step": 5486 + }, + { + "epoch": 0.6226382978723404, + "grad_norm": 26.482799530029297, + "learning_rate": 5e-05, + "loss": 1.2615, + "num_input_tokens_seen": 366999656, + "step": 5487 + }, + { + "epoch": 0.6226382978723404, + "loss": 1.42875075340271, + "loss_ce": 0.004922644235193729, + "loss_iou": 0.5546875, + "loss_num": 0.0625, + "loss_xval": 1.421875, + "num_input_tokens_seen": 366999656, + "step": 5487 + }, + { + "epoch": 0.6227517730496454, + "grad_norm": 30.13232421875, + "learning_rate": 5e-05, + "loss": 1.292, + "num_input_tokens_seen": 367065376, + "step": 5488 + }, + { + "epoch": 0.6227517730496454, + "loss": 1.297929048538208, + "loss_ce": 0.007889924570918083, + "loss_iou": 0.486328125, + "loss_num": 0.0634765625, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 367065376, + "step": 5488 + }, + { + "epoch": 0.6228652482269503, + "grad_norm": 28.779272079467773, + "learning_rate": 5e-05, + "loss": 1.397, + "num_input_tokens_seen": 367131212, + "step": 5489 + }, + { + "epoch": 0.6228652482269503, + "loss": 1.4021068811416626, + "loss_ce": 0.008063862100243568, + "loss_iou": 0.56640625, + "loss_num": 0.05126953125, + "loss_xval": 1.390625, + "num_input_tokens_seen": 367131212, + "step": 5489 + }, + { + "epoch": 0.6229787234042553, + "grad_norm": 46.178104400634766, + "learning_rate": 5e-05, + "loss": 1.2489, + "num_input_tokens_seen": 367197156, + "step": 5490 + }, + { + "epoch": 0.6229787234042553, + "loss": 1.324089527130127, + "loss_ce": 0.0096364077180624, + "loss_iou": 0.52734375, + "loss_num": 0.051513671875, + "loss_xval": 1.3125, + "num_input_tokens_seen": 367197156, + "step": 5490 + }, + { + "epoch": 0.6230921985815603, + "grad_norm": 57.40777587890625, + "learning_rate": 5e-05, + "loss": 1.3196, + "num_input_tokens_seen": 367263640, + "step": 5491 + }, + { + "epoch": 0.6230921985815603, + "loss": 1.2051188945770264, + "loss_ce": 0.007853157818317413, + "loss_iou": 0.51171875, + "loss_num": 0.034423828125, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 367263640, + "step": 5491 + }, + { + "epoch": 0.6232056737588653, + "grad_norm": 30.214082717895508, + "learning_rate": 5e-05, + "loss": 1.0194, + "num_input_tokens_seen": 367330072, + "step": 5492 + }, + { + "epoch": 0.6232056737588653, + "loss": 0.977163553237915, + "loss_ce": 0.006704549305140972, + "loss_iou": 0.396484375, + "loss_num": 0.035400390625, + "loss_xval": 0.96875, + "num_input_tokens_seen": 367330072, + "step": 5492 + }, + { + "epoch": 0.6233191489361702, + "grad_norm": 36.715328216552734, + "learning_rate": 5e-05, + "loss": 1.2404, + "num_input_tokens_seen": 367396768, + "step": 5493 + }, + { + "epoch": 0.6233191489361702, + "loss": 1.2578909397125244, + "loss_ce": 0.005937783047556877, + "loss_iou": 0.5078125, + "loss_num": 0.0478515625, + "loss_xval": 1.25, + "num_input_tokens_seen": 367396768, + "step": 5493 + }, + { + "epoch": 0.6234326241134752, + "grad_norm": 27.33146858215332, + "learning_rate": 5e-05, + "loss": 1.0602, + "num_input_tokens_seen": 367463636, + "step": 5494 + }, + { + "epoch": 0.6234326241134752, + "loss": 1.129012107849121, + "loss_ce": 0.008162526413798332, + "loss_iou": 0.4375, + "loss_num": 0.049560546875, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 367463636, + "step": 5494 + }, + { + "epoch": 0.6235460992907801, + "grad_norm": 20.27959632873535, + "learning_rate": 5e-05, + "loss": 1.3428, + "num_input_tokens_seen": 367530744, + "step": 5495 + }, + { + "epoch": 0.6235460992907801, + "loss": 1.4609538316726685, + "loss_ce": 0.005387364886701107, + "loss_iou": 0.546875, + "loss_num": 0.0712890625, + "loss_xval": 1.453125, + "num_input_tokens_seen": 367530744, + "step": 5495 + }, + { + "epoch": 0.6236595744680851, + "grad_norm": 23.65703773498535, + "learning_rate": 5e-05, + "loss": 1.177, + "num_input_tokens_seen": 367598000, + "step": 5496 + }, + { + "epoch": 0.6236595744680851, + "loss": 1.2350879907608032, + "loss_ce": 0.007548989728093147, + "loss_iou": 0.51171875, + "loss_num": 0.04150390625, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 367598000, + "step": 5496 + }, + { + "epoch": 0.62377304964539, + "grad_norm": 21.23586082458496, + "learning_rate": 5e-05, + "loss": 1.0698, + "num_input_tokens_seen": 367665360, + "step": 5497 + }, + { + "epoch": 0.62377304964539, + "loss": 1.1656228303909302, + "loss_ce": 0.004001753870397806, + "loss_iou": 0.4609375, + "loss_num": 0.04833984375, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 367665360, + "step": 5497 + }, + { + "epoch": 0.623886524822695, + "grad_norm": 36.45185470581055, + "learning_rate": 5e-05, + "loss": 1.2241, + "num_input_tokens_seen": 367731744, + "step": 5498 + }, + { + "epoch": 0.623886524822695, + "loss": 1.2006080150604248, + "loss_ce": 0.00676040630787611, + "loss_iou": 0.486328125, + "loss_num": 0.044189453125, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 367731744, + "step": 5498 + }, + { + "epoch": 0.624, + "grad_norm": 28.394020080566406, + "learning_rate": 5e-05, + "loss": 1.3767, + "num_input_tokens_seen": 367799136, + "step": 5499 + }, + { + "epoch": 0.624, + "loss": 1.4827930927276611, + "loss_ce": 0.007695349399000406, + "loss_iou": 0.62109375, + "loss_num": 0.046630859375, + "loss_xval": 1.4765625, + "num_input_tokens_seen": 367799136, + "step": 5499 + }, + { + "epoch": 0.624113475177305, + "grad_norm": 16.622989654541016, + "learning_rate": 5e-05, + "loss": 1.3641, + "num_input_tokens_seen": 367865456, + "step": 5500 + }, + { + "epoch": 0.624113475177305, + "eval_seeclick_CIoU": 0.34997862577438354, + "eval_seeclick_GIoU": 0.3205792307853699, + "eval_seeclick_IoU": 0.44863902032375336, + "eval_seeclick_MAE_all": 0.17489873617887497, + "eval_seeclick_MAE_h": 0.0853498000651598, + "eval_seeclick_MAE_w": 0.16073502600193024, + "eval_seeclick_MAE_x_boxes": 0.2527330666780472, + "eval_seeclick_MAE_y_boxes": 0.14890677854418755, + "eval_seeclick_NUM_probability": 0.9999575614929199, + "eval_seeclick_inside_bbox": 0.612500011920929, + "eval_seeclick_loss": 2.567417621612549, + "eval_seeclick_loss_ce": 0.014671193435788155, + "eval_seeclick_loss_iou": 0.86065673828125, + "eval_seeclick_loss_num": 0.16872215270996094, + "eval_seeclick_loss_xval": 2.56298828125, + "eval_seeclick_runtime": 67.3448, + "eval_seeclick_samples_per_second": 0.698, + "eval_seeclick_steps_per_second": 0.03, + "num_input_tokens_seen": 367865456, + "step": 5500 + }, + { + "epoch": 0.624113475177305, + "eval_icons_CIoU": 0.48249298334121704, + "eval_icons_GIoU": 0.4778600037097931, + "eval_icons_IoU": 0.5295025706291199, + "eval_icons_MAE_all": 0.13443121686577797, + "eval_icons_MAE_h": 0.07235587202012539, + "eval_icons_MAE_w": 0.11206072941422462, + "eval_icons_MAE_x_boxes": 0.1406923420727253, + "eval_icons_MAE_y_boxes": 0.06694543920457363, + "eval_icons_NUM_probability": 0.9999722242355347, + "eval_icons_inside_bbox": 0.7326388955116272, + "eval_icons_loss": 2.3069090843200684, + "eval_icons_loss_ce": 3.162736538797617e-05, + "eval_icons_loss_iou": 0.81787109375, + "eval_icons_loss_num": 0.141693115234375, + "eval_icons_loss_xval": 2.34326171875, + "eval_icons_runtime": 68.6842, + "eval_icons_samples_per_second": 0.728, + "eval_icons_steps_per_second": 0.029, + "num_input_tokens_seen": 367865456, + "step": 5500 + }, + { + "epoch": 0.624113475177305, + "eval_screenspot_CIoU": 0.1898193061351776, + "eval_screenspot_GIoU": 0.15100708852211633, + "eval_screenspot_IoU": 0.316967248916626, + "eval_screenspot_MAE_all": 0.2282434950272242, + "eval_screenspot_MAE_h": 0.14615421493848166, + "eval_screenspot_MAE_w": 0.14729160318771997, + "eval_screenspot_MAE_x_boxes": 0.3853241602579753, + "eval_screenspot_MAE_y_boxes": 0.11745007832845052, + "eval_screenspot_NUM_probability": 0.9998834133148193, + "eval_screenspot_inside_bbox": 0.5200000007947286, + "eval_screenspot_loss": 3.1819534301757812, + "eval_screenspot_loss_ce": 0.012186208118995031, + "eval_screenspot_loss_iou": 1.0006510416666667, + "eval_screenspot_loss_num": 0.24509684244791666, + "eval_screenspot_loss_xval": 3.2265625, + "eval_screenspot_runtime": 117.7271, + "eval_screenspot_samples_per_second": 0.756, + "eval_screenspot_steps_per_second": 0.025, + "num_input_tokens_seen": 367865456, + "step": 5500 + }, + { + "epoch": 0.624113475177305, + "eval_compot_CIoU": 0.21379275619983673, + "eval_compot_GIoU": 0.1755223646759987, + "eval_compot_IoU": 0.32211096584796906, + "eval_compot_MAE_all": 0.23349326848983765, + "eval_compot_MAE_h": 0.11809191852807999, + "eval_compot_MAE_w": 0.2139819785952568, + "eval_compot_MAE_x_boxes": 0.2654552757740021, + "eval_compot_MAE_y_boxes": 0.1387285813689232, + "eval_compot_NUM_probability": 0.9997577369213104, + "eval_compot_inside_bbox": 0.4375, + "eval_compot_loss": 3.2535059452056885, + "eval_compot_loss_ce": 0.0035757344448938966, + "eval_compot_loss_iou": 1.031982421875, + "eval_compot_loss_num": 0.244171142578125, + "eval_compot_loss_xval": 3.28662109375, + "eval_compot_runtime": 79.202, + "eval_compot_samples_per_second": 0.631, + "eval_compot_steps_per_second": 0.025, + "num_input_tokens_seen": 367865456, + "step": 5500 + }, + { + "epoch": 0.624113475177305, + "loss": 3.1235427856445312, + "loss_ce": 0.0034254291094839573, + "loss_iou": 1.0078125, + "loss_num": 0.2197265625, + "loss_xval": 3.125, + "num_input_tokens_seen": 367865456, + "step": 5500 + }, + { + "epoch": 0.62422695035461, + "grad_norm": 14.789314270019531, + "learning_rate": 5e-05, + "loss": 1.2504, + "num_input_tokens_seen": 367931828, + "step": 5501 + }, + { + "epoch": 0.62422695035461, + "loss": 1.296309232711792, + "loss_ce": 0.00919987540692091, + "loss_iou": 0.498046875, + "loss_num": 0.058349609375, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 367931828, + "step": 5501 + }, + { + "epoch": 0.6243404255319149, + "grad_norm": 41.1438102722168, + "learning_rate": 5e-05, + "loss": 1.137, + "num_input_tokens_seen": 367998708, + "step": 5502 + }, + { + "epoch": 0.6243404255319149, + "loss": 1.1391762495040894, + "loss_ce": 0.008072692900896072, + "loss_iou": 0.435546875, + "loss_num": 0.05224609375, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 367998708, + "step": 5502 + }, + { + "epoch": 0.6244539007092199, + "grad_norm": 21.062965393066406, + "learning_rate": 5e-05, + "loss": 1.2948, + "num_input_tokens_seen": 368065648, + "step": 5503 + }, + { + "epoch": 0.6244539007092199, + "loss": 1.471343755722046, + "loss_ce": 0.005523421801626682, + "loss_iou": 0.578125, + "loss_num": 0.061767578125, + "loss_xval": 1.46875, + "num_input_tokens_seen": 368065648, + "step": 5503 + }, + { + "epoch": 0.6245673758865248, + "grad_norm": 91.71260833740234, + "learning_rate": 5e-05, + "loss": 1.2927, + "num_input_tokens_seen": 368131912, + "step": 5504 + }, + { + "epoch": 0.6245673758865248, + "loss": 1.1891651153564453, + "loss_ce": 0.006547925993800163, + "loss_iou": 0.466796875, + "loss_num": 0.0498046875, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 368131912, + "step": 5504 + }, + { + "epoch": 0.6246808510638298, + "grad_norm": 35.89158630371094, + "learning_rate": 5e-05, + "loss": 1.2921, + "num_input_tokens_seen": 368199344, + "step": 5505 + }, + { + "epoch": 0.6246808510638298, + "loss": 1.2740578651428223, + "loss_ce": 0.004038251005113125, + "loss_iou": 0.54296875, + "loss_num": 0.037353515625, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 368199344, + "step": 5505 + }, + { + "epoch": 0.6247943262411347, + "grad_norm": 38.20760726928711, + "learning_rate": 5e-05, + "loss": 1.3318, + "num_input_tokens_seen": 368266164, + "step": 5506 + }, + { + "epoch": 0.6247943262411347, + "loss": 1.3227118253707886, + "loss_ce": 0.004352404735982418, + "loss_iou": 0.55078125, + "loss_num": 0.0439453125, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 368266164, + "step": 5506 + }, + { + "epoch": 0.6249078014184397, + "grad_norm": 26.374662399291992, + "learning_rate": 5e-05, + "loss": 1.2914, + "num_input_tokens_seen": 368333284, + "step": 5507 + }, + { + "epoch": 0.6249078014184397, + "loss": 1.2939727306365967, + "loss_ce": 0.005886845290660858, + "loss_iou": 0.55078125, + "loss_num": 0.036865234375, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 368333284, + "step": 5507 + }, + { + "epoch": 0.6250212765957447, + "grad_norm": 19.922334671020508, + "learning_rate": 5e-05, + "loss": 1.1084, + "num_input_tokens_seen": 368400964, + "step": 5508 + }, + { + "epoch": 0.6250212765957447, + "loss": 1.1724947690963745, + "loss_ce": 0.004526033997535706, + "loss_iou": 0.47265625, + "loss_num": 0.044921875, + "loss_xval": 1.171875, + "num_input_tokens_seen": 368400964, + "step": 5508 + }, + { + "epoch": 0.6251347517730497, + "grad_norm": 23.650571823120117, + "learning_rate": 5e-05, + "loss": 1.3592, + "num_input_tokens_seen": 368469516, + "step": 5509 + }, + { + "epoch": 0.6251347517730497, + "loss": 1.2300028800964355, + "loss_ce": 0.010764592327177525, + "loss_iou": 0.51171875, + "loss_num": 0.038330078125, + "loss_xval": 1.21875, + "num_input_tokens_seen": 368469516, + "step": 5509 + }, + { + "epoch": 0.6252482269503546, + "grad_norm": 30.046056747436523, + "learning_rate": 5e-05, + "loss": 1.3841, + "num_input_tokens_seen": 368536240, + "step": 5510 + }, + { + "epoch": 0.6252482269503546, + "loss": 1.4059075117111206, + "loss_ce": 0.004540387541055679, + "loss_iou": 0.53515625, + "loss_num": 0.06591796875, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 368536240, + "step": 5510 + }, + { + "epoch": 0.6253617021276596, + "grad_norm": 55.44031524658203, + "learning_rate": 5e-05, + "loss": 1.3255, + "num_input_tokens_seen": 368603144, + "step": 5511 + }, + { + "epoch": 0.6253617021276596, + "loss": 1.2179065942764282, + "loss_ce": 0.005015967879444361, + "loss_iou": 0.52734375, + "loss_num": 0.03173828125, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 368603144, + "step": 5511 + }, + { + "epoch": 0.6254751773049645, + "grad_norm": 28.13646125793457, + "learning_rate": 5e-05, + "loss": 1.1239, + "num_input_tokens_seen": 368670300, + "step": 5512 + }, + { + "epoch": 0.6254751773049645, + "loss": 1.1412949562072754, + "loss_ce": 0.006773441564291716, + "loss_iou": 0.419921875, + "loss_num": 0.058837890625, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 368670300, + "step": 5512 + }, + { + "epoch": 0.6255886524822695, + "grad_norm": 24.5393123626709, + "learning_rate": 5e-05, + "loss": 1.3118, + "num_input_tokens_seen": 368736948, + "step": 5513 + }, + { + "epoch": 0.6255886524822695, + "loss": 1.241267204284668, + "loss_ce": 0.0039624907076358795, + "loss_iou": 0.48828125, + "loss_num": 0.0517578125, + "loss_xval": 1.234375, + "num_input_tokens_seen": 368736948, + "step": 5513 + }, + { + "epoch": 0.6257021276595744, + "grad_norm": 21.074975967407227, + "learning_rate": 5e-05, + "loss": 1.2649, + "num_input_tokens_seen": 368803328, + "step": 5514 + }, + { + "epoch": 0.6257021276595744, + "loss": 1.2446134090423584, + "loss_ce": 0.004867222625762224, + "loss_iou": 0.53125, + "loss_num": 0.03515625, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 368803328, + "step": 5514 + }, + { + "epoch": 0.6258156028368794, + "grad_norm": 25.32822036743164, + "learning_rate": 5e-05, + "loss": 1.2243, + "num_input_tokens_seen": 368869736, + "step": 5515 + }, + { + "epoch": 0.6258156028368794, + "loss": 1.2867088317871094, + "loss_ce": 0.00936507061123848, + "loss_iou": 0.53125, + "loss_num": 0.043212890625, + "loss_xval": 1.28125, + "num_input_tokens_seen": 368869736, + "step": 5515 + }, + { + "epoch": 0.6259290780141844, + "grad_norm": 39.397064208984375, + "learning_rate": 5e-05, + "loss": 1.3398, + "num_input_tokens_seen": 368937008, + "step": 5516 + }, + { + "epoch": 0.6259290780141844, + "loss": 1.2347612380981445, + "loss_ce": 0.007222077809274197, + "loss_iou": 0.515625, + "loss_num": 0.039306640625, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 368937008, + "step": 5516 + }, + { + "epoch": 0.6260425531914894, + "grad_norm": 53.135135650634766, + "learning_rate": 5e-05, + "loss": 1.2834, + "num_input_tokens_seen": 369004232, + "step": 5517 + }, + { + "epoch": 0.6260425531914894, + "loss": 1.4488252401351929, + "loss_ce": 0.007418968249112368, + "loss_iou": 0.6015625, + "loss_num": 0.047607421875, + "loss_xval": 1.4375, + "num_input_tokens_seen": 369004232, + "step": 5517 + }, + { + "epoch": 0.6261560283687944, + "grad_norm": 21.599666595458984, + "learning_rate": 5e-05, + "loss": 1.0942, + "num_input_tokens_seen": 369070852, + "step": 5518 + }, + { + "epoch": 0.6261560283687944, + "loss": 1.0689901113510132, + "loss_ce": 0.007954999804496765, + "loss_iou": 0.435546875, + "loss_num": 0.037841796875, + "loss_xval": 1.0625, + "num_input_tokens_seen": 369070852, + "step": 5518 + }, + { + "epoch": 0.6262695035460993, + "grad_norm": 20.920324325561523, + "learning_rate": 5e-05, + "loss": 1.0698, + "num_input_tokens_seen": 369137772, + "step": 5519 + }, + { + "epoch": 0.6262695035460993, + "loss": 0.9910410642623901, + "loss_ce": 0.005445410963147879, + "loss_iou": 0.412109375, + "loss_num": 0.032470703125, + "loss_xval": 0.984375, + "num_input_tokens_seen": 369137772, + "step": 5519 + }, + { + "epoch": 0.6263829787234042, + "grad_norm": 32.569637298583984, + "learning_rate": 5e-05, + "loss": 1.3236, + "num_input_tokens_seen": 369203948, + "step": 5520 + }, + { + "epoch": 0.6263829787234042, + "loss": 1.5043771266937256, + "loss_ce": 0.008283352479338646, + "loss_iou": 0.60546875, + "loss_num": 0.056640625, + "loss_xval": 1.5, + "num_input_tokens_seen": 369203948, + "step": 5520 + }, + { + "epoch": 0.6264964539007092, + "grad_norm": 27.33081817626953, + "learning_rate": 5e-05, + "loss": 1.3079, + "num_input_tokens_seen": 369271436, + "step": 5521 + }, + { + "epoch": 0.6264964539007092, + "loss": 1.285871982574463, + "loss_ce": 0.006941423285752535, + "loss_iou": 0.5078125, + "loss_num": 0.052001953125, + "loss_xval": 1.28125, + "num_input_tokens_seen": 369271436, + "step": 5521 + }, + { + "epoch": 0.6266099290780142, + "grad_norm": 21.616897583007812, + "learning_rate": 5e-05, + "loss": 1.1108, + "num_input_tokens_seen": 369338456, + "step": 5522 + }, + { + "epoch": 0.6266099290780142, + "loss": 1.2216198444366455, + "loss_ce": 0.004334632307291031, + "loss_iou": 0.4921875, + "loss_num": 0.046630859375, + "loss_xval": 1.21875, + "num_input_tokens_seen": 369338456, + "step": 5522 + }, + { + "epoch": 0.6267234042553191, + "grad_norm": 24.69090461730957, + "learning_rate": 5e-05, + "loss": 1.1568, + "num_input_tokens_seen": 369405624, + "step": 5523 + }, + { + "epoch": 0.6267234042553191, + "loss": 1.2017897367477417, + "loss_ce": 0.004035827703773975, + "loss_iou": 0.484375, + "loss_num": 0.045654296875, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 369405624, + "step": 5523 + }, + { + "epoch": 0.6268368794326241, + "grad_norm": 33.640716552734375, + "learning_rate": 5e-05, + "loss": 1.1548, + "num_input_tokens_seen": 369472284, + "step": 5524 + }, + { + "epoch": 0.6268368794326241, + "loss": 1.2124090194702148, + "loss_ce": 0.009772224351763725, + "loss_iou": 0.486328125, + "loss_num": 0.04638671875, + "loss_xval": 1.203125, + "num_input_tokens_seen": 369472284, + "step": 5524 + }, + { + "epoch": 0.6269503546099291, + "grad_norm": 24.141748428344727, + "learning_rate": 5e-05, + "loss": 1.3871, + "num_input_tokens_seen": 369539172, + "step": 5525 + }, + { + "epoch": 0.6269503546099291, + "loss": 1.4550654888153076, + "loss_ce": 0.0068233017809689045, + "loss_iou": 0.5625, + "loss_num": 0.0654296875, + "loss_xval": 1.4453125, + "num_input_tokens_seen": 369539172, + "step": 5525 + }, + { + "epoch": 0.6270638297872341, + "grad_norm": 82.68095397949219, + "learning_rate": 5e-05, + "loss": 1.3145, + "num_input_tokens_seen": 369605088, + "step": 5526 + }, + { + "epoch": 0.6270638297872341, + "loss": 1.3522818088531494, + "loss_ce": 0.008531739935278893, + "loss_iou": 0.546875, + "loss_num": 0.05029296875, + "loss_xval": 1.34375, + "num_input_tokens_seen": 369605088, + "step": 5526 + }, + { + "epoch": 0.627177304964539, + "grad_norm": 23.29595184326172, + "learning_rate": 5e-05, + "loss": 0.9905, + "num_input_tokens_seen": 369671980, + "step": 5527 + }, + { + "epoch": 0.627177304964539, + "loss": 1.046661138534546, + "loss_ce": 0.006133706774562597, + "loss_iou": 0.4453125, + "loss_num": 0.030517578125, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 369671980, + "step": 5527 + }, + { + "epoch": 0.6272907801418439, + "grad_norm": 31.395030975341797, + "learning_rate": 5e-05, + "loss": 1.1066, + "num_input_tokens_seen": 369739224, + "step": 5528 + }, + { + "epoch": 0.6272907801418439, + "loss": 1.0816292762756348, + "loss_ce": 0.0020393850281834602, + "loss_iou": 0.46875, + "loss_num": 0.028076171875, + "loss_xval": 1.078125, + "num_input_tokens_seen": 369739224, + "step": 5528 + }, + { + "epoch": 0.6274042553191489, + "grad_norm": 115.2126693725586, + "learning_rate": 5e-05, + "loss": 1.1739, + "num_input_tokens_seen": 369805656, + "step": 5529 + }, + { + "epoch": 0.6274042553191489, + "loss": 1.1962908506393433, + "loss_ce": 0.0063493940979242325, + "loss_iou": 0.421875, + "loss_num": 0.06884765625, + "loss_xval": 1.1875, + "num_input_tokens_seen": 369805656, + "step": 5529 + }, + { + "epoch": 0.6275177304964539, + "grad_norm": 41.03136444091797, + "learning_rate": 5e-05, + "loss": 1.2922, + "num_input_tokens_seen": 369872588, + "step": 5530 + }, + { + "epoch": 0.6275177304964539, + "loss": 1.2510969638824463, + "loss_ce": 0.003050051163882017, + "loss_iou": 0.498046875, + "loss_num": 0.0498046875, + "loss_xval": 1.25, + "num_input_tokens_seen": 369872588, + "step": 5530 + }, + { + "epoch": 0.6276312056737589, + "grad_norm": 54.76342010498047, + "learning_rate": 5e-05, + "loss": 1.2701, + "num_input_tokens_seen": 369938732, + "step": 5531 + }, + { + "epoch": 0.6276312056737589, + "loss": 1.2753970623016357, + "loss_ce": 0.026862001046538353, + "loss_iou": 0.490234375, + "loss_num": 0.0537109375, + "loss_xval": 1.25, + "num_input_tokens_seen": 369938732, + "step": 5531 + }, + { + "epoch": 0.6277446808510638, + "grad_norm": 20.975244522094727, + "learning_rate": 5e-05, + "loss": 1.352, + "num_input_tokens_seen": 370005516, + "step": 5532 + }, + { + "epoch": 0.6277446808510638, + "loss": 1.480180025100708, + "loss_ce": 0.008500298485159874, + "loss_iou": 0.61328125, + "loss_num": 0.048583984375, + "loss_xval": 1.46875, + "num_input_tokens_seen": 370005516, + "step": 5532 + }, + { + "epoch": 0.6278581560283688, + "grad_norm": 24.344270706176758, + "learning_rate": 5e-05, + "loss": 1.2166, + "num_input_tokens_seen": 370071120, + "step": 5533 + }, + { + "epoch": 0.6278581560283688, + "loss": 1.2295786142349243, + "loss_ce": 0.006434065289795399, + "loss_iou": 0.474609375, + "loss_num": 0.054931640625, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 370071120, + "step": 5533 + }, + { + "epoch": 0.6279716312056738, + "grad_norm": 33.08083724975586, + "learning_rate": 5e-05, + "loss": 1.1099, + "num_input_tokens_seen": 370138344, + "step": 5534 + }, + { + "epoch": 0.6279716312056738, + "loss": 1.0295743942260742, + "loss_ce": 0.006136854644864798, + "loss_iou": 0.4296875, + "loss_num": 0.032958984375, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 370138344, + "step": 5534 + }, + { + "epoch": 0.6280851063829788, + "grad_norm": 37.042694091796875, + "learning_rate": 5e-05, + "loss": 1.4146, + "num_input_tokens_seen": 370206744, + "step": 5535 + }, + { + "epoch": 0.6280851063829788, + "loss": 1.536801815032959, + "loss_ce": 0.005551897920668125, + "loss_iou": 0.57421875, + "loss_num": 0.0771484375, + "loss_xval": 1.53125, + "num_input_tokens_seen": 370206744, + "step": 5535 + }, + { + "epoch": 0.6281985815602837, + "grad_norm": 90.71072387695312, + "learning_rate": 5e-05, + "loss": 1.2796, + "num_input_tokens_seen": 370273892, + "step": 5536 + }, + { + "epoch": 0.6281985815602837, + "loss": 1.4339745044708252, + "loss_ce": 0.006240133661776781, + "loss_iou": 0.60546875, + "loss_num": 0.044189453125, + "loss_xval": 1.4296875, + "num_input_tokens_seen": 370273892, + "step": 5536 + }, + { + "epoch": 0.6283120567375886, + "grad_norm": 22.936967849731445, + "learning_rate": 5e-05, + "loss": 1.1166, + "num_input_tokens_seen": 370341072, + "step": 5537 + }, + { + "epoch": 0.6283120567375886, + "loss": 1.0430526733398438, + "loss_ce": 0.0056992280296981335, + "loss_iou": 0.41796875, + "loss_num": 0.040283203125, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 370341072, + "step": 5537 + }, + { + "epoch": 0.6284255319148936, + "grad_norm": 21.6973876953125, + "learning_rate": 5e-05, + "loss": 1.1958, + "num_input_tokens_seen": 370407716, + "step": 5538 + }, + { + "epoch": 0.6284255319148936, + "loss": 1.324796199798584, + "loss_ce": 0.005460226908326149, + "loss_iou": 0.5234375, + "loss_num": 0.0546875, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 370407716, + "step": 5538 + }, + { + "epoch": 0.6285390070921986, + "grad_norm": 19.99853515625, + "learning_rate": 5e-05, + "loss": 1.0947, + "num_input_tokens_seen": 370474964, + "step": 5539 + }, + { + "epoch": 0.6285390070921986, + "loss": 1.1081888675689697, + "loss_ce": 0.005161442793905735, + "loss_iou": 0.4296875, + "loss_num": 0.048828125, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 370474964, + "step": 5539 + }, + { + "epoch": 0.6286524822695035, + "grad_norm": 24.826711654663086, + "learning_rate": 5e-05, + "loss": 0.9881, + "num_input_tokens_seen": 370542000, + "step": 5540 + }, + { + "epoch": 0.6286524822695035, + "loss": 1.1700814962387085, + "loss_ce": 0.004798304755240679, + "loss_iou": 0.484375, + "loss_num": 0.03955078125, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 370542000, + "step": 5540 + }, + { + "epoch": 0.6287659574468085, + "grad_norm": 23.5643310546875, + "learning_rate": 5e-05, + "loss": 1.2377, + "num_input_tokens_seen": 370608980, + "step": 5541 + }, + { + "epoch": 0.6287659574468085, + "loss": 1.1986582279205322, + "loss_ce": 0.008716778829693794, + "loss_iou": 0.51171875, + "loss_num": 0.03369140625, + "loss_xval": 1.1875, + "num_input_tokens_seen": 370608980, + "step": 5541 + }, + { + "epoch": 0.6288794326241135, + "grad_norm": 26.83608055114746, + "learning_rate": 5e-05, + "loss": 1.2511, + "num_input_tokens_seen": 370676468, + "step": 5542 + }, + { + "epoch": 0.6288794326241135, + "loss": 1.230172872543335, + "loss_ce": 0.007272388786077499, + "loss_iou": 0.48046875, + "loss_num": 0.0517578125, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 370676468, + "step": 5542 + }, + { + "epoch": 0.6289929078014185, + "grad_norm": 34.84340286254883, + "learning_rate": 5e-05, + "loss": 1.4022, + "num_input_tokens_seen": 370743324, + "step": 5543 + }, + { + "epoch": 0.6289929078014185, + "loss": 1.263075590133667, + "loss_ce": 0.004774872213602066, + "loss_iou": 0.53125, + "loss_num": 0.038818359375, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 370743324, + "step": 5543 + }, + { + "epoch": 0.6291063829787235, + "grad_norm": 35.409175872802734, + "learning_rate": 5e-05, + "loss": 1.1954, + "num_input_tokens_seen": 370810228, + "step": 5544 + }, + { + "epoch": 0.6291063829787235, + "loss": 1.0846459865570068, + "loss_ce": 0.005544386804103851, + "loss_iou": 0.40625, + "loss_num": 0.05322265625, + "loss_xval": 1.078125, + "num_input_tokens_seen": 370810228, + "step": 5544 + }, + { + "epoch": 0.6292198581560283, + "grad_norm": 28.51036262512207, + "learning_rate": 5e-05, + "loss": 1.287, + "num_input_tokens_seen": 370876784, + "step": 5545 + }, + { + "epoch": 0.6292198581560283, + "loss": 1.1664514541625977, + "loss_ce": 0.004937133751809597, + "loss_iou": 0.470703125, + "loss_num": 0.044189453125, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 370876784, + "step": 5545 + }, + { + "epoch": 0.6293333333333333, + "grad_norm": 33.55268478393555, + "learning_rate": 5e-05, + "loss": 1.0972, + "num_input_tokens_seen": 370943572, + "step": 5546 + }, + { + "epoch": 0.6293333333333333, + "loss": 1.0573163032531738, + "loss_ce": 0.004093732684850693, + "loss_iou": 0.44921875, + "loss_num": 0.031005859375, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 370943572, + "step": 5546 + }, + { + "epoch": 0.6294468085106383, + "grad_norm": 29.381223678588867, + "learning_rate": 5e-05, + "loss": 1.1191, + "num_input_tokens_seen": 371011164, + "step": 5547 + }, + { + "epoch": 0.6294468085106383, + "loss": 0.9915059804916382, + "loss_ce": 0.0027364143170416355, + "loss_iou": 0.4296875, + "loss_num": 0.0262451171875, + "loss_xval": 0.98828125, + "num_input_tokens_seen": 371011164, + "step": 5547 + }, + { + "epoch": 0.6295602836879433, + "grad_norm": 36.20018768310547, + "learning_rate": 5e-05, + "loss": 1.2048, + "num_input_tokens_seen": 371077956, + "step": 5548 + }, + { + "epoch": 0.6295602836879433, + "loss": 1.341025471687317, + "loss_ce": 0.008994219824671745, + "loss_iou": 0.53125, + "loss_num": 0.0537109375, + "loss_xval": 1.328125, + "num_input_tokens_seen": 371077956, + "step": 5548 + }, + { + "epoch": 0.6296737588652482, + "grad_norm": 40.16315841674805, + "learning_rate": 5e-05, + "loss": 1.2469, + "num_input_tokens_seen": 371144704, + "step": 5549 + }, + { + "epoch": 0.6296737588652482, + "loss": 1.2982606887817383, + "loss_ce": 0.005291879177093506, + "loss_iou": 0.55859375, + "loss_num": 0.035888671875, + "loss_xval": 1.296875, + "num_input_tokens_seen": 371144704, + "step": 5549 + }, + { + "epoch": 0.6297872340425532, + "grad_norm": 36.80409622192383, + "learning_rate": 5e-05, + "loss": 1.1648, + "num_input_tokens_seen": 371212508, + "step": 5550 + }, + { + "epoch": 0.6297872340425532, + "loss": 1.2145390510559082, + "loss_ce": 0.0036016039084643126, + "loss_iou": 0.4765625, + "loss_num": 0.051025390625, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 371212508, + "step": 5550 + }, + { + "epoch": 0.6299007092198582, + "grad_norm": 24.6176700592041, + "learning_rate": 5e-05, + "loss": 1.3821, + "num_input_tokens_seen": 371278188, + "step": 5551 + }, + { + "epoch": 0.6299007092198582, + "loss": 1.355475664138794, + "loss_ce": 0.0063545117154717445, + "loss_iou": 0.57421875, + "loss_num": 0.039794921875, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 371278188, + "step": 5551 + }, + { + "epoch": 0.6300141843971632, + "grad_norm": 18.856660842895508, + "learning_rate": 5e-05, + "loss": 1.1801, + "num_input_tokens_seen": 371344664, + "step": 5552 + }, + { + "epoch": 0.6300141843971632, + "loss": 1.1026995182037354, + "loss_ce": 0.0038226195611059666, + "loss_iou": 0.443359375, + "loss_num": 0.042724609375, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 371344664, + "step": 5552 + }, + { + "epoch": 0.630127659574468, + "grad_norm": 22.533321380615234, + "learning_rate": 5e-05, + "loss": 1.1326, + "num_input_tokens_seen": 371412072, + "step": 5553 + }, + { + "epoch": 0.630127659574468, + "loss": 1.1650073528289795, + "loss_ce": 0.009245593100786209, + "loss_iou": 0.490234375, + "loss_num": 0.03515625, + "loss_xval": 1.15625, + "num_input_tokens_seen": 371412072, + "step": 5553 + }, + { + "epoch": 0.630241134751773, + "grad_norm": 30.735057830810547, + "learning_rate": 5e-05, + "loss": 1.1777, + "num_input_tokens_seen": 371479804, + "step": 5554 + }, + { + "epoch": 0.630241134751773, + "loss": 1.1621439456939697, + "loss_ce": 0.006382239982485771, + "loss_iou": 0.466796875, + "loss_num": 0.0439453125, + "loss_xval": 1.15625, + "num_input_tokens_seen": 371479804, + "step": 5554 + }, + { + "epoch": 0.630354609929078, + "grad_norm": 23.5407772064209, + "learning_rate": 5e-05, + "loss": 1.167, + "num_input_tokens_seen": 371546524, + "step": 5555 + }, + { + "epoch": 0.630354609929078, + "loss": 1.2353326082229614, + "loss_ce": 0.00730524118989706, + "loss_iou": 0.52734375, + "loss_num": 0.034423828125, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 371546524, + "step": 5555 + }, + { + "epoch": 0.630468085106383, + "grad_norm": 19.69687843322754, + "learning_rate": 5e-05, + "loss": 1.08, + "num_input_tokens_seen": 371613156, + "step": 5556 + }, + { + "epoch": 0.630468085106383, + "loss": 1.1906651258468628, + "loss_ce": 0.007803780026733875, + "loss_iou": 0.42578125, + "loss_num": 0.06689453125, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 371613156, + "step": 5556 + }, + { + "epoch": 0.630581560283688, + "grad_norm": 16.37898063659668, + "learning_rate": 5e-05, + "loss": 1.1957, + "num_input_tokens_seen": 371680980, + "step": 5557 + }, + { + "epoch": 0.630581560283688, + "loss": 1.1810529232025146, + "loss_ce": 0.004783276468515396, + "loss_iou": 0.50390625, + "loss_num": 0.032958984375, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 371680980, + "step": 5557 + }, + { + "epoch": 0.6306950354609929, + "grad_norm": 36.008975982666016, + "learning_rate": 5e-05, + "loss": 1.1473, + "num_input_tokens_seen": 371748764, + "step": 5558 + }, + { + "epoch": 0.6306950354609929, + "loss": 1.1713154315948486, + "loss_ce": 0.005055700428783894, + "loss_iou": 0.435546875, + "loss_num": 0.058837890625, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 371748764, + "step": 5558 + }, + { + "epoch": 0.6308085106382979, + "grad_norm": 31.000083923339844, + "learning_rate": 5e-05, + "loss": 1.4691, + "num_input_tokens_seen": 371816188, + "step": 5559 + }, + { + "epoch": 0.6308085106382979, + "loss": 1.4552181959152222, + "loss_ce": 0.007952533662319183, + "loss_iou": 0.6328125, + "loss_num": 0.036865234375, + "loss_xval": 1.4453125, + "num_input_tokens_seen": 371816188, + "step": 5559 + }, + { + "epoch": 0.6309219858156029, + "grad_norm": 51.085350036621094, + "learning_rate": 5e-05, + "loss": 1.2156, + "num_input_tokens_seen": 371882484, + "step": 5560 + }, + { + "epoch": 0.6309219858156029, + "loss": 1.0259021520614624, + "loss_ce": 0.003929451107978821, + "loss_iou": 0.4140625, + "loss_num": 0.03857421875, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 371882484, + "step": 5560 + }, + { + "epoch": 0.6310354609929077, + "grad_norm": 24.94021224975586, + "learning_rate": 5e-05, + "loss": 1.1607, + "num_input_tokens_seen": 371949928, + "step": 5561 + }, + { + "epoch": 0.6310354609929077, + "loss": 1.160111427307129, + "loss_ce": 0.006302841007709503, + "loss_iou": 0.453125, + "loss_num": 0.04931640625, + "loss_xval": 1.15625, + "num_input_tokens_seen": 371949928, + "step": 5561 + }, + { + "epoch": 0.6311489361702127, + "grad_norm": 24.796123504638672, + "learning_rate": 5e-05, + "loss": 0.9604, + "num_input_tokens_seen": 372016788, + "step": 5562 + }, + { + "epoch": 0.6311489361702127, + "loss": 1.0231249332427979, + "loss_ce": 0.008476462215185165, + "loss_iou": 0.431640625, + "loss_num": 0.0302734375, + "loss_xval": 1.015625, + "num_input_tokens_seen": 372016788, + "step": 5562 + }, + { + "epoch": 0.6312624113475177, + "grad_norm": 32.337074279785156, + "learning_rate": 5e-05, + "loss": 1.1743, + "num_input_tokens_seen": 372082736, + "step": 5563 + }, + { + "epoch": 0.6312624113475177, + "loss": 1.3753557205200195, + "loss_ce": 0.010121412575244904, + "loss_iou": 0.5625, + "loss_num": 0.048583984375, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 372082736, + "step": 5563 + }, + { + "epoch": 0.6313758865248227, + "grad_norm": 23.541410446166992, + "learning_rate": 5e-05, + "loss": 1.0683, + "num_input_tokens_seen": 372148592, + "step": 5564 + }, + { + "epoch": 0.6313758865248227, + "loss": 1.1171185970306396, + "loss_ce": 0.00969670619815588, + "loss_iou": 0.421875, + "loss_num": 0.052734375, + "loss_xval": 1.109375, + "num_input_tokens_seen": 372148592, + "step": 5564 + }, + { + "epoch": 0.6314893617021277, + "grad_norm": 19.829065322875977, + "learning_rate": 5e-05, + "loss": 1.1757, + "num_input_tokens_seen": 372216032, + "step": 5565 + }, + { + "epoch": 0.6314893617021277, + "loss": 1.2926968336105347, + "loss_ce": 0.006564065348356962, + "loss_iou": 0.53515625, + "loss_num": 0.043701171875, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 372216032, + "step": 5565 + }, + { + "epoch": 0.6316028368794326, + "grad_norm": 46.25163269042969, + "learning_rate": 5e-05, + "loss": 1.1955, + "num_input_tokens_seen": 372282952, + "step": 5566 + }, + { + "epoch": 0.6316028368794326, + "loss": 1.2278921604156494, + "loss_ce": 0.0067006549797952175, + "loss_iou": 0.5078125, + "loss_num": 0.041015625, + "loss_xval": 1.21875, + "num_input_tokens_seen": 372282952, + "step": 5566 + }, + { + "epoch": 0.6317163120567376, + "grad_norm": 35.842750549316406, + "learning_rate": 5e-05, + "loss": 1.2729, + "num_input_tokens_seen": 372350152, + "step": 5567 + }, + { + "epoch": 0.6317163120567376, + "loss": 1.1811124086380005, + "loss_ce": 0.009237382560968399, + "loss_iou": 0.50390625, + "loss_num": 0.031982421875, + "loss_xval": 1.171875, + "num_input_tokens_seen": 372350152, + "step": 5567 + }, + { + "epoch": 0.6318297872340426, + "grad_norm": 26.747465133666992, + "learning_rate": 5e-05, + "loss": 1.2795, + "num_input_tokens_seen": 372416764, + "step": 5568 + }, + { + "epoch": 0.6318297872340426, + "loss": 1.3240147829055786, + "loss_ce": 0.008585140109062195, + "loss_iou": 0.53515625, + "loss_num": 0.04833984375, + "loss_xval": 1.3125, + "num_input_tokens_seen": 372416764, + "step": 5568 + }, + { + "epoch": 0.6319432624113476, + "grad_norm": 21.838520050048828, + "learning_rate": 5e-05, + "loss": 1.2075, + "num_input_tokens_seen": 372482928, + "step": 5569 + }, + { + "epoch": 0.6319432624113476, + "loss": 1.3840864896774292, + "loss_ce": 0.008598211221396923, + "loss_iou": 0.546875, + "loss_num": 0.055908203125, + "loss_xval": 1.375, + "num_input_tokens_seen": 372482928, + "step": 5569 + }, + { + "epoch": 0.6320567375886524, + "grad_norm": 36.04517364501953, + "learning_rate": 5e-05, + "loss": 1.2144, + "num_input_tokens_seen": 372549888, + "step": 5570 + }, + { + "epoch": 0.6320567375886524, + "loss": 1.156994342803955, + "loss_ce": 0.008068662136793137, + "loss_iou": 0.435546875, + "loss_num": 0.055419921875, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 372549888, + "step": 5570 + }, + { + "epoch": 0.6321702127659574, + "grad_norm": 29.069101333618164, + "learning_rate": 5e-05, + "loss": 1.3641, + "num_input_tokens_seen": 372616928, + "step": 5571 + }, + { + "epoch": 0.6321702127659574, + "loss": 1.2917218208312988, + "loss_ce": 0.006077256053686142, + "loss_iou": 0.55859375, + "loss_num": 0.033935546875, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 372616928, + "step": 5571 + }, + { + "epoch": 0.6322836879432624, + "grad_norm": 23.359838485717773, + "learning_rate": 5e-05, + "loss": 1.0839, + "num_input_tokens_seen": 372684636, + "step": 5572 + }, + { + "epoch": 0.6322836879432624, + "loss": 1.130967378616333, + "loss_ce": 0.008866480551660061, + "loss_iou": 0.482421875, + "loss_num": 0.03125, + "loss_xval": 1.125, + "num_input_tokens_seen": 372684636, + "step": 5572 + }, + { + "epoch": 0.6323971631205674, + "grad_norm": 24.638765335083008, + "learning_rate": 5e-05, + "loss": 1.0128, + "num_input_tokens_seen": 372751448, + "step": 5573 + }, + { + "epoch": 0.6323971631205674, + "loss": 0.9672664403915405, + "loss_ce": 0.004436873365193605, + "loss_iou": 0.40234375, + "loss_num": 0.0308837890625, + "loss_xval": 0.9609375, + "num_input_tokens_seen": 372751448, + "step": 5573 + }, + { + "epoch": 0.6325106382978724, + "grad_norm": 31.011953353881836, + "learning_rate": 5e-05, + "loss": 1.2297, + "num_input_tokens_seen": 372818424, + "step": 5574 + }, + { + "epoch": 0.6325106382978724, + "loss": 1.0968201160430908, + "loss_ce": 0.006488062907010317, + "loss_iou": 0.470703125, + "loss_num": 0.02978515625, + "loss_xval": 1.09375, + "num_input_tokens_seen": 372818424, + "step": 5574 + }, + { + "epoch": 0.6326241134751773, + "grad_norm": 28.509403228759766, + "learning_rate": 5e-05, + "loss": 1.4622, + "num_input_tokens_seen": 372885776, + "step": 5575 + }, + { + "epoch": 0.6326241134751773, + "loss": 1.3875049352645874, + "loss_ce": 0.006157273426651955, + "loss_iou": 0.515625, + "loss_num": 0.0693359375, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 372885776, + "step": 5575 + }, + { + "epoch": 0.6327375886524823, + "grad_norm": 21.609601974487305, + "learning_rate": 5e-05, + "loss": 1.0736, + "num_input_tokens_seen": 372952840, + "step": 5576 + }, + { + "epoch": 0.6327375886524823, + "loss": 1.2580859661102295, + "loss_ce": 0.005644608289003372, + "loss_iou": 0.53125, + "loss_num": 0.038818359375, + "loss_xval": 1.25, + "num_input_tokens_seen": 372952840, + "step": 5576 + }, + { + "epoch": 0.6328510638297873, + "grad_norm": 19.446693420410156, + "learning_rate": 5e-05, + "loss": 1.0592, + "num_input_tokens_seen": 373019724, + "step": 5577 + }, + { + "epoch": 0.6328510638297873, + "loss": 0.9981842041015625, + "loss_ce": 0.0033111386001110077, + "loss_iou": 0.423828125, + "loss_num": 0.0294189453125, + "loss_xval": 0.99609375, + "num_input_tokens_seen": 373019724, + "step": 5577 + }, + { + "epoch": 0.6329645390070922, + "grad_norm": 21.880300521850586, + "learning_rate": 5e-05, + "loss": 1.1829, + "num_input_tokens_seen": 373086636, + "step": 5578 + }, + { + "epoch": 0.6329645390070922, + "loss": 0.9893501996994019, + "loss_ce": 0.007721759378910065, + "loss_iou": 0.4140625, + "loss_num": 0.0311279296875, + "loss_xval": 0.98046875, + "num_input_tokens_seen": 373086636, + "step": 5578 + }, + { + "epoch": 0.6330780141843971, + "grad_norm": 39.661293029785156, + "learning_rate": 5e-05, + "loss": 1.1212, + "num_input_tokens_seen": 373152104, + "step": 5579 + }, + { + "epoch": 0.6330780141843971, + "loss": 1.1950234174728394, + "loss_ce": 0.002884750720113516, + "loss_iou": 0.49609375, + "loss_num": 0.040283203125, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 373152104, + "step": 5579 + }, + { + "epoch": 0.6331914893617021, + "grad_norm": 22.228832244873047, + "learning_rate": 5e-05, + "loss": 1.3113, + "num_input_tokens_seen": 373220412, + "step": 5580 + }, + { + "epoch": 0.6331914893617021, + "loss": 1.2344064712524414, + "loss_ce": 0.009308811277151108, + "loss_iou": 0.54296875, + "loss_num": 0.0283203125, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 373220412, + "step": 5580 + }, + { + "epoch": 0.6333049645390071, + "grad_norm": 14.340449333190918, + "learning_rate": 5e-05, + "loss": 1.0792, + "num_input_tokens_seen": 373287500, + "step": 5581 + }, + { + "epoch": 0.6333049645390071, + "loss": 1.151975393295288, + "loss_ce": 0.007444102317094803, + "loss_iou": 0.431640625, + "loss_num": 0.056396484375, + "loss_xval": 1.140625, + "num_input_tokens_seen": 373287500, + "step": 5581 + }, + { + "epoch": 0.6334184397163121, + "grad_norm": 18.818723678588867, + "learning_rate": 5e-05, + "loss": 1.1166, + "num_input_tokens_seen": 373353720, + "step": 5582 + }, + { + "epoch": 0.6334184397163121, + "loss": 1.2547802925109863, + "loss_ce": 0.003803790546953678, + "loss_iou": 0.484375, + "loss_num": 0.05615234375, + "loss_xval": 1.25, + "num_input_tokens_seen": 373353720, + "step": 5582 + }, + { + "epoch": 0.633531914893617, + "grad_norm": 34.78107833862305, + "learning_rate": 5e-05, + "loss": 1.2334, + "num_input_tokens_seen": 373420408, + "step": 5583 + }, + { + "epoch": 0.633531914893617, + "loss": 1.177398443222046, + "loss_ce": 0.006500108167529106, + "loss_iou": 0.54296875, + "loss_num": 0.016845703125, + "loss_xval": 1.171875, + "num_input_tokens_seen": 373420408, + "step": 5583 + }, + { + "epoch": 0.633645390070922, + "grad_norm": 35.63119125366211, + "learning_rate": 5e-05, + "loss": 1.2569, + "num_input_tokens_seen": 373487848, + "step": 5584 + }, + { + "epoch": 0.633645390070922, + "loss": 1.1988582611083984, + "loss_ce": 0.0035458109341561794, + "loss_iou": 0.5390625, + "loss_num": 0.0240478515625, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 373487848, + "step": 5584 + }, + { + "epoch": 0.633758865248227, + "grad_norm": 25.067697525024414, + "learning_rate": 5e-05, + "loss": 1.0839, + "num_input_tokens_seen": 373553900, + "step": 5585 + }, + { + "epoch": 0.633758865248227, + "loss": 1.1778963804244995, + "loss_ce": 0.0038240542635321617, + "loss_iou": 0.48046875, + "loss_num": 0.042724609375, + "loss_xval": 1.171875, + "num_input_tokens_seen": 373553900, + "step": 5585 + }, + { + "epoch": 0.6338723404255319, + "grad_norm": 22.410432815551758, + "learning_rate": 5e-05, + "loss": 1.2996, + "num_input_tokens_seen": 373621508, + "step": 5586 + }, + { + "epoch": 0.6338723404255319, + "loss": 1.4614219665527344, + "loss_ce": 0.007808722089976072, + "loss_iou": 0.55859375, + "loss_num": 0.06689453125, + "loss_xval": 1.453125, + "num_input_tokens_seen": 373621508, + "step": 5586 + }, + { + "epoch": 0.6339858156028368, + "grad_norm": 26.41457748413086, + "learning_rate": 5e-05, + "loss": 1.1935, + "num_input_tokens_seen": 373688456, + "step": 5587 + }, + { + "epoch": 0.6339858156028368, + "loss": 1.1022562980651855, + "loss_ce": 0.00508831487968564, + "loss_iou": 0.453125, + "loss_num": 0.03857421875, + "loss_xval": 1.09375, + "num_input_tokens_seen": 373688456, + "step": 5587 + }, + { + "epoch": 0.6340992907801418, + "grad_norm": 30.730436325073242, + "learning_rate": 5e-05, + "loss": 1.2437, + "num_input_tokens_seen": 373754452, + "step": 5588 + }, + { + "epoch": 0.6340992907801418, + "loss": 1.0961164236068726, + "loss_ce": 0.004807847552001476, + "loss_iou": 0.44921875, + "loss_num": 0.03857421875, + "loss_xval": 1.09375, + "num_input_tokens_seen": 373754452, + "step": 5588 + }, + { + "epoch": 0.6342127659574468, + "grad_norm": 39.43853759765625, + "learning_rate": 5e-05, + "loss": 1.3906, + "num_input_tokens_seen": 373821512, + "step": 5589 + }, + { + "epoch": 0.6342127659574468, + "loss": 1.397684097290039, + "loss_ce": 0.008523955009877682, + "loss_iou": 0.5625, + "loss_num": 0.053466796875, + "loss_xval": 1.390625, + "num_input_tokens_seen": 373821512, + "step": 5589 + }, + { + "epoch": 0.6343262411347518, + "grad_norm": 24.899381637573242, + "learning_rate": 5e-05, + "loss": 1.3651, + "num_input_tokens_seen": 373888572, + "step": 5590 + }, + { + "epoch": 0.6343262411347518, + "loss": 1.3304815292358398, + "loss_ce": 0.004798022098839283, + "loss_iou": 0.55078125, + "loss_num": 0.04443359375, + "loss_xval": 1.328125, + "num_input_tokens_seen": 373888572, + "step": 5590 + }, + { + "epoch": 0.6344397163120568, + "grad_norm": 26.187211990356445, + "learning_rate": 5e-05, + "loss": 1.2109, + "num_input_tokens_seen": 373956308, + "step": 5591 + }, + { + "epoch": 0.6344397163120568, + "loss": 1.2585623264312744, + "loss_ce": 0.0070975422859191895, + "loss_iou": 0.53125, + "loss_num": 0.037841796875, + "loss_xval": 1.25, + "num_input_tokens_seen": 373956308, + "step": 5591 + }, + { + "epoch": 0.6345531914893617, + "grad_norm": 24.5286865234375, + "learning_rate": 5e-05, + "loss": 1.1854, + "num_input_tokens_seen": 374022724, + "step": 5592 + }, + { + "epoch": 0.6345531914893617, + "loss": 1.1319098472595215, + "loss_ce": 0.011670563369989395, + "loss_iou": 0.443359375, + "loss_num": 0.04638671875, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 374022724, + "step": 5592 + }, + { + "epoch": 0.6346666666666667, + "grad_norm": 37.0821647644043, + "learning_rate": 5e-05, + "loss": 1.1549, + "num_input_tokens_seen": 374089284, + "step": 5593 + }, + { + "epoch": 0.6346666666666667, + "loss": 1.0339634418487549, + "loss_ce": 0.004971715621650219, + "loss_iou": 0.4140625, + "loss_num": 0.0400390625, + "loss_xval": 1.03125, + "num_input_tokens_seen": 374089284, + "step": 5593 + }, + { + "epoch": 0.6347801418439716, + "grad_norm": 32.24293518066406, + "learning_rate": 5e-05, + "loss": 1.2258, + "num_input_tokens_seen": 374156592, + "step": 5594 + }, + { + "epoch": 0.6347801418439716, + "loss": 1.2545651197433472, + "loss_ce": 0.007494817487895489, + "loss_iou": 0.498046875, + "loss_num": 0.05029296875, + "loss_xval": 1.25, + "num_input_tokens_seen": 374156592, + "step": 5594 + }, + { + "epoch": 0.6348936170212766, + "grad_norm": 40.54066848754883, + "learning_rate": 5e-05, + "loss": 1.5518, + "num_input_tokens_seen": 374223528, + "step": 5595 + }, + { + "epoch": 0.6348936170212766, + "loss": 1.641107439994812, + "loss_ce": 0.009271511808037758, + "loss_iou": 0.6484375, + "loss_num": 0.0673828125, + "loss_xval": 1.6328125, + "num_input_tokens_seen": 374223528, + "step": 5595 + }, + { + "epoch": 0.6350070921985815, + "grad_norm": 35.283145904541016, + "learning_rate": 5e-05, + "loss": 1.14, + "num_input_tokens_seen": 374290720, + "step": 5596 + }, + { + "epoch": 0.6350070921985815, + "loss": 1.3373236656188965, + "loss_ce": 0.008222028613090515, + "loss_iou": 0.5234375, + "loss_num": 0.056396484375, + "loss_xval": 1.328125, + "num_input_tokens_seen": 374290720, + "step": 5596 + }, + { + "epoch": 0.6351205673758865, + "grad_norm": 29.767810821533203, + "learning_rate": 5e-05, + "loss": 1.1756, + "num_input_tokens_seen": 374358552, + "step": 5597 + }, + { + "epoch": 0.6351205673758865, + "loss": 0.9794971346855164, + "loss_ce": 0.006352645345032215, + "loss_iou": 0.439453125, + "loss_num": 0.0189208984375, + "loss_xval": 0.97265625, + "num_input_tokens_seen": 374358552, + "step": 5597 + }, + { + "epoch": 0.6352340425531915, + "grad_norm": 22.422298431396484, + "learning_rate": 5e-05, + "loss": 1.0196, + "num_input_tokens_seen": 374424644, + "step": 5598 + }, + { + "epoch": 0.6352340425531915, + "loss": 1.0034762620925903, + "loss_ce": 0.004696958232671022, + "loss_iou": 0.4140625, + "loss_num": 0.033935546875, + "loss_xval": 1.0, + "num_input_tokens_seen": 374424644, + "step": 5598 + }, + { + "epoch": 0.6353475177304965, + "grad_norm": 32.73448181152344, + "learning_rate": 5e-05, + "loss": 1.0633, + "num_input_tokens_seen": 374491432, + "step": 5599 + }, + { + "epoch": 0.6353475177304965, + "loss": 1.0587959289550781, + "loss_ce": 0.006061547435820103, + "loss_iou": 0.4453125, + "loss_num": 0.03271484375, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 374491432, + "step": 5599 + }, + { + "epoch": 0.6354609929078014, + "grad_norm": 51.00859069824219, + "learning_rate": 5e-05, + "loss": 1.1831, + "num_input_tokens_seen": 374558032, + "step": 5600 + }, + { + "epoch": 0.6354609929078014, + "loss": 1.1009514331817627, + "loss_ce": 0.007689756341278553, + "loss_iou": 0.45703125, + "loss_num": 0.035888671875, + "loss_xval": 1.09375, + "num_input_tokens_seen": 374558032, + "step": 5600 + }, + { + "epoch": 0.6355744680851064, + "grad_norm": 24.259592056274414, + "learning_rate": 5e-05, + "loss": 1.2975, + "num_input_tokens_seen": 374623800, + "step": 5601 + }, + { + "epoch": 0.6355744680851064, + "loss": 1.3653067350387573, + "loss_ce": 0.003978567197918892, + "loss_iou": 0.58203125, + "loss_num": 0.039794921875, + "loss_xval": 1.359375, + "num_input_tokens_seen": 374623800, + "step": 5601 + }, + { + "epoch": 0.6356879432624113, + "grad_norm": 24.209402084350586, + "learning_rate": 5e-05, + "loss": 1.1284, + "num_input_tokens_seen": 374690936, + "step": 5602 + }, + { + "epoch": 0.6356879432624113, + "loss": 1.135524868965149, + "loss_ce": 0.005153800826519728, + "loss_iou": 0.453125, + "loss_num": 0.044677734375, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 374690936, + "step": 5602 + }, + { + "epoch": 0.6358014184397163, + "grad_norm": 26.576011657714844, + "learning_rate": 5e-05, + "loss": 1.1597, + "num_input_tokens_seen": 374758032, + "step": 5603 + }, + { + "epoch": 0.6358014184397163, + "loss": 1.2398371696472168, + "loss_ce": 0.005462197586894035, + "loss_iou": 0.5234375, + "loss_num": 0.037841796875, + "loss_xval": 1.234375, + "num_input_tokens_seen": 374758032, + "step": 5603 + }, + { + "epoch": 0.6359148936170212, + "grad_norm": 29.807086944580078, + "learning_rate": 5e-05, + "loss": 1.0965, + "num_input_tokens_seen": 374824500, + "step": 5604 + }, + { + "epoch": 0.6359148936170212, + "loss": 1.004364252090454, + "loss_ce": 0.007538001984357834, + "loss_iou": 0.412109375, + "loss_num": 0.034423828125, + "loss_xval": 0.99609375, + "num_input_tokens_seen": 374824500, + "step": 5604 + }, + { + "epoch": 0.6360283687943262, + "grad_norm": 31.45743179321289, + "learning_rate": 5e-05, + "loss": 1.0731, + "num_input_tokens_seen": 374891856, + "step": 5605 + }, + { + "epoch": 0.6360283687943262, + "loss": 1.1367114782333374, + "loss_ce": 0.009025927633047104, + "loss_iou": 0.4765625, + "loss_num": 0.034912109375, + "loss_xval": 1.125, + "num_input_tokens_seen": 374891856, + "step": 5605 + }, + { + "epoch": 0.6361418439716312, + "grad_norm": 64.44062805175781, + "learning_rate": 5e-05, + "loss": 1.18, + "num_input_tokens_seen": 374957116, + "step": 5606 + }, + { + "epoch": 0.6361418439716312, + "loss": 1.3598432540893555, + "loss_ce": 0.0038862982764840126, + "loss_iou": 0.53125, + "loss_num": 0.05810546875, + "loss_xval": 1.359375, + "num_input_tokens_seen": 374957116, + "step": 5606 + }, + { + "epoch": 0.6362553191489362, + "grad_norm": 27.445144653320312, + "learning_rate": 5e-05, + "loss": 1.3316, + "num_input_tokens_seen": 375023712, + "step": 5607 + }, + { + "epoch": 0.6362553191489362, + "loss": 1.4270297288894653, + "loss_ce": 0.010037560015916824, + "loss_iou": 0.58203125, + "loss_num": 0.0498046875, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 375023712, + "step": 5607 + }, + { + "epoch": 0.6363687943262412, + "grad_norm": 37.276222229003906, + "learning_rate": 5e-05, + "loss": 1.3468, + "num_input_tokens_seen": 375090052, + "step": 5608 + }, + { + "epoch": 0.6363687943262412, + "loss": 1.3602499961853027, + "loss_ce": 0.007711012847721577, + "loss_iou": 0.5390625, + "loss_num": 0.0556640625, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 375090052, + "step": 5608 + }, + { + "epoch": 0.6364822695035461, + "grad_norm": 25.401565551757812, + "learning_rate": 5e-05, + "loss": 1.2819, + "num_input_tokens_seen": 375156816, + "step": 5609 + }, + { + "epoch": 0.6364822695035461, + "loss": 1.2855538129806519, + "loss_ce": 0.0041206711903214455, + "loss_iou": 0.48046875, + "loss_num": 0.06396484375, + "loss_xval": 1.28125, + "num_input_tokens_seen": 375156816, + "step": 5609 + }, + { + "epoch": 0.6365957446808511, + "grad_norm": 14.805289268493652, + "learning_rate": 5e-05, + "loss": 1.1725, + "num_input_tokens_seen": 375222680, + "step": 5610 + }, + { + "epoch": 0.6365957446808511, + "loss": 0.9760659337043762, + "loss_ce": 0.0068276203237473965, + "loss_iou": 0.40625, + "loss_num": 0.031494140625, + "loss_xval": 0.96875, + "num_input_tokens_seen": 375222680, + "step": 5610 + }, + { + "epoch": 0.636709219858156, + "grad_norm": 31.84251594543457, + "learning_rate": 5e-05, + "loss": 1.1869, + "num_input_tokens_seen": 375289036, + "step": 5611 + }, + { + "epoch": 0.636709219858156, + "loss": 1.0332666635513306, + "loss_ce": 0.008303293026983738, + "loss_iou": 0.41796875, + "loss_num": 0.037841796875, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 375289036, + "step": 5611 + }, + { + "epoch": 0.636822695035461, + "grad_norm": 19.85338020324707, + "learning_rate": 5e-05, + "loss": 1.2174, + "num_input_tokens_seen": 375355944, + "step": 5612 + }, + { + "epoch": 0.636822695035461, + "loss": 1.0337152481079102, + "loss_ce": 0.005913813132792711, + "loss_iou": 0.423828125, + "loss_num": 0.036376953125, + "loss_xval": 1.03125, + "num_input_tokens_seen": 375355944, + "step": 5612 + }, + { + "epoch": 0.6369361702127659, + "grad_norm": 22.561729431152344, + "learning_rate": 5e-05, + "loss": 1.168, + "num_input_tokens_seen": 375422176, + "step": 5613 + }, + { + "epoch": 0.6369361702127659, + "loss": 1.1856590509414673, + "loss_ce": 0.004994992166757584, + "loss_iou": 0.47265625, + "loss_num": 0.046875, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 375422176, + "step": 5613 + }, + { + "epoch": 0.6370496453900709, + "grad_norm": 18.110017776489258, + "learning_rate": 5e-05, + "loss": 1.0961, + "num_input_tokens_seen": 375488612, + "step": 5614 + }, + { + "epoch": 0.6370496453900709, + "loss": 1.1331226825714111, + "loss_ce": 0.007146146614104509, + "loss_iou": 0.486328125, + "loss_num": 0.0303955078125, + "loss_xval": 1.125, + "num_input_tokens_seen": 375488612, + "step": 5614 + }, + { + "epoch": 0.6371631205673759, + "grad_norm": 21.036468505859375, + "learning_rate": 5e-05, + "loss": 1.2867, + "num_input_tokens_seen": 375555536, + "step": 5615 + }, + { + "epoch": 0.6371631205673759, + "loss": 0.9758687019348145, + "loss_ce": 0.00614209845662117, + "loss_iou": 0.400390625, + "loss_num": 0.03369140625, + "loss_xval": 0.96875, + "num_input_tokens_seen": 375555536, + "step": 5615 + }, + { + "epoch": 0.6372765957446809, + "grad_norm": 23.453330993652344, + "learning_rate": 5e-05, + "loss": 1.3316, + "num_input_tokens_seen": 375623336, + "step": 5616 + }, + { + "epoch": 0.6372765957446809, + "loss": 1.3254945278167725, + "loss_ce": 0.008111648261547089, + "loss_iou": 0.51953125, + "loss_num": 0.0556640625, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 375623336, + "step": 5616 + }, + { + "epoch": 0.6373900709219859, + "grad_norm": 25.15416145324707, + "learning_rate": 5e-05, + "loss": 1.1977, + "num_input_tokens_seen": 375690056, + "step": 5617 + }, + { + "epoch": 0.6373900709219859, + "loss": 1.1993634700775146, + "loss_ce": 0.007957326248288155, + "loss_iou": 0.5078125, + "loss_num": 0.03564453125, + "loss_xval": 1.1875, + "num_input_tokens_seen": 375690056, + "step": 5617 + }, + { + "epoch": 0.6375035460992908, + "grad_norm": 17.572813034057617, + "learning_rate": 5e-05, + "loss": 1.1974, + "num_input_tokens_seen": 375756540, + "step": 5618 + }, + { + "epoch": 0.6375035460992908, + "loss": 1.1556785106658936, + "loss_ce": 0.006874807178974152, + "loss_iou": 0.484375, + "loss_num": 0.035888671875, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 375756540, + "step": 5618 + }, + { + "epoch": 0.6376170212765957, + "grad_norm": 117.183837890625, + "learning_rate": 5e-05, + "loss": 1.1112, + "num_input_tokens_seen": 375823704, + "step": 5619 + }, + { + "epoch": 0.6376170212765957, + "loss": 1.0611121654510498, + "loss_ce": 0.0069129979237914085, + "loss_iou": 0.453125, + "loss_num": 0.029296875, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 375823704, + "step": 5619 + }, + { + "epoch": 0.6377304964539007, + "grad_norm": 30.8128719329834, + "learning_rate": 5e-05, + "loss": 1.3242, + "num_input_tokens_seen": 375889392, + "step": 5620 + }, + { + "epoch": 0.6377304964539007, + "loss": 1.273424744606018, + "loss_ce": 0.011950138956308365, + "loss_iou": 0.5078125, + "loss_num": 0.0498046875, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 375889392, + "step": 5620 + }, + { + "epoch": 0.6378439716312057, + "grad_norm": 17.351728439331055, + "learning_rate": 5e-05, + "loss": 1.0814, + "num_input_tokens_seen": 375955876, + "step": 5621 + }, + { + "epoch": 0.6378439716312057, + "loss": 1.0549931526184082, + "loss_ce": 0.00787409208714962, + "loss_iou": 0.427734375, + "loss_num": 0.038330078125, + "loss_xval": 1.046875, + "num_input_tokens_seen": 375955876, + "step": 5621 + }, + { + "epoch": 0.6379574468085106, + "grad_norm": 32.280128479003906, + "learning_rate": 5e-05, + "loss": 1.2273, + "num_input_tokens_seen": 376021844, + "step": 5622 + }, + { + "epoch": 0.6379574468085106, + "loss": 1.266413688659668, + "loss_ce": 0.00542729115113616, + "loss_iou": 0.51171875, + "loss_num": 0.048095703125, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 376021844, + "step": 5622 + }, + { + "epoch": 0.6380709219858156, + "grad_norm": 32.774356842041016, + "learning_rate": 5e-05, + "loss": 1.286, + "num_input_tokens_seen": 376089724, + "step": 5623 + }, + { + "epoch": 0.6380709219858156, + "loss": 1.3663091659545898, + "loss_ce": 0.008887204341590405, + "loss_iou": 0.55859375, + "loss_num": 0.04833984375, + "loss_xval": 1.359375, + "num_input_tokens_seen": 376089724, + "step": 5623 + }, + { + "epoch": 0.6381843971631206, + "grad_norm": 45.10712432861328, + "learning_rate": 5e-05, + "loss": 1.2767, + "num_input_tokens_seen": 376156568, + "step": 5624 + }, + { + "epoch": 0.6381843971631206, + "loss": 1.3191481828689575, + "loss_ce": 0.004206789657473564, + "loss_iou": 0.5390625, + "loss_num": 0.047119140625, + "loss_xval": 1.3125, + "num_input_tokens_seen": 376156568, + "step": 5624 + }, + { + "epoch": 0.6382978723404256, + "grad_norm": 56.96250915527344, + "learning_rate": 5e-05, + "loss": 1.1201, + "num_input_tokens_seen": 376223440, + "step": 5625 + }, + { + "epoch": 0.6382978723404256, + "loss": 1.0777907371520996, + "loss_ce": 0.009431383572518826, + "loss_iou": 0.42578125, + "loss_num": 0.043212890625, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 376223440, + "step": 5625 + }, + { + "epoch": 0.6384113475177305, + "grad_norm": 24.452327728271484, + "learning_rate": 5e-05, + "loss": 1.4445, + "num_input_tokens_seen": 376290552, + "step": 5626 + }, + { + "epoch": 0.6384113475177305, + "loss": 1.3571958541870117, + "loss_ce": 0.009539678692817688, + "loss_iou": 0.53515625, + "loss_num": 0.0546875, + "loss_xval": 1.34375, + "num_input_tokens_seen": 376290552, + "step": 5626 + }, + { + "epoch": 0.6385248226950354, + "grad_norm": 22.019140243530273, + "learning_rate": 5e-05, + "loss": 1.0776, + "num_input_tokens_seen": 376357044, + "step": 5627 + }, + { + "epoch": 0.6385248226950354, + "loss": 0.9715983867645264, + "loss_ce": 0.010905053466558456, + "loss_iou": 0.412109375, + "loss_num": 0.0269775390625, + "loss_xval": 0.9609375, + "num_input_tokens_seen": 376357044, + "step": 5627 + }, + { + "epoch": 0.6386382978723404, + "grad_norm": 24.827713012695312, + "learning_rate": 5e-05, + "loss": 1.237, + "num_input_tokens_seen": 376423752, + "step": 5628 + }, + { + "epoch": 0.6386382978723404, + "loss": 1.3356564044952393, + "loss_ce": 0.008507948368787766, + "loss_iou": 0.5234375, + "loss_num": 0.056396484375, + "loss_xval": 1.328125, + "num_input_tokens_seen": 376423752, + "step": 5628 + }, + { + "epoch": 0.6387517730496454, + "grad_norm": 31.770469665527344, + "learning_rate": 5e-05, + "loss": 1.3174, + "num_input_tokens_seen": 376491544, + "step": 5629 + }, + { + "epoch": 0.6387517730496454, + "loss": 1.360650658607483, + "loss_ce": 0.007135045249015093, + "loss_iou": 0.5234375, + "loss_num": 0.06201171875, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 376491544, + "step": 5629 + }, + { + "epoch": 0.6388652482269503, + "grad_norm": 30.510854721069336, + "learning_rate": 5e-05, + "loss": 1.3867, + "num_input_tokens_seen": 376557588, + "step": 5630 + }, + { + "epoch": 0.6388652482269503, + "loss": 1.4286993741989136, + "loss_ce": 0.008777465671300888, + "loss_iou": 0.58203125, + "loss_num": 0.051025390625, + "loss_xval": 1.421875, + "num_input_tokens_seen": 376557588, + "step": 5630 + }, + { + "epoch": 0.6389787234042553, + "grad_norm": 27.45404815673828, + "learning_rate": 5e-05, + "loss": 1.0032, + "num_input_tokens_seen": 376624072, + "step": 5631 + }, + { + "epoch": 0.6389787234042553, + "loss": 1.1101088523864746, + "loss_ce": 0.0051285275258123875, + "loss_iou": 0.466796875, + "loss_num": 0.034423828125, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 376624072, + "step": 5631 + }, + { + "epoch": 0.6390921985815603, + "grad_norm": 29.568500518798828, + "learning_rate": 5e-05, + "loss": 1.2236, + "num_input_tokens_seen": 376691032, + "step": 5632 + }, + { + "epoch": 0.6390921985815603, + "loss": 1.2346489429473877, + "loss_ce": 0.002227050717920065, + "loss_iou": 0.52734375, + "loss_num": 0.0361328125, + "loss_xval": 1.234375, + "num_input_tokens_seen": 376691032, + "step": 5632 + }, + { + "epoch": 0.6392056737588653, + "grad_norm": 30.591487884521484, + "learning_rate": 5e-05, + "loss": 1.2258, + "num_input_tokens_seen": 376758512, + "step": 5633 + }, + { + "epoch": 0.6392056737588653, + "loss": 1.2827767133712769, + "loss_ce": 0.00885089673101902, + "loss_iou": 0.51171875, + "loss_num": 0.05029296875, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 376758512, + "step": 5633 + }, + { + "epoch": 0.6393191489361703, + "grad_norm": 143.85289001464844, + "learning_rate": 5e-05, + "loss": 1.4663, + "num_input_tokens_seen": 376825800, + "step": 5634 + }, + { + "epoch": 0.6393191489361703, + "loss": 1.3802807331085205, + "loss_ce": 0.008698683232069016, + "loss_iou": 0.56640625, + "loss_num": 0.047119140625, + "loss_xval": 1.375, + "num_input_tokens_seen": 376825800, + "step": 5634 + }, + { + "epoch": 0.6394326241134751, + "grad_norm": 57.162628173828125, + "learning_rate": 5e-05, + "loss": 1.2731, + "num_input_tokens_seen": 376892380, + "step": 5635 + }, + { + "epoch": 0.6394326241134751, + "loss": 1.4660260677337646, + "loss_ce": 0.01143624261021614, + "loss_iou": 0.56640625, + "loss_num": 0.0634765625, + "loss_xval": 1.453125, + "num_input_tokens_seen": 376892380, + "step": 5635 + }, + { + "epoch": 0.6395460992907801, + "grad_norm": 27.45564842224121, + "learning_rate": 5e-05, + "loss": 1.2637, + "num_input_tokens_seen": 376958776, + "step": 5636 + }, + { + "epoch": 0.6395460992907801, + "loss": 1.4230334758758545, + "loss_ce": 0.009215099737048149, + "loss_iou": 0.5546875, + "loss_num": 0.061279296875, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 376958776, + "step": 5636 + }, + { + "epoch": 0.6396595744680851, + "grad_norm": 27.6306095123291, + "learning_rate": 5e-05, + "loss": 1.2356, + "num_input_tokens_seen": 377026476, + "step": 5637 + }, + { + "epoch": 0.6396595744680851, + "loss": 1.1869090795516968, + "loss_ce": 0.005756752099841833, + "loss_iou": 0.5, + "loss_num": 0.035888671875, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 377026476, + "step": 5637 + }, + { + "epoch": 0.6397730496453901, + "grad_norm": 32.87144470214844, + "learning_rate": 5e-05, + "loss": 1.3863, + "num_input_tokens_seen": 377093032, + "step": 5638 + }, + { + "epoch": 0.6397730496453901, + "loss": 1.213831901550293, + "loss_ce": 0.005824102554470301, + "loss_iou": 0.5, + "loss_num": 0.041748046875, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 377093032, + "step": 5638 + }, + { + "epoch": 0.639886524822695, + "grad_norm": 53.76021957397461, + "learning_rate": 5e-05, + "loss": 1.257, + "num_input_tokens_seen": 377160440, + "step": 5639 + }, + { + "epoch": 0.639886524822695, + "loss": 1.3560904264450073, + "loss_ce": 0.005016172770410776, + "loss_iou": 0.54296875, + "loss_num": 0.052978515625, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 377160440, + "step": 5639 + }, + { + "epoch": 0.64, + "grad_norm": 25.629413604736328, + "learning_rate": 5e-05, + "loss": 1.4585, + "num_input_tokens_seen": 377227692, + "step": 5640 + }, + { + "epoch": 0.64, + "loss": 1.4290924072265625, + "loss_ce": 0.010147091932594776, + "loss_iou": 0.6015625, + "loss_num": 0.0439453125, + "loss_xval": 1.421875, + "num_input_tokens_seen": 377227692, + "step": 5640 + }, + { + "epoch": 0.640113475177305, + "grad_norm": 143.5091552734375, + "learning_rate": 5e-05, + "loss": 1.2647, + "num_input_tokens_seen": 377294812, + "step": 5641 + }, + { + "epoch": 0.640113475177305, + "loss": 1.3684715032577515, + "loss_ce": 0.005678596906363964, + "loss_iou": 0.578125, + "loss_num": 0.040771484375, + "loss_xval": 1.359375, + "num_input_tokens_seen": 377294812, + "step": 5641 + }, + { + "epoch": 0.64022695035461, + "grad_norm": 26.585479736328125, + "learning_rate": 5e-05, + "loss": 1.3755, + "num_input_tokens_seen": 377361788, + "step": 5642 + }, + { + "epoch": 0.64022695035461, + "loss": 1.3127753734588623, + "loss_ce": 0.008087792433798313, + "loss_iou": 0.54296875, + "loss_num": 0.0439453125, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 377361788, + "step": 5642 + }, + { + "epoch": 0.640340425531915, + "grad_norm": 15.839091300964355, + "learning_rate": 5e-05, + "loss": 1.1688, + "num_input_tokens_seen": 377428220, + "step": 5643 + }, + { + "epoch": 0.640340425531915, + "loss": 1.2744166851043701, + "loss_ce": 0.007815111428499222, + "loss_iou": 0.515625, + "loss_num": 0.048095703125, + "loss_xval": 1.265625, + "num_input_tokens_seen": 377428220, + "step": 5643 + }, + { + "epoch": 0.6404539007092198, + "grad_norm": 12.270648002624512, + "learning_rate": 5e-05, + "loss": 0.897, + "num_input_tokens_seen": 377493792, + "step": 5644 + }, + { + "epoch": 0.6404539007092198, + "loss": 0.8193325996398926, + "loss_ce": 0.010708415880799294, + "loss_iou": 0.3046875, + "loss_num": 0.03955078125, + "loss_xval": 0.80859375, + "num_input_tokens_seen": 377493792, + "step": 5644 + }, + { + "epoch": 0.6405673758865248, + "grad_norm": 26.40445327758789, + "learning_rate": 5e-05, + "loss": 1.2404, + "num_input_tokens_seen": 377560648, + "step": 5645 + }, + { + "epoch": 0.6405673758865248, + "loss": 1.1222213506698608, + "loss_ce": 0.005522174760699272, + "loss_iou": 0.470703125, + "loss_num": 0.034912109375, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 377560648, + "step": 5645 + }, + { + "epoch": 0.6406808510638298, + "grad_norm": 27.768966674804688, + "learning_rate": 5e-05, + "loss": 1.0822, + "num_input_tokens_seen": 377627560, + "step": 5646 + }, + { + "epoch": 0.6406808510638298, + "loss": 0.9734035730361938, + "loss_ce": 0.004653602838516235, + "loss_iou": 0.375, + "loss_num": 0.043701171875, + "loss_xval": 0.96875, + "num_input_tokens_seen": 377627560, + "step": 5646 + }, + { + "epoch": 0.6407943262411347, + "grad_norm": 42.89715576171875, + "learning_rate": 5e-05, + "loss": 1.3943, + "num_input_tokens_seen": 377695088, + "step": 5647 + }, + { + "epoch": 0.6407943262411347, + "loss": 1.5719020366668701, + "loss_ce": 0.006472395732998848, + "loss_iou": 0.609375, + "loss_num": 0.0703125, + "loss_xval": 1.5625, + "num_input_tokens_seen": 377695088, + "step": 5647 + }, + { + "epoch": 0.6409078014184397, + "grad_norm": 34.136558532714844, + "learning_rate": 5e-05, + "loss": 1.3944, + "num_input_tokens_seen": 377762348, + "step": 5648 + }, + { + "epoch": 0.6409078014184397, + "loss": 1.4223520755767822, + "loss_ce": 0.006336421240121126, + "loss_iou": 0.5703125, + "loss_num": 0.05615234375, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 377762348, + "step": 5648 + }, + { + "epoch": 0.6410212765957447, + "grad_norm": 15.056360244750977, + "learning_rate": 5e-05, + "loss": 1.1205, + "num_input_tokens_seen": 377829980, + "step": 5649 + }, + { + "epoch": 0.6410212765957447, + "loss": 1.102056622505188, + "loss_ce": 0.007207965478301048, + "loss_iou": 0.4609375, + "loss_num": 0.03466796875, + "loss_xval": 1.09375, + "num_input_tokens_seen": 377829980, + "step": 5649 + }, + { + "epoch": 0.6411347517730497, + "grad_norm": 27.59983253479004, + "learning_rate": 5e-05, + "loss": 1.3263, + "num_input_tokens_seen": 377896176, + "step": 5650 + }, + { + "epoch": 0.6411347517730497, + "loss": 1.3256291151046753, + "loss_ce": 0.00482829799875617, + "loss_iou": 0.5, + "loss_num": 0.064453125, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 377896176, + "step": 5650 + }, + { + "epoch": 0.6412482269503547, + "grad_norm": 24.870418548583984, + "learning_rate": 5e-05, + "loss": 1.3834, + "num_input_tokens_seen": 377963656, + "step": 5651 + }, + { + "epoch": 0.6412482269503547, + "loss": 1.5068551301956177, + "loss_ce": 0.006366833113133907, + "loss_iou": 0.578125, + "loss_num": 0.06884765625, + "loss_xval": 1.5, + "num_input_tokens_seen": 377963656, + "step": 5651 + }, + { + "epoch": 0.6413617021276595, + "grad_norm": 42.79179382324219, + "learning_rate": 5e-05, + "loss": 1.2699, + "num_input_tokens_seen": 378030992, + "step": 5652 + }, + { + "epoch": 0.6413617021276595, + "loss": 1.3187918663024902, + "loss_ce": 0.004827023949474096, + "loss_iou": 0.5, + "loss_num": 0.0625, + "loss_xval": 1.3125, + "num_input_tokens_seen": 378030992, + "step": 5652 + }, + { + "epoch": 0.6414751773049645, + "grad_norm": 45.778038024902344, + "learning_rate": 5e-05, + "loss": 1.2783, + "num_input_tokens_seen": 378098184, + "step": 5653 + }, + { + "epoch": 0.6414751773049645, + "loss": 1.219768762588501, + "loss_ce": 0.008831297978758812, + "loss_iou": 0.46875, + "loss_num": 0.054931640625, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 378098184, + "step": 5653 + }, + { + "epoch": 0.6415886524822695, + "grad_norm": 31.260234832763672, + "learning_rate": 5e-05, + "loss": 1.2382, + "num_input_tokens_seen": 378164832, + "step": 5654 + }, + { + "epoch": 0.6415886524822695, + "loss": 1.1286391019821167, + "loss_ce": 0.003394956700503826, + "loss_iou": 0.458984375, + "loss_num": 0.041015625, + "loss_xval": 1.125, + "num_input_tokens_seen": 378164832, + "step": 5654 + }, + { + "epoch": 0.6417021276595745, + "grad_norm": 31.240699768066406, + "learning_rate": 5e-05, + "loss": 1.3071, + "num_input_tokens_seen": 378231304, + "step": 5655 + }, + { + "epoch": 0.6417021276595745, + "loss": 1.4089841842651367, + "loss_ce": 0.008593523874878883, + "loss_iou": 0.5078125, + "loss_num": 0.076171875, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 378231304, + "step": 5655 + }, + { + "epoch": 0.6418156028368794, + "grad_norm": 19.693933486938477, + "learning_rate": 5e-05, + "loss": 1.3597, + "num_input_tokens_seen": 378298552, + "step": 5656 + }, + { + "epoch": 0.6418156028368794, + "loss": 1.2836931943893433, + "loss_ce": 0.00830258708447218, + "loss_iou": 0.49609375, + "loss_num": 0.05615234375, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 378298552, + "step": 5656 + }, + { + "epoch": 0.6419290780141844, + "grad_norm": 16.774415969848633, + "learning_rate": 5e-05, + "loss": 1.4021, + "num_input_tokens_seen": 378365504, + "step": 5657 + }, + { + "epoch": 0.6419290780141844, + "loss": 1.219231367111206, + "loss_ce": 0.009758714586496353, + "loss_iou": 0.4765625, + "loss_num": 0.05078125, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 378365504, + "step": 5657 + }, + { + "epoch": 0.6420425531914894, + "grad_norm": 18.698701858520508, + "learning_rate": 5e-05, + "loss": 1.3668, + "num_input_tokens_seen": 378433180, + "step": 5658 + }, + { + "epoch": 0.6420425531914894, + "loss": 1.474636435508728, + "loss_ce": 0.0068630073219537735, + "loss_iou": 0.5234375, + "loss_num": 0.083984375, + "loss_xval": 1.46875, + "num_input_tokens_seen": 378433180, + "step": 5658 + }, + { + "epoch": 0.6421560283687944, + "grad_norm": 26.87287139892578, + "learning_rate": 5e-05, + "loss": 1.2164, + "num_input_tokens_seen": 378499548, + "step": 5659 + }, + { + "epoch": 0.6421560283687944, + "loss": 1.170164942741394, + "loss_ce": 0.006346581038087606, + "loss_iou": 0.470703125, + "loss_num": 0.044189453125, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 378499548, + "step": 5659 + }, + { + "epoch": 0.6422695035460992, + "grad_norm": 49.829498291015625, + "learning_rate": 5e-05, + "loss": 1.4932, + "num_input_tokens_seen": 378567152, + "step": 5660 + }, + { + "epoch": 0.6422695035460992, + "loss": 1.586236834526062, + "loss_ce": 0.008111849427223206, + "loss_iou": 0.609375, + "loss_num": 0.072265625, + "loss_xval": 1.578125, + "num_input_tokens_seen": 378567152, + "step": 5660 + }, + { + "epoch": 0.6423829787234042, + "grad_norm": 56.99772644042969, + "learning_rate": 5e-05, + "loss": 1.5283, + "num_input_tokens_seen": 378634560, + "step": 5661 + }, + { + "epoch": 0.6423829787234042, + "loss": 1.5453910827636719, + "loss_ce": 0.007305072154849768, + "loss_iou": 0.6171875, + "loss_num": 0.06103515625, + "loss_xval": 1.5390625, + "num_input_tokens_seen": 378634560, + "step": 5661 + }, + { + "epoch": 0.6424964539007092, + "grad_norm": 18.80501365661621, + "learning_rate": 5e-05, + "loss": 0.872, + "num_input_tokens_seen": 378701320, + "step": 5662 + }, + { + "epoch": 0.6424964539007092, + "loss": 0.8653701543807983, + "loss_ce": 0.005506826564669609, + "loss_iou": 0.373046875, + "loss_num": 0.022705078125, + "loss_xval": 0.859375, + "num_input_tokens_seen": 378701320, + "step": 5662 + }, + { + "epoch": 0.6426099290780142, + "grad_norm": 19.339635848999023, + "learning_rate": 5e-05, + "loss": 1.3647, + "num_input_tokens_seen": 378768776, + "step": 5663 + }, + { + "epoch": 0.6426099290780142, + "loss": 1.4781136512756348, + "loss_ce": 0.011316724121570587, + "loss_iou": 0.57421875, + "loss_num": 0.06298828125, + "loss_xval": 1.46875, + "num_input_tokens_seen": 378768776, + "step": 5663 + }, + { + "epoch": 0.6427234042553192, + "grad_norm": 23.420534133911133, + "learning_rate": 5e-05, + "loss": 1.2741, + "num_input_tokens_seen": 378835852, + "step": 5664 + }, + { + "epoch": 0.6427234042553192, + "loss": 1.1779882907867432, + "loss_ce": 0.0061132656410336494, + "loss_iou": 0.51171875, + "loss_num": 0.029296875, + "loss_xval": 1.171875, + "num_input_tokens_seen": 378835852, + "step": 5664 + }, + { + "epoch": 0.6428368794326241, + "grad_norm": 1329.50439453125, + "learning_rate": 5e-05, + "loss": 1.2788, + "num_input_tokens_seen": 378903756, + "step": 5665 + }, + { + "epoch": 0.6428368794326241, + "loss": 1.2403459548950195, + "loss_ce": 0.007435860577970743, + "loss_iou": 0.478515625, + "loss_num": 0.054931640625, + "loss_xval": 1.234375, + "num_input_tokens_seen": 378903756, + "step": 5665 + }, + { + "epoch": 0.6429503546099291, + "grad_norm": 19.215343475341797, + "learning_rate": 5e-05, + "loss": 1.1605, + "num_input_tokens_seen": 378970172, + "step": 5666 + }, + { + "epoch": 0.6429503546099291, + "loss": 1.1062036752700806, + "loss_ce": 0.016848212108016014, + "loss_iou": 0.49609375, + "loss_num": 0.01953125, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 378970172, + "step": 5666 + }, + { + "epoch": 0.6430638297872341, + "grad_norm": 26.050121307373047, + "learning_rate": 5e-05, + "loss": 1.1811, + "num_input_tokens_seen": 379036196, + "step": 5667 + }, + { + "epoch": 0.6430638297872341, + "loss": 1.0563530921936035, + "loss_ce": 0.009478051215410233, + "loss_iou": 0.421875, + "loss_num": 0.041015625, + "loss_xval": 1.046875, + "num_input_tokens_seen": 379036196, + "step": 5667 + }, + { + "epoch": 0.643177304964539, + "grad_norm": 37.940879821777344, + "learning_rate": 5e-05, + "loss": 1.0985, + "num_input_tokens_seen": 379103856, + "step": 5668 + }, + { + "epoch": 0.643177304964539, + "loss": 1.112473487854004, + "loss_ce": 0.006028215400874615, + "loss_iou": 0.439453125, + "loss_num": 0.045654296875, + "loss_xval": 1.109375, + "num_input_tokens_seen": 379103856, + "step": 5668 + }, + { + "epoch": 0.6432907801418439, + "grad_norm": 24.429567337036133, + "learning_rate": 5e-05, + "loss": 1.1589, + "num_input_tokens_seen": 379170656, + "step": 5669 + }, + { + "epoch": 0.6432907801418439, + "loss": 1.3530175685882568, + "loss_ce": 0.00536138191819191, + "loss_iou": 0.5703125, + "loss_num": 0.04150390625, + "loss_xval": 1.34375, + "num_input_tokens_seen": 379170656, + "step": 5669 + }, + { + "epoch": 0.6434042553191489, + "grad_norm": 27.65247917175293, + "learning_rate": 5e-05, + "loss": 1.2996, + "num_input_tokens_seen": 379236956, + "step": 5670 + }, + { + "epoch": 0.6434042553191489, + "loss": 1.225736379623413, + "loss_ce": 0.010892697609961033, + "loss_iou": 0.4765625, + "loss_num": 0.0517578125, + "loss_xval": 1.21875, + "num_input_tokens_seen": 379236956, + "step": 5670 + }, + { + "epoch": 0.6435177304964539, + "grad_norm": 24.311256408691406, + "learning_rate": 5e-05, + "loss": 1.2535, + "num_input_tokens_seen": 379304316, + "step": 5671 + }, + { + "epoch": 0.6435177304964539, + "loss": 1.3067004680633545, + "loss_ce": 0.0073839640244841576, + "loss_iou": 0.5078125, + "loss_num": 0.056640625, + "loss_xval": 1.296875, + "num_input_tokens_seen": 379304316, + "step": 5671 + }, + { + "epoch": 0.6436312056737589, + "grad_norm": 43.316001892089844, + "learning_rate": 5e-05, + "loss": 1.026, + "num_input_tokens_seen": 379371848, + "step": 5672 + }, + { + "epoch": 0.6436312056737589, + "loss": 1.0430763959884644, + "loss_ce": 0.006943634711205959, + "loss_iou": 0.4140625, + "loss_num": 0.041748046875, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 379371848, + "step": 5672 + }, + { + "epoch": 0.6437446808510638, + "grad_norm": 19.917675018310547, + "learning_rate": 5e-05, + "loss": 1.1319, + "num_input_tokens_seen": 379438788, + "step": 5673 + }, + { + "epoch": 0.6437446808510638, + "loss": 1.1609766483306885, + "loss_ce": 0.004238374065607786, + "loss_iou": 0.48828125, + "loss_num": 0.0361328125, + "loss_xval": 1.15625, + "num_input_tokens_seen": 379438788, + "step": 5673 + }, + { + "epoch": 0.6438581560283688, + "grad_norm": 37.1545524597168, + "learning_rate": 5e-05, + "loss": 1.2785, + "num_input_tokens_seen": 379506300, + "step": 5674 + }, + { + "epoch": 0.6438581560283688, + "loss": 1.4525258541107178, + "loss_ce": 0.005260186735540628, + "loss_iou": 0.57421875, + "loss_num": 0.05908203125, + "loss_xval": 1.4453125, + "num_input_tokens_seen": 379506300, + "step": 5674 + }, + { + "epoch": 0.6439716312056738, + "grad_norm": 31.402313232421875, + "learning_rate": 5e-05, + "loss": 1.23, + "num_input_tokens_seen": 379573172, + "step": 5675 + }, + { + "epoch": 0.6439716312056738, + "loss": 1.1308363676071167, + "loss_ce": 0.0053480686619877815, + "loss_iou": 0.466796875, + "loss_num": 0.037841796875, + "loss_xval": 1.125, + "num_input_tokens_seen": 379573172, + "step": 5675 + }, + { + "epoch": 0.6440851063829788, + "grad_norm": 27.2700138092041, + "learning_rate": 5e-05, + "loss": 1.2218, + "num_input_tokens_seen": 379639632, + "step": 5676 + }, + { + "epoch": 0.6440851063829788, + "loss": 1.0299984216690063, + "loss_ce": 0.002654652576893568, + "loss_iou": 0.36328125, + "loss_num": 0.06005859375, + "loss_xval": 1.03125, + "num_input_tokens_seen": 379639632, + "step": 5676 + }, + { + "epoch": 0.6441985815602836, + "grad_norm": 26.232040405273438, + "learning_rate": 5e-05, + "loss": 1.3102, + "num_input_tokens_seen": 379705452, + "step": 5677 + }, + { + "epoch": 0.6441985815602836, + "loss": 1.319064974784851, + "loss_ce": 0.0032691098749637604, + "loss_iou": 0.5078125, + "loss_num": 0.0595703125, + "loss_xval": 1.3125, + "num_input_tokens_seen": 379705452, + "step": 5677 + }, + { + "epoch": 0.6443120567375886, + "grad_norm": 35.25519943237305, + "learning_rate": 5e-05, + "loss": 1.1214, + "num_input_tokens_seen": 379772164, + "step": 5678 + }, + { + "epoch": 0.6443120567375886, + "loss": 1.0666100978851318, + "loss_ce": 0.00948115810751915, + "loss_iou": 0.396484375, + "loss_num": 0.05322265625, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 379772164, + "step": 5678 + }, + { + "epoch": 0.6444255319148936, + "grad_norm": 33.71491622924805, + "learning_rate": 5e-05, + "loss": 1.0985, + "num_input_tokens_seen": 379838828, + "step": 5679 + }, + { + "epoch": 0.6444255319148936, + "loss": 1.1771693229675293, + "loss_ce": 0.007735739462077618, + "loss_iou": 0.482421875, + "loss_num": 0.040771484375, + "loss_xval": 1.171875, + "num_input_tokens_seen": 379838828, + "step": 5679 + }, + { + "epoch": 0.6445390070921986, + "grad_norm": 44.64533233642578, + "learning_rate": 5e-05, + "loss": 1.3173, + "num_input_tokens_seen": 379906444, + "step": 5680 + }, + { + "epoch": 0.6445390070921986, + "loss": 1.1099865436553955, + "loss_ce": 0.002564647700637579, + "loss_iou": 0.46484375, + "loss_num": 0.035400390625, + "loss_xval": 1.109375, + "num_input_tokens_seen": 379906444, + "step": 5680 + }, + { + "epoch": 0.6446524822695036, + "grad_norm": 31.705535888671875, + "learning_rate": 5e-05, + "loss": 1.2557, + "num_input_tokens_seen": 379973932, + "step": 5681 + }, + { + "epoch": 0.6446524822695036, + "loss": 1.177464485168457, + "loss_ce": 0.011937076225876808, + "loss_iou": 0.4921875, + "loss_num": 0.03662109375, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 379973932, + "step": 5681 + }, + { + "epoch": 0.6447659574468085, + "grad_norm": 23.157163619995117, + "learning_rate": 5e-05, + "loss": 1.2485, + "num_input_tokens_seen": 380040488, + "step": 5682 + }, + { + "epoch": 0.6447659574468085, + "loss": 1.3411680459976196, + "loss_ce": 0.005718916188925505, + "loss_iou": 0.5, + "loss_num": 0.0673828125, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 380040488, + "step": 5682 + }, + { + "epoch": 0.6448794326241135, + "grad_norm": 20.01812744140625, + "learning_rate": 5e-05, + "loss": 1.1067, + "num_input_tokens_seen": 380107420, + "step": 5683 + }, + { + "epoch": 0.6448794326241135, + "loss": 1.1916264295578003, + "loss_ce": 0.010962357744574547, + "loss_iou": 0.470703125, + "loss_num": 0.0478515625, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 380107420, + "step": 5683 + }, + { + "epoch": 0.6449929078014185, + "grad_norm": 27.328100204467773, + "learning_rate": 5e-05, + "loss": 1.2212, + "num_input_tokens_seen": 380174168, + "step": 5684 + }, + { + "epoch": 0.6449929078014185, + "loss": 1.2896220684051514, + "loss_ce": 0.005930646322667599, + "loss_iou": 0.51171875, + "loss_num": 0.0517578125, + "loss_xval": 1.28125, + "num_input_tokens_seen": 380174168, + "step": 5684 + }, + { + "epoch": 0.6451063829787234, + "grad_norm": 29.914535522460938, + "learning_rate": 5e-05, + "loss": 1.2779, + "num_input_tokens_seen": 380241020, + "step": 5685 + }, + { + "epoch": 0.6451063829787234, + "loss": 1.297094702720642, + "loss_ce": 0.009496971033513546, + "loss_iou": 0.46875, + "loss_num": 0.0703125, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 380241020, + "step": 5685 + }, + { + "epoch": 0.6452198581560283, + "grad_norm": 26.685083389282227, + "learning_rate": 5e-05, + "loss": 1.314, + "num_input_tokens_seen": 380308016, + "step": 5686 + }, + { + "epoch": 0.6452198581560283, + "loss": 1.176175594329834, + "loss_ce": 0.004300658591091633, + "loss_iou": 0.4921875, + "loss_num": 0.037841796875, + "loss_xval": 1.171875, + "num_input_tokens_seen": 380308016, + "step": 5686 + }, + { + "epoch": 0.6453333333333333, + "grad_norm": 34.02827072143555, + "learning_rate": 5e-05, + "loss": 1.1787, + "num_input_tokens_seen": 380375516, + "step": 5687 + }, + { + "epoch": 0.6453333333333333, + "loss": 1.2365411520004272, + "loss_ce": 0.007537237834185362, + "loss_iou": 0.486328125, + "loss_num": 0.051513671875, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 380375516, + "step": 5687 + }, + { + "epoch": 0.6454468085106383, + "grad_norm": 27.734107971191406, + "learning_rate": 5e-05, + "loss": 1.1636, + "num_input_tokens_seen": 380443148, + "step": 5688 + }, + { + "epoch": 0.6454468085106383, + "loss": 1.044072151184082, + "loss_ce": 0.006474556867033243, + "loss_iou": 0.423828125, + "loss_num": 0.037841796875, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 380443148, + "step": 5688 + }, + { + "epoch": 0.6455602836879433, + "grad_norm": 53.42191696166992, + "learning_rate": 5e-05, + "loss": 1.09, + "num_input_tokens_seen": 380510872, + "step": 5689 + }, + { + "epoch": 0.6455602836879433, + "loss": 1.0008316040039062, + "loss_ce": 0.007911685854196548, + "loss_iou": 0.435546875, + "loss_num": 0.0240478515625, + "loss_xval": 0.9921875, + "num_input_tokens_seen": 380510872, + "step": 5689 + }, + { + "epoch": 0.6456737588652482, + "grad_norm": 31.78778839111328, + "learning_rate": 5e-05, + "loss": 1.4397, + "num_input_tokens_seen": 380577672, + "step": 5690 + }, + { + "epoch": 0.6456737588652482, + "loss": 1.4088001251220703, + "loss_ce": 0.008409427478909492, + "loss_iou": 0.5625, + "loss_num": 0.05517578125, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 380577672, + "step": 5690 + }, + { + "epoch": 0.6457872340425532, + "grad_norm": 43.592124938964844, + "learning_rate": 5e-05, + "loss": 1.2336, + "num_input_tokens_seen": 380644696, + "step": 5691 + }, + { + "epoch": 0.6457872340425532, + "loss": 1.138966679573059, + "loss_ce": 0.00468943128362298, + "loss_iou": 0.470703125, + "loss_num": 0.03857421875, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 380644696, + "step": 5691 + }, + { + "epoch": 0.6459007092198582, + "grad_norm": 26.23984718322754, + "learning_rate": 5e-05, + "loss": 1.1059, + "num_input_tokens_seen": 380710864, + "step": 5692 + }, + { + "epoch": 0.6459007092198582, + "loss": 1.2893778085708618, + "loss_ce": 0.00422159768640995, + "loss_iou": 0.54296875, + "loss_num": 0.0390625, + "loss_xval": 1.28125, + "num_input_tokens_seen": 380710864, + "step": 5692 + }, + { + "epoch": 0.6460141843971631, + "grad_norm": 41.9895133972168, + "learning_rate": 5e-05, + "loss": 1.1188, + "num_input_tokens_seen": 380777656, + "step": 5693 + }, + { + "epoch": 0.6460141843971631, + "loss": 0.9322373270988464, + "loss_ce": 0.006456124596297741, + "loss_iou": 0.380859375, + "loss_num": 0.032958984375, + "loss_xval": 0.92578125, + "num_input_tokens_seen": 380777656, + "step": 5693 + }, + { + "epoch": 0.646127659574468, + "grad_norm": 39.78883743286133, + "learning_rate": 5e-05, + "loss": 1.065, + "num_input_tokens_seen": 380844844, + "step": 5694 + }, + { + "epoch": 0.646127659574468, + "loss": 0.946003258228302, + "loss_ce": 0.005573544651269913, + "loss_iou": 0.390625, + "loss_num": 0.031494140625, + "loss_xval": 0.94140625, + "num_input_tokens_seen": 380844844, + "step": 5694 + }, + { + "epoch": 0.646241134751773, + "grad_norm": 43.82582092285156, + "learning_rate": 5e-05, + "loss": 1.171, + "num_input_tokens_seen": 380912384, + "step": 5695 + }, + { + "epoch": 0.646241134751773, + "loss": 1.2064040899276733, + "loss_ce": 0.007185341790318489, + "loss_iou": 0.5, + "loss_num": 0.039306640625, + "loss_xval": 1.203125, + "num_input_tokens_seen": 380912384, + "step": 5695 + }, + { + "epoch": 0.646354609929078, + "grad_norm": 24.212080001831055, + "learning_rate": 5e-05, + "loss": 1.3405, + "num_input_tokens_seen": 380978988, + "step": 5696 + }, + { + "epoch": 0.646354609929078, + "loss": 1.2541719675064087, + "loss_ce": 0.01076376810669899, + "loss_iou": 0.46484375, + "loss_num": 0.06298828125, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 380978988, + "step": 5696 + }, + { + "epoch": 0.646468085106383, + "grad_norm": 30.11241340637207, + "learning_rate": 5e-05, + "loss": 1.1029, + "num_input_tokens_seen": 381045708, + "step": 5697 + }, + { + "epoch": 0.646468085106383, + "loss": 1.1764217615127563, + "loss_ce": 0.008453082293272018, + "loss_iou": 0.4609375, + "loss_num": 0.0498046875, + "loss_xval": 1.171875, + "num_input_tokens_seen": 381045708, + "step": 5697 + }, + { + "epoch": 0.646581560283688, + "grad_norm": 62.65323257446289, + "learning_rate": 5e-05, + "loss": 1.3494, + "num_input_tokens_seen": 381113036, + "step": 5698 + }, + { + "epoch": 0.646581560283688, + "loss": 1.144514560699463, + "loss_ce": 0.0058426507748663425, + "loss_iou": 0.50390625, + "loss_num": 0.0269775390625, + "loss_xval": 1.140625, + "num_input_tokens_seen": 381113036, + "step": 5698 + }, + { + "epoch": 0.6466950354609929, + "grad_norm": 91.9684829711914, + "learning_rate": 5e-05, + "loss": 1.2319, + "num_input_tokens_seen": 381180084, + "step": 5699 + }, + { + "epoch": 0.6466950354609929, + "loss": 1.413492202758789, + "loss_ce": 0.006265679374337196, + "loss_iou": 0.578125, + "loss_num": 0.05029296875, + "loss_xval": 1.40625, + "num_input_tokens_seen": 381180084, + "step": 5699 + }, + { + "epoch": 0.6468085106382979, + "grad_norm": 31.490766525268555, + "learning_rate": 5e-05, + "loss": 1.3327, + "num_input_tokens_seen": 381246988, + "step": 5700 + }, + { + "epoch": 0.6468085106382979, + "loss": 1.3401679992675781, + "loss_ce": 0.01167682558298111, + "loss_iou": 0.51171875, + "loss_num": 0.061767578125, + "loss_xval": 1.328125, + "num_input_tokens_seen": 381246988, + "step": 5700 + }, + { + "epoch": 0.6469219858156028, + "grad_norm": 37.75407409667969, + "learning_rate": 5e-05, + "loss": 1.088, + "num_input_tokens_seen": 381314196, + "step": 5701 + }, + { + "epoch": 0.6469219858156028, + "loss": 1.1313018798828125, + "loss_ce": 0.01191712636500597, + "loss_iou": 0.4765625, + "loss_num": 0.033447265625, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 381314196, + "step": 5701 + }, + { + "epoch": 0.6470354609929078, + "grad_norm": 53.6619987487793, + "learning_rate": 5e-05, + "loss": 1.3185, + "num_input_tokens_seen": 381381240, + "step": 5702 + }, + { + "epoch": 0.6470354609929078, + "loss": 1.309889554977417, + "loss_ce": 0.007155084051191807, + "loss_iou": 0.5, + "loss_num": 0.06103515625, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 381381240, + "step": 5702 + }, + { + "epoch": 0.6471489361702127, + "grad_norm": 27.030582427978516, + "learning_rate": 5e-05, + "loss": 1.215, + "num_input_tokens_seen": 381448276, + "step": 5703 + }, + { + "epoch": 0.6471489361702127, + "loss": 1.2393505573272705, + "loss_ce": 0.007416939362883568, + "loss_iou": 0.4921875, + "loss_num": 0.0498046875, + "loss_xval": 1.234375, + "num_input_tokens_seen": 381448276, + "step": 5703 + }, + { + "epoch": 0.6472624113475177, + "grad_norm": 32.96426010131836, + "learning_rate": 5e-05, + "loss": 1.2438, + "num_input_tokens_seen": 381515524, + "step": 5704 + }, + { + "epoch": 0.6472624113475177, + "loss": 1.2778677940368652, + "loss_ce": 0.004430332221090794, + "loss_iou": 0.5546875, + "loss_num": 0.032958984375, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 381515524, + "step": 5704 + }, + { + "epoch": 0.6473758865248227, + "grad_norm": 41.938072204589844, + "learning_rate": 5e-05, + "loss": 1.2772, + "num_input_tokens_seen": 381583176, + "step": 5705 + }, + { + "epoch": 0.6473758865248227, + "loss": 1.334465503692627, + "loss_ce": 0.008293593302369118, + "loss_iou": 0.55859375, + "loss_num": 0.042236328125, + "loss_xval": 1.328125, + "num_input_tokens_seen": 381583176, + "step": 5705 + }, + { + "epoch": 0.6474893617021277, + "grad_norm": 25.113994598388672, + "learning_rate": 5e-05, + "loss": 1.2119, + "num_input_tokens_seen": 381649760, + "step": 5706 + }, + { + "epoch": 0.6474893617021277, + "loss": 1.1765618324279785, + "loss_ce": 0.009569733403623104, + "loss_iou": 0.515625, + "loss_num": 0.0277099609375, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 381649760, + "step": 5706 + }, + { + "epoch": 0.6476028368794327, + "grad_norm": 55.15050506591797, + "learning_rate": 5e-05, + "loss": 1.0307, + "num_input_tokens_seen": 381715200, + "step": 5707 + }, + { + "epoch": 0.6476028368794327, + "loss": 1.012694239616394, + "loss_ce": 0.005858323536813259, + "loss_iou": 0.396484375, + "loss_num": 0.042724609375, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 381715200, + "step": 5707 + }, + { + "epoch": 0.6477163120567376, + "grad_norm": 16.752031326293945, + "learning_rate": 5e-05, + "loss": 1.013, + "num_input_tokens_seen": 381782240, + "step": 5708 + }, + { + "epoch": 0.6477163120567376, + "loss": 1.0416958332061768, + "loss_ce": 0.007027892395853996, + "loss_iou": 0.462890625, + "loss_num": 0.021484375, + "loss_xval": 1.03125, + "num_input_tokens_seen": 381782240, + "step": 5708 + }, + { + "epoch": 0.6478297872340425, + "grad_norm": 12.989364624023438, + "learning_rate": 5e-05, + "loss": 1.2237, + "num_input_tokens_seen": 381850204, + "step": 5709 + }, + { + "epoch": 0.6478297872340425, + "loss": 1.2501530647277832, + "loss_ce": 0.007477234583348036, + "loss_iou": 0.443359375, + "loss_num": 0.07080078125, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 381850204, + "step": 5709 + }, + { + "epoch": 0.6479432624113475, + "grad_norm": 28.625736236572266, + "learning_rate": 5e-05, + "loss": 1.1103, + "num_input_tokens_seen": 381917272, + "step": 5710 + }, + { + "epoch": 0.6479432624113475, + "loss": 1.1770949363708496, + "loss_ce": 0.005708274431526661, + "loss_iou": 0.44921875, + "loss_num": 0.054931640625, + "loss_xval": 1.171875, + "num_input_tokens_seen": 381917272, + "step": 5710 + }, + { + "epoch": 0.6480567375886525, + "grad_norm": 33.01088333129883, + "learning_rate": 5e-05, + "loss": 1.1972, + "num_input_tokens_seen": 381983536, + "step": 5711 + }, + { + "epoch": 0.6480567375886525, + "loss": 1.3149442672729492, + "loss_ce": 0.004397407174110413, + "loss_iou": 0.515625, + "loss_num": 0.0556640625, + "loss_xval": 1.3125, + "num_input_tokens_seen": 381983536, + "step": 5711 + }, + { + "epoch": 0.6481702127659574, + "grad_norm": 33.346282958984375, + "learning_rate": 5e-05, + "loss": 1.0805, + "num_input_tokens_seen": 382049992, + "step": 5712 + }, + { + "epoch": 0.6481702127659574, + "loss": 0.9805620312690735, + "loss_ce": 0.007417462766170502, + "loss_iou": 0.408203125, + "loss_num": 0.0311279296875, + "loss_xval": 0.97265625, + "num_input_tokens_seen": 382049992, + "step": 5712 + }, + { + "epoch": 0.6482836879432624, + "grad_norm": 42.3198127746582, + "learning_rate": 5e-05, + "loss": 1.0716, + "num_input_tokens_seen": 382117392, + "step": 5713 + }, + { + "epoch": 0.6482836879432624, + "loss": 1.1018261909484863, + "loss_ce": 0.008564450778067112, + "loss_iou": 0.4921875, + "loss_num": 0.0218505859375, + "loss_xval": 1.09375, + "num_input_tokens_seen": 382117392, + "step": 5713 + }, + { + "epoch": 0.6483971631205674, + "grad_norm": 41.186439514160156, + "learning_rate": 5e-05, + "loss": 1.1737, + "num_input_tokens_seen": 382183600, + "step": 5714 + }, + { + "epoch": 0.6483971631205674, + "loss": 1.1686832904815674, + "loss_ce": 0.007062227465212345, + "loss_iou": 0.447265625, + "loss_num": 0.052978515625, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 382183600, + "step": 5714 + }, + { + "epoch": 0.6485106382978724, + "grad_norm": 25.27222442626953, + "learning_rate": 5e-05, + "loss": 1.2402, + "num_input_tokens_seen": 382250924, + "step": 5715 + }, + { + "epoch": 0.6485106382978724, + "loss": 1.3716943264007568, + "loss_ce": 0.004018557723611593, + "loss_iou": 0.54296875, + "loss_num": 0.056640625, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 382250924, + "step": 5715 + }, + { + "epoch": 0.6486241134751773, + "grad_norm": 13.900808334350586, + "learning_rate": 5e-05, + "loss": 1.1377, + "num_input_tokens_seen": 382317732, + "step": 5716 + }, + { + "epoch": 0.6486241134751773, + "loss": 1.1030797958374023, + "loss_ce": 0.006400194950401783, + "loss_iou": 0.40234375, + "loss_num": 0.05908203125, + "loss_xval": 1.09375, + "num_input_tokens_seen": 382317732, + "step": 5716 + }, + { + "epoch": 0.6487375886524823, + "grad_norm": 24.30515480041504, + "learning_rate": 5e-05, + "loss": 1.0559, + "num_input_tokens_seen": 382385312, + "step": 5717 + }, + { + "epoch": 0.6487375886524823, + "loss": 1.101438045501709, + "loss_ce": 0.009641177952289581, + "loss_iou": 0.45703125, + "loss_num": 0.035400390625, + "loss_xval": 1.09375, + "num_input_tokens_seen": 382385312, + "step": 5717 + }, + { + "epoch": 0.6488510638297872, + "grad_norm": 26.759374618530273, + "learning_rate": 5e-05, + "loss": 1.2653, + "num_input_tokens_seen": 382452632, + "step": 5718 + }, + { + "epoch": 0.6488510638297872, + "loss": 1.2848042249679565, + "loss_ce": 0.0069720852188766, + "loss_iou": 0.50390625, + "loss_num": 0.053466796875, + "loss_xval": 1.28125, + "num_input_tokens_seen": 382452632, + "step": 5718 + }, + { + "epoch": 0.6489645390070922, + "grad_norm": 27.132516860961914, + "learning_rate": 5e-05, + "loss": 1.1133, + "num_input_tokens_seen": 382519272, + "step": 5719 + }, + { + "epoch": 0.6489645390070922, + "loss": 1.2389256954193115, + "loss_ce": 0.005527273751795292, + "loss_iou": 0.47265625, + "loss_num": 0.057373046875, + "loss_xval": 1.234375, + "num_input_tokens_seen": 382519272, + "step": 5719 + }, + { + "epoch": 0.6490780141843971, + "grad_norm": 29.028379440307617, + "learning_rate": 5e-05, + "loss": 1.2269, + "num_input_tokens_seen": 382586600, + "step": 5720 + }, + { + "epoch": 0.6490780141843971, + "loss": 1.2221763134002686, + "loss_ce": 0.009285666048526764, + "loss_iou": 0.494140625, + "loss_num": 0.04541015625, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 382586600, + "step": 5720 + }, + { + "epoch": 0.6491914893617021, + "grad_norm": 66.05146789550781, + "learning_rate": 5e-05, + "loss": 1.0586, + "num_input_tokens_seen": 382653188, + "step": 5721 + }, + { + "epoch": 0.6491914893617021, + "loss": 1.0996524095535278, + "loss_ce": 0.0049258507788181305, + "loss_iou": 0.4140625, + "loss_num": 0.053466796875, + "loss_xval": 1.09375, + "num_input_tokens_seen": 382653188, + "step": 5721 + }, + { + "epoch": 0.6493049645390071, + "grad_norm": 41.80366516113281, + "learning_rate": 5e-05, + "loss": 1.1488, + "num_input_tokens_seen": 382720628, + "step": 5722 + }, + { + "epoch": 0.6493049645390071, + "loss": 1.172330379486084, + "loss_ce": 0.017545241862535477, + "loss_iou": 0.46875, + "loss_num": 0.043701171875, + "loss_xval": 1.15625, + "num_input_tokens_seen": 382720628, + "step": 5722 + }, + { + "epoch": 0.6494184397163121, + "grad_norm": 33.64881896972656, + "learning_rate": 5e-05, + "loss": 0.9503, + "num_input_tokens_seen": 382787172, + "step": 5723 + }, + { + "epoch": 0.6494184397163121, + "loss": 1.0890204906463623, + "loss_ce": 0.005524378269910812, + "loss_iou": 0.462890625, + "loss_num": 0.031982421875, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 382787172, + "step": 5723 + }, + { + "epoch": 0.6495319148936171, + "grad_norm": 35.694671630859375, + "learning_rate": 5e-05, + "loss": 1.3031, + "num_input_tokens_seen": 382855364, + "step": 5724 + }, + { + "epoch": 0.6495319148936171, + "loss": 1.2418813705444336, + "loss_ce": 0.011412596330046654, + "loss_iou": 0.51953125, + "loss_num": 0.037841796875, + "loss_xval": 1.234375, + "num_input_tokens_seen": 382855364, + "step": 5724 + }, + { + "epoch": 0.649645390070922, + "grad_norm": 89.38372802734375, + "learning_rate": 5e-05, + "loss": 1.278, + "num_input_tokens_seen": 382923444, + "step": 5725 + }, + { + "epoch": 0.649645390070922, + "loss": 1.1888389587402344, + "loss_ce": 0.01061627920717001, + "loss_iou": 0.4765625, + "loss_num": 0.044921875, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 382923444, + "step": 5725 + }, + { + "epoch": 0.6497588652482269, + "grad_norm": 31.613182067871094, + "learning_rate": 5e-05, + "loss": 1.4314, + "num_input_tokens_seen": 382990704, + "step": 5726 + }, + { + "epoch": 0.6497588652482269, + "loss": 1.4534056186676025, + "loss_ce": 0.012975992634892464, + "loss_iou": 0.51953125, + "loss_num": 0.08056640625, + "loss_xval": 1.4375, + "num_input_tokens_seen": 382990704, + "step": 5726 + }, + { + "epoch": 0.6498723404255319, + "grad_norm": 39.571250915527344, + "learning_rate": 5e-05, + "loss": 1.2769, + "num_input_tokens_seen": 383057260, + "step": 5727 + }, + { + "epoch": 0.6498723404255319, + "loss": 1.2551159858703613, + "loss_ce": 0.009998815134167671, + "loss_iou": 0.419921875, + "loss_num": 0.08154296875, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 383057260, + "step": 5727 + }, + { + "epoch": 0.6499858156028369, + "grad_norm": 25.271230697631836, + "learning_rate": 5e-05, + "loss": 1.1885, + "num_input_tokens_seen": 383124864, + "step": 5728 + }, + { + "epoch": 0.6499858156028369, + "loss": 1.1731013059616089, + "loss_ce": 0.0051325848326087, + "loss_iou": 0.46875, + "loss_num": 0.046142578125, + "loss_xval": 1.171875, + "num_input_tokens_seen": 383124864, + "step": 5728 + }, + { + "epoch": 0.6500992907801418, + "grad_norm": 30.997695922851562, + "learning_rate": 5e-05, + "loss": 1.0645, + "num_input_tokens_seen": 383191064, + "step": 5729 + }, + { + "epoch": 0.6500992907801418, + "loss": 1.284332513809204, + "loss_ce": 0.008941888809204102, + "loss_iou": 0.5, + "loss_num": 0.05517578125, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 383191064, + "step": 5729 + }, + { + "epoch": 0.6502127659574468, + "grad_norm": 32.788455963134766, + "learning_rate": 5e-05, + "loss": 1.3374, + "num_input_tokens_seen": 383257576, + "step": 5730 + }, + { + "epoch": 0.6502127659574468, + "loss": 1.3218638896942139, + "loss_ce": 0.004481046460568905, + "loss_iou": 0.57421875, + "loss_num": 0.033203125, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 383257576, + "step": 5730 + }, + { + "epoch": 0.6503262411347518, + "grad_norm": 11.650556564331055, + "learning_rate": 5e-05, + "loss": 1.0264, + "num_input_tokens_seen": 383323392, + "step": 5731 + }, + { + "epoch": 0.6503262411347518, + "loss": 1.0279738903045654, + "loss_ce": 0.007710321806371212, + "loss_iou": 0.41796875, + "loss_num": 0.03662109375, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 383323392, + "step": 5731 + }, + { + "epoch": 0.6504397163120568, + "grad_norm": 13.506937980651855, + "learning_rate": 5e-05, + "loss": 1.1268, + "num_input_tokens_seen": 383390472, + "step": 5732 + }, + { + "epoch": 0.6504397163120568, + "loss": 1.16943359375, + "loss_ce": 0.00659175356850028, + "loss_iou": 0.498046875, + "loss_num": 0.033447265625, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 383390472, + "step": 5732 + }, + { + "epoch": 0.6505531914893617, + "grad_norm": 24.47905158996582, + "learning_rate": 5e-05, + "loss": 0.9936, + "num_input_tokens_seen": 383457860, + "step": 5733 + }, + { + "epoch": 0.6505531914893617, + "loss": 1.0177932977676392, + "loss_ce": 0.006074518896639347, + "loss_iou": 0.41796875, + "loss_num": 0.03515625, + "loss_xval": 1.015625, + "num_input_tokens_seen": 383457860, + "step": 5733 + }, + { + "epoch": 0.6506666666666666, + "grad_norm": 22.080883026123047, + "learning_rate": 5e-05, + "loss": 1.2517, + "num_input_tokens_seen": 383525516, + "step": 5734 + }, + { + "epoch": 0.6506666666666666, + "loss": 1.1129308938980103, + "loss_ce": 0.005508993752300739, + "loss_iou": 0.455078125, + "loss_num": 0.039306640625, + "loss_xval": 1.109375, + "num_input_tokens_seen": 383525516, + "step": 5734 + }, + { + "epoch": 0.6507801418439716, + "grad_norm": 17.452253341674805, + "learning_rate": 5e-05, + "loss": 1.1067, + "num_input_tokens_seen": 383592856, + "step": 5735 + }, + { + "epoch": 0.6507801418439716, + "loss": 1.0824320316314697, + "loss_ce": 0.010166396386921406, + "loss_iou": 0.412109375, + "loss_num": 0.0498046875, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 383592856, + "step": 5735 + }, + { + "epoch": 0.6508936170212766, + "grad_norm": 106.6192398071289, + "learning_rate": 5e-05, + "loss": 1.0447, + "num_input_tokens_seen": 383659488, + "step": 5736 + }, + { + "epoch": 0.6508936170212766, + "loss": 0.9505890607833862, + "loss_ce": 0.007962075993418694, + "loss_iou": 0.388671875, + "loss_num": 0.032958984375, + "loss_xval": 0.94140625, + "num_input_tokens_seen": 383659488, + "step": 5736 + }, + { + "epoch": 0.6510070921985815, + "grad_norm": 27.203157424926758, + "learning_rate": 5e-05, + "loss": 1.5137, + "num_input_tokens_seen": 383726248, + "step": 5737 + }, + { + "epoch": 0.6510070921985815, + "loss": 1.5425055027008057, + "loss_ce": 0.008325883187353611, + "loss_iou": 0.6171875, + "loss_num": 0.059326171875, + "loss_xval": 1.53125, + "num_input_tokens_seen": 383726248, + "step": 5737 + }, + { + "epoch": 0.6511205673758865, + "grad_norm": 31.754905700683594, + "learning_rate": 5e-05, + "loss": 1.065, + "num_input_tokens_seen": 383793932, + "step": 5738 + }, + { + "epoch": 0.6511205673758865, + "loss": 1.0316013097763062, + "loss_ce": 0.0066989874467253685, + "loss_iou": 0.42578125, + "loss_num": 0.034912109375, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 383793932, + "step": 5738 + }, + { + "epoch": 0.6512340425531915, + "grad_norm": 29.30533790588379, + "learning_rate": 5e-05, + "loss": 1.3069, + "num_input_tokens_seen": 383860676, + "step": 5739 + }, + { + "epoch": 0.6512340425531915, + "loss": 1.297924280166626, + "loss_ce": 0.009838433936238289, + "loss_iou": 0.498046875, + "loss_num": 0.05859375, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 383860676, + "step": 5739 + }, + { + "epoch": 0.6513475177304965, + "grad_norm": 28.93695831298828, + "learning_rate": 5e-05, + "loss": 1.1276, + "num_input_tokens_seen": 383927604, + "step": 5740 + }, + { + "epoch": 0.6513475177304965, + "loss": 1.1283347606658936, + "loss_ce": 0.007241018116474152, + "loss_iou": 0.470703125, + "loss_num": 0.0361328125, + "loss_xval": 1.125, + "num_input_tokens_seen": 383927604, + "step": 5740 + }, + { + "epoch": 0.6514609929078015, + "grad_norm": 28.26323127746582, + "learning_rate": 5e-05, + "loss": 1.2831, + "num_input_tokens_seen": 383994188, + "step": 5741 + }, + { + "epoch": 0.6514609929078015, + "loss": 1.273013949394226, + "loss_ce": 0.0088538508862257, + "loss_iou": 0.5546875, + "loss_num": 0.0306396484375, + "loss_xval": 1.265625, + "num_input_tokens_seen": 383994188, + "step": 5741 + }, + { + "epoch": 0.6515744680851063, + "grad_norm": 29.320850372314453, + "learning_rate": 5e-05, + "loss": 1.2413, + "num_input_tokens_seen": 384060856, + "step": 5742 + }, + { + "epoch": 0.6515744680851063, + "loss": 1.1587090492248535, + "loss_ce": 0.007830210961401463, + "loss_iou": 0.490234375, + "loss_num": 0.0341796875, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 384060856, + "step": 5742 + }, + { + "epoch": 0.6516879432624113, + "grad_norm": 29.039228439331055, + "learning_rate": 5e-05, + "loss": 1.2119, + "num_input_tokens_seen": 384127164, + "step": 5743 + }, + { + "epoch": 0.6516879432624113, + "loss": 1.4228074550628662, + "loss_ce": 0.006791808642446995, + "loss_iou": 0.59765625, + "loss_num": 0.044677734375, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 384127164, + "step": 5743 + }, + { + "epoch": 0.6518014184397163, + "grad_norm": 21.98311424255371, + "learning_rate": 5e-05, + "loss": 1.1528, + "num_input_tokens_seen": 384194936, + "step": 5744 + }, + { + "epoch": 0.6518014184397163, + "loss": 1.3057806491851807, + "loss_ce": 0.006464310921728611, + "loss_iou": 0.53125, + "loss_num": 0.046630859375, + "loss_xval": 1.296875, + "num_input_tokens_seen": 384194936, + "step": 5744 + }, + { + "epoch": 0.6519148936170213, + "grad_norm": 27.623830795288086, + "learning_rate": 5e-05, + "loss": 0.9252, + "num_input_tokens_seen": 384261116, + "step": 5745 + }, + { + "epoch": 0.6519148936170213, + "loss": 0.9474028944969177, + "loss_ce": 0.0067290496081113815, + "loss_iou": 0.390625, + "loss_num": 0.03173828125, + "loss_xval": 0.94140625, + "num_input_tokens_seen": 384261116, + "step": 5745 + }, + { + "epoch": 0.6520283687943262, + "grad_norm": 20.062057495117188, + "learning_rate": 5e-05, + "loss": 1.1961, + "num_input_tokens_seen": 384327192, + "step": 5746 + }, + { + "epoch": 0.6520283687943262, + "loss": 1.0635082721710205, + "loss_ce": 0.007355853449553251, + "loss_iou": 0.458984375, + "loss_num": 0.0277099609375, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 384327192, + "step": 5746 + }, + { + "epoch": 0.6521418439716312, + "grad_norm": 14.121713638305664, + "learning_rate": 5e-05, + "loss": 1.1306, + "num_input_tokens_seen": 384394052, + "step": 5747 + }, + { + "epoch": 0.6521418439716312, + "loss": 1.0941542387008667, + "loss_ce": 0.005638022907078266, + "loss_iou": 0.435546875, + "loss_num": 0.043701171875, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 384394052, + "step": 5747 + }, + { + "epoch": 0.6522553191489362, + "grad_norm": 16.1317081451416, + "learning_rate": 5e-05, + "loss": 1.137, + "num_input_tokens_seen": 384461640, + "step": 5748 + }, + { + "epoch": 0.6522553191489362, + "loss": 0.9714064002037048, + "loss_ce": 0.006074357777833939, + "loss_iou": 0.37890625, + "loss_num": 0.041015625, + "loss_xval": 0.96484375, + "num_input_tokens_seen": 384461640, + "step": 5748 + }, + { + "epoch": 0.6523687943262412, + "grad_norm": 13.215799331665039, + "learning_rate": 5e-05, + "loss": 1.0252, + "num_input_tokens_seen": 384527224, + "step": 5749 + }, + { + "epoch": 0.6523687943262412, + "loss": 0.9774537086486816, + "loss_ce": 0.008703668601810932, + "loss_iou": 0.40625, + "loss_num": 0.031494140625, + "loss_xval": 0.96875, + "num_input_tokens_seen": 384527224, + "step": 5749 + }, + { + "epoch": 0.6524822695035462, + "grad_norm": 23.284122467041016, + "learning_rate": 5e-05, + "loss": 1.249, + "num_input_tokens_seen": 384594124, + "step": 5750 + }, + { + "epoch": 0.6524822695035462, + "eval_seeclick_CIoU": 0.39470207691192627, + "eval_seeclick_GIoU": 0.3638252466917038, + "eval_seeclick_IoU": 0.4776924103498459, + "eval_seeclick_MAE_all": 0.1742047592997551, + "eval_seeclick_MAE_h": 0.14827542752027512, + "eval_seeclick_MAE_w": 0.11220770329236984, + "eval_seeclick_MAE_x_boxes": 0.22355995327234268, + "eval_seeclick_MAE_y_boxes": 0.12866895273327827, + "eval_seeclick_NUM_probability": 0.9999584853649139, + "eval_seeclick_inside_bbox": 0.6770833432674408, + "eval_seeclick_loss": 2.4996533393859863, + "eval_seeclick_loss_ce": 0.013945908285677433, + "eval_seeclick_loss_iou": 0.824462890625, + "eval_seeclick_loss_num": 0.16404342651367188, + "eval_seeclick_loss_xval": 2.4686279296875, + "eval_seeclick_runtime": 67.135, + "eval_seeclick_samples_per_second": 0.7, + "eval_seeclick_steps_per_second": 0.03, + "num_input_tokens_seen": 384594124, + "step": 5750 + }, + { + "epoch": 0.6524822695035462, + "eval_icons_CIoU": 0.5012784898281097, + "eval_icons_GIoU": 0.5030496567487717, + "eval_icons_IoU": 0.5406898260116577, + "eval_icons_MAE_all": 0.12711407616734505, + "eval_icons_MAE_h": 0.07096726074814796, + "eval_icons_MAE_w": 0.12254028022289276, + "eval_icons_MAE_x_boxes": 0.13355178013443947, + "eval_icons_MAE_y_boxes": 0.043629106134176254, + "eval_icons_NUM_probability": 0.9999215006828308, + "eval_icons_inside_bbox": 0.7916666567325592, + "eval_icons_loss": 2.30161190032959, + "eval_icons_loss_ce": 4.389884452393744e-05, + "eval_icons_loss_iou": 0.82958984375, + "eval_icons_loss_num": 0.135101318359375, + "eval_icons_loss_xval": 2.3369140625, + "eval_icons_runtime": 69.4468, + "eval_icons_samples_per_second": 0.72, + "eval_icons_steps_per_second": 0.029, + "num_input_tokens_seen": 384594124, + "step": 5750 + }, + { + "epoch": 0.6524822695035462, + "eval_screenspot_CIoU": 0.23320325215657553, + "eval_screenspot_GIoU": 0.1981291820605596, + "eval_screenspot_IoU": 0.3444737096627553, + "eval_screenspot_MAE_all": 0.22769387066364288, + "eval_screenspot_MAE_h": 0.17199955383936563, + "eval_screenspot_MAE_w": 0.14898707966009775, + "eval_screenspot_MAE_x_boxes": 0.33478257060050964, + "eval_screenspot_MAE_y_boxes": 0.11195129652818044, + "eval_screenspot_NUM_probability": 0.9997716744740804, + "eval_screenspot_inside_bbox": 0.5570833285649618, + "eval_screenspot_loss": 3.0554277896881104, + "eval_screenspot_loss_ce": 0.011003635823726654, + "eval_screenspot_loss_iou": 0.953125, + "eval_screenspot_loss_num": 0.242584228515625, + "eval_screenspot_loss_xval": 3.1201171875, + "eval_screenspot_runtime": 118.5569, + "eval_screenspot_samples_per_second": 0.751, + "eval_screenspot_steps_per_second": 0.025, + "num_input_tokens_seen": 384594124, + "step": 5750 + }, + { + "epoch": 0.6524822695035462, + "eval_compot_CIoU": 0.29545939713716507, + "eval_compot_GIoU": 0.2620028108358383, + "eval_compot_IoU": 0.3761375844478607, + "eval_compot_MAE_all": 0.2083142325282097, + "eval_compot_MAE_h": 0.1290091574192047, + "eval_compot_MAE_w": 0.21248406171798706, + "eval_compot_MAE_x_boxes": 0.19564886391162872, + "eval_compot_MAE_y_boxes": 0.11649933829903603, + "eval_compot_NUM_probability": 0.9996976256370544, + "eval_compot_inside_bbox": 0.5121527910232544, + "eval_compot_loss": 3.0063891410827637, + "eval_compot_loss_ce": 0.0029003943782299757, + "eval_compot_loss_iou": 0.96337890625, + "eval_compot_loss_num": 0.2094573974609375, + "eval_compot_loss_xval": 2.97412109375, + "eval_compot_runtime": 79.3835, + "eval_compot_samples_per_second": 0.63, + "eval_compot_steps_per_second": 0.025, + "num_input_tokens_seen": 384594124, + "step": 5750 + }, + { + "epoch": 0.6524822695035462, + "loss": 2.859734058380127, + "loss_ce": 0.0032889186404645443, + "loss_iou": 0.9609375, + "loss_num": 0.1875, + "loss_xval": 2.859375, + "num_input_tokens_seen": 384594124, + "step": 5750 + }, + { + "epoch": 0.652595744680851, + "grad_norm": 80.79107666015625, + "learning_rate": 5e-05, + "loss": 1.2549, + "num_input_tokens_seen": 384661992, + "step": 5751 + }, + { + "epoch": 0.652595744680851, + "loss": 1.172378659248352, + "loss_ce": 0.004409911576658487, + "loss_iou": 0.478515625, + "loss_num": 0.04248046875, + "loss_xval": 1.171875, + "num_input_tokens_seen": 384661992, + "step": 5751 + }, + { + "epoch": 0.652709219858156, + "grad_norm": 52.476863861083984, + "learning_rate": 5e-05, + "loss": 1.2812, + "num_input_tokens_seen": 384728976, + "step": 5752 + }, + { + "epoch": 0.652709219858156, + "loss": 1.344097375869751, + "loss_ce": 0.010112910531461239, + "loss_iou": 0.515625, + "loss_num": 0.060302734375, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 384728976, + "step": 5752 + }, + { + "epoch": 0.652822695035461, + "grad_norm": 44.823272705078125, + "learning_rate": 5e-05, + "loss": 1.2591, + "num_input_tokens_seen": 384797140, + "step": 5753 + }, + { + "epoch": 0.652822695035461, + "loss": 1.2035373449325562, + "loss_ce": 0.006271661259233952, + "loss_iou": 0.5, + "loss_num": 0.038818359375, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 384797140, + "step": 5753 + }, + { + "epoch": 0.652936170212766, + "grad_norm": 38.59379577636719, + "learning_rate": 5e-05, + "loss": 1.0307, + "num_input_tokens_seen": 384864444, + "step": 5754 + }, + { + "epoch": 0.652936170212766, + "loss": 0.907175600528717, + "loss_ce": 0.004831839352846146, + "loss_iou": 0.3984375, + "loss_num": 0.02099609375, + "loss_xval": 0.90234375, + "num_input_tokens_seen": 384864444, + "step": 5754 + }, + { + "epoch": 0.6530496453900709, + "grad_norm": 24.549785614013672, + "learning_rate": 5e-05, + "loss": 1.0143, + "num_input_tokens_seen": 384930236, + "step": 5755 + }, + { + "epoch": 0.6530496453900709, + "loss": 1.0061531066894531, + "loss_ce": 0.007617915980517864, + "loss_iou": 0.39453125, + "loss_num": 0.041748046875, + "loss_xval": 1.0, + "num_input_tokens_seen": 384930236, + "step": 5755 + }, + { + "epoch": 0.6531631205673759, + "grad_norm": 10.245805740356445, + "learning_rate": 5e-05, + "loss": 1.1272, + "num_input_tokens_seen": 384997324, + "step": 5756 + }, + { + "epoch": 0.6531631205673759, + "loss": 0.943468451499939, + "loss_ce": 0.006456742994487286, + "loss_iou": 0.40625, + "loss_num": 0.0250244140625, + "loss_xval": 0.9375, + "num_input_tokens_seen": 384997324, + "step": 5756 + }, + { + "epoch": 0.6532765957446809, + "grad_norm": 18.477136611938477, + "learning_rate": 5e-05, + "loss": 1.3378, + "num_input_tokens_seen": 385063012, + "step": 5757 + }, + { + "epoch": 0.6532765957446809, + "loss": 1.3686625957489014, + "loss_ce": 0.006357813719660044, + "loss_iou": 0.515625, + "loss_num": 0.06640625, + "loss_xval": 1.359375, + "num_input_tokens_seen": 385063012, + "step": 5757 + }, + { + "epoch": 0.6533900709219859, + "grad_norm": 34.877845764160156, + "learning_rate": 5e-05, + "loss": 1.149, + "num_input_tokens_seen": 385130132, + "step": 5758 + }, + { + "epoch": 0.6533900709219859, + "loss": 1.0910446643829346, + "loss_ce": 0.008525146171450615, + "loss_iou": 0.45703125, + "loss_num": 0.03369140625, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 385130132, + "step": 5758 + }, + { + "epoch": 0.6535035460992907, + "grad_norm": 31.23430061340332, + "learning_rate": 5e-05, + "loss": 1.4836, + "num_input_tokens_seen": 385196776, + "step": 5759 + }, + { + "epoch": 0.6535035460992907, + "loss": 1.4268460273742676, + "loss_ce": 0.014248359017074108, + "loss_iou": 0.55859375, + "loss_num": 0.05859375, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 385196776, + "step": 5759 + }, + { + "epoch": 0.6536170212765957, + "grad_norm": 30.57171058654785, + "learning_rate": 5e-05, + "loss": 0.9574, + "num_input_tokens_seen": 385263124, + "step": 5760 + }, + { + "epoch": 0.6536170212765957, + "loss": 1.067720651626587, + "loss_ce": 0.004244023002684116, + "loss_iou": 0.41015625, + "loss_num": 0.04833984375, + "loss_xval": 1.0625, + "num_input_tokens_seen": 385263124, + "step": 5760 + }, + { + "epoch": 0.6537304964539007, + "grad_norm": 37.48321533203125, + "learning_rate": 5e-05, + "loss": 1.2696, + "num_input_tokens_seen": 385329484, + "step": 5761 + }, + { + "epoch": 0.6537304964539007, + "loss": 1.3759799003601074, + "loss_ce": 0.0061067333444952965, + "loss_iou": 0.546875, + "loss_num": 0.0546875, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 385329484, + "step": 5761 + }, + { + "epoch": 0.6538439716312057, + "grad_norm": 44.654144287109375, + "learning_rate": 5e-05, + "loss": 1.2471, + "num_input_tokens_seen": 385396496, + "step": 5762 + }, + { + "epoch": 0.6538439716312057, + "loss": 1.2814992666244507, + "loss_ce": 0.007817544043064117, + "loss_iou": 0.46875, + "loss_num": 0.0673828125, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 385396496, + "step": 5762 + }, + { + "epoch": 0.6539574468085106, + "grad_norm": 24.338260650634766, + "learning_rate": 5e-05, + "loss": 1.3341, + "num_input_tokens_seen": 385464000, + "step": 5763 + }, + { + "epoch": 0.6539574468085106, + "loss": 1.3279800415039062, + "loss_ce": 0.006202705204486847, + "loss_iou": 0.55859375, + "loss_num": 0.04052734375, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 385464000, + "step": 5763 + }, + { + "epoch": 0.6540709219858156, + "grad_norm": 16.23333740234375, + "learning_rate": 5e-05, + "loss": 1.203, + "num_input_tokens_seen": 385531552, + "step": 5764 + }, + { + "epoch": 0.6540709219858156, + "loss": 1.3059808015823364, + "loss_ce": 0.007640937343239784, + "loss_iou": 0.5, + "loss_num": 0.0595703125, + "loss_xval": 1.296875, + "num_input_tokens_seen": 385531552, + "step": 5764 + }, + { + "epoch": 0.6541843971631206, + "grad_norm": 27.97612762451172, + "learning_rate": 5e-05, + "loss": 1.1278, + "num_input_tokens_seen": 385598032, + "step": 5765 + }, + { + "epoch": 0.6541843971631206, + "loss": 1.230181336402893, + "loss_ce": 0.005327857099473476, + "loss_iou": 0.50390625, + "loss_num": 0.04345703125, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 385598032, + "step": 5765 + }, + { + "epoch": 0.6542978723404256, + "grad_norm": 52.727622985839844, + "learning_rate": 5e-05, + "loss": 1.3811, + "num_input_tokens_seen": 385665256, + "step": 5766 + }, + { + "epoch": 0.6542978723404256, + "loss": 1.4035840034484863, + "loss_ce": 0.00905282236635685, + "loss_iou": 0.55859375, + "loss_num": 0.0556640625, + "loss_xval": 1.390625, + "num_input_tokens_seen": 385665256, + "step": 5766 + }, + { + "epoch": 0.6544113475177304, + "grad_norm": 25.901823043823242, + "learning_rate": 5e-05, + "loss": 1.4132, + "num_input_tokens_seen": 385732780, + "step": 5767 + }, + { + "epoch": 0.6544113475177304, + "loss": 1.663698434829712, + "loss_ce": 0.008424893021583557, + "loss_iou": 0.66796875, + "loss_num": 0.0634765625, + "loss_xval": 1.65625, + "num_input_tokens_seen": 385732780, + "step": 5767 + }, + { + "epoch": 0.6545248226950354, + "grad_norm": 16.925867080688477, + "learning_rate": 5e-05, + "loss": 1.1414, + "num_input_tokens_seen": 385799244, + "step": 5768 + }, + { + "epoch": 0.6545248226950354, + "loss": 1.2766355276107788, + "loss_ce": 0.006127706728875637, + "loss_iou": 0.50390625, + "loss_num": 0.052978515625, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 385799244, + "step": 5768 + }, + { + "epoch": 0.6546382978723404, + "grad_norm": 31.11012077331543, + "learning_rate": 5e-05, + "loss": 1.1796, + "num_input_tokens_seen": 385865340, + "step": 5769 + }, + { + "epoch": 0.6546382978723404, + "loss": 0.9103091955184937, + "loss_ce": 0.005646135658025742, + "loss_iou": 0.345703125, + "loss_num": 0.042724609375, + "loss_xval": 0.90625, + "num_input_tokens_seen": 385865340, + "step": 5769 + }, + { + "epoch": 0.6547517730496454, + "grad_norm": 26.196439743041992, + "learning_rate": 5e-05, + "loss": 1.1884, + "num_input_tokens_seen": 385932228, + "step": 5770 + }, + { + "epoch": 0.6547517730496454, + "loss": 1.1016504764556885, + "loss_ce": 0.003994263708591461, + "loss_iou": 0.4609375, + "loss_num": 0.035400390625, + "loss_xval": 1.09375, + "num_input_tokens_seen": 385932228, + "step": 5770 + }, + { + "epoch": 0.6548652482269504, + "grad_norm": 19.616884231567383, + "learning_rate": 5e-05, + "loss": 1.0159, + "num_input_tokens_seen": 385998768, + "step": 5771 + }, + { + "epoch": 0.6548652482269504, + "loss": 0.9910379648208618, + "loss_ce": 0.008371922187507153, + "loss_iou": 0.44140625, + "loss_num": 0.020263671875, + "loss_xval": 0.984375, + "num_input_tokens_seen": 385998768, + "step": 5771 + }, + { + "epoch": 0.6549787234042553, + "grad_norm": 25.2861270904541, + "learning_rate": 5e-05, + "loss": 1.2089, + "num_input_tokens_seen": 386065976, + "step": 5772 + }, + { + "epoch": 0.6549787234042553, + "loss": 1.1802270412445068, + "loss_ce": 0.004934100434184074, + "loss_iou": 0.52734375, + "loss_num": 0.0247802734375, + "loss_xval": 1.171875, + "num_input_tokens_seen": 386065976, + "step": 5772 + }, + { + "epoch": 0.6550921985815603, + "grad_norm": 29.56524085998535, + "learning_rate": 5e-05, + "loss": 1.14, + "num_input_tokens_seen": 386133028, + "step": 5773 + }, + { + "epoch": 0.6550921985815603, + "loss": 1.083533763885498, + "loss_ce": 0.005408754572272301, + "loss_iou": 0.447265625, + "loss_num": 0.03662109375, + "loss_xval": 1.078125, + "num_input_tokens_seen": 386133028, + "step": 5773 + }, + { + "epoch": 0.6552056737588653, + "grad_norm": 30.07413101196289, + "learning_rate": 5e-05, + "loss": 1.1849, + "num_input_tokens_seen": 386200372, + "step": 5774 + }, + { + "epoch": 0.6552056737588653, + "loss": 1.1699573993682861, + "loss_ce": 0.005406571552157402, + "loss_iou": 0.453125, + "loss_num": 0.051513671875, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 386200372, + "step": 5774 + }, + { + "epoch": 0.6553191489361702, + "grad_norm": 30.78038787841797, + "learning_rate": 5e-05, + "loss": 1.222, + "num_input_tokens_seen": 386266976, + "step": 5775 + }, + { + "epoch": 0.6553191489361702, + "loss": 1.015423059463501, + "loss_ce": 0.009563722647726536, + "loss_iou": 0.3515625, + "loss_num": 0.06005859375, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 386266976, + "step": 5775 + }, + { + "epoch": 0.6554326241134751, + "grad_norm": 27.04463768005371, + "learning_rate": 5e-05, + "loss": 1.3333, + "num_input_tokens_seen": 386334364, + "step": 5776 + }, + { + "epoch": 0.6554326241134751, + "loss": 1.1987924575805664, + "loss_ce": 0.006409571971744299, + "loss_iou": 0.515625, + "loss_num": 0.03173828125, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 386334364, + "step": 5776 + }, + { + "epoch": 0.6555460992907801, + "grad_norm": 38.11520767211914, + "learning_rate": 5e-05, + "loss": 1.3197, + "num_input_tokens_seen": 386401384, + "step": 5777 + }, + { + "epoch": 0.6555460992907801, + "loss": 1.2124115228652954, + "loss_ce": 0.006845114286988974, + "loss_iou": 0.4765625, + "loss_num": 0.05029296875, + "loss_xval": 1.203125, + "num_input_tokens_seen": 386401384, + "step": 5777 + }, + { + "epoch": 0.6556595744680851, + "grad_norm": 34.7640266418457, + "learning_rate": 5e-05, + "loss": 1.3141, + "num_input_tokens_seen": 386467948, + "step": 5778 + }, + { + "epoch": 0.6556595744680851, + "loss": 1.3859360218048096, + "loss_ce": 0.007518027909100056, + "loss_iou": 0.5546875, + "loss_num": 0.053955078125, + "loss_xval": 1.375, + "num_input_tokens_seen": 386467948, + "step": 5778 + }, + { + "epoch": 0.6557730496453901, + "grad_norm": 33.7504997253418, + "learning_rate": 5e-05, + "loss": 1.2716, + "num_input_tokens_seen": 386534532, + "step": 5779 + }, + { + "epoch": 0.6557730496453901, + "loss": 1.1775537729263306, + "loss_ce": 0.009585062973201275, + "loss_iou": 0.470703125, + "loss_num": 0.045166015625, + "loss_xval": 1.171875, + "num_input_tokens_seen": 386534532, + "step": 5779 + }, + { + "epoch": 0.655886524822695, + "grad_norm": 27.51457405090332, + "learning_rate": 5e-05, + "loss": 1.2127, + "num_input_tokens_seen": 386601108, + "step": 5780 + }, + { + "epoch": 0.655886524822695, + "loss": 1.3171411752700806, + "loss_ce": 0.007570945657789707, + "loss_iou": 0.5625, + "loss_num": 0.03662109375, + "loss_xval": 1.3125, + "num_input_tokens_seen": 386601108, + "step": 5780 + }, + { + "epoch": 0.656, + "grad_norm": 22.62253189086914, + "learning_rate": 5e-05, + "loss": 1.1282, + "num_input_tokens_seen": 386668944, + "step": 5781 + }, + { + "epoch": 0.656, + "loss": 1.0857993364334106, + "loss_ce": 0.004744565114378929, + "loss_iou": 0.466796875, + "loss_num": 0.029296875, + "loss_xval": 1.078125, + "num_input_tokens_seen": 386668944, + "step": 5781 + }, + { + "epoch": 0.656113475177305, + "grad_norm": 127.7236099243164, + "learning_rate": 5e-05, + "loss": 0.9547, + "num_input_tokens_seen": 386735396, + "step": 5782 + }, + { + "epoch": 0.656113475177305, + "loss": 0.8794055581092834, + "loss_ce": 0.012126488611102104, + "loss_iou": 0.3515625, + "loss_num": 0.033447265625, + "loss_xval": 0.8671875, + "num_input_tokens_seen": 386735396, + "step": 5782 + }, + { + "epoch": 0.65622695035461, + "grad_norm": 31.076766967773438, + "learning_rate": 5e-05, + "loss": 1.1019, + "num_input_tokens_seen": 386802976, + "step": 5783 + }, + { + "epoch": 0.65622695035461, + "loss": 1.0397191047668457, + "loss_ce": 0.008102906867861748, + "loss_iou": 0.41796875, + "loss_num": 0.0390625, + "loss_xval": 1.03125, + "num_input_tokens_seen": 386802976, + "step": 5783 + }, + { + "epoch": 0.6563404255319149, + "grad_norm": 21.28169059753418, + "learning_rate": 5e-05, + "loss": 1.2039, + "num_input_tokens_seen": 386870100, + "step": 5784 + }, + { + "epoch": 0.6563404255319149, + "loss": 1.2421486377716064, + "loss_ce": 0.006308810785412788, + "loss_iou": 0.50390625, + "loss_num": 0.0458984375, + "loss_xval": 1.234375, + "num_input_tokens_seen": 386870100, + "step": 5784 + }, + { + "epoch": 0.6564539007092198, + "grad_norm": 37.3423957824707, + "learning_rate": 5e-05, + "loss": 1.2862, + "num_input_tokens_seen": 386936672, + "step": 5785 + }, + { + "epoch": 0.6564539007092198, + "loss": 1.527024745941162, + "loss_ce": 0.01139980461448431, + "loss_iou": 0.59765625, + "loss_num": 0.06396484375, + "loss_xval": 1.515625, + "num_input_tokens_seen": 386936672, + "step": 5785 + }, + { + "epoch": 0.6565673758865248, + "grad_norm": 39.55050277709961, + "learning_rate": 5e-05, + "loss": 1.5077, + "num_input_tokens_seen": 387003544, + "step": 5786 + }, + { + "epoch": 0.6565673758865248, + "loss": 1.5864804983139038, + "loss_ce": 0.006402374245226383, + "loss_iou": 0.65625, + "loss_num": 0.052490234375, + "loss_xval": 1.578125, + "num_input_tokens_seen": 387003544, + "step": 5786 + }, + { + "epoch": 0.6566808510638298, + "grad_norm": 20.164974212646484, + "learning_rate": 5e-05, + "loss": 1.1791, + "num_input_tokens_seen": 387070804, + "step": 5787 + }, + { + "epoch": 0.6566808510638298, + "loss": 1.2594497203826904, + "loss_ce": 0.00896146148443222, + "loss_iou": 0.474609375, + "loss_num": 0.06005859375, + "loss_xval": 1.25, + "num_input_tokens_seen": 387070804, + "step": 5787 + }, + { + "epoch": 0.6567943262411348, + "grad_norm": 11.58591079711914, + "learning_rate": 5e-05, + "loss": 0.975, + "num_input_tokens_seen": 387138308, + "step": 5788 + }, + { + "epoch": 0.6567943262411348, + "loss": 0.8594099879264832, + "loss_ce": 0.005406053736805916, + "loss_iou": 0.33984375, + "loss_num": 0.03466796875, + "loss_xval": 0.85546875, + "num_input_tokens_seen": 387138308, + "step": 5788 + }, + { + "epoch": 0.6569078014184397, + "grad_norm": 29.307538986206055, + "learning_rate": 5e-05, + "loss": 1.1131, + "num_input_tokens_seen": 387204608, + "step": 5789 + }, + { + "epoch": 0.6569078014184397, + "loss": 1.0127506256103516, + "loss_ce": 0.006158843170851469, + "loss_iou": 0.375, + "loss_num": 0.051025390625, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 387204608, + "step": 5789 + }, + { + "epoch": 0.6570212765957447, + "grad_norm": 20.4054012298584, + "learning_rate": 5e-05, + "loss": 0.9877, + "num_input_tokens_seen": 387271340, + "step": 5790 + }, + { + "epoch": 0.6570212765957447, + "loss": 1.010404109954834, + "loss_ce": 0.009915843605995178, + "loss_iou": 0.423828125, + "loss_num": 0.0306396484375, + "loss_xval": 1.0, + "num_input_tokens_seen": 387271340, + "step": 5790 + }, + { + "epoch": 0.6571347517730497, + "grad_norm": 26.57575035095215, + "learning_rate": 5e-05, + "loss": 1.0045, + "num_input_tokens_seen": 387337308, + "step": 5791 + }, + { + "epoch": 0.6571347517730497, + "loss": 0.9064806699752808, + "loss_ce": 0.009019714780151844, + "loss_iou": 0.38671875, + "loss_num": 0.025146484375, + "loss_xval": 0.8984375, + "num_input_tokens_seen": 387337308, + "step": 5791 + }, + { + "epoch": 0.6572482269503546, + "grad_norm": 37.55781936645508, + "learning_rate": 5e-05, + "loss": 1.3429, + "num_input_tokens_seen": 387405008, + "step": 5792 + }, + { + "epoch": 0.6572482269503546, + "loss": 1.2422816753387451, + "loss_ce": 0.006930036470293999, + "loss_iou": 0.51171875, + "loss_num": 0.041748046875, + "loss_xval": 1.234375, + "num_input_tokens_seen": 387405008, + "step": 5792 + }, + { + "epoch": 0.6573617021276595, + "grad_norm": 535.3367309570312, + "learning_rate": 5e-05, + "loss": 1.1677, + "num_input_tokens_seen": 387472496, + "step": 5793 + }, + { + "epoch": 0.6573617021276595, + "loss": 1.2958406209945679, + "loss_ce": 0.006778187118470669, + "loss_iou": 0.54296875, + "loss_num": 0.040771484375, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 387472496, + "step": 5793 + }, + { + "epoch": 0.6574751773049645, + "grad_norm": 37.751564025878906, + "learning_rate": 5e-05, + "loss": 1.3767, + "num_input_tokens_seen": 387537832, + "step": 5794 + }, + { + "epoch": 0.6574751773049645, + "loss": 1.3071120977401733, + "loss_ce": 0.007551614660769701, + "loss_iou": 0.5078125, + "loss_num": 0.05615234375, + "loss_xval": 1.296875, + "num_input_tokens_seen": 387537832, + "step": 5794 + }, + { + "epoch": 0.6575886524822695, + "grad_norm": 81.24908447265625, + "learning_rate": 5e-05, + "loss": 1.2614, + "num_input_tokens_seen": 387604676, + "step": 5795 + }, + { + "epoch": 0.6575886524822695, + "loss": 1.237088918685913, + "loss_ce": 0.010526337660849094, + "loss_iou": 0.474609375, + "loss_num": 0.055419921875, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 387604676, + "step": 5795 + }, + { + "epoch": 0.6577021276595745, + "grad_norm": 27.238330841064453, + "learning_rate": 5e-05, + "loss": 1.1309, + "num_input_tokens_seen": 387672316, + "step": 5796 + }, + { + "epoch": 0.6577021276595745, + "loss": 1.1542820930480957, + "loss_ce": 0.006088638212531805, + "loss_iou": 0.47265625, + "loss_num": 0.04052734375, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 387672316, + "step": 5796 + }, + { + "epoch": 0.6578156028368795, + "grad_norm": 50.38874435424805, + "learning_rate": 5e-05, + "loss": 1.3286, + "num_input_tokens_seen": 387739692, + "step": 5797 + }, + { + "epoch": 0.6578156028368795, + "loss": 1.4894450902938843, + "loss_ce": 0.003117040265351534, + "loss_iou": 0.59765625, + "loss_num": 0.057861328125, + "loss_xval": 1.484375, + "num_input_tokens_seen": 387739692, + "step": 5797 + }, + { + "epoch": 0.6579290780141844, + "grad_norm": 91.84044647216797, + "learning_rate": 5e-05, + "loss": 1.4007, + "num_input_tokens_seen": 387807804, + "step": 5798 + }, + { + "epoch": 0.6579290780141844, + "loss": 1.3614147901535034, + "loss_ce": 0.005457684863358736, + "loss_iou": 0.578125, + "loss_num": 0.039306640625, + "loss_xval": 1.359375, + "num_input_tokens_seen": 387807804, + "step": 5798 + }, + { + "epoch": 0.6580425531914894, + "grad_norm": 24.286598205566406, + "learning_rate": 5e-05, + "loss": 1.0761, + "num_input_tokens_seen": 387875412, + "step": 5799 + }, + { + "epoch": 0.6580425531914894, + "loss": 1.0427786111831665, + "loss_ce": 0.0056691900826990604, + "loss_iou": 0.40234375, + "loss_num": 0.046630859375, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 387875412, + "step": 5799 + }, + { + "epoch": 0.6581560283687943, + "grad_norm": 27.064712524414062, + "learning_rate": 5e-05, + "loss": 1.1027, + "num_input_tokens_seen": 387942596, + "step": 5800 + }, + { + "epoch": 0.6581560283687943, + "loss": 1.1320468187332153, + "loss_ce": 0.007535156793892384, + "loss_iou": 0.455078125, + "loss_num": 0.043212890625, + "loss_xval": 1.125, + "num_input_tokens_seen": 387942596, + "step": 5800 + }, + { + "epoch": 0.6582695035460993, + "grad_norm": 33.971065521240234, + "learning_rate": 5e-05, + "loss": 1.1438, + "num_input_tokens_seen": 388008916, + "step": 5801 + }, + { + "epoch": 0.6582695035460993, + "loss": 1.2196730375289917, + "loss_ce": 0.006782361306250095, + "loss_iou": 0.484375, + "loss_num": 0.049072265625, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 388008916, + "step": 5801 + }, + { + "epoch": 0.6583829787234042, + "grad_norm": 27.94561195373535, + "learning_rate": 5e-05, + "loss": 1.2937, + "num_input_tokens_seen": 388075432, + "step": 5802 + }, + { + "epoch": 0.6583829787234042, + "loss": 1.3032294511795044, + "loss_ce": 0.007331031374633312, + "loss_iou": 0.51953125, + "loss_num": 0.05078125, + "loss_xval": 1.296875, + "num_input_tokens_seen": 388075432, + "step": 5802 + }, + { + "epoch": 0.6584964539007092, + "grad_norm": 38.087738037109375, + "learning_rate": 5e-05, + "loss": 1.4171, + "num_input_tokens_seen": 388142520, + "step": 5803 + }, + { + "epoch": 0.6584964539007092, + "loss": 1.3657009601593018, + "loss_ce": 0.005349315702915192, + "loss_iou": 0.5625, + "loss_num": 0.04638671875, + "loss_xval": 1.359375, + "num_input_tokens_seen": 388142520, + "step": 5803 + }, + { + "epoch": 0.6586099290780142, + "grad_norm": 27.945545196533203, + "learning_rate": 5e-05, + "loss": 1.3709, + "num_input_tokens_seen": 388210144, + "step": 5804 + }, + { + "epoch": 0.6586099290780142, + "loss": 1.3619400262832642, + "loss_ce": 0.006959543563425541, + "loss_iou": 0.60546875, + "loss_num": 0.02880859375, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 388210144, + "step": 5804 + }, + { + "epoch": 0.6587234042553192, + "grad_norm": 14.85710334777832, + "learning_rate": 5e-05, + "loss": 1.2823, + "num_input_tokens_seen": 388277820, + "step": 5805 + }, + { + "epoch": 0.6587234042553192, + "loss": 1.2293543815612793, + "loss_ce": 0.006698209326714277, + "loss_iou": 0.4765625, + "loss_num": 0.0537109375, + "loss_xval": 1.21875, + "num_input_tokens_seen": 388277820, + "step": 5805 + }, + { + "epoch": 0.6588368794326241, + "grad_norm": 21.662755966186523, + "learning_rate": 5e-05, + "loss": 1.3733, + "num_input_tokens_seen": 388345076, + "step": 5806 + }, + { + "epoch": 0.6588368794326241, + "loss": 1.1927449703216553, + "loss_ce": 0.008662980981171131, + "loss_iou": 0.48828125, + "loss_num": 0.041015625, + "loss_xval": 1.1875, + "num_input_tokens_seen": 388345076, + "step": 5806 + }, + { + "epoch": 0.6589503546099291, + "grad_norm": 32.669307708740234, + "learning_rate": 5e-05, + "loss": 1.3412, + "num_input_tokens_seen": 388412784, + "step": 5807 + }, + { + "epoch": 0.6589503546099291, + "loss": 1.369755744934082, + "loss_ce": 0.005986299365758896, + "loss_iou": 0.53515625, + "loss_num": 0.057861328125, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 388412784, + "step": 5807 + }, + { + "epoch": 0.659063829787234, + "grad_norm": 37.76339340209961, + "learning_rate": 5e-05, + "loss": 1.1797, + "num_input_tokens_seen": 388480568, + "step": 5808 + }, + { + "epoch": 0.659063829787234, + "loss": 1.1871013641357422, + "loss_ce": 0.006437288597226143, + "loss_iou": 0.478515625, + "loss_num": 0.044921875, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 388480568, + "step": 5808 + }, + { + "epoch": 0.659177304964539, + "grad_norm": 37.25705337524414, + "learning_rate": 5e-05, + "loss": 1.2196, + "num_input_tokens_seen": 388547020, + "step": 5809 + }, + { + "epoch": 0.659177304964539, + "loss": 1.0817548036575317, + "loss_ce": 0.007536048069596291, + "loss_iou": 0.4453125, + "loss_num": 0.03662109375, + "loss_xval": 1.078125, + "num_input_tokens_seen": 388547020, + "step": 5809 + }, + { + "epoch": 0.659290780141844, + "grad_norm": 38.4364013671875, + "learning_rate": 5e-05, + "loss": 1.3051, + "num_input_tokens_seen": 388614252, + "step": 5810 + }, + { + "epoch": 0.659290780141844, + "loss": 1.2157540321350098, + "loss_ce": 0.005304771009832621, + "loss_iou": 0.53125, + "loss_num": 0.02978515625, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 388614252, + "step": 5810 + }, + { + "epoch": 0.6594042553191489, + "grad_norm": 33.47722244262695, + "learning_rate": 5e-05, + "loss": 1.2708, + "num_input_tokens_seen": 388680448, + "step": 5811 + }, + { + "epoch": 0.6594042553191489, + "loss": 1.192185640335083, + "loss_ce": 0.007615269161760807, + "loss_iou": 0.48828125, + "loss_num": 0.041259765625, + "loss_xval": 1.1875, + "num_input_tokens_seen": 388680448, + "step": 5811 + }, + { + "epoch": 0.6595177304964539, + "grad_norm": 25.152877807617188, + "learning_rate": 5e-05, + "loss": 1.1823, + "num_input_tokens_seen": 388746240, + "step": 5812 + }, + { + "epoch": 0.6595177304964539, + "loss": 1.185826063156128, + "loss_ce": 0.009068164974451065, + "loss_iou": 0.455078125, + "loss_num": 0.05322265625, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 388746240, + "step": 5812 + }, + { + "epoch": 0.6596312056737589, + "grad_norm": 18.17763328552246, + "learning_rate": 5e-05, + "loss": 1.143, + "num_input_tokens_seen": 388813540, + "step": 5813 + }, + { + "epoch": 0.6596312056737589, + "loss": 1.153794527053833, + "loss_ce": 0.005356953479349613, + "loss_iou": 0.46484375, + "loss_num": 0.044189453125, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 388813540, + "step": 5813 + }, + { + "epoch": 0.6597446808510639, + "grad_norm": 32.44540786743164, + "learning_rate": 5e-05, + "loss": 1.2507, + "num_input_tokens_seen": 388880020, + "step": 5814 + }, + { + "epoch": 0.6597446808510639, + "loss": 1.1590049266815186, + "loss_ce": 0.009590852074325085, + "loss_iou": 0.49609375, + "loss_num": 0.03173828125, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 388880020, + "step": 5814 + }, + { + "epoch": 0.6598581560283688, + "grad_norm": 43.15461349487305, + "learning_rate": 5e-05, + "loss": 1.3465, + "num_input_tokens_seen": 388946388, + "step": 5815 + }, + { + "epoch": 0.6598581560283688, + "loss": 1.439575433731079, + "loss_ce": 0.010864434763789177, + "loss_iou": 0.5625, + "loss_num": 0.060546875, + "loss_xval": 1.4296875, + "num_input_tokens_seen": 388946388, + "step": 5815 + }, + { + "epoch": 0.6599716312056738, + "grad_norm": 25.659893035888672, + "learning_rate": 5e-05, + "loss": 1.2402, + "num_input_tokens_seen": 389014060, + "step": 5816 + }, + { + "epoch": 0.6599716312056738, + "loss": 1.1774359941482544, + "loss_ce": 0.003699439112097025, + "loss_iou": 0.5078125, + "loss_num": 0.03173828125, + "loss_xval": 1.171875, + "num_input_tokens_seen": 389014060, + "step": 5816 + }, + { + "epoch": 0.6600851063829787, + "grad_norm": 20.15684700012207, + "learning_rate": 5e-05, + "loss": 1.0143, + "num_input_tokens_seen": 389081088, + "step": 5817 + }, + { + "epoch": 0.6600851063829787, + "loss": 1.0584051609039307, + "loss_ce": 0.008600561879575253, + "loss_iou": 0.4453125, + "loss_num": 0.0322265625, + "loss_xval": 1.046875, + "num_input_tokens_seen": 389081088, + "step": 5817 + }, + { + "epoch": 0.6601985815602837, + "grad_norm": 15.87252426147461, + "learning_rate": 5e-05, + "loss": 1.09, + "num_input_tokens_seen": 389148704, + "step": 5818 + }, + { + "epoch": 0.6601985815602837, + "loss": 1.020446538925171, + "loss_ce": 0.0055540138855576515, + "loss_iou": 0.37890625, + "loss_num": 0.0517578125, + "loss_xval": 1.015625, + "num_input_tokens_seen": 389148704, + "step": 5818 + }, + { + "epoch": 0.6603120567375886, + "grad_norm": 20.361167907714844, + "learning_rate": 5e-05, + "loss": 1.1536, + "num_input_tokens_seen": 389214740, + "step": 5819 + }, + { + "epoch": 0.6603120567375886, + "loss": 0.9541483521461487, + "loss_ce": 0.006638606544584036, + "loss_iou": 0.34375, + "loss_num": 0.0517578125, + "loss_xval": 0.94921875, + "num_input_tokens_seen": 389214740, + "step": 5819 + }, + { + "epoch": 0.6604255319148936, + "grad_norm": 22.176151275634766, + "learning_rate": 5e-05, + "loss": 1.0488, + "num_input_tokens_seen": 389281692, + "step": 5820 + }, + { + "epoch": 0.6604255319148936, + "loss": 0.893816351890564, + "loss_ce": 0.004167899023741484, + "loss_iou": 0.396484375, + "loss_num": 0.0196533203125, + "loss_xval": 0.890625, + "num_input_tokens_seen": 389281692, + "step": 5820 + }, + { + "epoch": 0.6605390070921986, + "grad_norm": 31.975467681884766, + "learning_rate": 5e-05, + "loss": 1.1465, + "num_input_tokens_seen": 389348340, + "step": 5821 + }, + { + "epoch": 0.6605390070921986, + "loss": 1.1881189346313477, + "loss_ce": 0.00696648471057415, + "loss_iou": 0.5, + "loss_num": 0.036376953125, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 389348340, + "step": 5821 + }, + { + "epoch": 0.6606524822695036, + "grad_norm": 21.9636173248291, + "learning_rate": 5e-05, + "loss": 1.1844, + "num_input_tokens_seen": 389416044, + "step": 5822 + }, + { + "epoch": 0.6606524822695036, + "loss": 1.1323577165603638, + "loss_ce": 0.005404592491686344, + "loss_iou": 0.48046875, + "loss_num": 0.032958984375, + "loss_xval": 1.125, + "num_input_tokens_seen": 389416044, + "step": 5822 + }, + { + "epoch": 0.6607659574468085, + "grad_norm": 18.7414493560791, + "learning_rate": 5e-05, + "loss": 1.1697, + "num_input_tokens_seen": 389482212, + "step": 5823 + }, + { + "epoch": 0.6607659574468085, + "loss": 1.1360270977020264, + "loss_ce": 0.00858562346547842, + "loss_iou": 0.427734375, + "loss_num": 0.054443359375, + "loss_xval": 1.125, + "num_input_tokens_seen": 389482212, + "step": 5823 + }, + { + "epoch": 0.6608794326241135, + "grad_norm": 15.03433609008789, + "learning_rate": 5e-05, + "loss": 0.902, + "num_input_tokens_seen": 389548676, + "step": 5824 + }, + { + "epoch": 0.6608794326241135, + "loss": 0.9293301105499268, + "loss_ce": 0.010384809225797653, + "loss_iou": 0.388671875, + "loss_num": 0.0283203125, + "loss_xval": 0.91796875, + "num_input_tokens_seen": 389548676, + "step": 5824 + }, + { + "epoch": 0.6609929078014184, + "grad_norm": 34.12310028076172, + "learning_rate": 5e-05, + "loss": 1.1561, + "num_input_tokens_seen": 389615136, + "step": 5825 + }, + { + "epoch": 0.6609929078014184, + "loss": 1.121001958847046, + "loss_ce": 0.007720750290900469, + "loss_iou": 0.466796875, + "loss_num": 0.03564453125, + "loss_xval": 1.109375, + "num_input_tokens_seen": 389615136, + "step": 5825 + }, + { + "epoch": 0.6611063829787234, + "grad_norm": 266.210693359375, + "learning_rate": 5e-05, + "loss": 1.3413, + "num_input_tokens_seen": 389682216, + "step": 5826 + }, + { + "epoch": 0.6611063829787234, + "loss": 1.408791422843933, + "loss_ce": 0.0059594279155135155, + "loss_iou": 0.5703125, + "loss_num": 0.052001953125, + "loss_xval": 1.40625, + "num_input_tokens_seen": 389682216, + "step": 5826 + }, + { + "epoch": 0.6612198581560284, + "grad_norm": 29.20998764038086, + "learning_rate": 5e-05, + "loss": 1.1146, + "num_input_tokens_seen": 389749328, + "step": 5827 + }, + { + "epoch": 0.6612198581560284, + "loss": 1.0806562900543213, + "loss_ce": 0.005460962653160095, + "loss_iou": 0.46484375, + "loss_num": 0.0289306640625, + "loss_xval": 1.078125, + "num_input_tokens_seen": 389749328, + "step": 5827 + }, + { + "epoch": 0.6613333333333333, + "grad_norm": 25.41518783569336, + "learning_rate": 5e-05, + "loss": 1.3851, + "num_input_tokens_seen": 389816836, + "step": 5828 + }, + { + "epoch": 0.6613333333333333, + "loss": 1.4294757843017578, + "loss_ce": 0.007600842975080013, + "loss_iou": 0.5859375, + "loss_num": 0.049560546875, + "loss_xval": 1.421875, + "num_input_tokens_seen": 389816836, + "step": 5828 + }, + { + "epoch": 0.6614468085106383, + "grad_norm": 9.39639663696289, + "learning_rate": 5e-05, + "loss": 1.1391, + "num_input_tokens_seen": 389884876, + "step": 5829 + }, + { + "epoch": 0.6614468085106383, + "loss": 1.1293749809265137, + "loss_ce": 0.008769418112933636, + "loss_iou": 0.474609375, + "loss_num": 0.033935546875, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 389884876, + "step": 5829 + }, + { + "epoch": 0.6615602836879433, + "grad_norm": 24.065593719482422, + "learning_rate": 5e-05, + "loss": 1.2179, + "num_input_tokens_seen": 389951944, + "step": 5830 + }, + { + "epoch": 0.6615602836879433, + "loss": 1.2291409969329834, + "loss_ce": 0.006484720855951309, + "loss_iou": 0.48828125, + "loss_num": 0.04931640625, + "loss_xval": 1.21875, + "num_input_tokens_seen": 389951944, + "step": 5830 + }, + { + "epoch": 0.6616737588652483, + "grad_norm": 31.929529190063477, + "learning_rate": 5e-05, + "loss": 1.1989, + "num_input_tokens_seen": 390018960, + "step": 5831 + }, + { + "epoch": 0.6616737588652483, + "loss": 1.178753137588501, + "loss_ce": 0.009319530799984932, + "loss_iou": 0.466796875, + "loss_num": 0.046875, + "loss_xval": 1.171875, + "num_input_tokens_seen": 390018960, + "step": 5831 + }, + { + "epoch": 0.6617872340425532, + "grad_norm": 41.01825714111328, + "learning_rate": 5e-05, + "loss": 1.2769, + "num_input_tokens_seen": 390086216, + "step": 5832 + }, + { + "epoch": 0.6617872340425532, + "loss": 1.2595257759094238, + "loss_ce": 0.009037507697939873, + "loss_iou": 0.48828125, + "loss_num": 0.054443359375, + "loss_xval": 1.25, + "num_input_tokens_seen": 390086216, + "step": 5832 + }, + { + "epoch": 0.6619007092198581, + "grad_norm": 21.768352508544922, + "learning_rate": 5e-05, + "loss": 1.1323, + "num_input_tokens_seen": 390153196, + "step": 5833 + }, + { + "epoch": 0.6619007092198581, + "loss": 1.091933250427246, + "loss_ce": 0.005995672661811113, + "loss_iou": 0.462890625, + "loss_num": 0.031982421875, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 390153196, + "step": 5833 + }, + { + "epoch": 0.6620141843971631, + "grad_norm": 14.148170471191406, + "learning_rate": 5e-05, + "loss": 1.2216, + "num_input_tokens_seen": 390219444, + "step": 5834 + }, + { + "epoch": 0.6620141843971631, + "loss": 1.4541211128234863, + "loss_ce": 0.004902432672679424, + "loss_iou": 0.53515625, + "loss_num": 0.07568359375, + "loss_xval": 1.453125, + "num_input_tokens_seen": 390219444, + "step": 5834 + }, + { + "epoch": 0.6621276595744681, + "grad_norm": 11.584420204162598, + "learning_rate": 5e-05, + "loss": 1.2423, + "num_input_tokens_seen": 390286168, + "step": 5835 + }, + { + "epoch": 0.6621276595744681, + "loss": 1.1687731742858887, + "loss_ce": 0.004222413059324026, + "loss_iou": 0.4375, + "loss_num": 0.05810546875, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 390286168, + "step": 5835 + }, + { + "epoch": 0.662241134751773, + "grad_norm": 61.576416015625, + "learning_rate": 5e-05, + "loss": 1.1119, + "num_input_tokens_seen": 390353604, + "step": 5836 + }, + { + "epoch": 0.662241134751773, + "loss": 1.1332640647888184, + "loss_ce": 0.011681988835334778, + "loss_iou": 0.4375, + "loss_num": 0.0498046875, + "loss_xval": 1.125, + "num_input_tokens_seen": 390353604, + "step": 5836 + }, + { + "epoch": 0.662354609929078, + "grad_norm": 39.27505874633789, + "learning_rate": 5e-05, + "loss": 1.2535, + "num_input_tokens_seen": 390420880, + "step": 5837 + }, + { + "epoch": 0.662354609929078, + "loss": 1.2258412837982178, + "loss_ce": 0.005870511755347252, + "loss_iou": 0.484375, + "loss_num": 0.050537109375, + "loss_xval": 1.21875, + "num_input_tokens_seen": 390420880, + "step": 5837 + }, + { + "epoch": 0.662468085106383, + "grad_norm": 27.422569274902344, + "learning_rate": 5e-05, + "loss": 1.1491, + "num_input_tokens_seen": 390487472, + "step": 5838 + }, + { + "epoch": 0.662468085106383, + "loss": 1.0529812574386597, + "loss_ce": 0.003908946178853512, + "loss_iou": 0.380859375, + "loss_num": 0.05712890625, + "loss_xval": 1.046875, + "num_input_tokens_seen": 390487472, + "step": 5838 + }, + { + "epoch": 0.662581560283688, + "grad_norm": 55.46379470825195, + "learning_rate": 5e-05, + "loss": 1.3446, + "num_input_tokens_seen": 390553040, + "step": 5839 + }, + { + "epoch": 0.662581560283688, + "loss": 1.3586006164550781, + "loss_ce": 0.010944470763206482, + "loss_iou": 0.53515625, + "loss_num": 0.054931640625, + "loss_xval": 1.34375, + "num_input_tokens_seen": 390553040, + "step": 5839 + }, + { + "epoch": 0.662695035460993, + "grad_norm": 46.206363677978516, + "learning_rate": 5e-05, + "loss": 1.2724, + "num_input_tokens_seen": 390619820, + "step": 5840 + }, + { + "epoch": 0.662695035460993, + "loss": 1.2451895475387573, + "loss_ce": 0.007884841412305832, + "loss_iou": 0.49609375, + "loss_num": 0.04931640625, + "loss_xval": 1.234375, + "num_input_tokens_seen": 390619820, + "step": 5840 + }, + { + "epoch": 0.6628085106382978, + "grad_norm": 39.051273345947266, + "learning_rate": 5e-05, + "loss": 1.4694, + "num_input_tokens_seen": 390685316, + "step": 5841 + }, + { + "epoch": 0.6628085106382978, + "loss": 1.325338363647461, + "loss_ce": 0.010397041216492653, + "loss_iou": 0.5390625, + "loss_num": 0.04736328125, + "loss_xval": 1.3125, + "num_input_tokens_seen": 390685316, + "step": 5841 + }, + { + "epoch": 0.6629219858156028, + "grad_norm": 17.710323333740234, + "learning_rate": 5e-05, + "loss": 1.171, + "num_input_tokens_seen": 390751516, + "step": 5842 + }, + { + "epoch": 0.6629219858156028, + "loss": 1.1553966999053955, + "loss_ce": 0.005494368262588978, + "loss_iou": 0.482421875, + "loss_num": 0.037353515625, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 390751516, + "step": 5842 + }, + { + "epoch": 0.6630354609929078, + "grad_norm": 22.691041946411133, + "learning_rate": 5e-05, + "loss": 1.2027, + "num_input_tokens_seen": 390818728, + "step": 5843 + }, + { + "epoch": 0.6630354609929078, + "loss": 1.3192331790924072, + "loss_ce": 0.013569076545536518, + "loss_iou": 0.49609375, + "loss_num": 0.0625, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 390818728, + "step": 5843 + }, + { + "epoch": 0.6631489361702128, + "grad_norm": 19.1994686126709, + "learning_rate": 5e-05, + "loss": 1.2263, + "num_input_tokens_seen": 390885620, + "step": 5844 + }, + { + "epoch": 0.6631489361702128, + "loss": 1.2454968690872192, + "loss_ce": 0.006239024922251701, + "loss_iou": 0.490234375, + "loss_num": 0.05126953125, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 390885620, + "step": 5844 + }, + { + "epoch": 0.6632624113475177, + "grad_norm": 22.28495979309082, + "learning_rate": 5e-05, + "loss": 1.1528, + "num_input_tokens_seen": 390952428, + "step": 5845 + }, + { + "epoch": 0.6632624113475177, + "loss": 1.1481575965881348, + "loss_ce": 0.0031381628941744566, + "loss_iou": 0.46484375, + "loss_num": 0.04345703125, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 390952428, + "step": 5845 + }, + { + "epoch": 0.6633758865248227, + "grad_norm": 41.76035690307617, + "learning_rate": 5e-05, + "loss": 1.3827, + "num_input_tokens_seen": 391019740, + "step": 5846 + }, + { + "epoch": 0.6633758865248227, + "loss": 1.3513174057006836, + "loss_ce": 0.00756747554987669, + "loss_iou": 0.5, + "loss_num": 0.0693359375, + "loss_xval": 1.34375, + "num_input_tokens_seen": 391019740, + "step": 5846 + }, + { + "epoch": 0.6634893617021277, + "grad_norm": 32.390750885009766, + "learning_rate": 5e-05, + "loss": 1.6967, + "num_input_tokens_seen": 391087628, + "step": 5847 + }, + { + "epoch": 0.6634893617021277, + "loss": 1.6867303848266602, + "loss_ce": 0.00460157822817564, + "loss_iou": 0.68359375, + "loss_num": 0.0625, + "loss_xval": 1.6796875, + "num_input_tokens_seen": 391087628, + "step": 5847 + }, + { + "epoch": 0.6636028368794327, + "grad_norm": 37.67389678955078, + "learning_rate": 5e-05, + "loss": 0.932, + "num_input_tokens_seen": 391154912, + "step": 5848 + }, + { + "epoch": 0.6636028368794327, + "loss": 0.9057447910308838, + "loss_ce": 0.006330731324851513, + "loss_iou": 0.3828125, + "loss_num": 0.0267333984375, + "loss_xval": 0.8984375, + "num_input_tokens_seen": 391154912, + "step": 5848 + }, + { + "epoch": 0.6637163120567375, + "grad_norm": 52.60084533691406, + "learning_rate": 5e-05, + "loss": 1.2684, + "num_input_tokens_seen": 391221408, + "step": 5849 + }, + { + "epoch": 0.6637163120567375, + "loss": 1.2291674613952637, + "loss_ce": 0.0030932840891182423, + "loss_iou": 0.50390625, + "loss_num": 0.043212890625, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 391221408, + "step": 5849 + }, + { + "epoch": 0.6638297872340425, + "grad_norm": 24.525558471679688, + "learning_rate": 5e-05, + "loss": 1.6984, + "num_input_tokens_seen": 391289236, + "step": 5850 + }, + { + "epoch": 0.6638297872340425, + "loss": 1.8400015830993652, + "loss_ce": 0.007970426231622696, + "loss_iou": 0.7421875, + "loss_num": 0.07080078125, + "loss_xval": 1.828125, + "num_input_tokens_seen": 391289236, + "step": 5850 + }, + { + "epoch": 0.6639432624113475, + "grad_norm": 18.299665451049805, + "learning_rate": 5e-05, + "loss": 1.098, + "num_input_tokens_seen": 391356660, + "step": 5851 + }, + { + "epoch": 0.6639432624113475, + "loss": 1.1498806476593018, + "loss_ce": 0.0058377389796078205, + "loss_iou": 0.478515625, + "loss_num": 0.037353515625, + "loss_xval": 1.140625, + "num_input_tokens_seen": 391356660, + "step": 5851 + }, + { + "epoch": 0.6640567375886525, + "grad_norm": 49.49456024169922, + "learning_rate": 5e-05, + "loss": 1.3022, + "num_input_tokens_seen": 391423728, + "step": 5852 + }, + { + "epoch": 0.6640567375886525, + "loss": 0.9082521200180054, + "loss_ce": 0.004931831732392311, + "loss_iou": 0.390625, + "loss_num": 0.0240478515625, + "loss_xval": 0.90234375, + "num_input_tokens_seen": 391423728, + "step": 5852 + }, + { + "epoch": 0.6641702127659574, + "grad_norm": 27.108566284179688, + "learning_rate": 5e-05, + "loss": 1.101, + "num_input_tokens_seen": 391489812, + "step": 5853 + }, + { + "epoch": 0.6641702127659574, + "loss": 1.104796290397644, + "loss_ce": 0.006651776842772961, + "loss_iou": 0.44140625, + "loss_num": 0.043212890625, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 391489812, + "step": 5853 + }, + { + "epoch": 0.6642836879432624, + "grad_norm": 26.31740379333496, + "learning_rate": 5e-05, + "loss": 1.1684, + "num_input_tokens_seen": 391557700, + "step": 5854 + }, + { + "epoch": 0.6642836879432624, + "loss": 1.1843795776367188, + "loss_ce": 0.00713350810110569, + "loss_iou": 0.482421875, + "loss_num": 0.04248046875, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 391557700, + "step": 5854 + }, + { + "epoch": 0.6643971631205674, + "grad_norm": 23.918209075927734, + "learning_rate": 5e-05, + "loss": 1.2287, + "num_input_tokens_seen": 391624204, + "step": 5855 + }, + { + "epoch": 0.6643971631205674, + "loss": 1.5419976711273193, + "loss_ce": 0.0092827919870615, + "loss_iou": 0.56640625, + "loss_num": 0.080078125, + "loss_xval": 1.53125, + "num_input_tokens_seen": 391624204, + "step": 5855 + }, + { + "epoch": 0.6645106382978724, + "grad_norm": 34.75970458984375, + "learning_rate": 5e-05, + "loss": 1.2213, + "num_input_tokens_seen": 391690708, + "step": 5856 + }, + { + "epoch": 0.6645106382978724, + "loss": 1.167440414428711, + "loss_ce": 0.009725521318614483, + "loss_iou": 0.47265625, + "loss_num": 0.042724609375, + "loss_xval": 1.15625, + "num_input_tokens_seen": 391690708, + "step": 5856 + }, + { + "epoch": 0.6646241134751774, + "grad_norm": 61.05065155029297, + "learning_rate": 5e-05, + "loss": 1.3008, + "num_input_tokens_seen": 391757908, + "step": 5857 + }, + { + "epoch": 0.6646241134751774, + "loss": 1.2134411334991455, + "loss_ce": 0.004456713330000639, + "loss_iou": 0.484375, + "loss_num": 0.048095703125, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 391757908, + "step": 5857 + }, + { + "epoch": 0.6647375886524822, + "grad_norm": 25.061616897583008, + "learning_rate": 5e-05, + "loss": 1.473, + "num_input_tokens_seen": 391823088, + "step": 5858 + }, + { + "epoch": 0.6647375886524822, + "loss": 1.2594740390777588, + "loss_ce": 0.0078567024320364, + "loss_iou": 0.494140625, + "loss_num": 0.05224609375, + "loss_xval": 1.25, + "num_input_tokens_seen": 391823088, + "step": 5858 + }, + { + "epoch": 0.6648510638297872, + "grad_norm": 27.24336814880371, + "learning_rate": 5e-05, + "loss": 1.1313, + "num_input_tokens_seen": 391889096, + "step": 5859 + }, + { + "epoch": 0.6648510638297872, + "loss": 1.3506470918655396, + "loss_ce": 0.004455626010894775, + "loss_iou": 0.53515625, + "loss_num": 0.055419921875, + "loss_xval": 1.34375, + "num_input_tokens_seen": 391889096, + "step": 5859 + }, + { + "epoch": 0.6649645390070922, + "grad_norm": 26.352916717529297, + "learning_rate": 5e-05, + "loss": 1.1942, + "num_input_tokens_seen": 391955136, + "step": 5860 + }, + { + "epoch": 0.6649645390070922, + "loss": 1.2392513751983643, + "loss_ce": 0.00512045668438077, + "loss_iou": 0.474609375, + "loss_num": 0.057373046875, + "loss_xval": 1.234375, + "num_input_tokens_seen": 391955136, + "step": 5860 + }, + { + "epoch": 0.6650780141843972, + "grad_norm": 36.800376892089844, + "learning_rate": 5e-05, + "loss": 1.3017, + "num_input_tokens_seen": 392022084, + "step": 5861 + }, + { + "epoch": 0.6650780141843972, + "loss": 1.3378746509552002, + "loss_ce": 0.006820059381425381, + "loss_iou": 0.53515625, + "loss_num": 0.052001953125, + "loss_xval": 1.328125, + "num_input_tokens_seen": 392022084, + "step": 5861 + }, + { + "epoch": 0.6651914893617021, + "grad_norm": 30.188766479492188, + "learning_rate": 5e-05, + "loss": 1.2577, + "num_input_tokens_seen": 392088160, + "step": 5862 + }, + { + "epoch": 0.6651914893617021, + "loss": 1.2320160865783691, + "loss_ce": 0.003988741431385279, + "loss_iou": 0.51171875, + "loss_num": 0.040283203125, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 392088160, + "step": 5862 + }, + { + "epoch": 0.6653049645390071, + "grad_norm": 21.10012435913086, + "learning_rate": 5e-05, + "loss": 1.1451, + "num_input_tokens_seen": 392155668, + "step": 5863 + }, + { + "epoch": 0.6653049645390071, + "loss": 1.1375072002410889, + "loss_ce": 0.010065866634249687, + "loss_iou": 0.462890625, + "loss_num": 0.04052734375, + "loss_xval": 1.125, + "num_input_tokens_seen": 392155668, + "step": 5863 + }, + { + "epoch": 0.6654184397163121, + "grad_norm": 23.190200805664062, + "learning_rate": 5e-05, + "loss": 1.1393, + "num_input_tokens_seen": 392221540, + "step": 5864 + }, + { + "epoch": 0.6654184397163121, + "loss": 1.1216483116149902, + "loss_ce": 0.004216654226183891, + "loss_iou": 0.462890625, + "loss_num": 0.038330078125, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 392221540, + "step": 5864 + }, + { + "epoch": 0.6655319148936171, + "grad_norm": 30.279157638549805, + "learning_rate": 5e-05, + "loss": 1.1985, + "num_input_tokens_seen": 392289484, + "step": 5865 + }, + { + "epoch": 0.6655319148936171, + "loss": 1.3454563617706299, + "loss_ce": 0.003659445559605956, + "loss_iou": 0.515625, + "loss_num": 0.0615234375, + "loss_xval": 1.34375, + "num_input_tokens_seen": 392289484, + "step": 5865 + }, + { + "epoch": 0.6656453900709219, + "grad_norm": 35.30691909790039, + "learning_rate": 5e-05, + "loss": 1.2359, + "num_input_tokens_seen": 392356252, + "step": 5866 + }, + { + "epoch": 0.6656453900709219, + "loss": 1.122511625289917, + "loss_ce": 0.009718623012304306, + "loss_iou": 0.4765625, + "loss_num": 0.031982421875, + "loss_xval": 1.109375, + "num_input_tokens_seen": 392356252, + "step": 5866 + }, + { + "epoch": 0.6657588652482269, + "grad_norm": 45.87765884399414, + "learning_rate": 5e-05, + "loss": 1.3371, + "num_input_tokens_seen": 392424340, + "step": 5867 + }, + { + "epoch": 0.6657588652482269, + "loss": 1.1899199485778809, + "loss_ce": 0.008767573162913322, + "loss_iou": 0.486328125, + "loss_num": 0.0419921875, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 392424340, + "step": 5867 + }, + { + "epoch": 0.6658723404255319, + "grad_norm": 38.02565383911133, + "learning_rate": 5e-05, + "loss": 1.4993, + "num_input_tokens_seen": 392491228, + "step": 5868 + }, + { + "epoch": 0.6658723404255319, + "loss": 1.4305180311203003, + "loss_ce": 0.007666514255106449, + "loss_iou": 0.578125, + "loss_num": 0.05419921875, + "loss_xval": 1.421875, + "num_input_tokens_seen": 392491228, + "step": 5868 + }, + { + "epoch": 0.6659858156028369, + "grad_norm": 18.954322814941406, + "learning_rate": 5e-05, + "loss": 1.1082, + "num_input_tokens_seen": 392558140, + "step": 5869 + }, + { + "epoch": 0.6659858156028369, + "loss": 1.1691548824310303, + "loss_ce": 0.008754510432481766, + "loss_iou": 0.453125, + "loss_num": 0.050537109375, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 392558140, + "step": 5869 + }, + { + "epoch": 0.6660992907801419, + "grad_norm": 11.751955032348633, + "learning_rate": 5e-05, + "loss": 1.2875, + "num_input_tokens_seen": 392624240, + "step": 5870 + }, + { + "epoch": 0.6660992907801419, + "loss": 1.2205876111984253, + "loss_ce": 0.011603251099586487, + "loss_iou": 0.416015625, + "loss_num": 0.07568359375, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 392624240, + "step": 5870 + }, + { + "epoch": 0.6662127659574468, + "grad_norm": 28.840858459472656, + "learning_rate": 5e-05, + "loss": 1.0766, + "num_input_tokens_seen": 392691124, + "step": 5871 + }, + { + "epoch": 0.6662127659574468, + "loss": 1.01228666305542, + "loss_ce": 0.0074038440361619, + "loss_iou": 0.375, + "loss_num": 0.051025390625, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 392691124, + "step": 5871 + }, + { + "epoch": 0.6663262411347518, + "grad_norm": 21.843721389770508, + "learning_rate": 5e-05, + "loss": 1.1384, + "num_input_tokens_seen": 392757552, + "step": 5872 + }, + { + "epoch": 0.6663262411347518, + "loss": 1.2725030183792114, + "loss_ce": 0.006878057960420847, + "loss_iou": 0.51953125, + "loss_num": 0.044921875, + "loss_xval": 1.265625, + "num_input_tokens_seen": 392757552, + "step": 5872 + }, + { + "epoch": 0.6664397163120568, + "grad_norm": 31.25461769104004, + "learning_rate": 5e-05, + "loss": 1.1769, + "num_input_tokens_seen": 392824868, + "step": 5873 + }, + { + "epoch": 0.6664397163120568, + "loss": 1.3225793838500977, + "loss_ce": 0.005684853997081518, + "loss_iou": 0.53125, + "loss_num": 0.05029296875, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 392824868, + "step": 5873 + }, + { + "epoch": 0.6665531914893617, + "grad_norm": 27.602039337158203, + "learning_rate": 5e-05, + "loss": 1.3711, + "num_input_tokens_seen": 392891124, + "step": 5874 + }, + { + "epoch": 0.6665531914893617, + "loss": 1.1335188150405884, + "loss_ce": 0.008030545897781849, + "loss_iou": 0.490234375, + "loss_num": 0.0289306640625, + "loss_xval": 1.125, + "num_input_tokens_seen": 392891124, + "step": 5874 + }, + { + "epoch": 0.6666666666666666, + "grad_norm": 43.944419860839844, + "learning_rate": 5e-05, + "loss": 1.0662, + "num_input_tokens_seen": 392958524, + "step": 5875 + }, + { + "epoch": 0.6666666666666666, + "loss": 1.255807876586914, + "loss_ce": 0.007761046756058931, + "loss_iou": 0.5078125, + "loss_num": 0.045654296875, + "loss_xval": 1.25, + "num_input_tokens_seen": 392958524, + "step": 5875 + }, + { + "epoch": 0.6667801418439716, + "grad_norm": 23.439910888671875, + "learning_rate": 5e-05, + "loss": 1.2351, + "num_input_tokens_seen": 393025116, + "step": 5876 + }, + { + "epoch": 0.6667801418439716, + "loss": 1.415899395942688, + "loss_ce": 0.0057431962341070175, + "loss_iou": 0.59375, + "loss_num": 0.0439453125, + "loss_xval": 1.40625, + "num_input_tokens_seen": 393025116, + "step": 5876 + }, + { + "epoch": 0.6668936170212766, + "grad_norm": 33.94044494628906, + "learning_rate": 5e-05, + "loss": 1.2237, + "num_input_tokens_seen": 393091952, + "step": 5877 + }, + { + "epoch": 0.6668936170212766, + "loss": 1.1272380352020264, + "loss_ce": 0.0056559196673333645, + "loss_iou": 0.4453125, + "loss_num": 0.04638671875, + "loss_xval": 1.125, + "num_input_tokens_seen": 393091952, + "step": 5877 + }, + { + "epoch": 0.6670070921985816, + "grad_norm": 28.012996673583984, + "learning_rate": 5e-05, + "loss": 1.3403, + "num_input_tokens_seen": 393159436, + "step": 5878 + }, + { + "epoch": 0.6670070921985816, + "loss": 1.2750040292739868, + "loss_ce": 0.006937605328857899, + "loss_iou": 0.546875, + "loss_num": 0.03466796875, + "loss_xval": 1.265625, + "num_input_tokens_seen": 393159436, + "step": 5878 + }, + { + "epoch": 0.6671205673758865, + "grad_norm": 25.72968292236328, + "learning_rate": 5e-05, + "loss": 1.0389, + "num_input_tokens_seen": 393225184, + "step": 5879 + }, + { + "epoch": 0.6671205673758865, + "loss": 0.9100556969642639, + "loss_ce": 0.0064912354573607445, + "loss_iou": 0.328125, + "loss_num": 0.0498046875, + "loss_xval": 0.90234375, + "num_input_tokens_seen": 393225184, + "step": 5879 + }, + { + "epoch": 0.6672340425531915, + "grad_norm": 31.69163703918457, + "learning_rate": 5e-05, + "loss": 1.2637, + "num_input_tokens_seen": 393293444, + "step": 5880 + }, + { + "epoch": 0.6672340425531915, + "loss": 1.3230260610580444, + "loss_ce": 0.011014279909431934, + "loss_iou": 0.5703125, + "loss_num": 0.03369140625, + "loss_xval": 1.3125, + "num_input_tokens_seen": 393293444, + "step": 5880 + }, + { + "epoch": 0.6673475177304965, + "grad_norm": 25.63760757446289, + "learning_rate": 5e-05, + "loss": 1.1376, + "num_input_tokens_seen": 393360200, + "step": 5881 + }, + { + "epoch": 0.6673475177304965, + "loss": 1.2413063049316406, + "loss_ce": 0.0054663969203829765, + "loss_iou": 0.52734375, + "loss_num": 0.03662109375, + "loss_xval": 1.234375, + "num_input_tokens_seen": 393360200, + "step": 5881 + }, + { + "epoch": 0.6674609929078014, + "grad_norm": 641.04345703125, + "learning_rate": 5e-05, + "loss": 1.2613, + "num_input_tokens_seen": 393426376, + "step": 5882 + }, + { + "epoch": 0.6674609929078014, + "loss": 1.26021409034729, + "loss_ce": 0.018514912575483322, + "loss_iou": 0.484375, + "loss_num": 0.0546875, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 393426376, + "step": 5882 + }, + { + "epoch": 0.6675744680851063, + "grad_norm": 24.03923225402832, + "learning_rate": 5e-05, + "loss": 1.0951, + "num_input_tokens_seen": 393494120, + "step": 5883 + }, + { + "epoch": 0.6675744680851063, + "loss": 1.0537042617797852, + "loss_ce": 0.006341050378978252, + "loss_iou": 0.404296875, + "loss_num": 0.047607421875, + "loss_xval": 1.046875, + "num_input_tokens_seen": 393494120, + "step": 5883 + }, + { + "epoch": 0.6676879432624113, + "grad_norm": 28.697595596313477, + "learning_rate": 5e-05, + "loss": 1.0774, + "num_input_tokens_seen": 393560932, + "step": 5884 + }, + { + "epoch": 0.6676879432624113, + "loss": 0.9520516395568848, + "loss_ce": 0.007959806360304356, + "loss_iou": 0.3515625, + "loss_num": 0.048095703125, + "loss_xval": 0.9453125, + "num_input_tokens_seen": 393560932, + "step": 5884 + }, + { + "epoch": 0.6678014184397163, + "grad_norm": 93.03516387939453, + "learning_rate": 5e-05, + "loss": 1.2023, + "num_input_tokens_seen": 393627664, + "step": 5885 + }, + { + "epoch": 0.6678014184397163, + "loss": 1.176023244857788, + "loss_ce": 0.004636507015675306, + "loss_iou": 0.45703125, + "loss_num": 0.05126953125, + "loss_xval": 1.171875, + "num_input_tokens_seen": 393627664, + "step": 5885 + }, + { + "epoch": 0.6679148936170213, + "grad_norm": 26.249032974243164, + "learning_rate": 5e-05, + "loss": 1.1283, + "num_input_tokens_seen": 393695288, + "step": 5886 + }, + { + "epoch": 0.6679148936170213, + "loss": 1.133087158203125, + "loss_ce": 0.009552099741995335, + "loss_iou": 0.4375, + "loss_num": 0.04931640625, + "loss_xval": 1.125, + "num_input_tokens_seen": 393695288, + "step": 5886 + }, + { + "epoch": 0.6680283687943263, + "grad_norm": 21.719093322753906, + "learning_rate": 5e-05, + "loss": 1.2739, + "num_input_tokens_seen": 393761668, + "step": 5887 + }, + { + "epoch": 0.6680283687943263, + "loss": 1.2143882513046265, + "loss_ce": 0.004427344538271427, + "loss_iou": 0.44921875, + "loss_num": 0.0625, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 393761668, + "step": 5887 + }, + { + "epoch": 0.6681418439716312, + "grad_norm": 25.020532608032227, + "learning_rate": 5e-05, + "loss": 1.3261, + "num_input_tokens_seen": 393829020, + "step": 5888 + }, + { + "epoch": 0.6681418439716312, + "loss": 1.30755615234375, + "loss_ce": 0.008728023618459702, + "loss_iou": 0.51953125, + "loss_num": 0.0517578125, + "loss_xval": 1.296875, + "num_input_tokens_seen": 393829020, + "step": 5888 + }, + { + "epoch": 0.6682553191489362, + "grad_norm": 33.60303497314453, + "learning_rate": 5e-05, + "loss": 1.1435, + "num_input_tokens_seen": 393895256, + "step": 5889 + }, + { + "epoch": 0.6682553191489362, + "loss": 1.0140609741210938, + "loss_ce": 0.007713336497545242, + "loss_iou": 0.3828125, + "loss_num": 0.0478515625, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 393895256, + "step": 5889 + }, + { + "epoch": 0.6683687943262412, + "grad_norm": 41.555782318115234, + "learning_rate": 5e-05, + "loss": 1.1775, + "num_input_tokens_seen": 393961772, + "step": 5890 + }, + { + "epoch": 0.6683687943262412, + "loss": 1.2654162645339966, + "loss_ce": 0.006138941738754511, + "loss_iou": 0.546875, + "loss_num": 0.03271484375, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 393961772, + "step": 5890 + }, + { + "epoch": 0.6684822695035461, + "grad_norm": 27.42561149597168, + "learning_rate": 5e-05, + "loss": 1.2073, + "num_input_tokens_seen": 394028892, + "step": 5891 + }, + { + "epoch": 0.6684822695035461, + "loss": 1.0751069784164429, + "loss_ce": 0.00528278574347496, + "loss_iou": 0.46484375, + "loss_num": 0.0274658203125, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 394028892, + "step": 5891 + }, + { + "epoch": 0.668595744680851, + "grad_norm": 29.06510353088379, + "learning_rate": 5e-05, + "loss": 1.2853, + "num_input_tokens_seen": 394096112, + "step": 5892 + }, + { + "epoch": 0.668595744680851, + "loss": 1.0906848907470703, + "loss_ce": 0.006700406316667795, + "loss_iou": 0.4765625, + "loss_num": 0.0263671875, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 394096112, + "step": 5892 + }, + { + "epoch": 0.668709219858156, + "grad_norm": 28.032760620117188, + "learning_rate": 5e-05, + "loss": 1.3001, + "num_input_tokens_seen": 394162080, + "step": 5893 + }, + { + "epoch": 0.668709219858156, + "loss": 1.3393275737762451, + "loss_ce": 0.007784558460116386, + "loss_iou": 0.515625, + "loss_num": 0.06005859375, + "loss_xval": 1.328125, + "num_input_tokens_seen": 394162080, + "step": 5893 + }, + { + "epoch": 0.668822695035461, + "grad_norm": 29.030536651611328, + "learning_rate": 5e-05, + "loss": 1.2823, + "num_input_tokens_seen": 394228480, + "step": 5894 + }, + { + "epoch": 0.668822695035461, + "loss": 1.1761516332626343, + "loss_ce": 0.008671166375279427, + "loss_iou": 0.4921875, + "loss_num": 0.03662109375, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 394228480, + "step": 5894 + }, + { + "epoch": 0.668936170212766, + "grad_norm": 37.03972244262695, + "learning_rate": 5e-05, + "loss": 1.2314, + "num_input_tokens_seen": 394295404, + "step": 5895 + }, + { + "epoch": 0.668936170212766, + "loss": 1.2469550371170044, + "loss_ce": 0.005255859345197678, + "loss_iou": 0.53125, + "loss_num": 0.0361328125, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 394295404, + "step": 5895 + }, + { + "epoch": 0.669049645390071, + "grad_norm": 26.657756805419922, + "learning_rate": 5e-05, + "loss": 1.397, + "num_input_tokens_seen": 394362164, + "step": 5896 + }, + { + "epoch": 0.669049645390071, + "loss": 1.4508016109466553, + "loss_ce": 0.005977444350719452, + "loss_iou": 0.54296875, + "loss_num": 0.0712890625, + "loss_xval": 1.4453125, + "num_input_tokens_seen": 394362164, + "step": 5896 + }, + { + "epoch": 0.6691631205673759, + "grad_norm": 45.789772033691406, + "learning_rate": 5e-05, + "loss": 1.084, + "num_input_tokens_seen": 394428600, + "step": 5897 + }, + { + "epoch": 0.6691631205673759, + "loss": 1.0192441940307617, + "loss_ce": 0.004595729056745768, + "loss_iou": 0.40234375, + "loss_num": 0.042236328125, + "loss_xval": 1.015625, + "num_input_tokens_seen": 394428600, + "step": 5897 + }, + { + "epoch": 0.6692765957446809, + "grad_norm": 24.84782600402832, + "learning_rate": 5e-05, + "loss": 1.208, + "num_input_tokens_seen": 394495324, + "step": 5898 + }, + { + "epoch": 0.6692765957446809, + "loss": 1.0364494323730469, + "loss_ce": 0.007885050028562546, + "loss_iou": 0.44921875, + "loss_num": 0.0263671875, + "loss_xval": 1.03125, + "num_input_tokens_seen": 394495324, + "step": 5898 + }, + { + "epoch": 0.6693900709219858, + "grad_norm": 53.66986846923828, + "learning_rate": 5e-05, + "loss": 1.1902, + "num_input_tokens_seen": 394562612, + "step": 5899 + }, + { + "epoch": 0.6693900709219858, + "loss": 1.0358589887619019, + "loss_ce": 0.008027004078030586, + "loss_iou": 0.4453125, + "loss_num": 0.02783203125, + "loss_xval": 1.03125, + "num_input_tokens_seen": 394562612, + "step": 5899 + }, + { + "epoch": 0.6695035460992907, + "grad_norm": 31.416584014892578, + "learning_rate": 5e-05, + "loss": 1.376, + "num_input_tokens_seen": 394629300, + "step": 5900 + }, + { + "epoch": 0.6695035460992907, + "loss": 1.3430700302124023, + "loss_ce": 0.005179470404982567, + "loss_iou": 0.57421875, + "loss_num": 0.037353515625, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 394629300, + "step": 5900 + }, + { + "epoch": 0.6696170212765957, + "grad_norm": 15.670997619628906, + "learning_rate": 5e-05, + "loss": 1.0669, + "num_input_tokens_seen": 394695768, + "step": 5901 + }, + { + "epoch": 0.6696170212765957, + "loss": 1.008797287940979, + "loss_ce": 0.0065999687649309635, + "loss_iou": 0.376953125, + "loss_num": 0.050048828125, + "loss_xval": 1.0, + "num_input_tokens_seen": 394695768, + "step": 5901 + }, + { + "epoch": 0.6697304964539007, + "grad_norm": 18.341880798339844, + "learning_rate": 5e-05, + "loss": 1.07, + "num_input_tokens_seen": 394761888, + "step": 5902 + }, + { + "epoch": 0.6697304964539007, + "loss": 0.8954035043716431, + "loss_ce": 0.006609536707401276, + "loss_iou": 0.36328125, + "loss_num": 0.032470703125, + "loss_xval": 0.890625, + "num_input_tokens_seen": 394761888, + "step": 5902 + }, + { + "epoch": 0.6698439716312057, + "grad_norm": 59.262630462646484, + "learning_rate": 5e-05, + "loss": 1.1808, + "num_input_tokens_seen": 394828728, + "step": 5903 + }, + { + "epoch": 0.6698439716312057, + "loss": 1.106587529182434, + "loss_ce": 0.009419579990208149, + "loss_iou": 0.45703125, + "loss_num": 0.036376953125, + "loss_xval": 1.09375, + "num_input_tokens_seen": 394828728, + "step": 5903 + }, + { + "epoch": 0.6699574468085107, + "grad_norm": 26.23919677734375, + "learning_rate": 5e-05, + "loss": 1.3541, + "num_input_tokens_seen": 394896224, + "step": 5904 + }, + { + "epoch": 0.6699574468085107, + "loss": 1.5034376382827759, + "loss_ce": 0.006367325317114592, + "loss_iou": 0.56640625, + "loss_num": 0.0732421875, + "loss_xval": 1.5, + "num_input_tokens_seen": 394896224, + "step": 5904 + }, + { + "epoch": 0.6700709219858156, + "grad_norm": 25.854843139648438, + "learning_rate": 5e-05, + "loss": 1.1941, + "num_input_tokens_seen": 394963260, + "step": 5905 + }, + { + "epoch": 0.6700709219858156, + "loss": 1.1707333326339722, + "loss_ce": 0.005205980967730284, + "loss_iou": 0.443359375, + "loss_num": 0.055908203125, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 394963260, + "step": 5905 + }, + { + "epoch": 0.6701843971631206, + "grad_norm": 25.17776870727539, + "learning_rate": 5e-05, + "loss": 1.3172, + "num_input_tokens_seen": 395028440, + "step": 5906 + }, + { + "epoch": 0.6701843971631206, + "loss": 1.2367230653762817, + "loss_ce": 0.0045452299527823925, + "loss_iou": 0.515625, + "loss_num": 0.0400390625, + "loss_xval": 1.234375, + "num_input_tokens_seen": 395028440, + "step": 5906 + }, + { + "epoch": 0.6702978723404255, + "grad_norm": 50.536434173583984, + "learning_rate": 5e-05, + "loss": 1.0071, + "num_input_tokens_seen": 395096320, + "step": 5907 + }, + { + "epoch": 0.6702978723404255, + "loss": 1.0158085823059082, + "loss_ce": 0.003357368055731058, + "loss_iou": 0.447265625, + "loss_num": 0.0235595703125, + "loss_xval": 1.015625, + "num_input_tokens_seen": 395096320, + "step": 5907 + }, + { + "epoch": 0.6704113475177305, + "grad_norm": 57.48963928222656, + "learning_rate": 5e-05, + "loss": 1.3182, + "num_input_tokens_seen": 395163120, + "step": 5908 + }, + { + "epoch": 0.6704113475177305, + "loss": 1.2902777194976807, + "loss_ce": 0.006586229428648949, + "loss_iou": 0.53515625, + "loss_num": 0.0419921875, + "loss_xval": 1.28125, + "num_input_tokens_seen": 395163120, + "step": 5908 + }, + { + "epoch": 0.6705248226950354, + "grad_norm": 51.707828521728516, + "learning_rate": 5e-05, + "loss": 1.6027, + "num_input_tokens_seen": 395230028, + "step": 5909 + }, + { + "epoch": 0.6705248226950354, + "loss": 1.4071979522705078, + "loss_ce": 0.005708720069378614, + "loss_iou": 0.53125, + "loss_num": 0.068359375, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 395230028, + "step": 5909 + }, + { + "epoch": 0.6706382978723404, + "grad_norm": 35.977535247802734, + "learning_rate": 5e-05, + "loss": 1.2063, + "num_input_tokens_seen": 395297164, + "step": 5910 + }, + { + "epoch": 0.6706382978723404, + "loss": 1.2369637489318848, + "loss_ce": 0.0069832950830459595, + "loss_iou": 0.5078125, + "loss_num": 0.04248046875, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 395297164, + "step": 5910 + }, + { + "epoch": 0.6707517730496454, + "grad_norm": 95.26815032958984, + "learning_rate": 5e-05, + "loss": 1.0155, + "num_input_tokens_seen": 395364824, + "step": 5911 + }, + { + "epoch": 0.6707517730496454, + "loss": 0.8417586088180542, + "loss_ce": 0.005088677164167166, + "loss_iou": 0.365234375, + "loss_num": 0.0213623046875, + "loss_xval": 0.8359375, + "num_input_tokens_seen": 395364824, + "step": 5911 + }, + { + "epoch": 0.6708652482269504, + "grad_norm": 24.950693130493164, + "learning_rate": 5e-05, + "loss": 1.1638, + "num_input_tokens_seen": 395431380, + "step": 5912 + }, + { + "epoch": 0.6708652482269504, + "loss": 1.1394658088684082, + "loss_ce": 0.00567679014056921, + "loss_iou": 0.44140625, + "loss_num": 0.0498046875, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 395431380, + "step": 5912 + }, + { + "epoch": 0.6709787234042554, + "grad_norm": 42.17686080932617, + "learning_rate": 5e-05, + "loss": 1.2578, + "num_input_tokens_seen": 395498764, + "step": 5913 + }, + { + "epoch": 0.6709787234042554, + "loss": 1.3290064334869385, + "loss_ce": 0.004787641577422619, + "loss_iou": 0.5, + "loss_num": 0.0654296875, + "loss_xval": 1.328125, + "num_input_tokens_seen": 395498764, + "step": 5913 + }, + { + "epoch": 0.6710921985815603, + "grad_norm": 82.23526000976562, + "learning_rate": 5e-05, + "loss": 1.4824, + "num_input_tokens_seen": 395565868, + "step": 5914 + }, + { + "epoch": 0.6710921985815603, + "loss": 1.431535243988037, + "loss_ce": 0.011125150136649609, + "loss_iou": 0.55859375, + "loss_num": 0.060546875, + "loss_xval": 1.421875, + "num_input_tokens_seen": 395565868, + "step": 5914 + }, + { + "epoch": 0.6712056737588652, + "grad_norm": 23.071022033691406, + "learning_rate": 5e-05, + "loss": 1.0745, + "num_input_tokens_seen": 395632948, + "step": 5915 + }, + { + "epoch": 0.6712056737588652, + "loss": 1.074451208114624, + "loss_ce": 0.004138678312301636, + "loss_iou": 0.45703125, + "loss_num": 0.031494140625, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 395632948, + "step": 5915 + }, + { + "epoch": 0.6713191489361702, + "grad_norm": 22.163494110107422, + "learning_rate": 5e-05, + "loss": 0.942, + "num_input_tokens_seen": 395699376, + "step": 5916 + }, + { + "epoch": 0.6713191489361702, + "loss": 0.9090235233306885, + "loss_ce": 0.006206718273460865, + "loss_iou": 0.373046875, + "loss_num": 0.031494140625, + "loss_xval": 0.90234375, + "num_input_tokens_seen": 395699376, + "step": 5916 + }, + { + "epoch": 0.6714326241134752, + "grad_norm": 29.750545501708984, + "learning_rate": 5e-05, + "loss": 1.4266, + "num_input_tokens_seen": 395765872, + "step": 5917 + }, + { + "epoch": 0.6714326241134752, + "loss": 1.3693370819091797, + "loss_ce": 0.004590964410454035, + "loss_iou": 0.51171875, + "loss_num": 0.06884765625, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 395765872, + "step": 5917 + }, + { + "epoch": 0.6715460992907801, + "grad_norm": 30.570274353027344, + "learning_rate": 5e-05, + "loss": 1.2103, + "num_input_tokens_seen": 395832568, + "step": 5918 + }, + { + "epoch": 0.6715460992907801, + "loss": 1.1297215223312378, + "loss_ce": 0.008139502257108688, + "loss_iou": 0.48046875, + "loss_num": 0.032470703125, + "loss_xval": 1.125, + "num_input_tokens_seen": 395832568, + "step": 5918 + }, + { + "epoch": 0.6716595744680851, + "grad_norm": 32.45125198364258, + "learning_rate": 5e-05, + "loss": 1.2354, + "num_input_tokens_seen": 395899732, + "step": 5919 + }, + { + "epoch": 0.6716595744680851, + "loss": 1.3200232982635498, + "loss_ce": 0.008499879390001297, + "loss_iou": 0.546875, + "loss_num": 0.04345703125, + "loss_xval": 1.3125, + "num_input_tokens_seen": 395899732, + "step": 5919 + }, + { + "epoch": 0.6717730496453901, + "grad_norm": 25.50892448425293, + "learning_rate": 5e-05, + "loss": 1.2926, + "num_input_tokens_seen": 395966320, + "step": 5920 + }, + { + "epoch": 0.6717730496453901, + "loss": 1.428945541381836, + "loss_ce": 0.010488471016287804, + "loss_iou": 0.5703125, + "loss_num": 0.054931640625, + "loss_xval": 1.421875, + "num_input_tokens_seen": 395966320, + "step": 5920 + }, + { + "epoch": 0.6718865248226951, + "grad_norm": 20.17902374267578, + "learning_rate": 5e-05, + "loss": 1.1125, + "num_input_tokens_seen": 396033812, + "step": 5921 + }, + { + "epoch": 0.6718865248226951, + "loss": 1.3614377975463867, + "loss_ce": 0.007922263815999031, + "loss_iou": 0.5078125, + "loss_num": 0.06787109375, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 396033812, + "step": 5921 + }, + { + "epoch": 0.672, + "grad_norm": 19.927507400512695, + "learning_rate": 5e-05, + "loss": 1.2187, + "num_input_tokens_seen": 396100684, + "step": 5922 + }, + { + "epoch": 0.672, + "loss": 1.1493854522705078, + "loss_ce": 0.007783900946378708, + "loss_iou": 0.48828125, + "loss_num": 0.032958984375, + "loss_xval": 1.140625, + "num_input_tokens_seen": 396100684, + "step": 5922 + }, + { + "epoch": 0.672113475177305, + "grad_norm": 17.271543502807617, + "learning_rate": 5e-05, + "loss": 1.2501, + "num_input_tokens_seen": 396166124, + "step": 5923 + }, + { + "epoch": 0.672113475177305, + "loss": 1.303847312927246, + "loss_ce": 0.004530993290245533, + "loss_iou": 0.51953125, + "loss_num": 0.052490234375, + "loss_xval": 1.296875, + "num_input_tokens_seen": 396166124, + "step": 5923 + }, + { + "epoch": 0.6722269503546099, + "grad_norm": 20.499862670898438, + "learning_rate": 5e-05, + "loss": 0.9656, + "num_input_tokens_seen": 396232024, + "step": 5924 + }, + { + "epoch": 0.6722269503546099, + "loss": 1.0608915090560913, + "loss_ce": 0.00864543579518795, + "loss_iou": 0.4296875, + "loss_num": 0.038818359375, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 396232024, + "step": 5924 + }, + { + "epoch": 0.6723404255319149, + "grad_norm": 36.87358093261719, + "learning_rate": 5e-05, + "loss": 1.2457, + "num_input_tokens_seen": 396299284, + "step": 5925 + }, + { + "epoch": 0.6723404255319149, + "loss": 1.17421555519104, + "loss_ce": 0.006735104136168957, + "loss_iou": 0.44921875, + "loss_num": 0.0537109375, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 396299284, + "step": 5925 + }, + { + "epoch": 0.6724539007092198, + "grad_norm": 43.04545593261719, + "learning_rate": 5e-05, + "loss": 1.4325, + "num_input_tokens_seen": 396367300, + "step": 5926 + }, + { + "epoch": 0.6724539007092198, + "loss": 1.4332973957061768, + "loss_ce": 0.009469341486692429, + "loss_iou": 0.58984375, + "loss_num": 0.048828125, + "loss_xval": 1.421875, + "num_input_tokens_seen": 396367300, + "step": 5926 + }, + { + "epoch": 0.6725673758865248, + "grad_norm": 24.580183029174805, + "learning_rate": 5e-05, + "loss": 1.1019, + "num_input_tokens_seen": 396435148, + "step": 5927 + }, + { + "epoch": 0.6725673758865248, + "loss": 1.0712217092514038, + "loss_ce": 0.01165144331753254, + "loss_iou": 0.4375, + "loss_num": 0.036865234375, + "loss_xval": 1.0625, + "num_input_tokens_seen": 396435148, + "step": 5927 + }, + { + "epoch": 0.6726808510638298, + "grad_norm": 21.68295669555664, + "learning_rate": 5e-05, + "loss": 1.3627, + "num_input_tokens_seen": 396502064, + "step": 5928 + }, + { + "epoch": 0.6726808510638298, + "loss": 1.3590638637542725, + "loss_ce": 0.006036469247192144, + "loss_iou": 0.5390625, + "loss_num": 0.0546875, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 396502064, + "step": 5928 + }, + { + "epoch": 0.6727943262411348, + "grad_norm": 30.930030822753906, + "learning_rate": 5e-05, + "loss": 1.0908, + "num_input_tokens_seen": 396568988, + "step": 5929 + }, + { + "epoch": 0.6727943262411348, + "loss": 1.0953781604766846, + "loss_ce": 0.006999182514846325, + "loss_iou": 0.40234375, + "loss_num": 0.056884765625, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 396568988, + "step": 5929 + }, + { + "epoch": 0.6729078014184398, + "grad_norm": 37.67666244506836, + "learning_rate": 5e-05, + "loss": 1.1471, + "num_input_tokens_seen": 396635444, + "step": 5930 + }, + { + "epoch": 0.6729078014184398, + "loss": 1.2059683799743652, + "loss_ce": 0.006795430555939674, + "loss_iou": 0.48046875, + "loss_num": 0.0478515625, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 396635444, + "step": 5930 + }, + { + "epoch": 0.6730212765957447, + "grad_norm": 21.39958381652832, + "learning_rate": 5e-05, + "loss": 0.8937, + "num_input_tokens_seen": 396702276, + "step": 5931 + }, + { + "epoch": 0.6730212765957447, + "loss": 0.8687708973884583, + "loss_ce": 0.00573379173874855, + "loss_iou": 0.361328125, + "loss_num": 0.02783203125, + "loss_xval": 0.86328125, + "num_input_tokens_seen": 396702276, + "step": 5931 + }, + { + "epoch": 0.6731347517730496, + "grad_norm": 30.019634246826172, + "learning_rate": 5e-05, + "loss": 1.3254, + "num_input_tokens_seen": 396769048, + "step": 5932 + }, + { + "epoch": 0.6731347517730496, + "loss": 1.3820191621780396, + "loss_ce": 0.0055542923510074615, + "loss_iou": 0.5390625, + "loss_num": 0.05908203125, + "loss_xval": 1.375, + "num_input_tokens_seen": 396769048, + "step": 5932 + }, + { + "epoch": 0.6732482269503546, + "grad_norm": 35.684173583984375, + "learning_rate": 5e-05, + "loss": 1.2591, + "num_input_tokens_seen": 396834616, + "step": 5933 + }, + { + "epoch": 0.6732482269503546, + "loss": 1.242013931274414, + "loss_ce": 0.006662261672317982, + "loss_iou": 0.515625, + "loss_num": 0.04052734375, + "loss_xval": 1.234375, + "num_input_tokens_seen": 396834616, + "step": 5933 + }, + { + "epoch": 0.6733617021276596, + "grad_norm": 112.89700317382812, + "learning_rate": 5e-05, + "loss": 1.5607, + "num_input_tokens_seen": 396901508, + "step": 5934 + }, + { + "epoch": 0.6733617021276596, + "loss": 1.6416873931884766, + "loss_ce": 0.003015402238816023, + "loss_iou": 0.66015625, + "loss_num": 0.06396484375, + "loss_xval": 1.640625, + "num_input_tokens_seen": 396901508, + "step": 5934 + }, + { + "epoch": 0.6734751773049645, + "grad_norm": 31.43565559387207, + "learning_rate": 5e-05, + "loss": 1.258, + "num_input_tokens_seen": 396968372, + "step": 5935 + }, + { + "epoch": 0.6734751773049645, + "loss": 1.2333049774169922, + "loss_ce": 0.006742421071976423, + "loss_iou": 0.486328125, + "loss_num": 0.050537109375, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 396968372, + "step": 5935 + }, + { + "epoch": 0.6735886524822695, + "grad_norm": 40.870487213134766, + "learning_rate": 5e-05, + "loss": 1.108, + "num_input_tokens_seen": 397035796, + "step": 5936 + }, + { + "epoch": 0.6735886524822695, + "loss": 1.208770513534546, + "loss_ce": 0.006133876740932465, + "loss_iou": 0.486328125, + "loss_num": 0.046142578125, + "loss_xval": 1.203125, + "num_input_tokens_seen": 397035796, + "step": 5936 + }, + { + "epoch": 0.6737021276595745, + "grad_norm": 37.4747200012207, + "learning_rate": 5e-05, + "loss": 1.14, + "num_input_tokens_seen": 397103168, + "step": 5937 + }, + { + "epoch": 0.6737021276595745, + "loss": 1.1073293685913086, + "loss_ce": 0.006987609434872866, + "loss_iou": 0.44140625, + "loss_num": 0.04296875, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 397103168, + "step": 5937 + }, + { + "epoch": 0.6738156028368795, + "grad_norm": 30.13249397277832, + "learning_rate": 5e-05, + "loss": 1.3685, + "num_input_tokens_seen": 397170712, + "step": 5938 + }, + { + "epoch": 0.6738156028368795, + "loss": 1.444774866104126, + "loss_ce": 0.005810030736029148, + "loss_iou": 0.5703125, + "loss_num": 0.06005859375, + "loss_xval": 1.4375, + "num_input_tokens_seen": 397170712, + "step": 5938 + }, + { + "epoch": 0.6739290780141844, + "grad_norm": 15.50639533996582, + "learning_rate": 5e-05, + "loss": 1.2937, + "num_input_tokens_seen": 397238080, + "step": 5939 + }, + { + "epoch": 0.6739290780141844, + "loss": 1.4101500511169434, + "loss_ce": 0.00682975584641099, + "loss_iou": 0.515625, + "loss_num": 0.07470703125, + "loss_xval": 1.40625, + "num_input_tokens_seen": 397238080, + "step": 5939 + }, + { + "epoch": 0.6740425531914893, + "grad_norm": 13.700655937194824, + "learning_rate": 5e-05, + "loss": 1.2223, + "num_input_tokens_seen": 397304200, + "step": 5940 + }, + { + "epoch": 0.6740425531914893, + "loss": 1.0473568439483643, + "loss_ce": 0.007806036621332169, + "loss_iou": 0.369140625, + "loss_num": 0.060302734375, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 397304200, + "step": 5940 + }, + { + "epoch": 0.6741560283687943, + "grad_norm": 18.545225143432617, + "learning_rate": 5e-05, + "loss": 1.233, + "num_input_tokens_seen": 397370596, + "step": 5941 + }, + { + "epoch": 0.6741560283687943, + "loss": 1.1933343410491943, + "loss_ce": 0.008764002472162247, + "loss_iou": 0.458984375, + "loss_num": 0.05322265625, + "loss_xval": 1.1875, + "num_input_tokens_seen": 397370596, + "step": 5941 + }, + { + "epoch": 0.6742695035460993, + "grad_norm": 20.067445755004883, + "learning_rate": 5e-05, + "loss": 1.176, + "num_input_tokens_seen": 397437712, + "step": 5942 + }, + { + "epoch": 0.6742695035460993, + "loss": 1.0100234746932983, + "loss_ce": 0.008314480073750019, + "loss_iou": 0.40234375, + "loss_num": 0.03955078125, + "loss_xval": 1.0, + "num_input_tokens_seen": 397437712, + "step": 5942 + }, + { + "epoch": 0.6743829787234042, + "grad_norm": 50.69752883911133, + "learning_rate": 5e-05, + "loss": 1.2421, + "num_input_tokens_seen": 397504124, + "step": 5943 + }, + { + "epoch": 0.6743829787234042, + "loss": 1.4137072563171387, + "loss_ce": 0.005504177883267403, + "loss_iou": 0.578125, + "loss_num": 0.050048828125, + "loss_xval": 1.40625, + "num_input_tokens_seen": 397504124, + "step": 5943 + }, + { + "epoch": 0.6744964539007092, + "grad_norm": 19.195178985595703, + "learning_rate": 5e-05, + "loss": 1.2178, + "num_input_tokens_seen": 397570808, + "step": 5944 + }, + { + "epoch": 0.6744964539007092, + "loss": 1.0634284019470215, + "loss_ce": 0.005811211187392473, + "loss_iou": 0.408203125, + "loss_num": 0.048095703125, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 397570808, + "step": 5944 + }, + { + "epoch": 0.6746099290780142, + "grad_norm": 17.203872680664062, + "learning_rate": 5e-05, + "loss": 1.2846, + "num_input_tokens_seen": 397638304, + "step": 5945 + }, + { + "epoch": 0.6746099290780142, + "loss": 1.3411452770233154, + "loss_ce": 0.004231105092912912, + "loss_iou": 0.546875, + "loss_num": 0.048583984375, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 397638304, + "step": 5945 + }, + { + "epoch": 0.6747234042553192, + "grad_norm": 35.19927215576172, + "learning_rate": 5e-05, + "loss": 1.2098, + "num_input_tokens_seen": 397705320, + "step": 5946 + }, + { + "epoch": 0.6747234042553192, + "loss": 1.0217945575714111, + "loss_ce": 0.008610958233475685, + "loss_iou": 0.375, + "loss_num": 0.052734375, + "loss_xval": 1.015625, + "num_input_tokens_seen": 397705320, + "step": 5946 + }, + { + "epoch": 0.6748368794326242, + "grad_norm": 23.41328239440918, + "learning_rate": 5e-05, + "loss": 1.1037, + "num_input_tokens_seen": 397772308, + "step": 5947 + }, + { + "epoch": 0.6748368794326242, + "loss": 1.2578818798065186, + "loss_ce": 0.0069052549079060555, + "loss_iou": 0.5234375, + "loss_num": 0.04052734375, + "loss_xval": 1.25, + "num_input_tokens_seen": 397772308, + "step": 5947 + }, + { + "epoch": 0.674950354609929, + "grad_norm": 20.885786056518555, + "learning_rate": 5e-05, + "loss": 1.3426, + "num_input_tokens_seen": 397839848, + "step": 5948 + }, + { + "epoch": 0.674950354609929, + "loss": 1.4526035785675049, + "loss_ce": 0.005338072311133146, + "loss_iou": 0.5625, + "loss_num": 0.064453125, + "loss_xval": 1.4453125, + "num_input_tokens_seen": 397839848, + "step": 5948 + }, + { + "epoch": 0.675063829787234, + "grad_norm": 99.24678039550781, + "learning_rate": 5e-05, + "loss": 1.2731, + "num_input_tokens_seen": 397906040, + "step": 5949 + }, + { + "epoch": 0.675063829787234, + "loss": 1.5291571617126465, + "loss_ce": 0.008649333380162716, + "loss_iou": 0.58203125, + "loss_num": 0.07177734375, + "loss_xval": 1.5234375, + "num_input_tokens_seen": 397906040, + "step": 5949 + }, + { + "epoch": 0.675177304964539, + "grad_norm": 34.39048385620117, + "learning_rate": 5e-05, + "loss": 1.2089, + "num_input_tokens_seen": 397973236, + "step": 5950 + }, + { + "epoch": 0.675177304964539, + "loss": 1.1120319366455078, + "loss_ce": 0.007905865088105202, + "loss_iou": 0.4765625, + "loss_num": 0.0301513671875, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 397973236, + "step": 5950 + }, + { + "epoch": 0.675290780141844, + "grad_norm": 33.172237396240234, + "learning_rate": 5e-05, + "loss": 1.426, + "num_input_tokens_seen": 398040788, + "step": 5951 + }, + { + "epoch": 0.675290780141844, + "loss": 1.2058467864990234, + "loss_ce": 0.00613968912512064, + "loss_iou": 0.48828125, + "loss_num": 0.044189453125, + "loss_xval": 1.203125, + "num_input_tokens_seen": 398040788, + "step": 5951 + }, + { + "epoch": 0.6754042553191489, + "grad_norm": 34.998905181884766, + "learning_rate": 5e-05, + "loss": 1.4638, + "num_input_tokens_seen": 398107780, + "step": 5952 + }, + { + "epoch": 0.6754042553191489, + "loss": 1.5249431133270264, + "loss_ce": 0.00882987305521965, + "loss_iou": 0.6015625, + "loss_num": 0.062255859375, + "loss_xval": 1.515625, + "num_input_tokens_seen": 398107780, + "step": 5952 + }, + { + "epoch": 0.6755177304964539, + "grad_norm": 30.018362045288086, + "learning_rate": 5e-05, + "loss": 1.1899, + "num_input_tokens_seen": 398174040, + "step": 5953 + }, + { + "epoch": 0.6755177304964539, + "loss": 1.1788597106933594, + "loss_ce": 0.005519800819456577, + "loss_iou": 0.51171875, + "loss_num": 0.02978515625, + "loss_xval": 1.171875, + "num_input_tokens_seen": 398174040, + "step": 5953 + }, + { + "epoch": 0.6756312056737589, + "grad_norm": 23.149885177612305, + "learning_rate": 5e-05, + "loss": 1.2312, + "num_input_tokens_seen": 398240768, + "step": 5954 + }, + { + "epoch": 0.6756312056737589, + "loss": 1.2618285417556763, + "loss_ce": 0.006945738103240728, + "loss_iou": 0.5, + "loss_num": 0.051025390625, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 398240768, + "step": 5954 + }, + { + "epoch": 0.6757446808510639, + "grad_norm": 440.5176086425781, + "learning_rate": 5e-05, + "loss": 1.4094, + "num_input_tokens_seen": 398307824, + "step": 5955 + }, + { + "epoch": 0.6757446808510639, + "loss": 1.2729377746582031, + "loss_ce": 0.004627177957445383, + "loss_iou": 0.51171875, + "loss_num": 0.04833984375, + "loss_xval": 1.265625, + "num_input_tokens_seen": 398307824, + "step": 5955 + }, + { + "epoch": 0.6758581560283687, + "grad_norm": 36.13932418823242, + "learning_rate": 5e-05, + "loss": 1.353, + "num_input_tokens_seen": 398374028, + "step": 5956 + }, + { + "epoch": 0.6758581560283687, + "loss": 1.416368007659912, + "loss_ce": 0.005723500158637762, + "loss_iou": 0.5546875, + "loss_num": 0.060546875, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 398374028, + "step": 5956 + }, + { + "epoch": 0.6759716312056737, + "grad_norm": 27.31009292602539, + "learning_rate": 5e-05, + "loss": 1.2515, + "num_input_tokens_seen": 398440532, + "step": 5957 + }, + { + "epoch": 0.6759716312056737, + "loss": 1.239119052886963, + "loss_ce": 0.004499989561736584, + "loss_iou": 0.48828125, + "loss_num": 0.051513671875, + "loss_xval": 1.234375, + "num_input_tokens_seen": 398440532, + "step": 5957 + }, + { + "epoch": 0.6760851063829787, + "grad_norm": 37.850067138671875, + "learning_rate": 5e-05, + "loss": 1.1804, + "num_input_tokens_seen": 398508036, + "step": 5958 + }, + { + "epoch": 0.6760851063829787, + "loss": 1.1727244853973389, + "loss_ce": 0.00670882873237133, + "loss_iou": 0.498046875, + "loss_num": 0.0341796875, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 398508036, + "step": 5958 + }, + { + "epoch": 0.6761985815602837, + "grad_norm": 27.0640811920166, + "learning_rate": 5e-05, + "loss": 1.3298, + "num_input_tokens_seen": 398574840, + "step": 5959 + }, + { + "epoch": 0.6761985815602837, + "loss": 1.300616979598999, + "loss_ce": 0.008624805137515068, + "loss_iou": 0.5703125, + "loss_num": 0.03125, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 398574840, + "step": 5959 + }, + { + "epoch": 0.6763120567375887, + "grad_norm": 41.34649658203125, + "learning_rate": 5e-05, + "loss": 1.1469, + "num_input_tokens_seen": 398641556, + "step": 5960 + }, + { + "epoch": 0.6763120567375887, + "loss": 1.1512856483459473, + "loss_ce": 0.0062660472467541695, + "loss_iou": 0.484375, + "loss_num": 0.035400390625, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 398641556, + "step": 5960 + }, + { + "epoch": 0.6764255319148936, + "grad_norm": 34.034873962402344, + "learning_rate": 5e-05, + "loss": 1.1812, + "num_input_tokens_seen": 398708008, + "step": 5961 + }, + { + "epoch": 0.6764255319148936, + "loss": 1.221198558807373, + "loss_ce": 0.005378230474889278, + "loss_iou": 0.462890625, + "loss_num": 0.058349609375, + "loss_xval": 1.21875, + "num_input_tokens_seen": 398708008, + "step": 5961 + }, + { + "epoch": 0.6765390070921986, + "grad_norm": 31.192554473876953, + "learning_rate": 5e-05, + "loss": 1.4271, + "num_input_tokens_seen": 398774864, + "step": 5962 + }, + { + "epoch": 0.6765390070921986, + "loss": 1.3166568279266357, + "loss_ce": 0.005133401602506638, + "loss_iou": 0.5390625, + "loss_num": 0.046875, + "loss_xval": 1.3125, + "num_input_tokens_seen": 398774864, + "step": 5962 + }, + { + "epoch": 0.6766524822695036, + "grad_norm": 23.6192569732666, + "learning_rate": 5e-05, + "loss": 1.2071, + "num_input_tokens_seen": 398843420, + "step": 5963 + }, + { + "epoch": 0.6766524822695036, + "loss": 1.2134101390838623, + "loss_ce": 0.00344911590218544, + "loss_iou": 0.51171875, + "loss_num": 0.037109375, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 398843420, + "step": 5963 + }, + { + "epoch": 0.6767659574468086, + "grad_norm": 23.770160675048828, + "learning_rate": 5e-05, + "loss": 0.981, + "num_input_tokens_seen": 398910516, + "step": 5964 + }, + { + "epoch": 0.6767659574468086, + "loss": 0.8692044019699097, + "loss_ce": 0.009096995927393436, + "loss_iou": 0.39453125, + "loss_num": 0.01458740234375, + "loss_xval": 0.859375, + "num_input_tokens_seen": 398910516, + "step": 5964 + }, + { + "epoch": 0.6768794326241134, + "grad_norm": 30.876235961914062, + "learning_rate": 5e-05, + "loss": 1.3087, + "num_input_tokens_seen": 398977236, + "step": 5965 + }, + { + "epoch": 0.6768794326241134, + "loss": 1.2027428150177002, + "loss_ce": 0.004500505980104208, + "loss_iou": 0.421875, + "loss_num": 0.07080078125, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 398977236, + "step": 5965 + }, + { + "epoch": 0.6769929078014184, + "grad_norm": 28.37596893310547, + "learning_rate": 5e-05, + "loss": 1.3339, + "num_input_tokens_seen": 399043828, + "step": 5966 + }, + { + "epoch": 0.6769929078014184, + "loss": 1.270884394645691, + "loss_ce": 0.004771102219820023, + "loss_iou": 0.498046875, + "loss_num": 0.053955078125, + "loss_xval": 1.265625, + "num_input_tokens_seen": 399043828, + "step": 5966 + }, + { + "epoch": 0.6771063829787234, + "grad_norm": 33.79174041748047, + "learning_rate": 5e-05, + "loss": 0.9353, + "num_input_tokens_seen": 399110384, + "step": 5967 + }, + { + "epoch": 0.6771063829787234, + "loss": 1.0328369140625, + "loss_ce": 0.0077209314331412315, + "loss_iou": 0.41796875, + "loss_num": 0.037841796875, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 399110384, + "step": 5967 + }, + { + "epoch": 0.6772198581560284, + "grad_norm": 34.04669952392578, + "learning_rate": 5e-05, + "loss": 1.0576, + "num_input_tokens_seen": 399177344, + "step": 5968 + }, + { + "epoch": 0.6772198581560284, + "loss": 1.0045316219329834, + "loss_ce": 0.0048368144780397415, + "loss_iou": 0.43359375, + "loss_num": 0.0263671875, + "loss_xval": 1.0, + "num_input_tokens_seen": 399177344, + "step": 5968 + }, + { + "epoch": 0.6773333333333333, + "grad_norm": 37.89372634887695, + "learning_rate": 5e-05, + "loss": 1.3321, + "num_input_tokens_seen": 399244164, + "step": 5969 + }, + { + "epoch": 0.6773333333333333, + "loss": 1.2993383407592773, + "loss_ce": 0.008322663605213165, + "loss_iou": 0.515625, + "loss_num": 0.052490234375, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 399244164, + "step": 5969 + }, + { + "epoch": 0.6774468085106383, + "grad_norm": 38.70955276489258, + "learning_rate": 5e-05, + "loss": 1.5144, + "num_input_tokens_seen": 399310576, + "step": 5970 + }, + { + "epoch": 0.6774468085106383, + "loss": 1.4016296863555908, + "loss_ce": 0.0073426254093647, + "loss_iou": 0.578125, + "loss_num": 0.047119140625, + "loss_xval": 1.390625, + "num_input_tokens_seen": 399310576, + "step": 5970 + }, + { + "epoch": 0.6775602836879433, + "grad_norm": 32.41683578491211, + "learning_rate": 5e-05, + "loss": 1.1508, + "num_input_tokens_seen": 399376808, + "step": 5971 + }, + { + "epoch": 0.6775602836879433, + "loss": 1.0556230545043945, + "loss_ce": 0.005818328820168972, + "loss_iou": 0.447265625, + "loss_num": 0.0308837890625, + "loss_xval": 1.046875, + "num_input_tokens_seen": 399376808, + "step": 5971 + }, + { + "epoch": 0.6776737588652483, + "grad_norm": 33.28594207763672, + "learning_rate": 5e-05, + "loss": 1.382, + "num_input_tokens_seen": 399444468, + "step": 5972 + }, + { + "epoch": 0.6776737588652483, + "loss": 1.2385073900222778, + "loss_ce": 0.005597248673439026, + "loss_iou": 0.50390625, + "loss_num": 0.04541015625, + "loss_xval": 1.234375, + "num_input_tokens_seen": 399444468, + "step": 5972 + }, + { + "epoch": 0.6777872340425531, + "grad_norm": 55.4251594543457, + "learning_rate": 5e-05, + "loss": 1.3375, + "num_input_tokens_seen": 399510604, + "step": 5973 + }, + { + "epoch": 0.6777872340425531, + "loss": 1.3974449634552002, + "loss_ce": 0.008773074485361576, + "loss_iou": 0.515625, + "loss_num": 0.07177734375, + "loss_xval": 1.390625, + "num_input_tokens_seen": 399510604, + "step": 5973 + }, + { + "epoch": 0.6779007092198581, + "grad_norm": 45.87091064453125, + "learning_rate": 5e-05, + "loss": 1.3155, + "num_input_tokens_seen": 399577868, + "step": 5974 + }, + { + "epoch": 0.6779007092198581, + "loss": 1.374859094619751, + "loss_ce": 0.006939167622476816, + "loss_iou": 0.546875, + "loss_num": 0.05419921875, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 399577868, + "step": 5974 + }, + { + "epoch": 0.6780141843971631, + "grad_norm": 30.201187133789062, + "learning_rate": 5e-05, + "loss": 1.3373, + "num_input_tokens_seen": 399644968, + "step": 5975 + }, + { + "epoch": 0.6780141843971631, + "loss": 1.3228890895843506, + "loss_ce": 0.008924251422286034, + "loss_iou": 0.5078125, + "loss_num": 0.059814453125, + "loss_xval": 1.3125, + "num_input_tokens_seen": 399644968, + "step": 5975 + }, + { + "epoch": 0.6781276595744681, + "grad_norm": 17.711233139038086, + "learning_rate": 5e-05, + "loss": 1.2307, + "num_input_tokens_seen": 399712472, + "step": 5976 + }, + { + "epoch": 0.6781276595744681, + "loss": 1.179340124130249, + "loss_ce": 0.010135482996702194, + "loss_iou": 0.43359375, + "loss_num": 0.06005859375, + "loss_xval": 1.171875, + "num_input_tokens_seen": 399712472, + "step": 5976 + }, + { + "epoch": 0.6782411347517731, + "grad_norm": 20.986417770385742, + "learning_rate": 5e-05, + "loss": 1.1279, + "num_input_tokens_seen": 399779576, + "step": 5977 + }, + { + "epoch": 0.6782411347517731, + "loss": 1.076406478881836, + "loss_ce": 0.004140838980674744, + "loss_iou": 0.453125, + "loss_num": 0.033447265625, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 399779576, + "step": 5977 + }, + { + "epoch": 0.678354609929078, + "grad_norm": 14.862164497375488, + "learning_rate": 5e-05, + "loss": 0.9651, + "num_input_tokens_seen": 399845928, + "step": 5978 + }, + { + "epoch": 0.678354609929078, + "loss": 0.8481579422950745, + "loss_ce": 0.0034313774667680264, + "loss_iou": 0.357421875, + "loss_num": 0.02587890625, + "loss_xval": 0.84375, + "num_input_tokens_seen": 399845928, + "step": 5978 + }, + { + "epoch": 0.678468085106383, + "grad_norm": 26.572216033935547, + "learning_rate": 5e-05, + "loss": 1.0822, + "num_input_tokens_seen": 399912692, + "step": 5979 + }, + { + "epoch": 0.678468085106383, + "loss": 1.126549482345581, + "loss_ce": 0.006920620799064636, + "loss_iou": 0.45703125, + "loss_num": 0.04150390625, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 399912692, + "step": 5979 + }, + { + "epoch": 0.678581560283688, + "grad_norm": 67.43982696533203, + "learning_rate": 5e-05, + "loss": 1.0041, + "num_input_tokens_seen": 399978460, + "step": 5980 + }, + { + "epoch": 0.678581560283688, + "loss": 1.0570162534713745, + "loss_ce": 0.010507455095648766, + "loss_iou": 0.4140625, + "loss_num": 0.044189453125, + "loss_xval": 1.046875, + "num_input_tokens_seen": 399978460, + "step": 5980 + }, + { + "epoch": 0.6786950354609929, + "grad_norm": 34.205726623535156, + "learning_rate": 5e-05, + "loss": 1.3286, + "num_input_tokens_seen": 400046476, + "step": 5981 + }, + { + "epoch": 0.6786950354609929, + "loss": 1.3802129030227661, + "loss_ce": 0.007165991701185703, + "loss_iou": 0.578125, + "loss_num": 0.04248046875, + "loss_xval": 1.375, + "num_input_tokens_seen": 400046476, + "step": 5981 + }, + { + "epoch": 0.6788085106382978, + "grad_norm": 29.926095962524414, + "learning_rate": 5e-05, + "loss": 1.2885, + "num_input_tokens_seen": 400114180, + "step": 5982 + }, + { + "epoch": 0.6788085106382978, + "loss": 1.2045960426330566, + "loss_ce": 0.006842153146862984, + "loss_iou": 0.51171875, + "loss_num": 0.0341796875, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 400114180, + "step": 5982 + }, + { + "epoch": 0.6789219858156028, + "grad_norm": 22.203052520751953, + "learning_rate": 5e-05, + "loss": 1.4002, + "num_input_tokens_seen": 400179864, + "step": 5983 + }, + { + "epoch": 0.6789219858156028, + "loss": 1.5186150074005127, + "loss_ce": 0.010314276441931725, + "loss_iou": 0.55859375, + "loss_num": 0.078125, + "loss_xval": 1.5078125, + "num_input_tokens_seen": 400179864, + "step": 5983 + }, + { + "epoch": 0.6790354609929078, + "grad_norm": 21.06292152404785, + "learning_rate": 5e-05, + "loss": 1.2166, + "num_input_tokens_seen": 400246584, + "step": 5984 + }, + { + "epoch": 0.6790354609929078, + "loss": 1.108864665031433, + "loss_ce": 0.006691832561045885, + "loss_iou": 0.42578125, + "loss_num": 0.050048828125, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 400246584, + "step": 5984 + }, + { + "epoch": 0.6791489361702128, + "grad_norm": 83.26758575439453, + "learning_rate": 5e-05, + "loss": 1.2156, + "num_input_tokens_seen": 400312844, + "step": 5985 + }, + { + "epoch": 0.6791489361702128, + "loss": 1.20853590965271, + "loss_ce": 0.0058991326950490475, + "loss_iou": 0.4609375, + "loss_num": 0.05615234375, + "loss_xval": 1.203125, + "num_input_tokens_seen": 400312844, + "step": 5985 + }, + { + "epoch": 0.6792624113475177, + "grad_norm": 37.781211853027344, + "learning_rate": 5e-05, + "loss": 1.3765, + "num_input_tokens_seen": 400379756, + "step": 5986 + }, + { + "epoch": 0.6792624113475177, + "loss": 1.5408258438110352, + "loss_ce": 0.009575811214745045, + "loss_iou": 0.58984375, + "loss_num": 0.0703125, + "loss_xval": 1.53125, + "num_input_tokens_seen": 400379756, + "step": 5986 + }, + { + "epoch": 0.6793758865248227, + "grad_norm": 38.54136657714844, + "learning_rate": 5e-05, + "loss": 1.2925, + "num_input_tokens_seen": 400446020, + "step": 5987 + }, + { + "epoch": 0.6793758865248227, + "loss": 1.313362717628479, + "loss_ce": 0.005257249344140291, + "loss_iou": 0.515625, + "loss_num": 0.055908203125, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 400446020, + "step": 5987 + }, + { + "epoch": 0.6794893617021277, + "grad_norm": 33.855926513671875, + "learning_rate": 5e-05, + "loss": 1.3946, + "num_input_tokens_seen": 400512712, + "step": 5988 + }, + { + "epoch": 0.6794893617021277, + "loss": 1.3710256814956665, + "loss_ce": 0.005303007084876299, + "loss_iou": 0.5546875, + "loss_num": 0.05126953125, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 400512712, + "step": 5988 + }, + { + "epoch": 0.6796028368794326, + "grad_norm": 25.627492904663086, + "learning_rate": 5e-05, + "loss": 1.2144, + "num_input_tokens_seen": 400580032, + "step": 5989 + }, + { + "epoch": 0.6796028368794326, + "loss": 1.3449938297271729, + "loss_ce": 0.007591515779495239, + "loss_iou": 0.5234375, + "loss_num": 0.05908203125, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 400580032, + "step": 5989 + }, + { + "epoch": 0.6797163120567375, + "grad_norm": 28.88280487060547, + "learning_rate": 5e-05, + "loss": 1.1294, + "num_input_tokens_seen": 400646120, + "step": 5990 + }, + { + "epoch": 0.6797163120567375, + "loss": 1.2623608112335205, + "loss_ce": 0.008454550057649612, + "loss_iou": 0.484375, + "loss_num": 0.056640625, + "loss_xval": 1.25, + "num_input_tokens_seen": 400646120, + "step": 5990 + }, + { + "epoch": 0.6798297872340425, + "grad_norm": 30.77800178527832, + "learning_rate": 5e-05, + "loss": 1.123, + "num_input_tokens_seen": 400713284, + "step": 5991 + }, + { + "epoch": 0.6798297872340425, + "loss": 1.0755736827850342, + "loss_ce": 0.007702585309743881, + "loss_iou": 0.455078125, + "loss_num": 0.031494140625, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 400713284, + "step": 5991 + }, + { + "epoch": 0.6799432624113475, + "grad_norm": 29.316328048706055, + "learning_rate": 5e-05, + "loss": 1.0703, + "num_input_tokens_seen": 400779908, + "step": 5992 + }, + { + "epoch": 0.6799432624113475, + "loss": 1.0963146686553955, + "loss_ce": 0.007203374989330769, + "loss_iou": 0.46875, + "loss_num": 0.0301513671875, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 400779908, + "step": 5992 + }, + { + "epoch": 0.6800567375886525, + "grad_norm": 29.231037139892578, + "learning_rate": 5e-05, + "loss": 1.164, + "num_input_tokens_seen": 400846996, + "step": 5993 + }, + { + "epoch": 0.6800567375886525, + "loss": 1.213376760482788, + "loss_ce": 0.00756620429456234, + "loss_iou": 0.478515625, + "loss_num": 0.0498046875, + "loss_xval": 1.203125, + "num_input_tokens_seen": 400846996, + "step": 5993 + }, + { + "epoch": 0.6801702127659575, + "grad_norm": 21.908937454223633, + "learning_rate": 5e-05, + "loss": 1.1381, + "num_input_tokens_seen": 400913460, + "step": 5994 + }, + { + "epoch": 0.6801702127659575, + "loss": 1.2197763919830322, + "loss_ce": 0.007862278260290623, + "loss_iou": 0.48046875, + "loss_num": 0.05029296875, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 400913460, + "step": 5994 + }, + { + "epoch": 0.6802836879432624, + "grad_norm": 28.613903045654297, + "learning_rate": 5e-05, + "loss": 1.2954, + "num_input_tokens_seen": 400980508, + "step": 5995 + }, + { + "epoch": 0.6802836879432624, + "loss": 1.2566261291503906, + "loss_ce": 0.008579201065003872, + "loss_iou": 0.48046875, + "loss_num": 0.05810546875, + "loss_xval": 1.25, + "num_input_tokens_seen": 400980508, + "step": 5995 + }, + { + "epoch": 0.6803971631205674, + "grad_norm": 29.218467712402344, + "learning_rate": 5e-05, + "loss": 1.1675, + "num_input_tokens_seen": 401046532, + "step": 5996 + }, + { + "epoch": 0.6803971631205674, + "loss": 1.0753601789474487, + "loss_ce": 0.006512477062642574, + "loss_iou": 0.40234375, + "loss_num": 0.05322265625, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 401046532, + "step": 5996 + }, + { + "epoch": 0.6805106382978724, + "grad_norm": 42.502410888671875, + "learning_rate": 5e-05, + "loss": 1.1522, + "num_input_tokens_seen": 401113768, + "step": 5997 + }, + { + "epoch": 0.6805106382978724, + "loss": 1.2341887950897217, + "loss_ce": 0.010556019842624664, + "loss_iou": 0.47265625, + "loss_num": 0.05517578125, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 401113768, + "step": 5997 + }, + { + "epoch": 0.6806241134751773, + "grad_norm": 28.717350006103516, + "learning_rate": 5e-05, + "loss": 1.2188, + "num_input_tokens_seen": 401180868, + "step": 5998 + }, + { + "epoch": 0.6806241134751773, + "loss": 1.2341418266296387, + "loss_ce": 0.006602772511541843, + "loss_iou": 0.5234375, + "loss_num": 0.03515625, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 401180868, + "step": 5998 + }, + { + "epoch": 0.6807375886524822, + "grad_norm": 24.076379776000977, + "learning_rate": 5e-05, + "loss": 1.2292, + "num_input_tokens_seen": 401247868, + "step": 5999 + }, + { + "epoch": 0.6807375886524822, + "loss": 1.1652894020080566, + "loss_ce": 0.004644874483346939, + "loss_iou": 0.455078125, + "loss_num": 0.05029296875, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 401247868, + "step": 5999 + }, + { + "epoch": 0.6808510638297872, + "grad_norm": 19.60087776184082, + "learning_rate": 5e-05, + "loss": 1.0454, + "num_input_tokens_seen": 401314064, + "step": 6000 + }, + { + "epoch": 0.6808510638297872, + "eval_seeclick_CIoU": 0.42683883011341095, + "eval_seeclick_GIoU": 0.40577566623687744, + "eval_seeclick_IoU": 0.5039343684911728, + "eval_seeclick_MAE_all": 0.15974248945713043, + "eval_seeclick_MAE_h": 0.1131662018597126, + "eval_seeclick_MAE_w": 0.12266816571354866, + "eval_seeclick_MAE_x_boxes": 0.19903521239757538, + "eval_seeclick_MAE_y_boxes": 0.11733388155698776, + "eval_seeclick_NUM_probability": 0.9999554455280304, + "eval_seeclick_inside_bbox": 0.6927083432674408, + "eval_seeclick_loss": 2.43998122215271, + "eval_seeclick_loss_ce": 0.014055794104933739, + "eval_seeclick_loss_iou": 0.83221435546875, + "eval_seeclick_loss_num": 0.1525421142578125, + "eval_seeclick_loss_xval": 2.4263916015625, + "eval_seeclick_runtime": 66.2586, + "eval_seeclick_samples_per_second": 0.709, + "eval_seeclick_steps_per_second": 0.03, + "num_input_tokens_seen": 401314064, + "step": 6000 + }, + { + "epoch": 0.6808510638297872, + "eval_icons_CIoU": 0.5377643406391144, + "eval_icons_GIoU": 0.5250002443790436, + "eval_icons_IoU": 0.577646791934967, + "eval_icons_MAE_all": 0.125640369951725, + "eval_icons_MAE_h": 0.06757982447743416, + "eval_icons_MAE_w": 0.12242797762155533, + "eval_icons_MAE_x_boxes": 0.10722251608967781, + "eval_icons_MAE_y_boxes": 0.08149617910385132, + "eval_icons_NUM_probability": 0.9999567866325378, + "eval_icons_inside_bbox": 0.8072916567325592, + "eval_icons_loss": 2.2976388931274414, + "eval_icons_loss_ce": 0.0003643663840193767, + "eval_icons_loss_iou": 0.836669921875, + "eval_icons_loss_num": 0.1263904571533203, + "eval_icons_loss_xval": 2.3037109375, + "eval_icons_runtime": 68.876, + "eval_icons_samples_per_second": 0.726, + "eval_icons_steps_per_second": 0.029, + "num_input_tokens_seen": 401314064, + "step": 6000 + }, + { + "epoch": 0.6808510638297872, + "eval_screenspot_CIoU": 0.2618577132622401, + "eval_screenspot_GIoU": 0.2301414261261622, + "eval_screenspot_IoU": 0.3671118915081024, + "eval_screenspot_MAE_all": 0.2184499204158783, + "eval_screenspot_MAE_h": 0.14882674564917883, + "eval_screenspot_MAE_w": 0.1669928083817164, + "eval_screenspot_MAE_x_boxes": 0.3154931366443634, + "eval_screenspot_MAE_y_boxes": 0.09773591409126918, + "eval_screenspot_NUM_probability": 0.9998583594957987, + "eval_screenspot_inside_bbox": 0.5704166690508524, + "eval_screenspot_loss": 3.014667272567749, + "eval_screenspot_loss_ce": 0.019778937101364136, + "eval_screenspot_loss_iou": 0.9552408854166666, + "eval_screenspot_loss_num": 0.22898356119791666, + "eval_screenspot_loss_xval": 3.0546875, + "eval_screenspot_runtime": 116.2546, + "eval_screenspot_samples_per_second": 0.766, + "eval_screenspot_steps_per_second": 0.026, + "num_input_tokens_seen": 401314064, + "step": 6000 + }, + { + "epoch": 0.6808510638297872, + "eval_compot_CIoU": 0.23484907299280167, + "eval_compot_GIoU": 0.19301829487085342, + "eval_compot_IoU": 0.33157384395599365, + "eval_compot_MAE_all": 0.23164556175470352, + "eval_compot_MAE_h": 0.11938757076859474, + "eval_compot_MAE_w": 0.2299007996916771, + "eval_compot_MAE_x_boxes": 0.22206997126340866, + "eval_compot_MAE_y_boxes": 0.15177415311336517, + "eval_compot_NUM_probability": 0.9980517029762268, + "eval_compot_inside_bbox": 0.453125, + "eval_compot_loss": 3.126302480697632, + "eval_compot_loss_ce": 0.0059827822260558605, + "eval_compot_loss_iou": 1.00341796875, + "eval_compot_loss_num": 0.229522705078125, + "eval_compot_loss_xval": 3.15185546875, + "eval_compot_runtime": 69.6421, + "eval_compot_samples_per_second": 0.718, + "eval_compot_steps_per_second": 0.029, + "num_input_tokens_seen": 401314064, + "step": 6000 + }, + { + "epoch": 0.6808510638297872, + "loss": 3.139457941055298, + "loss_ce": 0.006645324639976025, + "loss_iou": 1.0078125, + "loss_num": 0.2236328125, + "loss_xval": 3.125, + "num_input_tokens_seen": 401314064, + "step": 6000 + }, + { + "epoch": 0.6809645390070922, + "grad_norm": 28.541101455688477, + "learning_rate": 5e-05, + "loss": 1.2017, + "num_input_tokens_seen": 401380952, + "step": 6001 + }, + { + "epoch": 0.6809645390070922, + "loss": 1.216450572013855, + "loss_ce": 0.011372452601790428, + "loss_iou": 0.466796875, + "loss_num": 0.053955078125, + "loss_xval": 1.203125, + "num_input_tokens_seen": 401380952, + "step": 6001 + }, + { + "epoch": 0.6810780141843972, + "grad_norm": 26.988285064697266, + "learning_rate": 5e-05, + "loss": 1.1608, + "num_input_tokens_seen": 401447628, + "step": 6002 + }, + { + "epoch": 0.6810780141843972, + "loss": 1.4053759574890137, + "loss_ce": 0.006938394624739885, + "loss_iou": 0.5625, + "loss_num": 0.0546875, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 401447628, + "step": 6002 + }, + { + "epoch": 0.6811914893617022, + "grad_norm": 25.183265686035156, + "learning_rate": 5e-05, + "loss": 1.095, + "num_input_tokens_seen": 401513880, + "step": 6003 + }, + { + "epoch": 0.6811914893617022, + "loss": 1.1615266799926758, + "loss_ce": 0.004788358695805073, + "loss_iou": 0.46484375, + "loss_num": 0.04541015625, + "loss_xval": 1.15625, + "num_input_tokens_seen": 401513880, + "step": 6003 + }, + { + "epoch": 0.6813049645390071, + "grad_norm": 18.688268661499023, + "learning_rate": 5e-05, + "loss": 1.3334, + "num_input_tokens_seen": 401580664, + "step": 6004 + }, + { + "epoch": 0.6813049645390071, + "loss": 1.2789881229400635, + "loss_ce": 0.007503800559788942, + "loss_iou": 0.53125, + "loss_num": 0.04248046875, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 401580664, + "step": 6004 + }, + { + "epoch": 0.6814184397163121, + "grad_norm": 18.43694496154785, + "learning_rate": 5e-05, + "loss": 1.3985, + "num_input_tokens_seen": 401647316, + "step": 6005 + }, + { + "epoch": 0.6814184397163121, + "loss": 1.4018428325653076, + "loss_ce": 0.007067308761179447, + "loss_iou": 0.5234375, + "loss_num": 0.0693359375, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 401647316, + "step": 6005 + }, + { + "epoch": 0.681531914893617, + "grad_norm": 33.258888244628906, + "learning_rate": 5e-05, + "loss": 1.257, + "num_input_tokens_seen": 401713956, + "step": 6006 + }, + { + "epoch": 0.681531914893617, + "loss": 1.2613375186920166, + "loss_ce": 0.00596639746800065, + "loss_iou": 0.478515625, + "loss_num": 0.0595703125, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 401713956, + "step": 6006 + }, + { + "epoch": 0.681645390070922, + "grad_norm": 27.983699798583984, + "learning_rate": 5e-05, + "loss": 1.3558, + "num_input_tokens_seen": 401781820, + "step": 6007 + }, + { + "epoch": 0.681645390070922, + "loss": 1.453743577003479, + "loss_ce": 0.008919423446059227, + "loss_iou": 0.5546875, + "loss_num": 0.06689453125, + "loss_xval": 1.4453125, + "num_input_tokens_seen": 401781820, + "step": 6007 + }, + { + "epoch": 0.6817588652482269, + "grad_norm": 26.95011329650879, + "learning_rate": 5e-05, + "loss": 1.1389, + "num_input_tokens_seen": 401849288, + "step": 6008 + }, + { + "epoch": 0.6817588652482269, + "loss": 1.1834150552749634, + "loss_ce": 0.005192403215914965, + "loss_iou": 0.486328125, + "loss_num": 0.041015625, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 401849288, + "step": 6008 + }, + { + "epoch": 0.6818723404255319, + "grad_norm": 37.627464294433594, + "learning_rate": 5e-05, + "loss": 1.0434, + "num_input_tokens_seen": 401915356, + "step": 6009 + }, + { + "epoch": 0.6818723404255319, + "loss": 1.1729097366333008, + "loss_ce": 0.005429270211607218, + "loss_iou": 0.5, + "loss_num": 0.033935546875, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 401915356, + "step": 6009 + }, + { + "epoch": 0.6819858156028369, + "grad_norm": 34.2811164855957, + "learning_rate": 5e-05, + "loss": 1.142, + "num_input_tokens_seen": 401982396, + "step": 6010 + }, + { + "epoch": 0.6819858156028369, + "loss": 1.1788418292999268, + "loss_ce": 0.006966784596443176, + "loss_iou": 0.48046875, + "loss_num": 0.042724609375, + "loss_xval": 1.171875, + "num_input_tokens_seen": 401982396, + "step": 6010 + }, + { + "epoch": 0.6820992907801419, + "grad_norm": 34.25148010253906, + "learning_rate": 5e-05, + "loss": 0.9914, + "num_input_tokens_seen": 402051120, + "step": 6011 + }, + { + "epoch": 0.6820992907801419, + "loss": 0.9908192157745361, + "loss_ce": 0.002538006054237485, + "loss_iou": 0.44140625, + "loss_num": 0.020751953125, + "loss_xval": 0.98828125, + "num_input_tokens_seen": 402051120, + "step": 6011 + }, + { + "epoch": 0.6822127659574468, + "grad_norm": 35.199737548828125, + "learning_rate": 5e-05, + "loss": 1.35, + "num_input_tokens_seen": 402117636, + "step": 6012 + }, + { + "epoch": 0.6822127659574468, + "loss": 1.2214603424072266, + "loss_ce": 0.005640129093080759, + "loss_iou": 0.51953125, + "loss_num": 0.03564453125, + "loss_xval": 1.21875, + "num_input_tokens_seen": 402117636, + "step": 6012 + }, + { + "epoch": 0.6823262411347518, + "grad_norm": 29.3485050201416, + "learning_rate": 5e-05, + "loss": 1.0988, + "num_input_tokens_seen": 402184396, + "step": 6013 + }, + { + "epoch": 0.6823262411347518, + "loss": 1.0297374725341797, + "loss_ce": 0.00288194278255105, + "loss_iou": 0.39453125, + "loss_num": 0.047607421875, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 402184396, + "step": 6013 + }, + { + "epoch": 0.6824397163120567, + "grad_norm": 29.525264739990234, + "learning_rate": 5e-05, + "loss": 1.3749, + "num_input_tokens_seen": 402251532, + "step": 6014 + }, + { + "epoch": 0.6824397163120567, + "loss": 1.27109694480896, + "loss_ce": 0.012582478113472462, + "loss_iou": 0.51953125, + "loss_num": 0.044677734375, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 402251532, + "step": 6014 + }, + { + "epoch": 0.6825531914893617, + "grad_norm": 33.316001892089844, + "learning_rate": 5e-05, + "loss": 1.1203, + "num_input_tokens_seen": 402318656, + "step": 6015 + }, + { + "epoch": 0.6825531914893617, + "loss": 1.0876058340072632, + "loss_ce": 0.0038656299002468586, + "loss_iou": 0.404296875, + "loss_num": 0.055419921875, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 402318656, + "step": 6015 + }, + { + "epoch": 0.6826666666666666, + "grad_norm": 34.42886734008789, + "learning_rate": 5e-05, + "loss": 1.3927, + "num_input_tokens_seen": 402385312, + "step": 6016 + }, + { + "epoch": 0.6826666666666666, + "loss": 1.2466111183166504, + "loss_ce": 0.007353275083005428, + "loss_iou": 0.494140625, + "loss_num": 0.050537109375, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 402385312, + "step": 6016 + }, + { + "epoch": 0.6827801418439716, + "grad_norm": 31.631074905395508, + "learning_rate": 5e-05, + "loss": 1.1691, + "num_input_tokens_seen": 402453296, + "step": 6017 + }, + { + "epoch": 0.6827801418439716, + "loss": 1.1806222200393677, + "loss_ce": 0.0038644233718514442, + "loss_iou": 0.46484375, + "loss_num": 0.04931640625, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 402453296, + "step": 6017 + }, + { + "epoch": 0.6828936170212766, + "grad_norm": 21.524362564086914, + "learning_rate": 5e-05, + "loss": 1.1914, + "num_input_tokens_seen": 402520636, + "step": 6018 + }, + { + "epoch": 0.6828936170212766, + "loss": 1.1596935987472534, + "loss_ce": 0.009302997030317783, + "loss_iou": 0.47265625, + "loss_num": 0.04150390625, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 402520636, + "step": 6018 + }, + { + "epoch": 0.6830070921985816, + "grad_norm": 25.984397888183594, + "learning_rate": 5e-05, + "loss": 1.1661, + "num_input_tokens_seen": 402586752, + "step": 6019 + }, + { + "epoch": 0.6830070921985816, + "loss": 1.2358348369598389, + "loss_ce": 0.00976055208593607, + "loss_iou": 0.5234375, + "loss_num": 0.036376953125, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 402586752, + "step": 6019 + }, + { + "epoch": 0.6831205673758866, + "grad_norm": 30.952255249023438, + "learning_rate": 5e-05, + "loss": 1.139, + "num_input_tokens_seen": 402653344, + "step": 6020 + }, + { + "epoch": 0.6831205673758866, + "loss": 1.204469919204712, + "loss_ce": 0.005251141265034676, + "loss_iou": 0.4765625, + "loss_num": 0.048583984375, + "loss_xval": 1.203125, + "num_input_tokens_seen": 402653344, + "step": 6020 + }, + { + "epoch": 0.6832340425531915, + "grad_norm": 20.70209312438965, + "learning_rate": 5e-05, + "loss": 1.1632, + "num_input_tokens_seen": 402720676, + "step": 6021 + }, + { + "epoch": 0.6832340425531915, + "loss": 1.2381342649459839, + "loss_ce": 0.005712355952709913, + "loss_iou": 0.5078125, + "loss_num": 0.04345703125, + "loss_xval": 1.234375, + "num_input_tokens_seen": 402720676, + "step": 6021 + }, + { + "epoch": 0.6833475177304964, + "grad_norm": 28.47441864013672, + "learning_rate": 5e-05, + "loss": 1.1938, + "num_input_tokens_seen": 402787932, + "step": 6022 + }, + { + "epoch": 0.6833475177304964, + "loss": 1.127619981765747, + "loss_ce": 0.0075028156861662865, + "loss_iou": 0.4609375, + "loss_num": 0.039794921875, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 402787932, + "step": 6022 + }, + { + "epoch": 0.6834609929078014, + "grad_norm": 22.82094955444336, + "learning_rate": 5e-05, + "loss": 1.0209, + "num_input_tokens_seen": 402854240, + "step": 6023 + }, + { + "epoch": 0.6834609929078014, + "loss": 1.1975418329238892, + "loss_ce": 0.008576959371566772, + "loss_iou": 0.462890625, + "loss_num": 0.052490234375, + "loss_xval": 1.1875, + "num_input_tokens_seen": 402854240, + "step": 6023 + }, + { + "epoch": 0.6835744680851064, + "grad_norm": 31.231807708740234, + "learning_rate": 5e-05, + "loss": 1.0637, + "num_input_tokens_seen": 402921020, + "step": 6024 + }, + { + "epoch": 0.6835744680851064, + "loss": 1.1424853801727295, + "loss_ce": 0.005278340540826321, + "loss_iou": 0.478515625, + "loss_num": 0.036376953125, + "loss_xval": 1.140625, + "num_input_tokens_seen": 402921020, + "step": 6024 + }, + { + "epoch": 0.6836879432624113, + "grad_norm": 35.80858612060547, + "learning_rate": 5e-05, + "loss": 1.2203, + "num_input_tokens_seen": 402988108, + "step": 6025 + }, + { + "epoch": 0.6836879432624113, + "loss": 1.2068672180175781, + "loss_ce": 0.008136812597513199, + "loss_iou": 0.50390625, + "loss_num": 0.037841796875, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 402988108, + "step": 6025 + }, + { + "epoch": 0.6838014184397163, + "grad_norm": 19.56201934814453, + "learning_rate": 5e-05, + "loss": 1.1554, + "num_input_tokens_seen": 403055680, + "step": 6026 + }, + { + "epoch": 0.6838014184397163, + "loss": 1.15275239944458, + "loss_ce": 0.006756254006177187, + "loss_iou": 0.462890625, + "loss_num": 0.0439453125, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 403055680, + "step": 6026 + }, + { + "epoch": 0.6839148936170213, + "grad_norm": 34.77726745605469, + "learning_rate": 5e-05, + "loss": 1.1555, + "num_input_tokens_seen": 403121212, + "step": 6027 + }, + { + "epoch": 0.6839148936170213, + "loss": 0.9344714879989624, + "loss_ce": 0.005516435950994492, + "loss_iou": 0.384765625, + "loss_num": 0.0322265625, + "loss_xval": 0.9296875, + "num_input_tokens_seen": 403121212, + "step": 6027 + }, + { + "epoch": 0.6840283687943263, + "grad_norm": 24.529430389404297, + "learning_rate": 5e-05, + "loss": 1.4338, + "num_input_tokens_seen": 403188700, + "step": 6028 + }, + { + "epoch": 0.6840283687943263, + "loss": 1.4210395812988281, + "loss_ce": 0.008930201642215252, + "loss_iou": 0.60546875, + "loss_num": 0.040771484375, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 403188700, + "step": 6028 + }, + { + "epoch": 0.6841418439716312, + "grad_norm": 21.35317039489746, + "learning_rate": 5e-05, + "loss": 1.3052, + "num_input_tokens_seen": 403256300, + "step": 6029 + }, + { + "epoch": 0.6841418439716312, + "loss": 1.4060373306274414, + "loss_ce": 0.00955283734947443, + "loss_iou": 0.546875, + "loss_num": 0.06103515625, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 403256300, + "step": 6029 + }, + { + "epoch": 0.6842553191489362, + "grad_norm": 45.10862350463867, + "learning_rate": 5e-05, + "loss": 1.1582, + "num_input_tokens_seen": 403323740, + "step": 6030 + }, + { + "epoch": 0.6842553191489362, + "loss": 1.2059845924377441, + "loss_ce": 0.005301002413034439, + "loss_iou": 0.494140625, + "loss_num": 0.04248046875, + "loss_xval": 1.203125, + "num_input_tokens_seen": 403323740, + "step": 6030 + }, + { + "epoch": 0.6843687943262411, + "grad_norm": 33.087623596191406, + "learning_rate": 5e-05, + "loss": 1.3748, + "num_input_tokens_seen": 403390956, + "step": 6031 + }, + { + "epoch": 0.6843687943262411, + "loss": 1.427328109741211, + "loss_ce": 0.006917964201420546, + "loss_iou": 0.5625, + "loss_num": 0.05859375, + "loss_xval": 1.421875, + "num_input_tokens_seen": 403390956, + "step": 6031 + }, + { + "epoch": 0.6844822695035461, + "grad_norm": 73.75282287597656, + "learning_rate": 5e-05, + "loss": 1.4273, + "num_input_tokens_seen": 403457248, + "step": 6032 + }, + { + "epoch": 0.6844822695035461, + "loss": 1.5297635793685913, + "loss_ce": 0.010232292115688324, + "loss_iou": 0.578125, + "loss_num": 0.07275390625, + "loss_xval": 1.515625, + "num_input_tokens_seen": 403457248, + "step": 6032 + }, + { + "epoch": 0.684595744680851, + "grad_norm": 25.160600662231445, + "learning_rate": 5e-05, + "loss": 1.2872, + "num_input_tokens_seen": 403524548, + "step": 6033 + }, + { + "epoch": 0.684595744680851, + "loss": 1.1929243803024292, + "loss_ce": 0.005424341186881065, + "loss_iou": 0.453125, + "loss_num": 0.056640625, + "loss_xval": 1.1875, + "num_input_tokens_seen": 403524548, + "step": 6033 + }, + { + "epoch": 0.684709219858156, + "grad_norm": 33.845245361328125, + "learning_rate": 5e-05, + "loss": 1.3386, + "num_input_tokens_seen": 403591820, + "step": 6034 + }, + { + "epoch": 0.684709219858156, + "loss": 1.2576605081558228, + "loss_ce": 0.005707353353500366, + "loss_iou": 0.53515625, + "loss_num": 0.03662109375, + "loss_xval": 1.25, + "num_input_tokens_seen": 403591820, + "step": 6034 + }, + { + "epoch": 0.684822695035461, + "grad_norm": 38.064231872558594, + "learning_rate": 5e-05, + "loss": 1.2244, + "num_input_tokens_seen": 403659452, + "step": 6035 + }, + { + "epoch": 0.684822695035461, + "loss": 1.3845337629318237, + "loss_ce": 0.004162694327533245, + "loss_iou": 0.5859375, + "loss_num": 0.04150390625, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 403659452, + "step": 6035 + }, + { + "epoch": 0.684936170212766, + "grad_norm": 28.483518600463867, + "learning_rate": 5e-05, + "loss": 1.371, + "num_input_tokens_seen": 403726168, + "step": 6036 + }, + { + "epoch": 0.684936170212766, + "loss": 1.4087717533111572, + "loss_ce": 0.011799116618931293, + "loss_iou": 0.53515625, + "loss_num": 0.06494140625, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 403726168, + "step": 6036 + }, + { + "epoch": 0.685049645390071, + "grad_norm": 34.33738708496094, + "learning_rate": 5e-05, + "loss": 1.284, + "num_input_tokens_seen": 403792740, + "step": 6037 + }, + { + "epoch": 0.685049645390071, + "loss": 1.338709831237793, + "loss_ce": 0.006678475067019463, + "loss_iou": 0.4921875, + "loss_num": 0.06982421875, + "loss_xval": 1.328125, + "num_input_tokens_seen": 403792740, + "step": 6037 + }, + { + "epoch": 0.6851631205673759, + "grad_norm": 36.50056838989258, + "learning_rate": 5e-05, + "loss": 1.3899, + "num_input_tokens_seen": 403860512, + "step": 6038 + }, + { + "epoch": 0.6851631205673759, + "loss": 1.2808928489685059, + "loss_ce": 0.008920229971408844, + "loss_iou": 0.498046875, + "loss_num": 0.05517578125, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 403860512, + "step": 6038 + }, + { + "epoch": 0.6852765957446808, + "grad_norm": 40.003700256347656, + "learning_rate": 5e-05, + "loss": 1.4398, + "num_input_tokens_seen": 403927652, + "step": 6039 + }, + { + "epoch": 0.6852765957446808, + "loss": 1.1951178312301636, + "loss_ce": 0.008594388142228127, + "loss_iou": 0.470703125, + "loss_num": 0.049072265625, + "loss_xval": 1.1875, + "num_input_tokens_seen": 403927652, + "step": 6039 + }, + { + "epoch": 0.6853900709219858, + "grad_norm": 30.37912368774414, + "learning_rate": 5e-05, + "loss": 1.2312, + "num_input_tokens_seen": 403994400, + "step": 6040 + }, + { + "epoch": 0.6853900709219858, + "loss": 1.2657945156097412, + "loss_ce": 0.008470230735838413, + "loss_iou": 0.53515625, + "loss_num": 0.03759765625, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 403994400, + "step": 6040 + }, + { + "epoch": 0.6855035460992908, + "grad_norm": 34.87519073486328, + "learning_rate": 5e-05, + "loss": 1.2828, + "num_input_tokens_seen": 404060964, + "step": 6041 + }, + { + "epoch": 0.6855035460992908, + "loss": 1.3357419967651367, + "loss_ce": 0.010058445855975151, + "loss_iou": 0.5, + "loss_num": 0.0654296875, + "loss_xval": 1.328125, + "num_input_tokens_seen": 404060964, + "step": 6041 + }, + { + "epoch": 0.6856170212765957, + "grad_norm": 30.828662872314453, + "learning_rate": 5e-05, + "loss": 1.2112, + "num_input_tokens_seen": 404127928, + "step": 6042 + }, + { + "epoch": 0.6856170212765957, + "loss": 1.1207897663116455, + "loss_ce": 0.004578795284032822, + "loss_iou": 0.431640625, + "loss_num": 0.05029296875, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 404127928, + "step": 6042 + }, + { + "epoch": 0.6857304964539007, + "grad_norm": 108.6653823852539, + "learning_rate": 5e-05, + "loss": 1.2524, + "num_input_tokens_seen": 404195044, + "step": 6043 + }, + { + "epoch": 0.6857304964539007, + "loss": 1.2746634483337402, + "loss_ce": 0.005620487034320831, + "loss_iou": 0.5, + "loss_num": 0.052978515625, + "loss_xval": 1.265625, + "num_input_tokens_seen": 404195044, + "step": 6043 + }, + { + "epoch": 0.6858439716312057, + "grad_norm": 24.605863571166992, + "learning_rate": 5e-05, + "loss": 1.4011, + "num_input_tokens_seen": 404261716, + "step": 6044 + }, + { + "epoch": 0.6858439716312057, + "loss": 1.3034420013427734, + "loss_ce": 0.005102197639644146, + "loss_iou": 0.53125, + "loss_num": 0.047607421875, + "loss_xval": 1.296875, + "num_input_tokens_seen": 404261716, + "step": 6044 + }, + { + "epoch": 0.6859574468085107, + "grad_norm": 46.75754165649414, + "learning_rate": 5e-05, + "loss": 1.1826, + "num_input_tokens_seen": 404329736, + "step": 6045 + }, + { + "epoch": 0.6859574468085107, + "loss": 1.175323486328125, + "loss_ce": 0.006866407580673695, + "loss_iou": 0.4921875, + "loss_num": 0.036865234375, + "loss_xval": 1.171875, + "num_input_tokens_seen": 404329736, + "step": 6045 + }, + { + "epoch": 0.6860709219858157, + "grad_norm": 23.122705459594727, + "learning_rate": 5e-05, + "loss": 1.1915, + "num_input_tokens_seen": 404396852, + "step": 6046 + }, + { + "epoch": 0.6860709219858157, + "loss": 1.1893956661224365, + "loss_ce": 0.009708178229629993, + "loss_iou": 0.478515625, + "loss_num": 0.044677734375, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 404396852, + "step": 6046 + }, + { + "epoch": 0.6861843971631205, + "grad_norm": 21.092851638793945, + "learning_rate": 5e-05, + "loss": 1.1421, + "num_input_tokens_seen": 404464032, + "step": 6047 + }, + { + "epoch": 0.6861843971631205, + "loss": 1.2688078880310059, + "loss_ce": 0.006600831635296345, + "loss_iou": 0.51953125, + "loss_num": 0.044677734375, + "loss_xval": 1.265625, + "num_input_tokens_seen": 404464032, + "step": 6047 + }, + { + "epoch": 0.6862978723404255, + "grad_norm": 45.60337448120117, + "learning_rate": 5e-05, + "loss": 1.2217, + "num_input_tokens_seen": 404531484, + "step": 6048 + }, + { + "epoch": 0.6862978723404255, + "loss": 1.2743141651153564, + "loss_ce": 0.008200903423130512, + "loss_iou": 0.51171875, + "loss_num": 0.04833984375, + "loss_xval": 1.265625, + "num_input_tokens_seen": 404531484, + "step": 6048 + }, + { + "epoch": 0.6864113475177305, + "grad_norm": 44.40684127807617, + "learning_rate": 5e-05, + "loss": 1.5278, + "num_input_tokens_seen": 404598856, + "step": 6049 + }, + { + "epoch": 0.6864113475177305, + "loss": 1.6807184219360352, + "loss_ce": 0.004937164019793272, + "loss_iou": 0.625, + "loss_num": 0.08447265625, + "loss_xval": 1.671875, + "num_input_tokens_seen": 404598856, + "step": 6049 + }, + { + "epoch": 0.6865248226950355, + "grad_norm": 38.95964431762695, + "learning_rate": 5e-05, + "loss": 1.2384, + "num_input_tokens_seen": 404666404, + "step": 6050 + }, + { + "epoch": 0.6865248226950355, + "loss": 1.3324311971664429, + "loss_ce": 0.003329688683152199, + "loss_iou": 0.5390625, + "loss_num": 0.049560546875, + "loss_xval": 1.328125, + "num_input_tokens_seen": 404666404, + "step": 6050 + }, + { + "epoch": 0.6866382978723404, + "grad_norm": 57.308189392089844, + "learning_rate": 5e-05, + "loss": 1.441, + "num_input_tokens_seen": 404732336, + "step": 6051 + }, + { + "epoch": 0.6866382978723404, + "loss": 1.689252257347107, + "loss_ce": 0.005658503156155348, + "loss_iou": 0.66796875, + "loss_num": 0.0693359375, + "loss_xval": 1.6875, + "num_input_tokens_seen": 404732336, + "step": 6051 + }, + { + "epoch": 0.6867517730496454, + "grad_norm": 19.88216781616211, + "learning_rate": 5e-05, + "loss": 1.0065, + "num_input_tokens_seen": 404799400, + "step": 6052 + }, + { + "epoch": 0.6867517730496454, + "loss": 0.9790279865264893, + "loss_ce": 0.004418591037392616, + "loss_iou": 0.408203125, + "loss_num": 0.0311279296875, + "loss_xval": 0.9765625, + "num_input_tokens_seen": 404799400, + "step": 6052 + }, + { + "epoch": 0.6868652482269504, + "grad_norm": 65.33670043945312, + "learning_rate": 5e-05, + "loss": 0.9656, + "num_input_tokens_seen": 404865620, + "step": 6053 + }, + { + "epoch": 0.6868652482269504, + "loss": 0.9459885954856873, + "loss_ce": 0.007023757789283991, + "loss_iou": 0.3828125, + "loss_num": 0.03466796875, + "loss_xval": 0.9375, + "num_input_tokens_seen": 404865620, + "step": 6053 + }, + { + "epoch": 0.6869787234042554, + "grad_norm": 38.240516662597656, + "learning_rate": 5e-05, + "loss": 1.2286, + "num_input_tokens_seen": 404932372, + "step": 6054 + }, + { + "epoch": 0.6869787234042554, + "loss": 1.1050574779510498, + "loss_ce": 0.00398331880569458, + "loss_iou": 0.46875, + "loss_num": 0.032470703125, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 404932372, + "step": 6054 + }, + { + "epoch": 0.6870921985815602, + "grad_norm": 27.38377571105957, + "learning_rate": 5e-05, + "loss": 1.1952, + "num_input_tokens_seen": 404998888, + "step": 6055 + }, + { + "epoch": 0.6870921985815602, + "loss": 1.0771883726119995, + "loss_ce": 0.004434417467564344, + "loss_iou": 0.4609375, + "loss_num": 0.030029296875, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 404998888, + "step": 6055 + }, + { + "epoch": 0.6872056737588652, + "grad_norm": 27.08137321472168, + "learning_rate": 5e-05, + "loss": 1.0218, + "num_input_tokens_seen": 405066176, + "step": 6056 + }, + { + "epoch": 0.6872056737588652, + "loss": 0.9992029666900635, + "loss_ce": 0.005550582893192768, + "loss_iou": 0.4375, + "loss_num": 0.0233154296875, + "loss_xval": 0.9921875, + "num_input_tokens_seen": 405066176, + "step": 6056 + }, + { + "epoch": 0.6873191489361702, + "grad_norm": 27.64621353149414, + "learning_rate": 5e-05, + "loss": 1.23, + "num_input_tokens_seen": 405133812, + "step": 6057 + }, + { + "epoch": 0.6873191489361702, + "loss": 1.1724727153778076, + "loss_ce": 0.008410299196839333, + "loss_iou": 0.484375, + "loss_num": 0.039306640625, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 405133812, + "step": 6057 + }, + { + "epoch": 0.6874326241134752, + "grad_norm": 22.456768035888672, + "learning_rate": 5e-05, + "loss": 1.2396, + "num_input_tokens_seen": 405200512, + "step": 6058 + }, + { + "epoch": 0.6874326241134752, + "loss": 1.3668473958969116, + "loss_ce": 0.005275102332234383, + "loss_iou": 0.5390625, + "loss_num": 0.056640625, + "loss_xval": 1.359375, + "num_input_tokens_seen": 405200512, + "step": 6058 + }, + { + "epoch": 0.6875460992907801, + "grad_norm": 22.137685775756836, + "learning_rate": 5e-05, + "loss": 1.1869, + "num_input_tokens_seen": 405267668, + "step": 6059 + }, + { + "epoch": 0.6875460992907801, + "loss": 1.208387851715088, + "loss_ce": 0.005262817721813917, + "loss_iou": 0.46875, + "loss_num": 0.052978515625, + "loss_xval": 1.203125, + "num_input_tokens_seen": 405267668, + "step": 6059 + }, + { + "epoch": 0.6876595744680851, + "grad_norm": 19.25117301940918, + "learning_rate": 5e-05, + "loss": 1.2236, + "num_input_tokens_seen": 405334324, + "step": 6060 + }, + { + "epoch": 0.6876595744680851, + "loss": 1.1810667514801025, + "loss_ce": 0.00626207934692502, + "loss_iou": 0.484375, + "loss_num": 0.040771484375, + "loss_xval": 1.171875, + "num_input_tokens_seen": 405334324, + "step": 6060 + }, + { + "epoch": 0.6877730496453901, + "grad_norm": 24.176475524902344, + "learning_rate": 5e-05, + "loss": 1.1777, + "num_input_tokens_seen": 405399800, + "step": 6061 + }, + { + "epoch": 0.6877730496453901, + "loss": 1.319348931312561, + "loss_ce": 0.00782548077404499, + "loss_iou": 0.474609375, + "loss_num": 0.072265625, + "loss_xval": 1.3125, + "num_input_tokens_seen": 405399800, + "step": 6061 + }, + { + "epoch": 0.6878865248226951, + "grad_norm": 28.166555404663086, + "learning_rate": 5e-05, + "loss": 1.2924, + "num_input_tokens_seen": 405466536, + "step": 6062 + }, + { + "epoch": 0.6878865248226951, + "loss": 1.3301931619644165, + "loss_ce": 0.006950942799448967, + "loss_iou": 0.5390625, + "loss_num": 0.04931640625, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 405466536, + "step": 6062 + }, + { + "epoch": 0.688, + "grad_norm": 32.377708435058594, + "learning_rate": 5e-05, + "loss": 1.3211, + "num_input_tokens_seen": 405532560, + "step": 6063 + }, + { + "epoch": 0.688, + "loss": 1.3078007698059082, + "loss_ce": 0.006531314458698034, + "loss_iou": 0.48046875, + "loss_num": 0.06787109375, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 405532560, + "step": 6063 + }, + { + "epoch": 0.6881134751773049, + "grad_norm": 35.9614143371582, + "learning_rate": 5e-05, + "loss": 1.2726, + "num_input_tokens_seen": 405599076, + "step": 6064 + }, + { + "epoch": 0.6881134751773049, + "loss": 1.501579761505127, + "loss_ce": 0.004997685085982084, + "loss_iou": 0.65234375, + "loss_num": 0.039306640625, + "loss_xval": 1.5, + "num_input_tokens_seen": 405599076, + "step": 6064 + }, + { + "epoch": 0.6882269503546099, + "grad_norm": 15.544755935668945, + "learning_rate": 5e-05, + "loss": 1.1457, + "num_input_tokens_seen": 405666844, + "step": 6065 + }, + { + "epoch": 0.6882269503546099, + "loss": 1.1528838872909546, + "loss_ce": 0.006887783296406269, + "loss_iou": 0.439453125, + "loss_num": 0.05322265625, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 405666844, + "step": 6065 + }, + { + "epoch": 0.6883404255319149, + "grad_norm": 20.853321075439453, + "learning_rate": 5e-05, + "loss": 1.1326, + "num_input_tokens_seen": 405733484, + "step": 6066 + }, + { + "epoch": 0.6883404255319149, + "loss": 0.9938380718231201, + "loss_ce": 0.005556849762797356, + "loss_iou": 0.40234375, + "loss_num": 0.036376953125, + "loss_xval": 0.98828125, + "num_input_tokens_seen": 405733484, + "step": 6066 + }, + { + "epoch": 0.6884539007092199, + "grad_norm": 45.76494216918945, + "learning_rate": 5e-05, + "loss": 1.2019, + "num_input_tokens_seen": 405800240, + "step": 6067 + }, + { + "epoch": 0.6884539007092199, + "loss": 1.1007366180419922, + "loss_ce": 0.006254209205508232, + "loss_iou": 0.41796875, + "loss_num": 0.05126953125, + "loss_xval": 1.09375, + "num_input_tokens_seen": 405800240, + "step": 6067 + }, + { + "epoch": 0.6885673758865248, + "grad_norm": 45.06931686401367, + "learning_rate": 5e-05, + "loss": 1.3217, + "num_input_tokens_seen": 405867128, + "step": 6068 + }, + { + "epoch": 0.6885673758865248, + "loss": 1.319455862045288, + "loss_ce": 0.002072976902127266, + "loss_iou": 0.52734375, + "loss_num": 0.052978515625, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 405867128, + "step": 6068 + }, + { + "epoch": 0.6886808510638298, + "grad_norm": 34.526878356933594, + "learning_rate": 5e-05, + "loss": 1.4147, + "num_input_tokens_seen": 405934668, + "step": 6069 + }, + { + "epoch": 0.6886808510638298, + "loss": 1.4839589595794678, + "loss_ce": 0.007396395318210125, + "loss_iou": 0.6171875, + "loss_num": 0.048095703125, + "loss_xval": 1.4765625, + "num_input_tokens_seen": 405934668, + "step": 6069 + }, + { + "epoch": 0.6887943262411348, + "grad_norm": 33.564334869384766, + "learning_rate": 5e-05, + "loss": 1.3387, + "num_input_tokens_seen": 406001180, + "step": 6070 + }, + { + "epoch": 0.6887943262411348, + "loss": 1.3082911968231201, + "loss_ce": 0.00262707588262856, + "loss_iou": 0.51171875, + "loss_num": 0.05712890625, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 406001180, + "step": 6070 + }, + { + "epoch": 0.6889078014184398, + "grad_norm": 21.94266128540039, + "learning_rate": 5e-05, + "loss": 1.0995, + "num_input_tokens_seen": 406067888, + "step": 6071 + }, + { + "epoch": 0.6889078014184398, + "loss": 0.9927325248718262, + "loss_ce": 0.004451304208487272, + "loss_iou": 0.408203125, + "loss_num": 0.03466796875, + "loss_xval": 0.98828125, + "num_input_tokens_seen": 406067888, + "step": 6071 + }, + { + "epoch": 0.6890212765957446, + "grad_norm": 33.66145706176758, + "learning_rate": 5e-05, + "loss": 1.2259, + "num_input_tokens_seen": 406134668, + "step": 6072 + }, + { + "epoch": 0.6890212765957446, + "loss": 1.2768261432647705, + "loss_ce": 0.0033885648008435965, + "loss_iou": 0.4921875, + "loss_num": 0.05810546875, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 406134668, + "step": 6072 + }, + { + "epoch": 0.6891347517730496, + "grad_norm": 18.73426628112793, + "learning_rate": 5e-05, + "loss": 1.3091, + "num_input_tokens_seen": 406201236, + "step": 6073 + }, + { + "epoch": 0.6891347517730496, + "loss": 1.316968560218811, + "loss_ce": 0.0044686272740364075, + "loss_iou": 0.478515625, + "loss_num": 0.0712890625, + "loss_xval": 1.3125, + "num_input_tokens_seen": 406201236, + "step": 6073 + }, + { + "epoch": 0.6892482269503546, + "grad_norm": 34.148189544677734, + "learning_rate": 5e-05, + "loss": 1.2672, + "num_input_tokens_seen": 406268104, + "step": 6074 + }, + { + "epoch": 0.6892482269503546, + "loss": 1.4408091306686401, + "loss_ce": 0.005262257996946573, + "loss_iou": 0.5625, + "loss_num": 0.0625, + "loss_xval": 1.4375, + "num_input_tokens_seen": 406268104, + "step": 6074 + }, + { + "epoch": 0.6893617021276596, + "grad_norm": 27.34520149230957, + "learning_rate": 5e-05, + "loss": 1.1967, + "num_input_tokens_seen": 406334784, + "step": 6075 + }, + { + "epoch": 0.6893617021276596, + "loss": 1.2638099193572998, + "loss_ce": 0.007462306413799524, + "loss_iou": 0.52734375, + "loss_num": 0.040771484375, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 406334784, + "step": 6075 + }, + { + "epoch": 0.6894751773049645, + "grad_norm": 23.685827255249023, + "learning_rate": 5e-05, + "loss": 1.2738, + "num_input_tokens_seen": 406401956, + "step": 6076 + }, + { + "epoch": 0.6894751773049645, + "loss": 1.1281766891479492, + "loss_ce": 0.0075712185353040695, + "loss_iou": 0.46484375, + "loss_num": 0.03759765625, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 406401956, + "step": 6076 + }, + { + "epoch": 0.6895886524822695, + "grad_norm": 25.902416229248047, + "learning_rate": 5e-05, + "loss": 1.2364, + "num_input_tokens_seen": 406468788, + "step": 6077 + }, + { + "epoch": 0.6895886524822695, + "loss": 1.1533656120300293, + "loss_ce": 0.0034632310271263123, + "loss_iou": 0.470703125, + "loss_num": 0.0419921875, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 406468788, + "step": 6077 + }, + { + "epoch": 0.6897021276595745, + "grad_norm": 22.905635833740234, + "learning_rate": 5e-05, + "loss": 1.1347, + "num_input_tokens_seen": 406536560, + "step": 6078 + }, + { + "epoch": 0.6897021276595745, + "loss": 1.0635600090026855, + "loss_ce": 0.00789596326649189, + "loss_iou": 0.453125, + "loss_num": 0.029541015625, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 406536560, + "step": 6078 + }, + { + "epoch": 0.6898156028368795, + "grad_norm": 24.168134689331055, + "learning_rate": 5e-05, + "loss": 1.0386, + "num_input_tokens_seen": 406602696, + "step": 6079 + }, + { + "epoch": 0.6898156028368795, + "loss": 1.331556797027588, + "loss_ce": 0.010267806239426136, + "loss_iou": 0.5234375, + "loss_num": 0.0556640625, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 406602696, + "step": 6079 + }, + { + "epoch": 0.6899290780141843, + "grad_norm": 38.37074661254883, + "learning_rate": 5e-05, + "loss": 1.1273, + "num_input_tokens_seen": 406669716, + "step": 6080 + }, + { + "epoch": 0.6899290780141843, + "loss": 1.2607477903366089, + "loss_ce": 0.00928301177918911, + "loss_iou": 0.5, + "loss_num": 0.0498046875, + "loss_xval": 1.25, + "num_input_tokens_seen": 406669716, + "step": 6080 + }, + { + "epoch": 0.6900425531914893, + "grad_norm": 51.31241989135742, + "learning_rate": 5e-05, + "loss": 1.1839, + "num_input_tokens_seen": 406737140, + "step": 6081 + }, + { + "epoch": 0.6900425531914893, + "loss": 1.1070821285247803, + "loss_ce": 0.006496158894151449, + "loss_iou": 0.482421875, + "loss_num": 0.0269775390625, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 406737140, + "step": 6081 + }, + { + "epoch": 0.6901560283687943, + "grad_norm": 38.77510070800781, + "learning_rate": 5e-05, + "loss": 1.2636, + "num_input_tokens_seen": 406803052, + "step": 6082 + }, + { + "epoch": 0.6901560283687943, + "loss": 1.3467942476272583, + "loss_ce": 0.004997438285499811, + "loss_iou": 0.5078125, + "loss_num": 0.0654296875, + "loss_xval": 1.34375, + "num_input_tokens_seen": 406803052, + "step": 6082 + }, + { + "epoch": 0.6902695035460993, + "grad_norm": 25.11055564880371, + "learning_rate": 5e-05, + "loss": 1.2032, + "num_input_tokens_seen": 406870060, + "step": 6083 + }, + { + "epoch": 0.6902695035460993, + "loss": 1.2375504970550537, + "loss_ce": 0.005616829730570316, + "loss_iou": 0.486328125, + "loss_num": 0.051513671875, + "loss_xval": 1.234375, + "num_input_tokens_seen": 406870060, + "step": 6083 + }, + { + "epoch": 0.6903829787234043, + "grad_norm": 98.98442840576172, + "learning_rate": 5e-05, + "loss": 1.1957, + "num_input_tokens_seen": 406937312, + "step": 6084 + }, + { + "epoch": 0.6903829787234043, + "loss": 1.2963190078735352, + "loss_ce": 0.003838593838736415, + "loss_iou": 0.46875, + "loss_num": 0.07080078125, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 406937312, + "step": 6084 + }, + { + "epoch": 0.6904964539007092, + "grad_norm": 17.91457176208496, + "learning_rate": 5e-05, + "loss": 1.1138, + "num_input_tokens_seen": 407003624, + "step": 6085 + }, + { + "epoch": 0.6904964539007092, + "loss": 1.2556238174438477, + "loss_ce": 0.009530122391879559, + "loss_iou": 0.494140625, + "loss_num": 0.05126953125, + "loss_xval": 1.25, + "num_input_tokens_seen": 407003624, + "step": 6085 + }, + { + "epoch": 0.6906099290780142, + "grad_norm": 19.390737533569336, + "learning_rate": 5e-05, + "loss": 0.947, + "num_input_tokens_seen": 407070692, + "step": 6086 + }, + { + "epoch": 0.6906099290780142, + "loss": 0.8942593336105347, + "loss_ce": 0.007464288733899593, + "loss_iou": 0.357421875, + "loss_num": 0.03466796875, + "loss_xval": 0.88671875, + "num_input_tokens_seen": 407070692, + "step": 6086 + }, + { + "epoch": 0.6907234042553192, + "grad_norm": 33.399436950683594, + "learning_rate": 5e-05, + "loss": 1.3459, + "num_input_tokens_seen": 407137188, + "step": 6087 + }, + { + "epoch": 0.6907234042553192, + "loss": 1.312006950378418, + "loss_ce": 0.00683104433119297, + "loss_iou": 0.4765625, + "loss_num": 0.0703125, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 407137188, + "step": 6087 + }, + { + "epoch": 0.6908368794326241, + "grad_norm": 32.466835021972656, + "learning_rate": 5e-05, + "loss": 1.266, + "num_input_tokens_seen": 407203864, + "step": 6088 + }, + { + "epoch": 0.6908368794326241, + "loss": 1.3461040258407593, + "loss_ce": 0.006260299123823643, + "loss_iou": 0.58203125, + "loss_num": 0.034423828125, + "loss_xval": 1.34375, + "num_input_tokens_seen": 407203864, + "step": 6088 + }, + { + "epoch": 0.690950354609929, + "grad_norm": 30.701162338256836, + "learning_rate": 5e-05, + "loss": 1.1132, + "num_input_tokens_seen": 407271488, + "step": 6089 + }, + { + "epoch": 0.690950354609929, + "loss": 1.0800046920776367, + "loss_ce": 0.0033444929867982864, + "loss_iou": 0.455078125, + "loss_num": 0.03369140625, + "loss_xval": 1.078125, + "num_input_tokens_seen": 407271488, + "step": 6089 + }, + { + "epoch": 0.691063829787234, + "grad_norm": 21.661540985107422, + "learning_rate": 5e-05, + "loss": 1.152, + "num_input_tokens_seen": 407338856, + "step": 6090 + }, + { + "epoch": 0.691063829787234, + "loss": 1.181541919708252, + "loss_ce": 0.006248996127396822, + "loss_iou": 0.51171875, + "loss_num": 0.0303955078125, + "loss_xval": 1.171875, + "num_input_tokens_seen": 407338856, + "step": 6090 + }, + { + "epoch": 0.691177304964539, + "grad_norm": 31.833066940307617, + "learning_rate": 5e-05, + "loss": 1.1668, + "num_input_tokens_seen": 407406204, + "step": 6091 + }, + { + "epoch": 0.691177304964539, + "loss": 1.1553869247436523, + "loss_ce": 0.006949461530894041, + "loss_iou": 0.47265625, + "loss_num": 0.04052734375, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 407406204, + "step": 6091 + }, + { + "epoch": 0.691290780141844, + "grad_norm": 22.252065658569336, + "learning_rate": 5e-05, + "loss": 1.2797, + "num_input_tokens_seen": 407473776, + "step": 6092 + }, + { + "epoch": 0.691290780141844, + "loss": 1.4074702262878418, + "loss_ce": 0.007079609669744968, + "loss_iou": 0.5625, + "loss_num": 0.054931640625, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 407473776, + "step": 6092 + }, + { + "epoch": 0.691404255319149, + "grad_norm": 33.563533782958984, + "learning_rate": 5e-05, + "loss": 1.2104, + "num_input_tokens_seen": 407541284, + "step": 6093 + }, + { + "epoch": 0.691404255319149, + "loss": 1.281713604927063, + "loss_ce": 0.007787794340401888, + "loss_iou": 0.52734375, + "loss_num": 0.04345703125, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 407541284, + "step": 6093 + }, + { + "epoch": 0.6915177304964539, + "grad_norm": 84.31416320800781, + "learning_rate": 5e-05, + "loss": 1.1828, + "num_input_tokens_seen": 407608572, + "step": 6094 + }, + { + "epoch": 0.6915177304964539, + "loss": 1.3443071842193604, + "loss_ce": 0.00788150355219841, + "loss_iou": 0.54296875, + "loss_num": 0.050537109375, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 407608572, + "step": 6094 + }, + { + "epoch": 0.6916312056737589, + "grad_norm": 36.35014724731445, + "learning_rate": 5e-05, + "loss": 1.0971, + "num_input_tokens_seen": 407675400, + "step": 6095 + }, + { + "epoch": 0.6916312056737589, + "loss": 1.1144543886184692, + "loss_ce": 0.006055944599211216, + "loss_iou": 0.4375, + "loss_num": 0.046630859375, + "loss_xval": 1.109375, + "num_input_tokens_seen": 407675400, + "step": 6095 + }, + { + "epoch": 0.6917446808510638, + "grad_norm": 37.990875244140625, + "learning_rate": 5e-05, + "loss": 1.259, + "num_input_tokens_seen": 407741868, + "step": 6096 + }, + { + "epoch": 0.6917446808510638, + "loss": 1.1157170534133911, + "loss_ce": 0.0021916646510362625, + "loss_iou": 0.47265625, + "loss_num": 0.033935546875, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 407741868, + "step": 6096 + }, + { + "epoch": 0.6918581560283688, + "grad_norm": 31.949031829833984, + "learning_rate": 5e-05, + "loss": 1.2773, + "num_input_tokens_seen": 407809116, + "step": 6097 + }, + { + "epoch": 0.6918581560283688, + "loss": 1.4021146297454834, + "loss_ce": 0.005630208179354668, + "loss_iou": 0.5625, + "loss_num": 0.053955078125, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 407809116, + "step": 6097 + }, + { + "epoch": 0.6919716312056737, + "grad_norm": 28.880712509155273, + "learning_rate": 5e-05, + "loss": 1.2062, + "num_input_tokens_seen": 407876248, + "step": 6098 + }, + { + "epoch": 0.6919716312056737, + "loss": 1.3061710596084595, + "loss_ce": 0.005389814265072346, + "loss_iou": 0.578125, + "loss_num": 0.0294189453125, + "loss_xval": 1.296875, + "num_input_tokens_seen": 407876248, + "step": 6098 + }, + { + "epoch": 0.6920851063829787, + "grad_norm": 25.114919662475586, + "learning_rate": 5e-05, + "loss": 1.1952, + "num_input_tokens_seen": 407943808, + "step": 6099 + }, + { + "epoch": 0.6920851063829787, + "loss": 1.0019603967666626, + "loss_ce": 0.004401762969791889, + "loss_iou": 0.404296875, + "loss_num": 0.03759765625, + "loss_xval": 0.99609375, + "num_input_tokens_seen": 407943808, + "step": 6099 + }, + { + "epoch": 0.6921985815602837, + "grad_norm": 26.120651245117188, + "learning_rate": 5e-05, + "loss": 1.1698, + "num_input_tokens_seen": 408010784, + "step": 6100 + }, + { + "epoch": 0.6921985815602837, + "loss": 1.3104956150054932, + "loss_ce": 0.00531991571187973, + "loss_iou": 0.50390625, + "loss_num": 0.05908203125, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 408010784, + "step": 6100 + }, + { + "epoch": 0.6923120567375887, + "grad_norm": 40.908931732177734, + "learning_rate": 5e-05, + "loss": 1.0938, + "num_input_tokens_seen": 408077316, + "step": 6101 + }, + { + "epoch": 0.6923120567375887, + "loss": 0.9761330485343933, + "loss_ce": 0.006894796155393124, + "loss_iou": 0.412109375, + "loss_num": 0.02880859375, + "loss_xval": 0.96875, + "num_input_tokens_seen": 408077316, + "step": 6101 + }, + { + "epoch": 0.6924255319148936, + "grad_norm": 31.31492805480957, + "learning_rate": 5e-05, + "loss": 1.1726, + "num_input_tokens_seen": 408144760, + "step": 6102 + }, + { + "epoch": 0.6924255319148936, + "loss": 1.0770962238311768, + "loss_ce": 0.009469197131693363, + "loss_iou": 0.470703125, + "loss_num": 0.0250244140625, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 408144760, + "step": 6102 + }, + { + "epoch": 0.6925390070921986, + "grad_norm": 20.98089027404785, + "learning_rate": 5e-05, + "loss": 1.1372, + "num_input_tokens_seen": 408211928, + "step": 6103 + }, + { + "epoch": 0.6925390070921986, + "loss": 1.0176079273223877, + "loss_ce": 0.001983032329007983, + "loss_iou": 0.421875, + "loss_num": 0.03515625, + "loss_xval": 1.015625, + "num_input_tokens_seen": 408211928, + "step": 6103 + }, + { + "epoch": 0.6926524822695036, + "grad_norm": 29.50829315185547, + "learning_rate": 5e-05, + "loss": 1.1426, + "num_input_tokens_seen": 408278000, + "step": 6104 + }, + { + "epoch": 0.6926524822695036, + "loss": 1.1121478080749512, + "loss_ce": 0.0037494462449103594, + "loss_iou": 0.4375, + "loss_num": 0.046630859375, + "loss_xval": 1.109375, + "num_input_tokens_seen": 408278000, + "step": 6104 + }, + { + "epoch": 0.6927659574468085, + "grad_norm": 19.387914657592773, + "learning_rate": 5e-05, + "loss": 1.1946, + "num_input_tokens_seen": 408345332, + "step": 6105 + }, + { + "epoch": 0.6927659574468085, + "loss": 1.258544921875, + "loss_ce": 0.005371146835386753, + "loss_iou": 0.486328125, + "loss_num": 0.05615234375, + "loss_xval": 1.25, + "num_input_tokens_seen": 408345332, + "step": 6105 + }, + { + "epoch": 0.6928794326241134, + "grad_norm": 22.18927001953125, + "learning_rate": 5e-05, + "loss": 1.0482, + "num_input_tokens_seen": 408411892, + "step": 6106 + }, + { + "epoch": 0.6928794326241134, + "loss": 1.1545050144195557, + "loss_ce": 0.007044050842523575, + "loss_iou": 0.486328125, + "loss_num": 0.034423828125, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 408411892, + "step": 6106 + }, + { + "epoch": 0.6929929078014184, + "grad_norm": 44.81898498535156, + "learning_rate": 5e-05, + "loss": 1.1939, + "num_input_tokens_seen": 408475964, + "step": 6107 + }, + { + "epoch": 0.6929929078014184, + "loss": 1.0557215213775635, + "loss_ce": 0.0033533538226038218, + "loss_iou": 0.390625, + "loss_num": 0.05419921875, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 408475964, + "step": 6107 + }, + { + "epoch": 0.6931063829787234, + "grad_norm": 43.33220291137695, + "learning_rate": 5e-05, + "loss": 1.3018, + "num_input_tokens_seen": 408541992, + "step": 6108 + }, + { + "epoch": 0.6931063829787234, + "loss": 1.1433241367340088, + "loss_ce": 0.005628766492009163, + "loss_iou": 0.486328125, + "loss_num": 0.033203125, + "loss_xval": 1.140625, + "num_input_tokens_seen": 408541992, + "step": 6108 + }, + { + "epoch": 0.6932198581560284, + "grad_norm": 48.01873016357422, + "learning_rate": 5e-05, + "loss": 1.387, + "num_input_tokens_seen": 408608364, + "step": 6109 + }, + { + "epoch": 0.6932198581560284, + "loss": 1.4223875999450684, + "loss_ce": 0.005395364947617054, + "loss_iou": 0.59765625, + "loss_num": 0.04443359375, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 408608364, + "step": 6109 + }, + { + "epoch": 0.6933333333333334, + "grad_norm": 30.155311584472656, + "learning_rate": 5e-05, + "loss": 1.2776, + "num_input_tokens_seen": 408675564, + "step": 6110 + }, + { + "epoch": 0.6933333333333334, + "loss": 1.2162941694259644, + "loss_ce": 0.005844965577125549, + "loss_iou": 0.50390625, + "loss_num": 0.040283203125, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 408675564, + "step": 6110 + }, + { + "epoch": 0.6934468085106383, + "grad_norm": 23.499876022338867, + "learning_rate": 5e-05, + "loss": 1.1628, + "num_input_tokens_seen": 408742432, + "step": 6111 + }, + { + "epoch": 0.6934468085106383, + "loss": 1.1468851566314697, + "loss_ce": 0.007724999915808439, + "loss_iou": 0.462890625, + "loss_num": 0.042724609375, + "loss_xval": 1.140625, + "num_input_tokens_seen": 408742432, + "step": 6111 + }, + { + "epoch": 0.6935602836879433, + "grad_norm": 26.505342483520508, + "learning_rate": 5e-05, + "loss": 1.3087, + "num_input_tokens_seen": 408808492, + "step": 6112 + }, + { + "epoch": 0.6935602836879433, + "loss": 1.401625633239746, + "loss_ce": 0.0066060759127140045, + "loss_iou": 0.51171875, + "loss_num": 0.07470703125, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 408808492, + "step": 6112 + }, + { + "epoch": 0.6936737588652482, + "grad_norm": 30.885583877563477, + "learning_rate": 5e-05, + "loss": 1.1881, + "num_input_tokens_seen": 408875404, + "step": 6113 + }, + { + "epoch": 0.6936737588652482, + "loss": 1.447331428527832, + "loss_ce": 0.003971993923187256, + "loss_iou": 0.56640625, + "loss_num": 0.062255859375, + "loss_xval": 1.4453125, + "num_input_tokens_seen": 408875404, + "step": 6113 + }, + { + "epoch": 0.6937872340425532, + "grad_norm": 20.802650451660156, + "learning_rate": 5e-05, + "loss": 0.957, + "num_input_tokens_seen": 408941380, + "step": 6114 + }, + { + "epoch": 0.6937872340425532, + "loss": 0.9492270946502686, + "loss_ce": 0.00904160551726818, + "loss_iou": 0.37890625, + "loss_num": 0.035888671875, + "loss_xval": 0.94140625, + "num_input_tokens_seen": 408941380, + "step": 6114 + }, + { + "epoch": 0.6939007092198581, + "grad_norm": 22.648775100708008, + "learning_rate": 5e-05, + "loss": 1.004, + "num_input_tokens_seen": 409008624, + "step": 6115 + }, + { + "epoch": 0.6939007092198581, + "loss": 1.107388973236084, + "loss_ce": 0.012662380002439022, + "loss_iou": 0.396484375, + "loss_num": 0.060302734375, + "loss_xval": 1.09375, + "num_input_tokens_seen": 409008624, + "step": 6115 + }, + { + "epoch": 0.6940141843971631, + "grad_norm": 82.70195770263672, + "learning_rate": 5e-05, + "loss": 1.2792, + "num_input_tokens_seen": 409076648, + "step": 6116 + }, + { + "epoch": 0.6940141843971631, + "loss": 1.1291978359222412, + "loss_ce": 0.012986821122467518, + "loss_iou": 0.45703125, + "loss_num": 0.040771484375, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 409076648, + "step": 6116 + }, + { + "epoch": 0.6941276595744681, + "grad_norm": 48.06532669067383, + "learning_rate": 5e-05, + "loss": 1.2217, + "num_input_tokens_seen": 409144028, + "step": 6117 + }, + { + "epoch": 0.6941276595744681, + "loss": 1.1042742729187012, + "loss_ce": 0.008082782849669456, + "loss_iou": 0.46484375, + "loss_num": 0.033203125, + "loss_xval": 1.09375, + "num_input_tokens_seen": 409144028, + "step": 6117 + }, + { + "epoch": 0.6942411347517731, + "grad_norm": 24.410037994384766, + "learning_rate": 5e-05, + "loss": 1.3697, + "num_input_tokens_seen": 409211156, + "step": 6118 + }, + { + "epoch": 0.6942411347517731, + "loss": 1.223437786102295, + "loss_ce": 0.009082373231649399, + "loss_iou": 0.474609375, + "loss_num": 0.05322265625, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 409211156, + "step": 6118 + }, + { + "epoch": 0.694354609929078, + "grad_norm": 15.319872856140137, + "learning_rate": 5e-05, + "loss": 1.2256, + "num_input_tokens_seen": 409277552, + "step": 6119 + }, + { + "epoch": 0.694354609929078, + "loss": 1.2681379318237305, + "loss_ce": 0.0071515366435050964, + "loss_iou": 0.478515625, + "loss_num": 0.060546875, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 409277552, + "step": 6119 + }, + { + "epoch": 0.694468085106383, + "grad_norm": 9.973519325256348, + "learning_rate": 5e-05, + "loss": 1.1762, + "num_input_tokens_seen": 409345228, + "step": 6120 + }, + { + "epoch": 0.694468085106383, + "loss": 1.1113247871398926, + "loss_ce": 0.008358540013432503, + "loss_iou": 0.431640625, + "loss_num": 0.048095703125, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 409345228, + "step": 6120 + }, + { + "epoch": 0.6945815602836879, + "grad_norm": 42.77680587768555, + "learning_rate": 5e-05, + "loss": 1.1575, + "num_input_tokens_seen": 409412280, + "step": 6121 + }, + { + "epoch": 0.6945815602836879, + "loss": 1.1561989784240723, + "loss_ce": 0.010691052302718163, + "loss_iou": 0.4609375, + "loss_num": 0.044921875, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 409412280, + "step": 6121 + }, + { + "epoch": 0.6946950354609929, + "grad_norm": 34.94890594482422, + "learning_rate": 5e-05, + "loss": 1.4294, + "num_input_tokens_seen": 409480212, + "step": 6122 + }, + { + "epoch": 0.6946950354609929, + "loss": 1.3669605255126953, + "loss_ce": 0.0061206757090985775, + "loss_iou": 0.546875, + "loss_num": 0.05322265625, + "loss_xval": 1.359375, + "num_input_tokens_seen": 409480212, + "step": 6122 + }, + { + "epoch": 0.6948085106382978, + "grad_norm": 32.68017578125, + "learning_rate": 5e-05, + "loss": 1.0802, + "num_input_tokens_seen": 409546156, + "step": 6123 + }, + { + "epoch": 0.6948085106382978, + "loss": 1.0382001399993896, + "loss_ce": 0.003532137256115675, + "loss_iou": 0.427734375, + "loss_num": 0.0361328125, + "loss_xval": 1.03125, + "num_input_tokens_seen": 409546156, + "step": 6123 + }, + { + "epoch": 0.6949219858156028, + "grad_norm": 32.03809356689453, + "learning_rate": 5e-05, + "loss": 1.1932, + "num_input_tokens_seen": 409611936, + "step": 6124 + }, + { + "epoch": 0.6949219858156028, + "loss": 1.1247950792312622, + "loss_ce": 0.006630985997617245, + "loss_iou": 0.44921875, + "loss_num": 0.04443359375, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 409611936, + "step": 6124 + }, + { + "epoch": 0.6950354609929078, + "grad_norm": 36.68061065673828, + "learning_rate": 5e-05, + "loss": 1.2323, + "num_input_tokens_seen": 409678676, + "step": 6125 + }, + { + "epoch": 0.6950354609929078, + "loss": 1.1290481090545654, + "loss_ce": 0.006977756507694721, + "loss_iou": 0.484375, + "loss_num": 0.0306396484375, + "loss_xval": 1.125, + "num_input_tokens_seen": 409678676, + "step": 6125 + }, + { + "epoch": 0.6951489361702128, + "grad_norm": 46.88251495361328, + "learning_rate": 5e-05, + "loss": 1.101, + "num_input_tokens_seen": 409745840, + "step": 6126 + }, + { + "epoch": 0.6951489361702128, + "loss": 1.297860860824585, + "loss_ce": 0.007333470042794943, + "loss_iou": 0.5078125, + "loss_num": 0.05517578125, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 409745840, + "step": 6126 + }, + { + "epoch": 0.6952624113475178, + "grad_norm": 22.499845504760742, + "learning_rate": 5e-05, + "loss": 1.2562, + "num_input_tokens_seen": 409813380, + "step": 6127 + }, + { + "epoch": 0.6952624113475178, + "loss": 1.086289644241333, + "loss_ce": 0.011582629755139351, + "loss_iou": 0.40234375, + "loss_num": 0.054443359375, + "loss_xval": 1.078125, + "num_input_tokens_seen": 409813380, + "step": 6127 + }, + { + "epoch": 0.6953758865248227, + "grad_norm": 26.60170555114746, + "learning_rate": 5e-05, + "loss": 1.2602, + "num_input_tokens_seen": 409881044, + "step": 6128 + }, + { + "epoch": 0.6953758865248227, + "loss": 1.4523162841796875, + "loss_ce": 0.008956961333751678, + "loss_iou": 0.60546875, + "loss_num": 0.0458984375, + "loss_xval": 1.4453125, + "num_input_tokens_seen": 409881044, + "step": 6128 + }, + { + "epoch": 0.6954893617021276, + "grad_norm": 19.764677047729492, + "learning_rate": 5e-05, + "loss": 1.0581, + "num_input_tokens_seen": 409947928, + "step": 6129 + }, + { + "epoch": 0.6954893617021276, + "loss": 1.1086108684539795, + "loss_ce": 0.009001442231237888, + "loss_iou": 0.470703125, + "loss_num": 0.0322265625, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 409947928, + "step": 6129 + }, + { + "epoch": 0.6956028368794326, + "grad_norm": 24.97814178466797, + "learning_rate": 5e-05, + "loss": 1.2029, + "num_input_tokens_seen": 410015740, + "step": 6130 + }, + { + "epoch": 0.6956028368794326, + "loss": 1.3705236911773682, + "loss_ce": 0.011148639023303986, + "loss_iou": 0.546875, + "loss_num": 0.05322265625, + "loss_xval": 1.359375, + "num_input_tokens_seen": 410015740, + "step": 6130 + }, + { + "epoch": 0.6957163120567376, + "grad_norm": 54.2408447265625, + "learning_rate": 5e-05, + "loss": 1.1928, + "num_input_tokens_seen": 410082308, + "step": 6131 + }, + { + "epoch": 0.6957163120567376, + "loss": 1.3067944049835205, + "loss_ce": 0.006013240199536085, + "loss_iou": 0.53125, + "loss_num": 0.04736328125, + "loss_xval": 1.296875, + "num_input_tokens_seen": 410082308, + "step": 6131 + }, + { + "epoch": 0.6958297872340425, + "grad_norm": 22.362459182739258, + "learning_rate": 5e-05, + "loss": 1.302, + "num_input_tokens_seen": 410149588, + "step": 6132 + }, + { + "epoch": 0.6958297872340425, + "loss": 1.429978370666504, + "loss_ce": 0.009812291711568832, + "loss_iou": 0.5546875, + "loss_num": 0.0625, + "loss_xval": 1.421875, + "num_input_tokens_seen": 410149588, + "step": 6132 + }, + { + "epoch": 0.6959432624113475, + "grad_norm": 39.59854507446289, + "learning_rate": 5e-05, + "loss": 1.1465, + "num_input_tokens_seen": 410216944, + "step": 6133 + }, + { + "epoch": 0.6959432624113475, + "loss": 1.295608639717102, + "loss_ce": 0.008499177172780037, + "loss_iou": 0.5078125, + "loss_num": 0.05419921875, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 410216944, + "step": 6133 + }, + { + "epoch": 0.6960567375886525, + "grad_norm": 40.695640563964844, + "learning_rate": 5e-05, + "loss": 1.1426, + "num_input_tokens_seen": 410283944, + "step": 6134 + }, + { + "epoch": 0.6960567375886525, + "loss": 1.152225375175476, + "loss_ce": 0.005740987602621317, + "loss_iou": 0.4765625, + "loss_num": 0.038330078125, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 410283944, + "step": 6134 + }, + { + "epoch": 0.6961702127659575, + "grad_norm": 31.403568267822266, + "learning_rate": 5e-05, + "loss": 1.334, + "num_input_tokens_seen": 410350920, + "step": 6135 + }, + { + "epoch": 0.6961702127659575, + "loss": 1.3573883771896362, + "loss_ce": 0.007290706969797611, + "loss_iou": 0.56640625, + "loss_num": 0.04296875, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 410350920, + "step": 6135 + }, + { + "epoch": 0.6962836879432625, + "grad_norm": 21.731597900390625, + "learning_rate": 5e-05, + "loss": 1.2698, + "num_input_tokens_seen": 410417544, + "step": 6136 + }, + { + "epoch": 0.6962836879432625, + "loss": 1.3583670854568481, + "loss_ce": 0.007781153079122305, + "loss_iou": 0.5234375, + "loss_num": 0.060791015625, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 410417544, + "step": 6136 + }, + { + "epoch": 0.6963971631205674, + "grad_norm": 18.852937698364258, + "learning_rate": 5e-05, + "loss": 1.2264, + "num_input_tokens_seen": 410483460, + "step": 6137 + }, + { + "epoch": 0.6963971631205674, + "loss": 1.5545204877853394, + "loss_ce": 0.007157237268984318, + "loss_iou": 0.5625, + "loss_num": 0.0849609375, + "loss_xval": 1.546875, + "num_input_tokens_seen": 410483460, + "step": 6137 + }, + { + "epoch": 0.6965106382978723, + "grad_norm": 17.297367095947266, + "learning_rate": 5e-05, + "loss": 0.9579, + "num_input_tokens_seen": 410550696, + "step": 6138 + }, + { + "epoch": 0.6965106382978723, + "loss": 0.969113826751709, + "loss_ce": 0.007199748419225216, + "loss_iou": 0.388671875, + "loss_num": 0.037109375, + "loss_xval": 0.9609375, + "num_input_tokens_seen": 410550696, + "step": 6138 + }, + { + "epoch": 0.6966241134751773, + "grad_norm": 19.67473030090332, + "learning_rate": 5e-05, + "loss": 1.1452, + "num_input_tokens_seen": 410618652, + "step": 6139 + }, + { + "epoch": 0.6966241134751773, + "loss": 1.1858317852020264, + "loss_ce": 0.004191109910607338, + "loss_iou": 0.5078125, + "loss_num": 0.033447265625, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 410618652, + "step": 6139 + }, + { + "epoch": 0.6967375886524823, + "grad_norm": 47.42966842651367, + "learning_rate": 5e-05, + "loss": 1.0853, + "num_input_tokens_seen": 410685700, + "step": 6140 + }, + { + "epoch": 0.6967375886524823, + "loss": 1.0338215827941895, + "loss_ce": 0.004768809769302607, + "loss_iou": 0.42578125, + "loss_num": 0.03564453125, + "loss_xval": 1.03125, + "num_input_tokens_seen": 410685700, + "step": 6140 + }, + { + "epoch": 0.6968510638297872, + "grad_norm": 33.28456497192383, + "learning_rate": 5e-05, + "loss": 1.0478, + "num_input_tokens_seen": 410752640, + "step": 6141 + }, + { + "epoch": 0.6968510638297872, + "loss": 1.0504069328308105, + "loss_ce": 0.0049967085942626, + "loss_iou": 0.427734375, + "loss_num": 0.038330078125, + "loss_xval": 1.046875, + "num_input_tokens_seen": 410752640, + "step": 6141 + }, + { + "epoch": 0.6969645390070922, + "grad_norm": 36.81060028076172, + "learning_rate": 5e-05, + "loss": 1.1693, + "num_input_tokens_seen": 410819992, + "step": 6142 + }, + { + "epoch": 0.6969645390070922, + "loss": 1.1593308448791504, + "loss_ce": 0.008940281346440315, + "loss_iou": 0.498046875, + "loss_num": 0.0311279296875, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 410819992, + "step": 6142 + }, + { + "epoch": 0.6970780141843972, + "grad_norm": 33.29090118408203, + "learning_rate": 5e-05, + "loss": 1.3784, + "num_input_tokens_seen": 410886936, + "step": 6143 + }, + { + "epoch": 0.6970780141843972, + "loss": 1.3330583572387695, + "loss_ce": 0.007374865468591452, + "loss_iou": 0.5625, + "loss_num": 0.039306640625, + "loss_xval": 1.328125, + "num_input_tokens_seen": 410886936, + "step": 6143 + }, + { + "epoch": 0.6971914893617022, + "grad_norm": 27.786006927490234, + "learning_rate": 5e-05, + "loss": 1.1289, + "num_input_tokens_seen": 410954688, + "step": 6144 + }, + { + "epoch": 0.6971914893617022, + "loss": 1.159834623336792, + "loss_ce": 0.00797923095524311, + "loss_iou": 0.4765625, + "loss_num": 0.039794921875, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 410954688, + "step": 6144 + }, + { + "epoch": 0.6973049645390071, + "grad_norm": 30.4647274017334, + "learning_rate": 5e-05, + "loss": 1.3177, + "num_input_tokens_seen": 411021624, + "step": 6145 + }, + { + "epoch": 0.6973049645390071, + "loss": 1.5569102764129639, + "loss_ce": 0.006129010580480099, + "loss_iou": 0.64453125, + "loss_num": 0.052978515625, + "loss_xval": 1.546875, + "num_input_tokens_seen": 411021624, + "step": 6145 + }, + { + "epoch": 0.697418439716312, + "grad_norm": 30.53021240234375, + "learning_rate": 5e-05, + "loss": 1.2522, + "num_input_tokens_seen": 411088364, + "step": 6146 + }, + { + "epoch": 0.697418439716312, + "loss": 0.974278450012207, + "loss_ce": 0.008091889321804047, + "loss_iou": 0.39453125, + "loss_num": 0.03564453125, + "loss_xval": 0.96484375, + "num_input_tokens_seen": 411088364, + "step": 6146 + }, + { + "epoch": 0.697531914893617, + "grad_norm": 27.026662826538086, + "learning_rate": 5e-05, + "loss": 1.4808, + "num_input_tokens_seen": 411155764, + "step": 6147 + }, + { + "epoch": 0.697531914893617, + "loss": 1.4915730953216553, + "loss_ce": 0.005733184982091188, + "loss_iou": 0.60546875, + "loss_num": 0.0546875, + "loss_xval": 1.484375, + "num_input_tokens_seen": 411155764, + "step": 6147 + }, + { + "epoch": 0.697645390070922, + "grad_norm": 29.94334602355957, + "learning_rate": 5e-05, + "loss": 1.3135, + "num_input_tokens_seen": 411223232, + "step": 6148 + }, + { + "epoch": 0.697645390070922, + "loss": 1.2609388828277588, + "loss_ce": 0.004591300152242184, + "loss_iou": 0.51171875, + "loss_num": 0.0458984375, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 411223232, + "step": 6148 + }, + { + "epoch": 0.697758865248227, + "grad_norm": 18.91226577758789, + "learning_rate": 5e-05, + "loss": 1.2961, + "num_input_tokens_seen": 411289216, + "step": 6149 + }, + { + "epoch": 0.697758865248227, + "loss": 1.4459766149520874, + "loss_ce": 0.01287120021879673, + "loss_iou": 0.56640625, + "loss_num": 0.059814453125, + "loss_xval": 1.4296875, + "num_input_tokens_seen": 411289216, + "step": 6149 + }, + { + "epoch": 0.6978723404255319, + "grad_norm": 14.942412376403809, + "learning_rate": 5e-05, + "loss": 1.018, + "num_input_tokens_seen": 411355996, + "step": 6150 + }, + { + "epoch": 0.6978723404255319, + "loss": 1.0643236637115479, + "loss_ce": 0.008171399123966694, + "loss_iou": 0.419921875, + "loss_num": 0.043212890625, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 411355996, + "step": 6150 + }, + { + "epoch": 0.6979858156028369, + "grad_norm": 18.505651473999023, + "learning_rate": 5e-05, + "loss": 1.1242, + "num_input_tokens_seen": 411422684, + "step": 6151 + }, + { + "epoch": 0.6979858156028369, + "loss": 1.1923749446868896, + "loss_ce": 0.005851552356034517, + "loss_iou": 0.44921875, + "loss_num": 0.057861328125, + "loss_xval": 1.1875, + "num_input_tokens_seen": 411422684, + "step": 6151 + }, + { + "epoch": 0.6980992907801419, + "grad_norm": 14.802815437316895, + "learning_rate": 5e-05, + "loss": 1.049, + "num_input_tokens_seen": 411489376, + "step": 6152 + }, + { + "epoch": 0.6980992907801419, + "loss": 0.9838268160820007, + "loss_ce": 0.004334653727710247, + "loss_iou": 0.376953125, + "loss_num": 0.044921875, + "loss_xval": 0.98046875, + "num_input_tokens_seen": 411489376, + "step": 6152 + }, + { + "epoch": 0.6982127659574469, + "grad_norm": 12.355113983154297, + "learning_rate": 5e-05, + "loss": 1.1497, + "num_input_tokens_seen": 411556804, + "step": 6153 + }, + { + "epoch": 0.6982127659574469, + "loss": 1.0648062229156494, + "loss_ce": 0.0037710305768996477, + "loss_iou": 0.3984375, + "loss_num": 0.052978515625, + "loss_xval": 1.0625, + "num_input_tokens_seen": 411556804, + "step": 6153 + }, + { + "epoch": 0.6983262411347517, + "grad_norm": 21.33070945739746, + "learning_rate": 5e-05, + "loss": 1.1321, + "num_input_tokens_seen": 411623868, + "step": 6154 + }, + { + "epoch": 0.6983262411347517, + "loss": 1.0388180017471313, + "loss_ce": 0.010497679933905602, + "loss_iou": 0.431640625, + "loss_num": 0.033447265625, + "loss_xval": 1.03125, + "num_input_tokens_seen": 411623868, + "step": 6154 + }, + { + "epoch": 0.6984397163120567, + "grad_norm": 41.8720588684082, + "learning_rate": 5e-05, + "loss": 1.097, + "num_input_tokens_seen": 411691004, + "step": 6155 + }, + { + "epoch": 0.6984397163120567, + "loss": 1.0146114826202393, + "loss_ce": 0.0033809805754572153, + "loss_iou": 0.451171875, + "loss_num": 0.0216064453125, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 411691004, + "step": 6155 + }, + { + "epoch": 0.6985531914893617, + "grad_norm": 56.667755126953125, + "learning_rate": 5e-05, + "loss": 1.3427, + "num_input_tokens_seen": 411759044, + "step": 6156 + }, + { + "epoch": 0.6985531914893617, + "loss": 1.278491497039795, + "loss_ce": 0.005542342085391283, + "loss_iou": 0.5625, + "loss_num": 0.02978515625, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 411759044, + "step": 6156 + }, + { + "epoch": 0.6986666666666667, + "grad_norm": 30.249507904052734, + "learning_rate": 5e-05, + "loss": 1.4666, + "num_input_tokens_seen": 411826024, + "step": 6157 + }, + { + "epoch": 0.6986666666666667, + "loss": 1.3197747468948364, + "loss_ce": 0.0067864395678043365, + "loss_iou": 0.546875, + "loss_num": 0.04296875, + "loss_xval": 1.3125, + "num_input_tokens_seen": 411826024, + "step": 6157 + }, + { + "epoch": 0.6987801418439716, + "grad_norm": 16.811933517456055, + "learning_rate": 5e-05, + "loss": 1.1765, + "num_input_tokens_seen": 411893696, + "step": 6158 + }, + { + "epoch": 0.6987801418439716, + "loss": 1.0890617370605469, + "loss_ce": 0.008495342917740345, + "loss_iou": 0.42578125, + "loss_num": 0.045654296875, + "loss_xval": 1.078125, + "num_input_tokens_seen": 411893696, + "step": 6158 + }, + { + "epoch": 0.6988936170212766, + "grad_norm": 18.181270599365234, + "learning_rate": 5e-05, + "loss": 1.1751, + "num_input_tokens_seen": 411960388, + "step": 6159 + }, + { + "epoch": 0.6988936170212766, + "loss": 1.1335995197296143, + "loss_ce": 0.005181452259421349, + "loss_iou": 0.439453125, + "loss_num": 0.05029296875, + "loss_xval": 1.125, + "num_input_tokens_seen": 411960388, + "step": 6159 + }, + { + "epoch": 0.6990070921985816, + "grad_norm": 44.539390563964844, + "learning_rate": 5e-05, + "loss": 1.1147, + "num_input_tokens_seen": 412026144, + "step": 6160 + }, + { + "epoch": 0.6990070921985816, + "loss": 1.3369708061218262, + "loss_ce": 0.00860163476318121, + "loss_iou": 0.51171875, + "loss_num": 0.060302734375, + "loss_xval": 1.328125, + "num_input_tokens_seen": 412026144, + "step": 6160 + }, + { + "epoch": 0.6991205673758866, + "grad_norm": 26.918865203857422, + "learning_rate": 5e-05, + "loss": 1.1083, + "num_input_tokens_seen": 412091912, + "step": 6161 + }, + { + "epoch": 0.6991205673758866, + "loss": 1.1883268356323242, + "loss_ce": 0.006686253473162651, + "loss_iou": 0.51953125, + "loss_num": 0.028564453125, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 412091912, + "step": 6161 + }, + { + "epoch": 0.6992340425531914, + "grad_norm": 31.131088256835938, + "learning_rate": 5e-05, + "loss": 1.1381, + "num_input_tokens_seen": 412159600, + "step": 6162 + }, + { + "epoch": 0.6992340425531914, + "loss": 1.151781678199768, + "loss_ce": 0.006273836828768253, + "loss_iou": 0.451171875, + "loss_num": 0.048095703125, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 412159600, + "step": 6162 + }, + { + "epoch": 0.6993475177304964, + "grad_norm": 31.22933578491211, + "learning_rate": 5e-05, + "loss": 1.2907, + "num_input_tokens_seen": 412226588, + "step": 6163 + }, + { + "epoch": 0.6993475177304964, + "loss": 1.2070658206939697, + "loss_ce": 0.005893871188163757, + "loss_iou": 0.5546875, + "loss_num": 0.017578125, + "loss_xval": 1.203125, + "num_input_tokens_seen": 412226588, + "step": 6163 + }, + { + "epoch": 0.6994609929078014, + "grad_norm": 32.363494873046875, + "learning_rate": 5e-05, + "loss": 1.3628, + "num_input_tokens_seen": 412293132, + "step": 6164 + }, + { + "epoch": 0.6994609929078014, + "loss": 1.5672380924224854, + "loss_ce": 0.006202982738614082, + "loss_iou": 0.59765625, + "loss_num": 0.072265625, + "loss_xval": 1.5625, + "num_input_tokens_seen": 412293132, + "step": 6164 + }, + { + "epoch": 0.6995744680851064, + "grad_norm": 27.23990821838379, + "learning_rate": 5e-05, + "loss": 1.3983, + "num_input_tokens_seen": 412360672, + "step": 6165 + }, + { + "epoch": 0.6995744680851064, + "loss": 1.6513698101043701, + "loss_ce": 0.008791708387434483, + "loss_iou": 0.6484375, + "loss_num": 0.068359375, + "loss_xval": 1.640625, + "num_input_tokens_seen": 412360672, + "step": 6165 + }, + { + "epoch": 0.6996879432624113, + "grad_norm": 13.83617877960205, + "learning_rate": 5e-05, + "loss": 1.4109, + "num_input_tokens_seen": 412427656, + "step": 6166 + }, + { + "epoch": 0.6996879432624113, + "loss": 1.3413043022155762, + "loss_ce": 0.009272998198866844, + "loss_iou": 0.53125, + "loss_num": 0.053466796875, + "loss_xval": 1.328125, + "num_input_tokens_seen": 412427656, + "step": 6166 + }, + { + "epoch": 0.6998014184397163, + "grad_norm": 17.963193893432617, + "learning_rate": 5e-05, + "loss": 1.0728, + "num_input_tokens_seen": 412493824, + "step": 6167 + }, + { + "epoch": 0.6998014184397163, + "loss": 0.8432507514953613, + "loss_ce": 0.007557407952845097, + "loss_iou": 0.345703125, + "loss_num": 0.0286865234375, + "loss_xval": 0.8359375, + "num_input_tokens_seen": 412493824, + "step": 6167 + }, + { + "epoch": 0.6999148936170213, + "grad_norm": 27.857440948486328, + "learning_rate": 5e-05, + "loss": 1.1691, + "num_input_tokens_seen": 412560484, + "step": 6168 + }, + { + "epoch": 0.6999148936170213, + "loss": 1.183864712715149, + "loss_ce": 0.00857175700366497, + "loss_iou": 0.490234375, + "loss_num": 0.03857421875, + "loss_xval": 1.171875, + "num_input_tokens_seen": 412560484, + "step": 6168 + }, + { + "epoch": 0.7000283687943263, + "grad_norm": 223.0620880126953, + "learning_rate": 5e-05, + "loss": 1.2501, + "num_input_tokens_seen": 412627508, + "step": 6169 + }, + { + "epoch": 0.7000283687943263, + "loss": 1.190709114074707, + "loss_ce": 0.010533364489674568, + "loss_iou": 0.451171875, + "loss_num": 0.055419921875, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 412627508, + "step": 6169 + }, + { + "epoch": 0.7001418439716312, + "grad_norm": 34.7098388671875, + "learning_rate": 5e-05, + "loss": 1.0787, + "num_input_tokens_seen": 412694484, + "step": 6170 + }, + { + "epoch": 0.7001418439716312, + "loss": 1.0127345323562622, + "loss_ce": 0.004922107793390751, + "loss_iou": 0.41015625, + "loss_num": 0.037353515625, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 412694484, + "step": 6170 + }, + { + "epoch": 0.7002553191489361, + "grad_norm": 27.31837272644043, + "learning_rate": 5e-05, + "loss": 1.4363, + "num_input_tokens_seen": 412761120, + "step": 6171 + }, + { + "epoch": 0.7002553191489361, + "loss": 1.3842573165893555, + "loss_ce": 0.007304128259420395, + "loss_iou": 0.5546875, + "loss_num": 0.053955078125, + "loss_xval": 1.375, + "num_input_tokens_seen": 412761120, + "step": 6171 + }, + { + "epoch": 0.7003687943262411, + "grad_norm": 17.914255142211914, + "learning_rate": 5e-05, + "loss": 1.1372, + "num_input_tokens_seen": 412827732, + "step": 6172 + }, + { + "epoch": 0.7003687943262411, + "loss": 1.2948999404907227, + "loss_ce": 0.003884236328303814, + "loss_iou": 0.51953125, + "loss_num": 0.0498046875, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 412827732, + "step": 6172 + }, + { + "epoch": 0.7004822695035461, + "grad_norm": 27.16335105895996, + "learning_rate": 5e-05, + "loss": 1.0828, + "num_input_tokens_seen": 412894144, + "step": 6173 + }, + { + "epoch": 0.7004822695035461, + "loss": 1.09817373752594, + "loss_ce": 0.007841745391488075, + "loss_iou": 0.45703125, + "loss_num": 0.035888671875, + "loss_xval": 1.09375, + "num_input_tokens_seen": 412894144, + "step": 6173 + }, + { + "epoch": 0.7005957446808511, + "grad_norm": 58.450408935546875, + "learning_rate": 5e-05, + "loss": 1.4409, + "num_input_tokens_seen": 412961076, + "step": 6174 + }, + { + "epoch": 0.7005957446808511, + "loss": 1.5145349502563477, + "loss_ce": 0.0076989904046058655, + "loss_iou": 0.62890625, + "loss_num": 0.04931640625, + "loss_xval": 1.5078125, + "num_input_tokens_seen": 412961076, + "step": 6174 + }, + { + "epoch": 0.700709219858156, + "grad_norm": 29.852569580078125, + "learning_rate": 5e-05, + "loss": 1.365, + "num_input_tokens_seen": 413027832, + "step": 6175 + }, + { + "epoch": 0.700709219858156, + "loss": 1.4605083465576172, + "loss_ce": 0.006406739819794893, + "loss_iou": 0.62890625, + "loss_num": 0.039306640625, + "loss_xval": 1.453125, + "num_input_tokens_seen": 413027832, + "step": 6175 + }, + { + "epoch": 0.700822695035461, + "grad_norm": 14.316628456115723, + "learning_rate": 5e-05, + "loss": 1.1231, + "num_input_tokens_seen": 413094840, + "step": 6176 + }, + { + "epoch": 0.700822695035461, + "loss": 1.0069866180419922, + "loss_ce": 0.00454526674002409, + "loss_iou": 0.423828125, + "loss_num": 0.0308837890625, + "loss_xval": 1.0, + "num_input_tokens_seen": 413094840, + "step": 6176 + }, + { + "epoch": 0.700936170212766, + "grad_norm": 19.89655113220215, + "learning_rate": 5e-05, + "loss": 1.2005, + "num_input_tokens_seen": 413162112, + "step": 6177 + }, + { + "epoch": 0.700936170212766, + "loss": 0.9690731763839722, + "loss_ce": 0.005694305524230003, + "loss_iou": 0.404296875, + "loss_num": 0.03125, + "loss_xval": 0.96484375, + "num_input_tokens_seen": 413162112, + "step": 6177 + }, + { + "epoch": 0.701049645390071, + "grad_norm": 36.08800506591797, + "learning_rate": 5e-05, + "loss": 1.2167, + "num_input_tokens_seen": 413228484, + "step": 6178 + }, + { + "epoch": 0.701049645390071, + "loss": 1.0795708894729614, + "loss_ce": 0.005352160893380642, + "loss_iou": 0.41015625, + "loss_num": 0.05126953125, + "loss_xval": 1.078125, + "num_input_tokens_seen": 413228484, + "step": 6178 + }, + { + "epoch": 0.7011631205673758, + "grad_norm": 46.559078216552734, + "learning_rate": 5e-05, + "loss": 1.1938, + "num_input_tokens_seen": 413294692, + "step": 6179 + }, + { + "epoch": 0.7011631205673758, + "loss": 1.2074651718139648, + "loss_ce": 0.0062933145090937614, + "loss_iou": 0.50390625, + "loss_num": 0.039306640625, + "loss_xval": 1.203125, + "num_input_tokens_seen": 413294692, + "step": 6179 + }, + { + "epoch": 0.7012765957446808, + "grad_norm": 39.917747497558594, + "learning_rate": 5e-05, + "loss": 1.2544, + "num_input_tokens_seen": 413361728, + "step": 6180 + }, + { + "epoch": 0.7012765957446808, + "loss": 1.2508600950241089, + "loss_ce": 0.0047664036974310875, + "loss_iou": 0.515625, + "loss_num": 0.04248046875, + "loss_xval": 1.25, + "num_input_tokens_seen": 413361728, + "step": 6180 + }, + { + "epoch": 0.7013900709219858, + "grad_norm": 22.832107543945312, + "learning_rate": 5e-05, + "loss": 1.1001, + "num_input_tokens_seen": 413428412, + "step": 6181 + }, + { + "epoch": 0.7013900709219858, + "loss": 0.8525294661521912, + "loss_ce": 0.0034083849750459194, + "loss_iou": 0.390625, + "loss_num": 0.01348876953125, + "loss_xval": 0.84765625, + "num_input_tokens_seen": 413428412, + "step": 6181 + }, + { + "epoch": 0.7015035460992908, + "grad_norm": 22.736873626708984, + "learning_rate": 5e-05, + "loss": 1.3457, + "num_input_tokens_seen": 413495292, + "step": 6182 + }, + { + "epoch": 0.7015035460992908, + "loss": 1.3894801139831543, + "loss_ce": 0.004714572802186012, + "loss_iou": 0.5234375, + "loss_num": 0.06689453125, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 413495292, + "step": 6182 + }, + { + "epoch": 0.7016170212765958, + "grad_norm": 27.560930252075195, + "learning_rate": 5e-05, + "loss": 1.1408, + "num_input_tokens_seen": 413562140, + "step": 6183 + }, + { + "epoch": 0.7016170212765958, + "loss": 1.003843069076538, + "loss_ce": 0.004697542637586594, + "loss_iou": 0.40234375, + "loss_num": 0.0390625, + "loss_xval": 1.0, + "num_input_tokens_seen": 413562140, + "step": 6183 + }, + { + "epoch": 0.7017304964539007, + "grad_norm": 23.83715057373047, + "learning_rate": 5e-05, + "loss": 1.1198, + "num_input_tokens_seen": 413628440, + "step": 6184 + }, + { + "epoch": 0.7017304964539007, + "loss": 1.1461012363433838, + "loss_ce": 0.006452871020883322, + "loss_iou": 0.484375, + "loss_num": 0.034423828125, + "loss_xval": 1.140625, + "num_input_tokens_seen": 413628440, + "step": 6184 + }, + { + "epoch": 0.7018439716312057, + "grad_norm": 24.661083221435547, + "learning_rate": 5e-05, + "loss": 1.198, + "num_input_tokens_seen": 413695328, + "step": 6185 + }, + { + "epoch": 0.7018439716312057, + "loss": 1.2876677513122559, + "loss_ce": 0.009835716336965561, + "loss_iou": 0.51953125, + "loss_num": 0.04736328125, + "loss_xval": 1.28125, + "num_input_tokens_seen": 413695328, + "step": 6185 + }, + { + "epoch": 0.7019574468085107, + "grad_norm": 24.61517333984375, + "learning_rate": 5e-05, + "loss": 1.195, + "num_input_tokens_seen": 413762260, + "step": 6186 + }, + { + "epoch": 0.7019574468085107, + "loss": 1.2610058784484863, + "loss_ce": 0.005634845234453678, + "loss_iou": 0.51171875, + "loss_num": 0.046142578125, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 413762260, + "step": 6186 + }, + { + "epoch": 0.7020709219858156, + "grad_norm": 21.33476448059082, + "learning_rate": 5e-05, + "loss": 1.0982, + "num_input_tokens_seen": 413829144, + "step": 6187 + }, + { + "epoch": 0.7020709219858156, + "loss": 1.0235681533813477, + "loss_ce": 0.005990003701299429, + "loss_iou": 0.453125, + "loss_num": 0.0224609375, + "loss_xval": 1.015625, + "num_input_tokens_seen": 413829144, + "step": 6187 + }, + { + "epoch": 0.7021843971631205, + "grad_norm": 24.54676628112793, + "learning_rate": 5e-05, + "loss": 1.17, + "num_input_tokens_seen": 413896224, + "step": 6188 + }, + { + "epoch": 0.7021843971631205, + "loss": 1.2129037380218506, + "loss_ce": 0.005628414452075958, + "loss_iou": 0.51171875, + "loss_num": 0.037109375, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 413896224, + "step": 6188 + }, + { + "epoch": 0.7022978723404255, + "grad_norm": 30.53816032409668, + "learning_rate": 5e-05, + "loss": 1.2296, + "num_input_tokens_seen": 413963360, + "step": 6189 + }, + { + "epoch": 0.7022978723404255, + "loss": 1.2644357681274414, + "loss_ce": 0.004670155234634876, + "loss_iou": 0.5234375, + "loss_num": 0.04248046875, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 413963360, + "step": 6189 + }, + { + "epoch": 0.7024113475177305, + "grad_norm": 29.205163955688477, + "learning_rate": 5e-05, + "loss": 1.226, + "num_input_tokens_seen": 414030484, + "step": 6190 + }, + { + "epoch": 0.7024113475177305, + "loss": 1.161085605621338, + "loss_ce": 0.00532385241240263, + "loss_iou": 0.5078125, + "loss_num": 0.0286865234375, + "loss_xval": 1.15625, + "num_input_tokens_seen": 414030484, + "step": 6190 + }, + { + "epoch": 0.7025248226950355, + "grad_norm": 24.53936195373535, + "learning_rate": 5e-05, + "loss": 1.3082, + "num_input_tokens_seen": 414097396, + "step": 6191 + }, + { + "epoch": 0.7025248226950355, + "loss": 1.200824499130249, + "loss_ce": 0.005023684352636337, + "loss_iou": 0.4765625, + "loss_num": 0.0478515625, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 414097396, + "step": 6191 + }, + { + "epoch": 0.7026382978723404, + "grad_norm": 67.90033721923828, + "learning_rate": 5e-05, + "loss": 1.32, + "num_input_tokens_seen": 414163960, + "step": 6192 + }, + { + "epoch": 0.7026382978723404, + "loss": 1.327500343322754, + "loss_ce": 0.007431910838931799, + "loss_iou": 0.52734375, + "loss_num": 0.05322265625, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 414163960, + "step": 6192 + }, + { + "epoch": 0.7027517730496454, + "grad_norm": 27.08268165588379, + "learning_rate": 5e-05, + "loss": 1.0587, + "num_input_tokens_seen": 414231412, + "step": 6193 + }, + { + "epoch": 0.7027517730496454, + "loss": 1.0551073551177979, + "loss_ce": 0.006767537910491228, + "loss_iou": 0.44140625, + "loss_num": 0.033447265625, + "loss_xval": 1.046875, + "num_input_tokens_seen": 414231412, + "step": 6193 + }, + { + "epoch": 0.7028652482269504, + "grad_norm": 37.02623748779297, + "learning_rate": 5e-05, + "loss": 1.3343, + "num_input_tokens_seen": 414298092, + "step": 6194 + }, + { + "epoch": 0.7028652482269504, + "loss": 1.3314604759216309, + "loss_ce": 0.009683131240308285, + "loss_iou": 0.56640625, + "loss_num": 0.037353515625, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 414298092, + "step": 6194 + }, + { + "epoch": 0.7029787234042553, + "grad_norm": 37.165523529052734, + "learning_rate": 5e-05, + "loss": 1.3058, + "num_input_tokens_seen": 414364864, + "step": 6195 + }, + { + "epoch": 0.7029787234042553, + "loss": 1.4090845584869385, + "loss_ce": 0.002834531245753169, + "loss_iou": 0.58984375, + "loss_num": 0.044677734375, + "loss_xval": 1.40625, + "num_input_tokens_seen": 414364864, + "step": 6195 + }, + { + "epoch": 0.7030921985815602, + "grad_norm": 23.49091911315918, + "learning_rate": 5e-05, + "loss": 1.1109, + "num_input_tokens_seen": 414431860, + "step": 6196 + }, + { + "epoch": 0.7030921985815602, + "loss": 1.0648256540298462, + "loss_ce": 0.0057436078786849976, + "loss_iou": 0.451171875, + "loss_num": 0.031494140625, + "loss_xval": 1.0625, + "num_input_tokens_seen": 414431860, + "step": 6196 + }, + { + "epoch": 0.7032056737588652, + "grad_norm": 20.455102920532227, + "learning_rate": 5e-05, + "loss": 1.2522, + "num_input_tokens_seen": 414499600, + "step": 6197 + }, + { + "epoch": 0.7032056737588652, + "loss": 1.3587660789489746, + "loss_ce": 0.005738710984587669, + "loss_iou": 0.546875, + "loss_num": 0.05126953125, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 414499600, + "step": 6197 + }, + { + "epoch": 0.7033191489361702, + "grad_norm": 20.13922119140625, + "learning_rate": 5e-05, + "loss": 1.0705, + "num_input_tokens_seen": 414566672, + "step": 6198 + }, + { + "epoch": 0.7033191489361702, + "loss": 0.9585200548171997, + "loss_ce": 0.0034419437870383263, + "loss_iou": 0.416015625, + "loss_num": 0.024658203125, + "loss_xval": 0.953125, + "num_input_tokens_seen": 414566672, + "step": 6198 + }, + { + "epoch": 0.7034326241134752, + "grad_norm": 27.551589965820312, + "learning_rate": 5e-05, + "loss": 1.3132, + "num_input_tokens_seen": 414633136, + "step": 6199 + }, + { + "epoch": 0.7034326241134752, + "loss": 1.283402919769287, + "loss_ce": 0.005082552786916494, + "loss_iou": 0.50390625, + "loss_num": 0.053466796875, + "loss_xval": 1.28125, + "num_input_tokens_seen": 414633136, + "step": 6199 + }, + { + "epoch": 0.7035460992907802, + "grad_norm": 41.07429885864258, + "learning_rate": 5e-05, + "loss": 1.0799, + "num_input_tokens_seen": 414700196, + "step": 6200 + }, + { + "epoch": 0.7035460992907802, + "loss": 1.1543073654174805, + "loss_ce": 0.005381603725254536, + "loss_iou": 0.4765625, + "loss_num": 0.038818359375, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 414700196, + "step": 6200 + }, + { + "epoch": 0.7036595744680851, + "grad_norm": 32.35556411743164, + "learning_rate": 5e-05, + "loss": 1.3044, + "num_input_tokens_seen": 414767484, + "step": 6201 + }, + { + "epoch": 0.7036595744680851, + "loss": 1.4263522624969482, + "loss_ce": 0.005942175630480051, + "loss_iou": 0.5859375, + "loss_num": 0.04931640625, + "loss_xval": 1.421875, + "num_input_tokens_seen": 414767484, + "step": 6201 + }, + { + "epoch": 0.7037730496453901, + "grad_norm": 21.166656494140625, + "learning_rate": 5e-05, + "loss": 0.9448, + "num_input_tokens_seen": 414835060, + "step": 6202 + }, + { + "epoch": 0.7037730496453901, + "loss": 0.8744680881500244, + "loss_ce": 0.008013068698346615, + "loss_iou": 0.359375, + "loss_num": 0.029541015625, + "loss_xval": 0.8671875, + "num_input_tokens_seen": 414835060, + "step": 6202 + }, + { + "epoch": 0.703886524822695, + "grad_norm": 20.9836368560791, + "learning_rate": 5e-05, + "loss": 1.1745, + "num_input_tokens_seen": 414901288, + "step": 6203 + }, + { + "epoch": 0.703886524822695, + "loss": 1.086612343788147, + "loss_ce": 0.0055576348677277565, + "loss_iou": 0.46875, + "loss_num": 0.0283203125, + "loss_xval": 1.078125, + "num_input_tokens_seen": 414901288, + "step": 6203 + }, + { + "epoch": 0.704, + "grad_norm": 13.099370956420898, + "learning_rate": 5e-05, + "loss": 1.1478, + "num_input_tokens_seen": 414968232, + "step": 6204 + }, + { + "epoch": 0.704, + "loss": 1.252427577972412, + "loss_ce": 0.008775142952799797, + "loss_iou": 0.5, + "loss_num": 0.048095703125, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 414968232, + "step": 6204 + }, + { + "epoch": 0.7041134751773049, + "grad_norm": 21.155990600585938, + "learning_rate": 5e-05, + "loss": 1.1406, + "num_input_tokens_seen": 415036548, + "step": 6205 + }, + { + "epoch": 0.7041134751773049, + "loss": 0.958899974822998, + "loss_ce": 0.00943712703883648, + "loss_iou": 0.37109375, + "loss_num": 0.041748046875, + "loss_xval": 0.94921875, + "num_input_tokens_seen": 415036548, + "step": 6205 + }, + { + "epoch": 0.7042269503546099, + "grad_norm": 25.660160064697266, + "learning_rate": 5e-05, + "loss": 1.186, + "num_input_tokens_seen": 415103700, + "step": 6206 + }, + { + "epoch": 0.7042269503546099, + "loss": 1.2313895225524902, + "loss_ce": 0.008245056495070457, + "loss_iou": 0.50390625, + "loss_num": 0.04248046875, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 415103700, + "step": 6206 + }, + { + "epoch": 0.7043404255319149, + "grad_norm": 32.635005950927734, + "learning_rate": 5e-05, + "loss": 1.1737, + "num_input_tokens_seen": 415169892, + "step": 6207 + }, + { + "epoch": 0.7043404255319149, + "loss": 1.1293516159057617, + "loss_ce": 0.005816517863422632, + "loss_iou": 0.443359375, + "loss_num": 0.04736328125, + "loss_xval": 1.125, + "num_input_tokens_seen": 415169892, + "step": 6207 + }, + { + "epoch": 0.7044539007092199, + "grad_norm": 37.96401596069336, + "learning_rate": 5e-05, + "loss": 1.3686, + "num_input_tokens_seen": 415237168, + "step": 6208 + }, + { + "epoch": 0.7044539007092199, + "loss": 1.4462668895721436, + "loss_ce": 0.0038841739296913147, + "loss_iou": 0.58984375, + "loss_num": 0.05224609375, + "loss_xval": 1.4453125, + "num_input_tokens_seen": 415237168, + "step": 6208 + }, + { + "epoch": 0.7045673758865248, + "grad_norm": 22.401872634887695, + "learning_rate": 5e-05, + "loss": 1.0309, + "num_input_tokens_seen": 415304340, + "step": 6209 + }, + { + "epoch": 0.7045673758865248, + "loss": 1.0340170860290527, + "loss_ce": 0.008626457303762436, + "loss_iou": 0.4453125, + "loss_num": 0.02685546875, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 415304340, + "step": 6209 + }, + { + "epoch": 0.7046808510638298, + "grad_norm": 27.447093963623047, + "learning_rate": 5e-05, + "loss": 1.0492, + "num_input_tokens_seen": 415371228, + "step": 6210 + }, + { + "epoch": 0.7046808510638298, + "loss": 1.1939929723739624, + "loss_ce": 0.007469578180462122, + "loss_iou": 0.4921875, + "loss_num": 0.040283203125, + "loss_xval": 1.1875, + "num_input_tokens_seen": 415371228, + "step": 6210 + }, + { + "epoch": 0.7047943262411348, + "grad_norm": 48.30029296875, + "learning_rate": 5e-05, + "loss": 1.1389, + "num_input_tokens_seen": 415438572, + "step": 6211 + }, + { + "epoch": 0.7047943262411348, + "loss": 1.1748312711715698, + "loss_ce": 0.0044210487976670265, + "loss_iou": 0.46875, + "loss_num": 0.047119140625, + "loss_xval": 1.171875, + "num_input_tokens_seen": 415438572, + "step": 6211 + }, + { + "epoch": 0.7049078014184397, + "grad_norm": 30.972976684570312, + "learning_rate": 5e-05, + "loss": 1.2466, + "num_input_tokens_seen": 415505068, + "step": 6212 + }, + { + "epoch": 0.7049078014184397, + "loss": 1.3966224193572998, + "loss_ce": 0.009415403939783573, + "loss_iou": 0.58984375, + "loss_num": 0.041015625, + "loss_xval": 1.390625, + "num_input_tokens_seen": 415505068, + "step": 6212 + }, + { + "epoch": 0.7050212765957447, + "grad_norm": 48.21162796020508, + "learning_rate": 5e-05, + "loss": 1.1335, + "num_input_tokens_seen": 415571316, + "step": 6213 + }, + { + "epoch": 0.7050212765957447, + "loss": 1.327458143234253, + "loss_ce": 0.012028388679027557, + "loss_iou": 0.48828125, + "loss_num": 0.06787109375, + "loss_xval": 1.3125, + "num_input_tokens_seen": 415571316, + "step": 6213 + }, + { + "epoch": 0.7051347517730496, + "grad_norm": 17.726848602294922, + "learning_rate": 5e-05, + "loss": 1.1399, + "num_input_tokens_seen": 415638940, + "step": 6214 + }, + { + "epoch": 0.7051347517730496, + "loss": 1.3830180168151855, + "loss_ce": 0.005576592404395342, + "loss_iou": 0.55859375, + "loss_num": 0.05224609375, + "loss_xval": 1.375, + "num_input_tokens_seen": 415638940, + "step": 6214 + }, + { + "epoch": 0.7052482269503546, + "grad_norm": 27.106910705566406, + "learning_rate": 5e-05, + "loss": 1.0269, + "num_input_tokens_seen": 415705204, + "step": 6215 + }, + { + "epoch": 0.7052482269503546, + "loss": 1.0313533544540405, + "loss_ce": 0.0030330726876854897, + "loss_iou": 0.421875, + "loss_num": 0.036865234375, + "loss_xval": 1.03125, + "num_input_tokens_seen": 415705204, + "step": 6215 + }, + { + "epoch": 0.7053617021276596, + "grad_norm": 26.88545799255371, + "learning_rate": 5e-05, + "loss": 1.1761, + "num_input_tokens_seen": 415772120, + "step": 6216 + }, + { + "epoch": 0.7053617021276596, + "loss": 1.2022629976272583, + "loss_ce": 0.01036851480603218, + "loss_iou": 0.48046875, + "loss_num": 0.0458984375, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 415772120, + "step": 6216 + }, + { + "epoch": 0.7054751773049646, + "grad_norm": 33.487571716308594, + "learning_rate": 5e-05, + "loss": 1.3641, + "num_input_tokens_seen": 415838340, + "step": 6217 + }, + { + "epoch": 0.7054751773049646, + "loss": 1.353345513343811, + "loss_ce": 0.008130733855068684, + "loss_iou": 0.5390625, + "loss_num": 0.053955078125, + "loss_xval": 1.34375, + "num_input_tokens_seen": 415838340, + "step": 6217 + }, + { + "epoch": 0.7055886524822695, + "grad_norm": 41.47890090942383, + "learning_rate": 5e-05, + "loss": 1.1547, + "num_input_tokens_seen": 415905744, + "step": 6218 + }, + { + "epoch": 0.7055886524822695, + "loss": 1.2434451580047607, + "loss_ce": 0.004187296610325575, + "loss_iou": 0.515625, + "loss_num": 0.0419921875, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 415905744, + "step": 6218 + }, + { + "epoch": 0.7057021276595745, + "grad_norm": 42.004676818847656, + "learning_rate": 5e-05, + "loss": 1.1162, + "num_input_tokens_seen": 415972808, + "step": 6219 + }, + { + "epoch": 0.7057021276595745, + "loss": 1.2406072616577148, + "loss_ce": 0.010138536803424358, + "loss_iou": 0.51953125, + "loss_num": 0.03857421875, + "loss_xval": 1.234375, + "num_input_tokens_seen": 415972808, + "step": 6219 + }, + { + "epoch": 0.7058156028368794, + "grad_norm": 28.3791446685791, + "learning_rate": 5e-05, + "loss": 1.1378, + "num_input_tokens_seen": 416039012, + "step": 6220 + }, + { + "epoch": 0.7058156028368794, + "loss": 1.2276318073272705, + "loss_ce": 0.0081493454053998, + "loss_iou": 0.5, + "loss_num": 0.044189453125, + "loss_xval": 1.21875, + "num_input_tokens_seen": 416039012, + "step": 6220 + }, + { + "epoch": 0.7059290780141844, + "grad_norm": 21.201833724975586, + "learning_rate": 5e-05, + "loss": 1.1597, + "num_input_tokens_seen": 416106080, + "step": 6221 + }, + { + "epoch": 0.7059290780141844, + "loss": 0.9810951948165894, + "loss_ce": 0.005509302951395512, + "loss_iou": 0.41015625, + "loss_num": 0.03125, + "loss_xval": 0.9765625, + "num_input_tokens_seen": 416106080, + "step": 6221 + }, + { + "epoch": 0.7060425531914893, + "grad_norm": 28.982643127441406, + "learning_rate": 5e-05, + "loss": 1.1308, + "num_input_tokens_seen": 416173060, + "step": 6222 + }, + { + "epoch": 0.7060425531914893, + "loss": 1.2680079936981201, + "loss_ce": 0.002871191129088402, + "loss_iou": 0.490234375, + "loss_num": 0.056884765625, + "loss_xval": 1.265625, + "num_input_tokens_seen": 416173060, + "step": 6222 + }, + { + "epoch": 0.7061560283687943, + "grad_norm": 20.898155212402344, + "learning_rate": 5e-05, + "loss": 1.248, + "num_input_tokens_seen": 416240412, + "step": 6223 + }, + { + "epoch": 0.7061560283687943, + "loss": 1.3298753499984741, + "loss_ce": 0.00834216084331274, + "loss_iou": 0.5234375, + "loss_num": 0.055419921875, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 416240412, + "step": 6223 + }, + { + "epoch": 0.7062695035460993, + "grad_norm": 12.19046401977539, + "learning_rate": 5e-05, + "loss": 1.1542, + "num_input_tokens_seen": 416307664, + "step": 6224 + }, + { + "epoch": 0.7062695035460993, + "loss": 1.1401069164276123, + "loss_ce": 0.008026894181966782, + "loss_iou": 0.466796875, + "loss_num": 0.039794921875, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 416307664, + "step": 6224 + }, + { + "epoch": 0.7063829787234043, + "grad_norm": 12.682760238647461, + "learning_rate": 5e-05, + "loss": 1.0618, + "num_input_tokens_seen": 416374260, + "step": 6225 + }, + { + "epoch": 0.7063829787234043, + "loss": 1.0882818698883057, + "loss_ce": 0.00918037910014391, + "loss_iou": 0.419921875, + "loss_num": 0.0478515625, + "loss_xval": 1.078125, + "num_input_tokens_seen": 416374260, + "step": 6225 + }, + { + "epoch": 0.7064964539007093, + "grad_norm": 32.27968215942383, + "learning_rate": 5e-05, + "loss": 1.1728, + "num_input_tokens_seen": 416440352, + "step": 6226 + }, + { + "epoch": 0.7064964539007093, + "loss": 1.3569109439849854, + "loss_ce": 0.009254834614694118, + "loss_iou": 0.53125, + "loss_num": 0.056884765625, + "loss_xval": 1.34375, + "num_input_tokens_seen": 416440352, + "step": 6226 + }, + { + "epoch": 0.7066099290780142, + "grad_norm": 29.11907386779785, + "learning_rate": 5e-05, + "loss": 1.0359, + "num_input_tokens_seen": 416507532, + "step": 6227 + }, + { + "epoch": 0.7066099290780142, + "loss": 1.0458365678787231, + "loss_ce": 0.005797503516077995, + "loss_iou": 0.44921875, + "loss_num": 0.0283203125, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 416507532, + "step": 6227 + }, + { + "epoch": 0.7067234042553191, + "grad_norm": 20.34391975402832, + "learning_rate": 5e-05, + "loss": 1.2014, + "num_input_tokens_seen": 416574684, + "step": 6228 + }, + { + "epoch": 0.7067234042553191, + "loss": 1.1157602071762085, + "loss_ce": 0.005408707074820995, + "loss_iou": 0.458984375, + "loss_num": 0.038818359375, + "loss_xval": 1.109375, + "num_input_tokens_seen": 416574684, + "step": 6228 + }, + { + "epoch": 0.7068368794326241, + "grad_norm": 29.320709228515625, + "learning_rate": 5e-05, + "loss": 1.0883, + "num_input_tokens_seen": 416641728, + "step": 6229 + }, + { + "epoch": 0.7068368794326241, + "loss": 1.0334112644195557, + "loss_ce": 0.007532279938459396, + "loss_iou": 0.41796875, + "loss_num": 0.037841796875, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 416641728, + "step": 6229 + }, + { + "epoch": 0.706950354609929, + "grad_norm": 40.539634704589844, + "learning_rate": 5e-05, + "loss": 1.3287, + "num_input_tokens_seen": 416709424, + "step": 6230 + }, + { + "epoch": 0.706950354609929, + "loss": 1.2870991230010986, + "loss_ce": 0.005360801238566637, + "loss_iou": 0.51171875, + "loss_num": 0.051025390625, + "loss_xval": 1.28125, + "num_input_tokens_seen": 416709424, + "step": 6230 + }, + { + "epoch": 0.707063829787234, + "grad_norm": 41.459712982177734, + "learning_rate": 5e-05, + "loss": 1.3674, + "num_input_tokens_seen": 416777220, + "step": 6231 + }, + { + "epoch": 0.707063829787234, + "loss": 1.303849458694458, + "loss_ce": 0.007951086387038231, + "loss_iou": 0.53515625, + "loss_num": 0.044677734375, + "loss_xval": 1.296875, + "num_input_tokens_seen": 416777220, + "step": 6231 + }, + { + "epoch": 0.707177304964539, + "grad_norm": 30.2033748626709, + "learning_rate": 5e-05, + "loss": 1.2892, + "num_input_tokens_seen": 416844620, + "step": 6232 + }, + { + "epoch": 0.707177304964539, + "loss": 1.3245842456817627, + "loss_ce": 0.008177928626537323, + "loss_iou": 0.5234375, + "loss_num": 0.0537109375, + "loss_xval": 1.3125, + "num_input_tokens_seen": 416844620, + "step": 6232 + }, + { + "epoch": 0.707290780141844, + "grad_norm": 18.677112579345703, + "learning_rate": 5e-05, + "loss": 1.1532, + "num_input_tokens_seen": 416911152, + "step": 6233 + }, + { + "epoch": 0.707290780141844, + "loss": 1.1231944561004639, + "loss_ce": 0.005518668331205845, + "loss_iou": 0.427734375, + "loss_num": 0.052490234375, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 416911152, + "step": 6233 + }, + { + "epoch": 0.707404255319149, + "grad_norm": 21.43694496154785, + "learning_rate": 5e-05, + "loss": 1.0972, + "num_input_tokens_seen": 416977940, + "step": 6234 + }, + { + "epoch": 0.707404255319149, + "loss": 1.0605661869049072, + "loss_ce": 0.007587616331875324, + "loss_iou": 0.435546875, + "loss_num": 0.036865234375, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 416977940, + "step": 6234 + }, + { + "epoch": 0.707517730496454, + "grad_norm": 20.547649383544922, + "learning_rate": 5e-05, + "loss": 1.2236, + "num_input_tokens_seen": 417045840, + "step": 6235 + }, + { + "epoch": 0.707517730496454, + "loss": 1.1659317016601562, + "loss_ce": 0.008216910995543003, + "loss_iou": 0.50390625, + "loss_num": 0.030517578125, + "loss_xval": 1.15625, + "num_input_tokens_seen": 417045840, + "step": 6235 + }, + { + "epoch": 0.7076312056737588, + "grad_norm": 32.30154037475586, + "learning_rate": 5e-05, + "loss": 1.0347, + "num_input_tokens_seen": 417113268, + "step": 6236 + }, + { + "epoch": 0.7076312056737588, + "loss": 1.1697357892990112, + "loss_ce": 0.007138142362236977, + "loss_iou": 0.4609375, + "loss_num": 0.048583984375, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 417113268, + "step": 6236 + }, + { + "epoch": 0.7077446808510638, + "grad_norm": 25.175704956054688, + "learning_rate": 5e-05, + "loss": 1.1574, + "num_input_tokens_seen": 417179976, + "step": 6237 + }, + { + "epoch": 0.7077446808510638, + "loss": 1.0985212326049805, + "loss_ce": 0.0040388572961091995, + "loss_iou": 0.4140625, + "loss_num": 0.053466796875, + "loss_xval": 1.09375, + "num_input_tokens_seen": 417179976, + "step": 6237 + }, + { + "epoch": 0.7078581560283688, + "grad_norm": 30.714794158935547, + "learning_rate": 5e-05, + "loss": 1.2408, + "num_input_tokens_seen": 417247824, + "step": 6238 + }, + { + "epoch": 0.7078581560283688, + "loss": 1.4619019031524658, + "loss_ce": 0.006823802832514048, + "loss_iou": 0.59375, + "loss_num": 0.053466796875, + "loss_xval": 1.453125, + "num_input_tokens_seen": 417247824, + "step": 6238 + }, + { + "epoch": 0.7079716312056737, + "grad_norm": 38.122127532958984, + "learning_rate": 5e-05, + "loss": 1.1281, + "num_input_tokens_seen": 417313880, + "step": 6239 + }, + { + "epoch": 0.7079716312056737, + "loss": 1.1973559856414795, + "loss_ce": 0.007414678111672401, + "loss_iou": 0.5, + "loss_num": 0.0380859375, + "loss_xval": 1.1875, + "num_input_tokens_seen": 417313880, + "step": 6239 + }, + { + "epoch": 0.7080851063829787, + "grad_norm": 35.540130615234375, + "learning_rate": 5e-05, + "loss": 1.1924, + "num_input_tokens_seen": 417381060, + "step": 6240 + }, + { + "epoch": 0.7080851063829787, + "loss": 1.2535008192062378, + "loss_ce": 0.005453946068882942, + "loss_iou": 0.53515625, + "loss_num": 0.035888671875, + "loss_xval": 1.25, + "num_input_tokens_seen": 417381060, + "step": 6240 + }, + { + "epoch": 0.7081985815602837, + "grad_norm": 29.54172706604004, + "learning_rate": 5e-05, + "loss": 1.5943, + "num_input_tokens_seen": 417447544, + "step": 6241 + }, + { + "epoch": 0.7081985815602837, + "loss": 1.6655123233795166, + "loss_ce": 0.00828571803867817, + "loss_iou": 0.671875, + "loss_num": 0.061767578125, + "loss_xval": 1.65625, + "num_input_tokens_seen": 417447544, + "step": 6241 + }, + { + "epoch": 0.7083120567375887, + "grad_norm": 30.394641876220703, + "learning_rate": 5e-05, + "loss": 1.1538, + "num_input_tokens_seen": 417513360, + "step": 6242 + }, + { + "epoch": 0.7083120567375887, + "loss": 1.0336787700653076, + "loss_ce": 0.006823232863098383, + "loss_iou": 0.427734375, + "loss_num": 0.0341796875, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 417513360, + "step": 6242 + }, + { + "epoch": 0.7084255319148937, + "grad_norm": 19.896421432495117, + "learning_rate": 5e-05, + "loss": 1.2978, + "num_input_tokens_seen": 417579520, + "step": 6243 + }, + { + "epoch": 0.7084255319148937, + "loss": 1.0763013362884521, + "loss_ce": 0.009071039035916328, + "loss_iou": 0.439453125, + "loss_num": 0.03759765625, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 417579520, + "step": 6243 + }, + { + "epoch": 0.7085390070921986, + "grad_norm": 38.2544059753418, + "learning_rate": 5e-05, + "loss": 1.2368, + "num_input_tokens_seen": 417647212, + "step": 6244 + }, + { + "epoch": 0.7085390070921986, + "loss": 1.1918022632598877, + "loss_ce": 0.011138305068016052, + "loss_iou": 0.515625, + "loss_num": 0.0301513671875, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 417647212, + "step": 6244 + }, + { + "epoch": 0.7086524822695035, + "grad_norm": 31.881303787231445, + "learning_rate": 5e-05, + "loss": 1.2267, + "num_input_tokens_seen": 417715316, + "step": 6245 + }, + { + "epoch": 0.7086524822695035, + "loss": 1.258184552192688, + "loss_ce": 0.005254898685961962, + "loss_iou": 0.51953125, + "loss_num": 0.04248046875, + "loss_xval": 1.25, + "num_input_tokens_seen": 417715316, + "step": 6245 + }, + { + "epoch": 0.7087659574468085, + "grad_norm": 27.5345516204834, + "learning_rate": 5e-05, + "loss": 1.1781, + "num_input_tokens_seen": 417783224, + "step": 6246 + }, + { + "epoch": 0.7087659574468085, + "loss": 1.326878309249878, + "loss_ce": 0.006077612284570932, + "loss_iou": 0.5546875, + "loss_num": 0.0419921875, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 417783224, + "step": 6246 + }, + { + "epoch": 0.7088794326241135, + "grad_norm": 19.752485275268555, + "learning_rate": 5e-05, + "loss": 1.1507, + "num_input_tokens_seen": 417850428, + "step": 6247 + }, + { + "epoch": 0.7088794326241135, + "loss": 1.2247207164764404, + "loss_ce": 0.007435597479343414, + "loss_iou": 0.484375, + "loss_num": 0.0498046875, + "loss_xval": 1.21875, + "num_input_tokens_seen": 417850428, + "step": 6247 + }, + { + "epoch": 0.7089929078014184, + "grad_norm": 24.436405181884766, + "learning_rate": 5e-05, + "loss": 1.4694, + "num_input_tokens_seen": 417917764, + "step": 6248 + }, + { + "epoch": 0.7089929078014184, + "loss": 1.4265615940093994, + "loss_ce": 0.0032216934487223625, + "loss_iou": 0.57421875, + "loss_num": 0.0546875, + "loss_xval": 1.421875, + "num_input_tokens_seen": 417917764, + "step": 6248 + }, + { + "epoch": 0.7091063829787234, + "grad_norm": 30.920045852661133, + "learning_rate": 5e-05, + "loss": 1.214, + "num_input_tokens_seen": 417984884, + "step": 6249 + }, + { + "epoch": 0.7091063829787234, + "loss": 1.0852495431900024, + "loss_ce": 0.006147958338260651, + "loss_iou": 0.439453125, + "loss_num": 0.039794921875, + "loss_xval": 1.078125, + "num_input_tokens_seen": 417984884, + "step": 6249 + }, + { + "epoch": 0.7092198581560284, + "grad_norm": 41.382686614990234, + "learning_rate": 5e-05, + "loss": 1.1719, + "num_input_tokens_seen": 418051872, + "step": 6250 + }, + { + "epoch": 0.7092198581560284, + "eval_seeclick_CIoU": 0.38480956852436066, + "eval_seeclick_GIoU": 0.3520917296409607, + "eval_seeclick_IoU": 0.477814644575119, + "eval_seeclick_MAE_all": 0.16267523914575577, + "eval_seeclick_MAE_h": 0.07017900608479977, + "eval_seeclick_MAE_w": 0.13699553906917572, + "eval_seeclick_MAE_x_boxes": 0.24483826756477356, + "eval_seeclick_MAE_y_boxes": 0.13511308282613754, + "eval_seeclick_NUM_probability": 0.9999660849571228, + "eval_seeclick_inside_bbox": 0.6614583432674408, + "eval_seeclick_loss": 2.45108699798584, + "eval_seeclick_loss_ce": 0.01430233521386981, + "eval_seeclick_loss_iou": 0.829345703125, + "eval_seeclick_loss_num": 0.155975341796875, + "eval_seeclick_loss_xval": 2.4365234375, + "eval_seeclick_runtime": 66.8189, + "eval_seeclick_samples_per_second": 0.703, + "eval_seeclick_steps_per_second": 0.03, + "num_input_tokens_seen": 418051872, + "step": 6250 + }, + { + "epoch": 0.7092198581560284, + "eval_icons_CIoU": 0.47226452827453613, + "eval_icons_GIoU": 0.47570258378982544, + "eval_icons_IoU": 0.5251254439353943, + "eval_icons_MAE_all": 0.13860951364040375, + "eval_icons_MAE_h": 0.09243414923548698, + "eval_icons_MAE_w": 0.08867059648036957, + "eval_icons_MAE_x_boxes": 0.1308673769235611, + "eval_icons_MAE_y_boxes": 0.0971447043120861, + "eval_icons_NUM_probability": 0.999963104724884, + "eval_icons_inside_bbox": 0.7326388955116272, + "eval_icons_loss": 2.3037750720977783, + "eval_icons_loss_ce": 2.379915622441331e-05, + "eval_icons_loss_iou": 0.79638671875, + "eval_icons_loss_num": 0.1364459991455078, + "eval_icons_loss_xval": 2.2763671875, + "eval_icons_runtime": 68.3014, + "eval_icons_samples_per_second": 0.732, + "eval_icons_steps_per_second": 0.029, + "num_input_tokens_seen": 418051872, + "step": 6250 + }, + { + "epoch": 0.7092198581560284, + "eval_screenspot_CIoU": 0.2781159778436025, + "eval_screenspot_GIoU": 0.2489770452181498, + "eval_screenspot_IoU": 0.38194623589515686, + "eval_screenspot_MAE_all": 0.21064459780852, + "eval_screenspot_MAE_h": 0.13047186533610025, + "eval_screenspot_MAE_w": 0.15751875936985016, + "eval_screenspot_MAE_x_boxes": 0.313351442416509, + "eval_screenspot_MAE_y_boxes": 0.11701064060131709, + "eval_screenspot_NUM_probability": 0.9998146891593933, + "eval_screenspot_inside_bbox": 0.5958333412806193, + "eval_screenspot_loss": 2.864624500274658, + "eval_screenspot_loss_ce": 0.015840206605692703, + "eval_screenspot_loss_iou": 0.9034830729166666, + "eval_screenspot_loss_num": 0.21947224934895834, + "eval_screenspot_loss_xval": 2.90234375, + "eval_screenspot_runtime": 118.1201, + "eval_screenspot_samples_per_second": 0.753, + "eval_screenspot_steps_per_second": 0.025, + "num_input_tokens_seen": 418051872, + "step": 6250 + }, + { + "epoch": 0.7092198581560284, + "eval_compot_CIoU": 0.22297292202711105, + "eval_compot_GIoU": 0.21074732393026352, + "eval_compot_IoU": 0.32132071256637573, + "eval_compot_MAE_all": 0.2079480215907097, + "eval_compot_MAE_h": 0.09371371567249298, + "eval_compot_MAE_w": 0.1967419907450676, + "eval_compot_MAE_x_boxes": 0.17392908036708832, + "eval_compot_MAE_y_boxes": 0.17184551805257797, + "eval_compot_NUM_probability": 0.9999378621578217, + "eval_compot_inside_bbox": 0.5121527910232544, + "eval_compot_loss": 3.0211479663848877, + "eval_compot_loss_ce": 0.005874468712136149, + "eval_compot_loss_iou": 0.998779296875, + "eval_compot_loss_num": 0.19189453125, + "eval_compot_loss_xval": 2.955078125, + "eval_compot_runtime": 69.5798, + "eval_compot_samples_per_second": 0.719, + "eval_compot_steps_per_second": 0.029, + "num_input_tokens_seen": 418051872, + "step": 6250 + }, + { + "epoch": 0.7092198581560284, + "loss": 2.9424571990966797, + "loss_ce": 0.006910241674631834, + "loss_iou": 0.99609375, + "loss_num": 0.1884765625, + "loss_xval": 2.9375, + "num_input_tokens_seen": 418051872, + "step": 6250 + }, + { + "epoch": 0.7093333333333334, + "grad_norm": 27.365983963012695, + "learning_rate": 5e-05, + "loss": 1.4128, + "num_input_tokens_seen": 418118864, + "step": 6251 + }, + { + "epoch": 0.7093333333333334, + "loss": 1.5587499141693115, + "loss_ce": 0.006503930781036615, + "loss_iou": 0.6171875, + "loss_num": 0.06298828125, + "loss_xval": 1.5546875, + "num_input_tokens_seen": 418118864, + "step": 6251 + }, + { + "epoch": 0.7094468085106383, + "grad_norm": 14.143153190612793, + "learning_rate": 5e-05, + "loss": 1.1708, + "num_input_tokens_seen": 418187216, + "step": 6252 + }, + { + "epoch": 0.7094468085106383, + "loss": 1.2211081981658936, + "loss_ce": 0.0033348402939736843, + "loss_iou": 0.466796875, + "loss_num": 0.05712890625, + "loss_xval": 1.21875, + "num_input_tokens_seen": 418187216, + "step": 6252 + }, + { + "epoch": 0.7095602836879432, + "grad_norm": 15.445548057556152, + "learning_rate": 5e-05, + "loss": 1.335, + "num_input_tokens_seen": 418253716, + "step": 6253 + }, + { + "epoch": 0.7095602836879432, + "loss": 1.245977759361267, + "loss_ce": 0.007208178285509348, + "loss_iou": 0.46484375, + "loss_num": 0.062255859375, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 418253716, + "step": 6253 + }, + { + "epoch": 0.7096737588652482, + "grad_norm": 15.163439750671387, + "learning_rate": 5e-05, + "loss": 1.1111, + "num_input_tokens_seen": 418320748, + "step": 6254 + }, + { + "epoch": 0.7096737588652482, + "loss": 1.0412571430206299, + "loss_ce": 0.01000718679279089, + "loss_iou": 0.40625, + "loss_num": 0.043701171875, + "loss_xval": 1.03125, + "num_input_tokens_seen": 418320748, + "step": 6254 + }, + { + "epoch": 0.7097872340425532, + "grad_norm": 24.092405319213867, + "learning_rate": 5e-05, + "loss": 1.0794, + "num_input_tokens_seen": 418387488, + "step": 6255 + }, + { + "epoch": 0.7097872340425532, + "loss": 1.2048382759094238, + "loss_ce": 0.009037463925778866, + "loss_iou": 0.474609375, + "loss_num": 0.049560546875, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 418387488, + "step": 6255 + }, + { + "epoch": 0.7099007092198582, + "grad_norm": 29.00006103515625, + "learning_rate": 5e-05, + "loss": 1.1272, + "num_input_tokens_seen": 418455076, + "step": 6256 + }, + { + "epoch": 0.7099007092198582, + "loss": 1.2166056632995605, + "loss_ce": 0.008109547197818756, + "loss_iou": 0.5, + "loss_num": 0.04150390625, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 418455076, + "step": 6256 + }, + { + "epoch": 0.7100141843971631, + "grad_norm": 15.206698417663574, + "learning_rate": 5e-05, + "loss": 1.3012, + "num_input_tokens_seen": 418523232, + "step": 6257 + }, + { + "epoch": 0.7100141843971631, + "loss": 1.1007049083709717, + "loss_ce": 0.0059783607721328735, + "loss_iou": 0.431640625, + "loss_num": 0.046142578125, + "loss_xval": 1.09375, + "num_input_tokens_seen": 418523232, + "step": 6257 + }, + { + "epoch": 0.7101276595744681, + "grad_norm": 14.371757507324219, + "learning_rate": 5e-05, + "loss": 1.193, + "num_input_tokens_seen": 418589884, + "step": 6258 + }, + { + "epoch": 0.7101276595744681, + "loss": 1.2276637554168701, + "loss_ce": 0.0076930164359509945, + "loss_iou": 0.490234375, + "loss_num": 0.0478515625, + "loss_xval": 1.21875, + "num_input_tokens_seen": 418589884, + "step": 6258 + }, + { + "epoch": 0.7102411347517731, + "grad_norm": 22.930089950561523, + "learning_rate": 5e-05, + "loss": 1.2148, + "num_input_tokens_seen": 418657020, + "step": 6259 + }, + { + "epoch": 0.7102411347517731, + "loss": 1.3327467441558838, + "loss_ce": 0.007551420014351606, + "loss_iou": 0.5390625, + "loss_num": 0.05029296875, + "loss_xval": 1.328125, + "num_input_tokens_seen": 418657020, + "step": 6259 + }, + { + "epoch": 0.7103546099290781, + "grad_norm": 13.46529769897461, + "learning_rate": 5e-05, + "loss": 1.1363, + "num_input_tokens_seen": 418723632, + "step": 6260 + }, + { + "epoch": 0.7103546099290781, + "loss": 1.00886869430542, + "loss_ce": 0.005939001217484474, + "loss_iou": 0.416015625, + "loss_num": 0.0341796875, + "loss_xval": 1.0, + "num_input_tokens_seen": 418723632, + "step": 6260 + }, + { + "epoch": 0.7104680851063829, + "grad_norm": 22.837421417236328, + "learning_rate": 5e-05, + "loss": 1.2586, + "num_input_tokens_seen": 418789588, + "step": 6261 + }, + { + "epoch": 0.7104680851063829, + "loss": 1.1959638595581055, + "loss_ce": 0.00651068240404129, + "loss_iou": 0.490234375, + "loss_num": 0.042236328125, + "loss_xval": 1.1875, + "num_input_tokens_seen": 418789588, + "step": 6261 + }, + { + "epoch": 0.7105815602836879, + "grad_norm": 30.954946517944336, + "learning_rate": 5e-05, + "loss": 1.2509, + "num_input_tokens_seen": 418856096, + "step": 6262 + }, + { + "epoch": 0.7105815602836879, + "loss": 1.1812176704406738, + "loss_ce": 0.0034833773970603943, + "loss_iou": 0.45703125, + "loss_num": 0.052490234375, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 418856096, + "step": 6262 + }, + { + "epoch": 0.7106950354609929, + "grad_norm": 36.47529983520508, + "learning_rate": 5e-05, + "loss": 1.1531, + "num_input_tokens_seen": 418923596, + "step": 6263 + }, + { + "epoch": 0.7106950354609929, + "loss": 1.141351580619812, + "loss_ce": 0.008050807751715183, + "loss_iou": 0.4921875, + "loss_num": 0.0299072265625, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 418923596, + "step": 6263 + }, + { + "epoch": 0.7108085106382979, + "grad_norm": 36.67436599731445, + "learning_rate": 5e-05, + "loss": 1.2119, + "num_input_tokens_seen": 418989252, + "step": 6264 + }, + { + "epoch": 0.7108085106382979, + "loss": 1.3001861572265625, + "loss_ce": 0.007705606985837221, + "loss_iou": 0.49609375, + "loss_num": 0.060302734375, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 418989252, + "step": 6264 + }, + { + "epoch": 0.7109219858156028, + "grad_norm": 31.23922348022461, + "learning_rate": 5e-05, + "loss": 1.2445, + "num_input_tokens_seen": 419055592, + "step": 6265 + }, + { + "epoch": 0.7109219858156028, + "loss": 1.1702213287353516, + "loss_ce": 0.0076236422173678875, + "loss_iou": 0.474609375, + "loss_num": 0.04296875, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 419055592, + "step": 6265 + }, + { + "epoch": 0.7110354609929078, + "grad_norm": 29.768115997314453, + "learning_rate": 5e-05, + "loss": 1.3096, + "num_input_tokens_seen": 419122284, + "step": 6266 + }, + { + "epoch": 0.7110354609929078, + "loss": 1.2628123760223389, + "loss_ce": 0.006952949799597263, + "loss_iou": 0.5234375, + "loss_num": 0.041259765625, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 419122284, + "step": 6266 + }, + { + "epoch": 0.7111489361702128, + "grad_norm": 30.9725341796875, + "learning_rate": 5e-05, + "loss": 1.2461, + "num_input_tokens_seen": 419188652, + "step": 6267 + }, + { + "epoch": 0.7111489361702128, + "loss": 1.1941287517547607, + "loss_ce": 0.007605349645018578, + "loss_iou": 0.5, + "loss_num": 0.03759765625, + "loss_xval": 1.1875, + "num_input_tokens_seen": 419188652, + "step": 6267 + }, + { + "epoch": 0.7112624113475178, + "grad_norm": 26.022661209106445, + "learning_rate": 5e-05, + "loss": 1.2523, + "num_input_tokens_seen": 419255124, + "step": 6268 + }, + { + "epoch": 0.7112624113475178, + "loss": 1.1162358522415161, + "loss_ce": 0.006860812194645405, + "loss_iou": 0.486328125, + "loss_num": 0.0277099609375, + "loss_xval": 1.109375, + "num_input_tokens_seen": 419255124, + "step": 6268 + }, + { + "epoch": 0.7113758865248226, + "grad_norm": 30.807941436767578, + "learning_rate": 5e-05, + "loss": 1.1602, + "num_input_tokens_seen": 419321820, + "step": 6269 + }, + { + "epoch": 0.7113758865248226, + "loss": 1.0725533962249756, + "loss_ce": 0.008344447240233421, + "loss_iou": 0.455078125, + "loss_num": 0.03076171875, + "loss_xval": 1.0625, + "num_input_tokens_seen": 419321820, + "step": 6269 + }, + { + "epoch": 0.7114893617021276, + "grad_norm": 20.972150802612305, + "learning_rate": 5e-05, + "loss": 1.2741, + "num_input_tokens_seen": 419388740, + "step": 6270 + }, + { + "epoch": 0.7114893617021276, + "loss": 1.5241584777832031, + "loss_ce": 0.011463227681815624, + "loss_iou": 0.59765625, + "loss_num": 0.06396484375, + "loss_xval": 1.515625, + "num_input_tokens_seen": 419388740, + "step": 6270 + }, + { + "epoch": 0.7116028368794326, + "grad_norm": 22.98398208618164, + "learning_rate": 5e-05, + "loss": 1.0768, + "num_input_tokens_seen": 419455424, + "step": 6271 + }, + { + "epoch": 0.7116028368794326, + "loss": 0.9815253615379333, + "loss_ce": 0.0039862762205302715, + "loss_iou": 0.412109375, + "loss_num": 0.0306396484375, + "loss_xval": 0.9765625, + "num_input_tokens_seen": 419455424, + "step": 6271 + }, + { + "epoch": 0.7117163120567376, + "grad_norm": 15.63200855255127, + "learning_rate": 5e-05, + "loss": 1.2573, + "num_input_tokens_seen": 419522660, + "step": 6272 + }, + { + "epoch": 0.7117163120567376, + "loss": 1.4261934757232666, + "loss_ce": 0.01213107630610466, + "loss_iou": 0.56640625, + "loss_num": 0.056396484375, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 419522660, + "step": 6272 + }, + { + "epoch": 0.7118297872340426, + "grad_norm": 12.526862144470215, + "learning_rate": 5e-05, + "loss": 1.0815, + "num_input_tokens_seen": 419589080, + "step": 6273 + }, + { + "epoch": 0.7118297872340426, + "loss": 0.9259237051010132, + "loss_ce": 0.0069784438237547874, + "loss_iou": 0.392578125, + "loss_num": 0.026611328125, + "loss_xval": 0.91796875, + "num_input_tokens_seen": 419589080, + "step": 6273 + }, + { + "epoch": 0.7119432624113475, + "grad_norm": 15.353845596313477, + "learning_rate": 5e-05, + "loss": 1.023, + "num_input_tokens_seen": 419656396, + "step": 6274 + }, + { + "epoch": 0.7119432624113475, + "loss": 1.132335901260376, + "loss_ce": 0.00684759858995676, + "loss_iou": 0.419921875, + "loss_num": 0.056640625, + "loss_xval": 1.125, + "num_input_tokens_seen": 419656396, + "step": 6274 + }, + { + "epoch": 0.7120567375886525, + "grad_norm": 24.48352813720703, + "learning_rate": 5e-05, + "loss": 1.1248, + "num_input_tokens_seen": 419723324, + "step": 6275 + }, + { + "epoch": 0.7120567375886525, + "loss": 1.1325099468231201, + "loss_ce": 0.008486506529152393, + "loss_iou": 0.453125, + "loss_num": 0.044189453125, + "loss_xval": 1.125, + "num_input_tokens_seen": 419723324, + "step": 6275 + }, + { + "epoch": 0.7121702127659575, + "grad_norm": 26.992801666259766, + "learning_rate": 5e-05, + "loss": 1.3793, + "num_input_tokens_seen": 419788992, + "step": 6276 + }, + { + "epoch": 0.7121702127659575, + "loss": 1.3243582248687744, + "loss_ce": 0.00551061425358057, + "loss_iou": 0.54296875, + "loss_num": 0.046142578125, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 419788992, + "step": 6276 + }, + { + "epoch": 0.7122836879432625, + "grad_norm": 42.01380920410156, + "learning_rate": 5e-05, + "loss": 1.109, + "num_input_tokens_seen": 419855988, + "step": 6277 + }, + { + "epoch": 0.7122836879432625, + "loss": 1.0754942893981934, + "loss_ce": 0.0049376701936125755, + "loss_iou": 0.4375, + "loss_num": 0.038818359375, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 419855988, + "step": 6277 + }, + { + "epoch": 0.7123971631205673, + "grad_norm": 26.351022720336914, + "learning_rate": 5e-05, + "loss": 1.0564, + "num_input_tokens_seen": 419922404, + "step": 6278 + }, + { + "epoch": 0.7123971631205673, + "loss": 0.9376319646835327, + "loss_ce": 0.005747197195887566, + "loss_iou": 0.39453125, + "loss_num": 0.029052734375, + "loss_xval": 0.93359375, + "num_input_tokens_seen": 419922404, + "step": 6278 + }, + { + "epoch": 0.7125106382978723, + "grad_norm": 36.94272994995117, + "learning_rate": 5e-05, + "loss": 1.1678, + "num_input_tokens_seen": 419988576, + "step": 6279 + }, + { + "epoch": 0.7125106382978723, + "loss": 1.181579351425171, + "loss_ce": 0.007262923289090395, + "loss_iou": 0.486328125, + "loss_num": 0.04052734375, + "loss_xval": 1.171875, + "num_input_tokens_seen": 419988576, + "step": 6279 + }, + { + "epoch": 0.7126241134751773, + "grad_norm": 35.991615295410156, + "learning_rate": 5e-05, + "loss": 1.2954, + "num_input_tokens_seen": 420055876, + "step": 6280 + }, + { + "epoch": 0.7126241134751773, + "loss": 1.2447270154953003, + "loss_ce": 0.006445786450058222, + "loss_iou": 0.5078125, + "loss_num": 0.04443359375, + "loss_xval": 1.234375, + "num_input_tokens_seen": 420055876, + "step": 6280 + }, + { + "epoch": 0.7127375886524823, + "grad_norm": 35.6984748840332, + "learning_rate": 5e-05, + "loss": 1.2277, + "num_input_tokens_seen": 420123244, + "step": 6281 + }, + { + "epoch": 0.7127375886524823, + "loss": 1.1897361278533936, + "loss_ce": 0.006142427213490009, + "loss_iou": 0.50390625, + "loss_num": 0.035400390625, + "loss_xval": 1.1875, + "num_input_tokens_seen": 420123244, + "step": 6281 + }, + { + "epoch": 0.7128510638297872, + "grad_norm": 34.83245086669922, + "learning_rate": 5e-05, + "loss": 1.135, + "num_input_tokens_seen": 420190344, + "step": 6282 + }, + { + "epoch": 0.7128510638297872, + "loss": 1.125506043434143, + "loss_ce": 0.0058771767653524876, + "loss_iou": 0.4453125, + "loss_num": 0.0458984375, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 420190344, + "step": 6282 + }, + { + "epoch": 0.7129645390070922, + "grad_norm": 39.610904693603516, + "learning_rate": 5e-05, + "loss": 1.3217, + "num_input_tokens_seen": 420257236, + "step": 6283 + }, + { + "epoch": 0.7129645390070922, + "loss": 1.3264861106872559, + "loss_ce": 0.009591634385287762, + "loss_iou": 0.546875, + "loss_num": 0.0439453125, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 420257236, + "step": 6283 + }, + { + "epoch": 0.7130780141843972, + "grad_norm": 31.741132736206055, + "learning_rate": 5e-05, + "loss": 1.2336, + "num_input_tokens_seen": 420324044, + "step": 6284 + }, + { + "epoch": 0.7130780141843972, + "loss": 1.2693027257919312, + "loss_ce": 0.010513678193092346, + "loss_iou": 0.52734375, + "loss_num": 0.0400390625, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 420324044, + "step": 6284 + }, + { + "epoch": 0.7131914893617022, + "grad_norm": 74.85464477539062, + "learning_rate": 5e-05, + "loss": 1.176, + "num_input_tokens_seen": 420390804, + "step": 6285 + }, + { + "epoch": 0.7131914893617022, + "loss": 1.2606420516967773, + "loss_ce": 0.00917716883122921, + "loss_iou": 0.5234375, + "loss_num": 0.04150390625, + "loss_xval": 1.25, + "num_input_tokens_seen": 420390804, + "step": 6285 + }, + { + "epoch": 0.713304964539007, + "grad_norm": 24.296588897705078, + "learning_rate": 5e-05, + "loss": 1.3898, + "num_input_tokens_seen": 420457572, + "step": 6286 + }, + { + "epoch": 0.713304964539007, + "loss": 1.5241879224777222, + "loss_ce": 0.01246910635381937, + "loss_iou": 0.6171875, + "loss_num": 0.054931640625, + "loss_xval": 1.515625, + "num_input_tokens_seen": 420457572, + "step": 6286 + }, + { + "epoch": 0.713418439716312, + "grad_norm": 17.519866943359375, + "learning_rate": 5e-05, + "loss": 1.251, + "num_input_tokens_seen": 420524360, + "step": 6287 + }, + { + "epoch": 0.713418439716312, + "loss": 1.220780611038208, + "loss_ce": 0.008378332480788231, + "loss_iou": 0.466796875, + "loss_num": 0.0556640625, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 420524360, + "step": 6287 + }, + { + "epoch": 0.713531914893617, + "grad_norm": 21.04935646057129, + "learning_rate": 5e-05, + "loss": 1.1399, + "num_input_tokens_seen": 420591528, + "step": 6288 + }, + { + "epoch": 0.713531914893617, + "loss": 1.0480504035949707, + "loss_ce": 0.004105155821889639, + "loss_iou": 0.421875, + "loss_num": 0.04052734375, + "loss_xval": 1.046875, + "num_input_tokens_seen": 420591528, + "step": 6288 + }, + { + "epoch": 0.713645390070922, + "grad_norm": 38.06301498413086, + "learning_rate": 5e-05, + "loss": 1.1624, + "num_input_tokens_seen": 420657864, + "step": 6289 + }, + { + "epoch": 0.713645390070922, + "loss": 1.2005364894866943, + "loss_ce": 0.008642081171274185, + "loss_iou": 0.5078125, + "loss_num": 0.035400390625, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 420657864, + "step": 6289 + }, + { + "epoch": 0.713758865248227, + "grad_norm": 24.724124908447266, + "learning_rate": 5e-05, + "loss": 1.1965, + "num_input_tokens_seen": 420726060, + "step": 6290 + }, + { + "epoch": 0.713758865248227, + "loss": 1.286785364151001, + "loss_ce": 0.005535363219678402, + "loss_iou": 0.56640625, + "loss_num": 0.0296630859375, + "loss_xval": 1.28125, + "num_input_tokens_seen": 420726060, + "step": 6290 + }, + { + "epoch": 0.7138723404255319, + "grad_norm": 25.160789489746094, + "learning_rate": 5e-05, + "loss": 1.2048, + "num_input_tokens_seen": 420792064, + "step": 6291 + }, + { + "epoch": 0.7138723404255319, + "loss": 1.2215793132781982, + "loss_ce": 0.017477773129940033, + "loss_iou": 0.5078125, + "loss_num": 0.0380859375, + "loss_xval": 1.203125, + "num_input_tokens_seen": 420792064, + "step": 6291 + }, + { + "epoch": 0.7139858156028369, + "grad_norm": 17.480365753173828, + "learning_rate": 5e-05, + "loss": 1.0233, + "num_input_tokens_seen": 420858436, + "step": 6292 + }, + { + "epoch": 0.7139858156028369, + "loss": 1.0055054426193237, + "loss_ce": 0.005993773695081472, + "loss_iou": 0.431640625, + "loss_num": 0.02685546875, + "loss_xval": 1.0, + "num_input_tokens_seen": 420858436, + "step": 6292 + }, + { + "epoch": 0.7140992907801419, + "grad_norm": 32.625186920166016, + "learning_rate": 5e-05, + "loss": 1.1663, + "num_input_tokens_seen": 420925552, + "step": 6293 + }, + { + "epoch": 0.7140992907801419, + "loss": 1.1455094814300537, + "loss_ce": 0.00488448329269886, + "loss_iou": 0.474609375, + "loss_num": 0.0380859375, + "loss_xval": 1.140625, + "num_input_tokens_seen": 420925552, + "step": 6293 + }, + { + "epoch": 0.7142127659574468, + "grad_norm": 30.568130493164062, + "learning_rate": 5e-05, + "loss": 1.0163, + "num_input_tokens_seen": 420993296, + "step": 6294 + }, + { + "epoch": 0.7142127659574468, + "loss": 0.9386771321296692, + "loss_ce": 0.007860459387302399, + "loss_iou": 0.40234375, + "loss_num": 0.0252685546875, + "loss_xval": 0.9296875, + "num_input_tokens_seen": 420993296, + "step": 6294 + }, + { + "epoch": 0.7143262411347517, + "grad_norm": 20.7175235748291, + "learning_rate": 5e-05, + "loss": 1.1762, + "num_input_tokens_seen": 421060456, + "step": 6295 + }, + { + "epoch": 0.7143262411347517, + "loss": 1.1936006546020508, + "loss_ce": 0.0065888911485672, + "loss_iou": 0.5, + "loss_num": 0.03759765625, + "loss_xval": 1.1875, + "num_input_tokens_seen": 421060456, + "step": 6295 + }, + { + "epoch": 0.7144397163120567, + "grad_norm": 14.407917976379395, + "learning_rate": 5e-05, + "loss": 0.7015, + "num_input_tokens_seen": 421126200, + "step": 6296 + }, + { + "epoch": 0.7144397163120567, + "loss": 0.6109148263931274, + "loss_ce": 0.003798130666837096, + "loss_iou": 0.2431640625, + "loss_num": 0.0242919921875, + "loss_xval": 0.60546875, + "num_input_tokens_seen": 421126200, + "step": 6296 + }, + { + "epoch": 0.7145531914893617, + "grad_norm": 20.569778442382812, + "learning_rate": 5e-05, + "loss": 1.0971, + "num_input_tokens_seen": 421194436, + "step": 6297 + }, + { + "epoch": 0.7145531914893617, + "loss": 1.0428531169891357, + "loss_ce": 0.004645174834877253, + "loss_iou": 0.435546875, + "loss_num": 0.033203125, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 421194436, + "step": 6297 + }, + { + "epoch": 0.7146666666666667, + "grad_norm": 21.010009765625, + "learning_rate": 5e-05, + "loss": 0.9832, + "num_input_tokens_seen": 421261892, + "step": 6298 + }, + { + "epoch": 0.7146666666666667, + "loss": 0.9843096733093262, + "loss_ce": 0.007441927678883076, + "loss_iou": 0.396484375, + "loss_num": 0.036865234375, + "loss_xval": 0.9765625, + "num_input_tokens_seen": 421261892, + "step": 6298 + }, + { + "epoch": 0.7147801418439717, + "grad_norm": 25.062925338745117, + "learning_rate": 5e-05, + "loss": 1.0524, + "num_input_tokens_seen": 421327964, + "step": 6299 + }, + { + "epoch": 0.7147801418439717, + "loss": 1.0929341316223145, + "loss_ce": 0.008949782699346542, + "loss_iou": 0.45703125, + "loss_num": 0.0341796875, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 421327964, + "step": 6299 + }, + { + "epoch": 0.7148936170212766, + "grad_norm": 25.377065658569336, + "learning_rate": 5e-05, + "loss": 1.2517, + "num_input_tokens_seen": 421395976, + "step": 6300 + }, + { + "epoch": 0.7148936170212766, + "loss": 1.1712331771850586, + "loss_ce": 0.006682398729026318, + "loss_iou": 0.490234375, + "loss_num": 0.03662109375, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 421395976, + "step": 6300 + }, + { + "epoch": 0.7150070921985816, + "grad_norm": 36.58707809448242, + "learning_rate": 5e-05, + "loss": 1.3738, + "num_input_tokens_seen": 421463372, + "step": 6301 + }, + { + "epoch": 0.7150070921985816, + "loss": 1.2361109256744385, + "loss_ce": 0.006130442023277283, + "loss_iou": 0.494140625, + "loss_num": 0.048583984375, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 421463372, + "step": 6301 + }, + { + "epoch": 0.7151205673758865, + "grad_norm": 30.60904884338379, + "learning_rate": 5e-05, + "loss": 1.4172, + "num_input_tokens_seen": 421530012, + "step": 6302 + }, + { + "epoch": 0.7151205673758865, + "loss": 1.202409267425537, + "loss_ce": 0.002213874366134405, + "loss_iou": 0.5234375, + "loss_num": 0.0308837890625, + "loss_xval": 1.203125, + "num_input_tokens_seen": 421530012, + "step": 6302 + }, + { + "epoch": 0.7152340425531915, + "grad_norm": 41.33121109008789, + "learning_rate": 5e-05, + "loss": 1.1043, + "num_input_tokens_seen": 421596152, + "step": 6303 + }, + { + "epoch": 0.7152340425531915, + "loss": 1.1542811393737793, + "loss_ce": 0.006331912241876125, + "loss_iou": 0.478515625, + "loss_num": 0.03759765625, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 421596152, + "step": 6303 + }, + { + "epoch": 0.7153475177304964, + "grad_norm": 41.18219757080078, + "learning_rate": 5e-05, + "loss": 1.2146, + "num_input_tokens_seen": 421663452, + "step": 6304 + }, + { + "epoch": 0.7153475177304964, + "loss": 1.1969033479690552, + "loss_ce": 0.005985427647829056, + "loss_iou": 0.51953125, + "loss_num": 0.0303955078125, + "loss_xval": 1.1875, + "num_input_tokens_seen": 421663452, + "step": 6304 + }, + { + "epoch": 0.7154609929078014, + "grad_norm": 44.160343170166016, + "learning_rate": 5e-05, + "loss": 1.2753, + "num_input_tokens_seen": 421730232, + "step": 6305 + }, + { + "epoch": 0.7154609929078014, + "loss": 1.245574951171875, + "loss_ce": 0.011199919506907463, + "loss_iou": 0.5078125, + "loss_num": 0.04296875, + "loss_xval": 1.234375, + "num_input_tokens_seen": 421730232, + "step": 6305 + }, + { + "epoch": 0.7155744680851064, + "grad_norm": 24.775728225708008, + "learning_rate": 5e-05, + "loss": 1.3776, + "num_input_tokens_seen": 421797200, + "step": 6306 + }, + { + "epoch": 0.7155744680851064, + "loss": 1.2431728839874268, + "loss_ce": 0.007333043962717056, + "loss_iou": 0.53125, + "loss_num": 0.034912109375, + "loss_xval": 1.234375, + "num_input_tokens_seen": 421797200, + "step": 6306 + }, + { + "epoch": 0.7156879432624114, + "grad_norm": 14.096990585327148, + "learning_rate": 5e-05, + "loss": 1.1093, + "num_input_tokens_seen": 421863960, + "step": 6307 + }, + { + "epoch": 0.7156879432624114, + "loss": 1.004119634628296, + "loss_ce": 0.0060728006064891815, + "loss_iou": 0.423828125, + "loss_num": 0.0301513671875, + "loss_xval": 1.0, + "num_input_tokens_seen": 421863960, + "step": 6307 + }, + { + "epoch": 0.7158014184397163, + "grad_norm": 17.2429141998291, + "learning_rate": 5e-05, + "loss": 1.0675, + "num_input_tokens_seen": 421930676, + "step": 6308 + }, + { + "epoch": 0.7158014184397163, + "loss": 1.14546799659729, + "loss_ce": 0.0077727604657411575, + "loss_iou": 0.46875, + "loss_num": 0.040771484375, + "loss_xval": 1.140625, + "num_input_tokens_seen": 421930676, + "step": 6308 + }, + { + "epoch": 0.7159148936170213, + "grad_norm": 26.7103214263916, + "learning_rate": 5e-05, + "loss": 0.9982, + "num_input_tokens_seen": 421997344, + "step": 6309 + }, + { + "epoch": 0.7159148936170213, + "loss": 1.0786842107772827, + "loss_ce": 0.006906882394105196, + "loss_iou": 0.439453125, + "loss_num": 0.03857421875, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 421997344, + "step": 6309 + }, + { + "epoch": 0.7160283687943262, + "grad_norm": 37.55571365356445, + "learning_rate": 5e-05, + "loss": 1.2561, + "num_input_tokens_seen": 422064704, + "step": 6310 + }, + { + "epoch": 0.7160283687943262, + "loss": 1.378197431564331, + "loss_ce": 0.011010056361556053, + "loss_iou": 0.54296875, + "loss_num": 0.055419921875, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 422064704, + "step": 6310 + }, + { + "epoch": 0.7161418439716312, + "grad_norm": 34.995304107666016, + "learning_rate": 5e-05, + "loss": 1.1802, + "num_input_tokens_seen": 422131548, + "step": 6311 + }, + { + "epoch": 0.7161418439716312, + "loss": 1.2749223709106445, + "loss_ce": 0.005391232203692198, + "loss_iou": 0.52734375, + "loss_num": 0.0419921875, + "loss_xval": 1.265625, + "num_input_tokens_seen": 422131548, + "step": 6311 + }, + { + "epoch": 0.7162553191489361, + "grad_norm": 24.776748657226562, + "learning_rate": 5e-05, + "loss": 1.1409, + "num_input_tokens_seen": 422198232, + "step": 6312 + }, + { + "epoch": 0.7162553191489361, + "loss": 0.9740036129951477, + "loss_ce": 0.0037887969519943, + "loss_iou": 0.41015625, + "loss_num": 0.0296630859375, + "loss_xval": 0.96875, + "num_input_tokens_seen": 422198232, + "step": 6312 + }, + { + "epoch": 0.7163687943262411, + "grad_norm": 32.71211624145508, + "learning_rate": 5e-05, + "loss": 1.1618, + "num_input_tokens_seen": 422266448, + "step": 6313 + }, + { + "epoch": 0.7163687943262411, + "loss": 1.091050624847412, + "loss_ce": 0.005601357668638229, + "loss_iou": 0.47265625, + "loss_num": 0.02783203125, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 422266448, + "step": 6313 + }, + { + "epoch": 0.7164822695035461, + "grad_norm": 23.466114044189453, + "learning_rate": 5e-05, + "loss": 1.3869, + "num_input_tokens_seen": 422334356, + "step": 6314 + }, + { + "epoch": 0.7164822695035461, + "loss": 1.4309895038604736, + "loss_ce": 0.00960281491279602, + "loss_iou": 0.578125, + "loss_num": 0.053466796875, + "loss_xval": 1.421875, + "num_input_tokens_seen": 422334356, + "step": 6314 + }, + { + "epoch": 0.7165957446808511, + "grad_norm": 26.278606414794922, + "learning_rate": 5e-05, + "loss": 1.1554, + "num_input_tokens_seen": 422402188, + "step": 6315 + }, + { + "epoch": 0.7165957446808511, + "loss": 1.241178274154663, + "loss_ce": 0.006314962171018124, + "loss_iou": 0.515625, + "loss_num": 0.040771484375, + "loss_xval": 1.234375, + "num_input_tokens_seen": 422402188, + "step": 6315 + }, + { + "epoch": 0.716709219858156, + "grad_norm": 24.33574104309082, + "learning_rate": 5e-05, + "loss": 1.243, + "num_input_tokens_seen": 422469528, + "step": 6316 + }, + { + "epoch": 0.716709219858156, + "loss": 1.334831714630127, + "loss_ce": 0.008659940212965012, + "loss_iou": 0.53125, + "loss_num": 0.052978515625, + "loss_xval": 1.328125, + "num_input_tokens_seen": 422469528, + "step": 6316 + }, + { + "epoch": 0.716822695035461, + "grad_norm": 32.9157829284668, + "learning_rate": 5e-05, + "loss": 1.3571, + "num_input_tokens_seen": 422537112, + "step": 6317 + }, + { + "epoch": 0.716822695035461, + "loss": 1.4092278480529785, + "loss_ce": 0.008837243542075157, + "loss_iou": 0.578125, + "loss_num": 0.049560546875, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 422537112, + "step": 6317 + }, + { + "epoch": 0.716936170212766, + "grad_norm": 43.34706115722656, + "learning_rate": 5e-05, + "loss": 1.3396, + "num_input_tokens_seen": 422605004, + "step": 6318 + }, + { + "epoch": 0.716936170212766, + "loss": 1.28705894947052, + "loss_ce": 0.0033674973528832197, + "loss_iou": 0.55859375, + "loss_num": 0.033447265625, + "loss_xval": 1.28125, + "num_input_tokens_seen": 422605004, + "step": 6318 + }, + { + "epoch": 0.7170496453900709, + "grad_norm": 35.0702018737793, + "learning_rate": 5e-05, + "loss": 1.2469, + "num_input_tokens_seen": 422671320, + "step": 6319 + }, + { + "epoch": 0.7170496453900709, + "loss": 1.2768558263778687, + "loss_ce": 0.00781282503157854, + "loss_iou": 0.51953125, + "loss_num": 0.0458984375, + "loss_xval": 1.265625, + "num_input_tokens_seen": 422671320, + "step": 6319 + }, + { + "epoch": 0.7171631205673759, + "grad_norm": 286.6390380859375, + "learning_rate": 5e-05, + "loss": 1.227, + "num_input_tokens_seen": 422738784, + "step": 6320 + }, + { + "epoch": 0.7171631205673759, + "loss": 1.085028886795044, + "loss_ce": 0.00299771036952734, + "loss_iou": 0.4375, + "loss_num": 0.041259765625, + "loss_xval": 1.078125, + "num_input_tokens_seen": 422738784, + "step": 6320 + }, + { + "epoch": 0.7172765957446808, + "grad_norm": 23.12691307067871, + "learning_rate": 5e-05, + "loss": 1.1091, + "num_input_tokens_seen": 422805452, + "step": 6321 + }, + { + "epoch": 0.7172765957446808, + "loss": 1.2232556343078613, + "loss_ce": 0.006458728574216366, + "loss_iou": 0.51953125, + "loss_num": 0.03515625, + "loss_xval": 1.21875, + "num_input_tokens_seen": 422805452, + "step": 6321 + }, + { + "epoch": 0.7173900709219858, + "grad_norm": 23.752090454101562, + "learning_rate": 5e-05, + "loss": 1.3482, + "num_input_tokens_seen": 422872596, + "step": 6322 + }, + { + "epoch": 0.7173900709219858, + "loss": 1.1968052387237549, + "loss_ce": 0.007352214306592941, + "loss_iou": 0.46484375, + "loss_num": 0.05224609375, + "loss_xval": 1.1875, + "num_input_tokens_seen": 422872596, + "step": 6322 + }, + { + "epoch": 0.7175035460992908, + "grad_norm": 24.894807815551758, + "learning_rate": 5e-05, + "loss": 1.404, + "num_input_tokens_seen": 422939804, + "step": 6323 + }, + { + "epoch": 0.7175035460992908, + "loss": 1.4740092754364014, + "loss_ce": 0.006235751789063215, + "loss_iou": 0.5859375, + "loss_num": 0.0595703125, + "loss_xval": 1.46875, + "num_input_tokens_seen": 422939804, + "step": 6323 + }, + { + "epoch": 0.7176170212765958, + "grad_norm": 43.22356414794922, + "learning_rate": 5e-05, + "loss": 1.0877, + "num_input_tokens_seen": 423006528, + "step": 6324 + }, + { + "epoch": 0.7176170212765958, + "loss": 1.0708142518997192, + "loss_ce": 0.008894071914255619, + "loss_iou": 0.470703125, + "loss_num": 0.0240478515625, + "loss_xval": 1.0625, + "num_input_tokens_seen": 423006528, + "step": 6324 + }, + { + "epoch": 0.7177304964539007, + "grad_norm": 27.800857543945312, + "learning_rate": 5e-05, + "loss": 1.2681, + "num_input_tokens_seen": 423073268, + "step": 6325 + }, + { + "epoch": 0.7177304964539007, + "loss": 1.2736666202545166, + "loss_ce": 0.0056002456694841385, + "loss_iou": 0.5546875, + "loss_num": 0.0311279296875, + "loss_xval": 1.265625, + "num_input_tokens_seen": 423073268, + "step": 6325 + }, + { + "epoch": 0.7178439716312057, + "grad_norm": 67.3135986328125, + "learning_rate": 5e-05, + "loss": 1.1935, + "num_input_tokens_seen": 423140352, + "step": 6326 + }, + { + "epoch": 0.7178439716312057, + "loss": 1.396437644958496, + "loss_ce": 0.005812664516270161, + "loss_iou": 0.53125, + "loss_num": 0.06591796875, + "loss_xval": 1.390625, + "num_input_tokens_seen": 423140352, + "step": 6326 + }, + { + "epoch": 0.7179574468085106, + "grad_norm": 22.346538543701172, + "learning_rate": 5e-05, + "loss": 1.1951, + "num_input_tokens_seen": 423208020, + "step": 6327 + }, + { + "epoch": 0.7179574468085106, + "loss": 0.9936211109161377, + "loss_ce": 0.0068046823143959045, + "loss_iou": 0.431640625, + "loss_num": 0.0247802734375, + "loss_xval": 0.98828125, + "num_input_tokens_seen": 423208020, + "step": 6327 + }, + { + "epoch": 0.7180709219858156, + "grad_norm": 26.943849563598633, + "learning_rate": 5e-05, + "loss": 1.2981, + "num_input_tokens_seen": 423275072, + "step": 6328 + }, + { + "epoch": 0.7180709219858156, + "loss": 1.2041239738464355, + "loss_ce": 0.007529649883508682, + "loss_iou": 0.48828125, + "loss_num": 0.0439453125, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 423275072, + "step": 6328 + }, + { + "epoch": 0.7181843971631205, + "grad_norm": 92.44904327392578, + "learning_rate": 5e-05, + "loss": 1.4488, + "num_input_tokens_seen": 423342328, + "step": 6329 + }, + { + "epoch": 0.7181843971631205, + "loss": 1.4109344482421875, + "loss_ce": 0.009567280299961567, + "loss_iou": 0.53515625, + "loss_num": 0.06591796875, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 423342328, + "step": 6329 + }, + { + "epoch": 0.7182978723404255, + "grad_norm": 42.802249908447266, + "learning_rate": 5e-05, + "loss": 1.3068, + "num_input_tokens_seen": 423409864, + "step": 6330 + }, + { + "epoch": 0.7182978723404255, + "loss": 1.4370732307434082, + "loss_ce": 0.008362269029021263, + "loss_iou": 0.5703125, + "loss_num": 0.056396484375, + "loss_xval": 1.4296875, + "num_input_tokens_seen": 423409864, + "step": 6330 + }, + { + "epoch": 0.7184113475177305, + "grad_norm": 25.58323860168457, + "learning_rate": 5e-05, + "loss": 1.3988, + "num_input_tokens_seen": 423475972, + "step": 6331 + }, + { + "epoch": 0.7184113475177305, + "loss": 1.2300583124160767, + "loss_ce": 0.00593719445168972, + "loss_iou": 0.5, + "loss_num": 0.044189453125, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 423475972, + "step": 6331 + }, + { + "epoch": 0.7185248226950355, + "grad_norm": 22.456619262695312, + "learning_rate": 5e-05, + "loss": 1.042, + "num_input_tokens_seen": 423542376, + "step": 6332 + }, + { + "epoch": 0.7185248226950355, + "loss": 0.9625710844993591, + "loss_ce": 0.007981255650520325, + "loss_iou": 0.416015625, + "loss_num": 0.0242919921875, + "loss_xval": 0.953125, + "num_input_tokens_seen": 423542376, + "step": 6332 + }, + { + "epoch": 0.7186382978723405, + "grad_norm": 25.471221923828125, + "learning_rate": 5e-05, + "loss": 0.9786, + "num_input_tokens_seen": 423609088, + "step": 6333 + }, + { + "epoch": 0.7186382978723405, + "loss": 0.8783093094825745, + "loss_ce": 0.00739864818751812, + "loss_iou": 0.35546875, + "loss_num": 0.031982421875, + "loss_xval": 0.87109375, + "num_input_tokens_seen": 423609088, + "step": 6333 + }, + { + "epoch": 0.7187517730496454, + "grad_norm": 34.080482482910156, + "learning_rate": 5e-05, + "loss": 1.5086, + "num_input_tokens_seen": 423675780, + "step": 6334 + }, + { + "epoch": 0.7187517730496454, + "loss": 1.6713871955871582, + "loss_ce": 0.005371584556996822, + "loss_iou": 0.6328125, + "loss_num": 0.0791015625, + "loss_xval": 1.6640625, + "num_input_tokens_seen": 423675780, + "step": 6334 + }, + { + "epoch": 0.7188652482269503, + "grad_norm": 27.018518447875977, + "learning_rate": 5e-05, + "loss": 1.092, + "num_input_tokens_seen": 423742668, + "step": 6335 + }, + { + "epoch": 0.7188652482269503, + "loss": 1.0943207740783691, + "loss_ce": 0.00496539706364274, + "loss_iou": 0.44921875, + "loss_num": 0.0380859375, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 423742668, + "step": 6335 + }, + { + "epoch": 0.7189787234042553, + "grad_norm": 19.028806686401367, + "learning_rate": 5e-05, + "loss": 1.2017, + "num_input_tokens_seen": 423808788, + "step": 6336 + }, + { + "epoch": 0.7189787234042553, + "loss": 1.044013500213623, + "loss_ce": 0.005866458639502525, + "loss_iou": 0.427734375, + "loss_num": 0.036376953125, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 423808788, + "step": 6336 + }, + { + "epoch": 0.7190921985815603, + "grad_norm": 40.317447662353516, + "learning_rate": 5e-05, + "loss": 0.9816, + "num_input_tokens_seen": 423874924, + "step": 6337 + }, + { + "epoch": 0.7190921985815603, + "loss": 0.9672256112098694, + "loss_ce": 0.006776412948966026, + "loss_iou": 0.416015625, + "loss_num": 0.0257568359375, + "loss_xval": 0.9609375, + "num_input_tokens_seen": 423874924, + "step": 6337 + }, + { + "epoch": 0.7192056737588652, + "grad_norm": 29.057456970214844, + "learning_rate": 5e-05, + "loss": 1.282, + "num_input_tokens_seen": 423942620, + "step": 6338 + }, + { + "epoch": 0.7192056737588652, + "loss": 1.3172252178192139, + "loss_ce": 0.00936387199908495, + "loss_iou": 0.515625, + "loss_num": 0.0556640625, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 423942620, + "step": 6338 + }, + { + "epoch": 0.7193191489361702, + "grad_norm": 18.335437774658203, + "learning_rate": 5e-05, + "loss": 1.1804, + "num_input_tokens_seen": 424009820, + "step": 6339 + }, + { + "epoch": 0.7193191489361702, + "loss": 1.2925114631652832, + "loss_ce": 0.007843410596251488, + "loss_iou": 0.49609375, + "loss_num": 0.05810546875, + "loss_xval": 1.28125, + "num_input_tokens_seen": 424009820, + "step": 6339 + }, + { + "epoch": 0.7194326241134752, + "grad_norm": 22.41727638244629, + "learning_rate": 5e-05, + "loss": 1.0214, + "num_input_tokens_seen": 424076532, + "step": 6340 + }, + { + "epoch": 0.7194326241134752, + "loss": 1.0536227226257324, + "loss_ce": 0.009189184755086899, + "loss_iou": 0.45703125, + "loss_num": 0.02587890625, + "loss_xval": 1.046875, + "num_input_tokens_seen": 424076532, + "step": 6340 + }, + { + "epoch": 0.7195460992907802, + "grad_norm": 32.261558532714844, + "learning_rate": 5e-05, + "loss": 1.167, + "num_input_tokens_seen": 424143216, + "step": 6341 + }, + { + "epoch": 0.7195460992907802, + "loss": 1.2426215410232544, + "loss_ce": 0.006781701929867268, + "loss_iou": 0.50390625, + "loss_num": 0.04638671875, + "loss_xval": 1.234375, + "num_input_tokens_seen": 424143216, + "step": 6341 + }, + { + "epoch": 0.7196595744680852, + "grad_norm": 24.104520797729492, + "learning_rate": 5e-05, + "loss": 1.4232, + "num_input_tokens_seen": 424210556, + "step": 6342 + }, + { + "epoch": 0.7196595744680852, + "loss": 1.5189223289489746, + "loss_ce": 0.006227030884474516, + "loss_iou": 0.59765625, + "loss_num": 0.06396484375, + "loss_xval": 1.515625, + "num_input_tokens_seen": 424210556, + "step": 6342 + }, + { + "epoch": 0.71977304964539, + "grad_norm": 22.6925106048584, + "learning_rate": 5e-05, + "loss": 1.139, + "num_input_tokens_seen": 424277516, + "step": 6343 + }, + { + "epoch": 0.71977304964539, + "loss": 0.9176952242851257, + "loss_ce": 0.006562390364706516, + "loss_iou": 0.390625, + "loss_num": 0.0260009765625, + "loss_xval": 0.91015625, + "num_input_tokens_seen": 424277516, + "step": 6343 + }, + { + "epoch": 0.719886524822695, + "grad_norm": 23.817079544067383, + "learning_rate": 5e-05, + "loss": 1.3128, + "num_input_tokens_seen": 424344704, + "step": 6344 + }, + { + "epoch": 0.719886524822695, + "loss": 1.3813279867172241, + "loss_ce": 0.009745940566062927, + "loss_iou": 0.5390625, + "loss_num": 0.05859375, + "loss_xval": 1.375, + "num_input_tokens_seen": 424344704, + "step": 6344 + }, + { + "epoch": 0.72, + "grad_norm": 35.73871994018555, + "learning_rate": 5e-05, + "loss": 1.1332, + "num_input_tokens_seen": 424411584, + "step": 6345 + }, + { + "epoch": 0.72, + "loss": 1.2294065952301025, + "loss_ce": 0.009191763587296009, + "loss_iou": 0.494140625, + "loss_num": 0.046142578125, + "loss_xval": 1.21875, + "num_input_tokens_seen": 424411584, + "step": 6345 + }, + { + "epoch": 0.720113475177305, + "grad_norm": 69.2891616821289, + "learning_rate": 5e-05, + "loss": 1.3293, + "num_input_tokens_seen": 424478292, + "step": 6346 + }, + { + "epoch": 0.720113475177305, + "loss": 1.2864933013916016, + "loss_ce": 0.013055853545665741, + "loss_iou": 0.4609375, + "loss_num": 0.0703125, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 424478292, + "step": 6346 + }, + { + "epoch": 0.7202269503546099, + "grad_norm": 19.351879119873047, + "learning_rate": 5e-05, + "loss": 1.3624, + "num_input_tokens_seen": 424543896, + "step": 6347 + }, + { + "epoch": 0.7202269503546099, + "loss": 1.3839809894561768, + "loss_ce": 0.00751618342474103, + "loss_iou": 0.55078125, + "loss_num": 0.05517578125, + "loss_xval": 1.375, + "num_input_tokens_seen": 424543896, + "step": 6347 + }, + { + "epoch": 0.7203404255319149, + "grad_norm": 15.927862167358398, + "learning_rate": 5e-05, + "loss": 1.2046, + "num_input_tokens_seen": 424610676, + "step": 6348 + }, + { + "epoch": 0.7203404255319149, + "loss": 1.2498630285263062, + "loss_ce": 0.006210633087903261, + "loss_iou": 0.53515625, + "loss_num": 0.034423828125, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 424610676, + "step": 6348 + }, + { + "epoch": 0.7204539007092199, + "grad_norm": 13.644237518310547, + "learning_rate": 5e-05, + "loss": 1.0275, + "num_input_tokens_seen": 424677884, + "step": 6349 + }, + { + "epoch": 0.7204539007092199, + "loss": 1.0797202587127686, + "loss_ce": 0.007210503797978163, + "loss_iou": 0.390625, + "loss_num": 0.058349609375, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 424677884, + "step": 6349 + }, + { + "epoch": 0.7205673758865249, + "grad_norm": 17.79207420349121, + "learning_rate": 5e-05, + "loss": 1.1414, + "num_input_tokens_seen": 424742880, + "step": 6350 + }, + { + "epoch": 0.7205673758865249, + "loss": 1.1795315742492676, + "loss_ce": 0.004726864397525787, + "loss_iou": 0.443359375, + "loss_num": 0.057373046875, + "loss_xval": 1.171875, + "num_input_tokens_seen": 424742880, + "step": 6350 + }, + { + "epoch": 0.7206808510638298, + "grad_norm": 20.087060928344727, + "learning_rate": 5e-05, + "loss": 1.2283, + "num_input_tokens_seen": 424810288, + "step": 6351 + }, + { + "epoch": 0.7206808510638298, + "loss": 1.2897312641143799, + "loss_ce": 0.004574999213218689, + "loss_iou": 0.5078125, + "loss_num": 0.05322265625, + "loss_xval": 1.28125, + "num_input_tokens_seen": 424810288, + "step": 6351 + }, + { + "epoch": 0.7207943262411347, + "grad_norm": 30.95464324951172, + "learning_rate": 5e-05, + "loss": 1.1281, + "num_input_tokens_seen": 424878060, + "step": 6352 + }, + { + "epoch": 0.7207943262411347, + "loss": 1.099340558052063, + "loss_ce": 0.007055412512272596, + "loss_iou": 0.421875, + "loss_num": 0.04931640625, + "loss_xval": 1.09375, + "num_input_tokens_seen": 424878060, + "step": 6352 + }, + { + "epoch": 0.7209078014184397, + "grad_norm": 19.497386932373047, + "learning_rate": 5e-05, + "loss": 0.9869, + "num_input_tokens_seen": 424944512, + "step": 6353 + }, + { + "epoch": 0.7209078014184397, + "loss": 0.8751847743988037, + "loss_ce": 0.005555815529078245, + "loss_iou": 0.392578125, + "loss_num": 0.016845703125, + "loss_xval": 0.87109375, + "num_input_tokens_seen": 424944512, + "step": 6353 + }, + { + "epoch": 0.7210212765957447, + "grad_norm": 30.671714782714844, + "learning_rate": 5e-05, + "loss": 1.2264, + "num_input_tokens_seen": 425011928, + "step": 6354 + }, + { + "epoch": 0.7210212765957447, + "loss": 1.38348388671875, + "loss_ce": 0.007507299073040485, + "loss_iou": 0.53125, + "loss_num": 0.0625, + "loss_xval": 1.375, + "num_input_tokens_seen": 425011928, + "step": 6354 + }, + { + "epoch": 0.7211347517730496, + "grad_norm": 55.725040435791016, + "learning_rate": 5e-05, + "loss": 1.2654, + "num_input_tokens_seen": 425078400, + "step": 6355 + }, + { + "epoch": 0.7211347517730496, + "loss": 1.2710384130477905, + "loss_ce": 0.003460243344306946, + "loss_iou": 0.5390625, + "loss_num": 0.0380859375, + "loss_xval": 1.265625, + "num_input_tokens_seen": 425078400, + "step": 6355 + }, + { + "epoch": 0.7212482269503546, + "grad_norm": 59.78592300415039, + "learning_rate": 5e-05, + "loss": 1.2869, + "num_input_tokens_seen": 425145220, + "step": 6356 + }, + { + "epoch": 0.7212482269503546, + "loss": 1.2691699266433716, + "loss_ce": 0.005009772721678019, + "loss_iou": 0.5546875, + "loss_num": 0.0308837890625, + "loss_xval": 1.265625, + "num_input_tokens_seen": 425145220, + "step": 6356 + }, + { + "epoch": 0.7213617021276596, + "grad_norm": 15.40436840057373, + "learning_rate": 5e-05, + "loss": 0.9213, + "num_input_tokens_seen": 425212108, + "step": 6357 + }, + { + "epoch": 0.7213617021276596, + "loss": 0.8755151033401489, + "loss_ce": 0.004421310964971781, + "loss_iou": 0.376953125, + "loss_num": 0.0234375, + "loss_xval": 0.87109375, + "num_input_tokens_seen": 425212108, + "step": 6357 + }, + { + "epoch": 0.7214751773049646, + "grad_norm": 17.114402770996094, + "learning_rate": 5e-05, + "loss": 1.2192, + "num_input_tokens_seen": 425278912, + "step": 6358 + }, + { + "epoch": 0.7214751773049646, + "loss": 1.2814116477966309, + "loss_ce": 0.00504454318434, + "loss_iou": 0.48046875, + "loss_num": 0.0634765625, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 425278912, + "step": 6358 + }, + { + "epoch": 0.7215886524822696, + "grad_norm": 19.194908142089844, + "learning_rate": 5e-05, + "loss": 0.9313, + "num_input_tokens_seen": 425345244, + "step": 6359 + }, + { + "epoch": 0.7215886524822696, + "loss": 1.0836212635040283, + "loss_ce": 0.005007955711334944, + "loss_iou": 0.462890625, + "loss_num": 0.030517578125, + "loss_xval": 1.078125, + "num_input_tokens_seen": 425345244, + "step": 6359 + }, + { + "epoch": 0.7217021276595744, + "grad_norm": 21.171417236328125, + "learning_rate": 5e-05, + "loss": 1.195, + "num_input_tokens_seen": 425412976, + "step": 6360 + }, + { + "epoch": 0.7217021276595744, + "loss": 1.1943871974945068, + "loss_ce": 0.00884038396179676, + "loss_iou": 0.48046875, + "loss_num": 0.04443359375, + "loss_xval": 1.1875, + "num_input_tokens_seen": 425412976, + "step": 6360 + }, + { + "epoch": 0.7218156028368794, + "grad_norm": 32.99884796142578, + "learning_rate": 5e-05, + "loss": 1.1346, + "num_input_tokens_seen": 425480344, + "step": 6361 + }, + { + "epoch": 0.7218156028368794, + "loss": 1.1209793090820312, + "loss_ce": 0.0062332297675311565, + "loss_iou": 0.470703125, + "loss_num": 0.03466796875, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 425480344, + "step": 6361 + }, + { + "epoch": 0.7219290780141844, + "grad_norm": 39.097747802734375, + "learning_rate": 5e-05, + "loss": 1.1088, + "num_input_tokens_seen": 425546980, + "step": 6362 + }, + { + "epoch": 0.7219290780141844, + "loss": 1.3090789318084717, + "loss_ce": 0.003414833452552557, + "loss_iou": 0.52734375, + "loss_num": 0.050537109375, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 425546980, + "step": 6362 + }, + { + "epoch": 0.7220425531914894, + "grad_norm": 46.116424560546875, + "learning_rate": 5e-05, + "loss": 1.408, + "num_input_tokens_seen": 425614628, + "step": 6363 + }, + { + "epoch": 0.7220425531914894, + "loss": 1.5037163496017456, + "loss_ce": 0.006646038964390755, + "loss_iou": 0.57421875, + "loss_num": 0.0703125, + "loss_xval": 1.5, + "num_input_tokens_seen": 425614628, + "step": 6363 + }, + { + "epoch": 0.7221560283687943, + "grad_norm": 26.574522018432617, + "learning_rate": 5e-05, + "loss": 1.3704, + "num_input_tokens_seen": 425682036, + "step": 6364 + }, + { + "epoch": 0.7221560283687943, + "loss": 1.6127594709396362, + "loss_ce": 0.011196983978152275, + "loss_iou": 0.64453125, + "loss_num": 0.0625, + "loss_xval": 1.6015625, + "num_input_tokens_seen": 425682036, + "step": 6364 + }, + { + "epoch": 0.7222695035460993, + "grad_norm": 24.70671272277832, + "learning_rate": 5e-05, + "loss": 1.1014, + "num_input_tokens_seen": 425749552, + "step": 6365 + }, + { + "epoch": 0.7222695035460993, + "loss": 1.1952729225158691, + "loss_ce": 0.005819724872708321, + "loss_iou": 0.4765625, + "loss_num": 0.04736328125, + "loss_xval": 1.1875, + "num_input_tokens_seen": 425749552, + "step": 6365 + }, + { + "epoch": 0.7223829787234043, + "grad_norm": 99.45130157470703, + "learning_rate": 5e-05, + "loss": 1.0923, + "num_input_tokens_seen": 425816052, + "step": 6366 + }, + { + "epoch": 0.7223829787234043, + "loss": 1.1976041793823242, + "loss_ce": 0.006686258129775524, + "loss_iou": 0.466796875, + "loss_num": 0.051025390625, + "loss_xval": 1.1875, + "num_input_tokens_seen": 425816052, + "step": 6366 + }, + { + "epoch": 0.7224964539007093, + "grad_norm": 15.554556846618652, + "learning_rate": 5e-05, + "loss": 1.1568, + "num_input_tokens_seen": 425884420, + "step": 6367 + }, + { + "epoch": 0.7224964539007093, + "loss": 1.2318458557128906, + "loss_ce": 0.007236443925648928, + "loss_iou": 0.4921875, + "loss_num": 0.048095703125, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 425884420, + "step": 6367 + }, + { + "epoch": 0.7226099290780141, + "grad_norm": 22.60994529724121, + "learning_rate": 5e-05, + "loss": 1.2137, + "num_input_tokens_seen": 425950564, + "step": 6368 + }, + { + "epoch": 0.7226099290780141, + "loss": 1.294032096862793, + "loss_ce": 0.009852347895503044, + "loss_iou": 0.482421875, + "loss_num": 0.064453125, + "loss_xval": 1.28125, + "num_input_tokens_seen": 425950564, + "step": 6368 + }, + { + "epoch": 0.7227234042553191, + "grad_norm": 19.861560821533203, + "learning_rate": 5e-05, + "loss": 1.0675, + "num_input_tokens_seen": 426017240, + "step": 6369 + }, + { + "epoch": 0.7227234042553191, + "loss": 1.142500877380371, + "loss_ce": 0.004317300859838724, + "loss_iou": 0.478515625, + "loss_num": 0.0361328125, + "loss_xval": 1.140625, + "num_input_tokens_seen": 426017240, + "step": 6369 + }, + { + "epoch": 0.7228368794326241, + "grad_norm": 16.123958587646484, + "learning_rate": 5e-05, + "loss": 1.1557, + "num_input_tokens_seen": 426084620, + "step": 6370 + }, + { + "epoch": 0.7228368794326241, + "loss": 1.126105546951294, + "loss_ce": 0.006232397630810738, + "loss_iou": 0.4296875, + "loss_num": 0.05224609375, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 426084620, + "step": 6370 + }, + { + "epoch": 0.7229503546099291, + "grad_norm": 17.532264709472656, + "learning_rate": 5e-05, + "loss": 1.1146, + "num_input_tokens_seen": 426150920, + "step": 6371 + }, + { + "epoch": 0.7229503546099291, + "loss": 1.005308985710144, + "loss_ce": 0.009215287864208221, + "loss_iou": 0.427734375, + "loss_num": 0.0279541015625, + "loss_xval": 0.99609375, + "num_input_tokens_seen": 426150920, + "step": 6371 + }, + { + "epoch": 0.723063829787234, + "grad_norm": 25.472888946533203, + "learning_rate": 5e-05, + "loss": 1.1658, + "num_input_tokens_seen": 426217160, + "step": 6372 + }, + { + "epoch": 0.723063829787234, + "loss": 1.0809842348098755, + "loss_ce": 0.010671697556972504, + "loss_iou": 0.4140625, + "loss_num": 0.048583984375, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 426217160, + "step": 6372 + }, + { + "epoch": 0.723177304964539, + "grad_norm": 68.34139251708984, + "learning_rate": 5e-05, + "loss": 1.2839, + "num_input_tokens_seen": 426284352, + "step": 6373 + }, + { + "epoch": 0.723177304964539, + "loss": 1.2088017463684082, + "loss_ce": 0.01007124874740839, + "loss_iou": 0.470703125, + "loss_num": 0.051513671875, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 426284352, + "step": 6373 + }, + { + "epoch": 0.723290780141844, + "grad_norm": 38.035160064697266, + "learning_rate": 5e-05, + "loss": 1.3166, + "num_input_tokens_seen": 426352048, + "step": 6374 + }, + { + "epoch": 0.723290780141844, + "loss": 1.2028998136520386, + "loss_ce": 0.0041693029925227165, + "loss_iou": 0.5078125, + "loss_num": 0.035888671875, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 426352048, + "step": 6374 + }, + { + "epoch": 0.723404255319149, + "grad_norm": 31.260587692260742, + "learning_rate": 5e-05, + "loss": 0.9492, + "num_input_tokens_seen": 426419344, + "step": 6375 + }, + { + "epoch": 0.723404255319149, + "loss": 0.907440185546875, + "loss_ce": 0.006561310961842537, + "loss_iou": 0.3828125, + "loss_num": 0.0272216796875, + "loss_xval": 0.90234375, + "num_input_tokens_seen": 426419344, + "step": 6375 + }, + { + "epoch": 0.7235177304964538, + "grad_norm": 23.214387893676758, + "learning_rate": 5e-05, + "loss": 1.0434, + "num_input_tokens_seen": 426485900, + "step": 6376 + }, + { + "epoch": 0.7235177304964538, + "loss": 0.9309976100921631, + "loss_ce": 0.007077899761497974, + "loss_iou": 0.376953125, + "loss_num": 0.033935546875, + "loss_xval": 0.92578125, + "num_input_tokens_seen": 426485900, + "step": 6376 + }, + { + "epoch": 0.7236312056737588, + "grad_norm": 18.70481300354004, + "learning_rate": 5e-05, + "loss": 1.1696, + "num_input_tokens_seen": 426553308, + "step": 6377 + }, + { + "epoch": 0.7236312056737588, + "loss": 1.2522058486938477, + "loss_ce": 0.0036706016398966312, + "loss_iou": 0.49609375, + "loss_num": 0.051025390625, + "loss_xval": 1.25, + "num_input_tokens_seen": 426553308, + "step": 6377 + }, + { + "epoch": 0.7237446808510638, + "grad_norm": 28.40659523010254, + "learning_rate": 5e-05, + "loss": 1.1622, + "num_input_tokens_seen": 426619984, + "step": 6378 + }, + { + "epoch": 0.7237446808510638, + "loss": 1.1981332302093506, + "loss_ce": 0.006238776259124279, + "loss_iou": 0.51171875, + "loss_num": 0.033447265625, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 426619984, + "step": 6378 + }, + { + "epoch": 0.7238581560283688, + "grad_norm": 37.99853515625, + "learning_rate": 5e-05, + "loss": 1.4971, + "num_input_tokens_seen": 426686760, + "step": 6379 + }, + { + "epoch": 0.7238581560283688, + "loss": 1.4063364267349243, + "loss_ce": 0.0083871865645051, + "loss_iou": 0.5, + "loss_num": 0.07861328125, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 426686760, + "step": 6379 + }, + { + "epoch": 0.7239716312056738, + "grad_norm": 28.763988494873047, + "learning_rate": 5e-05, + "loss": 1.1555, + "num_input_tokens_seen": 426753508, + "step": 6380 + }, + { + "epoch": 0.7239716312056738, + "loss": 1.358623743057251, + "loss_ce": 0.008037714287638664, + "loss_iou": 0.59375, + "loss_num": 0.033203125, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 426753508, + "step": 6380 + }, + { + "epoch": 0.7240851063829787, + "grad_norm": 39.228355407714844, + "learning_rate": 5e-05, + "loss": 1.139, + "num_input_tokens_seen": 426819352, + "step": 6381 + }, + { + "epoch": 0.7240851063829787, + "loss": 1.2945364713668823, + "loss_ce": 0.009136044420301914, + "loss_iou": 0.5078125, + "loss_num": 0.054443359375, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 426819352, + "step": 6381 + }, + { + "epoch": 0.7241985815602837, + "grad_norm": 31.926456451416016, + "learning_rate": 5e-05, + "loss": 1.2022, + "num_input_tokens_seen": 426886560, + "step": 6382 + }, + { + "epoch": 0.7241985815602837, + "loss": 1.3049345016479492, + "loss_ce": 0.012454130686819553, + "loss_iou": 0.4921875, + "loss_num": 0.061279296875, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 426886560, + "step": 6382 + }, + { + "epoch": 0.7243120567375887, + "grad_norm": 23.063793182373047, + "learning_rate": 5e-05, + "loss": 0.9487, + "num_input_tokens_seen": 426952632, + "step": 6383 + }, + { + "epoch": 0.7243120567375887, + "loss": 1.0146123170852661, + "loss_ce": 0.00533496867865324, + "loss_iou": 0.408203125, + "loss_num": 0.03857421875, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 426952632, + "step": 6383 + }, + { + "epoch": 0.7244255319148937, + "grad_norm": 29.52703857421875, + "learning_rate": 5e-05, + "loss": 1.2684, + "num_input_tokens_seen": 427020196, + "step": 6384 + }, + { + "epoch": 0.7244255319148937, + "loss": 1.1133674383163452, + "loss_ce": 0.004480736795812845, + "loss_iou": 0.46484375, + "loss_num": 0.03515625, + "loss_xval": 1.109375, + "num_input_tokens_seen": 427020196, + "step": 6384 + }, + { + "epoch": 0.7245390070921985, + "grad_norm": 52.5800666809082, + "learning_rate": 5e-05, + "loss": 1.1368, + "num_input_tokens_seen": 427086944, + "step": 6385 + }, + { + "epoch": 0.7245390070921985, + "loss": 1.0303778648376465, + "loss_ce": 0.005231395363807678, + "loss_iou": 0.453125, + "loss_num": 0.02392578125, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 427086944, + "step": 6385 + }, + { + "epoch": 0.7246524822695035, + "grad_norm": 29.008657455444336, + "learning_rate": 5e-05, + "loss": 1.4109, + "num_input_tokens_seen": 427154128, + "step": 6386 + }, + { + "epoch": 0.7246524822695035, + "loss": 1.6251294612884521, + "loss_ce": 0.009895111434161663, + "loss_iou": 0.640625, + "loss_num": 0.06640625, + "loss_xval": 1.6171875, + "num_input_tokens_seen": 427154128, + "step": 6386 + }, + { + "epoch": 0.7247659574468085, + "grad_norm": 18.978586196899414, + "learning_rate": 5e-05, + "loss": 1.2161, + "num_input_tokens_seen": 427221556, + "step": 6387 + }, + { + "epoch": 0.7247659574468085, + "loss": 1.1424734592437744, + "loss_ce": 0.006242956966161728, + "loss_iou": 0.46875, + "loss_num": 0.039306640625, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 427221556, + "step": 6387 + }, + { + "epoch": 0.7248794326241135, + "grad_norm": 95.34607696533203, + "learning_rate": 5e-05, + "loss": 1.1947, + "num_input_tokens_seen": 427287580, + "step": 6388 + }, + { + "epoch": 0.7248794326241135, + "loss": 1.3245147466659546, + "loss_ce": 0.009573323652148247, + "loss_iou": 0.52734375, + "loss_num": 0.052490234375, + "loss_xval": 1.3125, + "num_input_tokens_seen": 427287580, + "step": 6388 + }, + { + "epoch": 0.7249929078014185, + "grad_norm": 40.93021011352539, + "learning_rate": 5e-05, + "loss": 1.2673, + "num_input_tokens_seen": 427354956, + "step": 6389 + }, + { + "epoch": 0.7249929078014185, + "loss": 1.2743473052978516, + "loss_ce": 0.006769235245883465, + "loss_iou": 0.50390625, + "loss_num": 0.05126953125, + "loss_xval": 1.265625, + "num_input_tokens_seen": 427354956, + "step": 6389 + }, + { + "epoch": 0.7251063829787234, + "grad_norm": 37.0285530090332, + "learning_rate": 5e-05, + "loss": 1.55, + "num_input_tokens_seen": 427421000, + "step": 6390 + }, + { + "epoch": 0.7251063829787234, + "loss": 1.5746161937713623, + "loss_ce": 0.003327104263007641, + "loss_iou": 0.63671875, + "loss_num": 0.06005859375, + "loss_xval": 1.5703125, + "num_input_tokens_seen": 427421000, + "step": 6390 + }, + { + "epoch": 0.7252198581560284, + "grad_norm": 54.2320671081543, + "learning_rate": 5e-05, + "loss": 1.2119, + "num_input_tokens_seen": 427488792, + "step": 6391 + }, + { + "epoch": 0.7252198581560284, + "loss": 1.1767711639404297, + "loss_ce": 0.0044079190120100975, + "loss_iou": 0.50390625, + "loss_num": 0.032470703125, + "loss_xval": 1.171875, + "num_input_tokens_seen": 427488792, + "step": 6391 + }, + { + "epoch": 0.7253333333333334, + "grad_norm": 26.95469093322754, + "learning_rate": 5e-05, + "loss": 1.4767, + "num_input_tokens_seen": 427557532, + "step": 6392 + }, + { + "epoch": 0.7253333333333334, + "loss": 1.5989317893981934, + "loss_ce": 0.00811154767870903, + "loss_iou": 0.63671875, + "loss_num": 0.0634765625, + "loss_xval": 1.59375, + "num_input_tokens_seen": 427557532, + "step": 6392 + }, + { + "epoch": 0.7254468085106383, + "grad_norm": 244.251708984375, + "learning_rate": 5e-05, + "loss": 1.1066, + "num_input_tokens_seen": 427624404, + "step": 6393 + }, + { + "epoch": 0.7254468085106383, + "loss": 1.200657844543457, + "loss_ce": 0.013157864101231098, + "loss_iou": 0.43359375, + "loss_num": 0.06396484375, + "loss_xval": 1.1875, + "num_input_tokens_seen": 427624404, + "step": 6393 + }, + { + "epoch": 0.7255602836879432, + "grad_norm": 24.257972717285156, + "learning_rate": 5e-05, + "loss": 1.124, + "num_input_tokens_seen": 427690112, + "step": 6394 + }, + { + "epoch": 0.7255602836879432, + "loss": 1.1258784532546997, + "loss_ce": 0.007226109970360994, + "loss_iou": 0.47265625, + "loss_num": 0.0341796875, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 427690112, + "step": 6394 + }, + { + "epoch": 0.7256737588652482, + "grad_norm": 27.481430053710938, + "learning_rate": 5e-05, + "loss": 1.1921, + "num_input_tokens_seen": 427756984, + "step": 6395 + }, + { + "epoch": 0.7256737588652482, + "loss": 0.9957681894302368, + "loss_ce": 0.004068952985107899, + "loss_iou": 0.404296875, + "loss_num": 0.03662109375, + "loss_xval": 0.9921875, + "num_input_tokens_seen": 427756984, + "step": 6395 + }, + { + "epoch": 0.7257872340425532, + "grad_norm": 22.731515884399414, + "learning_rate": 5e-05, + "loss": 1.2675, + "num_input_tokens_seen": 427823536, + "step": 6396 + }, + { + "epoch": 0.7257872340425532, + "loss": 1.3584643602371216, + "loss_ce": 0.006413529627025127, + "loss_iou": 0.55078125, + "loss_num": 0.050048828125, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 427823536, + "step": 6396 + }, + { + "epoch": 0.7259007092198582, + "grad_norm": 12.00439167022705, + "learning_rate": 5e-05, + "loss": 1.0409, + "num_input_tokens_seen": 427890368, + "step": 6397 + }, + { + "epoch": 0.7259007092198582, + "loss": 1.288070559501648, + "loss_ce": 0.004379150457680225, + "loss_iou": 0.5, + "loss_num": 0.056640625, + "loss_xval": 1.28125, + "num_input_tokens_seen": 427890368, + "step": 6397 + }, + { + "epoch": 0.7260141843971631, + "grad_norm": 14.108243942260742, + "learning_rate": 5e-05, + "loss": 1.1552, + "num_input_tokens_seen": 427957204, + "step": 6398 + }, + { + "epoch": 0.7260141843971631, + "loss": 1.2651253938674927, + "loss_ce": 0.008289470337331295, + "loss_iou": 0.51171875, + "loss_num": 0.046875, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 427957204, + "step": 6398 + }, + { + "epoch": 0.7261276595744681, + "grad_norm": 22.643888473510742, + "learning_rate": 5e-05, + "loss": 0.9611, + "num_input_tokens_seen": 428023412, + "step": 6399 + }, + { + "epoch": 0.7261276595744681, + "loss": 1.1439740657806396, + "loss_ce": 0.006766979116946459, + "loss_iou": 0.453125, + "loss_num": 0.046142578125, + "loss_xval": 1.140625, + "num_input_tokens_seen": 428023412, + "step": 6399 + }, + { + "epoch": 0.7262411347517731, + "grad_norm": 25.96820640563965, + "learning_rate": 5e-05, + "loss": 1.3218, + "num_input_tokens_seen": 428090876, + "step": 6400 + }, + { + "epoch": 0.7262411347517731, + "loss": 1.3355910778045654, + "loss_ce": 0.006489566992968321, + "loss_iou": 0.5390625, + "loss_num": 0.050048828125, + "loss_xval": 1.328125, + "num_input_tokens_seen": 428090876, + "step": 6400 + }, + { + "epoch": 0.726354609929078, + "grad_norm": 33.95534896850586, + "learning_rate": 5e-05, + "loss": 1.1034, + "num_input_tokens_seen": 428157128, + "step": 6401 + }, + { + "epoch": 0.726354609929078, + "loss": 1.256441593170166, + "loss_ce": 0.008638819679617882, + "loss_iou": 0.447265625, + "loss_num": 0.07080078125, + "loss_xval": 1.25, + "num_input_tokens_seen": 428157128, + "step": 6401 + }, + { + "epoch": 0.7264680851063829, + "grad_norm": 44.14629364013672, + "learning_rate": 5e-05, + "loss": 1.2147, + "num_input_tokens_seen": 428223716, + "step": 6402 + }, + { + "epoch": 0.7264680851063829, + "loss": 1.3267103433609009, + "loss_ce": 0.012257199734449387, + "loss_iou": 0.5625, + "loss_num": 0.037841796875, + "loss_xval": 1.3125, + "num_input_tokens_seen": 428223716, + "step": 6402 + }, + { + "epoch": 0.7265815602836879, + "grad_norm": 19.36736297607422, + "learning_rate": 5e-05, + "loss": 1.1072, + "num_input_tokens_seen": 428291388, + "step": 6403 + }, + { + "epoch": 0.7265815602836879, + "loss": 1.082430362701416, + "loss_ce": 0.005281892605125904, + "loss_iou": 0.44140625, + "loss_num": 0.039306640625, + "loss_xval": 1.078125, + "num_input_tokens_seen": 428291388, + "step": 6403 + }, + { + "epoch": 0.7266950354609929, + "grad_norm": 54.37877655029297, + "learning_rate": 5e-05, + "loss": 1.0699, + "num_input_tokens_seen": 428358472, + "step": 6404 + }, + { + "epoch": 0.7266950354609929, + "loss": 1.1123398542404175, + "loss_ce": 0.007863009348511696, + "loss_iou": 0.4375, + "loss_num": 0.045654296875, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 428358472, + "step": 6404 + }, + { + "epoch": 0.7268085106382979, + "grad_norm": 44.46159744262695, + "learning_rate": 5e-05, + "loss": 1.2, + "num_input_tokens_seen": 428424796, + "step": 6405 + }, + { + "epoch": 0.7268085106382979, + "loss": 1.297579050064087, + "loss_ce": 0.006563476752489805, + "loss_iou": 0.52734375, + "loss_num": 0.047607421875, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 428424796, + "step": 6405 + }, + { + "epoch": 0.7269219858156029, + "grad_norm": 34.24674606323242, + "learning_rate": 5e-05, + "loss": 1.2859, + "num_input_tokens_seen": 428491268, + "step": 6406 + }, + { + "epoch": 0.7269219858156029, + "loss": 1.2069600820541382, + "loss_ce": 0.006276463158428669, + "loss_iou": 0.48046875, + "loss_num": 0.04833984375, + "loss_xval": 1.203125, + "num_input_tokens_seen": 428491268, + "step": 6406 + }, + { + "epoch": 0.7270354609929078, + "grad_norm": 28.081764221191406, + "learning_rate": 5e-05, + "loss": 1.1366, + "num_input_tokens_seen": 428558952, + "step": 6407 + }, + { + "epoch": 0.7270354609929078, + "loss": 1.1083095073699951, + "loss_ce": 0.005282096564769745, + "loss_iou": 0.462890625, + "loss_num": 0.035400390625, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 428558952, + "step": 6407 + }, + { + "epoch": 0.7271489361702128, + "grad_norm": 12.826560020446777, + "learning_rate": 5e-05, + "loss": 0.9002, + "num_input_tokens_seen": 428625608, + "step": 6408 + }, + { + "epoch": 0.7271489361702128, + "loss": 0.7920384407043457, + "loss_ce": 0.008835278451442719, + "loss_iou": 0.30078125, + "loss_num": 0.036376953125, + "loss_xval": 0.78125, + "num_input_tokens_seen": 428625608, + "step": 6408 + }, + { + "epoch": 0.7272624113475177, + "grad_norm": 19.04265594482422, + "learning_rate": 5e-05, + "loss": 1.1451, + "num_input_tokens_seen": 428693384, + "step": 6409 + }, + { + "epoch": 0.7272624113475177, + "loss": 1.1220180988311768, + "loss_ce": 0.009713426232337952, + "loss_iou": 0.451171875, + "loss_num": 0.042236328125, + "loss_xval": 1.109375, + "num_input_tokens_seen": 428693384, + "step": 6409 + }, + { + "epoch": 0.7273758865248227, + "grad_norm": 108.23531341552734, + "learning_rate": 5e-05, + "loss": 1.0664, + "num_input_tokens_seen": 428759904, + "step": 6410 + }, + { + "epoch": 0.7273758865248227, + "loss": 1.149462342262268, + "loss_ce": 0.008471162989735603, + "loss_iou": 0.4375, + "loss_num": 0.053466796875, + "loss_xval": 1.140625, + "num_input_tokens_seen": 428759904, + "step": 6410 + }, + { + "epoch": 0.7274893617021276, + "grad_norm": 32.490509033203125, + "learning_rate": 5e-05, + "loss": 1.2238, + "num_input_tokens_seen": 428826536, + "step": 6411 + }, + { + "epoch": 0.7274893617021276, + "loss": 1.1697520017623901, + "loss_ce": 0.005689476616680622, + "loss_iou": 0.458984375, + "loss_num": 0.04931640625, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 428826536, + "step": 6411 + }, + { + "epoch": 0.7276028368794326, + "grad_norm": 44.88636016845703, + "learning_rate": 5e-05, + "loss": 1.2417, + "num_input_tokens_seen": 428893076, + "step": 6412 + }, + { + "epoch": 0.7276028368794326, + "loss": 1.4279347658157349, + "loss_ce": 0.006548031698912382, + "loss_iou": 0.5546875, + "loss_num": 0.0625, + "loss_xval": 1.421875, + "num_input_tokens_seen": 428893076, + "step": 6412 + }, + { + "epoch": 0.7277163120567376, + "grad_norm": 43.357330322265625, + "learning_rate": 5e-05, + "loss": 1.3026, + "num_input_tokens_seen": 428959176, + "step": 6413 + }, + { + "epoch": 0.7277163120567376, + "loss": 1.2195489406585693, + "loss_ce": 0.006658415775746107, + "loss_iou": 0.46484375, + "loss_num": 0.056640625, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 428959176, + "step": 6413 + }, + { + "epoch": 0.7278297872340426, + "grad_norm": 30.828800201416016, + "learning_rate": 5e-05, + "loss": 1.3007, + "num_input_tokens_seen": 429026708, + "step": 6414 + }, + { + "epoch": 0.7278297872340426, + "loss": 1.3646447658538818, + "loss_ce": 0.005269844084978104, + "loss_iou": 0.57421875, + "loss_num": 0.042236328125, + "loss_xval": 1.359375, + "num_input_tokens_seen": 429026708, + "step": 6414 + }, + { + "epoch": 0.7279432624113475, + "grad_norm": 20.962818145751953, + "learning_rate": 5e-05, + "loss": 1.2037, + "num_input_tokens_seen": 429093116, + "step": 6415 + }, + { + "epoch": 0.7279432624113475, + "loss": 1.3521208763122559, + "loss_ce": 0.015206875279545784, + "loss_iou": 0.5, + "loss_num": 0.06787109375, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 429093116, + "step": 6415 + }, + { + "epoch": 0.7280567375886525, + "grad_norm": 31.901729583740234, + "learning_rate": 5e-05, + "loss": 1.345, + "num_input_tokens_seen": 429161832, + "step": 6416 + }, + { + "epoch": 0.7280567375886525, + "loss": 1.2838932275772095, + "loss_ce": 0.008502631448209286, + "loss_iou": 0.53125, + "loss_num": 0.042236328125, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 429161832, + "step": 6416 + }, + { + "epoch": 0.7281702127659574, + "grad_norm": 34.17063903808594, + "learning_rate": 5e-05, + "loss": 1.2389, + "num_input_tokens_seen": 429228828, + "step": 6417 + }, + { + "epoch": 0.7281702127659574, + "loss": 1.1793618202209473, + "loss_ce": 0.009439965710043907, + "loss_iou": 0.490234375, + "loss_num": 0.037841796875, + "loss_xval": 1.171875, + "num_input_tokens_seen": 429228828, + "step": 6417 + }, + { + "epoch": 0.7282836879432624, + "grad_norm": 29.322824478149414, + "learning_rate": 5e-05, + "loss": 1.324, + "num_input_tokens_seen": 429296284, + "step": 6418 + }, + { + "epoch": 0.7282836879432624, + "loss": 1.4060524702072144, + "loss_ce": 0.005173609592020512, + "loss_iou": 0.546875, + "loss_num": 0.062255859375, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 429296284, + "step": 6418 + }, + { + "epoch": 0.7283971631205673, + "grad_norm": 29.743362426757812, + "learning_rate": 5e-05, + "loss": 1.0664, + "num_input_tokens_seen": 429362540, + "step": 6419 + }, + { + "epoch": 0.7283971631205673, + "loss": 1.0607620477676392, + "loss_ce": 0.010469088330864906, + "loss_iou": 0.44140625, + "loss_num": 0.033447265625, + "loss_xval": 1.046875, + "num_input_tokens_seen": 429362540, + "step": 6419 + }, + { + "epoch": 0.7285106382978723, + "grad_norm": 24.33355140686035, + "learning_rate": 5e-05, + "loss": 1.1929, + "num_input_tokens_seen": 429429488, + "step": 6420 + }, + { + "epoch": 0.7285106382978723, + "loss": 1.0893397331237793, + "loss_ce": 0.004378870129585266, + "loss_iou": 0.474609375, + "loss_num": 0.027099609375, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 429429488, + "step": 6420 + }, + { + "epoch": 0.7286241134751773, + "grad_norm": 32.163536071777344, + "learning_rate": 5e-05, + "loss": 0.9755, + "num_input_tokens_seen": 429495984, + "step": 6421 + }, + { + "epoch": 0.7286241134751773, + "loss": 0.9137080311775208, + "loss_ce": 0.008190443739295006, + "loss_iou": 0.341796875, + "loss_num": 0.04443359375, + "loss_xval": 0.90625, + "num_input_tokens_seen": 429495984, + "step": 6421 + }, + { + "epoch": 0.7287375886524823, + "grad_norm": 36.848514556884766, + "learning_rate": 5e-05, + "loss": 1.2127, + "num_input_tokens_seen": 429563300, + "step": 6422 + }, + { + "epoch": 0.7287375886524823, + "loss": 1.110904574394226, + "loss_ce": 0.009342065081000328, + "loss_iou": 0.4375, + "loss_num": 0.04541015625, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 429563300, + "step": 6422 + }, + { + "epoch": 0.7288510638297873, + "grad_norm": 45.76752853393555, + "learning_rate": 5e-05, + "loss": 1.1963, + "num_input_tokens_seen": 429629996, + "step": 6423 + }, + { + "epoch": 0.7288510638297873, + "loss": 1.1933705806732178, + "loss_ce": 0.007335439790040255, + "loss_iou": 0.490234375, + "loss_num": 0.040771484375, + "loss_xval": 1.1875, + "num_input_tokens_seen": 429629996, + "step": 6423 + }, + { + "epoch": 0.7289645390070922, + "grad_norm": 35.35934066772461, + "learning_rate": 5e-05, + "loss": 1.4356, + "num_input_tokens_seen": 429697596, + "step": 6424 + }, + { + "epoch": 0.7289645390070922, + "loss": 1.4544997215270996, + "loss_ce": 0.00869888998568058, + "loss_iou": 0.5859375, + "loss_num": 0.0537109375, + "loss_xval": 1.4453125, + "num_input_tokens_seen": 429697596, + "step": 6424 + }, + { + "epoch": 0.7290780141843972, + "grad_norm": 22.686735153198242, + "learning_rate": 5e-05, + "loss": 1.2319, + "num_input_tokens_seen": 429765692, + "step": 6425 + }, + { + "epoch": 0.7290780141843972, + "loss": 1.252543330192566, + "loss_ce": 0.004496426787227392, + "loss_iou": 0.4921875, + "loss_num": 0.052734375, + "loss_xval": 1.25, + "num_input_tokens_seen": 429765692, + "step": 6425 + }, + { + "epoch": 0.7291914893617021, + "grad_norm": 13.352502822875977, + "learning_rate": 5e-05, + "loss": 1.0443, + "num_input_tokens_seen": 429832124, + "step": 6426 + }, + { + "epoch": 0.7291914893617021, + "loss": 1.1089344024658203, + "loss_ce": 0.008348477073013783, + "loss_iou": 0.44140625, + "loss_num": 0.043701171875, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 429832124, + "step": 6426 + }, + { + "epoch": 0.7293049645390071, + "grad_norm": 13.892581939697266, + "learning_rate": 5e-05, + "loss": 1.2398, + "num_input_tokens_seen": 429899080, + "step": 6427 + }, + { + "epoch": 0.7293049645390071, + "loss": 1.2112563848495483, + "loss_ce": 0.004713334608823061, + "loss_iou": 0.4765625, + "loss_num": 0.051025390625, + "loss_xval": 1.203125, + "num_input_tokens_seen": 429899080, + "step": 6427 + }, + { + "epoch": 0.729418439716312, + "grad_norm": 14.507207870483398, + "learning_rate": 5e-05, + "loss": 1.0142, + "num_input_tokens_seen": 429965744, + "step": 6428 + }, + { + "epoch": 0.729418439716312, + "loss": 1.0277684926986694, + "loss_ce": 0.011166995391249657, + "loss_iou": 0.390625, + "loss_num": 0.04736328125, + "loss_xval": 1.015625, + "num_input_tokens_seen": 429965744, + "step": 6428 + }, + { + "epoch": 0.729531914893617, + "grad_norm": 12.663400650024414, + "learning_rate": 5e-05, + "loss": 1.1854, + "num_input_tokens_seen": 430033512, + "step": 6429 + }, + { + "epoch": 0.729531914893617, + "loss": 1.2204978466033936, + "loss_ce": 0.008095412515103817, + "loss_iou": 0.474609375, + "loss_num": 0.052978515625, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 430033512, + "step": 6429 + }, + { + "epoch": 0.729645390070922, + "grad_norm": 29.574670791625977, + "learning_rate": 5e-05, + "loss": 1.2373, + "num_input_tokens_seen": 430100500, + "step": 6430 + }, + { + "epoch": 0.729645390070922, + "loss": 1.3730852603912354, + "loss_ce": 0.006874384358525276, + "loss_iou": 0.55078125, + "loss_num": 0.053955078125, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 430100500, + "step": 6430 + }, + { + "epoch": 0.729758865248227, + "grad_norm": 35.498199462890625, + "learning_rate": 5e-05, + "loss": 1.3612, + "num_input_tokens_seen": 430167496, + "step": 6431 + }, + { + "epoch": 0.729758865248227, + "loss": 1.1247789859771729, + "loss_ce": 0.008568093180656433, + "loss_iou": 0.48828125, + "loss_num": 0.02783203125, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 430167496, + "step": 6431 + }, + { + "epoch": 0.729872340425532, + "grad_norm": 25.191452026367188, + "learning_rate": 5e-05, + "loss": 1.3585, + "num_input_tokens_seen": 430235396, + "step": 6432 + }, + { + "epoch": 0.729872340425532, + "loss": 1.510115623474121, + "loss_ce": 0.0042561995796859264, + "loss_iou": 0.58984375, + "loss_num": 0.06494140625, + "loss_xval": 1.5078125, + "num_input_tokens_seen": 430235396, + "step": 6432 + }, + { + "epoch": 0.7299858156028369, + "grad_norm": 31.738052368164062, + "learning_rate": 5e-05, + "loss": 1.2026, + "num_input_tokens_seen": 430302392, + "step": 6433 + }, + { + "epoch": 0.7299858156028369, + "loss": 1.140912413597107, + "loss_ce": 0.005658478941768408, + "loss_iou": 0.46875, + "loss_num": 0.03955078125, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 430302392, + "step": 6433 + }, + { + "epoch": 0.7300992907801418, + "grad_norm": 44.387901306152344, + "learning_rate": 5e-05, + "loss": 1.2886, + "num_input_tokens_seen": 430369528, + "step": 6434 + }, + { + "epoch": 0.7300992907801418, + "loss": 1.3076605796813965, + "loss_ce": 0.004437924362719059, + "loss_iou": 0.55859375, + "loss_num": 0.03662109375, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 430369528, + "step": 6434 + }, + { + "epoch": 0.7302127659574468, + "grad_norm": 36.042049407958984, + "learning_rate": 5e-05, + "loss": 1.4365, + "num_input_tokens_seen": 430436172, + "step": 6435 + }, + { + "epoch": 0.7302127659574468, + "loss": 1.4790630340576172, + "loss_ce": 0.00640681479126215, + "loss_iou": 0.625, + "loss_num": 0.045166015625, + "loss_xval": 1.46875, + "num_input_tokens_seen": 430436172, + "step": 6435 + }, + { + "epoch": 0.7303262411347518, + "grad_norm": 16.038501739501953, + "learning_rate": 5e-05, + "loss": 1.2773, + "num_input_tokens_seen": 430502832, + "step": 6436 + }, + { + "epoch": 0.7303262411347518, + "loss": 1.1604535579681396, + "loss_ce": 0.006156792864203453, + "loss_iou": 0.48046875, + "loss_num": 0.03857421875, + "loss_xval": 1.15625, + "num_input_tokens_seen": 430502832, + "step": 6436 + }, + { + "epoch": 0.7304397163120567, + "grad_norm": 45.5121955871582, + "learning_rate": 5e-05, + "loss": 1.1734, + "num_input_tokens_seen": 430569752, + "step": 6437 + }, + { + "epoch": 0.7304397163120567, + "loss": 1.1845273971557617, + "loss_ce": 0.008746078237891197, + "loss_iou": 0.482421875, + "loss_num": 0.042236328125, + "loss_xval": 1.171875, + "num_input_tokens_seen": 430569752, + "step": 6437 + }, + { + "epoch": 0.7305531914893617, + "grad_norm": 20.592632293701172, + "learning_rate": 5e-05, + "loss": 1.1765, + "num_input_tokens_seen": 430636620, + "step": 6438 + }, + { + "epoch": 0.7305531914893617, + "loss": 1.2258920669555664, + "loss_ce": 0.01031587179750204, + "loss_iou": 0.51171875, + "loss_num": 0.038330078125, + "loss_xval": 1.21875, + "num_input_tokens_seen": 430636620, + "step": 6438 + }, + { + "epoch": 0.7306666666666667, + "grad_norm": 21.378141403198242, + "learning_rate": 5e-05, + "loss": 1.1707, + "num_input_tokens_seen": 430702704, + "step": 6439 + }, + { + "epoch": 0.7306666666666667, + "loss": 1.2163338661193848, + "loss_ce": 0.008326132781803608, + "loss_iou": 0.49609375, + "loss_num": 0.043212890625, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 430702704, + "step": 6439 + }, + { + "epoch": 0.7307801418439717, + "grad_norm": 23.139177322387695, + "learning_rate": 5e-05, + "loss": 1.0569, + "num_input_tokens_seen": 430768848, + "step": 6440 + }, + { + "epoch": 0.7307801418439717, + "loss": 1.018698811531067, + "loss_ce": 0.009421451017260551, + "loss_iou": 0.404296875, + "loss_num": 0.039794921875, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 430768848, + "step": 6440 + }, + { + "epoch": 0.7308936170212766, + "grad_norm": 26.494247436523438, + "learning_rate": 5e-05, + "loss": 1.129, + "num_input_tokens_seen": 430835832, + "step": 6441 + }, + { + "epoch": 0.7308936170212766, + "loss": 1.2294080257415771, + "loss_ce": 0.006263584829866886, + "loss_iou": 0.51171875, + "loss_num": 0.039794921875, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 430835832, + "step": 6441 + }, + { + "epoch": 0.7310070921985815, + "grad_norm": 25.36931037902832, + "learning_rate": 5e-05, + "loss": 1.3902, + "num_input_tokens_seen": 430903400, + "step": 6442 + }, + { + "epoch": 0.7310070921985815, + "loss": 1.238603115081787, + "loss_ce": 0.004716433119028807, + "loss_iou": 0.48828125, + "loss_num": 0.051025390625, + "loss_xval": 1.234375, + "num_input_tokens_seen": 430903400, + "step": 6442 + }, + { + "epoch": 0.7311205673758865, + "grad_norm": 22.463197708129883, + "learning_rate": 5e-05, + "loss": 1.0436, + "num_input_tokens_seen": 430969604, + "step": 6443 + }, + { + "epoch": 0.7311205673758865, + "loss": 1.2890366315841675, + "loss_ce": 0.008763181045651436, + "loss_iou": 0.53515625, + "loss_num": 0.042236328125, + "loss_xval": 1.28125, + "num_input_tokens_seen": 430969604, + "step": 6443 + }, + { + "epoch": 0.7312340425531915, + "grad_norm": 32.937835693359375, + "learning_rate": 5e-05, + "loss": 1.2103, + "num_input_tokens_seen": 431035904, + "step": 6444 + }, + { + "epoch": 0.7312340425531915, + "loss": 1.0494881868362427, + "loss_ce": 0.0038338908925652504, + "loss_iou": 0.41015625, + "loss_num": 0.04443359375, + "loss_xval": 1.046875, + "num_input_tokens_seen": 431035904, + "step": 6444 + }, + { + "epoch": 0.7313475177304964, + "grad_norm": 15.600845336914062, + "learning_rate": 5e-05, + "loss": 1.0799, + "num_input_tokens_seen": 431101984, + "step": 6445 + }, + { + "epoch": 0.7313475177304964, + "loss": 1.121403694152832, + "loss_ce": 0.008122343569993973, + "loss_iou": 0.453125, + "loss_num": 0.04150390625, + "loss_xval": 1.109375, + "num_input_tokens_seen": 431101984, + "step": 6445 + }, + { + "epoch": 0.7314609929078014, + "grad_norm": 20.41636085510254, + "learning_rate": 5e-05, + "loss": 1.1698, + "num_input_tokens_seen": 431169088, + "step": 6446 + }, + { + "epoch": 0.7314609929078014, + "loss": 1.2595939636230469, + "loss_ce": 0.003246308770030737, + "loss_iou": 0.5, + "loss_num": 0.050537109375, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 431169088, + "step": 6446 + }, + { + "epoch": 0.7315744680851064, + "grad_norm": 41.337158203125, + "learning_rate": 5e-05, + "loss": 1.1908, + "num_input_tokens_seen": 431235964, + "step": 6447 + }, + { + "epoch": 0.7315744680851064, + "loss": 1.0558134317398071, + "loss_ce": 0.008907931856811047, + "loss_iou": 0.41015625, + "loss_num": 0.04541015625, + "loss_xval": 1.046875, + "num_input_tokens_seen": 431235964, + "step": 6447 + }, + { + "epoch": 0.7316879432624114, + "grad_norm": 30.530881881713867, + "learning_rate": 5e-05, + "loss": 1.709, + "num_input_tokens_seen": 431303168, + "step": 6448 + }, + { + "epoch": 0.7316879432624114, + "loss": 1.8227382898330688, + "loss_ce": 0.006331945303827524, + "loss_iou": 0.6953125, + "loss_num": 0.0849609375, + "loss_xval": 1.8125, + "num_input_tokens_seen": 431303168, + "step": 6448 + }, + { + "epoch": 0.7318014184397164, + "grad_norm": 47.717899322509766, + "learning_rate": 5e-05, + "loss": 1.2165, + "num_input_tokens_seen": 431369684, + "step": 6449 + }, + { + "epoch": 0.7318014184397164, + "loss": 1.0975745916366577, + "loss_ce": 0.008951576426625252, + "loss_iou": 0.43359375, + "loss_num": 0.043701171875, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 431369684, + "step": 6449 + }, + { + "epoch": 0.7319148936170212, + "grad_norm": 21.095508575439453, + "learning_rate": 5e-05, + "loss": 1.2025, + "num_input_tokens_seen": 431437364, + "step": 6450 + }, + { + "epoch": 0.7319148936170212, + "loss": 1.052572250366211, + "loss_ce": 0.005697233136743307, + "loss_iou": 0.447265625, + "loss_num": 0.0303955078125, + "loss_xval": 1.046875, + "num_input_tokens_seen": 431437364, + "step": 6450 + }, + { + "epoch": 0.7320283687943262, + "grad_norm": 46.74847412109375, + "learning_rate": 5e-05, + "loss": 1.3058, + "num_input_tokens_seen": 431504372, + "step": 6451 + }, + { + "epoch": 0.7320283687943262, + "loss": 1.4131742715835571, + "loss_ce": 0.00838911347091198, + "loss_iou": 0.55078125, + "loss_num": 0.06005859375, + "loss_xval": 1.40625, + "num_input_tokens_seen": 431504372, + "step": 6451 + }, + { + "epoch": 0.7321418439716312, + "grad_norm": 37.850975036621094, + "learning_rate": 5e-05, + "loss": 1.1635, + "num_input_tokens_seen": 431570892, + "step": 6452 + }, + { + "epoch": 0.7321418439716312, + "loss": 1.2483608722686768, + "loss_ce": 0.008370628580451012, + "loss_iou": 0.5078125, + "loss_num": 0.045654296875, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 431570892, + "step": 6452 + }, + { + "epoch": 0.7322553191489362, + "grad_norm": 28.344968795776367, + "learning_rate": 5e-05, + "loss": 1.3331, + "num_input_tokens_seen": 431638380, + "step": 6453 + }, + { + "epoch": 0.7322553191489362, + "loss": 1.4067556858062744, + "loss_ce": 0.006365093402564526, + "loss_iou": 0.5390625, + "loss_num": 0.06396484375, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 431638380, + "step": 6453 + }, + { + "epoch": 0.7323687943262411, + "grad_norm": 19.280479431152344, + "learning_rate": 5e-05, + "loss": 1.0954, + "num_input_tokens_seen": 431703852, + "step": 6454 + }, + { + "epoch": 0.7323687943262411, + "loss": 1.2043774127960205, + "loss_ce": 0.008576666936278343, + "loss_iou": 0.51171875, + "loss_num": 0.03369140625, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 431703852, + "step": 6454 + }, + { + "epoch": 0.7324822695035461, + "grad_norm": 12.28358268737793, + "learning_rate": 5e-05, + "loss": 1.06, + "num_input_tokens_seen": 431770604, + "step": 6455 + }, + { + "epoch": 0.7324822695035461, + "loss": 1.075130820274353, + "loss_ce": 0.004818331450223923, + "loss_iou": 0.431640625, + "loss_num": 0.04150390625, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 431770604, + "step": 6455 + }, + { + "epoch": 0.7325957446808511, + "grad_norm": 17.682796478271484, + "learning_rate": 5e-05, + "loss": 0.8597, + "num_input_tokens_seen": 431837360, + "step": 6456 + }, + { + "epoch": 0.7325957446808511, + "loss": 0.8293420076370239, + "loss_ce": 0.00658811628818512, + "loss_iou": 0.31640625, + "loss_num": 0.038330078125, + "loss_xval": 0.82421875, + "num_input_tokens_seen": 431837360, + "step": 6456 + }, + { + "epoch": 0.7327092198581561, + "grad_norm": 27.489112854003906, + "learning_rate": 5e-05, + "loss": 1.1645, + "num_input_tokens_seen": 431903628, + "step": 6457 + }, + { + "epoch": 0.7327092198581561, + "loss": 1.2840616703033447, + "loss_ce": 0.00525304488837719, + "loss_iou": 0.48046875, + "loss_num": 0.0634765625, + "loss_xval": 1.28125, + "num_input_tokens_seen": 431903628, + "step": 6457 + }, + { + "epoch": 0.732822695035461, + "grad_norm": 33.3886604309082, + "learning_rate": 5e-05, + "loss": 1.2607, + "num_input_tokens_seen": 431970832, + "step": 6458 + }, + { + "epoch": 0.732822695035461, + "loss": 1.2882766723632812, + "loss_ce": 0.006050067022442818, + "loss_iou": 0.53515625, + "loss_num": 0.04150390625, + "loss_xval": 1.28125, + "num_input_tokens_seen": 431970832, + "step": 6458 + }, + { + "epoch": 0.7329361702127659, + "grad_norm": 31.50432014465332, + "learning_rate": 5e-05, + "loss": 1.2223, + "num_input_tokens_seen": 432037896, + "step": 6459 + }, + { + "epoch": 0.7329361702127659, + "loss": 0.997357189655304, + "loss_ce": 0.007244855165481567, + "loss_iou": 0.427734375, + "loss_num": 0.026611328125, + "loss_xval": 0.98828125, + "num_input_tokens_seen": 432037896, + "step": 6459 + }, + { + "epoch": 0.7330496453900709, + "grad_norm": 26.938251495361328, + "learning_rate": 5e-05, + "loss": 1.0684, + "num_input_tokens_seen": 432104360, + "step": 6460 + }, + { + "epoch": 0.7330496453900709, + "loss": 1.1465034484863281, + "loss_ce": 0.011859850957989693, + "loss_iou": 0.439453125, + "loss_num": 0.05078125, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 432104360, + "step": 6460 + }, + { + "epoch": 0.7331631205673759, + "grad_norm": 34.887332916259766, + "learning_rate": 5e-05, + "loss": 1.293, + "num_input_tokens_seen": 432171992, + "step": 6461 + }, + { + "epoch": 0.7331631205673759, + "loss": 1.3593298196792603, + "loss_ce": 0.005814232863485813, + "loss_iou": 0.5703125, + "loss_num": 0.04296875, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 432171992, + "step": 6461 + }, + { + "epoch": 0.7332765957446808, + "grad_norm": 30.644760131835938, + "learning_rate": 5e-05, + "loss": 1.1151, + "num_input_tokens_seen": 432239548, + "step": 6462 + }, + { + "epoch": 0.7332765957446808, + "loss": 1.1368341445922852, + "loss_ce": 0.005486511625349522, + "loss_iou": 0.474609375, + "loss_num": 0.036376953125, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 432239548, + "step": 6462 + }, + { + "epoch": 0.7333900709219858, + "grad_norm": 39.26530838012695, + "learning_rate": 5e-05, + "loss": 1.1599, + "num_input_tokens_seen": 432305956, + "step": 6463 + }, + { + "epoch": 0.7333900709219858, + "loss": 1.183911919593811, + "loss_ce": 0.006665792316198349, + "loss_iou": 0.50390625, + "loss_num": 0.034423828125, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 432305956, + "step": 6463 + }, + { + "epoch": 0.7335035460992908, + "grad_norm": 39.9691276550293, + "learning_rate": 5e-05, + "loss": 1.2341, + "num_input_tokens_seen": 432373436, + "step": 6464 + }, + { + "epoch": 0.7335035460992908, + "loss": 1.265466332435608, + "loss_ce": 0.006677235011011362, + "loss_iou": 0.52734375, + "loss_num": 0.040283203125, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 432373436, + "step": 6464 + }, + { + "epoch": 0.7336170212765958, + "grad_norm": 24.33230209350586, + "learning_rate": 5e-05, + "loss": 1.3821, + "num_input_tokens_seen": 432441124, + "step": 6465 + }, + { + "epoch": 0.7336170212765958, + "loss": 1.4181315898895264, + "loss_ce": 0.005533856339752674, + "loss_iou": 0.59765625, + "loss_num": 0.042724609375, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 432441124, + "step": 6465 + }, + { + "epoch": 0.7337304964539008, + "grad_norm": 14.22178840637207, + "learning_rate": 5e-05, + "loss": 1.1352, + "num_input_tokens_seen": 432506888, + "step": 6466 + }, + { + "epoch": 0.7337304964539008, + "loss": 1.0050091743469238, + "loss_ce": 0.00671824486926198, + "loss_iou": 0.37890625, + "loss_num": 0.0478515625, + "loss_xval": 1.0, + "num_input_tokens_seen": 432506888, + "step": 6466 + }, + { + "epoch": 0.7338439716312056, + "grad_norm": 46.90750503540039, + "learning_rate": 5e-05, + "loss": 1.2274, + "num_input_tokens_seen": 432575468, + "step": 6467 + }, + { + "epoch": 0.7338439716312056, + "loss": 1.2659413814544678, + "loss_ce": 0.010081931948661804, + "loss_iou": 0.482421875, + "loss_num": 0.05810546875, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 432575468, + "step": 6467 + }, + { + "epoch": 0.7339574468085106, + "grad_norm": 33.25527572631836, + "learning_rate": 5e-05, + "loss": 1.2237, + "num_input_tokens_seen": 432641420, + "step": 6468 + }, + { + "epoch": 0.7339574468085106, + "loss": 0.905497670173645, + "loss_ce": 0.012003977783024311, + "loss_iou": 0.36328125, + "loss_num": 0.033935546875, + "loss_xval": 0.89453125, + "num_input_tokens_seen": 432641420, + "step": 6468 + }, + { + "epoch": 0.7340709219858156, + "grad_norm": 32.55046081542969, + "learning_rate": 5e-05, + "loss": 1.3242, + "num_input_tokens_seen": 432708592, + "step": 6469 + }, + { + "epoch": 0.7340709219858156, + "loss": 1.410205364227295, + "loss_ce": 0.006396750919520855, + "loss_iou": 0.52734375, + "loss_num": 0.0703125, + "loss_xval": 1.40625, + "num_input_tokens_seen": 432708592, + "step": 6469 + }, + { + "epoch": 0.7341843971631206, + "grad_norm": 28.738439559936523, + "learning_rate": 5e-05, + "loss": 1.2973, + "num_input_tokens_seen": 432775120, + "step": 6470 + }, + { + "epoch": 0.7341843971631206, + "loss": 1.4857879877090454, + "loss_ce": 0.007760646753013134, + "loss_iou": 0.58203125, + "loss_num": 0.06298828125, + "loss_xval": 1.4765625, + "num_input_tokens_seen": 432775120, + "step": 6470 + }, + { + "epoch": 0.7342978723404255, + "grad_norm": 37.06528091430664, + "learning_rate": 5e-05, + "loss": 1.2438, + "num_input_tokens_seen": 432842104, + "step": 6471 + }, + { + "epoch": 0.7342978723404255, + "loss": 1.102342128753662, + "loss_ce": 0.008103788830339909, + "loss_iou": 0.470703125, + "loss_num": 0.0308837890625, + "loss_xval": 1.09375, + "num_input_tokens_seen": 432842104, + "step": 6471 + }, + { + "epoch": 0.7344113475177305, + "grad_norm": 24.122966766357422, + "learning_rate": 5e-05, + "loss": 1.2851, + "num_input_tokens_seen": 432910100, + "step": 6472 + }, + { + "epoch": 0.7344113475177305, + "loss": 1.1412174701690674, + "loss_ce": 0.006451843306422234, + "loss_iou": 0.49609375, + "loss_num": 0.0289306640625, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 432910100, + "step": 6472 + }, + { + "epoch": 0.7345248226950355, + "grad_norm": 21.162857055664062, + "learning_rate": 5e-05, + "loss": 1.2992, + "num_input_tokens_seen": 432977020, + "step": 6473 + }, + { + "epoch": 0.7345248226950355, + "loss": 1.339644193649292, + "loss_ce": 0.010542649775743484, + "loss_iou": 0.546875, + "loss_num": 0.047119140625, + "loss_xval": 1.328125, + "num_input_tokens_seen": 432977020, + "step": 6473 + }, + { + "epoch": 0.7346382978723405, + "grad_norm": 28.044099807739258, + "learning_rate": 5e-05, + "loss": 1.2324, + "num_input_tokens_seen": 433043256, + "step": 6474 + }, + { + "epoch": 0.7346382978723405, + "loss": 1.004844069480896, + "loss_ce": 0.007285473868250847, + "loss_iou": 0.419921875, + "loss_num": 0.031494140625, + "loss_xval": 0.99609375, + "num_input_tokens_seen": 433043256, + "step": 6474 + }, + { + "epoch": 0.7347517730496453, + "grad_norm": 35.372955322265625, + "learning_rate": 5e-05, + "loss": 1.3211, + "num_input_tokens_seen": 433109516, + "step": 6475 + }, + { + "epoch": 0.7347517730496453, + "loss": 1.3023616075515747, + "loss_ce": 0.004510015714913607, + "loss_iou": 0.54296875, + "loss_num": 0.042236328125, + "loss_xval": 1.296875, + "num_input_tokens_seen": 433109516, + "step": 6475 + }, + { + "epoch": 0.7348652482269503, + "grad_norm": 26.233665466308594, + "learning_rate": 5e-05, + "loss": 1.3484, + "num_input_tokens_seen": 433176848, + "step": 6476 + }, + { + "epoch": 0.7348652482269503, + "loss": 1.2602248191833496, + "loss_ce": 0.008759930729866028, + "loss_iou": 0.490234375, + "loss_num": 0.053955078125, + "loss_xval": 1.25, + "num_input_tokens_seen": 433176848, + "step": 6476 + }, + { + "epoch": 0.7349787234042553, + "grad_norm": 28.639013290405273, + "learning_rate": 5e-05, + "loss": 1.0865, + "num_input_tokens_seen": 433243972, + "step": 6477 + }, + { + "epoch": 0.7349787234042553, + "loss": 0.9701774716377258, + "loss_ce": 0.005333774257451296, + "loss_iou": 0.41796875, + "loss_num": 0.025634765625, + "loss_xval": 0.96484375, + "num_input_tokens_seen": 433243972, + "step": 6477 + }, + { + "epoch": 0.7350921985815603, + "grad_norm": 20.89000129699707, + "learning_rate": 5e-05, + "loss": 1.1211, + "num_input_tokens_seen": 433310324, + "step": 6478 + }, + { + "epoch": 0.7350921985815603, + "loss": 1.0995674133300781, + "loss_ce": 0.012043032795190811, + "loss_iou": 0.4375, + "loss_num": 0.042236328125, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 433310324, + "step": 6478 + }, + { + "epoch": 0.7352056737588653, + "grad_norm": 27.362857818603516, + "learning_rate": 5e-05, + "loss": 1.2423, + "num_input_tokens_seen": 433378496, + "step": 6479 + }, + { + "epoch": 0.7352056737588653, + "loss": 1.4422070980072021, + "loss_ce": 0.00763689074665308, + "loss_iou": 0.5703125, + "loss_num": 0.05859375, + "loss_xval": 1.4375, + "num_input_tokens_seen": 433378496, + "step": 6479 + }, + { + "epoch": 0.7353191489361702, + "grad_norm": 50.461883544921875, + "learning_rate": 5e-05, + "loss": 1.1752, + "num_input_tokens_seen": 433444756, + "step": 6480 + }, + { + "epoch": 0.7353191489361702, + "loss": 1.3163833618164062, + "loss_ce": 0.0074234819039702415, + "loss_iou": 0.5, + "loss_num": 0.06201171875, + "loss_xval": 1.3125, + "num_input_tokens_seen": 433444756, + "step": 6480 + }, + { + "epoch": 0.7354326241134752, + "grad_norm": 24.207887649536133, + "learning_rate": 5e-05, + "loss": 0.9496, + "num_input_tokens_seen": 433512260, + "step": 6481 + }, + { + "epoch": 0.7354326241134752, + "loss": 0.9457342028617859, + "loss_ce": 0.005060352385044098, + "loss_iou": 0.396484375, + "loss_num": 0.029541015625, + "loss_xval": 0.94140625, + "num_input_tokens_seen": 433512260, + "step": 6481 + }, + { + "epoch": 0.7355460992907802, + "grad_norm": 33.17621994018555, + "learning_rate": 5e-05, + "loss": 1.2991, + "num_input_tokens_seen": 433579184, + "step": 6482 + }, + { + "epoch": 0.7355460992907802, + "loss": 1.3522844314575195, + "loss_ce": 0.005116506479680538, + "loss_iou": 0.5234375, + "loss_num": 0.0595703125, + "loss_xval": 1.34375, + "num_input_tokens_seen": 433579184, + "step": 6482 + }, + { + "epoch": 0.735659574468085, + "grad_norm": 23.824642181396484, + "learning_rate": 5e-05, + "loss": 1.1491, + "num_input_tokens_seen": 433646988, + "step": 6483 + }, + { + "epoch": 0.735659574468085, + "loss": 1.1990870237350464, + "loss_ce": 0.00865735299885273, + "loss_iou": 0.484375, + "loss_num": 0.044921875, + "loss_xval": 1.1875, + "num_input_tokens_seen": 433646988, + "step": 6483 + }, + { + "epoch": 0.73577304964539, + "grad_norm": 19.296707153320312, + "learning_rate": 5e-05, + "loss": 1.2166, + "num_input_tokens_seen": 433714224, + "step": 6484 + }, + { + "epoch": 0.73577304964539, + "loss": 1.2053042650222778, + "loss_ce": 0.004620653577148914, + "loss_iou": 0.4921875, + "loss_num": 0.043701171875, + "loss_xval": 1.203125, + "num_input_tokens_seen": 433714224, + "step": 6484 + }, + { + "epoch": 0.735886524822695, + "grad_norm": 32.127227783203125, + "learning_rate": 5e-05, + "loss": 1.1571, + "num_input_tokens_seen": 433780696, + "step": 6485 + }, + { + "epoch": 0.735886524822695, + "loss": 1.2914848327636719, + "loss_ce": 0.007305137347429991, + "loss_iou": 0.53515625, + "loss_num": 0.04345703125, + "loss_xval": 1.28125, + "num_input_tokens_seen": 433780696, + "step": 6485 + }, + { + "epoch": 0.736, + "grad_norm": 27.971830368041992, + "learning_rate": 5e-05, + "loss": 1.2508, + "num_input_tokens_seen": 433847748, + "step": 6486 + }, + { + "epoch": 0.736, + "loss": 1.1836801767349243, + "loss_ce": 0.006189960986375809, + "loss_iou": 0.51171875, + "loss_num": 0.0308837890625, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 433847748, + "step": 6486 + }, + { + "epoch": 0.736113475177305, + "grad_norm": 36.815582275390625, + "learning_rate": 5e-05, + "loss": 1.1072, + "num_input_tokens_seen": 433915076, + "step": 6487 + }, + { + "epoch": 0.736113475177305, + "loss": 1.0689804553985596, + "loss_ce": 0.005992159713059664, + "loss_iou": 0.470703125, + "loss_num": 0.024169921875, + "loss_xval": 1.0625, + "num_input_tokens_seen": 433915076, + "step": 6487 + }, + { + "epoch": 0.7362269503546099, + "grad_norm": 25.78327178955078, + "learning_rate": 5e-05, + "loss": 1.2541, + "num_input_tokens_seen": 433981604, + "step": 6488 + }, + { + "epoch": 0.7362269503546099, + "loss": 1.2983930110931396, + "loss_ce": 0.007865680381655693, + "loss_iou": 0.498046875, + "loss_num": 0.058837890625, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 433981604, + "step": 6488 + }, + { + "epoch": 0.7363404255319149, + "grad_norm": 44.034202575683594, + "learning_rate": 5e-05, + "loss": 1.0838, + "num_input_tokens_seen": 434049124, + "step": 6489 + }, + { + "epoch": 0.7363404255319149, + "loss": 1.1000032424926758, + "loss_ce": 0.0038118590600788593, + "loss_iou": 0.466796875, + "loss_num": 0.032470703125, + "loss_xval": 1.09375, + "num_input_tokens_seen": 434049124, + "step": 6489 + }, + { + "epoch": 0.7364539007092199, + "grad_norm": 47.15003967285156, + "learning_rate": 5e-05, + "loss": 1.6403, + "num_input_tokens_seen": 434115800, + "step": 6490 + }, + { + "epoch": 0.7364539007092199, + "loss": 1.6664509773254395, + "loss_ce": 0.0094685610383749, + "loss_iou": 0.640625, + "loss_num": 0.07470703125, + "loss_xval": 1.65625, + "num_input_tokens_seen": 434115800, + "step": 6490 + }, + { + "epoch": 0.7365673758865249, + "grad_norm": 28.365144729614258, + "learning_rate": 5e-05, + "loss": 1.2912, + "num_input_tokens_seen": 434180800, + "step": 6491 + }, + { + "epoch": 0.7365673758865249, + "loss": 1.2409356832504272, + "loss_ce": 0.009978670626878738, + "loss_iou": 0.52734375, + "loss_num": 0.03564453125, + "loss_xval": 1.234375, + "num_input_tokens_seen": 434180800, + "step": 6491 + }, + { + "epoch": 0.7366808510638297, + "grad_norm": 15.037735939025879, + "learning_rate": 5e-05, + "loss": 1.0031, + "num_input_tokens_seen": 434247896, + "step": 6492 + }, + { + "epoch": 0.7366808510638297, + "loss": 0.8949398994445801, + "loss_ce": 0.004314887803047895, + "loss_iou": 0.3671875, + "loss_num": 0.031494140625, + "loss_xval": 0.890625, + "num_input_tokens_seen": 434247896, + "step": 6492 + }, + { + "epoch": 0.7367943262411347, + "grad_norm": 19.04848289489746, + "learning_rate": 5e-05, + "loss": 1.2643, + "num_input_tokens_seen": 434314836, + "step": 6493 + }, + { + "epoch": 0.7367943262411347, + "loss": 1.3233827352523804, + "loss_ce": 0.011737216264009476, + "loss_iou": 0.5, + "loss_num": 0.0615234375, + "loss_xval": 1.3125, + "num_input_tokens_seen": 434314836, + "step": 6493 + }, + { + "epoch": 0.7369078014184397, + "grad_norm": 28.09398651123047, + "learning_rate": 5e-05, + "loss": 1.0474, + "num_input_tokens_seen": 434380780, + "step": 6494 + }, + { + "epoch": 0.7369078014184397, + "loss": 0.9727485179901123, + "loss_ce": 0.007904743775725365, + "loss_iou": 0.40625, + "loss_num": 0.0306396484375, + "loss_xval": 0.96484375, + "num_input_tokens_seen": 434380780, + "step": 6494 + }, + { + "epoch": 0.7370212765957447, + "grad_norm": 33.00429916381836, + "learning_rate": 5e-05, + "loss": 1.3381, + "num_input_tokens_seen": 434446632, + "step": 6495 + }, + { + "epoch": 0.7370212765957447, + "loss": 1.5337436199188232, + "loss_ce": 0.005911667365580797, + "loss_iou": 0.65625, + "loss_num": 0.042236328125, + "loss_xval": 1.53125, + "num_input_tokens_seen": 434446632, + "step": 6495 + }, + { + "epoch": 0.7371347517730497, + "grad_norm": 18.86634063720703, + "learning_rate": 5e-05, + "loss": 1.1837, + "num_input_tokens_seen": 434512472, + "step": 6496 + }, + { + "epoch": 0.7371347517730497, + "loss": 1.1581153869628906, + "loss_ce": 0.003818454220890999, + "loss_iou": 0.435546875, + "loss_num": 0.056396484375, + "loss_xval": 1.15625, + "num_input_tokens_seen": 434512472, + "step": 6496 + }, + { + "epoch": 0.7372482269503546, + "grad_norm": 28.517627716064453, + "learning_rate": 5e-05, + "loss": 1.2773, + "num_input_tokens_seen": 434580036, + "step": 6497 + }, + { + "epoch": 0.7372482269503546, + "loss": 1.3914456367492676, + "loss_ce": 0.010586274787783623, + "loss_iou": 0.54296875, + "loss_num": 0.058837890625, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 434580036, + "step": 6497 + }, + { + "epoch": 0.7373617021276596, + "grad_norm": 32.93824768066406, + "learning_rate": 5e-05, + "loss": 1.1534, + "num_input_tokens_seen": 434646536, + "step": 6498 + }, + { + "epoch": 0.7373617021276596, + "loss": 1.2239402532577515, + "loss_ce": 0.0071434136480093, + "loss_iou": 0.515625, + "loss_num": 0.037353515625, + "loss_xval": 1.21875, + "num_input_tokens_seen": 434646536, + "step": 6498 + }, + { + "epoch": 0.7374751773049646, + "grad_norm": 27.84211540222168, + "learning_rate": 5e-05, + "loss": 1.1326, + "num_input_tokens_seen": 434712868, + "step": 6499 + }, + { + "epoch": 0.7374751773049646, + "loss": 1.1332294940948486, + "loss_ce": 0.003346716519445181, + "loss_iou": 0.439453125, + "loss_num": 0.050537109375, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 434712868, + "step": 6499 + }, + { + "epoch": 0.7375886524822695, + "grad_norm": 37.431270599365234, + "learning_rate": 5e-05, + "loss": 1.304, + "num_input_tokens_seen": 434779792, + "step": 6500 + }, + { + "epoch": 0.7375886524822695, + "eval_seeclick_CIoU": 0.4177742451429367, + "eval_seeclick_GIoU": 0.38833050429821014, + "eval_seeclick_IoU": 0.4906437546014786, + "eval_seeclick_MAE_all": 0.1601552516222, + "eval_seeclick_MAE_h": 0.08687831275165081, + "eval_seeclick_MAE_w": 0.11372598633170128, + "eval_seeclick_MAE_x_boxes": 0.1927899718284607, + "eval_seeclick_MAE_y_boxes": 0.1465914398431778, + "eval_seeclick_NUM_probability": 0.9999776184558868, + "eval_seeclick_inside_bbox": 0.6770833432674408, + "eval_seeclick_loss": 2.3905956745147705, + "eval_seeclick_loss_ce": 0.013554324395954609, + "eval_seeclick_loss_iou": 0.8040771484375, + "eval_seeclick_loss_num": 0.16099929809570312, + "eval_seeclick_loss_xval": 2.4097900390625, + "eval_seeclick_runtime": 78.9793, + "eval_seeclick_samples_per_second": 0.595, + "eval_seeclick_steps_per_second": 0.025, + "num_input_tokens_seen": 434779792, + "step": 6500 + }, + { + "epoch": 0.7375886524822695, + "eval_icons_CIoU": 0.45902106165885925, + "eval_icons_GIoU": 0.4535284787416458, + "eval_icons_IoU": 0.5100252628326416, + "eval_icons_MAE_all": 0.14169417321681976, + "eval_icons_MAE_h": 0.052508166059851646, + "eval_icons_MAE_w": 0.1370474472641945, + "eval_icons_MAE_x_boxes": 0.13428571075201035, + "eval_icons_MAE_y_boxes": 0.09742728620767593, + "eval_icons_NUM_probability": 0.999983012676239, + "eval_icons_inside_bbox": 0.7482638955116272, + "eval_icons_loss": 2.3750076293945312, + "eval_icons_loss_ce": 8.060374511842383e-06, + "eval_icons_loss_iou": 0.8193359375, + "eval_icons_loss_num": 0.14197158813476562, + "eval_icons_loss_xval": 2.3486328125, + "eval_icons_runtime": 67.16, + "eval_icons_samples_per_second": 0.744, + "eval_icons_steps_per_second": 0.03, + "num_input_tokens_seen": 434779792, + "step": 6500 + }, + { + "epoch": 0.7375886524822695, + "eval_screenspot_CIoU": 0.2786928713321686, + "eval_screenspot_GIoU": 0.2570389310518901, + "eval_screenspot_IoU": 0.3812679847081502, + "eval_screenspot_MAE_all": 0.20260735352834067, + "eval_screenspot_MAE_h": 0.10102259616057079, + "eval_screenspot_MAE_w": 0.1603366732597351, + "eval_screenspot_MAE_x_boxes": 0.30852071444193524, + "eval_screenspot_MAE_y_boxes": 0.12304912755886714, + "eval_screenspot_NUM_probability": 0.9999539852142334, + "eval_screenspot_inside_bbox": 0.6150000095367432, + "eval_screenspot_loss": 2.8488965034484863, + "eval_screenspot_loss_ce": 0.014826510101556778, + "eval_screenspot_loss_iou": 0.9088541666666666, + "eval_screenspot_loss_num": 0.21006266276041666, + "eval_screenspot_loss_xval": 2.8684895833333335, + "eval_screenspot_runtime": 117.8171, + "eval_screenspot_samples_per_second": 0.755, + "eval_screenspot_steps_per_second": 0.025, + "num_input_tokens_seen": 434779792, + "step": 6500 + }, + { + "epoch": 0.7375886524822695, + "eval_compot_CIoU": 0.26299647986888885, + "eval_compot_GIoU": 0.25004788488149643, + "eval_compot_IoU": 0.34389202296733856, + "eval_compot_MAE_all": 0.2323005199432373, + "eval_compot_MAE_h": 0.1368020847439766, + "eval_compot_MAE_w": 0.2656126469373703, + "eval_compot_MAE_x_boxes": 0.21168293803930283, + "eval_compot_MAE_y_boxes": 0.1030372679233551, + "eval_compot_NUM_probability": 0.9999686479568481, + "eval_compot_inside_bbox": 0.5833333432674408, + "eval_compot_loss": 3.1634397506713867, + "eval_compot_loss_ce": 0.005834448151290417, + "eval_compot_loss_iou": 0.992919921875, + "eval_compot_loss_num": 0.23491668701171875, + "eval_compot_loss_xval": 3.16064453125, + "eval_compot_runtime": 69.1176, + "eval_compot_samples_per_second": 0.723, + "eval_compot_steps_per_second": 0.029, + "num_input_tokens_seen": 434779792, + "step": 6500 + }, + { + "epoch": 0.7375886524822695, + "loss": 3.0254364013671875, + "loss_ce": 0.006881955545395613, + "loss_iou": 0.96484375, + "loss_num": 0.2177734375, + "loss_xval": 3.015625, + "num_input_tokens_seen": 434779792, + "step": 6500 + }, + { + "epoch": 0.7377021276595744, + "grad_norm": 31.54376792907715, + "learning_rate": 5e-05, + "loss": 1.2782, + "num_input_tokens_seen": 434846300, + "step": 6501 + }, + { + "epoch": 0.7377021276595744, + "loss": 1.2932264804840088, + "loss_ce": 0.0066054328344762325, + "loss_iou": 0.5, + "loss_num": 0.056396484375, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 434846300, + "step": 6501 + }, + { + "epoch": 0.7378156028368794, + "grad_norm": 27.15633773803711, + "learning_rate": 5e-05, + "loss": 1.1112, + "num_input_tokens_seen": 434914304, + "step": 6502 + }, + { + "epoch": 0.7378156028368794, + "loss": 1.1632052659988403, + "loss_ce": 0.00304896617308259, + "loss_iou": 0.466796875, + "loss_num": 0.045166015625, + "loss_xval": 1.15625, + "num_input_tokens_seen": 434914304, + "step": 6502 + }, + { + "epoch": 0.7379290780141844, + "grad_norm": 78.48091125488281, + "learning_rate": 5e-05, + "loss": 1.124, + "num_input_tokens_seen": 434982096, + "step": 6503 + }, + { + "epoch": 0.7379290780141844, + "loss": 1.2938783168792725, + "loss_ce": 0.006280732341110706, + "loss_iou": 0.546875, + "loss_num": 0.0390625, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 434982096, + "step": 6503 + }, + { + "epoch": 0.7380425531914894, + "grad_norm": 17.055204391479492, + "learning_rate": 5e-05, + "loss": 1.0412, + "num_input_tokens_seen": 435049120, + "step": 6504 + }, + { + "epoch": 0.7380425531914894, + "loss": 0.99622642993927, + "loss_ce": 0.005992061924189329, + "loss_iou": 0.427734375, + "loss_num": 0.026611328125, + "loss_xval": 0.9921875, + "num_input_tokens_seen": 435049120, + "step": 6504 + }, + { + "epoch": 0.7381560283687943, + "grad_norm": 24.87428092956543, + "learning_rate": 5e-05, + "loss": 1.1488, + "num_input_tokens_seen": 435116124, + "step": 6505 + }, + { + "epoch": 0.7381560283687943, + "loss": 1.2159569263458252, + "loss_ce": 0.006972635164856911, + "loss_iou": 0.482421875, + "loss_num": 0.049072265625, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 435116124, + "step": 6505 + }, + { + "epoch": 0.7382695035460993, + "grad_norm": 30.250831604003906, + "learning_rate": 5e-05, + "loss": 1.1434, + "num_input_tokens_seen": 435183100, + "step": 6506 + }, + { + "epoch": 0.7382695035460993, + "loss": 1.0994912385940552, + "loss_ce": 0.007694409694522619, + "loss_iou": 0.435546875, + "loss_num": 0.04443359375, + "loss_xval": 1.09375, + "num_input_tokens_seen": 435183100, + "step": 6506 + }, + { + "epoch": 0.7383829787234043, + "grad_norm": 24.523765563964844, + "learning_rate": 5e-05, + "loss": 1.3249, + "num_input_tokens_seen": 435249384, + "step": 6507 + }, + { + "epoch": 0.7383829787234043, + "loss": 1.558453917503357, + "loss_ce": 0.006207792088389397, + "loss_iou": 0.6171875, + "loss_num": 0.0634765625, + "loss_xval": 1.5546875, + "num_input_tokens_seen": 435249384, + "step": 6507 + }, + { + "epoch": 0.7384964539007092, + "grad_norm": 51.02789306640625, + "learning_rate": 5e-05, + "loss": 1.1694, + "num_input_tokens_seen": 435316336, + "step": 6508 + }, + { + "epoch": 0.7384964539007092, + "loss": 1.0859769582748413, + "loss_ce": 0.004922311753034592, + "loss_iou": 0.44140625, + "loss_num": 0.03955078125, + "loss_xval": 1.078125, + "num_input_tokens_seen": 435316336, + "step": 6508 + }, + { + "epoch": 0.7386099290780141, + "grad_norm": 27.362518310546875, + "learning_rate": 5e-05, + "loss": 1.2002, + "num_input_tokens_seen": 435382984, + "step": 6509 + }, + { + "epoch": 0.7386099290780141, + "loss": 1.2018461227416992, + "loss_ce": 0.007510146126151085, + "loss_iou": 0.494140625, + "loss_num": 0.041259765625, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 435382984, + "step": 6509 + }, + { + "epoch": 0.7387234042553191, + "grad_norm": 31.82097816467285, + "learning_rate": 5e-05, + "loss": 1.2396, + "num_input_tokens_seen": 435449800, + "step": 6510 + }, + { + "epoch": 0.7387234042553191, + "loss": 1.2517889738082886, + "loss_ce": 0.008136622607707977, + "loss_iou": 0.55078125, + "loss_num": 0.0283203125, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 435449800, + "step": 6510 + }, + { + "epoch": 0.7388368794326241, + "grad_norm": 31.46908187866211, + "learning_rate": 5e-05, + "loss": 1.1377, + "num_input_tokens_seen": 435516608, + "step": 6511 + }, + { + "epoch": 0.7388368794326241, + "loss": 1.152493953704834, + "loss_ce": 0.005033052526414394, + "loss_iou": 0.474609375, + "loss_num": 0.0400390625, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 435516608, + "step": 6511 + }, + { + "epoch": 0.7389503546099291, + "grad_norm": 29.208215713500977, + "learning_rate": 5e-05, + "loss": 1.3364, + "num_input_tokens_seen": 435582068, + "step": 6512 + }, + { + "epoch": 0.7389503546099291, + "loss": 1.2575347423553467, + "loss_ce": 0.00558159314095974, + "loss_iou": 0.5546875, + "loss_num": 0.0289306640625, + "loss_xval": 1.25, + "num_input_tokens_seen": 435582068, + "step": 6512 + }, + { + "epoch": 0.7390638297872341, + "grad_norm": 23.762365341186523, + "learning_rate": 5e-05, + "loss": 1.0176, + "num_input_tokens_seen": 435649752, + "step": 6513 + }, + { + "epoch": 0.7390638297872341, + "loss": 1.0420336723327637, + "loss_ce": 0.005900949705392122, + "loss_iou": 0.41015625, + "loss_num": 0.043212890625, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 435649752, + "step": 6513 + }, + { + "epoch": 0.739177304964539, + "grad_norm": 24.83986473083496, + "learning_rate": 5e-05, + "loss": 1.1255, + "num_input_tokens_seen": 435716764, + "step": 6514 + }, + { + "epoch": 0.739177304964539, + "loss": 1.186643123626709, + "loss_ce": 0.006467263214290142, + "loss_iou": 0.490234375, + "loss_num": 0.039794921875, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 435716764, + "step": 6514 + }, + { + "epoch": 0.739290780141844, + "grad_norm": 409.66021728515625, + "learning_rate": 5e-05, + "loss": 1.2088, + "num_input_tokens_seen": 435783264, + "step": 6515 + }, + { + "epoch": 0.739290780141844, + "loss": 1.1952292919158936, + "loss_ce": 0.006752626039087772, + "loss_iou": 0.462890625, + "loss_num": 0.05322265625, + "loss_xval": 1.1875, + "num_input_tokens_seen": 435783264, + "step": 6515 + }, + { + "epoch": 0.7394042553191489, + "grad_norm": 34.447017669677734, + "learning_rate": 5e-05, + "loss": 1.2854, + "num_input_tokens_seen": 435850464, + "step": 6516 + }, + { + "epoch": 0.7394042553191489, + "loss": 1.2814491987228394, + "loss_ce": 0.00703517347574234, + "loss_iou": 0.5234375, + "loss_num": 0.045166015625, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 435850464, + "step": 6516 + }, + { + "epoch": 0.7395177304964539, + "grad_norm": 46.39405822753906, + "learning_rate": 5e-05, + "loss": 1.2314, + "num_input_tokens_seen": 435917752, + "step": 6517 + }, + { + "epoch": 0.7395177304964539, + "loss": 1.2587131261825562, + "loss_ce": 0.00480692321434617, + "loss_iou": 0.52734375, + "loss_num": 0.04052734375, + "loss_xval": 1.25, + "num_input_tokens_seen": 435917752, + "step": 6517 + }, + { + "epoch": 0.7396312056737588, + "grad_norm": 35.861507415771484, + "learning_rate": 5e-05, + "loss": 1.3887, + "num_input_tokens_seen": 435984352, + "step": 6518 + }, + { + "epoch": 0.7396312056737588, + "loss": 1.532644271850586, + "loss_ce": 0.0038357283920049667, + "loss_iou": 0.6171875, + "loss_num": 0.05859375, + "loss_xval": 1.53125, + "num_input_tokens_seen": 435984352, + "step": 6518 + }, + { + "epoch": 0.7397446808510638, + "grad_norm": 17.67146873474121, + "learning_rate": 5e-05, + "loss": 1.0787, + "num_input_tokens_seen": 436051180, + "step": 6519 + }, + { + "epoch": 0.7397446808510638, + "loss": 1.2028770446777344, + "loss_ce": 0.010494244284927845, + "loss_iou": 0.48828125, + "loss_num": 0.042724609375, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 436051180, + "step": 6519 + }, + { + "epoch": 0.7398581560283688, + "grad_norm": 22.0462646484375, + "learning_rate": 5e-05, + "loss": 1.2675, + "num_input_tokens_seen": 436117632, + "step": 6520 + }, + { + "epoch": 0.7398581560283688, + "loss": 1.2203636169433594, + "loss_ce": 0.007961235009133816, + "loss_iou": 0.47265625, + "loss_num": 0.053466796875, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 436117632, + "step": 6520 + }, + { + "epoch": 0.7399716312056738, + "grad_norm": 30.03647232055664, + "learning_rate": 5e-05, + "loss": 1.2134, + "num_input_tokens_seen": 436184956, + "step": 6521 + }, + { + "epoch": 0.7399716312056738, + "loss": 1.1853396892547607, + "loss_ce": 0.00516388937830925, + "loss_iou": 0.46875, + "loss_num": 0.048583984375, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 436184956, + "step": 6521 + }, + { + "epoch": 0.7400851063829788, + "grad_norm": 27.222688674926758, + "learning_rate": 5e-05, + "loss": 1.2676, + "num_input_tokens_seen": 436252188, + "step": 6522 + }, + { + "epoch": 0.7400851063829788, + "loss": 1.3271864652633667, + "loss_ce": 0.007850526832044125, + "loss_iou": 0.546875, + "loss_num": 0.045654296875, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 436252188, + "step": 6522 + }, + { + "epoch": 0.7401985815602837, + "grad_norm": 21.63599967956543, + "learning_rate": 5e-05, + "loss": 1.11, + "num_input_tokens_seen": 436319956, + "step": 6523 + }, + { + "epoch": 0.7401985815602837, + "loss": 1.3187940120697021, + "loss_ce": 0.007758958265185356, + "loss_iou": 0.50390625, + "loss_num": 0.06005859375, + "loss_xval": 1.3125, + "num_input_tokens_seen": 436319956, + "step": 6523 + }, + { + "epoch": 0.7403120567375886, + "grad_norm": 40.81991958618164, + "learning_rate": 5e-05, + "loss": 0.9189, + "num_input_tokens_seen": 436386536, + "step": 6524 + }, + { + "epoch": 0.7403120567375886, + "loss": 0.808967113494873, + "loss_ce": 0.005744424648582935, + "loss_iou": 0.33984375, + "loss_num": 0.02490234375, + "loss_xval": 0.8046875, + "num_input_tokens_seen": 436386536, + "step": 6524 + }, + { + "epoch": 0.7404255319148936, + "grad_norm": 38.49642562866211, + "learning_rate": 5e-05, + "loss": 1.0542, + "num_input_tokens_seen": 436452200, + "step": 6525 + }, + { + "epoch": 0.7404255319148936, + "loss": 1.223861575126648, + "loss_ce": 0.0055998931638896465, + "loss_iou": 0.51171875, + "loss_num": 0.038818359375, + "loss_xval": 1.21875, + "num_input_tokens_seen": 436452200, + "step": 6525 + }, + { + "epoch": 0.7405390070921986, + "grad_norm": 45.26475143432617, + "learning_rate": 5e-05, + "loss": 1.4045, + "num_input_tokens_seen": 436518984, + "step": 6526 + }, + { + "epoch": 0.7405390070921986, + "loss": 1.3866164684295654, + "loss_ce": 0.010639971122145653, + "loss_iou": 0.56640625, + "loss_num": 0.048095703125, + "loss_xval": 1.375, + "num_input_tokens_seen": 436518984, + "step": 6526 + }, + { + "epoch": 0.7406524822695035, + "grad_norm": 40.740638732910156, + "learning_rate": 5e-05, + "loss": 1.1089, + "num_input_tokens_seen": 436585152, + "step": 6527 + }, + { + "epoch": 0.7406524822695035, + "loss": 1.1192550659179688, + "loss_ce": 0.007926948368549347, + "loss_iou": 0.462890625, + "loss_num": 0.037353515625, + "loss_xval": 1.109375, + "num_input_tokens_seen": 436585152, + "step": 6527 + }, + { + "epoch": 0.7407659574468085, + "grad_norm": 87.79241943359375, + "learning_rate": 5e-05, + "loss": 1.1983, + "num_input_tokens_seen": 436652172, + "step": 6528 + }, + { + "epoch": 0.7407659574468085, + "loss": 1.2653142213821411, + "loss_ce": 0.012384526431560516, + "loss_iou": 0.474609375, + "loss_num": 0.060546875, + "loss_xval": 1.25, + "num_input_tokens_seen": 436652172, + "step": 6528 + }, + { + "epoch": 0.7408794326241135, + "grad_norm": 23.55064582824707, + "learning_rate": 5e-05, + "loss": 0.9954, + "num_input_tokens_seen": 436718156, + "step": 6529 + }, + { + "epoch": 0.7408794326241135, + "loss": 0.9637253880500793, + "loss_ce": 0.007182421628385782, + "loss_iou": 0.390625, + "loss_num": 0.03515625, + "loss_xval": 0.95703125, + "num_input_tokens_seen": 436718156, + "step": 6529 + }, + { + "epoch": 0.7409929078014185, + "grad_norm": 20.51356315612793, + "learning_rate": 5e-05, + "loss": 1.0173, + "num_input_tokens_seen": 436785328, + "step": 6530 + }, + { + "epoch": 0.7409929078014185, + "loss": 1.0013750791549683, + "loss_ce": 0.005281304940581322, + "loss_iou": 0.41015625, + "loss_num": 0.035400390625, + "loss_xval": 0.99609375, + "num_input_tokens_seen": 436785328, + "step": 6530 + }, + { + "epoch": 0.7411063829787234, + "grad_norm": 32.52088928222656, + "learning_rate": 5e-05, + "loss": 1.1146, + "num_input_tokens_seen": 436852272, + "step": 6531 + }, + { + "epoch": 0.7411063829787234, + "loss": 1.1684715747833252, + "loss_ce": 0.007827136665582657, + "loss_iou": 0.48828125, + "loss_num": 0.036865234375, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 436852272, + "step": 6531 + }, + { + "epoch": 0.7412198581560284, + "grad_norm": 27.18418312072754, + "learning_rate": 5e-05, + "loss": 1.0905, + "num_input_tokens_seen": 436919124, + "step": 6532 + }, + { + "epoch": 0.7412198581560284, + "loss": 1.229860544204712, + "loss_ce": 0.007692480459809303, + "loss_iou": 0.4921875, + "loss_num": 0.047607421875, + "loss_xval": 1.21875, + "num_input_tokens_seen": 436919124, + "step": 6532 + }, + { + "epoch": 0.7413333333333333, + "grad_norm": 45.68538284301758, + "learning_rate": 5e-05, + "loss": 1.4972, + "num_input_tokens_seen": 436986040, + "step": 6533 + }, + { + "epoch": 0.7413333333333333, + "loss": 1.3273239135742188, + "loss_ce": 0.00554653350263834, + "loss_iou": 0.53125, + "loss_num": 0.05224609375, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 436986040, + "step": 6533 + }, + { + "epoch": 0.7414468085106383, + "grad_norm": 28.3549861907959, + "learning_rate": 5e-05, + "loss": 1.4371, + "num_input_tokens_seen": 437053276, + "step": 6534 + }, + { + "epoch": 0.7414468085106383, + "loss": 1.24031662940979, + "loss_ce": 0.003988583572208881, + "loss_iou": 0.53125, + "loss_num": 0.03466796875, + "loss_xval": 1.234375, + "num_input_tokens_seen": 437053276, + "step": 6534 + }, + { + "epoch": 0.7415602836879432, + "grad_norm": 26.16122055053711, + "learning_rate": 5e-05, + "loss": 1.0581, + "num_input_tokens_seen": 437120832, + "step": 6535 + }, + { + "epoch": 0.7415602836879432, + "loss": 1.0461180210113525, + "loss_ce": 0.007543834857642651, + "loss_iou": 0.41015625, + "loss_num": 0.043701171875, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 437120832, + "step": 6535 + }, + { + "epoch": 0.7416737588652482, + "grad_norm": 22.17603302001953, + "learning_rate": 5e-05, + "loss": 0.9874, + "num_input_tokens_seen": 437188300, + "step": 6536 + }, + { + "epoch": 0.7416737588652482, + "loss": 0.9718542695045471, + "loss_ce": 0.009940162301063538, + "loss_iou": 0.40234375, + "loss_num": 0.031494140625, + "loss_xval": 0.9609375, + "num_input_tokens_seen": 437188300, + "step": 6536 + }, + { + "epoch": 0.7417872340425532, + "grad_norm": 31.621313095092773, + "learning_rate": 5e-05, + "loss": 1.1595, + "num_input_tokens_seen": 437255880, + "step": 6537 + }, + { + "epoch": 0.7417872340425532, + "loss": 1.2324765920639038, + "loss_ce": 0.00493755005300045, + "loss_iou": 0.546875, + "loss_num": 0.0272216796875, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 437255880, + "step": 6537 + }, + { + "epoch": 0.7419007092198582, + "grad_norm": 28.12327003479004, + "learning_rate": 5e-05, + "loss": 1.3289, + "num_input_tokens_seen": 437322904, + "step": 6538 + }, + { + "epoch": 0.7419007092198582, + "loss": 1.2223634719848633, + "loss_ce": 0.005078264977782965, + "loss_iou": 0.51953125, + "loss_num": 0.03564453125, + "loss_xval": 1.21875, + "num_input_tokens_seen": 437322904, + "step": 6538 + }, + { + "epoch": 0.7420141843971632, + "grad_norm": 30.737533569335938, + "learning_rate": 5e-05, + "loss": 1.2168, + "num_input_tokens_seen": 437389220, + "step": 6539 + }, + { + "epoch": 0.7420141843971632, + "loss": 1.1568591594696045, + "loss_ce": 0.006468550302088261, + "loss_iou": 0.48046875, + "loss_num": 0.0380859375, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 437389220, + "step": 6539 + }, + { + "epoch": 0.7421276595744681, + "grad_norm": 36.06966018676758, + "learning_rate": 5e-05, + "loss": 1.2943, + "num_input_tokens_seen": 437457188, + "step": 6540 + }, + { + "epoch": 0.7421276595744681, + "loss": 1.2359626293182373, + "loss_ce": 0.009400086477398872, + "loss_iou": 0.50390625, + "loss_num": 0.044189453125, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 437457188, + "step": 6540 + }, + { + "epoch": 0.742241134751773, + "grad_norm": 35.95475769042969, + "learning_rate": 5e-05, + "loss": 0.933, + "num_input_tokens_seen": 437523560, + "step": 6541 + }, + { + "epoch": 0.742241134751773, + "loss": 0.9404191374778748, + "loss_ce": 0.008290223777294159, + "loss_iou": 0.41015625, + "loss_num": 0.022705078125, + "loss_xval": 0.93359375, + "num_input_tokens_seen": 437523560, + "step": 6541 + }, + { + "epoch": 0.742354609929078, + "grad_norm": 29.657455444335938, + "learning_rate": 5e-05, + "loss": 1.2333, + "num_input_tokens_seen": 437590560, + "step": 6542 + }, + { + "epoch": 0.742354609929078, + "loss": 1.0066946744918823, + "loss_ce": 0.005229875911027193, + "loss_iou": 0.462890625, + "loss_num": 0.0150146484375, + "loss_xval": 1.0, + "num_input_tokens_seen": 437590560, + "step": 6542 + }, + { + "epoch": 0.742468085106383, + "grad_norm": 22.637252807617188, + "learning_rate": 5e-05, + "loss": 1.0181, + "num_input_tokens_seen": 437656484, + "step": 6543 + }, + { + "epoch": 0.742468085106383, + "loss": 0.7407025098800659, + "loss_ce": 0.004130230285227299, + "loss_iou": 0.33203125, + "loss_num": 0.0145263671875, + "loss_xval": 0.73828125, + "num_input_tokens_seen": 437656484, + "step": 6543 + }, + { + "epoch": 0.7425815602836879, + "grad_norm": 30.31768035888672, + "learning_rate": 5e-05, + "loss": 1.1023, + "num_input_tokens_seen": 437724212, + "step": 6544 + }, + { + "epoch": 0.7425815602836879, + "loss": 1.1939798593521118, + "loss_ce": 0.007944697514176369, + "loss_iou": 0.47265625, + "loss_num": 0.04833984375, + "loss_xval": 1.1875, + "num_input_tokens_seen": 437724212, + "step": 6544 + }, + { + "epoch": 0.7426950354609929, + "grad_norm": 31.493072509765625, + "learning_rate": 5e-05, + "loss": 1.2703, + "num_input_tokens_seen": 437790300, + "step": 6545 + }, + { + "epoch": 0.7426950354609929, + "loss": 1.236810564994812, + "loss_ce": 0.004877022001892328, + "loss_iou": 0.515625, + "loss_num": 0.04052734375, + "loss_xval": 1.234375, + "num_input_tokens_seen": 437790300, + "step": 6545 + }, + { + "epoch": 0.7428085106382979, + "grad_norm": 31.88843536376953, + "learning_rate": 5e-05, + "loss": 1.3143, + "num_input_tokens_seen": 437857900, + "step": 6546 + }, + { + "epoch": 0.7428085106382979, + "loss": 1.2127907276153564, + "loss_ce": 0.012595416978001595, + "loss_iou": 0.4609375, + "loss_num": 0.0556640625, + "loss_xval": 1.203125, + "num_input_tokens_seen": 437857900, + "step": 6546 + }, + { + "epoch": 0.7429219858156029, + "grad_norm": 33.97758483886719, + "learning_rate": 5e-05, + "loss": 1.1994, + "num_input_tokens_seen": 437925248, + "step": 6547 + }, + { + "epoch": 0.7429219858156029, + "loss": 1.1468405723571777, + "loss_ce": 0.004750578664243221, + "loss_iou": 0.494140625, + "loss_num": 0.0303955078125, + "loss_xval": 1.140625, + "num_input_tokens_seen": 437925248, + "step": 6547 + }, + { + "epoch": 0.7430354609929078, + "grad_norm": 27.284076690673828, + "learning_rate": 5e-05, + "loss": 1.1035, + "num_input_tokens_seen": 437992224, + "step": 6548 + }, + { + "epoch": 0.7430354609929078, + "loss": 1.1698274612426758, + "loss_ce": 0.0047884127125144005, + "loss_iou": 0.455078125, + "loss_num": 0.05078125, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 437992224, + "step": 6548 + }, + { + "epoch": 0.7431489361702127, + "grad_norm": 24.11396598815918, + "learning_rate": 5e-05, + "loss": 1.3799, + "num_input_tokens_seen": 438059956, + "step": 6549 + }, + { + "epoch": 0.7431489361702127, + "loss": 1.3497658967971802, + "loss_ce": 0.007480769883841276, + "loss_iou": 0.53515625, + "loss_num": 0.054443359375, + "loss_xval": 1.34375, + "num_input_tokens_seen": 438059956, + "step": 6549 + }, + { + "epoch": 0.7432624113475177, + "grad_norm": 23.080821990966797, + "learning_rate": 5e-05, + "loss": 1.2671, + "num_input_tokens_seen": 438126248, + "step": 6550 + }, + { + "epoch": 0.7432624113475177, + "loss": 1.310678482055664, + "loss_ce": 0.005014466587454081, + "loss_iou": 0.5078125, + "loss_num": 0.05810546875, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 438126248, + "step": 6550 + }, + { + "epoch": 0.7433758865248227, + "grad_norm": 18.03363609313965, + "learning_rate": 5e-05, + "loss": 1.0296, + "num_input_tokens_seen": 438193164, + "step": 6551 + }, + { + "epoch": 0.7433758865248227, + "loss": 1.064736008644104, + "loss_ce": 0.006630551069974899, + "loss_iou": 0.4609375, + "loss_num": 0.0272216796875, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 438193164, + "step": 6551 + }, + { + "epoch": 0.7434893617021276, + "grad_norm": 19.590293884277344, + "learning_rate": 5e-05, + "loss": 1.219, + "num_input_tokens_seen": 438259424, + "step": 6552 + }, + { + "epoch": 0.7434893617021276, + "loss": 1.1868419647216797, + "loss_ce": 0.011060741730034351, + "loss_iou": 0.4921875, + "loss_num": 0.037841796875, + "loss_xval": 1.171875, + "num_input_tokens_seen": 438259424, + "step": 6552 + }, + { + "epoch": 0.7436028368794326, + "grad_norm": 26.944355010986328, + "learning_rate": 5e-05, + "loss": 1.3125, + "num_input_tokens_seen": 438327220, + "step": 6553 + }, + { + "epoch": 0.7436028368794326, + "loss": 1.504939079284668, + "loss_ce": 0.004450795240700245, + "loss_iou": 0.54296875, + "loss_num": 0.08349609375, + "loss_xval": 1.5, + "num_input_tokens_seen": 438327220, + "step": 6553 + }, + { + "epoch": 0.7437163120567376, + "grad_norm": 32.265811920166016, + "learning_rate": 5e-05, + "loss": 1.3065, + "num_input_tokens_seen": 438394024, + "step": 6554 + }, + { + "epoch": 0.7437163120567376, + "loss": 1.2740364074707031, + "loss_ce": 0.006458223331719637, + "loss_iou": 0.51953125, + "loss_num": 0.0458984375, + "loss_xval": 1.265625, + "num_input_tokens_seen": 438394024, + "step": 6554 + }, + { + "epoch": 0.7438297872340426, + "grad_norm": 33.574214935302734, + "learning_rate": 5e-05, + "loss": 1.1235, + "num_input_tokens_seen": 438461432, + "step": 6555 + }, + { + "epoch": 0.7438297872340426, + "loss": 1.1347715854644775, + "loss_ce": 0.008794937282800674, + "loss_iou": 0.47265625, + "loss_num": 0.036376953125, + "loss_xval": 1.125, + "num_input_tokens_seen": 438461432, + "step": 6555 + }, + { + "epoch": 0.7439432624113476, + "grad_norm": 35.7299919128418, + "learning_rate": 5e-05, + "loss": 1.1631, + "num_input_tokens_seen": 438529328, + "step": 6556 + }, + { + "epoch": 0.7439432624113476, + "loss": 1.237851858139038, + "loss_ce": 0.005918152630329132, + "loss_iou": 0.515625, + "loss_num": 0.0400390625, + "loss_xval": 1.234375, + "num_input_tokens_seen": 438529328, + "step": 6556 + }, + { + "epoch": 0.7440567375886524, + "grad_norm": 41.606510162353516, + "learning_rate": 5e-05, + "loss": 1.1957, + "num_input_tokens_seen": 438596752, + "step": 6557 + }, + { + "epoch": 0.7440567375886524, + "loss": 1.0550909042358398, + "loss_ce": 0.006751019973307848, + "loss_iou": 0.4296875, + "loss_num": 0.038330078125, + "loss_xval": 1.046875, + "num_input_tokens_seen": 438596752, + "step": 6557 + }, + { + "epoch": 0.7441702127659574, + "grad_norm": 34.97700500488281, + "learning_rate": 5e-05, + "loss": 1.2232, + "num_input_tokens_seen": 438663384, + "step": 6558 + }, + { + "epoch": 0.7441702127659574, + "loss": 1.336807370185852, + "loss_ce": 0.008194156922399998, + "loss_iou": 0.5625, + "loss_num": 0.040771484375, + "loss_xval": 1.328125, + "num_input_tokens_seen": 438663384, + "step": 6558 + }, + { + "epoch": 0.7442836879432624, + "grad_norm": 36.73731231689453, + "learning_rate": 5e-05, + "loss": 1.1045, + "num_input_tokens_seen": 438730324, + "step": 6559 + }, + { + "epoch": 0.7442836879432624, + "loss": 1.2112494707107544, + "loss_ce": 0.007636186666786671, + "loss_iou": 0.4921875, + "loss_num": 0.044189453125, + "loss_xval": 1.203125, + "num_input_tokens_seen": 438730324, + "step": 6559 + }, + { + "epoch": 0.7443971631205674, + "grad_norm": 17.55241584777832, + "learning_rate": 5e-05, + "loss": 1.1368, + "num_input_tokens_seen": 438797232, + "step": 6560 + }, + { + "epoch": 0.7443971631205674, + "loss": 1.226244330406189, + "loss_ce": 0.005052902735769749, + "loss_iou": 0.5078125, + "loss_num": 0.040771484375, + "loss_xval": 1.21875, + "num_input_tokens_seen": 438797232, + "step": 6560 + }, + { + "epoch": 0.7445106382978723, + "grad_norm": 36.390594482421875, + "learning_rate": 5e-05, + "loss": 1.2017, + "num_input_tokens_seen": 438863948, + "step": 6561 + }, + { + "epoch": 0.7445106382978723, + "loss": 1.1755149364471436, + "loss_ce": 0.006081259809434414, + "loss_iou": 0.45703125, + "loss_num": 0.05126953125, + "loss_xval": 1.171875, + "num_input_tokens_seen": 438863948, + "step": 6561 + }, + { + "epoch": 0.7446241134751773, + "grad_norm": 27.958438873291016, + "learning_rate": 5e-05, + "loss": 1.459, + "num_input_tokens_seen": 438932176, + "step": 6562 + }, + { + "epoch": 0.7446241134751773, + "loss": 1.5478334426879883, + "loss_ce": 0.008770878426730633, + "loss_iou": 0.58203125, + "loss_num": 0.07568359375, + "loss_xval": 1.5390625, + "num_input_tokens_seen": 438932176, + "step": 6562 + }, + { + "epoch": 0.7447375886524823, + "grad_norm": 14.111446380615234, + "learning_rate": 5e-05, + "loss": 0.9272, + "num_input_tokens_seen": 438998016, + "step": 6563 + }, + { + "epoch": 0.7447375886524823, + "loss": 0.9975356459617615, + "loss_ce": 0.0041274442337453365, + "loss_iou": 0.4140625, + "loss_num": 0.033203125, + "loss_xval": 0.9921875, + "num_input_tokens_seen": 438998016, + "step": 6563 + }, + { + "epoch": 0.7448510638297873, + "grad_norm": 46.5938720703125, + "learning_rate": 5e-05, + "loss": 1.2744, + "num_input_tokens_seen": 439065068, + "step": 6564 + }, + { + "epoch": 0.7448510638297873, + "loss": 1.1460727453231812, + "loss_ce": 0.010818824172019958, + "loss_iou": 0.46875, + "loss_num": 0.0400390625, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 439065068, + "step": 6564 + }, + { + "epoch": 0.7449645390070923, + "grad_norm": 27.6741886138916, + "learning_rate": 5e-05, + "loss": 1.1438, + "num_input_tokens_seen": 439130736, + "step": 6565 + }, + { + "epoch": 0.7449645390070923, + "loss": 1.4487390518188477, + "loss_ce": 0.006356225814670324, + "loss_iou": 0.59375, + "loss_num": 0.051025390625, + "loss_xval": 1.4453125, + "num_input_tokens_seen": 439130736, + "step": 6565 + }, + { + "epoch": 0.7450780141843971, + "grad_norm": 38.64679718017578, + "learning_rate": 5e-05, + "loss": 1.0687, + "num_input_tokens_seen": 439198168, + "step": 6566 + }, + { + "epoch": 0.7450780141843971, + "loss": 1.1443469524383545, + "loss_ce": 0.006163435056805611, + "loss_iou": 0.44140625, + "loss_num": 0.05078125, + "loss_xval": 1.140625, + "num_input_tokens_seen": 439198168, + "step": 6566 + }, + { + "epoch": 0.7451914893617021, + "grad_norm": 29.56553840637207, + "learning_rate": 5e-05, + "loss": 1.2659, + "num_input_tokens_seen": 439265144, + "step": 6567 + }, + { + "epoch": 0.7451914893617021, + "loss": 1.0920113325119019, + "loss_ce": 0.0026558588724583387, + "loss_iou": 0.44921875, + "loss_num": 0.038330078125, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 439265144, + "step": 6567 + }, + { + "epoch": 0.7453049645390071, + "grad_norm": 24.168506622314453, + "learning_rate": 5e-05, + "loss": 1.0354, + "num_input_tokens_seen": 439332092, + "step": 6568 + }, + { + "epoch": 0.7453049645390071, + "loss": 1.0197453498840332, + "loss_ce": 0.006073486525565386, + "loss_iou": 0.4296875, + "loss_num": 0.030517578125, + "loss_xval": 1.015625, + "num_input_tokens_seen": 439332092, + "step": 6568 + }, + { + "epoch": 0.745418439716312, + "grad_norm": 34.77493667602539, + "learning_rate": 5e-05, + "loss": 1.1474, + "num_input_tokens_seen": 439399968, + "step": 6569 + }, + { + "epoch": 0.745418439716312, + "loss": 1.083505630493164, + "loss_ce": 0.006357185542583466, + "loss_iou": 0.46484375, + "loss_num": 0.0302734375, + "loss_xval": 1.078125, + "num_input_tokens_seen": 439399968, + "step": 6569 + }, + { + "epoch": 0.745531914893617, + "grad_norm": 80.38280487060547, + "learning_rate": 5e-05, + "loss": 1.4807, + "num_input_tokens_seen": 439466856, + "step": 6570 + }, + { + "epoch": 0.745531914893617, + "loss": 1.4612531661987305, + "loss_ce": 0.008128169924020767, + "loss_iou": 0.59765625, + "loss_num": 0.0517578125, + "loss_xval": 1.453125, + "num_input_tokens_seen": 439466856, + "step": 6570 + }, + { + "epoch": 0.745645390070922, + "grad_norm": 16.336851119995117, + "learning_rate": 5e-05, + "loss": 1.0675, + "num_input_tokens_seen": 439533352, + "step": 6571 + }, + { + "epoch": 0.745645390070922, + "loss": 1.091285228729248, + "loss_ce": 0.0073008667677640915, + "loss_iou": 0.48046875, + "loss_num": 0.024169921875, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 439533352, + "step": 6571 + }, + { + "epoch": 0.745758865248227, + "grad_norm": 19.039762496948242, + "learning_rate": 5e-05, + "loss": 1.1131, + "num_input_tokens_seen": 439600276, + "step": 6572 + }, + { + "epoch": 0.745758865248227, + "loss": 1.0732500553131104, + "loss_ce": 0.005378998350352049, + "loss_iou": 0.4609375, + "loss_num": 0.029052734375, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 439600276, + "step": 6572 + }, + { + "epoch": 0.745872340425532, + "grad_norm": 19.79744529724121, + "learning_rate": 5e-05, + "loss": 1.3716, + "num_input_tokens_seen": 439667488, + "step": 6573 + }, + { + "epoch": 0.745872340425532, + "loss": 1.3386942148208618, + "loss_ce": 0.0061746626161038876, + "loss_iou": 0.5859375, + "loss_num": 0.032958984375, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 439667488, + "step": 6573 + }, + { + "epoch": 0.7459858156028368, + "grad_norm": 21.48115348815918, + "learning_rate": 5e-05, + "loss": 1.074, + "num_input_tokens_seen": 439734840, + "step": 6574 + }, + { + "epoch": 0.7459858156028368, + "loss": 1.1877522468566895, + "loss_ce": 0.011482657864689827, + "loss_iou": 0.50390625, + "loss_num": 0.033447265625, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 439734840, + "step": 6574 + }, + { + "epoch": 0.7460992907801418, + "grad_norm": 35.780364990234375, + "learning_rate": 5e-05, + "loss": 1.2458, + "num_input_tokens_seen": 439801552, + "step": 6575 + }, + { + "epoch": 0.7460992907801418, + "loss": 1.2491122484207153, + "loss_ce": 0.007901293225586414, + "loss_iou": 0.51953125, + "loss_num": 0.041259765625, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 439801552, + "step": 6575 + }, + { + "epoch": 0.7462127659574468, + "grad_norm": 38.14342498779297, + "learning_rate": 5e-05, + "loss": 1.329, + "num_input_tokens_seen": 439869728, + "step": 6576 + }, + { + "epoch": 0.7462127659574468, + "loss": 1.2813327312469482, + "loss_ce": 0.005453860852867365, + "loss_iou": 0.51171875, + "loss_num": 0.05029296875, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 439869728, + "step": 6576 + }, + { + "epoch": 0.7463262411347518, + "grad_norm": 49.46323776245117, + "learning_rate": 5e-05, + "loss": 1.4567, + "num_input_tokens_seen": 439935720, + "step": 6577 + }, + { + "epoch": 0.7463262411347518, + "loss": 1.545712947845459, + "loss_ce": 0.0066504571586847305, + "loss_iou": 0.640625, + "loss_num": 0.051025390625, + "loss_xval": 1.5390625, + "num_input_tokens_seen": 439935720, + "step": 6577 + }, + { + "epoch": 0.7464397163120567, + "grad_norm": 39.13529968261719, + "learning_rate": 5e-05, + "loss": 1.3321, + "num_input_tokens_seen": 440002420, + "step": 6578 + }, + { + "epoch": 0.7464397163120567, + "loss": 1.3612537384033203, + "loss_ce": 0.010179468430578709, + "loss_iou": 0.52734375, + "loss_num": 0.0595703125, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 440002420, + "step": 6578 + }, + { + "epoch": 0.7465531914893617, + "grad_norm": 15.515563011169434, + "learning_rate": 5e-05, + "loss": 1.3414, + "num_input_tokens_seen": 440068880, + "step": 6579 + }, + { + "epoch": 0.7465531914893617, + "loss": 1.3755168914794922, + "loss_ce": 0.004911421798169613, + "loss_iou": 0.546875, + "loss_num": 0.055908203125, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 440068880, + "step": 6579 + }, + { + "epoch": 0.7466666666666667, + "grad_norm": 109.54253387451172, + "learning_rate": 5e-05, + "loss": 1.1205, + "num_input_tokens_seen": 440135680, + "step": 6580 + }, + { + "epoch": 0.7466666666666667, + "loss": 1.2984933853149414, + "loss_ce": 0.005524659529328346, + "loss_iou": 0.48828125, + "loss_num": 0.0634765625, + "loss_xval": 1.296875, + "num_input_tokens_seen": 440135680, + "step": 6580 + }, + { + "epoch": 0.7467801418439717, + "grad_norm": 29.844751358032227, + "learning_rate": 5e-05, + "loss": 1.1478, + "num_input_tokens_seen": 440202304, + "step": 6581 + }, + { + "epoch": 0.7467801418439717, + "loss": 1.0872374773025513, + "loss_ce": 0.005206199362874031, + "loss_iou": 0.478515625, + "loss_num": 0.025146484375, + "loss_xval": 1.078125, + "num_input_tokens_seen": 440202304, + "step": 6581 + }, + { + "epoch": 0.7468936170212765, + "grad_norm": 23.116683959960938, + "learning_rate": 5e-05, + "loss": 1.1113, + "num_input_tokens_seen": 440269240, + "step": 6582 + }, + { + "epoch": 0.7468936170212765, + "loss": 1.1833055019378662, + "loss_ce": 0.007524291984736919, + "loss_iou": 0.49609375, + "loss_num": 0.036865234375, + "loss_xval": 1.171875, + "num_input_tokens_seen": 440269240, + "step": 6582 + }, + { + "epoch": 0.7470070921985815, + "grad_norm": 15.37598705291748, + "learning_rate": 5e-05, + "loss": 1.188, + "num_input_tokens_seen": 440336116, + "step": 6583 + }, + { + "epoch": 0.7470070921985815, + "loss": 1.418188452720642, + "loss_ce": 0.00998539850115776, + "loss_iou": 0.5390625, + "loss_num": 0.0654296875, + "loss_xval": 1.40625, + "num_input_tokens_seen": 440336116, + "step": 6583 + }, + { + "epoch": 0.7471205673758865, + "grad_norm": 32.895545959472656, + "learning_rate": 5e-05, + "loss": 1.1175, + "num_input_tokens_seen": 440402808, + "step": 6584 + }, + { + "epoch": 0.7471205673758865, + "loss": 1.0450619459152222, + "loss_ce": 0.009417378343641758, + "loss_iou": 0.3984375, + "loss_num": 0.048095703125, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 440402808, + "step": 6584 + }, + { + "epoch": 0.7472340425531915, + "grad_norm": 18.08452033996582, + "learning_rate": 5e-05, + "loss": 1.2462, + "num_input_tokens_seen": 440469392, + "step": 6585 + }, + { + "epoch": 0.7472340425531915, + "loss": 1.29411780834198, + "loss_ce": 0.0052994368597865105, + "loss_iou": 0.51171875, + "loss_num": 0.052978515625, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 440469392, + "step": 6585 + }, + { + "epoch": 0.7473475177304965, + "grad_norm": 25.623533248901367, + "learning_rate": 5e-05, + "loss": 1.16, + "num_input_tokens_seen": 440536468, + "step": 6586 + }, + { + "epoch": 0.7473475177304965, + "loss": 1.135622262954712, + "loss_ce": 0.009157403372228146, + "loss_iou": 0.44921875, + "loss_num": 0.0458984375, + "loss_xval": 1.125, + "num_input_tokens_seen": 440536468, + "step": 6586 + }, + { + "epoch": 0.7474609929078014, + "grad_norm": 22.55150604248047, + "learning_rate": 5e-05, + "loss": 0.9996, + "num_input_tokens_seen": 440603684, + "step": 6587 + }, + { + "epoch": 0.7474609929078014, + "loss": 0.9697716236114502, + "loss_ce": 0.006880969740450382, + "loss_iou": 0.40625, + "loss_num": 0.02978515625, + "loss_xval": 0.9609375, + "num_input_tokens_seen": 440603684, + "step": 6587 + }, + { + "epoch": 0.7475744680851064, + "grad_norm": 23.06641960144043, + "learning_rate": 5e-05, + "loss": 0.9926, + "num_input_tokens_seen": 440669548, + "step": 6588 + }, + { + "epoch": 0.7475744680851064, + "loss": 0.9703456163406372, + "loss_ce": 0.004281169269233942, + "loss_iou": 0.3828125, + "loss_num": 0.040283203125, + "loss_xval": 0.96484375, + "num_input_tokens_seen": 440669548, + "step": 6588 + }, + { + "epoch": 0.7476879432624114, + "grad_norm": 24.379667282104492, + "learning_rate": 5e-05, + "loss": 1.0984, + "num_input_tokens_seen": 440736748, + "step": 6589 + }, + { + "epoch": 0.7476879432624114, + "loss": 1.0392348766326904, + "loss_ce": 0.005787694361060858, + "loss_iou": 0.40625, + "loss_num": 0.04443359375, + "loss_xval": 1.03125, + "num_input_tokens_seen": 440736748, + "step": 6589 + }, + { + "epoch": 0.7478014184397163, + "grad_norm": 33.50723648071289, + "learning_rate": 5e-05, + "loss": 1.333, + "num_input_tokens_seen": 440803760, + "step": 6590 + }, + { + "epoch": 0.7478014184397163, + "loss": 1.2170970439910889, + "loss_ce": 0.008356835693120956, + "loss_iou": 0.484375, + "loss_num": 0.048095703125, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 440803760, + "step": 6590 + }, + { + "epoch": 0.7479148936170212, + "grad_norm": 30.986074447631836, + "learning_rate": 5e-05, + "loss": 1.2402, + "num_input_tokens_seen": 440870900, + "step": 6591 + }, + { + "epoch": 0.7479148936170212, + "loss": 1.3210687637329102, + "loss_ce": 0.00905705988407135, + "loss_iou": 0.5546875, + "loss_num": 0.0400390625, + "loss_xval": 1.3125, + "num_input_tokens_seen": 440870900, + "step": 6591 + }, + { + "epoch": 0.7480283687943262, + "grad_norm": 38.7654914855957, + "learning_rate": 5e-05, + "loss": 0.98, + "num_input_tokens_seen": 440936948, + "step": 6592 + }, + { + "epoch": 0.7480283687943262, + "loss": 0.995089054107666, + "loss_ce": 0.007296147756278515, + "loss_iou": 0.404296875, + "loss_num": 0.03564453125, + "loss_xval": 0.98828125, + "num_input_tokens_seen": 440936948, + "step": 6592 + }, + { + "epoch": 0.7481418439716312, + "grad_norm": 19.48436164855957, + "learning_rate": 5e-05, + "loss": 1.1637, + "num_input_tokens_seen": 441003680, + "step": 6593 + }, + { + "epoch": 0.7481418439716312, + "loss": 1.0770647525787354, + "loss_ce": 0.011879312805831432, + "loss_iou": 0.42578125, + "loss_num": 0.04248046875, + "loss_xval": 1.0625, + "num_input_tokens_seen": 441003680, + "step": 6593 + }, + { + "epoch": 0.7482553191489362, + "grad_norm": 21.890073776245117, + "learning_rate": 5e-05, + "loss": 1.1404, + "num_input_tokens_seen": 441070168, + "step": 6594 + }, + { + "epoch": 0.7482553191489362, + "loss": 1.154009222984314, + "loss_ce": 0.009966246783733368, + "loss_iou": 0.4921875, + "loss_num": 0.0322265625, + "loss_xval": 1.140625, + "num_input_tokens_seen": 441070168, + "step": 6594 + }, + { + "epoch": 0.7483687943262411, + "grad_norm": 30.011409759521484, + "learning_rate": 5e-05, + "loss": 1.0488, + "num_input_tokens_seen": 441136696, + "step": 6595 + }, + { + "epoch": 0.7483687943262411, + "loss": 0.977973461151123, + "loss_ce": 0.010596692562103271, + "loss_iou": 0.38671875, + "loss_num": 0.0390625, + "loss_xval": 0.96875, + "num_input_tokens_seen": 441136696, + "step": 6595 + }, + { + "epoch": 0.7484822695035461, + "grad_norm": 47.37881088256836, + "learning_rate": 5e-05, + "loss": 1.0687, + "num_input_tokens_seen": 441202776, + "step": 6596 + }, + { + "epoch": 0.7484822695035461, + "loss": 0.8583790063858032, + "loss_ce": 0.003886887803673744, + "loss_iou": 0.3515625, + "loss_num": 0.0306396484375, + "loss_xval": 0.85546875, + "num_input_tokens_seen": 441202776, + "step": 6596 + }, + { + "epoch": 0.7485957446808511, + "grad_norm": 33.07606506347656, + "learning_rate": 5e-05, + "loss": 1.3859, + "num_input_tokens_seen": 441270536, + "step": 6597 + }, + { + "epoch": 0.7485957446808511, + "loss": 1.177319049835205, + "loss_ce": 0.0076413811184465885, + "loss_iou": 0.470703125, + "loss_num": 0.0458984375, + "loss_xval": 1.171875, + "num_input_tokens_seen": 441270536, + "step": 6597 + }, + { + "epoch": 0.7487092198581561, + "grad_norm": 89.7479019165039, + "learning_rate": 5e-05, + "loss": 1.1446, + "num_input_tokens_seen": 441336968, + "step": 6598 + }, + { + "epoch": 0.7487092198581561, + "loss": 0.9541171193122864, + "loss_ce": 0.008316336199641228, + "loss_iou": 0.376953125, + "loss_num": 0.038330078125, + "loss_xval": 0.9453125, + "num_input_tokens_seen": 441336968, + "step": 6598 + }, + { + "epoch": 0.748822695035461, + "grad_norm": 26.3861141204834, + "learning_rate": 5e-05, + "loss": 1.1804, + "num_input_tokens_seen": 441403084, + "step": 6599 + }, + { + "epoch": 0.748822695035461, + "loss": 1.2567236423492432, + "loss_ce": 0.002329118549823761, + "loss_iou": 0.51171875, + "loss_num": 0.046142578125, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 441403084, + "step": 6599 + }, + { + "epoch": 0.7489361702127659, + "grad_norm": 36.286224365234375, + "learning_rate": 5e-05, + "loss": 1.2108, + "num_input_tokens_seen": 441470180, + "step": 6600 + }, + { + "epoch": 0.7489361702127659, + "loss": 1.1142104864120483, + "loss_ce": 0.0058120256289839745, + "loss_iou": 0.451171875, + "loss_num": 0.04150390625, + "loss_xval": 1.109375, + "num_input_tokens_seen": 441470180, + "step": 6600 + }, + { + "epoch": 0.7490496453900709, + "grad_norm": 37.05250549316406, + "learning_rate": 5e-05, + "loss": 1.3277, + "num_input_tokens_seen": 441537640, + "step": 6601 + }, + { + "epoch": 0.7490496453900709, + "loss": 1.2161566019058228, + "loss_ce": 0.006195696070790291, + "loss_iou": 0.52734375, + "loss_num": 0.031005859375, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 441537640, + "step": 6601 + }, + { + "epoch": 0.7491631205673759, + "grad_norm": 47.17432403564453, + "learning_rate": 5e-05, + "loss": 1.2697, + "num_input_tokens_seen": 441604576, + "step": 6602 + }, + { + "epoch": 0.7491631205673759, + "loss": 1.300499677658081, + "loss_ce": 0.00861425418406725, + "loss_iou": 0.50390625, + "loss_num": 0.056396484375, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 441604576, + "step": 6602 + }, + { + "epoch": 0.7492765957446809, + "grad_norm": 79.35326385498047, + "learning_rate": 5e-05, + "loss": 1.4744, + "num_input_tokens_seen": 441670584, + "step": 6603 + }, + { + "epoch": 0.7492765957446809, + "loss": 1.5811471939086914, + "loss_ce": 0.00888163410127163, + "loss_iou": 0.6328125, + "loss_num": 0.060546875, + "loss_xval": 1.5703125, + "num_input_tokens_seen": 441670584, + "step": 6603 + }, + { + "epoch": 0.7493900709219858, + "grad_norm": 16.211450576782227, + "learning_rate": 5e-05, + "loss": 0.9977, + "num_input_tokens_seen": 441737564, + "step": 6604 + }, + { + "epoch": 0.7493900709219858, + "loss": 0.9939229488372803, + "loss_ce": 0.00661825854331255, + "loss_iou": 0.412109375, + "loss_num": 0.03271484375, + "loss_xval": 0.98828125, + "num_input_tokens_seen": 441737564, + "step": 6604 + }, + { + "epoch": 0.7495035460992908, + "grad_norm": 30.45174789428711, + "learning_rate": 5e-05, + "loss": 1.3681, + "num_input_tokens_seen": 441804052, + "step": 6605 + }, + { + "epoch": 0.7495035460992908, + "loss": 1.2943484783172607, + "loss_ce": 0.009192228317260742, + "loss_iou": 0.51171875, + "loss_num": 0.051513671875, + "loss_xval": 1.28125, + "num_input_tokens_seen": 441804052, + "step": 6605 + }, + { + "epoch": 0.7496170212765958, + "grad_norm": 30.76072120666504, + "learning_rate": 5e-05, + "loss": 1.3139, + "num_input_tokens_seen": 441869900, + "step": 6606 + }, + { + "epoch": 0.7496170212765958, + "loss": 1.271390676498413, + "loss_ce": 0.008695412427186966, + "loss_iou": 0.54296875, + "loss_num": 0.0361328125, + "loss_xval": 1.265625, + "num_input_tokens_seen": 441869900, + "step": 6606 + }, + { + "epoch": 0.7497304964539007, + "grad_norm": 54.23448944091797, + "learning_rate": 5e-05, + "loss": 1.2635, + "num_input_tokens_seen": 441936376, + "step": 6607 + }, + { + "epoch": 0.7497304964539007, + "loss": 1.4110968112945557, + "loss_ce": 0.00875310692936182, + "loss_iou": 0.57421875, + "loss_num": 0.05078125, + "loss_xval": 1.40625, + "num_input_tokens_seen": 441936376, + "step": 6607 + }, + { + "epoch": 0.7498439716312056, + "grad_norm": 28.490097045898438, + "learning_rate": 5e-05, + "loss": 1.378, + "num_input_tokens_seen": 442002524, + "step": 6608 + }, + { + "epoch": 0.7498439716312056, + "loss": 1.199549913406372, + "loss_ce": 0.0032608569599688053, + "loss_iou": 0.4609375, + "loss_num": 0.054443359375, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 442002524, + "step": 6608 + }, + { + "epoch": 0.7499574468085106, + "grad_norm": 9.585573196411133, + "learning_rate": 5e-05, + "loss": 0.9417, + "num_input_tokens_seen": 442069348, + "step": 6609 + }, + { + "epoch": 0.7499574468085106, + "loss": 0.8861391544342041, + "loss_ce": 0.0038149843458086252, + "loss_iou": 0.384765625, + "loss_num": 0.0223388671875, + "loss_xval": 0.8828125, + "num_input_tokens_seen": 442069348, + "step": 6609 + }, + { + "epoch": 0.7500709219858156, + "grad_norm": 13.158976554870605, + "learning_rate": 5e-05, + "loss": 0.9058, + "num_input_tokens_seen": 442136668, + "step": 6610 + }, + { + "epoch": 0.7500709219858156, + "loss": 1.043200135231018, + "loss_ce": 0.009508728981018066, + "loss_iou": 0.419921875, + "loss_num": 0.0390625, + "loss_xval": 1.03125, + "num_input_tokens_seen": 442136668, + "step": 6610 + }, + { + "epoch": 0.7501843971631206, + "grad_norm": 35.62871170043945, + "learning_rate": 5e-05, + "loss": 1.0412, + "num_input_tokens_seen": 442203356, + "step": 6611 + }, + { + "epoch": 0.7501843971631206, + "loss": 1.0137996673583984, + "loss_ce": 0.004522374831140041, + "loss_iou": 0.45703125, + "loss_num": 0.0186767578125, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 442203356, + "step": 6611 + }, + { + "epoch": 0.7502978723404256, + "grad_norm": 13.596041679382324, + "learning_rate": 5e-05, + "loss": 1.1285, + "num_input_tokens_seen": 442270336, + "step": 6612 + }, + { + "epoch": 0.7502978723404256, + "loss": 1.0936732292175293, + "loss_ce": 0.005294371396303177, + "loss_iou": 0.439453125, + "loss_num": 0.0419921875, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 442270336, + "step": 6612 + }, + { + "epoch": 0.7504113475177305, + "grad_norm": 22.5541934967041, + "learning_rate": 5e-05, + "loss": 1.126, + "num_input_tokens_seen": 442339108, + "step": 6613 + }, + { + "epoch": 0.7504113475177305, + "loss": 1.0749146938323975, + "loss_ce": 0.004602222237735987, + "loss_iou": 0.453125, + "loss_num": 0.032470703125, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 442339108, + "step": 6613 + }, + { + "epoch": 0.7505248226950355, + "grad_norm": 42.737281799316406, + "learning_rate": 5e-05, + "loss": 1.3574, + "num_input_tokens_seen": 442406172, + "step": 6614 + }, + { + "epoch": 0.7505248226950355, + "loss": 1.4334123134613037, + "loss_ce": 0.010560693219304085, + "loss_iou": 0.55859375, + "loss_num": 0.060546875, + "loss_xval": 1.421875, + "num_input_tokens_seen": 442406172, + "step": 6614 + }, + { + "epoch": 0.7506382978723404, + "grad_norm": 48.5746955871582, + "learning_rate": 5e-05, + "loss": 1.3985, + "num_input_tokens_seen": 442473476, + "step": 6615 + }, + { + "epoch": 0.7506382978723404, + "loss": 1.312177300453186, + "loss_ce": 0.006513272412121296, + "loss_iou": 0.53125, + "loss_num": 0.048583984375, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 442473476, + "step": 6615 + }, + { + "epoch": 0.7507517730496454, + "grad_norm": 25.471046447753906, + "learning_rate": 5e-05, + "loss": 1.4052, + "num_input_tokens_seen": 442538472, + "step": 6616 + }, + { + "epoch": 0.7507517730496454, + "loss": 1.6351324319839478, + "loss_ce": 0.008179277181625366, + "loss_iou": 0.69140625, + "loss_num": 0.04833984375, + "loss_xval": 1.625, + "num_input_tokens_seen": 442538472, + "step": 6616 + }, + { + "epoch": 0.7508652482269503, + "grad_norm": 21.854093551635742, + "learning_rate": 5e-05, + "loss": 1.3589, + "num_input_tokens_seen": 442604728, + "step": 6617 + }, + { + "epoch": 0.7508652482269503, + "loss": 1.1533795595169067, + "loss_ce": 0.007383424788713455, + "loss_iou": 0.451171875, + "loss_num": 0.048583984375, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 442604728, + "step": 6617 + }, + { + "epoch": 0.7509787234042553, + "grad_norm": 33.33598327636719, + "learning_rate": 5e-05, + "loss": 1.2937, + "num_input_tokens_seen": 442670608, + "step": 6618 + }, + { + "epoch": 0.7509787234042553, + "loss": 1.3887324333190918, + "loss_ce": 0.006408178247511387, + "loss_iou": 0.55859375, + "loss_num": 0.053466796875, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 442670608, + "step": 6618 + }, + { + "epoch": 0.7510921985815603, + "grad_norm": 41.72659683227539, + "learning_rate": 5e-05, + "loss": 1.4843, + "num_input_tokens_seen": 442738056, + "step": 6619 + }, + { + "epoch": 0.7510921985815603, + "loss": 1.3901965618133545, + "loss_ce": 0.007384058088064194, + "loss_iou": 0.55078125, + "loss_num": 0.056396484375, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 442738056, + "step": 6619 + }, + { + "epoch": 0.7512056737588653, + "grad_norm": 59.44675064086914, + "learning_rate": 5e-05, + "loss": 1.1151, + "num_input_tokens_seen": 442805448, + "step": 6620 + }, + { + "epoch": 0.7512056737588653, + "loss": 1.1126846075057983, + "loss_ce": 0.006483382545411587, + "loss_iou": 0.44921875, + "loss_num": 0.041259765625, + "loss_xval": 1.109375, + "num_input_tokens_seen": 442805448, + "step": 6620 + }, + { + "epoch": 0.7513191489361702, + "grad_norm": 24.226808547973633, + "learning_rate": 5e-05, + "loss": 1.123, + "num_input_tokens_seen": 442871736, + "step": 6621 + }, + { + "epoch": 0.7513191489361702, + "loss": 1.034407377243042, + "loss_ce": 0.007063521072268486, + "loss_iou": 0.412109375, + "loss_num": 0.04052734375, + "loss_xval": 1.03125, + "num_input_tokens_seen": 442871736, + "step": 6621 + }, + { + "epoch": 0.7514326241134752, + "grad_norm": 35.08103942871094, + "learning_rate": 5e-05, + "loss": 1.1583, + "num_input_tokens_seen": 442938676, + "step": 6622 + }, + { + "epoch": 0.7514326241134752, + "loss": 1.1092318296432495, + "loss_ce": 0.007669335231184959, + "loss_iou": 0.44921875, + "loss_num": 0.040771484375, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 442938676, + "step": 6622 + }, + { + "epoch": 0.7515460992907801, + "grad_norm": 27.777090072631836, + "learning_rate": 5e-05, + "loss": 1.085, + "num_input_tokens_seen": 443005848, + "step": 6623 + }, + { + "epoch": 0.7515460992907801, + "loss": 0.9567105770111084, + "loss_ce": 0.0037076084408909082, + "loss_iou": 0.4296875, + "loss_num": 0.018798828125, + "loss_xval": 0.953125, + "num_input_tokens_seen": 443005848, + "step": 6623 + }, + { + "epoch": 0.7516595744680851, + "grad_norm": 31.28456687927246, + "learning_rate": 5e-05, + "loss": 1.2655, + "num_input_tokens_seen": 443073520, + "step": 6624 + }, + { + "epoch": 0.7516595744680851, + "loss": 1.3539310693740845, + "loss_ce": 0.008716173470020294, + "loss_iou": 0.5234375, + "loss_num": 0.060302734375, + "loss_xval": 1.34375, + "num_input_tokens_seen": 443073520, + "step": 6624 + }, + { + "epoch": 0.75177304964539, + "grad_norm": 33.256500244140625, + "learning_rate": 5e-05, + "loss": 1.3204, + "num_input_tokens_seen": 443141276, + "step": 6625 + }, + { + "epoch": 0.75177304964539, + "loss": 1.42866849899292, + "loss_ce": 0.00825828593224287, + "loss_iou": 0.546875, + "loss_num": 0.0654296875, + "loss_xval": 1.421875, + "num_input_tokens_seen": 443141276, + "step": 6625 + }, + { + "epoch": 0.751886524822695, + "grad_norm": 30.666587829589844, + "learning_rate": 5e-05, + "loss": 1.1436, + "num_input_tokens_seen": 443208344, + "step": 6626 + }, + { + "epoch": 0.751886524822695, + "loss": 0.9830593466758728, + "loss_ce": 0.004543670918792486, + "loss_iou": 0.419921875, + "loss_num": 0.0279541015625, + "loss_xval": 0.9765625, + "num_input_tokens_seen": 443208344, + "step": 6626 + }, + { + "epoch": 0.752, + "grad_norm": 24.773324966430664, + "learning_rate": 5e-05, + "loss": 1.1571, + "num_input_tokens_seen": 443276044, + "step": 6627 + }, + { + "epoch": 0.752, + "loss": 1.0617899894714355, + "loss_ce": 0.0066142091527581215, + "loss_iou": 0.478515625, + "loss_num": 0.0198974609375, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 443276044, + "step": 6627 + }, + { + "epoch": 0.752113475177305, + "grad_norm": 21.701383590698242, + "learning_rate": 5e-05, + "loss": 0.9538, + "num_input_tokens_seen": 443342112, + "step": 6628 + }, + { + "epoch": 0.752113475177305, + "loss": 0.9602630138397217, + "loss_ce": 0.0054289596155285835, + "loss_iou": 0.41015625, + "loss_num": 0.026611328125, + "loss_xval": 0.953125, + "num_input_tokens_seen": 443342112, + "step": 6628 + }, + { + "epoch": 0.75222695035461, + "grad_norm": 21.44999885559082, + "learning_rate": 5e-05, + "loss": 1.2911, + "num_input_tokens_seen": 443409776, + "step": 6629 + }, + { + "epoch": 0.75222695035461, + "loss": 1.3248244524002075, + "loss_ce": 0.008418193086981773, + "loss_iou": 0.51171875, + "loss_num": 0.058837890625, + "loss_xval": 1.3125, + "num_input_tokens_seen": 443409776, + "step": 6629 + }, + { + "epoch": 0.7523404255319149, + "grad_norm": 13.656866073608398, + "learning_rate": 5e-05, + "loss": 1.1904, + "num_input_tokens_seen": 443477676, + "step": 6630 + }, + { + "epoch": 0.7523404255319149, + "loss": 1.2143833637237549, + "loss_ce": 0.011258319951593876, + "loss_iou": 0.474609375, + "loss_num": 0.050537109375, + "loss_xval": 1.203125, + "num_input_tokens_seen": 443477676, + "step": 6630 + }, + { + "epoch": 0.7524539007092198, + "grad_norm": 24.282339096069336, + "learning_rate": 5e-05, + "loss": 1.5109, + "num_input_tokens_seen": 443544616, + "step": 6631 + }, + { + "epoch": 0.7524539007092198, + "loss": 1.3080735206604004, + "loss_ce": 0.0028977207839488983, + "loss_iou": 0.515625, + "loss_num": 0.054443359375, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 443544616, + "step": 6631 + }, + { + "epoch": 0.7525673758865248, + "grad_norm": 28.82913589477539, + "learning_rate": 5e-05, + "loss": 1.1211, + "num_input_tokens_seen": 443611516, + "step": 6632 + }, + { + "epoch": 0.7525673758865248, + "loss": 1.1410707235336304, + "loss_ce": 0.0031313090585172176, + "loss_iou": 0.474609375, + "loss_num": 0.037841796875, + "loss_xval": 1.140625, + "num_input_tokens_seen": 443611516, + "step": 6632 + }, + { + "epoch": 0.7526808510638298, + "grad_norm": 20.56412124633789, + "learning_rate": 5e-05, + "loss": 1.0342, + "num_input_tokens_seen": 443677684, + "step": 6633 + }, + { + "epoch": 0.7526808510638298, + "loss": 1.050370693206787, + "loss_ce": 0.005448802839964628, + "loss_iou": 0.435546875, + "loss_num": 0.034912109375, + "loss_xval": 1.046875, + "num_input_tokens_seen": 443677684, + "step": 6633 + }, + { + "epoch": 0.7527943262411347, + "grad_norm": 26.253501892089844, + "learning_rate": 5e-05, + "loss": 0.9379, + "num_input_tokens_seen": 443744196, + "step": 6634 + }, + { + "epoch": 0.7527943262411347, + "loss": 0.9983024597167969, + "loss_ce": 0.008068075403571129, + "loss_iou": 0.404296875, + "loss_num": 0.036376953125, + "loss_xval": 0.9921875, + "num_input_tokens_seen": 443744196, + "step": 6634 + }, + { + "epoch": 0.7529078014184397, + "grad_norm": 52.8765869140625, + "learning_rate": 5e-05, + "loss": 1.3034, + "num_input_tokens_seen": 443810580, + "step": 6635 + }, + { + "epoch": 0.7529078014184397, + "loss": 1.1791305541992188, + "loss_ce": 0.006523223593831062, + "loss_iou": 0.458984375, + "loss_num": 0.051025390625, + "loss_xval": 1.171875, + "num_input_tokens_seen": 443810580, + "step": 6635 + }, + { + "epoch": 0.7530212765957447, + "grad_norm": 35.65728759765625, + "learning_rate": 5e-05, + "loss": 1.5341, + "num_input_tokens_seen": 443877624, + "step": 6636 + }, + { + "epoch": 0.7530212765957447, + "loss": 1.603514552116394, + "loss_ce": 0.005858351476490498, + "loss_iou": 0.6484375, + "loss_num": 0.059814453125, + "loss_xval": 1.59375, + "num_input_tokens_seen": 443877624, + "step": 6636 + }, + { + "epoch": 0.7531347517730497, + "grad_norm": 16.141332626342773, + "learning_rate": 5e-05, + "loss": 1.1018, + "num_input_tokens_seen": 443945116, + "step": 6637 + }, + { + "epoch": 0.7531347517730497, + "loss": 1.152554988861084, + "loss_ce": 0.0065588559955358505, + "loss_iou": 0.421875, + "loss_num": 0.060302734375, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 443945116, + "step": 6637 + }, + { + "epoch": 0.7532482269503546, + "grad_norm": 22.430753707885742, + "learning_rate": 5e-05, + "loss": 1.0582, + "num_input_tokens_seen": 444012248, + "step": 6638 + }, + { + "epoch": 0.7532482269503546, + "loss": 1.0382134914398193, + "loss_ce": 0.005498562008142471, + "loss_iou": 0.44921875, + "loss_num": 0.02685546875, + "loss_xval": 1.03125, + "num_input_tokens_seen": 444012248, + "step": 6638 + }, + { + "epoch": 0.7533617021276596, + "grad_norm": 25.034605026245117, + "learning_rate": 5e-05, + "loss": 1.092, + "num_input_tokens_seen": 444079556, + "step": 6639 + }, + { + "epoch": 0.7533617021276596, + "loss": 1.257602334022522, + "loss_ce": 0.008578868582844734, + "loss_iou": 0.5078125, + "loss_num": 0.046875, + "loss_xval": 1.25, + "num_input_tokens_seen": 444079556, + "step": 6639 + }, + { + "epoch": 0.7534751773049645, + "grad_norm": 24.868942260742188, + "learning_rate": 5e-05, + "loss": 1.1993, + "num_input_tokens_seen": 444146976, + "step": 6640 + }, + { + "epoch": 0.7534751773049645, + "loss": 1.1027827262878418, + "loss_ce": 0.004882234148681164, + "loss_iou": 0.43359375, + "loss_num": 0.04638671875, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 444146976, + "step": 6640 + }, + { + "epoch": 0.7535886524822695, + "grad_norm": 44.15297317504883, + "learning_rate": 5e-05, + "loss": 1.3264, + "num_input_tokens_seen": 444214444, + "step": 6641 + }, + { + "epoch": 0.7535886524822695, + "loss": 1.4648926258087158, + "loss_ce": 0.007373001892119646, + "loss_iou": 0.57421875, + "loss_num": 0.06103515625, + "loss_xval": 1.4609375, + "num_input_tokens_seen": 444214444, + "step": 6641 + }, + { + "epoch": 0.7537021276595745, + "grad_norm": 29.802284240722656, + "learning_rate": 5e-05, + "loss": 1.3352, + "num_input_tokens_seen": 444281652, + "step": 6642 + }, + { + "epoch": 0.7537021276595745, + "loss": 1.2333729267120361, + "loss_ce": 0.003880757372826338, + "loss_iou": 0.5234375, + "loss_num": 0.036865234375, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 444281652, + "step": 6642 + }, + { + "epoch": 0.7538156028368794, + "grad_norm": 24.367055892944336, + "learning_rate": 5e-05, + "loss": 1.2869, + "num_input_tokens_seen": 444348564, + "step": 6643 + }, + { + "epoch": 0.7538156028368794, + "loss": 1.3495688438415527, + "loss_ce": 0.004842227324843407, + "loss_iou": 0.53125, + "loss_num": 0.05615234375, + "loss_xval": 1.34375, + "num_input_tokens_seen": 444348564, + "step": 6643 + }, + { + "epoch": 0.7539290780141844, + "grad_norm": 33.273441314697266, + "learning_rate": 5e-05, + "loss": 1.2851, + "num_input_tokens_seen": 444416552, + "step": 6644 + }, + { + "epoch": 0.7539290780141844, + "loss": 1.2999231815338135, + "loss_ce": 0.010372367687523365, + "loss_iou": 0.52734375, + "loss_num": 0.046142578125, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 444416552, + "step": 6644 + }, + { + "epoch": 0.7540425531914894, + "grad_norm": 44.27968978881836, + "learning_rate": 5e-05, + "loss": 1.3834, + "num_input_tokens_seen": 444484148, + "step": 6645 + }, + { + "epoch": 0.7540425531914894, + "loss": 1.4256675243377686, + "loss_ce": 0.0037925534415990114, + "loss_iou": 0.58203125, + "loss_num": 0.05126953125, + "loss_xval": 1.421875, + "num_input_tokens_seen": 444484148, + "step": 6645 + }, + { + "epoch": 0.7541560283687944, + "grad_norm": 27.33612823486328, + "learning_rate": 5e-05, + "loss": 1.0736, + "num_input_tokens_seen": 444550140, + "step": 6646 + }, + { + "epoch": 0.7541560283687944, + "loss": 1.0198488235473633, + "loss_ce": 0.0052308193407952785, + "loss_iou": 0.423828125, + "loss_num": 0.033203125, + "loss_xval": 1.015625, + "num_input_tokens_seen": 444550140, + "step": 6646 + }, + { + "epoch": 0.7542695035460993, + "grad_norm": 24.86091423034668, + "learning_rate": 5e-05, + "loss": 1.2599, + "num_input_tokens_seen": 444615932, + "step": 6647 + }, + { + "epoch": 0.7542695035460993, + "loss": 1.2453703880310059, + "loss_ce": 0.0070890760980546474, + "loss_iou": 0.458984375, + "loss_num": 0.0634765625, + "loss_xval": 1.234375, + "num_input_tokens_seen": 444615932, + "step": 6647 + }, + { + "epoch": 0.7543829787234042, + "grad_norm": 31.0516414642334, + "learning_rate": 5e-05, + "loss": 1.2159, + "num_input_tokens_seen": 444682328, + "step": 6648 + }, + { + "epoch": 0.7543829787234042, + "loss": 1.4091498851776123, + "loss_ce": 0.008759252727031708, + "loss_iou": 0.52734375, + "loss_num": 0.06884765625, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 444682328, + "step": 6648 + }, + { + "epoch": 0.7544964539007092, + "grad_norm": 36.940269470214844, + "learning_rate": 5e-05, + "loss": 1.2529, + "num_input_tokens_seen": 444748476, + "step": 6649 + }, + { + "epoch": 0.7544964539007092, + "loss": 1.1521447896957397, + "loss_ce": 0.005660403985530138, + "loss_iou": 0.498046875, + "loss_num": 0.0303955078125, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 444748476, + "step": 6649 + }, + { + "epoch": 0.7546099290780142, + "grad_norm": 45.55266189575195, + "learning_rate": 5e-05, + "loss": 1.5008, + "num_input_tokens_seen": 444814820, + "step": 6650 + }, + { + "epoch": 0.7546099290780142, + "loss": 1.6623338460922241, + "loss_ce": 0.007060437463223934, + "loss_iou": 0.62890625, + "loss_num": 0.080078125, + "loss_xval": 1.65625, + "num_input_tokens_seen": 444814820, + "step": 6650 + }, + { + "epoch": 0.7547234042553191, + "grad_norm": 30.136743545532227, + "learning_rate": 5e-05, + "loss": 1.3091, + "num_input_tokens_seen": 444881124, + "step": 6651 + }, + { + "epoch": 0.7547234042553191, + "loss": 1.2180254459381104, + "loss_ce": 0.006111428141593933, + "loss_iou": 0.5390625, + "loss_num": 0.0269775390625, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 444881124, + "step": 6651 + }, + { + "epoch": 0.7548368794326241, + "grad_norm": 10.850530624389648, + "learning_rate": 5e-05, + "loss": 1.071, + "num_input_tokens_seen": 444947624, + "step": 6652 + }, + { + "epoch": 0.7548368794326241, + "loss": 1.0060033798217773, + "loss_ce": 0.0038060846272855997, + "loss_iou": 0.421875, + "loss_num": 0.03173828125, + "loss_xval": 1.0, + "num_input_tokens_seen": 444947624, + "step": 6652 + }, + { + "epoch": 0.7549503546099291, + "grad_norm": 36.64085388183594, + "learning_rate": 5e-05, + "loss": 1.179, + "num_input_tokens_seen": 445015636, + "step": 6653 + }, + { + "epoch": 0.7549503546099291, + "loss": 1.2134654521942139, + "loss_ce": 0.010340487584471703, + "loss_iou": 0.498046875, + "loss_num": 0.041259765625, + "loss_xval": 1.203125, + "num_input_tokens_seen": 445015636, + "step": 6653 + }, + { + "epoch": 0.7550638297872341, + "grad_norm": 43.65794372558594, + "learning_rate": 5e-05, + "loss": 1.2878, + "num_input_tokens_seen": 445081192, + "step": 6654 + }, + { + "epoch": 0.7550638297872341, + "loss": 1.1698338985443115, + "loss_ce": 0.007480410393327475, + "loss_iou": 0.49609375, + "loss_num": 0.0341796875, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 445081192, + "step": 6654 + }, + { + "epoch": 0.755177304964539, + "grad_norm": 24.38861083984375, + "learning_rate": 5e-05, + "loss": 1.4062, + "num_input_tokens_seen": 445147756, + "step": 6655 + }, + { + "epoch": 0.755177304964539, + "loss": 1.3218330144882202, + "loss_ce": 0.004450247623026371, + "loss_iou": 0.55859375, + "loss_num": 0.040771484375, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 445147756, + "step": 6655 + }, + { + "epoch": 0.7552907801418439, + "grad_norm": 12.53515338897705, + "learning_rate": 5e-05, + "loss": 1.0359, + "num_input_tokens_seen": 445213972, + "step": 6656 + }, + { + "epoch": 0.7552907801418439, + "loss": 1.105867624282837, + "loss_ce": 0.006258129142224789, + "loss_iou": 0.427734375, + "loss_num": 0.048583984375, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 445213972, + "step": 6656 + }, + { + "epoch": 0.7554042553191489, + "grad_norm": 14.133986473083496, + "learning_rate": 5e-05, + "loss": 1.0461, + "num_input_tokens_seen": 445280536, + "step": 6657 + }, + { + "epoch": 0.7554042553191489, + "loss": 1.1477916240692139, + "loss_ce": 0.008631434291601181, + "loss_iou": 0.48046875, + "loss_num": 0.035888671875, + "loss_xval": 1.140625, + "num_input_tokens_seen": 445280536, + "step": 6657 + }, + { + "epoch": 0.7555177304964539, + "grad_norm": 57.992523193359375, + "learning_rate": 5e-05, + "loss": 1.1847, + "num_input_tokens_seen": 445347756, + "step": 6658 + }, + { + "epoch": 0.7555177304964539, + "loss": 1.2496612071990967, + "loss_ce": 0.005032224114984274, + "loss_iou": 0.48828125, + "loss_num": 0.0537109375, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 445347756, + "step": 6658 + }, + { + "epoch": 0.7556312056737589, + "grad_norm": 26.045970916748047, + "learning_rate": 5e-05, + "loss": 1.2384, + "num_input_tokens_seen": 445415312, + "step": 6659 + }, + { + "epoch": 0.7556312056737589, + "loss": 1.1747758388519287, + "loss_ce": 0.006318727973848581, + "loss_iou": 0.4609375, + "loss_num": 0.048828125, + "loss_xval": 1.171875, + "num_input_tokens_seen": 445415312, + "step": 6659 + }, + { + "epoch": 0.7557446808510638, + "grad_norm": 25.039169311523438, + "learning_rate": 5e-05, + "loss": 1.1846, + "num_input_tokens_seen": 445482028, + "step": 6660 + }, + { + "epoch": 0.7557446808510638, + "loss": 1.0871901512145996, + "loss_ce": 0.0044264597818255424, + "loss_iou": 0.4296875, + "loss_num": 0.045166015625, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 445482028, + "step": 6660 + }, + { + "epoch": 0.7558581560283688, + "grad_norm": 25.145009994506836, + "learning_rate": 5e-05, + "loss": 1.0101, + "num_input_tokens_seen": 445548340, + "step": 6661 + }, + { + "epoch": 0.7558581560283688, + "loss": 1.1162898540496826, + "loss_ce": 0.00993603840470314, + "loss_iou": 0.44140625, + "loss_num": 0.044921875, + "loss_xval": 1.109375, + "num_input_tokens_seen": 445548340, + "step": 6661 + }, + { + "epoch": 0.7559716312056738, + "grad_norm": 30.379566192626953, + "learning_rate": 5e-05, + "loss": 1.1125, + "num_input_tokens_seen": 445615188, + "step": 6662 + }, + { + "epoch": 0.7559716312056738, + "loss": 1.026849627494812, + "loss_ce": 0.008630627766251564, + "loss_iou": 0.439453125, + "loss_num": 0.028076171875, + "loss_xval": 1.015625, + "num_input_tokens_seen": 445615188, + "step": 6662 + }, + { + "epoch": 0.7560851063829788, + "grad_norm": 33.86381149291992, + "learning_rate": 5e-05, + "loss": 1.3714, + "num_input_tokens_seen": 445681976, + "step": 6663 + }, + { + "epoch": 0.7560851063829788, + "loss": 1.7004201412200928, + "loss_ce": 0.015361546538770199, + "loss_iou": 0.6015625, + "loss_num": 0.09619140625, + "loss_xval": 1.6875, + "num_input_tokens_seen": 445681976, + "step": 6663 + }, + { + "epoch": 0.7561985815602836, + "grad_norm": 27.33017349243164, + "learning_rate": 5e-05, + "loss": 1.208, + "num_input_tokens_seen": 445748772, + "step": 6664 + }, + { + "epoch": 0.7561985815602836, + "loss": 1.1367988586425781, + "loss_ce": 0.008380910381674767, + "loss_iou": 0.427734375, + "loss_num": 0.054443359375, + "loss_xval": 1.125, + "num_input_tokens_seen": 445748772, + "step": 6664 + }, + { + "epoch": 0.7563120567375886, + "grad_norm": 24.74114227294922, + "learning_rate": 5e-05, + "loss": 1.1383, + "num_input_tokens_seen": 445815868, + "step": 6665 + }, + { + "epoch": 0.7563120567375886, + "loss": 0.9972738027572632, + "loss_ce": 0.0040487004444003105, + "loss_iou": 0.41796875, + "loss_num": 0.031494140625, + "loss_xval": 0.9921875, + "num_input_tokens_seen": 445815868, + "step": 6665 + }, + { + "epoch": 0.7564255319148936, + "grad_norm": 17.015033721923828, + "learning_rate": 5e-05, + "loss": 1.2342, + "num_input_tokens_seen": 445883256, + "step": 6666 + }, + { + "epoch": 0.7564255319148936, + "loss": 1.1720401048660278, + "loss_ce": 0.006726416759192944, + "loss_iou": 0.4765625, + "loss_num": 0.042724609375, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 445883256, + "step": 6666 + }, + { + "epoch": 0.7565390070921986, + "grad_norm": 38.73220443725586, + "learning_rate": 5e-05, + "loss": 1.191, + "num_input_tokens_seen": 445950396, + "step": 6667 + }, + { + "epoch": 0.7565390070921986, + "loss": 1.3213906288146973, + "loss_ce": 0.004984297323971987, + "loss_iou": 0.5078125, + "loss_num": 0.060302734375, + "loss_xval": 1.3125, + "num_input_tokens_seen": 445950396, + "step": 6667 + }, + { + "epoch": 0.7566524822695035, + "grad_norm": 22.206729888916016, + "learning_rate": 5e-05, + "loss": 1.0738, + "num_input_tokens_seen": 446016796, + "step": 6668 + }, + { + "epoch": 0.7566524822695035, + "loss": 1.2065556049346924, + "loss_ce": 0.006848616059869528, + "loss_iou": 0.5, + "loss_num": 0.039306640625, + "loss_xval": 1.203125, + "num_input_tokens_seen": 446016796, + "step": 6668 + }, + { + "epoch": 0.7567659574468085, + "grad_norm": 31.57144546508789, + "learning_rate": 5e-05, + "loss": 1.5543, + "num_input_tokens_seen": 446084028, + "step": 6669 + }, + { + "epoch": 0.7567659574468085, + "loss": 1.3329499959945679, + "loss_ce": 0.007266493514180183, + "loss_iou": 0.5390625, + "loss_num": 0.049072265625, + "loss_xval": 1.328125, + "num_input_tokens_seen": 446084028, + "step": 6669 + }, + { + "epoch": 0.7568794326241135, + "grad_norm": 37.43610763549805, + "learning_rate": 5e-05, + "loss": 1.0879, + "num_input_tokens_seen": 446150812, + "step": 6670 + }, + { + "epoch": 0.7568794326241135, + "loss": 1.1849889755249023, + "loss_ce": 0.007742779329419136, + "loss_iou": 0.470703125, + "loss_num": 0.04736328125, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 446150812, + "step": 6670 + }, + { + "epoch": 0.7569929078014185, + "grad_norm": 76.87760925292969, + "learning_rate": 5e-05, + "loss": 1.3283, + "num_input_tokens_seen": 446217400, + "step": 6671 + }, + { + "epoch": 0.7569929078014185, + "loss": 1.44132399559021, + "loss_ce": 0.005777071230113506, + "loss_iou": 0.55078125, + "loss_num": 0.06640625, + "loss_xval": 1.4375, + "num_input_tokens_seen": 446217400, + "step": 6671 + }, + { + "epoch": 0.7571063829787235, + "grad_norm": 30.064163208007812, + "learning_rate": 5e-05, + "loss": 1.073, + "num_input_tokens_seen": 446284304, + "step": 6672 + }, + { + "epoch": 0.7571063829787235, + "loss": 1.0309619903564453, + "loss_ce": 0.008012784644961357, + "loss_iou": 0.408203125, + "loss_num": 0.040771484375, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 446284304, + "step": 6672 + }, + { + "epoch": 0.7572198581560283, + "grad_norm": 31.008594512939453, + "learning_rate": 5e-05, + "loss": 1.1112, + "num_input_tokens_seen": 446350976, + "step": 6673 + }, + { + "epoch": 0.7572198581560283, + "loss": 1.0316002368927002, + "loss_ce": 0.007308237254619598, + "loss_iou": 0.388671875, + "loss_num": 0.04931640625, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 446350976, + "step": 6673 + }, + { + "epoch": 0.7573333333333333, + "grad_norm": 31.113813400268555, + "learning_rate": 5e-05, + "loss": 1.0287, + "num_input_tokens_seen": 446417300, + "step": 6674 + }, + { + "epoch": 0.7573333333333333, + "loss": 1.20951509475708, + "loss_ce": 0.009319874458014965, + "loss_iou": 0.46484375, + "loss_num": 0.05419921875, + "loss_xval": 1.203125, + "num_input_tokens_seen": 446417300, + "step": 6674 + }, + { + "epoch": 0.7574468085106383, + "grad_norm": 23.33226776123047, + "learning_rate": 5e-05, + "loss": 1.1064, + "num_input_tokens_seen": 446483000, + "step": 6675 + }, + { + "epoch": 0.7574468085106383, + "loss": 1.0191636085510254, + "loss_ce": 0.0057358811609447, + "loss_iou": 0.423828125, + "loss_num": 0.032958984375, + "loss_xval": 1.015625, + "num_input_tokens_seen": 446483000, + "step": 6675 + }, + { + "epoch": 0.7575602836879433, + "grad_norm": 16.82381248474121, + "learning_rate": 5e-05, + "loss": 1.0062, + "num_input_tokens_seen": 446549008, + "step": 6676 + }, + { + "epoch": 0.7575602836879433, + "loss": 1.0869526863098145, + "loss_ce": 0.007027165964245796, + "loss_iou": 0.4375, + "loss_num": 0.041259765625, + "loss_xval": 1.078125, + "num_input_tokens_seen": 446549008, + "step": 6676 + }, + { + "epoch": 0.7576737588652482, + "grad_norm": 43.056861877441406, + "learning_rate": 5e-05, + "loss": 1.2882, + "num_input_tokens_seen": 446616060, + "step": 6677 + }, + { + "epoch": 0.7576737588652482, + "loss": 1.3979469537734985, + "loss_ce": 0.004880525171756744, + "loss_iou": 0.52734375, + "loss_num": 0.06787109375, + "loss_xval": 1.390625, + "num_input_tokens_seen": 446616060, + "step": 6677 + }, + { + "epoch": 0.7577872340425532, + "grad_norm": 22.786144256591797, + "learning_rate": 5e-05, + "loss": 0.941, + "num_input_tokens_seen": 446682792, + "step": 6678 + }, + { + "epoch": 0.7577872340425532, + "loss": 0.9848964214324951, + "loss_ce": 0.0034511222038418055, + "loss_iou": 0.41015625, + "loss_num": 0.03173828125, + "loss_xval": 0.98046875, + "num_input_tokens_seen": 446682792, + "step": 6678 + }, + { + "epoch": 0.7579007092198582, + "grad_norm": 24.99087905883789, + "learning_rate": 5e-05, + "loss": 1.2214, + "num_input_tokens_seen": 446750052, + "step": 6679 + }, + { + "epoch": 0.7579007092198582, + "loss": 1.2541834115982056, + "loss_ce": 0.011507692746818066, + "loss_iou": 0.46875, + "loss_num": 0.060791015625, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 446750052, + "step": 6679 + }, + { + "epoch": 0.7580141843971632, + "grad_norm": 33.19645309448242, + "learning_rate": 5e-05, + "loss": 1.2228, + "num_input_tokens_seen": 446817640, + "step": 6680 + }, + { + "epoch": 0.7580141843971632, + "loss": 1.230677604675293, + "loss_ce": 0.0046034217812120914, + "loss_iou": 0.470703125, + "loss_num": 0.056884765625, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 446817640, + "step": 6680 + }, + { + "epoch": 0.758127659574468, + "grad_norm": 37.02022171020508, + "learning_rate": 5e-05, + "loss": 1.2744, + "num_input_tokens_seen": 446884364, + "step": 6681 + }, + { + "epoch": 0.758127659574468, + "loss": 1.15757417678833, + "loss_ce": 0.006207003258168697, + "loss_iou": 0.50390625, + "loss_num": 0.0289306640625, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 446884364, + "step": 6681 + }, + { + "epoch": 0.758241134751773, + "grad_norm": 39.69169616699219, + "learning_rate": 5e-05, + "loss": 1.3676, + "num_input_tokens_seen": 446949996, + "step": 6682 + }, + { + "epoch": 0.758241134751773, + "loss": 1.5044913291931152, + "loss_ce": 0.005956186912953854, + "loss_iou": 0.60546875, + "loss_num": 0.0576171875, + "loss_xval": 1.5, + "num_input_tokens_seen": 446949996, + "step": 6682 + }, + { + "epoch": 0.758354609929078, + "grad_norm": 33.24539566040039, + "learning_rate": 5e-05, + "loss": 1.2591, + "num_input_tokens_seen": 447016872, + "step": 6683 + }, + { + "epoch": 0.758354609929078, + "loss": 1.255221962928772, + "loss_ce": 0.004733659792691469, + "loss_iou": 0.51953125, + "loss_num": 0.041748046875, + "loss_xval": 1.25, + "num_input_tokens_seen": 447016872, + "step": 6683 + }, + { + "epoch": 0.758468085106383, + "grad_norm": 34.231510162353516, + "learning_rate": 5e-05, + "loss": 1.2859, + "num_input_tokens_seen": 447083904, + "step": 6684 + }, + { + "epoch": 0.758468085106383, + "loss": 1.2206377983093262, + "loss_ce": 0.00823542382568121, + "loss_iou": 0.48828125, + "loss_num": 0.046630859375, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 447083904, + "step": 6684 + }, + { + "epoch": 0.758581560283688, + "grad_norm": 26.514827728271484, + "learning_rate": 5e-05, + "loss": 1.1307, + "num_input_tokens_seen": 447150788, + "step": 6685 + }, + { + "epoch": 0.758581560283688, + "loss": 1.2147202491760254, + "loss_ce": 0.004759347066283226, + "loss_iou": 0.46875, + "loss_num": 0.05419921875, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 447150788, + "step": 6685 + }, + { + "epoch": 0.7586950354609929, + "grad_norm": 20.83736801147461, + "learning_rate": 5e-05, + "loss": 1.182, + "num_input_tokens_seen": 447217740, + "step": 6686 + }, + { + "epoch": 0.7586950354609929, + "loss": 1.1588141918182373, + "loss_ce": 0.012818161398172379, + "loss_iou": 0.4921875, + "loss_num": 0.03271484375, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 447217740, + "step": 6686 + }, + { + "epoch": 0.7588085106382979, + "grad_norm": 30.114810943603516, + "learning_rate": 5e-05, + "loss": 1.0228, + "num_input_tokens_seen": 447285908, + "step": 6687 + }, + { + "epoch": 0.7588085106382979, + "loss": 1.123764991760254, + "loss_ce": 0.003891853615641594, + "loss_iou": 0.466796875, + "loss_num": 0.037841796875, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 447285908, + "step": 6687 + }, + { + "epoch": 0.7589219858156029, + "grad_norm": 42.738468170166016, + "learning_rate": 5e-05, + "loss": 1.1688, + "num_input_tokens_seen": 447353392, + "step": 6688 + }, + { + "epoch": 0.7589219858156029, + "loss": 1.0950167179107666, + "loss_ce": 0.009567588567733765, + "loss_iou": 0.46484375, + "loss_num": 0.0311279296875, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 447353392, + "step": 6688 + }, + { + "epoch": 0.7590354609929078, + "grad_norm": 27.485027313232422, + "learning_rate": 5e-05, + "loss": 1.3533, + "num_input_tokens_seen": 447420488, + "step": 6689 + }, + { + "epoch": 0.7590354609929078, + "loss": 1.392154574394226, + "loss_ce": 0.004947580862790346, + "loss_iou": 0.5703125, + "loss_num": 0.048583984375, + "loss_xval": 1.390625, + "num_input_tokens_seen": 447420488, + "step": 6689 + }, + { + "epoch": 0.7591489361702127, + "grad_norm": 15.639766693115234, + "learning_rate": 5e-05, + "loss": 1.0709, + "num_input_tokens_seen": 447487328, + "step": 6690 + }, + { + "epoch": 0.7591489361702127, + "loss": 1.1064163446426392, + "loss_ce": 0.006318746134638786, + "loss_iou": 0.451171875, + "loss_num": 0.039794921875, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 447487328, + "step": 6690 + }, + { + "epoch": 0.7592624113475177, + "grad_norm": 20.627729415893555, + "learning_rate": 5e-05, + "loss": 1.0219, + "num_input_tokens_seen": 447553448, + "step": 6691 + }, + { + "epoch": 0.7592624113475177, + "loss": 1.0802335739135742, + "loss_ce": 0.006747229024767876, + "loss_iou": 0.4375, + "loss_num": 0.03955078125, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 447553448, + "step": 6691 + }, + { + "epoch": 0.7593758865248227, + "grad_norm": 16.8764591217041, + "learning_rate": 5e-05, + "loss": 1.075, + "num_input_tokens_seen": 447620468, + "step": 6692 + }, + { + "epoch": 0.7593758865248227, + "loss": 1.0273925065994263, + "loss_ce": 0.005297804716974497, + "loss_iou": 0.38671875, + "loss_num": 0.04931640625, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 447620468, + "step": 6692 + }, + { + "epoch": 0.7594893617021277, + "grad_norm": 28.15681266784668, + "learning_rate": 5e-05, + "loss": 0.9401, + "num_input_tokens_seen": 447685840, + "step": 6693 + }, + { + "epoch": 0.7594893617021277, + "loss": 0.712406575679779, + "loss_ce": 0.0026592400390654802, + "loss_iou": 0.275390625, + "loss_num": 0.0322265625, + "loss_xval": 0.7109375, + "num_input_tokens_seen": 447685840, + "step": 6693 + }, + { + "epoch": 0.7596028368794326, + "grad_norm": 21.365514755249023, + "learning_rate": 5e-05, + "loss": 1.0494, + "num_input_tokens_seen": 447752940, + "step": 6694 + }, + { + "epoch": 0.7596028368794326, + "loss": 1.0803009271621704, + "loss_ce": 0.004129033535718918, + "loss_iou": 0.45703125, + "loss_num": 0.032470703125, + "loss_xval": 1.078125, + "num_input_tokens_seen": 447752940, + "step": 6694 + }, + { + "epoch": 0.7597163120567376, + "grad_norm": 56.53215789794922, + "learning_rate": 5e-05, + "loss": 1.0065, + "num_input_tokens_seen": 447820716, + "step": 6695 + }, + { + "epoch": 0.7597163120567376, + "loss": 1.0008211135864258, + "loss_ce": 0.005215534940361977, + "loss_iou": 0.4453125, + "loss_num": 0.0206298828125, + "loss_xval": 0.99609375, + "num_input_tokens_seen": 447820716, + "step": 6695 + }, + { + "epoch": 0.7598297872340426, + "grad_norm": 40.44229507446289, + "learning_rate": 5e-05, + "loss": 1.1403, + "num_input_tokens_seen": 447887376, + "step": 6696 + }, + { + "epoch": 0.7598297872340426, + "loss": 1.1621159315109253, + "loss_ce": 0.0034245350398123264, + "loss_iou": 0.4921875, + "loss_num": 0.03515625, + "loss_xval": 1.15625, + "num_input_tokens_seen": 447887376, + "step": 6696 + }, + { + "epoch": 0.7599432624113475, + "grad_norm": 27.69923973083496, + "learning_rate": 5e-05, + "loss": 1.1739, + "num_input_tokens_seen": 447954740, + "step": 6697 + }, + { + "epoch": 0.7599432624113475, + "loss": 1.175407886505127, + "loss_ce": 0.01085713692009449, + "loss_iou": 0.453125, + "loss_num": 0.05126953125, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 447954740, + "step": 6697 + }, + { + "epoch": 0.7600567375886524, + "grad_norm": 36.998695373535156, + "learning_rate": 5e-05, + "loss": 1.0422, + "num_input_tokens_seen": 448020972, + "step": 6698 + }, + { + "epoch": 0.7600567375886524, + "loss": 1.2065714597702026, + "loss_ce": 0.007840987294912338, + "loss_iou": 0.515625, + "loss_num": 0.033203125, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 448020972, + "step": 6698 + }, + { + "epoch": 0.7601702127659574, + "grad_norm": 25.82764434814453, + "learning_rate": 5e-05, + "loss": 1.1112, + "num_input_tokens_seen": 448088116, + "step": 6699 + }, + { + "epoch": 0.7601702127659574, + "loss": 1.3839677572250366, + "loss_ce": 0.005549841094762087, + "loss_iou": 0.53515625, + "loss_num": 0.061767578125, + "loss_xval": 1.375, + "num_input_tokens_seen": 448088116, + "step": 6699 + }, + { + "epoch": 0.7602836879432624, + "grad_norm": 29.21987533569336, + "learning_rate": 5e-05, + "loss": 1.3696, + "num_input_tokens_seen": 448154928, + "step": 6700 + }, + { + "epoch": 0.7602836879432624, + "loss": 1.2061184644699097, + "loss_ce": 0.0036038109101355076, + "loss_iou": 0.484375, + "loss_num": 0.046875, + "loss_xval": 1.203125, + "num_input_tokens_seen": 448154928, + "step": 6700 + }, + { + "epoch": 0.7603971631205674, + "grad_norm": 30.866418838500977, + "learning_rate": 5e-05, + "loss": 1.1794, + "num_input_tokens_seen": 448222152, + "step": 6701 + }, + { + "epoch": 0.7603971631205674, + "loss": 1.197349190711975, + "loss_ce": 0.00838435161858797, + "loss_iou": 0.494140625, + "loss_num": 0.040283203125, + "loss_xval": 1.1875, + "num_input_tokens_seen": 448222152, + "step": 6701 + }, + { + "epoch": 0.7605106382978724, + "grad_norm": 30.267595291137695, + "learning_rate": 5e-05, + "loss": 1.1233, + "num_input_tokens_seen": 448289840, + "step": 6702 + }, + { + "epoch": 0.7605106382978724, + "loss": 1.1246850490570068, + "loss_ce": 0.008474193513393402, + "loss_iou": 0.49609375, + "loss_num": 0.024658203125, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 448289840, + "step": 6702 + }, + { + "epoch": 0.7606241134751773, + "grad_norm": 45.30342483520508, + "learning_rate": 5e-05, + "loss": 1.3095, + "num_input_tokens_seen": 448356640, + "step": 6703 + }, + { + "epoch": 0.7606241134751773, + "loss": 1.3024007081985474, + "loss_ce": 0.006990573834627867, + "loss_iou": 0.55859375, + "loss_num": 0.035400390625, + "loss_xval": 1.296875, + "num_input_tokens_seen": 448356640, + "step": 6703 + }, + { + "epoch": 0.7607375886524823, + "grad_norm": 31.247419357299805, + "learning_rate": 5e-05, + "loss": 1.4136, + "num_input_tokens_seen": 448424248, + "step": 6704 + }, + { + "epoch": 0.7607375886524823, + "loss": 1.4344425201416016, + "loss_ce": 0.006219891831278801, + "loss_iou": 0.5859375, + "loss_num": 0.05078125, + "loss_xval": 1.4296875, + "num_input_tokens_seen": 448424248, + "step": 6704 + }, + { + "epoch": 0.7608510638297873, + "grad_norm": 64.75756072998047, + "learning_rate": 5e-05, + "loss": 1.2234, + "num_input_tokens_seen": 448491736, + "step": 6705 + }, + { + "epoch": 0.7608510638297873, + "loss": 1.0431607961654663, + "loss_ce": 0.007272054441273212, + "loss_iou": 0.419921875, + "loss_num": 0.0390625, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 448491736, + "step": 6705 + }, + { + "epoch": 0.7609645390070922, + "grad_norm": 17.309959411621094, + "learning_rate": 5e-05, + "loss": 1.2664, + "num_input_tokens_seen": 448557536, + "step": 6706 + }, + { + "epoch": 0.7609645390070922, + "loss": 1.1172826290130615, + "loss_ce": 0.006442815065383911, + "loss_iou": 0.423828125, + "loss_num": 0.052734375, + "loss_xval": 1.109375, + "num_input_tokens_seen": 448557536, + "step": 6706 + }, + { + "epoch": 0.7610780141843971, + "grad_norm": 22.129962921142578, + "learning_rate": 5e-05, + "loss": 1.084, + "num_input_tokens_seen": 448625176, + "step": 6707 + }, + { + "epoch": 0.7610780141843971, + "loss": 1.1041905879974365, + "loss_ce": 0.00506956921890378, + "loss_iou": 0.451171875, + "loss_num": 0.039794921875, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 448625176, + "step": 6707 + }, + { + "epoch": 0.7611914893617021, + "grad_norm": 30.258983612060547, + "learning_rate": 5e-05, + "loss": 1.0708, + "num_input_tokens_seen": 448691608, + "step": 6708 + }, + { + "epoch": 0.7611914893617021, + "loss": 1.1678943634033203, + "loss_ce": 0.004808417521417141, + "loss_iou": 0.49609375, + "loss_num": 0.0341796875, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 448691608, + "step": 6708 + }, + { + "epoch": 0.7613049645390071, + "grad_norm": 89.07263946533203, + "learning_rate": 5e-05, + "loss": 1.1903, + "num_input_tokens_seen": 448759440, + "step": 6709 + }, + { + "epoch": 0.7613049645390071, + "loss": 1.3233611583709717, + "loss_ce": 0.007931584492325783, + "loss_iou": 0.53125, + "loss_num": 0.050537109375, + "loss_xval": 1.3125, + "num_input_tokens_seen": 448759440, + "step": 6709 + }, + { + "epoch": 0.7614184397163121, + "grad_norm": 43.37592697143555, + "learning_rate": 5e-05, + "loss": 1.1577, + "num_input_tokens_seen": 448827216, + "step": 6710 + }, + { + "epoch": 0.7614184397163121, + "loss": 1.0843634605407715, + "loss_ce": 0.003308723447844386, + "loss_iou": 0.466796875, + "loss_num": 0.0294189453125, + "loss_xval": 1.078125, + "num_input_tokens_seen": 448827216, + "step": 6710 + }, + { + "epoch": 0.761531914893617, + "grad_norm": 38.40521240234375, + "learning_rate": 5e-05, + "loss": 1.5335, + "num_input_tokens_seen": 448893440, + "step": 6711 + }, + { + "epoch": 0.761531914893617, + "loss": 1.4393811225891113, + "loss_ce": 0.005787340924143791, + "loss_iou": 0.546875, + "loss_num": 0.0673828125, + "loss_xval": 1.4375, + "num_input_tokens_seen": 448893440, + "step": 6711 + }, + { + "epoch": 0.761645390070922, + "grad_norm": 9.741979598999023, + "learning_rate": 5e-05, + "loss": 0.9849, + "num_input_tokens_seen": 448960876, + "step": 6712 + }, + { + "epoch": 0.761645390070922, + "loss": 1.0443086624145508, + "loss_ce": 0.008175778202712536, + "loss_iou": 0.4296875, + "loss_num": 0.035888671875, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 448960876, + "step": 6712 + }, + { + "epoch": 0.761758865248227, + "grad_norm": 16.929649353027344, + "learning_rate": 5e-05, + "loss": 1.0818, + "num_input_tokens_seen": 449027264, + "step": 6713 + }, + { + "epoch": 0.761758865248227, + "loss": 1.0218563079833984, + "loss_ce": 0.007452056743204594, + "loss_iou": 0.453125, + "loss_num": 0.0213623046875, + "loss_xval": 1.015625, + "num_input_tokens_seen": 449027264, + "step": 6713 + }, + { + "epoch": 0.7618723404255319, + "grad_norm": 41.22208786010742, + "learning_rate": 5e-05, + "loss": 1.231, + "num_input_tokens_seen": 449094464, + "step": 6714 + }, + { + "epoch": 0.7618723404255319, + "loss": 1.1890416145324707, + "loss_ce": 0.006912697572261095, + "loss_iou": 0.515625, + "loss_num": 0.0296630859375, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 449094464, + "step": 6714 + }, + { + "epoch": 0.7619858156028368, + "grad_norm": 26.847623825073242, + "learning_rate": 5e-05, + "loss": 1.3973, + "num_input_tokens_seen": 449161600, + "step": 6715 + }, + { + "epoch": 0.7619858156028368, + "loss": 1.4956083297729492, + "loss_ce": 0.004885601811110973, + "loss_iou": 0.61328125, + "loss_num": 0.052978515625, + "loss_xval": 1.4921875, + "num_input_tokens_seen": 449161600, + "step": 6715 + }, + { + "epoch": 0.7620992907801418, + "grad_norm": 21.586599349975586, + "learning_rate": 5e-05, + "loss": 1.2028, + "num_input_tokens_seen": 449228000, + "step": 6716 + }, + { + "epoch": 0.7620992907801418, + "loss": 1.1647449731826782, + "loss_ce": 0.005077033769339323, + "loss_iou": 0.50390625, + "loss_num": 0.0303955078125, + "loss_xval": 1.15625, + "num_input_tokens_seen": 449228000, + "step": 6716 + }, + { + "epoch": 0.7622127659574468, + "grad_norm": 36.189361572265625, + "learning_rate": 5e-05, + "loss": 1.136, + "num_input_tokens_seen": 449295472, + "step": 6717 + }, + { + "epoch": 0.7622127659574468, + "loss": 1.184981107711792, + "loss_ce": 0.004805335775017738, + "loss_iou": 0.4921875, + "loss_num": 0.039306640625, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 449295472, + "step": 6717 + }, + { + "epoch": 0.7623262411347518, + "grad_norm": 32.435123443603516, + "learning_rate": 5e-05, + "loss": 1.1715, + "num_input_tokens_seen": 449362672, + "step": 6718 + }, + { + "epoch": 0.7623262411347518, + "loss": 1.0561881065368652, + "loss_ce": 0.004918566904962063, + "loss_iou": 0.4453125, + "loss_num": 0.032470703125, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 449362672, + "step": 6718 + }, + { + "epoch": 0.7624397163120568, + "grad_norm": 43.603485107421875, + "learning_rate": 5e-05, + "loss": 1.1362, + "num_input_tokens_seen": 449429792, + "step": 6719 + }, + { + "epoch": 0.7624397163120568, + "loss": 1.087639570236206, + "loss_ce": 0.00658478494733572, + "loss_iou": 0.453125, + "loss_num": 0.03515625, + "loss_xval": 1.078125, + "num_input_tokens_seen": 449429792, + "step": 6719 + }, + { + "epoch": 0.7625531914893617, + "grad_norm": 25.598922729492188, + "learning_rate": 5e-05, + "loss": 1.3103, + "num_input_tokens_seen": 449496404, + "step": 6720 + }, + { + "epoch": 0.7625531914893617, + "loss": 1.366175651550293, + "loss_ce": 0.0038710441440343857, + "loss_iou": 0.578125, + "loss_num": 0.04052734375, + "loss_xval": 1.359375, + "num_input_tokens_seen": 449496404, + "step": 6720 + }, + { + "epoch": 0.7626666666666667, + "grad_norm": 18.753589630126953, + "learning_rate": 5e-05, + "loss": 1.1518, + "num_input_tokens_seen": 449563584, + "step": 6721 + }, + { + "epoch": 0.7626666666666667, + "loss": 1.226701021194458, + "loss_ce": 0.006486136931926012, + "loss_iou": 0.51171875, + "loss_num": 0.038818359375, + "loss_xval": 1.21875, + "num_input_tokens_seen": 449563584, + "step": 6721 + }, + { + "epoch": 0.7627801418439716, + "grad_norm": 31.097440719604492, + "learning_rate": 5e-05, + "loss": 1.3475, + "num_input_tokens_seen": 449632052, + "step": 6722 + }, + { + "epoch": 0.7627801418439716, + "loss": 1.2335394620895386, + "loss_ce": 0.0069770002737641335, + "loss_iou": 0.51953125, + "loss_num": 0.037109375, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 449632052, + "step": 6722 + }, + { + "epoch": 0.7628936170212766, + "grad_norm": 25.037324905395508, + "learning_rate": 5e-05, + "loss": 1.2422, + "num_input_tokens_seen": 449698352, + "step": 6723 + }, + { + "epoch": 0.7628936170212766, + "loss": 1.1429563760757446, + "loss_ce": 0.008190754801034927, + "loss_iou": 0.447265625, + "loss_num": 0.048095703125, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 449698352, + "step": 6723 + }, + { + "epoch": 0.7630070921985815, + "grad_norm": 80.36835479736328, + "learning_rate": 5e-05, + "loss": 0.9929, + "num_input_tokens_seen": 449764180, + "step": 6724 + }, + { + "epoch": 0.7630070921985815, + "loss": 0.9689071178436279, + "loss_ce": 0.007115087937563658, + "loss_iou": 0.3984375, + "loss_num": 0.03271484375, + "loss_xval": 0.9609375, + "num_input_tokens_seen": 449764180, + "step": 6724 + }, + { + "epoch": 0.7631205673758865, + "grad_norm": 25.24677276611328, + "learning_rate": 5e-05, + "loss": 1.1116, + "num_input_tokens_seen": 449831296, + "step": 6725 + }, + { + "epoch": 0.7631205673758865, + "loss": 1.12605619430542, + "loss_ce": 0.005450760945677757, + "loss_iou": 0.43359375, + "loss_num": 0.050537109375, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 449831296, + "step": 6725 + }, + { + "epoch": 0.7632340425531915, + "grad_norm": 42.495033264160156, + "learning_rate": 5e-05, + "loss": 1.182, + "num_input_tokens_seen": 449898616, + "step": 6726 + }, + { + "epoch": 0.7632340425531915, + "loss": 1.1455326080322266, + "loss_ce": 0.007837343961000443, + "loss_iou": 0.47265625, + "loss_num": 0.03857421875, + "loss_xval": 1.140625, + "num_input_tokens_seen": 449898616, + "step": 6726 + }, + { + "epoch": 0.7633475177304965, + "grad_norm": 25.99017906188965, + "learning_rate": 5e-05, + "loss": 1.301, + "num_input_tokens_seen": 449965460, + "step": 6727 + }, + { + "epoch": 0.7633475177304965, + "loss": 1.1737700700759888, + "loss_ce": 0.006777916103601456, + "loss_iou": 0.53125, + "loss_num": 0.0213623046875, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 449965460, + "step": 6727 + }, + { + "epoch": 0.7634609929078015, + "grad_norm": 12.269665718078613, + "learning_rate": 5e-05, + "loss": 0.9857, + "num_input_tokens_seen": 450032240, + "step": 6728 + }, + { + "epoch": 0.7634609929078015, + "loss": 1.1349873542785645, + "loss_ce": 0.009010709822177887, + "loss_iou": 0.45703125, + "loss_num": 0.04248046875, + "loss_xval": 1.125, + "num_input_tokens_seen": 450032240, + "step": 6728 + }, + { + "epoch": 0.7635744680851064, + "grad_norm": 16.165382385253906, + "learning_rate": 5e-05, + "loss": 1.2545, + "num_input_tokens_seen": 450099008, + "step": 6729 + }, + { + "epoch": 0.7635744680851064, + "loss": 1.0914709568023682, + "loss_ce": 0.006021701730787754, + "loss_iou": 0.458984375, + "loss_num": 0.03369140625, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 450099008, + "step": 6729 + }, + { + "epoch": 0.7636879432624113, + "grad_norm": 24.727380752563477, + "learning_rate": 5e-05, + "loss": 1.2383, + "num_input_tokens_seen": 450165696, + "step": 6730 + }, + { + "epoch": 0.7636879432624113, + "loss": 1.2150487899780273, + "loss_ce": 0.0021582464687526226, + "loss_iou": 0.47265625, + "loss_num": 0.0537109375, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 450165696, + "step": 6730 + }, + { + "epoch": 0.7638014184397163, + "grad_norm": 27.670181274414062, + "learning_rate": 5e-05, + "loss": 1.0996, + "num_input_tokens_seen": 450232164, + "step": 6731 + }, + { + "epoch": 0.7638014184397163, + "loss": 1.0048041343688965, + "loss_ce": 0.009198619052767754, + "loss_iou": 0.40625, + "loss_num": 0.036865234375, + "loss_xval": 0.99609375, + "num_input_tokens_seen": 450232164, + "step": 6731 + }, + { + "epoch": 0.7639148936170213, + "grad_norm": 33.67862319946289, + "learning_rate": 5e-05, + "loss": 1.0124, + "num_input_tokens_seen": 450299140, + "step": 6732 + }, + { + "epoch": 0.7639148936170213, + "loss": 0.8748698234558105, + "loss_ce": 0.0052408985793590546, + "loss_iou": 0.384765625, + "loss_num": 0.0201416015625, + "loss_xval": 0.87109375, + "num_input_tokens_seen": 450299140, + "step": 6732 + }, + { + "epoch": 0.7640283687943262, + "grad_norm": 54.7871208190918, + "learning_rate": 5e-05, + "loss": 1.4149, + "num_input_tokens_seen": 450365680, + "step": 6733 + }, + { + "epoch": 0.7640283687943262, + "loss": 1.5262086391448975, + "loss_ce": 0.008142197504639626, + "loss_iou": 0.61328125, + "loss_num": 0.058837890625, + "loss_xval": 1.515625, + "num_input_tokens_seen": 450365680, + "step": 6733 + }, + { + "epoch": 0.7641418439716312, + "grad_norm": 21.610170364379883, + "learning_rate": 5e-05, + "loss": 1.3012, + "num_input_tokens_seen": 450433036, + "step": 6734 + }, + { + "epoch": 0.7641418439716312, + "loss": 1.217397689819336, + "loss_ce": 0.010366518050432205, + "loss_iou": 0.515625, + "loss_num": 0.035400390625, + "loss_xval": 1.203125, + "num_input_tokens_seen": 450433036, + "step": 6734 + }, + { + "epoch": 0.7642553191489362, + "grad_norm": 22.71718978881836, + "learning_rate": 5e-05, + "loss": 1.2284, + "num_input_tokens_seen": 450500004, + "step": 6735 + }, + { + "epoch": 0.7642553191489362, + "loss": 1.2651221752166748, + "loss_ce": 0.006333064287900925, + "loss_iou": 0.49609375, + "loss_num": 0.05322265625, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 450500004, + "step": 6735 + }, + { + "epoch": 0.7643687943262412, + "grad_norm": 38.01597213745117, + "learning_rate": 5e-05, + "loss": 1.1198, + "num_input_tokens_seen": 450566388, + "step": 6736 + }, + { + "epoch": 0.7643687943262412, + "loss": 1.0768895149230957, + "loss_ce": 0.007065304089337587, + "loss_iou": 0.474609375, + "loss_num": 0.0238037109375, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 450566388, + "step": 6736 + }, + { + "epoch": 0.7644822695035461, + "grad_norm": 34.73257827758789, + "learning_rate": 5e-05, + "loss": 1.3901, + "num_input_tokens_seen": 450633272, + "step": 6737 + }, + { + "epoch": 0.7644822695035461, + "loss": 1.2003107070922852, + "loss_ce": 0.0074395169503986835, + "loss_iou": 0.47265625, + "loss_num": 0.049072265625, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 450633272, + "step": 6737 + }, + { + "epoch": 0.7645957446808511, + "grad_norm": 35.60504913330078, + "learning_rate": 5e-05, + "loss": 1.2452, + "num_input_tokens_seen": 450700716, + "step": 6738 + }, + { + "epoch": 0.7645957446808511, + "loss": 1.2669556140899658, + "loss_ce": 0.005236922297626734, + "loss_iou": 0.48828125, + "loss_num": 0.057373046875, + "loss_xval": 1.265625, + "num_input_tokens_seen": 450700716, + "step": 6738 + }, + { + "epoch": 0.764709219858156, + "grad_norm": 37.09321212768555, + "learning_rate": 5e-05, + "loss": 1.153, + "num_input_tokens_seen": 450767512, + "step": 6739 + }, + { + "epoch": 0.764709219858156, + "loss": 1.1259725093841553, + "loss_ce": 0.005855333060026169, + "loss_iou": 0.494140625, + "loss_num": 0.0263671875, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 450767512, + "step": 6739 + }, + { + "epoch": 0.764822695035461, + "grad_norm": 30.80705451965332, + "learning_rate": 5e-05, + "loss": 1.0876, + "num_input_tokens_seen": 450835380, + "step": 6740 + }, + { + "epoch": 0.764822695035461, + "loss": 1.0643006563186646, + "loss_ce": 0.0037537359166890383, + "loss_iou": 0.474609375, + "loss_num": 0.0223388671875, + "loss_xval": 1.0625, + "num_input_tokens_seen": 450835380, + "step": 6740 + }, + { + "epoch": 0.7649361702127659, + "grad_norm": 29.221284866333008, + "learning_rate": 5e-05, + "loss": 1.2022, + "num_input_tokens_seen": 450902588, + "step": 6741 + }, + { + "epoch": 0.7649361702127659, + "loss": 1.168081283569336, + "loss_ce": 0.005483664572238922, + "loss_iou": 0.490234375, + "loss_num": 0.03662109375, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 450902588, + "step": 6741 + }, + { + "epoch": 0.7650496453900709, + "grad_norm": 16.945199966430664, + "learning_rate": 5e-05, + "loss": 1.1039, + "num_input_tokens_seen": 450969028, + "step": 6742 + }, + { + "epoch": 0.7650496453900709, + "loss": 1.1933704614639282, + "loss_ce": 0.004893874749541283, + "loss_iou": 0.470703125, + "loss_num": 0.049072265625, + "loss_xval": 1.1875, + "num_input_tokens_seen": 450969028, + "step": 6742 + }, + { + "epoch": 0.7651631205673759, + "grad_norm": 23.565261840820312, + "learning_rate": 5e-05, + "loss": 1.2581, + "num_input_tokens_seen": 451035704, + "step": 6743 + }, + { + "epoch": 0.7651631205673759, + "loss": 1.316870927810669, + "loss_ce": 0.008277136832475662, + "loss_iou": 0.54296875, + "loss_num": 0.044189453125, + "loss_xval": 1.3125, + "num_input_tokens_seen": 451035704, + "step": 6743 + }, + { + "epoch": 0.7652765957446809, + "grad_norm": 30.76888656616211, + "learning_rate": 5e-05, + "loss": 1.2842, + "num_input_tokens_seen": 451101736, + "step": 6744 + }, + { + "epoch": 0.7652765957446809, + "loss": 1.1110877990722656, + "loss_ce": 0.006534583866596222, + "loss_iou": 0.41015625, + "loss_num": 0.057373046875, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 451101736, + "step": 6744 + }, + { + "epoch": 0.7653900709219859, + "grad_norm": 26.660667419433594, + "learning_rate": 5e-05, + "loss": 1.1148, + "num_input_tokens_seen": 451168308, + "step": 6745 + }, + { + "epoch": 0.7653900709219859, + "loss": 1.2006173133850098, + "loss_ce": 0.004328242503106594, + "loss_iou": 0.50390625, + "loss_num": 0.03759765625, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 451168308, + "step": 6745 + }, + { + "epoch": 0.7655035460992908, + "grad_norm": 24.591705322265625, + "learning_rate": 5e-05, + "loss": 1.23, + "num_input_tokens_seen": 451235484, + "step": 6746 + }, + { + "epoch": 0.7655035460992908, + "loss": 1.3163105249404907, + "loss_ce": 0.00429880665615201, + "loss_iou": 0.50390625, + "loss_num": 0.061767578125, + "loss_xval": 1.3125, + "num_input_tokens_seen": 451235484, + "step": 6746 + }, + { + "epoch": 0.7656170212765957, + "grad_norm": 25.03845977783203, + "learning_rate": 5e-05, + "loss": 1.0471, + "num_input_tokens_seen": 451302060, + "step": 6747 + }, + { + "epoch": 0.7656170212765957, + "loss": 1.2817096710205078, + "loss_ce": 0.007051458582282066, + "loss_iou": 0.5234375, + "loss_num": 0.044921875, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 451302060, + "step": 6747 + }, + { + "epoch": 0.7657304964539007, + "grad_norm": 25.203462600708008, + "learning_rate": 5e-05, + "loss": 1.2353, + "num_input_tokens_seen": 451368056, + "step": 6748 + }, + { + "epoch": 0.7657304964539007, + "loss": 1.2659317255020142, + "loss_ce": 0.0042129335924983025, + "loss_iou": 0.498046875, + "loss_num": 0.05322265625, + "loss_xval": 1.265625, + "num_input_tokens_seen": 451368056, + "step": 6748 + }, + { + "epoch": 0.7658439716312057, + "grad_norm": 25.51482391357422, + "learning_rate": 5e-05, + "loss": 1.1377, + "num_input_tokens_seen": 451434972, + "step": 6749 + }, + { + "epoch": 0.7658439716312057, + "loss": 1.1875391006469727, + "loss_ce": 0.007363371085375547, + "loss_iou": 0.474609375, + "loss_num": 0.046142578125, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 451434972, + "step": 6749 + }, + { + "epoch": 0.7659574468085106, + "grad_norm": 37.965911865234375, + "learning_rate": 5e-05, + "loss": 1.2761, + "num_input_tokens_seen": 451501836, + "step": 6750 + }, + { + "epoch": 0.7659574468085106, + "eval_seeclick_CIoU": 0.3852258771657944, + "eval_seeclick_GIoU": 0.3650595098733902, + "eval_seeclick_IoU": 0.4765753746032715, + "eval_seeclick_MAE_all": 0.1573888212442398, + "eval_seeclick_MAE_h": 0.07881077937781811, + "eval_seeclick_MAE_w": 0.14403484016656876, + "eval_seeclick_MAE_x_boxes": 0.2572897747159004, + "eval_seeclick_MAE_y_boxes": 0.1046774834394455, + "eval_seeclick_NUM_probability": 0.9999788701534271, + "eval_seeclick_inside_bbox": 0.6770833432674408, + "eval_seeclick_loss": 2.4844210147857666, + "eval_seeclick_loss_ce": 0.013232672587037086, + "eval_seeclick_loss_iou": 0.85101318359375, + "eval_seeclick_loss_num": 0.16387939453125, + "eval_seeclick_loss_xval": 2.522216796875, + "eval_seeclick_runtime": 67.3035, + "eval_seeclick_samples_per_second": 0.698, + "eval_seeclick_steps_per_second": 0.03, + "num_input_tokens_seen": 451501836, + "step": 6750 + }, + { + "epoch": 0.7659574468085106, + "eval_icons_CIoU": 0.5478874444961548, + "eval_icons_GIoU": 0.5466618239879608, + "eval_icons_IoU": 0.5796228349208832, + "eval_icons_MAE_all": 0.12043264135718346, + "eval_icons_MAE_h": 0.08543887361884117, + "eval_icons_MAE_w": 0.11305024102330208, + "eval_icons_MAE_x_boxes": 0.10218040645122528, + "eval_icons_MAE_y_boxes": 0.04659481719136238, + "eval_icons_NUM_probability": 0.9999916851520538, + "eval_icons_inside_bbox": 0.8385416567325592, + "eval_icons_loss": 2.2075116634368896, + "eval_icons_loss_ce": 1.190183775179321e-05, + "eval_icons_loss_iou": 0.80029296875, + "eval_icons_loss_num": 0.1160888671875, + "eval_icons_loss_xval": 2.1806640625, + "eval_icons_runtime": 70.5982, + "eval_icons_samples_per_second": 0.708, + "eval_icons_steps_per_second": 0.028, + "num_input_tokens_seen": 451501836, + "step": 6750 + }, + { + "epoch": 0.7659574468085106, + "eval_screenspot_CIoU": 0.27013582984606427, + "eval_screenspot_GIoU": 0.2538364926973979, + "eval_screenspot_IoU": 0.36946095029513043, + "eval_screenspot_MAE_all": 0.2030626485745112, + "eval_screenspot_MAE_h": 0.141190767288208, + "eval_screenspot_MAE_w": 0.14950434366861978, + "eval_screenspot_MAE_x_boxes": 0.3334835072358449, + "eval_screenspot_MAE_y_boxes": 0.06172531098127365, + "eval_screenspot_NUM_probability": 0.999956488609314, + "eval_screenspot_inside_bbox": 0.5674999952316284, + "eval_screenspot_loss": 2.9086804389953613, + "eval_screenspot_loss_ce": 0.019886836409568787, + "eval_screenspot_loss_iou": 0.9345703125, + "eval_screenspot_loss_num": 0.214599609375, + "eval_screenspot_loss_xval": 2.9430338541666665, + "eval_screenspot_runtime": 118.3003, + "eval_screenspot_samples_per_second": 0.752, + "eval_screenspot_steps_per_second": 0.025, + "num_input_tokens_seen": 451501836, + "step": 6750 + }, + { + "epoch": 0.7659574468085106, + "eval_compot_CIoU": 0.3388002961874008, + "eval_compot_GIoU": 0.31468747556209564, + "eval_compot_IoU": 0.4154481142759323, + "eval_compot_MAE_all": 0.20508825778961182, + "eval_compot_MAE_h": 0.1889111027121544, + "eval_compot_MAE_w": 0.19849741458892822, + "eval_compot_MAE_x_boxes": 0.17248673737049103, + "eval_compot_MAE_y_boxes": 0.09765777364373207, + "eval_compot_NUM_probability": 0.9999613761901855, + "eval_compot_inside_bbox": 0.6024305522441864, + "eval_compot_loss": 2.9784412384033203, + "eval_compot_loss_ce": 0.006711647612974048, + "eval_compot_loss_iou": 0.981689453125, + "eval_compot_loss_num": 0.18813323974609375, + "eval_compot_loss_xval": 2.90380859375, + "eval_compot_runtime": 69.7368, + "eval_compot_samples_per_second": 0.717, + "eval_compot_steps_per_second": 0.029, + "num_input_tokens_seen": 451501836, + "step": 6750 + }, + { + "epoch": 0.7659574468085106, + "loss": 2.925474166870117, + "loss_ce": 0.00848203245550394, + "loss_iou": 0.9765625, + "loss_num": 0.193359375, + "loss_xval": 2.921875, + "num_input_tokens_seen": 451501836, + "step": 6750 + }, + { + "epoch": 0.7660709219858156, + "grad_norm": 30.675806045532227, + "learning_rate": 5e-05, + "loss": 1.5532, + "num_input_tokens_seen": 451568400, + "step": 6751 + }, + { + "epoch": 0.7660709219858156, + "loss": 1.4644885063171387, + "loss_ce": 0.006480642594397068, + "loss_iou": 0.5859375, + "loss_num": 0.0576171875, + "loss_xval": 1.4609375, + "num_input_tokens_seen": 451568400, + "step": 6751 + }, + { + "epoch": 0.7661843971631206, + "grad_norm": 14.778109550476074, + "learning_rate": 5e-05, + "loss": 0.9051, + "num_input_tokens_seen": 451636164, + "step": 6752 + }, + { + "epoch": 0.7661843971631206, + "loss": 0.9036844968795776, + "loss_ce": 0.0042703887447714806, + "loss_iou": 0.390625, + "loss_num": 0.0235595703125, + "loss_xval": 0.8984375, + "num_input_tokens_seen": 451636164, + "step": 6752 + }, + { + "epoch": 0.7662978723404256, + "grad_norm": 16.5281982421875, + "learning_rate": 5e-05, + "loss": 1.0621, + "num_input_tokens_seen": 451703392, + "step": 6753 + }, + { + "epoch": 0.7662978723404256, + "loss": 0.9721628427505493, + "loss_ce": 0.0029245801270008087, + "loss_iou": 0.421875, + "loss_num": 0.0247802734375, + "loss_xval": 0.96875, + "num_input_tokens_seen": 451703392, + "step": 6753 + }, + { + "epoch": 0.7664113475177305, + "grad_norm": 33.75365447998047, + "learning_rate": 5e-05, + "loss": 1.2352, + "num_input_tokens_seen": 451770856, + "step": 6754 + }, + { + "epoch": 0.7664113475177305, + "loss": 1.1778895854949951, + "loss_ce": 0.003084865864366293, + "loss_iou": 0.515625, + "loss_num": 0.0284423828125, + "loss_xval": 1.171875, + "num_input_tokens_seen": 451770856, + "step": 6754 + }, + { + "epoch": 0.7665248226950354, + "grad_norm": 36.17774200439453, + "learning_rate": 5e-05, + "loss": 1.3647, + "num_input_tokens_seen": 451836672, + "step": 6755 + }, + { + "epoch": 0.7665248226950354, + "loss": 1.4139240980148315, + "loss_ce": 0.007185843773186207, + "loss_iou": 0.53515625, + "loss_num": 0.0673828125, + "loss_xval": 1.40625, + "num_input_tokens_seen": 451836672, + "step": 6755 + }, + { + "epoch": 0.7666382978723404, + "grad_norm": 17.209186553955078, + "learning_rate": 5e-05, + "loss": 1.1637, + "num_input_tokens_seen": 451903524, + "step": 6756 + }, + { + "epoch": 0.7666382978723404, + "loss": 1.2124745845794678, + "loss_ce": 0.004466773010790348, + "loss_iou": 0.482421875, + "loss_num": 0.04833984375, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 451903524, + "step": 6756 + }, + { + "epoch": 0.7667517730496454, + "grad_norm": 16.867788314819336, + "learning_rate": 5e-05, + "loss": 1.1777, + "num_input_tokens_seen": 451970988, + "step": 6757 + }, + { + "epoch": 0.7667517730496454, + "loss": 1.1817071437835693, + "loss_ce": 0.009343920275568962, + "loss_iou": 0.474609375, + "loss_num": 0.04443359375, + "loss_xval": 1.171875, + "num_input_tokens_seen": 451970988, + "step": 6757 + }, + { + "epoch": 0.7668652482269503, + "grad_norm": 19.780166625976562, + "learning_rate": 5e-05, + "loss": 1.2927, + "num_input_tokens_seen": 452036624, + "step": 6758 + }, + { + "epoch": 0.7668652482269503, + "loss": 1.2069826126098633, + "loss_ce": 0.007275572046637535, + "loss_iou": 0.470703125, + "loss_num": 0.051513671875, + "loss_xval": 1.203125, + "num_input_tokens_seen": 452036624, + "step": 6758 + }, + { + "epoch": 0.7669787234042553, + "grad_norm": 15.574360847473145, + "learning_rate": 5e-05, + "loss": 1.1655, + "num_input_tokens_seen": 452104160, + "step": 6759 + }, + { + "epoch": 0.7669787234042553, + "loss": 1.0714524984359741, + "loss_ce": 0.010417354293167591, + "loss_iou": 0.421875, + "loss_num": 0.04296875, + "loss_xval": 1.0625, + "num_input_tokens_seen": 452104160, + "step": 6759 + }, + { + "epoch": 0.7670921985815603, + "grad_norm": 30.478975296020508, + "learning_rate": 5e-05, + "loss": 0.9699, + "num_input_tokens_seen": 452171436, + "step": 6760 + }, + { + "epoch": 0.7670921985815603, + "loss": 0.8531641960144043, + "loss_ce": 0.006484486162662506, + "loss_iou": 0.37890625, + "loss_num": 0.0177001953125, + "loss_xval": 0.84765625, + "num_input_tokens_seen": 452171436, + "step": 6760 + }, + { + "epoch": 0.7672056737588653, + "grad_norm": 40.927574157714844, + "learning_rate": 5e-05, + "loss": 1.1582, + "num_input_tokens_seen": 452237108, + "step": 6761 + }, + { + "epoch": 0.7672056737588653, + "loss": 1.3780558109283447, + "loss_ce": 0.005497266072779894, + "loss_iou": 0.55859375, + "loss_num": 0.0517578125, + "loss_xval": 1.375, + "num_input_tokens_seen": 452237108, + "step": 6761 + }, + { + "epoch": 0.7673191489361703, + "grad_norm": 27.830198287963867, + "learning_rate": 5e-05, + "loss": 0.8908, + "num_input_tokens_seen": 452303400, + "step": 6762 + }, + { + "epoch": 0.7673191489361703, + "loss": 0.9335465431213379, + "loss_ce": 0.008589266799390316, + "loss_iou": 0.3828125, + "loss_num": 0.031494140625, + "loss_xval": 0.92578125, + "num_input_tokens_seen": 452303400, + "step": 6762 + }, + { + "epoch": 0.7674326241134751, + "grad_norm": 64.46080780029297, + "learning_rate": 5e-05, + "loss": 1.0136, + "num_input_tokens_seen": 452369780, + "step": 6763 + }, + { + "epoch": 0.7674326241134751, + "loss": 1.0277161598205566, + "loss_ce": 0.003546161577105522, + "loss_iou": 0.43359375, + "loss_num": 0.03125, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 452369780, + "step": 6763 + }, + { + "epoch": 0.7675460992907801, + "grad_norm": 25.737445831298828, + "learning_rate": 5e-05, + "loss": 1.3508, + "num_input_tokens_seen": 452436916, + "step": 6764 + }, + { + "epoch": 0.7675460992907801, + "loss": 1.505815029144287, + "loss_ce": 0.01313919760286808, + "loss_iou": 0.5625, + "loss_num": 0.0732421875, + "loss_xval": 1.4921875, + "num_input_tokens_seen": 452436916, + "step": 6764 + }, + { + "epoch": 0.7676595744680851, + "grad_norm": 41.560123443603516, + "learning_rate": 5e-05, + "loss": 1.2285, + "num_input_tokens_seen": 452504048, + "step": 6765 + }, + { + "epoch": 0.7676595744680851, + "loss": 1.1049742698669434, + "loss_ce": 0.005364825949072838, + "loss_iou": 0.458984375, + "loss_num": 0.035888671875, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 452504048, + "step": 6765 + }, + { + "epoch": 0.7677730496453901, + "grad_norm": 35.57231903076172, + "learning_rate": 5e-05, + "loss": 1.3524, + "num_input_tokens_seen": 452570164, + "step": 6766 + }, + { + "epoch": 0.7677730496453901, + "loss": 1.2658089399337769, + "loss_ce": 0.006531595718115568, + "loss_iou": 0.52734375, + "loss_num": 0.04052734375, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 452570164, + "step": 6766 + }, + { + "epoch": 0.767886524822695, + "grad_norm": 38.9010009765625, + "learning_rate": 5e-05, + "loss": 1.2618, + "num_input_tokens_seen": 452637184, + "step": 6767 + }, + { + "epoch": 0.767886524822695, + "loss": 1.3817055225372314, + "loss_ce": 0.01207663118839264, + "loss_iou": 0.52734375, + "loss_num": 0.06298828125, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 452637184, + "step": 6767 + }, + { + "epoch": 0.768, + "grad_norm": 23.96320152282715, + "learning_rate": 5e-05, + "loss": 1.1749, + "num_input_tokens_seen": 452705376, + "step": 6768 + }, + { + "epoch": 0.768, + "loss": 1.2505414485931396, + "loss_ce": 0.009818797931075096, + "loss_iou": 0.49609375, + "loss_num": 0.0498046875, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 452705376, + "step": 6768 + }, + { + "epoch": 0.768113475177305, + "grad_norm": 43.7763557434082, + "learning_rate": 5e-05, + "loss": 1.1369, + "num_input_tokens_seen": 452773220, + "step": 6769 + }, + { + "epoch": 0.768113475177305, + "loss": 1.173719882965088, + "loss_ce": 0.008192680776119232, + "loss_iou": 0.5, + "loss_num": 0.032470703125, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 452773220, + "step": 6769 + }, + { + "epoch": 0.76822695035461, + "grad_norm": 26.89236831665039, + "learning_rate": 5e-05, + "loss": 1.5703, + "num_input_tokens_seen": 452841180, + "step": 6770 + }, + { + "epoch": 0.76822695035461, + "loss": 1.4408173561096191, + "loss_ce": 0.0077118584886193275, + "loss_iou": 0.58203125, + "loss_num": 0.054443359375, + "loss_xval": 1.4296875, + "num_input_tokens_seen": 452841180, + "step": 6770 + }, + { + "epoch": 0.7683404255319148, + "grad_norm": 19.43583106994629, + "learning_rate": 5e-05, + "loss": 1.2802, + "num_input_tokens_seen": 452908296, + "step": 6771 + }, + { + "epoch": 0.7683404255319148, + "loss": 1.2467901706695557, + "loss_ce": 0.005090947262942791, + "loss_iou": 0.51953125, + "loss_num": 0.041259765625, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 452908296, + "step": 6771 + }, + { + "epoch": 0.7684539007092198, + "grad_norm": 38.4482536315918, + "learning_rate": 5e-05, + "loss": 1.2914, + "num_input_tokens_seen": 452975996, + "step": 6772 + }, + { + "epoch": 0.7684539007092198, + "loss": 1.2837498188018799, + "loss_ce": 0.004453042522072792, + "loss_iou": 0.51171875, + "loss_num": 0.0517578125, + "loss_xval": 1.28125, + "num_input_tokens_seen": 452975996, + "step": 6772 + }, + { + "epoch": 0.7685673758865248, + "grad_norm": 46.23192596435547, + "learning_rate": 5e-05, + "loss": 1.1654, + "num_input_tokens_seen": 453042248, + "step": 6773 + }, + { + "epoch": 0.7685673758865248, + "loss": 1.3548893928527832, + "loss_ce": 0.004791663959622383, + "loss_iou": 0.5546875, + "loss_num": 0.04736328125, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 453042248, + "step": 6773 + }, + { + "epoch": 0.7686808510638298, + "grad_norm": 16.053709030151367, + "learning_rate": 5e-05, + "loss": 1.1166, + "num_input_tokens_seen": 453109524, + "step": 6774 + }, + { + "epoch": 0.7686808510638298, + "loss": 1.207245111465454, + "loss_ce": 0.006561468821018934, + "loss_iou": 0.48828125, + "loss_num": 0.04443359375, + "loss_xval": 1.203125, + "num_input_tokens_seen": 453109524, + "step": 6774 + }, + { + "epoch": 0.7687943262411348, + "grad_norm": 27.508190155029297, + "learning_rate": 5e-05, + "loss": 1.271, + "num_input_tokens_seen": 453175996, + "step": 6775 + }, + { + "epoch": 0.7687943262411348, + "loss": 1.3924480676651, + "loss_ce": 0.0111004039645195, + "loss_iou": 0.58984375, + "loss_num": 0.041015625, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 453175996, + "step": 6775 + }, + { + "epoch": 0.7689078014184397, + "grad_norm": 31.15863037109375, + "learning_rate": 5e-05, + "loss": 1.1515, + "num_input_tokens_seen": 453243260, + "step": 6776 + }, + { + "epoch": 0.7689078014184397, + "loss": 1.3347461223602295, + "loss_ce": 0.004179767332971096, + "loss_iou": 0.5625, + "loss_num": 0.040771484375, + "loss_xval": 1.328125, + "num_input_tokens_seen": 453243260, + "step": 6776 + }, + { + "epoch": 0.7690212765957447, + "grad_norm": 24.38945960998535, + "learning_rate": 5e-05, + "loss": 1.2849, + "num_input_tokens_seen": 453310236, + "step": 6777 + }, + { + "epoch": 0.7690212765957447, + "loss": 1.47310471534729, + "loss_ce": 0.012167282402515411, + "loss_iou": 0.6171875, + "loss_num": 0.045654296875, + "loss_xval": 1.4609375, + "num_input_tokens_seen": 453310236, + "step": 6777 + }, + { + "epoch": 0.7691347517730497, + "grad_norm": 15.323793411254883, + "learning_rate": 5e-05, + "loss": 0.9656, + "num_input_tokens_seen": 453377020, + "step": 6778 + }, + { + "epoch": 0.7691347517730497, + "loss": 1.0664414167404175, + "loss_ce": 0.005894545000046492, + "loss_iou": 0.439453125, + "loss_num": 0.03662109375, + "loss_xval": 1.0625, + "num_input_tokens_seen": 453377020, + "step": 6778 + }, + { + "epoch": 0.7692482269503547, + "grad_norm": 28.2534236907959, + "learning_rate": 5e-05, + "loss": 1.2013, + "num_input_tokens_seen": 453443536, + "step": 6779 + }, + { + "epoch": 0.7692482269503547, + "loss": 1.1497721672058105, + "loss_ce": 0.003775994759052992, + "loss_iou": 0.48046875, + "loss_num": 0.037109375, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 453443536, + "step": 6779 + }, + { + "epoch": 0.7693617021276595, + "grad_norm": 48.617279052734375, + "learning_rate": 5e-05, + "loss": 1.1771, + "num_input_tokens_seen": 453510140, + "step": 6780 + }, + { + "epoch": 0.7693617021276595, + "loss": 1.0705060958862305, + "loss_ce": 0.007762018125504255, + "loss_iou": 0.451171875, + "loss_num": 0.032470703125, + "loss_xval": 1.0625, + "num_input_tokens_seen": 453510140, + "step": 6780 + }, + { + "epoch": 0.7694751773049645, + "grad_norm": 26.867931365966797, + "learning_rate": 5e-05, + "loss": 1.2328, + "num_input_tokens_seen": 453576808, + "step": 6781 + }, + { + "epoch": 0.7694751773049645, + "loss": 1.2438924312591553, + "loss_ce": 0.0036580022424459457, + "loss_iou": 0.56640625, + "loss_num": 0.0220947265625, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 453576808, + "step": 6781 + }, + { + "epoch": 0.7695886524822695, + "grad_norm": 19.32779312133789, + "learning_rate": 5e-05, + "loss": 1.4157, + "num_input_tokens_seen": 453643416, + "step": 6782 + }, + { + "epoch": 0.7695886524822695, + "loss": 1.3164514303207397, + "loss_ce": 0.006881115958094597, + "loss_iou": 0.515625, + "loss_num": 0.055908203125, + "loss_xval": 1.3125, + "num_input_tokens_seen": 453643416, + "step": 6782 + }, + { + "epoch": 0.7697021276595745, + "grad_norm": 16.939640045166016, + "learning_rate": 5e-05, + "loss": 0.9945, + "num_input_tokens_seen": 453710400, + "step": 6783 + }, + { + "epoch": 0.7697021276595745, + "loss": 1.0532931089401245, + "loss_ce": 0.008127041161060333, + "loss_iou": 0.427734375, + "loss_num": 0.037841796875, + "loss_xval": 1.046875, + "num_input_tokens_seen": 453710400, + "step": 6783 + }, + { + "epoch": 0.7698156028368794, + "grad_norm": 25.365249633789062, + "learning_rate": 5e-05, + "loss": 1.2914, + "num_input_tokens_seen": 453777444, + "step": 6784 + }, + { + "epoch": 0.7698156028368794, + "loss": 1.3329260349273682, + "loss_ce": 0.009195697493851185, + "loss_iou": 0.56640625, + "loss_num": 0.037841796875, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 453777444, + "step": 6784 + }, + { + "epoch": 0.7699290780141844, + "grad_norm": 38.23518753051758, + "learning_rate": 5e-05, + "loss": 1.2279, + "num_input_tokens_seen": 453843584, + "step": 6785 + }, + { + "epoch": 0.7699290780141844, + "loss": 1.4830622673034668, + "loss_ce": 0.006988056935369968, + "loss_iou": 0.5859375, + "loss_num": 0.0615234375, + "loss_xval": 1.4765625, + "num_input_tokens_seen": 453843584, + "step": 6785 + }, + { + "epoch": 0.7700425531914894, + "grad_norm": 26.612926483154297, + "learning_rate": 5e-05, + "loss": 1.0289, + "num_input_tokens_seen": 453910628, + "step": 6786 + }, + { + "epoch": 0.7700425531914894, + "loss": 1.013021469116211, + "loss_ce": 0.006185502745211124, + "loss_iou": 0.4453125, + "loss_num": 0.023681640625, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 453910628, + "step": 6786 + }, + { + "epoch": 0.7701560283687944, + "grad_norm": 38.833251953125, + "learning_rate": 5e-05, + "loss": 1.1933, + "num_input_tokens_seen": 453976672, + "step": 6787 + }, + { + "epoch": 0.7701560283687944, + "loss": 1.3688223361968994, + "loss_ce": 0.008958992548286915, + "loss_iou": 0.53515625, + "loss_num": 0.058349609375, + "loss_xval": 1.359375, + "num_input_tokens_seen": 453976672, + "step": 6787 + }, + { + "epoch": 0.7702695035460992, + "grad_norm": 43.80861282348633, + "learning_rate": 5e-05, + "loss": 1.4248, + "num_input_tokens_seen": 454044048, + "step": 6788 + }, + { + "epoch": 0.7702695035460992, + "loss": 1.3656646013259888, + "loss_ce": 0.00839531235396862, + "loss_iou": 0.53515625, + "loss_num": 0.057373046875, + "loss_xval": 1.359375, + "num_input_tokens_seen": 454044048, + "step": 6788 + }, + { + "epoch": 0.7703829787234042, + "grad_norm": 16.99005889892578, + "learning_rate": 5e-05, + "loss": 1.1601, + "num_input_tokens_seen": 454110180, + "step": 6789 + }, + { + "epoch": 0.7703829787234042, + "loss": 1.2141895294189453, + "loss_ce": 0.012041052803397179, + "loss_iou": 0.453125, + "loss_num": 0.0595703125, + "loss_xval": 1.203125, + "num_input_tokens_seen": 454110180, + "step": 6789 + }, + { + "epoch": 0.7704964539007092, + "grad_norm": 29.239681243896484, + "learning_rate": 5e-05, + "loss": 1.1188, + "num_input_tokens_seen": 454176816, + "step": 6790 + }, + { + "epoch": 0.7704964539007092, + "loss": 1.2422386407852173, + "loss_ce": 0.007375403307378292, + "loss_iou": 0.51171875, + "loss_num": 0.042724609375, + "loss_xval": 1.234375, + "num_input_tokens_seen": 454176816, + "step": 6790 + }, + { + "epoch": 0.7706099290780142, + "grad_norm": 45.405540466308594, + "learning_rate": 5e-05, + "loss": 1.149, + "num_input_tokens_seen": 454244140, + "step": 6791 + }, + { + "epoch": 0.7706099290780142, + "loss": 1.225257396697998, + "loss_ce": 0.004065890796482563, + "loss_iou": 0.51953125, + "loss_num": 0.03564453125, + "loss_xval": 1.21875, + "num_input_tokens_seen": 454244140, + "step": 6791 + }, + { + "epoch": 0.7707234042553192, + "grad_norm": 33.78533172607422, + "learning_rate": 5e-05, + "loss": 1.4741, + "num_input_tokens_seen": 454311164, + "step": 6792 + }, + { + "epoch": 0.7707234042553192, + "loss": 1.5209091901779175, + "loss_ce": 0.0072373514994978905, + "loss_iou": 0.65625, + "loss_num": 0.041015625, + "loss_xval": 1.515625, + "num_input_tokens_seen": 454311164, + "step": 6792 + }, + { + "epoch": 0.7708368794326241, + "grad_norm": 12.911633491516113, + "learning_rate": 5e-05, + "loss": 1.0111, + "num_input_tokens_seen": 454378368, + "step": 6793 + }, + { + "epoch": 0.7708368794326241, + "loss": 0.9254412055015564, + "loss_ce": 0.008449006825685501, + "loss_iou": 0.32421875, + "loss_num": 0.05419921875, + "loss_xval": 0.91796875, + "num_input_tokens_seen": 454378368, + "step": 6793 + }, + { + "epoch": 0.7709503546099291, + "grad_norm": 21.116046905517578, + "learning_rate": 5e-05, + "loss": 1.015, + "num_input_tokens_seen": 454444036, + "step": 6794 + }, + { + "epoch": 0.7709503546099291, + "loss": 1.1186174154281616, + "loss_ce": 0.004359606187790632, + "loss_iou": 0.494140625, + "loss_num": 0.0257568359375, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 454444036, + "step": 6794 + }, + { + "epoch": 0.7710638297872341, + "grad_norm": 17.886913299560547, + "learning_rate": 5e-05, + "loss": 1.2294, + "num_input_tokens_seen": 454511148, + "step": 6795 + }, + { + "epoch": 0.7710638297872341, + "loss": 1.2633161544799805, + "loss_ce": 0.005015344358980656, + "loss_iou": 0.515625, + "loss_num": 0.045654296875, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 454511148, + "step": 6795 + }, + { + "epoch": 0.771177304964539, + "grad_norm": 26.0510311126709, + "learning_rate": 5e-05, + "loss": 1.145, + "num_input_tokens_seen": 454577032, + "step": 6796 + }, + { + "epoch": 0.771177304964539, + "loss": 1.0589337348937988, + "loss_ce": 0.007664273492991924, + "loss_iou": 0.443359375, + "loss_num": 0.03271484375, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 454577032, + "step": 6796 + }, + { + "epoch": 0.7712907801418439, + "grad_norm": 20.25769805908203, + "learning_rate": 5e-05, + "loss": 1.166, + "num_input_tokens_seen": 454644136, + "step": 6797 + }, + { + "epoch": 0.7712907801418439, + "loss": 1.2073032855987549, + "loss_ce": 0.00857279822230339, + "loss_iou": 0.51171875, + "loss_num": 0.034912109375, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 454644136, + "step": 6797 + }, + { + "epoch": 0.7714042553191489, + "grad_norm": 36.786529541015625, + "learning_rate": 5e-05, + "loss": 1.2673, + "num_input_tokens_seen": 454711344, + "step": 6798 + }, + { + "epoch": 0.7714042553191489, + "loss": 1.219351887702942, + "loss_ce": 0.009390918537974358, + "loss_iou": 0.5078125, + "loss_num": 0.03955078125, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 454711344, + "step": 6798 + }, + { + "epoch": 0.7715177304964539, + "grad_norm": 37.726261138916016, + "learning_rate": 5e-05, + "loss": 1.2582, + "num_input_tokens_seen": 454778604, + "step": 6799 + }, + { + "epoch": 0.7715177304964539, + "loss": 1.2488071918487549, + "loss_ce": 0.005154876504093409, + "loss_iou": 0.515625, + "loss_num": 0.042236328125, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 454778604, + "step": 6799 + }, + { + "epoch": 0.7716312056737589, + "grad_norm": 24.251550674438477, + "learning_rate": 5e-05, + "loss": 1.3783, + "num_input_tokens_seen": 454844684, + "step": 6800 + }, + { + "epoch": 0.7716312056737589, + "loss": 1.3729079961776733, + "loss_ce": 0.009138448163866997, + "loss_iou": 0.58203125, + "loss_num": 0.0400390625, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 454844684, + "step": 6800 + }, + { + "epoch": 0.7717446808510638, + "grad_norm": 21.782390594482422, + "learning_rate": 5e-05, + "loss": 1.2245, + "num_input_tokens_seen": 454912008, + "step": 6801 + }, + { + "epoch": 0.7717446808510638, + "loss": 1.297581672668457, + "loss_ce": 0.007054284680634737, + "loss_iou": 0.515625, + "loss_num": 0.052734375, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 454912008, + "step": 6801 + }, + { + "epoch": 0.7718581560283688, + "grad_norm": 27.129552841186523, + "learning_rate": 5e-05, + "loss": 1.087, + "num_input_tokens_seen": 454979784, + "step": 6802 + }, + { + "epoch": 0.7718581560283688, + "loss": 1.178459882736206, + "loss_ce": 0.0056083472445607185, + "loss_iou": 0.470703125, + "loss_num": 0.046630859375, + "loss_xval": 1.171875, + "num_input_tokens_seen": 454979784, + "step": 6802 + }, + { + "epoch": 0.7719716312056738, + "grad_norm": 25.484668731689453, + "learning_rate": 5e-05, + "loss": 1.1454, + "num_input_tokens_seen": 455046324, + "step": 6803 + }, + { + "epoch": 0.7719716312056738, + "loss": 1.1959856748580933, + "loss_ce": 0.006776736583560705, + "loss_iou": 0.482421875, + "loss_num": 0.045166015625, + "loss_xval": 1.1875, + "num_input_tokens_seen": 455046324, + "step": 6803 + }, + { + "epoch": 0.7720851063829787, + "grad_norm": 35.78071975708008, + "learning_rate": 5e-05, + "loss": 1.3554, + "num_input_tokens_seen": 455113352, + "step": 6804 + }, + { + "epoch": 0.7720851063829787, + "loss": 1.3156437873840332, + "loss_ce": 0.01144464686512947, + "loss_iou": 0.515625, + "loss_num": 0.05517578125, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 455113352, + "step": 6804 + }, + { + "epoch": 0.7721985815602836, + "grad_norm": 29.710695266723633, + "learning_rate": 5e-05, + "loss": 1.1292, + "num_input_tokens_seen": 455180328, + "step": 6805 + }, + { + "epoch": 0.7721985815602836, + "loss": 0.994661808013916, + "loss_ce": 0.007906414568424225, + "loss_iou": 0.427734375, + "loss_num": 0.02587890625, + "loss_xval": 0.98828125, + "num_input_tokens_seen": 455180328, + "step": 6805 + }, + { + "epoch": 0.7723120567375886, + "grad_norm": 27.002683639526367, + "learning_rate": 5e-05, + "loss": 1.0207, + "num_input_tokens_seen": 455246348, + "step": 6806 + }, + { + "epoch": 0.7723120567375886, + "loss": 1.0944643020629883, + "loss_ce": 0.005108849611133337, + "loss_iou": 0.44140625, + "loss_num": 0.04150390625, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 455246348, + "step": 6806 + }, + { + "epoch": 0.7724255319148936, + "grad_norm": 33.39910888671875, + "learning_rate": 5e-05, + "loss": 1.3603, + "num_input_tokens_seen": 455313168, + "step": 6807 + }, + { + "epoch": 0.7724255319148936, + "loss": 1.3560914993286133, + "loss_ce": 0.009900130331516266, + "loss_iou": 0.515625, + "loss_num": 0.06396484375, + "loss_xval": 1.34375, + "num_input_tokens_seen": 455313168, + "step": 6807 + }, + { + "epoch": 0.7725390070921986, + "grad_norm": 36.854339599609375, + "learning_rate": 5e-05, + "loss": 1.3077, + "num_input_tokens_seen": 455379840, + "step": 6808 + }, + { + "epoch": 0.7725390070921986, + "loss": 1.2668989896774292, + "loss_ce": 0.004691931884735823, + "loss_iou": 0.5234375, + "loss_num": 0.04296875, + "loss_xval": 1.265625, + "num_input_tokens_seen": 455379840, + "step": 6808 + }, + { + "epoch": 0.7726524822695036, + "grad_norm": 32.43819808959961, + "learning_rate": 5e-05, + "loss": 1.2861, + "num_input_tokens_seen": 455447448, + "step": 6809 + }, + { + "epoch": 0.7726524822695036, + "loss": 1.295182466506958, + "loss_ce": 0.006119858473539352, + "loss_iou": 0.54296875, + "loss_num": 0.040283203125, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 455447448, + "step": 6809 + }, + { + "epoch": 0.7727659574468085, + "grad_norm": 22.13330841064453, + "learning_rate": 5e-05, + "loss": 1.1467, + "num_input_tokens_seen": 455514192, + "step": 6810 + }, + { + "epoch": 0.7727659574468085, + "loss": 1.2800984382629395, + "loss_ce": 0.006660965736955404, + "loss_iou": 0.5234375, + "loss_num": 0.045166015625, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 455514192, + "step": 6810 + }, + { + "epoch": 0.7728794326241135, + "grad_norm": 24.013957977294922, + "learning_rate": 5e-05, + "loss": 1.1738, + "num_input_tokens_seen": 455580756, + "step": 6811 + }, + { + "epoch": 0.7728794326241135, + "loss": 1.1776559352874756, + "loss_ce": 0.0067574698477983475, + "loss_iou": 0.494140625, + "loss_num": 0.036376953125, + "loss_xval": 1.171875, + "num_input_tokens_seen": 455580756, + "step": 6811 + }, + { + "epoch": 0.7729929078014185, + "grad_norm": 39.466514587402344, + "learning_rate": 5e-05, + "loss": 1.3073, + "num_input_tokens_seen": 455647512, + "step": 6812 + }, + { + "epoch": 0.7729929078014185, + "loss": 1.4221636056900024, + "loss_ce": 0.0041947802528738976, + "loss_iou": 0.546875, + "loss_num": 0.064453125, + "loss_xval": 1.421875, + "num_input_tokens_seen": 455647512, + "step": 6812 + }, + { + "epoch": 0.7731063829787234, + "grad_norm": 32.55644226074219, + "learning_rate": 5e-05, + "loss": 1.4174, + "num_input_tokens_seen": 455714988, + "step": 6813 + }, + { + "epoch": 0.7731063829787234, + "loss": 1.2896738052368164, + "loss_ce": 0.0025644428096711636, + "loss_iou": 0.53515625, + "loss_num": 0.04345703125, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 455714988, + "step": 6813 + }, + { + "epoch": 0.7732198581560283, + "grad_norm": 17.539222717285156, + "learning_rate": 5e-05, + "loss": 1.0137, + "num_input_tokens_seen": 455781492, + "step": 6814 + }, + { + "epoch": 0.7732198581560283, + "loss": 0.8909873962402344, + "loss_ce": 0.008663153275847435, + "loss_iou": 0.3203125, + "loss_num": 0.04833984375, + "loss_xval": 0.8828125, + "num_input_tokens_seen": 455781492, + "step": 6814 + }, + { + "epoch": 0.7733333333333333, + "grad_norm": 21.16798210144043, + "learning_rate": 5e-05, + "loss": 1.098, + "num_input_tokens_seen": 455848076, + "step": 6815 + }, + { + "epoch": 0.7733333333333333, + "loss": 1.0515382289886475, + "loss_ce": 0.006860480643808842, + "loss_iou": 0.40625, + "loss_num": 0.04638671875, + "loss_xval": 1.046875, + "num_input_tokens_seen": 455848076, + "step": 6815 + }, + { + "epoch": 0.7734468085106383, + "grad_norm": 27.30133819580078, + "learning_rate": 5e-05, + "loss": 1.1106, + "num_input_tokens_seen": 455914560, + "step": 6816 + }, + { + "epoch": 0.7734468085106383, + "loss": 0.9925054907798767, + "loss_ce": 0.006665684282779694, + "loss_iou": 0.427734375, + "loss_num": 0.0257568359375, + "loss_xval": 0.984375, + "num_input_tokens_seen": 455914560, + "step": 6816 + }, + { + "epoch": 0.7735602836879433, + "grad_norm": 18.399066925048828, + "learning_rate": 5e-05, + "loss": 1.127, + "num_input_tokens_seen": 455980644, + "step": 6817 + }, + { + "epoch": 0.7735602836879433, + "loss": 1.1792850494384766, + "loss_ce": 0.005945159588009119, + "loss_iou": 0.486328125, + "loss_num": 0.040283203125, + "loss_xval": 1.171875, + "num_input_tokens_seen": 455980644, + "step": 6817 + }, + { + "epoch": 0.7736737588652483, + "grad_norm": 22.88250160217285, + "learning_rate": 5e-05, + "loss": 1.2307, + "num_input_tokens_seen": 456048348, + "step": 6818 + }, + { + "epoch": 0.7736737588652483, + "loss": 1.1667182445526123, + "loss_ce": 0.007050210610032082, + "loss_iou": 0.466796875, + "loss_num": 0.045166015625, + "loss_xval": 1.15625, + "num_input_tokens_seen": 456048348, + "step": 6818 + }, + { + "epoch": 0.7737872340425532, + "grad_norm": 35.27875518798828, + "learning_rate": 5e-05, + "loss": 1.1276, + "num_input_tokens_seen": 456115424, + "step": 6819 + }, + { + "epoch": 0.7737872340425532, + "loss": 1.310410976409912, + "loss_ce": 0.007188195362687111, + "loss_iou": 0.5703125, + "loss_num": 0.03173828125, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 456115424, + "step": 6819 + }, + { + "epoch": 0.7739007092198582, + "grad_norm": 75.93000793457031, + "learning_rate": 5e-05, + "loss": 1.473, + "num_input_tokens_seen": 456182848, + "step": 6820 + }, + { + "epoch": 0.7739007092198582, + "loss": 1.4954825639724731, + "loss_ce": 0.006224771961569786, + "loss_iou": 0.625, + "loss_num": 0.048095703125, + "loss_xval": 1.4921875, + "num_input_tokens_seen": 456182848, + "step": 6820 + }, + { + "epoch": 0.7740141843971631, + "grad_norm": 19.019014358520508, + "learning_rate": 5e-05, + "loss": 1.1496, + "num_input_tokens_seen": 456250748, + "step": 6821 + }, + { + "epoch": 0.7740141843971631, + "loss": 1.2311331033706665, + "loss_ce": 0.007988542318344116, + "loss_iou": 0.51171875, + "loss_num": 0.03955078125, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 456250748, + "step": 6821 + }, + { + "epoch": 0.774127659574468, + "grad_norm": 22.460060119628906, + "learning_rate": 5e-05, + "loss": 1.3239, + "num_input_tokens_seen": 456317468, + "step": 6822 + }, + { + "epoch": 0.774127659574468, + "loss": 1.2350775003433228, + "loss_ce": 0.009003271348774433, + "loss_iou": 0.498046875, + "loss_num": 0.0458984375, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 456317468, + "step": 6822 + }, + { + "epoch": 0.774241134751773, + "grad_norm": 42.389530181884766, + "learning_rate": 5e-05, + "loss": 1.1448, + "num_input_tokens_seen": 456383848, + "step": 6823 + }, + { + "epoch": 0.774241134751773, + "loss": 1.1632871627807617, + "loss_ce": 0.008502049371600151, + "loss_iou": 0.453125, + "loss_num": 0.049560546875, + "loss_xval": 1.15625, + "num_input_tokens_seen": 456383848, + "step": 6823 + }, + { + "epoch": 0.774354609929078, + "grad_norm": 29.904661178588867, + "learning_rate": 5e-05, + "loss": 1.2096, + "num_input_tokens_seen": 456451696, + "step": 6824 + }, + { + "epoch": 0.774354609929078, + "loss": 1.2823927402496338, + "loss_ce": 0.005049010273069143, + "loss_iou": 0.5390625, + "loss_num": 0.0390625, + "loss_xval": 1.28125, + "num_input_tokens_seen": 456451696, + "step": 6824 + }, + { + "epoch": 0.774468085106383, + "grad_norm": 42.52508544921875, + "learning_rate": 5e-05, + "loss": 1.1004, + "num_input_tokens_seen": 456519196, + "step": 6825 + }, + { + "epoch": 0.774468085106383, + "loss": 1.0511757135391235, + "loss_ce": 0.006742155645042658, + "loss_iou": 0.427734375, + "loss_num": 0.03759765625, + "loss_xval": 1.046875, + "num_input_tokens_seen": 456519196, + "step": 6825 + }, + { + "epoch": 0.774581560283688, + "grad_norm": 29.868064880371094, + "learning_rate": 5e-05, + "loss": 1.3302, + "num_input_tokens_seen": 456586004, + "step": 6826 + }, + { + "epoch": 0.774581560283688, + "loss": 1.3897521495819092, + "loss_ce": 0.009869387373328209, + "loss_iou": 0.5078125, + "loss_num": 0.0732421875, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 456586004, + "step": 6826 + }, + { + "epoch": 0.7746950354609929, + "grad_norm": 24.19056510925293, + "learning_rate": 5e-05, + "loss": 1.1627, + "num_input_tokens_seen": 456652624, + "step": 6827 + }, + { + "epoch": 0.7746950354609929, + "loss": 1.1151885986328125, + "loss_ce": 0.007278429809957743, + "loss_iou": 0.474609375, + "loss_num": 0.031982421875, + "loss_xval": 1.109375, + "num_input_tokens_seen": 456652624, + "step": 6827 + }, + { + "epoch": 0.7748085106382979, + "grad_norm": 25.347198486328125, + "learning_rate": 5e-05, + "loss": 1.2475, + "num_input_tokens_seen": 456720752, + "step": 6828 + }, + { + "epoch": 0.7748085106382979, + "loss": 1.2290356159210205, + "loss_ce": 0.003937903326004744, + "loss_iou": 0.53515625, + "loss_num": 0.03125, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 456720752, + "step": 6828 + }, + { + "epoch": 0.7749219858156028, + "grad_norm": 31.703033447265625, + "learning_rate": 5e-05, + "loss": 1.0938, + "num_input_tokens_seen": 456788168, + "step": 6829 + }, + { + "epoch": 0.7749219858156028, + "loss": 1.2311265468597412, + "loss_ce": 0.008958645164966583, + "loss_iou": 0.5, + "loss_num": 0.04443359375, + "loss_xval": 1.21875, + "num_input_tokens_seen": 456788168, + "step": 6829 + }, + { + "epoch": 0.7750354609929078, + "grad_norm": 36.03573989868164, + "learning_rate": 5e-05, + "loss": 1.3004, + "num_input_tokens_seen": 456854468, + "step": 6830 + }, + { + "epoch": 0.7750354609929078, + "loss": 1.1750463247299194, + "loss_ce": 0.003415541723370552, + "loss_iou": 0.51171875, + "loss_num": 0.0303955078125, + "loss_xval": 1.171875, + "num_input_tokens_seen": 456854468, + "step": 6830 + }, + { + "epoch": 0.7751489361702127, + "grad_norm": 31.798385620117188, + "learning_rate": 5e-05, + "loss": 1.0424, + "num_input_tokens_seen": 456921032, + "step": 6831 + }, + { + "epoch": 0.7751489361702127, + "loss": 1.036453366279602, + "loss_ce": 0.007400620728731155, + "loss_iou": 0.4296875, + "loss_num": 0.0341796875, + "loss_xval": 1.03125, + "num_input_tokens_seen": 456921032, + "step": 6831 + }, + { + "epoch": 0.7752624113475177, + "grad_norm": 29.697956085205078, + "learning_rate": 5e-05, + "loss": 1.4489, + "num_input_tokens_seen": 456988244, + "step": 6832 + }, + { + "epoch": 0.7752624113475177, + "loss": 1.449838399887085, + "loss_ce": 0.005014199297875166, + "loss_iou": 0.53515625, + "loss_num": 0.0751953125, + "loss_xval": 1.4453125, + "num_input_tokens_seen": 456988244, + "step": 6832 + }, + { + "epoch": 0.7753758865248227, + "grad_norm": 30.193212509155273, + "learning_rate": 5e-05, + "loss": 1.2007, + "num_input_tokens_seen": 457055364, + "step": 6833 + }, + { + "epoch": 0.7753758865248227, + "loss": 0.994057834148407, + "loss_ce": 0.006478495895862579, + "loss_iou": 0.40625, + "loss_num": 0.03466796875, + "loss_xval": 0.98828125, + "num_input_tokens_seen": 457055364, + "step": 6833 + }, + { + "epoch": 0.7754893617021277, + "grad_norm": 30.403759002685547, + "learning_rate": 5e-05, + "loss": 1.2717, + "num_input_tokens_seen": 457122928, + "step": 6834 + }, + { + "epoch": 0.7754893617021277, + "loss": 1.1457750797271729, + "loss_ce": 0.002220351481810212, + "loss_iou": 0.48046875, + "loss_num": 0.036376953125, + "loss_xval": 1.140625, + "num_input_tokens_seen": 457122928, + "step": 6834 + }, + { + "epoch": 0.7756028368794327, + "grad_norm": 40.489688873291016, + "learning_rate": 5e-05, + "loss": 1.0916, + "num_input_tokens_seen": 457189008, + "step": 6835 + }, + { + "epoch": 0.7756028368794327, + "loss": 0.9680336713790894, + "loss_ce": 0.009537594392895699, + "loss_iou": 0.361328125, + "loss_num": 0.046630859375, + "loss_xval": 0.95703125, + "num_input_tokens_seen": 457189008, + "step": 6835 + }, + { + "epoch": 0.7757163120567376, + "grad_norm": 37.35228729248047, + "learning_rate": 5e-05, + "loss": 1.3151, + "num_input_tokens_seen": 457256224, + "step": 6836 + }, + { + "epoch": 0.7757163120567376, + "loss": 1.3668489456176758, + "loss_ce": 0.0035677445121109486, + "loss_iou": 0.55859375, + "loss_num": 0.048583984375, + "loss_xval": 1.359375, + "num_input_tokens_seen": 457256224, + "step": 6836 + }, + { + "epoch": 0.7758297872340425, + "grad_norm": 14.760811805725098, + "learning_rate": 5e-05, + "loss": 1.1731, + "num_input_tokens_seen": 457323780, + "step": 6837 + }, + { + "epoch": 0.7758297872340425, + "loss": 1.1865360736846924, + "loss_ce": 0.009045921266078949, + "loss_iou": 0.490234375, + "loss_num": 0.039794921875, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 457323780, + "step": 6837 + }, + { + "epoch": 0.7759432624113475, + "grad_norm": 23.28983497619629, + "learning_rate": 5e-05, + "loss": 1.1693, + "num_input_tokens_seen": 457391528, + "step": 6838 + }, + { + "epoch": 0.7759432624113475, + "loss": 1.023503303527832, + "loss_ce": 0.003972022794187069, + "loss_iou": 0.443359375, + "loss_num": 0.0267333984375, + "loss_xval": 1.015625, + "num_input_tokens_seen": 457391528, + "step": 6838 + }, + { + "epoch": 0.7760567375886525, + "grad_norm": 31.498910903930664, + "learning_rate": 5e-05, + "loss": 1.2569, + "num_input_tokens_seen": 457459100, + "step": 6839 + }, + { + "epoch": 0.7760567375886525, + "loss": 1.2958900928497314, + "loss_ce": 0.007315846625715494, + "loss_iou": 0.490234375, + "loss_num": 0.061279296875, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 457459100, + "step": 6839 + }, + { + "epoch": 0.7761702127659574, + "grad_norm": 25.631397247314453, + "learning_rate": 5e-05, + "loss": 1.1602, + "num_input_tokens_seen": 457527004, + "step": 6840 + }, + { + "epoch": 0.7761702127659574, + "loss": 1.2556395530700684, + "loss_ce": 0.005151208024471998, + "loss_iou": 0.4921875, + "loss_num": 0.052978515625, + "loss_xval": 1.25, + "num_input_tokens_seen": 457527004, + "step": 6840 + }, + { + "epoch": 0.7762836879432624, + "grad_norm": 27.23513412475586, + "learning_rate": 5e-05, + "loss": 1.4471, + "num_input_tokens_seen": 457594636, + "step": 6841 + }, + { + "epoch": 0.7762836879432624, + "loss": 1.5031579732894897, + "loss_ce": 0.010482155717909336, + "loss_iou": 0.59375, + "loss_num": 0.060791015625, + "loss_xval": 1.4921875, + "num_input_tokens_seen": 457594636, + "step": 6841 + }, + { + "epoch": 0.7763971631205674, + "grad_norm": 40.578941345214844, + "learning_rate": 5e-05, + "loss": 1.157, + "num_input_tokens_seen": 457661476, + "step": 6842 + }, + { + "epoch": 0.7763971631205674, + "loss": 1.0606162548065186, + "loss_ce": 0.0034872766118496656, + "loss_iou": 0.427734375, + "loss_num": 0.040771484375, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 457661476, + "step": 6842 + }, + { + "epoch": 0.7765106382978724, + "grad_norm": 26.51127815246582, + "learning_rate": 5e-05, + "loss": 1.4327, + "num_input_tokens_seen": 457728696, + "step": 6843 + }, + { + "epoch": 0.7765106382978724, + "loss": 1.4330023527145386, + "loss_ce": 0.006244550459086895, + "loss_iou": 0.58984375, + "loss_num": 0.048828125, + "loss_xval": 1.4296875, + "num_input_tokens_seen": 457728696, + "step": 6843 + }, + { + "epoch": 0.7766241134751773, + "grad_norm": 27.51950454711914, + "learning_rate": 5e-05, + "loss": 1.379, + "num_input_tokens_seen": 457796040, + "step": 6844 + }, + { + "epoch": 0.7766241134751773, + "loss": 1.3933475017547607, + "loss_ce": 0.005652248859405518, + "loss_iou": 0.5390625, + "loss_num": 0.062255859375, + "loss_xval": 1.390625, + "num_input_tokens_seen": 457796040, + "step": 6844 + }, + { + "epoch": 0.7767375886524823, + "grad_norm": 46.3087158203125, + "learning_rate": 5e-05, + "loss": 1.3589, + "num_input_tokens_seen": 457862840, + "step": 6845 + }, + { + "epoch": 0.7767375886524823, + "loss": 1.4455609321594238, + "loss_ce": 0.008060905151069164, + "loss_iou": 0.59765625, + "loss_num": 0.048828125, + "loss_xval": 1.4375, + "num_input_tokens_seen": 457862840, + "step": 6845 + }, + { + "epoch": 0.7768510638297872, + "grad_norm": 34.25402069091797, + "learning_rate": 5e-05, + "loss": 1.4016, + "num_input_tokens_seen": 457930736, + "step": 6846 + }, + { + "epoch": 0.7768510638297872, + "loss": 1.4513437747955322, + "loss_ce": 0.009937571361660957, + "loss_iou": 0.546875, + "loss_num": 0.0693359375, + "loss_xval": 1.4375, + "num_input_tokens_seen": 457930736, + "step": 6846 + }, + { + "epoch": 0.7769645390070922, + "grad_norm": 20.52100372314453, + "learning_rate": 5e-05, + "loss": 1.0406, + "num_input_tokens_seen": 457998164, + "step": 6847 + }, + { + "epoch": 0.7769645390070922, + "loss": 1.010272741317749, + "loss_ce": 0.007343036588281393, + "loss_iou": 0.447265625, + "loss_num": 0.021728515625, + "loss_xval": 1.0, + "num_input_tokens_seen": 457998164, + "step": 6847 + }, + { + "epoch": 0.7770780141843971, + "grad_norm": 21.212718963623047, + "learning_rate": 5e-05, + "loss": 1.1684, + "num_input_tokens_seen": 458064984, + "step": 6848 + }, + { + "epoch": 0.7770780141843971, + "loss": 1.2097967863082886, + "loss_ce": 0.0061834342777729034, + "loss_iou": 0.486328125, + "loss_num": 0.046630859375, + "loss_xval": 1.203125, + "num_input_tokens_seen": 458064984, + "step": 6848 + }, + { + "epoch": 0.7771914893617021, + "grad_norm": 44.803558349609375, + "learning_rate": 5e-05, + "loss": 1.2656, + "num_input_tokens_seen": 458131372, + "step": 6849 + }, + { + "epoch": 0.7771914893617021, + "loss": 1.25520920753479, + "loss_ce": 0.0049650948494672775, + "loss_iou": 0.484375, + "loss_num": 0.05615234375, + "loss_xval": 1.25, + "num_input_tokens_seen": 458131372, + "step": 6849 + }, + { + "epoch": 0.7773049645390071, + "grad_norm": 36.67531967163086, + "learning_rate": 5e-05, + "loss": 1.3079, + "num_input_tokens_seen": 458197896, + "step": 6850 + }, + { + "epoch": 0.7773049645390071, + "loss": 1.3739395141601562, + "loss_ce": 0.004310651682317257, + "loss_iou": 0.5703125, + "loss_num": 0.046142578125, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 458197896, + "step": 6850 + }, + { + "epoch": 0.7774184397163121, + "grad_norm": 26.39560317993164, + "learning_rate": 5e-05, + "loss": 1.1252, + "num_input_tokens_seen": 458265228, + "step": 6851 + }, + { + "epoch": 0.7774184397163121, + "loss": 1.1809816360473633, + "loss_ce": 0.005688754376024008, + "loss_iou": 0.494140625, + "loss_num": 0.03759765625, + "loss_xval": 1.171875, + "num_input_tokens_seen": 458265228, + "step": 6851 + }, + { + "epoch": 0.7775319148936171, + "grad_norm": 30.431194305419922, + "learning_rate": 5e-05, + "loss": 1.1861, + "num_input_tokens_seen": 458330680, + "step": 6852 + }, + { + "epoch": 0.7775319148936171, + "loss": 1.4075849056243896, + "loss_ce": 0.003776284633204341, + "loss_iou": 0.58984375, + "loss_num": 0.045166015625, + "loss_xval": 1.40625, + "num_input_tokens_seen": 458330680, + "step": 6852 + }, + { + "epoch": 0.777645390070922, + "grad_norm": 14.536852836608887, + "learning_rate": 5e-05, + "loss": 1.2915, + "num_input_tokens_seen": 458398140, + "step": 6853 + }, + { + "epoch": 0.777645390070922, + "loss": 1.2969067096710205, + "loss_ce": 0.008820842951536179, + "loss_iou": 0.48046875, + "loss_num": 0.06494140625, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 458398140, + "step": 6853 + }, + { + "epoch": 0.7777588652482269, + "grad_norm": 17.034841537475586, + "learning_rate": 5e-05, + "loss": 1.2668, + "num_input_tokens_seen": 458466160, + "step": 6854 + }, + { + "epoch": 0.7777588652482269, + "loss": 1.2671974897384644, + "loss_ce": 0.009384971112012863, + "loss_iou": 0.494140625, + "loss_num": 0.053955078125, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 458466160, + "step": 6854 + }, + { + "epoch": 0.7778723404255319, + "grad_norm": 48.58626174926758, + "learning_rate": 5e-05, + "loss": 1.2091, + "num_input_tokens_seen": 458533192, + "step": 6855 + }, + { + "epoch": 0.7778723404255319, + "loss": 1.3999807834625244, + "loss_ce": 0.011308890767395496, + "loss_iou": 0.51171875, + "loss_num": 0.0732421875, + "loss_xval": 1.390625, + "num_input_tokens_seen": 458533192, + "step": 6855 + }, + { + "epoch": 0.7779858156028369, + "grad_norm": 36.11088180541992, + "learning_rate": 5e-05, + "loss": 1.3609, + "num_input_tokens_seen": 458599940, + "step": 6856 + }, + { + "epoch": 0.7779858156028369, + "loss": 1.1772682666778564, + "loss_ce": 0.00783467199653387, + "loss_iou": 0.486328125, + "loss_num": 0.03955078125, + "loss_xval": 1.171875, + "num_input_tokens_seen": 458599940, + "step": 6856 + }, + { + "epoch": 0.7780992907801418, + "grad_norm": 14.436810493469238, + "learning_rate": 5e-05, + "loss": 1.0105, + "num_input_tokens_seen": 458665924, + "step": 6857 + }, + { + "epoch": 0.7780992907801418, + "loss": 0.8302362561225891, + "loss_ce": 0.00794010516256094, + "loss_iou": 0.33203125, + "loss_num": 0.031982421875, + "loss_xval": 0.82421875, + "num_input_tokens_seen": 458665924, + "step": 6857 + }, + { + "epoch": 0.7782127659574468, + "grad_norm": 19.170061111450195, + "learning_rate": 5e-05, + "loss": 1.2475, + "num_input_tokens_seen": 458733096, + "step": 6858 + }, + { + "epoch": 0.7782127659574468, + "loss": 1.3600001335144043, + "loss_ce": 0.005507830996066332, + "loss_iou": 0.5390625, + "loss_num": 0.05517578125, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 458733096, + "step": 6858 + }, + { + "epoch": 0.7783262411347518, + "grad_norm": 27.301156997680664, + "learning_rate": 5e-05, + "loss": 1.0308, + "num_input_tokens_seen": 458799972, + "step": 6859 + }, + { + "epoch": 0.7783262411347518, + "loss": 0.8971218466758728, + "loss_ce": 0.00747340964153409, + "loss_iou": 0.359375, + "loss_num": 0.034423828125, + "loss_xval": 0.890625, + "num_input_tokens_seen": 458799972, + "step": 6859 + }, + { + "epoch": 0.7784397163120568, + "grad_norm": 31.77831268310547, + "learning_rate": 5e-05, + "loss": 1.3694, + "num_input_tokens_seen": 458866396, + "step": 6860 + }, + { + "epoch": 0.7784397163120568, + "loss": 1.5099478960037231, + "loss_ce": 0.007018248550593853, + "loss_iou": 0.58984375, + "loss_num": 0.0654296875, + "loss_xval": 1.5, + "num_input_tokens_seen": 458866396, + "step": 6860 + }, + { + "epoch": 0.7785531914893618, + "grad_norm": 60.04932403564453, + "learning_rate": 5e-05, + "loss": 1.1604, + "num_input_tokens_seen": 458933120, + "step": 6861 + }, + { + "epoch": 0.7785531914893618, + "loss": 1.165856122970581, + "loss_ce": 0.007652954198420048, + "loss_iou": 0.4765625, + "loss_num": 0.04150390625, + "loss_xval": 1.15625, + "num_input_tokens_seen": 458933120, + "step": 6861 + }, + { + "epoch": 0.7786666666666666, + "grad_norm": 31.405498504638672, + "learning_rate": 5e-05, + "loss": 1.3952, + "num_input_tokens_seen": 458999452, + "step": 6862 + }, + { + "epoch": 0.7786666666666666, + "loss": 1.5967388153076172, + "loss_ce": 0.007871687412261963, + "loss_iou": 0.625, + "loss_num": 0.0673828125, + "loss_xval": 1.5859375, + "num_input_tokens_seen": 458999452, + "step": 6862 + }, + { + "epoch": 0.7787801418439716, + "grad_norm": 25.171667098999023, + "learning_rate": 5e-05, + "loss": 1.3256, + "num_input_tokens_seen": 459065888, + "step": 6863 + }, + { + "epoch": 0.7787801418439716, + "loss": 1.3202879428863525, + "loss_ce": 0.010229261592030525, + "loss_iou": 0.5390625, + "loss_num": 0.047119140625, + "loss_xval": 1.3125, + "num_input_tokens_seen": 459065888, + "step": 6863 + }, + { + "epoch": 0.7788936170212766, + "grad_norm": 26.387760162353516, + "learning_rate": 5e-05, + "loss": 1.1564, + "num_input_tokens_seen": 459132820, + "step": 6864 + }, + { + "epoch": 0.7788936170212766, + "loss": 1.217691421508789, + "loss_ce": 0.018961003050208092, + "loss_iou": 0.484375, + "loss_num": 0.045654296875, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 459132820, + "step": 6864 + }, + { + "epoch": 0.7790070921985816, + "grad_norm": 36.186065673828125, + "learning_rate": 5e-05, + "loss": 1.2552, + "num_input_tokens_seen": 459201300, + "step": 6865 + }, + { + "epoch": 0.7790070921985816, + "loss": 1.3237041234970093, + "loss_ce": 0.005832964088767767, + "loss_iou": 0.5546875, + "loss_num": 0.041748046875, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 459201300, + "step": 6865 + }, + { + "epoch": 0.7791205673758865, + "grad_norm": 29.87571144104004, + "learning_rate": 5e-05, + "loss": 1.44, + "num_input_tokens_seen": 459267852, + "step": 6866 + }, + { + "epoch": 0.7791205673758865, + "loss": 1.1906486749649048, + "loss_ce": 0.007054949179291725, + "loss_iou": 0.51171875, + "loss_num": 0.03173828125, + "loss_xval": 1.1875, + "num_input_tokens_seen": 459267852, + "step": 6866 + }, + { + "epoch": 0.7792340425531915, + "grad_norm": 12.01294231414795, + "learning_rate": 5e-05, + "loss": 1.3033, + "num_input_tokens_seen": 459335168, + "step": 6867 + }, + { + "epoch": 0.7792340425531915, + "loss": 1.1869431734085083, + "loss_ce": 0.004325938411056995, + "loss_iou": 0.48828125, + "loss_num": 0.041015625, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 459335168, + "step": 6867 + }, + { + "epoch": 0.7793475177304965, + "grad_norm": 148.3489227294922, + "learning_rate": 5e-05, + "loss": 1.2312, + "num_input_tokens_seen": 459401708, + "step": 6868 + }, + { + "epoch": 0.7793475177304965, + "loss": 1.3170344829559326, + "loss_ce": 0.005999356973916292, + "loss_iou": 0.49609375, + "loss_num": 0.0634765625, + "loss_xval": 1.3125, + "num_input_tokens_seen": 459401708, + "step": 6868 + }, + { + "epoch": 0.7794609929078015, + "grad_norm": 21.956876754760742, + "learning_rate": 5e-05, + "loss": 1.2696, + "num_input_tokens_seen": 459466332, + "step": 6869 + }, + { + "epoch": 0.7794609929078015, + "loss": 1.2499380111694336, + "loss_ce": 0.004820763599127531, + "loss_iou": 0.49609375, + "loss_num": 0.051025390625, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 459466332, + "step": 6869 + }, + { + "epoch": 0.7795744680851063, + "grad_norm": 26.93648338317871, + "learning_rate": 5e-05, + "loss": 1.1903, + "num_input_tokens_seen": 459533296, + "step": 6870 + }, + { + "epoch": 0.7795744680851063, + "loss": 1.0737874507904053, + "loss_ce": 0.00786950346082449, + "loss_iou": 0.40625, + "loss_num": 0.051025390625, + "loss_xval": 1.0625, + "num_input_tokens_seen": 459533296, + "step": 6870 + }, + { + "epoch": 0.7796879432624113, + "grad_norm": 26.476125717163086, + "learning_rate": 5e-05, + "loss": 1.153, + "num_input_tokens_seen": 459600016, + "step": 6871 + }, + { + "epoch": 0.7796879432624113, + "loss": 0.9094334840774536, + "loss_ce": 0.0056248875334858894, + "loss_iou": 0.3828125, + "loss_num": 0.0279541015625, + "loss_xval": 0.90234375, + "num_input_tokens_seen": 459600016, + "step": 6871 + }, + { + "epoch": 0.7798014184397163, + "grad_norm": 42.75954055786133, + "learning_rate": 5e-05, + "loss": 1.1185, + "num_input_tokens_seen": 459668004, + "step": 6872 + }, + { + "epoch": 0.7798014184397163, + "loss": 1.1451399326324463, + "loss_ce": 0.005979737266898155, + "loss_iou": 0.484375, + "loss_num": 0.0341796875, + "loss_xval": 1.140625, + "num_input_tokens_seen": 459668004, + "step": 6872 + }, + { + "epoch": 0.7799148936170213, + "grad_norm": 32.31513595581055, + "learning_rate": 5e-05, + "loss": 1.3714, + "num_input_tokens_seen": 459735140, + "step": 6873 + }, + { + "epoch": 0.7799148936170213, + "loss": 1.4529528617858887, + "loss_ce": 0.0056871818378567696, + "loss_iou": 0.5546875, + "loss_num": 0.0673828125, + "loss_xval": 1.4453125, + "num_input_tokens_seen": 459735140, + "step": 6873 + }, + { + "epoch": 0.7800283687943262, + "grad_norm": 21.835660934448242, + "learning_rate": 5e-05, + "loss": 1.2045, + "num_input_tokens_seen": 459801784, + "step": 6874 + }, + { + "epoch": 0.7800283687943262, + "loss": 1.3984651565551758, + "loss_ce": 0.009304940700531006, + "loss_iou": 0.5234375, + "loss_num": 0.0673828125, + "loss_xval": 1.390625, + "num_input_tokens_seen": 459801784, + "step": 6874 + }, + { + "epoch": 0.7801418439716312, + "grad_norm": 22.246532440185547, + "learning_rate": 5e-05, + "loss": 0.9455, + "num_input_tokens_seen": 459868848, + "step": 6875 + }, + { + "epoch": 0.7801418439716312, + "loss": 1.0276943445205688, + "loss_ce": 0.005233433097600937, + "loss_iou": 0.427734375, + "loss_num": 0.03271484375, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 459868848, + "step": 6875 + }, + { + "epoch": 0.7802553191489362, + "grad_norm": 49.863704681396484, + "learning_rate": 5e-05, + "loss": 1.6146, + "num_input_tokens_seen": 459935664, + "step": 6876 + }, + { + "epoch": 0.7802553191489362, + "loss": 1.6644370555877686, + "loss_ce": 0.006233927793800831, + "loss_iou": 0.6484375, + "loss_num": 0.07177734375, + "loss_xval": 1.65625, + "num_input_tokens_seen": 459935664, + "step": 6876 + }, + { + "epoch": 0.7803687943262412, + "grad_norm": 59.153507232666016, + "learning_rate": 5e-05, + "loss": 1.5374, + "num_input_tokens_seen": 460002356, + "step": 6877 + }, + { + "epoch": 0.7803687943262412, + "loss": 1.891052007675171, + "loss_ce": 0.00823947787284851, + "loss_iou": 0.75390625, + "loss_num": 0.07470703125, + "loss_xval": 1.8828125, + "num_input_tokens_seen": 460002356, + "step": 6877 + }, + { + "epoch": 0.780482269503546, + "grad_norm": 37.2530517578125, + "learning_rate": 5e-05, + "loss": 0.9977, + "num_input_tokens_seen": 460069916, + "step": 6878 + }, + { + "epoch": 0.780482269503546, + "loss": 0.8728225827217102, + "loss_ce": 0.004170228727161884, + "loss_iou": 0.39453125, + "loss_num": 0.0157470703125, + "loss_xval": 0.8671875, + "num_input_tokens_seen": 460069916, + "step": 6878 + }, + { + "epoch": 0.780595744680851, + "grad_norm": 22.588821411132812, + "learning_rate": 5e-05, + "loss": 1.3401, + "num_input_tokens_seen": 460136300, + "step": 6879 + }, + { + "epoch": 0.780595744680851, + "loss": 1.2680293321609497, + "loss_ce": 0.008751987479627132, + "loss_iou": 0.515625, + "loss_num": 0.044921875, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 460136300, + "step": 6879 + }, + { + "epoch": 0.780709219858156, + "grad_norm": 28.289838790893555, + "learning_rate": 5e-05, + "loss": 1.1292, + "num_input_tokens_seen": 460202756, + "step": 6880 + }, + { + "epoch": 0.780709219858156, + "loss": 1.1574864387512207, + "loss_ce": 0.011978578753769398, + "loss_iou": 0.4765625, + "loss_num": 0.0380859375, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 460202756, + "step": 6880 + }, + { + "epoch": 0.780822695035461, + "grad_norm": 30.847021102905273, + "learning_rate": 5e-05, + "loss": 1.1858, + "num_input_tokens_seen": 460268364, + "step": 6881 + }, + { + "epoch": 0.780822695035461, + "loss": 1.1210553646087646, + "loss_ce": 0.0037457169964909554, + "loss_iou": 0.4609375, + "loss_num": 0.039306640625, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 460268364, + "step": 6881 + }, + { + "epoch": 0.780936170212766, + "grad_norm": 48.35699462890625, + "learning_rate": 5e-05, + "loss": 1.4131, + "num_input_tokens_seen": 460334604, + "step": 6882 + }, + { + "epoch": 0.780936170212766, + "loss": 1.4187235832214355, + "loss_ce": 0.005637696944177151, + "loss_iou": 0.5546875, + "loss_num": 0.060791015625, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 460334604, + "step": 6882 + }, + { + "epoch": 0.7810496453900709, + "grad_norm": 41.699527740478516, + "learning_rate": 5e-05, + "loss": 1.388, + "num_input_tokens_seen": 460402544, + "step": 6883 + }, + { + "epoch": 0.7810496453900709, + "loss": 1.345398187637329, + "loss_ce": 0.004089638590812683, + "loss_iou": 0.58203125, + "loss_num": 0.03515625, + "loss_xval": 1.34375, + "num_input_tokens_seen": 460402544, + "step": 6883 + }, + { + "epoch": 0.7811631205673759, + "grad_norm": 18.074533462524414, + "learning_rate": 5e-05, + "loss": 1.0225, + "num_input_tokens_seen": 460470092, + "step": 6884 + }, + { + "epoch": 0.7811631205673759, + "loss": 1.0328562259674072, + "loss_ce": 0.004047645255923271, + "loss_iou": 0.396484375, + "loss_num": 0.047607421875, + "loss_xval": 1.03125, + "num_input_tokens_seen": 460470092, + "step": 6884 + }, + { + "epoch": 0.7812765957446809, + "grad_norm": 18.0989933013916, + "learning_rate": 5e-05, + "loss": 1.3063, + "num_input_tokens_seen": 460537324, + "step": 6885 + }, + { + "epoch": 0.7812765957446809, + "loss": 1.2746888399124146, + "loss_ce": 0.0032044807448983192, + "loss_iou": 0.52734375, + "loss_num": 0.04248046875, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 460537324, + "step": 6885 + }, + { + "epoch": 0.7813900709219859, + "grad_norm": 35.194190979003906, + "learning_rate": 5e-05, + "loss": 0.9842, + "num_input_tokens_seen": 460604192, + "step": 6886 + }, + { + "epoch": 0.7813900709219859, + "loss": 0.930612325668335, + "loss_ce": 0.006295960396528244, + "loss_iou": 0.39453125, + "loss_num": 0.0267333984375, + "loss_xval": 0.92578125, + "num_input_tokens_seen": 460604192, + "step": 6886 + }, + { + "epoch": 0.7815035460992907, + "grad_norm": 56.571292877197266, + "learning_rate": 5e-05, + "loss": 1.0887, + "num_input_tokens_seen": 460670476, + "step": 6887 + }, + { + "epoch": 0.7815035460992907, + "loss": 1.0603852272033691, + "loss_ce": 0.006186072714626789, + "loss_iou": 0.46484375, + "loss_num": 0.0247802734375, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 460670476, + "step": 6887 + }, + { + "epoch": 0.7816170212765957, + "grad_norm": 38.57762908935547, + "learning_rate": 5e-05, + "loss": 1.2811, + "num_input_tokens_seen": 460737852, + "step": 6888 + }, + { + "epoch": 0.7816170212765957, + "loss": 1.0749070644378662, + "loss_ce": 0.005571125540882349, + "loss_iou": 0.4609375, + "loss_num": 0.0296630859375, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 460737852, + "step": 6888 + }, + { + "epoch": 0.7817304964539007, + "grad_norm": 31.74656105041504, + "learning_rate": 5e-05, + "loss": 1.2594, + "num_input_tokens_seen": 460804624, + "step": 6889 + }, + { + "epoch": 0.7817304964539007, + "loss": 1.1787046194076538, + "loss_ce": 0.0057309456169605255, + "loss_iou": 0.5, + "loss_num": 0.03466796875, + "loss_xval": 1.171875, + "num_input_tokens_seen": 460804624, + "step": 6889 + }, + { + "epoch": 0.7818439716312057, + "grad_norm": 21.622312545776367, + "learning_rate": 5e-05, + "loss": 1.0848, + "num_input_tokens_seen": 460871584, + "step": 6890 + }, + { + "epoch": 0.7818439716312057, + "loss": 1.0433082580566406, + "loss_ce": 0.004520457237958908, + "loss_iou": 0.43359375, + "loss_num": 0.0341796875, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 460871584, + "step": 6890 + }, + { + "epoch": 0.7819574468085106, + "grad_norm": 23.641077041625977, + "learning_rate": 5e-05, + "loss": 1.1065, + "num_input_tokens_seen": 460939500, + "step": 6891 + }, + { + "epoch": 0.7819574468085106, + "loss": 1.0955109596252441, + "loss_ce": 0.0017608776688575745, + "loss_iou": 0.46484375, + "loss_num": 0.0322265625, + "loss_xval": 1.09375, + "num_input_tokens_seen": 460939500, + "step": 6891 + }, + { + "epoch": 0.7820709219858156, + "grad_norm": 28.350353240966797, + "learning_rate": 5e-05, + "loss": 1.1067, + "num_input_tokens_seen": 461006340, + "step": 6892 + }, + { + "epoch": 0.7820709219858156, + "loss": 1.1325671672821045, + "loss_ce": 0.006834758445620537, + "loss_iou": 0.455078125, + "loss_num": 0.042724609375, + "loss_xval": 1.125, + "num_input_tokens_seen": 461006340, + "step": 6892 + }, + { + "epoch": 0.7821843971631206, + "grad_norm": 20.175262451171875, + "learning_rate": 5e-05, + "loss": 1.1888, + "num_input_tokens_seen": 461073228, + "step": 6893 + }, + { + "epoch": 0.7821843971631206, + "loss": 1.1873066425323486, + "loss_ce": 0.005177770741283894, + "loss_iou": 0.455078125, + "loss_num": 0.054443359375, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 461073228, + "step": 6893 + }, + { + "epoch": 0.7822978723404256, + "grad_norm": 38.77226257324219, + "learning_rate": 5e-05, + "loss": 1.1395, + "num_input_tokens_seen": 461140684, + "step": 6894 + }, + { + "epoch": 0.7822978723404256, + "loss": 1.1328554153442383, + "loss_ce": 0.005414040759205818, + "loss_iou": 0.453125, + "loss_num": 0.04443359375, + "loss_xval": 1.125, + "num_input_tokens_seen": 461140684, + "step": 6894 + }, + { + "epoch": 0.7824113475177304, + "grad_norm": 38.545570373535156, + "learning_rate": 5e-05, + "loss": 1.309, + "num_input_tokens_seen": 461208056, + "step": 6895 + }, + { + "epoch": 0.7824113475177304, + "loss": 1.2861828804016113, + "loss_ce": 0.0054211970418691635, + "loss_iou": 0.56640625, + "loss_num": 0.0296630859375, + "loss_xval": 1.28125, + "num_input_tokens_seen": 461208056, + "step": 6895 + }, + { + "epoch": 0.7825248226950354, + "grad_norm": 30.436445236206055, + "learning_rate": 5e-05, + "loss": 1.0706, + "num_input_tokens_seen": 461275060, + "step": 6896 + }, + { + "epoch": 0.7825248226950354, + "loss": 1.0588792562484741, + "loss_ce": 0.010295300744473934, + "loss_iou": 0.4140625, + "loss_num": 0.0439453125, + "loss_xval": 1.046875, + "num_input_tokens_seen": 461275060, + "step": 6896 + }, + { + "epoch": 0.7826382978723404, + "grad_norm": 29.449174880981445, + "learning_rate": 5e-05, + "loss": 1.3712, + "num_input_tokens_seen": 461342660, + "step": 6897 + }, + { + "epoch": 0.7826382978723404, + "loss": 1.5027856826782227, + "loss_ce": 0.008156870491802692, + "loss_iou": 0.625, + "loss_num": 0.04931640625, + "loss_xval": 1.4921875, + "num_input_tokens_seen": 461342660, + "step": 6897 + }, + { + "epoch": 0.7827517730496454, + "grad_norm": 17.57137107849121, + "learning_rate": 5e-05, + "loss": 0.9853, + "num_input_tokens_seen": 461409480, + "step": 6898 + }, + { + "epoch": 0.7827517730496454, + "loss": 0.9909243583679199, + "loss_ce": 0.004596184007823467, + "loss_iou": 0.421875, + "loss_num": 0.02880859375, + "loss_xval": 0.984375, + "num_input_tokens_seen": 461409480, + "step": 6898 + }, + { + "epoch": 0.7828652482269504, + "grad_norm": 29.063318252563477, + "learning_rate": 5e-05, + "loss": 1.0776, + "num_input_tokens_seen": 461476740, + "step": 6899 + }, + { + "epoch": 0.7828652482269504, + "loss": 0.9690120220184326, + "loss_ce": 0.005633146036416292, + "loss_iou": 0.41015625, + "loss_num": 0.0289306640625, + "loss_xval": 0.96484375, + "num_input_tokens_seen": 461476740, + "step": 6899 + }, + { + "epoch": 0.7829787234042553, + "grad_norm": 59.27379608154297, + "learning_rate": 5e-05, + "loss": 1.0724, + "num_input_tokens_seen": 461543440, + "step": 6900 + }, + { + "epoch": 0.7829787234042553, + "loss": 1.0390313863754272, + "loss_ce": 0.005828278139233589, + "loss_iou": 0.43359375, + "loss_num": 0.033447265625, + "loss_xval": 1.03125, + "num_input_tokens_seen": 461543440, + "step": 6900 + }, + { + "epoch": 0.7830921985815603, + "grad_norm": 49.276329040527344, + "learning_rate": 5e-05, + "loss": 1.358, + "num_input_tokens_seen": 461610660, + "step": 6901 + }, + { + "epoch": 0.7830921985815603, + "loss": 1.314157247543335, + "loss_ce": 0.008493147790431976, + "loss_iou": 0.546875, + "loss_num": 0.042724609375, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 461610660, + "step": 6901 + }, + { + "epoch": 0.7832056737588653, + "grad_norm": 20.37112045288086, + "learning_rate": 5e-05, + "loss": 1.0857, + "num_input_tokens_seen": 461677860, + "step": 6902 + }, + { + "epoch": 0.7832056737588653, + "loss": 1.0806961059570312, + "loss_ce": 0.007698021829128265, + "loss_iou": 0.458984375, + "loss_num": 0.031005859375, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 461677860, + "step": 6902 + }, + { + "epoch": 0.7833191489361702, + "grad_norm": 11.879251480102539, + "learning_rate": 5e-05, + "loss": 1.2168, + "num_input_tokens_seen": 461744956, + "step": 6903 + }, + { + "epoch": 0.7833191489361702, + "loss": 1.265828251838684, + "loss_ce": 0.006062676198780537, + "loss_iou": 0.486328125, + "loss_num": 0.057861328125, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 461744956, + "step": 6903 + }, + { + "epoch": 0.7834326241134751, + "grad_norm": 121.49528503417969, + "learning_rate": 5e-05, + "loss": 1.1849, + "num_input_tokens_seen": 461812600, + "step": 6904 + }, + { + "epoch": 0.7834326241134751, + "loss": 1.2983407974243164, + "loss_ce": 0.004883832298219204, + "loss_iou": 0.50390625, + "loss_num": 0.057861328125, + "loss_xval": 1.296875, + "num_input_tokens_seen": 461812600, + "step": 6904 + }, + { + "epoch": 0.7835460992907801, + "grad_norm": 23.733291625976562, + "learning_rate": 5e-05, + "loss": 1.354, + "num_input_tokens_seen": 461880176, + "step": 6905 + }, + { + "epoch": 0.7835460992907801, + "loss": 1.315129041671753, + "loss_ce": 0.0040938276797533035, + "loss_iou": 0.515625, + "loss_num": 0.0556640625, + "loss_xval": 1.3125, + "num_input_tokens_seen": 461880176, + "step": 6905 + }, + { + "epoch": 0.7836595744680851, + "grad_norm": 31.73232650756836, + "learning_rate": 5e-05, + "loss": 1.0913, + "num_input_tokens_seen": 461946616, + "step": 6906 + }, + { + "epoch": 0.7836595744680851, + "loss": 1.1222950220108032, + "loss_ce": 0.007548877503722906, + "loss_iou": 0.4765625, + "loss_num": 0.032470703125, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 461946616, + "step": 6906 + }, + { + "epoch": 0.7837730496453901, + "grad_norm": 30.890531539916992, + "learning_rate": 5e-05, + "loss": 1.2459, + "num_input_tokens_seen": 462013868, + "step": 6907 + }, + { + "epoch": 0.7837730496453901, + "loss": 1.3113460540771484, + "loss_ce": 0.005681996233761311, + "loss_iou": 0.53125, + "loss_num": 0.048828125, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 462013868, + "step": 6907 + }, + { + "epoch": 0.783886524822695, + "grad_norm": 34.285606384277344, + "learning_rate": 5e-05, + "loss": 1.2821, + "num_input_tokens_seen": 462080156, + "step": 6908 + }, + { + "epoch": 0.783886524822695, + "loss": 1.265985131263733, + "loss_ce": 0.008172620087862015, + "loss_iou": 0.5390625, + "loss_num": 0.036376953125, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 462080156, + "step": 6908 + }, + { + "epoch": 0.784, + "grad_norm": 27.287151336669922, + "learning_rate": 5e-05, + "loss": 1.0643, + "num_input_tokens_seen": 462146328, + "step": 6909 + }, + { + "epoch": 0.784, + "loss": 0.896996021270752, + "loss_ce": 0.008324187248945236, + "loss_iou": 0.36328125, + "loss_num": 0.032470703125, + "loss_xval": 0.890625, + "num_input_tokens_seen": 462146328, + "step": 6909 + }, + { + "epoch": 0.784113475177305, + "grad_norm": 31.543004989624023, + "learning_rate": 5e-05, + "loss": 1.3682, + "num_input_tokens_seen": 462211784, + "step": 6910 + }, + { + "epoch": 0.784113475177305, + "loss": 1.231660008430481, + "loss_ce": 0.004853449761867523, + "loss_iou": 0.52734375, + "loss_num": 0.033935546875, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 462211784, + "step": 6910 + }, + { + "epoch": 0.7842269503546099, + "grad_norm": 33.557159423828125, + "learning_rate": 5e-05, + "loss": 1.3179, + "num_input_tokens_seen": 462279216, + "step": 6911 + }, + { + "epoch": 0.7842269503546099, + "loss": 1.319532036781311, + "loss_ce": 0.007032094988971949, + "loss_iou": 0.5625, + "loss_num": 0.037109375, + "loss_xval": 1.3125, + "num_input_tokens_seen": 462279216, + "step": 6911 + }, + { + "epoch": 0.7843404255319149, + "grad_norm": 63.16952896118164, + "learning_rate": 5e-05, + "loss": 1.1281, + "num_input_tokens_seen": 462345944, + "step": 6912 + }, + { + "epoch": 0.7843404255319149, + "loss": 1.2120181322097778, + "loss_ce": 0.003003249643370509, + "loss_iou": 0.50390625, + "loss_num": 0.039794921875, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 462345944, + "step": 6912 + }, + { + "epoch": 0.7844539007092198, + "grad_norm": 33.24873352050781, + "learning_rate": 5e-05, + "loss": 1.3212, + "num_input_tokens_seen": 462412668, + "step": 6913 + }, + { + "epoch": 0.7844539007092198, + "loss": 1.2982194423675537, + "loss_ce": 0.004762359894812107, + "loss_iou": 0.55078125, + "loss_num": 0.03759765625, + "loss_xval": 1.296875, + "num_input_tokens_seen": 462412668, + "step": 6913 + }, + { + "epoch": 0.7845673758865248, + "grad_norm": 27.465192794799805, + "learning_rate": 5e-05, + "loss": 1.0623, + "num_input_tokens_seen": 462479212, + "step": 6914 + }, + { + "epoch": 0.7845673758865248, + "loss": 0.8433876633644104, + "loss_ce": 0.0113564133644104, + "loss_iou": 0.31640625, + "loss_num": 0.039306640625, + "loss_xval": 0.83203125, + "num_input_tokens_seen": 462479212, + "step": 6914 + }, + { + "epoch": 0.7846808510638298, + "grad_norm": 32.79283142089844, + "learning_rate": 5e-05, + "loss": 1.1667, + "num_input_tokens_seen": 462545444, + "step": 6915 + }, + { + "epoch": 0.7846808510638298, + "loss": 0.9852732419967651, + "loss_ce": 0.006269332952797413, + "loss_iou": 0.4140625, + "loss_num": 0.0299072265625, + "loss_xval": 0.98046875, + "num_input_tokens_seen": 462545444, + "step": 6915 + }, + { + "epoch": 0.7847943262411348, + "grad_norm": 57.730796813964844, + "learning_rate": 5e-05, + "loss": 1.4049, + "num_input_tokens_seen": 462612792, + "step": 6916 + }, + { + "epoch": 0.7847943262411348, + "loss": 1.457153081893921, + "loss_ce": 0.01086401380598545, + "loss_iou": 0.5625, + "loss_num": 0.064453125, + "loss_xval": 1.4453125, + "num_input_tokens_seen": 462612792, + "step": 6916 + }, + { + "epoch": 0.7849078014184397, + "grad_norm": 16.61048698425293, + "learning_rate": 5e-05, + "loss": 1.1474, + "num_input_tokens_seen": 462680148, + "step": 6917 + }, + { + "epoch": 0.7849078014184397, + "loss": 1.12899649143219, + "loss_ce": 0.007139848545193672, + "loss_iou": 0.43359375, + "loss_num": 0.051025390625, + "loss_xval": 1.125, + "num_input_tokens_seen": 462680148, + "step": 6917 + }, + { + "epoch": 0.7850212765957447, + "grad_norm": 8.408032417297363, + "learning_rate": 5e-05, + "loss": 0.9018, + "num_input_tokens_seen": 462746256, + "step": 6918 + }, + { + "epoch": 0.7850212765957447, + "loss": 0.8690536022186279, + "loss_ce": 0.006748919375240803, + "loss_iou": 0.333984375, + "loss_num": 0.038818359375, + "loss_xval": 0.86328125, + "num_input_tokens_seen": 462746256, + "step": 6918 + }, + { + "epoch": 0.7851347517730497, + "grad_norm": 26.194095611572266, + "learning_rate": 5e-05, + "loss": 1.0262, + "num_input_tokens_seen": 462812816, + "step": 6919 + }, + { + "epoch": 0.7851347517730497, + "loss": 1.169553518295288, + "loss_ce": 0.0030495093669742346, + "loss_iou": 0.4609375, + "loss_num": 0.04931640625, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 462812816, + "step": 6919 + }, + { + "epoch": 0.7852482269503546, + "grad_norm": 21.585357666015625, + "learning_rate": 5e-05, + "loss": 0.9936, + "num_input_tokens_seen": 462879144, + "step": 6920 + }, + { + "epoch": 0.7852482269503546, + "loss": 0.9075890183448792, + "loss_ce": 0.006710148882120848, + "loss_iou": 0.37890625, + "loss_num": 0.0286865234375, + "loss_xval": 0.90234375, + "num_input_tokens_seen": 462879144, + "step": 6920 + }, + { + "epoch": 0.7853617021276595, + "grad_norm": 30.672136306762695, + "learning_rate": 5e-05, + "loss": 1.3125, + "num_input_tokens_seen": 462946008, + "step": 6921 + }, + { + "epoch": 0.7853617021276595, + "loss": 1.300855278968811, + "loss_ce": 0.00788654014468193, + "loss_iou": 0.53125, + "loss_num": 0.045166015625, + "loss_xval": 1.296875, + "num_input_tokens_seen": 462946008, + "step": 6921 + }, + { + "epoch": 0.7854751773049645, + "grad_norm": 27.67136573791504, + "learning_rate": 5e-05, + "loss": 0.9621, + "num_input_tokens_seen": 463012424, + "step": 6922 + }, + { + "epoch": 0.7854751773049645, + "loss": 1.163541316986084, + "loss_ce": 0.006803035736083984, + "loss_iou": 0.45703125, + "loss_num": 0.048583984375, + "loss_xval": 1.15625, + "num_input_tokens_seen": 463012424, + "step": 6922 + }, + { + "epoch": 0.7855886524822695, + "grad_norm": 29.76860809326172, + "learning_rate": 5e-05, + "loss": 1.5156, + "num_input_tokens_seen": 463078972, + "step": 6923 + }, + { + "epoch": 0.7855886524822695, + "loss": 1.4694924354553223, + "loss_ce": 0.009531484916806221, + "loss_iou": 0.5625, + "loss_num": 0.06640625, + "loss_xval": 1.4609375, + "num_input_tokens_seen": 463078972, + "step": 6923 + }, + { + "epoch": 0.7857021276595745, + "grad_norm": 42.18095779418945, + "learning_rate": 5e-05, + "loss": 1.2107, + "num_input_tokens_seen": 463145604, + "step": 6924 + }, + { + "epoch": 0.7857021276595745, + "loss": 1.1104238033294678, + "loss_ce": 0.006419893354177475, + "loss_iou": 0.49609375, + "loss_num": 0.02197265625, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 463145604, + "step": 6924 + }, + { + "epoch": 0.7858156028368795, + "grad_norm": 22.070568084716797, + "learning_rate": 5e-05, + "loss": 1.4655, + "num_input_tokens_seen": 463211744, + "step": 6925 + }, + { + "epoch": 0.7858156028368795, + "loss": 1.3628249168395996, + "loss_ce": 0.003938264213502407, + "loss_iou": 0.59765625, + "loss_num": 0.031982421875, + "loss_xval": 1.359375, + "num_input_tokens_seen": 463211744, + "step": 6925 + }, + { + "epoch": 0.7859290780141844, + "grad_norm": 20.036314010620117, + "learning_rate": 5e-05, + "loss": 1.2766, + "num_input_tokens_seen": 463277540, + "step": 6926 + }, + { + "epoch": 0.7859290780141844, + "loss": 1.1963856220245361, + "loss_ce": 0.004491105675697327, + "loss_iou": 0.49609375, + "loss_num": 0.04052734375, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 463277540, + "step": 6926 + }, + { + "epoch": 0.7860425531914894, + "grad_norm": 31.27546501159668, + "learning_rate": 5e-05, + "loss": 1.2659, + "num_input_tokens_seen": 463345172, + "step": 6927 + }, + { + "epoch": 0.7860425531914894, + "loss": 1.4043419361114502, + "loss_ce": 0.00981074757874012, + "loss_iou": 0.5703125, + "loss_num": 0.050048828125, + "loss_xval": 1.390625, + "num_input_tokens_seen": 463345172, + "step": 6927 + }, + { + "epoch": 0.7861560283687943, + "grad_norm": 23.186553955078125, + "learning_rate": 5e-05, + "loss": 1.2394, + "num_input_tokens_seen": 463411568, + "step": 6928 + }, + { + "epoch": 0.7861560283687943, + "loss": 1.4504566192626953, + "loss_ce": 0.006120721809566021, + "loss_iou": 0.5703125, + "loss_num": 0.0595703125, + "loss_xval": 1.4453125, + "num_input_tokens_seen": 463411568, + "step": 6928 + }, + { + "epoch": 0.7862695035460993, + "grad_norm": 39.875038146972656, + "learning_rate": 5e-05, + "loss": 1.1437, + "num_input_tokens_seen": 463478768, + "step": 6929 + }, + { + "epoch": 0.7862695035460993, + "loss": 0.9715871214866638, + "loss_ce": 0.005766842979937792, + "loss_iou": 0.41796875, + "loss_num": 0.026123046875, + "loss_xval": 0.96484375, + "num_input_tokens_seen": 463478768, + "step": 6929 + }, + { + "epoch": 0.7863829787234042, + "grad_norm": 37.323638916015625, + "learning_rate": 5e-05, + "loss": 1.1891, + "num_input_tokens_seen": 463545324, + "step": 6930 + }, + { + "epoch": 0.7863829787234042, + "loss": 1.3463889360427856, + "loss_ce": 0.003615498775616288, + "loss_iou": 0.53515625, + "loss_num": 0.05419921875, + "loss_xval": 1.34375, + "num_input_tokens_seen": 463545324, + "step": 6930 + }, + { + "epoch": 0.7864964539007092, + "grad_norm": 24.00625228881836, + "learning_rate": 5e-05, + "loss": 1.1292, + "num_input_tokens_seen": 463610984, + "step": 6931 + }, + { + "epoch": 0.7864964539007092, + "loss": 1.0526044368743896, + "loss_ce": 0.006766939535737038, + "loss_iou": 0.44921875, + "loss_num": 0.029052734375, + "loss_xval": 1.046875, + "num_input_tokens_seen": 463610984, + "step": 6931 + }, + { + "epoch": 0.7866099290780142, + "grad_norm": 12.62514591217041, + "learning_rate": 5e-05, + "loss": 0.9737, + "num_input_tokens_seen": 463679432, + "step": 6932 + }, + { + "epoch": 0.7866099290780142, + "loss": 1.145086407661438, + "loss_ce": 0.006902896333485842, + "loss_iou": 0.458984375, + "loss_num": 0.044189453125, + "loss_xval": 1.140625, + "num_input_tokens_seen": 463679432, + "step": 6932 + }, + { + "epoch": 0.7867234042553192, + "grad_norm": 25.172836303710938, + "learning_rate": 5e-05, + "loss": 0.9993, + "num_input_tokens_seen": 463746240, + "step": 6933 + }, + { + "epoch": 0.7867234042553192, + "loss": 1.1371166706085205, + "loss_ce": 0.008210339583456516, + "loss_iou": 0.482421875, + "loss_num": 0.0322265625, + "loss_xval": 1.125, + "num_input_tokens_seen": 463746240, + "step": 6933 + }, + { + "epoch": 0.7868368794326241, + "grad_norm": 32.891353607177734, + "learning_rate": 5e-05, + "loss": 1.3177, + "num_input_tokens_seen": 463812912, + "step": 6934 + }, + { + "epoch": 0.7868368794326241, + "loss": 1.2924079895019531, + "loss_ce": 0.006275073625147343, + "loss_iou": 0.546875, + "loss_num": 0.03759765625, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 463812912, + "step": 6934 + }, + { + "epoch": 0.7869503546099291, + "grad_norm": 28.321578979492188, + "learning_rate": 5e-05, + "loss": 1.2732, + "num_input_tokens_seen": 463880240, + "step": 6935 + }, + { + "epoch": 0.7869503546099291, + "loss": 1.2377409934997559, + "loss_ce": 0.0062956917099654675, + "loss_iou": 0.5234375, + "loss_num": 0.036865234375, + "loss_xval": 1.234375, + "num_input_tokens_seen": 463880240, + "step": 6935 + }, + { + "epoch": 0.787063829787234, + "grad_norm": 49.5084228515625, + "learning_rate": 5e-05, + "loss": 1.3056, + "num_input_tokens_seen": 463948828, + "step": 6936 + }, + { + "epoch": 0.787063829787234, + "loss": 1.2757234573364258, + "loss_ce": 0.005215570330619812, + "loss_iou": 0.54296875, + "loss_num": 0.036376953125, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 463948828, + "step": 6936 + }, + { + "epoch": 0.787177304964539, + "grad_norm": 21.287424087524414, + "learning_rate": 5e-05, + "loss": 1.26, + "num_input_tokens_seen": 464014820, + "step": 6937 + }, + { + "epoch": 0.787177304964539, + "loss": 1.4152143001556396, + "loss_ce": 0.007987729273736477, + "loss_iou": 0.546875, + "loss_num": 0.0634765625, + "loss_xval": 1.40625, + "num_input_tokens_seen": 464014820, + "step": 6937 + }, + { + "epoch": 0.787290780141844, + "grad_norm": 27.91753387451172, + "learning_rate": 5e-05, + "loss": 1.1439, + "num_input_tokens_seen": 464082004, + "step": 6938 + }, + { + "epoch": 0.787290780141844, + "loss": 1.2589420080184937, + "loss_ce": 0.00943028461188078, + "loss_iou": 0.484375, + "loss_num": 0.056640625, + "loss_xval": 1.25, + "num_input_tokens_seen": 464082004, + "step": 6938 + }, + { + "epoch": 0.7874042553191489, + "grad_norm": 36.34169006347656, + "learning_rate": 5e-05, + "loss": 1.1271, + "num_input_tokens_seen": 464149172, + "step": 6939 + }, + { + "epoch": 0.7874042553191489, + "loss": 1.0486043691635132, + "loss_ce": 0.004659063182771206, + "loss_iou": 0.44921875, + "loss_num": 0.0291748046875, + "loss_xval": 1.046875, + "num_input_tokens_seen": 464149172, + "step": 6939 + }, + { + "epoch": 0.7875177304964539, + "grad_norm": 30.555580139160156, + "learning_rate": 5e-05, + "loss": 1.2408, + "num_input_tokens_seen": 464214984, + "step": 6940 + }, + { + "epoch": 0.7875177304964539, + "loss": 1.2922462224960327, + "loss_ce": 0.008066507987678051, + "loss_iou": 0.53515625, + "loss_num": 0.04248046875, + "loss_xval": 1.28125, + "num_input_tokens_seen": 464214984, + "step": 6940 + }, + { + "epoch": 0.7876312056737589, + "grad_norm": 23.81289291381836, + "learning_rate": 5e-05, + "loss": 1.0521, + "num_input_tokens_seen": 464281948, + "step": 6941 + }, + { + "epoch": 0.7876312056737589, + "loss": 1.0593476295471191, + "loss_ce": 0.011251978576183319, + "loss_iou": 0.388671875, + "loss_num": 0.0537109375, + "loss_xval": 1.046875, + "num_input_tokens_seen": 464281948, + "step": 6941 + }, + { + "epoch": 0.7877446808510639, + "grad_norm": 24.043886184692383, + "learning_rate": 5e-05, + "loss": 1.2075, + "num_input_tokens_seen": 464348764, + "step": 6942 + }, + { + "epoch": 0.7877446808510639, + "loss": 1.0862950086593628, + "loss_ce": 0.009024523198604584, + "loss_iou": 0.447265625, + "loss_num": 0.036376953125, + "loss_xval": 1.078125, + "num_input_tokens_seen": 464348764, + "step": 6942 + }, + { + "epoch": 0.7878581560283688, + "grad_norm": 42.72541046142578, + "learning_rate": 5e-05, + "loss": 1.1729, + "num_input_tokens_seen": 464415560, + "step": 6943 + }, + { + "epoch": 0.7878581560283688, + "loss": 1.1042060852050781, + "loss_ce": 0.0037116766907274723, + "loss_iou": 0.431640625, + "loss_num": 0.047607421875, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 464415560, + "step": 6943 + }, + { + "epoch": 0.7879716312056737, + "grad_norm": 39.01731491088867, + "learning_rate": 5e-05, + "loss": 1.1915, + "num_input_tokens_seen": 464482360, + "step": 6944 + }, + { + "epoch": 0.7879716312056737, + "loss": 1.169754147529602, + "loss_ce": 0.006912388373166323, + "loss_iou": 0.494140625, + "loss_num": 0.03515625, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 464482360, + "step": 6944 + }, + { + "epoch": 0.7880851063829787, + "grad_norm": 36.90890121459961, + "learning_rate": 5e-05, + "loss": 1.172, + "num_input_tokens_seen": 464549532, + "step": 6945 + }, + { + "epoch": 0.7880851063829787, + "loss": 1.2119500637054443, + "loss_ce": 0.002965587191283703, + "loss_iou": 0.484375, + "loss_num": 0.047607421875, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 464549532, + "step": 6945 + }, + { + "epoch": 0.7881985815602837, + "grad_norm": 47.60728454589844, + "learning_rate": 5e-05, + "loss": 1.3815, + "num_input_tokens_seen": 464615016, + "step": 6946 + }, + { + "epoch": 0.7881985815602837, + "loss": 1.465256929397583, + "loss_ce": 0.007737379055470228, + "loss_iou": 0.58984375, + "loss_num": 0.055908203125, + "loss_xval": 1.4609375, + "num_input_tokens_seen": 464615016, + "step": 6946 + }, + { + "epoch": 0.7883120567375886, + "grad_norm": 22.630277633666992, + "learning_rate": 5e-05, + "loss": 1.2316, + "num_input_tokens_seen": 464681028, + "step": 6947 + }, + { + "epoch": 0.7883120567375886, + "loss": 1.4226980209350586, + "loss_ce": 0.009123913943767548, + "loss_iou": 0.59765625, + "loss_num": 0.042724609375, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 464681028, + "step": 6947 + }, + { + "epoch": 0.7884255319148936, + "grad_norm": 23.17755126953125, + "learning_rate": 5e-05, + "loss": 1.0901, + "num_input_tokens_seen": 464747484, + "step": 6948 + }, + { + "epoch": 0.7884255319148936, + "loss": 1.2719306945800781, + "loss_ce": 0.0063056740909814835, + "loss_iou": 0.5234375, + "loss_num": 0.04296875, + "loss_xval": 1.265625, + "num_input_tokens_seen": 464747484, + "step": 6948 + }, + { + "epoch": 0.7885390070921986, + "grad_norm": 21.328330993652344, + "learning_rate": 5e-05, + "loss": 1.1435, + "num_input_tokens_seen": 464813964, + "step": 6949 + }, + { + "epoch": 0.7885390070921986, + "loss": 1.0848459005355835, + "loss_ce": 0.010138893499970436, + "loss_iou": 0.46875, + "loss_num": 0.0277099609375, + "loss_xval": 1.078125, + "num_input_tokens_seen": 464813964, + "step": 6949 + }, + { + "epoch": 0.7886524822695036, + "grad_norm": 22.046051025390625, + "learning_rate": 5e-05, + "loss": 1.1006, + "num_input_tokens_seen": 464879708, + "step": 6950 + }, + { + "epoch": 0.7886524822695036, + "loss": 1.2726292610168457, + "loss_ce": 0.007706058211624622, + "loss_iou": 0.46484375, + "loss_num": 0.0673828125, + "loss_xval": 1.265625, + "num_input_tokens_seen": 464879708, + "step": 6950 + }, + { + "epoch": 0.7887659574468086, + "grad_norm": 44.21001052856445, + "learning_rate": 5e-05, + "loss": 1.1652, + "num_input_tokens_seen": 464946480, + "step": 6951 + }, + { + "epoch": 0.7887659574468086, + "loss": 1.0882611274719238, + "loss_ce": 0.003788514994084835, + "loss_iou": 0.443359375, + "loss_num": 0.039794921875, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 464946480, + "step": 6951 + }, + { + "epoch": 0.7888794326241135, + "grad_norm": 47.44740295410156, + "learning_rate": 5e-05, + "loss": 1.1699, + "num_input_tokens_seen": 465012944, + "step": 6952 + }, + { + "epoch": 0.7888794326241135, + "loss": 1.2249889373779297, + "loss_ce": 0.0062389494851231575, + "loss_iou": 0.50390625, + "loss_num": 0.04248046875, + "loss_xval": 1.21875, + "num_input_tokens_seen": 465012944, + "step": 6952 + }, + { + "epoch": 0.7889929078014184, + "grad_norm": 35.76927947998047, + "learning_rate": 5e-05, + "loss": 1.2439, + "num_input_tokens_seen": 465080520, + "step": 6953 + }, + { + "epoch": 0.7889929078014184, + "loss": 1.219360589981079, + "loss_ce": 0.005005179438740015, + "loss_iou": 0.53515625, + "loss_num": 0.0286865234375, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 465080520, + "step": 6953 + }, + { + "epoch": 0.7891063829787234, + "grad_norm": 21.021400451660156, + "learning_rate": 5e-05, + "loss": 1.1284, + "num_input_tokens_seen": 465147548, + "step": 6954 + }, + { + "epoch": 0.7891063829787234, + "loss": 1.2052158117294312, + "loss_ce": 0.0050204116851091385, + "loss_iou": 0.51953125, + "loss_num": 0.032958984375, + "loss_xval": 1.203125, + "num_input_tokens_seen": 465147548, + "step": 6954 + }, + { + "epoch": 0.7892198581560284, + "grad_norm": 22.06989097595215, + "learning_rate": 5e-05, + "loss": 0.9969, + "num_input_tokens_seen": 465214360, + "step": 6955 + }, + { + "epoch": 0.7892198581560284, + "loss": 0.9273717999458313, + "loss_ce": 0.01086786575615406, + "loss_iou": 0.41796875, + "loss_num": 0.0164794921875, + "loss_xval": 0.91796875, + "num_input_tokens_seen": 465214360, + "step": 6955 + }, + { + "epoch": 0.7893333333333333, + "grad_norm": 29.654647827148438, + "learning_rate": 5e-05, + "loss": 1.205, + "num_input_tokens_seen": 465280320, + "step": 6956 + }, + { + "epoch": 0.7893333333333333, + "loss": 1.356964111328125, + "loss_ce": 0.011016873642802238, + "loss_iou": 0.53515625, + "loss_num": 0.0556640625, + "loss_xval": 1.34375, + "num_input_tokens_seen": 465280320, + "step": 6956 + }, + { + "epoch": 0.7894468085106383, + "grad_norm": 33.928192138671875, + "learning_rate": 5e-05, + "loss": 1.2819, + "num_input_tokens_seen": 465346876, + "step": 6957 + }, + { + "epoch": 0.7894468085106383, + "loss": 1.288292407989502, + "loss_ce": 0.00997207872569561, + "loss_iou": 0.50390625, + "loss_num": 0.0546875, + "loss_xval": 1.28125, + "num_input_tokens_seen": 465346876, + "step": 6957 + }, + { + "epoch": 0.7895602836879433, + "grad_norm": 29.92987823486328, + "learning_rate": 5e-05, + "loss": 1.0535, + "num_input_tokens_seen": 465413840, + "step": 6958 + }, + { + "epoch": 0.7895602836879433, + "loss": 0.9915411472320557, + "loss_ce": 0.0032599125988781452, + "loss_iou": 0.421875, + "loss_num": 0.02880859375, + "loss_xval": 0.98828125, + "num_input_tokens_seen": 465413840, + "step": 6958 + }, + { + "epoch": 0.7896737588652483, + "grad_norm": 22.340585708618164, + "learning_rate": 5e-05, + "loss": 1.2966, + "num_input_tokens_seen": 465481128, + "step": 6959 + }, + { + "epoch": 0.7896737588652483, + "loss": 1.262093186378479, + "loss_ce": 0.006233802996575832, + "loss_iou": 0.52734375, + "loss_num": 0.039794921875, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 465481128, + "step": 6959 + }, + { + "epoch": 0.7897872340425532, + "grad_norm": 19.53511619567871, + "learning_rate": 5e-05, + "loss": 1.0389, + "num_input_tokens_seen": 465547980, + "step": 6960 + }, + { + "epoch": 0.7897872340425532, + "loss": 0.9730936884880066, + "loss_ce": 0.0023905490525066853, + "loss_iou": 0.41015625, + "loss_num": 0.0299072265625, + "loss_xval": 0.96875, + "num_input_tokens_seen": 465547980, + "step": 6960 + }, + { + "epoch": 0.7899007092198581, + "grad_norm": 32.32807922363281, + "learning_rate": 5e-05, + "loss": 1.1909, + "num_input_tokens_seen": 465614508, + "step": 6961 + }, + { + "epoch": 0.7899007092198581, + "loss": 1.1489659547805786, + "loss_ce": 0.009317532181739807, + "loss_iou": 0.462890625, + "loss_num": 0.043212890625, + "loss_xval": 1.140625, + "num_input_tokens_seen": 465614508, + "step": 6961 + }, + { + "epoch": 0.7900141843971631, + "grad_norm": 35.961483001708984, + "learning_rate": 5e-05, + "loss": 1.3468, + "num_input_tokens_seen": 465680448, + "step": 6962 + }, + { + "epoch": 0.7900141843971631, + "loss": 1.2428476810455322, + "loss_ce": 0.01775004155933857, + "loss_iou": 0.478515625, + "loss_num": 0.0537109375, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 465680448, + "step": 6962 + }, + { + "epoch": 0.7901276595744681, + "grad_norm": 18.796512603759766, + "learning_rate": 5e-05, + "loss": 1.2743, + "num_input_tokens_seen": 465747132, + "step": 6963 + }, + { + "epoch": 0.7901276595744681, + "loss": 1.243001937866211, + "loss_ce": 0.01009183470159769, + "loss_iou": 0.43359375, + "loss_num": 0.0732421875, + "loss_xval": 1.234375, + "num_input_tokens_seen": 465747132, + "step": 6963 + }, + { + "epoch": 0.790241134751773, + "grad_norm": 15.889632225036621, + "learning_rate": 5e-05, + "loss": 1.0052, + "num_input_tokens_seen": 465813304, + "step": 6964 + }, + { + "epoch": 0.790241134751773, + "loss": 1.1741992235183716, + "loss_ce": 0.005253931507468224, + "loss_iou": 0.45703125, + "loss_num": 0.05078125, + "loss_xval": 1.171875, + "num_input_tokens_seen": 465813304, + "step": 6964 + }, + { + "epoch": 0.790354609929078, + "grad_norm": 29.28902816772461, + "learning_rate": 5e-05, + "loss": 1.2428, + "num_input_tokens_seen": 465879608, + "step": 6965 + }, + { + "epoch": 0.790354609929078, + "loss": 1.4922114610671997, + "loss_ce": 0.006859953515231609, + "loss_iou": 0.62890625, + "loss_num": 0.046142578125, + "loss_xval": 1.484375, + "num_input_tokens_seen": 465879608, + "step": 6965 + }, + { + "epoch": 0.790468085106383, + "grad_norm": 27.81441307067871, + "learning_rate": 5e-05, + "loss": 1.0977, + "num_input_tokens_seen": 465946840, + "step": 6966 + }, + { + "epoch": 0.790468085106383, + "loss": 1.1645957231521606, + "loss_ce": 0.006880956701934338, + "loss_iou": 0.478515625, + "loss_num": 0.039794921875, + "loss_xval": 1.15625, + "num_input_tokens_seen": 465946840, + "step": 6966 + }, + { + "epoch": 0.790581560283688, + "grad_norm": 21.67071533203125, + "learning_rate": 5e-05, + "loss": 1.2422, + "num_input_tokens_seen": 466013912, + "step": 6967 + }, + { + "epoch": 0.790581560283688, + "loss": 1.261833906173706, + "loss_ce": 0.006462696008384228, + "loss_iou": 0.5078125, + "loss_num": 0.04736328125, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 466013912, + "step": 6967 + }, + { + "epoch": 0.790695035460993, + "grad_norm": 44.968082427978516, + "learning_rate": 5e-05, + "loss": 1.1275, + "num_input_tokens_seen": 466081816, + "step": 6968 + }, + { + "epoch": 0.790695035460993, + "loss": 1.1820473670959473, + "loss_ce": 0.006754308007657528, + "loss_iou": 0.431640625, + "loss_num": 0.0625, + "loss_xval": 1.171875, + "num_input_tokens_seen": 466081816, + "step": 6968 + }, + { + "epoch": 0.7908085106382978, + "grad_norm": 34.207881927490234, + "learning_rate": 5e-05, + "loss": 1.1892, + "num_input_tokens_seen": 466149432, + "step": 6969 + }, + { + "epoch": 0.7908085106382978, + "loss": 1.206531286239624, + "loss_ce": 0.005847723688930273, + "loss_iou": 0.4921875, + "loss_num": 0.043212890625, + "loss_xval": 1.203125, + "num_input_tokens_seen": 466149432, + "step": 6969 + }, + { + "epoch": 0.7909219858156028, + "grad_norm": 29.826799392700195, + "learning_rate": 5e-05, + "loss": 1.1497, + "num_input_tokens_seen": 466216132, + "step": 6970 + }, + { + "epoch": 0.7909219858156028, + "loss": 0.987556517124176, + "loss_ce": 0.006111262831836939, + "loss_iou": 0.404296875, + "loss_num": 0.03466796875, + "loss_xval": 0.98046875, + "num_input_tokens_seen": 466216132, + "step": 6970 + }, + { + "epoch": 0.7910354609929078, + "grad_norm": 365.0551452636719, + "learning_rate": 5e-05, + "loss": 1.3124, + "num_input_tokens_seen": 466282592, + "step": 6971 + }, + { + "epoch": 0.7910354609929078, + "loss": 1.2911028861999512, + "loss_ce": 0.005458337254822254, + "loss_iou": 0.51953125, + "loss_num": 0.04833984375, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 466282592, + "step": 6971 + }, + { + "epoch": 0.7911489361702128, + "grad_norm": 15.404887199401855, + "learning_rate": 5e-05, + "loss": 1.0186, + "num_input_tokens_seen": 466348584, + "step": 6972 + }, + { + "epoch": 0.7911489361702128, + "loss": 1.2413153648376465, + "loss_ce": 0.009870046749711037, + "loss_iou": 0.498046875, + "loss_num": 0.046875, + "loss_xval": 1.234375, + "num_input_tokens_seen": 466348584, + "step": 6972 + }, + { + "epoch": 0.7912624113475177, + "grad_norm": 23.391427993774414, + "learning_rate": 5e-05, + "loss": 1.1794, + "num_input_tokens_seen": 466415960, + "step": 6973 + }, + { + "epoch": 0.7912624113475177, + "loss": 1.084511399269104, + "loss_ce": 0.0063863834366202354, + "loss_iou": 0.453125, + "loss_num": 0.0341796875, + "loss_xval": 1.078125, + "num_input_tokens_seen": 466415960, + "step": 6973 + }, + { + "epoch": 0.7913758865248227, + "grad_norm": 27.539573669433594, + "learning_rate": 5e-05, + "loss": 1.1231, + "num_input_tokens_seen": 466482104, + "step": 6974 + }, + { + "epoch": 0.7913758865248227, + "loss": 1.0845447778701782, + "loss_ce": 0.005443180445581675, + "loss_iou": 0.46875, + "loss_num": 0.0283203125, + "loss_xval": 1.078125, + "num_input_tokens_seen": 466482104, + "step": 6974 + }, + { + "epoch": 0.7914893617021277, + "grad_norm": 25.935564041137695, + "learning_rate": 5e-05, + "loss": 1.4841, + "num_input_tokens_seen": 466548540, + "step": 6975 + }, + { + "epoch": 0.7914893617021277, + "loss": 1.381671667098999, + "loss_ce": 0.007648330181837082, + "loss_iou": 0.53515625, + "loss_num": 0.060546875, + "loss_xval": 1.375, + "num_input_tokens_seen": 466548540, + "step": 6975 + }, + { + "epoch": 0.7916028368794327, + "grad_norm": 52.622406005859375, + "learning_rate": 5e-05, + "loss": 1.2873, + "num_input_tokens_seen": 466616388, + "step": 6976 + }, + { + "epoch": 0.7916028368794327, + "loss": 1.141830563545227, + "loss_ce": 0.014877529814839363, + "loss_iou": 0.466796875, + "loss_num": 0.03857421875, + "loss_xval": 1.125, + "num_input_tokens_seen": 466616388, + "step": 6976 + }, + { + "epoch": 0.7917163120567375, + "grad_norm": 26.46863555908203, + "learning_rate": 5e-05, + "loss": 1.2236, + "num_input_tokens_seen": 466683932, + "step": 6977 + }, + { + "epoch": 0.7917163120567375, + "loss": 1.4023723602294922, + "loss_ce": 0.009061777964234352, + "loss_iou": 0.54296875, + "loss_num": 0.0615234375, + "loss_xval": 1.390625, + "num_input_tokens_seen": 466683932, + "step": 6977 + }, + { + "epoch": 0.7918297872340425, + "grad_norm": 20.830141067504883, + "learning_rate": 5e-05, + "loss": 1.1411, + "num_input_tokens_seen": 466750420, + "step": 6978 + }, + { + "epoch": 0.7918297872340425, + "loss": 1.0269107818603516, + "loss_ce": 0.00591459684073925, + "loss_iou": 0.41015625, + "loss_num": 0.040283203125, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 466750420, + "step": 6978 + }, + { + "epoch": 0.7919432624113475, + "grad_norm": 22.155447006225586, + "learning_rate": 5e-05, + "loss": 1.2505, + "num_input_tokens_seen": 466817588, + "step": 6979 + }, + { + "epoch": 0.7919432624113475, + "loss": 1.2681376934051514, + "loss_ce": 0.010325266048312187, + "loss_iou": 0.5234375, + "loss_num": 0.041748046875, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 466817588, + "step": 6979 + }, + { + "epoch": 0.7920567375886525, + "grad_norm": 45.29899215698242, + "learning_rate": 5e-05, + "loss": 1.2307, + "num_input_tokens_seen": 466883908, + "step": 6980 + }, + { + "epoch": 0.7920567375886525, + "loss": 1.1207194328308105, + "loss_ce": 0.004081237595528364, + "loss_iou": 0.462890625, + "loss_num": 0.0380859375, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 466883908, + "step": 6980 + }, + { + "epoch": 0.7921702127659574, + "grad_norm": 70.01451873779297, + "learning_rate": 5e-05, + "loss": 1.3272, + "num_input_tokens_seen": 466951332, + "step": 6981 + }, + { + "epoch": 0.7921702127659574, + "loss": 1.4102627038955688, + "loss_ce": 0.006942462641745806, + "loss_iou": 0.609375, + "loss_num": 0.037841796875, + "loss_xval": 1.40625, + "num_input_tokens_seen": 466951332, + "step": 6981 + }, + { + "epoch": 0.7922836879432624, + "grad_norm": 29.44933319091797, + "learning_rate": 5e-05, + "loss": 1.2592, + "num_input_tokens_seen": 467019108, + "step": 6982 + }, + { + "epoch": 0.7922836879432624, + "loss": 1.2022265195846558, + "loss_ce": 0.005449199117720127, + "loss_iou": 0.515625, + "loss_num": 0.032958984375, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 467019108, + "step": 6982 + }, + { + "epoch": 0.7923971631205674, + "grad_norm": 22.819965362548828, + "learning_rate": 5e-05, + "loss": 1.2224, + "num_input_tokens_seen": 467086472, + "step": 6983 + }, + { + "epoch": 0.7923971631205674, + "loss": 1.2574028968811035, + "loss_ce": 0.006914630066603422, + "loss_iou": 0.5078125, + "loss_num": 0.046630859375, + "loss_xval": 1.25, + "num_input_tokens_seen": 467086472, + "step": 6983 + }, + { + "epoch": 0.7925106382978724, + "grad_norm": 96.92410278320312, + "learning_rate": 5e-05, + "loss": 1.2704, + "num_input_tokens_seen": 467153600, + "step": 6984 + }, + { + "epoch": 0.7925106382978724, + "loss": 1.3586915731430054, + "loss_ce": 0.004199409391731024, + "loss_iou": 0.55859375, + "loss_num": 0.046875, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 467153600, + "step": 6984 + }, + { + "epoch": 0.7926241134751772, + "grad_norm": 29.90910530090332, + "learning_rate": 5e-05, + "loss": 1.4753, + "num_input_tokens_seen": 467219792, + "step": 6985 + }, + { + "epoch": 0.7926241134751772, + "loss": 1.470264196395874, + "loss_ce": 0.004443906247615814, + "loss_iou": 0.578125, + "loss_num": 0.061279296875, + "loss_xval": 1.46875, + "num_input_tokens_seen": 467219792, + "step": 6985 + }, + { + "epoch": 0.7927375886524822, + "grad_norm": 15.829180717468262, + "learning_rate": 5e-05, + "loss": 1.0932, + "num_input_tokens_seen": 467287448, + "step": 6986 + }, + { + "epoch": 0.7927375886524822, + "loss": 1.064224362373352, + "loss_ce": 0.004654060583561659, + "loss_iou": 0.45703125, + "loss_num": 0.02880859375, + "loss_xval": 1.0625, + "num_input_tokens_seen": 467287448, + "step": 6986 + }, + { + "epoch": 0.7928510638297872, + "grad_norm": 35.81101989746094, + "learning_rate": 5e-05, + "loss": 1.069, + "num_input_tokens_seen": 467353752, + "step": 6987 + }, + { + "epoch": 0.7928510638297872, + "loss": 1.051038384437561, + "loss_ce": 0.002210302511230111, + "loss_iou": 0.447265625, + "loss_num": 0.030517578125, + "loss_xval": 1.046875, + "num_input_tokens_seen": 467353752, + "step": 6987 + }, + { + "epoch": 0.7929645390070922, + "grad_norm": 25.271760940551758, + "learning_rate": 5e-05, + "loss": 1.3375, + "num_input_tokens_seen": 467419836, + "step": 6988 + }, + { + "epoch": 0.7929645390070922, + "loss": 1.6280286312103271, + "loss_ce": 0.01084114145487547, + "loss_iou": 0.59375, + "loss_num": 0.0869140625, + "loss_xval": 1.6171875, + "num_input_tokens_seen": 467419836, + "step": 6988 + }, + { + "epoch": 0.7930780141843972, + "grad_norm": 25.98004722595215, + "learning_rate": 5e-05, + "loss": 1.0251, + "num_input_tokens_seen": 467487276, + "step": 6989 + }, + { + "epoch": 0.7930780141843972, + "loss": 0.9734897613525391, + "loss_ce": 0.009012158960103989, + "loss_iou": 0.3828125, + "loss_num": 0.039794921875, + "loss_xval": 0.96484375, + "num_input_tokens_seen": 467487276, + "step": 6989 + }, + { + "epoch": 0.7931914893617021, + "grad_norm": 30.352508544921875, + "learning_rate": 5e-05, + "loss": 0.9997, + "num_input_tokens_seen": 467553788, + "step": 6990 + }, + { + "epoch": 0.7931914893617021, + "loss": 0.9014310240745544, + "loss_ce": 0.00348179554566741, + "loss_iou": 0.38671875, + "loss_num": 0.02490234375, + "loss_xval": 0.8984375, + "num_input_tokens_seen": 467553788, + "step": 6990 + }, + { + "epoch": 0.7933049645390071, + "grad_norm": 25.375783920288086, + "learning_rate": 5e-05, + "loss": 1.0288, + "num_input_tokens_seen": 467620744, + "step": 6991 + }, + { + "epoch": 0.7933049645390071, + "loss": 1.0688421726226807, + "loss_ce": 0.010248374193906784, + "loss_iou": 0.474609375, + "loss_num": 0.0218505859375, + "loss_xval": 1.0625, + "num_input_tokens_seen": 467620744, + "step": 6991 + }, + { + "epoch": 0.7934184397163121, + "grad_norm": 31.390140533447266, + "learning_rate": 5e-05, + "loss": 1.2824, + "num_input_tokens_seen": 467688728, + "step": 6992 + }, + { + "epoch": 0.7934184397163121, + "loss": 1.2983168363571167, + "loss_ce": 0.006324610207229853, + "loss_iou": 0.5390625, + "loss_num": 0.042236328125, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 467688728, + "step": 6992 + }, + { + "epoch": 0.7935319148936171, + "grad_norm": 33.307708740234375, + "learning_rate": 5e-05, + "loss": 1.3357, + "num_input_tokens_seen": 467756468, + "step": 6993 + }, + { + "epoch": 0.7935319148936171, + "loss": 1.2331492900848389, + "loss_ce": 0.007075090892612934, + "loss_iou": 0.515625, + "loss_num": 0.038330078125, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 467756468, + "step": 6993 + }, + { + "epoch": 0.7936453900709219, + "grad_norm": 27.681501388549805, + "learning_rate": 5e-05, + "loss": 1.3144, + "num_input_tokens_seen": 467823100, + "step": 6994 + }, + { + "epoch": 0.7936453900709219, + "loss": 1.287369728088379, + "loss_ce": 0.007584668695926666, + "loss_iou": 0.50390625, + "loss_num": 0.054443359375, + "loss_xval": 1.28125, + "num_input_tokens_seen": 467823100, + "step": 6994 + }, + { + "epoch": 0.7937588652482269, + "grad_norm": 29.926040649414062, + "learning_rate": 5e-05, + "loss": 0.9698, + "num_input_tokens_seen": 467889912, + "step": 6995 + }, + { + "epoch": 0.7937588652482269, + "loss": 0.8790539503097534, + "loss_ce": 0.0064953388646245, + "loss_iou": 0.38671875, + "loss_num": 0.020263671875, + "loss_xval": 0.87109375, + "num_input_tokens_seen": 467889912, + "step": 6995 + }, + { + "epoch": 0.7938723404255319, + "grad_norm": 19.914182662963867, + "learning_rate": 5e-05, + "loss": 1.085, + "num_input_tokens_seen": 467957200, + "step": 6996 + }, + { + "epoch": 0.7938723404255319, + "loss": 1.1261131763458252, + "loss_ce": 0.010390435345470905, + "loss_iou": 0.435546875, + "loss_num": 0.049072265625, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 467957200, + "step": 6996 + }, + { + "epoch": 0.7939858156028369, + "grad_norm": 46.32395553588867, + "learning_rate": 5e-05, + "loss": 1.2186, + "num_input_tokens_seen": 468024264, + "step": 6997 + }, + { + "epoch": 0.7939858156028369, + "loss": 1.2059441804885864, + "loss_ce": 0.008190320804715157, + "loss_iou": 0.484375, + "loss_num": 0.04541015625, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 468024264, + "step": 6997 + }, + { + "epoch": 0.7940992907801419, + "grad_norm": 44.69963073730469, + "learning_rate": 5e-05, + "loss": 1.527, + "num_input_tokens_seen": 468090856, + "step": 6998 + }, + { + "epoch": 0.7940992907801419, + "loss": 1.4299664497375488, + "loss_ce": 0.007114896550774574, + "loss_iou": 0.609375, + "loss_num": 0.04150390625, + "loss_xval": 1.421875, + "num_input_tokens_seen": 468090856, + "step": 6998 + }, + { + "epoch": 0.7942127659574468, + "grad_norm": 16.639816284179688, + "learning_rate": 5e-05, + "loss": 1.0528, + "num_input_tokens_seen": 468157840, + "step": 6999 + }, + { + "epoch": 0.7942127659574468, + "loss": 0.9438818097114563, + "loss_ce": 0.00528310053050518, + "loss_iou": 0.375, + "loss_num": 0.0380859375, + "loss_xval": 0.9375, + "num_input_tokens_seen": 468157840, + "step": 6999 + }, + { + "epoch": 0.7943262411347518, + "grad_norm": 56.98869705200195, + "learning_rate": 5e-05, + "loss": 1.0019, + "num_input_tokens_seen": 468225292, + "step": 7000 + }, + { + "epoch": 0.7943262411347518, + "eval_seeclick_CIoU": 0.3454381376504898, + "eval_seeclick_GIoU": 0.3177388906478882, + "eval_seeclick_IoU": 0.45057232677936554, + "eval_seeclick_MAE_all": 0.18039775639772415, + "eval_seeclick_MAE_h": 0.08225570432841778, + "eval_seeclick_MAE_w": 0.1449146494269371, + "eval_seeclick_MAE_x_boxes": 0.27526768296957016, + "eval_seeclick_MAE_y_boxes": 0.15160049498081207, + "eval_seeclick_NUM_probability": 0.9999625384807587, + "eval_seeclick_inside_bbox": 0.6145833432674408, + "eval_seeclick_loss": 2.594264507293701, + "eval_seeclick_loss_ce": 0.013527413830161095, + "eval_seeclick_loss_iou": 0.86138916015625, + "eval_seeclick_loss_num": 0.1813507080078125, + "eval_seeclick_loss_xval": 2.62841796875, + "eval_seeclick_runtime": 64.0714, + "eval_seeclick_samples_per_second": 0.734, + "eval_seeclick_steps_per_second": 0.031, + "num_input_tokens_seen": 468225292, + "step": 7000 + }, + { + "epoch": 0.7943262411347518, + "eval_icons_CIoU": 0.48370005190372467, + "eval_icons_GIoU": 0.48062680661678314, + "eval_icons_IoU": 0.53802689909935, + "eval_icons_MAE_all": 0.13257858157157898, + "eval_icons_MAE_h": 0.08513149991631508, + "eval_icons_MAE_w": 0.07009526155889034, + "eval_icons_MAE_x_boxes": 0.14731258526444435, + "eval_icons_MAE_y_boxes": 0.07643195986747742, + "eval_icons_NUM_probability": 0.999991774559021, + "eval_icons_inside_bbox": 0.7604166567325592, + "eval_icons_loss": 2.2484569549560547, + "eval_icons_loss_ce": 1.8517474018153735e-05, + "eval_icons_loss_iou": 0.77685546875, + "eval_icons_loss_num": 0.13228988647460938, + "eval_icons_loss_xval": 2.21533203125, + "eval_icons_runtime": 72.8464, + "eval_icons_samples_per_second": 0.686, + "eval_icons_steps_per_second": 0.027, + "num_input_tokens_seen": 468225292, + "step": 7000 + }, + { + "epoch": 0.7943262411347518, + "eval_screenspot_CIoU": 0.23782756924629211, + "eval_screenspot_GIoU": 0.21999437113602957, + "eval_screenspot_IoU": 0.35741788148880005, + "eval_screenspot_MAE_all": 0.21620874106884003, + "eval_screenspot_MAE_h": 0.10836134105920792, + "eval_screenspot_MAE_w": 0.149814635515213, + "eval_screenspot_MAE_x_boxes": 0.3631059428056081, + "eval_screenspot_MAE_y_boxes": 0.11406739552815755, + "eval_screenspot_NUM_probability": 0.999973992506663, + "eval_screenspot_inside_bbox": 0.5287500023841858, + "eval_screenspot_loss": 3.0156967639923096, + "eval_screenspot_loss_ce": 0.01755226713915666, + "eval_screenspot_loss_iou": 0.9498697916666666, + "eval_screenspot_loss_num": 0.22343953450520834, + "eval_screenspot_loss_xval": 3.017578125, + "eval_screenspot_runtime": 117.6306, + "eval_screenspot_samples_per_second": 0.757, + "eval_screenspot_steps_per_second": 0.026, + "num_input_tokens_seen": 468225292, + "step": 7000 + }, + { + "epoch": 0.7943262411347518, + "eval_compot_CIoU": 0.19876866787672043, + "eval_compot_GIoU": 0.16255048662424088, + "eval_compot_IoU": 0.33080779016017914, + "eval_compot_MAE_all": 0.22469424456357956, + "eval_compot_MAE_h": 0.07274916395545006, + "eval_compot_MAE_w": 0.17108163982629776, + "eval_compot_MAE_x_boxes": 0.2840614467859268, + "eval_compot_MAE_y_boxes": 0.16379395127296448, + "eval_compot_NUM_probability": 0.9999800026416779, + "eval_compot_inside_bbox": 0.4444444477558136, + "eval_compot_loss": 3.019566059112549, + "eval_compot_loss_ce": 0.005697740241885185, + "eval_compot_loss_iou": 0.94970703125, + "eval_compot_loss_num": 0.22723388671875, + "eval_compot_loss_xval": 3.037109375, + "eval_compot_runtime": 70.2178, + "eval_compot_samples_per_second": 0.712, + "eval_compot_steps_per_second": 0.028, + "num_input_tokens_seen": 468225292, + "step": 7000 + }, + { + "epoch": 0.7943262411347518, + "loss": 2.968013286590576, + "loss_ce": 0.007075721397995949, + "loss_iou": 0.921875, + "loss_num": 0.22265625, + "loss_xval": 2.96875, + "num_input_tokens_seen": 468225292, + "step": 7000 + }, + { + "epoch": 0.7944397163120568, + "grad_norm": 92.24530029296875, + "learning_rate": 5e-05, + "loss": 1.2006, + "num_input_tokens_seen": 468292308, + "step": 7001 + }, + { + "epoch": 0.7944397163120568, + "loss": 1.1414271593093872, + "loss_ce": 0.00763811357319355, + "loss_iou": 0.482421875, + "loss_num": 0.03369140625, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 468292308, + "step": 7001 + }, + { + "epoch": 0.7945531914893617, + "grad_norm": 17.425188064575195, + "learning_rate": 5e-05, + "loss": 1.1305, + "num_input_tokens_seen": 468358908, + "step": 7002 + }, + { + "epoch": 0.7945531914893617, + "loss": 1.110703945159912, + "loss_ce": 0.004258552100509405, + "loss_iou": 0.455078125, + "loss_num": 0.03955078125, + "loss_xval": 1.109375, + "num_input_tokens_seen": 468358908, + "step": 7002 + }, + { + "epoch": 0.7946666666666666, + "grad_norm": 21.003032684326172, + "learning_rate": 5e-05, + "loss": 1.0693, + "num_input_tokens_seen": 468426340, + "step": 7003 + }, + { + "epoch": 0.7946666666666666, + "loss": 1.0545096397399902, + "loss_ce": 0.007146351970732212, + "loss_iou": 0.400390625, + "loss_num": 0.04931640625, + "loss_xval": 1.046875, + "num_input_tokens_seen": 468426340, + "step": 7003 + }, + { + "epoch": 0.7947801418439716, + "grad_norm": 14.024166107177734, + "learning_rate": 5e-05, + "loss": 1.1807, + "num_input_tokens_seen": 468493548, + "step": 7004 + }, + { + "epoch": 0.7947801418439716, + "loss": 1.191277265548706, + "loss_ce": 0.008660096675157547, + "loss_iou": 0.427734375, + "loss_num": 0.06591796875, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 468493548, + "step": 7004 + }, + { + "epoch": 0.7948936170212766, + "grad_norm": 28.718366622924805, + "learning_rate": 5e-05, + "loss": 1.1277, + "num_input_tokens_seen": 468561340, + "step": 7005 + }, + { + "epoch": 0.7948936170212766, + "loss": 1.2759259939193726, + "loss_ce": 0.005906486883759499, + "loss_iou": 0.515625, + "loss_num": 0.047607421875, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 468561340, + "step": 7005 + }, + { + "epoch": 0.7950070921985816, + "grad_norm": 30.6167049407959, + "learning_rate": 5e-05, + "loss": 1.4095, + "num_input_tokens_seen": 468628772, + "step": 7006 + }, + { + "epoch": 0.7950070921985816, + "loss": 1.3489184379577637, + "loss_ce": 0.006145020015537739, + "loss_iou": 0.51171875, + "loss_num": 0.0634765625, + "loss_xval": 1.34375, + "num_input_tokens_seen": 468628772, + "step": 7006 + }, + { + "epoch": 0.7951205673758865, + "grad_norm": 15.011962890625, + "learning_rate": 5e-05, + "loss": 1.236, + "num_input_tokens_seen": 468696884, + "step": 7007 + }, + { + "epoch": 0.7951205673758865, + "loss": 1.2613697052001953, + "loss_ce": 0.006486857309937477, + "loss_iou": 0.490234375, + "loss_num": 0.054443359375, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 468696884, + "step": 7007 + }, + { + "epoch": 0.7952340425531915, + "grad_norm": 25.902658462524414, + "learning_rate": 5e-05, + "loss": 1.027, + "num_input_tokens_seen": 468763392, + "step": 7008 + }, + { + "epoch": 0.7952340425531915, + "loss": 0.9536547064781189, + "loss_ce": 0.005900831427425146, + "loss_iou": 0.3671875, + "loss_num": 0.042236328125, + "loss_xval": 0.94921875, + "num_input_tokens_seen": 468763392, + "step": 7008 + }, + { + "epoch": 0.7953475177304965, + "grad_norm": 20.32453727722168, + "learning_rate": 5e-05, + "loss": 1.0869, + "num_input_tokens_seen": 468831368, + "step": 7009 + }, + { + "epoch": 0.7953475177304965, + "loss": 1.171339750289917, + "loss_ce": 0.009230410680174828, + "loss_iou": 0.458984375, + "loss_num": 0.049072265625, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 468831368, + "step": 7009 + }, + { + "epoch": 0.7954609929078014, + "grad_norm": 23.173233032226562, + "learning_rate": 5e-05, + "loss": 1.132, + "num_input_tokens_seen": 468898272, + "step": 7010 + }, + { + "epoch": 0.7954609929078014, + "loss": 0.971237063407898, + "loss_ce": 0.006881595589220524, + "loss_iou": 0.40625, + "loss_num": 0.03076171875, + "loss_xval": 0.96484375, + "num_input_tokens_seen": 468898272, + "step": 7010 + }, + { + "epoch": 0.7955744680851063, + "grad_norm": 20.1129150390625, + "learning_rate": 5e-05, + "loss": 0.8569, + "num_input_tokens_seen": 468964724, + "step": 7011 + }, + { + "epoch": 0.7955744680851063, + "loss": 0.7818714380264282, + "loss_ce": 0.005199112929403782, + "loss_iou": 0.310546875, + "loss_num": 0.031494140625, + "loss_xval": 0.77734375, + "num_input_tokens_seen": 468964724, + "step": 7011 + }, + { + "epoch": 0.7956879432624113, + "grad_norm": 21.317155838012695, + "learning_rate": 5e-05, + "loss": 1.1016, + "num_input_tokens_seen": 469031652, + "step": 7012 + }, + { + "epoch": 0.7956879432624113, + "loss": 1.08149254322052, + "loss_ce": 0.007273838855326176, + "loss_iou": 0.435546875, + "loss_num": 0.04052734375, + "loss_xval": 1.078125, + "num_input_tokens_seen": 469031652, + "step": 7012 + }, + { + "epoch": 0.7958014184397163, + "grad_norm": 45.292545318603516, + "learning_rate": 5e-05, + "loss": 1.2278, + "num_input_tokens_seen": 469098420, + "step": 7013 + }, + { + "epoch": 0.7958014184397163, + "loss": 1.2812299728393555, + "loss_ce": 0.009745634160935879, + "loss_iou": 0.5390625, + "loss_num": 0.0390625, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 469098420, + "step": 7013 + }, + { + "epoch": 0.7959148936170213, + "grad_norm": 102.80146789550781, + "learning_rate": 5e-05, + "loss": 1.3764, + "num_input_tokens_seen": 469166100, + "step": 7014 + }, + { + "epoch": 0.7959148936170213, + "loss": 1.5222712755203247, + "loss_ce": 0.010552538558840752, + "loss_iou": 0.62890625, + "loss_num": 0.05078125, + "loss_xval": 1.515625, + "num_input_tokens_seen": 469166100, + "step": 7014 + }, + { + "epoch": 0.7960283687943263, + "grad_norm": 47.78632736206055, + "learning_rate": 5e-05, + "loss": 1.3479, + "num_input_tokens_seen": 469233224, + "step": 7015 + }, + { + "epoch": 0.7960283687943263, + "loss": 1.2493430376052856, + "loss_ce": 0.0032492303289473057, + "loss_iou": 0.5234375, + "loss_num": 0.039306640625, + "loss_xval": 1.25, + "num_input_tokens_seen": 469233224, + "step": 7015 + }, + { + "epoch": 0.7961418439716312, + "grad_norm": 10.726040840148926, + "learning_rate": 5e-05, + "loss": 1.2603, + "num_input_tokens_seen": 469299908, + "step": 7016 + }, + { + "epoch": 0.7961418439716312, + "loss": 1.0942208766937256, + "loss_ce": 0.006818500347435474, + "loss_iou": 0.38671875, + "loss_num": 0.0625, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 469299908, + "step": 7016 + }, + { + "epoch": 0.7962553191489362, + "grad_norm": 18.130990982055664, + "learning_rate": 5e-05, + "loss": 1.0179, + "num_input_tokens_seen": 469366140, + "step": 7017 + }, + { + "epoch": 0.7962553191489362, + "loss": 1.042539119720459, + "loss_ce": 0.007871169596910477, + "loss_iou": 0.390625, + "loss_num": 0.051025390625, + "loss_xval": 1.03125, + "num_input_tokens_seen": 469366140, + "step": 7017 + }, + { + "epoch": 0.7963687943262411, + "grad_norm": 22.52841567993164, + "learning_rate": 5e-05, + "loss": 1.118, + "num_input_tokens_seen": 469431880, + "step": 7018 + }, + { + "epoch": 0.7963687943262411, + "loss": 1.2210842370986938, + "loss_ce": 0.007705379743129015, + "loss_iou": 0.50390625, + "loss_num": 0.041015625, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 469431880, + "step": 7018 + }, + { + "epoch": 0.7964822695035461, + "grad_norm": 24.565855026245117, + "learning_rate": 5e-05, + "loss": 1.2142, + "num_input_tokens_seen": 469498416, + "step": 7019 + }, + { + "epoch": 0.7964822695035461, + "loss": 1.1376694440841675, + "loss_ce": 0.005833543837070465, + "loss_iou": 0.48828125, + "loss_num": 0.0311279296875, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 469498416, + "step": 7019 + }, + { + "epoch": 0.796595744680851, + "grad_norm": 31.50313949584961, + "learning_rate": 5e-05, + "loss": 1.2863, + "num_input_tokens_seen": 469565136, + "step": 7020 + }, + { + "epoch": 0.796595744680851, + "loss": 1.1467700004577637, + "loss_ce": 0.008586360141634941, + "loss_iou": 0.486328125, + "loss_num": 0.03271484375, + "loss_xval": 1.140625, + "num_input_tokens_seen": 469565136, + "step": 7020 + }, + { + "epoch": 0.796709219858156, + "grad_norm": 24.707027435302734, + "learning_rate": 5e-05, + "loss": 1.5534, + "num_input_tokens_seen": 469632772, + "step": 7021 + }, + { + "epoch": 0.796709219858156, + "loss": 1.5953545570373535, + "loss_ce": 0.007464010734111071, + "loss_iou": 0.6484375, + "loss_num": 0.05908203125, + "loss_xval": 1.5859375, + "num_input_tokens_seen": 469632772, + "step": 7021 + }, + { + "epoch": 0.796822695035461, + "grad_norm": 20.501976013183594, + "learning_rate": 5e-05, + "loss": 1.1377, + "num_input_tokens_seen": 469699384, + "step": 7022 + }, + { + "epoch": 0.796822695035461, + "loss": 1.2233978509902954, + "loss_ce": 0.007089204154908657, + "loss_iou": 0.51953125, + "loss_num": 0.035888671875, + "loss_xval": 1.21875, + "num_input_tokens_seen": 469699384, + "step": 7022 + }, + { + "epoch": 0.796936170212766, + "grad_norm": 29.319326400756836, + "learning_rate": 5e-05, + "loss": 1.193, + "num_input_tokens_seen": 469766116, + "step": 7023 + }, + { + "epoch": 0.796936170212766, + "loss": 1.1940034627914429, + "loss_ce": 0.014315923675894737, + "loss_iou": 0.44921875, + "loss_num": 0.0556640625, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 469766116, + "step": 7023 + }, + { + "epoch": 0.797049645390071, + "grad_norm": 42.93419647216797, + "learning_rate": 5e-05, + "loss": 1.2175, + "num_input_tokens_seen": 469833412, + "step": 7024 + }, + { + "epoch": 0.797049645390071, + "loss": 1.1831657886505127, + "loss_ce": 0.0073845842853188515, + "loss_iou": 0.4765625, + "loss_num": 0.0439453125, + "loss_xval": 1.171875, + "num_input_tokens_seen": 469833412, + "step": 7024 + }, + { + "epoch": 0.7971631205673759, + "grad_norm": 446.7820129394531, + "learning_rate": 5e-05, + "loss": 1.3597, + "num_input_tokens_seen": 469899880, + "step": 7025 + }, + { + "epoch": 0.7971631205673759, + "loss": 1.3096778392791748, + "loss_ce": 0.008408306166529655, + "loss_iou": 0.52734375, + "loss_num": 0.049560546875, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 469899880, + "step": 7025 + }, + { + "epoch": 0.7972765957446809, + "grad_norm": 20.22484016418457, + "learning_rate": 5e-05, + "loss": 1.0266, + "num_input_tokens_seen": 469966200, + "step": 7026 + }, + { + "epoch": 0.7972765957446809, + "loss": 1.0897456407546997, + "loss_ce": 0.0032588192261755466, + "loss_iou": 0.421875, + "loss_num": 0.04833984375, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 469966200, + "step": 7026 + }, + { + "epoch": 0.7973900709219858, + "grad_norm": 32.75563430786133, + "learning_rate": 5e-05, + "loss": 1.1045, + "num_input_tokens_seen": 470033460, + "step": 7027 + }, + { + "epoch": 0.7973900709219858, + "loss": 1.040750503540039, + "loss_ce": 0.0046177152544260025, + "loss_iou": 0.443359375, + "loss_num": 0.029541015625, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 470033460, + "step": 7027 + }, + { + "epoch": 0.7975035460992907, + "grad_norm": 39.03972625732422, + "learning_rate": 5e-05, + "loss": 1.263, + "num_input_tokens_seen": 470100796, + "step": 7028 + }, + { + "epoch": 0.7975035460992907, + "loss": 1.3955864906311035, + "loss_ce": 0.005449853837490082, + "loss_iou": 0.53515625, + "loss_num": 0.06396484375, + "loss_xval": 1.390625, + "num_input_tokens_seen": 470100796, + "step": 7028 + }, + { + "epoch": 0.7976170212765957, + "grad_norm": 28.930273056030273, + "learning_rate": 5e-05, + "loss": 1.3057, + "num_input_tokens_seen": 470167612, + "step": 7029 + }, + { + "epoch": 0.7976170212765957, + "loss": 1.4982540607452393, + "loss_ce": 0.008019670844078064, + "loss_iou": 0.60546875, + "loss_num": 0.05517578125, + "loss_xval": 1.4921875, + "num_input_tokens_seen": 470167612, + "step": 7029 + }, + { + "epoch": 0.7977304964539007, + "grad_norm": 27.297428131103516, + "learning_rate": 5e-05, + "loss": 1.0031, + "num_input_tokens_seen": 470233604, + "step": 7030 + }, + { + "epoch": 0.7977304964539007, + "loss": 0.7239985466003418, + "loss_ce": 0.005462199449539185, + "loss_iou": 0.31640625, + "loss_num": 0.0174560546875, + "loss_xval": 0.71875, + "num_input_tokens_seen": 470233604, + "step": 7030 + }, + { + "epoch": 0.7978439716312057, + "grad_norm": 26.04295539855957, + "learning_rate": 5e-05, + "loss": 1.3772, + "num_input_tokens_seen": 470300948, + "step": 7031 + }, + { + "epoch": 0.7978439716312057, + "loss": 1.2841113805770874, + "loss_ce": 0.009209039621055126, + "loss_iou": 0.51953125, + "loss_num": 0.047607421875, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 470300948, + "step": 7031 + }, + { + "epoch": 0.7979574468085107, + "grad_norm": 23.84285545349121, + "learning_rate": 5e-05, + "loss": 1.135, + "num_input_tokens_seen": 470367388, + "step": 7032 + }, + { + "epoch": 0.7979574468085107, + "loss": 1.2046998739242554, + "loss_ce": 0.004016289487481117, + "loss_iou": 0.455078125, + "loss_num": 0.058349609375, + "loss_xval": 1.203125, + "num_input_tokens_seen": 470367388, + "step": 7032 + }, + { + "epoch": 0.7980709219858156, + "grad_norm": 26.07648277282715, + "learning_rate": 5e-05, + "loss": 1.2381, + "num_input_tokens_seen": 470434056, + "step": 7033 + }, + { + "epoch": 0.7980709219858156, + "loss": 1.0871931314468384, + "loss_ce": 0.008579801768064499, + "loss_iou": 0.42578125, + "loss_num": 0.04541015625, + "loss_xval": 1.078125, + "num_input_tokens_seen": 470434056, + "step": 7033 + }, + { + "epoch": 0.7981843971631206, + "grad_norm": 18.434782028198242, + "learning_rate": 5e-05, + "loss": 0.9976, + "num_input_tokens_seen": 470501324, + "step": 7034 + }, + { + "epoch": 0.7981843971631206, + "loss": 1.1426136493682861, + "loss_ce": 0.004918312653899193, + "loss_iou": 0.4609375, + "loss_num": 0.04248046875, + "loss_xval": 1.140625, + "num_input_tokens_seen": 470501324, + "step": 7034 + }, + { + "epoch": 0.7982978723404255, + "grad_norm": 20.039464950561523, + "learning_rate": 5e-05, + "loss": 1.1111, + "num_input_tokens_seen": 470568296, + "step": 7035 + }, + { + "epoch": 0.7982978723404255, + "loss": 1.2167625427246094, + "loss_ce": 0.006069242022931576, + "loss_iou": 0.484375, + "loss_num": 0.048095703125, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 470568296, + "step": 7035 + }, + { + "epoch": 0.7984113475177305, + "grad_norm": 70.79693603515625, + "learning_rate": 5e-05, + "loss": 1.2012, + "num_input_tokens_seen": 470635096, + "step": 7036 + }, + { + "epoch": 0.7984113475177305, + "loss": 1.1058322191238403, + "loss_ce": 0.005246280692517757, + "loss_iou": 0.44140625, + "loss_num": 0.044189453125, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 470635096, + "step": 7036 + }, + { + "epoch": 0.7985248226950354, + "grad_norm": 45.702667236328125, + "learning_rate": 5e-05, + "loss": 1.151, + "num_input_tokens_seen": 470703148, + "step": 7037 + }, + { + "epoch": 0.7985248226950354, + "loss": 1.1705360412597656, + "loss_ce": 0.013797836378216743, + "loss_iou": 0.48828125, + "loss_num": 0.03662109375, + "loss_xval": 1.15625, + "num_input_tokens_seen": 470703148, + "step": 7037 + }, + { + "epoch": 0.7986382978723404, + "grad_norm": 36.15317916870117, + "learning_rate": 5e-05, + "loss": 1.2475, + "num_input_tokens_seen": 470771620, + "step": 7038 + }, + { + "epoch": 0.7986382978723404, + "loss": 1.0806705951690674, + "loss_ce": 0.00401047058403492, + "loss_iou": 0.46875, + "loss_num": 0.028076171875, + "loss_xval": 1.078125, + "num_input_tokens_seen": 470771620, + "step": 7038 + }, + { + "epoch": 0.7987517730496454, + "grad_norm": 30.785249710083008, + "learning_rate": 5e-05, + "loss": 1.0734, + "num_input_tokens_seen": 470837824, + "step": 7039 + }, + { + "epoch": 0.7987517730496454, + "loss": 1.225407361984253, + "loss_ce": 0.005192548502236605, + "loss_iou": 0.4921875, + "loss_num": 0.046875, + "loss_xval": 1.21875, + "num_input_tokens_seen": 470837824, + "step": 7039 + }, + { + "epoch": 0.7988652482269504, + "grad_norm": 28.39033317565918, + "learning_rate": 5e-05, + "loss": 1.0975, + "num_input_tokens_seen": 470903676, + "step": 7040 + }, + { + "epoch": 0.7988652482269504, + "loss": 1.1221117973327637, + "loss_ce": 0.0029711127281188965, + "loss_iou": 0.490234375, + "loss_num": 0.0277099609375, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 470903676, + "step": 7040 + }, + { + "epoch": 0.7989787234042554, + "grad_norm": 28.56953239440918, + "learning_rate": 5e-05, + "loss": 1.2078, + "num_input_tokens_seen": 470969916, + "step": 7041 + }, + { + "epoch": 0.7989787234042554, + "loss": 1.2729803323745728, + "loss_ce": 0.006867039483040571, + "loss_iou": 0.48828125, + "loss_num": 0.057861328125, + "loss_xval": 1.265625, + "num_input_tokens_seen": 470969916, + "step": 7041 + }, + { + "epoch": 0.7990921985815603, + "grad_norm": 28.579078674316406, + "learning_rate": 5e-05, + "loss": 1.161, + "num_input_tokens_seen": 471036560, + "step": 7042 + }, + { + "epoch": 0.7990921985815603, + "loss": 1.0654828548431396, + "loss_ce": 0.005424328614026308, + "loss_iou": 0.44921875, + "loss_num": 0.032470703125, + "loss_xval": 1.0625, + "num_input_tokens_seen": 471036560, + "step": 7042 + }, + { + "epoch": 0.7992056737588652, + "grad_norm": 29.153669357299805, + "learning_rate": 5e-05, + "loss": 1.1039, + "num_input_tokens_seen": 471103496, + "step": 7043 + }, + { + "epoch": 0.7992056737588652, + "loss": 0.9802036881446838, + "loss_ce": 0.006082572974264622, + "loss_iou": 0.39453125, + "loss_num": 0.036865234375, + "loss_xval": 0.97265625, + "num_input_tokens_seen": 471103496, + "step": 7043 + }, + { + "epoch": 0.7993191489361702, + "grad_norm": 27.78581428527832, + "learning_rate": 5e-05, + "loss": 1.2232, + "num_input_tokens_seen": 471170360, + "step": 7044 + }, + { + "epoch": 0.7993191489361702, + "loss": 1.3106778860092163, + "loss_ce": 0.007088928949087858, + "loss_iou": 0.51171875, + "loss_num": 0.056396484375, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 471170360, + "step": 7044 + }, + { + "epoch": 0.7994326241134752, + "grad_norm": 18.999605178833008, + "learning_rate": 5e-05, + "loss": 1.2432, + "num_input_tokens_seen": 471237308, + "step": 7045 + }, + { + "epoch": 0.7994326241134752, + "loss": 1.0977314710617065, + "loss_ce": 0.004958058707416058, + "loss_iou": 0.43359375, + "loss_num": 0.044677734375, + "loss_xval": 1.09375, + "num_input_tokens_seen": 471237308, + "step": 7045 + }, + { + "epoch": 0.7995460992907801, + "grad_norm": 22.577919006347656, + "learning_rate": 5e-05, + "loss": 1.1323, + "num_input_tokens_seen": 471303768, + "step": 7046 + }, + { + "epoch": 0.7995460992907801, + "loss": 1.069158911705017, + "loss_ce": 0.009954828768968582, + "loss_iou": 0.3984375, + "loss_num": 0.052490234375, + "loss_xval": 1.0625, + "num_input_tokens_seen": 471303768, + "step": 7046 + }, + { + "epoch": 0.7996595744680851, + "grad_norm": 32.83993911743164, + "learning_rate": 5e-05, + "loss": 1.1703, + "num_input_tokens_seen": 471370332, + "step": 7047 + }, + { + "epoch": 0.7996595744680851, + "loss": 1.1262449026107788, + "loss_ce": 0.006371902301907539, + "loss_iou": 0.3984375, + "loss_num": 0.064453125, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 471370332, + "step": 7047 + }, + { + "epoch": 0.7997730496453901, + "grad_norm": 38.778480529785156, + "learning_rate": 5e-05, + "loss": 1.3106, + "num_input_tokens_seen": 471437004, + "step": 7048 + }, + { + "epoch": 0.7997730496453901, + "loss": 1.0640661716461182, + "loss_ce": 0.005960681941360235, + "loss_iou": 0.4296875, + "loss_num": 0.039794921875, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 471437004, + "step": 7048 + }, + { + "epoch": 0.7998865248226951, + "grad_norm": 36.164363861083984, + "learning_rate": 5e-05, + "loss": 1.1999, + "num_input_tokens_seen": 471503936, + "step": 7049 + }, + { + "epoch": 0.7998865248226951, + "loss": 1.2209669351577759, + "loss_ce": 0.005634896457195282, + "loss_iou": 0.48828125, + "loss_num": 0.047607421875, + "loss_xval": 1.21875, + "num_input_tokens_seen": 471503936, + "step": 7049 + }, + { + "epoch": 0.8, + "grad_norm": 34.113250732421875, + "learning_rate": 5e-05, + "loss": 1.1408, + "num_input_tokens_seen": 471571088, + "step": 7050 + }, + { + "epoch": 0.8, + "loss": 1.0498642921447754, + "loss_ce": 0.003721698885783553, + "loss_iou": 0.443359375, + "loss_num": 0.031982421875, + "loss_xval": 1.046875, + "num_input_tokens_seen": 471571088, + "step": 7050 + }, + { + "epoch": 0.8001134751773049, + "grad_norm": 22.28402328491211, + "learning_rate": 5e-05, + "loss": 1.1816, + "num_input_tokens_seen": 471638896, + "step": 7051 + }, + { + "epoch": 0.8001134751773049, + "loss": 1.200296401977539, + "loss_ce": 0.006448688916862011, + "loss_iou": 0.474609375, + "loss_num": 0.049072265625, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 471638896, + "step": 7051 + }, + { + "epoch": 0.8002269503546099, + "grad_norm": 19.064647674560547, + "learning_rate": 5e-05, + "loss": 1.1082, + "num_input_tokens_seen": 471705532, + "step": 7052 + }, + { + "epoch": 0.8002269503546099, + "loss": 1.0448057651519775, + "loss_ce": 0.009527448564767838, + "loss_iou": 0.42578125, + "loss_num": 0.037109375, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 471705532, + "step": 7052 + }, + { + "epoch": 0.8003404255319149, + "grad_norm": 24.410634994506836, + "learning_rate": 5e-05, + "loss": 0.7966, + "num_input_tokens_seen": 471771876, + "step": 7053 + }, + { + "epoch": 0.8003404255319149, + "loss": 0.8403922915458679, + "loss_ce": 0.009825900197029114, + "loss_iou": 0.345703125, + "loss_num": 0.02783203125, + "loss_xval": 0.83203125, + "num_input_tokens_seen": 471771876, + "step": 7053 + }, + { + "epoch": 0.8004539007092198, + "grad_norm": 26.936492919921875, + "learning_rate": 5e-05, + "loss": 1.1384, + "num_input_tokens_seen": 471838068, + "step": 7054 + }, + { + "epoch": 0.8004539007092198, + "loss": 1.0918477773666382, + "loss_ce": 0.00444541871547699, + "loss_iou": 0.45703125, + "loss_num": 0.034912109375, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 471838068, + "step": 7054 + }, + { + "epoch": 0.8005673758865248, + "grad_norm": 37.08091735839844, + "learning_rate": 5e-05, + "loss": 1.1271, + "num_input_tokens_seen": 471904996, + "step": 7055 + }, + { + "epoch": 0.8005673758865248, + "loss": 0.9626678228378296, + "loss_ce": 0.005636586342006922, + "loss_iou": 0.431640625, + "loss_num": 0.0185546875, + "loss_xval": 0.95703125, + "num_input_tokens_seen": 471904996, + "step": 7055 + }, + { + "epoch": 0.8006808510638298, + "grad_norm": 31.73387336730957, + "learning_rate": 5e-05, + "loss": 1.4803, + "num_input_tokens_seen": 471972412, + "step": 7056 + }, + { + "epoch": 0.8006808510638298, + "loss": 1.308060884475708, + "loss_ce": 0.008256202563643456, + "loss_iou": 0.5234375, + "loss_num": 0.05078125, + "loss_xval": 1.296875, + "num_input_tokens_seen": 471972412, + "step": 7056 + }, + { + "epoch": 0.8007943262411348, + "grad_norm": 21.060075759887695, + "learning_rate": 5e-05, + "loss": 1.0209, + "num_input_tokens_seen": 472039936, + "step": 7057 + }, + { + "epoch": 0.8007943262411348, + "loss": 1.066697597503662, + "loss_ce": 0.0041975281201303005, + "loss_iou": 0.4453125, + "loss_num": 0.034423828125, + "loss_xval": 1.0625, + "num_input_tokens_seen": 472039936, + "step": 7057 + }, + { + "epoch": 0.8009078014184398, + "grad_norm": 15.248111724853516, + "learning_rate": 5e-05, + "loss": 1.1486, + "num_input_tokens_seen": 472106748, + "step": 7058 + }, + { + "epoch": 0.8009078014184398, + "loss": 1.0863349437713623, + "loss_ce": 0.009918847121298313, + "loss_iou": 0.4296875, + "loss_num": 0.04345703125, + "loss_xval": 1.078125, + "num_input_tokens_seen": 472106748, + "step": 7058 + }, + { + "epoch": 0.8010212765957447, + "grad_norm": 22.175642013549805, + "learning_rate": 5e-05, + "loss": 1.2422, + "num_input_tokens_seen": 472174000, + "step": 7059 + }, + { + "epoch": 0.8010212765957447, + "loss": 1.2009963989257812, + "loss_ce": 0.007637061178684235, + "loss_iou": 0.46484375, + "loss_num": 0.052734375, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 472174000, + "step": 7059 + }, + { + "epoch": 0.8011347517730496, + "grad_norm": 36.69337463378906, + "learning_rate": 5e-05, + "loss": 1.0418, + "num_input_tokens_seen": 472240968, + "step": 7060 + }, + { + "epoch": 0.8011347517730496, + "loss": 0.9692733287811279, + "loss_ce": 0.006871029734611511, + "loss_iou": 0.40625, + "loss_num": 0.0303955078125, + "loss_xval": 0.9609375, + "num_input_tokens_seen": 472240968, + "step": 7060 + }, + { + "epoch": 0.8012482269503546, + "grad_norm": 45.76145935058594, + "learning_rate": 5e-05, + "loss": 1.2472, + "num_input_tokens_seen": 472307244, + "step": 7061 + }, + { + "epoch": 0.8012482269503546, + "loss": 1.032327651977539, + "loss_ce": 0.0065708160400390625, + "loss_iou": 0.443359375, + "loss_num": 0.027587890625, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 472307244, + "step": 7061 + }, + { + "epoch": 0.8013617021276596, + "grad_norm": 29.86480140686035, + "learning_rate": 5e-05, + "loss": 1.0351, + "num_input_tokens_seen": 472374484, + "step": 7062 + }, + { + "epoch": 0.8013617021276596, + "loss": 0.9918184280395508, + "loss_ce": 0.005490278825163841, + "loss_iou": 0.43359375, + "loss_num": 0.0240478515625, + "loss_xval": 0.984375, + "num_input_tokens_seen": 472374484, + "step": 7062 + }, + { + "epoch": 0.8014751773049645, + "grad_norm": 23.075206756591797, + "learning_rate": 5e-05, + "loss": 1.1565, + "num_input_tokens_seen": 472442128, + "step": 7063 + }, + { + "epoch": 0.8014751773049645, + "loss": 1.161750316619873, + "loss_ce": 0.006965285167098045, + "loss_iou": 0.4765625, + "loss_num": 0.0400390625, + "loss_xval": 1.15625, + "num_input_tokens_seen": 472442128, + "step": 7063 + }, + { + "epoch": 0.8015886524822695, + "grad_norm": 28.008644104003906, + "learning_rate": 5e-05, + "loss": 1.2489, + "num_input_tokens_seen": 472509692, + "step": 7064 + }, + { + "epoch": 0.8015886524822695, + "loss": 1.12761652469635, + "loss_ce": 0.007010990753769875, + "loss_iou": 0.431640625, + "loss_num": 0.0517578125, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 472509692, + "step": 7064 + }, + { + "epoch": 0.8017021276595745, + "grad_norm": 70.2734375, + "learning_rate": 5e-05, + "loss": 1.1593, + "num_input_tokens_seen": 472576516, + "step": 7065 + }, + { + "epoch": 0.8017021276595745, + "loss": 1.1384330987930298, + "loss_ce": 0.007329562678933144, + "loss_iou": 0.439453125, + "loss_num": 0.05078125, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 472576516, + "step": 7065 + }, + { + "epoch": 0.8018156028368795, + "grad_norm": 35.6775016784668, + "learning_rate": 5e-05, + "loss": 1.1112, + "num_input_tokens_seen": 472643448, + "step": 7066 + }, + { + "epoch": 0.8018156028368795, + "loss": 1.1018083095550537, + "loss_ce": 0.01001151092350483, + "loss_iou": 0.443359375, + "loss_num": 0.040771484375, + "loss_xval": 1.09375, + "num_input_tokens_seen": 472643448, + "step": 7066 + }, + { + "epoch": 0.8019290780141844, + "grad_norm": 22.467958450317383, + "learning_rate": 5e-05, + "loss": 1.0286, + "num_input_tokens_seen": 472710408, + "step": 7067 + }, + { + "epoch": 0.8019290780141844, + "loss": 0.9675737023353577, + "loss_ce": 0.0068498398177325726, + "loss_iou": 0.359375, + "loss_num": 0.04833984375, + "loss_xval": 0.9609375, + "num_input_tokens_seen": 472710408, + "step": 7067 + }, + { + "epoch": 0.8020425531914893, + "grad_norm": 35.36017608642578, + "learning_rate": 5e-05, + "loss": 1.0483, + "num_input_tokens_seen": 472776924, + "step": 7068 + }, + { + "epoch": 0.8020425531914893, + "loss": 1.0305694341659546, + "loss_ce": 0.0063995299860835075, + "loss_iou": 0.41796875, + "loss_num": 0.037353515625, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 472776924, + "step": 7068 + }, + { + "epoch": 0.8021560283687943, + "grad_norm": 21.927038192749023, + "learning_rate": 5e-05, + "loss": 1.2355, + "num_input_tokens_seen": 472843912, + "step": 7069 + }, + { + "epoch": 0.8021560283687943, + "loss": 1.127256155014038, + "loss_ce": 0.004453519359230995, + "loss_iou": 0.451171875, + "loss_num": 0.044189453125, + "loss_xval": 1.125, + "num_input_tokens_seen": 472843912, + "step": 7069 + }, + { + "epoch": 0.8022695035460993, + "grad_norm": 23.8034610748291, + "learning_rate": 5e-05, + "loss": 1.0778, + "num_input_tokens_seen": 472910724, + "step": 7070 + }, + { + "epoch": 0.8022695035460993, + "loss": 0.9464956521987915, + "loss_ce": 0.005577689502388239, + "loss_iou": 0.412109375, + "loss_num": 0.023193359375, + "loss_xval": 0.94140625, + "num_input_tokens_seen": 472910724, + "step": 7070 + }, + { + "epoch": 0.8023829787234042, + "grad_norm": 86.8479232788086, + "learning_rate": 5e-05, + "loss": 1.1618, + "num_input_tokens_seen": 472978940, + "step": 7071 + }, + { + "epoch": 0.8023829787234042, + "loss": 1.0200947523117065, + "loss_ce": 0.0034932135604321957, + "loss_iou": 0.44921875, + "loss_num": 0.0238037109375, + "loss_xval": 1.015625, + "num_input_tokens_seen": 472978940, + "step": 7071 + }, + { + "epoch": 0.8024964539007092, + "grad_norm": 42.341278076171875, + "learning_rate": 5e-05, + "loss": 1.3112, + "num_input_tokens_seen": 473045180, + "step": 7072 + }, + { + "epoch": 0.8024964539007092, + "loss": 1.4616682529449463, + "loss_ce": 0.0075666168704628944, + "loss_iou": 0.5703125, + "loss_num": 0.0625, + "loss_xval": 1.453125, + "num_input_tokens_seen": 473045180, + "step": 7072 + }, + { + "epoch": 0.8026099290780142, + "grad_norm": 27.577131271362305, + "learning_rate": 5e-05, + "loss": 1.2216, + "num_input_tokens_seen": 473113604, + "step": 7073 + }, + { + "epoch": 0.8026099290780142, + "loss": 1.3703209161758423, + "loss_ce": 0.007039668038487434, + "loss_iou": 0.5546875, + "loss_num": 0.05029296875, + "loss_xval": 1.359375, + "num_input_tokens_seen": 473113604, + "step": 7073 + }, + { + "epoch": 0.8027234042553192, + "grad_norm": 24.497726440429688, + "learning_rate": 5e-05, + "loss": 1.0332, + "num_input_tokens_seen": 473180400, + "step": 7074 + }, + { + "epoch": 0.8027234042553192, + "loss": 1.1204562187194824, + "loss_ce": 0.006442530080676079, + "loss_iou": 0.423828125, + "loss_num": 0.05322265625, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 473180400, + "step": 7074 + }, + { + "epoch": 0.8028368794326242, + "grad_norm": 48.941986083984375, + "learning_rate": 5e-05, + "loss": 1.1355, + "num_input_tokens_seen": 473247120, + "step": 7075 + }, + { + "epoch": 0.8028368794326242, + "loss": 1.1684579849243164, + "loss_ce": 0.005372079089283943, + "loss_iou": 0.474609375, + "loss_num": 0.04248046875, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 473247120, + "step": 7075 + }, + { + "epoch": 0.802950354609929, + "grad_norm": 60.91138458251953, + "learning_rate": 5e-05, + "loss": 1.2098, + "num_input_tokens_seen": 473312992, + "step": 7076 + }, + { + "epoch": 0.802950354609929, + "loss": 1.2499229907989502, + "loss_ce": 0.006331682205200195, + "loss_iou": 0.466796875, + "loss_num": 0.061767578125, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 473312992, + "step": 7076 + }, + { + "epoch": 0.803063829787234, + "grad_norm": 32.11311340332031, + "learning_rate": 5e-05, + "loss": 1.2447, + "num_input_tokens_seen": 473380800, + "step": 7077 + }, + { + "epoch": 0.803063829787234, + "loss": 1.3849189281463623, + "loss_ce": 0.006012721918523312, + "loss_iou": 0.53125, + "loss_num": 0.0634765625, + "loss_xval": 1.375, + "num_input_tokens_seen": 473380800, + "step": 7077 + }, + { + "epoch": 0.803177304964539, + "grad_norm": 38.3812141418457, + "learning_rate": 5e-05, + "loss": 1.2792, + "num_input_tokens_seen": 473448008, + "step": 7078 + }, + { + "epoch": 0.803177304964539, + "loss": 1.177103042602539, + "loss_ce": 0.007669423706829548, + "loss_iou": 0.482421875, + "loss_num": 0.041015625, + "loss_xval": 1.171875, + "num_input_tokens_seen": 473448008, + "step": 7078 + }, + { + "epoch": 0.803290780141844, + "grad_norm": 40.186031341552734, + "learning_rate": 5e-05, + "loss": 1.1629, + "num_input_tokens_seen": 473515084, + "step": 7079 + }, + { + "epoch": 0.803290780141844, + "loss": 1.1423591375350952, + "loss_ce": 0.005640408024191856, + "loss_iou": 0.486328125, + "loss_num": 0.032958984375, + "loss_xval": 1.140625, + "num_input_tokens_seen": 473515084, + "step": 7079 + }, + { + "epoch": 0.8034042553191489, + "grad_norm": 28.881671905517578, + "learning_rate": 5e-05, + "loss": 1.1711, + "num_input_tokens_seen": 473581836, + "step": 7080 + }, + { + "epoch": 0.8034042553191489, + "loss": 0.9955777525901794, + "loss_ce": 0.007052386645227671, + "loss_iou": 0.36328125, + "loss_num": 0.052490234375, + "loss_xval": 0.98828125, + "num_input_tokens_seen": 473581836, + "step": 7080 + }, + { + "epoch": 0.8035177304964539, + "grad_norm": 33.07212829589844, + "learning_rate": 5e-05, + "loss": 1.2343, + "num_input_tokens_seen": 473649516, + "step": 7081 + }, + { + "epoch": 0.8035177304964539, + "loss": 1.0003740787506104, + "loss_ce": 0.005745182279497385, + "loss_iou": 0.427734375, + "loss_num": 0.02783203125, + "loss_xval": 0.99609375, + "num_input_tokens_seen": 473649516, + "step": 7081 + }, + { + "epoch": 0.8036312056737589, + "grad_norm": 36.651309967041016, + "learning_rate": 5e-05, + "loss": 1.0412, + "num_input_tokens_seen": 473717700, + "step": 7082 + }, + { + "epoch": 0.8036312056737589, + "loss": 0.9778861403465271, + "loss_ce": 0.005229877308011055, + "loss_iou": 0.41796875, + "loss_num": 0.0272216796875, + "loss_xval": 0.97265625, + "num_input_tokens_seen": 473717700, + "step": 7082 + }, + { + "epoch": 0.8037446808510639, + "grad_norm": 22.17982292175293, + "learning_rate": 5e-05, + "loss": 1.3366, + "num_input_tokens_seen": 473785092, + "step": 7083 + }, + { + "epoch": 0.8037446808510639, + "loss": 1.3564879894256592, + "loss_ce": 0.0088316909968853, + "loss_iou": 0.53125, + "loss_num": 0.056396484375, + "loss_xval": 1.34375, + "num_input_tokens_seen": 473785092, + "step": 7083 + }, + { + "epoch": 0.8038581560283687, + "grad_norm": 19.099760055541992, + "learning_rate": 5e-05, + "loss": 1.2142, + "num_input_tokens_seen": 473851800, + "step": 7084 + }, + { + "epoch": 0.8038581560283687, + "loss": 1.135730266571045, + "loss_ce": 0.005359205882996321, + "loss_iou": 0.4453125, + "loss_num": 0.04833984375, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 473851800, + "step": 7084 + }, + { + "epoch": 0.8039716312056737, + "grad_norm": 27.472646713256836, + "learning_rate": 5e-05, + "loss": 1.5024, + "num_input_tokens_seen": 473919588, + "step": 7085 + }, + { + "epoch": 0.8039716312056737, + "loss": 1.4150053262710571, + "loss_ce": 0.011685071513056755, + "loss_iou": 0.5703125, + "loss_num": 0.05322265625, + "loss_xval": 1.40625, + "num_input_tokens_seen": 473919588, + "step": 7085 + }, + { + "epoch": 0.8040851063829787, + "grad_norm": 48.517425537109375, + "learning_rate": 5e-05, + "loss": 1.0593, + "num_input_tokens_seen": 473986640, + "step": 7086 + }, + { + "epoch": 0.8040851063829787, + "loss": 1.1381652355194092, + "loss_ce": 0.008770684711635113, + "loss_iou": 0.44921875, + "loss_num": 0.046142578125, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 473986640, + "step": 7086 + }, + { + "epoch": 0.8041985815602837, + "grad_norm": 37.881717681884766, + "learning_rate": 5e-05, + "loss": 1.2616, + "num_input_tokens_seen": 474051580, + "step": 7087 + }, + { + "epoch": 0.8041985815602837, + "loss": 1.4721375703811646, + "loss_ce": 0.011688346974551678, + "loss_iou": 0.625, + "loss_num": 0.04150390625, + "loss_xval": 1.4609375, + "num_input_tokens_seen": 474051580, + "step": 7087 + }, + { + "epoch": 0.8043120567375887, + "grad_norm": 37.790489196777344, + "learning_rate": 5e-05, + "loss": 1.0853, + "num_input_tokens_seen": 474118344, + "step": 7088 + }, + { + "epoch": 0.8043120567375887, + "loss": 1.3263731002807617, + "loss_ce": 0.007037180941551924, + "loss_iou": 0.51953125, + "loss_num": 0.0556640625, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 474118344, + "step": 7088 + }, + { + "epoch": 0.8044255319148936, + "grad_norm": 65.84809875488281, + "learning_rate": 5e-05, + "loss": 1.2187, + "num_input_tokens_seen": 474185292, + "step": 7089 + }, + { + "epoch": 0.8044255319148936, + "loss": 1.3495427370071411, + "loss_ce": 0.00823415257036686, + "loss_iou": 0.515625, + "loss_num": 0.062255859375, + "loss_xval": 1.34375, + "num_input_tokens_seen": 474185292, + "step": 7089 + }, + { + "epoch": 0.8045390070921986, + "grad_norm": 110.11125183105469, + "learning_rate": 5e-05, + "loss": 1.2004, + "num_input_tokens_seen": 474251784, + "step": 7090 + }, + { + "epoch": 0.8045390070921986, + "loss": 1.0809731483459473, + "loss_ce": 0.005777755286544561, + "loss_iou": 0.4609375, + "loss_num": 0.0311279296875, + "loss_xval": 1.078125, + "num_input_tokens_seen": 474251784, + "step": 7090 + }, + { + "epoch": 0.8046524822695036, + "grad_norm": 25.340980529785156, + "learning_rate": 5e-05, + "loss": 1.4581, + "num_input_tokens_seen": 474319712, + "step": 7091 + }, + { + "epoch": 0.8046524822695036, + "loss": 1.6831003427505493, + "loss_ce": 0.009272255003452301, + "loss_iou": 0.640625, + "loss_num": 0.078125, + "loss_xval": 1.671875, + "num_input_tokens_seen": 474319712, + "step": 7091 + }, + { + "epoch": 0.8047659574468085, + "grad_norm": 26.221105575561523, + "learning_rate": 5e-05, + "loss": 1.0799, + "num_input_tokens_seen": 474386836, + "step": 7092 + }, + { + "epoch": 0.8047659574468085, + "loss": 1.1907932758331299, + "loss_ce": 0.006222972646355629, + "loss_iou": 0.47265625, + "loss_num": 0.048095703125, + "loss_xval": 1.1875, + "num_input_tokens_seen": 474386836, + "step": 7092 + }, + { + "epoch": 0.8048794326241134, + "grad_norm": 30.448501586914062, + "learning_rate": 5e-05, + "loss": 1.1412, + "num_input_tokens_seen": 474454192, + "step": 7093 + }, + { + "epoch": 0.8048794326241134, + "loss": 1.1295790672302246, + "loss_ce": 0.004579034633934498, + "loss_iou": 0.49609375, + "loss_num": 0.026611328125, + "loss_xval": 1.125, + "num_input_tokens_seen": 474454192, + "step": 7093 + }, + { + "epoch": 0.8049929078014184, + "grad_norm": 17.40319061279297, + "learning_rate": 5e-05, + "loss": 1.0488, + "num_input_tokens_seen": 474521876, + "step": 7094 + }, + { + "epoch": 0.8049929078014184, + "loss": 1.2576525211334229, + "loss_ce": 0.00911736860871315, + "loss_iou": 0.486328125, + "loss_num": 0.055419921875, + "loss_xval": 1.25, + "num_input_tokens_seen": 474521876, + "step": 7094 + }, + { + "epoch": 0.8051063829787234, + "grad_norm": 16.092548370361328, + "learning_rate": 5e-05, + "loss": 1.1261, + "num_input_tokens_seen": 474588828, + "step": 7095 + }, + { + "epoch": 0.8051063829787234, + "loss": 1.0994532108306885, + "loss_ce": 0.004238399211317301, + "loss_iou": 0.4609375, + "loss_num": 0.034912109375, + "loss_xval": 1.09375, + "num_input_tokens_seen": 474588828, + "step": 7095 + }, + { + "epoch": 0.8052198581560284, + "grad_norm": 21.437828063964844, + "learning_rate": 5e-05, + "loss": 0.9733, + "num_input_tokens_seen": 474655712, + "step": 7096 + }, + { + "epoch": 0.8052198581560284, + "loss": 1.0738317966461182, + "loss_ce": 0.005960657261312008, + "loss_iou": 0.431640625, + "loss_num": 0.041015625, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 474655712, + "step": 7096 + }, + { + "epoch": 0.8053333333333333, + "grad_norm": 20.68212127685547, + "learning_rate": 5e-05, + "loss": 1.1258, + "num_input_tokens_seen": 474722184, + "step": 7097 + }, + { + "epoch": 0.8053333333333333, + "loss": 1.1872889995574951, + "loss_ce": 0.007601420860737562, + "loss_iou": 0.435546875, + "loss_num": 0.061767578125, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 474722184, + "step": 7097 + }, + { + "epoch": 0.8054468085106383, + "grad_norm": 38.95561599731445, + "learning_rate": 5e-05, + "loss": 1.1317, + "num_input_tokens_seen": 474788764, + "step": 7098 + }, + { + "epoch": 0.8054468085106383, + "loss": 1.2649459838867188, + "loss_ce": 0.009330647997558117, + "loss_iou": 0.4921875, + "loss_num": 0.054443359375, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 474788764, + "step": 7098 + }, + { + "epoch": 0.8055602836879433, + "grad_norm": 26.624752044677734, + "learning_rate": 5e-05, + "loss": 1.1343, + "num_input_tokens_seen": 474855196, + "step": 7099 + }, + { + "epoch": 0.8055602836879433, + "loss": 0.889523983001709, + "loss_ce": 0.007199766114354134, + "loss_iou": 0.380859375, + "loss_num": 0.023681640625, + "loss_xval": 0.8828125, + "num_input_tokens_seen": 474855196, + "step": 7099 + }, + { + "epoch": 0.8056737588652483, + "grad_norm": 25.120834350585938, + "learning_rate": 5e-05, + "loss": 1.1106, + "num_input_tokens_seen": 474921640, + "step": 7100 + }, + { + "epoch": 0.8056737588652483, + "loss": 0.9444816708564758, + "loss_ce": 0.007958256639540195, + "loss_iou": 0.35546875, + "loss_num": 0.045166015625, + "loss_xval": 0.9375, + "num_input_tokens_seen": 474921640, + "step": 7100 + }, + { + "epoch": 0.8057872340425531, + "grad_norm": 32.10325622558594, + "learning_rate": 5e-05, + "loss": 1.0299, + "num_input_tokens_seen": 474989156, + "step": 7101 + }, + { + "epoch": 0.8057872340425531, + "loss": 1.0008816719055176, + "loss_ce": 0.004970978479832411, + "loss_iou": 0.388671875, + "loss_num": 0.04345703125, + "loss_xval": 0.99609375, + "num_input_tokens_seen": 474989156, + "step": 7101 + }, + { + "epoch": 0.8059007092198581, + "grad_norm": 46.581356048583984, + "learning_rate": 5e-05, + "loss": 1.306, + "num_input_tokens_seen": 475055920, + "step": 7102 + }, + { + "epoch": 0.8059007092198581, + "loss": 1.4445335865020752, + "loss_ce": 0.00849845353513956, + "loss_iou": 0.56640625, + "loss_num": 0.061279296875, + "loss_xval": 1.4375, + "num_input_tokens_seen": 475055920, + "step": 7102 + }, + { + "epoch": 0.8060141843971631, + "grad_norm": 33.62515640258789, + "learning_rate": 5e-05, + "loss": 1.2339, + "num_input_tokens_seen": 475124060, + "step": 7103 + }, + { + "epoch": 0.8060141843971631, + "loss": 0.9374406337738037, + "loss_ce": 0.0026261601597070694, + "loss_iou": 0.400390625, + "loss_num": 0.02685546875, + "loss_xval": 0.93359375, + "num_input_tokens_seen": 475124060, + "step": 7103 + }, + { + "epoch": 0.8061276595744681, + "grad_norm": 39.34568786621094, + "learning_rate": 5e-05, + "loss": 1.4156, + "num_input_tokens_seen": 475191760, + "step": 7104 + }, + { + "epoch": 0.8061276595744681, + "loss": 1.398822546005249, + "loss_ce": 0.010150641202926636, + "loss_iou": 0.5546875, + "loss_num": 0.05517578125, + "loss_xval": 1.390625, + "num_input_tokens_seen": 475191760, + "step": 7104 + }, + { + "epoch": 0.8062411347517731, + "grad_norm": 44.8577995300293, + "learning_rate": 5e-05, + "loss": 1.2562, + "num_input_tokens_seen": 475258060, + "step": 7105 + }, + { + "epoch": 0.8062411347517731, + "loss": 1.0564924478530884, + "loss_ce": 0.01340164989233017, + "loss_iou": 0.431640625, + "loss_num": 0.036376953125, + "loss_xval": 1.046875, + "num_input_tokens_seen": 475258060, + "step": 7105 + }, + { + "epoch": 0.806354609929078, + "grad_norm": 29.810791015625, + "learning_rate": 5e-05, + "loss": 1.2687, + "num_input_tokens_seen": 475324996, + "step": 7106 + }, + { + "epoch": 0.806354609929078, + "loss": 1.3286073207855225, + "loss_ce": 0.0053651598282158375, + "loss_iou": 0.546875, + "loss_num": 0.0458984375, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 475324996, + "step": 7106 + }, + { + "epoch": 0.806468085106383, + "grad_norm": 39.123863220214844, + "learning_rate": 5e-05, + "loss": 1.2024, + "num_input_tokens_seen": 475392216, + "step": 7107 + }, + { + "epoch": 0.806468085106383, + "loss": 1.2390000820159912, + "loss_ce": 0.009996173903346062, + "loss_iou": 0.484375, + "loss_num": 0.0517578125, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 475392216, + "step": 7107 + }, + { + "epoch": 0.806581560283688, + "grad_norm": 13.755232810974121, + "learning_rate": 5e-05, + "loss": 1.0318, + "num_input_tokens_seen": 475459704, + "step": 7108 + }, + { + "epoch": 0.806581560283688, + "loss": 1.061144232749939, + "loss_ce": 0.0076773762702941895, + "loss_iou": 0.408203125, + "loss_num": 0.04736328125, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 475459704, + "step": 7108 + }, + { + "epoch": 0.8066950354609929, + "grad_norm": 18.885461807250977, + "learning_rate": 5e-05, + "loss": 0.9531, + "num_input_tokens_seen": 475525640, + "step": 7109 + }, + { + "epoch": 0.8066950354609929, + "loss": 0.9644181728363037, + "loss_ce": 0.0037247762084007263, + "loss_iou": 0.408203125, + "loss_num": 0.02880859375, + "loss_xval": 0.9609375, + "num_input_tokens_seen": 475525640, + "step": 7109 + }, + { + "epoch": 0.8068085106382978, + "grad_norm": 26.565126419067383, + "learning_rate": 5e-05, + "loss": 1.1084, + "num_input_tokens_seen": 475591692, + "step": 7110 + }, + { + "epoch": 0.8068085106382978, + "loss": 1.1297295093536377, + "loss_ce": 0.004973609931766987, + "loss_iou": 0.44140625, + "loss_num": 0.048095703125, + "loss_xval": 1.125, + "num_input_tokens_seen": 475591692, + "step": 7110 + }, + { + "epoch": 0.8069219858156028, + "grad_norm": 77.23392486572266, + "learning_rate": 5e-05, + "loss": 1.3105, + "num_input_tokens_seen": 475658884, + "step": 7111 + }, + { + "epoch": 0.8069219858156028, + "loss": 1.3379583358764648, + "loss_ce": 0.010321654379367828, + "loss_iou": 0.56640625, + "loss_num": 0.03857421875, + "loss_xval": 1.328125, + "num_input_tokens_seen": 475658884, + "step": 7111 + }, + { + "epoch": 0.8070354609929078, + "grad_norm": 32.40888595581055, + "learning_rate": 5e-05, + "loss": 1.155, + "num_input_tokens_seen": 475725676, + "step": 7112 + }, + { + "epoch": 0.8070354609929078, + "loss": 1.055798053741455, + "loss_ce": 0.006969960872083902, + "loss_iou": 0.439453125, + "loss_num": 0.03369140625, + "loss_xval": 1.046875, + "num_input_tokens_seen": 475725676, + "step": 7112 + }, + { + "epoch": 0.8071489361702128, + "grad_norm": 10.276714324951172, + "learning_rate": 5e-05, + "loss": 1.16, + "num_input_tokens_seen": 475792556, + "step": 7113 + }, + { + "epoch": 0.8071489361702128, + "loss": 1.1500942707061768, + "loss_ce": 0.005074799060821533, + "loss_iou": 0.470703125, + "loss_num": 0.0400390625, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 475792556, + "step": 7113 + }, + { + "epoch": 0.8072624113475177, + "grad_norm": 19.611881256103516, + "learning_rate": 5e-05, + "loss": 1.1357, + "num_input_tokens_seen": 475859360, + "step": 7114 + }, + { + "epoch": 0.8072624113475177, + "loss": 1.1932919025421143, + "loss_ce": 0.010186366736888885, + "loss_iou": 0.43359375, + "loss_num": 0.06298828125, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 475859360, + "step": 7114 + }, + { + "epoch": 0.8073758865248227, + "grad_norm": 40.5567741394043, + "learning_rate": 5e-05, + "loss": 1.1916, + "num_input_tokens_seen": 475926396, + "step": 7115 + }, + { + "epoch": 0.8073758865248227, + "loss": 1.3802986145019531, + "loss_ce": 0.007740022614598274, + "loss_iou": 0.5234375, + "loss_num": 0.06591796875, + "loss_xval": 1.375, + "num_input_tokens_seen": 475926396, + "step": 7115 + }, + { + "epoch": 0.8074893617021277, + "grad_norm": 31.67177391052246, + "learning_rate": 5e-05, + "loss": 1.138, + "num_input_tokens_seen": 475992788, + "step": 7116 + }, + { + "epoch": 0.8074893617021277, + "loss": 1.2903854846954346, + "loss_ce": 0.007182331755757332, + "loss_iou": 0.51171875, + "loss_num": 0.052490234375, + "loss_xval": 1.28125, + "num_input_tokens_seen": 475992788, + "step": 7116 + }, + { + "epoch": 0.8076028368794326, + "grad_norm": 25.55389976501465, + "learning_rate": 5e-05, + "loss": 1.3277, + "num_input_tokens_seen": 476059488, + "step": 7117 + }, + { + "epoch": 0.8076028368794326, + "loss": 1.4186567068099976, + "loss_ce": 0.006547313649207354, + "loss_iou": 0.58984375, + "loss_num": 0.04541015625, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 476059488, + "step": 7117 + }, + { + "epoch": 0.8077163120567376, + "grad_norm": 16.46331787109375, + "learning_rate": 5e-05, + "loss": 1.2007, + "num_input_tokens_seen": 476127148, + "step": 7118 + }, + { + "epoch": 0.8077163120567376, + "loss": 1.3868252038955688, + "loss_ce": 0.007918964140117168, + "loss_iou": 0.5390625, + "loss_num": 0.060302734375, + "loss_xval": 1.375, + "num_input_tokens_seen": 476127148, + "step": 7118 + }, + { + "epoch": 0.8078297872340425, + "grad_norm": 22.66191291809082, + "learning_rate": 5e-05, + "loss": 1.1399, + "num_input_tokens_seen": 476193312, + "step": 7119 + }, + { + "epoch": 0.8078297872340425, + "loss": 1.121683120727539, + "loss_ce": 0.0064487336203455925, + "loss_iou": 0.4609375, + "loss_num": 0.038330078125, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 476193312, + "step": 7119 + }, + { + "epoch": 0.8079432624113475, + "grad_norm": 30.77849578857422, + "learning_rate": 5e-05, + "loss": 1.1497, + "num_input_tokens_seen": 476260812, + "step": 7120 + }, + { + "epoch": 0.8079432624113475, + "loss": 1.0887110233306885, + "loss_ce": 0.012050800025463104, + "loss_iou": 0.421875, + "loss_num": 0.046630859375, + "loss_xval": 1.078125, + "num_input_tokens_seen": 476260812, + "step": 7120 + }, + { + "epoch": 0.8080567375886525, + "grad_norm": 27.601179122924805, + "learning_rate": 5e-05, + "loss": 1.1455, + "num_input_tokens_seen": 476327840, + "step": 7121 + }, + { + "epoch": 0.8080567375886525, + "loss": 1.2064478397369385, + "loss_ce": 0.007229173555970192, + "loss_iou": 0.48046875, + "loss_num": 0.048095703125, + "loss_xval": 1.203125, + "num_input_tokens_seen": 476327840, + "step": 7121 + }, + { + "epoch": 0.8081702127659575, + "grad_norm": 13.991907119750977, + "learning_rate": 5e-05, + "loss": 1.3115, + "num_input_tokens_seen": 476394172, + "step": 7122 + }, + { + "epoch": 0.8081702127659575, + "loss": 1.1259803771972656, + "loss_ce": 0.003910008352249861, + "loss_iou": 0.474609375, + "loss_num": 0.03466796875, + "loss_xval": 1.125, + "num_input_tokens_seen": 476394172, + "step": 7122 + }, + { + "epoch": 0.8082836879432624, + "grad_norm": 29.063846588134766, + "learning_rate": 5e-05, + "loss": 1.2587, + "num_input_tokens_seen": 476461128, + "step": 7123 + }, + { + "epoch": 0.8082836879432624, + "loss": 1.4124157428741455, + "loss_ce": 0.008118907921016216, + "loss_iou": 0.5390625, + "loss_num": 0.0654296875, + "loss_xval": 1.40625, + "num_input_tokens_seen": 476461128, + "step": 7123 + }, + { + "epoch": 0.8083971631205674, + "grad_norm": 22.802196502685547, + "learning_rate": 5e-05, + "loss": 1.2591, + "num_input_tokens_seen": 476528336, + "step": 7124 + }, + { + "epoch": 0.8083971631205674, + "loss": 1.3390109539031982, + "loss_ce": 0.009909447282552719, + "loss_iou": 0.50390625, + "loss_num": 0.06396484375, + "loss_xval": 1.328125, + "num_input_tokens_seen": 476528336, + "step": 7124 + }, + { + "epoch": 0.8085106382978723, + "grad_norm": 34.070533752441406, + "learning_rate": 5e-05, + "loss": 1.0052, + "num_input_tokens_seen": 476595076, + "step": 7125 + }, + { + "epoch": 0.8085106382978723, + "loss": 1.0090320110321045, + "loss_ce": 0.005613995250314474, + "loss_iou": 0.40625, + "loss_num": 0.037841796875, + "loss_xval": 1.0, + "num_input_tokens_seen": 476595076, + "step": 7125 + }, + { + "epoch": 0.8086241134751773, + "grad_norm": 29.53536033630371, + "learning_rate": 5e-05, + "loss": 1.2886, + "num_input_tokens_seen": 476662064, + "step": 7126 + }, + { + "epoch": 0.8086241134751773, + "loss": 1.1973018646240234, + "loss_ce": 0.006872233934700489, + "loss_iou": 0.482421875, + "loss_num": 0.045654296875, + "loss_xval": 1.1875, + "num_input_tokens_seen": 476662064, + "step": 7126 + }, + { + "epoch": 0.8087375886524822, + "grad_norm": 32.64741897583008, + "learning_rate": 5e-05, + "loss": 1.064, + "num_input_tokens_seen": 476729168, + "step": 7127 + }, + { + "epoch": 0.8087375886524822, + "loss": 1.0874693393707275, + "loss_ce": 0.008856048807501793, + "loss_iou": 0.427734375, + "loss_num": 0.044677734375, + "loss_xval": 1.078125, + "num_input_tokens_seen": 476729168, + "step": 7127 + }, + { + "epoch": 0.8088510638297872, + "grad_norm": 39.049312591552734, + "learning_rate": 5e-05, + "loss": 1.3535, + "num_input_tokens_seen": 476796688, + "step": 7128 + }, + { + "epoch": 0.8088510638297872, + "loss": 1.4955652952194214, + "loss_ce": 0.00728406198322773, + "loss_iou": 0.59765625, + "loss_num": 0.059326171875, + "loss_xval": 1.484375, + "num_input_tokens_seen": 476796688, + "step": 7128 + }, + { + "epoch": 0.8089645390070922, + "grad_norm": 48.16844940185547, + "learning_rate": 5e-05, + "loss": 1.171, + "num_input_tokens_seen": 476863076, + "step": 7129 + }, + { + "epoch": 0.8089645390070922, + "loss": 1.3249796628952026, + "loss_ce": 0.006620308384299278, + "loss_iou": 0.5390625, + "loss_num": 0.047119140625, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 476863076, + "step": 7129 + }, + { + "epoch": 0.8090780141843972, + "grad_norm": 33.51209259033203, + "learning_rate": 5e-05, + "loss": 1.2694, + "num_input_tokens_seen": 476930424, + "step": 7130 + }, + { + "epoch": 0.8090780141843972, + "loss": 1.243099570274353, + "loss_ce": 0.0038418001495301723, + "loss_iou": 0.53125, + "loss_num": 0.03466796875, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 476930424, + "step": 7130 + }, + { + "epoch": 0.8091914893617022, + "grad_norm": 8.796904563903809, + "learning_rate": 5e-05, + "loss": 1.0184, + "num_input_tokens_seen": 476995800, + "step": 7131 + }, + { + "epoch": 0.8091914893617022, + "loss": 0.8683403730392456, + "loss_ce": 0.004448777996003628, + "loss_iou": 0.3671875, + "loss_num": 0.0263671875, + "loss_xval": 0.86328125, + "num_input_tokens_seen": 476995800, + "step": 7131 + }, + { + "epoch": 0.8093049645390071, + "grad_norm": 22.39076042175293, + "learning_rate": 5e-05, + "loss": 1.0579, + "num_input_tokens_seen": 477063220, + "step": 7132 + }, + { + "epoch": 0.8093049645390071, + "loss": 1.0204379558563232, + "loss_ce": 0.008719202131032944, + "loss_iou": 0.416015625, + "loss_num": 0.0361328125, + "loss_xval": 1.015625, + "num_input_tokens_seen": 477063220, + "step": 7132 + }, + { + "epoch": 0.8094184397163121, + "grad_norm": 18.1001033782959, + "learning_rate": 5e-05, + "loss": 1.2262, + "num_input_tokens_seen": 477130124, + "step": 7133 + }, + { + "epoch": 0.8094184397163121, + "loss": 1.1871154308319092, + "loss_ce": 0.005719062872231007, + "loss_iou": 0.486328125, + "loss_num": 0.0419921875, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 477130124, + "step": 7133 + }, + { + "epoch": 0.809531914893617, + "grad_norm": 23.939685821533203, + "learning_rate": 5e-05, + "loss": 1.4104, + "num_input_tokens_seen": 477197340, + "step": 7134 + }, + { + "epoch": 0.809531914893617, + "loss": 1.486551284790039, + "loss_ce": 0.005594279617071152, + "loss_iou": 0.54296875, + "loss_num": 0.078125, + "loss_xval": 1.484375, + "num_input_tokens_seen": 477197340, + "step": 7134 + }, + { + "epoch": 0.809645390070922, + "grad_norm": 22.813138961791992, + "learning_rate": 5e-05, + "loss": 1.2187, + "num_input_tokens_seen": 477264276, + "step": 7135 + }, + { + "epoch": 0.809645390070922, + "loss": 1.2445317506790161, + "loss_ce": 0.007471182849258184, + "loss_iou": 0.4921875, + "loss_num": 0.050537109375, + "loss_xval": 1.234375, + "num_input_tokens_seen": 477264276, + "step": 7135 + }, + { + "epoch": 0.8097588652482269, + "grad_norm": 28.505510330200195, + "learning_rate": 5e-05, + "loss": 1.3149, + "num_input_tokens_seen": 477330968, + "step": 7136 + }, + { + "epoch": 0.8097588652482269, + "loss": 1.5433458089828491, + "loss_ce": 0.009166090749204159, + "loss_iou": 0.58203125, + "loss_num": 0.07373046875, + "loss_xval": 1.53125, + "num_input_tokens_seen": 477330968, + "step": 7136 + }, + { + "epoch": 0.8098723404255319, + "grad_norm": 48.0638427734375, + "learning_rate": 5e-05, + "loss": 1.2176, + "num_input_tokens_seen": 477398184, + "step": 7137 + }, + { + "epoch": 0.8098723404255319, + "loss": 1.0945167541503906, + "loss_ce": 0.009555831551551819, + "loss_iou": 0.435546875, + "loss_num": 0.043212890625, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 477398184, + "step": 7137 + }, + { + "epoch": 0.8099858156028369, + "grad_norm": 17.550373077392578, + "learning_rate": 5e-05, + "loss": 1.1877, + "num_input_tokens_seen": 477464848, + "step": 7138 + }, + { + "epoch": 0.8099858156028369, + "loss": 1.1297307014465332, + "loss_ce": 0.010101912543177605, + "loss_iou": 0.4375, + "loss_num": 0.048828125, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 477464848, + "step": 7138 + }, + { + "epoch": 0.8100992907801419, + "grad_norm": 33.73147201538086, + "learning_rate": 5e-05, + "loss": 1.2642, + "num_input_tokens_seen": 477532392, + "step": 7139 + }, + { + "epoch": 0.8100992907801419, + "loss": 1.1371445655822754, + "loss_ce": 0.007017564028501511, + "loss_iou": 0.435546875, + "loss_num": 0.051513671875, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 477532392, + "step": 7139 + }, + { + "epoch": 0.8102127659574468, + "grad_norm": 31.7109432220459, + "learning_rate": 5e-05, + "loss": 1.3383, + "num_input_tokens_seen": 477599736, + "step": 7140 + }, + { + "epoch": 0.8102127659574468, + "loss": 1.3285102844238281, + "loss_ce": 0.007709413301199675, + "loss_iou": 0.5703125, + "loss_num": 0.036865234375, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 477599736, + "step": 7140 + }, + { + "epoch": 0.8103262411347518, + "grad_norm": 47.825042724609375, + "learning_rate": 5e-05, + "loss": 1.1651, + "num_input_tokens_seen": 477666504, + "step": 7141 + }, + { + "epoch": 0.8103262411347518, + "loss": 1.298919916152954, + "loss_ce": 0.004486393183469772, + "loss_iou": 0.53125, + "loss_num": 0.0458984375, + "loss_xval": 1.296875, + "num_input_tokens_seen": 477666504, + "step": 7141 + }, + { + "epoch": 0.8104397163120567, + "grad_norm": 37.34547805786133, + "learning_rate": 5e-05, + "loss": 1.2373, + "num_input_tokens_seen": 477734184, + "step": 7142 + }, + { + "epoch": 0.8104397163120567, + "loss": 1.2403464317321777, + "loss_ce": 0.00401834212243557, + "loss_iou": 0.50390625, + "loss_num": 0.04638671875, + "loss_xval": 1.234375, + "num_input_tokens_seen": 477734184, + "step": 7142 + }, + { + "epoch": 0.8105531914893617, + "grad_norm": 36.760658264160156, + "learning_rate": 5e-05, + "loss": 1.3424, + "num_input_tokens_seen": 477801020, + "step": 7143 + }, + { + "epoch": 0.8105531914893617, + "loss": 1.3138175010681152, + "loss_ce": 0.006200306583195925, + "loss_iou": 0.55078125, + "loss_num": 0.040283203125, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 477801020, + "step": 7143 + }, + { + "epoch": 0.8106666666666666, + "grad_norm": 27.205989837646484, + "learning_rate": 5e-05, + "loss": 1.1249, + "num_input_tokens_seen": 477868104, + "step": 7144 + }, + { + "epoch": 0.8106666666666666, + "loss": 1.3253822326660156, + "loss_ce": 0.0055579813197255135, + "loss_iou": 0.515625, + "loss_num": 0.057373046875, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 477868104, + "step": 7144 + }, + { + "epoch": 0.8107801418439716, + "grad_norm": 25.89882469177246, + "learning_rate": 5e-05, + "loss": 1.2182, + "num_input_tokens_seen": 477934868, + "step": 7145 + }, + { + "epoch": 0.8107801418439716, + "loss": 1.217466115951538, + "loss_ce": 0.006406599655747414, + "loss_iou": 0.466796875, + "loss_num": 0.055419921875, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 477934868, + "step": 7145 + }, + { + "epoch": 0.8108936170212766, + "grad_norm": 27.89187240600586, + "learning_rate": 5e-05, + "loss": 1.1422, + "num_input_tokens_seen": 478001624, + "step": 7146 + }, + { + "epoch": 0.8108936170212766, + "loss": 1.2450191974639893, + "loss_ce": 0.004296484403312206, + "loss_iou": 0.466796875, + "loss_num": 0.061279296875, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 478001624, + "step": 7146 + }, + { + "epoch": 0.8110070921985816, + "grad_norm": 29.038902282714844, + "learning_rate": 5e-05, + "loss": 1.2451, + "num_input_tokens_seen": 478069232, + "step": 7147 + }, + { + "epoch": 0.8110070921985816, + "loss": 1.1731222867965698, + "loss_ce": 0.007106663193553686, + "loss_iou": 0.484375, + "loss_num": 0.0390625, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 478069232, + "step": 7147 + }, + { + "epoch": 0.8111205673758866, + "grad_norm": 34.53192138671875, + "learning_rate": 5e-05, + "loss": 1.1879, + "num_input_tokens_seen": 478136520, + "step": 7148 + }, + { + "epoch": 0.8111205673758866, + "loss": 1.0258917808532715, + "loss_ce": 0.006360561586916447, + "loss_iou": 0.43359375, + "loss_num": 0.0299072265625, + "loss_xval": 1.015625, + "num_input_tokens_seen": 478136520, + "step": 7148 + }, + { + "epoch": 0.8112340425531915, + "grad_norm": 37.974308013916016, + "learning_rate": 5e-05, + "loss": 1.1669, + "num_input_tokens_seen": 478203116, + "step": 7149 + }, + { + "epoch": 0.8112340425531915, + "loss": 1.2978266477584839, + "loss_ce": 0.009252424351871014, + "loss_iou": 0.5234375, + "loss_num": 0.049072265625, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 478203116, + "step": 7149 + }, + { + "epoch": 0.8113475177304964, + "grad_norm": 16.648590087890625, + "learning_rate": 5e-05, + "loss": 1.1563, + "num_input_tokens_seen": 478270412, + "step": 7150 + }, + { + "epoch": 0.8113475177304964, + "loss": 1.2158887386322021, + "loss_ce": 0.008369099348783493, + "loss_iou": 0.466796875, + "loss_num": 0.05419921875, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 478270412, + "step": 7150 + }, + { + "epoch": 0.8114609929078014, + "grad_norm": 21.9449462890625, + "learning_rate": 5e-05, + "loss": 1.1439, + "num_input_tokens_seen": 478337284, + "step": 7151 + }, + { + "epoch": 0.8114609929078014, + "loss": 1.0562186241149902, + "loss_ce": 0.010320188477635384, + "loss_iou": 0.40234375, + "loss_num": 0.04833984375, + "loss_xval": 1.046875, + "num_input_tokens_seen": 478337284, + "step": 7151 + }, + { + "epoch": 0.8115744680851064, + "grad_norm": 20.334434509277344, + "learning_rate": 5e-05, + "loss": 1.0542, + "num_input_tokens_seen": 478404128, + "step": 7152 + }, + { + "epoch": 0.8115744680851064, + "loss": 1.087838888168335, + "loss_ce": 0.009225568734109402, + "loss_iou": 0.4296875, + "loss_num": 0.04443359375, + "loss_xval": 1.078125, + "num_input_tokens_seen": 478404128, + "step": 7152 + }, + { + "epoch": 0.8116879432624113, + "grad_norm": 28.38995933532715, + "learning_rate": 5e-05, + "loss": 1.3178, + "num_input_tokens_seen": 478471288, + "step": 7153 + }, + { + "epoch": 0.8116879432624113, + "loss": 1.0469210147857666, + "loss_ce": 0.006881955079734325, + "loss_iou": 0.423828125, + "loss_num": 0.03857421875, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 478471288, + "step": 7153 + }, + { + "epoch": 0.8118014184397163, + "grad_norm": 37.47245788574219, + "learning_rate": 5e-05, + "loss": 1.1054, + "num_input_tokens_seen": 478538016, + "step": 7154 + }, + { + "epoch": 0.8118014184397163, + "loss": 0.9845243096351624, + "loss_ce": 0.006252811290323734, + "loss_iou": 0.412109375, + "loss_num": 0.0311279296875, + "loss_xval": 0.9765625, + "num_input_tokens_seen": 478538016, + "step": 7154 + }, + { + "epoch": 0.8119148936170213, + "grad_norm": 40.171627044677734, + "learning_rate": 5e-05, + "loss": 1.2011, + "num_input_tokens_seen": 478605384, + "step": 7155 + }, + { + "epoch": 0.8119148936170213, + "loss": 1.2758487462997437, + "loss_ce": 0.0033877575770020485, + "loss_iou": 0.5, + "loss_num": 0.0537109375, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 478605384, + "step": 7155 + }, + { + "epoch": 0.8120283687943263, + "grad_norm": 38.95649719238281, + "learning_rate": 5e-05, + "loss": 1.1295, + "num_input_tokens_seen": 478672392, + "step": 7156 + }, + { + "epoch": 0.8120283687943263, + "loss": 1.0465326309204102, + "loss_ce": 0.00795847736299038, + "loss_iou": 0.458984375, + "loss_num": 0.023681640625, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 478672392, + "step": 7156 + }, + { + "epoch": 0.8121418439716312, + "grad_norm": 29.5903377532959, + "learning_rate": 5e-05, + "loss": 1.102, + "num_input_tokens_seen": 478739968, + "step": 7157 + }, + { + "epoch": 0.8121418439716312, + "loss": 0.9441360831260681, + "loss_ce": 0.003950535319745541, + "loss_iou": 0.392578125, + "loss_num": 0.03076171875, + "loss_xval": 0.94140625, + "num_input_tokens_seen": 478739968, + "step": 7157 + }, + { + "epoch": 0.8122553191489361, + "grad_norm": 24.189748764038086, + "learning_rate": 5e-05, + "loss": 0.9787, + "num_input_tokens_seen": 478806376, + "step": 7158 + }, + { + "epoch": 0.8122553191489361, + "loss": 0.9934249520301819, + "loss_ce": 0.005021603778004646, + "loss_iou": 0.41796875, + "loss_num": 0.03076171875, + "loss_xval": 0.98828125, + "num_input_tokens_seen": 478806376, + "step": 7158 + }, + { + "epoch": 0.8123687943262411, + "grad_norm": 39.30725860595703, + "learning_rate": 5e-05, + "loss": 1.1819, + "num_input_tokens_seen": 478873392, + "step": 7159 + }, + { + "epoch": 0.8123687943262411, + "loss": 1.1465539932250977, + "loss_ce": 0.007393809501081705, + "loss_iou": 0.49609375, + "loss_num": 0.029052734375, + "loss_xval": 1.140625, + "num_input_tokens_seen": 478873392, + "step": 7159 + }, + { + "epoch": 0.8124822695035461, + "grad_norm": 22.774431228637695, + "learning_rate": 5e-05, + "loss": 1.1719, + "num_input_tokens_seen": 478940316, + "step": 7160 + }, + { + "epoch": 0.8124822695035461, + "loss": 1.147236704826355, + "loss_ce": 0.004658520221710205, + "loss_iou": 0.49609375, + "loss_num": 0.02978515625, + "loss_xval": 1.140625, + "num_input_tokens_seen": 478940316, + "step": 7160 + }, + { + "epoch": 0.812595744680851, + "grad_norm": 16.84014129638672, + "learning_rate": 5e-05, + "loss": 0.9726, + "num_input_tokens_seen": 479006852, + "step": 7161 + }, + { + "epoch": 0.812595744680851, + "loss": 0.8777778148651123, + "loss_ce": 0.003754346165806055, + "loss_iou": 0.35546875, + "loss_num": 0.032958984375, + "loss_xval": 0.875, + "num_input_tokens_seen": 479006852, + "step": 7161 + }, + { + "epoch": 0.812709219858156, + "grad_norm": 33.692447662353516, + "learning_rate": 5e-05, + "loss": 1.189, + "num_input_tokens_seen": 479073856, + "step": 7162 + }, + { + "epoch": 0.812709219858156, + "loss": 1.1008377075195312, + "loss_ce": 0.008064325898885727, + "loss_iou": 0.44921875, + "loss_num": 0.038818359375, + "loss_xval": 1.09375, + "num_input_tokens_seen": 479073856, + "step": 7162 + }, + { + "epoch": 0.812822695035461, + "grad_norm": 75.12581634521484, + "learning_rate": 5e-05, + "loss": 1.2683, + "num_input_tokens_seen": 479141032, + "step": 7163 + }, + { + "epoch": 0.812822695035461, + "loss": 1.333450436592102, + "loss_ce": 0.006301999092102051, + "loss_iou": 0.5390625, + "loss_num": 0.050048828125, + "loss_xval": 1.328125, + "num_input_tokens_seen": 479141032, + "step": 7163 + }, + { + "epoch": 0.812936170212766, + "grad_norm": 39.84430694580078, + "learning_rate": 5e-05, + "loss": 1.4484, + "num_input_tokens_seen": 479207816, + "step": 7164 + }, + { + "epoch": 0.812936170212766, + "loss": 1.4520089626312256, + "loss_ce": 0.006696529686450958, + "loss_iou": 0.54296875, + "loss_num": 0.072265625, + "loss_xval": 1.4453125, + "num_input_tokens_seen": 479207816, + "step": 7164 + }, + { + "epoch": 0.813049645390071, + "grad_norm": 33.5829963684082, + "learning_rate": 5e-05, + "loss": 1.4577, + "num_input_tokens_seen": 479273596, + "step": 7165 + }, + { + "epoch": 0.813049645390071, + "loss": 1.545499324798584, + "loss_ce": 0.013272712007164955, + "loss_iou": 0.58203125, + "loss_num": 0.07421875, + "loss_xval": 1.53125, + "num_input_tokens_seen": 479273596, + "step": 7165 + }, + { + "epoch": 0.8131631205673759, + "grad_norm": 29.075679779052734, + "learning_rate": 5e-05, + "loss": 1.3522, + "num_input_tokens_seen": 479340856, + "step": 7166 + }, + { + "epoch": 0.8131631205673759, + "loss": 1.3027149438858032, + "loss_ce": 0.009257897734642029, + "loss_iou": 0.53515625, + "loss_num": 0.044677734375, + "loss_xval": 1.296875, + "num_input_tokens_seen": 479340856, + "step": 7166 + }, + { + "epoch": 0.8132765957446808, + "grad_norm": 23.175537109375, + "learning_rate": 5e-05, + "loss": 1.0576, + "num_input_tokens_seen": 479408544, + "step": 7167 + }, + { + "epoch": 0.8132765957446808, + "loss": 1.0298124551773071, + "loss_ce": 0.008816384710371494, + "loss_iou": 0.427734375, + "loss_num": 0.032958984375, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 479408544, + "step": 7167 + }, + { + "epoch": 0.8133900709219858, + "grad_norm": 18.475849151611328, + "learning_rate": 5e-05, + "loss": 1.0351, + "num_input_tokens_seen": 479475948, + "step": 7168 + }, + { + "epoch": 0.8133900709219858, + "loss": 0.9979068040847778, + "loss_ce": 0.005719256121665239, + "loss_iou": 0.419921875, + "loss_num": 0.030517578125, + "loss_xval": 0.9921875, + "num_input_tokens_seen": 479475948, + "step": 7168 + }, + { + "epoch": 0.8135035460992908, + "grad_norm": 37.03878402709961, + "learning_rate": 5e-05, + "loss": 1.2103, + "num_input_tokens_seen": 479543416, + "step": 7169 + }, + { + "epoch": 0.8135035460992908, + "loss": 1.300318717956543, + "loss_ce": 0.007838334888219833, + "loss_iou": 0.49609375, + "loss_num": 0.06005859375, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 479543416, + "step": 7169 + }, + { + "epoch": 0.8136170212765957, + "grad_norm": 27.319795608520508, + "learning_rate": 5e-05, + "loss": 1.1945, + "num_input_tokens_seen": 479610276, + "step": 7170 + }, + { + "epoch": 0.8136170212765957, + "loss": 1.1401365995407104, + "loss_ce": 0.010253766551613808, + "loss_iou": 0.435546875, + "loss_num": 0.0517578125, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 479610276, + "step": 7170 + }, + { + "epoch": 0.8137304964539007, + "grad_norm": 35.422698974609375, + "learning_rate": 5e-05, + "loss": 1.1799, + "num_input_tokens_seen": 479677356, + "step": 7171 + }, + { + "epoch": 0.8137304964539007, + "loss": 1.1925086975097656, + "loss_ce": 0.007938438095152378, + "loss_iou": 0.4921875, + "loss_num": 0.039794921875, + "loss_xval": 1.1875, + "num_input_tokens_seen": 479677356, + "step": 7171 + }, + { + "epoch": 0.8138439716312057, + "grad_norm": 26.122421264648438, + "learning_rate": 5e-05, + "loss": 1.2453, + "num_input_tokens_seen": 479744600, + "step": 7172 + }, + { + "epoch": 0.8138439716312057, + "loss": 1.2824193239212036, + "loss_ce": 0.00947016291320324, + "loss_iou": 0.53515625, + "loss_num": 0.040771484375, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 479744600, + "step": 7172 + }, + { + "epoch": 0.8139574468085107, + "grad_norm": 18.61746597290039, + "learning_rate": 5e-05, + "loss": 1.1569, + "num_input_tokens_seen": 479811240, + "step": 7173 + }, + { + "epoch": 0.8139574468085107, + "loss": 1.0431127548217773, + "loss_ce": 0.007224042899906635, + "loss_iou": 0.44921875, + "loss_num": 0.0277099609375, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 479811240, + "step": 7173 + }, + { + "epoch": 0.8140709219858157, + "grad_norm": 47.53818130493164, + "learning_rate": 5e-05, + "loss": 1.1708, + "num_input_tokens_seen": 479879756, + "step": 7174 + }, + { + "epoch": 0.8140709219858157, + "loss": 1.0271409749984741, + "loss_ce": 0.007853854447603226, + "loss_iou": 0.439453125, + "loss_num": 0.0281982421875, + "loss_xval": 1.015625, + "num_input_tokens_seen": 479879756, + "step": 7174 + }, + { + "epoch": 0.8141843971631205, + "grad_norm": 43.58334732055664, + "learning_rate": 5e-05, + "loss": 1.3621, + "num_input_tokens_seen": 479945976, + "step": 7175 + }, + { + "epoch": 0.8141843971631205, + "loss": 1.19661545753479, + "loss_ce": 0.004720903933048248, + "loss_iou": 0.515625, + "loss_num": 0.03125, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 479945976, + "step": 7175 + }, + { + "epoch": 0.8142978723404255, + "grad_norm": 25.883132934570312, + "learning_rate": 5e-05, + "loss": 1.3554, + "num_input_tokens_seen": 480012760, + "step": 7176 + }, + { + "epoch": 0.8142978723404255, + "loss": 1.3447411060333252, + "loss_ce": 0.003920648247003555, + "loss_iou": 0.578125, + "loss_num": 0.037353515625, + "loss_xval": 1.34375, + "num_input_tokens_seen": 480012760, + "step": 7176 + }, + { + "epoch": 0.8144113475177305, + "grad_norm": 16.987409591674805, + "learning_rate": 5e-05, + "loss": 1.1832, + "num_input_tokens_seen": 480080308, + "step": 7177 + }, + { + "epoch": 0.8144113475177305, + "loss": 1.138774037361145, + "loss_ce": 0.00547323003411293, + "loss_iou": 0.439453125, + "loss_num": 0.05126953125, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 480080308, + "step": 7177 + }, + { + "epoch": 0.8145248226950355, + "grad_norm": 30.551931381225586, + "learning_rate": 5e-05, + "loss": 1.1073, + "num_input_tokens_seen": 480146212, + "step": 7178 + }, + { + "epoch": 0.8145248226950355, + "loss": 1.044860601425171, + "loss_ce": 0.00970432162284851, + "loss_iou": 0.3984375, + "loss_num": 0.04736328125, + "loss_xval": 1.03125, + "num_input_tokens_seen": 480146212, + "step": 7178 + }, + { + "epoch": 0.8146382978723404, + "grad_norm": 37.271644592285156, + "learning_rate": 5e-05, + "loss": 1.338, + "num_input_tokens_seen": 480214044, + "step": 7179 + }, + { + "epoch": 0.8146382978723404, + "loss": 1.3381640911102295, + "loss_ce": 0.007109415251761675, + "loss_iou": 0.5546875, + "loss_num": 0.04443359375, + "loss_xval": 1.328125, + "num_input_tokens_seen": 480214044, + "step": 7179 + }, + { + "epoch": 0.8147517730496454, + "grad_norm": 28.854904174804688, + "learning_rate": 5e-05, + "loss": 1.1373, + "num_input_tokens_seen": 480280396, + "step": 7180 + }, + { + "epoch": 0.8147517730496454, + "loss": 0.9849852323532104, + "loss_ce": 0.009887609630823135, + "loss_iou": 0.34375, + "loss_num": 0.0576171875, + "loss_xval": 0.9765625, + "num_input_tokens_seen": 480280396, + "step": 7180 + }, + { + "epoch": 0.8148652482269504, + "grad_norm": 37.780216217041016, + "learning_rate": 5e-05, + "loss": 1.3076, + "num_input_tokens_seen": 480348116, + "step": 7181 + }, + { + "epoch": 0.8148652482269504, + "loss": 1.3180577754974365, + "loss_ce": 0.007510927971452475, + "loss_iou": 0.5234375, + "loss_num": 0.05322265625, + "loss_xval": 1.3125, + "num_input_tokens_seen": 480348116, + "step": 7181 + }, + { + "epoch": 0.8149787234042554, + "grad_norm": 41.64287185668945, + "learning_rate": 5e-05, + "loss": 1.2163, + "num_input_tokens_seen": 480415644, + "step": 7182 + }, + { + "epoch": 0.8149787234042554, + "loss": 1.3882009983062744, + "loss_ce": 0.00490023847669363, + "loss_iou": 0.54296875, + "loss_num": 0.0595703125, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 480415644, + "step": 7182 + }, + { + "epoch": 0.8150921985815602, + "grad_norm": 33.22254180908203, + "learning_rate": 5e-05, + "loss": 1.1235, + "num_input_tokens_seen": 480481652, + "step": 7183 + }, + { + "epoch": 0.8150921985815602, + "loss": 1.1619408130645752, + "loss_ce": 0.007155667524784803, + "loss_iou": 0.455078125, + "loss_num": 0.048583984375, + "loss_xval": 1.15625, + "num_input_tokens_seen": 480481652, + "step": 7183 + }, + { + "epoch": 0.8152056737588652, + "grad_norm": 12.681459426879883, + "learning_rate": 5e-05, + "loss": 0.95, + "num_input_tokens_seen": 480549476, + "step": 7184 + }, + { + "epoch": 0.8152056737588652, + "loss": 0.8793292045593262, + "loss_ce": 0.00481748953461647, + "loss_iou": 0.38671875, + "loss_num": 0.0201416015625, + "loss_xval": 0.875, + "num_input_tokens_seen": 480549476, + "step": 7184 + }, + { + "epoch": 0.8153191489361702, + "grad_norm": 21.284072875976562, + "learning_rate": 5e-05, + "loss": 1.1294, + "num_input_tokens_seen": 480617164, + "step": 7185 + }, + { + "epoch": 0.8153191489361702, + "loss": 1.1069003343582153, + "loss_ce": 0.005337839014828205, + "loss_iou": 0.484375, + "loss_num": 0.0262451171875, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 480617164, + "step": 7185 + }, + { + "epoch": 0.8154326241134752, + "grad_norm": 29.498210906982422, + "learning_rate": 5e-05, + "loss": 1.3171, + "num_input_tokens_seen": 480685504, + "step": 7186 + }, + { + "epoch": 0.8154326241134752, + "loss": 1.316887617111206, + "loss_ce": 0.005364245735108852, + "loss_iou": 0.5234375, + "loss_num": 0.052490234375, + "loss_xval": 1.3125, + "num_input_tokens_seen": 480685504, + "step": 7186 + }, + { + "epoch": 0.8155460992907801, + "grad_norm": 47.65098571777344, + "learning_rate": 5e-05, + "loss": 1.2399, + "num_input_tokens_seen": 480752536, + "step": 7187 + }, + { + "epoch": 0.8155460992907801, + "loss": 1.2001866102218628, + "loss_ce": 0.0038974625058472157, + "loss_iou": 0.515625, + "loss_num": 0.032958984375, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 480752536, + "step": 7187 + }, + { + "epoch": 0.8156595744680851, + "grad_norm": 35.6500358581543, + "learning_rate": 5e-05, + "loss": 1.4157, + "num_input_tokens_seen": 480818828, + "step": 7188 + }, + { + "epoch": 0.8156595744680851, + "loss": 1.5660781860351562, + "loss_ce": 0.00748444115743041, + "loss_iou": 0.64453125, + "loss_num": 0.05419921875, + "loss_xval": 1.5625, + "num_input_tokens_seen": 480818828, + "step": 7188 + }, + { + "epoch": 0.8157730496453901, + "grad_norm": 23.816282272338867, + "learning_rate": 5e-05, + "loss": 1.0241, + "num_input_tokens_seen": 480886376, + "step": 7189 + }, + { + "epoch": 0.8157730496453901, + "loss": 1.0536887645721436, + "loss_ce": 0.005837208591401577, + "loss_iou": 0.41015625, + "loss_num": 0.04541015625, + "loss_xval": 1.046875, + "num_input_tokens_seen": 480886376, + "step": 7189 + }, + { + "epoch": 0.8158865248226951, + "grad_norm": 24.066146850585938, + "learning_rate": 5e-05, + "loss": 1.1879, + "num_input_tokens_seen": 480954008, + "step": 7190 + }, + { + "epoch": 0.8158865248226951, + "loss": 1.0891423225402832, + "loss_ce": 0.005646159406751394, + "loss_iou": 0.439453125, + "loss_num": 0.04052734375, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 480954008, + "step": 7190 + }, + { + "epoch": 0.816, + "grad_norm": 38.45896911621094, + "learning_rate": 5e-05, + "loss": 1.3124, + "num_input_tokens_seen": 481020688, + "step": 7191 + }, + { + "epoch": 0.816, + "loss": 1.3119642734527588, + "loss_ce": 0.005811940412968397, + "loss_iou": 0.53515625, + "loss_num": 0.0478515625, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 481020688, + "step": 7191 + }, + { + "epoch": 0.8161134751773049, + "grad_norm": 33.163719177246094, + "learning_rate": 5e-05, + "loss": 1.1621, + "num_input_tokens_seen": 481087516, + "step": 7192 + }, + { + "epoch": 0.8161134751773049, + "loss": 1.1866912841796875, + "loss_ce": 0.006027215160429478, + "loss_iou": 0.50390625, + "loss_num": 0.0341796875, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 481087516, + "step": 7192 + }, + { + "epoch": 0.8162269503546099, + "grad_norm": 29.880598068237305, + "learning_rate": 5e-05, + "loss": 1.2057, + "num_input_tokens_seen": 481154304, + "step": 7193 + }, + { + "epoch": 0.8162269503546099, + "loss": 1.2023828029632568, + "loss_ce": 0.005117187742143869, + "loss_iou": 0.4765625, + "loss_num": 0.048828125, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 481154304, + "step": 7193 + }, + { + "epoch": 0.8163404255319149, + "grad_norm": 31.713960647583008, + "learning_rate": 5e-05, + "loss": 1.4858, + "num_input_tokens_seen": 481220576, + "step": 7194 + }, + { + "epoch": 0.8163404255319149, + "loss": 1.4718884229660034, + "loss_ce": 0.007044733501970768, + "loss_iou": 0.56640625, + "loss_num": 0.06689453125, + "loss_xval": 1.46875, + "num_input_tokens_seen": 481220576, + "step": 7194 + }, + { + "epoch": 0.8164539007092199, + "grad_norm": 49.542842864990234, + "learning_rate": 5e-05, + "loss": 1.2754, + "num_input_tokens_seen": 481288636, + "step": 7195 + }, + { + "epoch": 0.8164539007092199, + "loss": 0.9985730051994324, + "loss_ce": 0.004920643754303455, + "loss_iou": 0.40234375, + "loss_num": 0.0380859375, + "loss_xval": 0.9921875, + "num_input_tokens_seen": 481288636, + "step": 7195 + }, + { + "epoch": 0.8165673758865248, + "grad_norm": 23.531143188476562, + "learning_rate": 5e-05, + "loss": 1.1851, + "num_input_tokens_seen": 481355664, + "step": 7196 + }, + { + "epoch": 0.8165673758865248, + "loss": 1.1998218297958374, + "loss_ce": 0.005485930480062962, + "loss_iou": 0.4921875, + "loss_num": 0.042236328125, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 481355664, + "step": 7196 + }, + { + "epoch": 0.8166808510638298, + "grad_norm": 30.53082847595215, + "learning_rate": 5e-05, + "loss": 1.088, + "num_input_tokens_seen": 481423052, + "step": 7197 + }, + { + "epoch": 0.8166808510638298, + "loss": 1.106181025505066, + "loss_ce": 0.006571603007614613, + "loss_iou": 0.49609375, + "loss_num": 0.02099609375, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 481423052, + "step": 7197 + }, + { + "epoch": 0.8167943262411348, + "grad_norm": 21.918214797973633, + "learning_rate": 5e-05, + "loss": 1.3224, + "num_input_tokens_seen": 481489140, + "step": 7198 + }, + { + "epoch": 0.8167943262411348, + "loss": 1.149350643157959, + "loss_ce": 0.007260735146701336, + "loss_iou": 0.455078125, + "loss_num": 0.04638671875, + "loss_xval": 1.140625, + "num_input_tokens_seen": 481489140, + "step": 7198 + }, + { + "epoch": 0.8169078014184398, + "grad_norm": 24.498594284057617, + "learning_rate": 5e-05, + "loss": 1.1556, + "num_input_tokens_seen": 481556848, + "step": 7199 + }, + { + "epoch": 0.8169078014184398, + "loss": 1.1079500913619995, + "loss_ce": 0.012246934697031975, + "loss_iou": 0.470703125, + "loss_num": 0.0311279296875, + "loss_xval": 1.09375, + "num_input_tokens_seen": 481556848, + "step": 7199 + }, + { + "epoch": 0.8170212765957446, + "grad_norm": 36.61736297607422, + "learning_rate": 5e-05, + "loss": 1.3382, + "num_input_tokens_seen": 481622560, + "step": 7200 + }, + { + "epoch": 0.8170212765957446, + "loss": 1.5216686725616455, + "loss_ce": 0.007508593611419201, + "loss_iou": 0.59375, + "loss_num": 0.064453125, + "loss_xval": 1.515625, + "num_input_tokens_seen": 481622560, + "step": 7200 + }, + { + "epoch": 0.8171347517730496, + "grad_norm": 40.7122917175293, + "learning_rate": 5e-05, + "loss": 1.2045, + "num_input_tokens_seen": 481688972, + "step": 7201 + }, + { + "epoch": 0.8171347517730496, + "loss": 1.2267258167266846, + "loss_ce": 0.0089523671194911, + "loss_iou": 0.4921875, + "loss_num": 0.046875, + "loss_xval": 1.21875, + "num_input_tokens_seen": 481688972, + "step": 7201 + }, + { + "epoch": 0.8172482269503546, + "grad_norm": 37.212303161621094, + "learning_rate": 5e-05, + "loss": 1.135, + "num_input_tokens_seen": 481756752, + "step": 7202 + }, + { + "epoch": 0.8172482269503546, + "loss": 1.2333678007125854, + "loss_ce": 0.006072923541069031, + "loss_iou": 0.515625, + "loss_num": 0.038330078125, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 481756752, + "step": 7202 + }, + { + "epoch": 0.8173617021276596, + "grad_norm": 29.083066940307617, + "learning_rate": 5e-05, + "loss": 1.2907, + "num_input_tokens_seen": 481823844, + "step": 7203 + }, + { + "epoch": 0.8173617021276596, + "loss": 1.2513474225997925, + "loss_ce": 0.005741931963711977, + "loss_iou": 0.54296875, + "loss_num": 0.032470703125, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 481823844, + "step": 7203 + }, + { + "epoch": 0.8174751773049646, + "grad_norm": 31.329721450805664, + "learning_rate": 5e-05, + "loss": 1.1549, + "num_input_tokens_seen": 481890680, + "step": 7204 + }, + { + "epoch": 0.8174751773049646, + "loss": 1.0182043313980103, + "loss_ce": 0.005020737648010254, + "loss_iou": 0.4375, + "loss_num": 0.02734375, + "loss_xval": 1.015625, + "num_input_tokens_seen": 481890680, + "step": 7204 + }, + { + "epoch": 0.8175886524822695, + "grad_norm": 31.636245727539062, + "learning_rate": 5e-05, + "loss": 1.2538, + "num_input_tokens_seen": 481958256, + "step": 7205 + }, + { + "epoch": 0.8175886524822695, + "loss": 1.0829532146453857, + "loss_ce": 0.005316467955708504, + "loss_iou": 0.46484375, + "loss_num": 0.029541015625, + "loss_xval": 1.078125, + "num_input_tokens_seen": 481958256, + "step": 7205 + }, + { + "epoch": 0.8177021276595745, + "grad_norm": 29.051240921020508, + "learning_rate": 5e-05, + "loss": 1.2077, + "num_input_tokens_seen": 482026080, + "step": 7206 + }, + { + "epoch": 0.8177021276595745, + "loss": 1.0901682376861572, + "loss_ce": 0.0027658543549478054, + "loss_iou": 0.443359375, + "loss_num": 0.039794921875, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 482026080, + "step": 7206 + }, + { + "epoch": 0.8178156028368795, + "grad_norm": 30.26823616027832, + "learning_rate": 5e-05, + "loss": 0.9998, + "num_input_tokens_seen": 482093140, + "step": 7207 + }, + { + "epoch": 0.8178156028368795, + "loss": 1.0471696853637695, + "loss_ce": 0.004689143970608711, + "loss_iou": 0.44140625, + "loss_num": 0.032470703125, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 482093140, + "step": 7207 + }, + { + "epoch": 0.8179290780141844, + "grad_norm": 26.808826446533203, + "learning_rate": 5e-05, + "loss": 1.0464, + "num_input_tokens_seen": 482160464, + "step": 7208 + }, + { + "epoch": 0.8179290780141844, + "loss": 1.0751854181289673, + "loss_ce": 0.004872928373515606, + "loss_iou": 0.4453125, + "loss_num": 0.0361328125, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 482160464, + "step": 7208 + }, + { + "epoch": 0.8180425531914893, + "grad_norm": 15.754632949829102, + "learning_rate": 5e-05, + "loss": 1.1733, + "num_input_tokens_seen": 482227340, + "step": 7209 + }, + { + "epoch": 0.8180425531914893, + "loss": 0.9774841070175171, + "loss_ce": 0.0053161815740168095, + "loss_iou": 0.412109375, + "loss_num": 0.0291748046875, + "loss_xval": 0.97265625, + "num_input_tokens_seen": 482227340, + "step": 7209 + }, + { + "epoch": 0.8181560283687943, + "grad_norm": 12.339649200439453, + "learning_rate": 5e-05, + "loss": 1.2966, + "num_input_tokens_seen": 482294700, + "step": 7210 + }, + { + "epoch": 0.8181560283687943, + "loss": 1.2609522342681885, + "loss_ce": 0.015346825122833252, + "loss_iou": 0.5234375, + "loss_num": 0.0400390625, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 482294700, + "step": 7210 + }, + { + "epoch": 0.8182695035460993, + "grad_norm": 12.074065208435059, + "learning_rate": 5e-05, + "loss": 1.0534, + "num_input_tokens_seen": 482361516, + "step": 7211 + }, + { + "epoch": 0.8182695035460993, + "loss": 0.9275490045547485, + "loss_ce": 0.01177750900387764, + "loss_iou": 0.37109375, + "loss_num": 0.034912109375, + "loss_xval": 0.9140625, + "num_input_tokens_seen": 482361516, + "step": 7211 + }, + { + "epoch": 0.8183829787234043, + "grad_norm": 23.655580520629883, + "learning_rate": 5e-05, + "loss": 1.2263, + "num_input_tokens_seen": 482429336, + "step": 7212 + }, + { + "epoch": 0.8183829787234043, + "loss": 1.438103199005127, + "loss_ce": 0.010124657303094864, + "loss_iou": 0.58984375, + "loss_num": 0.050048828125, + "loss_xval": 1.4296875, + "num_input_tokens_seen": 482429336, + "step": 7212 + }, + { + "epoch": 0.8184964539007092, + "grad_norm": 15.949129104614258, + "learning_rate": 5e-05, + "loss": 1.2144, + "num_input_tokens_seen": 482497320, + "step": 7213 + }, + { + "epoch": 0.8184964539007092, + "loss": 1.2722899913787842, + "loss_ce": 0.007641596719622612, + "loss_iou": 0.515625, + "loss_num": 0.046142578125, + "loss_xval": 1.265625, + "num_input_tokens_seen": 482497320, + "step": 7213 + }, + { + "epoch": 0.8186099290780142, + "grad_norm": 26.842687606811523, + "learning_rate": 5e-05, + "loss": 1.1512, + "num_input_tokens_seen": 482565008, + "step": 7214 + }, + { + "epoch": 0.8186099290780142, + "loss": 1.081506609916687, + "loss_ce": 0.007287833373993635, + "loss_iou": 0.4453125, + "loss_num": 0.036376953125, + "loss_xval": 1.078125, + "num_input_tokens_seen": 482565008, + "step": 7214 + }, + { + "epoch": 0.8187234042553192, + "grad_norm": 34.44710159301758, + "learning_rate": 5e-05, + "loss": 1.2712, + "num_input_tokens_seen": 482632076, + "step": 7215 + }, + { + "epoch": 0.8187234042553192, + "loss": 1.099503517150879, + "loss_ce": 0.003312167478725314, + "loss_iou": 0.44921875, + "loss_num": 0.039794921875, + "loss_xval": 1.09375, + "num_input_tokens_seen": 482632076, + "step": 7215 + }, + { + "epoch": 0.8188368794326241, + "grad_norm": 36.551597595214844, + "learning_rate": 5e-05, + "loss": 1.1032, + "num_input_tokens_seen": 482698144, + "step": 7216 + }, + { + "epoch": 0.8188368794326241, + "loss": 1.0607547760009766, + "loss_ce": 0.011194270104169846, + "loss_iou": 0.41796875, + "loss_num": 0.04296875, + "loss_xval": 1.046875, + "num_input_tokens_seen": 482698144, + "step": 7216 + }, + { + "epoch": 0.818950354609929, + "grad_norm": 71.8642349243164, + "learning_rate": 5e-05, + "loss": 1.16, + "num_input_tokens_seen": 482764716, + "step": 7217 + }, + { + "epoch": 0.818950354609929, + "loss": 1.123695731163025, + "loss_ce": 0.006996516138315201, + "loss_iou": 0.46875, + "loss_num": 0.035888671875, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 482764716, + "step": 7217 + }, + { + "epoch": 0.819063829787234, + "grad_norm": 30.261817932128906, + "learning_rate": 5e-05, + "loss": 1.1815, + "num_input_tokens_seen": 482831436, + "step": 7218 + }, + { + "epoch": 0.819063829787234, + "loss": 1.1352381706237793, + "loss_ce": 0.0063319336622953415, + "loss_iou": 0.43359375, + "loss_num": 0.052734375, + "loss_xval": 1.125, + "num_input_tokens_seen": 482831436, + "step": 7218 + }, + { + "epoch": 0.819177304964539, + "grad_norm": 35.23057556152344, + "learning_rate": 5e-05, + "loss": 1.1626, + "num_input_tokens_seen": 482897680, + "step": 7219 + }, + { + "epoch": 0.819177304964539, + "loss": 1.2325878143310547, + "loss_ce": 0.005903325974941254, + "loss_iou": 0.48828125, + "loss_num": 0.0498046875, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 482897680, + "step": 7219 + }, + { + "epoch": 0.819290780141844, + "grad_norm": 22.593774795532227, + "learning_rate": 5e-05, + "loss": 0.9165, + "num_input_tokens_seen": 482963584, + "step": 7220 + }, + { + "epoch": 0.819290780141844, + "loss": 0.6945216655731201, + "loss_ce": 0.002871320815756917, + "loss_iou": 0.296875, + "loss_num": 0.02001953125, + "loss_xval": 0.69140625, + "num_input_tokens_seen": 482963584, + "step": 7220 + }, + { + "epoch": 0.819404255319149, + "grad_norm": 58.16763687133789, + "learning_rate": 5e-05, + "loss": 1.033, + "num_input_tokens_seen": 483030392, + "step": 7221 + }, + { + "epoch": 0.819404255319149, + "loss": 1.1212000846862793, + "loss_ce": 0.004500893410295248, + "loss_iou": 0.482421875, + "loss_num": 0.0301513671875, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 483030392, + "step": 7221 + }, + { + "epoch": 0.8195177304964539, + "grad_norm": 28.97342300415039, + "learning_rate": 5e-05, + "loss": 1.2865, + "num_input_tokens_seen": 483097052, + "step": 7222 + }, + { + "epoch": 0.8195177304964539, + "loss": 1.3525681495666504, + "loss_ce": 0.004911965224891901, + "loss_iou": 0.52734375, + "loss_num": 0.05859375, + "loss_xval": 1.34375, + "num_input_tokens_seen": 483097052, + "step": 7222 + }, + { + "epoch": 0.8196312056737589, + "grad_norm": 35.339317321777344, + "learning_rate": 5e-05, + "loss": 0.9908, + "num_input_tokens_seen": 483163164, + "step": 7223 + }, + { + "epoch": 0.8196312056737589, + "loss": 0.9348764419555664, + "loss_ce": 0.006409656256437302, + "loss_iou": 0.38671875, + "loss_num": 0.03076171875, + "loss_xval": 0.9296875, + "num_input_tokens_seen": 483163164, + "step": 7223 + }, + { + "epoch": 0.8197446808510638, + "grad_norm": 28.505355834960938, + "learning_rate": 5e-05, + "loss": 1.3165, + "num_input_tokens_seen": 483230364, + "step": 7224 + }, + { + "epoch": 0.8197446808510638, + "loss": 1.333605170249939, + "loss_ce": 0.008409937843680382, + "loss_iou": 0.54296875, + "loss_num": 0.04736328125, + "loss_xval": 1.328125, + "num_input_tokens_seen": 483230364, + "step": 7224 + }, + { + "epoch": 0.8198581560283688, + "grad_norm": 21.700746536254883, + "learning_rate": 5e-05, + "loss": 1.142, + "num_input_tokens_seen": 483297964, + "step": 7225 + }, + { + "epoch": 0.8198581560283688, + "loss": 1.235951542854309, + "loss_ce": 0.005482837557792664, + "loss_iou": 0.46875, + "loss_num": 0.05859375, + "loss_xval": 1.234375, + "num_input_tokens_seen": 483297964, + "step": 7225 + }, + { + "epoch": 0.8199716312056737, + "grad_norm": 21.61971092224121, + "learning_rate": 5e-05, + "loss": 1.3087, + "num_input_tokens_seen": 483365828, + "step": 7226 + }, + { + "epoch": 0.8199716312056737, + "loss": 1.3292152881622314, + "loss_ce": 0.004508178681135178, + "loss_iou": 0.52734375, + "loss_num": 0.0537109375, + "loss_xval": 1.328125, + "num_input_tokens_seen": 483365828, + "step": 7226 + }, + { + "epoch": 0.8200851063829787, + "grad_norm": 34.054927825927734, + "learning_rate": 5e-05, + "loss": 1.052, + "num_input_tokens_seen": 483432152, + "step": 7227 + }, + { + "epoch": 0.8200851063829787, + "loss": 1.0051047801971436, + "loss_ce": 0.005104805808514357, + "loss_iou": 0.396484375, + "loss_num": 0.04150390625, + "loss_xval": 1.0, + "num_input_tokens_seen": 483432152, + "step": 7227 + }, + { + "epoch": 0.8201985815602837, + "grad_norm": 23.95854377746582, + "learning_rate": 5e-05, + "loss": 0.9959, + "num_input_tokens_seen": 483499672, + "step": 7228 + }, + { + "epoch": 0.8201985815602837, + "loss": 0.9517583847045898, + "loss_ce": 0.005225222557783127, + "loss_iou": 0.375, + "loss_num": 0.039306640625, + "loss_xval": 0.9453125, + "num_input_tokens_seen": 483499672, + "step": 7228 + }, + { + "epoch": 0.8203120567375887, + "grad_norm": 30.777368545532227, + "learning_rate": 5e-05, + "loss": 0.9969, + "num_input_tokens_seen": 483566820, + "step": 7229 + }, + { + "epoch": 0.8203120567375887, + "loss": 0.8942520022392273, + "loss_ce": 0.00606841966509819, + "loss_iou": 0.37109375, + "loss_num": 0.0291748046875, + "loss_xval": 0.88671875, + "num_input_tokens_seen": 483566820, + "step": 7229 + }, + { + "epoch": 0.8204255319148936, + "grad_norm": 27.94155502319336, + "learning_rate": 5e-05, + "loss": 1.2604, + "num_input_tokens_seen": 483633344, + "step": 7230 + }, + { + "epoch": 0.8204255319148936, + "loss": 1.1758297681808472, + "loss_ce": 0.006396132986992598, + "loss_iou": 0.51953125, + "loss_num": 0.0255126953125, + "loss_xval": 1.171875, + "num_input_tokens_seen": 483633344, + "step": 7230 + }, + { + "epoch": 0.8205390070921986, + "grad_norm": 34.82173156738281, + "learning_rate": 5e-05, + "loss": 1.3252, + "num_input_tokens_seen": 483701044, + "step": 7231 + }, + { + "epoch": 0.8205390070921986, + "loss": 1.154801368713379, + "loss_ce": 0.011002639308571815, + "loss_iou": 0.4609375, + "loss_num": 0.0439453125, + "loss_xval": 1.140625, + "num_input_tokens_seen": 483701044, + "step": 7231 + }, + { + "epoch": 0.8206524822695035, + "grad_norm": 57.139808654785156, + "learning_rate": 5e-05, + "loss": 1.1954, + "num_input_tokens_seen": 483768896, + "step": 7232 + }, + { + "epoch": 0.8206524822695035, + "loss": 1.307342767715454, + "loss_ce": 0.005096688866615295, + "loss_iou": 0.5390625, + "loss_num": 0.044677734375, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 483768896, + "step": 7232 + }, + { + "epoch": 0.8207659574468085, + "grad_norm": 30.5203857421875, + "learning_rate": 5e-05, + "loss": 1.1451, + "num_input_tokens_seen": 483835784, + "step": 7233 + }, + { + "epoch": 0.8207659574468085, + "loss": 1.316257119178772, + "loss_ce": 0.007175055798143148, + "loss_iou": 0.51953125, + "loss_num": 0.053955078125, + "loss_xval": 1.3125, + "num_input_tokens_seen": 483835784, + "step": 7233 + }, + { + "epoch": 0.8208794326241134, + "grad_norm": 34.33727264404297, + "learning_rate": 5e-05, + "loss": 1.2509, + "num_input_tokens_seen": 483902576, + "step": 7234 + }, + { + "epoch": 0.8208794326241134, + "loss": 1.2599551677703857, + "loss_ce": 0.008002030663192272, + "loss_iou": 0.50390625, + "loss_num": 0.04833984375, + "loss_xval": 1.25, + "num_input_tokens_seen": 483902576, + "step": 7234 + }, + { + "epoch": 0.8209929078014184, + "grad_norm": 23.35175895690918, + "learning_rate": 5e-05, + "loss": 1.1474, + "num_input_tokens_seen": 483969020, + "step": 7235 + }, + { + "epoch": 0.8209929078014184, + "loss": 1.1042028665542603, + "loss_ce": 0.0075231571681797504, + "loss_iou": 0.4296875, + "loss_num": 0.047119140625, + "loss_xval": 1.09375, + "num_input_tokens_seen": 483969020, + "step": 7235 + }, + { + "epoch": 0.8211063829787234, + "grad_norm": 23.027475357055664, + "learning_rate": 5e-05, + "loss": 1.0661, + "num_input_tokens_seen": 484035540, + "step": 7236 + }, + { + "epoch": 0.8211063829787234, + "loss": 1.0116593837738037, + "loss_ce": 0.00384687352925539, + "loss_iou": 0.423828125, + "loss_num": 0.032470703125, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 484035540, + "step": 7236 + }, + { + "epoch": 0.8212198581560284, + "grad_norm": 22.116376876831055, + "learning_rate": 5e-05, + "loss": 1.296, + "num_input_tokens_seen": 484103636, + "step": 7237 + }, + { + "epoch": 0.8212198581560284, + "loss": 1.3111746311187744, + "loss_ce": 0.0074637336656451225, + "loss_iou": 0.52734375, + "loss_num": 0.050537109375, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 484103636, + "step": 7237 + }, + { + "epoch": 0.8213333333333334, + "grad_norm": 28.57073211669922, + "learning_rate": 5e-05, + "loss": 1.0471, + "num_input_tokens_seen": 484169976, + "step": 7238 + }, + { + "epoch": 0.8213333333333334, + "loss": 1.1344228982925415, + "loss_ce": 0.008446354418992996, + "loss_iou": 0.427734375, + "loss_num": 0.05419921875, + "loss_xval": 1.125, + "num_input_tokens_seen": 484169976, + "step": 7238 + }, + { + "epoch": 0.8214468085106383, + "grad_norm": 35.007469177246094, + "learning_rate": 5e-05, + "loss": 1.2553, + "num_input_tokens_seen": 484237212, + "step": 7239 + }, + { + "epoch": 0.8214468085106383, + "loss": 1.2181894779205322, + "loss_ce": 0.006763693876564503, + "loss_iou": 0.490234375, + "loss_num": 0.0458984375, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 484237212, + "step": 7239 + }, + { + "epoch": 0.8215602836879433, + "grad_norm": 34.60259246826172, + "learning_rate": 5e-05, + "loss": 1.2294, + "num_input_tokens_seen": 484304344, + "step": 7240 + }, + { + "epoch": 0.8215602836879433, + "loss": 1.2785851955413818, + "loss_ce": 0.004171158652752638, + "loss_iou": 0.50390625, + "loss_num": 0.05322265625, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 484304344, + "step": 7240 + }, + { + "epoch": 0.8216737588652482, + "grad_norm": 42.89153289794922, + "learning_rate": 5e-05, + "loss": 1.2374, + "num_input_tokens_seen": 484369828, + "step": 7241 + }, + { + "epoch": 0.8216737588652482, + "loss": 1.4313764572143555, + "loss_ce": 0.006571768783032894, + "loss_iou": 0.5390625, + "loss_num": 0.06884765625, + "loss_xval": 1.421875, + "num_input_tokens_seen": 484369828, + "step": 7241 + }, + { + "epoch": 0.8217872340425532, + "grad_norm": 34.61298370361328, + "learning_rate": 5e-05, + "loss": 1.3411, + "num_input_tokens_seen": 484436856, + "step": 7242 + }, + { + "epoch": 0.8217872340425532, + "loss": 1.3062453269958496, + "loss_ce": 0.00692895520478487, + "loss_iou": 0.578125, + "loss_num": 0.0283203125, + "loss_xval": 1.296875, + "num_input_tokens_seen": 484436856, + "step": 7242 + }, + { + "epoch": 0.8219007092198581, + "grad_norm": 34.179561614990234, + "learning_rate": 5e-05, + "loss": 1.147, + "num_input_tokens_seen": 484503732, + "step": 7243 + }, + { + "epoch": 0.8219007092198581, + "loss": 1.200822114944458, + "loss_ce": 0.008439311757683754, + "loss_iou": 0.486328125, + "loss_num": 0.044189453125, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 484503732, + "step": 7243 + }, + { + "epoch": 0.8220141843971631, + "grad_norm": 28.047748565673828, + "learning_rate": 5e-05, + "loss": 1.2072, + "num_input_tokens_seen": 484571492, + "step": 7244 + }, + { + "epoch": 0.8220141843971631, + "loss": 1.032409906387329, + "loss_ce": 0.004089568741619587, + "loss_iou": 0.466796875, + "loss_num": 0.018798828125, + "loss_xval": 1.03125, + "num_input_tokens_seen": 484571492, + "step": 7244 + }, + { + "epoch": 0.8221276595744681, + "grad_norm": 27.180587768554688, + "learning_rate": 5e-05, + "loss": 1.2049, + "num_input_tokens_seen": 484638344, + "step": 7245 + }, + { + "epoch": 0.8221276595744681, + "loss": 1.0585955381393433, + "loss_ce": 0.008790930733084679, + "loss_iou": 0.423828125, + "loss_num": 0.040771484375, + "loss_xval": 1.046875, + "num_input_tokens_seen": 484638344, + "step": 7245 + }, + { + "epoch": 0.8222411347517731, + "grad_norm": 21.595699310302734, + "learning_rate": 5e-05, + "loss": 1.2003, + "num_input_tokens_seen": 484705100, + "step": 7246 + }, + { + "epoch": 0.8222411347517731, + "loss": 1.1141467094421387, + "loss_ce": 0.005748343653976917, + "loss_iou": 0.462890625, + "loss_num": 0.03662109375, + "loss_xval": 1.109375, + "num_input_tokens_seen": 484705100, + "step": 7246 + }, + { + "epoch": 0.822354609929078, + "grad_norm": 32.095577239990234, + "learning_rate": 5e-05, + "loss": 0.944, + "num_input_tokens_seen": 484772016, + "step": 7247 + }, + { + "epoch": 0.822354609929078, + "loss": 1.0106737613677979, + "loss_ce": 0.005546845030039549, + "loss_iou": 0.427734375, + "loss_num": 0.0299072265625, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 484772016, + "step": 7247 + }, + { + "epoch": 0.822468085106383, + "grad_norm": 23.485275268554688, + "learning_rate": 5e-05, + "loss": 1.2371, + "num_input_tokens_seen": 484838568, + "step": 7248 + }, + { + "epoch": 0.822468085106383, + "loss": 1.2315810918807983, + "loss_ce": 0.005018550902605057, + "loss_iou": 0.51953125, + "loss_num": 0.037109375, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 484838568, + "step": 7248 + }, + { + "epoch": 0.8225815602836879, + "grad_norm": 18.91994857788086, + "learning_rate": 5e-05, + "loss": 0.9592, + "num_input_tokens_seen": 484905128, + "step": 7249 + }, + { + "epoch": 0.8225815602836879, + "loss": 0.8754010200500488, + "loss_ce": 0.01041075773537159, + "loss_iou": 0.357421875, + "loss_num": 0.0299072265625, + "loss_xval": 0.86328125, + "num_input_tokens_seen": 484905128, + "step": 7249 + }, + { + "epoch": 0.8226950354609929, + "grad_norm": 25.016284942626953, + "learning_rate": 5e-05, + "loss": 1.2334, + "num_input_tokens_seen": 484972356, + "step": 7250 + }, + { + "epoch": 0.8226950354609929, + "eval_seeclick_CIoU": 0.42704105377197266, + "eval_seeclick_GIoU": 0.4061352461576462, + "eval_seeclick_IoU": 0.5033048689365387, + "eval_seeclick_MAE_all": 0.13874638825654984, + "eval_seeclick_MAE_h": 0.05878000520169735, + "eval_seeclick_MAE_w": 0.120242889970541, + "eval_seeclick_MAE_x_boxes": 0.21364983916282654, + "eval_seeclick_MAE_y_boxes": 0.1080620214343071, + "eval_seeclick_NUM_probability": 0.9999715089797974, + "eval_seeclick_inside_bbox": 0.6927083432674408, + "eval_seeclick_loss": 2.2767703533172607, + "eval_seeclick_loss_ce": 0.013851989060640335, + "eval_seeclick_loss_iou": 0.77655029296875, + "eval_seeclick_loss_num": 0.14345550537109375, + "eval_seeclick_loss_xval": 2.2713623046875, + "eval_seeclick_runtime": 68.125, + "eval_seeclick_samples_per_second": 0.69, + "eval_seeclick_steps_per_second": 0.029, + "num_input_tokens_seen": 484972356, + "step": 7250 + }, + { + "epoch": 0.8226950354609929, + "eval_icons_CIoU": 0.5508153438568115, + "eval_icons_GIoU": 0.5543530881404877, + "eval_icons_IoU": 0.5834619402885437, + "eval_icons_MAE_all": 0.10619669035077095, + "eval_icons_MAE_h": 0.07493863999843597, + "eval_icons_MAE_w": 0.09278073534369469, + "eval_icons_MAE_x_boxes": 0.09008097648620605, + "eval_icons_MAE_y_boxes": 0.050373341888189316, + "eval_icons_NUM_probability": 0.9999890029430389, + "eval_icons_inside_bbox": 0.8697916567325592, + "eval_icons_loss": 2.23026967048645, + "eval_icons_loss_ce": 0.0002555137616582215, + "eval_icons_loss_iou": 0.84814453125, + "eval_icons_loss_num": 0.0991668701171875, + "eval_icons_loss_xval": 2.1923828125, + "eval_icons_runtime": 67.5045, + "eval_icons_samples_per_second": 0.741, + "eval_icons_steps_per_second": 0.03, + "num_input_tokens_seen": 484972356, + "step": 7250 + }, + { + "epoch": 0.8226950354609929, + "eval_screenspot_CIoU": 0.2505432864030202, + "eval_screenspot_GIoU": 0.20975414166847864, + "eval_screenspot_IoU": 0.3532035748163859, + "eval_screenspot_MAE_all": 0.22242364784081778, + "eval_screenspot_MAE_h": 0.15834014117717743, + "eval_screenspot_MAE_w": 0.188823771973451, + "eval_screenspot_MAE_x_boxes": 0.3002068102359772, + "eval_screenspot_MAE_y_boxes": 0.11879211415847142, + "eval_screenspot_NUM_probability": 0.9999560117721558, + "eval_screenspot_inside_bbox": 0.534583330154419, + "eval_screenspot_loss": 3.137190580368042, + "eval_screenspot_loss_ce": 0.021777144943674404, + "eval_screenspot_loss_iou": 0.9830729166666666, + "eval_screenspot_loss_num": 0.23075358072916666, + "eval_screenspot_loss_xval": 3.119140625, + "eval_screenspot_runtime": 121.7126, + "eval_screenspot_samples_per_second": 0.731, + "eval_screenspot_steps_per_second": 0.025, + "num_input_tokens_seen": 484972356, + "step": 7250 + }, + { + "epoch": 0.8226950354609929, + "eval_compot_CIoU": 0.2677682712674141, + "eval_compot_GIoU": 0.23399586975574493, + "eval_compot_IoU": 0.3576311022043228, + "eval_compot_MAE_all": 0.23013801872730255, + "eval_compot_MAE_h": 0.08362547494471073, + "eval_compot_MAE_w": 0.28156712651252747, + "eval_compot_MAE_x_boxes": 0.21844355016946793, + "eval_compot_MAE_y_boxes": 0.11766999959945679, + "eval_compot_NUM_probability": 0.9999567270278931, + "eval_compot_inside_bbox": 0.5, + "eval_compot_loss": 3.058607816696167, + "eval_compot_loss_ce": 0.008053961209952831, + "eval_compot_loss_iou": 0.96240234375, + "eval_compot_loss_num": 0.221954345703125, + "eval_compot_loss_xval": 3.03466796875, + "eval_compot_runtime": 74.5271, + "eval_compot_samples_per_second": 0.671, + "eval_compot_steps_per_second": 0.027, + "num_input_tokens_seen": 484972356, + "step": 7250 + }, + { + "epoch": 0.8226950354609929, + "loss": 3.0028510093688965, + "loss_ce": 0.007733777165412903, + "loss_iou": 0.95703125, + "loss_num": 0.2158203125, + "loss_xval": 3.0, + "num_input_tokens_seen": 484972356, + "step": 7250 + }, + { + "epoch": 0.8228085106382979, + "grad_norm": 52.10930252075195, + "learning_rate": 5e-05, + "loss": 1.1976, + "num_input_tokens_seen": 485038060, + "step": 7251 + }, + { + "epoch": 0.8228085106382979, + "loss": 1.1693758964538574, + "loss_ce": 0.010684430599212646, + "loss_iou": 0.47265625, + "loss_num": 0.042724609375, + "loss_xval": 1.15625, + "num_input_tokens_seen": 485038060, + "step": 7251 + }, + { + "epoch": 0.8229219858156028, + "grad_norm": 26.43874168395996, + "learning_rate": 5e-05, + "loss": 1.4166, + "num_input_tokens_seen": 485106108, + "step": 7252 + }, + { + "epoch": 0.8229219858156028, + "loss": 1.4972810745239258, + "loss_ce": 0.006070255301892757, + "loss_iou": 0.58984375, + "loss_num": 0.0625, + "loss_xval": 1.4921875, + "num_input_tokens_seen": 485106108, + "step": 7252 + }, + { + "epoch": 0.8230354609929078, + "grad_norm": 23.60007095336914, + "learning_rate": 5e-05, + "loss": 1.0477, + "num_input_tokens_seen": 485174608, + "step": 7253 + }, + { + "epoch": 0.8230354609929078, + "loss": 0.999100387096405, + "loss_ce": 0.006424586288630962, + "loss_iou": 0.4296875, + "loss_num": 0.0264892578125, + "loss_xval": 0.9921875, + "num_input_tokens_seen": 485174608, + "step": 7253 + }, + { + "epoch": 0.8231489361702128, + "grad_norm": 30.297664642333984, + "learning_rate": 5e-05, + "loss": 1.1586, + "num_input_tokens_seen": 485241868, + "step": 7254 + }, + { + "epoch": 0.8231489361702128, + "loss": 1.1283925771713257, + "loss_ce": 0.005834032781422138, + "loss_iou": 0.47265625, + "loss_num": 0.035400390625, + "loss_xval": 1.125, + "num_input_tokens_seen": 485241868, + "step": 7254 + }, + { + "epoch": 0.8232624113475178, + "grad_norm": 27.95920181274414, + "learning_rate": 5e-05, + "loss": 1.0748, + "num_input_tokens_seen": 485309200, + "step": 7255 + }, + { + "epoch": 0.8232624113475178, + "loss": 1.0341260433197021, + "loss_ce": 0.007270548492670059, + "loss_iou": 0.419921875, + "loss_num": 0.037353515625, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 485309200, + "step": 7255 + }, + { + "epoch": 0.8233758865248227, + "grad_norm": 22.501508712768555, + "learning_rate": 5e-05, + "loss": 1.1908, + "num_input_tokens_seen": 485376284, + "step": 7256 + }, + { + "epoch": 0.8233758865248227, + "loss": 1.2253177165985107, + "loss_ce": 0.0036380626261234283, + "loss_iou": 0.51171875, + "loss_num": 0.03955078125, + "loss_xval": 1.21875, + "num_input_tokens_seen": 485376284, + "step": 7256 + }, + { + "epoch": 0.8234893617021276, + "grad_norm": 21.466503143310547, + "learning_rate": 5e-05, + "loss": 1.0439, + "num_input_tokens_seen": 485443696, + "step": 7257 + }, + { + "epoch": 0.8234893617021276, + "loss": 1.08803391456604, + "loss_ce": 0.007467452436685562, + "loss_iou": 0.404296875, + "loss_num": 0.0537109375, + "loss_xval": 1.078125, + "num_input_tokens_seen": 485443696, + "step": 7257 + }, + { + "epoch": 0.8236028368794326, + "grad_norm": 30.10502815246582, + "learning_rate": 5e-05, + "loss": 1.1134, + "num_input_tokens_seen": 485509976, + "step": 7258 + }, + { + "epoch": 0.8236028368794326, + "loss": 1.2277287244796753, + "loss_ce": 0.007513897493481636, + "loss_iou": 0.486328125, + "loss_num": 0.04931640625, + "loss_xval": 1.21875, + "num_input_tokens_seen": 485509976, + "step": 7258 + }, + { + "epoch": 0.8237163120567376, + "grad_norm": 40.71794509887695, + "learning_rate": 5e-05, + "loss": 1.2, + "num_input_tokens_seen": 485577324, + "step": 7259 + }, + { + "epoch": 0.8237163120567376, + "loss": 1.155468463897705, + "loss_ce": 0.004589651711285114, + "loss_iou": 0.431640625, + "loss_num": 0.057373046875, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 485577324, + "step": 7259 + }, + { + "epoch": 0.8238297872340425, + "grad_norm": 40.950252532958984, + "learning_rate": 5e-05, + "loss": 1.3214, + "num_input_tokens_seen": 485642680, + "step": 7260 + }, + { + "epoch": 0.8238297872340425, + "loss": 1.2601399421691895, + "loss_ce": 0.00867504719644785, + "loss_iou": 0.5078125, + "loss_num": 0.04638671875, + "loss_xval": 1.25, + "num_input_tokens_seen": 485642680, + "step": 7260 + }, + { + "epoch": 0.8239432624113475, + "grad_norm": 32.347206115722656, + "learning_rate": 5e-05, + "loss": 1.0048, + "num_input_tokens_seen": 485708596, + "step": 7261 + }, + { + "epoch": 0.8239432624113475, + "loss": 1.0452661514282227, + "loss_ce": 0.004250556230545044, + "loss_iou": 0.455078125, + "loss_num": 0.0260009765625, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 485708596, + "step": 7261 + }, + { + "epoch": 0.8240567375886525, + "grad_norm": 27.331260681152344, + "learning_rate": 5e-05, + "loss": 1.1112, + "num_input_tokens_seen": 485775628, + "step": 7262 + }, + { + "epoch": 0.8240567375886525, + "loss": 1.0155205726623535, + "loss_ce": 0.007952222600579262, + "loss_iou": 0.4140625, + "loss_num": 0.0361328125, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 485775628, + "step": 7262 + }, + { + "epoch": 0.8241702127659575, + "grad_norm": 28.731643676757812, + "learning_rate": 5e-05, + "loss": 1.1575, + "num_input_tokens_seen": 485842164, + "step": 7263 + }, + { + "epoch": 0.8241702127659575, + "loss": 1.341387152671814, + "loss_ce": 0.0054496219381690025, + "loss_iou": 0.5390625, + "loss_num": 0.051513671875, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 485842164, + "step": 7263 + }, + { + "epoch": 0.8242836879432625, + "grad_norm": 23.943805694580078, + "learning_rate": 5e-05, + "loss": 1.2392, + "num_input_tokens_seen": 485909020, + "step": 7264 + }, + { + "epoch": 0.8242836879432625, + "loss": 1.3404911756515503, + "loss_ce": 0.006018509157001972, + "loss_iou": 0.53125, + "loss_num": 0.0546875, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 485909020, + "step": 7264 + }, + { + "epoch": 0.8243971631205673, + "grad_norm": 18.914915084838867, + "learning_rate": 5e-05, + "loss": 1.2147, + "num_input_tokens_seen": 485976584, + "step": 7265 + }, + { + "epoch": 0.8243971631205673, + "loss": 1.1573376655578613, + "loss_ce": 0.005482158157974482, + "loss_iou": 0.46484375, + "loss_num": 0.04443359375, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 485976584, + "step": 7265 + }, + { + "epoch": 0.8245106382978723, + "grad_norm": 28.629201889038086, + "learning_rate": 5e-05, + "loss": 1.0445, + "num_input_tokens_seen": 486042868, + "step": 7266 + }, + { + "epoch": 0.8245106382978723, + "loss": 1.0978589057922363, + "loss_ce": 0.011707666330039501, + "loss_iou": 0.3828125, + "loss_num": 0.0634765625, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 486042868, + "step": 7266 + }, + { + "epoch": 0.8246241134751773, + "grad_norm": 30.62051010131836, + "learning_rate": 5e-05, + "loss": 1.1308, + "num_input_tokens_seen": 486109988, + "step": 7267 + }, + { + "epoch": 0.8246241134751773, + "loss": 0.953014612197876, + "loss_ce": 0.005260740872472525, + "loss_iou": 0.416015625, + "loss_num": 0.0234375, + "loss_xval": 0.94921875, + "num_input_tokens_seen": 486109988, + "step": 7267 + }, + { + "epoch": 0.8247375886524823, + "grad_norm": 29.387887954711914, + "learning_rate": 5e-05, + "loss": 1.4657, + "num_input_tokens_seen": 486175916, + "step": 7268 + }, + { + "epoch": 0.8247375886524823, + "loss": 1.6333770751953125, + "loss_ce": 0.006912285462021828, + "loss_iou": 0.61328125, + "loss_num": 0.080078125, + "loss_xval": 1.625, + "num_input_tokens_seen": 486175916, + "step": 7268 + }, + { + "epoch": 0.8248510638297872, + "grad_norm": 40.19684982299805, + "learning_rate": 5e-05, + "loss": 1.28, + "num_input_tokens_seen": 486242288, + "step": 7269 + }, + { + "epoch": 0.8248510638297872, + "loss": 1.180815577507019, + "loss_ce": 0.011382073163986206, + "loss_iou": 0.4921875, + "loss_num": 0.037353515625, + "loss_xval": 1.171875, + "num_input_tokens_seen": 486242288, + "step": 7269 + }, + { + "epoch": 0.8249645390070922, + "grad_norm": 21.41098976135254, + "learning_rate": 5e-05, + "loss": 1.0367, + "num_input_tokens_seen": 486309424, + "step": 7270 + }, + { + "epoch": 0.8249645390070922, + "loss": 1.0060646533966064, + "loss_ce": 0.00813976302742958, + "loss_iou": 0.392578125, + "loss_num": 0.042236328125, + "loss_xval": 0.99609375, + "num_input_tokens_seen": 486309424, + "step": 7270 + }, + { + "epoch": 0.8250780141843972, + "grad_norm": 18.270719528198242, + "learning_rate": 5e-05, + "loss": 1.0434, + "num_input_tokens_seen": 486376508, + "step": 7271 + }, + { + "epoch": 0.8250780141843972, + "loss": 0.8625434041023254, + "loss_ce": 0.008539495058357716, + "loss_iou": 0.369140625, + "loss_num": 0.0228271484375, + "loss_xval": 0.85546875, + "num_input_tokens_seen": 486376508, + "step": 7271 + }, + { + "epoch": 0.8251914893617022, + "grad_norm": 51.216880798339844, + "learning_rate": 5e-05, + "loss": 1.0049, + "num_input_tokens_seen": 486444108, + "step": 7272 + }, + { + "epoch": 0.8251914893617022, + "loss": 0.9877517819404602, + "loss_ce": 0.0067947362549602985, + "loss_iou": 0.412109375, + "loss_num": 0.03173828125, + "loss_xval": 0.98046875, + "num_input_tokens_seen": 486444108, + "step": 7272 + }, + { + "epoch": 0.8253049645390071, + "grad_norm": 23.587034225463867, + "learning_rate": 5e-05, + "loss": 1.2135, + "num_input_tokens_seen": 486511852, + "step": 7273 + }, + { + "epoch": 0.8253049645390071, + "loss": 0.9705937504768372, + "loss_ce": 0.0067265708930790424, + "loss_iou": 0.40625, + "loss_num": 0.030029296875, + "loss_xval": 0.96484375, + "num_input_tokens_seen": 486511852, + "step": 7273 + }, + { + "epoch": 0.825418439716312, + "grad_norm": 34.709014892578125, + "learning_rate": 5e-05, + "loss": 1.0517, + "num_input_tokens_seen": 486578288, + "step": 7274 + }, + { + "epoch": 0.825418439716312, + "loss": 1.1261472702026367, + "loss_ce": 0.013354301452636719, + "loss_iou": 0.490234375, + "loss_num": 0.0262451171875, + "loss_xval": 1.109375, + "num_input_tokens_seen": 486578288, + "step": 7274 + }, + { + "epoch": 0.825531914893617, + "grad_norm": 39.47550964355469, + "learning_rate": 5e-05, + "loss": 1.4595, + "num_input_tokens_seen": 486645580, + "step": 7275 + }, + { + "epoch": 0.825531914893617, + "loss": 1.3816637992858887, + "loss_ce": 0.005687259137630463, + "loss_iou": 0.5703125, + "loss_num": 0.046630859375, + "loss_xval": 1.375, + "num_input_tokens_seen": 486645580, + "step": 7275 + }, + { + "epoch": 0.825645390070922, + "grad_norm": 17.72028923034668, + "learning_rate": 5e-05, + "loss": 1.0689, + "num_input_tokens_seen": 486711840, + "step": 7276 + }, + { + "epoch": 0.825645390070922, + "loss": 1.2133334875106812, + "loss_ce": 0.006546358577907085, + "loss_iou": 0.51953125, + "loss_num": 0.033203125, + "loss_xval": 1.203125, + "num_input_tokens_seen": 486711840, + "step": 7276 + }, + { + "epoch": 0.825758865248227, + "grad_norm": 17.293895721435547, + "learning_rate": 5e-05, + "loss": 1.047, + "num_input_tokens_seen": 486779320, + "step": 7277 + }, + { + "epoch": 0.825758865248227, + "loss": 1.1401371955871582, + "loss_ce": 0.009277883917093277, + "loss_iou": 0.462890625, + "loss_num": 0.041259765625, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 486779320, + "step": 7277 + }, + { + "epoch": 0.8258723404255319, + "grad_norm": 26.600299835205078, + "learning_rate": 5e-05, + "loss": 1.1681, + "num_input_tokens_seen": 486847068, + "step": 7278 + }, + { + "epoch": 0.8258723404255319, + "loss": 1.188145637512207, + "loss_ce": 0.007481512147933245, + "loss_iou": 0.46875, + "loss_num": 0.048583984375, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 486847068, + "step": 7278 + }, + { + "epoch": 0.8259858156028369, + "grad_norm": 35.53713607788086, + "learning_rate": 5e-05, + "loss": 1.2532, + "num_input_tokens_seen": 486913452, + "step": 7279 + }, + { + "epoch": 0.8259858156028369, + "loss": 1.2498855590820312, + "loss_ce": 0.009651092812418938, + "loss_iou": 0.51953125, + "loss_num": 0.041015625, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 486913452, + "step": 7279 + }, + { + "epoch": 0.8260992907801419, + "grad_norm": 31.627870559692383, + "learning_rate": 5e-05, + "loss": 1.2128, + "num_input_tokens_seen": 486979928, + "step": 7280 + }, + { + "epoch": 0.8260992907801419, + "loss": 1.3085870742797852, + "loss_ce": 0.006829190533608198, + "loss_iou": 0.5234375, + "loss_num": 0.05078125, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 486979928, + "step": 7280 + }, + { + "epoch": 0.8262127659574469, + "grad_norm": 29.21493911743164, + "learning_rate": 5e-05, + "loss": 1.0726, + "num_input_tokens_seen": 487047180, + "step": 7281 + }, + { + "epoch": 0.8262127659574469, + "loss": 1.1000590324401855, + "loss_ce": 0.009482840076088905, + "loss_iou": 0.4453125, + "loss_num": 0.0400390625, + "loss_xval": 1.09375, + "num_input_tokens_seen": 487047180, + "step": 7281 + }, + { + "epoch": 0.8263262411347517, + "grad_norm": 23.83078956604004, + "learning_rate": 5e-05, + "loss": 1.2114, + "num_input_tokens_seen": 487114236, + "step": 7282 + }, + { + "epoch": 0.8263262411347517, + "loss": 1.2254726886749268, + "loss_ce": 0.008187463507056236, + "loss_iou": 0.515625, + "loss_num": 0.037353515625, + "loss_xval": 1.21875, + "num_input_tokens_seen": 487114236, + "step": 7282 + }, + { + "epoch": 0.8264397163120567, + "grad_norm": 24.856496810913086, + "learning_rate": 5e-05, + "loss": 1.2117, + "num_input_tokens_seen": 487180924, + "step": 7283 + }, + { + "epoch": 0.8264397163120567, + "loss": 1.3248553276062012, + "loss_ce": 0.007472458761185408, + "loss_iou": 0.54296875, + "loss_num": 0.0458984375, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 487180924, + "step": 7283 + }, + { + "epoch": 0.8265531914893617, + "grad_norm": 30.600797653198242, + "learning_rate": 5e-05, + "loss": 1.2206, + "num_input_tokens_seen": 487248640, + "step": 7284 + }, + { + "epoch": 0.8265531914893617, + "loss": 1.4602570533752441, + "loss_ce": 0.003714103950187564, + "loss_iou": 0.55859375, + "loss_num": 0.068359375, + "loss_xval": 1.453125, + "num_input_tokens_seen": 487248640, + "step": 7284 + }, + { + "epoch": 0.8266666666666667, + "grad_norm": 30.164669036865234, + "learning_rate": 5e-05, + "loss": 1.2198, + "num_input_tokens_seen": 487316404, + "step": 7285 + }, + { + "epoch": 0.8266666666666667, + "loss": 1.164042592048645, + "loss_ce": 0.00730423629283905, + "loss_iou": 0.490234375, + "loss_num": 0.03515625, + "loss_xval": 1.15625, + "num_input_tokens_seen": 487316404, + "step": 7285 + }, + { + "epoch": 0.8267801418439716, + "grad_norm": 33.17967987060547, + "learning_rate": 5e-05, + "loss": 1.2305, + "num_input_tokens_seen": 487383560, + "step": 7286 + }, + { + "epoch": 0.8267801418439716, + "loss": 1.2172935009002686, + "loss_ce": 0.012215398252010345, + "loss_iou": 0.494140625, + "loss_num": 0.043212890625, + "loss_xval": 1.203125, + "num_input_tokens_seen": 487383560, + "step": 7286 + }, + { + "epoch": 0.8268936170212766, + "grad_norm": 20.92982292175293, + "learning_rate": 5e-05, + "loss": 1.3092, + "num_input_tokens_seen": 487450000, + "step": 7287 + }, + { + "epoch": 0.8268936170212766, + "loss": 1.220284342765808, + "loss_ce": 0.008370308205485344, + "loss_iou": 0.52734375, + "loss_num": 0.031494140625, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 487450000, + "step": 7287 + }, + { + "epoch": 0.8270070921985816, + "grad_norm": 19.950698852539062, + "learning_rate": 5e-05, + "loss": 1.2624, + "num_input_tokens_seen": 487516768, + "step": 7288 + }, + { + "epoch": 0.8270070921985816, + "loss": 1.2312257289886475, + "loss_ce": 0.006372196599841118, + "loss_iou": 0.4609375, + "loss_num": 0.060302734375, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 487516768, + "step": 7288 + }, + { + "epoch": 0.8271205673758866, + "grad_norm": 22.20182228088379, + "learning_rate": 5e-05, + "loss": 1.3198, + "num_input_tokens_seen": 487584044, + "step": 7289 + }, + { + "epoch": 0.8271205673758866, + "loss": 1.3454116582870483, + "loss_ce": 0.009230008348822594, + "loss_iou": 0.50390625, + "loss_num": 0.06640625, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 487584044, + "step": 7289 + }, + { + "epoch": 0.8272340425531914, + "grad_norm": 27.179569244384766, + "learning_rate": 5e-05, + "loss": 1.213, + "num_input_tokens_seen": 487650760, + "step": 7290 + }, + { + "epoch": 0.8272340425531914, + "loss": 1.1267638206481934, + "loss_ce": 0.0077452161349356174, + "loss_iou": 0.453125, + "loss_num": 0.04248046875, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 487650760, + "step": 7290 + }, + { + "epoch": 0.8273475177304964, + "grad_norm": 34.069644927978516, + "learning_rate": 5e-05, + "loss": 1.3937, + "num_input_tokens_seen": 487719200, + "step": 7291 + }, + { + "epoch": 0.8273475177304964, + "loss": 1.168794870376587, + "loss_ce": 0.006685431580990553, + "loss_iou": 0.490234375, + "loss_num": 0.03662109375, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 487719200, + "step": 7291 + }, + { + "epoch": 0.8274609929078014, + "grad_norm": 22.5147762298584, + "learning_rate": 5e-05, + "loss": 1.2273, + "num_input_tokens_seen": 487787036, + "step": 7292 + }, + { + "epoch": 0.8274609929078014, + "loss": 1.2138001918792725, + "loss_ce": 0.009210334159433842, + "loss_iou": 0.48828125, + "loss_num": 0.04541015625, + "loss_xval": 1.203125, + "num_input_tokens_seen": 487787036, + "step": 7292 + }, + { + "epoch": 0.8275744680851064, + "grad_norm": 14.13411808013916, + "learning_rate": 5e-05, + "loss": 1.1209, + "num_input_tokens_seen": 487853212, + "step": 7293 + }, + { + "epoch": 0.8275744680851064, + "loss": 0.925784707069397, + "loss_ce": 0.006366382353007793, + "loss_iou": 0.365234375, + "loss_num": 0.037841796875, + "loss_xval": 0.91796875, + "num_input_tokens_seen": 487853212, + "step": 7293 + }, + { + "epoch": 0.8276879432624114, + "grad_norm": 16.426929473876953, + "learning_rate": 5e-05, + "loss": 1.2776, + "num_input_tokens_seen": 487919744, + "step": 7294 + }, + { + "epoch": 0.8276879432624114, + "loss": 1.2584328651428223, + "loss_ce": 0.01233910396695137, + "loss_iou": 0.44921875, + "loss_num": 0.0693359375, + "loss_xval": 1.25, + "num_input_tokens_seen": 487919744, + "step": 7294 + }, + { + "epoch": 0.8278014184397163, + "grad_norm": 23.429311752319336, + "learning_rate": 5e-05, + "loss": 1.1351, + "num_input_tokens_seen": 487985656, + "step": 7295 + }, + { + "epoch": 0.8278014184397163, + "loss": 1.1484960317611694, + "loss_ce": 0.005673808045685291, + "loss_iou": 0.50390625, + "loss_num": 0.027099609375, + "loss_xval": 1.140625, + "num_input_tokens_seen": 487985656, + "step": 7295 + }, + { + "epoch": 0.8279148936170213, + "grad_norm": 20.773771286010742, + "learning_rate": 5e-05, + "loss": 1.1869, + "num_input_tokens_seen": 488052264, + "step": 7296 + }, + { + "epoch": 0.8279148936170213, + "loss": 1.3761779069900513, + "loss_ce": 0.006549004465341568, + "loss_iou": 0.55078125, + "loss_num": 0.052978515625, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 488052264, + "step": 7296 + }, + { + "epoch": 0.8280283687943263, + "grad_norm": 15.000157356262207, + "learning_rate": 5e-05, + "loss": 0.9741, + "num_input_tokens_seen": 488119596, + "step": 7297 + }, + { + "epoch": 0.8280283687943263, + "loss": 0.8637893795967102, + "loss_ce": 0.005879220552742481, + "loss_iou": 0.3828125, + "loss_num": 0.018310546875, + "loss_xval": 0.859375, + "num_input_tokens_seen": 488119596, + "step": 7297 + }, + { + "epoch": 0.8281418439716312, + "grad_norm": 14.995662689208984, + "learning_rate": 5e-05, + "loss": 1.2108, + "num_input_tokens_seen": 488185476, + "step": 7298 + }, + { + "epoch": 0.8281418439716312, + "loss": 1.3060450553894043, + "loss_ce": 0.005752069875597954, + "loss_iou": 0.455078125, + "loss_num": 0.078125, + "loss_xval": 1.296875, + "num_input_tokens_seen": 488185476, + "step": 7298 + }, + { + "epoch": 0.8282553191489361, + "grad_norm": 29.93861961364746, + "learning_rate": 5e-05, + "loss": 1.3581, + "num_input_tokens_seen": 488252392, + "step": 7299 + }, + { + "epoch": 0.8282553191489361, + "loss": 1.4007612466812134, + "loss_ce": 0.006230007857084274, + "loss_iou": 0.55859375, + "loss_num": 0.054931640625, + "loss_xval": 1.390625, + "num_input_tokens_seen": 488252392, + "step": 7299 + }, + { + "epoch": 0.8283687943262411, + "grad_norm": 39.897090911865234, + "learning_rate": 5e-05, + "loss": 1.3243, + "num_input_tokens_seen": 488319472, + "step": 7300 + }, + { + "epoch": 0.8283687943262411, + "loss": 1.1828012466430664, + "loss_ce": 0.0050669340416789055, + "loss_iou": 0.50390625, + "loss_num": 0.034423828125, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 488319472, + "step": 7300 + }, + { + "epoch": 0.8284822695035461, + "grad_norm": 34.392295837402344, + "learning_rate": 5e-05, + "loss": 1.3009, + "num_input_tokens_seen": 488386192, + "step": 7301 + }, + { + "epoch": 0.8284822695035461, + "loss": 1.2883028984069824, + "loss_ce": 0.006564639508724213, + "loss_iou": 0.52734375, + "loss_num": 0.046142578125, + "loss_xval": 1.28125, + "num_input_tokens_seen": 488386192, + "step": 7301 + }, + { + "epoch": 0.8285957446808511, + "grad_norm": 24.289302825927734, + "learning_rate": 5e-05, + "loss": 0.9261, + "num_input_tokens_seen": 488451692, + "step": 7302 + }, + { + "epoch": 0.8285957446808511, + "loss": 0.7392101287841797, + "loss_ce": 0.006299976259469986, + "loss_iou": 0.2734375, + "loss_num": 0.036865234375, + "loss_xval": 0.734375, + "num_input_tokens_seen": 488451692, + "step": 7302 + }, + { + "epoch": 0.828709219858156, + "grad_norm": 32.09613037109375, + "learning_rate": 5e-05, + "loss": 1.0475, + "num_input_tokens_seen": 488518052, + "step": 7303 + }, + { + "epoch": 0.828709219858156, + "loss": 0.827151894569397, + "loss_ce": 0.008792506530880928, + "loss_iou": 0.330078125, + "loss_num": 0.0311279296875, + "loss_xval": 0.8203125, + "num_input_tokens_seen": 488518052, + "step": 7303 + }, + { + "epoch": 0.828822695035461, + "grad_norm": 45.78704833984375, + "learning_rate": 5e-05, + "loss": 1.25, + "num_input_tokens_seen": 488586196, + "step": 7304 + }, + { + "epoch": 0.828822695035461, + "loss": 1.2712130546569824, + "loss_ce": 0.003634881693869829, + "loss_iou": 0.53125, + "loss_num": 0.041259765625, + "loss_xval": 1.265625, + "num_input_tokens_seen": 488586196, + "step": 7304 + }, + { + "epoch": 0.828936170212766, + "grad_norm": 30.933502197265625, + "learning_rate": 5e-05, + "loss": 1.2383, + "num_input_tokens_seen": 488654052, + "step": 7305 + }, + { + "epoch": 0.828936170212766, + "loss": 1.348313808441162, + "loss_ce": 0.006028707139194012, + "loss_iou": 0.5546875, + "loss_num": 0.046875, + "loss_xval": 1.34375, + "num_input_tokens_seen": 488654052, + "step": 7305 + }, + { + "epoch": 0.829049645390071, + "grad_norm": 15.691786766052246, + "learning_rate": 5e-05, + "loss": 1.1838, + "num_input_tokens_seen": 488720824, + "step": 7306 + }, + { + "epoch": 0.829049645390071, + "loss": 1.1741828918457031, + "loss_ce": 0.009632157161831856, + "loss_iou": 0.50390625, + "loss_num": 0.031494140625, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 488720824, + "step": 7306 + }, + { + "epoch": 0.8291631205673758, + "grad_norm": 30.4237117767334, + "learning_rate": 5e-05, + "loss": 1.0383, + "num_input_tokens_seen": 488786512, + "step": 7307 + }, + { + "epoch": 0.8291631205673758, + "loss": 0.9897092580795288, + "loss_ce": 0.009484655223786831, + "loss_iou": 0.373046875, + "loss_num": 0.046875, + "loss_xval": 0.98046875, + "num_input_tokens_seen": 488786512, + "step": 7307 + }, + { + "epoch": 0.8292765957446808, + "grad_norm": 25.20960807800293, + "learning_rate": 5e-05, + "loss": 1.046, + "num_input_tokens_seen": 488853588, + "step": 7308 + }, + { + "epoch": 0.8292765957446808, + "loss": 0.9299286603927612, + "loss_ce": 0.009030197747051716, + "loss_iou": 0.396484375, + "loss_num": 0.0257568359375, + "loss_xval": 0.921875, + "num_input_tokens_seen": 488853588, + "step": 7308 + }, + { + "epoch": 0.8293900709219858, + "grad_norm": 26.602724075317383, + "learning_rate": 5e-05, + "loss": 0.9507, + "num_input_tokens_seen": 488918976, + "step": 7309 + }, + { + "epoch": 0.8293900709219858, + "loss": 0.9007576107978821, + "loss_ce": 0.005738099571317434, + "loss_iou": 0.384765625, + "loss_num": 0.0247802734375, + "loss_xval": 0.89453125, + "num_input_tokens_seen": 488918976, + "step": 7309 + }, + { + "epoch": 0.8295035460992908, + "grad_norm": 24.348615646362305, + "learning_rate": 5e-05, + "loss": 1.227, + "num_input_tokens_seen": 488985560, + "step": 7310 + }, + { + "epoch": 0.8295035460992908, + "loss": 1.3362981081008911, + "loss_ce": 0.008173136040568352, + "loss_iou": 0.546875, + "loss_num": 0.047119140625, + "loss_xval": 1.328125, + "num_input_tokens_seen": 488985560, + "step": 7310 + }, + { + "epoch": 0.8296170212765958, + "grad_norm": 16.002574920654297, + "learning_rate": 5e-05, + "loss": 1.142, + "num_input_tokens_seen": 489053060, + "step": 7311 + }, + { + "epoch": 0.8296170212765958, + "loss": 0.8931933641433716, + "loss_ce": 0.005986334756016731, + "loss_iou": 0.404296875, + "loss_num": 0.015625, + "loss_xval": 0.88671875, + "num_input_tokens_seen": 489053060, + "step": 7311 + }, + { + "epoch": 0.8297304964539007, + "grad_norm": 17.211074829101562, + "learning_rate": 5e-05, + "loss": 0.9422, + "num_input_tokens_seen": 489120008, + "step": 7312 + }, + { + "epoch": 0.8297304964539007, + "loss": 0.9793946743011475, + "loss_ce": 0.003320419229567051, + "loss_iou": 0.37890625, + "loss_num": 0.043701171875, + "loss_xval": 0.9765625, + "num_input_tokens_seen": 489120008, + "step": 7312 + }, + { + "epoch": 0.8298439716312057, + "grad_norm": 23.843460083007812, + "learning_rate": 5e-05, + "loss": 1.2922, + "num_input_tokens_seen": 489186576, + "step": 7313 + }, + { + "epoch": 0.8298439716312057, + "loss": 1.2176799774169922, + "loss_ce": 0.006742446217685938, + "loss_iou": 0.5, + "loss_num": 0.04150390625, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 489186576, + "step": 7313 + }, + { + "epoch": 0.8299574468085107, + "grad_norm": 16.617713928222656, + "learning_rate": 5e-05, + "loss": 1.1946, + "num_input_tokens_seen": 489253400, + "step": 7314 + }, + { + "epoch": 0.8299574468085107, + "loss": 1.2672016620635986, + "loss_ce": 0.00401808088645339, + "loss_iou": 0.5, + "loss_num": 0.05224609375, + "loss_xval": 1.265625, + "num_input_tokens_seen": 489253400, + "step": 7314 + }, + { + "epoch": 0.8300709219858156, + "grad_norm": 23.642303466796875, + "learning_rate": 5e-05, + "loss": 1.1136, + "num_input_tokens_seen": 489321012, + "step": 7315 + }, + { + "epoch": 0.8300709219858156, + "loss": 1.1026711463928223, + "loss_ce": 0.009897714480757713, + "loss_iou": 0.498046875, + "loss_num": 0.0196533203125, + "loss_xval": 1.09375, + "num_input_tokens_seen": 489321012, + "step": 7315 + }, + { + "epoch": 0.8301843971631205, + "grad_norm": 52.60511779785156, + "learning_rate": 5e-05, + "loss": 1.4519, + "num_input_tokens_seen": 489386468, + "step": 7316 + }, + { + "epoch": 0.8301843971631205, + "loss": 1.307989478111267, + "loss_ce": 0.0067200083285570145, + "loss_iou": 0.5078125, + "loss_num": 0.057373046875, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 489386468, + "step": 7316 + }, + { + "epoch": 0.8302978723404255, + "grad_norm": 32.60823059082031, + "learning_rate": 5e-05, + "loss": 1.2385, + "num_input_tokens_seen": 489454444, + "step": 7317 + }, + { + "epoch": 0.8302978723404255, + "loss": 1.2146191596984863, + "loss_ce": 0.006611408665776253, + "loss_iou": 0.5234375, + "loss_num": 0.032958984375, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 489454444, + "step": 7317 + }, + { + "epoch": 0.8304113475177305, + "grad_norm": 29.615833282470703, + "learning_rate": 5e-05, + "loss": 1.2493, + "num_input_tokens_seen": 489521136, + "step": 7318 + }, + { + "epoch": 0.8304113475177305, + "loss": 1.3410794734954834, + "loss_ce": 0.005141946952790022, + "loss_iou": 0.5390625, + "loss_num": 0.05078125, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 489521136, + "step": 7318 + }, + { + "epoch": 0.8305248226950355, + "grad_norm": 22.364845275878906, + "learning_rate": 5e-05, + "loss": 1.1989, + "num_input_tokens_seen": 489588108, + "step": 7319 + }, + { + "epoch": 0.8305248226950355, + "loss": 1.1521981954574585, + "loss_ce": 0.009131829254329205, + "loss_iou": 0.4296875, + "loss_num": 0.056640625, + "loss_xval": 1.140625, + "num_input_tokens_seen": 489588108, + "step": 7319 + }, + { + "epoch": 0.8306382978723404, + "grad_norm": 22.696929931640625, + "learning_rate": 5e-05, + "loss": 1.0912, + "num_input_tokens_seen": 489654200, + "step": 7320 + }, + { + "epoch": 0.8306382978723404, + "loss": 1.1863741874694824, + "loss_ce": 0.003757043043151498, + "loss_iou": 0.4375, + "loss_num": 0.061767578125, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 489654200, + "step": 7320 + }, + { + "epoch": 0.8307517730496454, + "grad_norm": 25.03767967224121, + "learning_rate": 5e-05, + "loss": 1.0587, + "num_input_tokens_seen": 489721588, + "step": 7321 + }, + { + "epoch": 0.8307517730496454, + "loss": 1.1985557079315186, + "loss_ce": 0.006172873545438051, + "loss_iou": 0.5, + "loss_num": 0.038330078125, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 489721588, + "step": 7321 + }, + { + "epoch": 0.8308652482269504, + "grad_norm": 29.286033630371094, + "learning_rate": 5e-05, + "loss": 1.3435, + "num_input_tokens_seen": 489788288, + "step": 7322 + }, + { + "epoch": 0.8308652482269504, + "loss": 1.2809278964996338, + "loss_ce": 0.00895520020276308, + "loss_iou": 0.5234375, + "loss_num": 0.04541015625, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 489788288, + "step": 7322 + }, + { + "epoch": 0.8309787234042553, + "grad_norm": 26.39630889892578, + "learning_rate": 5e-05, + "loss": 1.3746, + "num_input_tokens_seen": 489854728, + "step": 7323 + }, + { + "epoch": 0.8309787234042553, + "loss": 1.3858864307403564, + "loss_ce": 0.008933411911129951, + "loss_iou": 0.56640625, + "loss_num": 0.04931640625, + "loss_xval": 1.375, + "num_input_tokens_seen": 489854728, + "step": 7323 + }, + { + "epoch": 0.8310921985815602, + "grad_norm": 35.64422607421875, + "learning_rate": 5e-05, + "loss": 1.03, + "num_input_tokens_seen": 489921788, + "step": 7324 + }, + { + "epoch": 0.8310921985815602, + "loss": 1.1600244045257568, + "loss_ce": 0.006215779110789299, + "loss_iou": 0.50390625, + "loss_num": 0.030029296875, + "loss_xval": 1.15625, + "num_input_tokens_seen": 489921788, + "step": 7324 + }, + { + "epoch": 0.8312056737588652, + "grad_norm": 44.329185485839844, + "learning_rate": 5e-05, + "loss": 1.22, + "num_input_tokens_seen": 489988452, + "step": 7325 + }, + { + "epoch": 0.8312056737588652, + "loss": 1.3225549459457397, + "loss_ce": 0.005660467315465212, + "loss_iou": 0.5234375, + "loss_num": 0.0537109375, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 489988452, + "step": 7325 + }, + { + "epoch": 0.8313191489361702, + "grad_norm": 37.371978759765625, + "learning_rate": 5e-05, + "loss": 1.0154, + "num_input_tokens_seen": 490055516, + "step": 7326 + }, + { + "epoch": 0.8313191489361702, + "loss": 1.0690441131591797, + "loss_ce": 0.006300122942775488, + "loss_iou": 0.44921875, + "loss_num": 0.032958984375, + "loss_xval": 1.0625, + "num_input_tokens_seen": 490055516, + "step": 7326 + }, + { + "epoch": 0.8314326241134752, + "grad_norm": 44.27159881591797, + "learning_rate": 5e-05, + "loss": 1.3825, + "num_input_tokens_seen": 490121980, + "step": 7327 + }, + { + "epoch": 0.8314326241134752, + "loss": 1.4121553897857666, + "loss_ce": 0.007858584634959698, + "loss_iou": 0.5859375, + "loss_num": 0.04736328125, + "loss_xval": 1.40625, + "num_input_tokens_seen": 490121980, + "step": 7327 + }, + { + "epoch": 0.8315460992907802, + "grad_norm": 27.387924194335938, + "learning_rate": 5e-05, + "loss": 0.9507, + "num_input_tokens_seen": 490189496, + "step": 7328 + }, + { + "epoch": 0.8315460992907802, + "loss": 1.150304913520813, + "loss_ce": 0.01016818918287754, + "loss_iou": 0.45703125, + "loss_num": 0.044921875, + "loss_xval": 1.140625, + "num_input_tokens_seen": 490189496, + "step": 7328 + }, + { + "epoch": 0.8316595744680851, + "grad_norm": 23.460525512695312, + "learning_rate": 5e-05, + "loss": 1.3295, + "num_input_tokens_seen": 490256972, + "step": 7329 + }, + { + "epoch": 0.8316595744680851, + "loss": 1.302455186843872, + "loss_ce": 0.006068467628210783, + "loss_iou": 0.51953125, + "loss_num": 0.051513671875, + "loss_xval": 1.296875, + "num_input_tokens_seen": 490256972, + "step": 7329 + }, + { + "epoch": 0.8317730496453901, + "grad_norm": 34.84410858154297, + "learning_rate": 5e-05, + "loss": 1.1554, + "num_input_tokens_seen": 490323664, + "step": 7330 + }, + { + "epoch": 0.8317730496453901, + "loss": 0.7748442888259888, + "loss_ce": 0.005801265127956867, + "loss_iou": 0.30859375, + "loss_num": 0.0306396484375, + "loss_xval": 0.76953125, + "num_input_tokens_seen": 490323664, + "step": 7330 + }, + { + "epoch": 0.831886524822695, + "grad_norm": 20.83316421508789, + "learning_rate": 5e-05, + "loss": 1.1408, + "num_input_tokens_seen": 490390644, + "step": 7331 + }, + { + "epoch": 0.831886524822695, + "loss": 1.167568325996399, + "loss_ce": 0.003994070924818516, + "loss_iou": 0.47265625, + "loss_num": 0.043701171875, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 490390644, + "step": 7331 + }, + { + "epoch": 0.832, + "grad_norm": 40.973846435546875, + "learning_rate": 5e-05, + "loss": 1.2028, + "num_input_tokens_seen": 490457736, + "step": 7332 + }, + { + "epoch": 0.832, + "loss": 1.0910046100616455, + "loss_ce": 0.008973361924290657, + "loss_iou": 0.427734375, + "loss_num": 0.044921875, + "loss_xval": 1.078125, + "num_input_tokens_seen": 490457736, + "step": 7332 + }, + { + "epoch": 0.8321134751773049, + "grad_norm": 33.964664459228516, + "learning_rate": 5e-05, + "loss": 1.06, + "num_input_tokens_seen": 490524844, + "step": 7333 + }, + { + "epoch": 0.8321134751773049, + "loss": 1.0649287700653076, + "loss_ce": 0.010241281241178513, + "loss_iou": 0.484375, + "loss_num": 0.017578125, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 490524844, + "step": 7333 + }, + { + "epoch": 0.8322269503546099, + "grad_norm": 26.29037857055664, + "learning_rate": 5e-05, + "loss": 1.2194, + "num_input_tokens_seen": 490592232, + "step": 7334 + }, + { + "epoch": 0.8322269503546099, + "loss": 1.2878445386886597, + "loss_ce": 0.0036648111417889595, + "loss_iou": 0.5078125, + "loss_num": 0.05419921875, + "loss_xval": 1.28125, + "num_input_tokens_seen": 490592232, + "step": 7334 + }, + { + "epoch": 0.8323404255319149, + "grad_norm": 29.867015838623047, + "learning_rate": 5e-05, + "loss": 1.2689, + "num_input_tokens_seen": 490659644, + "step": 7335 + }, + { + "epoch": 0.8323404255319149, + "loss": 1.183178424835205, + "loss_ce": 0.0059323422610759735, + "loss_iou": 0.48828125, + "loss_num": 0.040283203125, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 490659644, + "step": 7335 + }, + { + "epoch": 0.8324539007092199, + "grad_norm": 30.34751319885254, + "learning_rate": 5e-05, + "loss": 1.1614, + "num_input_tokens_seen": 490726904, + "step": 7336 + }, + { + "epoch": 0.8324539007092199, + "loss": 1.2550641298294067, + "loss_ce": 0.005064140539616346, + "loss_iou": 0.54296875, + "loss_num": 0.032470703125, + "loss_xval": 1.25, + "num_input_tokens_seen": 490726904, + "step": 7336 + }, + { + "epoch": 0.8325673758865249, + "grad_norm": 36.531578063964844, + "learning_rate": 5e-05, + "loss": 1.1739, + "num_input_tokens_seen": 490793516, + "step": 7337 + }, + { + "epoch": 0.8325673758865249, + "loss": 1.2544711828231812, + "loss_ce": 0.00935402512550354, + "loss_iou": 0.5390625, + "loss_num": 0.032958984375, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 490793516, + "step": 7337 + }, + { + "epoch": 0.8326808510638298, + "grad_norm": 46.086666107177734, + "learning_rate": 5e-05, + "loss": 1.109, + "num_input_tokens_seen": 490859484, + "step": 7338 + }, + { + "epoch": 0.8326808510638298, + "loss": 1.1287779808044434, + "loss_ce": 0.0067076534032821655, + "loss_iou": 0.44140625, + "loss_num": 0.047607421875, + "loss_xval": 1.125, + "num_input_tokens_seen": 490859484, + "step": 7338 + }, + { + "epoch": 0.8327943262411347, + "grad_norm": 30.433897018432617, + "learning_rate": 5e-05, + "loss": 1.309, + "num_input_tokens_seen": 490926700, + "step": 7339 + }, + { + "epoch": 0.8327943262411347, + "loss": 1.3847559690475464, + "loss_ce": 0.005544478539377451, + "loss_iou": 0.53125, + "loss_num": 0.06298828125, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 490926700, + "step": 7339 + }, + { + "epoch": 0.8329078014184397, + "grad_norm": 17.08194351196289, + "learning_rate": 5e-05, + "loss": 1.0863, + "num_input_tokens_seen": 490992812, + "step": 7340 + }, + { + "epoch": 0.8329078014184397, + "loss": 1.1594129800796509, + "loss_ce": 0.008045786991715431, + "loss_iou": 0.46875, + "loss_num": 0.042724609375, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 490992812, + "step": 7340 + }, + { + "epoch": 0.8330212765957447, + "grad_norm": 16.86615753173828, + "learning_rate": 5e-05, + "loss": 1.0697, + "num_input_tokens_seen": 491060276, + "step": 7341 + }, + { + "epoch": 0.8330212765957447, + "loss": 1.0348069667816162, + "loss_ce": 0.006974835880100727, + "loss_iou": 0.412109375, + "loss_num": 0.040771484375, + "loss_xval": 1.03125, + "num_input_tokens_seen": 491060276, + "step": 7341 + }, + { + "epoch": 0.8331347517730496, + "grad_norm": 24.12596321105957, + "learning_rate": 5e-05, + "loss": 1.1245, + "num_input_tokens_seen": 491126748, + "step": 7342 + }, + { + "epoch": 0.8331347517730496, + "loss": 1.1768567562103271, + "loss_ce": 0.004981753416359425, + "loss_iou": 0.47265625, + "loss_num": 0.045654296875, + "loss_xval": 1.171875, + "num_input_tokens_seen": 491126748, + "step": 7342 + }, + { + "epoch": 0.8332482269503546, + "grad_norm": 38.964202880859375, + "learning_rate": 5e-05, + "loss": 1.1841, + "num_input_tokens_seen": 491193904, + "step": 7343 + }, + { + "epoch": 0.8332482269503546, + "loss": 1.0121426582336426, + "loss_ce": 0.0035977703519165516, + "loss_iou": 0.41796875, + "loss_num": 0.03466796875, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 491193904, + "step": 7343 + }, + { + "epoch": 0.8333617021276596, + "grad_norm": 34.092201232910156, + "learning_rate": 5e-05, + "loss": 1.2178, + "num_input_tokens_seen": 491261080, + "step": 7344 + }, + { + "epoch": 0.8333617021276596, + "loss": 1.2854478359222412, + "loss_ce": 0.009080559946596622, + "loss_iou": 0.54296875, + "loss_num": 0.037353515625, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 491261080, + "step": 7344 + }, + { + "epoch": 0.8334751773049646, + "grad_norm": 35.56142807006836, + "learning_rate": 5e-05, + "loss": 1.1452, + "num_input_tokens_seen": 491327724, + "step": 7345 + }, + { + "epoch": 0.8334751773049646, + "loss": 1.0947459936141968, + "loss_ce": 0.00978502631187439, + "loss_iou": 0.4296875, + "loss_num": 0.044677734375, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 491327724, + "step": 7345 + }, + { + "epoch": 0.8335886524822695, + "grad_norm": 44.68758010864258, + "learning_rate": 5e-05, + "loss": 1.1641, + "num_input_tokens_seen": 491395172, + "step": 7346 + }, + { + "epoch": 0.8335886524822695, + "loss": 1.2702319622039795, + "loss_ce": 0.01095459796488285, + "loss_iou": 0.5234375, + "loss_num": 0.0419921875, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 491395172, + "step": 7346 + }, + { + "epoch": 0.8337021276595745, + "grad_norm": 25.848596572875977, + "learning_rate": 5e-05, + "loss": 1.1916, + "num_input_tokens_seen": 491462552, + "step": 7347 + }, + { + "epoch": 0.8337021276595745, + "loss": 1.0463353395462036, + "loss_ce": 0.007761065382510424, + "loss_iou": 0.44140625, + "loss_num": 0.031494140625, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 491462552, + "step": 7347 + }, + { + "epoch": 0.8338156028368794, + "grad_norm": 99.75275421142578, + "learning_rate": 5e-05, + "loss": 1.2712, + "num_input_tokens_seen": 491529640, + "step": 7348 + }, + { + "epoch": 0.8338156028368794, + "loss": 1.3248400688171387, + "loss_ce": 0.015269815921783447, + "loss_iou": 0.5, + "loss_num": 0.061767578125, + "loss_xval": 1.3125, + "num_input_tokens_seen": 491529640, + "step": 7348 + }, + { + "epoch": 0.8339290780141844, + "grad_norm": 46.169097900390625, + "learning_rate": 5e-05, + "loss": 1.0585, + "num_input_tokens_seen": 491595756, + "step": 7349 + }, + { + "epoch": 0.8339290780141844, + "loss": 0.9263046979904175, + "loss_ce": 0.005894545465707779, + "loss_iou": 0.396484375, + "loss_num": 0.025146484375, + "loss_xval": 0.921875, + "num_input_tokens_seen": 491595756, + "step": 7349 + }, + { + "epoch": 0.8340425531914893, + "grad_norm": 42.93695831298828, + "learning_rate": 5e-05, + "loss": 1.236, + "num_input_tokens_seen": 491663116, + "step": 7350 + }, + { + "epoch": 0.8340425531914893, + "loss": 1.5291314125061035, + "loss_ce": 0.006670374423265457, + "loss_iou": 0.609375, + "loss_num": 0.060546875, + "loss_xval": 1.5234375, + "num_input_tokens_seen": 491663116, + "step": 7350 + }, + { + "epoch": 0.8341560283687943, + "grad_norm": 30.72461700439453, + "learning_rate": 5e-05, + "loss": 1.2846, + "num_input_tokens_seen": 491729860, + "step": 7351 + }, + { + "epoch": 0.8341560283687943, + "loss": 1.3349008560180664, + "loss_ce": 0.005311092361807823, + "loss_iou": 0.5390625, + "loss_num": 0.049560546875, + "loss_xval": 1.328125, + "num_input_tokens_seen": 491729860, + "step": 7351 + }, + { + "epoch": 0.8342695035460993, + "grad_norm": 32.386962890625, + "learning_rate": 5e-05, + "loss": 0.9719, + "num_input_tokens_seen": 491797264, + "step": 7352 + }, + { + "epoch": 0.8342695035460993, + "loss": 1.023186445236206, + "loss_ce": 0.010185937397181988, + "loss_iou": 0.41015625, + "loss_num": 0.0380859375, + "loss_xval": 1.015625, + "num_input_tokens_seen": 491797264, + "step": 7352 + }, + { + "epoch": 0.8343829787234043, + "grad_norm": 16.30339813232422, + "learning_rate": 5e-05, + "loss": 0.9768, + "num_input_tokens_seen": 491863840, + "step": 7353 + }, + { + "epoch": 0.8343829787234043, + "loss": 0.980546236038208, + "loss_ce": 0.004716138355433941, + "loss_iou": 0.36328125, + "loss_num": 0.05029296875, + "loss_xval": 0.9765625, + "num_input_tokens_seen": 491863840, + "step": 7353 + }, + { + "epoch": 0.8344964539007093, + "grad_norm": 21.64118003845215, + "learning_rate": 5e-05, + "loss": 0.9763, + "num_input_tokens_seen": 491930232, + "step": 7354 + }, + { + "epoch": 0.8344964539007093, + "loss": 1.1364915370941162, + "loss_ce": 0.008561826311051846, + "loss_iou": 0.4375, + "loss_num": 0.05029296875, + "loss_xval": 1.125, + "num_input_tokens_seen": 491930232, + "step": 7354 + }, + { + "epoch": 0.8346099290780142, + "grad_norm": 17.84300994873047, + "learning_rate": 5e-05, + "loss": 1.0028, + "num_input_tokens_seen": 491996844, + "step": 7355 + }, + { + "epoch": 0.8346099290780142, + "loss": 0.8661183714866638, + "loss_ce": 0.004301966167986393, + "loss_iou": 0.353515625, + "loss_num": 0.030517578125, + "loss_xval": 0.86328125, + "num_input_tokens_seen": 491996844, + "step": 7355 + }, + { + "epoch": 0.8347234042553191, + "grad_norm": 26.726743698120117, + "learning_rate": 5e-05, + "loss": 1.1933, + "num_input_tokens_seen": 492063008, + "step": 7356 + }, + { + "epoch": 0.8347234042553191, + "loss": 1.1269772052764893, + "loss_ce": 0.007836627773940563, + "loss_iou": 0.46484375, + "loss_num": 0.037353515625, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 492063008, + "step": 7356 + }, + { + "epoch": 0.8348368794326241, + "grad_norm": 39.932254791259766, + "learning_rate": 5e-05, + "loss": 1.2021, + "num_input_tokens_seen": 492130188, + "step": 7357 + }, + { + "epoch": 0.8348368794326241, + "loss": 1.236647129058838, + "loss_ce": 0.007643206510692835, + "loss_iou": 0.490234375, + "loss_num": 0.04931640625, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 492130188, + "step": 7357 + }, + { + "epoch": 0.8349503546099291, + "grad_norm": 30.51945686340332, + "learning_rate": 5e-05, + "loss": 1.0352, + "num_input_tokens_seen": 492196428, + "step": 7358 + }, + { + "epoch": 0.8349503546099291, + "loss": 1.0339514017105103, + "loss_ce": 0.009049052372574806, + "loss_iou": 0.458984375, + "loss_num": 0.021484375, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 492196428, + "step": 7358 + }, + { + "epoch": 0.835063829787234, + "grad_norm": 33.83932113647461, + "learning_rate": 5e-05, + "loss": 1.2322, + "num_input_tokens_seen": 492263844, + "step": 7359 + }, + { + "epoch": 0.835063829787234, + "loss": 1.0255846977233887, + "loss_ce": 0.004390198737382889, + "loss_iou": 0.439453125, + "loss_num": 0.0284423828125, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 492263844, + "step": 7359 + }, + { + "epoch": 0.835177304964539, + "grad_norm": 28.00567054748535, + "learning_rate": 5e-05, + "loss": 1.3509, + "num_input_tokens_seen": 492329804, + "step": 7360 + }, + { + "epoch": 0.835177304964539, + "loss": 1.251360535621643, + "loss_ce": 0.0067316340282559395, + "loss_iou": 0.5625, + "loss_num": 0.024169921875, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 492329804, + "step": 7360 + }, + { + "epoch": 0.835290780141844, + "grad_norm": 27.237062454223633, + "learning_rate": 5e-05, + "loss": 1.0604, + "num_input_tokens_seen": 492396372, + "step": 7361 + }, + { + "epoch": 0.835290780141844, + "loss": 0.9273226857185364, + "loss_ce": 0.00532563915476203, + "loss_iou": 0.38671875, + "loss_num": 0.030029296875, + "loss_xval": 0.921875, + "num_input_tokens_seen": 492396372, + "step": 7361 + }, + { + "epoch": 0.835404255319149, + "grad_norm": 26.161026000976562, + "learning_rate": 5e-05, + "loss": 1.0998, + "num_input_tokens_seen": 492462876, + "step": 7362 + }, + { + "epoch": 0.835404255319149, + "loss": 1.0515341758728027, + "loss_ce": 0.006856441032141447, + "loss_iou": 0.431640625, + "loss_num": 0.036376953125, + "loss_xval": 1.046875, + "num_input_tokens_seen": 492462876, + "step": 7362 + }, + { + "epoch": 0.835517730496454, + "grad_norm": 29.89273452758789, + "learning_rate": 5e-05, + "loss": 1.0639, + "num_input_tokens_seen": 492529012, + "step": 7363 + }, + { + "epoch": 0.835517730496454, + "loss": 0.958503246307373, + "loss_ce": 0.003425145987421274, + "loss_iou": 0.40234375, + "loss_num": 0.0302734375, + "loss_xval": 0.953125, + "num_input_tokens_seen": 492529012, + "step": 7363 + }, + { + "epoch": 0.8356312056737588, + "grad_norm": 35.24683380126953, + "learning_rate": 5e-05, + "loss": 1.3399, + "num_input_tokens_seen": 492595088, + "step": 7364 + }, + { + "epoch": 0.8356312056737588, + "loss": 1.109644889831543, + "loss_ce": 0.007594094146043062, + "loss_iou": 0.45703125, + "loss_num": 0.03759765625, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 492595088, + "step": 7364 + }, + { + "epoch": 0.8357446808510638, + "grad_norm": 40.43858337402344, + "learning_rate": 5e-05, + "loss": 1.1574, + "num_input_tokens_seen": 492662168, + "step": 7365 + }, + { + "epoch": 0.8357446808510638, + "loss": 1.3129425048828125, + "loss_ce": 0.008254972286522388, + "loss_iou": 0.52734375, + "loss_num": 0.04931640625, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 492662168, + "step": 7365 + }, + { + "epoch": 0.8358581560283688, + "grad_norm": 41.31731414794922, + "learning_rate": 5e-05, + "loss": 1.0547, + "num_input_tokens_seen": 492728256, + "step": 7366 + }, + { + "epoch": 0.8358581560283688, + "loss": 1.1506118774414062, + "loss_ce": 0.008888178505003452, + "loss_iou": 0.4609375, + "loss_num": 0.04443359375, + "loss_xval": 1.140625, + "num_input_tokens_seen": 492728256, + "step": 7366 + }, + { + "epoch": 0.8359716312056737, + "grad_norm": 31.683279037475586, + "learning_rate": 5e-05, + "loss": 1.2994, + "num_input_tokens_seen": 492794252, + "step": 7367 + }, + { + "epoch": 0.8359716312056737, + "loss": 1.167098045349121, + "loss_ce": 0.008406671695411205, + "loss_iou": 0.48046875, + "loss_num": 0.03955078125, + "loss_xval": 1.15625, + "num_input_tokens_seen": 492794252, + "step": 7367 + }, + { + "epoch": 0.8360851063829787, + "grad_norm": 26.45702362060547, + "learning_rate": 5e-05, + "loss": 1.4072, + "num_input_tokens_seen": 492861836, + "step": 7368 + }, + { + "epoch": 0.8360851063829787, + "loss": 1.5171754360198975, + "loss_ce": 0.010339539498090744, + "loss_iou": 0.57421875, + "loss_num": 0.0712890625, + "loss_xval": 1.5078125, + "num_input_tokens_seen": 492861836, + "step": 7368 + }, + { + "epoch": 0.8361985815602837, + "grad_norm": 35.89274597167969, + "learning_rate": 5e-05, + "loss": 1.1753, + "num_input_tokens_seen": 492928508, + "step": 7369 + }, + { + "epoch": 0.8361985815602837, + "loss": 1.139521598815918, + "loss_ce": 0.0052442168816924095, + "loss_iou": 0.498046875, + "loss_num": 0.0277099609375, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 492928508, + "step": 7369 + }, + { + "epoch": 0.8363120567375887, + "grad_norm": 38.77903366088867, + "learning_rate": 5e-05, + "loss": 1.4258, + "num_input_tokens_seen": 492994956, + "step": 7370 + }, + { + "epoch": 0.8363120567375887, + "loss": 1.2699213027954102, + "loss_ce": 0.0052728233858942986, + "loss_iou": 0.55859375, + "loss_num": 0.0299072265625, + "loss_xval": 1.265625, + "num_input_tokens_seen": 492994956, + "step": 7370 + }, + { + "epoch": 0.8364255319148937, + "grad_norm": 21.110197067260742, + "learning_rate": 5e-05, + "loss": 1.1893, + "num_input_tokens_seen": 493062800, + "step": 7371 + }, + { + "epoch": 0.8364255319148937, + "loss": 1.2826745510101318, + "loss_ce": 0.008260553702712059, + "loss_iou": 0.470703125, + "loss_num": 0.06640625, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 493062800, + "step": 7371 + }, + { + "epoch": 0.8365390070921985, + "grad_norm": 14.974244117736816, + "learning_rate": 5e-05, + "loss": 1.0018, + "num_input_tokens_seen": 493128692, + "step": 7372 + }, + { + "epoch": 0.8365390070921985, + "loss": 0.8630807399749756, + "loss_ce": 0.007611990440636873, + "loss_iou": 0.310546875, + "loss_num": 0.046875, + "loss_xval": 0.85546875, + "num_input_tokens_seen": 493128692, + "step": 7372 + }, + { + "epoch": 0.8366524822695035, + "grad_norm": 24.747892379760742, + "learning_rate": 5e-05, + "loss": 1.1172, + "num_input_tokens_seen": 493194680, + "step": 7373 + }, + { + "epoch": 0.8366524822695035, + "loss": 1.119905948638916, + "loss_ce": 0.006624700501561165, + "loss_iou": 0.427734375, + "loss_num": 0.051513671875, + "loss_xval": 1.109375, + "num_input_tokens_seen": 493194680, + "step": 7373 + }, + { + "epoch": 0.8367659574468085, + "grad_norm": 37.74634552001953, + "learning_rate": 5e-05, + "loss": 1.0754, + "num_input_tokens_seen": 493261360, + "step": 7374 + }, + { + "epoch": 0.8367659574468085, + "loss": 0.9223060607910156, + "loss_ce": 0.005802154075354338, + "loss_iou": 0.388671875, + "loss_num": 0.0281982421875, + "loss_xval": 0.91796875, + "num_input_tokens_seen": 493261360, + "step": 7374 + }, + { + "epoch": 0.8368794326241135, + "grad_norm": 37.004150390625, + "learning_rate": 5e-05, + "loss": 1.4461, + "num_input_tokens_seen": 493328680, + "step": 7375 + }, + { + "epoch": 0.8368794326241135, + "loss": 1.6778876781463623, + "loss_ce": 0.00698927603662014, + "loss_iou": 0.66796875, + "loss_num": 0.0673828125, + "loss_xval": 1.671875, + "num_input_tokens_seen": 493328680, + "step": 7375 + }, + { + "epoch": 0.8369929078014184, + "grad_norm": 10.881518363952637, + "learning_rate": 5e-05, + "loss": 0.9799, + "num_input_tokens_seen": 493395516, + "step": 7376 + }, + { + "epoch": 0.8369929078014184, + "loss": 0.8969802856445312, + "loss_ce": 0.009773220866918564, + "loss_iou": 0.37890625, + "loss_num": 0.0257568359375, + "loss_xval": 0.88671875, + "num_input_tokens_seen": 493395516, + "step": 7376 + }, + { + "epoch": 0.8371063829787234, + "grad_norm": 18.08856201171875, + "learning_rate": 5e-05, + "loss": 1.099, + "num_input_tokens_seen": 493461920, + "step": 7377 + }, + { + "epoch": 0.8371063829787234, + "loss": 1.0867162942886353, + "loss_ce": 0.0038304754998534918, + "loss_iou": 0.4453125, + "loss_num": 0.038330078125, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 493461920, + "step": 7377 + }, + { + "epoch": 0.8372198581560284, + "grad_norm": 16.242137908935547, + "learning_rate": 5e-05, + "loss": 1.0652, + "num_input_tokens_seen": 493528492, + "step": 7378 + }, + { + "epoch": 0.8372198581560284, + "loss": 1.2008044719696045, + "loss_ce": 0.009886435233056545, + "loss_iou": 0.44921875, + "loss_num": 0.058837890625, + "loss_xval": 1.1875, + "num_input_tokens_seen": 493528492, + "step": 7378 + }, + { + "epoch": 0.8373333333333334, + "grad_norm": 18.414644241333008, + "learning_rate": 5e-05, + "loss": 1.1509, + "num_input_tokens_seen": 493595084, + "step": 7379 + }, + { + "epoch": 0.8373333333333334, + "loss": 1.380967378616333, + "loss_ce": 0.009385373443365097, + "loss_iou": 0.52734375, + "loss_num": 0.06396484375, + "loss_xval": 1.375, + "num_input_tokens_seen": 493595084, + "step": 7379 + }, + { + "epoch": 0.8374468085106384, + "grad_norm": 22.08258819580078, + "learning_rate": 5e-05, + "loss": 1.1377, + "num_input_tokens_seen": 493662312, + "step": 7380 + }, + { + "epoch": 0.8374468085106384, + "loss": 1.1333321332931519, + "loss_ce": 0.006867218296974897, + "loss_iou": 0.478515625, + "loss_num": 0.033935546875, + "loss_xval": 1.125, + "num_input_tokens_seen": 493662312, + "step": 7380 + }, + { + "epoch": 0.8375602836879432, + "grad_norm": 22.333433151245117, + "learning_rate": 5e-05, + "loss": 1.097, + "num_input_tokens_seen": 493728992, + "step": 7381 + }, + { + "epoch": 0.8375602836879432, + "loss": 1.151731252670288, + "loss_ce": 0.008176521398127079, + "loss_iou": 0.46875, + "loss_num": 0.041015625, + "loss_xval": 1.140625, + "num_input_tokens_seen": 493728992, + "step": 7381 + }, + { + "epoch": 0.8376737588652482, + "grad_norm": 41.526981353759766, + "learning_rate": 5e-05, + "loss": 1.1399, + "num_input_tokens_seen": 493795480, + "step": 7382 + }, + { + "epoch": 0.8376737588652482, + "loss": 0.990159273147583, + "loss_ce": 0.005784241482615471, + "loss_iou": 0.416015625, + "loss_num": 0.0303955078125, + "loss_xval": 0.984375, + "num_input_tokens_seen": 493795480, + "step": 7382 + }, + { + "epoch": 0.8377872340425532, + "grad_norm": 36.08610153198242, + "learning_rate": 5e-05, + "loss": 1.117, + "num_input_tokens_seen": 493862312, + "step": 7383 + }, + { + "epoch": 0.8377872340425532, + "loss": 1.026816964149475, + "loss_ce": 0.006797431036829948, + "loss_iou": 0.42578125, + "loss_num": 0.033935546875, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 493862312, + "step": 7383 + }, + { + "epoch": 0.8379007092198582, + "grad_norm": 26.855966567993164, + "learning_rate": 5e-05, + "loss": 1.2193, + "num_input_tokens_seen": 493928516, + "step": 7384 + }, + { + "epoch": 0.8379007092198582, + "loss": 1.196795105934143, + "loss_ce": 0.0043818531557917595, + "loss_iou": 0.494140625, + "loss_num": 0.04150390625, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 493928516, + "step": 7384 + }, + { + "epoch": 0.8380141843971631, + "grad_norm": 34.49277877807617, + "learning_rate": 5e-05, + "loss": 1.2104, + "num_input_tokens_seen": 493996076, + "step": 7385 + }, + { + "epoch": 0.8380141843971631, + "loss": 1.1223586797714233, + "loss_ce": 0.0049270205199718475, + "loss_iou": 0.455078125, + "loss_num": 0.041259765625, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 493996076, + "step": 7385 + }, + { + "epoch": 0.8381276595744681, + "grad_norm": 33.626808166503906, + "learning_rate": 5e-05, + "loss": 1.2812, + "num_input_tokens_seen": 494062968, + "step": 7386 + }, + { + "epoch": 0.8381276595744681, + "loss": 1.2211384773254395, + "loss_ce": 0.0042195552960038185, + "loss_iou": 0.50390625, + "loss_num": 0.041748046875, + "loss_xval": 1.21875, + "num_input_tokens_seen": 494062968, + "step": 7386 + }, + { + "epoch": 0.8382411347517731, + "grad_norm": 14.617842674255371, + "learning_rate": 5e-05, + "loss": 0.9317, + "num_input_tokens_seen": 494128584, + "step": 7387 + }, + { + "epoch": 0.8382411347517731, + "loss": 0.9263583421707153, + "loss_ce": 0.005948175676167011, + "loss_iou": 0.36328125, + "loss_num": 0.0390625, + "loss_xval": 0.921875, + "num_input_tokens_seen": 494128584, + "step": 7387 + }, + { + "epoch": 0.8383546099290781, + "grad_norm": 20.020050048828125, + "learning_rate": 5e-05, + "loss": 1.0887, + "num_input_tokens_seen": 494194888, + "step": 7388 + }, + { + "epoch": 0.8383546099290781, + "loss": 1.0868010520935059, + "loss_ce": 0.009652649983763695, + "loss_iou": 0.4453125, + "loss_num": 0.037109375, + "loss_xval": 1.078125, + "num_input_tokens_seen": 494194888, + "step": 7388 + }, + { + "epoch": 0.8384680851063829, + "grad_norm": 31.572635650634766, + "learning_rate": 5e-05, + "loss": 1.0732, + "num_input_tokens_seen": 494261620, + "step": 7389 + }, + { + "epoch": 0.8384680851063829, + "loss": 1.0170402526855469, + "loss_ce": 0.005565721075981855, + "loss_iou": 0.443359375, + "loss_num": 0.0250244140625, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 494261620, + "step": 7389 + }, + { + "epoch": 0.8385815602836879, + "grad_norm": 28.768308639526367, + "learning_rate": 5e-05, + "loss": 1.2624, + "num_input_tokens_seen": 494328672, + "step": 7390 + }, + { + "epoch": 0.8385815602836879, + "loss": 1.1864672899246216, + "loss_ce": 0.007268092595040798, + "loss_iou": 0.50390625, + "loss_num": 0.033935546875, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 494328672, + "step": 7390 + }, + { + "epoch": 0.8386950354609929, + "grad_norm": 18.201919555664062, + "learning_rate": 5e-05, + "loss": 0.9784, + "num_input_tokens_seen": 494395176, + "step": 7391 + }, + { + "epoch": 0.8386950354609929, + "loss": 0.9429205656051636, + "loss_ce": 0.006900594104081392, + "loss_iou": 0.392578125, + "loss_num": 0.0302734375, + "loss_xval": 0.9375, + "num_input_tokens_seen": 494395176, + "step": 7391 + }, + { + "epoch": 0.8388085106382979, + "grad_norm": 16.341402053833008, + "learning_rate": 5e-05, + "loss": 1.0183, + "num_input_tokens_seen": 494460436, + "step": 7392 + }, + { + "epoch": 0.8388085106382979, + "loss": 1.0374358892440796, + "loss_ce": 0.0060943374410271645, + "loss_iou": 0.384765625, + "loss_num": 0.05224609375, + "loss_xval": 1.03125, + "num_input_tokens_seen": 494460436, + "step": 7392 + }, + { + "epoch": 0.8389219858156028, + "grad_norm": 18.740192413330078, + "learning_rate": 5e-05, + "loss": 0.9085, + "num_input_tokens_seen": 494527092, + "step": 7393 + }, + { + "epoch": 0.8389219858156028, + "loss": 0.9831324815750122, + "loss_ce": 0.007180359680205584, + "loss_iou": 0.3828125, + "loss_num": 0.042236328125, + "loss_xval": 0.9765625, + "num_input_tokens_seen": 494527092, + "step": 7393 + }, + { + "epoch": 0.8390354609929078, + "grad_norm": 21.293333053588867, + "learning_rate": 5e-05, + "loss": 1.2052, + "num_input_tokens_seen": 494593280, + "step": 7394 + }, + { + "epoch": 0.8390354609929078, + "loss": 1.1074244976043701, + "loss_ce": 0.008303474634885788, + "loss_iou": 0.43359375, + "loss_num": 0.046142578125, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 494593280, + "step": 7394 + }, + { + "epoch": 0.8391489361702128, + "grad_norm": 24.584779739379883, + "learning_rate": 5e-05, + "loss": 1.226, + "num_input_tokens_seen": 494660688, + "step": 7395 + }, + { + "epoch": 0.8391489361702128, + "loss": 1.387605905532837, + "loss_ce": 0.005770055577158928, + "loss_iou": 0.5234375, + "loss_num": 0.06689453125, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 494660688, + "step": 7395 + }, + { + "epoch": 0.8392624113475178, + "grad_norm": 40.87284469604492, + "learning_rate": 5e-05, + "loss": 1.2066, + "num_input_tokens_seen": 494726496, + "step": 7396 + }, + { + "epoch": 0.8392624113475178, + "loss": 1.1641240119934082, + "loss_ce": 0.011291902512311935, + "loss_iou": 0.515625, + "loss_num": 0.0240478515625, + "loss_xval": 1.15625, + "num_input_tokens_seen": 494726496, + "step": 7396 + }, + { + "epoch": 0.8393758865248226, + "grad_norm": 24.64221954345703, + "learning_rate": 5e-05, + "loss": 1.3622, + "num_input_tokens_seen": 494793404, + "step": 7397 + }, + { + "epoch": 0.8393758865248226, + "loss": 1.2698633670806885, + "loss_ce": 0.0096095260232687, + "loss_iou": 0.546875, + "loss_num": 0.03369140625, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 494793404, + "step": 7397 + }, + { + "epoch": 0.8394893617021276, + "grad_norm": 18.732723236083984, + "learning_rate": 5e-05, + "loss": 1.0192, + "num_input_tokens_seen": 494860844, + "step": 7398 + }, + { + "epoch": 0.8394893617021276, + "loss": 1.132852554321289, + "loss_ce": 0.007364192046225071, + "loss_iou": 0.443359375, + "loss_num": 0.0478515625, + "loss_xval": 1.125, + "num_input_tokens_seen": 494860844, + "step": 7398 + }, + { + "epoch": 0.8396028368794326, + "grad_norm": 21.5651912689209, + "learning_rate": 5e-05, + "loss": 1.1152, + "num_input_tokens_seen": 494926664, + "step": 7399 + }, + { + "epoch": 0.8396028368794326, + "loss": 1.2049560546875, + "loss_ce": 0.008666946552693844, + "loss_iou": 0.46875, + "loss_num": 0.05126953125, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 494926664, + "step": 7399 + }, + { + "epoch": 0.8397163120567376, + "grad_norm": 48.7031135559082, + "learning_rate": 5e-05, + "loss": 1.2546, + "num_input_tokens_seen": 494993876, + "step": 7400 + }, + { + "epoch": 0.8397163120567376, + "loss": 1.2711539268493652, + "loss_ce": 0.007970334962010384, + "loss_iou": 0.5, + "loss_num": 0.05224609375, + "loss_xval": 1.265625, + "num_input_tokens_seen": 494993876, + "step": 7400 + }, + { + "epoch": 0.8398297872340426, + "grad_norm": 44.3817024230957, + "learning_rate": 5e-05, + "loss": 1.5226, + "num_input_tokens_seen": 495060620, + "step": 7401 + }, + { + "epoch": 0.8398297872340426, + "loss": 1.5284194946289062, + "loss_ce": 0.010108977556228638, + "loss_iou": 0.59765625, + "loss_num": 0.06494140625, + "loss_xval": 1.515625, + "num_input_tokens_seen": 495060620, + "step": 7401 + }, + { + "epoch": 0.8399432624113475, + "grad_norm": 28.301130294799805, + "learning_rate": 5e-05, + "loss": 1.2096, + "num_input_tokens_seen": 495127552, + "step": 7402 + }, + { + "epoch": 0.8399432624113475, + "loss": 1.1658666133880615, + "loss_ce": 0.005222148261964321, + "loss_iou": 0.482421875, + "loss_num": 0.0390625, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 495127552, + "step": 7402 + }, + { + "epoch": 0.8400567375886525, + "grad_norm": 64.85076904296875, + "learning_rate": 5e-05, + "loss": 1.3216, + "num_input_tokens_seen": 495195384, + "step": 7403 + }, + { + "epoch": 0.8400567375886525, + "loss": 1.3794784545898438, + "loss_ce": 0.005454925820231438, + "loss_iou": 0.5625, + "loss_num": 0.0498046875, + "loss_xval": 1.375, + "num_input_tokens_seen": 495195384, + "step": 7403 + }, + { + "epoch": 0.8401702127659575, + "grad_norm": 23.79352569580078, + "learning_rate": 5e-05, + "loss": 1.0603, + "num_input_tokens_seen": 495261968, + "step": 7404 + }, + { + "epoch": 0.8401702127659575, + "loss": 1.1559581756591797, + "loss_ce": 0.003614477813243866, + "loss_iou": 0.5078125, + "loss_num": 0.027587890625, + "loss_xval": 1.15625, + "num_input_tokens_seen": 495261968, + "step": 7404 + }, + { + "epoch": 0.8402836879432624, + "grad_norm": 28.270292282104492, + "learning_rate": 5e-05, + "loss": 1.1664, + "num_input_tokens_seen": 495327120, + "step": 7405 + }, + { + "epoch": 0.8402836879432624, + "loss": 1.276295781135559, + "loss_ce": 0.003834799164906144, + "loss_iou": 0.5, + "loss_num": 0.053955078125, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 495327120, + "step": 7405 + }, + { + "epoch": 0.8403971631205673, + "grad_norm": 33.8287353515625, + "learning_rate": 5e-05, + "loss": 1.0541, + "num_input_tokens_seen": 495393928, + "step": 7406 + }, + { + "epoch": 0.8403971631205673, + "loss": 0.9891659021377563, + "loss_ce": 0.003570165019482374, + "loss_iou": 0.39453125, + "loss_num": 0.0390625, + "loss_xval": 0.984375, + "num_input_tokens_seen": 495393928, + "step": 7406 + }, + { + "epoch": 0.8405106382978723, + "grad_norm": 23.011886596679688, + "learning_rate": 5e-05, + "loss": 1.1362, + "num_input_tokens_seen": 495459960, + "step": 7407 + }, + { + "epoch": 0.8405106382978723, + "loss": 0.9591189622879028, + "loss_ce": 0.005200449377298355, + "loss_iou": 0.369140625, + "loss_num": 0.04296875, + "loss_xval": 0.953125, + "num_input_tokens_seen": 495459960, + "step": 7407 + }, + { + "epoch": 0.8406241134751773, + "grad_norm": 21.639856338500977, + "learning_rate": 5e-05, + "loss": 1.0856, + "num_input_tokens_seen": 495526868, + "step": 7408 + }, + { + "epoch": 0.8406241134751773, + "loss": 1.1018784046173096, + "loss_ce": 0.005198654253035784, + "loss_iou": 0.416015625, + "loss_num": 0.052734375, + "loss_xval": 1.09375, + "num_input_tokens_seen": 495526868, + "step": 7408 + }, + { + "epoch": 0.8407375886524823, + "grad_norm": 23.95220184326172, + "learning_rate": 5e-05, + "loss": 1.1921, + "num_input_tokens_seen": 495593172, + "step": 7409 + }, + { + "epoch": 0.8407375886524823, + "loss": 1.2566736936569214, + "loss_ce": 0.013021355494856834, + "loss_iou": 0.494140625, + "loss_num": 0.051025390625, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 495593172, + "step": 7409 + }, + { + "epoch": 0.8408510638297872, + "grad_norm": 21.54686737060547, + "learning_rate": 5e-05, + "loss": 0.9889, + "num_input_tokens_seen": 495660020, + "step": 7410 + }, + { + "epoch": 0.8408510638297872, + "loss": 0.9830399751663208, + "loss_ce": 0.006599476095288992, + "loss_iou": 0.419921875, + "loss_num": 0.0272216796875, + "loss_xval": 0.9765625, + "num_input_tokens_seen": 495660020, + "step": 7410 + }, + { + "epoch": 0.8409645390070922, + "grad_norm": 23.106340408325195, + "learning_rate": 5e-05, + "loss": 1.1312, + "num_input_tokens_seen": 495726156, + "step": 7411 + }, + { + "epoch": 0.8409645390070922, + "loss": 1.2201173305511475, + "loss_ce": 0.0038087547291070223, + "loss_iou": 0.47265625, + "loss_num": 0.054443359375, + "loss_xval": 1.21875, + "num_input_tokens_seen": 495726156, + "step": 7411 + }, + { + "epoch": 0.8410780141843972, + "grad_norm": 25.71483039855957, + "learning_rate": 5e-05, + "loss": 1.3786, + "num_input_tokens_seen": 495793240, + "step": 7412 + }, + { + "epoch": 0.8410780141843972, + "loss": 1.1102471351623535, + "loss_ce": 0.009905312210321426, + "loss_iou": 0.4140625, + "loss_num": 0.053955078125, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 495793240, + "step": 7412 + }, + { + "epoch": 0.8411914893617022, + "grad_norm": 41.22884750366211, + "learning_rate": 5e-05, + "loss": 1.2632, + "num_input_tokens_seen": 495861512, + "step": 7413 + }, + { + "epoch": 0.8411914893617022, + "loss": 1.312565565109253, + "loss_ce": 0.00738988071680069, + "loss_iou": 0.50390625, + "loss_num": 0.059326171875, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 495861512, + "step": 7413 + }, + { + "epoch": 0.841304964539007, + "grad_norm": 32.31788635253906, + "learning_rate": 5e-05, + "loss": 1.1179, + "num_input_tokens_seen": 495928156, + "step": 7414 + }, + { + "epoch": 0.841304964539007, + "loss": 1.1509512662887573, + "loss_ce": 0.007884817197918892, + "loss_iou": 0.5078125, + "loss_num": 0.0260009765625, + "loss_xval": 1.140625, + "num_input_tokens_seen": 495928156, + "step": 7414 + }, + { + "epoch": 0.841418439716312, + "grad_norm": 17.009883880615234, + "learning_rate": 5e-05, + "loss": 0.9166, + "num_input_tokens_seen": 495994952, + "step": 7415 + }, + { + "epoch": 0.841418439716312, + "loss": 1.0254201889038086, + "loss_ce": 0.007842070423066616, + "loss_iou": 0.375, + "loss_num": 0.053466796875, + "loss_xval": 1.015625, + "num_input_tokens_seen": 495994952, + "step": 7415 + }, + { + "epoch": 0.841531914893617, + "grad_norm": 22.726001739501953, + "learning_rate": 5e-05, + "loss": 1.1547, + "num_input_tokens_seen": 496062164, + "step": 7416 + }, + { + "epoch": 0.841531914893617, + "loss": 1.2751624584197998, + "loss_ce": 0.008560962975025177, + "loss_iou": 0.53515625, + "loss_num": 0.03955078125, + "loss_xval": 1.265625, + "num_input_tokens_seen": 496062164, + "step": 7416 + }, + { + "epoch": 0.841645390070922, + "grad_norm": 29.51093101501465, + "learning_rate": 5e-05, + "loss": 1.2534, + "num_input_tokens_seen": 496129196, + "step": 7417 + }, + { + "epoch": 0.841645390070922, + "loss": 1.080729365348816, + "loss_ce": 0.006510566920042038, + "loss_iou": 0.44921875, + "loss_num": 0.034912109375, + "loss_xval": 1.078125, + "num_input_tokens_seen": 496129196, + "step": 7417 + }, + { + "epoch": 0.841758865248227, + "grad_norm": 29.341930389404297, + "learning_rate": 5e-05, + "loss": 1.1972, + "num_input_tokens_seen": 496196444, + "step": 7418 + }, + { + "epoch": 0.841758865248227, + "loss": 1.0815509557724, + "loss_ce": 0.005379050970077515, + "loss_iou": 0.447265625, + "loss_num": 0.03662109375, + "loss_xval": 1.078125, + "num_input_tokens_seen": 496196444, + "step": 7418 + }, + { + "epoch": 0.8418723404255319, + "grad_norm": 19.627925872802734, + "learning_rate": 5e-05, + "loss": 1.0333, + "num_input_tokens_seen": 496261812, + "step": 7419 + }, + { + "epoch": 0.8418723404255319, + "loss": 0.8711587190628052, + "loss_ce": 0.00397121999412775, + "loss_iou": 0.345703125, + "loss_num": 0.034912109375, + "loss_xval": 0.8671875, + "num_input_tokens_seen": 496261812, + "step": 7419 + }, + { + "epoch": 0.8419858156028369, + "grad_norm": 20.26927947998047, + "learning_rate": 5e-05, + "loss": 1.0591, + "num_input_tokens_seen": 496327952, + "step": 7420 + }, + { + "epoch": 0.8419858156028369, + "loss": 1.0632504224777222, + "loss_ce": 0.006731921806931496, + "loss_iou": 0.408203125, + "loss_num": 0.04833984375, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 496327952, + "step": 7420 + }, + { + "epoch": 0.8420992907801419, + "grad_norm": 27.970714569091797, + "learning_rate": 5e-05, + "loss": 1.3776, + "num_input_tokens_seen": 496395004, + "step": 7421 + }, + { + "epoch": 0.8420992907801419, + "loss": 1.2253676652908325, + "loss_ce": 0.0049086669459939, + "loss_iou": 0.45703125, + "loss_num": 0.060791015625, + "loss_xval": 1.21875, + "num_input_tokens_seen": 496395004, + "step": 7421 + }, + { + "epoch": 0.8422127659574468, + "grad_norm": 27.435033798217773, + "learning_rate": 5e-05, + "loss": 1.0507, + "num_input_tokens_seen": 496461684, + "step": 7422 + }, + { + "epoch": 0.8422127659574468, + "loss": 0.9903937578201294, + "loss_ce": 0.009436706081032753, + "loss_iou": 0.41015625, + "loss_num": 0.032470703125, + "loss_xval": 0.98046875, + "num_input_tokens_seen": 496461684, + "step": 7422 + }, + { + "epoch": 0.8423262411347517, + "grad_norm": 20.009340286254883, + "learning_rate": 5e-05, + "loss": 0.978, + "num_input_tokens_seen": 496527984, + "step": 7423 + }, + { + "epoch": 0.8423262411347517, + "loss": 1.0756800174713135, + "loss_ce": 0.005092926323413849, + "loss_iou": 0.4296875, + "loss_num": 0.04248046875, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 496527984, + "step": 7423 + }, + { + "epoch": 0.8424397163120567, + "grad_norm": 40.906349182128906, + "learning_rate": 5e-05, + "loss": 1.2173, + "num_input_tokens_seen": 496595412, + "step": 7424 + }, + { + "epoch": 0.8424397163120567, + "loss": 1.24052095413208, + "loss_ce": 0.008099086582660675, + "loss_iou": 0.486328125, + "loss_num": 0.052001953125, + "loss_xval": 1.234375, + "num_input_tokens_seen": 496595412, + "step": 7424 + }, + { + "epoch": 0.8425531914893617, + "grad_norm": 35.32695007324219, + "learning_rate": 5e-05, + "loss": 1.3255, + "num_input_tokens_seen": 496661236, + "step": 7425 + }, + { + "epoch": 0.8425531914893617, + "loss": 1.1340644359588623, + "loss_ce": 0.01060557086020708, + "loss_iou": 0.482421875, + "loss_num": 0.03173828125, + "loss_xval": 1.125, + "num_input_tokens_seen": 496661236, + "step": 7425 + }, + { + "epoch": 0.8426666666666667, + "grad_norm": 22.41616439819336, + "learning_rate": 5e-05, + "loss": 1.1907, + "num_input_tokens_seen": 496728076, + "step": 7426 + }, + { + "epoch": 0.8426666666666667, + "loss": 1.2730777263641357, + "loss_ce": 0.007452625781297684, + "loss_iou": 0.44921875, + "loss_num": 0.0732421875, + "loss_xval": 1.265625, + "num_input_tokens_seen": 496728076, + "step": 7426 + }, + { + "epoch": 0.8427801418439717, + "grad_norm": 29.177644729614258, + "learning_rate": 5e-05, + "loss": 1.1627, + "num_input_tokens_seen": 496795724, + "step": 7427 + }, + { + "epoch": 0.8427801418439717, + "loss": 1.0565407276153564, + "loss_ce": 0.0038064175751060247, + "loss_iou": 0.42578125, + "loss_num": 0.0400390625, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 496795724, + "step": 7427 + }, + { + "epoch": 0.8428936170212766, + "grad_norm": 33.81758117675781, + "learning_rate": 5e-05, + "loss": 1.214, + "num_input_tokens_seen": 496862236, + "step": 7428 + }, + { + "epoch": 0.8428936170212766, + "loss": 1.1505357027053833, + "loss_ce": 0.006248589605093002, + "loss_iou": 0.44140625, + "loss_num": 0.0517578125, + "loss_xval": 1.140625, + "num_input_tokens_seen": 496862236, + "step": 7428 + }, + { + "epoch": 0.8430070921985816, + "grad_norm": 37.270172119140625, + "learning_rate": 5e-05, + "loss": 1.0186, + "num_input_tokens_seen": 496929116, + "step": 7429 + }, + { + "epoch": 0.8430070921985816, + "loss": 0.9512230157852173, + "loss_ce": 0.007863659411668777, + "loss_iou": 0.361328125, + "loss_num": 0.043701171875, + "loss_xval": 0.9453125, + "num_input_tokens_seen": 496929116, + "step": 7429 + }, + { + "epoch": 0.8431205673758865, + "grad_norm": 35.49177169799805, + "learning_rate": 5e-05, + "loss": 1.1646, + "num_input_tokens_seen": 496995172, + "step": 7430 + }, + { + "epoch": 0.8431205673758865, + "loss": 1.026766061782837, + "loss_ce": 0.005281764082610607, + "loss_iou": 0.439453125, + "loss_num": 0.0283203125, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 496995172, + "step": 7430 + }, + { + "epoch": 0.8432340425531915, + "grad_norm": 16.961790084838867, + "learning_rate": 5e-05, + "loss": 1.2156, + "num_input_tokens_seen": 497062116, + "step": 7431 + }, + { + "epoch": 0.8432340425531915, + "loss": 1.0670323371887207, + "loss_ce": 0.003555741859599948, + "loss_iou": 0.447265625, + "loss_num": 0.033935546875, + "loss_xval": 1.0625, + "num_input_tokens_seen": 497062116, + "step": 7431 + }, + { + "epoch": 0.8433475177304964, + "grad_norm": 18.481351852416992, + "learning_rate": 5e-05, + "loss": 1.2065, + "num_input_tokens_seen": 497128592, + "step": 7432 + }, + { + "epoch": 0.8433475177304964, + "loss": 1.2967429161071777, + "loss_ce": 0.007680532056838274, + "loss_iou": 0.4921875, + "loss_num": 0.061279296875, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 497128592, + "step": 7432 + }, + { + "epoch": 0.8434609929078014, + "grad_norm": 27.6805362701416, + "learning_rate": 5e-05, + "loss": 1.1192, + "num_input_tokens_seen": 497195440, + "step": 7433 + }, + { + "epoch": 0.8434609929078014, + "loss": 0.9943065643310547, + "loss_ce": 0.006757727824151516, + "loss_iou": 0.375, + "loss_num": 0.046875, + "loss_xval": 0.98828125, + "num_input_tokens_seen": 497195440, + "step": 7433 + }, + { + "epoch": 0.8435744680851064, + "grad_norm": 21.095951080322266, + "learning_rate": 5e-05, + "loss": 1.0913, + "num_input_tokens_seen": 497261488, + "step": 7434 + }, + { + "epoch": 0.8435744680851064, + "loss": 0.9604694843292236, + "loss_ce": 0.008687286637723446, + "loss_iou": 0.380859375, + "loss_num": 0.038330078125, + "loss_xval": 0.953125, + "num_input_tokens_seen": 497261488, + "step": 7434 + }, + { + "epoch": 0.8436879432624114, + "grad_norm": 105.80353546142578, + "learning_rate": 5e-05, + "loss": 1.1897, + "num_input_tokens_seen": 497329428, + "step": 7435 + }, + { + "epoch": 0.8436879432624114, + "loss": 1.2544180154800415, + "loss_ce": 0.007347680628299713, + "loss_iou": 0.52734375, + "loss_num": 0.03759765625, + "loss_xval": 1.25, + "num_input_tokens_seen": 497329428, + "step": 7435 + }, + { + "epoch": 0.8438014184397163, + "grad_norm": 35.19673156738281, + "learning_rate": 5e-05, + "loss": 1.2017, + "num_input_tokens_seen": 497397304, + "step": 7436 + }, + { + "epoch": 0.8438014184397163, + "loss": 1.3803040981292725, + "loss_ce": 0.00530408276244998, + "loss_iou": 0.54296875, + "loss_num": 0.0576171875, + "loss_xval": 1.375, + "num_input_tokens_seen": 497397304, + "step": 7436 + }, + { + "epoch": 0.8439148936170213, + "grad_norm": 27.641841888427734, + "learning_rate": 5e-05, + "loss": 1.0494, + "num_input_tokens_seen": 497463328, + "step": 7437 + }, + { + "epoch": 0.8439148936170213, + "loss": 1.1813554763793945, + "loss_ce": 0.006550907157361507, + "loss_iou": 0.498046875, + "loss_num": 0.035888671875, + "loss_xval": 1.171875, + "num_input_tokens_seen": 497463328, + "step": 7437 + }, + { + "epoch": 0.8440283687943262, + "grad_norm": 23.93560218811035, + "learning_rate": 5e-05, + "loss": 0.9861, + "num_input_tokens_seen": 497530556, + "step": 7438 + }, + { + "epoch": 0.8440283687943262, + "loss": 0.9237688779830933, + "loss_ce": 0.0026262567844241858, + "loss_iou": 0.357421875, + "loss_num": 0.04150390625, + "loss_xval": 0.921875, + "num_input_tokens_seen": 497530556, + "step": 7438 + }, + { + "epoch": 0.8441418439716312, + "grad_norm": 39.043460845947266, + "learning_rate": 5e-05, + "loss": 1.1903, + "num_input_tokens_seen": 497596960, + "step": 7439 + }, + { + "epoch": 0.8441418439716312, + "loss": 1.267629861831665, + "loss_ce": 0.006887791678309441, + "loss_iou": 0.498046875, + "loss_num": 0.052490234375, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 497596960, + "step": 7439 + }, + { + "epoch": 0.8442553191489361, + "grad_norm": 46.05189514160156, + "learning_rate": 5e-05, + "loss": 1.2855, + "num_input_tokens_seen": 497664008, + "step": 7440 + }, + { + "epoch": 0.8442553191489361, + "loss": 1.4577209949493408, + "loss_ce": 0.0065491655841469765, + "loss_iou": 0.546875, + "loss_num": 0.0712890625, + "loss_xval": 1.453125, + "num_input_tokens_seen": 497664008, + "step": 7440 + }, + { + "epoch": 0.8443687943262411, + "grad_norm": 42.23921585083008, + "learning_rate": 5e-05, + "loss": 1.0466, + "num_input_tokens_seen": 497732092, + "step": 7441 + }, + { + "epoch": 0.8443687943262411, + "loss": 1.0499684810638428, + "loss_ce": 0.00504667405039072, + "loss_iou": 0.423828125, + "loss_num": 0.039306640625, + "loss_xval": 1.046875, + "num_input_tokens_seen": 497732092, + "step": 7441 + }, + { + "epoch": 0.8444822695035461, + "grad_norm": 31.168170928955078, + "learning_rate": 5e-05, + "loss": 1.2431, + "num_input_tokens_seen": 497798432, + "step": 7442 + }, + { + "epoch": 0.8444822695035461, + "loss": 1.124021053314209, + "loss_ce": 0.0039040143601596355, + "loss_iou": 0.455078125, + "loss_num": 0.0419921875, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 497798432, + "step": 7442 + }, + { + "epoch": 0.8445957446808511, + "grad_norm": 14.867844581604004, + "learning_rate": 5e-05, + "loss": 1.2285, + "num_input_tokens_seen": 497865740, + "step": 7443 + }, + { + "epoch": 0.8445957446808511, + "loss": 1.424274206161499, + "loss_ce": 0.010699973441660404, + "loss_iou": 0.53125, + "loss_num": 0.0703125, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 497865740, + "step": 7443 + }, + { + "epoch": 0.8447092198581561, + "grad_norm": 35.0257453918457, + "learning_rate": 5e-05, + "loss": 1.1192, + "num_input_tokens_seen": 497932412, + "step": 7444 + }, + { + "epoch": 0.8447092198581561, + "loss": 0.9663395881652832, + "loss_ce": 0.005402019247412682, + "loss_iou": 0.4140625, + "loss_num": 0.02685546875, + "loss_xval": 0.9609375, + "num_input_tokens_seen": 497932412, + "step": 7444 + }, + { + "epoch": 0.844822695035461, + "grad_norm": 36.235130310058594, + "learning_rate": 5e-05, + "loss": 1.3673, + "num_input_tokens_seen": 498000228, + "step": 7445 + }, + { + "epoch": 0.844822695035461, + "loss": 1.2630420923233032, + "loss_ce": 0.005229592323303223, + "loss_iou": 0.53125, + "loss_num": 0.038818359375, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 498000228, + "step": 7445 + }, + { + "epoch": 0.8449361702127659, + "grad_norm": 52.92224884033203, + "learning_rate": 5e-05, + "loss": 1.1148, + "num_input_tokens_seen": 498066996, + "step": 7446 + }, + { + "epoch": 0.8449361702127659, + "loss": 1.1018383502960205, + "loss_ce": 0.007539093494415283, + "loss_iou": 0.44921875, + "loss_num": 0.038330078125, + "loss_xval": 1.09375, + "num_input_tokens_seen": 498066996, + "step": 7446 + }, + { + "epoch": 0.8450496453900709, + "grad_norm": 15.554877281188965, + "learning_rate": 5e-05, + "loss": 1.0367, + "num_input_tokens_seen": 498133584, + "step": 7447 + }, + { + "epoch": 0.8450496453900709, + "loss": 1.0321723222732544, + "loss_ce": 0.00726995337754488, + "loss_iou": 0.40625, + "loss_num": 0.042724609375, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 498133584, + "step": 7447 + }, + { + "epoch": 0.8451631205673759, + "grad_norm": 18.41646957397461, + "learning_rate": 5e-05, + "loss": 1.0581, + "num_input_tokens_seen": 498200120, + "step": 7448 + }, + { + "epoch": 0.8451631205673759, + "loss": 1.1011475324630737, + "loss_ce": 0.0054444060660898685, + "loss_iou": 0.404296875, + "loss_num": 0.0576171875, + "loss_xval": 1.09375, + "num_input_tokens_seen": 498200120, + "step": 7448 + }, + { + "epoch": 0.8452765957446808, + "grad_norm": 23.912405014038086, + "learning_rate": 5e-05, + "loss": 0.7797, + "num_input_tokens_seen": 498266600, + "step": 7449 + }, + { + "epoch": 0.8452765957446808, + "loss": 0.8238505721092224, + "loss_ce": 0.007535841315984726, + "loss_iou": 0.32421875, + "loss_num": 0.03369140625, + "loss_xval": 0.81640625, + "num_input_tokens_seen": 498266600, + "step": 7449 + }, + { + "epoch": 0.8453900709219858, + "grad_norm": 29.8317928314209, + "learning_rate": 5e-05, + "loss": 1.1898, + "num_input_tokens_seen": 498333532, + "step": 7450 + }, + { + "epoch": 0.8453900709219858, + "loss": 1.077279806137085, + "loss_ce": 0.008432121947407722, + "loss_iou": 0.45703125, + "loss_num": 0.03076171875, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 498333532, + "step": 7450 + }, + { + "epoch": 0.8455035460992908, + "grad_norm": 42.135746002197266, + "learning_rate": 5e-05, + "loss": 1.0819, + "num_input_tokens_seen": 498400616, + "step": 7451 + }, + { + "epoch": 0.8455035460992908, + "loss": 1.093359351158142, + "loss_ce": 0.003515578806400299, + "loss_iou": 0.43359375, + "loss_num": 0.044921875, + "loss_xval": 1.09375, + "num_input_tokens_seen": 498400616, + "step": 7451 + }, + { + "epoch": 0.8456170212765958, + "grad_norm": 36.36491394042969, + "learning_rate": 5e-05, + "loss": 1.3544, + "num_input_tokens_seen": 498467656, + "step": 7452 + }, + { + "epoch": 0.8456170212765958, + "loss": 1.2839877605438232, + "loss_ce": 0.007620646618306637, + "loss_iou": 0.51953125, + "loss_num": 0.047119140625, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 498467656, + "step": 7452 + }, + { + "epoch": 0.8457304964539007, + "grad_norm": 43.577293395996094, + "learning_rate": 5e-05, + "loss": 1.2254, + "num_input_tokens_seen": 498534708, + "step": 7453 + }, + { + "epoch": 0.8457304964539007, + "loss": 1.0514187812805176, + "loss_ce": 0.009670643135905266, + "loss_iou": 0.412109375, + "loss_num": 0.04345703125, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 498534708, + "step": 7453 + }, + { + "epoch": 0.8458439716312057, + "grad_norm": 20.283361434936523, + "learning_rate": 5e-05, + "loss": 0.9895, + "num_input_tokens_seen": 498600988, + "step": 7454 + }, + { + "epoch": 0.8458439716312057, + "loss": 0.9864646196365356, + "loss_ce": 0.0058737862855196, + "loss_iou": 0.3984375, + "loss_num": 0.036865234375, + "loss_xval": 0.98046875, + "num_input_tokens_seen": 498600988, + "step": 7454 + }, + { + "epoch": 0.8459574468085106, + "grad_norm": 29.632404327392578, + "learning_rate": 5e-05, + "loss": 1.2182, + "num_input_tokens_seen": 498667584, + "step": 7455 + }, + { + "epoch": 0.8459574468085106, + "loss": 1.3564918041229248, + "loss_ce": 0.004441056866198778, + "loss_iou": 0.515625, + "loss_num": 0.0634765625, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 498667584, + "step": 7455 + }, + { + "epoch": 0.8460709219858156, + "grad_norm": 43.903446197509766, + "learning_rate": 5e-05, + "loss": 1.249, + "num_input_tokens_seen": 498735136, + "step": 7456 + }, + { + "epoch": 0.8460709219858156, + "loss": 1.276951789855957, + "loss_ce": 0.00473499670624733, + "loss_iou": 0.5234375, + "loss_num": 0.045166015625, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 498735136, + "step": 7456 + }, + { + "epoch": 0.8461843971631205, + "grad_norm": 52.977352142333984, + "learning_rate": 5e-05, + "loss": 1.3689, + "num_input_tokens_seen": 498802568, + "step": 7457 + }, + { + "epoch": 0.8461843971631205, + "loss": 1.3000108003616333, + "loss_ce": 0.0060655297711491585, + "loss_iou": 0.53515625, + "loss_num": 0.045654296875, + "loss_xval": 1.296875, + "num_input_tokens_seen": 498802568, + "step": 7457 + }, + { + "epoch": 0.8462978723404255, + "grad_norm": 47.248252868652344, + "learning_rate": 5e-05, + "loss": 1.3425, + "num_input_tokens_seen": 498869352, + "step": 7458 + }, + { + "epoch": 0.8462978723404255, + "loss": 1.4127185344696045, + "loss_ce": 0.007445088587701321, + "loss_iou": 0.5546875, + "loss_num": 0.058349609375, + "loss_xval": 1.40625, + "num_input_tokens_seen": 498869352, + "step": 7458 + }, + { + "epoch": 0.8464113475177305, + "grad_norm": 24.42247772216797, + "learning_rate": 5e-05, + "loss": 1.3585, + "num_input_tokens_seen": 498937220, + "step": 7459 + }, + { + "epoch": 0.8464113475177305, + "loss": 1.2327570915222168, + "loss_ce": 0.004729775246232748, + "loss_iou": 0.53125, + "loss_num": 0.032958984375, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 498937220, + "step": 7459 + }, + { + "epoch": 0.8465248226950355, + "grad_norm": 26.431800842285156, + "learning_rate": 5e-05, + "loss": 1.0706, + "num_input_tokens_seen": 499003832, + "step": 7460 + }, + { + "epoch": 0.8465248226950355, + "loss": 0.9583815336227417, + "loss_ce": 0.005256610922515392, + "loss_iou": 0.412109375, + "loss_num": 0.026123046875, + "loss_xval": 0.953125, + "num_input_tokens_seen": 499003832, + "step": 7460 + }, + { + "epoch": 0.8466382978723405, + "grad_norm": 66.60875701904297, + "learning_rate": 5e-05, + "loss": 1.1559, + "num_input_tokens_seen": 499071180, + "step": 7461 + }, + { + "epoch": 0.8466382978723405, + "loss": 0.9918131232261658, + "loss_ce": 0.0032877277117222548, + "loss_iou": 0.404296875, + "loss_num": 0.036376953125, + "loss_xval": 0.98828125, + "num_input_tokens_seen": 499071180, + "step": 7461 + }, + { + "epoch": 0.8467517730496454, + "grad_norm": 28.95789909362793, + "learning_rate": 5e-05, + "loss": 1.1397, + "num_input_tokens_seen": 499137444, + "step": 7462 + }, + { + "epoch": 0.8467517730496454, + "loss": 1.015013575553894, + "loss_ce": 0.010298635810613632, + "loss_iou": 0.42578125, + "loss_num": 0.0306396484375, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 499137444, + "step": 7462 + }, + { + "epoch": 0.8468652482269503, + "grad_norm": 28.704689025878906, + "learning_rate": 5e-05, + "loss": 1.4633, + "num_input_tokens_seen": 499204252, + "step": 7463 + }, + { + "epoch": 0.8468652482269503, + "loss": 1.422374963760376, + "loss_ce": 0.006359427701681852, + "loss_iou": 0.57421875, + "loss_num": 0.05322265625, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 499204252, + "step": 7463 + }, + { + "epoch": 0.8469787234042553, + "grad_norm": 98.7510986328125, + "learning_rate": 5e-05, + "loss": 1.2295, + "num_input_tokens_seen": 499270816, + "step": 7464 + }, + { + "epoch": 0.8469787234042553, + "loss": 1.1977518796920776, + "loss_ce": 0.008787049911916256, + "loss_iou": 0.4765625, + "loss_num": 0.046630859375, + "loss_xval": 1.1875, + "num_input_tokens_seen": 499270816, + "step": 7464 + }, + { + "epoch": 0.8470921985815603, + "grad_norm": 16.324167251586914, + "learning_rate": 5e-05, + "loss": 1.1291, + "num_input_tokens_seen": 499337492, + "step": 7465 + }, + { + "epoch": 0.8470921985815603, + "loss": 1.0777230262756348, + "loss_ce": 0.0032906776759773493, + "loss_iou": 0.4453125, + "loss_num": 0.036865234375, + "loss_xval": 1.078125, + "num_input_tokens_seen": 499337492, + "step": 7465 + }, + { + "epoch": 0.8472056737588652, + "grad_norm": 29.780141830444336, + "learning_rate": 5e-05, + "loss": 1.1106, + "num_input_tokens_seen": 499405140, + "step": 7466 + }, + { + "epoch": 0.8472056737588652, + "loss": 1.0884393453598022, + "loss_ce": 0.0034784707240760326, + "loss_iou": 0.47265625, + "loss_num": 0.028076171875, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 499405140, + "step": 7466 + }, + { + "epoch": 0.8473191489361702, + "grad_norm": 27.187015533447266, + "learning_rate": 5e-05, + "loss": 0.9988, + "num_input_tokens_seen": 499471468, + "step": 7467 + }, + { + "epoch": 0.8473191489361702, + "loss": 1.0495043992996216, + "loss_ce": 0.008977092802524567, + "loss_iou": 0.435546875, + "loss_num": 0.033935546875, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 499471468, + "step": 7467 + }, + { + "epoch": 0.8474326241134752, + "grad_norm": 37.4666748046875, + "learning_rate": 5e-05, + "loss": 1.3094, + "num_input_tokens_seen": 499538824, + "step": 7468 + }, + { + "epoch": 0.8474326241134752, + "loss": 1.3223059177398682, + "loss_ce": 0.00492320628836751, + "loss_iou": 0.52734375, + "loss_num": 0.05322265625, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 499538824, + "step": 7468 + }, + { + "epoch": 0.8475460992907802, + "grad_norm": 30.74051284790039, + "learning_rate": 5e-05, + "loss": 1.3106, + "num_input_tokens_seen": 499604832, + "step": 7469 + }, + { + "epoch": 0.8475460992907802, + "loss": 1.2954543828964233, + "loss_ce": 0.004927031695842743, + "loss_iou": 0.51953125, + "loss_num": 0.0498046875, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 499604832, + "step": 7469 + }, + { + "epoch": 0.8476595744680852, + "grad_norm": 27.704248428344727, + "learning_rate": 5e-05, + "loss": 1.0931, + "num_input_tokens_seen": 499671416, + "step": 7470 + }, + { + "epoch": 0.8476595744680852, + "loss": 1.2684814929962158, + "loss_ce": 0.01555184181779623, + "loss_iou": 0.51171875, + "loss_num": 0.0458984375, + "loss_xval": 1.25, + "num_input_tokens_seen": 499671416, + "step": 7470 + }, + { + "epoch": 0.84777304964539, + "grad_norm": 29.985103607177734, + "learning_rate": 5e-05, + "loss": 1.1351, + "num_input_tokens_seen": 499738160, + "step": 7471 + }, + { + "epoch": 0.84777304964539, + "loss": 1.185481071472168, + "loss_ce": 0.006525898817926645, + "loss_iou": 0.4765625, + "loss_num": 0.04541015625, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 499738160, + "step": 7471 + }, + { + "epoch": 0.847886524822695, + "grad_norm": 50.928462982177734, + "learning_rate": 5e-05, + "loss": 1.3129, + "num_input_tokens_seen": 499805168, + "step": 7472 + }, + { + "epoch": 0.847886524822695, + "loss": 1.464645266532898, + "loss_ce": 0.006637404672801495, + "loss_iou": 0.53125, + "loss_num": 0.07861328125, + "loss_xval": 1.4609375, + "num_input_tokens_seen": 499805168, + "step": 7472 + }, + { + "epoch": 0.848, + "grad_norm": 26.95461082458496, + "learning_rate": 5e-05, + "loss": 1.3018, + "num_input_tokens_seen": 499871812, + "step": 7473 + }, + { + "epoch": 0.848, + "loss": 1.55948805809021, + "loss_ce": 0.008706754073500633, + "loss_iou": 0.6328125, + "loss_num": 0.0576171875, + "loss_xval": 1.546875, + "num_input_tokens_seen": 499871812, + "step": 7473 + }, + { + "epoch": 0.848113475177305, + "grad_norm": 30.753259658813477, + "learning_rate": 5e-05, + "loss": 1.2433, + "num_input_tokens_seen": 499938924, + "step": 7474 + }, + { + "epoch": 0.848113475177305, + "loss": 1.3593392372131348, + "loss_ce": 0.00435879360884428, + "loss_iou": 0.55078125, + "loss_num": 0.050048828125, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 499938924, + "step": 7474 + }, + { + "epoch": 0.8482269503546099, + "grad_norm": 38.26863479614258, + "learning_rate": 5e-05, + "loss": 1.0995, + "num_input_tokens_seen": 500004980, + "step": 7475 + }, + { + "epoch": 0.8482269503546099, + "loss": 1.1259167194366455, + "loss_ce": 0.003846371080726385, + "loss_iou": 0.498046875, + "loss_num": 0.025390625, + "loss_xval": 1.125, + "num_input_tokens_seen": 500004980, + "step": 7475 + }, + { + "epoch": 0.8483404255319149, + "grad_norm": 26.634010314941406, + "learning_rate": 5e-05, + "loss": 1.14, + "num_input_tokens_seen": 500070928, + "step": 7476 + }, + { + "epoch": 0.8483404255319149, + "loss": 1.0697453022003174, + "loss_ce": 0.00626878347247839, + "loss_iou": 0.44921875, + "loss_num": 0.033447265625, + "loss_xval": 1.0625, + "num_input_tokens_seen": 500070928, + "step": 7476 + }, + { + "epoch": 0.8484539007092199, + "grad_norm": 44.00324249267578, + "learning_rate": 5e-05, + "loss": 1.0421, + "num_input_tokens_seen": 500138380, + "step": 7477 + }, + { + "epoch": 0.8484539007092199, + "loss": 1.0050935745239258, + "loss_ce": 0.0065583945252001286, + "loss_iou": 0.408203125, + "loss_num": 0.0361328125, + "loss_xval": 1.0, + "num_input_tokens_seen": 500138380, + "step": 7477 + }, + { + "epoch": 0.8485673758865249, + "grad_norm": 21.134347915649414, + "learning_rate": 5e-05, + "loss": 1.0819, + "num_input_tokens_seen": 500204088, + "step": 7478 + }, + { + "epoch": 0.8485673758865249, + "loss": 1.007814645767212, + "loss_ce": 0.004152603447437286, + "loss_iou": 0.384765625, + "loss_num": 0.04638671875, + "loss_xval": 1.0, + "num_input_tokens_seen": 500204088, + "step": 7478 + }, + { + "epoch": 0.8486808510638297, + "grad_norm": 36.03009796142578, + "learning_rate": 5e-05, + "loss": 0.9948, + "num_input_tokens_seen": 500271888, + "step": 7479 + }, + { + "epoch": 0.8486808510638297, + "loss": 1.1042826175689697, + "loss_ce": 0.007847029715776443, + "loss_iou": 0.447265625, + "loss_num": 0.04052734375, + "loss_xval": 1.09375, + "num_input_tokens_seen": 500271888, + "step": 7479 + }, + { + "epoch": 0.8487943262411347, + "grad_norm": 37.59300994873047, + "learning_rate": 5e-05, + "loss": 1.3965, + "num_input_tokens_seen": 500338352, + "step": 7480 + }, + { + "epoch": 0.8487943262411347, + "loss": 1.3187406063079834, + "loss_ce": 0.008193790912628174, + "loss_iou": 0.5, + "loss_num": 0.0625, + "loss_xval": 1.3125, + "num_input_tokens_seen": 500338352, + "step": 7480 + }, + { + "epoch": 0.8489078014184397, + "grad_norm": 16.763160705566406, + "learning_rate": 5e-05, + "loss": 1.1053, + "num_input_tokens_seen": 500405356, + "step": 7481 + }, + { + "epoch": 0.8489078014184397, + "loss": 1.160474419593811, + "loss_ce": 0.003736100159585476, + "loss_iou": 0.51171875, + "loss_num": 0.0264892578125, + "loss_xval": 1.15625, + "num_input_tokens_seen": 500405356, + "step": 7481 + }, + { + "epoch": 0.8490212765957447, + "grad_norm": 15.674934387207031, + "learning_rate": 5e-05, + "loss": 1.0775, + "num_input_tokens_seen": 500471412, + "step": 7482 + }, + { + "epoch": 0.8490212765957447, + "loss": 1.053704023361206, + "loss_ce": 0.007317200303077698, + "loss_iou": 0.41796875, + "loss_num": 0.042236328125, + "loss_xval": 1.046875, + "num_input_tokens_seen": 500471412, + "step": 7482 + }, + { + "epoch": 0.8491347517730496, + "grad_norm": 17.52893829345703, + "learning_rate": 5e-05, + "loss": 1.0873, + "num_input_tokens_seen": 500537136, + "step": 7483 + }, + { + "epoch": 0.8491347517730496, + "loss": 1.1334048509597778, + "loss_ce": 0.008893115445971489, + "loss_iou": 0.43359375, + "loss_num": 0.0517578125, + "loss_xval": 1.125, + "num_input_tokens_seen": 500537136, + "step": 7483 + }, + { + "epoch": 0.8492482269503546, + "grad_norm": 44.40583038330078, + "learning_rate": 5e-05, + "loss": 1.1862, + "num_input_tokens_seen": 500604276, + "step": 7484 + }, + { + "epoch": 0.8492482269503546, + "loss": 1.1504931449890137, + "loss_ce": 0.004008689895272255, + "loss_iou": 0.46484375, + "loss_num": 0.04345703125, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 500604276, + "step": 7484 + }, + { + "epoch": 0.8493617021276596, + "grad_norm": 32.47159194946289, + "learning_rate": 5e-05, + "loss": 1.2183, + "num_input_tokens_seen": 500671264, + "step": 7485 + }, + { + "epoch": 0.8493617021276596, + "loss": 1.2029414176940918, + "loss_ce": 0.00909379031509161, + "loss_iou": 0.498046875, + "loss_num": 0.039794921875, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 500671264, + "step": 7485 + }, + { + "epoch": 0.8494751773049646, + "grad_norm": 12.402909278869629, + "learning_rate": 5e-05, + "loss": 0.8715, + "num_input_tokens_seen": 500737636, + "step": 7486 + }, + { + "epoch": 0.8494751773049646, + "loss": 0.9294004440307617, + "loss_ce": 0.007525447756052017, + "loss_iou": 0.404296875, + "loss_num": 0.0230712890625, + "loss_xval": 0.921875, + "num_input_tokens_seen": 500737636, + "step": 7486 + }, + { + "epoch": 0.8495886524822696, + "grad_norm": 18.69084930419922, + "learning_rate": 5e-05, + "loss": 1.0171, + "num_input_tokens_seen": 500804108, + "step": 7487 + }, + { + "epoch": 0.8495886524822696, + "loss": 0.8141562342643738, + "loss_ce": 0.004341772757470608, + "loss_iou": 0.34375, + "loss_num": 0.024658203125, + "loss_xval": 0.80859375, + "num_input_tokens_seen": 500804108, + "step": 7487 + }, + { + "epoch": 0.8497021276595744, + "grad_norm": 32.43520736694336, + "learning_rate": 5e-05, + "loss": 1.0631, + "num_input_tokens_seen": 500871704, + "step": 7488 + }, + { + "epoch": 0.8497021276595744, + "loss": 1.0935478210449219, + "loss_ce": 0.00468053063377738, + "loss_iou": 0.443359375, + "loss_num": 0.04052734375, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 500871704, + "step": 7488 + }, + { + "epoch": 0.8498156028368794, + "grad_norm": 29.349430084228516, + "learning_rate": 5e-05, + "loss": 1.3013, + "num_input_tokens_seen": 500939832, + "step": 7489 + }, + { + "epoch": 0.8498156028368794, + "loss": 1.215743064880371, + "loss_ce": 0.0033406747970730066, + "loss_iou": 0.53125, + "loss_num": 0.03076171875, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 500939832, + "step": 7489 + }, + { + "epoch": 0.8499290780141844, + "grad_norm": 13.70203971862793, + "learning_rate": 5e-05, + "loss": 1.0404, + "num_input_tokens_seen": 501007244, + "step": 7490 + }, + { + "epoch": 0.8499290780141844, + "loss": 1.1195576190948486, + "loss_ce": 0.00432316679507494, + "loss_iou": 0.453125, + "loss_num": 0.041259765625, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 501007244, + "step": 7490 + }, + { + "epoch": 0.8500425531914894, + "grad_norm": 18.410810470581055, + "learning_rate": 5e-05, + "loss": 1.1798, + "num_input_tokens_seen": 501074220, + "step": 7491 + }, + { + "epoch": 0.8500425531914894, + "loss": 1.1930557489395142, + "loss_ce": 0.0055557251907885075, + "loss_iou": 0.5, + "loss_num": 0.03759765625, + "loss_xval": 1.1875, + "num_input_tokens_seen": 501074220, + "step": 7491 + }, + { + "epoch": 0.8501560283687943, + "grad_norm": 24.936973571777344, + "learning_rate": 5e-05, + "loss": 1.2285, + "num_input_tokens_seen": 501142240, + "step": 7492 + }, + { + "epoch": 0.8501560283687943, + "loss": 1.2065587043762207, + "loss_ce": 0.004898523446172476, + "loss_iou": 0.47265625, + "loss_num": 0.05078125, + "loss_xval": 1.203125, + "num_input_tokens_seen": 501142240, + "step": 7492 + }, + { + "epoch": 0.8502695035460993, + "grad_norm": 26.84663963317871, + "learning_rate": 5e-05, + "loss": 1.0581, + "num_input_tokens_seen": 501209408, + "step": 7493 + }, + { + "epoch": 0.8502695035460993, + "loss": 1.2806977033615112, + "loss_ce": 0.007260203827172518, + "loss_iou": 0.494140625, + "loss_num": 0.056884765625, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 501209408, + "step": 7493 + }, + { + "epoch": 0.8503829787234043, + "grad_norm": 36.94780731201172, + "learning_rate": 5e-05, + "loss": 1.0145, + "num_input_tokens_seen": 501277196, + "step": 7494 + }, + { + "epoch": 0.8503829787234043, + "loss": 0.9817728996276855, + "loss_ce": 0.004233811981976032, + "loss_iou": 0.44921875, + "loss_num": 0.016357421875, + "loss_xval": 0.9765625, + "num_input_tokens_seen": 501277196, + "step": 7494 + }, + { + "epoch": 0.8504964539007093, + "grad_norm": 32.40327072143555, + "learning_rate": 5e-05, + "loss": 1.3706, + "num_input_tokens_seen": 501343708, + "step": 7495 + }, + { + "epoch": 0.8504964539007093, + "loss": 1.3342397212982178, + "loss_ce": 0.002696725307032466, + "loss_iou": 0.55078125, + "loss_num": 0.0458984375, + "loss_xval": 1.328125, + "num_input_tokens_seen": 501343708, + "step": 7495 + }, + { + "epoch": 0.8506099290780141, + "grad_norm": 32.522422790527344, + "learning_rate": 5e-05, + "loss": 1.2398, + "num_input_tokens_seen": 501410384, + "step": 7496 + }, + { + "epoch": 0.8506099290780141, + "loss": 1.3186980485916138, + "loss_ce": 0.007174571976065636, + "loss_iou": 0.546875, + "loss_num": 0.043701171875, + "loss_xval": 1.3125, + "num_input_tokens_seen": 501410384, + "step": 7496 + }, + { + "epoch": 0.8507234042553191, + "grad_norm": 26.787620544433594, + "learning_rate": 5e-05, + "loss": 1.4186, + "num_input_tokens_seen": 501477120, + "step": 7497 + }, + { + "epoch": 0.8507234042553191, + "loss": 1.430500864982605, + "loss_ce": 0.004719615913927555, + "loss_iou": 0.5703125, + "loss_num": 0.056640625, + "loss_xval": 1.421875, + "num_input_tokens_seen": 501477120, + "step": 7497 + }, + { + "epoch": 0.8508368794326241, + "grad_norm": 25.274059295654297, + "learning_rate": 5e-05, + "loss": 1.321, + "num_input_tokens_seen": 501544256, + "step": 7498 + }, + { + "epoch": 0.8508368794326241, + "loss": 1.2905418872833252, + "loss_ce": 0.0009910622611641884, + "loss_iou": 0.51953125, + "loss_num": 0.0498046875, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 501544256, + "step": 7498 + }, + { + "epoch": 0.8509503546099291, + "grad_norm": 27.35492515563965, + "learning_rate": 5e-05, + "loss": 1.3655, + "num_input_tokens_seen": 501610704, + "step": 7499 + }, + { + "epoch": 0.8509503546099291, + "loss": 1.261497974395752, + "loss_ce": 0.005638563074171543, + "loss_iou": 0.53125, + "loss_num": 0.039306640625, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 501610704, + "step": 7499 + }, + { + "epoch": 0.851063829787234, + "grad_norm": 48.8646354675293, + "learning_rate": 5e-05, + "loss": 1.2152, + "num_input_tokens_seen": 501677692, + "step": 7500 + }, + { + "epoch": 0.851063829787234, + "eval_seeclick_CIoU": 0.4109320789575577, + "eval_seeclick_GIoU": 0.3883696496486664, + "eval_seeclick_IoU": 0.5031837671995163, + "eval_seeclick_MAE_all": 0.15537236630916595, + "eval_seeclick_MAE_h": 0.06950933113694191, + "eval_seeclick_MAE_w": 0.12586355581879616, + "eval_seeclick_MAE_x_boxes": 0.23590490221977234, + "eval_seeclick_MAE_y_boxes": 0.1313842311501503, + "eval_seeclick_NUM_probability": 0.9999594986438751, + "eval_seeclick_inside_bbox": 0.7083333432674408, + "eval_seeclick_loss": 2.3496110439300537, + "eval_seeclick_loss_ce": 0.014333973173052073, + "eval_seeclick_loss_iou": 0.79364013671875, + "eval_seeclick_loss_num": 0.1508312225341797, + "eval_seeclick_loss_xval": 2.3428955078125, + "eval_seeclick_runtime": 64.2963, + "eval_seeclick_samples_per_second": 0.731, + "eval_seeclick_steps_per_second": 0.031, + "num_input_tokens_seen": 501677692, + "step": 7500 + }, + { + "epoch": 0.851063829787234, + "eval_icons_CIoU": 0.5356327295303345, + "eval_icons_GIoU": 0.5309032201766968, + "eval_icons_IoU": 0.5738931596279144, + "eval_icons_MAE_all": 0.12312853336334229, + "eval_icons_MAE_h": 0.07200996205210686, + "eval_icons_MAE_w": 0.11594905704259872, + "eval_icons_MAE_x_boxes": 0.10743373259902, + "eval_icons_MAE_y_boxes": 0.057778600603342056, + "eval_icons_NUM_probability": 0.999986857175827, + "eval_icons_inside_bbox": 0.8350694477558136, + "eval_icons_loss": 2.1500978469848633, + "eval_icons_loss_ce": 8.52114098961465e-05, + "eval_icons_loss_iou": 0.760986328125, + "eval_icons_loss_num": 0.12116622924804688, + "eval_icons_loss_xval": 2.1279296875, + "eval_icons_runtime": 68.1955, + "eval_icons_samples_per_second": 0.733, + "eval_icons_steps_per_second": 0.029, + "num_input_tokens_seen": 501677692, + "step": 7500 + }, + { + "epoch": 0.851063829787234, + "eval_screenspot_CIoU": 0.3365476230780284, + "eval_screenspot_GIoU": 0.2983893156051636, + "eval_screenspot_IoU": 0.41104873021443683, + "eval_screenspot_MAE_all": 0.20466520388921103, + "eval_screenspot_MAE_h": 0.15514193226893744, + "eval_screenspot_MAE_w": 0.18362416326999664, + "eval_screenspot_MAE_x_boxes": 0.22802409529685974, + "eval_screenspot_MAE_y_boxes": 0.11777085314194362, + "eval_screenspot_NUM_probability": 0.9999353488286337, + "eval_screenspot_inside_bbox": 0.6208333373069763, + "eval_screenspot_loss": 2.976738691329956, + "eval_screenspot_loss_ce": 0.015986155718564987, + "eval_screenspot_loss_iou": 0.96142578125, + "eval_screenspot_loss_num": 0.21630859375, + "eval_screenspot_loss_xval": 3.001953125, + "eval_screenspot_runtime": 115.7658, + "eval_screenspot_samples_per_second": 0.769, + "eval_screenspot_steps_per_second": 0.026, + "num_input_tokens_seen": 501677692, + "step": 7500 + }, + { + "epoch": 0.851063829787234, + "eval_compot_CIoU": 0.31461481750011444, + "eval_compot_GIoU": 0.3060735613107681, + "eval_compot_IoU": 0.38777077198028564, + "eval_compot_MAE_all": 0.21025163680315018, + "eval_compot_MAE_h": 0.11703944951295853, + "eval_compot_MAE_w": 0.2708068937063217, + "eval_compot_MAE_x_boxes": 0.18557403981685638, + "eval_compot_MAE_y_boxes": 0.07732896134257317, + "eval_compot_NUM_probability": 0.9999743103981018, + "eval_compot_inside_bbox": 0.5, + "eval_compot_loss": 2.95400333404541, + "eval_compot_loss_ce": 0.0074720559641718864, + "eval_compot_loss_iou": 0.966064453125, + "eval_compot_loss_num": 0.21204376220703125, + "eval_compot_loss_xval": 2.99267578125, + "eval_compot_runtime": 68.0546, + "eval_compot_samples_per_second": 0.735, + "eval_compot_steps_per_second": 0.029, + "num_input_tokens_seen": 501677692, + "step": 7500 + }, + { + "epoch": 0.851063829787234, + "loss": 2.984832763671875, + "loss_ce": 0.007293728645890951, + "loss_iou": 0.9765625, + "loss_num": 0.205078125, + "loss_xval": 2.984375, + "num_input_tokens_seen": 501677692, + "step": 7500 + }, + { + "epoch": 0.851177304964539, + "grad_norm": 36.947608947753906, + "learning_rate": 5e-05, + "loss": 1.1476, + "num_input_tokens_seen": 501745676, + "step": 7501 + }, + { + "epoch": 0.851177304964539, + "loss": 0.9271785616874695, + "loss_ce": 0.007256730459630489, + "loss_iou": 0.37890625, + "loss_num": 0.031982421875, + "loss_xval": 0.921875, + "num_input_tokens_seen": 501745676, + "step": 7501 + }, + { + "epoch": 0.851290780141844, + "grad_norm": 108.04300689697266, + "learning_rate": 5e-05, + "loss": 1.1426, + "num_input_tokens_seen": 501811780, + "step": 7502 + }, + { + "epoch": 0.851290780141844, + "loss": 0.9820061922073364, + "loss_ce": 0.0067254407331347466, + "loss_iou": 0.39453125, + "loss_num": 0.037109375, + "loss_xval": 0.9765625, + "num_input_tokens_seen": 501811780, + "step": 7502 + }, + { + "epoch": 0.851404255319149, + "grad_norm": 68.96979522705078, + "learning_rate": 5e-05, + "loss": 1.0669, + "num_input_tokens_seen": 501878416, + "step": 7503 + }, + { + "epoch": 0.851404255319149, + "loss": 1.072098731994629, + "loss_ce": 0.004227672703564167, + "loss_iou": 0.427734375, + "loss_num": 0.042236328125, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 501878416, + "step": 7503 + }, + { + "epoch": 0.8515177304964539, + "grad_norm": 19.243009567260742, + "learning_rate": 5e-05, + "loss": 1.1332, + "num_input_tokens_seen": 501945756, + "step": 7504 + }, + { + "epoch": 0.8515177304964539, + "loss": 1.2995315790176392, + "loss_ce": 0.0036331601440906525, + "loss_iou": 0.546875, + "loss_num": 0.039794921875, + "loss_xval": 1.296875, + "num_input_tokens_seen": 501945756, + "step": 7504 + }, + { + "epoch": 0.8516312056737588, + "grad_norm": 37.68891525268555, + "learning_rate": 5e-05, + "loss": 1.0739, + "num_input_tokens_seen": 502011840, + "step": 7505 + }, + { + "epoch": 0.8516312056737588, + "loss": 1.0029268264770508, + "loss_ce": 0.004635767079889774, + "loss_iou": 0.4453125, + "loss_num": 0.0213623046875, + "loss_xval": 1.0, + "num_input_tokens_seen": 502011840, + "step": 7505 + }, + { + "epoch": 0.8517446808510638, + "grad_norm": 33.695343017578125, + "learning_rate": 5e-05, + "loss": 1.4159, + "num_input_tokens_seen": 502078680, + "step": 7506 + }, + { + "epoch": 0.8517446808510638, + "loss": 1.2961069345474243, + "loss_ce": 0.004602973349392414, + "loss_iou": 0.5390625, + "loss_num": 0.04296875, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 502078680, + "step": 7506 + }, + { + "epoch": 0.8518581560283688, + "grad_norm": 17.64381980895996, + "learning_rate": 5e-05, + "loss": 0.9757, + "num_input_tokens_seen": 502145436, + "step": 7507 + }, + { + "epoch": 0.8518581560283688, + "loss": 0.7352925539016724, + "loss_ce": 0.007875576615333557, + "loss_iou": 0.2890625, + "loss_num": 0.030029296875, + "loss_xval": 0.7265625, + "num_input_tokens_seen": 502145436, + "step": 7507 + }, + { + "epoch": 0.8519716312056738, + "grad_norm": 33.498600006103516, + "learning_rate": 5e-05, + "loss": 0.952, + "num_input_tokens_seen": 502211952, + "step": 7508 + }, + { + "epoch": 0.8519716312056738, + "loss": 0.8882203698158264, + "loss_ce": 0.005407859571278095, + "loss_iou": 0.3515625, + "loss_num": 0.035888671875, + "loss_xval": 0.8828125, + "num_input_tokens_seen": 502211952, + "step": 7508 + }, + { + "epoch": 0.8520851063829787, + "grad_norm": 14.717931747436523, + "learning_rate": 5e-05, + "loss": 0.9992, + "num_input_tokens_seen": 502277708, + "step": 7509 + }, + { + "epoch": 0.8520851063829787, + "loss": 0.9254254102706909, + "loss_ce": 0.007944885641336441, + "loss_iou": 0.3671875, + "loss_num": 0.03662109375, + "loss_xval": 0.91796875, + "num_input_tokens_seen": 502277708, + "step": 7509 + }, + { + "epoch": 0.8521985815602837, + "grad_norm": 13.919044494628906, + "learning_rate": 5e-05, + "loss": 1.1283, + "num_input_tokens_seen": 502344232, + "step": 7510 + }, + { + "epoch": 0.8521985815602837, + "loss": 1.2941291332244873, + "loss_ce": 0.0060431938618421555, + "loss_iou": 0.48046875, + "loss_num": 0.06494140625, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 502344232, + "step": 7510 + }, + { + "epoch": 0.8523120567375887, + "grad_norm": 39.43287658691406, + "learning_rate": 5e-05, + "loss": 1.1644, + "num_input_tokens_seen": 502410992, + "step": 7511 + }, + { + "epoch": 0.8523120567375887, + "loss": 1.1637765169143677, + "loss_ce": 0.0055734566412866116, + "loss_iou": 0.4765625, + "loss_num": 0.041259765625, + "loss_xval": 1.15625, + "num_input_tokens_seen": 502410992, + "step": 7511 + }, + { + "epoch": 0.8524255319148936, + "grad_norm": 55.99016189575195, + "learning_rate": 5e-05, + "loss": 1.2884, + "num_input_tokens_seen": 502478020, + "step": 7512 + }, + { + "epoch": 0.8524255319148936, + "loss": 1.4275448322296143, + "loss_ce": 0.006158198695629835, + "loss_iou": 0.59375, + "loss_num": 0.04736328125, + "loss_xval": 1.421875, + "num_input_tokens_seen": 502478020, + "step": 7512 + }, + { + "epoch": 0.8525390070921985, + "grad_norm": 42.655147552490234, + "learning_rate": 5e-05, + "loss": 1.0677, + "num_input_tokens_seen": 502544148, + "step": 7513 + }, + { + "epoch": 0.8525390070921985, + "loss": 1.0011231899261475, + "loss_ce": 0.007959059439599514, + "loss_iou": 0.3984375, + "loss_num": 0.0390625, + "loss_xval": 0.9921875, + "num_input_tokens_seen": 502544148, + "step": 7513 + }, + { + "epoch": 0.8526524822695035, + "grad_norm": 33.47914505004883, + "learning_rate": 5e-05, + "loss": 1.0796, + "num_input_tokens_seen": 502610944, + "step": 7514 + }, + { + "epoch": 0.8526524822695035, + "loss": 1.1156247854232788, + "loss_ce": 0.004296659491956234, + "loss_iou": 0.484375, + "loss_num": 0.028564453125, + "loss_xval": 1.109375, + "num_input_tokens_seen": 502610944, + "step": 7514 + }, + { + "epoch": 0.8527659574468085, + "grad_norm": 24.604774475097656, + "learning_rate": 5e-05, + "loss": 0.9145, + "num_input_tokens_seen": 502677788, + "step": 7515 + }, + { + "epoch": 0.8527659574468085, + "loss": 0.9918372631072998, + "loss_ce": 0.009659495204687119, + "loss_iou": 0.43359375, + "loss_num": 0.0225830078125, + "loss_xval": 0.98046875, + "num_input_tokens_seen": 502677788, + "step": 7515 + }, + { + "epoch": 0.8528794326241135, + "grad_norm": 30.531503677368164, + "learning_rate": 5e-05, + "loss": 1.0625, + "num_input_tokens_seen": 502743872, + "step": 7516 + }, + { + "epoch": 0.8528794326241135, + "loss": 1.1964850425720215, + "loss_ce": 0.0055671450681984425, + "loss_iou": 0.51953125, + "loss_num": 0.031005859375, + "loss_xval": 1.1875, + "num_input_tokens_seen": 502743872, + "step": 7516 + }, + { + "epoch": 0.8529929078014185, + "grad_norm": 528.6150512695312, + "learning_rate": 5e-05, + "loss": 1.1943, + "num_input_tokens_seen": 502811164, + "step": 7517 + }, + { + "epoch": 0.8529929078014185, + "loss": 0.9888784885406494, + "loss_ce": 0.009386338293552399, + "loss_iou": 0.44140625, + "loss_num": 0.01904296875, + "loss_xval": 0.98046875, + "num_input_tokens_seen": 502811164, + "step": 7517 + }, + { + "epoch": 0.8531063829787234, + "grad_norm": 32.510986328125, + "learning_rate": 5e-05, + "loss": 1.2117, + "num_input_tokens_seen": 502877884, + "step": 7518 + }, + { + "epoch": 0.8531063829787234, + "loss": 1.2570345401763916, + "loss_ce": 0.008499487303197384, + "loss_iou": 0.50390625, + "loss_num": 0.0478515625, + "loss_xval": 1.25, + "num_input_tokens_seen": 502877884, + "step": 7518 + }, + { + "epoch": 0.8532198581560284, + "grad_norm": 21.256053924560547, + "learning_rate": 5e-05, + "loss": 1.0945, + "num_input_tokens_seen": 502945824, + "step": 7519 + }, + { + "epoch": 0.8532198581560284, + "loss": 0.863097071647644, + "loss_ce": 0.0051869493909180164, + "loss_iou": 0.36328125, + "loss_num": 0.0262451171875, + "loss_xval": 0.859375, + "num_input_tokens_seen": 502945824, + "step": 7519 + }, + { + "epoch": 0.8533333333333334, + "grad_norm": 29.467124938964844, + "learning_rate": 5e-05, + "loss": 1.1569, + "num_input_tokens_seen": 503012072, + "step": 7520 + }, + { + "epoch": 0.8533333333333334, + "loss": 1.042792558670044, + "loss_ce": 0.00910116732120514, + "loss_iou": 0.447265625, + "loss_num": 0.0277099609375, + "loss_xval": 1.03125, + "num_input_tokens_seen": 503012072, + "step": 7520 + }, + { + "epoch": 0.8534468085106383, + "grad_norm": 22.679065704345703, + "learning_rate": 5e-05, + "loss": 1.0, + "num_input_tokens_seen": 503078780, + "step": 7521 + }, + { + "epoch": 0.8534468085106383, + "loss": 1.0411185026168823, + "loss_ce": 0.007701748516410589, + "loss_iou": 0.41796875, + "loss_num": 0.039306640625, + "loss_xval": 1.03125, + "num_input_tokens_seen": 503078780, + "step": 7521 + }, + { + "epoch": 0.8535602836879432, + "grad_norm": 31.807727813720703, + "learning_rate": 5e-05, + "loss": 1.2462, + "num_input_tokens_seen": 503144772, + "step": 7522 + }, + { + "epoch": 0.8535602836879432, + "loss": 1.1324889659881592, + "loss_ce": 0.009442070499062538, + "loss_iou": 0.451171875, + "loss_num": 0.044189453125, + "loss_xval": 1.125, + "num_input_tokens_seen": 503144772, + "step": 7522 + }, + { + "epoch": 0.8536737588652482, + "grad_norm": 45.389892578125, + "learning_rate": 5e-05, + "loss": 1.3101, + "num_input_tokens_seen": 503212012, + "step": 7523 + }, + { + "epoch": 0.8536737588652482, + "loss": 1.0502169132232666, + "loss_ce": 0.005783365108072758, + "loss_iou": 0.423828125, + "loss_num": 0.039306640625, + "loss_xval": 1.046875, + "num_input_tokens_seen": 503212012, + "step": 7523 + }, + { + "epoch": 0.8537872340425532, + "grad_norm": 28.945575714111328, + "learning_rate": 5e-05, + "loss": 1.222, + "num_input_tokens_seen": 503278528, + "step": 7524 + }, + { + "epoch": 0.8537872340425532, + "loss": 1.3786544799804688, + "loss_ce": 0.004631035961210728, + "loss_iou": 0.57421875, + "loss_num": 0.044921875, + "loss_xval": 1.375, + "num_input_tokens_seen": 503278528, + "step": 7524 + }, + { + "epoch": 0.8539007092198582, + "grad_norm": 15.945329666137695, + "learning_rate": 5e-05, + "loss": 1.0419, + "num_input_tokens_seen": 503345624, + "step": 7525 + }, + { + "epoch": 0.8539007092198582, + "loss": 1.044689416885376, + "loss_ce": 0.010021371766924858, + "loss_iou": 0.43359375, + "loss_num": 0.033935546875, + "loss_xval": 1.03125, + "num_input_tokens_seen": 503345624, + "step": 7525 + }, + { + "epoch": 0.8540141843971631, + "grad_norm": 27.158044815063477, + "learning_rate": 5e-05, + "loss": 1.112, + "num_input_tokens_seen": 503412416, + "step": 7526 + }, + { + "epoch": 0.8540141843971631, + "loss": 1.212621808052063, + "loss_ce": 0.010717486962676048, + "loss_iou": 0.443359375, + "loss_num": 0.06298828125, + "loss_xval": 1.203125, + "num_input_tokens_seen": 503412416, + "step": 7526 + }, + { + "epoch": 0.8541276595744681, + "grad_norm": 38.2752571105957, + "learning_rate": 5e-05, + "loss": 1.0693, + "num_input_tokens_seen": 503478328, + "step": 7527 + }, + { + "epoch": 0.8541276595744681, + "loss": 1.2506415843963623, + "loss_ce": 0.007477468810975552, + "loss_iou": 0.5390625, + "loss_num": 0.033447265625, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 503478328, + "step": 7527 + }, + { + "epoch": 0.8542411347517731, + "grad_norm": 30.839927673339844, + "learning_rate": 5e-05, + "loss": 1.1888, + "num_input_tokens_seen": 503545544, + "step": 7528 + }, + { + "epoch": 0.8542411347517731, + "loss": 1.186575174331665, + "loss_ce": 0.007375862915068865, + "loss_iou": 0.482421875, + "loss_num": 0.04345703125, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 503545544, + "step": 7528 + }, + { + "epoch": 0.854354609929078, + "grad_norm": 34.56507110595703, + "learning_rate": 5e-05, + "loss": 0.9853, + "num_input_tokens_seen": 503612604, + "step": 7529 + }, + { + "epoch": 0.854354609929078, + "loss": 1.0381381511688232, + "loss_ce": 0.007376337423920631, + "loss_iou": 0.404296875, + "loss_num": 0.044921875, + "loss_xval": 1.03125, + "num_input_tokens_seen": 503612604, + "step": 7529 + }, + { + "epoch": 0.854468085106383, + "grad_norm": 28.563119888305664, + "learning_rate": 5e-05, + "loss": 1.2506, + "num_input_tokens_seen": 503679340, + "step": 7530 + }, + { + "epoch": 0.854468085106383, + "loss": 1.0665197372436523, + "loss_ce": 0.009879149496555328, + "loss_iou": 0.453125, + "loss_num": 0.02978515625, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 503679340, + "step": 7530 + }, + { + "epoch": 0.8545815602836879, + "grad_norm": 25.664127349853516, + "learning_rate": 5e-05, + "loss": 1.2015, + "num_input_tokens_seen": 503746464, + "step": 7531 + }, + { + "epoch": 0.8545815602836879, + "loss": 1.4808566570281982, + "loss_ce": 0.007712054066359997, + "loss_iou": 0.55859375, + "loss_num": 0.07080078125, + "loss_xval": 1.4765625, + "num_input_tokens_seen": 503746464, + "step": 7531 + }, + { + "epoch": 0.8546950354609929, + "grad_norm": 31.657007217407227, + "learning_rate": 5e-05, + "loss": 1.1817, + "num_input_tokens_seen": 503813356, + "step": 7532 + }, + { + "epoch": 0.8546950354609929, + "loss": 1.0963886976242065, + "loss_ce": 0.007521477527916431, + "loss_iou": 0.46484375, + "loss_num": 0.03173828125, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 503813356, + "step": 7532 + }, + { + "epoch": 0.8548085106382979, + "grad_norm": 53.493743896484375, + "learning_rate": 5e-05, + "loss": 1.0784, + "num_input_tokens_seen": 503881296, + "step": 7533 + }, + { + "epoch": 0.8548085106382979, + "loss": 0.9873063564300537, + "loss_ce": 0.006349314469844103, + "loss_iou": 0.43359375, + "loss_num": 0.0230712890625, + "loss_xval": 0.98046875, + "num_input_tokens_seen": 503881296, + "step": 7533 + }, + { + "epoch": 0.8549219858156029, + "grad_norm": 32.351322174072266, + "learning_rate": 5e-05, + "loss": 1.4206, + "num_input_tokens_seen": 503948868, + "step": 7534 + }, + { + "epoch": 0.8549219858156029, + "loss": 1.4020860195159912, + "loss_ce": 0.00755471782758832, + "loss_iou": 0.6171875, + "loss_num": 0.031982421875, + "loss_xval": 1.390625, + "num_input_tokens_seen": 503948868, + "step": 7534 + }, + { + "epoch": 0.8550354609929078, + "grad_norm": 29.63263702392578, + "learning_rate": 5e-05, + "loss": 1.1324, + "num_input_tokens_seen": 504015660, + "step": 7535 + }, + { + "epoch": 0.8550354609929078, + "loss": 0.9896847009658813, + "loss_ce": 0.0062863463535904884, + "loss_iou": 0.40625, + "loss_num": 0.033935546875, + "loss_xval": 0.984375, + "num_input_tokens_seen": 504015660, + "step": 7535 + }, + { + "epoch": 0.8551489361702128, + "grad_norm": 15.663886070251465, + "learning_rate": 5e-05, + "loss": 1.1102, + "num_input_tokens_seen": 504082700, + "step": 7536 + }, + { + "epoch": 0.8551489361702128, + "loss": 1.3050575256347656, + "loss_ce": 0.005741098430007696, + "loss_iou": 0.5390625, + "loss_num": 0.0439453125, + "loss_xval": 1.296875, + "num_input_tokens_seen": 504082700, + "step": 7536 + }, + { + "epoch": 0.8552624113475177, + "grad_norm": 28.198022842407227, + "learning_rate": 5e-05, + "loss": 1.1605, + "num_input_tokens_seen": 504150288, + "step": 7537 + }, + { + "epoch": 0.8552624113475177, + "loss": 1.155400037765503, + "loss_ce": 0.008427442982792854, + "loss_iou": 0.44921875, + "loss_num": 0.049560546875, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 504150288, + "step": 7537 + }, + { + "epoch": 0.8553758865248227, + "grad_norm": 94.31521606445312, + "learning_rate": 5e-05, + "loss": 1.2288, + "num_input_tokens_seen": 504217040, + "step": 7538 + }, + { + "epoch": 0.8553758865248227, + "loss": 1.2504537105560303, + "loss_ce": 0.004848284646868706, + "loss_iou": 0.478515625, + "loss_num": 0.05810546875, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 504217040, + "step": 7538 + }, + { + "epoch": 0.8554893617021276, + "grad_norm": 38.924537658691406, + "learning_rate": 5e-05, + "loss": 0.9801, + "num_input_tokens_seen": 504284484, + "step": 7539 + }, + { + "epoch": 0.8554893617021276, + "loss": 0.9728374481201172, + "loss_ce": 0.006528859026730061, + "loss_iou": 0.4140625, + "loss_num": 0.027587890625, + "loss_xval": 0.96484375, + "num_input_tokens_seen": 504284484, + "step": 7539 + }, + { + "epoch": 0.8556028368794326, + "grad_norm": 31.751605987548828, + "learning_rate": 5e-05, + "loss": 1.1643, + "num_input_tokens_seen": 504351160, + "step": 7540 + }, + { + "epoch": 0.8556028368794326, + "loss": 1.2784981727600098, + "loss_ce": 0.006769741885364056, + "loss_iou": 0.5, + "loss_num": 0.054443359375, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 504351160, + "step": 7540 + }, + { + "epoch": 0.8557163120567376, + "grad_norm": 33.259300231933594, + "learning_rate": 5e-05, + "loss": 1.1387, + "num_input_tokens_seen": 504419304, + "step": 7541 + }, + { + "epoch": 0.8557163120567376, + "loss": 1.1112957000732422, + "loss_ce": 0.004850310739129782, + "loss_iou": 0.4609375, + "loss_num": 0.037109375, + "loss_xval": 1.109375, + "num_input_tokens_seen": 504419304, + "step": 7541 + }, + { + "epoch": 0.8558297872340426, + "grad_norm": 29.56549644470215, + "learning_rate": 5e-05, + "loss": 1.4128, + "num_input_tokens_seen": 504486308, + "step": 7542 + }, + { + "epoch": 0.8558297872340426, + "loss": 1.5404484272003174, + "loss_ce": 0.007245284505188465, + "loss_iou": 0.6171875, + "loss_num": 0.059814453125, + "loss_xval": 1.53125, + "num_input_tokens_seen": 504486308, + "step": 7542 + }, + { + "epoch": 0.8559432624113475, + "grad_norm": 28.922199249267578, + "learning_rate": 5e-05, + "loss": 1.3011, + "num_input_tokens_seen": 504553540, + "step": 7543 + }, + { + "epoch": 0.8559432624113475, + "loss": 1.1980282068252563, + "loss_ce": 0.00857510231435299, + "loss_iou": 0.458984375, + "loss_num": 0.053955078125, + "loss_xval": 1.1875, + "num_input_tokens_seen": 504553540, + "step": 7543 + }, + { + "epoch": 0.8560567375886525, + "grad_norm": 29.532928466796875, + "learning_rate": 5e-05, + "loss": 1.156, + "num_input_tokens_seen": 504619840, + "step": 7544 + }, + { + "epoch": 0.8560567375886525, + "loss": 1.1168911457061768, + "loss_ce": 0.006539624184370041, + "loss_iou": 0.4765625, + "loss_num": 0.03125, + "loss_xval": 1.109375, + "num_input_tokens_seen": 504619840, + "step": 7544 + }, + { + "epoch": 0.8561702127659574, + "grad_norm": 27.284040451049805, + "learning_rate": 5e-05, + "loss": 1.1583, + "num_input_tokens_seen": 504685936, + "step": 7545 + }, + { + "epoch": 0.8561702127659574, + "loss": 1.1642117500305176, + "loss_ce": 0.008449978195130825, + "loss_iou": 0.494140625, + "loss_num": 0.033447265625, + "loss_xval": 1.15625, + "num_input_tokens_seen": 504685936, + "step": 7545 + }, + { + "epoch": 0.8562836879432624, + "grad_norm": 18.714921951293945, + "learning_rate": 5e-05, + "loss": 1.0426, + "num_input_tokens_seen": 504753260, + "step": 7546 + }, + { + "epoch": 0.8562836879432624, + "loss": 0.9791948795318604, + "loss_ce": 0.006172477267682552, + "loss_iou": 0.392578125, + "loss_num": 0.03759765625, + "loss_xval": 0.97265625, + "num_input_tokens_seen": 504753260, + "step": 7546 + }, + { + "epoch": 0.8563971631205674, + "grad_norm": 97.73609924316406, + "learning_rate": 5e-05, + "loss": 1.1396, + "num_input_tokens_seen": 504819088, + "step": 7547 + }, + { + "epoch": 0.8563971631205674, + "loss": 1.0546774864196777, + "loss_ce": 0.007558387704193592, + "loss_iou": 0.3828125, + "loss_num": 0.055908203125, + "loss_xval": 1.046875, + "num_input_tokens_seen": 504819088, + "step": 7547 + }, + { + "epoch": 0.8565106382978723, + "grad_norm": 23.654951095581055, + "learning_rate": 5e-05, + "loss": 1.1597, + "num_input_tokens_seen": 504886312, + "step": 7548 + }, + { + "epoch": 0.8565106382978723, + "loss": 1.2475090026855469, + "loss_ce": 0.008251247927546501, + "loss_iou": 0.51171875, + "loss_num": 0.04345703125, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 504886312, + "step": 7548 + }, + { + "epoch": 0.8566241134751773, + "grad_norm": 35.301025390625, + "learning_rate": 5e-05, + "loss": 1.0481, + "num_input_tokens_seen": 504953184, + "step": 7549 + }, + { + "epoch": 0.8566241134751773, + "loss": 1.0271272659301758, + "loss_ce": 0.005887152161449194, + "loss_iou": 0.41796875, + "loss_num": 0.037353515625, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 504953184, + "step": 7549 + }, + { + "epoch": 0.8567375886524823, + "grad_norm": 22.2248477935791, + "learning_rate": 5e-05, + "loss": 1.3325, + "num_input_tokens_seen": 505020780, + "step": 7550 + }, + { + "epoch": 0.8567375886524823, + "loss": 1.0910861492156982, + "loss_ce": 0.008322443813085556, + "loss_iou": 0.421875, + "loss_num": 0.047607421875, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 505020780, + "step": 7550 + }, + { + "epoch": 0.8568510638297873, + "grad_norm": 25.02309226989746, + "learning_rate": 5e-05, + "loss": 1.3038, + "num_input_tokens_seen": 505087364, + "step": 7551 + }, + { + "epoch": 0.8568510638297873, + "loss": 1.2903928756713867, + "loss_ce": 0.007677928544580936, + "loss_iou": 0.494140625, + "loss_num": 0.05859375, + "loss_xval": 1.28125, + "num_input_tokens_seen": 505087364, + "step": 7551 + }, + { + "epoch": 0.8569645390070922, + "grad_norm": 18.055591583251953, + "learning_rate": 5e-05, + "loss": 1.1158, + "num_input_tokens_seen": 505154492, + "step": 7552 + }, + { + "epoch": 0.8569645390070922, + "loss": 1.1314795017242432, + "loss_ce": 0.005014592781662941, + "loss_iou": 0.44921875, + "loss_num": 0.045654296875, + "loss_xval": 1.125, + "num_input_tokens_seen": 505154492, + "step": 7552 + }, + { + "epoch": 0.8570780141843971, + "grad_norm": 23.273740768432617, + "learning_rate": 5e-05, + "loss": 1.2538, + "num_input_tokens_seen": 505220480, + "step": 7553 + }, + { + "epoch": 0.8570780141843971, + "loss": 1.112792730331421, + "loss_ce": 0.004394347779452801, + "loss_iou": 0.4609375, + "loss_num": 0.037353515625, + "loss_xval": 1.109375, + "num_input_tokens_seen": 505220480, + "step": 7553 + }, + { + "epoch": 0.8571914893617021, + "grad_norm": 14.621659278869629, + "learning_rate": 5e-05, + "loss": 1.2791, + "num_input_tokens_seen": 505287960, + "step": 7554 + }, + { + "epoch": 0.8571914893617021, + "loss": 1.213058590888977, + "loss_ce": 0.007980489172041416, + "loss_iou": 0.4765625, + "loss_num": 0.05029296875, + "loss_xval": 1.203125, + "num_input_tokens_seen": 505287960, + "step": 7554 + }, + { + "epoch": 0.8573049645390071, + "grad_norm": 22.882871627807617, + "learning_rate": 5e-05, + "loss": 1.0611, + "num_input_tokens_seen": 505354448, + "step": 7555 + }, + { + "epoch": 0.8573049645390071, + "loss": 1.1894307136535645, + "loss_ce": 0.008766617625951767, + "loss_iou": 0.47265625, + "loss_num": 0.047119140625, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 505354448, + "step": 7555 + }, + { + "epoch": 0.857418439716312, + "grad_norm": 28.510631561279297, + "learning_rate": 5e-05, + "loss": 1.0069, + "num_input_tokens_seen": 505421228, + "step": 7556 + }, + { + "epoch": 0.857418439716312, + "loss": 0.8331307172775269, + "loss_ce": 0.0064705610275268555, + "loss_iou": 0.33203125, + "loss_num": 0.03271484375, + "loss_xval": 0.828125, + "num_input_tokens_seen": 505421228, + "step": 7556 + }, + { + "epoch": 0.857531914893617, + "grad_norm": 35.13850021362305, + "learning_rate": 5e-05, + "loss": 0.9999, + "num_input_tokens_seen": 505489216, + "step": 7557 + }, + { + "epoch": 0.857531914893617, + "loss": 0.9475510120391846, + "loss_ce": 0.010539274662733078, + "loss_iou": 0.38671875, + "loss_num": 0.0322265625, + "loss_xval": 0.9375, + "num_input_tokens_seen": 505489216, + "step": 7557 + }, + { + "epoch": 0.857645390070922, + "grad_norm": 30.434101104736328, + "learning_rate": 5e-05, + "loss": 1.0006, + "num_input_tokens_seen": 505555036, + "step": 7558 + }, + { + "epoch": 0.857645390070922, + "loss": 0.747657060623169, + "loss_ce": 0.009131673723459244, + "loss_iou": 0.318359375, + "loss_num": 0.020751953125, + "loss_xval": 0.73828125, + "num_input_tokens_seen": 505555036, + "step": 7558 + }, + { + "epoch": 0.857758865248227, + "grad_norm": 30.703968048095703, + "learning_rate": 5e-05, + "loss": 1.1299, + "num_input_tokens_seen": 505621672, + "step": 7559 + }, + { + "epoch": 0.857758865248227, + "loss": 0.9516172409057617, + "loss_ce": 0.006060585379600525, + "loss_iou": 0.357421875, + "loss_num": 0.046142578125, + "loss_xval": 0.9453125, + "num_input_tokens_seen": 505621672, + "step": 7559 + }, + { + "epoch": 0.857872340425532, + "grad_norm": 33.9035758972168, + "learning_rate": 5e-05, + "loss": 1.1901, + "num_input_tokens_seen": 505687996, + "step": 7560 + }, + { + "epoch": 0.857872340425532, + "loss": 1.3362982273101807, + "loss_ce": 0.004267033189535141, + "loss_iou": 0.55078125, + "loss_num": 0.046630859375, + "loss_xval": 1.328125, + "num_input_tokens_seen": 505687996, + "step": 7560 + }, + { + "epoch": 0.8579858156028369, + "grad_norm": 26.97953224182129, + "learning_rate": 5e-05, + "loss": 1.0813, + "num_input_tokens_seen": 505755316, + "step": 7561 + }, + { + "epoch": 0.8579858156028369, + "loss": 0.9978429079055786, + "loss_ce": 0.004312630742788315, + "loss_iou": 0.41015625, + "loss_num": 0.03466796875, + "loss_xval": 0.9921875, + "num_input_tokens_seen": 505755316, + "step": 7561 + }, + { + "epoch": 0.8580992907801418, + "grad_norm": 25.14840316772461, + "learning_rate": 5e-05, + "loss": 1.1282, + "num_input_tokens_seen": 505822324, + "step": 7562 + }, + { + "epoch": 0.8580992907801418, + "loss": 1.0099354982376099, + "loss_ce": 0.0074941236525774, + "loss_iou": 0.43359375, + "loss_num": 0.027099609375, + "loss_xval": 1.0, + "num_input_tokens_seen": 505822324, + "step": 7562 + }, + { + "epoch": 0.8582127659574468, + "grad_norm": 20.44972038269043, + "learning_rate": 5e-05, + "loss": 1.0627, + "num_input_tokens_seen": 505888732, + "step": 7563 + }, + { + "epoch": 0.8582127659574468, + "loss": 1.0894900560379028, + "loss_ce": 0.005017424002289772, + "loss_iou": 0.43359375, + "loss_num": 0.04296875, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 505888732, + "step": 7563 + }, + { + "epoch": 0.8583262411347518, + "grad_norm": 26.51669692993164, + "learning_rate": 5e-05, + "loss": 1.0849, + "num_input_tokens_seen": 505954872, + "step": 7564 + }, + { + "epoch": 0.8583262411347518, + "loss": 0.9681527614593506, + "loss_ce": 0.0059335497207939625, + "loss_iou": 0.431640625, + "loss_num": 0.020263671875, + "loss_xval": 0.9609375, + "num_input_tokens_seen": 505954872, + "step": 7564 + }, + { + "epoch": 0.8584397163120567, + "grad_norm": 13.817739486694336, + "learning_rate": 5e-05, + "loss": 1.0383, + "num_input_tokens_seen": 506021772, + "step": 7565 + }, + { + "epoch": 0.8584397163120567, + "loss": 0.9785832166671753, + "loss_ce": 0.005682826973497868, + "loss_iou": 0.39453125, + "loss_num": 0.037109375, + "loss_xval": 0.97265625, + "num_input_tokens_seen": 506021772, + "step": 7565 + }, + { + "epoch": 0.8585531914893617, + "grad_norm": 25.439800262451172, + "learning_rate": 5e-05, + "loss": 1.2319, + "num_input_tokens_seen": 506088076, + "step": 7566 + }, + { + "epoch": 0.8585531914893617, + "loss": 1.4217426776885986, + "loss_ce": 0.004750588908791542, + "loss_iou": 0.5546875, + "loss_num": 0.0615234375, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 506088076, + "step": 7566 + }, + { + "epoch": 0.8586666666666667, + "grad_norm": 37.11408996582031, + "learning_rate": 5e-05, + "loss": 1.1139, + "num_input_tokens_seen": 506155096, + "step": 7567 + }, + { + "epoch": 0.8586666666666667, + "loss": 1.1378424167633057, + "loss_ce": 0.003565091174095869, + "loss_iou": 0.474609375, + "loss_num": 0.036865234375, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 506155096, + "step": 7567 + }, + { + "epoch": 0.8587801418439717, + "grad_norm": 35.21776580810547, + "learning_rate": 5e-05, + "loss": 1.1133, + "num_input_tokens_seen": 506220680, + "step": 7568 + }, + { + "epoch": 0.8587801418439717, + "loss": 1.1005994081497192, + "loss_ce": 0.006361142732203007, + "loss_iou": 0.447265625, + "loss_num": 0.04052734375, + "loss_xval": 1.09375, + "num_input_tokens_seen": 506220680, + "step": 7568 + }, + { + "epoch": 0.8588936170212766, + "grad_norm": 24.3530330657959, + "learning_rate": 5e-05, + "loss": 1.1128, + "num_input_tokens_seen": 506287592, + "step": 7569 + }, + { + "epoch": 0.8588936170212766, + "loss": 1.2574827671051025, + "loss_ce": 0.009435982443392277, + "loss_iou": 0.51171875, + "loss_num": 0.044921875, + "loss_xval": 1.25, + "num_input_tokens_seen": 506287592, + "step": 7569 + }, + { + "epoch": 0.8590070921985815, + "grad_norm": 18.5660400390625, + "learning_rate": 5e-05, + "loss": 1.1883, + "num_input_tokens_seen": 506353580, + "step": 7570 + }, + { + "epoch": 0.8590070921985815, + "loss": 1.0541863441467285, + "loss_ce": 0.0058465031906962395, + "loss_iou": 0.423828125, + "loss_num": 0.04052734375, + "loss_xval": 1.046875, + "num_input_tokens_seen": 506353580, + "step": 7570 + }, + { + "epoch": 0.8591205673758865, + "grad_norm": 25.590328216552734, + "learning_rate": 5e-05, + "loss": 1.0919, + "num_input_tokens_seen": 506420908, + "step": 7571 + }, + { + "epoch": 0.8591205673758865, + "loss": 1.3116939067840576, + "loss_ce": 0.006029815413057804, + "loss_iou": 0.5078125, + "loss_num": 0.05810546875, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 506420908, + "step": 7571 + }, + { + "epoch": 0.8592340425531915, + "grad_norm": 23.80573272705078, + "learning_rate": 5e-05, + "loss": 1.101, + "num_input_tokens_seen": 506488080, + "step": 7572 + }, + { + "epoch": 0.8592340425531915, + "loss": 1.2362626791000366, + "loss_ce": 0.008723629638552666, + "loss_iou": 0.494140625, + "loss_num": 0.04833984375, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 506488080, + "step": 7572 + }, + { + "epoch": 0.8593475177304964, + "grad_norm": 33.03302764892578, + "learning_rate": 5e-05, + "loss": 1.1421, + "num_input_tokens_seen": 506555020, + "step": 7573 + }, + { + "epoch": 0.8593475177304964, + "loss": 1.0118517875671387, + "loss_ce": 0.009898596443235874, + "loss_iou": 0.40234375, + "loss_num": 0.03955078125, + "loss_xval": 1.0, + "num_input_tokens_seen": 506555020, + "step": 7573 + }, + { + "epoch": 0.8594609929078014, + "grad_norm": 29.326644897460938, + "learning_rate": 5e-05, + "loss": 1.3135, + "num_input_tokens_seen": 506621952, + "step": 7574 + }, + { + "epoch": 0.8594609929078014, + "loss": 1.3861851692199707, + "loss_ce": 0.0028843535110354424, + "loss_iou": 0.57421875, + "loss_num": 0.04736328125, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 506621952, + "step": 7574 + }, + { + "epoch": 0.8595744680851064, + "grad_norm": 26.04305076599121, + "learning_rate": 5e-05, + "loss": 1.0999, + "num_input_tokens_seen": 506688792, + "step": 7575 + }, + { + "epoch": 0.8595744680851064, + "loss": 1.0934977531433105, + "loss_ce": 0.0090250875800848, + "loss_iou": 0.431640625, + "loss_num": 0.04443359375, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 506688792, + "step": 7575 + }, + { + "epoch": 0.8596879432624114, + "grad_norm": 34.39436340332031, + "learning_rate": 5e-05, + "loss": 1.2278, + "num_input_tokens_seen": 506755852, + "step": 7576 + }, + { + "epoch": 0.8596879432624114, + "loss": 1.179345965385437, + "loss_ce": 0.009424078278243542, + "loss_iou": 0.5, + "loss_num": 0.0341796875, + "loss_xval": 1.171875, + "num_input_tokens_seen": 506755852, + "step": 7576 + }, + { + "epoch": 0.8598014184397164, + "grad_norm": 43.14785385131836, + "learning_rate": 5e-05, + "loss": 1.0295, + "num_input_tokens_seen": 506822652, + "step": 7577 + }, + { + "epoch": 0.8598014184397164, + "loss": 1.1060868501663208, + "loss_ce": 0.009681839495897293, + "loss_iou": 0.44921875, + "loss_num": 0.039794921875, + "loss_xval": 1.09375, + "num_input_tokens_seen": 506822652, + "step": 7577 + }, + { + "epoch": 0.8599148936170212, + "grad_norm": 33.93454360961914, + "learning_rate": 5e-05, + "loss": 1.311, + "num_input_tokens_seen": 506888780, + "step": 7578 + }, + { + "epoch": 0.8599148936170212, + "loss": 1.3033851385116577, + "loss_ce": 0.008463308215141296, + "loss_iou": 0.54296875, + "loss_num": 0.041259765625, + "loss_xval": 1.296875, + "num_input_tokens_seen": 506888780, + "step": 7578 + }, + { + "epoch": 0.8600283687943262, + "grad_norm": 598.232421875, + "learning_rate": 5e-05, + "loss": 1.2474, + "num_input_tokens_seen": 506956708, + "step": 7579 + }, + { + "epoch": 0.8600283687943262, + "loss": 1.3069546222686768, + "loss_ce": 0.007149897515773773, + "loss_iou": 0.54296875, + "loss_num": 0.043212890625, + "loss_xval": 1.296875, + "num_input_tokens_seen": 506956708, + "step": 7579 + }, + { + "epoch": 0.8601418439716312, + "grad_norm": 22.120121002197266, + "learning_rate": 5e-05, + "loss": 1.1259, + "num_input_tokens_seen": 507023228, + "step": 7580 + }, + { + "epoch": 0.8601418439716312, + "loss": 1.1287977695465088, + "loss_ce": 0.012586807832121849, + "loss_iou": 0.421875, + "loss_num": 0.054443359375, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 507023228, + "step": 7580 + }, + { + "epoch": 0.8602553191489362, + "grad_norm": 19.149887084960938, + "learning_rate": 5e-05, + "loss": 1.1516, + "num_input_tokens_seen": 507090056, + "step": 7581 + }, + { + "epoch": 0.8602553191489362, + "loss": 1.2865629196166992, + "loss_ce": 0.008242547512054443, + "loss_iou": 0.53125, + "loss_num": 0.04296875, + "loss_xval": 1.28125, + "num_input_tokens_seen": 507090056, + "step": 7581 + }, + { + "epoch": 0.8603687943262411, + "grad_norm": 24.249530792236328, + "learning_rate": 5e-05, + "loss": 1.116, + "num_input_tokens_seen": 507157512, + "step": 7582 + }, + { + "epoch": 0.8603687943262411, + "loss": 1.1159194707870483, + "loss_ce": 0.006056193727999926, + "loss_iou": 0.470703125, + "loss_num": 0.033447265625, + "loss_xval": 1.109375, + "num_input_tokens_seen": 507157512, + "step": 7582 + }, + { + "epoch": 0.8604822695035461, + "grad_norm": 25.101030349731445, + "learning_rate": 5e-05, + "loss": 1.3459, + "num_input_tokens_seen": 507223552, + "step": 7583 + }, + { + "epoch": 0.8604822695035461, + "loss": 1.3311941623687744, + "loss_ce": 0.00990502629429102, + "loss_iou": 0.53125, + "loss_num": 0.052490234375, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 507223552, + "step": 7583 + }, + { + "epoch": 0.8605957446808511, + "grad_norm": 21.399066925048828, + "learning_rate": 5e-05, + "loss": 1.17, + "num_input_tokens_seen": 507289976, + "step": 7584 + }, + { + "epoch": 0.8605957446808511, + "loss": 1.3159031867980957, + "loss_ce": 0.00535624660551548, + "loss_iou": 0.50390625, + "loss_num": 0.06103515625, + "loss_xval": 1.3125, + "num_input_tokens_seen": 507289976, + "step": 7584 + }, + { + "epoch": 0.8607092198581561, + "grad_norm": 30.0952091217041, + "learning_rate": 5e-05, + "loss": 1.3023, + "num_input_tokens_seen": 507358072, + "step": 7585 + }, + { + "epoch": 0.8607092198581561, + "loss": 1.4103161096572876, + "loss_ce": 0.0069958604872226715, + "loss_iou": 0.55859375, + "loss_num": 0.057861328125, + "loss_xval": 1.40625, + "num_input_tokens_seen": 507358072, + "step": 7585 + }, + { + "epoch": 0.8608226950354609, + "grad_norm": 47.418609619140625, + "learning_rate": 5e-05, + "loss": 1.1414, + "num_input_tokens_seen": 507425388, + "step": 7586 + }, + { + "epoch": 0.8608226950354609, + "loss": 1.0491806268692017, + "loss_ce": 0.0052353558130562305, + "loss_iou": 0.44140625, + "loss_num": 0.0322265625, + "loss_xval": 1.046875, + "num_input_tokens_seen": 507425388, + "step": 7586 + }, + { + "epoch": 0.8609361702127659, + "grad_norm": 35.182945251464844, + "learning_rate": 5e-05, + "loss": 1.1852, + "num_input_tokens_seen": 507492272, + "step": 7587 + }, + { + "epoch": 0.8609361702127659, + "loss": 1.0672056674957275, + "loss_ce": 0.012029845267534256, + "loss_iou": 0.45703125, + "loss_num": 0.02783203125, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 507492272, + "step": 7587 + }, + { + "epoch": 0.8610496453900709, + "grad_norm": 115.6550064086914, + "learning_rate": 5e-05, + "loss": 1.1494, + "num_input_tokens_seen": 507559884, + "step": 7588 + }, + { + "epoch": 0.8610496453900709, + "loss": 1.1857707500457764, + "loss_ce": 0.006571571342647076, + "loss_iou": 0.466796875, + "loss_num": 0.048583984375, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 507559884, + "step": 7588 + }, + { + "epoch": 0.8611631205673759, + "grad_norm": 14.388777732849121, + "learning_rate": 5e-05, + "loss": 1.1092, + "num_input_tokens_seen": 507626716, + "step": 7589 + }, + { + "epoch": 0.8611631205673759, + "loss": 1.084478735923767, + "loss_ce": 0.006842049770057201, + "loss_iou": 0.44140625, + "loss_num": 0.039306640625, + "loss_xval": 1.078125, + "num_input_tokens_seen": 507626716, + "step": 7589 + }, + { + "epoch": 0.8612765957446809, + "grad_norm": 20.794490814208984, + "learning_rate": 5e-05, + "loss": 1.238, + "num_input_tokens_seen": 507694828, + "step": 7590 + }, + { + "epoch": 0.8612765957446809, + "loss": 1.1431777477264404, + "loss_ce": 0.007435564883053303, + "loss_iou": 0.427734375, + "loss_num": 0.056396484375, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 507694828, + "step": 7590 + }, + { + "epoch": 0.8613900709219858, + "grad_norm": 31.681608200073242, + "learning_rate": 5e-05, + "loss": 0.8245, + "num_input_tokens_seen": 507760400, + "step": 7591 + }, + { + "epoch": 0.8613900709219858, + "loss": 0.8109288811683655, + "loss_ce": 0.0055090077221393585, + "loss_iou": 0.333984375, + "loss_num": 0.0277099609375, + "loss_xval": 0.8046875, + "num_input_tokens_seen": 507760400, + "step": 7591 + }, + { + "epoch": 0.8615035460992908, + "grad_norm": 81.89488983154297, + "learning_rate": 5e-05, + "loss": 1.2796, + "num_input_tokens_seen": 507827864, + "step": 7592 + }, + { + "epoch": 0.8615035460992908, + "loss": 1.2770689725875854, + "loss_ce": 0.008514195680618286, + "loss_iou": 0.4609375, + "loss_num": 0.0693359375, + "loss_xval": 1.265625, + "num_input_tokens_seen": 507827864, + "step": 7592 + }, + { + "epoch": 0.8616170212765958, + "grad_norm": 48.48907470703125, + "learning_rate": 5e-05, + "loss": 1.3416, + "num_input_tokens_seen": 507895484, + "step": 7593 + }, + { + "epoch": 0.8616170212765958, + "loss": 1.5260496139526367, + "loss_ce": 0.007983321323990822, + "loss_iou": 0.6171875, + "loss_num": 0.057373046875, + "loss_xval": 1.515625, + "num_input_tokens_seen": 507895484, + "step": 7593 + }, + { + "epoch": 0.8617304964539008, + "grad_norm": 31.69777488708496, + "learning_rate": 5e-05, + "loss": 1.2707, + "num_input_tokens_seen": 507961640, + "step": 7594 + }, + { + "epoch": 0.8617304964539008, + "loss": 1.321097493171692, + "loss_ce": 0.004691189154982567, + "loss_iou": 0.5625, + "loss_num": 0.03857421875, + "loss_xval": 1.3125, + "num_input_tokens_seen": 507961640, + "step": 7594 + }, + { + "epoch": 0.8618439716312056, + "grad_norm": 24.430734634399414, + "learning_rate": 5e-05, + "loss": 1.0528, + "num_input_tokens_seen": 508029136, + "step": 7595 + }, + { + "epoch": 0.8618439716312056, + "loss": 1.0307306051254272, + "loss_ce": 0.007293099537491798, + "loss_iou": 0.44140625, + "loss_num": 0.0279541015625, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 508029136, + "step": 7595 + }, + { + "epoch": 0.8619574468085106, + "grad_norm": 28.10980987548828, + "learning_rate": 5e-05, + "loss": 1.0407, + "num_input_tokens_seen": 508097160, + "step": 7596 + }, + { + "epoch": 0.8619574468085106, + "loss": 0.8484659790992737, + "loss_ce": 0.007157385814934969, + "loss_iou": 0.3671875, + "loss_num": 0.0211181640625, + "loss_xval": 0.83984375, + "num_input_tokens_seen": 508097160, + "step": 7596 + }, + { + "epoch": 0.8620709219858156, + "grad_norm": 26.728776931762695, + "learning_rate": 5e-05, + "loss": 1.1409, + "num_input_tokens_seen": 508164508, + "step": 7597 + }, + { + "epoch": 0.8620709219858156, + "loss": 1.183455467224121, + "loss_ce": 0.007430021185427904, + "loss_iou": 0.486328125, + "loss_num": 0.040771484375, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 508164508, + "step": 7597 + }, + { + "epoch": 0.8621843971631206, + "grad_norm": 25.401803970336914, + "learning_rate": 5e-05, + "loss": 1.1509, + "num_input_tokens_seen": 508231292, + "step": 7598 + }, + { + "epoch": 0.8621843971631206, + "loss": 1.3786770105361938, + "loss_ce": 0.012954358011484146, + "loss_iou": 0.5078125, + "loss_num": 0.06982421875, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 508231292, + "step": 7598 + }, + { + "epoch": 0.8622978723404255, + "grad_norm": 21.471866607666016, + "learning_rate": 5e-05, + "loss": 1.032, + "num_input_tokens_seen": 508298952, + "step": 7599 + }, + { + "epoch": 0.8622978723404255, + "loss": 0.999671220779419, + "loss_ce": 0.007972019724547863, + "loss_iou": 0.42578125, + "loss_num": 0.0281982421875, + "loss_xval": 0.9921875, + "num_input_tokens_seen": 508298952, + "step": 7599 + }, + { + "epoch": 0.8624113475177305, + "grad_norm": 12.474852561950684, + "learning_rate": 5e-05, + "loss": 0.9767, + "num_input_tokens_seen": 508363988, + "step": 7600 + }, + { + "epoch": 0.8624113475177305, + "loss": 1.267382264137268, + "loss_ce": 0.009081501513719559, + "loss_iou": 0.490234375, + "loss_num": 0.0556640625, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 508363988, + "step": 7600 + }, + { + "epoch": 0.8625248226950355, + "grad_norm": 9.898343086242676, + "learning_rate": 5e-05, + "loss": 0.9433, + "num_input_tokens_seen": 508431188, + "step": 7601 + }, + { + "epoch": 0.8625248226950355, + "loss": 1.0329394340515137, + "loss_ce": 0.009501967579126358, + "loss_iou": 0.41796875, + "loss_num": 0.037841796875, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 508431188, + "step": 7601 + }, + { + "epoch": 0.8626382978723405, + "grad_norm": 33.00741958618164, + "learning_rate": 5e-05, + "loss": 1.0549, + "num_input_tokens_seen": 508497816, + "step": 7602 + }, + { + "epoch": 0.8626382978723405, + "loss": 1.0890986919403076, + "loss_ce": 0.005602657329291105, + "loss_iou": 0.462890625, + "loss_num": 0.031494140625, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 508497816, + "step": 7602 + }, + { + "epoch": 0.8627517730496453, + "grad_norm": 39.235198974609375, + "learning_rate": 5e-05, + "loss": 1.3046, + "num_input_tokens_seen": 508564076, + "step": 7603 + }, + { + "epoch": 0.8627517730496453, + "loss": 1.0734035968780518, + "loss_ce": 0.007485635112971067, + "loss_iou": 0.44921875, + "loss_num": 0.03369140625, + "loss_xval": 1.0625, + "num_input_tokens_seen": 508564076, + "step": 7603 + }, + { + "epoch": 0.8628652482269503, + "grad_norm": 24.688867568969727, + "learning_rate": 5e-05, + "loss": 1.134, + "num_input_tokens_seen": 508631580, + "step": 7604 + }, + { + "epoch": 0.8628652482269503, + "loss": 1.0465811491012573, + "loss_ce": 0.0055655669420957565, + "loss_iou": 0.43359375, + "loss_num": 0.0341796875, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 508631580, + "step": 7604 + }, + { + "epoch": 0.8629787234042553, + "grad_norm": 24.06692886352539, + "learning_rate": 5e-05, + "loss": 0.8975, + "num_input_tokens_seen": 508698588, + "step": 7605 + }, + { + "epoch": 0.8629787234042553, + "loss": 0.9876052737236023, + "loss_ce": 0.006282090209424496, + "loss_iou": 0.40625, + "loss_num": 0.03369140625, + "loss_xval": 0.98046875, + "num_input_tokens_seen": 508698588, + "step": 7605 + }, + { + "epoch": 0.8630921985815603, + "grad_norm": 49.552520751953125, + "learning_rate": 5e-05, + "loss": 1.1003, + "num_input_tokens_seen": 508764696, + "step": 7606 + }, + { + "epoch": 0.8630921985815603, + "loss": 1.1053504943847656, + "loss_ce": 0.00683983787894249, + "loss_iou": 0.423828125, + "loss_num": 0.050048828125, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 508764696, + "step": 7606 + }, + { + "epoch": 0.8632056737588653, + "grad_norm": 54.69274139404297, + "learning_rate": 5e-05, + "loss": 1.1082, + "num_input_tokens_seen": 508831908, + "step": 7607 + }, + { + "epoch": 0.8632056737588653, + "loss": 1.0366122722625732, + "loss_ce": 0.00487392395734787, + "loss_iou": 0.423828125, + "loss_num": 0.036865234375, + "loss_xval": 1.03125, + "num_input_tokens_seen": 508831908, + "step": 7607 + }, + { + "epoch": 0.8633191489361702, + "grad_norm": 28.497318267822266, + "learning_rate": 5e-05, + "loss": 1.1042, + "num_input_tokens_seen": 508899048, + "step": 7608 + }, + { + "epoch": 0.8633191489361702, + "loss": 1.0122734308242798, + "loss_ce": 0.004460926167666912, + "loss_iou": 0.443359375, + "loss_num": 0.02392578125, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 508899048, + "step": 7608 + }, + { + "epoch": 0.8634326241134752, + "grad_norm": 22.874616622924805, + "learning_rate": 5e-05, + "loss": 0.9787, + "num_input_tokens_seen": 508964136, + "step": 7609 + }, + { + "epoch": 0.8634326241134752, + "loss": 1.1161631345748901, + "loss_ce": 0.010938508436083794, + "loss_iou": 0.42578125, + "loss_num": 0.05078125, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 508964136, + "step": 7609 + }, + { + "epoch": 0.8635460992907802, + "grad_norm": 25.50688934326172, + "learning_rate": 5e-05, + "loss": 1.3981, + "num_input_tokens_seen": 509032100, + "step": 7610 + }, + { + "epoch": 0.8635460992907802, + "loss": 1.512260913848877, + "loss_ce": 0.003960244357585907, + "loss_iou": 0.60546875, + "loss_num": 0.059326171875, + "loss_xval": 1.5078125, + "num_input_tokens_seen": 509032100, + "step": 7610 + }, + { + "epoch": 0.8636595744680851, + "grad_norm": 18.750141143798828, + "learning_rate": 5e-05, + "loss": 1.0927, + "num_input_tokens_seen": 509098672, + "step": 7611 + }, + { + "epoch": 0.8636595744680851, + "loss": 1.0140249729156494, + "loss_ce": 0.006700717844069004, + "loss_iou": 0.416015625, + "loss_num": 0.034912109375, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 509098672, + "step": 7611 + }, + { + "epoch": 0.86377304964539, + "grad_norm": 20.907346725463867, + "learning_rate": 5e-05, + "loss": 1.1507, + "num_input_tokens_seen": 509165396, + "step": 7612 + }, + { + "epoch": 0.86377304964539, + "loss": 1.2507414817810059, + "loss_ce": 0.011483818292617798, + "loss_iou": 0.46875, + "loss_num": 0.060546875, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 509165396, + "step": 7612 + }, + { + "epoch": 0.863886524822695, + "grad_norm": 34.06420135498047, + "learning_rate": 5e-05, + "loss": 1.328, + "num_input_tokens_seen": 509232508, + "step": 7613 + }, + { + "epoch": 0.863886524822695, + "loss": 1.2931896448135376, + "loss_ce": 0.01096305251121521, + "loss_iou": 0.4765625, + "loss_num": 0.0654296875, + "loss_xval": 1.28125, + "num_input_tokens_seen": 509232508, + "step": 7613 + }, + { + "epoch": 0.864, + "grad_norm": 173.75289916992188, + "learning_rate": 5e-05, + "loss": 1.2316, + "num_input_tokens_seen": 509299632, + "step": 7614 + }, + { + "epoch": 0.864, + "loss": 1.028282880783081, + "loss_ce": 0.0067985402420163155, + "loss_iou": 0.435546875, + "loss_num": 0.030029296875, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 509299632, + "step": 7614 + }, + { + "epoch": 0.864113475177305, + "grad_norm": 30.723285675048828, + "learning_rate": 5e-05, + "loss": 1.1037, + "num_input_tokens_seen": 509365324, + "step": 7615 + }, + { + "epoch": 0.864113475177305, + "loss": 1.0958425998687744, + "loss_ce": 0.00505291111767292, + "loss_iou": 0.4453125, + "loss_num": 0.040771484375, + "loss_xval": 1.09375, + "num_input_tokens_seen": 509365324, + "step": 7615 + }, + { + "epoch": 0.86422695035461, + "grad_norm": 21.719013214111328, + "learning_rate": 5e-05, + "loss": 1.3785, + "num_input_tokens_seen": 509432620, + "step": 7616 + }, + { + "epoch": 0.86422695035461, + "loss": 1.487968921661377, + "loss_ce": 0.004570442251861095, + "loss_iou": 0.62109375, + "loss_num": 0.048583984375, + "loss_xval": 1.484375, + "num_input_tokens_seen": 509432620, + "step": 7616 + }, + { + "epoch": 0.8643404255319149, + "grad_norm": 13.385945320129395, + "learning_rate": 5e-05, + "loss": 0.9116, + "num_input_tokens_seen": 509499584, + "step": 7617 + }, + { + "epoch": 0.8643404255319149, + "loss": 0.967031717300415, + "loss_ce": 0.007803231477737427, + "loss_iou": 0.37890625, + "loss_num": 0.039794921875, + "loss_xval": 0.9609375, + "num_input_tokens_seen": 509499584, + "step": 7617 + }, + { + "epoch": 0.8644539007092199, + "grad_norm": 15.023100852966309, + "learning_rate": 5e-05, + "loss": 0.9554, + "num_input_tokens_seen": 509565868, + "step": 7618 + }, + { + "epoch": 0.8644539007092199, + "loss": 1.0199594497680664, + "loss_ce": 0.009217267856001854, + "loss_iou": 0.43359375, + "loss_num": 0.029052734375, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 509565868, + "step": 7618 + }, + { + "epoch": 0.8645673758865248, + "grad_norm": 33.0904655456543, + "learning_rate": 5e-05, + "loss": 1.0844, + "num_input_tokens_seen": 509632240, + "step": 7619 + }, + { + "epoch": 0.8645673758865248, + "loss": 1.2111470699310303, + "loss_ce": 0.010219261981546879, + "loss_iou": 0.49609375, + "loss_num": 0.041259765625, + "loss_xval": 1.203125, + "num_input_tokens_seen": 509632240, + "step": 7619 + }, + { + "epoch": 0.8646808510638297, + "grad_norm": 34.73188400268555, + "learning_rate": 5e-05, + "loss": 1.295, + "num_input_tokens_seen": 509699304, + "step": 7620 + }, + { + "epoch": 0.8646808510638297, + "loss": 1.218545913696289, + "loss_ce": 0.005167004652321339, + "loss_iou": 0.486328125, + "loss_num": 0.0478515625, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 509699304, + "step": 7620 + }, + { + "epoch": 0.8647943262411347, + "grad_norm": 25.598831176757812, + "learning_rate": 5e-05, + "loss": 1.0302, + "num_input_tokens_seen": 509766632, + "step": 7621 + }, + { + "epoch": 0.8647943262411347, + "loss": 1.1128045320510864, + "loss_ce": 0.004894323647022247, + "loss_iou": 0.453125, + "loss_num": 0.0400390625, + "loss_xval": 1.109375, + "num_input_tokens_seen": 509766632, + "step": 7621 + }, + { + "epoch": 0.8649078014184397, + "grad_norm": 26.883827209472656, + "learning_rate": 5e-05, + "loss": 1.1776, + "num_input_tokens_seen": 509832688, + "step": 7622 + }, + { + "epoch": 0.8649078014184397, + "loss": 1.3215992450714111, + "loss_ce": 0.010564180091023445, + "loss_iou": 0.515625, + "loss_num": 0.05615234375, + "loss_xval": 1.3125, + "num_input_tokens_seen": 509832688, + "step": 7622 + }, + { + "epoch": 0.8650212765957447, + "grad_norm": 28.554426193237305, + "learning_rate": 5e-05, + "loss": 1.3924, + "num_input_tokens_seen": 509900260, + "step": 7623 + }, + { + "epoch": 0.8650212765957447, + "loss": 1.4601469039916992, + "loss_ce": 0.008486630395054817, + "loss_iou": 0.55078125, + "loss_num": 0.07080078125, + "loss_xval": 1.453125, + "num_input_tokens_seen": 509900260, + "step": 7623 + }, + { + "epoch": 0.8651347517730497, + "grad_norm": 73.32719421386719, + "learning_rate": 5e-05, + "loss": 1.2596, + "num_input_tokens_seen": 509967780, + "step": 7624 + }, + { + "epoch": 0.8651347517730497, + "loss": 1.1797735691070557, + "loss_ce": 0.006922081112861633, + "loss_iou": 0.46875, + "loss_num": 0.047119140625, + "loss_xval": 1.171875, + "num_input_tokens_seen": 509967780, + "step": 7624 + }, + { + "epoch": 0.8652482269503546, + "grad_norm": 38.20478439331055, + "learning_rate": 5e-05, + "loss": 1.1363, + "num_input_tokens_seen": 510034496, + "step": 7625 + }, + { + "epoch": 0.8652482269503546, + "loss": 1.156623363494873, + "loss_ce": 0.005256141535937786, + "loss_iou": 0.4765625, + "loss_num": 0.039794921875, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 510034496, + "step": 7625 + }, + { + "epoch": 0.8653617021276596, + "grad_norm": 30.56376075744629, + "learning_rate": 5e-05, + "loss": 1.1438, + "num_input_tokens_seen": 510101156, + "step": 7626 + }, + { + "epoch": 0.8653617021276596, + "loss": 1.0928109884262085, + "loss_ce": 0.0078500397503376, + "loss_iou": 0.4296875, + "loss_num": 0.044677734375, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 510101156, + "step": 7626 + }, + { + "epoch": 0.8654751773049646, + "grad_norm": 81.1536636352539, + "learning_rate": 5e-05, + "loss": 0.9255, + "num_input_tokens_seen": 510168240, + "step": 7627 + }, + { + "epoch": 0.8654751773049646, + "loss": 0.8592779636383057, + "loss_ce": 0.005029852502048016, + "loss_iou": 0.349609375, + "loss_num": 0.0311279296875, + "loss_xval": 0.85546875, + "num_input_tokens_seen": 510168240, + "step": 7627 + }, + { + "epoch": 0.8655886524822695, + "grad_norm": 34.027748107910156, + "learning_rate": 5e-05, + "loss": 1.2724, + "num_input_tokens_seen": 510235164, + "step": 7628 + }, + { + "epoch": 0.8655886524822695, + "loss": 1.1953092813491821, + "loss_ce": 0.0024381964467465878, + "loss_iou": 0.5078125, + "loss_num": 0.035400390625, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 510235164, + "step": 7628 + }, + { + "epoch": 0.8657021276595744, + "grad_norm": 38.22317123413086, + "learning_rate": 5e-05, + "loss": 1.1503, + "num_input_tokens_seen": 510302036, + "step": 7629 + }, + { + "epoch": 0.8657021276595744, + "loss": 1.1560171842575073, + "loss_ce": 0.0070914002135396, + "loss_iou": 0.4609375, + "loss_num": 0.04541015625, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 510302036, + "step": 7629 + }, + { + "epoch": 0.8658156028368794, + "grad_norm": 34.03765869140625, + "learning_rate": 5e-05, + "loss": 1.3525, + "num_input_tokens_seen": 510368720, + "step": 7630 + }, + { + "epoch": 0.8658156028368794, + "loss": 1.2844877243041992, + "loss_ce": 0.006167395040392876, + "loss_iou": 0.51953125, + "loss_num": 0.048583984375, + "loss_xval": 1.28125, + "num_input_tokens_seen": 510368720, + "step": 7630 + }, + { + "epoch": 0.8659290780141844, + "grad_norm": 20.771926879882812, + "learning_rate": 5e-05, + "loss": 0.9539, + "num_input_tokens_seen": 510435824, + "step": 7631 + }, + { + "epoch": 0.8659290780141844, + "loss": 0.988645076751709, + "loss_ce": 0.0032935321796685457, + "loss_iou": 0.408203125, + "loss_num": 0.033935546875, + "loss_xval": 0.984375, + "num_input_tokens_seen": 510435824, + "step": 7631 + }, + { + "epoch": 0.8660425531914894, + "grad_norm": 28.251970291137695, + "learning_rate": 5e-05, + "loss": 1.2915, + "num_input_tokens_seen": 510500952, + "step": 7632 + }, + { + "epoch": 0.8660425531914894, + "loss": 1.3562684059143066, + "loss_ce": 0.007147292606532574, + "loss_iou": 0.51171875, + "loss_num": 0.064453125, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 510500952, + "step": 7632 + }, + { + "epoch": 0.8661560283687944, + "grad_norm": 177.54986572265625, + "learning_rate": 5e-05, + "loss": 1.0885, + "num_input_tokens_seen": 510567920, + "step": 7633 + }, + { + "epoch": 0.8661560283687944, + "loss": 1.0619659423828125, + "loss_ce": 0.005218476988375187, + "loss_iou": 0.43359375, + "loss_num": 0.0380859375, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 510567920, + "step": 7633 + }, + { + "epoch": 0.8662695035460993, + "grad_norm": 29.439176559448242, + "learning_rate": 5e-05, + "loss": 1.2089, + "num_input_tokens_seen": 510635180, + "step": 7634 + }, + { + "epoch": 0.8662695035460993, + "loss": 1.2268595695495605, + "loss_ce": 0.005179958883672953, + "loss_iou": 0.48046875, + "loss_num": 0.05224609375, + "loss_xval": 1.21875, + "num_input_tokens_seen": 510635180, + "step": 7634 + }, + { + "epoch": 0.8663829787234043, + "grad_norm": 49.19693374633789, + "learning_rate": 5e-05, + "loss": 1.3507, + "num_input_tokens_seen": 510702976, + "step": 7635 + }, + { + "epoch": 0.8663829787234043, + "loss": 1.2689321041107178, + "loss_ce": 0.005748558323830366, + "loss_iou": 0.52734375, + "loss_num": 0.041259765625, + "loss_xval": 1.265625, + "num_input_tokens_seen": 510702976, + "step": 7635 + }, + { + "epoch": 0.8664964539007092, + "grad_norm": 26.579078674316406, + "learning_rate": 5e-05, + "loss": 1.3921, + "num_input_tokens_seen": 510770100, + "step": 7636 + }, + { + "epoch": 0.8664964539007092, + "loss": 1.323514699935913, + "loss_ce": 0.009061582386493683, + "loss_iou": 0.5390625, + "loss_num": 0.047119140625, + "loss_xval": 1.3125, + "num_input_tokens_seen": 510770100, + "step": 7636 + }, + { + "epoch": 0.8666099290780142, + "grad_norm": 27.712337493896484, + "learning_rate": 5e-05, + "loss": 1.1053, + "num_input_tokens_seen": 510837208, + "step": 7637 + }, + { + "epoch": 0.8666099290780142, + "loss": 1.2039506435394287, + "loss_ce": 0.005220118444412947, + "loss_iou": 0.458984375, + "loss_num": 0.055908203125, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 510837208, + "step": 7637 + }, + { + "epoch": 0.8667234042553191, + "grad_norm": 41.14185333251953, + "learning_rate": 5e-05, + "loss": 1.201, + "num_input_tokens_seen": 510904880, + "step": 7638 + }, + { + "epoch": 0.8667234042553191, + "loss": 1.259751558303833, + "loss_ce": 0.009263194166123867, + "loss_iou": 0.5234375, + "loss_num": 0.04150390625, + "loss_xval": 1.25, + "num_input_tokens_seen": 510904880, + "step": 7638 + }, + { + "epoch": 0.8668368794326241, + "grad_norm": 157.69557189941406, + "learning_rate": 5e-05, + "loss": 1.2559, + "num_input_tokens_seen": 510971580, + "step": 7639 + }, + { + "epoch": 0.8668368794326241, + "loss": 1.145493984222412, + "loss_ce": 0.002915928140282631, + "loss_iou": 0.462890625, + "loss_num": 0.04345703125, + "loss_xval": 1.140625, + "num_input_tokens_seen": 510971580, + "step": 7639 + }, + { + "epoch": 0.8669503546099291, + "grad_norm": 17.424779891967773, + "learning_rate": 5e-05, + "loss": 0.9315, + "num_input_tokens_seen": 511038804, + "step": 7640 + }, + { + "epoch": 0.8669503546099291, + "loss": 0.8864501118659973, + "loss_ce": 0.00949699617922306, + "loss_iou": 0.380859375, + "loss_num": 0.02294921875, + "loss_xval": 0.875, + "num_input_tokens_seen": 511038804, + "step": 7640 + }, + { + "epoch": 0.8670638297872341, + "grad_norm": 29.504844665527344, + "learning_rate": 5e-05, + "loss": 1.0483, + "num_input_tokens_seen": 511105972, + "step": 7641 + }, + { + "epoch": 0.8670638297872341, + "loss": 1.225663661956787, + "loss_ce": 0.0044722589664161205, + "loss_iou": 0.50390625, + "loss_num": 0.04248046875, + "loss_xval": 1.21875, + "num_input_tokens_seen": 511105972, + "step": 7641 + }, + { + "epoch": 0.867177304964539, + "grad_norm": 42.15966033935547, + "learning_rate": 5e-05, + "loss": 1.1114, + "num_input_tokens_seen": 511173404, + "step": 7642 + }, + { + "epoch": 0.867177304964539, + "loss": 0.9699837565422058, + "loss_ce": 0.006116616539657116, + "loss_iou": 0.416015625, + "loss_num": 0.0264892578125, + "loss_xval": 0.96484375, + "num_input_tokens_seen": 511173404, + "step": 7642 + }, + { + "epoch": 0.867290780141844, + "grad_norm": 41.492618560791016, + "learning_rate": 5e-05, + "loss": 1.3037, + "num_input_tokens_seen": 511240692, + "step": 7643 + }, + { + "epoch": 0.867290780141844, + "loss": 1.2984058856964111, + "loss_ce": 0.00494881346821785, + "loss_iou": 0.55078125, + "loss_num": 0.0390625, + "loss_xval": 1.296875, + "num_input_tokens_seen": 511240692, + "step": 7643 + }, + { + "epoch": 0.8674042553191489, + "grad_norm": 21.696413040161133, + "learning_rate": 5e-05, + "loss": 1.1106, + "num_input_tokens_seen": 511308024, + "step": 7644 + }, + { + "epoch": 0.8674042553191489, + "loss": 1.0598108768463135, + "loss_ce": 0.003902652533724904, + "loss_iou": 0.4375, + "loss_num": 0.0361328125, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 511308024, + "step": 7644 + }, + { + "epoch": 0.8675177304964539, + "grad_norm": 32.517547607421875, + "learning_rate": 5e-05, + "loss": 1.1989, + "num_input_tokens_seen": 511375336, + "step": 7645 + }, + { + "epoch": 0.8675177304964539, + "loss": 1.361860990524292, + "loss_ce": 0.008833641186356544, + "loss_iou": 0.53125, + "loss_num": 0.05810546875, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 511375336, + "step": 7645 + }, + { + "epoch": 0.8676312056737588, + "grad_norm": 36.620262145996094, + "learning_rate": 5e-05, + "loss": 1.1317, + "num_input_tokens_seen": 511442528, + "step": 7646 + }, + { + "epoch": 0.8676312056737588, + "loss": 1.0853679180145264, + "loss_ce": 0.00406911363825202, + "loss_iou": 0.46875, + "loss_num": 0.02880859375, + "loss_xval": 1.078125, + "num_input_tokens_seen": 511442528, + "step": 7646 + }, + { + "epoch": 0.8677446808510638, + "grad_norm": 33.57526397705078, + "learning_rate": 5e-05, + "loss": 1.2246, + "num_input_tokens_seen": 511510024, + "step": 7647 + }, + { + "epoch": 0.8677446808510638, + "loss": 1.4209126234054565, + "loss_ce": 0.007826649583876133, + "loss_iou": 0.57421875, + "loss_num": 0.053955078125, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 511510024, + "step": 7647 + }, + { + "epoch": 0.8678581560283688, + "grad_norm": 33.58757019042969, + "learning_rate": 5e-05, + "loss": 1.132, + "num_input_tokens_seen": 511577104, + "step": 7648 + }, + { + "epoch": 0.8678581560283688, + "loss": 1.2376632690429688, + "loss_ce": 0.006217909045517445, + "loss_iou": 0.5234375, + "loss_num": 0.036376953125, + "loss_xval": 1.234375, + "num_input_tokens_seen": 511577104, + "step": 7648 + }, + { + "epoch": 0.8679716312056738, + "grad_norm": 33.834720611572266, + "learning_rate": 5e-05, + "loss": 1.1508, + "num_input_tokens_seen": 511644664, + "step": 7649 + }, + { + "epoch": 0.8679716312056738, + "loss": 1.1726454496383667, + "loss_ce": 0.006629792042076588, + "loss_iou": 0.490234375, + "loss_num": 0.037353515625, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 511644664, + "step": 7649 + }, + { + "epoch": 0.8680851063829788, + "grad_norm": 21.46348762512207, + "learning_rate": 5e-05, + "loss": 1.0838, + "num_input_tokens_seen": 511711924, + "step": 7650 + }, + { + "epoch": 0.8680851063829788, + "loss": 1.3499493598937988, + "loss_ce": 0.008640836924314499, + "loss_iou": 0.51171875, + "loss_num": 0.06396484375, + "loss_xval": 1.34375, + "num_input_tokens_seen": 511711924, + "step": 7650 + }, + { + "epoch": 0.8681985815602837, + "grad_norm": 25.30902862548828, + "learning_rate": 5e-05, + "loss": 1.2459, + "num_input_tokens_seen": 511778816, + "step": 7651 + }, + { + "epoch": 0.8681985815602837, + "loss": 1.2619589567184448, + "loss_ce": 0.009029246866703033, + "loss_iou": 0.490234375, + "loss_num": 0.0546875, + "loss_xval": 1.25, + "num_input_tokens_seen": 511778816, + "step": 7651 + }, + { + "epoch": 0.8683120567375886, + "grad_norm": 33.32905960083008, + "learning_rate": 5e-05, + "loss": 1.0324, + "num_input_tokens_seen": 511844840, + "step": 7652 + }, + { + "epoch": 0.8683120567375886, + "loss": 0.9819490909576416, + "loss_ce": 0.007461846340447664, + "loss_iou": 0.39453125, + "loss_num": 0.036865234375, + "loss_xval": 0.97265625, + "num_input_tokens_seen": 511844840, + "step": 7652 + }, + { + "epoch": 0.8684255319148936, + "grad_norm": 38.02830123901367, + "learning_rate": 5e-05, + "loss": 1.2048, + "num_input_tokens_seen": 511911724, + "step": 7653 + }, + { + "epoch": 0.8684255319148936, + "loss": 1.3891563415527344, + "loss_ce": 0.007320375181734562, + "loss_iou": 0.546875, + "loss_num": 0.05810546875, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 511911724, + "step": 7653 + }, + { + "epoch": 0.8685390070921986, + "grad_norm": 31.907608032226562, + "learning_rate": 5e-05, + "loss": 1.1366, + "num_input_tokens_seen": 511978760, + "step": 7654 + }, + { + "epoch": 0.8685390070921986, + "loss": 1.0580646991729736, + "loss_ce": 0.0019123689271509647, + "loss_iou": 0.458984375, + "loss_num": 0.0279541015625, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 511978760, + "step": 7654 + }, + { + "epoch": 0.8686524822695035, + "grad_norm": 25.099884033203125, + "learning_rate": 5e-05, + "loss": 1.0717, + "num_input_tokens_seen": 512045880, + "step": 7655 + }, + { + "epoch": 0.8686524822695035, + "loss": 0.9573158025741577, + "loss_ce": 0.008341273292899132, + "loss_iou": 0.40625, + "loss_num": 0.02734375, + "loss_xval": 0.94921875, + "num_input_tokens_seen": 512045880, + "step": 7655 + }, + { + "epoch": 0.8687659574468085, + "grad_norm": 22.399118423461914, + "learning_rate": 5e-05, + "loss": 1.043, + "num_input_tokens_seen": 512112676, + "step": 7656 + }, + { + "epoch": 0.8687659574468085, + "loss": 1.1833879947662354, + "loss_ce": 0.003212274983525276, + "loss_iou": 0.44140625, + "loss_num": 0.0595703125, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 512112676, + "step": 7656 + }, + { + "epoch": 0.8688794326241135, + "grad_norm": 25.643041610717773, + "learning_rate": 5e-05, + "loss": 0.9762, + "num_input_tokens_seen": 512178508, + "step": 7657 + }, + { + "epoch": 0.8688794326241135, + "loss": 1.0531458854675293, + "loss_ce": 0.009200586006045341, + "loss_iou": 0.41015625, + "loss_num": 0.045166015625, + "loss_xval": 1.046875, + "num_input_tokens_seen": 512178508, + "step": 7657 + }, + { + "epoch": 0.8689929078014185, + "grad_norm": 32.220008850097656, + "learning_rate": 5e-05, + "loss": 1.1156, + "num_input_tokens_seen": 512244032, + "step": 7658 + }, + { + "epoch": 0.8689929078014185, + "loss": 1.2495691776275635, + "loss_ce": 0.011776203289628029, + "loss_iou": 0.451171875, + "loss_num": 0.0673828125, + "loss_xval": 1.234375, + "num_input_tokens_seen": 512244032, + "step": 7658 + }, + { + "epoch": 0.8691063829787234, + "grad_norm": 35.37873458862305, + "learning_rate": 5e-05, + "loss": 1.0145, + "num_input_tokens_seen": 512311576, + "step": 7659 + }, + { + "epoch": 0.8691063829787234, + "loss": 1.117624282836914, + "loss_ce": 0.0058079371228814125, + "loss_iou": 0.4765625, + "loss_num": 0.03173828125, + "loss_xval": 1.109375, + "num_input_tokens_seen": 512311576, + "step": 7659 + }, + { + "epoch": 0.8692198581560284, + "grad_norm": 32.83293914794922, + "learning_rate": 5e-05, + "loss": 1.0851, + "num_input_tokens_seen": 512377472, + "step": 7660 + }, + { + "epoch": 0.8692198581560284, + "loss": 0.9591670632362366, + "loss_ce": 0.0065303402952849865, + "loss_iou": 0.40234375, + "loss_num": 0.029541015625, + "loss_xval": 0.953125, + "num_input_tokens_seen": 512377472, + "step": 7660 + }, + { + "epoch": 0.8693333333333333, + "grad_norm": 25.163204193115234, + "learning_rate": 5e-05, + "loss": 1.3074, + "num_input_tokens_seen": 512442728, + "step": 7661 + }, + { + "epoch": 0.8693333333333333, + "loss": 1.5049822330474854, + "loss_ce": 0.003029159503057599, + "loss_iou": 0.578125, + "loss_num": 0.068359375, + "loss_xval": 1.5, + "num_input_tokens_seen": 512442728, + "step": 7661 + }, + { + "epoch": 0.8694468085106383, + "grad_norm": 21.042293548583984, + "learning_rate": 5e-05, + "loss": 1.1521, + "num_input_tokens_seen": 512509728, + "step": 7662 + }, + { + "epoch": 0.8694468085106383, + "loss": 1.078903317451477, + "loss_ce": 0.00639351038262248, + "loss_iou": 0.419921875, + "loss_num": 0.046142578125, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 512509728, + "step": 7662 + }, + { + "epoch": 0.8695602836879432, + "grad_norm": 28.51872444152832, + "learning_rate": 5e-05, + "loss": 1.0308, + "num_input_tokens_seen": 512576392, + "step": 7663 + }, + { + "epoch": 0.8695602836879432, + "loss": 1.0403456687927246, + "loss_ce": 0.0066542429849505424, + "loss_iou": 0.4375, + "loss_num": 0.031982421875, + "loss_xval": 1.03125, + "num_input_tokens_seen": 512576392, + "step": 7663 + }, + { + "epoch": 0.8696737588652482, + "grad_norm": 33.88445281982422, + "learning_rate": 5e-05, + "loss": 1.1484, + "num_input_tokens_seen": 512643536, + "step": 7664 + }, + { + "epoch": 0.8696737588652482, + "loss": 1.1632704734802246, + "loss_ce": 0.007752934005111456, + "loss_iou": 0.486328125, + "loss_num": 0.036865234375, + "loss_xval": 1.15625, + "num_input_tokens_seen": 512643536, + "step": 7664 + }, + { + "epoch": 0.8697872340425532, + "grad_norm": 37.98988723754883, + "learning_rate": 5e-05, + "loss": 1.2734, + "num_input_tokens_seen": 512710756, + "step": 7665 + }, + { + "epoch": 0.8697872340425532, + "loss": 1.2779004573822021, + "loss_ce": 0.003974658902734518, + "loss_iou": 0.51953125, + "loss_num": 0.046630859375, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 512710756, + "step": 7665 + }, + { + "epoch": 0.8699007092198582, + "grad_norm": 30.803043365478516, + "learning_rate": 5e-05, + "loss": 1.3593, + "num_input_tokens_seen": 512777900, + "step": 7666 + }, + { + "epoch": 0.8699007092198582, + "loss": 1.3313167095184326, + "loss_ce": 0.002703445265069604, + "loss_iou": 0.5546875, + "loss_num": 0.04296875, + "loss_xval": 1.328125, + "num_input_tokens_seen": 512777900, + "step": 7666 + }, + { + "epoch": 0.8700141843971632, + "grad_norm": 23.385530471801758, + "learning_rate": 5e-05, + "loss": 1.2178, + "num_input_tokens_seen": 512844804, + "step": 7667 + }, + { + "epoch": 0.8700141843971632, + "loss": 1.3623254299163818, + "loss_ce": 0.007833216339349747, + "loss_iou": 0.54296875, + "loss_num": 0.053466796875, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 512844804, + "step": 7667 + }, + { + "epoch": 0.8701276595744681, + "grad_norm": 30.516826629638672, + "learning_rate": 5e-05, + "loss": 1.1441, + "num_input_tokens_seen": 512911864, + "step": 7668 + }, + { + "epoch": 0.8701276595744681, + "loss": 1.1866097450256348, + "loss_ce": 0.003992592915892601, + "loss_iou": 0.498046875, + "loss_num": 0.037353515625, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 512911864, + "step": 7668 + }, + { + "epoch": 0.870241134751773, + "grad_norm": 29.983762741088867, + "learning_rate": 5e-05, + "loss": 1.0925, + "num_input_tokens_seen": 512979724, + "step": 7669 + }, + { + "epoch": 0.870241134751773, + "loss": 1.0942639112472534, + "loss_ce": 0.004420152865350246, + "loss_iou": 0.4453125, + "loss_num": 0.039794921875, + "loss_xval": 1.09375, + "num_input_tokens_seen": 512979724, + "step": 7669 + }, + { + "epoch": 0.870354609929078, + "grad_norm": 38.537113189697266, + "learning_rate": 5e-05, + "loss": 1.1237, + "num_input_tokens_seen": 513045688, + "step": 7670 + }, + { + "epoch": 0.870354609929078, + "loss": 1.0413882732391357, + "loss_ce": 0.007452820427715778, + "loss_iou": 0.419921875, + "loss_num": 0.0390625, + "loss_xval": 1.03125, + "num_input_tokens_seen": 513045688, + "step": 7670 + }, + { + "epoch": 0.870468085106383, + "grad_norm": 31.694461822509766, + "learning_rate": 5e-05, + "loss": 1.1953, + "num_input_tokens_seen": 513112596, + "step": 7671 + }, + { + "epoch": 0.870468085106383, + "loss": 1.0733869075775146, + "loss_ce": 0.009422006085515022, + "loss_iou": 0.4296875, + "loss_num": 0.041015625, + "loss_xval": 1.0625, + "num_input_tokens_seen": 513112596, + "step": 7671 + }, + { + "epoch": 0.8705815602836879, + "grad_norm": 37.06765365600586, + "learning_rate": 5e-05, + "loss": 1.2196, + "num_input_tokens_seen": 513180124, + "step": 7672 + }, + { + "epoch": 0.8705815602836879, + "loss": 1.2622196674346924, + "loss_ce": 0.006360335741192102, + "loss_iou": 0.52734375, + "loss_num": 0.039794921875, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 513180124, + "step": 7672 + }, + { + "epoch": 0.8706950354609929, + "grad_norm": 26.47205924987793, + "learning_rate": 5e-05, + "loss": 1.0876, + "num_input_tokens_seen": 513246996, + "step": 7673 + }, + { + "epoch": 0.8706950354609929, + "loss": 1.0933207273483276, + "loss_ce": 0.00616262573748827, + "loss_iou": 0.427734375, + "loss_num": 0.04638671875, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 513246996, + "step": 7673 + }, + { + "epoch": 0.8708085106382979, + "grad_norm": 44.97612380981445, + "learning_rate": 5e-05, + "loss": 1.2293, + "num_input_tokens_seen": 513313872, + "step": 7674 + }, + { + "epoch": 0.8708085106382979, + "loss": 1.2554868459701538, + "loss_ce": 0.007440014276653528, + "loss_iou": 0.490234375, + "loss_num": 0.053466796875, + "loss_xval": 1.25, + "num_input_tokens_seen": 513313872, + "step": 7674 + }, + { + "epoch": 0.8709219858156029, + "grad_norm": 40.43269729614258, + "learning_rate": 5e-05, + "loss": 1.3077, + "num_input_tokens_seen": 513381480, + "step": 7675 + }, + { + "epoch": 0.8709219858156029, + "loss": 1.1905566453933716, + "loss_ce": 0.003056551329791546, + "loss_iou": 0.478515625, + "loss_num": 0.0458984375, + "loss_xval": 1.1875, + "num_input_tokens_seen": 513381480, + "step": 7675 + }, + { + "epoch": 0.8710354609929079, + "grad_norm": 22.40824317932129, + "learning_rate": 5e-05, + "loss": 1.2382, + "num_input_tokens_seen": 513448548, + "step": 7676 + }, + { + "epoch": 0.8710354609929079, + "loss": 1.2347084283828735, + "loss_ce": 0.007169337943196297, + "loss_iou": 0.5078125, + "loss_num": 0.042724609375, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 513448548, + "step": 7676 + }, + { + "epoch": 0.8711489361702127, + "grad_norm": 15.55111312866211, + "learning_rate": 5e-05, + "loss": 1.0034, + "num_input_tokens_seen": 513514840, + "step": 7677 + }, + { + "epoch": 0.8711489361702127, + "loss": 1.0651627779006958, + "loss_ce": 0.00608073640614748, + "loss_iou": 0.400390625, + "loss_num": 0.0517578125, + "loss_xval": 1.0625, + "num_input_tokens_seen": 513514840, + "step": 7677 + }, + { + "epoch": 0.8712624113475177, + "grad_norm": 27.542964935302734, + "learning_rate": 5e-05, + "loss": 0.9827, + "num_input_tokens_seen": 513580860, + "step": 7678 + }, + { + "epoch": 0.8712624113475177, + "loss": 0.85213702917099, + "loss_ce": 0.003992507699877024, + "loss_iou": 0.37109375, + "loss_num": 0.02099609375, + "loss_xval": 0.84765625, + "num_input_tokens_seen": 513580860, + "step": 7678 + }, + { + "epoch": 0.8713758865248227, + "grad_norm": 15.207019805908203, + "learning_rate": 5e-05, + "loss": 1.0224, + "num_input_tokens_seen": 513647684, + "step": 7679 + }, + { + "epoch": 0.8713758865248227, + "loss": 1.005120038986206, + "loss_ce": 0.00975867360830307, + "loss_iou": 0.41015625, + "loss_num": 0.035400390625, + "loss_xval": 0.99609375, + "num_input_tokens_seen": 513647684, + "step": 7679 + }, + { + "epoch": 0.8714893617021277, + "grad_norm": 26.635473251342773, + "learning_rate": 5e-05, + "loss": 0.9388, + "num_input_tokens_seen": 513715172, + "step": 7680 + }, + { + "epoch": 0.8714893617021277, + "loss": 1.0503108501434326, + "loss_ce": 0.006853800266981125, + "loss_iou": 0.435546875, + "loss_num": 0.034423828125, + "loss_xval": 1.046875, + "num_input_tokens_seen": 513715172, + "step": 7680 + }, + { + "epoch": 0.8716028368794326, + "grad_norm": 18.617647171020508, + "learning_rate": 5e-05, + "loss": 1.0422, + "num_input_tokens_seen": 513781400, + "step": 7681 + }, + { + "epoch": 0.8716028368794326, + "loss": 1.1171164512634277, + "loss_ce": 0.004811800550669432, + "loss_iou": 0.484375, + "loss_num": 0.028076171875, + "loss_xval": 1.109375, + "num_input_tokens_seen": 513781400, + "step": 7681 + }, + { + "epoch": 0.8717163120567376, + "grad_norm": 21.27891731262207, + "learning_rate": 5e-05, + "loss": 1.1911, + "num_input_tokens_seen": 513848656, + "step": 7682 + }, + { + "epoch": 0.8717163120567376, + "loss": 1.3987982273101807, + "loss_ce": 0.005243632011115551, + "loss_iou": 0.53125, + "loss_num": 0.06689453125, + "loss_xval": 1.390625, + "num_input_tokens_seen": 513848656, + "step": 7682 + }, + { + "epoch": 0.8718297872340426, + "grad_norm": 18.266294479370117, + "learning_rate": 5e-05, + "loss": 1.1071, + "num_input_tokens_seen": 513915452, + "step": 7683 + }, + { + "epoch": 0.8718297872340426, + "loss": 1.1260740756988525, + "loss_ce": 0.006201016716659069, + "loss_iou": 0.43359375, + "loss_num": 0.05078125, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 513915452, + "step": 7683 + }, + { + "epoch": 0.8719432624113476, + "grad_norm": 19.488943099975586, + "learning_rate": 5e-05, + "loss": 1.0614, + "num_input_tokens_seen": 513982232, + "step": 7684 + }, + { + "epoch": 0.8719432624113476, + "loss": 1.0342566967010498, + "loss_ce": 0.006424674764275551, + "loss_iou": 0.41015625, + "loss_num": 0.041259765625, + "loss_xval": 1.03125, + "num_input_tokens_seen": 513982232, + "step": 7684 + }, + { + "epoch": 0.8720567375886524, + "grad_norm": 35.925331115722656, + "learning_rate": 5e-05, + "loss": 1.1987, + "num_input_tokens_seen": 514049500, + "step": 7685 + }, + { + "epoch": 0.8720567375886524, + "loss": 1.24336576461792, + "loss_ce": 0.006061085499823093, + "loss_iou": 0.50390625, + "loss_num": 0.045166015625, + "loss_xval": 1.234375, + "num_input_tokens_seen": 514049500, + "step": 7685 + }, + { + "epoch": 0.8721702127659574, + "grad_norm": 39.75484085083008, + "learning_rate": 5e-05, + "loss": 1.0768, + "num_input_tokens_seen": 514115656, + "step": 7686 + }, + { + "epoch": 0.8721702127659574, + "loss": 1.0037381649017334, + "loss_ce": 0.008132611401379108, + "loss_iou": 0.443359375, + "loss_num": 0.02197265625, + "loss_xval": 0.99609375, + "num_input_tokens_seen": 514115656, + "step": 7686 + }, + { + "epoch": 0.8722836879432624, + "grad_norm": 39.13368225097656, + "learning_rate": 5e-05, + "loss": 1.3121, + "num_input_tokens_seen": 514182960, + "step": 7687 + }, + { + "epoch": 0.8722836879432624, + "loss": 1.3831582069396973, + "loss_ce": 0.010111341252923012, + "loss_iou": 0.60546875, + "loss_num": 0.03271484375, + "loss_xval": 1.375, + "num_input_tokens_seen": 514182960, + "step": 7687 + }, + { + "epoch": 0.8723971631205674, + "grad_norm": 18.846023559570312, + "learning_rate": 5e-05, + "loss": 1.0638, + "num_input_tokens_seen": 514250176, + "step": 7688 + }, + { + "epoch": 0.8723971631205674, + "loss": 1.0799604654312134, + "loss_ce": 0.007206554990261793, + "loss_iou": 0.439453125, + "loss_num": 0.038818359375, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 514250176, + "step": 7688 + }, + { + "epoch": 0.8725106382978723, + "grad_norm": 25.922222137451172, + "learning_rate": 5e-05, + "loss": 1.2784, + "num_input_tokens_seen": 514317096, + "step": 7689 + }, + { + "epoch": 0.8725106382978723, + "loss": 1.1129343509674072, + "loss_ce": 0.006000735331326723, + "loss_iou": 0.47265625, + "loss_num": 0.032470703125, + "loss_xval": 1.109375, + "num_input_tokens_seen": 514317096, + "step": 7689 + }, + { + "epoch": 0.8726241134751773, + "grad_norm": 33.711265563964844, + "learning_rate": 5e-05, + "loss": 1.1926, + "num_input_tokens_seen": 514383376, + "step": 7690 + }, + { + "epoch": 0.8726241134751773, + "loss": 1.280335783958435, + "loss_ce": 0.0061658890917897224, + "loss_iou": 0.494140625, + "loss_num": 0.056884765625, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 514383376, + "step": 7690 + }, + { + "epoch": 0.8727375886524823, + "grad_norm": 34.78956985473633, + "learning_rate": 5e-05, + "loss": 1.0259, + "num_input_tokens_seen": 514450340, + "step": 7691 + }, + { + "epoch": 0.8727375886524823, + "loss": 1.2363159656524658, + "loss_ce": 0.004382332321256399, + "loss_iou": 0.466796875, + "loss_num": 0.059814453125, + "loss_xval": 1.234375, + "num_input_tokens_seen": 514450340, + "step": 7691 + }, + { + "epoch": 0.8728510638297873, + "grad_norm": 33.29838943481445, + "learning_rate": 5e-05, + "loss": 1.1921, + "num_input_tokens_seen": 514516208, + "step": 7692 + }, + { + "epoch": 0.8728510638297873, + "loss": 1.2643406391143799, + "loss_ce": 0.00652819499373436, + "loss_iou": 0.5078125, + "loss_num": 0.04833984375, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 514516208, + "step": 7692 + }, + { + "epoch": 0.8729645390070921, + "grad_norm": 29.210460662841797, + "learning_rate": 5e-05, + "loss": 1.2691, + "num_input_tokens_seen": 514582724, + "step": 7693 + }, + { + "epoch": 0.8729645390070921, + "loss": 1.2333518266677856, + "loss_ce": 0.008254170417785645, + "loss_iou": 0.47265625, + "loss_num": 0.05615234375, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 514582724, + "step": 7693 + }, + { + "epoch": 0.8730780141843971, + "grad_norm": 31.408926010131836, + "learning_rate": 5e-05, + "loss": 1.2128, + "num_input_tokens_seen": 514649680, + "step": 7694 + }, + { + "epoch": 0.8730780141843971, + "loss": 1.0397917032241821, + "loss_ce": 0.0061003477312624454, + "loss_iou": 0.416015625, + "loss_num": 0.040771484375, + "loss_xval": 1.03125, + "num_input_tokens_seen": 514649680, + "step": 7694 + }, + { + "epoch": 0.8731914893617021, + "grad_norm": 35.12500762939453, + "learning_rate": 5e-05, + "loss": 1.3134, + "num_input_tokens_seen": 514717120, + "step": 7695 + }, + { + "epoch": 0.8731914893617021, + "loss": 1.1940417289733887, + "loss_ce": 0.012889388017356396, + "loss_iou": 0.4765625, + "loss_num": 0.0458984375, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 514717120, + "step": 7695 + }, + { + "epoch": 0.8733049645390071, + "grad_norm": 27.983657836914062, + "learning_rate": 5e-05, + "loss": 1.0592, + "num_input_tokens_seen": 514784124, + "step": 7696 + }, + { + "epoch": 0.8733049645390071, + "loss": 1.166804313659668, + "loss_ce": 0.009577752090990543, + "loss_iou": 0.4609375, + "loss_num": 0.047119140625, + "loss_xval": 1.15625, + "num_input_tokens_seen": 514784124, + "step": 7696 + }, + { + "epoch": 0.8734184397163121, + "grad_norm": 19.010845184326172, + "learning_rate": 5e-05, + "loss": 1.0165, + "num_input_tokens_seen": 514851592, + "step": 7697 + }, + { + "epoch": 0.8734184397163121, + "loss": 0.9105287790298462, + "loss_ce": 0.005255335476249456, + "loss_iou": 0.3515625, + "loss_num": 0.040283203125, + "loss_xval": 0.90625, + "num_input_tokens_seen": 514851592, + "step": 7697 + }, + { + "epoch": 0.873531914893617, + "grad_norm": 17.137861251831055, + "learning_rate": 5e-05, + "loss": 1.1503, + "num_input_tokens_seen": 514918072, + "step": 7698 + }, + { + "epoch": 0.873531914893617, + "loss": 1.1050806045532227, + "loss_ce": 0.006936036050319672, + "loss_iou": 0.44140625, + "loss_num": 0.042724609375, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 514918072, + "step": 7698 + }, + { + "epoch": 0.873645390070922, + "grad_norm": 29.163047790527344, + "learning_rate": 5e-05, + "loss": 1.0092, + "num_input_tokens_seen": 514985252, + "step": 7699 + }, + { + "epoch": 0.873645390070922, + "loss": 1.0357542037963867, + "loss_ce": 0.006945624947547913, + "loss_iou": 0.4453125, + "loss_num": 0.027099609375, + "loss_xval": 1.03125, + "num_input_tokens_seen": 514985252, + "step": 7699 + }, + { + "epoch": 0.873758865248227, + "grad_norm": 30.789058685302734, + "learning_rate": 5e-05, + "loss": 1.2461, + "num_input_tokens_seen": 515051936, + "step": 7700 + }, + { + "epoch": 0.873758865248227, + "loss": 1.1677632331848145, + "loss_ce": 0.0051655531860888, + "loss_iou": 0.451171875, + "loss_num": 0.05224609375, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 515051936, + "step": 7700 + }, + { + "epoch": 0.873872340425532, + "grad_norm": 40.63864517211914, + "learning_rate": 5e-05, + "loss": 1.1495, + "num_input_tokens_seen": 515119276, + "step": 7701 + }, + { + "epoch": 0.873872340425532, + "loss": 1.0656489133834839, + "loss_ce": 0.006078613456338644, + "loss_iou": 0.44921875, + "loss_num": 0.031494140625, + "loss_xval": 1.0625, + "num_input_tokens_seen": 515119276, + "step": 7701 + }, + { + "epoch": 0.8739858156028368, + "grad_norm": 36.12200164794922, + "learning_rate": 5e-05, + "loss": 1.1842, + "num_input_tokens_seen": 515185812, + "step": 7702 + }, + { + "epoch": 0.8739858156028368, + "loss": 1.2776763439178467, + "loss_ce": 0.006191965192556381, + "loss_iou": 0.546875, + "loss_num": 0.03515625, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 515185812, + "step": 7702 + }, + { + "epoch": 0.8740992907801418, + "grad_norm": 39.69415283203125, + "learning_rate": 5e-05, + "loss": 1.2163, + "num_input_tokens_seen": 515252124, + "step": 7703 + }, + { + "epoch": 0.8740992907801418, + "loss": 1.2621867656707764, + "loss_ce": 0.005594968795776367, + "loss_iou": 0.490234375, + "loss_num": 0.05517578125, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 515252124, + "step": 7703 + }, + { + "epoch": 0.8742127659574468, + "grad_norm": 32.88632583618164, + "learning_rate": 5e-05, + "loss": 1.2377, + "num_input_tokens_seen": 515316960, + "step": 7704 + }, + { + "epoch": 0.8742127659574468, + "loss": 1.3553106784820557, + "loss_ce": 0.007654513698071241, + "loss_iou": 0.5390625, + "loss_num": 0.053466796875, + "loss_xval": 1.34375, + "num_input_tokens_seen": 515316960, + "step": 7704 + }, + { + "epoch": 0.8743262411347518, + "grad_norm": 21.01688003540039, + "learning_rate": 5e-05, + "loss": 1.0418, + "num_input_tokens_seen": 515383504, + "step": 7705 + }, + { + "epoch": 0.8743262411347518, + "loss": 1.1768633127212524, + "loss_ce": 0.005476531572639942, + "loss_iou": 0.47265625, + "loss_num": 0.045654296875, + "loss_xval": 1.171875, + "num_input_tokens_seen": 515383504, + "step": 7705 + }, + { + "epoch": 0.8744397163120567, + "grad_norm": 30.276472091674805, + "learning_rate": 5e-05, + "loss": 1.1776, + "num_input_tokens_seen": 515450940, + "step": 7706 + }, + { + "epoch": 0.8744397163120567, + "loss": 0.98784339427948, + "loss_ce": 0.003956642001867294, + "loss_iou": 0.41015625, + "loss_num": 0.033203125, + "loss_xval": 0.984375, + "num_input_tokens_seen": 515450940, + "step": 7706 + }, + { + "epoch": 0.8745531914893617, + "grad_norm": 30.549922943115234, + "learning_rate": 5e-05, + "loss": 1.2618, + "num_input_tokens_seen": 515518756, + "step": 7707 + }, + { + "epoch": 0.8745531914893617, + "loss": 1.4696872234344482, + "loss_ce": 0.008749851956963539, + "loss_iou": 0.6015625, + "loss_num": 0.05126953125, + "loss_xval": 1.4609375, + "num_input_tokens_seen": 515518756, + "step": 7707 + }, + { + "epoch": 0.8746666666666667, + "grad_norm": 32.16357421875, + "learning_rate": 5e-05, + "loss": 1.1427, + "num_input_tokens_seen": 515584916, + "step": 7708 + }, + { + "epoch": 0.8746666666666667, + "loss": 1.1748517751693726, + "loss_ce": 0.0054181404411792755, + "loss_iou": 0.46875, + "loss_num": 0.04638671875, + "loss_xval": 1.171875, + "num_input_tokens_seen": 515584916, + "step": 7708 + }, + { + "epoch": 0.8747801418439717, + "grad_norm": 35.91350555419922, + "learning_rate": 5e-05, + "loss": 1.1048, + "num_input_tokens_seen": 515651980, + "step": 7709 + }, + { + "epoch": 0.8747801418439717, + "loss": 1.2237064838409424, + "loss_ce": 0.005444777198135853, + "loss_iou": 0.49609375, + "loss_num": 0.045166015625, + "loss_xval": 1.21875, + "num_input_tokens_seen": 515651980, + "step": 7709 + }, + { + "epoch": 0.8748936170212765, + "grad_norm": 34.001060485839844, + "learning_rate": 5e-05, + "loss": 1.0325, + "num_input_tokens_seen": 515719196, + "step": 7710 + }, + { + "epoch": 0.8748936170212765, + "loss": 1.138267993927002, + "loss_ce": 0.003990605473518372, + "loss_iou": 0.482421875, + "loss_num": 0.033447265625, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 515719196, + "step": 7710 + }, + { + "epoch": 0.8750070921985815, + "grad_norm": 37.65966033935547, + "learning_rate": 5e-05, + "loss": 1.1651, + "num_input_tokens_seen": 515785232, + "step": 7711 + }, + { + "epoch": 0.8750070921985815, + "loss": 0.9656720757484436, + "loss_ce": 0.005863716825842857, + "loss_iou": 0.40625, + "loss_num": 0.0299072265625, + "loss_xval": 0.9609375, + "num_input_tokens_seen": 515785232, + "step": 7711 + }, + { + "epoch": 0.8751205673758865, + "grad_norm": 33.35206604003906, + "learning_rate": 5e-05, + "loss": 1.0684, + "num_input_tokens_seen": 515853016, + "step": 7712 + }, + { + "epoch": 0.8751205673758865, + "loss": 1.0501797199249268, + "loss_ce": 0.006234453991055489, + "loss_iou": 0.4296875, + "loss_num": 0.036865234375, + "loss_xval": 1.046875, + "num_input_tokens_seen": 515853016, + "step": 7712 + }, + { + "epoch": 0.8752340425531915, + "grad_norm": 26.095722198486328, + "learning_rate": 5e-05, + "loss": 1.0267, + "num_input_tokens_seen": 515918940, + "step": 7713 + }, + { + "epoch": 0.8752340425531915, + "loss": 1.0528041124343872, + "loss_ce": 0.004464254714548588, + "loss_iou": 0.451171875, + "loss_num": 0.0291748046875, + "loss_xval": 1.046875, + "num_input_tokens_seen": 515918940, + "step": 7713 + }, + { + "epoch": 0.8753475177304965, + "grad_norm": 47.17416763305664, + "learning_rate": 5e-05, + "loss": 0.9709, + "num_input_tokens_seen": 515985320, + "step": 7714 + }, + { + "epoch": 0.8753475177304965, + "loss": 0.7179620265960693, + "loss_ce": 0.005376579239964485, + "loss_iou": 0.3203125, + "loss_num": 0.0142822265625, + "loss_xval": 0.7109375, + "num_input_tokens_seen": 515985320, + "step": 7714 + }, + { + "epoch": 0.8754609929078014, + "grad_norm": 38.978675842285156, + "learning_rate": 5e-05, + "loss": 1.1686, + "num_input_tokens_seen": 516052300, + "step": 7715 + }, + { + "epoch": 0.8754609929078014, + "loss": 1.223002314567566, + "loss_ce": 0.007181952707469463, + "loss_iou": 0.5, + "loss_num": 0.042236328125, + "loss_xval": 1.21875, + "num_input_tokens_seen": 516052300, + "step": 7715 + }, + { + "epoch": 0.8755744680851064, + "grad_norm": 38.4661750793457, + "learning_rate": 5e-05, + "loss": 1.2034, + "num_input_tokens_seen": 516119168, + "step": 7716 + }, + { + "epoch": 0.8755744680851064, + "loss": 1.3936142921447754, + "loss_ce": 0.00933699868619442, + "loss_iou": 0.5078125, + "loss_num": 0.0732421875, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 516119168, + "step": 7716 + }, + { + "epoch": 0.8756879432624114, + "grad_norm": 30.528602600097656, + "learning_rate": 5e-05, + "loss": 1.0332, + "num_input_tokens_seen": 516185732, + "step": 7717 + }, + { + "epoch": 0.8756879432624114, + "loss": 1.2147835493087769, + "loss_ce": 0.01019367203116417, + "loss_iou": 0.48046875, + "loss_num": 0.049072265625, + "loss_xval": 1.203125, + "num_input_tokens_seen": 516185732, + "step": 7717 + }, + { + "epoch": 0.8758014184397163, + "grad_norm": 38.83443069458008, + "learning_rate": 5e-05, + "loss": 1.3103, + "num_input_tokens_seen": 516251360, + "step": 7718 + }, + { + "epoch": 0.8758014184397163, + "loss": 1.2836800813674927, + "loss_ce": 0.006092223338782787, + "loss_iou": 0.53125, + "loss_num": 0.042236328125, + "loss_xval": 1.28125, + "num_input_tokens_seen": 516251360, + "step": 7718 + }, + { + "epoch": 0.8759148936170212, + "grad_norm": 18.982421875, + "learning_rate": 5e-05, + "loss": 1.2081, + "num_input_tokens_seen": 516317720, + "step": 7719 + }, + { + "epoch": 0.8759148936170212, + "loss": 1.1897329092025757, + "loss_ce": 0.007115733344107866, + "loss_iou": 0.515625, + "loss_num": 0.03076171875, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 516317720, + "step": 7719 + }, + { + "epoch": 0.8760283687943262, + "grad_norm": 17.595561981201172, + "learning_rate": 5e-05, + "loss": 1.0441, + "num_input_tokens_seen": 516385104, + "step": 7720 + }, + { + "epoch": 0.8760283687943262, + "loss": 1.0729488134384155, + "loss_ce": 0.003124624490737915, + "loss_iou": 0.435546875, + "loss_num": 0.03955078125, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 516385104, + "step": 7720 + }, + { + "epoch": 0.8761418439716312, + "grad_norm": 20.996746063232422, + "learning_rate": 5e-05, + "loss": 1.0218, + "num_input_tokens_seen": 516450656, + "step": 7721 + }, + { + "epoch": 0.8761418439716312, + "loss": 0.9893002510070801, + "loss_ce": 0.005962836090475321, + "loss_iou": 0.388671875, + "loss_num": 0.041015625, + "loss_xval": 0.984375, + "num_input_tokens_seen": 516450656, + "step": 7721 + }, + { + "epoch": 0.8762553191489362, + "grad_norm": 34.62242889404297, + "learning_rate": 5e-05, + "loss": 1.1474, + "num_input_tokens_seen": 516518352, + "step": 7722 + }, + { + "epoch": 0.8762553191489362, + "loss": 1.0805069208145142, + "loss_ce": 0.007264770567417145, + "loss_iou": 0.470703125, + "loss_num": 0.0264892578125, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 516518352, + "step": 7722 + }, + { + "epoch": 0.8763687943262412, + "grad_norm": 175.86195373535156, + "learning_rate": 5e-05, + "loss": 0.9878, + "num_input_tokens_seen": 516583836, + "step": 7723 + }, + { + "epoch": 0.8763687943262412, + "loss": 0.9092756509780884, + "loss_ce": 0.004978754557669163, + "loss_iou": 0.375, + "loss_num": 0.0306396484375, + "loss_xval": 0.90625, + "num_input_tokens_seen": 516583836, + "step": 7723 + }, + { + "epoch": 0.8764822695035461, + "grad_norm": 27.399934768676758, + "learning_rate": 5e-05, + "loss": 1.0185, + "num_input_tokens_seen": 516650292, + "step": 7724 + }, + { + "epoch": 0.8764822695035461, + "loss": 1.2134721279144287, + "loss_ce": 0.006685024127364159, + "loss_iou": 0.48046875, + "loss_num": 0.0498046875, + "loss_xval": 1.203125, + "num_input_tokens_seen": 516650292, + "step": 7724 + }, + { + "epoch": 0.8765957446808511, + "grad_norm": 15.254218101501465, + "learning_rate": 5e-05, + "loss": 1.4261, + "num_input_tokens_seen": 516716956, + "step": 7725 + }, + { + "epoch": 0.8765957446808511, + "loss": 1.429352045059204, + "loss_ce": 0.009918483905494213, + "loss_iou": 0.5703125, + "loss_num": 0.0556640625, + "loss_xval": 1.421875, + "num_input_tokens_seen": 516716956, + "step": 7725 + }, + { + "epoch": 0.876709219858156, + "grad_norm": 18.175662994384766, + "learning_rate": 5e-05, + "loss": 1.0112, + "num_input_tokens_seen": 516784076, + "step": 7726 + }, + { + "epoch": 0.876709219858156, + "loss": 0.8594558238983154, + "loss_ce": 0.009113969281315804, + "loss_iou": 0.296875, + "loss_num": 0.051025390625, + "loss_xval": 0.8515625, + "num_input_tokens_seen": 516784076, + "step": 7726 + }, + { + "epoch": 0.876822695035461, + "grad_norm": 18.744232177734375, + "learning_rate": 5e-05, + "loss": 1.0646, + "num_input_tokens_seen": 516849624, + "step": 7727 + }, + { + "epoch": 0.876822695035461, + "loss": 1.2869322299957275, + "loss_ce": 0.00958842970430851, + "loss_iou": 0.478515625, + "loss_num": 0.06396484375, + "loss_xval": 1.28125, + "num_input_tokens_seen": 516849624, + "step": 7727 + }, + { + "epoch": 0.8769361702127659, + "grad_norm": 26.483415603637695, + "learning_rate": 5e-05, + "loss": 0.9806, + "num_input_tokens_seen": 516916368, + "step": 7728 + }, + { + "epoch": 0.8769361702127659, + "loss": 0.9186341762542725, + "loss_ce": 0.0035951128229498863, + "loss_iou": 0.373046875, + "loss_num": 0.033935546875, + "loss_xval": 0.9140625, + "num_input_tokens_seen": 516916368, + "step": 7728 + }, + { + "epoch": 0.8770496453900709, + "grad_norm": 83.0414810180664, + "learning_rate": 5e-05, + "loss": 0.9602, + "num_input_tokens_seen": 516983380, + "step": 7729 + }, + { + "epoch": 0.8770496453900709, + "loss": 0.9550936222076416, + "loss_ce": 0.006118998397141695, + "loss_iou": 0.40625, + "loss_num": 0.0274658203125, + "loss_xval": 0.94921875, + "num_input_tokens_seen": 516983380, + "step": 7729 + }, + { + "epoch": 0.8771631205673759, + "grad_norm": 34.90255355834961, + "learning_rate": 5e-05, + "loss": 1.0417, + "num_input_tokens_seen": 517050688, + "step": 7730 + }, + { + "epoch": 0.8771631205673759, + "loss": 1.155426263809204, + "loss_ce": 0.006988730281591415, + "loss_iou": 0.451171875, + "loss_num": 0.049072265625, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 517050688, + "step": 7730 + }, + { + "epoch": 0.8772765957446809, + "grad_norm": 27.337970733642578, + "learning_rate": 5e-05, + "loss": 1.0773, + "num_input_tokens_seen": 517117932, + "step": 7731 + }, + { + "epoch": 0.8772765957446809, + "loss": 0.9892913103103638, + "loss_ce": 0.005892924033105373, + "loss_iou": 0.408203125, + "loss_num": 0.03369140625, + "loss_xval": 0.984375, + "num_input_tokens_seen": 517117932, + "step": 7731 + }, + { + "epoch": 0.8773900709219858, + "grad_norm": 29.86963653564453, + "learning_rate": 5e-05, + "loss": 1.2352, + "num_input_tokens_seen": 517185268, + "step": 7732 + }, + { + "epoch": 0.8773900709219858, + "loss": 1.0105105638504028, + "loss_ce": 0.006604310125112534, + "loss_iou": 0.40625, + "loss_num": 0.038330078125, + "loss_xval": 1.0, + "num_input_tokens_seen": 517185268, + "step": 7732 + }, + { + "epoch": 0.8775035460992908, + "grad_norm": 32.86005401611328, + "learning_rate": 5e-05, + "loss": 1.2523, + "num_input_tokens_seen": 517251316, + "step": 7733 + }, + { + "epoch": 0.8775035460992908, + "loss": 1.494234323501587, + "loss_ce": 0.007906241342425346, + "loss_iou": 0.58984375, + "loss_num": 0.0615234375, + "loss_xval": 1.484375, + "num_input_tokens_seen": 517251316, + "step": 7733 + }, + { + "epoch": 0.8776170212765958, + "grad_norm": 32.179588317871094, + "learning_rate": 5e-05, + "loss": 1.0723, + "num_input_tokens_seen": 517316968, + "step": 7734 + }, + { + "epoch": 0.8776170212765958, + "loss": 1.1784415245056152, + "loss_ce": 0.006810740567743778, + "loss_iou": 0.439453125, + "loss_num": 0.05859375, + "loss_xval": 1.171875, + "num_input_tokens_seen": 517316968, + "step": 7734 + }, + { + "epoch": 0.8777304964539007, + "grad_norm": 25.012954711914062, + "learning_rate": 5e-05, + "loss": 1.1203, + "num_input_tokens_seen": 517383460, + "step": 7735 + }, + { + "epoch": 0.8777304964539007, + "loss": 0.9707211256027222, + "loss_ce": 0.005694234743714333, + "loss_iou": 0.294921875, + "loss_num": 0.0751953125, + "loss_xval": 0.96484375, + "num_input_tokens_seen": 517383460, + "step": 7735 + }, + { + "epoch": 0.8778439716312056, + "grad_norm": 23.257341384887695, + "learning_rate": 5e-05, + "loss": 0.997, + "num_input_tokens_seen": 517451028, + "step": 7736 + }, + { + "epoch": 0.8778439716312056, + "loss": 1.0049601793289185, + "loss_ce": 0.006425061263144016, + "loss_iou": 0.41015625, + "loss_num": 0.03564453125, + "loss_xval": 1.0, + "num_input_tokens_seen": 517451028, + "step": 7736 + }, + { + "epoch": 0.8779574468085106, + "grad_norm": 32.080223083496094, + "learning_rate": 5e-05, + "loss": 1.4047, + "num_input_tokens_seen": 517518620, + "step": 7737 + }, + { + "epoch": 0.8779574468085106, + "loss": 1.5795676708221436, + "loss_ce": 0.009255151264369488, + "loss_iou": 0.6171875, + "loss_num": 0.0673828125, + "loss_xval": 1.5703125, + "num_input_tokens_seen": 517518620, + "step": 7737 + }, + { + "epoch": 0.8780709219858156, + "grad_norm": 49.47175216674805, + "learning_rate": 5e-05, + "loss": 1.3162, + "num_input_tokens_seen": 517585864, + "step": 7738 + }, + { + "epoch": 0.8780709219858156, + "loss": 1.2403910160064697, + "loss_ce": 0.011143013834953308, + "loss_iou": 0.47265625, + "loss_num": 0.05712890625, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 517585864, + "step": 7738 + }, + { + "epoch": 0.8781843971631206, + "grad_norm": 25.218307495117188, + "learning_rate": 5e-05, + "loss": 1.2449, + "num_input_tokens_seen": 517653736, + "step": 7739 + }, + { + "epoch": 0.8781843971631206, + "loss": 1.101396918296814, + "loss_ce": 0.008379347622394562, + "loss_iou": 0.4609375, + "loss_num": 0.034912109375, + "loss_xval": 1.09375, + "num_input_tokens_seen": 517653736, + "step": 7739 + }, + { + "epoch": 0.8782978723404256, + "grad_norm": 27.23138999938965, + "learning_rate": 5e-05, + "loss": 0.9079, + "num_input_tokens_seen": 517720876, + "step": 7740 + }, + { + "epoch": 0.8782978723404256, + "loss": 0.8411276936531067, + "loss_ce": 0.004823957569897175, + "loss_iou": 0.326171875, + "loss_num": 0.036376953125, + "loss_xval": 0.8359375, + "num_input_tokens_seen": 517720876, + "step": 7740 + }, + { + "epoch": 0.8784113475177305, + "grad_norm": 26.073787689208984, + "learning_rate": 5e-05, + "loss": 1.4432, + "num_input_tokens_seen": 517788056, + "step": 7741 + }, + { + "epoch": 0.8784113475177305, + "loss": 1.4128855466842651, + "loss_ce": 0.005658971611410379, + "loss_iou": 0.57421875, + "loss_num": 0.052001953125, + "loss_xval": 1.40625, + "num_input_tokens_seen": 517788056, + "step": 7741 + }, + { + "epoch": 0.8785248226950355, + "grad_norm": 28.490367889404297, + "learning_rate": 5e-05, + "loss": 1.1452, + "num_input_tokens_seen": 517855988, + "step": 7742 + }, + { + "epoch": 0.8785248226950355, + "loss": 1.2631301879882812, + "loss_ce": 0.006782562006264925, + "loss_iou": 0.5234375, + "loss_num": 0.04150390625, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 517855988, + "step": 7742 + }, + { + "epoch": 0.8786382978723404, + "grad_norm": 26.661964416503906, + "learning_rate": 5e-05, + "loss": 1.3763, + "num_input_tokens_seen": 517923356, + "step": 7743 + }, + { + "epoch": 0.8786382978723404, + "loss": 1.2166324853897095, + "loss_ce": 0.004230109043419361, + "loss_iou": 0.4765625, + "loss_num": 0.051513671875, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 517923356, + "step": 7743 + }, + { + "epoch": 0.8787517730496454, + "grad_norm": 32.05195236206055, + "learning_rate": 5e-05, + "loss": 0.98, + "num_input_tokens_seen": 517988916, + "step": 7744 + }, + { + "epoch": 0.8787517730496454, + "loss": 0.8730877041816711, + "loss_ce": 0.004343797452747822, + "loss_iou": 0.333984375, + "loss_num": 0.040283203125, + "loss_xval": 0.8671875, + "num_input_tokens_seen": 517988916, + "step": 7744 + }, + { + "epoch": 0.8788652482269503, + "grad_norm": 26.36874008178711, + "learning_rate": 5e-05, + "loss": 1.448, + "num_input_tokens_seen": 518055980, + "step": 7745 + }, + { + "epoch": 0.8788652482269503, + "loss": 1.581653356552124, + "loss_ce": 0.008655330166220665, + "loss_iou": 0.578125, + "loss_num": 0.0830078125, + "loss_xval": 1.5703125, + "num_input_tokens_seen": 518055980, + "step": 7745 + }, + { + "epoch": 0.8789787234042553, + "grad_norm": 33.724788665771484, + "learning_rate": 5e-05, + "loss": 1.2165, + "num_input_tokens_seen": 518121948, + "step": 7746 + }, + { + "epoch": 0.8789787234042553, + "loss": 1.1831742525100708, + "loss_ce": 0.0032426253892481327, + "loss_iou": 0.458984375, + "loss_num": 0.052734375, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 518121948, + "step": 7746 + }, + { + "epoch": 0.8790921985815603, + "grad_norm": 39.42753982543945, + "learning_rate": 5e-05, + "loss": 1.319, + "num_input_tokens_seen": 518190100, + "step": 7747 + }, + { + "epoch": 0.8790921985815603, + "loss": 1.3268439769744873, + "loss_ce": 0.006043112371116877, + "loss_iou": 0.55078125, + "loss_num": 0.043212890625, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 518190100, + "step": 7747 + }, + { + "epoch": 0.8792056737588653, + "grad_norm": 29.74178695678711, + "learning_rate": 5e-05, + "loss": 1.0804, + "num_input_tokens_seen": 518256728, + "step": 7748 + }, + { + "epoch": 0.8792056737588653, + "loss": 1.123023271560669, + "loss_ce": 0.007300544064491987, + "loss_iou": 0.46875, + "loss_num": 0.0361328125, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 518256728, + "step": 7748 + }, + { + "epoch": 0.8793191489361702, + "grad_norm": 28.047706604003906, + "learning_rate": 5e-05, + "loss": 1.2378, + "num_input_tokens_seen": 518323544, + "step": 7749 + }, + { + "epoch": 0.8793191489361702, + "loss": 1.4020931720733643, + "loss_ce": 0.008050153963267803, + "loss_iou": 0.55859375, + "loss_num": 0.055908203125, + "loss_xval": 1.390625, + "num_input_tokens_seen": 518323544, + "step": 7749 + }, + { + "epoch": 0.8794326241134752, + "grad_norm": 30.065710067749023, + "learning_rate": 5e-05, + "loss": 1.1493, + "num_input_tokens_seen": 518391184, + "step": 7750 + }, + { + "epoch": 0.8794326241134752, + "eval_seeclick_CIoU": 0.4169445335865021, + "eval_seeclick_GIoU": 0.39955635368824005, + "eval_seeclick_IoU": 0.4989336282014847, + "eval_seeclick_MAE_all": 0.15412300825119019, + "eval_seeclick_MAE_h": 0.07619788683950901, + "eval_seeclick_MAE_w": 0.10378715768456459, + "eval_seeclick_MAE_x_boxes": 0.21179071068763733, + "eval_seeclick_MAE_y_boxes": 0.13869959115982056, + "eval_seeclick_NUM_probability": 0.9999786615371704, + "eval_seeclick_inside_bbox": 0.6770833432674408, + "eval_seeclick_loss": 2.370638847351074, + "eval_seeclick_loss_ce": 0.015494395978748798, + "eval_seeclick_loss_iou": 0.8037109375, + "eval_seeclick_loss_num": 0.155181884765625, + "eval_seeclick_loss_xval": 2.382568359375, + "eval_seeclick_runtime": 65.6105, + "eval_seeclick_samples_per_second": 0.716, + "eval_seeclick_steps_per_second": 0.03, + "num_input_tokens_seen": 518391184, + "step": 7750 + }, + { + "epoch": 0.8794326241134752, + "eval_icons_CIoU": 0.549997553229332, + "eval_icons_GIoU": 0.5572770535945892, + "eval_icons_IoU": 0.5837914347648621, + "eval_icons_MAE_all": 0.12237675860524178, + "eval_icons_MAE_h": 0.08939209580421448, + "eval_icons_MAE_w": 0.13161900639533997, + "eval_icons_MAE_x_boxes": 0.10740558058023453, + "eval_icons_MAE_y_boxes": 0.027571785263717175, + "eval_icons_NUM_probability": 0.9999937117099762, + "eval_icons_inside_bbox": 0.8385416567325592, + "eval_icons_loss": 2.162153720855713, + "eval_icons_loss_ce": 0.0004636792332348705, + "eval_icons_loss_iou": 0.78369140625, + "eval_icons_loss_num": 0.10659599304199219, + "eval_icons_loss_xval": 2.10009765625, + "eval_icons_runtime": 66.0469, + "eval_icons_samples_per_second": 0.757, + "eval_icons_steps_per_second": 0.03, + "num_input_tokens_seen": 518391184, + "step": 7750 + }, + { + "epoch": 0.8794326241134752, + "eval_screenspot_CIoU": 0.2839810351530711, + "eval_screenspot_GIoU": 0.24688133597373962, + "eval_screenspot_IoU": 0.3731335997581482, + "eval_screenspot_MAE_all": 0.22123401860396066, + "eval_screenspot_MAE_h": 0.13120056688785553, + "eval_screenspot_MAE_w": 0.1952210913101832, + "eval_screenspot_MAE_x_boxes": 0.2729170223077138, + "eval_screenspot_MAE_y_boxes": 0.1412887473901113, + "eval_screenspot_NUM_probability": 0.9999338984489441, + "eval_screenspot_inside_bbox": 0.6254166762034098, + "eval_screenspot_loss": 2.9954543113708496, + "eval_screenspot_loss_ce": 0.01565958062807719, + "eval_screenspot_loss_iou": 0.9436848958333334, + "eval_screenspot_loss_num": 0.23024495442708334, + "eval_screenspot_loss_xval": 3.0387369791666665, + "eval_screenspot_runtime": 117.2468, + "eval_screenspot_samples_per_second": 0.759, + "eval_screenspot_steps_per_second": 0.026, + "num_input_tokens_seen": 518391184, + "step": 7750 + }, + { + "epoch": 0.8794326241134752, + "eval_compot_CIoU": 0.33890771865844727, + "eval_compot_GIoU": 0.31636473536491394, + "eval_compot_IoU": 0.4282033145427704, + "eval_compot_MAE_all": 0.20733410865068436, + "eval_compot_MAE_h": 0.10330870747566223, + "eval_compot_MAE_w": 0.23203124105930328, + "eval_compot_MAE_x_boxes": 0.19962362945079803, + "eval_compot_MAE_y_boxes": 0.11569422483444214, + "eval_compot_NUM_probability": 0.9999734461307526, + "eval_compot_inside_bbox": 0.5520833432674408, + "eval_compot_loss": 2.898477792739868, + "eval_compot_loss_ce": 0.007367167389020324, + "eval_compot_loss_iou": 0.916748046875, + "eval_compot_loss_num": 0.2191619873046875, + "eval_compot_loss_xval": 2.92724609375, + "eval_compot_runtime": 69.2908, + "eval_compot_samples_per_second": 0.722, + "eval_compot_steps_per_second": 0.029, + "num_input_tokens_seen": 518391184, + "step": 7750 + }, + { + "epoch": 0.8794326241134752, + "loss": 2.7631473541259766, + "loss_ce": 0.008264686912298203, + "loss_iou": 0.87890625, + "loss_num": 0.201171875, + "loss_xval": 2.75, + "num_input_tokens_seen": 518391184, + "step": 7750 + }, + { + "epoch": 0.8795460992907801, + "grad_norm": 15.434724807739258, + "learning_rate": 5e-05, + "loss": 1.0047, + "num_input_tokens_seen": 518457176, + "step": 7751 + }, + { + "epoch": 0.8795460992907801, + "loss": 0.9493497610092163, + "loss_ce": 0.008187670260667801, + "loss_iou": 0.37109375, + "loss_num": 0.039306640625, + "loss_xval": 0.94140625, + "num_input_tokens_seen": 518457176, + "step": 7751 + }, + { + "epoch": 0.8796595744680851, + "grad_norm": 14.786232948303223, + "learning_rate": 5e-05, + "loss": 1.2761, + "num_input_tokens_seen": 518523940, + "step": 7752 + }, + { + "epoch": 0.8796595744680851, + "loss": 1.3473196029663086, + "loss_ce": 0.01040557585656643, + "loss_iou": 0.5, + "loss_num": 0.06640625, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 518523940, + "step": 7752 + }, + { + "epoch": 0.87977304964539, + "grad_norm": 34.479671478271484, + "learning_rate": 5e-05, + "loss": 0.9002, + "num_input_tokens_seen": 518590496, + "step": 7753 + }, + { + "epoch": 0.87977304964539, + "loss": 0.7487648129463196, + "loss_ce": 0.004624183289706707, + "loss_iou": 0.341796875, + "loss_num": 0.01239013671875, + "loss_xval": 0.7421875, + "num_input_tokens_seen": 518590496, + "step": 7753 + }, + { + "epoch": 0.879886524822695, + "grad_norm": 75.73641967773438, + "learning_rate": 5e-05, + "loss": 1.2026, + "num_input_tokens_seen": 518657192, + "step": 7754 + }, + { + "epoch": 0.879886524822695, + "loss": 1.2257683277130127, + "loss_ce": 0.009459754452109337, + "loss_iou": 0.515625, + "loss_num": 0.0361328125, + "loss_xval": 1.21875, + "num_input_tokens_seen": 518657192, + "step": 7754 + }, + { + "epoch": 0.88, + "grad_norm": 40.88368225097656, + "learning_rate": 5e-05, + "loss": 1.5559, + "num_input_tokens_seen": 518723820, + "step": 7755 + }, + { + "epoch": 0.88, + "loss": 1.533879041671753, + "loss_ce": 0.003605567617341876, + "loss_iou": 0.60546875, + "loss_num": 0.06396484375, + "loss_xval": 1.53125, + "num_input_tokens_seen": 518723820, + "step": 7755 + }, + { + "epoch": 0.880113475177305, + "grad_norm": 19.183773040771484, + "learning_rate": 5e-05, + "loss": 1.1735, + "num_input_tokens_seen": 518791980, + "step": 7756 + }, + { + "epoch": 0.880113475177305, + "loss": 1.1312565803527832, + "loss_ce": 0.008697974495589733, + "loss_iou": 0.435546875, + "loss_num": 0.050048828125, + "loss_xval": 1.125, + "num_input_tokens_seen": 518791980, + "step": 7756 + }, + { + "epoch": 0.88022695035461, + "grad_norm": 24.90403175354004, + "learning_rate": 5e-05, + "loss": 1.1289, + "num_input_tokens_seen": 518858736, + "step": 7757 + }, + { + "epoch": 0.88022695035461, + "loss": 1.0478111505508423, + "loss_ce": 0.006795580964535475, + "loss_iou": 0.423828125, + "loss_num": 0.038818359375, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 518858736, + "step": 7757 + }, + { + "epoch": 0.8803404255319149, + "grad_norm": 26.98384666442871, + "learning_rate": 5e-05, + "loss": 1.1761, + "num_input_tokens_seen": 518925056, + "step": 7758 + }, + { + "epoch": 0.8803404255319149, + "loss": 1.083017349243164, + "loss_ce": 0.008188271895051003, + "loss_iou": 0.431640625, + "loss_num": 0.0419921875, + "loss_xval": 1.078125, + "num_input_tokens_seen": 518925056, + "step": 7758 + }, + { + "epoch": 0.8804539007092198, + "grad_norm": 18.41033363342285, + "learning_rate": 5e-05, + "loss": 1.1304, + "num_input_tokens_seen": 518992064, + "step": 7759 + }, + { + "epoch": 0.8804539007092198, + "loss": 1.1156554222106934, + "loss_ce": 0.006768705323338509, + "loss_iou": 0.48046875, + "loss_num": 0.0299072265625, + "loss_xval": 1.109375, + "num_input_tokens_seen": 518992064, + "step": 7759 + }, + { + "epoch": 0.8805673758865248, + "grad_norm": 15.625975608825684, + "learning_rate": 5e-05, + "loss": 1.1687, + "num_input_tokens_seen": 519058860, + "step": 7760 + }, + { + "epoch": 0.8805673758865248, + "loss": 1.3803305625915527, + "loss_ce": 0.008260278031229973, + "loss_iou": 0.515625, + "loss_num": 0.06884765625, + "loss_xval": 1.375, + "num_input_tokens_seen": 519058860, + "step": 7760 + }, + { + "epoch": 0.8806808510638298, + "grad_norm": 17.231884002685547, + "learning_rate": 5e-05, + "loss": 0.986, + "num_input_tokens_seen": 519126756, + "step": 7761 + }, + { + "epoch": 0.8806808510638298, + "loss": 0.9976662397384644, + "loss_ce": 0.0040138717740774155, + "loss_iou": 0.421875, + "loss_num": 0.0302734375, + "loss_xval": 0.9921875, + "num_input_tokens_seen": 519126756, + "step": 7761 + }, + { + "epoch": 0.8807943262411347, + "grad_norm": 15.838114738464355, + "learning_rate": 5e-05, + "loss": 1.0041, + "num_input_tokens_seen": 519192944, + "step": 7762 + }, + { + "epoch": 0.8807943262411347, + "loss": 1.0683867931365967, + "loss_ce": 0.005642715375870466, + "loss_iou": 0.42578125, + "loss_num": 0.0419921875, + "loss_xval": 1.0625, + "num_input_tokens_seen": 519192944, + "step": 7762 + }, + { + "epoch": 0.8809078014184397, + "grad_norm": 17.532432556152344, + "learning_rate": 5e-05, + "loss": 1.0114, + "num_input_tokens_seen": 519259448, + "step": 7763 + }, + { + "epoch": 0.8809078014184397, + "loss": 0.9671973586082458, + "loss_ce": 0.006748126819729805, + "loss_iou": 0.384765625, + "loss_num": 0.0380859375, + "loss_xval": 0.9609375, + "num_input_tokens_seen": 519259448, + "step": 7763 + }, + { + "epoch": 0.8810212765957447, + "grad_norm": 24.553606033325195, + "learning_rate": 5e-05, + "loss": 1.0399, + "num_input_tokens_seen": 519326336, + "step": 7764 + }, + { + "epoch": 0.8810212765957447, + "loss": 0.9806061387062073, + "loss_ce": 0.012985268607735634, + "loss_iou": 0.404296875, + "loss_num": 0.031982421875, + "loss_xval": 0.96875, + "num_input_tokens_seen": 519326336, + "step": 7764 + }, + { + "epoch": 0.8811347517730497, + "grad_norm": 21.542627334594727, + "learning_rate": 5e-05, + "loss": 1.2934, + "num_input_tokens_seen": 519392720, + "step": 7765 + }, + { + "epoch": 0.8811347517730497, + "loss": 1.2871952056884766, + "loss_ce": 0.01424589566886425, + "loss_iou": 0.51953125, + "loss_num": 0.04736328125, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 519392720, + "step": 7765 + }, + { + "epoch": 0.8812482269503547, + "grad_norm": 29.591623306274414, + "learning_rate": 5e-05, + "loss": 1.2333, + "num_input_tokens_seen": 519458756, + "step": 7766 + }, + { + "epoch": 0.8812482269503547, + "loss": 1.0859495401382446, + "loss_ce": 0.004894885700196028, + "loss_iou": 0.478515625, + "loss_num": 0.02490234375, + "loss_xval": 1.078125, + "num_input_tokens_seen": 519458756, + "step": 7766 + }, + { + "epoch": 0.8813617021276596, + "grad_norm": 66.36488342285156, + "learning_rate": 5e-05, + "loss": 1.0694, + "num_input_tokens_seen": 519525772, + "step": 7767 + }, + { + "epoch": 0.8813617021276596, + "loss": 0.9695190787315369, + "loss_ce": 0.0037903368938714266, + "loss_iou": 0.40625, + "loss_num": 0.0302734375, + "loss_xval": 0.96484375, + "num_input_tokens_seen": 519525772, + "step": 7767 + }, + { + "epoch": 0.8814751773049645, + "grad_norm": 28.051137924194336, + "learning_rate": 5e-05, + "loss": 1.122, + "num_input_tokens_seen": 519593232, + "step": 7768 + }, + { + "epoch": 0.8814751773049645, + "loss": 1.0026310682296753, + "loss_ce": 0.008490407839417458, + "loss_iou": 0.40625, + "loss_num": 0.036376953125, + "loss_xval": 0.9921875, + "num_input_tokens_seen": 519593232, + "step": 7768 + }, + { + "epoch": 0.8815886524822695, + "grad_norm": 39.614200592041016, + "learning_rate": 5e-05, + "loss": 1.4974, + "num_input_tokens_seen": 519660432, + "step": 7769 + }, + { + "epoch": 0.8815886524822695, + "loss": 1.5080821514129639, + "loss_ce": 0.008082114160060883, + "loss_iou": 0.59765625, + "loss_num": 0.060546875, + "loss_xval": 1.5, + "num_input_tokens_seen": 519660432, + "step": 7769 + }, + { + "epoch": 0.8817021276595745, + "grad_norm": 31.050857543945312, + "learning_rate": 5e-05, + "loss": 1.3792, + "num_input_tokens_seen": 519726708, + "step": 7770 + }, + { + "epoch": 0.8817021276595745, + "loss": 1.2918812036514282, + "loss_ce": 0.006724925711750984, + "loss_iou": 0.47265625, + "loss_num": 0.06787109375, + "loss_xval": 1.28125, + "num_input_tokens_seen": 519726708, + "step": 7770 + }, + { + "epoch": 0.8818156028368794, + "grad_norm": 35.05898666381836, + "learning_rate": 5e-05, + "loss": 1.1825, + "num_input_tokens_seen": 519793668, + "step": 7771 + }, + { + "epoch": 0.8818156028368794, + "loss": 1.0936366319656372, + "loss_ce": 0.007699141278862953, + "loss_iou": 0.44140625, + "loss_num": 0.041015625, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 519793668, + "step": 7771 + }, + { + "epoch": 0.8819290780141844, + "grad_norm": 51.05626678466797, + "learning_rate": 5e-05, + "loss": 1.0565, + "num_input_tokens_seen": 519860848, + "step": 7772 + }, + { + "epoch": 0.8819290780141844, + "loss": 1.1131283044815063, + "loss_ce": 0.008636124432086945, + "loss_iou": 0.45703125, + "loss_num": 0.038330078125, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 519860848, + "step": 7772 + }, + { + "epoch": 0.8820425531914894, + "grad_norm": 38.84157943725586, + "learning_rate": 5e-05, + "loss": 1.3672, + "num_input_tokens_seen": 519927216, + "step": 7773 + }, + { + "epoch": 0.8820425531914894, + "loss": 1.34083092212677, + "loss_ce": 0.004405180923640728, + "loss_iou": 0.54296875, + "loss_num": 0.05078125, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 519927216, + "step": 7773 + }, + { + "epoch": 0.8821560283687944, + "grad_norm": 13.618973731994629, + "learning_rate": 5e-05, + "loss": 0.9544, + "num_input_tokens_seen": 519993532, + "step": 7774 + }, + { + "epoch": 0.8821560283687944, + "loss": 0.9352449178695679, + "loss_ce": 0.005069187842309475, + "loss_iou": 0.375, + "loss_num": 0.036376953125, + "loss_xval": 0.9296875, + "num_input_tokens_seen": 519993532, + "step": 7774 + }, + { + "epoch": 0.8822695035460993, + "grad_norm": 65.40596771240234, + "learning_rate": 5e-05, + "loss": 1.181, + "num_input_tokens_seen": 520060944, + "step": 7775 + }, + { + "epoch": 0.8822695035460993, + "loss": 0.9831361770629883, + "loss_ce": 0.006085427477955818, + "loss_iou": 0.412109375, + "loss_num": 0.03076171875, + "loss_xval": 0.9765625, + "num_input_tokens_seen": 520060944, + "step": 7775 + }, + { + "epoch": 0.8823829787234042, + "grad_norm": 24.125028610229492, + "learning_rate": 5e-05, + "loss": 1.322, + "num_input_tokens_seen": 520127680, + "step": 7776 + }, + { + "epoch": 0.8823829787234042, + "loss": 1.1768333911895752, + "loss_ce": 0.003981876187026501, + "loss_iou": 0.498046875, + "loss_num": 0.03564453125, + "loss_xval": 1.171875, + "num_input_tokens_seen": 520127680, + "step": 7776 + }, + { + "epoch": 0.8824964539007092, + "grad_norm": 32.45817565917969, + "learning_rate": 5e-05, + "loss": 1.2832, + "num_input_tokens_seen": 520194024, + "step": 7777 + }, + { + "epoch": 0.8824964539007092, + "loss": 1.3603310585021973, + "loss_ce": 0.009012714959681034, + "loss_iou": 0.5546875, + "loss_num": 0.048583984375, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 520194024, + "step": 7777 + }, + { + "epoch": 0.8826099290780142, + "grad_norm": 26.320459365844727, + "learning_rate": 5e-05, + "loss": 1.1848, + "num_input_tokens_seen": 520260716, + "step": 7778 + }, + { + "epoch": 0.8826099290780142, + "loss": 1.2135932445526123, + "loss_ce": 0.008515178225934505, + "loss_iou": 0.5078125, + "loss_num": 0.03857421875, + "loss_xval": 1.203125, + "num_input_tokens_seen": 520260716, + "step": 7778 + }, + { + "epoch": 0.8827234042553191, + "grad_norm": 17.854400634765625, + "learning_rate": 5e-05, + "loss": 1.0612, + "num_input_tokens_seen": 520327988, + "step": 7779 + }, + { + "epoch": 0.8827234042553191, + "loss": 0.9515697360038757, + "loss_ce": 0.006257211789488792, + "loss_iou": 0.3828125, + "loss_num": 0.03564453125, + "loss_xval": 0.9453125, + "num_input_tokens_seen": 520327988, + "step": 7779 + }, + { + "epoch": 0.8828368794326241, + "grad_norm": 26.45498275756836, + "learning_rate": 5e-05, + "loss": 1.0084, + "num_input_tokens_seen": 520394828, + "step": 7780 + }, + { + "epoch": 0.8828368794326241, + "loss": 1.0036205053329468, + "loss_ce": 0.008015031926333904, + "loss_iou": 0.423828125, + "loss_num": 0.0296630859375, + "loss_xval": 0.99609375, + "num_input_tokens_seen": 520394828, + "step": 7780 + }, + { + "epoch": 0.8829503546099291, + "grad_norm": 43.953067779541016, + "learning_rate": 5e-05, + "loss": 1.1965, + "num_input_tokens_seen": 520461564, + "step": 7781 + }, + { + "epoch": 0.8829503546099291, + "loss": 1.0396728515625, + "loss_ce": 0.006835957057774067, + "loss_iou": 0.412109375, + "loss_num": 0.042236328125, + "loss_xval": 1.03125, + "num_input_tokens_seen": 520461564, + "step": 7781 + }, + { + "epoch": 0.8830638297872341, + "grad_norm": 33.58832550048828, + "learning_rate": 5e-05, + "loss": 1.1433, + "num_input_tokens_seen": 520528524, + "step": 7782 + }, + { + "epoch": 0.8830638297872341, + "loss": 1.137037992477417, + "loss_ce": 0.006178536452353001, + "loss_iou": 0.4765625, + "loss_num": 0.035888671875, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 520528524, + "step": 7782 + }, + { + "epoch": 0.8831773049645391, + "grad_norm": 23.82640838623047, + "learning_rate": 5e-05, + "loss": 1.1124, + "num_input_tokens_seen": 520594768, + "step": 7783 + }, + { + "epoch": 0.8831773049645391, + "loss": 1.120131492614746, + "loss_ce": 0.008315054699778557, + "loss_iou": 0.455078125, + "loss_num": 0.04052734375, + "loss_xval": 1.109375, + "num_input_tokens_seen": 520594768, + "step": 7783 + }, + { + "epoch": 0.8832907801418439, + "grad_norm": 21.37941551208496, + "learning_rate": 5e-05, + "loss": 1.0098, + "num_input_tokens_seen": 520661988, + "step": 7784 + }, + { + "epoch": 0.8832907801418439, + "loss": 1.001227617263794, + "loss_ce": 0.01050494983792305, + "loss_iou": 0.404296875, + "loss_num": 0.03662109375, + "loss_xval": 0.9921875, + "num_input_tokens_seen": 520661988, + "step": 7784 + }, + { + "epoch": 0.8834042553191489, + "grad_norm": 17.18915557861328, + "learning_rate": 5e-05, + "loss": 0.9941, + "num_input_tokens_seen": 520728852, + "step": 7785 + }, + { + "epoch": 0.8834042553191489, + "loss": 1.0840994119644165, + "loss_ce": 0.005974405445158482, + "loss_iou": 0.4296875, + "loss_num": 0.04345703125, + "loss_xval": 1.078125, + "num_input_tokens_seen": 520728852, + "step": 7785 + }, + { + "epoch": 0.8835177304964539, + "grad_norm": 18.687511444091797, + "learning_rate": 5e-05, + "loss": 1.1936, + "num_input_tokens_seen": 520795580, + "step": 7786 + }, + { + "epoch": 0.8835177304964539, + "loss": 1.3656507730484009, + "loss_ce": 0.009205411188304424, + "loss_iou": 0.51953125, + "loss_num": 0.06396484375, + "loss_xval": 1.359375, + "num_input_tokens_seen": 520795580, + "step": 7786 + }, + { + "epoch": 0.8836312056737589, + "grad_norm": 31.347257614135742, + "learning_rate": 5e-05, + "loss": 1.2161, + "num_input_tokens_seen": 520863560, + "step": 7787 + }, + { + "epoch": 0.8836312056737589, + "loss": 1.1688830852508545, + "loss_ce": 0.005308852531015873, + "loss_iou": 0.439453125, + "loss_num": 0.056884765625, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 520863560, + "step": 7787 + }, + { + "epoch": 0.8837446808510638, + "grad_norm": 29.635807037353516, + "learning_rate": 5e-05, + "loss": 1.4761, + "num_input_tokens_seen": 520931252, + "step": 7788 + }, + { + "epoch": 0.8837446808510638, + "loss": 1.5146167278289795, + "loss_ce": 0.003874470479786396, + "loss_iou": 0.6015625, + "loss_num": 0.062255859375, + "loss_xval": 1.5078125, + "num_input_tokens_seen": 520931252, + "step": 7788 + }, + { + "epoch": 0.8838581560283688, + "grad_norm": 21.551631927490234, + "learning_rate": 5e-05, + "loss": 1.1074, + "num_input_tokens_seen": 520997300, + "step": 7789 + }, + { + "epoch": 0.8838581560283688, + "loss": 1.191662311553955, + "loss_ce": 0.008801012299954891, + "loss_iou": 0.453125, + "loss_num": 0.05517578125, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 520997300, + "step": 7789 + }, + { + "epoch": 0.8839716312056738, + "grad_norm": 30.03976058959961, + "learning_rate": 5e-05, + "loss": 1.0128, + "num_input_tokens_seen": 521064548, + "step": 7790 + }, + { + "epoch": 0.8839716312056738, + "loss": 0.98313307762146, + "loss_ce": 0.012185798957943916, + "loss_iou": 0.376953125, + "loss_num": 0.043212890625, + "loss_xval": 0.97265625, + "num_input_tokens_seen": 521064548, + "step": 7790 + }, + { + "epoch": 0.8840851063829788, + "grad_norm": 66.46215057373047, + "learning_rate": 5e-05, + "loss": 1.0461, + "num_input_tokens_seen": 521131596, + "step": 7791 + }, + { + "epoch": 0.8840851063829788, + "loss": 1.1319314241409302, + "loss_ce": 0.006443129852414131, + "loss_iou": 0.4765625, + "loss_num": 0.0341796875, + "loss_xval": 1.125, + "num_input_tokens_seen": 521131596, + "step": 7791 + }, + { + "epoch": 0.8841985815602836, + "grad_norm": 24.39225196838379, + "learning_rate": 5e-05, + "loss": 1.0688, + "num_input_tokens_seen": 521197584, + "step": 7792 + }, + { + "epoch": 0.8841985815602836, + "loss": 1.1157926321029663, + "loss_ce": 0.004952812567353249, + "loss_iou": 0.45703125, + "loss_num": 0.03955078125, + "loss_xval": 1.109375, + "num_input_tokens_seen": 521197584, + "step": 7792 + }, + { + "epoch": 0.8843120567375886, + "grad_norm": 31.09064483642578, + "learning_rate": 5e-05, + "loss": 1.1147, + "num_input_tokens_seen": 521264368, + "step": 7793 + }, + { + "epoch": 0.8843120567375886, + "loss": 1.0623703002929688, + "loss_ce": 0.0042647989466786385, + "loss_iou": 0.431640625, + "loss_num": 0.038818359375, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 521264368, + "step": 7793 + }, + { + "epoch": 0.8844255319148936, + "grad_norm": 79.0219497680664, + "learning_rate": 5e-05, + "loss": 1.1835, + "num_input_tokens_seen": 521330252, + "step": 7794 + }, + { + "epoch": 0.8844255319148936, + "loss": 1.1303439140319824, + "loss_ce": 0.009494319558143616, + "loss_iou": 0.45703125, + "loss_num": 0.041259765625, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 521330252, + "step": 7794 + }, + { + "epoch": 0.8845390070921986, + "grad_norm": 36.09965133666992, + "learning_rate": 5e-05, + "loss": 0.9067, + "num_input_tokens_seen": 521396280, + "step": 7795 + }, + { + "epoch": 0.8845390070921986, + "loss": 0.8271517753601074, + "loss_ce": 0.007693769875913858, + "loss_iou": 0.314453125, + "loss_num": 0.03759765625, + "loss_xval": 0.8203125, + "num_input_tokens_seen": 521396280, + "step": 7795 + }, + { + "epoch": 0.8846524822695035, + "grad_norm": 26.411170959472656, + "learning_rate": 5e-05, + "loss": 1.1974, + "num_input_tokens_seen": 521463692, + "step": 7796 + }, + { + "epoch": 0.8846524822695035, + "loss": 1.293933391571045, + "loss_ce": 0.00926537998020649, + "loss_iou": 0.50390625, + "loss_num": 0.055908203125, + "loss_xval": 1.28125, + "num_input_tokens_seen": 521463692, + "step": 7796 + }, + { + "epoch": 0.8847659574468085, + "grad_norm": 77.96514129638672, + "learning_rate": 5e-05, + "loss": 1.2176, + "num_input_tokens_seen": 521530348, + "step": 7797 + }, + { + "epoch": 0.8847659574468085, + "loss": 1.4412825107574463, + "loss_ce": 0.009641796350479126, + "loss_iou": 0.58984375, + "loss_num": 0.051025390625, + "loss_xval": 1.4296875, + "num_input_tokens_seen": 521530348, + "step": 7797 + }, + { + "epoch": 0.8848794326241135, + "grad_norm": 81.65701293945312, + "learning_rate": 5e-05, + "loss": 1.4109, + "num_input_tokens_seen": 521598076, + "step": 7798 + }, + { + "epoch": 0.8848794326241135, + "loss": 1.4093146324157715, + "loss_ce": 0.007947497069835663, + "loss_iou": 0.53125, + "loss_num": 0.06689453125, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 521598076, + "step": 7798 + }, + { + "epoch": 0.8849929078014185, + "grad_norm": 25.045272827148438, + "learning_rate": 5e-05, + "loss": 1.1978, + "num_input_tokens_seen": 521665064, + "step": 7799 + }, + { + "epoch": 0.8849929078014185, + "loss": 1.1716970205307007, + "loss_ce": 0.008244888857007027, + "loss_iou": 0.419921875, + "loss_num": 0.064453125, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 521665064, + "step": 7799 + }, + { + "epoch": 0.8851063829787233, + "grad_norm": 20.975069046020508, + "learning_rate": 5e-05, + "loss": 1.0928, + "num_input_tokens_seen": 521731604, + "step": 7800 + }, + { + "epoch": 0.8851063829787233, + "loss": 1.0381566286087036, + "loss_ce": 0.004221122246235609, + "loss_iou": 0.41015625, + "loss_num": 0.04296875, + "loss_xval": 1.03125, + "num_input_tokens_seen": 521731604, + "step": 7800 + }, + { + "epoch": 0.8852198581560283, + "grad_norm": 17.98219871520996, + "learning_rate": 5e-05, + "loss": 1.0663, + "num_input_tokens_seen": 521798428, + "step": 7801 + }, + { + "epoch": 0.8852198581560283, + "loss": 1.0118134021759033, + "loss_ce": 0.004489170853048563, + "loss_iou": 0.408203125, + "loss_num": 0.0380859375, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 521798428, + "step": 7801 + }, + { + "epoch": 0.8853333333333333, + "grad_norm": 16.49885368347168, + "learning_rate": 5e-05, + "loss": 1.1788, + "num_input_tokens_seen": 521865628, + "step": 7802 + }, + { + "epoch": 0.8853333333333333, + "loss": 1.17415189743042, + "loss_ce": 0.008136180229485035, + "loss_iou": 0.4921875, + "loss_num": 0.036376953125, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 521865628, + "step": 7802 + }, + { + "epoch": 0.8854468085106383, + "grad_norm": 25.947738647460938, + "learning_rate": 5e-05, + "loss": 1.0467, + "num_input_tokens_seen": 521933224, + "step": 7803 + }, + { + "epoch": 0.8854468085106383, + "loss": 0.9938412308692932, + "loss_ce": 0.006048312410712242, + "loss_iou": 0.392578125, + "loss_num": 0.040283203125, + "loss_xval": 0.98828125, + "num_input_tokens_seen": 521933224, + "step": 7803 + }, + { + "epoch": 0.8855602836879433, + "grad_norm": 29.682491302490234, + "learning_rate": 5e-05, + "loss": 1.1144, + "num_input_tokens_seen": 522001028, + "step": 7804 + }, + { + "epoch": 0.8855602836879433, + "loss": 0.9070239067077637, + "loss_ce": 0.004680179059505463, + "loss_iou": 0.3828125, + "loss_num": 0.0277099609375, + "loss_xval": 0.90234375, + "num_input_tokens_seen": 522001028, + "step": 7804 + }, + { + "epoch": 0.8856737588652482, + "grad_norm": 45.995567321777344, + "learning_rate": 5e-05, + "loss": 1.1033, + "num_input_tokens_seen": 522068212, + "step": 7805 + }, + { + "epoch": 0.8856737588652482, + "loss": 1.137714147567749, + "loss_ce": 0.0049016098491847515, + "loss_iou": 0.494140625, + "loss_num": 0.029296875, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 522068212, + "step": 7805 + }, + { + "epoch": 0.8857872340425532, + "grad_norm": 31.156408309936523, + "learning_rate": 5e-05, + "loss": 1.321, + "num_input_tokens_seen": 522135576, + "step": 7806 + }, + { + "epoch": 0.8857872340425532, + "loss": 1.174822211265564, + "loss_ce": 0.008318311534821987, + "loss_iou": 0.494140625, + "loss_num": 0.0361328125, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 522135576, + "step": 7806 + }, + { + "epoch": 0.8859007092198582, + "grad_norm": 29.10515785217285, + "learning_rate": 5e-05, + "loss": 0.9912, + "num_input_tokens_seen": 522203164, + "step": 7807 + }, + { + "epoch": 0.8859007092198582, + "loss": 0.9826942682266235, + "loss_ce": 0.005155189894139767, + "loss_iou": 0.43359375, + "loss_num": 0.021728515625, + "loss_xval": 0.9765625, + "num_input_tokens_seen": 522203164, + "step": 7807 + }, + { + "epoch": 0.8860141843971632, + "grad_norm": 30.32254409790039, + "learning_rate": 5e-05, + "loss": 1.134, + "num_input_tokens_seen": 522269948, + "step": 7808 + }, + { + "epoch": 0.8860141843971632, + "loss": 1.1633594036102295, + "loss_ce": 0.007597658317536116, + "loss_iou": 0.453125, + "loss_num": 0.049560546875, + "loss_xval": 1.15625, + "num_input_tokens_seen": 522269948, + "step": 7808 + }, + { + "epoch": 0.886127659574468, + "grad_norm": 26.08673858642578, + "learning_rate": 5e-05, + "loss": 1.2874, + "num_input_tokens_seen": 522336568, + "step": 7809 + }, + { + "epoch": 0.886127659574468, + "loss": 1.1188035011291504, + "loss_ce": 0.0064988150261342525, + "loss_iou": 0.474609375, + "loss_num": 0.032470703125, + "loss_xval": 1.109375, + "num_input_tokens_seen": 522336568, + "step": 7809 + }, + { + "epoch": 0.886241134751773, + "grad_norm": 30.826427459716797, + "learning_rate": 5e-05, + "loss": 0.9297, + "num_input_tokens_seen": 522403336, + "step": 7810 + }, + { + "epoch": 0.886241134751773, + "loss": 0.9160504341125488, + "loss_ce": 0.007847310975193977, + "loss_iou": 0.365234375, + "loss_num": 0.03564453125, + "loss_xval": 0.90625, + "num_input_tokens_seen": 522403336, + "step": 7810 + }, + { + "epoch": 0.886354609929078, + "grad_norm": 44.56938171386719, + "learning_rate": 5e-05, + "loss": 1.2604, + "num_input_tokens_seen": 522469892, + "step": 7811 + }, + { + "epoch": 0.886354609929078, + "loss": 1.1799609661102295, + "loss_ce": 0.006132881157100201, + "loss_iou": 0.482421875, + "loss_num": 0.041748046875, + "loss_xval": 1.171875, + "num_input_tokens_seen": 522469892, + "step": 7811 + }, + { + "epoch": 0.886468085106383, + "grad_norm": 27.14594268798828, + "learning_rate": 5e-05, + "loss": 1.2613, + "num_input_tokens_seen": 522536680, + "step": 7812 + }, + { + "epoch": 0.886468085106383, + "loss": 1.3091919422149658, + "loss_ce": 0.0076782600954174995, + "loss_iou": 0.56640625, + "loss_num": 0.032958984375, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 522536680, + "step": 7812 + }, + { + "epoch": 0.886581560283688, + "grad_norm": 17.78923225402832, + "learning_rate": 5e-05, + "loss": 1.04, + "num_input_tokens_seen": 522603784, + "step": 7813 + }, + { + "epoch": 0.886581560283688, + "loss": 1.0886154174804688, + "loss_ce": 0.006095848977565765, + "loss_iou": 0.453125, + "loss_num": 0.035400390625, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 522603784, + "step": 7813 + }, + { + "epoch": 0.8866950354609929, + "grad_norm": 27.538957595825195, + "learning_rate": 5e-05, + "loss": 1.0319, + "num_input_tokens_seen": 522670680, + "step": 7814 + }, + { + "epoch": 0.8866950354609929, + "loss": 1.027287244796753, + "loss_ce": 0.009709209203720093, + "loss_iou": 0.4140625, + "loss_num": 0.03759765625, + "loss_xval": 1.015625, + "num_input_tokens_seen": 522670680, + "step": 7814 + }, + { + "epoch": 0.8868085106382979, + "grad_norm": 35.82349395751953, + "learning_rate": 5e-05, + "loss": 1.3136, + "num_input_tokens_seen": 522737620, + "step": 7815 + }, + { + "epoch": 0.8868085106382979, + "loss": 1.2376995086669922, + "loss_ce": 0.009183863177895546, + "loss_iou": 0.48046875, + "loss_num": 0.0537109375, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 522737620, + "step": 7815 + }, + { + "epoch": 0.8869219858156029, + "grad_norm": 45.247100830078125, + "learning_rate": 5e-05, + "loss": 1.111, + "num_input_tokens_seen": 522805428, + "step": 7816 + }, + { + "epoch": 0.8869219858156029, + "loss": 1.1535577774047852, + "loss_ce": 0.004143664613366127, + "loss_iou": 0.49609375, + "loss_num": 0.031494140625, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 522805428, + "step": 7816 + }, + { + "epoch": 0.8870354609929078, + "grad_norm": 49.25419998168945, + "learning_rate": 5e-05, + "loss": 1.1603, + "num_input_tokens_seen": 522871552, + "step": 7817 + }, + { + "epoch": 0.8870354609929078, + "loss": 0.996712327003479, + "loss_ce": 0.007820723578333855, + "loss_iou": 0.380859375, + "loss_num": 0.045166015625, + "loss_xval": 0.98828125, + "num_input_tokens_seen": 522871552, + "step": 7817 + }, + { + "epoch": 0.8871489361702127, + "grad_norm": 39.60197067260742, + "learning_rate": 5e-05, + "loss": 1.2827, + "num_input_tokens_seen": 522937888, + "step": 7818 + }, + { + "epoch": 0.8871489361702127, + "loss": 1.2108614444732666, + "loss_ce": 0.009689556434750557, + "loss_iou": 0.490234375, + "loss_num": 0.043701171875, + "loss_xval": 1.203125, + "num_input_tokens_seen": 522937888, + "step": 7818 + }, + { + "epoch": 0.8872624113475177, + "grad_norm": 17.181591033935547, + "learning_rate": 5e-05, + "loss": 1.0709, + "num_input_tokens_seen": 523004744, + "step": 7819 + }, + { + "epoch": 0.8872624113475177, + "loss": 1.115802526473999, + "loss_ce": 0.0042303153313696384, + "loss_iou": 0.421875, + "loss_num": 0.053466796875, + "loss_xval": 1.109375, + "num_input_tokens_seen": 523004744, + "step": 7819 + }, + { + "epoch": 0.8873758865248227, + "grad_norm": 14.671643257141113, + "learning_rate": 5e-05, + "loss": 1.0323, + "num_input_tokens_seen": 523072464, + "step": 7820 + }, + { + "epoch": 0.8873758865248227, + "loss": 1.0981791019439697, + "loss_ce": 0.004917354788631201, + "loss_iou": 0.458984375, + "loss_num": 0.03515625, + "loss_xval": 1.09375, + "num_input_tokens_seen": 523072464, + "step": 7820 + }, + { + "epoch": 0.8874893617021277, + "grad_norm": 19.584272384643555, + "learning_rate": 5e-05, + "loss": 0.9368, + "num_input_tokens_seen": 523138944, + "step": 7821 + }, + { + "epoch": 0.8874893617021277, + "loss": 0.9678623676300049, + "loss_ce": 0.008633852005004883, + "loss_iou": 0.388671875, + "loss_num": 0.036865234375, + "loss_xval": 0.9609375, + "num_input_tokens_seen": 523138944, + "step": 7821 + }, + { + "epoch": 0.8876028368794326, + "grad_norm": 25.033103942871094, + "learning_rate": 5e-05, + "loss": 1.0064, + "num_input_tokens_seen": 523205568, + "step": 7822 + }, + { + "epoch": 0.8876028368794326, + "loss": 0.9070481061935425, + "loss_ce": 0.006413315422832966, + "loss_iou": 0.376953125, + "loss_num": 0.029296875, + "loss_xval": 0.90234375, + "num_input_tokens_seen": 523205568, + "step": 7822 + }, + { + "epoch": 0.8877163120567376, + "grad_norm": 54.828773498535156, + "learning_rate": 5e-05, + "loss": 1.0967, + "num_input_tokens_seen": 523271200, + "step": 7823 + }, + { + "epoch": 0.8877163120567376, + "loss": 1.076500654220581, + "loss_ce": 0.0037467298097908497, + "loss_iou": 0.439453125, + "loss_num": 0.0390625, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 523271200, + "step": 7823 + }, + { + "epoch": 0.8878297872340426, + "grad_norm": 40.51267623901367, + "learning_rate": 5e-05, + "loss": 1.4361, + "num_input_tokens_seen": 523338412, + "step": 7824 + }, + { + "epoch": 0.8878297872340426, + "loss": 1.4688305854797363, + "loss_ce": 0.008869742974638939, + "loss_iou": 0.578125, + "loss_num": 0.0595703125, + "loss_xval": 1.4609375, + "num_input_tokens_seen": 523338412, + "step": 7824 + }, + { + "epoch": 0.8879432624113475, + "grad_norm": 42.1322135925293, + "learning_rate": 5e-05, + "loss": 1.2803, + "num_input_tokens_seen": 523405448, + "step": 7825 + }, + { + "epoch": 0.8879432624113475, + "loss": 1.4933013916015625, + "loss_ce": 0.005996681749820709, + "loss_iou": 0.59375, + "loss_num": 0.059326171875, + "loss_xval": 1.484375, + "num_input_tokens_seen": 523405448, + "step": 7825 + }, + { + "epoch": 0.8880567375886524, + "grad_norm": 26.897480010986328, + "learning_rate": 5e-05, + "loss": 0.9753, + "num_input_tokens_seen": 523472108, + "step": 7826 + }, + { + "epoch": 0.8880567375886524, + "loss": 1.0434142351150513, + "loss_ce": 0.007525595370680094, + "loss_iou": 0.380859375, + "loss_num": 0.0546875, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 523472108, + "step": 7826 + }, + { + "epoch": 0.8881702127659574, + "grad_norm": 22.96624755859375, + "learning_rate": 5e-05, + "loss": 1.1535, + "num_input_tokens_seen": 523538924, + "step": 7827 + }, + { + "epoch": 0.8881702127659574, + "loss": 1.2154217958450317, + "loss_ce": 0.006437448784708977, + "loss_iou": 0.490234375, + "loss_num": 0.046142578125, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 523538924, + "step": 7827 + }, + { + "epoch": 0.8882836879432624, + "grad_norm": 31.597835540771484, + "learning_rate": 5e-05, + "loss": 1.0768, + "num_input_tokens_seen": 523606116, + "step": 7828 + }, + { + "epoch": 0.8882836879432624, + "loss": 1.0826040506362915, + "loss_ce": 0.0059438361786305904, + "loss_iou": 0.4453125, + "loss_num": 0.037109375, + "loss_xval": 1.078125, + "num_input_tokens_seen": 523606116, + "step": 7828 + }, + { + "epoch": 0.8883971631205674, + "grad_norm": 27.062789916992188, + "learning_rate": 5e-05, + "loss": 1.406, + "num_input_tokens_seen": 523672888, + "step": 7829 + }, + { + "epoch": 0.8883971631205674, + "loss": 1.3964414596557617, + "loss_ce": 0.006793079897761345, + "loss_iou": 0.54296875, + "loss_num": 0.060546875, + "loss_xval": 1.390625, + "num_input_tokens_seen": 523672888, + "step": 7829 + }, + { + "epoch": 0.8885106382978724, + "grad_norm": 19.898927688598633, + "learning_rate": 5e-05, + "loss": 1.0592, + "num_input_tokens_seen": 523740540, + "step": 7830 + }, + { + "epoch": 0.8885106382978724, + "loss": 1.1518311500549316, + "loss_ce": 0.003881865181028843, + "loss_iou": 0.484375, + "loss_num": 0.0361328125, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 523740540, + "step": 7830 + }, + { + "epoch": 0.8886241134751773, + "grad_norm": 26.599836349487305, + "learning_rate": 5e-05, + "loss": 1.3099, + "num_input_tokens_seen": 523806800, + "step": 7831 + }, + { + "epoch": 0.8886241134751773, + "loss": 1.1162238121032715, + "loss_ce": 0.00587227288633585, + "loss_iou": 0.4765625, + "loss_num": 0.03173828125, + "loss_xval": 1.109375, + "num_input_tokens_seen": 523806800, + "step": 7831 + }, + { + "epoch": 0.8887375886524823, + "grad_norm": 51.10588455200195, + "learning_rate": 5e-05, + "loss": 1.3026, + "num_input_tokens_seen": 523872640, + "step": 7832 + }, + { + "epoch": 0.8887375886524823, + "loss": 1.477279782295227, + "loss_ce": 0.006576624698936939, + "loss_iou": 0.58984375, + "loss_num": 0.0576171875, + "loss_xval": 1.46875, + "num_input_tokens_seen": 523872640, + "step": 7832 + }, + { + "epoch": 0.8888510638297872, + "grad_norm": 33.782161712646484, + "learning_rate": 5e-05, + "loss": 1.2142, + "num_input_tokens_seen": 523939812, + "step": 7833 + }, + { + "epoch": 0.8888510638297872, + "loss": 1.23951256275177, + "loss_ce": 0.006602376699447632, + "loss_iou": 0.5078125, + "loss_num": 0.04345703125, + "loss_xval": 1.234375, + "num_input_tokens_seen": 523939812, + "step": 7833 + }, + { + "epoch": 0.8889645390070922, + "grad_norm": 45.622684478759766, + "learning_rate": 5e-05, + "loss": 1.2726, + "num_input_tokens_seen": 524007308, + "step": 7834 + }, + { + "epoch": 0.8889645390070922, + "loss": 1.3116791248321533, + "loss_ce": 0.007724017836153507, + "loss_iou": 0.45703125, + "loss_num": 0.078125, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 524007308, + "step": 7834 + }, + { + "epoch": 0.8890780141843971, + "grad_norm": 36.37752151489258, + "learning_rate": 5e-05, + "loss": 1.4148, + "num_input_tokens_seen": 524073760, + "step": 7835 + }, + { + "epoch": 0.8890780141843971, + "loss": 1.4285211563110352, + "loss_ce": 0.010064136236906052, + "loss_iou": 0.5703125, + "loss_num": 0.055908203125, + "loss_xval": 1.421875, + "num_input_tokens_seen": 524073760, + "step": 7835 + }, + { + "epoch": 0.8891914893617021, + "grad_norm": 187.64886474609375, + "learning_rate": 5e-05, + "loss": 1.0402, + "num_input_tokens_seen": 524141412, + "step": 7836 + }, + { + "epoch": 0.8891914893617021, + "loss": 1.0252187252044678, + "loss_ce": 0.005687437020242214, + "loss_iou": 0.4296875, + "loss_num": 0.0322265625, + "loss_xval": 1.015625, + "num_input_tokens_seen": 524141412, + "step": 7836 + }, + { + "epoch": 0.8893049645390071, + "grad_norm": 30.841670989990234, + "learning_rate": 5e-05, + "loss": 0.9877, + "num_input_tokens_seen": 524208616, + "step": 7837 + }, + { + "epoch": 0.8893049645390071, + "loss": 0.911248505115509, + "loss_ce": 0.004998495802283287, + "loss_iou": 0.375, + "loss_num": 0.031005859375, + "loss_xval": 0.90625, + "num_input_tokens_seen": 524208616, + "step": 7837 + }, + { + "epoch": 0.8894184397163121, + "grad_norm": 22.86810302734375, + "learning_rate": 5e-05, + "loss": 1.1195, + "num_input_tokens_seen": 524275892, + "step": 7838 + }, + { + "epoch": 0.8894184397163121, + "loss": 1.2617461681365967, + "loss_ce": 0.008816417306661606, + "loss_iou": 0.48046875, + "loss_num": 0.058349609375, + "loss_xval": 1.25, + "num_input_tokens_seen": 524275892, + "step": 7838 + }, + { + "epoch": 0.889531914893617, + "grad_norm": 19.15053939819336, + "learning_rate": 5e-05, + "loss": 1.2788, + "num_input_tokens_seen": 524342736, + "step": 7839 + }, + { + "epoch": 0.889531914893617, + "loss": 1.2707277536392212, + "loss_ce": 0.005591010674834251, + "loss_iou": 0.48828125, + "loss_num": 0.05810546875, + "loss_xval": 1.265625, + "num_input_tokens_seen": 524342736, + "step": 7839 + }, + { + "epoch": 0.889645390070922, + "grad_norm": 29.985218048095703, + "learning_rate": 5e-05, + "loss": 1.1899, + "num_input_tokens_seen": 524409824, + "step": 7840 + }, + { + "epoch": 0.889645390070922, + "loss": 1.325821876525879, + "loss_ce": 0.007218454964458942, + "loss_iou": 0.484375, + "loss_num": 0.0703125, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 524409824, + "step": 7840 + }, + { + "epoch": 0.889758865248227, + "grad_norm": 48.327903747558594, + "learning_rate": 5e-05, + "loss": 1.1565, + "num_input_tokens_seen": 524475800, + "step": 7841 + }, + { + "epoch": 0.889758865248227, + "loss": 0.917302131652832, + "loss_ce": 0.0022631133906543255, + "loss_iou": 0.373046875, + "loss_num": 0.03369140625, + "loss_xval": 0.9140625, + "num_input_tokens_seen": 524475800, + "step": 7841 + }, + { + "epoch": 0.8898723404255319, + "grad_norm": 34.231590270996094, + "learning_rate": 5e-05, + "loss": 1.0707, + "num_input_tokens_seen": 524542576, + "step": 7842 + }, + { + "epoch": 0.8898723404255319, + "loss": 1.226578950881958, + "loss_ce": 0.00685238279402256, + "loss_iou": 0.515625, + "loss_num": 0.03857421875, + "loss_xval": 1.21875, + "num_input_tokens_seen": 524542576, + "step": 7842 + }, + { + "epoch": 0.8899858156028368, + "grad_norm": 24.551084518432617, + "learning_rate": 5e-05, + "loss": 1.3755, + "num_input_tokens_seen": 524609740, + "step": 7843 + }, + { + "epoch": 0.8899858156028368, + "loss": 1.2411433458328247, + "loss_ce": 0.006280006840825081, + "loss_iou": 0.50390625, + "loss_num": 0.046142578125, + "loss_xval": 1.234375, + "num_input_tokens_seen": 524609740, + "step": 7843 + }, + { + "epoch": 0.8900992907801418, + "grad_norm": 31.73590087890625, + "learning_rate": 5e-05, + "loss": 1.2679, + "num_input_tokens_seen": 524677696, + "step": 7844 + }, + { + "epoch": 0.8900992907801418, + "loss": 1.3545678853988647, + "loss_ce": 0.00788819044828415, + "loss_iou": 0.5, + "loss_num": 0.06884765625, + "loss_xval": 1.34375, + "num_input_tokens_seen": 524677696, + "step": 7844 + }, + { + "epoch": 0.8902127659574468, + "grad_norm": 17.6614990234375, + "learning_rate": 5e-05, + "loss": 1.0356, + "num_input_tokens_seen": 524744228, + "step": 7845 + }, + { + "epoch": 0.8902127659574468, + "loss": 1.201249122619629, + "loss_ce": 0.005448316223919392, + "loss_iou": 0.474609375, + "loss_num": 0.049560546875, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 524744228, + "step": 7845 + }, + { + "epoch": 0.8903262411347518, + "grad_norm": 25.00977897644043, + "learning_rate": 5e-05, + "loss": 1.0259, + "num_input_tokens_seen": 524810100, + "step": 7846 + }, + { + "epoch": 0.8903262411347518, + "loss": 1.2763757705688477, + "loss_ce": 0.010262422263622284, + "loss_iou": 0.48828125, + "loss_num": 0.0576171875, + "loss_xval": 1.265625, + "num_input_tokens_seen": 524810100, + "step": 7846 + }, + { + "epoch": 0.8904397163120568, + "grad_norm": 31.624387741088867, + "learning_rate": 5e-05, + "loss": 1.1096, + "num_input_tokens_seen": 524876464, + "step": 7847 + }, + { + "epoch": 0.8904397163120568, + "loss": 1.297377586364746, + "loss_ce": 0.010268156416714191, + "loss_iou": 0.5078125, + "loss_num": 0.053466796875, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 524876464, + "step": 7847 + }, + { + "epoch": 0.8905531914893617, + "grad_norm": 30.506187438964844, + "learning_rate": 5e-05, + "loss": 1.3262, + "num_input_tokens_seen": 524943352, + "step": 7848 + }, + { + "epoch": 0.8905531914893617, + "loss": 1.391268253326416, + "loss_ce": 0.006502596195787191, + "loss_iou": 0.59375, + "loss_num": 0.03955078125, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 524943352, + "step": 7848 + }, + { + "epoch": 0.8906666666666667, + "grad_norm": 33.26799392700195, + "learning_rate": 5e-05, + "loss": 1.4433, + "num_input_tokens_seen": 525009980, + "step": 7849 + }, + { + "epoch": 0.8906666666666667, + "loss": 1.2937676906585693, + "loss_ce": 0.0076348381116986275, + "loss_iou": 0.55859375, + "loss_num": 0.03466796875, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 525009980, + "step": 7849 + }, + { + "epoch": 0.8907801418439716, + "grad_norm": 31.016292572021484, + "learning_rate": 5e-05, + "loss": 1.0607, + "num_input_tokens_seen": 525077708, + "step": 7850 + }, + { + "epoch": 0.8907801418439716, + "loss": 1.1358346939086914, + "loss_ce": 0.005951791536062956, + "loss_iou": 0.455078125, + "loss_num": 0.044189453125, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 525077708, + "step": 7850 + }, + { + "epoch": 0.8908936170212766, + "grad_norm": 41.63932418823242, + "learning_rate": 5e-05, + "loss": 1.3761, + "num_input_tokens_seen": 525144844, + "step": 7851 + }, + { + "epoch": 0.8908936170212766, + "loss": 1.265067219734192, + "loss_ce": 0.007743003778159618, + "loss_iou": 0.51953125, + "loss_num": 0.044189453125, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 525144844, + "step": 7851 + }, + { + "epoch": 0.8910070921985815, + "grad_norm": 34.65161895751953, + "learning_rate": 5e-05, + "loss": 1.1555, + "num_input_tokens_seen": 525211520, + "step": 7852 + }, + { + "epoch": 0.8910070921985815, + "loss": 1.2823548316955566, + "loss_ce": 0.005987698212265968, + "loss_iou": 0.52734375, + "loss_num": 0.043701171875, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 525211520, + "step": 7852 + }, + { + "epoch": 0.8911205673758865, + "grad_norm": 30.168861389160156, + "learning_rate": 5e-05, + "loss": 1.1467, + "num_input_tokens_seen": 525278524, + "step": 7853 + }, + { + "epoch": 0.8911205673758865, + "loss": 1.426568627357483, + "loss_ce": 0.008111622184515, + "loss_iou": 0.5625, + "loss_num": 0.059326171875, + "loss_xval": 1.421875, + "num_input_tokens_seen": 525278524, + "step": 7853 + }, + { + "epoch": 0.8912340425531915, + "grad_norm": 190.78533935546875, + "learning_rate": 5e-05, + "loss": 1.1512, + "num_input_tokens_seen": 525345900, + "step": 7854 + }, + { + "epoch": 0.8912340425531915, + "loss": 1.1403611898422241, + "loss_ce": 0.0065721385180950165, + "loss_iou": 0.4765625, + "loss_num": 0.03662109375, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 525345900, + "step": 7854 + }, + { + "epoch": 0.8913475177304965, + "grad_norm": 19.87389373779297, + "learning_rate": 5e-05, + "loss": 1.2781, + "num_input_tokens_seen": 525411608, + "step": 7855 + }, + { + "epoch": 0.8913475177304965, + "loss": 1.1504552364349365, + "loss_ce": 0.004398162476718426, + "loss_iou": 0.447265625, + "loss_num": 0.05029296875, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 525411608, + "step": 7855 + }, + { + "epoch": 0.8914609929078015, + "grad_norm": 19.82358169555664, + "learning_rate": 5e-05, + "loss": 1.0633, + "num_input_tokens_seen": 525478040, + "step": 7856 + }, + { + "epoch": 0.8914609929078015, + "loss": 1.0671695470809937, + "loss_ce": 0.007110942155122757, + "loss_iou": 0.34375, + "loss_num": 0.07421875, + "loss_xval": 1.0625, + "num_input_tokens_seen": 525478040, + "step": 7856 + }, + { + "epoch": 0.8915744680851064, + "grad_norm": 22.270448684692383, + "learning_rate": 5e-05, + "loss": 1.2258, + "num_input_tokens_seen": 525544912, + "step": 7857 + }, + { + "epoch": 0.8915744680851064, + "loss": 1.2022106647491455, + "loss_ce": 0.009583689272403717, + "loss_iou": 0.443359375, + "loss_num": 0.061279296875, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 525544912, + "step": 7857 + }, + { + "epoch": 0.8916879432624113, + "grad_norm": 21.582000732421875, + "learning_rate": 5e-05, + "loss": 1.1962, + "num_input_tokens_seen": 525612316, + "step": 7858 + }, + { + "epoch": 0.8916879432624113, + "loss": 1.002703309059143, + "loss_ce": 0.0027033169753849506, + "loss_iou": 0.419921875, + "loss_num": 0.031982421875, + "loss_xval": 1.0, + "num_input_tokens_seen": 525612316, + "step": 7858 + }, + { + "epoch": 0.8918014184397163, + "grad_norm": 13.527247428894043, + "learning_rate": 5e-05, + "loss": 1.1503, + "num_input_tokens_seen": 525679124, + "step": 7859 + }, + { + "epoch": 0.8918014184397163, + "loss": 0.9109662771224976, + "loss_ce": 0.009110821411013603, + "loss_iou": 0.388671875, + "loss_num": 0.0250244140625, + "loss_xval": 0.90234375, + "num_input_tokens_seen": 525679124, + "step": 7859 + }, + { + "epoch": 0.8919148936170213, + "grad_norm": 21.850757598876953, + "learning_rate": 5e-05, + "loss": 1.2198, + "num_input_tokens_seen": 525745580, + "step": 7860 + }, + { + "epoch": 0.8919148936170213, + "loss": 1.2256550788879395, + "loss_ce": 0.007393233012408018, + "loss_iou": 0.51953125, + "loss_num": 0.0361328125, + "loss_xval": 1.21875, + "num_input_tokens_seen": 525745580, + "step": 7860 + }, + { + "epoch": 0.8920283687943262, + "grad_norm": 30.27219009399414, + "learning_rate": 5e-05, + "loss": 1.1589, + "num_input_tokens_seen": 525812428, + "step": 7861 + }, + { + "epoch": 0.8920283687943262, + "loss": 1.0398669242858887, + "loss_ce": 0.005198925733566284, + "loss_iou": 0.416015625, + "loss_num": 0.040771484375, + "loss_xval": 1.03125, + "num_input_tokens_seen": 525812428, + "step": 7861 + }, + { + "epoch": 0.8921418439716312, + "grad_norm": 26.861080169677734, + "learning_rate": 5e-05, + "loss": 1.1378, + "num_input_tokens_seen": 525880072, + "step": 7862 + }, + { + "epoch": 0.8921418439716312, + "loss": 1.1881089210510254, + "loss_ce": 0.003050350584089756, + "loss_iou": 0.498046875, + "loss_num": 0.0380859375, + "loss_xval": 1.1875, + "num_input_tokens_seen": 525880072, + "step": 7862 + }, + { + "epoch": 0.8922553191489362, + "grad_norm": 21.589265823364258, + "learning_rate": 5e-05, + "loss": 0.97, + "num_input_tokens_seen": 525946996, + "step": 7863 + }, + { + "epoch": 0.8922553191489362, + "loss": 1.0758787393569946, + "loss_ce": 0.006054524332284927, + "loss_iou": 0.421875, + "loss_num": 0.04541015625, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 525946996, + "step": 7863 + }, + { + "epoch": 0.8923687943262412, + "grad_norm": 31.617002487182617, + "learning_rate": 5e-05, + "loss": 1.1562, + "num_input_tokens_seen": 526013316, + "step": 7864 + }, + { + "epoch": 0.8923687943262412, + "loss": 0.9266256093978882, + "loss_ce": 0.0034077907912433147, + "loss_iou": 0.38671875, + "loss_num": 0.02978515625, + "loss_xval": 0.921875, + "num_input_tokens_seen": 526013316, + "step": 7864 + }, + { + "epoch": 0.8924822695035461, + "grad_norm": 27.18924331665039, + "learning_rate": 5e-05, + "loss": 1.1375, + "num_input_tokens_seen": 526079460, + "step": 7865 + }, + { + "epoch": 0.8924822695035461, + "loss": 1.0496773719787598, + "loss_ce": 0.006220357492566109, + "loss_iou": 0.41796875, + "loss_num": 0.041748046875, + "loss_xval": 1.046875, + "num_input_tokens_seen": 526079460, + "step": 7865 + }, + { + "epoch": 0.892595744680851, + "grad_norm": 27.91826820373535, + "learning_rate": 5e-05, + "loss": 1.1224, + "num_input_tokens_seen": 526145568, + "step": 7866 + }, + { + "epoch": 0.892595744680851, + "loss": 1.1927251815795898, + "loss_ce": 0.007178369909524918, + "loss_iou": 0.48046875, + "loss_num": 0.044921875, + "loss_xval": 1.1875, + "num_input_tokens_seen": 526145568, + "step": 7866 + }, + { + "epoch": 0.892709219858156, + "grad_norm": 23.26554298400879, + "learning_rate": 5e-05, + "loss": 1.2001, + "num_input_tokens_seen": 526213044, + "step": 7867 + }, + { + "epoch": 0.892709219858156, + "loss": 1.2146222591400146, + "loss_ce": 0.00905580259859562, + "loss_iou": 0.48828125, + "loss_num": 0.04638671875, + "loss_xval": 1.203125, + "num_input_tokens_seen": 526213044, + "step": 7867 + }, + { + "epoch": 0.892822695035461, + "grad_norm": 30.904769897460938, + "learning_rate": 5e-05, + "loss": 1.1015, + "num_input_tokens_seen": 526280124, + "step": 7868 + }, + { + "epoch": 0.892822695035461, + "loss": 1.144993543624878, + "loss_ce": 0.0053450264967978, + "loss_iou": 0.470703125, + "loss_num": 0.039794921875, + "loss_xval": 1.140625, + "num_input_tokens_seen": 526280124, + "step": 7868 + }, + { + "epoch": 0.892936170212766, + "grad_norm": 44.48162841796875, + "learning_rate": 5e-05, + "loss": 1.2093, + "num_input_tokens_seen": 526347100, + "step": 7869 + }, + { + "epoch": 0.892936170212766, + "loss": 1.4072623252868652, + "loss_ce": 0.005895067472010851, + "loss_iou": 0.5546875, + "loss_num": 0.05859375, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 526347100, + "step": 7869 + }, + { + "epoch": 0.8930496453900709, + "grad_norm": 31.994064331054688, + "learning_rate": 5e-05, + "loss": 1.0982, + "num_input_tokens_seen": 526414180, + "step": 7870 + }, + { + "epoch": 0.8930496453900709, + "loss": 1.135132074356079, + "loss_ce": 0.006225725635886192, + "loss_iou": 0.484375, + "loss_num": 0.0322265625, + "loss_xval": 1.125, + "num_input_tokens_seen": 526414180, + "step": 7870 + }, + { + "epoch": 0.8931631205673759, + "grad_norm": 22.85639762878418, + "learning_rate": 5e-05, + "loss": 1.3582, + "num_input_tokens_seen": 526481160, + "step": 7871 + }, + { + "epoch": 0.8931631205673759, + "loss": 1.2274373769760132, + "loss_ce": 0.003804525826126337, + "loss_iou": 0.466796875, + "loss_num": 0.05859375, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 526481160, + "step": 7871 + }, + { + "epoch": 0.8932765957446809, + "grad_norm": 21.62712860107422, + "learning_rate": 5e-05, + "loss": 1.0612, + "num_input_tokens_seen": 526548200, + "step": 7872 + }, + { + "epoch": 0.8932765957446809, + "loss": 1.2178269624710083, + "loss_ce": 0.0029832376167178154, + "loss_iou": 0.5078125, + "loss_num": 0.04052734375, + "loss_xval": 1.21875, + "num_input_tokens_seen": 526548200, + "step": 7872 + }, + { + "epoch": 0.8933900709219859, + "grad_norm": 35.561336517333984, + "learning_rate": 5e-05, + "loss": 1.2851, + "num_input_tokens_seen": 526615100, + "step": 7873 + }, + { + "epoch": 0.8933900709219859, + "loss": 1.2897634506225586, + "loss_ce": 0.007536754943430424, + "loss_iou": 0.515625, + "loss_num": 0.049560546875, + "loss_xval": 1.28125, + "num_input_tokens_seen": 526615100, + "step": 7873 + }, + { + "epoch": 0.8935035460992908, + "grad_norm": 126.19566345214844, + "learning_rate": 5e-05, + "loss": 0.9902, + "num_input_tokens_seen": 526681624, + "step": 7874 + }, + { + "epoch": 0.8935035460992908, + "loss": 0.9146341681480408, + "loss_ce": 0.002524800132960081, + "loss_iou": 0.39453125, + "loss_num": 0.0245361328125, + "loss_xval": 0.9140625, + "num_input_tokens_seen": 526681624, + "step": 7874 + }, + { + "epoch": 0.8936170212765957, + "grad_norm": 55.99440002441406, + "learning_rate": 5e-05, + "loss": 1.2925, + "num_input_tokens_seen": 526748728, + "step": 7875 + }, + { + "epoch": 0.8936170212765957, + "loss": 1.298125982284546, + "loss_ce": 0.012481443583965302, + "loss_iou": 0.53125, + "loss_num": 0.044189453125, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 526748728, + "step": 7875 + }, + { + "epoch": 0.8937304964539007, + "grad_norm": 23.479251861572266, + "learning_rate": 5e-05, + "loss": 1.0474, + "num_input_tokens_seen": 526815028, + "step": 7876 + }, + { + "epoch": 0.8937304964539007, + "loss": 1.1597228050231934, + "loss_ce": 0.008355647325515747, + "loss_iou": 0.484375, + "loss_num": 0.03662109375, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 526815028, + "step": 7876 + }, + { + "epoch": 0.8938439716312057, + "grad_norm": 28.768741607666016, + "learning_rate": 5e-05, + "loss": 1.2914, + "num_input_tokens_seen": 526881808, + "step": 7877 + }, + { + "epoch": 0.8938439716312057, + "loss": 1.261812686920166, + "loss_ce": 0.008883025497198105, + "loss_iou": 0.4921875, + "loss_num": 0.0537109375, + "loss_xval": 1.25, + "num_input_tokens_seen": 526881808, + "step": 7877 + }, + { + "epoch": 0.8939574468085106, + "grad_norm": 42.1556510925293, + "learning_rate": 5e-05, + "loss": 1.2889, + "num_input_tokens_seen": 526948788, + "step": 7878 + }, + { + "epoch": 0.8939574468085106, + "loss": 1.3460259437561035, + "loss_ce": 0.002764226868748665, + "loss_iou": 0.55078125, + "loss_num": 0.04833984375, + "loss_xval": 1.34375, + "num_input_tokens_seen": 526948788, + "step": 7878 + }, + { + "epoch": 0.8940709219858156, + "grad_norm": 30.535398483276367, + "learning_rate": 5e-05, + "loss": 1.2757, + "num_input_tokens_seen": 527015496, + "step": 7879 + }, + { + "epoch": 0.8940709219858156, + "loss": 1.322379469871521, + "loss_ce": 0.010856013745069504, + "loss_iou": 0.56640625, + "loss_num": 0.036376953125, + "loss_xval": 1.3125, + "num_input_tokens_seen": 527015496, + "step": 7879 + }, + { + "epoch": 0.8941843971631206, + "grad_norm": 20.537017822265625, + "learning_rate": 5e-05, + "loss": 1.4215, + "num_input_tokens_seen": 527082568, + "step": 7880 + }, + { + "epoch": 0.8941843971631206, + "loss": 1.2639656066894531, + "loss_ce": 0.007129664998501539, + "loss_iou": 0.5, + "loss_num": 0.051025390625, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 527082568, + "step": 7880 + }, + { + "epoch": 0.8942978723404256, + "grad_norm": 20.381235122680664, + "learning_rate": 5e-05, + "loss": 0.924, + "num_input_tokens_seen": 527149108, + "step": 7881 + }, + { + "epoch": 0.8942978723404256, + "loss": 0.9921355247497559, + "loss_ce": 0.006539795082062483, + "loss_iou": 0.408203125, + "loss_num": 0.033447265625, + "loss_xval": 0.984375, + "num_input_tokens_seen": 527149108, + "step": 7881 + }, + { + "epoch": 0.8944113475177305, + "grad_norm": 22.113611221313477, + "learning_rate": 5e-05, + "loss": 1.1318, + "num_input_tokens_seen": 527216940, + "step": 7882 + }, + { + "epoch": 0.8944113475177305, + "loss": 1.2334388494491577, + "loss_ce": 0.004923169035464525, + "loss_iou": 0.5078125, + "loss_num": 0.042724609375, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 527216940, + "step": 7882 + }, + { + "epoch": 0.8945248226950354, + "grad_norm": 30.717533111572266, + "learning_rate": 5e-05, + "loss": 1.0676, + "num_input_tokens_seen": 527283268, + "step": 7883 + }, + { + "epoch": 0.8945248226950354, + "loss": 1.1537824869155884, + "loss_ce": 0.0051008304581046104, + "loss_iou": 0.49609375, + "loss_num": 0.031494140625, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 527283268, + "step": 7883 + }, + { + "epoch": 0.8946382978723404, + "grad_norm": 35.70387649536133, + "learning_rate": 5e-05, + "loss": 1.2459, + "num_input_tokens_seen": 527349620, + "step": 7884 + }, + { + "epoch": 0.8946382978723404, + "loss": 1.247149109840393, + "loss_ce": 0.005694056395441294, + "loss_iou": 0.53515625, + "loss_num": 0.034912109375, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 527349620, + "step": 7884 + }, + { + "epoch": 0.8947517730496454, + "grad_norm": 29.475479125976562, + "learning_rate": 5e-05, + "loss": 1.0897, + "num_input_tokens_seen": 527416080, + "step": 7885 + }, + { + "epoch": 0.8947517730496454, + "loss": 0.9081581830978394, + "loss_ce": 0.005265139043331146, + "loss_iou": 0.376953125, + "loss_num": 0.0296630859375, + "loss_xval": 0.90234375, + "num_input_tokens_seen": 527416080, + "step": 7885 + }, + { + "epoch": 0.8948652482269503, + "grad_norm": 23.50242042541504, + "learning_rate": 5e-05, + "loss": 1.2191, + "num_input_tokens_seen": 527484056, + "step": 7886 + }, + { + "epoch": 0.8948652482269503, + "loss": 1.2583754062652588, + "loss_ce": 0.0015394880902022123, + "loss_iou": 0.4921875, + "loss_num": 0.054931640625, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 527484056, + "step": 7886 + }, + { + "epoch": 0.8949787234042553, + "grad_norm": 33.193450927734375, + "learning_rate": 5e-05, + "loss": 1.2873, + "num_input_tokens_seen": 527550740, + "step": 7887 + }, + { + "epoch": 0.8949787234042553, + "loss": 1.4403738975524902, + "loss_ce": 0.007756695616990328, + "loss_iou": 0.5546875, + "loss_num": 0.06494140625, + "loss_xval": 1.4296875, + "num_input_tokens_seen": 527550740, + "step": 7887 + }, + { + "epoch": 0.8950921985815603, + "grad_norm": 29.018402099609375, + "learning_rate": 5e-05, + "loss": 1.3212, + "num_input_tokens_seen": 527617480, + "step": 7888 + }, + { + "epoch": 0.8950921985815603, + "loss": 1.2495734691619873, + "loss_ce": 0.008286281488835812, + "loss_iou": 0.484375, + "loss_num": 0.054443359375, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 527617480, + "step": 7888 + }, + { + "epoch": 0.8952056737588653, + "grad_norm": 37.548946380615234, + "learning_rate": 5e-05, + "loss": 1.1654, + "num_input_tokens_seen": 527684652, + "step": 7889 + }, + { + "epoch": 0.8952056737588653, + "loss": 1.167483925819397, + "loss_ce": 0.00830415915697813, + "loss_iou": 0.4765625, + "loss_num": 0.041259765625, + "loss_xval": 1.15625, + "num_input_tokens_seen": 527684652, + "step": 7889 + }, + { + "epoch": 0.8953191489361703, + "grad_norm": 24.818645477294922, + "learning_rate": 5e-05, + "loss": 1.6132, + "num_input_tokens_seen": 527752636, + "step": 7890 + }, + { + "epoch": 0.8953191489361703, + "loss": 1.6368680000305176, + "loss_ce": 0.007473482750356197, + "loss_iou": 0.6328125, + "loss_num": 0.0732421875, + "loss_xval": 1.6328125, + "num_input_tokens_seen": 527752636, + "step": 7890 + }, + { + "epoch": 0.8954326241134751, + "grad_norm": 25.52562713623047, + "learning_rate": 5e-05, + "loss": 1.1633, + "num_input_tokens_seen": 527820628, + "step": 7891 + }, + { + "epoch": 0.8954326241134751, + "loss": 1.2963496446609497, + "loss_ce": 0.008751949295401573, + "loss_iou": 0.53515625, + "loss_num": 0.04248046875, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 527820628, + "step": 7891 + }, + { + "epoch": 0.8955460992907801, + "grad_norm": 48.36626434326172, + "learning_rate": 5e-05, + "loss": 1.1358, + "num_input_tokens_seen": 527887836, + "step": 7892 + }, + { + "epoch": 0.8955460992907801, + "loss": 1.2040350437164307, + "loss_ce": 0.0062811486423015594, + "loss_iou": 0.482421875, + "loss_num": 0.046875, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 527887836, + "step": 7892 + }, + { + "epoch": 0.8956595744680851, + "grad_norm": 35.40951156616211, + "learning_rate": 5e-05, + "loss": 1.124, + "num_input_tokens_seen": 527954536, + "step": 7893 + }, + { + "epoch": 0.8956595744680851, + "loss": 0.9777355194091797, + "loss_ce": 0.004346862435340881, + "loss_iou": 0.37109375, + "loss_num": 0.0458984375, + "loss_xval": 0.97265625, + "num_input_tokens_seen": 527954536, + "step": 7893 + }, + { + "epoch": 0.8957730496453901, + "grad_norm": 46.95203399658203, + "learning_rate": 5e-05, + "loss": 1.1452, + "num_input_tokens_seen": 528021912, + "step": 7894 + }, + { + "epoch": 0.8957730496453901, + "loss": 1.0579140186309814, + "loss_ce": 0.0027383388951420784, + "loss_iou": 0.431640625, + "loss_num": 0.03857421875, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 528021912, + "step": 7894 + }, + { + "epoch": 0.895886524822695, + "grad_norm": 37.6141357421875, + "learning_rate": 5e-05, + "loss": 1.2001, + "num_input_tokens_seen": 528088700, + "step": 7895 + }, + { + "epoch": 0.895886524822695, + "loss": 1.235693097114563, + "loss_ce": 0.00375955062918365, + "loss_iou": 0.515625, + "loss_num": 0.0400390625, + "loss_xval": 1.234375, + "num_input_tokens_seen": 528088700, + "step": 7895 + }, + { + "epoch": 0.896, + "grad_norm": 28.491140365600586, + "learning_rate": 5e-05, + "loss": 1.0537, + "num_input_tokens_seen": 528155116, + "step": 7896 + }, + { + "epoch": 0.896, + "loss": 1.119389533996582, + "loss_ce": 0.006352345924824476, + "loss_iou": 0.48046875, + "loss_num": 0.030517578125, + "loss_xval": 1.109375, + "num_input_tokens_seen": 528155116, + "step": 7896 + }, + { + "epoch": 0.896113475177305, + "grad_norm": 24.85100555419922, + "learning_rate": 5e-05, + "loss": 1.0565, + "num_input_tokens_seen": 528221596, + "step": 7897 + }, + { + "epoch": 0.896113475177305, + "loss": 1.1207709312438965, + "loss_ce": 0.006024925969541073, + "loss_iou": 0.474609375, + "loss_num": 0.032958984375, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 528221596, + "step": 7897 + }, + { + "epoch": 0.89622695035461, + "grad_norm": 27.116579055786133, + "learning_rate": 5e-05, + "loss": 1.0822, + "num_input_tokens_seen": 528288048, + "step": 7898 + }, + { + "epoch": 0.89622695035461, + "loss": 1.0504953861236572, + "loss_ce": 0.004108612425625324, + "loss_iou": 0.427734375, + "loss_num": 0.037841796875, + "loss_xval": 1.046875, + "num_input_tokens_seen": 528288048, + "step": 7898 + }, + { + "epoch": 0.8963404255319148, + "grad_norm": 24.547367095947266, + "learning_rate": 5e-05, + "loss": 1.1253, + "num_input_tokens_seen": 528354316, + "step": 7899 + }, + { + "epoch": 0.8963404255319148, + "loss": 1.1191439628601074, + "loss_ce": 0.004397938493639231, + "loss_iou": 0.447265625, + "loss_num": 0.044189453125, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 528354316, + "step": 7899 + }, + { + "epoch": 0.8964539007092198, + "grad_norm": 24.893890380859375, + "learning_rate": 5e-05, + "loss": 1.1017, + "num_input_tokens_seen": 528421040, + "step": 7900 + }, + { + "epoch": 0.8964539007092198, + "loss": 1.0717415809631348, + "loss_ce": 0.005823597311973572, + "loss_iou": 0.43359375, + "loss_num": 0.0400390625, + "loss_xval": 1.0625, + "num_input_tokens_seen": 528421040, + "step": 7900 + }, + { + "epoch": 0.8965673758865248, + "grad_norm": 43.00497817993164, + "learning_rate": 5e-05, + "loss": 1.0061, + "num_input_tokens_seen": 528487148, + "step": 7901 + }, + { + "epoch": 0.8965673758865248, + "loss": 1.1307592391967773, + "loss_ce": 0.003806165885180235, + "loss_iou": 0.435546875, + "loss_num": 0.050537109375, + "loss_xval": 1.125, + "num_input_tokens_seen": 528487148, + "step": 7901 + }, + { + "epoch": 0.8966808510638298, + "grad_norm": 21.096330642700195, + "learning_rate": 5e-05, + "loss": 1.143, + "num_input_tokens_seen": 528554820, + "step": 7902 + }, + { + "epoch": 0.8966808510638298, + "loss": 0.9541460275650024, + "loss_ce": 0.004438955336809158, + "loss_iou": 0.4296875, + "loss_num": 0.018310546875, + "loss_xval": 0.94921875, + "num_input_tokens_seen": 528554820, + "step": 7902 + }, + { + "epoch": 0.8967943262411348, + "grad_norm": 28.134490966796875, + "learning_rate": 5e-05, + "loss": 1.1978, + "num_input_tokens_seen": 528621304, + "step": 7903 + }, + { + "epoch": 0.8967943262411348, + "loss": 1.1572158336639404, + "loss_ce": 0.010243233293294907, + "loss_iou": 0.4609375, + "loss_num": 0.044677734375, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 528621304, + "step": 7903 + }, + { + "epoch": 0.8969078014184397, + "grad_norm": 40.24374771118164, + "learning_rate": 5e-05, + "loss": 1.3719, + "num_input_tokens_seen": 528687516, + "step": 7904 + }, + { + "epoch": 0.8969078014184397, + "loss": 1.2504189014434814, + "loss_ce": 0.005301821045577526, + "loss_iou": 0.5234375, + "loss_num": 0.039306640625, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 528687516, + "step": 7904 + }, + { + "epoch": 0.8970212765957447, + "grad_norm": 28.874950408935547, + "learning_rate": 5e-05, + "loss": 1.2954, + "num_input_tokens_seen": 528754892, + "step": 7905 + }, + { + "epoch": 0.8970212765957447, + "loss": 1.3465783596038818, + "loss_ce": 0.005758095066994429, + "loss_iou": 0.5703125, + "loss_num": 0.040283203125, + "loss_xval": 1.34375, + "num_input_tokens_seen": 528754892, + "step": 7905 + }, + { + "epoch": 0.8971347517730497, + "grad_norm": 28.17296600341797, + "learning_rate": 5e-05, + "loss": 1.1225, + "num_input_tokens_seen": 528822152, + "step": 7906 + }, + { + "epoch": 0.8971347517730497, + "loss": 1.0396738052368164, + "loss_ce": 0.008667876943945885, + "loss_iou": 0.40625, + "loss_num": 0.04345703125, + "loss_xval": 1.03125, + "num_input_tokens_seen": 528822152, + "step": 7906 + }, + { + "epoch": 0.8972482269503546, + "grad_norm": 32.36654281616211, + "learning_rate": 5e-05, + "loss": 1.0683, + "num_input_tokens_seen": 528888752, + "step": 7907 + }, + { + "epoch": 0.8972482269503546, + "loss": 1.1425949335098267, + "loss_ce": 0.0038009837735444307, + "loss_iou": 0.51171875, + "loss_num": 0.0234375, + "loss_xval": 1.140625, + "num_input_tokens_seen": 528888752, + "step": 7907 + }, + { + "epoch": 0.8973617021276595, + "grad_norm": 30.693105697631836, + "learning_rate": 5e-05, + "loss": 1.1297, + "num_input_tokens_seen": 528955936, + "step": 7908 + }, + { + "epoch": 0.8973617021276595, + "loss": 1.1465903520584106, + "loss_ce": 0.005965340416878462, + "loss_iou": 0.443359375, + "loss_num": 0.05078125, + "loss_xval": 1.140625, + "num_input_tokens_seen": 528955936, + "step": 7908 + }, + { + "epoch": 0.8974751773049645, + "grad_norm": 32.01324462890625, + "learning_rate": 5e-05, + "loss": 1.1594, + "num_input_tokens_seen": 529022196, + "step": 7909 + }, + { + "epoch": 0.8974751773049645, + "loss": 1.3333172798156738, + "loss_ce": 0.01202816516160965, + "loss_iou": 0.52734375, + "loss_num": 0.052978515625, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 529022196, + "step": 7909 + }, + { + "epoch": 0.8975886524822695, + "grad_norm": 46.231353759765625, + "learning_rate": 5e-05, + "loss": 1.4421, + "num_input_tokens_seen": 529088928, + "step": 7910 + }, + { + "epoch": 0.8975886524822695, + "loss": 1.376652479171753, + "loss_ce": 0.004093986004590988, + "loss_iou": 0.5390625, + "loss_num": 0.05859375, + "loss_xval": 1.375, + "num_input_tokens_seen": 529088928, + "step": 7910 + }, + { + "epoch": 0.8977021276595745, + "grad_norm": 31.139528274536133, + "learning_rate": 5e-05, + "loss": 1.2275, + "num_input_tokens_seen": 529156924, + "step": 7911 + }, + { + "epoch": 0.8977021276595745, + "loss": 1.2562791109085083, + "loss_ce": 0.004326047375798225, + "loss_iou": 0.54296875, + "loss_num": 0.032470703125, + "loss_xval": 1.25, + "num_input_tokens_seen": 529156924, + "step": 7911 + }, + { + "epoch": 0.8978156028368794, + "grad_norm": 16.65692901611328, + "learning_rate": 5e-05, + "loss": 1.1376, + "num_input_tokens_seen": 529223140, + "step": 7912 + }, + { + "epoch": 0.8978156028368794, + "loss": 1.0791364908218384, + "loss_ce": 0.006382574327290058, + "loss_iou": 0.447265625, + "loss_num": 0.035400390625, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 529223140, + "step": 7912 + }, + { + "epoch": 0.8979290780141844, + "grad_norm": 9.369155883789062, + "learning_rate": 5e-05, + "loss": 1.1017, + "num_input_tokens_seen": 529290204, + "step": 7913 + }, + { + "epoch": 0.8979290780141844, + "loss": 1.0127745866775513, + "loss_ce": 0.004473796579986811, + "loss_iou": 0.369140625, + "loss_num": 0.053466796875, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 529290204, + "step": 7913 + }, + { + "epoch": 0.8980425531914894, + "grad_norm": 22.039260864257812, + "learning_rate": 5e-05, + "loss": 1.2191, + "num_input_tokens_seen": 529357840, + "step": 7914 + }, + { + "epoch": 0.8980425531914894, + "loss": 1.1723740100860596, + "loss_ce": 0.00879967212677002, + "loss_iou": 0.435546875, + "loss_num": 0.058349609375, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 529357840, + "step": 7914 + }, + { + "epoch": 0.8981560283687944, + "grad_norm": 28.3402042388916, + "learning_rate": 5e-05, + "loss": 1.1539, + "num_input_tokens_seen": 529424600, + "step": 7915 + }, + { + "epoch": 0.8981560283687944, + "loss": 1.4704790115356445, + "loss_ce": 0.0036821053363382816, + "loss_iou": 0.56640625, + "loss_num": 0.06591796875, + "loss_xval": 1.46875, + "num_input_tokens_seen": 529424600, + "step": 7915 + }, + { + "epoch": 0.8982695035460992, + "grad_norm": 39.734344482421875, + "learning_rate": 5e-05, + "loss": 1.1045, + "num_input_tokens_seen": 529490704, + "step": 7916 + }, + { + "epoch": 0.8982695035460992, + "loss": 0.9758509397506714, + "loss_ce": 0.00758922565728426, + "loss_iou": 0.42578125, + "loss_num": 0.0238037109375, + "loss_xval": 0.96875, + "num_input_tokens_seen": 529490704, + "step": 7916 + }, + { + "epoch": 0.8983829787234042, + "grad_norm": 31.4080753326416, + "learning_rate": 5e-05, + "loss": 1.2365, + "num_input_tokens_seen": 529557896, + "step": 7917 + }, + { + "epoch": 0.8983829787234042, + "loss": 1.2112311124801636, + "loss_ce": 0.005176459904760122, + "loss_iou": 0.494140625, + "loss_num": 0.04345703125, + "loss_xval": 1.203125, + "num_input_tokens_seen": 529557896, + "step": 7917 + }, + { + "epoch": 0.8984964539007092, + "grad_norm": 47.46647262573242, + "learning_rate": 5e-05, + "loss": 1.1101, + "num_input_tokens_seen": 529625400, + "step": 7918 + }, + { + "epoch": 0.8984964539007092, + "loss": 0.9722484946250916, + "loss_ce": 0.009846189059317112, + "loss_iou": 0.41015625, + "loss_num": 0.028076171875, + "loss_xval": 0.9609375, + "num_input_tokens_seen": 529625400, + "step": 7918 + }, + { + "epoch": 0.8986099290780142, + "grad_norm": 72.3985824584961, + "learning_rate": 5e-05, + "loss": 1.1179, + "num_input_tokens_seen": 529692156, + "step": 7919 + }, + { + "epoch": 0.8986099290780142, + "loss": 1.198542594909668, + "loss_ce": 0.009089536033570766, + "loss_iou": 0.47265625, + "loss_num": 0.049072265625, + "loss_xval": 1.1875, + "num_input_tokens_seen": 529692156, + "step": 7919 + }, + { + "epoch": 0.8987234042553192, + "grad_norm": 20.930702209472656, + "learning_rate": 5e-05, + "loss": 0.9435, + "num_input_tokens_seen": 529759240, + "step": 7920 + }, + { + "epoch": 0.8987234042553192, + "loss": 1.0426499843597412, + "loss_ce": 0.009858839213848114, + "loss_iou": 0.41796875, + "loss_num": 0.039306640625, + "loss_xval": 1.03125, + "num_input_tokens_seen": 529759240, + "step": 7920 + }, + { + "epoch": 0.8988368794326241, + "grad_norm": 27.471376419067383, + "learning_rate": 5e-05, + "loss": 1.112, + "num_input_tokens_seen": 529826776, + "step": 7921 + }, + { + "epoch": 0.8988368794326241, + "loss": 1.143112063407898, + "loss_ce": 0.00395191740244627, + "loss_iou": 0.4765625, + "loss_num": 0.037109375, + "loss_xval": 1.140625, + "num_input_tokens_seen": 529826776, + "step": 7921 + }, + { + "epoch": 0.8989503546099291, + "grad_norm": 48.14865493774414, + "learning_rate": 5e-05, + "loss": 1.2153, + "num_input_tokens_seen": 529893544, + "step": 7922 + }, + { + "epoch": 0.8989503546099291, + "loss": 1.1581884622573853, + "loss_ce": 0.005844730418175459, + "loss_iou": 0.4375, + "loss_num": 0.0556640625, + "loss_xval": 1.15625, + "num_input_tokens_seen": 529893544, + "step": 7922 + }, + { + "epoch": 0.8990638297872341, + "grad_norm": 23.639341354370117, + "learning_rate": 5e-05, + "loss": 1.1155, + "num_input_tokens_seen": 529960996, + "step": 7923 + }, + { + "epoch": 0.8990638297872341, + "loss": 1.0741636753082275, + "loss_ce": 0.007757483050227165, + "loss_iou": 0.40234375, + "loss_num": 0.052490234375, + "loss_xval": 1.0625, + "num_input_tokens_seen": 529960996, + "step": 7923 + }, + { + "epoch": 0.899177304964539, + "grad_norm": 26.579315185546875, + "learning_rate": 5e-05, + "loss": 1.4251, + "num_input_tokens_seen": 530028516, + "step": 7924 + }, + { + "epoch": 0.899177304964539, + "loss": 1.292119026184082, + "loss_ce": 0.006962677463889122, + "loss_iou": 0.49609375, + "loss_num": 0.058349609375, + "loss_xval": 1.28125, + "num_input_tokens_seen": 530028516, + "step": 7924 + }, + { + "epoch": 0.8992907801418439, + "grad_norm": 35.726654052734375, + "learning_rate": 5e-05, + "loss": 1.205, + "num_input_tokens_seen": 530095568, + "step": 7925 + }, + { + "epoch": 0.8992907801418439, + "loss": 1.3648719787597656, + "loss_ce": 0.0113564133644104, + "loss_iou": 0.52734375, + "loss_num": 0.05908203125, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 530095568, + "step": 7925 + }, + { + "epoch": 0.8994042553191489, + "grad_norm": 45.926849365234375, + "learning_rate": 5e-05, + "loss": 1.4103, + "num_input_tokens_seen": 530162200, + "step": 7926 + }, + { + "epoch": 0.8994042553191489, + "loss": 1.4325977563858032, + "loss_ce": 0.006816535256803036, + "loss_iou": 0.5703125, + "loss_num": 0.057373046875, + "loss_xval": 1.421875, + "num_input_tokens_seen": 530162200, + "step": 7926 + }, + { + "epoch": 0.8995177304964539, + "grad_norm": 40.35443878173828, + "learning_rate": 5e-05, + "loss": 1.2164, + "num_input_tokens_seen": 530228592, + "step": 7927 + }, + { + "epoch": 0.8995177304964539, + "loss": 1.3788995742797852, + "loss_ce": 0.007317568641155958, + "loss_iou": 0.546875, + "loss_num": 0.054931640625, + "loss_xval": 1.375, + "num_input_tokens_seen": 530228592, + "step": 7927 + }, + { + "epoch": 0.8996312056737589, + "grad_norm": 35.50190353393555, + "learning_rate": 5e-05, + "loss": 1.2727, + "num_input_tokens_seen": 530295212, + "step": 7928 + }, + { + "epoch": 0.8996312056737589, + "loss": 1.3427565097808838, + "loss_ce": 0.007795482873916626, + "loss_iou": 0.55078125, + "loss_num": 0.046142578125, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 530295212, + "step": 7928 + }, + { + "epoch": 0.8997446808510638, + "grad_norm": 20.161346435546875, + "learning_rate": 5e-05, + "loss": 1.0952, + "num_input_tokens_seen": 530363248, + "step": 7929 + }, + { + "epoch": 0.8997446808510638, + "loss": 1.1138839721679688, + "loss_ce": 0.004753223620355129, + "loss_iou": 0.431640625, + "loss_num": 0.0498046875, + "loss_xval": 1.109375, + "num_input_tokens_seen": 530363248, + "step": 7929 + }, + { + "epoch": 0.8998581560283688, + "grad_norm": 23.140037536621094, + "learning_rate": 5e-05, + "loss": 1.0522, + "num_input_tokens_seen": 530428848, + "step": 7930 + }, + { + "epoch": 0.8998581560283688, + "loss": 1.0125784873962402, + "loss_ce": 0.004033499397337437, + "loss_iou": 0.423828125, + "loss_num": 0.032470703125, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 530428848, + "step": 7930 + }, + { + "epoch": 0.8999716312056738, + "grad_norm": 24.305740356445312, + "learning_rate": 5e-05, + "loss": 1.1418, + "num_input_tokens_seen": 530496076, + "step": 7931 + }, + { + "epoch": 0.8999716312056738, + "loss": 0.98154217004776, + "loss_ce": 0.004247195553034544, + "loss_iou": 0.4375, + "loss_num": 0.0206298828125, + "loss_xval": 0.9765625, + "num_input_tokens_seen": 530496076, + "step": 7931 + }, + { + "epoch": 0.9000851063829787, + "grad_norm": 25.826610565185547, + "learning_rate": 5e-05, + "loss": 1.156, + "num_input_tokens_seen": 530562968, + "step": 7932 + }, + { + "epoch": 0.9000851063829787, + "loss": 1.1627342700958252, + "loss_ce": 0.005507700145244598, + "loss_iou": 0.48046875, + "loss_num": 0.03955078125, + "loss_xval": 1.15625, + "num_input_tokens_seen": 530562968, + "step": 7932 + }, + { + "epoch": 0.9001985815602837, + "grad_norm": 35.317813873291016, + "learning_rate": 5e-05, + "loss": 1.0497, + "num_input_tokens_seen": 530630628, + "step": 7933 + }, + { + "epoch": 0.9001985815602837, + "loss": 1.0710244178771973, + "loss_ce": 0.006571268197149038, + "loss_iou": 0.46484375, + "loss_num": 0.027099609375, + "loss_xval": 1.0625, + "num_input_tokens_seen": 530630628, + "step": 7933 + }, + { + "epoch": 0.9003120567375886, + "grad_norm": 35.070350646972656, + "learning_rate": 5e-05, + "loss": 1.4607, + "num_input_tokens_seen": 530698500, + "step": 7934 + }, + { + "epoch": 0.9003120567375886, + "loss": 1.3689990043640137, + "loss_ce": 0.005229384638369083, + "loss_iou": 0.57421875, + "loss_num": 0.04345703125, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 530698500, + "step": 7934 + }, + { + "epoch": 0.9004255319148936, + "grad_norm": 32.347496032714844, + "learning_rate": 5e-05, + "loss": 1.0917, + "num_input_tokens_seen": 530765140, + "step": 7935 + }, + { + "epoch": 0.9004255319148936, + "loss": 1.093759298324585, + "loss_ce": 0.0048920754343271255, + "loss_iou": 0.46484375, + "loss_num": 0.031982421875, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 530765140, + "step": 7935 + }, + { + "epoch": 0.9005390070921986, + "grad_norm": 29.610145568847656, + "learning_rate": 5e-05, + "loss": 1.1534, + "num_input_tokens_seen": 530832828, + "step": 7936 + }, + { + "epoch": 0.9005390070921986, + "loss": 1.0060136318206787, + "loss_ce": 0.005174277815967798, + "loss_iou": 0.431640625, + "loss_num": 0.0274658203125, + "loss_xval": 1.0, + "num_input_tokens_seen": 530832828, + "step": 7936 + }, + { + "epoch": 0.9006524822695036, + "grad_norm": 36.82908630371094, + "learning_rate": 5e-05, + "loss": 1.0842, + "num_input_tokens_seen": 530898832, + "step": 7937 + }, + { + "epoch": 0.9006524822695036, + "loss": 1.1984808444976807, + "loss_ce": 0.004633232485502958, + "loss_iou": 0.484375, + "loss_num": 0.045166015625, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 530898832, + "step": 7937 + }, + { + "epoch": 0.9007659574468085, + "grad_norm": 41.60575485229492, + "learning_rate": 5e-05, + "loss": 1.3106, + "num_input_tokens_seen": 530965796, + "step": 7938 + }, + { + "epoch": 0.9007659574468085, + "loss": 1.2302706241607666, + "loss_ce": 0.009079167619347572, + "loss_iou": 0.462890625, + "loss_num": 0.05908203125, + "loss_xval": 1.21875, + "num_input_tokens_seen": 530965796, + "step": 7938 + }, + { + "epoch": 0.9008794326241135, + "grad_norm": 52.252811431884766, + "learning_rate": 5e-05, + "loss": 1.2583, + "num_input_tokens_seen": 531033360, + "step": 7939 + }, + { + "epoch": 0.9008794326241135, + "loss": 1.1324774026870728, + "loss_ce": 0.003082859329879284, + "loss_iou": 0.4765625, + "loss_num": 0.03515625, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 531033360, + "step": 7939 + }, + { + "epoch": 0.9009929078014184, + "grad_norm": 27.401269912719727, + "learning_rate": 5e-05, + "loss": 1.1757, + "num_input_tokens_seen": 531099824, + "step": 7940 + }, + { + "epoch": 0.9009929078014184, + "loss": 1.207685112953186, + "loss_ce": 0.007978043518960476, + "loss_iou": 0.46875, + "loss_num": 0.052734375, + "loss_xval": 1.203125, + "num_input_tokens_seen": 531099824, + "step": 7940 + }, + { + "epoch": 0.9011063829787234, + "grad_norm": 31.323806762695312, + "learning_rate": 5e-05, + "loss": 0.9738, + "num_input_tokens_seen": 531166740, + "step": 7941 + }, + { + "epoch": 0.9011063829787234, + "loss": 1.0223286151885986, + "loss_ce": 0.003773953765630722, + "loss_iou": 0.43359375, + "loss_num": 0.0302734375, + "loss_xval": 1.015625, + "num_input_tokens_seen": 531166740, + "step": 7941 + }, + { + "epoch": 0.9012198581560283, + "grad_norm": 24.152015686035156, + "learning_rate": 5e-05, + "loss": 1.1051, + "num_input_tokens_seen": 531233328, + "step": 7942 + }, + { + "epoch": 0.9012198581560283, + "loss": 1.220895528793335, + "loss_ce": 0.009958039037883282, + "loss_iou": 0.482421875, + "loss_num": 0.049560546875, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 531233328, + "step": 7942 + }, + { + "epoch": 0.9013333333333333, + "grad_norm": 48.146671295166016, + "learning_rate": 5e-05, + "loss": 1.327, + "num_input_tokens_seen": 531300044, + "step": 7943 + }, + { + "epoch": 0.9013333333333333, + "loss": 1.1228342056274414, + "loss_ce": 0.005646763369441032, + "loss_iou": 0.4765625, + "loss_num": 0.0322265625, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 531300044, + "step": 7943 + }, + { + "epoch": 0.9014468085106383, + "grad_norm": 36.0324592590332, + "learning_rate": 5e-05, + "loss": 1.4332, + "num_input_tokens_seen": 531366480, + "step": 7944 + }, + { + "epoch": 0.9014468085106383, + "loss": 1.5773274898529053, + "loss_ce": 0.011409536004066467, + "loss_iou": 0.609375, + "loss_num": 0.06982421875, + "loss_xval": 1.5625, + "num_input_tokens_seen": 531366480, + "step": 7944 + }, + { + "epoch": 0.9015602836879433, + "grad_norm": 12.111413955688477, + "learning_rate": 5e-05, + "loss": 0.8612, + "num_input_tokens_seen": 531433888, + "step": 7945 + }, + { + "epoch": 0.9015602836879433, + "loss": 0.8153706192970276, + "loss_ce": 0.012956648133695126, + "loss_iou": 0.341796875, + "loss_num": 0.024169921875, + "loss_xval": 0.80078125, + "num_input_tokens_seen": 531433888, + "step": 7945 + }, + { + "epoch": 0.9016737588652483, + "grad_norm": 14.551475524902344, + "learning_rate": 5e-05, + "loss": 0.9407, + "num_input_tokens_seen": 531500980, + "step": 7946 + }, + { + "epoch": 0.9016737588652483, + "loss": 1.0025566816329956, + "loss_ce": 0.022820353507995605, + "loss_iou": 0.37890625, + "loss_num": 0.0439453125, + "loss_xval": 0.98046875, + "num_input_tokens_seen": 531500980, + "step": 7946 + }, + { + "epoch": 0.9017872340425532, + "grad_norm": 21.53519630432129, + "learning_rate": 5e-05, + "loss": 1.1178, + "num_input_tokens_seen": 531568048, + "step": 7947 + }, + { + "epoch": 0.9017872340425532, + "loss": 1.304362416267395, + "loss_ce": 0.08805384486913681, + "loss_iou": 0.4609375, + "loss_num": 0.058837890625, + "loss_xval": 1.21875, + "num_input_tokens_seen": 531568048, + "step": 7947 + }, + { + "epoch": 0.9019007092198582, + "grad_norm": 16.56079864501953, + "learning_rate": 5e-05, + "loss": 0.9393, + "num_input_tokens_seen": 531634256, + "step": 7948 + }, + { + "epoch": 0.9019007092198582, + "loss": 1.0395539999008179, + "loss_ce": 0.11914382874965668, + "loss_iou": 0.326171875, + "loss_num": 0.05322265625, + "loss_xval": 0.921875, + "num_input_tokens_seen": 531634256, + "step": 7948 + }, + { + "epoch": 0.9020141843971631, + "grad_norm": 15.438474655151367, + "learning_rate": 5e-05, + "loss": 1.087, + "num_input_tokens_seen": 531702584, + "step": 7949 + }, + { + "epoch": 0.9020141843971631, + "loss": 1.163250207901001, + "loss_ce": 0.007976744323968887, + "loss_iou": 0.46875, + "loss_num": 0.04345703125, + "loss_xval": 1.15625, + "num_input_tokens_seen": 531702584, + "step": 7949 + }, + { + "epoch": 0.902127659574468, + "grad_norm": 25.531126022338867, + "learning_rate": 5e-05, + "loss": 1.1203, + "num_input_tokens_seen": 531770344, + "step": 7950 + }, + { + "epoch": 0.902127659574468, + "loss": 1.0750236511230469, + "loss_ce": 0.010082267224788666, + "loss_iou": 0.45703125, + "loss_num": 0.0302734375, + "loss_xval": 1.0625, + "num_input_tokens_seen": 531770344, + "step": 7950 + }, + { + "epoch": 0.902241134751773, + "grad_norm": 23.52625846862793, + "learning_rate": 5e-05, + "loss": 0.9893, + "num_input_tokens_seen": 531837516, + "step": 7951 + }, + { + "epoch": 0.902241134751773, + "loss": 1.057793140411377, + "loss_ce": 0.004082209896296263, + "loss_iou": 0.458984375, + "loss_num": 0.02734375, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 531837516, + "step": 7951 + }, + { + "epoch": 0.902354609929078, + "grad_norm": 28.846960067749023, + "learning_rate": 5e-05, + "loss": 1.0289, + "num_input_tokens_seen": 531903608, + "step": 7952 + }, + { + "epoch": 0.902354609929078, + "loss": 1.0497965812683105, + "loss_ce": 0.008781038224697113, + "loss_iou": 0.40234375, + "loss_num": 0.047119140625, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 531903608, + "step": 7952 + }, + { + "epoch": 0.902468085106383, + "grad_norm": 31.388336181640625, + "learning_rate": 5e-05, + "loss": 1.4052, + "num_input_tokens_seen": 531969964, + "step": 7953 + }, + { + "epoch": 0.902468085106383, + "loss": 1.1422491073608398, + "loss_ce": 0.004553730599582195, + "loss_iou": 0.494140625, + "loss_num": 0.0296630859375, + "loss_xval": 1.140625, + "num_input_tokens_seen": 531969964, + "step": 7953 + }, + { + "epoch": 0.902581560283688, + "grad_norm": 40.14531707763672, + "learning_rate": 5e-05, + "loss": 1.0407, + "num_input_tokens_seen": 532037232, + "step": 7954 + }, + { + "epoch": 0.902581560283688, + "loss": 1.0524502992630005, + "loss_ce": 0.009481595829129219, + "loss_iou": 0.400390625, + "loss_num": 0.048583984375, + "loss_xval": 1.046875, + "num_input_tokens_seen": 532037232, + "step": 7954 + }, + { + "epoch": 0.902695035460993, + "grad_norm": 40.89832305908203, + "learning_rate": 5e-05, + "loss": 1.4037, + "num_input_tokens_seen": 532104168, + "step": 7955 + }, + { + "epoch": 0.902695035460993, + "loss": 1.5204079151153564, + "loss_ce": 0.003806252498179674, + "loss_iou": 0.5546875, + "loss_num": 0.08203125, + "loss_xval": 1.515625, + "num_input_tokens_seen": 532104168, + "step": 7955 + }, + { + "epoch": 0.9028085106382979, + "grad_norm": 26.338808059692383, + "learning_rate": 5e-05, + "loss": 1.0701, + "num_input_tokens_seen": 532170316, + "step": 7956 + }, + { + "epoch": 0.9028085106382979, + "loss": 1.1927993297576904, + "loss_ce": 0.0074965739622712135, + "loss_iou": 0.482421875, + "loss_num": 0.044189453125, + "loss_xval": 1.1875, + "num_input_tokens_seen": 532170316, + "step": 7956 + }, + { + "epoch": 0.9029219858156028, + "grad_norm": 21.80779266357422, + "learning_rate": 5e-05, + "loss": 1.1325, + "num_input_tokens_seen": 532237860, + "step": 7957 + }, + { + "epoch": 0.9029219858156028, + "loss": 1.1579861640930176, + "loss_ce": 0.00515408581122756, + "loss_iou": 0.482421875, + "loss_num": 0.037841796875, + "loss_xval": 1.15625, + "num_input_tokens_seen": 532237860, + "step": 7957 + }, + { + "epoch": 0.9030354609929078, + "grad_norm": 24.545284271240234, + "learning_rate": 5e-05, + "loss": 1.0201, + "num_input_tokens_seen": 532305520, + "step": 7958 + }, + { + "epoch": 0.9030354609929078, + "loss": 1.0189889669418335, + "loss_ce": 0.005317098461091518, + "loss_iou": 0.427734375, + "loss_num": 0.03173828125, + "loss_xval": 1.015625, + "num_input_tokens_seen": 532305520, + "step": 7958 + }, + { + "epoch": 0.9031489361702127, + "grad_norm": 36.512794494628906, + "learning_rate": 5e-05, + "loss": 1.0938, + "num_input_tokens_seen": 532372592, + "step": 7959 + }, + { + "epoch": 0.9031489361702127, + "loss": 1.1185503005981445, + "loss_ce": 0.005513135809451342, + "loss_iou": 0.453125, + "loss_num": 0.041259765625, + "loss_xval": 1.109375, + "num_input_tokens_seen": 532372592, + "step": 7959 + }, + { + "epoch": 0.9032624113475177, + "grad_norm": 40.312801361083984, + "learning_rate": 5e-05, + "loss": 1.11, + "num_input_tokens_seen": 532438796, + "step": 7960 + }, + { + "epoch": 0.9032624113475177, + "loss": 0.9502089023590088, + "loss_ce": 0.005872930400073528, + "loss_iou": 0.42578125, + "loss_num": 0.0181884765625, + "loss_xval": 0.9453125, + "num_input_tokens_seen": 532438796, + "step": 7960 + }, + { + "epoch": 0.9033758865248227, + "grad_norm": 24.61942481994629, + "learning_rate": 5e-05, + "loss": 1.254, + "num_input_tokens_seen": 532505460, + "step": 7961 + }, + { + "epoch": 0.9033758865248227, + "loss": 1.0911338329315186, + "loss_ce": 0.007149493787437677, + "loss_iou": 0.486328125, + "loss_num": 0.0223388671875, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 532505460, + "step": 7961 + }, + { + "epoch": 0.9034893617021277, + "grad_norm": 15.195837020874023, + "learning_rate": 5e-05, + "loss": 1.1657, + "num_input_tokens_seen": 532572312, + "step": 7962 + }, + { + "epoch": 0.9034893617021277, + "loss": 0.9963790774345398, + "loss_ce": 0.004191601648926735, + "loss_iou": 0.416015625, + "loss_num": 0.031982421875, + "loss_xval": 0.9921875, + "num_input_tokens_seen": 532572312, + "step": 7962 + }, + { + "epoch": 0.9036028368794327, + "grad_norm": 16.95564079284668, + "learning_rate": 5e-05, + "loss": 0.9501, + "num_input_tokens_seen": 532639484, + "step": 7963 + }, + { + "epoch": 0.9036028368794327, + "loss": 1.0718297958374023, + "loss_ce": 0.007864986546337605, + "loss_iou": 0.455078125, + "loss_num": 0.0308837890625, + "loss_xval": 1.0625, + "num_input_tokens_seen": 532639484, + "step": 7963 + }, + { + "epoch": 0.9037163120567376, + "grad_norm": 23.378252029418945, + "learning_rate": 5e-05, + "loss": 0.989, + "num_input_tokens_seen": 532707340, + "step": 7964 + }, + { + "epoch": 0.9037163120567376, + "loss": 1.0205297470092773, + "loss_ce": 0.00514884851872921, + "loss_iou": 0.396484375, + "loss_num": 0.04443359375, + "loss_xval": 1.015625, + "num_input_tokens_seen": 532707340, + "step": 7964 + }, + { + "epoch": 0.9038297872340425, + "grad_norm": 35.14237594604492, + "learning_rate": 5e-05, + "loss": 1.0833, + "num_input_tokens_seen": 532773476, + "step": 7965 + }, + { + "epoch": 0.9038297872340425, + "loss": 1.3351173400878906, + "loss_ce": 0.00748065859079361, + "loss_iou": 0.53515625, + "loss_num": 0.05224609375, + "loss_xval": 1.328125, + "num_input_tokens_seen": 532773476, + "step": 7965 + }, + { + "epoch": 0.9039432624113475, + "grad_norm": 39.61227035522461, + "learning_rate": 5e-05, + "loss": 1.1202, + "num_input_tokens_seen": 532840480, + "step": 7966 + }, + { + "epoch": 0.9039432624113475, + "loss": 0.899034857749939, + "loss_ce": 0.00303870951756835, + "loss_iou": 0.388671875, + "loss_num": 0.0235595703125, + "loss_xval": 0.89453125, + "num_input_tokens_seen": 532840480, + "step": 7966 + }, + { + "epoch": 0.9040567375886525, + "grad_norm": 29.929954528808594, + "learning_rate": 5e-05, + "loss": 1.3346, + "num_input_tokens_seen": 532906608, + "step": 7967 + }, + { + "epoch": 0.9040567375886525, + "loss": 1.1285662651062012, + "loss_ce": 0.006923213135451078, + "loss_iou": 0.474609375, + "loss_num": 0.0341796875, + "loss_xval": 1.125, + "num_input_tokens_seen": 532906608, + "step": 7967 + }, + { + "epoch": 0.9041702127659574, + "grad_norm": 22.302953720092773, + "learning_rate": 5e-05, + "loss": 1.1926, + "num_input_tokens_seen": 532973328, + "step": 7968 + }, + { + "epoch": 0.9041702127659574, + "loss": 1.0836870670318604, + "loss_ce": 0.008491779677569866, + "loss_iou": 0.44921875, + "loss_num": 0.035400390625, + "loss_xval": 1.078125, + "num_input_tokens_seen": 532973328, + "step": 7968 + }, + { + "epoch": 0.9042836879432624, + "grad_norm": 40.45433044433594, + "learning_rate": 5e-05, + "loss": 1.1256, + "num_input_tokens_seen": 533039984, + "step": 7969 + }, + { + "epoch": 0.9042836879432624, + "loss": 1.1911695003509521, + "loss_ce": 0.008552326820790768, + "loss_iou": 0.498046875, + "loss_num": 0.036865234375, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 533039984, + "step": 7969 + }, + { + "epoch": 0.9043971631205674, + "grad_norm": 20.559860229492188, + "learning_rate": 5e-05, + "loss": 1.0324, + "num_input_tokens_seen": 533107480, + "step": 7970 + }, + { + "epoch": 0.9043971631205674, + "loss": 0.8799256682395935, + "loss_ce": 0.0054139792919158936, + "loss_iou": 0.404296875, + "loss_num": 0.01336669921875, + "loss_xval": 0.875, + "num_input_tokens_seen": 533107480, + "step": 7970 + }, + { + "epoch": 0.9045106382978724, + "grad_norm": 22.6672420501709, + "learning_rate": 5e-05, + "loss": 1.093, + "num_input_tokens_seen": 533174176, + "step": 7971 + }, + { + "epoch": 0.9045106382978724, + "loss": 1.1923940181732178, + "loss_ce": 0.007457440719008446, + "loss_iou": 0.4453125, + "loss_num": 0.058349609375, + "loss_xval": 1.1875, + "num_input_tokens_seen": 533174176, + "step": 7971 + }, + { + "epoch": 0.9046241134751773, + "grad_norm": 29.09718894958496, + "learning_rate": 5e-05, + "loss": 0.9623, + "num_input_tokens_seen": 533240916, + "step": 7972 + }, + { + "epoch": 0.9046241134751773, + "loss": 1.122342586517334, + "loss_ce": 0.004178482573479414, + "loss_iou": 0.45703125, + "loss_num": 0.041015625, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 533240916, + "step": 7972 + }, + { + "epoch": 0.9047375886524822, + "grad_norm": 34.31465530395508, + "learning_rate": 5e-05, + "loss": 1.1234, + "num_input_tokens_seen": 533307984, + "step": 7973 + }, + { + "epoch": 0.9047375886524822, + "loss": 1.2206543684005737, + "loss_ce": 0.006787183694541454, + "loss_iou": 0.484375, + "loss_num": 0.048583984375, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 533307984, + "step": 7973 + }, + { + "epoch": 0.9048510638297872, + "grad_norm": 51.08740997314453, + "learning_rate": 5e-05, + "loss": 1.1122, + "num_input_tokens_seen": 533375396, + "step": 7974 + }, + { + "epoch": 0.9048510638297872, + "loss": 1.102609634399414, + "loss_ce": 0.006906425580382347, + "loss_iou": 0.48046875, + "loss_num": 0.0269775390625, + "loss_xval": 1.09375, + "num_input_tokens_seen": 533375396, + "step": 7974 + }, + { + "epoch": 0.9049645390070922, + "grad_norm": 38.97102737426758, + "learning_rate": 5e-05, + "loss": 1.2371, + "num_input_tokens_seen": 533442480, + "step": 7975 + }, + { + "epoch": 0.9049645390070922, + "loss": 1.389622449874878, + "loss_ce": 0.003880314063280821, + "loss_iou": 0.58203125, + "loss_num": 0.044921875, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 533442480, + "step": 7975 + }, + { + "epoch": 0.9050780141843972, + "grad_norm": 18.61118507385254, + "learning_rate": 5e-05, + "loss": 1.2875, + "num_input_tokens_seen": 533509232, + "step": 7976 + }, + { + "epoch": 0.9050780141843972, + "loss": 1.4211323261260986, + "loss_ce": 0.00609328830614686, + "loss_iou": 0.55859375, + "loss_num": 0.06005859375, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 533509232, + "step": 7976 + }, + { + "epoch": 0.9051914893617021, + "grad_norm": 13.777307510375977, + "learning_rate": 5e-05, + "loss": 1.059, + "num_input_tokens_seen": 533576180, + "step": 7977 + }, + { + "epoch": 0.9051914893617021, + "loss": 1.1276767253875732, + "loss_ce": 0.005118175409734249, + "loss_iou": 0.4921875, + "loss_num": 0.02783203125, + "loss_xval": 1.125, + "num_input_tokens_seen": 533576180, + "step": 7977 + }, + { + "epoch": 0.9053049645390071, + "grad_norm": 21.676706314086914, + "learning_rate": 5e-05, + "loss": 1.1364, + "num_input_tokens_seen": 533643720, + "step": 7978 + }, + { + "epoch": 0.9053049645390071, + "loss": 1.2684224843978882, + "loss_ce": 0.007192003540694714, + "loss_iou": 0.5, + "loss_num": 0.05224609375, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 533643720, + "step": 7978 + }, + { + "epoch": 0.9054184397163121, + "grad_norm": 20.418598175048828, + "learning_rate": 5e-05, + "loss": 1.3625, + "num_input_tokens_seen": 533710784, + "step": 7979 + }, + { + "epoch": 0.9054184397163121, + "loss": 1.3734171390533447, + "loss_ce": 0.0057414742186665535, + "loss_iou": 0.5390625, + "loss_num": 0.0576171875, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 533710784, + "step": 7979 + }, + { + "epoch": 0.9055319148936171, + "grad_norm": 28.977691650390625, + "learning_rate": 5e-05, + "loss": 1.2508, + "num_input_tokens_seen": 533777624, + "step": 7980 + }, + { + "epoch": 0.9055319148936171, + "loss": 1.2211562395095825, + "loss_ce": 0.006312523037195206, + "loss_iou": 0.47265625, + "loss_num": 0.05419921875, + "loss_xval": 1.21875, + "num_input_tokens_seen": 533777624, + "step": 7980 + }, + { + "epoch": 0.905645390070922, + "grad_norm": 31.493379592895508, + "learning_rate": 5e-05, + "loss": 1.2412, + "num_input_tokens_seen": 533844312, + "step": 7981 + }, + { + "epoch": 0.905645390070922, + "loss": 1.2557215690612793, + "loss_ce": 0.0054774656891822815, + "loss_iou": 0.515625, + "loss_num": 0.0439453125, + "loss_xval": 1.25, + "num_input_tokens_seen": 533844312, + "step": 7981 + }, + { + "epoch": 0.9057588652482269, + "grad_norm": 25.50387191772461, + "learning_rate": 5e-05, + "loss": 1.2151, + "num_input_tokens_seen": 533910768, + "step": 7982 + }, + { + "epoch": 0.9057588652482269, + "loss": 1.0790736675262451, + "loss_ce": 0.004366675857454538, + "loss_iou": 0.43359375, + "loss_num": 0.041748046875, + "loss_xval": 1.078125, + "num_input_tokens_seen": 533910768, + "step": 7982 + }, + { + "epoch": 0.9058723404255319, + "grad_norm": 22.063915252685547, + "learning_rate": 5e-05, + "loss": 1.1804, + "num_input_tokens_seen": 533977732, + "step": 7983 + }, + { + "epoch": 0.9058723404255319, + "loss": 1.0990831851959229, + "loss_ce": 0.005333230830729008, + "loss_iou": 0.4609375, + "loss_num": 0.034912109375, + "loss_xval": 1.09375, + "num_input_tokens_seen": 533977732, + "step": 7983 + }, + { + "epoch": 0.9059858156028369, + "grad_norm": 43.88955307006836, + "learning_rate": 5e-05, + "loss": 1.1794, + "num_input_tokens_seen": 534043572, + "step": 7984 + }, + { + "epoch": 0.9059858156028369, + "loss": 0.9215897917747498, + "loss_ce": 0.0036210352554917336, + "loss_iou": 0.384765625, + "loss_num": 0.0296630859375, + "loss_xval": 0.91796875, + "num_input_tokens_seen": 534043572, + "step": 7984 + }, + { + "epoch": 0.9060992907801418, + "grad_norm": 69.14888000488281, + "learning_rate": 5e-05, + "loss": 1.4357, + "num_input_tokens_seen": 534110044, + "step": 7985 + }, + { + "epoch": 0.9060992907801418, + "loss": 1.3273015022277832, + "loss_ce": 0.004059378057718277, + "loss_iou": 0.53515625, + "loss_num": 0.050537109375, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 534110044, + "step": 7985 + }, + { + "epoch": 0.9062127659574468, + "grad_norm": 43.33194351196289, + "learning_rate": 5e-05, + "loss": 1.4144, + "num_input_tokens_seen": 534176432, + "step": 7986 + }, + { + "epoch": 0.9062127659574468, + "loss": 1.2762197256088257, + "loss_ce": 0.006688495632261038, + "loss_iou": 0.5859375, + "loss_num": 0.020263671875, + "loss_xval": 1.265625, + "num_input_tokens_seen": 534176432, + "step": 7986 + }, + { + "epoch": 0.9063262411347518, + "grad_norm": 29.702468872070312, + "learning_rate": 5e-05, + "loss": 1.5051, + "num_input_tokens_seen": 534243632, + "step": 7987 + }, + { + "epoch": 0.9063262411347518, + "loss": 1.5997440814971924, + "loss_ce": 0.00941208004951477, + "loss_iou": 0.625, + "loss_num": 0.068359375, + "loss_xval": 1.59375, + "num_input_tokens_seen": 534243632, + "step": 7987 + }, + { + "epoch": 0.9064397163120568, + "grad_norm": 16.201858520507812, + "learning_rate": 5e-05, + "loss": 1.344, + "num_input_tokens_seen": 534311384, + "step": 7988 + }, + { + "epoch": 0.9064397163120568, + "loss": 1.3159552812576294, + "loss_ce": 0.008338071405887604, + "loss_iou": 0.51171875, + "loss_num": 0.056396484375, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 534311384, + "step": 7988 + }, + { + "epoch": 0.9065531914893618, + "grad_norm": 21.933853149414062, + "learning_rate": 5e-05, + "loss": 1.0486, + "num_input_tokens_seen": 534377668, + "step": 7989 + }, + { + "epoch": 0.9065531914893618, + "loss": 1.2617874145507812, + "loss_ce": 0.006904590874910355, + "loss_iou": 0.453125, + "loss_num": 0.0693359375, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 534377668, + "step": 7989 + }, + { + "epoch": 0.9066666666666666, + "grad_norm": 33.63631820678711, + "learning_rate": 5e-05, + "loss": 1.1954, + "num_input_tokens_seen": 534445856, + "step": 7990 + }, + { + "epoch": 0.9066666666666666, + "loss": 1.2405593395233154, + "loss_ce": 0.008137423545122147, + "loss_iou": 0.423828125, + "loss_num": 0.0771484375, + "loss_xval": 1.234375, + "num_input_tokens_seen": 534445856, + "step": 7990 + }, + { + "epoch": 0.9067801418439716, + "grad_norm": 20.61371421813965, + "learning_rate": 5e-05, + "loss": 0.8175, + "num_input_tokens_seen": 534511336, + "step": 7991 + }, + { + "epoch": 0.9067801418439716, + "loss": 0.8920964598655701, + "loss_ce": 0.004889485891908407, + "loss_iou": 0.34375, + "loss_num": 0.0400390625, + "loss_xval": 0.88671875, + "num_input_tokens_seen": 534511336, + "step": 7991 + }, + { + "epoch": 0.9068936170212766, + "grad_norm": 15.853392601013184, + "learning_rate": 5e-05, + "loss": 0.949, + "num_input_tokens_seen": 534577244, + "step": 7992 + }, + { + "epoch": 0.9068936170212766, + "loss": 1.150424838066101, + "loss_ce": 0.00442879693582654, + "loss_iou": 0.44140625, + "loss_num": 0.052734375, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 534577244, + "step": 7992 + }, + { + "epoch": 0.9070070921985816, + "grad_norm": 26.239116668701172, + "learning_rate": 5e-05, + "loss": 1.2511, + "num_input_tokens_seen": 534643652, + "step": 7993 + }, + { + "epoch": 0.9070070921985816, + "loss": 1.2031999826431274, + "loss_ce": 0.004469585604965687, + "loss_iou": 0.5, + "loss_num": 0.038818359375, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 534643652, + "step": 7993 + }, + { + "epoch": 0.9071205673758865, + "grad_norm": 28.577083587646484, + "learning_rate": 5e-05, + "loss": 1.0574, + "num_input_tokens_seen": 534711364, + "step": 7994 + }, + { + "epoch": 0.9071205673758865, + "loss": 1.0077228546142578, + "loss_ce": 0.008699323050677776, + "loss_iou": 0.39453125, + "loss_num": 0.041748046875, + "loss_xval": 1.0, + "num_input_tokens_seen": 534711364, + "step": 7994 + }, + { + "epoch": 0.9072340425531915, + "grad_norm": 32.81580352783203, + "learning_rate": 5e-05, + "loss": 1.0251, + "num_input_tokens_seen": 534778240, + "step": 7995 + }, + { + "epoch": 0.9072340425531915, + "loss": 1.123138427734375, + "loss_ce": 0.0069274017587304115, + "loss_iou": 0.48046875, + "loss_num": 0.031005859375, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 534778240, + "step": 7995 + }, + { + "epoch": 0.9073475177304965, + "grad_norm": 96.89299011230469, + "learning_rate": 5e-05, + "loss": 1.0197, + "num_input_tokens_seen": 534844092, + "step": 7996 + }, + { + "epoch": 0.9073475177304965, + "loss": 1.13187837600708, + "loss_ce": 0.004925209563225508, + "loss_iou": 0.5, + "loss_num": 0.025146484375, + "loss_xval": 1.125, + "num_input_tokens_seen": 534844092, + "step": 7996 + }, + { + "epoch": 0.9074609929078015, + "grad_norm": 37.166229248046875, + "learning_rate": 5e-05, + "loss": 1.2294, + "num_input_tokens_seen": 534911428, + "step": 7997 + }, + { + "epoch": 0.9074609929078015, + "loss": 1.166644811630249, + "loss_ce": 0.006488625425845385, + "loss_iou": 0.50390625, + "loss_num": 0.0296630859375, + "loss_xval": 1.15625, + "num_input_tokens_seen": 534911428, + "step": 7997 + }, + { + "epoch": 0.9075744680851063, + "grad_norm": 23.2115535736084, + "learning_rate": 5e-05, + "loss": 1.2162, + "num_input_tokens_seen": 534977868, + "step": 7998 + }, + { + "epoch": 0.9075744680851063, + "loss": 1.2466084957122803, + "loss_ce": 0.002956175012513995, + "loss_iou": 0.51953125, + "loss_num": 0.040283203125, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 534977868, + "step": 7998 + }, + { + "epoch": 0.9076879432624113, + "grad_norm": 22.954544067382812, + "learning_rate": 5e-05, + "loss": 1.1297, + "num_input_tokens_seen": 535045108, + "step": 7999 + }, + { + "epoch": 0.9076879432624113, + "loss": 1.1274731159210205, + "loss_ce": 0.007355961948633194, + "loss_iou": 0.447265625, + "loss_num": 0.044921875, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 535045108, + "step": 7999 + }, + { + "epoch": 0.9078014184397163, + "grad_norm": 56.07314682006836, + "learning_rate": 5e-05, + "loss": 1.287, + "num_input_tokens_seen": 535111088, + "step": 8000 + }, + { + "epoch": 0.9078014184397163, + "eval_seeclick_CIoU": 0.43051646649837494, + "eval_seeclick_GIoU": 0.4130583554506302, + "eval_seeclick_IoU": 0.5029020309448242, + "eval_seeclick_MAE_all": 0.15554723143577576, + "eval_seeclick_MAE_h": 0.07968715205788612, + "eval_seeclick_MAE_w": 0.11886032670736313, + "eval_seeclick_MAE_x_boxes": 0.17960333824157715, + "eval_seeclick_MAE_y_boxes": 0.138613723218441, + "eval_seeclick_NUM_probability": 0.9999635517597198, + "eval_seeclick_inside_bbox": 0.6770833432674408, + "eval_seeclick_loss": 2.364712715148926, + "eval_seeclick_loss_ce": 0.015469929669052362, + "eval_seeclick_loss_iou": 0.79461669921875, + "eval_seeclick_loss_num": 0.1555023193359375, + "eval_seeclick_loss_xval": 2.3665771484375, + "eval_seeclick_runtime": 65.7824, + "eval_seeclick_samples_per_second": 0.714, + "eval_seeclick_steps_per_second": 0.03, + "num_input_tokens_seen": 535111088, + "step": 8000 + }, + { + "epoch": 0.9078014184397163, + "eval_icons_CIoU": 0.5080226957798004, + "eval_icons_GIoU": 0.5098324865102768, + "eval_icons_IoU": 0.5423379093408585, + "eval_icons_MAE_all": 0.13543357327580452, + "eval_icons_MAE_h": 0.10154182463884354, + "eval_icons_MAE_w": 0.1259986087679863, + "eval_icons_MAE_x_boxes": 0.09796573966741562, + "eval_icons_MAE_y_boxes": 0.06221279501914978, + "eval_icons_NUM_probability": 0.9999938905239105, + "eval_icons_inside_bbox": 0.7986111044883728, + "eval_icons_loss": 2.1907994747161865, + "eval_icons_loss_ce": 0.00016746124310884625, + "eval_icons_loss_iou": 0.7706298828125, + "eval_icons_loss_num": 0.12168312072753906, + "eval_icons_loss_xval": 2.14794921875, + "eval_icons_runtime": 66.3119, + "eval_icons_samples_per_second": 0.754, + "eval_icons_steps_per_second": 0.03, + "num_input_tokens_seen": 535111088, + "step": 8000 + }, + { + "epoch": 0.9078014184397163, + "eval_screenspot_CIoU": 0.2540293037891388, + "eval_screenspot_GIoU": 0.21995519598325095, + "eval_screenspot_IoU": 0.3440182606379191, + "eval_screenspot_MAE_all": 0.23289077977339426, + "eval_screenspot_MAE_h": 0.17299707730611166, + "eval_screenspot_MAE_w": 0.19757875303427377, + "eval_screenspot_MAE_x_boxes": 0.25558794538180035, + "eval_screenspot_MAE_y_boxes": 0.13193622479836145, + "eval_screenspot_NUM_probability": 0.9999702175458273, + "eval_screenspot_inside_bbox": 0.5808333357175192, + "eval_screenspot_loss": 3.0734567642211914, + "eval_screenspot_loss_ce": 0.013925697344044844, + "eval_screenspot_loss_iou": 0.9544270833333334, + "eval_screenspot_loss_num": 0.24037679036458334, + "eval_screenspot_loss_xval": 3.1100260416666665, + "eval_screenspot_runtime": 124.7472, + "eval_screenspot_samples_per_second": 0.713, + "eval_screenspot_steps_per_second": 0.024, + "num_input_tokens_seen": 535111088, + "step": 8000 + }, + { + "epoch": 0.9078014184397163, + "eval_compot_CIoU": 0.2344236820936203, + "eval_compot_GIoU": 0.1876864954829216, + "eval_compot_IoU": 0.34379102289676666, + "eval_compot_MAE_all": 0.25282740592956543, + "eval_compot_MAE_h": 0.1293821483850479, + "eval_compot_MAE_w": 0.2458648458123207, + "eval_compot_MAE_x_boxes": 0.30738359689712524, + "eval_compot_MAE_y_boxes": 0.09896384179592133, + "eval_compot_NUM_probability": 0.9999889433383942, + "eval_compot_inside_bbox": 0.46875, + "eval_compot_loss": 3.168466091156006, + "eval_compot_loss_ce": 0.0062392407562583685, + "eval_compot_loss_iou": 0.95654296875, + "eval_compot_loss_num": 0.2534637451171875, + "eval_compot_loss_xval": 3.1806640625, + "eval_compot_runtime": 69.115, + "eval_compot_samples_per_second": 0.723, + "eval_compot_steps_per_second": 0.029, + "num_input_tokens_seen": 535111088, + "step": 8000 + }, + { + "epoch": 0.9078014184397163, + "loss": 3.1498701572418213, + "loss_ce": 0.0072920555248856544, + "loss_iou": 0.9609375, + "loss_num": 0.244140625, + "loss_xval": 3.140625, + "num_input_tokens_seen": 535111088, + "step": 8000 + }, + { + "epoch": 0.9079148936170213, + "grad_norm": 33.31051254272461, + "learning_rate": 5e-05, + "loss": 1.1205, + "num_input_tokens_seen": 535177240, + "step": 8001 + }, + { + "epoch": 0.9079148936170213, + "loss": 1.011879801750183, + "loss_ce": 0.007241109386086464, + "loss_iou": 0.41796875, + "loss_num": 0.03369140625, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 535177240, + "step": 8001 + }, + { + "epoch": 0.9080283687943262, + "grad_norm": 33.5251579284668, + "learning_rate": 5e-05, + "loss": 1.0083, + "num_input_tokens_seen": 535243580, + "step": 8002 + }, + { + "epoch": 0.9080283687943262, + "loss": 0.9817003011703491, + "loss_ce": 0.00513780303299427, + "loss_iou": 0.4296875, + "loss_num": 0.0235595703125, + "loss_xval": 0.9765625, + "num_input_tokens_seen": 535243580, + "step": 8002 + }, + { + "epoch": 0.9081418439716312, + "grad_norm": 20.13898277282715, + "learning_rate": 5e-05, + "loss": 1.3943, + "num_input_tokens_seen": 535310208, + "step": 8003 + }, + { + "epoch": 0.9081418439716312, + "loss": 1.4459571838378906, + "loss_ce": 0.01041039451956749, + "loss_iou": 0.5390625, + "loss_num": 0.07080078125, + "loss_xval": 1.4375, + "num_input_tokens_seen": 535310208, + "step": 8003 + }, + { + "epoch": 0.9082553191489362, + "grad_norm": 20.05130958557129, + "learning_rate": 5e-05, + "loss": 1.1842, + "num_input_tokens_seen": 535376560, + "step": 8004 + }, + { + "epoch": 0.9082553191489362, + "loss": 1.3292902708053589, + "loss_ce": 0.007024652324616909, + "loss_iou": 0.486328125, + "loss_num": 0.06982421875, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 535376560, + "step": 8004 + }, + { + "epoch": 0.9083687943262412, + "grad_norm": 27.422523498535156, + "learning_rate": 5e-05, + "loss": 1.2617, + "num_input_tokens_seen": 535443868, + "step": 8005 + }, + { + "epoch": 0.9083687943262412, + "loss": 1.1913487911224365, + "loss_ce": 0.009219978004693985, + "loss_iou": 0.5, + "loss_num": 0.035888671875, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 535443868, + "step": 8005 + }, + { + "epoch": 0.908482269503546, + "grad_norm": 35.85560607910156, + "learning_rate": 5e-05, + "loss": 0.9823, + "num_input_tokens_seen": 535509972, + "step": 8006 + }, + { + "epoch": 0.908482269503546, + "loss": 0.9659758806228638, + "loss_ce": 0.010409521870315075, + "loss_iou": 0.396484375, + "loss_num": 0.0322265625, + "loss_xval": 0.95703125, + "num_input_tokens_seen": 535509972, + "step": 8006 + }, + { + "epoch": 0.908595744680851, + "grad_norm": 31.060131072998047, + "learning_rate": 5e-05, + "loss": 1.3146, + "num_input_tokens_seen": 535577272, + "step": 8007 + }, + { + "epoch": 0.908595744680851, + "loss": 1.379183292388916, + "loss_ce": 0.00662469957023859, + "loss_iou": 0.54296875, + "loss_num": 0.0576171875, + "loss_xval": 1.375, + "num_input_tokens_seen": 535577272, + "step": 8007 + }, + { + "epoch": 0.908709219858156, + "grad_norm": 32.75374221801758, + "learning_rate": 5e-05, + "loss": 1.1812, + "num_input_tokens_seen": 535643900, + "step": 8008 + }, + { + "epoch": 0.908709219858156, + "loss": 1.2112860679626465, + "loss_ce": 0.005719671491533518, + "loss_iou": 0.490234375, + "loss_num": 0.044921875, + "loss_xval": 1.203125, + "num_input_tokens_seen": 535643900, + "step": 8008 + }, + { + "epoch": 0.908822695035461, + "grad_norm": 23.688283920288086, + "learning_rate": 5e-05, + "loss": 1.2657, + "num_input_tokens_seen": 535711524, + "step": 8009 + }, + { + "epoch": 0.908822695035461, + "loss": 1.1233339309692383, + "loss_ce": 0.006146513856947422, + "loss_iou": 0.478515625, + "loss_num": 0.03173828125, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 535711524, + "step": 8009 + }, + { + "epoch": 0.908936170212766, + "grad_norm": 14.364005088806152, + "learning_rate": 5e-05, + "loss": 1.1546, + "num_input_tokens_seen": 535779452, + "step": 8010 + }, + { + "epoch": 0.908936170212766, + "loss": 1.1562055349349976, + "loss_ce": 0.005326638929545879, + "loss_iou": 0.48828125, + "loss_num": 0.03466796875, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 535779452, + "step": 8010 + }, + { + "epoch": 0.9090496453900709, + "grad_norm": 29.609909057617188, + "learning_rate": 5e-05, + "loss": 0.9413, + "num_input_tokens_seen": 535845564, + "step": 8011 + }, + { + "epoch": 0.9090496453900709, + "loss": 1.1584445238113403, + "loss_ce": 0.00854218378663063, + "loss_iou": 0.486328125, + "loss_num": 0.035400390625, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 535845564, + "step": 8011 + }, + { + "epoch": 0.9091631205673759, + "grad_norm": 62.04926681518555, + "learning_rate": 5e-05, + "loss": 1.1894, + "num_input_tokens_seen": 535913076, + "step": 8012 + }, + { + "epoch": 0.9091631205673759, + "loss": 1.2346919775009155, + "loss_ce": 0.007641180884093046, + "loss_iou": 0.5, + "loss_num": 0.044921875, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 535913076, + "step": 8012 + }, + { + "epoch": 0.9092765957446809, + "grad_norm": 47.02861404418945, + "learning_rate": 5e-05, + "loss": 1.4539, + "num_input_tokens_seen": 535980400, + "step": 8013 + }, + { + "epoch": 0.9092765957446809, + "loss": 1.3795217275619507, + "loss_ce": 0.005498276557773352, + "loss_iou": 0.57421875, + "loss_num": 0.045654296875, + "loss_xval": 1.375, + "num_input_tokens_seen": 535980400, + "step": 8013 + }, + { + "epoch": 0.9093900709219858, + "grad_norm": 32.72863006591797, + "learning_rate": 5e-05, + "loss": 1.1717, + "num_input_tokens_seen": 536047672, + "step": 8014 + }, + { + "epoch": 0.9093900709219858, + "loss": 1.2187104225158691, + "loss_ce": 0.022909674793481827, + "loss_iou": 0.482421875, + "loss_num": 0.046630859375, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 536047672, + "step": 8014 + }, + { + "epoch": 0.9095035460992907, + "grad_norm": 29.699331283569336, + "learning_rate": 5e-05, + "loss": 1.1888, + "num_input_tokens_seen": 536115124, + "step": 8015 + }, + { + "epoch": 0.9095035460992907, + "loss": 1.3591920137405396, + "loss_ce": 0.011047501116991043, + "loss_iou": 0.54296875, + "loss_num": 0.05224609375, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 536115124, + "step": 8015 + }, + { + "epoch": 0.9096170212765957, + "grad_norm": 28.185226440429688, + "learning_rate": 5e-05, + "loss": 1.1651, + "num_input_tokens_seen": 536182464, + "step": 8016 + }, + { + "epoch": 0.9096170212765957, + "loss": 1.041357159614563, + "loss_ce": 0.006200883537530899, + "loss_iou": 0.41796875, + "loss_num": 0.039306640625, + "loss_xval": 1.03125, + "num_input_tokens_seen": 536182464, + "step": 8016 + }, + { + "epoch": 0.9097304964539007, + "grad_norm": 24.758508682250977, + "learning_rate": 5e-05, + "loss": 1.1405, + "num_input_tokens_seen": 536249440, + "step": 8017 + }, + { + "epoch": 0.9097304964539007, + "loss": 1.2139697074890137, + "loss_ce": 0.003520494094118476, + "loss_iou": 0.5234375, + "loss_num": 0.033203125, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 536249440, + "step": 8017 + }, + { + "epoch": 0.9098439716312057, + "grad_norm": 20.49869728088379, + "learning_rate": 5e-05, + "loss": 1.1994, + "num_input_tokens_seen": 536317124, + "step": 8018 + }, + { + "epoch": 0.9098439716312057, + "loss": 1.1696982383728027, + "loss_ce": 0.0022177600767463446, + "loss_iou": 0.46875, + "loss_num": 0.0458984375, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 536317124, + "step": 8018 + }, + { + "epoch": 0.9099574468085107, + "grad_norm": 24.83345603942871, + "learning_rate": 5e-05, + "loss": 1.1984, + "num_input_tokens_seen": 536384336, + "step": 8019 + }, + { + "epoch": 0.9099574468085107, + "loss": 1.1245992183685303, + "loss_ce": 0.00667934026569128, + "loss_iou": 0.4609375, + "loss_num": 0.0390625, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 536384336, + "step": 8019 + }, + { + "epoch": 0.9100709219858156, + "grad_norm": 27.363664627075195, + "learning_rate": 5e-05, + "loss": 1.1856, + "num_input_tokens_seen": 536449192, + "step": 8020 + }, + { + "epoch": 0.9100709219858156, + "loss": 1.2291089296340942, + "loss_ce": 0.008894085884094238, + "loss_iou": 0.44921875, + "loss_num": 0.06494140625, + "loss_xval": 1.21875, + "num_input_tokens_seen": 536449192, + "step": 8020 + }, + { + "epoch": 0.9101843971631206, + "grad_norm": 34.82018280029297, + "learning_rate": 5e-05, + "loss": 1.2844, + "num_input_tokens_seen": 536515140, + "step": 8021 + }, + { + "epoch": 0.9101843971631206, + "loss": 1.273432731628418, + "loss_ce": 0.0048780133947730064, + "loss_iou": 0.482421875, + "loss_num": 0.060302734375, + "loss_xval": 1.265625, + "num_input_tokens_seen": 536515140, + "step": 8021 + }, + { + "epoch": 0.9102978723404256, + "grad_norm": 27.76491355895996, + "learning_rate": 5e-05, + "loss": 1.04, + "num_input_tokens_seen": 536582808, + "step": 8022 + }, + { + "epoch": 0.9102978723404256, + "loss": 1.0190588235855103, + "loss_ce": 0.007339972071349621, + "loss_iou": 0.39453125, + "loss_num": 0.044677734375, + "loss_xval": 1.015625, + "num_input_tokens_seen": 536582808, + "step": 8022 + }, + { + "epoch": 0.9104113475177305, + "grad_norm": 30.65494155883789, + "learning_rate": 5e-05, + "loss": 1.1719, + "num_input_tokens_seen": 536649360, + "step": 8023 + }, + { + "epoch": 0.9104113475177305, + "loss": 1.170069694519043, + "loss_ce": 0.00576314702630043, + "loss_iou": 0.48046875, + "loss_num": 0.04052734375, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 536649360, + "step": 8023 + }, + { + "epoch": 0.9105248226950354, + "grad_norm": 35.7688102722168, + "learning_rate": 5e-05, + "loss": 1.1716, + "num_input_tokens_seen": 536716544, + "step": 8024 + }, + { + "epoch": 0.9105248226950354, + "loss": 1.168874740600586, + "loss_ce": 0.007497742306441069, + "loss_iou": 0.4609375, + "loss_num": 0.0478515625, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 536716544, + "step": 8024 + }, + { + "epoch": 0.9106382978723404, + "grad_norm": 24.42721176147461, + "learning_rate": 5e-05, + "loss": 1.2323, + "num_input_tokens_seen": 536782916, + "step": 8025 + }, + { + "epoch": 0.9106382978723404, + "loss": 1.189043402671814, + "loss_ce": 0.008379345759749413, + "loss_iou": 0.48828125, + "loss_num": 0.040771484375, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 536782916, + "step": 8025 + }, + { + "epoch": 0.9107517730496454, + "grad_norm": 24.803760528564453, + "learning_rate": 5e-05, + "loss": 1.1526, + "num_input_tokens_seen": 536850720, + "step": 8026 + }, + { + "epoch": 0.9107517730496454, + "loss": 1.0585780143737793, + "loss_ce": 0.004867075476795435, + "loss_iou": 0.435546875, + "loss_num": 0.03662109375, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 536850720, + "step": 8026 + }, + { + "epoch": 0.9108652482269504, + "grad_norm": 27.43886375427246, + "learning_rate": 5e-05, + "loss": 1.2597, + "num_input_tokens_seen": 536917308, + "step": 8027 + }, + { + "epoch": 0.9108652482269504, + "loss": 1.2405483722686768, + "loss_ce": 0.004220214672386646, + "loss_iou": 0.478515625, + "loss_num": 0.05615234375, + "loss_xval": 1.234375, + "num_input_tokens_seen": 536917308, + "step": 8027 + }, + { + "epoch": 0.9109787234042553, + "grad_norm": 25.687618255615234, + "learning_rate": 5e-05, + "loss": 1.362, + "num_input_tokens_seen": 536984304, + "step": 8028 + }, + { + "epoch": 0.9109787234042553, + "loss": 1.2800178527832031, + "loss_ce": 0.005603753961622715, + "loss_iou": 0.53125, + "loss_num": 0.04296875, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 536984304, + "step": 8028 + }, + { + "epoch": 0.9110921985815603, + "grad_norm": 17.088211059570312, + "learning_rate": 5e-05, + "loss": 1.3663, + "num_input_tokens_seen": 537051132, + "step": 8029 + }, + { + "epoch": 0.9110921985815603, + "loss": 1.447923183441162, + "loss_ce": 0.0035873372107744217, + "loss_iou": 0.56640625, + "loss_num": 0.0625, + "loss_xval": 1.4453125, + "num_input_tokens_seen": 537051132, + "step": 8029 + }, + { + "epoch": 0.9112056737588653, + "grad_norm": 13.14069652557373, + "learning_rate": 5e-05, + "loss": 0.9895, + "num_input_tokens_seen": 537118448, + "step": 8030 + }, + { + "epoch": 0.9112056737588653, + "loss": 0.8527205586433411, + "loss_ce": 0.005308450665324926, + "loss_iou": 0.337890625, + "loss_num": 0.0341796875, + "loss_xval": 0.84765625, + "num_input_tokens_seen": 537118448, + "step": 8030 + }, + { + "epoch": 0.9113191489361702, + "grad_norm": 10.896180152893066, + "learning_rate": 5e-05, + "loss": 1.0765, + "num_input_tokens_seen": 537186628, + "step": 8031 + }, + { + "epoch": 0.9113191489361702, + "loss": 1.0779856443405151, + "loss_ce": 0.01109111774712801, + "loss_iou": 0.44140625, + "loss_num": 0.036865234375, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 537186628, + "step": 8031 + }, + { + "epoch": 0.9114326241134751, + "grad_norm": 26.525047302246094, + "learning_rate": 5e-05, + "loss": 1.0559, + "num_input_tokens_seen": 537254184, + "step": 8032 + }, + { + "epoch": 0.9114326241134751, + "loss": 0.9665857553482056, + "loss_ce": 0.008089656941592693, + "loss_iou": 0.42578125, + "loss_num": 0.021240234375, + "loss_xval": 0.95703125, + "num_input_tokens_seen": 537254184, + "step": 8032 + }, + { + "epoch": 0.9115460992907801, + "grad_norm": 20.75648307800293, + "learning_rate": 5e-05, + "loss": 1.0185, + "num_input_tokens_seen": 537320872, + "step": 8033 + }, + { + "epoch": 0.9115460992907801, + "loss": 0.9194244146347046, + "loss_ce": 0.006582620088011026, + "loss_iou": 0.37109375, + "loss_num": 0.034423828125, + "loss_xval": 0.9140625, + "num_input_tokens_seen": 537320872, + "step": 8033 + }, + { + "epoch": 0.9116595744680851, + "grad_norm": 35.561058044433594, + "learning_rate": 5e-05, + "loss": 1.1098, + "num_input_tokens_seen": 537388204, + "step": 8034 + }, + { + "epoch": 0.9116595744680851, + "loss": 1.0326073169708252, + "loss_ce": 0.004775196313858032, + "loss_iou": 0.400390625, + "loss_num": 0.045166015625, + "loss_xval": 1.03125, + "num_input_tokens_seen": 537388204, + "step": 8034 + }, + { + "epoch": 0.9117730496453901, + "grad_norm": 49.6676025390625, + "learning_rate": 5e-05, + "loss": 1.1626, + "num_input_tokens_seen": 537454740, + "step": 8035 + }, + { + "epoch": 0.9117730496453901, + "loss": 0.9873941540718079, + "loss_ce": 0.0049723070114851, + "loss_iou": 0.439453125, + "loss_num": 0.0205078125, + "loss_xval": 0.984375, + "num_input_tokens_seen": 537454740, + "step": 8035 + }, + { + "epoch": 0.911886524822695, + "grad_norm": 34.557472229003906, + "learning_rate": 5e-05, + "loss": 1.2264, + "num_input_tokens_seen": 537521648, + "step": 8036 + }, + { + "epoch": 0.911886524822695, + "loss": 1.077714443206787, + "loss_ce": 0.005937070120126009, + "loss_iou": 0.466796875, + "loss_num": 0.027099609375, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 537521648, + "step": 8036 + }, + { + "epoch": 0.912, + "grad_norm": 20.651660919189453, + "learning_rate": 5e-05, + "loss": 1.2213, + "num_input_tokens_seen": 537589536, + "step": 8037 + }, + { + "epoch": 0.912, + "loss": 1.1673948764801025, + "loss_ce": 0.005773710086941719, + "loss_iou": 0.4765625, + "loss_num": 0.04150390625, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 537589536, + "step": 8037 + }, + { + "epoch": 0.912113475177305, + "grad_norm": 24.29839515686035, + "learning_rate": 5e-05, + "loss": 1.1437, + "num_input_tokens_seen": 537657332, + "step": 8038 + }, + { + "epoch": 0.912113475177305, + "loss": 1.272611141204834, + "loss_ce": 0.006009509786963463, + "loss_iou": 0.51171875, + "loss_num": 0.049072265625, + "loss_xval": 1.265625, + "num_input_tokens_seen": 537657332, + "step": 8038 + }, + { + "epoch": 0.9122269503546099, + "grad_norm": 23.386144638061523, + "learning_rate": 5e-05, + "loss": 1.1547, + "num_input_tokens_seen": 537724116, + "step": 8039 + }, + { + "epoch": 0.9122269503546099, + "loss": 1.3690404891967773, + "loss_ce": 0.004782658535987139, + "loss_iou": 0.52734375, + "loss_num": 0.06298828125, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 537724116, + "step": 8039 + }, + { + "epoch": 0.9123404255319149, + "grad_norm": 37.787269592285156, + "learning_rate": 5e-05, + "loss": 1.2686, + "num_input_tokens_seen": 537792032, + "step": 8040 + }, + { + "epoch": 0.9123404255319149, + "loss": 1.2068722248077393, + "loss_ce": 0.009118231013417244, + "loss_iou": 0.48828125, + "loss_num": 0.04443359375, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 537792032, + "step": 8040 + }, + { + "epoch": 0.9124539007092198, + "grad_norm": 16.847270965576172, + "learning_rate": 5e-05, + "loss": 1.1533, + "num_input_tokens_seen": 537858892, + "step": 8041 + }, + { + "epoch": 0.9124539007092198, + "loss": 1.2051607370376587, + "loss_ce": 0.003988896030932665, + "loss_iou": 0.50390625, + "loss_num": 0.0380859375, + "loss_xval": 1.203125, + "num_input_tokens_seen": 537858892, + "step": 8041 + }, + { + "epoch": 0.9125673758865248, + "grad_norm": 12.988758087158203, + "learning_rate": 5e-05, + "loss": 0.9233, + "num_input_tokens_seen": 537926328, + "step": 8042 + }, + { + "epoch": 0.9125673758865248, + "loss": 0.846403956413269, + "loss_ce": 0.006071914918720722, + "loss_iou": 0.37890625, + "loss_num": 0.0166015625, + "loss_xval": 0.83984375, + "num_input_tokens_seen": 537926328, + "step": 8042 + }, + { + "epoch": 0.9126808510638298, + "grad_norm": 33.0661506652832, + "learning_rate": 5e-05, + "loss": 1.1431, + "num_input_tokens_seen": 537994160, + "step": 8043 + }, + { + "epoch": 0.9126808510638298, + "loss": 1.363337516784668, + "loss_ce": 0.008845453150570393, + "loss_iou": 0.51953125, + "loss_num": 0.0634765625, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 537994160, + "step": 8043 + }, + { + "epoch": 0.9127943262411348, + "grad_norm": 24.829710006713867, + "learning_rate": 5e-05, + "loss": 1.0827, + "num_input_tokens_seen": 538060776, + "step": 8044 + }, + { + "epoch": 0.9127943262411348, + "loss": 1.056772232055664, + "loss_ce": 0.006967535708099604, + "loss_iou": 0.4453125, + "loss_num": 0.03173828125, + "loss_xval": 1.046875, + "num_input_tokens_seen": 538060776, + "step": 8044 + }, + { + "epoch": 0.9129078014184397, + "grad_norm": 33.81443405151367, + "learning_rate": 5e-05, + "loss": 1.2066, + "num_input_tokens_seen": 538128468, + "step": 8045 + }, + { + "epoch": 0.9129078014184397, + "loss": 1.0112214088439941, + "loss_ce": 0.006338565610349178, + "loss_iou": 0.451171875, + "loss_num": 0.020263671875, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 538128468, + "step": 8045 + }, + { + "epoch": 0.9130212765957447, + "grad_norm": 33.00640869140625, + "learning_rate": 5e-05, + "loss": 1.0631, + "num_input_tokens_seen": 538194380, + "step": 8046 + }, + { + "epoch": 0.9130212765957447, + "loss": 0.9987494945526123, + "loss_ce": 0.004486756399273872, + "loss_iou": 0.408203125, + "loss_num": 0.03564453125, + "loss_xval": 0.99609375, + "num_input_tokens_seen": 538194380, + "step": 8046 + }, + { + "epoch": 0.9131347517730496, + "grad_norm": 29.42576026916504, + "learning_rate": 5e-05, + "loss": 1.2137, + "num_input_tokens_seen": 538261968, + "step": 8047 + }, + { + "epoch": 0.9131347517730496, + "loss": 1.3290767669677734, + "loss_ce": 0.004857921972870827, + "loss_iou": 0.55078125, + "loss_num": 0.044677734375, + "loss_xval": 1.328125, + "num_input_tokens_seen": 538261968, + "step": 8047 + }, + { + "epoch": 0.9132482269503546, + "grad_norm": 35.599674224853516, + "learning_rate": 5e-05, + "loss": 1.022, + "num_input_tokens_seen": 538327912, + "step": 8048 + }, + { + "epoch": 0.9132482269503546, + "loss": 1.2114921808242798, + "loss_ce": 0.005925716832280159, + "loss_iou": 0.451171875, + "loss_num": 0.060546875, + "loss_xval": 1.203125, + "num_input_tokens_seen": 538327912, + "step": 8048 + }, + { + "epoch": 0.9133617021276595, + "grad_norm": 23.172578811645508, + "learning_rate": 5e-05, + "loss": 0.9933, + "num_input_tokens_seen": 538394524, + "step": 8049 + }, + { + "epoch": 0.9133617021276595, + "loss": 1.0117573738098145, + "loss_ce": 0.005898030940443277, + "loss_iou": 0.41015625, + "loss_num": 0.037353515625, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 538394524, + "step": 8049 + }, + { + "epoch": 0.9134751773049645, + "grad_norm": 21.071557998657227, + "learning_rate": 5e-05, + "loss": 1.2445, + "num_input_tokens_seen": 538461612, + "step": 8050 + }, + { + "epoch": 0.9134751773049645, + "loss": 1.2376610040664673, + "loss_ce": 0.0037743267603218555, + "loss_iou": 0.474609375, + "loss_num": 0.056640625, + "loss_xval": 1.234375, + "num_input_tokens_seen": 538461612, + "step": 8050 + }, + { + "epoch": 0.9135886524822695, + "grad_norm": 25.288969039916992, + "learning_rate": 5e-05, + "loss": 1.1487, + "num_input_tokens_seen": 538529380, + "step": 8051 + }, + { + "epoch": 0.9135886524822695, + "loss": 1.0362486839294434, + "loss_ce": 0.009149023331701756, + "loss_iou": 0.41015625, + "loss_num": 0.041259765625, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 538529380, + "step": 8051 + }, + { + "epoch": 0.9137021276595745, + "grad_norm": 41.13090515136719, + "learning_rate": 5e-05, + "loss": 1.2809, + "num_input_tokens_seen": 538596468, + "step": 8052 + }, + { + "epoch": 0.9137021276595745, + "loss": 1.3367178440093994, + "loss_ce": 0.0032217265106737614, + "loss_iou": 0.5078125, + "loss_num": 0.06298828125, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 538596468, + "step": 8052 + }, + { + "epoch": 0.9138156028368795, + "grad_norm": 40.34462356567383, + "learning_rate": 5e-05, + "loss": 1.3395, + "num_input_tokens_seen": 538662760, + "step": 8053 + }, + { + "epoch": 0.9138156028368795, + "loss": 1.289419412612915, + "loss_ce": 0.0062162624672055244, + "loss_iou": 0.5546875, + "loss_num": 0.034423828125, + "loss_xval": 1.28125, + "num_input_tokens_seen": 538662760, + "step": 8053 + }, + { + "epoch": 0.9139290780141844, + "grad_norm": 33.607608795166016, + "learning_rate": 5e-05, + "loss": 1.2036, + "num_input_tokens_seen": 538729904, + "step": 8054 + }, + { + "epoch": 0.9139290780141844, + "loss": 1.0360474586486816, + "loss_ce": 0.005285808350890875, + "loss_iou": 0.4296875, + "loss_num": 0.03466796875, + "loss_xval": 1.03125, + "num_input_tokens_seen": 538729904, + "step": 8054 + }, + { + "epoch": 0.9140425531914894, + "grad_norm": 38.017704010009766, + "learning_rate": 5e-05, + "loss": 1.3485, + "num_input_tokens_seen": 538796464, + "step": 8055 + }, + { + "epoch": 0.9140425531914894, + "loss": 1.3950021266937256, + "loss_ce": 0.006818563677370548, + "loss_iou": 0.55859375, + "loss_num": 0.0537109375, + "loss_xval": 1.390625, + "num_input_tokens_seen": 538796464, + "step": 8055 + }, + { + "epoch": 0.9141560283687943, + "grad_norm": 43.73765563964844, + "learning_rate": 5e-05, + "loss": 1.1434, + "num_input_tokens_seen": 538863096, + "step": 8056 + }, + { + "epoch": 0.9141560283687943, + "loss": 1.0520366430282593, + "loss_ce": 0.0041850293055176735, + "loss_iou": 0.4375, + "loss_num": 0.034912109375, + "loss_xval": 1.046875, + "num_input_tokens_seen": 538863096, + "step": 8056 + }, + { + "epoch": 0.9142695035460993, + "grad_norm": 29.79226303100586, + "learning_rate": 5e-05, + "loss": 1.1165, + "num_input_tokens_seen": 538930100, + "step": 8057 + }, + { + "epoch": 0.9142695035460993, + "loss": 1.2340178489685059, + "loss_ce": 0.007943657226860523, + "loss_iou": 0.498046875, + "loss_num": 0.04541015625, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 538930100, + "step": 8057 + }, + { + "epoch": 0.9143829787234042, + "grad_norm": 24.372732162475586, + "learning_rate": 5e-05, + "loss": 1.1931, + "num_input_tokens_seen": 538996784, + "step": 8058 + }, + { + "epoch": 0.9143829787234042, + "loss": 1.2643308639526367, + "loss_ce": 0.005053543485701084, + "loss_iou": 0.5234375, + "loss_num": 0.043212890625, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 538996784, + "step": 8058 + }, + { + "epoch": 0.9144964539007092, + "grad_norm": 27.450803756713867, + "learning_rate": 5e-05, + "loss": 1.163, + "num_input_tokens_seen": 539062860, + "step": 8059 + }, + { + "epoch": 0.9144964539007092, + "loss": 1.129260540008545, + "loss_ce": 0.004748746752738953, + "loss_iou": 0.44140625, + "loss_num": 0.04833984375, + "loss_xval": 1.125, + "num_input_tokens_seen": 539062860, + "step": 8059 + }, + { + "epoch": 0.9146099290780142, + "grad_norm": 20.37662696838379, + "learning_rate": 5e-05, + "loss": 1.2966, + "num_input_tokens_seen": 539129828, + "step": 8060 + }, + { + "epoch": 0.9146099290780142, + "loss": 1.2285218238830566, + "loss_ce": 0.010748401284217834, + "loss_iou": 0.498046875, + "loss_num": 0.04443359375, + "loss_xval": 1.21875, + "num_input_tokens_seen": 539129828, + "step": 8060 + }, + { + "epoch": 0.9147234042553192, + "grad_norm": 19.351490020751953, + "learning_rate": 5e-05, + "loss": 1.1647, + "num_input_tokens_seen": 539196604, + "step": 8061 + }, + { + "epoch": 0.9147234042553192, + "loss": 1.173569679260254, + "loss_ce": 0.004624355584383011, + "loss_iou": 0.46484375, + "loss_num": 0.0478515625, + "loss_xval": 1.171875, + "num_input_tokens_seen": 539196604, + "step": 8061 + }, + { + "epoch": 0.9148368794326242, + "grad_norm": 26.800329208374023, + "learning_rate": 5e-05, + "loss": 1.1419, + "num_input_tokens_seen": 539264292, + "step": 8062 + }, + { + "epoch": 0.9148368794326242, + "loss": 1.1243064403533936, + "loss_ce": 0.004189200699329376, + "loss_iou": 0.470703125, + "loss_num": 0.03564453125, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 539264292, + "step": 8062 + }, + { + "epoch": 0.9149503546099291, + "grad_norm": 19.388952255249023, + "learning_rate": 5e-05, + "loss": 1.222, + "num_input_tokens_seen": 539330548, + "step": 8063 + }, + { + "epoch": 0.9149503546099291, + "loss": 1.2733820676803589, + "loss_ce": 0.013128168880939484, + "loss_iou": 0.443359375, + "loss_num": 0.07470703125, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 539330548, + "step": 8063 + }, + { + "epoch": 0.915063829787234, + "grad_norm": 23.088417053222656, + "learning_rate": 5e-05, + "loss": 1.1097, + "num_input_tokens_seen": 539397984, + "step": 8064 + }, + { + "epoch": 0.915063829787234, + "loss": 1.0417571067810059, + "loss_ce": 0.007089177146553993, + "loss_iou": 0.42578125, + "loss_num": 0.037109375, + "loss_xval": 1.03125, + "num_input_tokens_seen": 539397984, + "step": 8064 + }, + { + "epoch": 0.915177304964539, + "grad_norm": 20.766258239746094, + "learning_rate": 5e-05, + "loss": 1.0972, + "num_input_tokens_seen": 539465228, + "step": 8065 + }, + { + "epoch": 0.915177304964539, + "loss": 1.031905174255371, + "loss_ce": 0.006026195362210274, + "loss_iou": 0.44921875, + "loss_num": 0.025390625, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 539465228, + "step": 8065 + }, + { + "epoch": 0.915290780141844, + "grad_norm": 17.55408477783203, + "learning_rate": 5e-05, + "loss": 1.2614, + "num_input_tokens_seen": 539532352, + "step": 8066 + }, + { + "epoch": 0.915290780141844, + "loss": 1.1511213779449463, + "loss_ce": 0.002439789939671755, + "loss_iou": 0.447265625, + "loss_num": 0.051025390625, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 539532352, + "step": 8066 + }, + { + "epoch": 0.9154042553191489, + "grad_norm": 24.689931869506836, + "learning_rate": 5e-05, + "loss": 1.1231, + "num_input_tokens_seen": 539598928, + "step": 8067 + }, + { + "epoch": 0.9154042553191489, + "loss": 0.9193532466888428, + "loss_ce": 0.00907492358237505, + "loss_iou": 0.376953125, + "loss_num": 0.031494140625, + "loss_xval": 0.91015625, + "num_input_tokens_seen": 539598928, + "step": 8067 + }, + { + "epoch": 0.9155177304964539, + "grad_norm": 57.79139709472656, + "learning_rate": 5e-05, + "loss": 1.2365, + "num_input_tokens_seen": 539665840, + "step": 8068 + }, + { + "epoch": 0.9155177304964539, + "loss": 1.283647060394287, + "loss_ce": 0.007279911078512669, + "loss_iou": 0.49609375, + "loss_num": 0.056884765625, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 539665840, + "step": 8068 + }, + { + "epoch": 0.9156312056737589, + "grad_norm": 28.753870010375977, + "learning_rate": 5e-05, + "loss": 1.1892, + "num_input_tokens_seen": 539733240, + "step": 8069 + }, + { + "epoch": 0.9156312056737589, + "loss": 1.0580923557281494, + "loss_ce": 0.0068227434530854225, + "loss_iou": 0.435546875, + "loss_num": 0.0361328125, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 539733240, + "step": 8069 + }, + { + "epoch": 0.9157446808510639, + "grad_norm": 37.728660583496094, + "learning_rate": 5e-05, + "loss": 1.3074, + "num_input_tokens_seen": 539801344, + "step": 8070 + }, + { + "epoch": 0.9157446808510639, + "loss": 1.1973049640655518, + "loss_ce": 0.0029690556693822145, + "loss_iou": 0.51171875, + "loss_num": 0.0341796875, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 539801344, + "step": 8070 + }, + { + "epoch": 0.9158581560283688, + "grad_norm": 40.08733367919922, + "learning_rate": 5e-05, + "loss": 1.348, + "num_input_tokens_seen": 539867748, + "step": 8071 + }, + { + "epoch": 0.9158581560283688, + "loss": 1.5387533903121948, + "loss_ce": 0.011897964403033257, + "loss_iou": 0.609375, + "loss_num": 0.061767578125, + "loss_xval": 1.5234375, + "num_input_tokens_seen": 539867748, + "step": 8071 + }, + { + "epoch": 0.9159716312056737, + "grad_norm": 25.673215866088867, + "learning_rate": 5e-05, + "loss": 1.3526, + "num_input_tokens_seen": 539934372, + "step": 8072 + }, + { + "epoch": 0.9159716312056737, + "loss": 1.2919917106628418, + "loss_ce": 0.005858942400664091, + "loss_iou": 0.53125, + "loss_num": 0.045654296875, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 539934372, + "step": 8072 + }, + { + "epoch": 0.9160851063829787, + "grad_norm": 23.691137313842773, + "learning_rate": 5e-05, + "loss": 1.2919, + "num_input_tokens_seen": 540000940, + "step": 8073 + }, + { + "epoch": 0.9160851063829787, + "loss": 1.20361328125, + "loss_ce": 0.00927728321403265, + "loss_iou": 0.515625, + "loss_num": 0.0322265625, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 540000940, + "step": 8073 + }, + { + "epoch": 0.9161985815602837, + "grad_norm": 30.411935806274414, + "learning_rate": 5e-05, + "loss": 1.3266, + "num_input_tokens_seen": 540067744, + "step": 8074 + }, + { + "epoch": 0.9161985815602837, + "loss": 1.4872384071350098, + "loss_ce": 0.008722718805074692, + "loss_iou": 0.58203125, + "loss_num": 0.0625, + "loss_xval": 1.4765625, + "num_input_tokens_seen": 540067744, + "step": 8074 + }, + { + "epoch": 0.9163120567375886, + "grad_norm": 27.650720596313477, + "learning_rate": 5e-05, + "loss": 1.2453, + "num_input_tokens_seen": 540135260, + "step": 8075 + }, + { + "epoch": 0.9163120567375886, + "loss": 1.3388229608535767, + "loss_ce": 0.005326804704964161, + "loss_iou": 0.5625, + "loss_num": 0.04248046875, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 540135260, + "step": 8075 + }, + { + "epoch": 0.9164255319148936, + "grad_norm": 17.48399543762207, + "learning_rate": 5e-05, + "loss": 0.9569, + "num_input_tokens_seen": 540202368, + "step": 8076 + }, + { + "epoch": 0.9164255319148936, + "loss": 0.8304520845413208, + "loss_ce": 0.004524338524788618, + "loss_iou": 0.359375, + "loss_num": 0.0216064453125, + "loss_xval": 0.82421875, + "num_input_tokens_seen": 540202368, + "step": 8076 + }, + { + "epoch": 0.9165390070921986, + "grad_norm": 39.218482971191406, + "learning_rate": 5e-05, + "loss": 1.0896, + "num_input_tokens_seen": 540268808, + "step": 8077 + }, + { + "epoch": 0.9165390070921986, + "loss": 0.9230691194534302, + "loss_ce": 0.007541817147284746, + "loss_iou": 0.349609375, + "loss_num": 0.04296875, + "loss_xval": 0.9140625, + "num_input_tokens_seen": 540268808, + "step": 8077 + }, + { + "epoch": 0.9166524822695036, + "grad_norm": 19.79063606262207, + "learning_rate": 5e-05, + "loss": 1.0608, + "num_input_tokens_seen": 540336208, + "step": 8078 + }, + { + "epoch": 0.9166524822695036, + "loss": 1.1054991483688354, + "loss_ce": 0.0044249361380934715, + "loss_iou": 0.4609375, + "loss_num": 0.03564453125, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 540336208, + "step": 8078 + }, + { + "epoch": 0.9167659574468086, + "grad_norm": 33.950904846191406, + "learning_rate": 5e-05, + "loss": 1.2573, + "num_input_tokens_seen": 540402592, + "step": 8079 + }, + { + "epoch": 0.9167659574468086, + "loss": 1.405164361000061, + "loss_ce": 0.0033088275231420994, + "loss_iou": 0.55859375, + "loss_num": 0.056640625, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 540402592, + "step": 8079 + }, + { + "epoch": 0.9168794326241134, + "grad_norm": 33.67524337768555, + "learning_rate": 5e-05, + "loss": 1.2816, + "num_input_tokens_seen": 540469536, + "step": 8080 + }, + { + "epoch": 0.9168794326241134, + "loss": 1.2364389896392822, + "loss_ce": 0.004017087165266275, + "loss_iou": 0.515625, + "loss_num": 0.040283203125, + "loss_xval": 1.234375, + "num_input_tokens_seen": 540469536, + "step": 8080 + }, + { + "epoch": 0.9169929078014184, + "grad_norm": 28.876291275024414, + "learning_rate": 5e-05, + "loss": 0.9299, + "num_input_tokens_seen": 540536856, + "step": 8081 + }, + { + "epoch": 0.9169929078014184, + "loss": 1.068364143371582, + "loss_ce": 0.009648299776017666, + "loss_iou": 0.419921875, + "loss_num": 0.0439453125, + "loss_xval": 1.0625, + "num_input_tokens_seen": 540536856, + "step": 8081 + }, + { + "epoch": 0.9171063829787234, + "grad_norm": 32.97058868408203, + "learning_rate": 5e-05, + "loss": 1.1197, + "num_input_tokens_seen": 540603168, + "step": 8082 + }, + { + "epoch": 0.9171063829787234, + "loss": 1.083646297454834, + "loss_ce": 0.0060096196830272675, + "loss_iou": 0.427734375, + "loss_num": 0.044189453125, + "loss_xval": 1.078125, + "num_input_tokens_seen": 540603168, + "step": 8082 + }, + { + "epoch": 0.9172198581560284, + "grad_norm": 44.73435592651367, + "learning_rate": 5e-05, + "loss": 1.4124, + "num_input_tokens_seen": 540670532, + "step": 8083 + }, + { + "epoch": 0.9172198581560284, + "loss": 1.2999447584152222, + "loss_ce": 0.005511188413947821, + "loss_iou": 0.5234375, + "loss_num": 0.0498046875, + "loss_xval": 1.296875, + "num_input_tokens_seen": 540670532, + "step": 8083 + }, + { + "epoch": 0.9173333333333333, + "grad_norm": 38.43854904174805, + "learning_rate": 5e-05, + "loss": 1.3566, + "num_input_tokens_seen": 540737204, + "step": 8084 + }, + { + "epoch": 0.9173333333333333, + "loss": 1.484583854675293, + "loss_ce": 0.00899791531264782, + "loss_iou": 0.62890625, + "loss_num": 0.04345703125, + "loss_xval": 1.4765625, + "num_input_tokens_seen": 540737204, + "step": 8084 + }, + { + "epoch": 0.9174468085106383, + "grad_norm": 12.956910133361816, + "learning_rate": 5e-05, + "loss": 1.2785, + "num_input_tokens_seen": 540803836, + "step": 8085 + }, + { + "epoch": 0.9174468085106383, + "loss": 1.3328194618225098, + "loss_ce": 0.008600806817412376, + "loss_iou": 0.490234375, + "loss_num": 0.0693359375, + "loss_xval": 1.328125, + "num_input_tokens_seen": 540803836, + "step": 8085 + }, + { + "epoch": 0.9175602836879433, + "grad_norm": 11.869109153747559, + "learning_rate": 5e-05, + "loss": 1.1753, + "num_input_tokens_seen": 540870548, + "step": 8086 + }, + { + "epoch": 0.9175602836879433, + "loss": 1.1520617008209229, + "loss_ce": 0.005577371455729008, + "loss_iou": 0.453125, + "loss_num": 0.048095703125, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 540870548, + "step": 8086 + }, + { + "epoch": 0.9176737588652483, + "grad_norm": 22.781211853027344, + "learning_rate": 5e-05, + "loss": 1.0032, + "num_input_tokens_seen": 540937264, + "step": 8087 + }, + { + "epoch": 0.9176737588652483, + "loss": 0.974671483039856, + "loss_ce": 0.0051891133189201355, + "loss_iou": 0.40234375, + "loss_num": 0.03271484375, + "loss_xval": 0.96875, + "num_input_tokens_seen": 540937264, + "step": 8087 + }, + { + "epoch": 0.9177872340425532, + "grad_norm": 38.54653549194336, + "learning_rate": 5e-05, + "loss": 1.0748, + "num_input_tokens_seen": 541003740, + "step": 8088 + }, + { + "epoch": 0.9177872340425532, + "loss": 0.9936201572418213, + "loss_ce": 0.0019209638703614473, + "loss_iou": 0.453125, + "loss_num": 0.0172119140625, + "loss_xval": 0.9921875, + "num_input_tokens_seen": 541003740, + "step": 8088 + }, + { + "epoch": 0.9179007092198581, + "grad_norm": 61.84878158569336, + "learning_rate": 5e-05, + "loss": 1.3564, + "num_input_tokens_seen": 541069860, + "step": 8089 + }, + { + "epoch": 0.9179007092198581, + "loss": 1.379051685333252, + "loss_ce": 0.00893445871770382, + "loss_iou": 0.5859375, + "loss_num": 0.039794921875, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 541069860, + "step": 8089 + }, + { + "epoch": 0.9180141843971631, + "grad_norm": 45.83306121826172, + "learning_rate": 5e-05, + "loss": 1.5465, + "num_input_tokens_seen": 541137400, + "step": 8090 + }, + { + "epoch": 0.9180141843971631, + "loss": 1.6076016426086426, + "loss_ce": 0.0040859654545784, + "loss_iou": 0.6484375, + "loss_num": 0.061279296875, + "loss_xval": 1.6015625, + "num_input_tokens_seen": 541137400, + "step": 8090 + }, + { + "epoch": 0.9181276595744681, + "grad_norm": 15.08241081237793, + "learning_rate": 5e-05, + "loss": 1.2231, + "num_input_tokens_seen": 541204260, + "step": 8091 + }, + { + "epoch": 0.9181276595744681, + "loss": 1.2144948244094849, + "loss_ce": 0.006975308060646057, + "loss_iou": 0.494140625, + "loss_num": 0.044189453125, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 541204260, + "step": 8091 + }, + { + "epoch": 0.918241134751773, + "grad_norm": 17.291614532470703, + "learning_rate": 5e-05, + "loss": 1.1373, + "num_input_tokens_seen": 541272208, + "step": 8092 + }, + { + "epoch": 0.918241134751773, + "loss": 1.0540615320205688, + "loss_ce": 0.004745098762214184, + "loss_iou": 0.453125, + "loss_num": 0.028564453125, + "loss_xval": 1.046875, + "num_input_tokens_seen": 541272208, + "step": 8092 + }, + { + "epoch": 0.918354609929078, + "grad_norm": 14.045452117919922, + "learning_rate": 5e-05, + "loss": 1.1918, + "num_input_tokens_seen": 541339448, + "step": 8093 + }, + { + "epoch": 0.918354609929078, + "loss": 1.0687501430511475, + "loss_ce": 0.00502947298809886, + "loss_iou": 0.412109375, + "loss_num": 0.04736328125, + "loss_xval": 1.0625, + "num_input_tokens_seen": 541339448, + "step": 8093 + }, + { + "epoch": 0.918468085106383, + "grad_norm": 16.710887908935547, + "learning_rate": 5e-05, + "loss": 1.0176, + "num_input_tokens_seen": 541406188, + "step": 8094 + }, + { + "epoch": 0.918468085106383, + "loss": 1.0034990310668945, + "loss_ce": 0.009602585807442665, + "loss_iou": 0.4140625, + "loss_num": 0.032958984375, + "loss_xval": 0.9921875, + "num_input_tokens_seen": 541406188, + "step": 8094 + }, + { + "epoch": 0.918581560283688, + "grad_norm": 42.3673210144043, + "learning_rate": 5e-05, + "loss": 1.1492, + "num_input_tokens_seen": 541473260, + "step": 8095 + }, + { + "epoch": 0.918581560283688, + "loss": 1.2675906419754028, + "loss_ce": 0.015149256214499474, + "loss_iou": 0.4609375, + "loss_num": 0.06640625, + "loss_xval": 1.25, + "num_input_tokens_seen": 541473260, + "step": 8095 + }, + { + "epoch": 0.918695035460993, + "grad_norm": 35.26059341430664, + "learning_rate": 5e-05, + "loss": 1.1415, + "num_input_tokens_seen": 541539960, + "step": 8096 + }, + { + "epoch": 0.918695035460993, + "loss": 1.1701797246932983, + "loss_ce": 0.009046928025782108, + "loss_iou": 0.50390625, + "loss_num": 0.0308837890625, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 541539960, + "step": 8096 + }, + { + "epoch": 0.9188085106382978, + "grad_norm": 16.935867309570312, + "learning_rate": 5e-05, + "loss": 0.9422, + "num_input_tokens_seen": 541605184, + "step": 8097 + }, + { + "epoch": 0.9188085106382978, + "loss": 1.0068449974060059, + "loss_ce": 0.0014738771133124828, + "loss_iou": 0.41796875, + "loss_num": 0.033935546875, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 541605184, + "step": 8097 + }, + { + "epoch": 0.9189219858156028, + "grad_norm": 33.529197692871094, + "learning_rate": 5e-05, + "loss": 0.904, + "num_input_tokens_seen": 541671432, + "step": 8098 + }, + { + "epoch": 0.9189219858156028, + "loss": 1.0215401649475098, + "loss_ce": 0.006403512787073851, + "loss_iou": 0.421875, + "loss_num": 0.033935546875, + "loss_xval": 1.015625, + "num_input_tokens_seen": 541671432, + "step": 8098 + }, + { + "epoch": 0.9190354609929078, + "grad_norm": 53.269386291503906, + "learning_rate": 5e-05, + "loss": 1.1547, + "num_input_tokens_seen": 541738092, + "step": 8099 + }, + { + "epoch": 0.9190354609929078, + "loss": 1.1470189094543457, + "loss_ce": 0.003952566999942064, + "loss_iou": 0.455078125, + "loss_num": 0.04638671875, + "loss_xval": 1.140625, + "num_input_tokens_seen": 541738092, + "step": 8099 + }, + { + "epoch": 0.9191489361702128, + "grad_norm": 17.940776824951172, + "learning_rate": 5e-05, + "loss": 1.0063, + "num_input_tokens_seen": 541804056, + "step": 8100 + }, + { + "epoch": 0.9191489361702128, + "loss": 1.0357518196105957, + "loss_ce": 0.00474590715020895, + "loss_iou": 0.416015625, + "loss_num": 0.039794921875, + "loss_xval": 1.03125, + "num_input_tokens_seen": 541804056, + "step": 8100 + }, + { + "epoch": 0.9192624113475177, + "grad_norm": 8.009352684020996, + "learning_rate": 5e-05, + "loss": 1.213, + "num_input_tokens_seen": 541871132, + "step": 8101 + }, + { + "epoch": 0.9192624113475177, + "loss": 1.2943774461746216, + "loss_ce": 0.00629153847694397, + "loss_iou": 0.5078125, + "loss_num": 0.054931640625, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 541871132, + "step": 8101 + }, + { + "epoch": 0.9193758865248227, + "grad_norm": 36.11280822753906, + "learning_rate": 5e-05, + "loss": 1.0687, + "num_input_tokens_seen": 541937580, + "step": 8102 + }, + { + "epoch": 0.9193758865248227, + "loss": 1.134916067123413, + "loss_ce": 0.009427713230252266, + "loss_iou": 0.412109375, + "loss_num": 0.060546875, + "loss_xval": 1.125, + "num_input_tokens_seen": 541937580, + "step": 8102 + }, + { + "epoch": 0.9194893617021277, + "grad_norm": 29.28401756286621, + "learning_rate": 5e-05, + "loss": 0.9398, + "num_input_tokens_seen": 542003688, + "step": 8103 + }, + { + "epoch": 0.9194893617021277, + "loss": 1.018993854522705, + "loss_ce": 0.0038571867626160383, + "loss_iou": 0.43359375, + "loss_num": 0.029296875, + "loss_xval": 1.015625, + "num_input_tokens_seen": 542003688, + "step": 8103 + }, + { + "epoch": 0.9196028368794327, + "grad_norm": 40.89104461669922, + "learning_rate": 5e-05, + "loss": 1.3415, + "num_input_tokens_seen": 542070776, + "step": 8104 + }, + { + "epoch": 0.9196028368794327, + "loss": 1.3791484832763672, + "loss_ce": 0.009031224064528942, + "loss_iou": 0.53515625, + "loss_num": 0.06005859375, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 542070776, + "step": 8104 + }, + { + "epoch": 0.9197163120567375, + "grad_norm": 49.91361999511719, + "learning_rate": 5e-05, + "loss": 1.2976, + "num_input_tokens_seen": 542137864, + "step": 8105 + }, + { + "epoch": 0.9197163120567375, + "loss": 1.1550406217575073, + "loss_ce": 0.0045889876782894135, + "loss_iou": 0.474609375, + "loss_num": 0.0400390625, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 542137864, + "step": 8105 + }, + { + "epoch": 0.9198297872340425, + "grad_norm": 31.125001907348633, + "learning_rate": 5e-05, + "loss": 1.251, + "num_input_tokens_seen": 542205092, + "step": 8106 + }, + { + "epoch": 0.9198297872340425, + "loss": 1.2931574583053589, + "loss_ce": 0.009954363107681274, + "loss_iou": 0.48046875, + "loss_num": 0.064453125, + "loss_xval": 1.28125, + "num_input_tokens_seen": 542205092, + "step": 8106 + }, + { + "epoch": 0.9199432624113475, + "grad_norm": 35.4841194152832, + "learning_rate": 5e-05, + "loss": 0.9941, + "num_input_tokens_seen": 542271916, + "step": 8107 + }, + { + "epoch": 0.9199432624113475, + "loss": 1.0005723237991333, + "loss_ce": 0.0074083078652620316, + "loss_iou": 0.416015625, + "loss_num": 0.0322265625, + "loss_xval": 0.9921875, + "num_input_tokens_seen": 542271916, + "step": 8107 + }, + { + "epoch": 0.9200567375886525, + "grad_norm": 17.483997344970703, + "learning_rate": 5e-05, + "loss": 1.2606, + "num_input_tokens_seen": 542340492, + "step": 8108 + }, + { + "epoch": 0.9200567375886525, + "loss": 1.4601305723190308, + "loss_ce": 0.0060289958491921425, + "loss_iou": 0.5703125, + "loss_num": 0.0625, + "loss_xval": 1.453125, + "num_input_tokens_seen": 542340492, + "step": 8108 + }, + { + "epoch": 0.9201702127659575, + "grad_norm": 20.285795211791992, + "learning_rate": 5e-05, + "loss": 1.0936, + "num_input_tokens_seen": 542407280, + "step": 8109 + }, + { + "epoch": 0.9201702127659575, + "loss": 1.0322070121765137, + "loss_ce": 0.005839758552610874, + "loss_iou": 0.40625, + "loss_num": 0.04296875, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 542407280, + "step": 8109 + }, + { + "epoch": 0.9202836879432624, + "grad_norm": 22.902589797973633, + "learning_rate": 5e-05, + "loss": 1.1636, + "num_input_tokens_seen": 542474392, + "step": 8110 + }, + { + "epoch": 0.9202836879432624, + "loss": 1.0183179378509521, + "loss_ce": 0.00611093221232295, + "loss_iou": 0.388671875, + "loss_num": 0.047119140625, + "loss_xval": 1.015625, + "num_input_tokens_seen": 542474392, + "step": 8110 + }, + { + "epoch": 0.9203971631205674, + "grad_norm": 30.702363967895508, + "learning_rate": 5e-05, + "loss": 1.2129, + "num_input_tokens_seen": 542540864, + "step": 8111 + }, + { + "epoch": 0.9203971631205674, + "loss": 1.1959537267684937, + "loss_ce": 0.0065006110817193985, + "loss_iou": 0.484375, + "loss_num": 0.044189453125, + "loss_xval": 1.1875, + "num_input_tokens_seen": 542540864, + "step": 8111 + }, + { + "epoch": 0.9205106382978724, + "grad_norm": 24.4227294921875, + "learning_rate": 5e-05, + "loss": 1.3913, + "num_input_tokens_seen": 542606828, + "step": 8112 + }, + { + "epoch": 0.9205106382978724, + "loss": 1.3588099479675293, + "loss_ce": 0.005294273607432842, + "loss_iou": 0.5234375, + "loss_num": 0.0615234375, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 542606828, + "step": 8112 + }, + { + "epoch": 0.9206241134751773, + "grad_norm": 20.288347244262695, + "learning_rate": 5e-05, + "loss": 1.1025, + "num_input_tokens_seen": 542672780, + "step": 8113 + }, + { + "epoch": 0.9206241134751773, + "loss": 1.3435876369476318, + "loss_ce": 0.006917671300470829, + "loss_iou": 0.51171875, + "loss_num": 0.062255859375, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 542672780, + "step": 8113 + }, + { + "epoch": 0.9207375886524822, + "grad_norm": 109.24906921386719, + "learning_rate": 5e-05, + "loss": 1.1263, + "num_input_tokens_seen": 542739576, + "step": 8114 + }, + { + "epoch": 0.9207375886524822, + "loss": 1.0088205337524414, + "loss_ce": 0.00833229348063469, + "loss_iou": 0.41796875, + "loss_num": 0.033203125, + "loss_xval": 1.0, + "num_input_tokens_seen": 542739576, + "step": 8114 + }, + { + "epoch": 0.9208510638297872, + "grad_norm": 21.412416458129883, + "learning_rate": 5e-05, + "loss": 1.043, + "num_input_tokens_seen": 542806604, + "step": 8115 + }, + { + "epoch": 0.9208510638297872, + "loss": 0.9334495067596436, + "loss_ce": 0.006203400902450085, + "loss_iou": 0.39453125, + "loss_num": 0.02783203125, + "loss_xval": 0.92578125, + "num_input_tokens_seen": 542806604, + "step": 8115 + }, + { + "epoch": 0.9209645390070922, + "grad_norm": 23.50920867919922, + "learning_rate": 5e-05, + "loss": 1.1859, + "num_input_tokens_seen": 542873768, + "step": 8116 + }, + { + "epoch": 0.9209645390070922, + "loss": 1.1683413982391357, + "loss_ce": 0.008673425763845444, + "loss_iou": 0.38671875, + "loss_num": 0.0771484375, + "loss_xval": 1.15625, + "num_input_tokens_seen": 542873768, + "step": 8116 + }, + { + "epoch": 0.9210780141843972, + "grad_norm": 25.791885375976562, + "learning_rate": 5e-05, + "loss": 1.1539, + "num_input_tokens_seen": 542941044, + "step": 8117 + }, + { + "epoch": 0.9210780141843972, + "loss": 1.2318894863128662, + "loss_ce": 0.005571110174059868, + "loss_iou": 0.46484375, + "loss_num": 0.059326171875, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 542941044, + "step": 8117 + }, + { + "epoch": 0.9211914893617021, + "grad_norm": 29.391094207763672, + "learning_rate": 5e-05, + "loss": 1.1085, + "num_input_tokens_seen": 543008776, + "step": 8118 + }, + { + "epoch": 0.9211914893617021, + "loss": 1.0395370721817017, + "loss_ce": 0.00438082218170166, + "loss_iou": 0.45703125, + "loss_num": 0.0238037109375, + "loss_xval": 1.03125, + "num_input_tokens_seen": 543008776, + "step": 8118 + }, + { + "epoch": 0.9213049645390071, + "grad_norm": 37.89221954345703, + "learning_rate": 5e-05, + "loss": 1.1624, + "num_input_tokens_seen": 543076196, + "step": 8119 + }, + { + "epoch": 0.9213049645390071, + "loss": 1.2815215587615967, + "loss_ce": 0.007595884148031473, + "loss_iou": 0.5234375, + "loss_num": 0.044921875, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 543076196, + "step": 8119 + }, + { + "epoch": 0.9214184397163121, + "grad_norm": 30.998916625976562, + "learning_rate": 5e-05, + "loss": 1.2039, + "num_input_tokens_seen": 543142704, + "step": 8120 + }, + { + "epoch": 0.9214184397163121, + "loss": 1.2530320882797241, + "loss_ce": 0.010356297716498375, + "loss_iou": 0.5390625, + "loss_num": 0.032958984375, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 543142704, + "step": 8120 + }, + { + "epoch": 0.9215319148936171, + "grad_norm": 30.331727981567383, + "learning_rate": 5e-05, + "loss": 1.2967, + "num_input_tokens_seen": 543210368, + "step": 8121 + }, + { + "epoch": 0.9215319148936171, + "loss": 1.3060954809188843, + "loss_ce": 0.00482587143778801, + "loss_iou": 0.451171875, + "loss_num": 0.07958984375, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 543210368, + "step": 8121 + }, + { + "epoch": 0.9216453900709219, + "grad_norm": 23.326045989990234, + "learning_rate": 5e-05, + "loss": 1.2604, + "num_input_tokens_seen": 543276212, + "step": 8122 + }, + { + "epoch": 0.9216453900709219, + "loss": 1.2744486331939697, + "loss_ce": 0.005405586678534746, + "loss_iou": 0.52734375, + "loss_num": 0.042724609375, + "loss_xval": 1.265625, + "num_input_tokens_seen": 543276212, + "step": 8122 + }, + { + "epoch": 0.9217588652482269, + "grad_norm": 18.984773635864258, + "learning_rate": 5e-05, + "loss": 1.2577, + "num_input_tokens_seen": 543342932, + "step": 8123 + }, + { + "epoch": 0.9217588652482269, + "loss": 1.3216056823730469, + "loss_ce": 0.010570557788014412, + "loss_iou": 0.52734375, + "loss_num": 0.0517578125, + "loss_xval": 1.3125, + "num_input_tokens_seen": 543342932, + "step": 8123 + }, + { + "epoch": 0.9218723404255319, + "grad_norm": 24.176002502441406, + "learning_rate": 5e-05, + "loss": 1.165, + "num_input_tokens_seen": 543410024, + "step": 8124 + }, + { + "epoch": 0.9218723404255319, + "loss": 1.3679797649383545, + "loss_ce": 0.008604750037193298, + "loss_iou": 0.5234375, + "loss_num": 0.062255859375, + "loss_xval": 1.359375, + "num_input_tokens_seen": 543410024, + "step": 8124 + }, + { + "epoch": 0.9219858156028369, + "grad_norm": 48.75239944458008, + "learning_rate": 5e-05, + "loss": 1.2913, + "num_input_tokens_seen": 543477888, + "step": 8125 + }, + { + "epoch": 0.9219858156028369, + "loss": 1.432648777961731, + "loss_ce": 0.004426114726811647, + "loss_iou": 0.52734375, + "loss_num": 0.07470703125, + "loss_xval": 1.4296875, + "num_input_tokens_seen": 543477888, + "step": 8125 + }, + { + "epoch": 0.9220992907801419, + "grad_norm": 32.15153503417969, + "learning_rate": 5e-05, + "loss": 1.3593, + "num_input_tokens_seen": 543544428, + "step": 8126 + }, + { + "epoch": 0.9220992907801419, + "loss": 1.4034757614135742, + "loss_ce": 0.003573458408936858, + "loss_iou": 0.57421875, + "loss_num": 0.0498046875, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 543544428, + "step": 8126 + }, + { + "epoch": 0.9222127659574468, + "grad_norm": 16.601322174072266, + "learning_rate": 5e-05, + "loss": 1.0425, + "num_input_tokens_seen": 543610880, + "step": 8127 + }, + { + "epoch": 0.9222127659574468, + "loss": 1.0728659629821777, + "loss_ce": 0.005483067594468594, + "loss_iou": 0.38671875, + "loss_num": 0.058837890625, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 543610880, + "step": 8127 + }, + { + "epoch": 0.9223262411347518, + "grad_norm": 35.173423767089844, + "learning_rate": 5e-05, + "loss": 1.2598, + "num_input_tokens_seen": 543676888, + "step": 8128 + }, + { + "epoch": 0.9223262411347518, + "loss": 1.4549939632415771, + "loss_ce": 0.009681493043899536, + "loss_iou": 0.59765625, + "loss_num": 0.05078125, + "loss_xval": 1.4453125, + "num_input_tokens_seen": 543676888, + "step": 8128 + }, + { + "epoch": 0.9224397163120568, + "grad_norm": 27.364898681640625, + "learning_rate": 5e-05, + "loss": 1.1769, + "num_input_tokens_seen": 543743420, + "step": 8129 + }, + { + "epoch": 0.9224397163120568, + "loss": 0.9670590162277222, + "loss_ce": 0.004412475973367691, + "loss_iou": 0.419921875, + "loss_num": 0.0247802734375, + "loss_xval": 0.9609375, + "num_input_tokens_seen": 543743420, + "step": 8129 + }, + { + "epoch": 0.9225531914893617, + "grad_norm": 19.2812442779541, + "learning_rate": 5e-05, + "loss": 1.0276, + "num_input_tokens_seen": 543809984, + "step": 8130 + }, + { + "epoch": 0.9225531914893617, + "loss": 1.1053744554519653, + "loss_ce": 0.007229946553707123, + "loss_iou": 0.46875, + "loss_num": 0.031982421875, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 543809984, + "step": 8130 + }, + { + "epoch": 0.9226666666666666, + "grad_norm": 26.891319274902344, + "learning_rate": 5e-05, + "loss": 1.0682, + "num_input_tokens_seen": 543877080, + "step": 8131 + }, + { + "epoch": 0.9226666666666666, + "loss": 1.1038062572479248, + "loss_ce": 0.003708526026457548, + "loss_iou": 0.4765625, + "loss_num": 0.0294189453125, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 543877080, + "step": 8131 + }, + { + "epoch": 0.9227801418439716, + "grad_norm": 39.72121047973633, + "learning_rate": 5e-05, + "loss": 1.2028, + "num_input_tokens_seen": 543943368, + "step": 8132 + }, + { + "epoch": 0.9227801418439716, + "loss": 1.3877758979797363, + "loss_ce": 0.004475119523704052, + "loss_iou": 0.55859375, + "loss_num": 0.052978515625, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 543943368, + "step": 8132 + }, + { + "epoch": 0.9228936170212766, + "grad_norm": 32.43728256225586, + "learning_rate": 5e-05, + "loss": 1.2319, + "num_input_tokens_seen": 544009400, + "step": 8133 + }, + { + "epoch": 0.9228936170212766, + "loss": 1.2228264808654785, + "loss_ce": 0.003588202176615596, + "loss_iou": 0.50390625, + "loss_num": 0.042724609375, + "loss_xval": 1.21875, + "num_input_tokens_seen": 544009400, + "step": 8133 + }, + { + "epoch": 0.9230070921985816, + "grad_norm": 32.94306564331055, + "learning_rate": 5e-05, + "loss": 1.2606, + "num_input_tokens_seen": 544074900, + "step": 8134 + }, + { + "epoch": 0.9230070921985816, + "loss": 1.312777042388916, + "loss_ce": 0.011751607060432434, + "loss_iou": 0.45703125, + "loss_num": 0.0771484375, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 544074900, + "step": 8134 + }, + { + "epoch": 0.9231205673758865, + "grad_norm": 22.698698043823242, + "learning_rate": 5e-05, + "loss": 1.1966, + "num_input_tokens_seen": 544141964, + "step": 8135 + }, + { + "epoch": 0.9231205673758865, + "loss": 1.1977708339691162, + "loss_ce": 0.011735632084310055, + "loss_iou": 0.490234375, + "loss_num": 0.041259765625, + "loss_xval": 1.1875, + "num_input_tokens_seen": 544141964, + "step": 8135 + }, + { + "epoch": 0.9232340425531915, + "grad_norm": 26.151498794555664, + "learning_rate": 5e-05, + "loss": 1.0343, + "num_input_tokens_seen": 544210524, + "step": 8136 + }, + { + "epoch": 0.9232340425531915, + "loss": 1.0111138820648193, + "loss_ce": 0.012090405449271202, + "loss_iou": 0.41015625, + "loss_num": 0.035400390625, + "loss_xval": 1.0, + "num_input_tokens_seen": 544210524, + "step": 8136 + }, + { + "epoch": 0.9233475177304965, + "grad_norm": 39.09157943725586, + "learning_rate": 5e-05, + "loss": 1.2708, + "num_input_tokens_seen": 544276972, + "step": 8137 + }, + { + "epoch": 0.9233475177304965, + "loss": 1.2682952880859375, + "loss_ce": 0.01097114011645317, + "loss_iou": 0.5390625, + "loss_num": 0.035888671875, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 544276972, + "step": 8137 + }, + { + "epoch": 0.9234609929078014, + "grad_norm": 32.624656677246094, + "learning_rate": 5e-05, + "loss": 1.4352, + "num_input_tokens_seen": 544342224, + "step": 8138 + }, + { + "epoch": 0.9234609929078014, + "loss": 1.4232600927352905, + "loss_ce": 0.00529127474874258, + "loss_iou": 0.5234375, + "loss_num": 0.07421875, + "loss_xval": 1.421875, + "num_input_tokens_seen": 544342224, + "step": 8138 + }, + { + "epoch": 0.9235744680851063, + "grad_norm": 43.9814453125, + "learning_rate": 5e-05, + "loss": 1.4701, + "num_input_tokens_seen": 544409904, + "step": 8139 + }, + { + "epoch": 0.9235744680851063, + "loss": 1.615267276763916, + "loss_ce": 0.006868816912174225, + "loss_iou": 0.6484375, + "loss_num": 0.0625, + "loss_xval": 1.609375, + "num_input_tokens_seen": 544409904, + "step": 8139 + }, + { + "epoch": 0.9236879432624113, + "grad_norm": 23.2250919342041, + "learning_rate": 5e-05, + "loss": 1.0975, + "num_input_tokens_seen": 544476784, + "step": 8140 + }, + { + "epoch": 0.9236879432624113, + "loss": 1.0114576816558838, + "loss_ce": 0.004133520647883415, + "loss_iou": 0.4140625, + "loss_num": 0.035400390625, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 544476784, + "step": 8140 + }, + { + "epoch": 0.9238014184397163, + "grad_norm": 26.211257934570312, + "learning_rate": 5e-05, + "loss": 1.2275, + "num_input_tokens_seen": 544543848, + "step": 8141 + }, + { + "epoch": 0.9238014184397163, + "loss": 1.178234577178955, + "loss_ce": 0.01343962736427784, + "loss_iou": 0.484375, + "loss_num": 0.0390625, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 544543848, + "step": 8141 + }, + { + "epoch": 0.9239148936170213, + "grad_norm": 25.826831817626953, + "learning_rate": 5e-05, + "loss": 1.1201, + "num_input_tokens_seen": 544610820, + "step": 8142 + }, + { + "epoch": 0.9239148936170213, + "loss": 1.2697476148605347, + "loss_ce": 0.008028858341276646, + "loss_iou": 0.51171875, + "loss_num": 0.047119140625, + "loss_xval": 1.265625, + "num_input_tokens_seen": 544610820, + "step": 8142 + }, + { + "epoch": 0.9240283687943263, + "grad_norm": 42.35760498046875, + "learning_rate": 5e-05, + "loss": 1.1894, + "num_input_tokens_seen": 544678276, + "step": 8143 + }, + { + "epoch": 0.9240283687943263, + "loss": 1.1097804307937622, + "loss_ce": 0.0028468044474720955, + "loss_iou": 0.45703125, + "loss_num": 0.0390625, + "loss_xval": 1.109375, + "num_input_tokens_seen": 544678276, + "step": 8143 + }, + { + "epoch": 0.9241418439716312, + "grad_norm": 27.62563133239746, + "learning_rate": 5e-05, + "loss": 0.9857, + "num_input_tokens_seen": 544745088, + "step": 8144 + }, + { + "epoch": 0.9241418439716312, + "loss": 1.07780921459198, + "loss_ce": 0.008717428892850876, + "loss_iou": 0.443359375, + "loss_num": 0.03662109375, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 544745088, + "step": 8144 + }, + { + "epoch": 0.9242553191489362, + "grad_norm": 30.963476181030273, + "learning_rate": 5e-05, + "loss": 1.2662, + "num_input_tokens_seen": 544811960, + "step": 8145 + }, + { + "epoch": 0.9242553191489362, + "loss": 1.2930035591125488, + "loss_ce": 0.0068706972524523735, + "loss_iou": 0.5078125, + "loss_num": 0.05419921875, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 544811960, + "step": 8145 + }, + { + "epoch": 0.9243687943262411, + "grad_norm": 40.76095199584961, + "learning_rate": 5e-05, + "loss": 1.1815, + "num_input_tokens_seen": 544879436, + "step": 8146 + }, + { + "epoch": 0.9243687943262411, + "loss": 1.2422657012939453, + "loss_ce": 0.005449354182928801, + "loss_iou": 0.4921875, + "loss_num": 0.050537109375, + "loss_xval": 1.234375, + "num_input_tokens_seen": 544879436, + "step": 8146 + }, + { + "epoch": 0.9244822695035461, + "grad_norm": 35.23863220214844, + "learning_rate": 5e-05, + "loss": 1.0173, + "num_input_tokens_seen": 544946088, + "step": 8147 + }, + { + "epoch": 0.9244822695035461, + "loss": 1.1194179058074951, + "loss_ce": 0.00369525165297091, + "loss_iou": 0.455078125, + "loss_num": 0.04150390625, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 544946088, + "step": 8147 + }, + { + "epoch": 0.924595744680851, + "grad_norm": 51.1673698425293, + "learning_rate": 5e-05, + "loss": 1.027, + "num_input_tokens_seen": 545012948, + "step": 8148 + }, + { + "epoch": 0.924595744680851, + "loss": 1.0934600830078125, + "loss_ce": 0.007034271024167538, + "loss_iou": 0.4609375, + "loss_num": 0.032958984375, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 545012948, + "step": 8148 + }, + { + "epoch": 0.924709219858156, + "grad_norm": 17.48137092590332, + "learning_rate": 5e-05, + "loss": 1.1315, + "num_input_tokens_seen": 545081764, + "step": 8149 + }, + { + "epoch": 0.924709219858156, + "loss": 1.2387675046920776, + "loss_ce": 0.007322186604142189, + "loss_iou": 0.478515625, + "loss_num": 0.05517578125, + "loss_xval": 1.234375, + "num_input_tokens_seen": 545081764, + "step": 8149 + }, + { + "epoch": 0.924822695035461, + "grad_norm": 16.11158561706543, + "learning_rate": 5e-05, + "loss": 1.0904, + "num_input_tokens_seen": 545147960, + "step": 8150 + }, + { + "epoch": 0.924822695035461, + "loss": 1.100377082824707, + "loss_ce": 0.003941530827432871, + "loss_iou": 0.404296875, + "loss_num": 0.057373046875, + "loss_xval": 1.09375, + "num_input_tokens_seen": 545147960, + "step": 8150 + }, + { + "epoch": 0.924936170212766, + "grad_norm": 22.343017578125, + "learning_rate": 5e-05, + "loss": 1.1667, + "num_input_tokens_seen": 545214848, + "step": 8151 + }, + { + "epoch": 0.924936170212766, + "loss": 1.2814037799835205, + "loss_ce": 0.00405998295173049, + "loss_iou": 0.5078125, + "loss_num": 0.052001953125, + "loss_xval": 1.28125, + "num_input_tokens_seen": 545214848, + "step": 8151 + }, + { + "epoch": 0.925049645390071, + "grad_norm": 31.534467697143555, + "learning_rate": 5e-05, + "loss": 1.249, + "num_input_tokens_seen": 545282712, + "step": 8152 + }, + { + "epoch": 0.925049645390071, + "loss": 1.073409080505371, + "loss_ce": 0.006514505483210087, + "loss_iou": 0.439453125, + "loss_num": 0.037353515625, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 545282712, + "step": 8152 + }, + { + "epoch": 0.9251631205673759, + "grad_norm": 23.32234764099121, + "learning_rate": 5e-05, + "loss": 1.1366, + "num_input_tokens_seen": 545349704, + "step": 8153 + }, + { + "epoch": 0.9251631205673759, + "loss": 1.181584358215332, + "loss_ce": 0.0033617541193962097, + "loss_iou": 0.47265625, + "loss_num": 0.046875, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 545349704, + "step": 8153 + }, + { + "epoch": 0.9252765957446808, + "grad_norm": 21.507997512817383, + "learning_rate": 5e-05, + "loss": 1.1547, + "num_input_tokens_seen": 545416360, + "step": 8154 + }, + { + "epoch": 0.9252765957446808, + "loss": 1.2408018112182617, + "loss_ce": 0.005450259894132614, + "loss_iou": 0.5, + "loss_num": 0.046630859375, + "loss_xval": 1.234375, + "num_input_tokens_seen": 545416360, + "step": 8154 + }, + { + "epoch": 0.9253900709219858, + "grad_norm": 16.89110565185547, + "learning_rate": 5e-05, + "loss": 1.0484, + "num_input_tokens_seen": 545483968, + "step": 8155 + }, + { + "epoch": 0.9253900709219858, + "loss": 1.1128973960876465, + "loss_ce": 0.005963773000985384, + "loss_iou": 0.46875, + "loss_num": 0.034423828125, + "loss_xval": 1.109375, + "num_input_tokens_seen": 545483968, + "step": 8155 + }, + { + "epoch": 0.9255035460992908, + "grad_norm": 26.51165199279785, + "learning_rate": 5e-05, + "loss": 1.0818, + "num_input_tokens_seen": 545551028, + "step": 8156 + }, + { + "epoch": 0.9255035460992908, + "loss": 1.1850347518920898, + "loss_ce": 0.006812104023993015, + "loss_iou": 0.484375, + "loss_num": 0.04248046875, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 545551028, + "step": 8156 + }, + { + "epoch": 0.9256170212765957, + "grad_norm": 54.855064392089844, + "learning_rate": 5e-05, + "loss": 1.1422, + "num_input_tokens_seen": 545618460, + "step": 8157 + }, + { + "epoch": 0.9256170212765957, + "loss": 1.1699237823486328, + "loss_ce": 0.0039081210270524025, + "loss_iou": 0.51953125, + "loss_num": 0.0250244140625, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 545618460, + "step": 8157 + }, + { + "epoch": 0.9257304964539007, + "grad_norm": 62.77344512939453, + "learning_rate": 5e-05, + "loss": 1.3622, + "num_input_tokens_seen": 545685192, + "step": 8158 + }, + { + "epoch": 0.9257304964539007, + "loss": 1.462662696838379, + "loss_ce": 0.005631350912153721, + "loss_iou": 0.58203125, + "loss_num": 0.05908203125, + "loss_xval": 1.453125, + "num_input_tokens_seen": 545685192, + "step": 8158 + }, + { + "epoch": 0.9258439716312057, + "grad_norm": 37.34187316894531, + "learning_rate": 5e-05, + "loss": 1.2486, + "num_input_tokens_seen": 545751300, + "step": 8159 + }, + { + "epoch": 0.9258439716312057, + "loss": 1.1225049495697021, + "loss_ce": 0.00788083579391241, + "loss_iou": 0.46875, + "loss_num": 0.03515625, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 545751300, + "step": 8159 + }, + { + "epoch": 0.9259574468085107, + "grad_norm": 14.571124076843262, + "learning_rate": 5e-05, + "loss": 1.0817, + "num_input_tokens_seen": 545818816, + "step": 8160 + }, + { + "epoch": 0.9259574468085107, + "loss": 1.1376402378082275, + "loss_ce": 0.0067809224128723145, + "loss_iou": 0.46875, + "loss_num": 0.038818359375, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 545818816, + "step": 8160 + }, + { + "epoch": 0.9260709219858156, + "grad_norm": 16.334306716918945, + "learning_rate": 5e-05, + "loss": 0.8409, + "num_input_tokens_seen": 545885096, + "step": 8161 + }, + { + "epoch": 0.9260709219858156, + "loss": 0.8057093620300293, + "loss_ce": 0.0032191111240535975, + "loss_iou": 0.359375, + "loss_num": 0.0167236328125, + "loss_xval": 0.80078125, + "num_input_tokens_seen": 545885096, + "step": 8161 + }, + { + "epoch": 0.9261843971631206, + "grad_norm": 19.776031494140625, + "learning_rate": 5e-05, + "loss": 1.1228, + "num_input_tokens_seen": 545951712, + "step": 8162 + }, + { + "epoch": 0.9261843971631206, + "loss": 1.2110124826431274, + "loss_ce": 0.006178541108965874, + "loss_iou": 0.435546875, + "loss_num": 0.06640625, + "loss_xval": 1.203125, + "num_input_tokens_seen": 545951712, + "step": 8162 + }, + { + "epoch": 0.9262978723404255, + "grad_norm": 20.767133712768555, + "learning_rate": 5e-05, + "loss": 1.1551, + "num_input_tokens_seen": 546018044, + "step": 8163 + }, + { + "epoch": 0.9262978723404255, + "loss": 1.3814340829849243, + "loss_ce": 0.007410610094666481, + "loss_iou": 0.54296875, + "loss_num": 0.05712890625, + "loss_xval": 1.375, + "num_input_tokens_seen": 546018044, + "step": 8163 + }, + { + "epoch": 0.9264113475177305, + "grad_norm": 79.72860717773438, + "learning_rate": 5e-05, + "loss": 1.2604, + "num_input_tokens_seen": 546084668, + "step": 8164 + }, + { + "epoch": 0.9264113475177305, + "loss": 1.2767459154129028, + "loss_ce": 0.005261451005935669, + "loss_iou": 0.53125, + "loss_num": 0.04150390625, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 546084668, + "step": 8164 + }, + { + "epoch": 0.9265248226950354, + "grad_norm": 18.69915199279785, + "learning_rate": 5e-05, + "loss": 1.1083, + "num_input_tokens_seen": 546152284, + "step": 8165 + }, + { + "epoch": 0.9265248226950354, + "loss": 1.0676872730255127, + "loss_ce": 0.00518729817122221, + "loss_iou": 0.44921875, + "loss_num": 0.0322265625, + "loss_xval": 1.0625, + "num_input_tokens_seen": 546152284, + "step": 8165 + }, + { + "epoch": 0.9266382978723404, + "grad_norm": 23.727153778076172, + "learning_rate": 5e-05, + "loss": 1.1463, + "num_input_tokens_seen": 546218492, + "step": 8166 + }, + { + "epoch": 0.9266382978723404, + "loss": 1.1362922191619873, + "loss_ce": 0.006409310735762119, + "loss_iou": 0.392578125, + "loss_num": 0.0693359375, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 546218492, + "step": 8166 + }, + { + "epoch": 0.9267517730496454, + "grad_norm": 36.534427642822266, + "learning_rate": 5e-05, + "loss": 1.1202, + "num_input_tokens_seen": 546286708, + "step": 8167 + }, + { + "epoch": 0.9267517730496454, + "loss": 1.1352295875549316, + "loss_ce": 0.005835078656673431, + "loss_iou": 0.486328125, + "loss_num": 0.031494140625, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 546286708, + "step": 8167 + }, + { + "epoch": 0.9268652482269504, + "grad_norm": 28.52116584777832, + "learning_rate": 5e-05, + "loss": 1.1738, + "num_input_tokens_seen": 546354212, + "step": 8168 + }, + { + "epoch": 0.9268652482269504, + "loss": 1.4192299842834473, + "loss_ce": 0.0056556956842541695, + "loss_iou": 0.59765625, + "loss_num": 0.043212890625, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 546354212, + "step": 8168 + }, + { + "epoch": 0.9269787234042554, + "grad_norm": 25.4630069732666, + "learning_rate": 5e-05, + "loss": 1.1311, + "num_input_tokens_seen": 546421384, + "step": 8169 + }, + { + "epoch": 0.9269787234042554, + "loss": 1.0288408994674683, + "loss_ce": 0.008210972882807255, + "loss_iou": 0.412109375, + "loss_num": 0.038818359375, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 546421384, + "step": 8169 + }, + { + "epoch": 0.9270921985815603, + "grad_norm": 25.23624038696289, + "learning_rate": 5e-05, + "loss": 1.0745, + "num_input_tokens_seen": 546488652, + "step": 8170 + }, + { + "epoch": 0.9270921985815603, + "loss": 1.203079104423523, + "loss_ce": 0.004592761397361755, + "loss_iou": 0.48046875, + "loss_num": 0.046875, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 546488652, + "step": 8170 + }, + { + "epoch": 0.9272056737588652, + "grad_norm": 23.85565185546875, + "learning_rate": 5e-05, + "loss": 1.0966, + "num_input_tokens_seen": 546554100, + "step": 8171 + }, + { + "epoch": 0.9272056737588652, + "loss": 1.227198839187622, + "loss_ce": 0.003565962892025709, + "loss_iou": 0.484375, + "loss_num": 0.05078125, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 546554100, + "step": 8171 + }, + { + "epoch": 0.9273191489361702, + "grad_norm": 20.69461441040039, + "learning_rate": 5e-05, + "loss": 1.058, + "num_input_tokens_seen": 546620840, + "step": 8172 + }, + { + "epoch": 0.9273191489361702, + "loss": 1.0169297456741333, + "loss_ce": 0.007408271078020334, + "loss_iou": 0.396484375, + "loss_num": 0.04345703125, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 546620840, + "step": 8172 + }, + { + "epoch": 0.9274326241134752, + "grad_norm": 33.613887786865234, + "learning_rate": 5e-05, + "loss": 1.0554, + "num_input_tokens_seen": 546688148, + "step": 8173 + }, + { + "epoch": 0.9274326241134752, + "loss": 1.2061645984649658, + "loss_ce": 0.005969417281448841, + "loss_iou": 0.486328125, + "loss_num": 0.045654296875, + "loss_xval": 1.203125, + "num_input_tokens_seen": 546688148, + "step": 8173 + }, + { + "epoch": 0.9275460992907801, + "grad_norm": 23.632617950439453, + "learning_rate": 5e-05, + "loss": 1.0641, + "num_input_tokens_seen": 546755464, + "step": 8174 + }, + { + "epoch": 0.9275460992907801, + "loss": 0.9501767754554749, + "loss_ce": 0.0014463174156844616, + "loss_iou": 0.4140625, + "loss_num": 0.0242919921875, + "loss_xval": 0.94921875, + "num_input_tokens_seen": 546755464, + "step": 8174 + }, + { + "epoch": 0.9276595744680851, + "grad_norm": 24.9721736907959, + "learning_rate": 5e-05, + "loss": 1.0137, + "num_input_tokens_seen": 546821560, + "step": 8175 + }, + { + "epoch": 0.9276595744680851, + "loss": 0.9958186149597168, + "loss_ce": 0.004851792007684708, + "loss_iou": 0.40625, + "loss_num": 0.03564453125, + "loss_xval": 0.9921875, + "num_input_tokens_seen": 546821560, + "step": 8175 + }, + { + "epoch": 0.9277730496453901, + "grad_norm": 32.93861389160156, + "learning_rate": 5e-05, + "loss": 1.0791, + "num_input_tokens_seen": 546887768, + "step": 8176 + }, + { + "epoch": 0.9277730496453901, + "loss": 1.083216905593872, + "loss_ce": 0.004115315154194832, + "loss_iou": 0.4609375, + "loss_num": 0.0311279296875, + "loss_xval": 1.078125, + "num_input_tokens_seen": 546887768, + "step": 8176 + }, + { + "epoch": 0.9278865248226951, + "grad_norm": 39.5357666015625, + "learning_rate": 5e-05, + "loss": 1.3686, + "num_input_tokens_seen": 546954864, + "step": 8177 + }, + { + "epoch": 0.9278865248226951, + "loss": 1.314100980758667, + "loss_ce": 0.004042481072247028, + "loss_iou": 0.53125, + "loss_num": 0.049072265625, + "loss_xval": 1.3125, + "num_input_tokens_seen": 546954864, + "step": 8177 + }, + { + "epoch": 0.928, + "grad_norm": 22.894506454467773, + "learning_rate": 5e-05, + "loss": 1.3094, + "num_input_tokens_seen": 547021144, + "step": 8178 + }, + { + "epoch": 0.928, + "loss": 1.108553171157837, + "loss_ce": 0.005769932176917791, + "loss_iou": 0.48828125, + "loss_num": 0.025146484375, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 547021144, + "step": 8178 + }, + { + "epoch": 0.9281134751773049, + "grad_norm": 21.940868377685547, + "learning_rate": 5e-05, + "loss": 1.3944, + "num_input_tokens_seen": 547088044, + "step": 8179 + }, + { + "epoch": 0.9281134751773049, + "loss": 1.3392014503479004, + "loss_ce": 0.012052983976900578, + "loss_iou": 0.5234375, + "loss_num": 0.056884765625, + "loss_xval": 1.328125, + "num_input_tokens_seen": 547088044, + "step": 8179 + }, + { + "epoch": 0.9282269503546099, + "grad_norm": 22.432584762573242, + "learning_rate": 5e-05, + "loss": 1.1684, + "num_input_tokens_seen": 547155656, + "step": 8180 + }, + { + "epoch": 0.9282269503546099, + "loss": 1.0822930335998535, + "loss_ce": 0.004412174224853516, + "loss_iou": 0.453125, + "loss_num": 0.034423828125, + "loss_xval": 1.078125, + "num_input_tokens_seen": 547155656, + "step": 8180 + }, + { + "epoch": 0.9283404255319149, + "grad_norm": 15.419774055480957, + "learning_rate": 5e-05, + "loss": 1.0075, + "num_input_tokens_seen": 547223556, + "step": 8181 + }, + { + "epoch": 0.9283404255319149, + "loss": 1.0514510869979858, + "loss_ce": 0.007994045503437519, + "loss_iou": 0.421875, + "loss_num": 0.040283203125, + "loss_xval": 1.046875, + "num_input_tokens_seen": 547223556, + "step": 8181 + }, + { + "epoch": 0.9284539007092198, + "grad_norm": 39.84848403930664, + "learning_rate": 5e-05, + "loss": 1.1883, + "num_input_tokens_seen": 547291528, + "step": 8182 + }, + { + "epoch": 0.9284539007092198, + "loss": 1.1405739784240723, + "loss_ce": 0.0053201406262815, + "loss_iou": 0.478515625, + "loss_num": 0.035888671875, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 547291528, + "step": 8182 + }, + { + "epoch": 0.9285673758865248, + "grad_norm": 37.237159729003906, + "learning_rate": 5e-05, + "loss": 1.2095, + "num_input_tokens_seen": 547358564, + "step": 8183 + }, + { + "epoch": 0.9285673758865248, + "loss": 1.2438557147979736, + "loss_ce": 0.007039372343569994, + "loss_iou": 0.49609375, + "loss_num": 0.04931640625, + "loss_xval": 1.234375, + "num_input_tokens_seen": 547358564, + "step": 8183 + }, + { + "epoch": 0.9286808510638298, + "grad_norm": 28.81267547607422, + "learning_rate": 5e-05, + "loss": 1.3756, + "num_input_tokens_seen": 547425840, + "step": 8184 + }, + { + "epoch": 0.9286808510638298, + "loss": 1.5146162509918213, + "loss_ce": 0.008268657140433788, + "loss_iou": 0.59765625, + "loss_num": 0.062255859375, + "loss_xval": 1.5078125, + "num_input_tokens_seen": 547425840, + "step": 8184 + }, + { + "epoch": 0.9287943262411348, + "grad_norm": 29.893028259277344, + "learning_rate": 5e-05, + "loss": 1.0332, + "num_input_tokens_seen": 547491596, + "step": 8185 + }, + { + "epoch": 0.9287943262411348, + "loss": 0.9789373874664307, + "loss_ce": 0.0023749233223497868, + "loss_iou": 0.39453125, + "loss_num": 0.037109375, + "loss_xval": 0.9765625, + "num_input_tokens_seen": 547491596, + "step": 8185 + }, + { + "epoch": 0.9289078014184398, + "grad_norm": 23.888957977294922, + "learning_rate": 5e-05, + "loss": 1.1385, + "num_input_tokens_seen": 547558928, + "step": 8186 + }, + { + "epoch": 0.9289078014184398, + "loss": 1.2932063341140747, + "loss_ce": 0.010003243573009968, + "loss_iou": 0.53125, + "loss_num": 0.044677734375, + "loss_xval": 1.28125, + "num_input_tokens_seen": 547558928, + "step": 8186 + }, + { + "epoch": 0.9290212765957446, + "grad_norm": 33.04542541503906, + "learning_rate": 5e-05, + "loss": 1.1831, + "num_input_tokens_seen": 547626008, + "step": 8187 + }, + { + "epoch": 0.9290212765957446, + "loss": 1.1393473148345947, + "loss_ce": 0.005069942679256201, + "loss_iou": 0.431640625, + "loss_num": 0.054443359375, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 547626008, + "step": 8187 + }, + { + "epoch": 0.9291347517730496, + "grad_norm": 46.129756927490234, + "learning_rate": 5e-05, + "loss": 1.267, + "num_input_tokens_seen": 547692944, + "step": 8188 + }, + { + "epoch": 0.9291347517730496, + "loss": 1.156341314315796, + "loss_ce": 0.0035092358011752367, + "loss_iou": 0.4921875, + "loss_num": 0.03369140625, + "loss_xval": 1.15625, + "num_input_tokens_seen": 547692944, + "step": 8188 + }, + { + "epoch": 0.9292482269503546, + "grad_norm": 41.186737060546875, + "learning_rate": 5e-05, + "loss": 1.1569, + "num_input_tokens_seen": 547760188, + "step": 8189 + }, + { + "epoch": 0.9292482269503546, + "loss": 1.2906608581542969, + "loss_ce": 0.00843432080000639, + "loss_iou": 0.53125, + "loss_num": 0.0439453125, + "loss_xval": 1.28125, + "num_input_tokens_seen": 547760188, + "step": 8189 + }, + { + "epoch": 0.9293617021276596, + "grad_norm": 31.432680130004883, + "learning_rate": 5e-05, + "loss": 1.0828, + "num_input_tokens_seen": 547827552, + "step": 8190 + }, + { + "epoch": 0.9293617021276596, + "loss": 0.9193800091743469, + "loss_ce": 0.006782323122024536, + "loss_iou": 0.431640625, + "loss_num": 0.010009765625, + "loss_xval": 0.9140625, + "num_input_tokens_seen": 547827552, + "step": 8190 + }, + { + "epoch": 0.9294751773049645, + "grad_norm": 25.617401123046875, + "learning_rate": 5e-05, + "loss": 1.1843, + "num_input_tokens_seen": 547893992, + "step": 8191 + }, + { + "epoch": 0.9294751773049645, + "loss": 0.965925931930542, + "loss_ce": 0.008467437699437141, + "loss_iou": 0.375, + "loss_num": 0.041748046875, + "loss_xval": 0.95703125, + "num_input_tokens_seen": 547893992, + "step": 8191 + }, + { + "epoch": 0.9295886524822695, + "grad_norm": 28.619958877563477, + "learning_rate": 5e-05, + "loss": 1.0389, + "num_input_tokens_seen": 547960968, + "step": 8192 + }, + { + "epoch": 0.9295886524822695, + "loss": 1.1030781269073486, + "loss_ce": 0.005910229869186878, + "loss_iou": 0.44921875, + "loss_num": 0.0400390625, + "loss_xval": 1.09375, + "num_input_tokens_seen": 547960968, + "step": 8192 + }, + { + "epoch": 0.9297021276595745, + "grad_norm": 24.975017547607422, + "learning_rate": 5e-05, + "loss": 1.228, + "num_input_tokens_seen": 548027892, + "step": 8193 + }, + { + "epoch": 0.9297021276595745, + "loss": 1.2341917753219604, + "loss_ce": 0.0017699560848996043, + "loss_iou": 0.50390625, + "loss_num": 0.04541015625, + "loss_xval": 1.234375, + "num_input_tokens_seen": 548027892, + "step": 8193 + }, + { + "epoch": 0.9298156028368795, + "grad_norm": 25.42691993713379, + "learning_rate": 5e-05, + "loss": 1.1014, + "num_input_tokens_seen": 548094736, + "step": 8194 + }, + { + "epoch": 0.9298156028368795, + "loss": 1.1317665576934814, + "loss_ce": 0.007987171411514282, + "loss_iou": 0.453125, + "loss_num": 0.04345703125, + "loss_xval": 1.125, + "num_input_tokens_seen": 548094736, + "step": 8194 + }, + { + "epoch": 0.9299290780141845, + "grad_norm": 27.17547035217285, + "learning_rate": 5e-05, + "loss": 1.2799, + "num_input_tokens_seen": 548161796, + "step": 8195 + }, + { + "epoch": 0.9299290780141845, + "loss": 1.2027983665466309, + "loss_ce": 0.004556186497211456, + "loss_iou": 0.5, + "loss_num": 0.0390625, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 548161796, + "step": 8195 + }, + { + "epoch": 0.9300425531914893, + "grad_norm": 28.035268783569336, + "learning_rate": 5e-05, + "loss": 0.9913, + "num_input_tokens_seen": 548229448, + "step": 8196 + }, + { + "epoch": 0.9300425531914893, + "loss": 1.0168073177337646, + "loss_ce": 0.005088565871119499, + "loss_iou": 0.4140625, + "loss_num": 0.03662109375, + "loss_xval": 1.015625, + "num_input_tokens_seen": 548229448, + "step": 8196 + }, + { + "epoch": 0.9301560283687943, + "grad_norm": 30.44109344482422, + "learning_rate": 5e-05, + "loss": 1.307, + "num_input_tokens_seen": 548297192, + "step": 8197 + }, + { + "epoch": 0.9301560283687943, + "loss": 1.3436671495437622, + "loss_ce": 0.006264768540859222, + "loss_iou": 0.5234375, + "loss_num": 0.057373046875, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 548297192, + "step": 8197 + }, + { + "epoch": 0.9302695035460993, + "grad_norm": 49.06044387817383, + "learning_rate": 5e-05, + "loss": 1.2113, + "num_input_tokens_seen": 548365152, + "step": 8198 + }, + { + "epoch": 0.9302695035460993, + "loss": 1.3157789707183838, + "loss_ce": 0.003278982127085328, + "loss_iou": 0.5078125, + "loss_num": 0.06005859375, + "loss_xval": 1.3125, + "num_input_tokens_seen": 548365152, + "step": 8198 + }, + { + "epoch": 0.9303829787234043, + "grad_norm": 29.356950759887695, + "learning_rate": 5e-05, + "loss": 1.1534, + "num_input_tokens_seen": 548431368, + "step": 8199 + }, + { + "epoch": 0.9303829787234043, + "loss": 0.9412106871604919, + "loss_ce": 0.005175560247153044, + "loss_iou": 0.4296875, + "loss_num": 0.015625, + "loss_xval": 0.9375, + "num_input_tokens_seen": 548431368, + "step": 8199 + }, + { + "epoch": 0.9304964539007092, + "grad_norm": 12.790672302246094, + "learning_rate": 5e-05, + "loss": 1.1823, + "num_input_tokens_seen": 548497364, + "step": 8200 + }, + { + "epoch": 0.9304964539007092, + "loss": 1.2384535074234009, + "loss_ce": 0.009937800467014313, + "loss_iou": 0.421875, + "loss_num": 0.0771484375, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 548497364, + "step": 8200 + }, + { + "epoch": 0.9306099290780142, + "grad_norm": 14.566682815551758, + "learning_rate": 5e-05, + "loss": 1.0663, + "num_input_tokens_seen": 548563516, + "step": 8201 + }, + { + "epoch": 0.9306099290780142, + "loss": 0.9048230051994324, + "loss_ce": 0.0054089659824967384, + "loss_iou": 0.359375, + "loss_num": 0.03564453125, + "loss_xval": 0.8984375, + "num_input_tokens_seen": 548563516, + "step": 8201 + }, + { + "epoch": 0.9307234042553192, + "grad_norm": 24.26250457763672, + "learning_rate": 5e-05, + "loss": 1.1382, + "num_input_tokens_seen": 548629156, + "step": 8202 + }, + { + "epoch": 0.9307234042553192, + "loss": 1.0625087022781372, + "loss_ce": 0.010262587107717991, + "loss_iou": 0.38671875, + "loss_num": 0.055908203125, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 548629156, + "step": 8202 + }, + { + "epoch": 0.9308368794326242, + "grad_norm": 24.73960304260254, + "learning_rate": 5e-05, + "loss": 1.0196, + "num_input_tokens_seen": 548695984, + "step": 8203 + }, + { + "epoch": 0.9308368794326242, + "loss": 0.947939395904541, + "loss_ce": 0.00763180386275053, + "loss_iou": 0.384765625, + "loss_num": 0.0341796875, + "loss_xval": 0.94140625, + "num_input_tokens_seen": 548695984, + "step": 8203 + }, + { + "epoch": 0.930950354609929, + "grad_norm": 42.62773895263672, + "learning_rate": 5e-05, + "loss": 1.2641, + "num_input_tokens_seen": 548763076, + "step": 8204 + }, + { + "epoch": 0.930950354609929, + "loss": 1.2961628437042236, + "loss_ce": 0.01003008522093296, + "loss_iou": 0.53515625, + "loss_num": 0.0439453125, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 548763076, + "step": 8204 + }, + { + "epoch": 0.931063829787234, + "grad_norm": 35.73743438720703, + "learning_rate": 5e-05, + "loss": 1.0134, + "num_input_tokens_seen": 548828948, + "step": 8205 + }, + { + "epoch": 0.931063829787234, + "loss": 1.1575891971588135, + "loss_ce": 0.004757036454975605, + "loss_iou": 0.490234375, + "loss_num": 0.0341796875, + "loss_xval": 1.15625, + "num_input_tokens_seen": 548828948, + "step": 8205 + }, + { + "epoch": 0.931177304964539, + "grad_norm": 27.380374908447266, + "learning_rate": 5e-05, + "loss": 1.2016, + "num_input_tokens_seen": 548896236, + "step": 8206 + }, + { + "epoch": 0.931177304964539, + "loss": 1.2404000759124756, + "loss_ce": 0.005536765791475773, + "loss_iou": 0.49609375, + "loss_num": 0.048583984375, + "loss_xval": 1.234375, + "num_input_tokens_seen": 548896236, + "step": 8206 + }, + { + "epoch": 0.931290780141844, + "grad_norm": 27.907564163208008, + "learning_rate": 5e-05, + "loss": 1.0119, + "num_input_tokens_seen": 548963092, + "step": 8207 + }, + { + "epoch": 0.931290780141844, + "loss": 1.2836555242538452, + "loss_ce": 0.006799998227506876, + "loss_iou": 0.5234375, + "loss_num": 0.045654296875, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 548963092, + "step": 8207 + }, + { + "epoch": 0.9314042553191489, + "grad_norm": 37.982418060302734, + "learning_rate": 5e-05, + "loss": 0.988, + "num_input_tokens_seen": 549030368, + "step": 8208 + }, + { + "epoch": 0.9314042553191489, + "loss": 1.0697662830352783, + "loss_ce": 0.004824932664632797, + "loss_iou": 0.419921875, + "loss_num": 0.044677734375, + "loss_xval": 1.0625, + "num_input_tokens_seen": 549030368, + "step": 8208 + }, + { + "epoch": 0.9315177304964539, + "grad_norm": 84.74822235107422, + "learning_rate": 5e-05, + "loss": 1.182, + "num_input_tokens_seen": 549097044, + "step": 8209 + }, + { + "epoch": 0.9315177304964539, + "loss": 1.1075793504714966, + "loss_ce": 0.006260951515287161, + "loss_iou": 0.490234375, + "loss_num": 0.0242919921875, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 549097044, + "step": 8209 + }, + { + "epoch": 0.9316312056737589, + "grad_norm": 37.201690673828125, + "learning_rate": 5e-05, + "loss": 1.0604, + "num_input_tokens_seen": 549163804, + "step": 8210 + }, + { + "epoch": 0.9316312056737589, + "loss": 1.1081355810165405, + "loss_ce": 0.003643427509814501, + "loss_iou": 0.4453125, + "loss_num": 0.04296875, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 549163804, + "step": 8210 + }, + { + "epoch": 0.9317446808510639, + "grad_norm": 25.869348526000977, + "learning_rate": 5e-05, + "loss": 1.0935, + "num_input_tokens_seen": 549229812, + "step": 8211 + }, + { + "epoch": 0.9317446808510639, + "loss": 1.1418612003326416, + "loss_ce": 0.005630745552480221, + "loss_iou": 0.48828125, + "loss_num": 0.031982421875, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 549229812, + "step": 8211 + }, + { + "epoch": 0.9318581560283687, + "grad_norm": 61.286460876464844, + "learning_rate": 5e-05, + "loss": 1.0839, + "num_input_tokens_seen": 549296648, + "step": 8212 + }, + { + "epoch": 0.9318581560283687, + "loss": 1.115936517715454, + "loss_ce": 0.009002890437841415, + "loss_iou": 0.451171875, + "loss_num": 0.040771484375, + "loss_xval": 1.109375, + "num_input_tokens_seen": 549296648, + "step": 8212 + }, + { + "epoch": 0.9319716312056737, + "grad_norm": 23.868906021118164, + "learning_rate": 5e-05, + "loss": 1.1587, + "num_input_tokens_seen": 549363440, + "step": 8213 + }, + { + "epoch": 0.9319716312056737, + "loss": 1.3978271484375, + "loss_ce": 0.006225597579032183, + "loss_iou": 0.546875, + "loss_num": 0.05908203125, + "loss_xval": 1.390625, + "num_input_tokens_seen": 549363440, + "step": 8213 + }, + { + "epoch": 0.9320851063829787, + "grad_norm": 32.90830612182617, + "learning_rate": 5e-05, + "loss": 1.1826, + "num_input_tokens_seen": 549430976, + "step": 8214 + }, + { + "epoch": 0.9320851063829787, + "loss": 1.099472165107727, + "loss_ce": 0.005233893170952797, + "loss_iou": 0.486328125, + "loss_num": 0.0244140625, + "loss_xval": 1.09375, + "num_input_tokens_seen": 549430976, + "step": 8214 + }, + { + "epoch": 0.9321985815602837, + "grad_norm": 23.895870208740234, + "learning_rate": 5e-05, + "loss": 1.3718, + "num_input_tokens_seen": 549497976, + "step": 8215 + }, + { + "epoch": 0.9321985815602837, + "loss": 1.0641342401504517, + "loss_ce": 0.008958481252193451, + "loss_iou": 0.478515625, + "loss_num": 0.019775390625, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 549497976, + "step": 8215 + }, + { + "epoch": 0.9323120567375887, + "grad_norm": 28.712018966674805, + "learning_rate": 5e-05, + "loss": 1.2197, + "num_input_tokens_seen": 549564324, + "step": 8216 + }, + { + "epoch": 0.9323120567375887, + "loss": 1.1013668775558472, + "loss_ce": 0.010790692642331123, + "loss_iou": 0.41796875, + "loss_num": 0.05078125, + "loss_xval": 1.09375, + "num_input_tokens_seen": 549564324, + "step": 8216 + }, + { + "epoch": 0.9324255319148936, + "grad_norm": 27.598526000976562, + "learning_rate": 5e-05, + "loss": 1.128, + "num_input_tokens_seen": 549630888, + "step": 8217 + }, + { + "epoch": 0.9324255319148936, + "loss": 1.11540949344635, + "loss_ce": 0.008475853130221367, + "loss_iou": 0.458984375, + "loss_num": 0.037841796875, + "loss_xval": 1.109375, + "num_input_tokens_seen": 549630888, + "step": 8217 + }, + { + "epoch": 0.9325390070921986, + "grad_norm": 32.77553939819336, + "learning_rate": 5e-05, + "loss": 1.0986, + "num_input_tokens_seen": 549697108, + "step": 8218 + }, + { + "epoch": 0.9325390070921986, + "loss": 1.109673261642456, + "loss_ce": 0.021904725581407547, + "loss_iou": 0.43359375, + "loss_num": 0.044189453125, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 549697108, + "step": 8218 + }, + { + "epoch": 0.9326524822695036, + "grad_norm": 25.618806838989258, + "learning_rate": 5e-05, + "loss": 1.3946, + "num_input_tokens_seen": 549764928, + "step": 8219 + }, + { + "epoch": 0.9326524822695036, + "loss": 1.2504713535308838, + "loss_ce": 0.0068190498277544975, + "loss_iou": 0.52734375, + "loss_num": 0.03759765625, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 549764928, + "step": 8219 + }, + { + "epoch": 0.9327659574468085, + "grad_norm": 27.801523208618164, + "learning_rate": 5e-05, + "loss": 1.1442, + "num_input_tokens_seen": 549830892, + "step": 8220 + }, + { + "epoch": 0.9327659574468085, + "loss": 1.2315855026245117, + "loss_ce": 0.011859031394124031, + "loss_iou": 0.484375, + "loss_num": 0.05029296875, + "loss_xval": 1.21875, + "num_input_tokens_seen": 549830892, + "step": 8220 + }, + { + "epoch": 0.9328794326241134, + "grad_norm": 29.453067779541016, + "learning_rate": 5e-05, + "loss": 1.2708, + "num_input_tokens_seen": 549897456, + "step": 8221 + }, + { + "epoch": 0.9328794326241134, + "loss": 1.4976683855056763, + "loss_ce": 0.010363672859966755, + "loss_iou": 0.515625, + "loss_num": 0.09130859375, + "loss_xval": 1.484375, + "num_input_tokens_seen": 549897456, + "step": 8221 + }, + { + "epoch": 0.9329929078014184, + "grad_norm": 28.42740249633789, + "learning_rate": 5e-05, + "loss": 0.9602, + "num_input_tokens_seen": 549964444, + "step": 8222 + }, + { + "epoch": 0.9329929078014184, + "loss": 1.0565744638442993, + "loss_ce": 0.006769822910428047, + "loss_iou": 0.47265625, + "loss_num": 0.0211181640625, + "loss_xval": 1.046875, + "num_input_tokens_seen": 549964444, + "step": 8222 + }, + { + "epoch": 0.9331063829787234, + "grad_norm": 24.859935760498047, + "learning_rate": 5e-05, + "loss": 1.2466, + "num_input_tokens_seen": 550030620, + "step": 8223 + }, + { + "epoch": 0.9331063829787234, + "loss": 0.9935636520385742, + "loss_ce": 0.008212069049477577, + "loss_iou": 0.439453125, + "loss_num": 0.0213623046875, + "loss_xval": 0.984375, + "num_input_tokens_seen": 550030620, + "step": 8223 + }, + { + "epoch": 0.9332198581560284, + "grad_norm": 24.082561492919922, + "learning_rate": 5e-05, + "loss": 1.1386, + "num_input_tokens_seen": 550096504, + "step": 8224 + }, + { + "epoch": 0.9332198581560284, + "loss": 1.1753578186035156, + "loss_ce": 0.00397114222869277, + "loss_iou": 0.44921875, + "loss_num": 0.0537109375, + "loss_xval": 1.171875, + "num_input_tokens_seen": 550096504, + "step": 8224 + }, + { + "epoch": 0.9333333333333333, + "grad_norm": 25.371030807495117, + "learning_rate": 5e-05, + "loss": 1.1128, + "num_input_tokens_seen": 550163544, + "step": 8225 + }, + { + "epoch": 0.9333333333333333, + "loss": 0.9856150150299072, + "loss_ce": 0.007587662898004055, + "loss_iou": 0.412109375, + "loss_num": 0.03076171875, + "loss_xval": 0.9765625, + "num_input_tokens_seen": 550163544, + "step": 8225 + }, + { + "epoch": 0.9334468085106383, + "grad_norm": 22.789653778076172, + "learning_rate": 5e-05, + "loss": 1.1841, + "num_input_tokens_seen": 550231496, + "step": 8226 + }, + { + "epoch": 0.9334468085106383, + "loss": 1.0313509702682495, + "loss_ce": 0.0020541492849588394, + "loss_iou": 0.439453125, + "loss_num": 0.03076171875, + "loss_xval": 1.03125, + "num_input_tokens_seen": 550231496, + "step": 8226 + }, + { + "epoch": 0.9335602836879433, + "grad_norm": 22.489959716796875, + "learning_rate": 5e-05, + "loss": 1.0184, + "num_input_tokens_seen": 550298868, + "step": 8227 + }, + { + "epoch": 0.9335602836879433, + "loss": 0.9614653587341309, + "loss_ce": 0.00614312756806612, + "loss_iou": 0.419921875, + "loss_num": 0.023681640625, + "loss_xval": 0.95703125, + "num_input_tokens_seen": 550298868, + "step": 8227 + }, + { + "epoch": 0.9336737588652483, + "grad_norm": 29.071931838989258, + "learning_rate": 5e-05, + "loss": 1.0389, + "num_input_tokens_seen": 550365840, + "step": 8228 + }, + { + "epoch": 0.9336737588652483, + "loss": 1.129481315612793, + "loss_ce": 0.00838766060769558, + "loss_iou": 0.416015625, + "loss_num": 0.057861328125, + "loss_xval": 1.125, + "num_input_tokens_seen": 550365840, + "step": 8228 + }, + { + "epoch": 0.9337872340425531, + "grad_norm": 28.716163635253906, + "learning_rate": 5e-05, + "loss": 1.0186, + "num_input_tokens_seen": 550432372, + "step": 8229 + }, + { + "epoch": 0.9337872340425531, + "loss": 1.0590317249298096, + "loss_ce": 0.009227164089679718, + "loss_iou": 0.435546875, + "loss_num": 0.0361328125, + "loss_xval": 1.046875, + "num_input_tokens_seen": 550432372, + "step": 8229 + }, + { + "epoch": 0.9339007092198581, + "grad_norm": 37.78855514526367, + "learning_rate": 5e-05, + "loss": 1.1215, + "num_input_tokens_seen": 550500624, + "step": 8230 + }, + { + "epoch": 0.9339007092198581, + "loss": 1.0793269872665405, + "loss_ce": 0.007549653761088848, + "loss_iou": 0.46875, + "loss_num": 0.0267333984375, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 550500624, + "step": 8230 + }, + { + "epoch": 0.9340141843971631, + "grad_norm": 45.816951751708984, + "learning_rate": 5e-05, + "loss": 0.9029, + "num_input_tokens_seen": 550567292, + "step": 8231 + }, + { + "epoch": 0.9340141843971631, + "loss": 0.9101113080978394, + "loss_ce": 0.003861304372549057, + "loss_iou": 0.400390625, + "loss_num": 0.0206298828125, + "loss_xval": 0.90625, + "num_input_tokens_seen": 550567292, + "step": 8231 + }, + { + "epoch": 0.9341276595744681, + "grad_norm": 29.725313186645508, + "learning_rate": 5e-05, + "loss": 1.1951, + "num_input_tokens_seen": 550633928, + "step": 8232 + }, + { + "epoch": 0.9341276595744681, + "loss": 1.3676401376724243, + "loss_ce": 0.00924169085919857, + "loss_iou": 0.53125, + "loss_num": 0.05859375, + "loss_xval": 1.359375, + "num_input_tokens_seen": 550633928, + "step": 8232 + }, + { + "epoch": 0.9342411347517731, + "grad_norm": 35.92384338378906, + "learning_rate": 5e-05, + "loss": 0.9847, + "num_input_tokens_seen": 550701640, + "step": 8233 + }, + { + "epoch": 0.9342411347517731, + "loss": 0.9687173366546631, + "loss_ce": 0.004850159399211407, + "loss_iou": 0.40625, + "loss_num": 0.030029296875, + "loss_xval": 0.96484375, + "num_input_tokens_seen": 550701640, + "step": 8233 + }, + { + "epoch": 0.934354609929078, + "grad_norm": 17.13994026184082, + "learning_rate": 5e-05, + "loss": 1.144, + "num_input_tokens_seen": 550767664, + "step": 8234 + }, + { + "epoch": 0.934354609929078, + "loss": 1.0924015045166016, + "loss_ce": 0.006463926285505295, + "loss_iou": 0.423828125, + "loss_num": 0.04736328125, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 550767664, + "step": 8234 + }, + { + "epoch": 0.934468085106383, + "grad_norm": 16.971487045288086, + "learning_rate": 5e-05, + "loss": 1.1333, + "num_input_tokens_seen": 550835076, + "step": 8235 + }, + { + "epoch": 0.934468085106383, + "loss": 1.034224510192871, + "loss_ce": 0.008589817211031914, + "loss_iou": 0.396484375, + "loss_num": 0.046630859375, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 550835076, + "step": 8235 + }, + { + "epoch": 0.934581560283688, + "grad_norm": 40.17391586303711, + "learning_rate": 5e-05, + "loss": 1.1726, + "num_input_tokens_seen": 550901732, + "step": 8236 + }, + { + "epoch": 0.934581560283688, + "loss": 1.2009813785552979, + "loss_ce": 0.007622057106345892, + "loss_iou": 0.494140625, + "loss_num": 0.041015625, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 550901732, + "step": 8236 + }, + { + "epoch": 0.9346950354609929, + "grad_norm": 41.68381118774414, + "learning_rate": 5e-05, + "loss": 1.1541, + "num_input_tokens_seen": 550967332, + "step": 8237 + }, + { + "epoch": 0.9346950354609929, + "loss": 0.9994069337844849, + "loss_ce": 0.00679221423342824, + "loss_iou": 0.37109375, + "loss_num": 0.050048828125, + "loss_xval": 0.9921875, + "num_input_tokens_seen": 550967332, + "step": 8237 + }, + { + "epoch": 0.9348085106382978, + "grad_norm": 26.011049270629883, + "learning_rate": 5e-05, + "loss": 0.9882, + "num_input_tokens_seen": 551034540, + "step": 8238 + }, + { + "epoch": 0.9348085106382978, + "loss": 0.9680129289627075, + "loss_ce": 0.008052005432546139, + "loss_iou": 0.404296875, + "loss_num": 0.0303955078125, + "loss_xval": 0.9609375, + "num_input_tokens_seen": 551034540, + "step": 8238 + }, + { + "epoch": 0.9349219858156028, + "grad_norm": 26.405006408691406, + "learning_rate": 5e-05, + "loss": 1.091, + "num_input_tokens_seen": 551100072, + "step": 8239 + }, + { + "epoch": 0.9349219858156028, + "loss": 1.1043932437896729, + "loss_ce": 0.006737020798027515, + "loss_iou": 0.494140625, + "loss_num": 0.021484375, + "loss_xval": 1.09375, + "num_input_tokens_seen": 551100072, + "step": 8239 + }, + { + "epoch": 0.9350354609929078, + "grad_norm": 14.8922119140625, + "learning_rate": 5e-05, + "loss": 1.0455, + "num_input_tokens_seen": 551166472, + "step": 8240 + }, + { + "epoch": 0.9350354609929078, + "loss": 1.174260139465332, + "loss_ce": 0.004826560616493225, + "loss_iou": 0.478515625, + "loss_num": 0.042724609375, + "loss_xval": 1.171875, + "num_input_tokens_seen": 551166472, + "step": 8240 + }, + { + "epoch": 0.9351489361702128, + "grad_norm": 19.718862533569336, + "learning_rate": 5e-05, + "loss": 1.0185, + "num_input_tokens_seen": 551233380, + "step": 8241 + }, + { + "epoch": 0.9351489361702128, + "loss": 0.8497212529182434, + "loss_ce": 0.004018126986920834, + "loss_iou": 0.361328125, + "loss_num": 0.024658203125, + "loss_xval": 0.84375, + "num_input_tokens_seen": 551233380, + "step": 8241 + }, + { + "epoch": 0.9352624113475178, + "grad_norm": 21.348421096801758, + "learning_rate": 5e-05, + "loss": 1.1717, + "num_input_tokens_seen": 551300936, + "step": 8242 + }, + { + "epoch": 0.9352624113475178, + "loss": 1.2451469898223877, + "loss_ce": 0.005400862544775009, + "loss_iou": 0.4921875, + "loss_num": 0.050537109375, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 551300936, + "step": 8242 + }, + { + "epoch": 0.9353758865248227, + "grad_norm": 13.551583290100098, + "learning_rate": 5e-05, + "loss": 1.1672, + "num_input_tokens_seen": 551368788, + "step": 8243 + }, + { + "epoch": 0.9353758865248227, + "loss": 1.3226749897003174, + "loss_ce": 0.009198407642543316, + "loss_iou": 0.53125, + "loss_num": 0.05078125, + "loss_xval": 1.3125, + "num_input_tokens_seen": 551368788, + "step": 8243 + }, + { + "epoch": 0.9354893617021277, + "grad_norm": 39.01690673828125, + "learning_rate": 5e-05, + "loss": 1.0312, + "num_input_tokens_seen": 551436020, + "step": 8244 + }, + { + "epoch": 0.9354893617021277, + "loss": 0.9870066046714783, + "loss_ce": 0.005683340132236481, + "loss_iou": 0.390625, + "loss_num": 0.0400390625, + "loss_xval": 0.98046875, + "num_input_tokens_seen": 551436020, + "step": 8244 + }, + { + "epoch": 0.9356028368794326, + "grad_norm": 30.216285705566406, + "learning_rate": 5e-05, + "loss": 1.0597, + "num_input_tokens_seen": 551503500, + "step": 8245 + }, + { + "epoch": 0.9356028368794326, + "loss": 1.0725986957550049, + "loss_ce": 0.008145568892359734, + "loss_iou": 0.443359375, + "loss_num": 0.0361328125, + "loss_xval": 1.0625, + "num_input_tokens_seen": 551503500, + "step": 8245 + }, + { + "epoch": 0.9357163120567376, + "grad_norm": 24.013141632080078, + "learning_rate": 5e-05, + "loss": 1.1422, + "num_input_tokens_seen": 551569912, + "step": 8246 + }, + { + "epoch": 0.9357163120567376, + "loss": 1.1729023456573486, + "loss_ce": 0.005421928130090237, + "loss_iou": 0.484375, + "loss_num": 0.039794921875, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 551569912, + "step": 8246 + }, + { + "epoch": 0.9358297872340425, + "grad_norm": 22.139184951782227, + "learning_rate": 5e-05, + "loss": 1.2439, + "num_input_tokens_seen": 551637216, + "step": 8247 + }, + { + "epoch": 0.9358297872340425, + "loss": 1.062975287437439, + "loss_ce": 0.00828778650611639, + "loss_iou": 0.419921875, + "loss_num": 0.04296875, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 551637216, + "step": 8247 + }, + { + "epoch": 0.9359432624113475, + "grad_norm": 16.2928409576416, + "learning_rate": 5e-05, + "loss": 0.8231, + "num_input_tokens_seen": 551703576, + "step": 8248 + }, + { + "epoch": 0.9359432624113475, + "loss": 0.8658946752548218, + "loss_ce": 0.004566552117466927, + "loss_iou": 0.36328125, + "loss_num": 0.02685546875, + "loss_xval": 0.859375, + "num_input_tokens_seen": 551703576, + "step": 8248 + }, + { + "epoch": 0.9360567375886525, + "grad_norm": 26.84858512878418, + "learning_rate": 5e-05, + "loss": 1.0158, + "num_input_tokens_seen": 551770588, + "step": 8249 + }, + { + "epoch": 0.9360567375886525, + "loss": 1.2193001508712769, + "loss_ce": 0.007386112120002508, + "loss_iou": 0.490234375, + "loss_num": 0.0458984375, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 551770588, + "step": 8249 + }, + { + "epoch": 0.9361702127659575, + "grad_norm": 39.283912658691406, + "learning_rate": 5e-05, + "loss": 1.0162, + "num_input_tokens_seen": 551837700, + "step": 8250 + }, + { + "epoch": 0.9361702127659575, + "eval_seeclick_CIoU": 0.4429234266281128, + "eval_seeclick_GIoU": 0.4342086613178253, + "eval_seeclick_IoU": 0.513911172747612, + "eval_seeclick_MAE_all": 0.1554260477423668, + "eval_seeclick_MAE_h": 0.08877960778772831, + "eval_seeclick_MAE_w": 0.11264747381210327, + "eval_seeclick_MAE_x_boxes": 0.19206206500530243, + "eval_seeclick_MAE_y_boxes": 0.12609486654400826, + "eval_seeclick_NUM_probability": 0.9999123215675354, + "eval_seeclick_inside_bbox": 0.7083333432674408, + "eval_seeclick_loss": 2.322678327560425, + "eval_seeclick_loss_ce": 0.013556249905377626, + "eval_seeclick_loss_iou": 0.77960205078125, + "eval_seeclick_loss_num": 0.15425872802734375, + "eval_seeclick_loss_xval": 2.33251953125, + "eval_seeclick_runtime": 72.6285, + "eval_seeclick_samples_per_second": 0.647, + "eval_seeclick_steps_per_second": 0.028, + "num_input_tokens_seen": 551837700, + "step": 8250 + }, + { + "epoch": 0.9361702127659575, + "eval_icons_CIoU": 0.545161098241806, + "eval_icons_GIoU": 0.5645887851715088, + "eval_icons_IoU": 0.5831234753131866, + "eval_icons_MAE_all": 0.11625730991363525, + "eval_icons_MAE_h": 0.06438243389129639, + "eval_icons_MAE_w": 0.13507209718227386, + "eval_icons_MAE_x_boxes": 0.1183479018509388, + "eval_icons_MAE_y_boxes": 0.025553555227816105, + "eval_icons_NUM_probability": 0.9999854266643524, + "eval_icons_inside_bbox": 0.8229166567325592, + "eval_icons_loss": 2.1315975189208984, + "eval_icons_loss_ce": 3.0982950192992575e-05, + "eval_icons_loss_iou": 0.7725830078125, + "eval_icons_loss_num": 0.1066131591796875, + "eval_icons_loss_xval": 2.07861328125, + "eval_icons_runtime": 67.3591, + "eval_icons_samples_per_second": 0.742, + "eval_icons_steps_per_second": 0.03, + "num_input_tokens_seen": 551837700, + "step": 8250 + }, + { + "epoch": 0.9361702127659575, + "eval_screenspot_CIoU": 0.24281910061836243, + "eval_screenspot_GIoU": 0.20278377830982208, + "eval_screenspot_IoU": 0.33557459712028503, + "eval_screenspot_MAE_all": 0.23451484739780426, + "eval_screenspot_MAE_h": 0.11083681384722392, + "eval_screenspot_MAE_w": 0.23262972633043924, + "eval_screenspot_MAE_x_boxes": 0.26154621442159015, + "eval_screenspot_MAE_y_boxes": 0.1645099272330602, + "eval_screenspot_NUM_probability": 0.9999229907989502, + "eval_screenspot_inside_bbox": 0.5645833412806193, + "eval_screenspot_loss": 3.0926551818847656, + "eval_screenspot_loss_ce": 0.01422831416130066, + "eval_screenspot_loss_iou": 0.9685872395833334, + "eval_screenspot_loss_num": 0.24161783854166666, + "eval_screenspot_loss_xval": 3.142578125, + "eval_screenspot_runtime": 118.4535, + "eval_screenspot_samples_per_second": 0.751, + "eval_screenspot_steps_per_second": 0.025, + "num_input_tokens_seen": 551837700, + "step": 8250 + }, + { + "epoch": 0.9361702127659575, + "eval_compot_CIoU": 0.2591692805290222, + "eval_compot_GIoU": 0.2079835683107376, + "eval_compot_IoU": 0.3663042187690735, + "eval_compot_MAE_all": 0.2188594490289688, + "eval_compot_MAE_h": 0.08341988548636436, + "eval_compot_MAE_w": 0.2260262295603752, + "eval_compot_MAE_x_boxes": 0.22406192123889923, + "eval_compot_MAE_y_boxes": 0.13894899934530258, + "eval_compot_NUM_probability": 0.9999710917472839, + "eval_compot_inside_bbox": 0.5277777910232544, + "eval_compot_loss": 2.9999794960021973, + "eval_compot_loss_ce": 0.007064483128488064, + "eval_compot_loss_iou": 0.951416015625, + "eval_compot_loss_num": 0.2252960205078125, + "eval_compot_loss_xval": 3.0283203125, + "eval_compot_runtime": 69.2268, + "eval_compot_samples_per_second": 0.722, + "eval_compot_steps_per_second": 0.029, + "num_input_tokens_seen": 551837700, + "step": 8250 + }, + { + "epoch": 0.9361702127659575, + "loss": 3.1075797080993652, + "loss_ce": 0.007970218546688557, + "loss_iou": 0.95703125, + "loss_num": 0.2373046875, + "loss_xval": 3.09375, + "num_input_tokens_seen": 551837700, + "step": 8250 + }, + { + "epoch": 0.9362836879432624, + "grad_norm": 27.834938049316406, + "learning_rate": 5e-05, + "loss": 1.1669, + "num_input_tokens_seen": 551904616, + "step": 8251 + }, + { + "epoch": 0.9362836879432624, + "loss": 1.2482810020446777, + "loss_ce": 0.00609346991404891, + "loss_iou": 0.53515625, + "loss_num": 0.034912109375, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 551904616, + "step": 8251 + }, + { + "epoch": 0.9363971631205674, + "grad_norm": 28.042236328125, + "learning_rate": 5e-05, + "loss": 0.9136, + "num_input_tokens_seen": 551970904, + "step": 8252 + }, + { + "epoch": 0.9363971631205674, + "loss": 0.8061067461967468, + "loss_ce": 0.010086250491440296, + "loss_iou": 0.330078125, + "loss_num": 0.02734375, + "loss_xval": 0.796875, + "num_input_tokens_seen": 551970904, + "step": 8252 + }, + { + "epoch": 0.9365106382978723, + "grad_norm": 30.54875373840332, + "learning_rate": 5e-05, + "loss": 1.0598, + "num_input_tokens_seen": 552037932, + "step": 8253 + }, + { + "epoch": 0.9365106382978723, + "loss": 1.1478614807128906, + "loss_ce": 0.005771709606051445, + "loss_iou": 0.47265625, + "loss_num": 0.039306640625, + "loss_xval": 1.140625, + "num_input_tokens_seen": 552037932, + "step": 8253 + }, + { + "epoch": 0.9366241134751773, + "grad_norm": 34.987789154052734, + "learning_rate": 5e-05, + "loss": 1.0021, + "num_input_tokens_seen": 552104992, + "step": 8254 + }, + { + "epoch": 0.9366241134751773, + "loss": 0.962179958820343, + "loss_ce": 0.005636970978230238, + "loss_iou": 0.3828125, + "loss_num": 0.03759765625, + "loss_xval": 0.95703125, + "num_input_tokens_seen": 552104992, + "step": 8254 + }, + { + "epoch": 0.9367375886524822, + "grad_norm": 36.572086334228516, + "learning_rate": 5e-05, + "loss": 1.2355, + "num_input_tokens_seen": 552172652, + "step": 8255 + }, + { + "epoch": 0.9367375886524822, + "loss": 1.161331057548523, + "loss_ce": 0.007522428408265114, + "loss_iou": 0.494140625, + "loss_num": 0.033203125, + "loss_xval": 1.15625, + "num_input_tokens_seen": 552172652, + "step": 8255 + }, + { + "epoch": 0.9368510638297872, + "grad_norm": 26.527711868286133, + "learning_rate": 5e-05, + "loss": 1.2263, + "num_input_tokens_seen": 552238712, + "step": 8256 + }, + { + "epoch": 0.9368510638297872, + "loss": 1.3213486671447754, + "loss_ce": 0.013853448443114758, + "loss_iou": 0.50390625, + "loss_num": 0.060302734375, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 552238712, + "step": 8256 + }, + { + "epoch": 0.9369645390070922, + "grad_norm": 29.562829971313477, + "learning_rate": 5e-05, + "loss": 1.0935, + "num_input_tokens_seen": 552305760, + "step": 8257 + }, + { + "epoch": 0.9369645390070922, + "loss": 1.0275123119354248, + "loss_ce": 0.00407477468252182, + "loss_iou": 0.4609375, + "loss_num": 0.0198974609375, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 552305760, + "step": 8257 + }, + { + "epoch": 0.9370780141843972, + "grad_norm": 40.521060943603516, + "learning_rate": 5e-05, + "loss": 1.3675, + "num_input_tokens_seen": 552372960, + "step": 8258 + }, + { + "epoch": 0.9370780141843972, + "loss": 1.3068026304244995, + "loss_ce": 0.006997938267886639, + "loss_iou": 0.54296875, + "loss_num": 0.042724609375, + "loss_xval": 1.296875, + "num_input_tokens_seen": 552372960, + "step": 8258 + }, + { + "epoch": 0.9371914893617022, + "grad_norm": 34.69844436645508, + "learning_rate": 5e-05, + "loss": 1.3603, + "num_input_tokens_seen": 552440312, + "step": 8259 + }, + { + "epoch": 0.9371914893617022, + "loss": 1.3580620288848877, + "loss_ce": 0.00405813567340374, + "loss_iou": 0.5390625, + "loss_num": 0.053955078125, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 552440312, + "step": 8259 + }, + { + "epoch": 0.9373049645390071, + "grad_norm": 19.284931182861328, + "learning_rate": 5e-05, + "loss": 1.0978, + "num_input_tokens_seen": 552506172, + "step": 8260 + }, + { + "epoch": 0.9373049645390071, + "loss": 1.0431616306304932, + "loss_ce": 0.007517009042203426, + "loss_iou": 0.478515625, + "loss_num": 0.015380859375, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 552506172, + "step": 8260 + }, + { + "epoch": 0.937418439716312, + "grad_norm": 24.273305892944336, + "learning_rate": 5e-05, + "loss": 0.8816, + "num_input_tokens_seen": 552572384, + "step": 8261 + }, + { + "epoch": 0.937418439716312, + "loss": 0.9137635231018066, + "loss_ce": 0.00971079058945179, + "loss_iou": 0.375, + "loss_num": 0.03076171875, + "loss_xval": 0.90234375, + "num_input_tokens_seen": 552572384, + "step": 8261 + }, + { + "epoch": 0.937531914893617, + "grad_norm": 47.96969223022461, + "learning_rate": 5e-05, + "loss": 1.2288, + "num_input_tokens_seen": 552638708, + "step": 8262 + }, + { + "epoch": 0.937531914893617, + "loss": 1.1488926410675049, + "loss_ce": 0.004361448809504509, + "loss_iou": 0.47265625, + "loss_num": 0.0400390625, + "loss_xval": 1.140625, + "num_input_tokens_seen": 552638708, + "step": 8262 + }, + { + "epoch": 0.937645390070922, + "grad_norm": 30.103044509887695, + "learning_rate": 5e-05, + "loss": 1.2309, + "num_input_tokens_seen": 552705004, + "step": 8263 + }, + { + "epoch": 0.937645390070922, + "loss": 1.3002874851226807, + "loss_ce": 0.0043889437802135944, + "loss_iou": 0.53515625, + "loss_num": 0.0458984375, + "loss_xval": 1.296875, + "num_input_tokens_seen": 552705004, + "step": 8263 + }, + { + "epoch": 0.9377588652482269, + "grad_norm": 21.853532791137695, + "learning_rate": 5e-05, + "loss": 1.2572, + "num_input_tokens_seen": 552772876, + "step": 8264 + }, + { + "epoch": 0.9377588652482269, + "loss": 1.0895888805389404, + "loss_ce": 0.0026748660020530224, + "loss_iou": 0.44921875, + "loss_num": 0.037841796875, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 552772876, + "step": 8264 + }, + { + "epoch": 0.9378723404255319, + "grad_norm": 35.71635818481445, + "learning_rate": 5e-05, + "loss": 1.1564, + "num_input_tokens_seen": 552839120, + "step": 8265 + }, + { + "epoch": 0.9378723404255319, + "loss": 1.1272608041763306, + "loss_ce": 0.002749138046056032, + "loss_iou": 0.462890625, + "loss_num": 0.040283203125, + "loss_xval": 1.125, + "num_input_tokens_seen": 552839120, + "step": 8265 + }, + { + "epoch": 0.9379858156028369, + "grad_norm": 28.686227798461914, + "learning_rate": 5e-05, + "loss": 1.333, + "num_input_tokens_seen": 552905564, + "step": 8266 + }, + { + "epoch": 0.9379858156028369, + "loss": 1.2461403608322144, + "loss_ce": 0.008347375318408012, + "loss_iou": 0.494140625, + "loss_num": 0.04931640625, + "loss_xval": 1.234375, + "num_input_tokens_seen": 552905564, + "step": 8266 + }, + { + "epoch": 0.9380992907801419, + "grad_norm": 12.30831527709961, + "learning_rate": 5e-05, + "loss": 1.1984, + "num_input_tokens_seen": 552971692, + "step": 8267 + }, + { + "epoch": 0.9380992907801419, + "loss": 1.25809645652771, + "loss_ce": 0.009072994813323021, + "loss_iou": 0.482421875, + "loss_num": 0.056884765625, + "loss_xval": 1.25, + "num_input_tokens_seen": 552971692, + "step": 8267 + }, + { + "epoch": 0.9382127659574468, + "grad_norm": 19.822242736816406, + "learning_rate": 5e-05, + "loss": 1.235, + "num_input_tokens_seen": 553038128, + "step": 8268 + }, + { + "epoch": 0.9382127659574468, + "loss": 1.0876972675323486, + "loss_ce": 0.007374969776719809, + "loss_iou": 0.421875, + "loss_num": 0.047119140625, + "loss_xval": 1.078125, + "num_input_tokens_seen": 553038128, + "step": 8268 + }, + { + "epoch": 0.9383262411347518, + "grad_norm": 18.22284698486328, + "learning_rate": 5e-05, + "loss": 1.1186, + "num_input_tokens_seen": 553105608, + "step": 8269 + }, + { + "epoch": 0.9383262411347518, + "loss": 1.149895429611206, + "loss_ce": 0.006340636871755123, + "loss_iou": 0.4609375, + "loss_num": 0.044921875, + "loss_xval": 1.140625, + "num_input_tokens_seen": 553105608, + "step": 8269 + }, + { + "epoch": 0.9384397163120567, + "grad_norm": 53.883113861083984, + "learning_rate": 5e-05, + "loss": 1.0718, + "num_input_tokens_seen": 553172292, + "step": 8270 + }, + { + "epoch": 0.9384397163120567, + "loss": 1.0829904079437256, + "loss_ce": 0.007795086596161127, + "loss_iou": 0.439453125, + "loss_num": 0.03955078125, + "loss_xval": 1.078125, + "num_input_tokens_seen": 553172292, + "step": 8270 + }, + { + "epoch": 0.9385531914893617, + "grad_norm": 30.85863494873047, + "learning_rate": 5e-05, + "loss": 1.1526, + "num_input_tokens_seen": 553240108, + "step": 8271 + }, + { + "epoch": 0.9385531914893617, + "loss": 0.9177560806274414, + "loss_ce": 0.0036936134565621614, + "loss_iou": 0.390625, + "loss_num": 0.0262451171875, + "loss_xval": 0.9140625, + "num_input_tokens_seen": 553240108, + "step": 8271 + }, + { + "epoch": 0.9386666666666666, + "grad_norm": 32.242713928222656, + "learning_rate": 5e-05, + "loss": 1.0854, + "num_input_tokens_seen": 553307424, + "step": 8272 + }, + { + "epoch": 0.9386666666666666, + "loss": 1.0174134969711304, + "loss_ce": 0.0076478696428239346, + "loss_iou": 0.447265625, + "loss_num": 0.0234375, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 553307424, + "step": 8272 + }, + { + "epoch": 0.9387801418439716, + "grad_norm": 27.060710906982422, + "learning_rate": 5e-05, + "loss": 1.3003, + "num_input_tokens_seen": 553374348, + "step": 8273 + }, + { + "epoch": 0.9387801418439716, + "loss": 1.183825969696045, + "loss_ce": 0.005603384226560593, + "loss_iou": 0.455078125, + "loss_num": 0.05322265625, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 553374348, + "step": 8273 + }, + { + "epoch": 0.9388936170212766, + "grad_norm": 16.185625076293945, + "learning_rate": 5e-05, + "loss": 0.9848, + "num_input_tokens_seen": 553441652, + "step": 8274 + }, + { + "epoch": 0.9388936170212766, + "loss": 1.000684142112732, + "loss_ce": 0.002637261990457773, + "loss_iou": 0.375, + "loss_num": 0.04931640625, + "loss_xval": 1.0, + "num_input_tokens_seen": 553441652, + "step": 8274 + }, + { + "epoch": 0.9390070921985816, + "grad_norm": 33.03567886352539, + "learning_rate": 5e-05, + "loss": 1.1039, + "num_input_tokens_seen": 553509456, + "step": 8275 + }, + { + "epoch": 0.9390070921985816, + "loss": 1.1956002712249756, + "loss_ce": 0.00858843419700861, + "loss_iou": 0.5078125, + "loss_num": 0.03369140625, + "loss_xval": 1.1875, + "num_input_tokens_seen": 553509456, + "step": 8275 + }, + { + "epoch": 0.9391205673758866, + "grad_norm": 46.88787841796875, + "learning_rate": 5e-05, + "loss": 1.3411, + "num_input_tokens_seen": 553575880, + "step": 8276 + }, + { + "epoch": 0.9391205673758866, + "loss": 1.5076167583465576, + "loss_ce": 0.0022456455044448376, + "loss_iou": 0.60546875, + "loss_num": 0.0595703125, + "loss_xval": 1.5078125, + "num_input_tokens_seen": 553575880, + "step": 8276 + }, + { + "epoch": 0.9392340425531915, + "grad_norm": 37.095054626464844, + "learning_rate": 5e-05, + "loss": 1.5092, + "num_input_tokens_seen": 553643376, + "step": 8277 + }, + { + "epoch": 0.9392340425531915, + "loss": 1.5041180849075317, + "loss_ce": 0.005094575230032206, + "loss_iou": 0.6171875, + "loss_num": 0.052490234375, + "loss_xval": 1.5, + "num_input_tokens_seen": 553643376, + "step": 8277 + }, + { + "epoch": 0.9393475177304964, + "grad_norm": 25.31447982788086, + "learning_rate": 5e-05, + "loss": 1.2737, + "num_input_tokens_seen": 553710888, + "step": 8278 + }, + { + "epoch": 0.9393475177304964, + "loss": 1.542476773262024, + "loss_ce": 0.009273640811443329, + "loss_iou": 0.6015625, + "loss_num": 0.0654296875, + "loss_xval": 1.53125, + "num_input_tokens_seen": 553710888, + "step": 8278 + }, + { + "epoch": 0.9394609929078014, + "grad_norm": 24.78458023071289, + "learning_rate": 5e-05, + "loss": 1.0175, + "num_input_tokens_seen": 553777616, + "step": 8279 + }, + { + "epoch": 0.9394609929078014, + "loss": 1.042091727256775, + "loss_ce": 0.0035175085067749023, + "loss_iou": 0.439453125, + "loss_num": 0.031982421875, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 553777616, + "step": 8279 + }, + { + "epoch": 0.9395744680851064, + "grad_norm": 34.60740280151367, + "learning_rate": 5e-05, + "loss": 1.3237, + "num_input_tokens_seen": 553844012, + "step": 8280 + }, + { + "epoch": 0.9395744680851064, + "loss": 1.2644155025482178, + "loss_ce": 0.009288524277508259, + "loss_iou": 0.50390625, + "loss_num": 0.049072265625, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 553844012, + "step": 8280 + }, + { + "epoch": 0.9396879432624113, + "grad_norm": 23.782455444335938, + "learning_rate": 5e-05, + "loss": 1.1997, + "num_input_tokens_seen": 553910248, + "step": 8281 + }, + { + "epoch": 0.9396879432624113, + "loss": 1.4254708290100098, + "loss_ce": 0.00457252049818635, + "loss_iou": 0.546875, + "loss_num": 0.0654296875, + "loss_xval": 1.421875, + "num_input_tokens_seen": 553910248, + "step": 8281 + }, + { + "epoch": 0.9398014184397163, + "grad_norm": 13.56779670715332, + "learning_rate": 5e-05, + "loss": 0.9223, + "num_input_tokens_seen": 553976908, + "step": 8282 + }, + { + "epoch": 0.9398014184397163, + "loss": 0.8727994561195374, + "loss_ce": 0.0070768012665212154, + "loss_iou": 0.353515625, + "loss_num": 0.031982421875, + "loss_xval": 0.8671875, + "num_input_tokens_seen": 553976908, + "step": 8282 + }, + { + "epoch": 0.9399148936170213, + "grad_norm": 19.498342514038086, + "learning_rate": 5e-05, + "loss": 1.1129, + "num_input_tokens_seen": 554044628, + "step": 8283 + }, + { + "epoch": 0.9399148936170213, + "loss": 1.1645889282226562, + "loss_ce": 0.008827227167785168, + "loss_iou": 0.4453125, + "loss_num": 0.052978515625, + "loss_xval": 1.15625, + "num_input_tokens_seen": 554044628, + "step": 8283 + }, + { + "epoch": 0.9400283687943263, + "grad_norm": 17.74361801147461, + "learning_rate": 5e-05, + "loss": 0.8463, + "num_input_tokens_seen": 554110752, + "step": 8284 + }, + { + "epoch": 0.9400283687943263, + "loss": 0.9104627370834351, + "loss_ce": 0.009095552377402782, + "loss_iou": 0.34765625, + "loss_num": 0.040771484375, + "loss_xval": 0.90234375, + "num_input_tokens_seen": 554110752, + "step": 8284 + }, + { + "epoch": 0.9401418439716313, + "grad_norm": 16.428974151611328, + "learning_rate": 5e-05, + "loss": 0.9988, + "num_input_tokens_seen": 554178288, + "step": 8285 + }, + { + "epoch": 0.9401418439716313, + "loss": 0.963208794593811, + "loss_ce": 0.0054451473988592625, + "loss_iou": 0.419921875, + "loss_num": 0.023681640625, + "loss_xval": 0.95703125, + "num_input_tokens_seen": 554178288, + "step": 8285 + }, + { + "epoch": 0.9402553191489361, + "grad_norm": 15.568198204040527, + "learning_rate": 5e-05, + "loss": 1.1632, + "num_input_tokens_seen": 554246196, + "step": 8286 + }, + { + "epoch": 0.9402553191489361, + "loss": 1.2343957424163818, + "loss_ce": 0.009298020973801613, + "loss_iou": 0.47265625, + "loss_num": 0.05615234375, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 554246196, + "step": 8286 + }, + { + "epoch": 0.9403687943262411, + "grad_norm": 20.52535057067871, + "learning_rate": 5e-05, + "loss": 1.0639, + "num_input_tokens_seen": 554313776, + "step": 8287 + }, + { + "epoch": 0.9403687943262411, + "loss": 0.9826794862747192, + "loss_ce": 0.004652151372283697, + "loss_iou": 0.40234375, + "loss_num": 0.03515625, + "loss_xval": 0.9765625, + "num_input_tokens_seen": 554313776, + "step": 8287 + }, + { + "epoch": 0.9404822695035461, + "grad_norm": 38.372684478759766, + "learning_rate": 5e-05, + "loss": 1.2737, + "num_input_tokens_seen": 554381380, + "step": 8288 + }, + { + "epoch": 0.9404822695035461, + "loss": 1.3589463233947754, + "loss_ce": 0.0044541205279529095, + "loss_iou": 0.5625, + "loss_num": 0.045654296875, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 554381380, + "step": 8288 + }, + { + "epoch": 0.940595744680851, + "grad_norm": 26.488815307617188, + "learning_rate": 5e-05, + "loss": 1.119, + "num_input_tokens_seen": 554448596, + "step": 8289 + }, + { + "epoch": 0.940595744680851, + "loss": 1.4364244937896729, + "loss_ce": 0.007713631726801395, + "loss_iou": 0.58203125, + "loss_num": 0.053466796875, + "loss_xval": 1.4296875, + "num_input_tokens_seen": 554448596, + "step": 8289 + }, + { + "epoch": 0.940709219858156, + "grad_norm": 15.794018745422363, + "learning_rate": 5e-05, + "loss": 1.0287, + "num_input_tokens_seen": 554515636, + "step": 8290 + }, + { + "epoch": 0.940709219858156, + "loss": 0.9072319269180298, + "loss_ce": 0.0063530029729008675, + "loss_iou": 0.365234375, + "loss_num": 0.03369140625, + "loss_xval": 0.90234375, + "num_input_tokens_seen": 554515636, + "step": 8290 + }, + { + "epoch": 0.940822695035461, + "grad_norm": 24.996002197265625, + "learning_rate": 5e-05, + "loss": 0.8936, + "num_input_tokens_seen": 554583004, + "step": 8291 + }, + { + "epoch": 0.940822695035461, + "loss": 0.8598901629447937, + "loss_ce": 0.0039331428706645966, + "loss_iou": 0.375, + "loss_num": 0.0216064453125, + "loss_xval": 0.85546875, + "num_input_tokens_seen": 554583004, + "step": 8291 + }, + { + "epoch": 0.940936170212766, + "grad_norm": 23.009349822998047, + "learning_rate": 5e-05, + "loss": 1.008, + "num_input_tokens_seen": 554648596, + "step": 8292 + }, + { + "epoch": 0.940936170212766, + "loss": 1.1627094745635986, + "loss_ce": 0.011952618137001991, + "loss_iou": 0.439453125, + "loss_num": 0.054443359375, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 554648596, + "step": 8292 + }, + { + "epoch": 0.941049645390071, + "grad_norm": 29.277481079101562, + "learning_rate": 5e-05, + "loss": 1.1019, + "num_input_tokens_seen": 554715660, + "step": 8293 + }, + { + "epoch": 0.941049645390071, + "loss": 1.0285344123840332, + "loss_ce": 0.01290950272232294, + "loss_iou": 0.439453125, + "loss_num": 0.027099609375, + "loss_xval": 1.015625, + "num_input_tokens_seen": 554715660, + "step": 8293 + }, + { + "epoch": 0.9411631205673758, + "grad_norm": 52.18384552001953, + "learning_rate": 5e-05, + "loss": 1.3046, + "num_input_tokens_seen": 554782544, + "step": 8294 + }, + { + "epoch": 0.9411631205673758, + "loss": 1.2906378507614136, + "loss_ce": 0.011340973898768425, + "loss_iou": 0.5625, + "loss_num": 0.03173828125, + "loss_xval": 1.28125, + "num_input_tokens_seen": 554782544, + "step": 8294 + }, + { + "epoch": 0.9412765957446808, + "grad_norm": 36.49513626098633, + "learning_rate": 5e-05, + "loss": 1.2786, + "num_input_tokens_seen": 554848960, + "step": 8295 + }, + { + "epoch": 0.9412765957446808, + "loss": 1.2267488241195679, + "loss_ce": 0.005557412281632423, + "loss_iou": 0.4765625, + "loss_num": 0.05322265625, + "loss_xval": 1.21875, + "num_input_tokens_seen": 554848960, + "step": 8295 + }, + { + "epoch": 0.9413900709219858, + "grad_norm": 21.590299606323242, + "learning_rate": 5e-05, + "loss": 1.1568, + "num_input_tokens_seen": 554916968, + "step": 8296 + }, + { + "epoch": 0.9413900709219858, + "loss": 1.102173089981079, + "loss_ce": 0.007690591271966696, + "loss_iou": 0.443359375, + "loss_num": 0.041259765625, + "loss_xval": 1.09375, + "num_input_tokens_seen": 554916968, + "step": 8296 + }, + { + "epoch": 0.9415035460992908, + "grad_norm": 23.181161880493164, + "learning_rate": 5e-05, + "loss": 0.9821, + "num_input_tokens_seen": 554984204, + "step": 8297 + }, + { + "epoch": 0.9415035460992908, + "loss": 1.1022143363952637, + "loss_ce": 0.009929142892360687, + "loss_iou": 0.43359375, + "loss_num": 0.044677734375, + "loss_xval": 1.09375, + "num_input_tokens_seen": 554984204, + "step": 8297 + }, + { + "epoch": 0.9416170212765957, + "grad_norm": 26.46047019958496, + "learning_rate": 5e-05, + "loss": 1.053, + "num_input_tokens_seen": 555051708, + "step": 8298 + }, + { + "epoch": 0.9416170212765957, + "loss": 1.1012303829193115, + "loss_ce": 0.006503771059215069, + "loss_iou": 0.45703125, + "loss_num": 0.0361328125, + "loss_xval": 1.09375, + "num_input_tokens_seen": 555051708, + "step": 8298 + }, + { + "epoch": 0.9417304964539007, + "grad_norm": 37.774452209472656, + "learning_rate": 5e-05, + "loss": 1.0853, + "num_input_tokens_seen": 555118620, + "step": 8299 + }, + { + "epoch": 0.9417304964539007, + "loss": 1.201641321182251, + "loss_ce": 0.007305333856493235, + "loss_iou": 0.49609375, + "loss_num": 0.04052734375, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 555118620, + "step": 8299 + }, + { + "epoch": 0.9418439716312057, + "grad_norm": 40.55072021484375, + "learning_rate": 5e-05, + "loss": 1.192, + "num_input_tokens_seen": 555184848, + "step": 8300 + }, + { + "epoch": 0.9418439716312057, + "loss": 1.096829891204834, + "loss_ce": 0.008206836879253387, + "loss_iou": 0.462890625, + "loss_num": 0.03271484375, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 555184848, + "step": 8300 + }, + { + "epoch": 0.9419574468085107, + "grad_norm": 39.43423843383789, + "learning_rate": 5e-05, + "loss": 1.1866, + "num_input_tokens_seen": 555252028, + "step": 8301 + }, + { + "epoch": 0.9419574468085107, + "loss": 1.3212305307388306, + "loss_ce": 0.008242249488830566, + "loss_iou": 0.5078125, + "loss_num": 0.059814453125, + "loss_xval": 1.3125, + "num_input_tokens_seen": 555252028, + "step": 8301 + }, + { + "epoch": 0.9420709219858157, + "grad_norm": 27.99262809753418, + "learning_rate": 5e-05, + "loss": 1.2971, + "num_input_tokens_seen": 555319092, + "step": 8302 + }, + { + "epoch": 0.9420709219858157, + "loss": 1.319257140159607, + "loss_ce": 0.007245410233736038, + "loss_iou": 0.5390625, + "loss_num": 0.04638671875, + "loss_xval": 1.3125, + "num_input_tokens_seen": 555319092, + "step": 8302 + }, + { + "epoch": 0.9421843971631205, + "grad_norm": 24.24989891052246, + "learning_rate": 5e-05, + "loss": 1.0992, + "num_input_tokens_seen": 555385824, + "step": 8303 + }, + { + "epoch": 0.9421843971631205, + "loss": 1.059998869895935, + "loss_ce": 0.005799639970064163, + "loss_iou": 0.4296875, + "loss_num": 0.03857421875, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 555385824, + "step": 8303 + }, + { + "epoch": 0.9422978723404255, + "grad_norm": 20.226652145385742, + "learning_rate": 5e-05, + "loss": 1.2236, + "num_input_tokens_seen": 555453004, + "step": 8304 + }, + { + "epoch": 0.9422978723404255, + "loss": 1.1392580270767212, + "loss_ce": 0.0054688905365765095, + "loss_iou": 0.4765625, + "loss_num": 0.03662109375, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 555453004, + "step": 8304 + }, + { + "epoch": 0.9424113475177305, + "grad_norm": 12.63210391998291, + "learning_rate": 5e-05, + "loss": 1.0465, + "num_input_tokens_seen": 555519568, + "step": 8305 + }, + { + "epoch": 0.9424113475177305, + "loss": 1.2697473764419556, + "loss_ce": 0.0046106851659715176, + "loss_iou": 0.52734375, + "loss_num": 0.042724609375, + "loss_xval": 1.265625, + "num_input_tokens_seen": 555519568, + "step": 8305 + }, + { + "epoch": 0.9425248226950355, + "grad_norm": 19.756240844726562, + "learning_rate": 5e-05, + "loss": 1.0709, + "num_input_tokens_seen": 555586884, + "step": 8306 + }, + { + "epoch": 0.9425248226950355, + "loss": 1.0041457414627075, + "loss_ce": 0.0026808816473931074, + "loss_iou": 0.4453125, + "loss_num": 0.0224609375, + "loss_xval": 1.0, + "num_input_tokens_seen": 555586884, + "step": 8306 + }, + { + "epoch": 0.9426382978723404, + "grad_norm": 30.752681732177734, + "learning_rate": 5e-05, + "loss": 1.0698, + "num_input_tokens_seen": 555652888, + "step": 8307 + }, + { + "epoch": 0.9426382978723404, + "loss": 0.9536786079406738, + "loss_ce": 0.004215693566948175, + "loss_iou": 0.400390625, + "loss_num": 0.029541015625, + "loss_xval": 0.94921875, + "num_input_tokens_seen": 555652888, + "step": 8307 + }, + { + "epoch": 0.9427517730496454, + "grad_norm": 26.57585906982422, + "learning_rate": 5e-05, + "loss": 1.0514, + "num_input_tokens_seen": 555719268, + "step": 8308 + }, + { + "epoch": 0.9427517730496454, + "loss": 1.0272456407546997, + "loss_ce": 0.005272985436022282, + "loss_iou": 0.4453125, + "loss_num": 0.026611328125, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 555719268, + "step": 8308 + }, + { + "epoch": 0.9428652482269504, + "grad_norm": 21.205739974975586, + "learning_rate": 5e-05, + "loss": 1.0084, + "num_input_tokens_seen": 555786624, + "step": 8309 + }, + { + "epoch": 0.9428652482269504, + "loss": 0.9270639419555664, + "loss_ce": 0.002503424882888794, + "loss_iou": 0.396484375, + "loss_num": 0.02587890625, + "loss_xval": 0.92578125, + "num_input_tokens_seen": 555786624, + "step": 8309 + }, + { + "epoch": 0.9429787234042554, + "grad_norm": 22.072595596313477, + "learning_rate": 5e-05, + "loss": 1.2744, + "num_input_tokens_seen": 555854224, + "step": 8310 + }, + { + "epoch": 0.9429787234042554, + "loss": 1.3847670555114746, + "loss_ce": 0.008790450170636177, + "loss_iou": 0.5234375, + "loss_num": 0.0654296875, + "loss_xval": 1.375, + "num_input_tokens_seen": 555854224, + "step": 8310 + }, + { + "epoch": 0.9430921985815602, + "grad_norm": 402.77288818359375, + "learning_rate": 5e-05, + "loss": 1.102, + "num_input_tokens_seen": 555921468, + "step": 8311 + }, + { + "epoch": 0.9430921985815602, + "loss": 0.9319757223129272, + "loss_ce": 0.00820864923298359, + "loss_iou": 0.373046875, + "loss_num": 0.035888671875, + "loss_xval": 0.921875, + "num_input_tokens_seen": 555921468, + "step": 8311 + }, + { + "epoch": 0.9432056737588652, + "grad_norm": 26.7337703704834, + "learning_rate": 5e-05, + "loss": 1.1429, + "num_input_tokens_seen": 555987296, + "step": 8312 + }, + { + "epoch": 0.9432056737588652, + "loss": 1.0886616706848145, + "loss_ce": 0.007118740119040012, + "loss_iou": 0.458984375, + "loss_num": 0.032470703125, + "loss_xval": 1.078125, + "num_input_tokens_seen": 555987296, + "step": 8312 + }, + { + "epoch": 0.9433191489361702, + "grad_norm": 30.57939910888672, + "learning_rate": 5e-05, + "loss": 1.2331, + "num_input_tokens_seen": 556055368, + "step": 8313 + }, + { + "epoch": 0.9433191489361702, + "loss": 1.332629919052124, + "loss_ce": 0.006946294568479061, + "loss_iou": 0.52734375, + "loss_num": 0.0537109375, + "loss_xval": 1.328125, + "num_input_tokens_seen": 556055368, + "step": 8313 + }, + { + "epoch": 0.9434326241134752, + "grad_norm": 26.862071990966797, + "learning_rate": 5e-05, + "loss": 1.1071, + "num_input_tokens_seen": 556122304, + "step": 8314 + }, + { + "epoch": 0.9434326241134752, + "loss": 0.9741529226303101, + "loss_ce": 0.006135351024568081, + "loss_iou": 0.400390625, + "loss_num": 0.033447265625, + "loss_xval": 0.96875, + "num_input_tokens_seen": 556122304, + "step": 8314 + }, + { + "epoch": 0.9435460992907801, + "grad_norm": 29.69858169555664, + "learning_rate": 5e-05, + "loss": 1.2312, + "num_input_tokens_seen": 556189744, + "step": 8315 + }, + { + "epoch": 0.9435460992907801, + "loss": 1.2804099321365356, + "loss_ce": 0.010390397161245346, + "loss_iou": 0.478515625, + "loss_num": 0.0625, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 556189744, + "step": 8315 + }, + { + "epoch": 0.9436595744680851, + "grad_norm": 30.196224212646484, + "learning_rate": 5e-05, + "loss": 1.1608, + "num_input_tokens_seen": 556256300, + "step": 8316 + }, + { + "epoch": 0.9436595744680851, + "loss": 1.0973438024520874, + "loss_ce": 0.002617231337353587, + "loss_iou": 0.453125, + "loss_num": 0.03759765625, + "loss_xval": 1.09375, + "num_input_tokens_seen": 556256300, + "step": 8316 + }, + { + "epoch": 0.9437730496453901, + "grad_norm": 29.799474716186523, + "learning_rate": 5e-05, + "loss": 1.0954, + "num_input_tokens_seen": 556321684, + "step": 8317 + }, + { + "epoch": 0.9437730496453901, + "loss": 0.9424781799316406, + "loss_ce": 0.0030250914860516787, + "loss_iou": 0.4140625, + "loss_num": 0.022705078125, + "loss_xval": 0.9375, + "num_input_tokens_seen": 556321684, + "step": 8317 + }, + { + "epoch": 0.9438865248226951, + "grad_norm": 42.069332122802734, + "learning_rate": 5e-05, + "loss": 1.1401, + "num_input_tokens_seen": 556388044, + "step": 8318 + }, + { + "epoch": 0.9438865248226951, + "loss": 1.1188724040985107, + "loss_ce": 0.005255517084151506, + "loss_iou": 0.4921875, + "loss_num": 0.0262451171875, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 556388044, + "step": 8318 + }, + { + "epoch": 0.944, + "grad_norm": 20.441879272460938, + "learning_rate": 5e-05, + "loss": 0.9561, + "num_input_tokens_seen": 556455820, + "step": 8319 + }, + { + "epoch": 0.944, + "loss": 0.9601708650588989, + "loss_ce": 0.006069313269108534, + "loss_iou": 0.40234375, + "loss_num": 0.030029296875, + "loss_xval": 0.953125, + "num_input_tokens_seen": 556455820, + "step": 8319 + }, + { + "epoch": 0.9441134751773049, + "grad_norm": 20.807695388793945, + "learning_rate": 5e-05, + "loss": 1.1359, + "num_input_tokens_seen": 556521920, + "step": 8320 + }, + { + "epoch": 0.9441134751773049, + "loss": 1.4317666292190552, + "loss_ce": 0.009403271600604057, + "loss_iou": 0.5703125, + "loss_num": 0.055419921875, + "loss_xval": 1.421875, + "num_input_tokens_seen": 556521920, + "step": 8320 + }, + { + "epoch": 0.9442269503546099, + "grad_norm": 18.728422164916992, + "learning_rate": 5e-05, + "loss": 1.2148, + "num_input_tokens_seen": 556588964, + "step": 8321 + }, + { + "epoch": 0.9442269503546099, + "loss": 1.0137423276901245, + "loss_ce": 0.0064181797206401825, + "loss_iou": 0.431640625, + "loss_num": 0.0284423828125, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 556588964, + "step": 8321 + }, + { + "epoch": 0.9443404255319149, + "grad_norm": 51.7614631652832, + "learning_rate": 5e-05, + "loss": 1.3176, + "num_input_tokens_seen": 556656064, + "step": 8322 + }, + { + "epoch": 0.9443404255319149, + "loss": 1.1893454790115356, + "loss_ce": 0.00477517070248723, + "loss_iou": 0.4765625, + "loss_num": 0.04638671875, + "loss_xval": 1.1875, + "num_input_tokens_seen": 556656064, + "step": 8322 + }, + { + "epoch": 0.9444539007092199, + "grad_norm": 43.42755126953125, + "learning_rate": 5e-05, + "loss": 1.2332, + "num_input_tokens_seen": 556722792, + "step": 8323 + }, + { + "epoch": 0.9444539007092199, + "loss": 1.364148736000061, + "loss_ce": 0.005262009333819151, + "loss_iou": 0.4921875, + "loss_num": 0.0751953125, + "loss_xval": 1.359375, + "num_input_tokens_seen": 556722792, + "step": 8323 + }, + { + "epoch": 0.9445673758865248, + "grad_norm": 43.74141311645508, + "learning_rate": 5e-05, + "loss": 1.3308, + "num_input_tokens_seen": 556789492, + "step": 8324 + }, + { + "epoch": 0.9445673758865248, + "loss": 1.5685142278671265, + "loss_ce": 0.015779860317707062, + "loss_iou": 0.62890625, + "loss_num": 0.05859375, + "loss_xval": 1.5546875, + "num_input_tokens_seen": 556789492, + "step": 8324 + }, + { + "epoch": 0.9446808510638298, + "grad_norm": 35.66701889038086, + "learning_rate": 5e-05, + "loss": 1.4643, + "num_input_tokens_seen": 556856808, + "step": 8325 + }, + { + "epoch": 0.9446808510638298, + "loss": 1.4674851894378662, + "loss_ce": 0.006059437524527311, + "loss_iou": 0.5625, + "loss_num": 0.06689453125, + "loss_xval": 1.4609375, + "num_input_tokens_seen": 556856808, + "step": 8325 + }, + { + "epoch": 0.9447943262411348, + "grad_norm": 35.81878662109375, + "learning_rate": 5e-05, + "loss": 1.0836, + "num_input_tokens_seen": 556923828, + "step": 8326 + }, + { + "epoch": 0.9447943262411348, + "loss": 1.0943372249603271, + "loss_ce": 0.004493482876569033, + "loss_iou": 0.4609375, + "loss_num": 0.0341796875, + "loss_xval": 1.09375, + "num_input_tokens_seen": 556923828, + "step": 8326 + }, + { + "epoch": 0.9449078014184397, + "grad_norm": 45.28693771362305, + "learning_rate": 5e-05, + "loss": 1.066, + "num_input_tokens_seen": 556990420, + "step": 8327 + }, + { + "epoch": 0.9449078014184397, + "loss": 1.0378339290618896, + "loss_ce": 0.0031659877859055996, + "loss_iou": 0.435546875, + "loss_num": 0.032958984375, + "loss_xval": 1.03125, + "num_input_tokens_seen": 556990420, + "step": 8327 + }, + { + "epoch": 0.9450212765957446, + "grad_norm": 43.67850112915039, + "learning_rate": 5e-05, + "loss": 1.4258, + "num_input_tokens_seen": 557057140, + "step": 8328 + }, + { + "epoch": 0.9450212765957446, + "loss": 1.4958994388580322, + "loss_ce": 0.012989320792257786, + "loss_iou": 0.57421875, + "loss_num": 0.06689453125, + "loss_xval": 1.484375, + "num_input_tokens_seen": 557057140, + "step": 8328 + }, + { + "epoch": 0.9451347517730496, + "grad_norm": 35.10715103149414, + "learning_rate": 5e-05, + "loss": 1.068, + "num_input_tokens_seen": 557123576, + "step": 8329 + }, + { + "epoch": 0.9451347517730496, + "loss": 1.0535268783569336, + "loss_ce": 0.006163671612739563, + "loss_iou": 0.466796875, + "loss_num": 0.0225830078125, + "loss_xval": 1.046875, + "num_input_tokens_seen": 557123576, + "step": 8329 + }, + { + "epoch": 0.9452482269503546, + "grad_norm": 30.951608657836914, + "learning_rate": 5e-05, + "loss": 1.3229, + "num_input_tokens_seen": 557191060, + "step": 8330 + }, + { + "epoch": 0.9452482269503546, + "loss": 1.3685425519943237, + "loss_ce": 0.0037964533548802137, + "loss_iou": 0.546875, + "loss_num": 0.0537109375, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 557191060, + "step": 8330 + }, + { + "epoch": 0.9453617021276596, + "grad_norm": 19.492555618286133, + "learning_rate": 5e-05, + "loss": 0.8979, + "num_input_tokens_seen": 557256464, + "step": 8331 + }, + { + "epoch": 0.9453617021276596, + "loss": 1.1101586818695068, + "loss_ce": 0.005178240593522787, + "loss_iou": 0.447265625, + "loss_num": 0.0419921875, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 557256464, + "step": 8331 + }, + { + "epoch": 0.9454751773049646, + "grad_norm": 21.8177433013916, + "learning_rate": 5e-05, + "loss": 1.1528, + "num_input_tokens_seen": 557323664, + "step": 8332 + }, + { + "epoch": 0.9454751773049646, + "loss": 1.089538335800171, + "loss_ce": 0.005065710283815861, + "loss_iou": 0.46875, + "loss_num": 0.029541015625, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 557323664, + "step": 8332 + }, + { + "epoch": 0.9455886524822695, + "grad_norm": 31.77066421508789, + "learning_rate": 5e-05, + "loss": 1.0192, + "num_input_tokens_seen": 557390040, + "step": 8333 + }, + { + "epoch": 0.9455886524822695, + "loss": 1.0384255647659302, + "loss_ce": 0.005954904947429895, + "loss_iou": 0.419921875, + "loss_num": 0.038818359375, + "loss_xval": 1.03125, + "num_input_tokens_seen": 557390040, + "step": 8333 + }, + { + "epoch": 0.9457021276595745, + "grad_norm": 27.803955078125, + "learning_rate": 5e-05, + "loss": 1.3297, + "num_input_tokens_seen": 557456792, + "step": 8334 + }, + { + "epoch": 0.9457021276595745, + "loss": 1.5638329982757568, + "loss_ce": 0.006215764209628105, + "loss_iou": 0.640625, + "loss_num": 0.054931640625, + "loss_xval": 1.5546875, + "num_input_tokens_seen": 557456792, + "step": 8334 + }, + { + "epoch": 0.9458156028368795, + "grad_norm": 17.686973571777344, + "learning_rate": 5e-05, + "loss": 1.0372, + "num_input_tokens_seen": 557524308, + "step": 8335 + }, + { + "epoch": 0.9458156028368795, + "loss": 1.1038925647735596, + "loss_ce": 0.0047715455293655396, + "loss_iou": 0.4453125, + "loss_num": 0.041748046875, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 557524308, + "step": 8335 + }, + { + "epoch": 0.9459290780141844, + "grad_norm": 28.154735565185547, + "learning_rate": 5e-05, + "loss": 0.9737, + "num_input_tokens_seen": 557590992, + "step": 8336 + }, + { + "epoch": 0.9459290780141844, + "loss": 1.147055745124817, + "loss_ce": 0.0054541947320103645, + "loss_iou": 0.486328125, + "loss_num": 0.033447265625, + "loss_xval": 1.140625, + "num_input_tokens_seen": 557590992, + "step": 8336 + }, + { + "epoch": 0.9460425531914893, + "grad_norm": 47.55574035644531, + "learning_rate": 5e-05, + "loss": 1.2924, + "num_input_tokens_seen": 557657904, + "step": 8337 + }, + { + "epoch": 0.9460425531914893, + "loss": 1.2522780895233154, + "loss_ce": 0.014698628336191177, + "loss_iou": 0.458984375, + "loss_num": 0.06396484375, + "loss_xval": 1.234375, + "num_input_tokens_seen": 557657904, + "step": 8337 + }, + { + "epoch": 0.9461560283687943, + "grad_norm": 40.47968673706055, + "learning_rate": 5e-05, + "loss": 1.1427, + "num_input_tokens_seen": 557724684, + "step": 8338 + }, + { + "epoch": 0.9461560283687943, + "loss": 1.066680908203125, + "loss_ce": 0.007110633887350559, + "loss_iou": 0.46875, + "loss_num": 0.02392578125, + "loss_xval": 1.0625, + "num_input_tokens_seen": 557724684, + "step": 8338 + }, + { + "epoch": 0.9462695035460993, + "grad_norm": 23.69969367980957, + "learning_rate": 5e-05, + "loss": 0.966, + "num_input_tokens_seen": 557790812, + "step": 8339 + }, + { + "epoch": 0.9462695035460993, + "loss": 1.0526622533798218, + "loss_ce": 0.009693443775177002, + "loss_iou": 0.390625, + "loss_num": 0.052734375, + "loss_xval": 1.046875, + "num_input_tokens_seen": 557790812, + "step": 8339 + }, + { + "epoch": 0.9463829787234043, + "grad_norm": 35.2867317199707, + "learning_rate": 5e-05, + "loss": 0.9354, + "num_input_tokens_seen": 557857772, + "step": 8340 + }, + { + "epoch": 0.9463829787234043, + "loss": 0.806940495967865, + "loss_ce": 0.0037178127095103264, + "loss_iou": 0.3515625, + "loss_num": 0.0196533203125, + "loss_xval": 0.8046875, + "num_input_tokens_seen": 557857772, + "step": 8340 + }, + { + "epoch": 0.9464964539007092, + "grad_norm": 17.587007522583008, + "learning_rate": 5e-05, + "loss": 0.9998, + "num_input_tokens_seen": 557924872, + "step": 8341 + }, + { + "epoch": 0.9464964539007092, + "loss": 0.9981178045272827, + "loss_ce": 0.006418611854314804, + "loss_iou": 0.43359375, + "loss_num": 0.025146484375, + "loss_xval": 0.9921875, + "num_input_tokens_seen": 557924872, + "step": 8341 + }, + { + "epoch": 0.9466099290780142, + "grad_norm": 30.007896423339844, + "learning_rate": 5e-05, + "loss": 1.2089, + "num_input_tokens_seen": 557992172, + "step": 8342 + }, + { + "epoch": 0.9466099290780142, + "loss": 1.170024037361145, + "loss_ce": 0.005229080095887184, + "loss_iou": 0.515625, + "loss_num": 0.0272216796875, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 557992172, + "step": 8342 + }, + { + "epoch": 0.9467234042553192, + "grad_norm": 44.549251556396484, + "learning_rate": 5e-05, + "loss": 1.1044, + "num_input_tokens_seen": 558059304, + "step": 8343 + }, + { + "epoch": 0.9467234042553192, + "loss": 1.0650694370269775, + "loss_ce": 0.004766767844557762, + "loss_iou": 0.453125, + "loss_num": 0.03125, + "loss_xval": 1.0625, + "num_input_tokens_seen": 558059304, + "step": 8343 + }, + { + "epoch": 0.9468368794326241, + "grad_norm": 31.333011627197266, + "learning_rate": 5e-05, + "loss": 1.2585, + "num_input_tokens_seen": 558126652, + "step": 8344 + }, + { + "epoch": 0.9468368794326241, + "loss": 1.3147777318954468, + "loss_ce": 0.007892917841672897, + "loss_iou": 0.515625, + "loss_num": 0.055419921875, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 558126652, + "step": 8344 + }, + { + "epoch": 0.946950354609929, + "grad_norm": 27.73871612548828, + "learning_rate": 5e-05, + "loss": 0.8869, + "num_input_tokens_seen": 558192688, + "step": 8345 + }, + { + "epoch": 0.946950354609929, + "loss": 0.7458516955375671, + "loss_ce": 0.008547038771212101, + "loss_iou": 0.314453125, + "loss_num": 0.021484375, + "loss_xval": 0.73828125, + "num_input_tokens_seen": 558192688, + "step": 8345 + }, + { + "epoch": 0.947063829787234, + "grad_norm": 30.963457107543945, + "learning_rate": 5e-05, + "loss": 1.3141, + "num_input_tokens_seen": 558260328, + "step": 8346 + }, + { + "epoch": 0.947063829787234, + "loss": 1.3883658647537231, + "loss_ce": 0.007018230855464935, + "loss_iou": 0.578125, + "loss_num": 0.0458984375, + "loss_xval": 1.3828125, + "num_input_tokens_seen": 558260328, + "step": 8346 + }, + { + "epoch": 0.947177304964539, + "grad_norm": 40.01841735839844, + "learning_rate": 5e-05, + "loss": 1.0932, + "num_input_tokens_seen": 558327000, + "step": 8347 + }, + { + "epoch": 0.947177304964539, + "loss": 0.8039937019348145, + "loss_ce": 0.00315144588239491, + "loss_iou": 0.33984375, + "loss_num": 0.024169921875, + "loss_xval": 0.80078125, + "num_input_tokens_seen": 558327000, + "step": 8347 + }, + { + "epoch": 0.947290780141844, + "grad_norm": 29.443113327026367, + "learning_rate": 5e-05, + "loss": 1.218, + "num_input_tokens_seen": 558393056, + "step": 8348 + }, + { + "epoch": 0.947290780141844, + "loss": 1.4351389408111572, + "loss_ce": 0.0076487110927701, + "loss_iou": 0.55078125, + "loss_num": 0.0654296875, + "loss_xval": 1.4296875, + "num_input_tokens_seen": 558393056, + "step": 8348 + }, + { + "epoch": 0.947404255319149, + "grad_norm": 25.714099884033203, + "learning_rate": 5e-05, + "loss": 1.0496, + "num_input_tokens_seen": 558461012, + "step": 8349 + }, + { + "epoch": 0.947404255319149, + "loss": 1.0791051387786865, + "loss_ce": 0.007816081866621971, + "loss_iou": 0.419921875, + "loss_num": 0.046142578125, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 558461012, + "step": 8349 + }, + { + "epoch": 0.9475177304964539, + "grad_norm": 34.40819549560547, + "learning_rate": 5e-05, + "loss": 1.1248, + "num_input_tokens_seen": 558528208, + "step": 8350 + }, + { + "epoch": 0.9475177304964539, + "loss": 1.2163722515106201, + "loss_ce": 0.008364480920135975, + "loss_iou": 0.470703125, + "loss_num": 0.052978515625, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 558528208, + "step": 8350 + }, + { + "epoch": 0.9476312056737589, + "grad_norm": 31.011865615844727, + "learning_rate": 5e-05, + "loss": 1.2495, + "num_input_tokens_seen": 558595488, + "step": 8351 + }, + { + "epoch": 0.9476312056737589, + "loss": 1.4602587223052979, + "loss_ce": 0.009086832404136658, + "loss_iou": 0.59375, + "loss_num": 0.05224609375, + "loss_xval": 1.453125, + "num_input_tokens_seen": 558595488, + "step": 8351 + }, + { + "epoch": 0.9477446808510638, + "grad_norm": 31.417526245117188, + "learning_rate": 5e-05, + "loss": 1.2342, + "num_input_tokens_seen": 558661852, + "step": 8352 + }, + { + "epoch": 0.9477446808510638, + "loss": 1.282225489616394, + "loss_ce": 0.004881727043539286, + "loss_iou": 0.51953125, + "loss_num": 0.048095703125, + "loss_xval": 1.28125, + "num_input_tokens_seen": 558661852, + "step": 8352 + }, + { + "epoch": 0.9478581560283688, + "grad_norm": 34.340293884277344, + "learning_rate": 5e-05, + "loss": 1.1796, + "num_input_tokens_seen": 558729636, + "step": 8353 + }, + { + "epoch": 0.9478581560283688, + "loss": 1.3119956254959106, + "loss_ce": 0.004378387238830328, + "loss_iou": 0.5546875, + "loss_num": 0.040283203125, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 558729636, + "step": 8353 + }, + { + "epoch": 0.9479716312056737, + "grad_norm": 30.92693519592285, + "learning_rate": 5e-05, + "loss": 1.0931, + "num_input_tokens_seen": 558796288, + "step": 8354 + }, + { + "epoch": 0.9479716312056737, + "loss": 0.916976809501648, + "loss_ce": 0.002426052698865533, + "loss_iou": 0.3984375, + "loss_num": 0.0230712890625, + "loss_xval": 0.9140625, + "num_input_tokens_seen": 558796288, + "step": 8354 + }, + { + "epoch": 0.9480851063829787, + "grad_norm": 20.7502498626709, + "learning_rate": 5e-05, + "loss": 0.9655, + "num_input_tokens_seen": 558862872, + "step": 8355 + }, + { + "epoch": 0.9480851063829787, + "loss": 0.9695435762405396, + "loss_ce": 0.0029908299911767244, + "loss_iou": 0.38671875, + "loss_num": 0.038818359375, + "loss_xval": 0.96484375, + "num_input_tokens_seen": 558862872, + "step": 8355 + }, + { + "epoch": 0.9481985815602837, + "grad_norm": 41.847206115722656, + "learning_rate": 5e-05, + "loss": 1.1528, + "num_input_tokens_seen": 558929952, + "step": 8356 + }, + { + "epoch": 0.9481985815602837, + "loss": 1.090430736541748, + "loss_ce": 0.007911136373877525, + "loss_iou": 0.4765625, + "loss_num": 0.026123046875, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 558929952, + "step": 8356 + }, + { + "epoch": 0.9483120567375887, + "grad_norm": 25.727890014648438, + "learning_rate": 5e-05, + "loss": 0.9897, + "num_input_tokens_seen": 558998088, + "step": 8357 + }, + { + "epoch": 0.9483120567375887, + "loss": 1.056267499923706, + "loss_ce": 0.005974600091576576, + "loss_iou": 0.4609375, + "loss_num": 0.0257568359375, + "loss_xval": 1.046875, + "num_input_tokens_seen": 558998088, + "step": 8357 + }, + { + "epoch": 0.9484255319148936, + "grad_norm": 25.349716186523438, + "learning_rate": 5e-05, + "loss": 1.2061, + "num_input_tokens_seen": 559065104, + "step": 8358 + }, + { + "epoch": 0.9484255319148936, + "loss": 1.3696101903915405, + "loss_ce": 0.004375839605927467, + "loss_iou": 0.5703125, + "loss_num": 0.045654296875, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 559065104, + "step": 8358 + }, + { + "epoch": 0.9485390070921986, + "grad_norm": 38.85327911376953, + "learning_rate": 5e-05, + "loss": 1.1273, + "num_input_tokens_seen": 559132472, + "step": 8359 + }, + { + "epoch": 0.9485390070921986, + "loss": 1.3003063201904297, + "loss_ce": 0.005872741341590881, + "loss_iou": 0.52734375, + "loss_num": 0.048095703125, + "loss_xval": 1.296875, + "num_input_tokens_seen": 559132472, + "step": 8359 + }, + { + "epoch": 0.9486524822695035, + "grad_norm": 37.88418197631836, + "learning_rate": 5e-05, + "loss": 1.2555, + "num_input_tokens_seen": 559199840, + "step": 8360 + }, + { + "epoch": 0.9486524822695035, + "loss": 1.1804163455963135, + "loss_ce": 0.005123320035636425, + "loss_iou": 0.50390625, + "loss_num": 0.033203125, + "loss_xval": 1.171875, + "num_input_tokens_seen": 559199840, + "step": 8360 + }, + { + "epoch": 0.9487659574468085, + "grad_norm": 20.24452781677246, + "learning_rate": 5e-05, + "loss": 1.1196, + "num_input_tokens_seen": 559267000, + "step": 8361 + }, + { + "epoch": 0.9487659574468085, + "loss": 1.0720722675323486, + "loss_ce": 0.004933573305606842, + "loss_iou": 0.431640625, + "loss_num": 0.040283203125, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 559267000, + "step": 8361 + }, + { + "epoch": 0.9488794326241135, + "grad_norm": 22.377214431762695, + "learning_rate": 5e-05, + "loss": 0.8848, + "num_input_tokens_seen": 559332908, + "step": 8362 + }, + { + "epoch": 0.9488794326241135, + "loss": 0.7184131145477295, + "loss_ce": 0.004652754403650761, + "loss_iou": 0.310546875, + "loss_num": 0.0184326171875, + "loss_xval": 0.71484375, + "num_input_tokens_seen": 559332908, + "step": 8362 + }, + { + "epoch": 0.9489929078014184, + "grad_norm": 30.67763900756836, + "learning_rate": 5e-05, + "loss": 1.0511, + "num_input_tokens_seen": 559399424, + "step": 8363 + }, + { + "epoch": 0.9489929078014184, + "loss": 0.8744982481002808, + "loss_ce": 0.007615940645337105, + "loss_iou": 0.353515625, + "loss_num": 0.03173828125, + "loss_xval": 0.8671875, + "num_input_tokens_seen": 559399424, + "step": 8363 + }, + { + "epoch": 0.9491063829787234, + "grad_norm": 31.21258544921875, + "learning_rate": 5e-05, + "loss": 1.2425, + "num_input_tokens_seen": 559466488, + "step": 8364 + }, + { + "epoch": 0.9491063829787234, + "loss": 1.1441725492477417, + "loss_ce": 0.004524149000644684, + "loss_iou": 0.515625, + "loss_num": 0.021728515625, + "loss_xval": 1.140625, + "num_input_tokens_seen": 559466488, + "step": 8364 + }, + { + "epoch": 0.9492198581560284, + "grad_norm": 32.210994720458984, + "learning_rate": 5e-05, + "loss": 1.2049, + "num_input_tokens_seen": 559533340, + "step": 8365 + }, + { + "epoch": 0.9492198581560284, + "loss": 1.2023634910583496, + "loss_ce": 0.005586165003478527, + "loss_iou": 0.478515625, + "loss_num": 0.0478515625, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 559533340, + "step": 8365 + }, + { + "epoch": 0.9493333333333334, + "grad_norm": 28.496929168701172, + "learning_rate": 5e-05, + "loss": 1.1035, + "num_input_tokens_seen": 559600264, + "step": 8366 + }, + { + "epoch": 0.9493333333333334, + "loss": 0.9571170806884766, + "loss_ce": 0.004968674853444099, + "loss_iou": 0.3828125, + "loss_num": 0.037353515625, + "loss_xval": 0.953125, + "num_input_tokens_seen": 559600264, + "step": 8366 + }, + { + "epoch": 0.9494468085106383, + "grad_norm": 22.29484748840332, + "learning_rate": 5e-05, + "loss": 1.1007, + "num_input_tokens_seen": 559666908, + "step": 8367 + }, + { + "epoch": 0.9494468085106383, + "loss": 1.1218421459197998, + "loss_ce": 0.00709609966725111, + "loss_iou": 0.478515625, + "loss_num": 0.031494140625, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 559666908, + "step": 8367 + }, + { + "epoch": 0.9495602836879432, + "grad_norm": 22.529781341552734, + "learning_rate": 5e-05, + "loss": 1.1587, + "num_input_tokens_seen": 559732768, + "step": 8368 + }, + { + "epoch": 0.9495602836879432, + "loss": 1.1317800283432007, + "loss_ce": 0.006780024617910385, + "loss_iou": 0.45703125, + "loss_num": 0.042236328125, + "loss_xval": 1.125, + "num_input_tokens_seen": 559732768, + "step": 8368 + }, + { + "epoch": 0.9496737588652482, + "grad_norm": 27.909387588500977, + "learning_rate": 5e-05, + "loss": 0.9893, + "num_input_tokens_seen": 559799776, + "step": 8369 + }, + { + "epoch": 0.9496737588652482, + "loss": 1.212477207183838, + "loss_ce": 0.007887383922934532, + "loss_iou": 0.498046875, + "loss_num": 0.041015625, + "loss_xval": 1.203125, + "num_input_tokens_seen": 559799776, + "step": 8369 + }, + { + "epoch": 0.9497872340425532, + "grad_norm": 34.011051177978516, + "learning_rate": 5e-05, + "loss": 1.0805, + "num_input_tokens_seen": 559866292, + "step": 8370 + }, + { + "epoch": 0.9497872340425532, + "loss": 1.0736374855041504, + "loss_ce": 0.010405030101537704, + "loss_iou": 0.451171875, + "loss_num": 0.031982421875, + "loss_xval": 1.0625, + "num_input_tokens_seen": 559866292, + "step": 8370 + }, + { + "epoch": 0.9499007092198581, + "grad_norm": 26.40046501159668, + "learning_rate": 5e-05, + "loss": 1.069, + "num_input_tokens_seen": 559933892, + "step": 8371 + }, + { + "epoch": 0.9499007092198581, + "loss": 1.0560228824615479, + "loss_ce": 0.008171268738806248, + "loss_iou": 0.443359375, + "loss_num": 0.0322265625, + "loss_xval": 1.046875, + "num_input_tokens_seen": 559933892, + "step": 8371 + }, + { + "epoch": 0.9500141843971631, + "grad_norm": 32.3559455871582, + "learning_rate": 5e-05, + "loss": 1.3615, + "num_input_tokens_seen": 560000900, + "step": 8372 + }, + { + "epoch": 0.9500141843971631, + "loss": 1.3600571155548096, + "loss_ce": 0.007029765285551548, + "loss_iou": 0.51171875, + "loss_num": 0.06640625, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 560000900, + "step": 8372 + }, + { + "epoch": 0.9501276595744681, + "grad_norm": 34.32535934448242, + "learning_rate": 5e-05, + "loss": 1.2139, + "num_input_tokens_seen": 560067644, + "step": 8373 + }, + { + "epoch": 0.9501276595744681, + "loss": 1.1297205686569214, + "loss_ce": 0.005208859220147133, + "loss_iou": 0.482421875, + "loss_num": 0.031982421875, + "loss_xval": 1.125, + "num_input_tokens_seen": 560067644, + "step": 8373 + }, + { + "epoch": 0.9502411347517731, + "grad_norm": 30.594301223754883, + "learning_rate": 5e-05, + "loss": 1.1517, + "num_input_tokens_seen": 560134080, + "step": 8374 + }, + { + "epoch": 0.9502411347517731, + "loss": 1.2001107931137085, + "loss_ce": 0.00479822838678956, + "loss_iou": 0.48046875, + "loss_num": 0.047119140625, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 560134080, + "step": 8374 + }, + { + "epoch": 0.950354609929078, + "grad_norm": 26.7244930267334, + "learning_rate": 5e-05, + "loss": 1.0914, + "num_input_tokens_seen": 560201112, + "step": 8375 + }, + { + "epoch": 0.950354609929078, + "loss": 1.0703904628753662, + "loss_ce": 0.004472453612834215, + "loss_iou": 0.41796875, + "loss_num": 0.046142578125, + "loss_xval": 1.0625, + "num_input_tokens_seen": 560201112, + "step": 8375 + }, + { + "epoch": 0.950468085106383, + "grad_norm": 39.49506378173828, + "learning_rate": 5e-05, + "loss": 1.1762, + "num_input_tokens_seen": 560268536, + "step": 8376 + }, + { + "epoch": 0.950468085106383, + "loss": 1.1161566972732544, + "loss_ce": 0.004340272396802902, + "loss_iou": 0.4765625, + "loss_num": 0.031494140625, + "loss_xval": 1.109375, + "num_input_tokens_seen": 560268536, + "step": 8376 + }, + { + "epoch": 0.9505815602836879, + "grad_norm": 35.626564025878906, + "learning_rate": 5e-05, + "loss": 1.1107, + "num_input_tokens_seen": 560335656, + "step": 8377 + }, + { + "epoch": 0.9505815602836879, + "loss": 1.1189446449279785, + "loss_ce": 0.004198597278445959, + "loss_iou": 0.4765625, + "loss_num": 0.031982421875, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 560335656, + "step": 8377 + }, + { + "epoch": 0.9506950354609929, + "grad_norm": 28.001487731933594, + "learning_rate": 5e-05, + "loss": 1.2388, + "num_input_tokens_seen": 560401748, + "step": 8378 + }, + { + "epoch": 0.9506950354609929, + "loss": 1.313368797302246, + "loss_ce": 0.009169531054794788, + "loss_iou": 0.52734375, + "loss_num": 0.04931640625, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 560401748, + "step": 8378 + }, + { + "epoch": 0.9508085106382979, + "grad_norm": 14.762081146240234, + "learning_rate": 5e-05, + "loss": 1.058, + "num_input_tokens_seen": 560469500, + "step": 8379 + }, + { + "epoch": 0.9508085106382979, + "loss": 1.1606240272521973, + "loss_ce": 0.008768497034907341, + "loss_iou": 0.48046875, + "loss_num": 0.0380859375, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 560469500, + "step": 8379 + }, + { + "epoch": 0.9509219858156028, + "grad_norm": 20.042083740234375, + "learning_rate": 5e-05, + "loss": 1.2463, + "num_input_tokens_seen": 560536956, + "step": 8380 + }, + { + "epoch": 0.9509219858156028, + "loss": 1.2807307243347168, + "loss_ce": 0.006316666025668383, + "loss_iou": 0.5234375, + "loss_num": 0.0458984375, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 560536956, + "step": 8380 + }, + { + "epoch": 0.9510354609929078, + "grad_norm": 41.60103988647461, + "learning_rate": 5e-05, + "loss": 1.0105, + "num_input_tokens_seen": 560604268, + "step": 8381 + }, + { + "epoch": 0.9510354609929078, + "loss": 1.0338521003723145, + "loss_ce": 0.004067004658281803, + "loss_iou": 0.46484375, + "loss_num": 0.0196533203125, + "loss_xval": 1.03125, + "num_input_tokens_seen": 560604268, + "step": 8381 + }, + { + "epoch": 0.9511489361702128, + "grad_norm": 25.97610092163086, + "learning_rate": 5e-05, + "loss": 1.2837, + "num_input_tokens_seen": 560670716, + "step": 8382 + }, + { + "epoch": 0.9511489361702128, + "loss": 1.1561359167099, + "loss_ce": 0.006233567371964455, + "loss_iou": 0.48046875, + "loss_num": 0.037841796875, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 560670716, + "step": 8382 + }, + { + "epoch": 0.9512624113475178, + "grad_norm": 9.994210243225098, + "learning_rate": 5e-05, + "loss": 0.8329, + "num_input_tokens_seen": 560737900, + "step": 8383 + }, + { + "epoch": 0.9512624113475178, + "loss": 0.8950198888778687, + "loss_ce": 0.009277749806642532, + "loss_iou": 0.37109375, + "loss_num": 0.0283203125, + "loss_xval": 0.88671875, + "num_input_tokens_seen": 560737900, + "step": 8383 + }, + { + "epoch": 0.9513758865248227, + "grad_norm": 19.720727920532227, + "learning_rate": 5e-05, + "loss": 0.9289, + "num_input_tokens_seen": 560804332, + "step": 8384 + }, + { + "epoch": 0.9513758865248227, + "loss": 0.7798107266426086, + "loss_ce": 0.010035308077931404, + "loss_iou": 0.3125, + "loss_num": 0.0291748046875, + "loss_xval": 0.76953125, + "num_input_tokens_seen": 560804332, + "step": 8384 + }, + { + "epoch": 0.9514893617021276, + "grad_norm": 23.6680965423584, + "learning_rate": 5e-05, + "loss": 0.9712, + "num_input_tokens_seen": 560871488, + "step": 8385 + }, + { + "epoch": 0.9514893617021276, + "loss": 1.0553624629974365, + "loss_ce": 0.006046081427484751, + "loss_iou": 0.44140625, + "loss_num": 0.033447265625, + "loss_xval": 1.046875, + "num_input_tokens_seen": 560871488, + "step": 8385 + }, + { + "epoch": 0.9516028368794326, + "grad_norm": 21.167388916015625, + "learning_rate": 5e-05, + "loss": 1.0852, + "num_input_tokens_seen": 560938020, + "step": 8386 + }, + { + "epoch": 0.9516028368794326, + "loss": 1.0830947160720825, + "loss_ce": 0.0044814422726631165, + "loss_iou": 0.431640625, + "loss_num": 0.04296875, + "loss_xval": 1.078125, + "num_input_tokens_seen": 560938020, + "step": 8386 + }, + { + "epoch": 0.9517163120567376, + "grad_norm": 29.845796585083008, + "learning_rate": 5e-05, + "loss": 1.0819, + "num_input_tokens_seen": 561005328, + "step": 8387 + }, + { + "epoch": 0.9517163120567376, + "loss": 0.9104443788528442, + "loss_ce": 0.00858885608613491, + "loss_iou": 0.390625, + "loss_num": 0.024658203125, + "loss_xval": 0.90234375, + "num_input_tokens_seen": 561005328, + "step": 8387 + }, + { + "epoch": 0.9518297872340425, + "grad_norm": 45.81770324707031, + "learning_rate": 5e-05, + "loss": 1.1277, + "num_input_tokens_seen": 561071404, + "step": 8388 + }, + { + "epoch": 0.9518297872340425, + "loss": 1.1003774404525757, + "loss_ce": 0.0041860491037368774, + "loss_iou": 0.453125, + "loss_num": 0.0380859375, + "loss_xval": 1.09375, + "num_input_tokens_seen": 561071404, + "step": 8388 + }, + { + "epoch": 0.9519432624113475, + "grad_norm": 28.123586654663086, + "learning_rate": 5e-05, + "loss": 1.167, + "num_input_tokens_seen": 561137748, + "step": 8389 + }, + { + "epoch": 0.9519432624113475, + "loss": 1.2320668697357178, + "loss_ce": 0.0075796907767653465, + "loss_iou": 0.48828125, + "loss_num": 0.04931640625, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 561137748, + "step": 8389 + }, + { + "epoch": 0.9520567375886525, + "grad_norm": 24.192745208740234, + "learning_rate": 5e-05, + "loss": 1.1522, + "num_input_tokens_seen": 561204896, + "step": 8390 + }, + { + "epoch": 0.9520567375886525, + "loss": 1.1295561790466309, + "loss_ce": 0.007730013690888882, + "loss_iou": 0.4609375, + "loss_num": 0.03955078125, + "loss_xval": 1.125, + "num_input_tokens_seen": 561204896, + "step": 8390 + }, + { + "epoch": 0.9521702127659575, + "grad_norm": 14.962766647338867, + "learning_rate": 5e-05, + "loss": 0.9837, + "num_input_tokens_seen": 561271052, + "step": 8391 + }, + { + "epoch": 0.9521702127659575, + "loss": 1.0253181457519531, + "loss_ce": 0.006763403303921223, + "loss_iou": 0.4296875, + "loss_num": 0.031982421875, + "loss_xval": 1.015625, + "num_input_tokens_seen": 561271052, + "step": 8391 + }, + { + "epoch": 0.9522836879432625, + "grad_norm": 31.110708236694336, + "learning_rate": 5e-05, + "loss": 1.0021, + "num_input_tokens_seen": 561337464, + "step": 8392 + }, + { + "epoch": 0.9522836879432625, + "loss": 1.1606377363204956, + "loss_ce": 0.005852603353559971, + "loss_iou": 0.48828125, + "loss_num": 0.03515625, + "loss_xval": 1.15625, + "num_input_tokens_seen": 561337464, + "step": 8392 + }, + { + "epoch": 0.9523971631205673, + "grad_norm": 54.73729705810547, + "learning_rate": 5e-05, + "loss": 1.3352, + "num_input_tokens_seen": 561405764, + "step": 8393 + }, + { + "epoch": 0.9523971631205673, + "loss": 1.4179761409759521, + "loss_ce": 0.00537845166400075, + "loss_iou": 0.58984375, + "loss_num": 0.04638671875, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 561405764, + "step": 8393 + }, + { + "epoch": 0.9525106382978723, + "grad_norm": 84.18202209472656, + "learning_rate": 5e-05, + "loss": 1.011, + "num_input_tokens_seen": 561474008, + "step": 8394 + }, + { + "epoch": 0.9525106382978723, + "loss": 1.0019829273223877, + "loss_ce": 0.008330659940838814, + "loss_iou": 0.412109375, + "loss_num": 0.03369140625, + "loss_xval": 0.9921875, + "num_input_tokens_seen": 561474008, + "step": 8394 + }, + { + "epoch": 0.9526241134751773, + "grad_norm": 34.83298110961914, + "learning_rate": 5e-05, + "loss": 1.2949, + "num_input_tokens_seen": 561541956, + "step": 8395 + }, + { + "epoch": 0.9526241134751773, + "loss": 1.2862004041671753, + "loss_ce": 0.005926960147917271, + "loss_iou": 0.53515625, + "loss_num": 0.04150390625, + "loss_xval": 1.28125, + "num_input_tokens_seen": 561541956, + "step": 8395 + }, + { + "epoch": 0.9527375886524823, + "grad_norm": 50.3280143737793, + "learning_rate": 5e-05, + "loss": 1.0906, + "num_input_tokens_seen": 561609444, + "step": 8396 + }, + { + "epoch": 0.9527375886524823, + "loss": 0.9322080612182617, + "loss_ce": 0.006915097124874592, + "loss_iou": 0.4140625, + "loss_num": 0.0196533203125, + "loss_xval": 0.92578125, + "num_input_tokens_seen": 561609444, + "step": 8396 + }, + { + "epoch": 0.9528510638297872, + "grad_norm": 37.485599517822266, + "learning_rate": 5e-05, + "loss": 1.3563, + "num_input_tokens_seen": 561675988, + "step": 8397 + }, + { + "epoch": 0.9528510638297872, + "loss": 1.2922605276107788, + "loss_ce": 0.007104352116584778, + "loss_iou": 0.53125, + "loss_num": 0.044677734375, + "loss_xval": 1.28125, + "num_input_tokens_seen": 561675988, + "step": 8397 + }, + { + "epoch": 0.9529645390070922, + "grad_norm": 25.08643913269043, + "learning_rate": 5e-05, + "loss": 1.0872, + "num_input_tokens_seen": 561742364, + "step": 8398 + }, + { + "epoch": 0.9529645390070922, + "loss": 1.1454075574874878, + "loss_ce": 0.010153653100132942, + "loss_iou": 0.44921875, + "loss_num": 0.047607421875, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 561742364, + "step": 8398 + }, + { + "epoch": 0.9530780141843972, + "grad_norm": 23.584209442138672, + "learning_rate": 5e-05, + "loss": 1.2304, + "num_input_tokens_seen": 561810316, + "step": 8399 + }, + { + "epoch": 0.9530780141843972, + "loss": 1.3303767442703247, + "loss_ce": 0.004204909782856703, + "loss_iou": 0.52734375, + "loss_num": 0.0537109375, + "loss_xval": 1.328125, + "num_input_tokens_seen": 561810316, + "step": 8399 + }, + { + "epoch": 0.9531914893617022, + "grad_norm": 35.93369674682617, + "learning_rate": 5e-05, + "loss": 1.1331, + "num_input_tokens_seen": 561876460, + "step": 8400 + }, + { + "epoch": 0.9531914893617022, + "loss": 0.9254552721977234, + "loss_ce": 0.0055334297940135, + "loss_iou": 0.384765625, + "loss_num": 0.0299072265625, + "loss_xval": 0.921875, + "num_input_tokens_seen": 561876460, + "step": 8400 + }, + { + "epoch": 0.953304964539007, + "grad_norm": 32.38916778564453, + "learning_rate": 5e-05, + "loss": 1.2236, + "num_input_tokens_seen": 561943672, + "step": 8401 + }, + { + "epoch": 0.953304964539007, + "loss": 0.9987059235572815, + "loss_ce": 0.00310042523778975, + "loss_iou": 0.40625, + "loss_num": 0.03662109375, + "loss_xval": 0.99609375, + "num_input_tokens_seen": 561943672, + "step": 8401 + }, + { + "epoch": 0.953418439716312, + "grad_norm": 16.45209312438965, + "learning_rate": 5e-05, + "loss": 1.0219, + "num_input_tokens_seen": 562010392, + "step": 8402 + }, + { + "epoch": 0.953418439716312, + "loss": 1.0842792987823486, + "loss_ce": 0.010548809543251991, + "loss_iou": 0.4375, + "loss_num": 0.0400390625, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 562010392, + "step": 8402 + }, + { + "epoch": 0.953531914893617, + "grad_norm": 16.769245147705078, + "learning_rate": 5e-05, + "loss": 0.9496, + "num_input_tokens_seen": 562076828, + "step": 8403 + }, + { + "epoch": 0.953531914893617, + "loss": 0.8707510232925415, + "loss_ce": 0.0055166431702673435, + "loss_iou": 0.3515625, + "loss_num": 0.03271484375, + "loss_xval": 0.8671875, + "num_input_tokens_seen": 562076828, + "step": 8403 + }, + { + "epoch": 0.953645390070922, + "grad_norm": 32.06451416015625, + "learning_rate": 5e-05, + "loss": 1.2428, + "num_input_tokens_seen": 562145244, + "step": 8404 + }, + { + "epoch": 0.953645390070922, + "loss": 1.2001842260360718, + "loss_ce": 0.0031627805437892675, + "loss_iou": 0.486328125, + "loss_num": 0.044677734375, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 562145244, + "step": 8404 + }, + { + "epoch": 0.953758865248227, + "grad_norm": 25.65650177001953, + "learning_rate": 5e-05, + "loss": 1.1332, + "num_input_tokens_seen": 562211552, + "step": 8405 + }, + { + "epoch": 0.953758865248227, + "loss": 1.2519261837005615, + "loss_ce": 0.012180065736174583, + "loss_iou": 0.46484375, + "loss_num": 0.061767578125, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 562211552, + "step": 8405 + }, + { + "epoch": 0.9538723404255319, + "grad_norm": 37.198280334472656, + "learning_rate": 5e-05, + "loss": 1.1521, + "num_input_tokens_seen": 562278960, + "step": 8406 + }, + { + "epoch": 0.9538723404255319, + "loss": 1.0893839597702026, + "loss_ce": 0.007841002196073532, + "loss_iou": 0.4609375, + "loss_num": 0.031494140625, + "loss_xval": 1.078125, + "num_input_tokens_seen": 562278960, + "step": 8406 + }, + { + "epoch": 0.9539858156028369, + "grad_norm": 25.643146514892578, + "learning_rate": 5e-05, + "loss": 1.2661, + "num_input_tokens_seen": 562346368, + "step": 8407 + }, + { + "epoch": 0.9539858156028369, + "loss": 1.2804769277572632, + "loss_ce": 0.013387156650424004, + "loss_iou": 0.51953125, + "loss_num": 0.04541015625, + "loss_xval": 1.265625, + "num_input_tokens_seen": 562346368, + "step": 8407 + }, + { + "epoch": 0.9540992907801419, + "grad_norm": 95.9126205444336, + "learning_rate": 5e-05, + "loss": 1.1748, + "num_input_tokens_seen": 562413264, + "step": 8408 + }, + { + "epoch": 0.9540992907801419, + "loss": 1.3047759532928467, + "loss_ce": 0.007412733510136604, + "loss_iou": 0.5078125, + "loss_num": 0.05615234375, + "loss_xval": 1.296875, + "num_input_tokens_seen": 562413264, + "step": 8408 + }, + { + "epoch": 0.9542127659574469, + "grad_norm": 44.90486526489258, + "learning_rate": 5e-05, + "loss": 1.1877, + "num_input_tokens_seen": 562479804, + "step": 8409 + }, + { + "epoch": 0.9542127659574469, + "loss": 1.104974389076233, + "loss_ce": 0.004388380795717239, + "loss_iou": 0.44921875, + "loss_num": 0.040283203125, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 562479804, + "step": 8409 + }, + { + "epoch": 0.9543262411347517, + "grad_norm": 69.01921081542969, + "learning_rate": 5e-05, + "loss": 1.2626, + "num_input_tokens_seen": 562547832, + "step": 8410 + }, + { + "epoch": 0.9543262411347517, + "loss": 1.2629673480987549, + "loss_ce": 0.004666561260819435, + "loss_iou": 0.53125, + "loss_num": 0.03857421875, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 562547832, + "step": 8410 + }, + { + "epoch": 0.9544397163120567, + "grad_norm": 28.918270111083984, + "learning_rate": 5e-05, + "loss": 1.0101, + "num_input_tokens_seen": 562613836, + "step": 8411 + }, + { + "epoch": 0.9544397163120567, + "loss": 1.125049114227295, + "loss_ce": 0.00639681052416563, + "loss_iou": 0.416015625, + "loss_num": 0.057861328125, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 562613836, + "step": 8411 + }, + { + "epoch": 0.9545531914893617, + "grad_norm": 29.67314910888672, + "learning_rate": 5e-05, + "loss": 1.0576, + "num_input_tokens_seen": 562680852, + "step": 8412 + }, + { + "epoch": 0.9545531914893617, + "loss": 0.9887215495109558, + "loss_ce": 0.004590705968439579, + "loss_iou": 0.3984375, + "loss_num": 0.037841796875, + "loss_xval": 0.984375, + "num_input_tokens_seen": 562680852, + "step": 8412 + }, + { + "epoch": 0.9546666666666667, + "grad_norm": 84.43462371826172, + "learning_rate": 5e-05, + "loss": 1.2909, + "num_input_tokens_seen": 562747452, + "step": 8413 + }, + { + "epoch": 0.9546666666666667, + "loss": 1.1931376457214355, + "loss_ce": 0.006614216137677431, + "loss_iou": 0.50390625, + "loss_num": 0.03564453125, + "loss_xval": 1.1875, + "num_input_tokens_seen": 562747452, + "step": 8413 + }, + { + "epoch": 0.9547801418439716, + "grad_norm": 75.94058990478516, + "learning_rate": 5e-05, + "loss": 1.0517, + "num_input_tokens_seen": 562814152, + "step": 8414 + }, + { + "epoch": 0.9547801418439716, + "loss": 1.1282811164855957, + "loss_ce": 0.004745877347886562, + "loss_iou": 0.498046875, + "loss_num": 0.025146484375, + "loss_xval": 1.125, + "num_input_tokens_seen": 562814152, + "step": 8414 + }, + { + "epoch": 0.9548936170212766, + "grad_norm": 29.309282302856445, + "learning_rate": 5e-05, + "loss": 1.1042, + "num_input_tokens_seen": 562881616, + "step": 8415 + }, + { + "epoch": 0.9548936170212766, + "loss": 1.1426243782043457, + "loss_ce": 0.005417338572442532, + "loss_iou": 0.458984375, + "loss_num": 0.0439453125, + "loss_xval": 1.140625, + "num_input_tokens_seen": 562881616, + "step": 8415 + }, + { + "epoch": 0.9550070921985816, + "grad_norm": 25.116134643554688, + "learning_rate": 5e-05, + "loss": 1.1206, + "num_input_tokens_seen": 562950052, + "step": 8416 + }, + { + "epoch": 0.9550070921985816, + "loss": 0.9583844542503357, + "loss_ce": 0.0023297672159969807, + "loss_iou": 0.416015625, + "loss_num": 0.0252685546875, + "loss_xval": 0.95703125, + "num_input_tokens_seen": 562950052, + "step": 8416 + }, + { + "epoch": 0.9551205673758866, + "grad_norm": 19.153505325317383, + "learning_rate": 5e-05, + "loss": 1.1561, + "num_input_tokens_seen": 563017456, + "step": 8417 + }, + { + "epoch": 0.9551205673758866, + "loss": 1.1833467483520508, + "loss_ce": 0.006100758444517851, + "loss_iou": 0.447265625, + "loss_num": 0.056884765625, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 563017456, + "step": 8417 + }, + { + "epoch": 0.9552340425531914, + "grad_norm": 11.40415096282959, + "learning_rate": 5e-05, + "loss": 1.0149, + "num_input_tokens_seen": 563083952, + "step": 8418 + }, + { + "epoch": 0.9552340425531914, + "loss": 1.0637009143829346, + "loss_ce": 0.004374801181256771, + "loss_iou": 0.412109375, + "loss_num": 0.04736328125, + "loss_xval": 1.0625, + "num_input_tokens_seen": 563083952, + "step": 8418 + }, + { + "epoch": 0.9553475177304964, + "grad_norm": 14.178675651550293, + "learning_rate": 5e-05, + "loss": 1.1113, + "num_input_tokens_seen": 563150584, + "step": 8419 + }, + { + "epoch": 0.9553475177304964, + "loss": 1.0976004600524902, + "loss_ce": 0.003850543173030019, + "loss_iou": 0.3984375, + "loss_num": 0.059326171875, + "loss_xval": 1.09375, + "num_input_tokens_seen": 563150584, + "step": 8419 + }, + { + "epoch": 0.9554609929078014, + "grad_norm": 22.04481315612793, + "learning_rate": 5e-05, + "loss": 1.3296, + "num_input_tokens_seen": 563218732, + "step": 8420 + }, + { + "epoch": 0.9554609929078014, + "loss": 1.2868611812591553, + "loss_ce": 0.006587721407413483, + "loss_iou": 0.50390625, + "loss_num": 0.054443359375, + "loss_xval": 1.28125, + "num_input_tokens_seen": 563218732, + "step": 8420 + }, + { + "epoch": 0.9555744680851064, + "grad_norm": 44.97551345825195, + "learning_rate": 5e-05, + "loss": 1.2008, + "num_input_tokens_seen": 563285980, + "step": 8421 + }, + { + "epoch": 0.9555744680851064, + "loss": 1.2809193134307861, + "loss_ce": 0.005040392745286226, + "loss_iou": 0.5, + "loss_num": 0.0546875, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 563285980, + "step": 8421 + }, + { + "epoch": 0.9556879432624114, + "grad_norm": 42.31654739379883, + "learning_rate": 5e-05, + "loss": 1.1784, + "num_input_tokens_seen": 563353116, + "step": 8422 + }, + { + "epoch": 0.9556879432624114, + "loss": 1.080761432647705, + "loss_ce": 0.0036130722146481276, + "loss_iou": 0.4453125, + "loss_num": 0.037353515625, + "loss_xval": 1.078125, + "num_input_tokens_seen": 563353116, + "step": 8422 + }, + { + "epoch": 0.9558014184397163, + "grad_norm": 38.481483459472656, + "learning_rate": 5e-05, + "loss": 1.2949, + "num_input_tokens_seen": 563419388, + "step": 8423 + }, + { + "epoch": 0.9558014184397163, + "loss": 1.2760802507400513, + "loss_ce": 0.008013804443180561, + "loss_iou": 0.5234375, + "loss_num": 0.044677734375, + "loss_xval": 1.265625, + "num_input_tokens_seen": 563419388, + "step": 8423 + }, + { + "epoch": 0.9559148936170213, + "grad_norm": 31.276700973510742, + "learning_rate": 5e-05, + "loss": 1.0893, + "num_input_tokens_seen": 563486264, + "step": 8424 + }, + { + "epoch": 0.9559148936170213, + "loss": 1.3075222969055176, + "loss_ce": 0.004787982441484928, + "loss_iou": 0.54296875, + "loss_num": 0.0439453125, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 563486264, + "step": 8424 + }, + { + "epoch": 0.9560283687943263, + "grad_norm": 27.307994842529297, + "learning_rate": 5e-05, + "loss": 1.1393, + "num_input_tokens_seen": 563553260, + "step": 8425 + }, + { + "epoch": 0.9560283687943263, + "loss": 1.2547390460968018, + "loss_ce": 0.0037625355180352926, + "loss_iou": 0.48828125, + "loss_num": 0.05517578125, + "loss_xval": 1.25, + "num_input_tokens_seen": 563553260, + "step": 8425 + }, + { + "epoch": 0.9561418439716312, + "grad_norm": 24.867938995361328, + "learning_rate": 5e-05, + "loss": 1.096, + "num_input_tokens_seen": 563620124, + "step": 8426 + }, + { + "epoch": 0.9561418439716312, + "loss": 0.9914864897727966, + "loss_ce": 0.007111518643796444, + "loss_iou": 0.4140625, + "loss_num": 0.031494140625, + "loss_xval": 0.984375, + "num_input_tokens_seen": 563620124, + "step": 8426 + }, + { + "epoch": 0.9562553191489361, + "grad_norm": 44.49909591674805, + "learning_rate": 5e-05, + "loss": 1.219, + "num_input_tokens_seen": 563687000, + "step": 8427 + }, + { + "epoch": 0.9562553191489361, + "loss": 1.3374022245407104, + "loss_ce": 0.010253796353936195, + "loss_iou": 0.53515625, + "loss_num": 0.051025390625, + "loss_xval": 1.328125, + "num_input_tokens_seen": 563687000, + "step": 8427 + }, + { + "epoch": 0.9563687943262411, + "grad_norm": 51.7673454284668, + "learning_rate": 5e-05, + "loss": 1.3566, + "num_input_tokens_seen": 563753880, + "step": 8428 + }, + { + "epoch": 0.9563687943262411, + "loss": 1.5639170408248901, + "loss_ce": 0.010206127539277077, + "loss_iou": 0.62890625, + "loss_num": 0.059326171875, + "loss_xval": 1.5546875, + "num_input_tokens_seen": 563753880, + "step": 8428 + }, + { + "epoch": 0.9564822695035461, + "grad_norm": 30.925695419311523, + "learning_rate": 5e-05, + "loss": 1.0907, + "num_input_tokens_seen": 563821460, + "step": 8429 + }, + { + "epoch": 0.9564822695035461, + "loss": 1.1715059280395508, + "loss_ce": 0.0020722916815429926, + "loss_iou": 0.486328125, + "loss_num": 0.03955078125, + "loss_xval": 1.171875, + "num_input_tokens_seen": 563821460, + "step": 8429 + }, + { + "epoch": 0.9565957446808511, + "grad_norm": 28.4793701171875, + "learning_rate": 5e-05, + "loss": 1.1323, + "num_input_tokens_seen": 563888112, + "step": 8430 + }, + { + "epoch": 0.9565957446808511, + "loss": 1.0739179849624634, + "loss_ce": 0.005070284474641085, + "loss_iou": 0.455078125, + "loss_num": 0.03173828125, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 563888112, + "step": 8430 + }, + { + "epoch": 0.956709219858156, + "grad_norm": 23.2514591217041, + "learning_rate": 5e-05, + "loss": 1.0975, + "num_input_tokens_seen": 563954508, + "step": 8431 + }, + { + "epoch": 0.956709219858156, + "loss": 1.0365090370178223, + "loss_ce": 0.008188726380467415, + "loss_iou": 0.392578125, + "loss_num": 0.048828125, + "loss_xval": 1.03125, + "num_input_tokens_seen": 563954508, + "step": 8431 + }, + { + "epoch": 0.956822695035461, + "grad_norm": 78.25452423095703, + "learning_rate": 5e-05, + "loss": 1.0222, + "num_input_tokens_seen": 564020308, + "step": 8432 + }, + { + "epoch": 0.956822695035461, + "loss": 0.9689186811447144, + "loss_ce": 0.007035131566226482, + "loss_iou": 0.41015625, + "loss_num": 0.0286865234375, + "loss_xval": 0.9609375, + "num_input_tokens_seen": 564020308, + "step": 8432 + }, + { + "epoch": 0.956936170212766, + "grad_norm": 35.71268844604492, + "learning_rate": 5e-05, + "loss": 0.994, + "num_input_tokens_seen": 564087232, + "step": 8433 + }, + { + "epoch": 0.956936170212766, + "loss": 0.8006929755210876, + "loss_ce": 0.008456628769636154, + "loss_iou": 0.326171875, + "loss_num": 0.0279541015625, + "loss_xval": 0.79296875, + "num_input_tokens_seen": 564087232, + "step": 8433 + }, + { + "epoch": 0.9570496453900709, + "grad_norm": 45.958316802978516, + "learning_rate": 5e-05, + "loss": 1.2109, + "num_input_tokens_seen": 564153808, + "step": 8434 + }, + { + "epoch": 0.9570496453900709, + "loss": 1.3157916069030762, + "loss_ce": 0.005244703032076359, + "loss_iou": 0.55859375, + "loss_num": 0.039306640625, + "loss_xval": 1.3125, + "num_input_tokens_seen": 564153808, + "step": 8434 + }, + { + "epoch": 0.9571631205673758, + "grad_norm": 31.09377098083496, + "learning_rate": 5e-05, + "loss": 1.1591, + "num_input_tokens_seen": 564220064, + "step": 8435 + }, + { + "epoch": 0.9571631205673758, + "loss": 0.9952287673950195, + "loss_ce": 0.0096330801025033, + "loss_iou": 0.390625, + "loss_num": 0.04052734375, + "loss_xval": 0.984375, + "num_input_tokens_seen": 564220064, + "step": 8435 + }, + { + "epoch": 0.9572765957446808, + "grad_norm": 37.62327575683594, + "learning_rate": 5e-05, + "loss": 1.1981, + "num_input_tokens_seen": 564287304, + "step": 8436 + }, + { + "epoch": 0.9572765957446808, + "loss": 1.2580559253692627, + "loss_ce": 0.006102790590375662, + "loss_iou": 0.515625, + "loss_num": 0.04345703125, + "loss_xval": 1.25, + "num_input_tokens_seen": 564287304, + "step": 8436 + }, + { + "epoch": 0.9573900709219858, + "grad_norm": 22.919919967651367, + "learning_rate": 5e-05, + "loss": 1.0583, + "num_input_tokens_seen": 564354024, + "step": 8437 + }, + { + "epoch": 0.9573900709219858, + "loss": 0.9906991124153137, + "loss_ce": 0.005103426054120064, + "loss_iou": 0.388671875, + "loss_num": 0.041748046875, + "loss_xval": 0.984375, + "num_input_tokens_seen": 564354024, + "step": 8437 + }, + { + "epoch": 0.9575035460992908, + "grad_norm": 25.027929306030273, + "learning_rate": 5e-05, + "loss": 1.0836, + "num_input_tokens_seen": 564420520, + "step": 8438 + }, + { + "epoch": 0.9575035460992908, + "loss": 1.013732671737671, + "loss_ce": 0.006408424582332373, + "loss_iou": 0.439453125, + "loss_num": 0.0257568359375, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 564420520, + "step": 8438 + }, + { + "epoch": 0.9576170212765958, + "grad_norm": 29.705312728881836, + "learning_rate": 5e-05, + "loss": 1.1363, + "num_input_tokens_seen": 564488084, + "step": 8439 + }, + { + "epoch": 0.9576170212765958, + "loss": 1.1745007038116455, + "loss_ce": 0.0070201437920331955, + "loss_iou": 0.458984375, + "loss_num": 0.049560546875, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 564488084, + "step": 8439 + }, + { + "epoch": 0.9577304964539007, + "grad_norm": 40.107295989990234, + "learning_rate": 5e-05, + "loss": 1.1391, + "num_input_tokens_seen": 564554376, + "step": 8440 + }, + { + "epoch": 0.9577304964539007, + "loss": 1.0085899829864502, + "loss_ce": 0.007125111296772957, + "loss_iou": 0.369140625, + "loss_num": 0.052490234375, + "loss_xval": 1.0, + "num_input_tokens_seen": 564554376, + "step": 8440 + }, + { + "epoch": 0.9578439716312057, + "grad_norm": 26.977216720581055, + "learning_rate": 5e-05, + "loss": 1.176, + "num_input_tokens_seen": 564621584, + "step": 8441 + }, + { + "epoch": 0.9578439716312057, + "loss": 1.240750789642334, + "loss_ce": 0.00784057192504406, + "loss_iou": 0.484375, + "loss_num": 0.052734375, + "loss_xval": 1.234375, + "num_input_tokens_seen": 564621584, + "step": 8441 + }, + { + "epoch": 0.9579574468085107, + "grad_norm": 39.55371856689453, + "learning_rate": 5e-05, + "loss": 1.1801, + "num_input_tokens_seen": 564689192, + "step": 8442 + }, + { + "epoch": 0.9579574468085107, + "loss": 1.1871819496154785, + "loss_ce": 0.008226824924349785, + "loss_iou": 0.43359375, + "loss_num": 0.0625, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 564689192, + "step": 8442 + }, + { + "epoch": 0.9580709219858156, + "grad_norm": 39.53941345214844, + "learning_rate": 5e-05, + "loss": 1.178, + "num_input_tokens_seen": 564756124, + "step": 8443 + }, + { + "epoch": 0.9580709219858156, + "loss": 1.244680404663086, + "loss_ce": 0.003469496499747038, + "loss_iou": 0.51953125, + "loss_num": 0.041015625, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 564756124, + "step": 8443 + }, + { + "epoch": 0.9581843971631205, + "grad_norm": 35.23585891723633, + "learning_rate": 5e-05, + "loss": 1.1526, + "num_input_tokens_seen": 564823632, + "step": 8444 + }, + { + "epoch": 0.9581843971631205, + "loss": 1.2820892333984375, + "loss_ce": 0.0052337623201310635, + "loss_iou": 0.515625, + "loss_num": 0.048095703125, + "loss_xval": 1.2734375, + "num_input_tokens_seen": 564823632, + "step": 8444 + }, + { + "epoch": 0.9582978723404255, + "grad_norm": 36.280460357666016, + "learning_rate": 5e-05, + "loss": 1.0875, + "num_input_tokens_seen": 564891280, + "step": 8445 + }, + { + "epoch": 0.9582978723404255, + "loss": 1.1084100008010864, + "loss_ce": 0.0063591692596673965, + "loss_iou": 0.466796875, + "loss_num": 0.033203125, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 564891280, + "step": 8445 + }, + { + "epoch": 0.9584113475177305, + "grad_norm": 23.954816818237305, + "learning_rate": 5e-05, + "loss": 1.114, + "num_input_tokens_seen": 564959216, + "step": 8446 + }, + { + "epoch": 0.9584113475177305, + "loss": 1.189215064048767, + "loss_ce": 0.0026916179340332747, + "loss_iou": 0.498046875, + "loss_num": 0.038330078125, + "loss_xval": 1.1875, + "num_input_tokens_seen": 564959216, + "step": 8446 + }, + { + "epoch": 0.9585248226950355, + "grad_norm": 29.74066734313965, + "learning_rate": 5e-05, + "loss": 1.0329, + "num_input_tokens_seen": 565026556, + "step": 8447 + }, + { + "epoch": 0.9585248226950355, + "loss": 1.0808112621307373, + "loss_ce": 0.010010464116930962, + "loss_iou": 0.4140625, + "loss_num": 0.04833984375, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 565026556, + "step": 8447 + }, + { + "epoch": 0.9586382978723405, + "grad_norm": 31.039588928222656, + "learning_rate": 5e-05, + "loss": 1.1317, + "num_input_tokens_seen": 565093896, + "step": 8448 + }, + { + "epoch": 0.9586382978723405, + "loss": 1.1011282205581665, + "loss_ce": 0.005425120238214731, + "loss_iou": 0.45703125, + "loss_num": 0.035888671875, + "loss_xval": 1.09375, + "num_input_tokens_seen": 565093896, + "step": 8448 + }, + { + "epoch": 0.9587517730496454, + "grad_norm": 25.98451042175293, + "learning_rate": 5e-05, + "loss": 1.1571, + "num_input_tokens_seen": 565161436, + "step": 8449 + }, + { + "epoch": 0.9587517730496454, + "loss": 1.1516454219818115, + "loss_ce": 0.003940344788134098, + "loss_iou": 0.44921875, + "loss_num": 0.050048828125, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 565161436, + "step": 8449 + }, + { + "epoch": 0.9588652482269504, + "grad_norm": 24.794111251831055, + "learning_rate": 5e-05, + "loss": 1.2626, + "num_input_tokens_seen": 565227944, + "step": 8450 + }, + { + "epoch": 0.9588652482269504, + "loss": 1.3653066158294678, + "loss_ce": 0.008861269801855087, + "loss_iou": 0.51171875, + "loss_num": 0.06689453125, + "loss_xval": 1.359375, + "num_input_tokens_seen": 565227944, + "step": 8450 + }, + { + "epoch": 0.9589787234042553, + "grad_norm": 242.95591735839844, + "learning_rate": 5e-05, + "loss": 0.9438, + "num_input_tokens_seen": 565295188, + "step": 8451 + }, + { + "epoch": 0.9589787234042553, + "loss": 0.8266887068748474, + "loss_ce": 0.004911378026008606, + "loss_iou": 0.345703125, + "loss_num": 0.0260009765625, + "loss_xval": 0.8203125, + "num_input_tokens_seen": 565295188, + "step": 8451 + }, + { + "epoch": 0.9590921985815603, + "grad_norm": 19.304636001586914, + "learning_rate": 5e-05, + "loss": 1.1299, + "num_input_tokens_seen": 565360972, + "step": 8452 + }, + { + "epoch": 0.9590921985815603, + "loss": 1.2538636922836304, + "loss_ce": 0.03352675586938858, + "loss_iou": 0.47265625, + "loss_num": 0.05517578125, + "loss_xval": 1.21875, + "num_input_tokens_seen": 565360972, + "step": 8452 + }, + { + "epoch": 0.9592056737588652, + "grad_norm": 21.940380096435547, + "learning_rate": 5e-05, + "loss": 1.2434, + "num_input_tokens_seen": 565427852, + "step": 8453 + }, + { + "epoch": 0.9592056737588652, + "loss": 1.4125561714172363, + "loss_ce": 0.006794477812945843, + "loss_iou": 0.5234375, + "loss_num": 0.072265625, + "loss_xval": 1.40625, + "num_input_tokens_seen": 565427852, + "step": 8453 + }, + { + "epoch": 0.9593191489361702, + "grad_norm": 31.087251663208008, + "learning_rate": 5e-05, + "loss": 1.1916, + "num_input_tokens_seen": 565494852, + "step": 8454 + }, + { + "epoch": 0.9593191489361702, + "loss": 1.197697401046753, + "loss_ce": 0.010685695335268974, + "loss_iou": 0.484375, + "loss_num": 0.04345703125, + "loss_xval": 1.1875, + "num_input_tokens_seen": 565494852, + "step": 8454 + }, + { + "epoch": 0.9594326241134752, + "grad_norm": 34.99216842651367, + "learning_rate": 5e-05, + "loss": 1.1807, + "num_input_tokens_seen": 565561140, + "step": 8455 + }, + { + "epoch": 0.9594326241134752, + "loss": 1.0920993089675903, + "loss_ce": 0.007138341665267944, + "loss_iou": 0.4609375, + "loss_num": 0.03271484375, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 565561140, + "step": 8455 + }, + { + "epoch": 0.9595460992907802, + "grad_norm": 37.599849700927734, + "learning_rate": 5e-05, + "loss": 1.1747, + "num_input_tokens_seen": 565628540, + "step": 8456 + }, + { + "epoch": 0.9595460992907802, + "loss": 1.0695910453796387, + "loss_ce": 0.0066027212888002396, + "loss_iou": 0.4765625, + "loss_num": 0.02197265625, + "loss_xval": 1.0625, + "num_input_tokens_seen": 565628540, + "step": 8456 + }, + { + "epoch": 0.9596595744680851, + "grad_norm": 28.252899169921875, + "learning_rate": 5e-05, + "loss": 1.3101, + "num_input_tokens_seen": 565694944, + "step": 8457 + }, + { + "epoch": 0.9596595744680851, + "loss": 1.3771889209747314, + "loss_ce": 0.00658352579921484, + "loss_iou": 0.50390625, + "loss_num": 0.0732421875, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 565694944, + "step": 8457 + }, + { + "epoch": 0.9597730496453901, + "grad_norm": 22.908517837524414, + "learning_rate": 5e-05, + "loss": 1.0441, + "num_input_tokens_seen": 565762656, + "step": 8458 + }, + { + "epoch": 0.9597730496453901, + "loss": 1.2144685983657837, + "loss_ce": 0.004507764708250761, + "loss_iou": 0.5234375, + "loss_num": 0.033203125, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 565762656, + "step": 8458 + }, + { + "epoch": 0.959886524822695, + "grad_norm": 33.813575744628906, + "learning_rate": 5e-05, + "loss": 1.026, + "num_input_tokens_seen": 565828924, + "step": 8459 + }, + { + "epoch": 0.959886524822695, + "loss": 0.8795760869979858, + "loss_ce": 0.006040945183485746, + "loss_iou": 0.388671875, + "loss_num": 0.0194091796875, + "loss_xval": 0.875, + "num_input_tokens_seen": 565828924, + "step": 8459 + }, + { + "epoch": 0.96, + "grad_norm": 28.501379013061523, + "learning_rate": 5e-05, + "loss": 1.3917, + "num_input_tokens_seen": 565896092, + "step": 8460 + }, + { + "epoch": 0.96, + "loss": 1.4193466901779175, + "loss_ce": 0.007725601550191641, + "loss_iou": 0.53515625, + "loss_num": 0.06884765625, + "loss_xval": 1.4140625, + "num_input_tokens_seen": 565896092, + "step": 8460 + }, + { + "epoch": 0.9601134751773049, + "grad_norm": 21.395055770874023, + "learning_rate": 5e-05, + "loss": 1.0416, + "num_input_tokens_seen": 565962364, + "step": 8461 + }, + { + "epoch": 0.9601134751773049, + "loss": 0.873987078666687, + "loss_ce": 0.007287848740816116, + "loss_iou": 0.341796875, + "loss_num": 0.036865234375, + "loss_xval": 0.8671875, + "num_input_tokens_seen": 565962364, + "step": 8461 + }, + { + "epoch": 0.9602269503546099, + "grad_norm": 28.893095016479492, + "learning_rate": 5e-05, + "loss": 1.0601, + "num_input_tokens_seen": 566029060, + "step": 8462 + }, + { + "epoch": 0.9602269503546099, + "loss": 1.124535322189331, + "loss_ce": 0.01027741003781557, + "loss_iou": 0.470703125, + "loss_num": 0.0341796875, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 566029060, + "step": 8462 + }, + { + "epoch": 0.9603404255319149, + "grad_norm": 33.18663024902344, + "learning_rate": 5e-05, + "loss": 1.1153, + "num_input_tokens_seen": 566096000, + "step": 8463 + }, + { + "epoch": 0.9603404255319149, + "loss": 1.0959869623184204, + "loss_ce": 0.0032135811634361744, + "loss_iou": 0.44921875, + "loss_num": 0.039306640625, + "loss_xval": 1.09375, + "num_input_tokens_seen": 566096000, + "step": 8463 + }, + { + "epoch": 0.9604539007092199, + "grad_norm": 22.328964233398438, + "learning_rate": 5e-05, + "loss": 1.3347, + "num_input_tokens_seen": 566162540, + "step": 8464 + }, + { + "epoch": 0.9604539007092199, + "loss": 1.2862290143966675, + "loss_ce": 0.005711443722248077, + "loss_iou": 0.515625, + "loss_num": 0.050537109375, + "loss_xval": 1.28125, + "num_input_tokens_seen": 566162540, + "step": 8464 + }, + { + "epoch": 0.9605673758865249, + "grad_norm": 26.20109748840332, + "learning_rate": 5e-05, + "loss": 1.0673, + "num_input_tokens_seen": 566228704, + "step": 8465 + }, + { + "epoch": 0.9605673758865249, + "loss": 1.0211923122406006, + "loss_ce": 0.0050790635868906975, + "loss_iou": 0.44921875, + "loss_num": 0.0235595703125, + "loss_xval": 1.015625, + "num_input_tokens_seen": 566228704, + "step": 8465 + }, + { + "epoch": 0.9606808510638298, + "grad_norm": 22.926729202270508, + "learning_rate": 5e-05, + "loss": 1.2005, + "num_input_tokens_seen": 566294896, + "step": 8466 + }, + { + "epoch": 0.9606808510638298, + "loss": 1.1180363893508911, + "loss_ce": 0.006952390540391207, + "loss_iou": 0.47265625, + "loss_num": 0.03271484375, + "loss_xval": 1.109375, + "num_input_tokens_seen": 566294896, + "step": 8466 + }, + { + "epoch": 0.9607943262411347, + "grad_norm": 20.820363998413086, + "learning_rate": 5e-05, + "loss": 0.8338, + "num_input_tokens_seen": 566361184, + "step": 8467 + }, + { + "epoch": 0.9607943262411347, + "loss": 0.7151079177856445, + "loss_ce": 0.010060363449156284, + "loss_iou": 0.291015625, + "loss_num": 0.0245361328125, + "loss_xval": 0.703125, + "num_input_tokens_seen": 566361184, + "step": 8467 + }, + { + "epoch": 0.9609078014184397, + "grad_norm": 29.65336799621582, + "learning_rate": 5e-05, + "loss": 1.0663, + "num_input_tokens_seen": 566428924, + "step": 8468 + }, + { + "epoch": 0.9609078014184397, + "loss": 0.9396437406539917, + "loss_ce": 0.006049949675798416, + "loss_iou": 0.3828125, + "loss_num": 0.033203125, + "loss_xval": 0.93359375, + "num_input_tokens_seen": 566428924, + "step": 8468 + }, + { + "epoch": 0.9610212765957447, + "grad_norm": 22.38951873779297, + "learning_rate": 5e-05, + "loss": 1.1827, + "num_input_tokens_seen": 566495448, + "step": 8469 + }, + { + "epoch": 0.9610212765957447, + "loss": 1.0955184698104858, + "loss_ce": 0.010069208219647408, + "loss_iou": 0.427734375, + "loss_num": 0.045654296875, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 566495448, + "step": 8469 + }, + { + "epoch": 0.9611347517730496, + "grad_norm": 18.700714111328125, + "learning_rate": 5e-05, + "loss": 1.1515, + "num_input_tokens_seen": 566561396, + "step": 8470 + }, + { + "epoch": 0.9611347517730496, + "loss": 1.0418732166290283, + "loss_ce": 0.005252127535641193, + "loss_iou": 0.41796875, + "loss_num": 0.04052734375, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 566561396, + "step": 8470 + }, + { + "epoch": 0.9612482269503546, + "grad_norm": 45.93354415893555, + "learning_rate": 5e-05, + "loss": 1.1673, + "num_input_tokens_seen": 566628180, + "step": 8471 + }, + { + "epoch": 0.9612482269503546, + "loss": 1.1563913822174072, + "loss_ce": 0.008381199091672897, + "loss_iou": 0.4453125, + "loss_num": 0.052001953125, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 566628180, + "step": 8471 + }, + { + "epoch": 0.9613617021276596, + "grad_norm": 36.90776062011719, + "learning_rate": 5e-05, + "loss": 1.1965, + "num_input_tokens_seen": 566695856, + "step": 8472 + }, + { + "epoch": 0.9613617021276596, + "loss": 1.1477890014648438, + "loss_ce": 0.005210856907069683, + "loss_iou": 0.44921875, + "loss_num": 0.04833984375, + "loss_xval": 1.140625, + "num_input_tokens_seen": 566695856, + "step": 8472 + }, + { + "epoch": 0.9614751773049646, + "grad_norm": 28.24884796142578, + "learning_rate": 5e-05, + "loss": 1.1849, + "num_input_tokens_seen": 566763016, + "step": 8473 + }, + { + "epoch": 0.9614751773049646, + "loss": 1.2573935985565186, + "loss_ce": 0.0073936679400503635, + "loss_iou": 0.49609375, + "loss_num": 0.05126953125, + "loss_xval": 1.25, + "num_input_tokens_seen": 566763016, + "step": 8473 + }, + { + "epoch": 0.9615886524822695, + "grad_norm": 26.603437423706055, + "learning_rate": 5e-05, + "loss": 1.0151, + "num_input_tokens_seen": 566829224, + "step": 8474 + }, + { + "epoch": 0.9615886524822695, + "loss": 1.0418312549591064, + "loss_ce": 0.0030129868537187576, + "loss_iou": 0.396484375, + "loss_num": 0.049072265625, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 566829224, + "step": 8474 + }, + { + "epoch": 0.9617021276595744, + "grad_norm": 28.45425033569336, + "learning_rate": 5e-05, + "loss": 1.1428, + "num_input_tokens_seen": 566896080, + "step": 8475 + }, + { + "epoch": 0.9617021276595744, + "loss": 1.380286455154419, + "loss_ce": 0.009681028313934803, + "loss_iou": 0.53125, + "loss_num": 0.0615234375, + "loss_xval": 1.3671875, + "num_input_tokens_seen": 566896080, + "step": 8475 + }, + { + "epoch": 0.9618156028368794, + "grad_norm": 23.281044006347656, + "learning_rate": 5e-05, + "loss": 1.1321, + "num_input_tokens_seen": 566962556, + "step": 8476 + }, + { + "epoch": 0.9618156028368794, + "loss": 1.337904453277588, + "loss_ce": 0.008070450276136398, + "loss_iou": 0.53125, + "loss_num": 0.053955078125, + "loss_xval": 1.328125, + "num_input_tokens_seen": 566962556, + "step": 8476 + }, + { + "epoch": 0.9619290780141844, + "grad_norm": 30.84454345703125, + "learning_rate": 5e-05, + "loss": 1.1963, + "num_input_tokens_seen": 567028624, + "step": 8477 + }, + { + "epoch": 0.9619290780141844, + "loss": 1.0679728984832764, + "loss_ce": 0.00986736360937357, + "loss_iou": 0.4609375, + "loss_num": 0.027587890625, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 567028624, + "step": 8477 + }, + { + "epoch": 0.9620425531914893, + "grad_norm": 30.68266487121582, + "learning_rate": 5e-05, + "loss": 1.0995, + "num_input_tokens_seen": 567094792, + "step": 8478 + }, + { + "epoch": 0.9620425531914893, + "loss": 1.078692078590393, + "loss_ce": 0.009844409301877022, + "loss_iou": 0.47265625, + "loss_num": 0.0247802734375, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 567094792, + "step": 8478 + }, + { + "epoch": 0.9621560283687943, + "grad_norm": 25.05832862854004, + "learning_rate": 5e-05, + "loss": 1.1801, + "num_input_tokens_seen": 567161512, + "step": 8479 + }, + { + "epoch": 0.9621560283687943, + "loss": 1.217832088470459, + "loss_ce": 0.010312587022781372, + "loss_iou": 0.4765625, + "loss_num": 0.05126953125, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 567161512, + "step": 8479 + }, + { + "epoch": 0.9622695035460993, + "grad_norm": 24.877790451049805, + "learning_rate": 5e-05, + "loss": 1.2382, + "num_input_tokens_seen": 567228360, + "step": 8480 + }, + { + "epoch": 0.9622695035460993, + "loss": 1.3125948905944824, + "loss_ce": 0.005466050002723932, + "loss_iou": 0.49609375, + "loss_num": 0.06298828125, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 567228360, + "step": 8480 + }, + { + "epoch": 0.9623829787234043, + "grad_norm": 34.340797424316406, + "learning_rate": 5e-05, + "loss": 1.2515, + "num_input_tokens_seen": 567295016, + "step": 8481 + }, + { + "epoch": 0.9623829787234043, + "loss": 1.2322285175323486, + "loss_ce": 0.00566595233976841, + "loss_iou": 0.47265625, + "loss_num": 0.05615234375, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 567295016, + "step": 8481 + }, + { + "epoch": 0.9624964539007093, + "grad_norm": 27.319107055664062, + "learning_rate": 5e-05, + "loss": 1.3092, + "num_input_tokens_seen": 567362084, + "step": 8482 + }, + { + "epoch": 0.9624964539007093, + "loss": 1.2597523927688599, + "loss_ce": 0.005357873626053333, + "loss_iou": 0.5234375, + "loss_num": 0.0419921875, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 567362084, + "step": 8482 + }, + { + "epoch": 0.9626099290780142, + "grad_norm": 18.720590591430664, + "learning_rate": 5e-05, + "loss": 1.0347, + "num_input_tokens_seen": 567429232, + "step": 8483 + }, + { + "epoch": 0.9626099290780142, + "loss": 0.9887070059776306, + "loss_ce": 0.01165621355175972, + "loss_iou": 0.392578125, + "loss_num": 0.038330078125, + "loss_xval": 0.9765625, + "num_input_tokens_seen": 567429232, + "step": 8483 + }, + { + "epoch": 0.9627234042553191, + "grad_norm": 18.15302848815918, + "learning_rate": 5e-05, + "loss": 1.0839, + "num_input_tokens_seen": 567496108, + "step": 8484 + }, + { + "epoch": 0.9627234042553191, + "loss": 1.1065688133239746, + "loss_ce": 0.0064712390303611755, + "loss_iou": 0.44921875, + "loss_num": 0.040283203125, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 567496108, + "step": 8484 + }, + { + "epoch": 0.9628368794326241, + "grad_norm": 14.781879425048828, + "learning_rate": 5e-05, + "loss": 1.0337, + "num_input_tokens_seen": 567563024, + "step": 8485 + }, + { + "epoch": 0.9628368794326241, + "loss": 1.147166132926941, + "loss_ce": 0.004343944601714611, + "loss_iou": 0.4296875, + "loss_num": 0.056396484375, + "loss_xval": 1.140625, + "num_input_tokens_seen": 567563024, + "step": 8485 + }, + { + "epoch": 0.9629503546099291, + "grad_norm": 14.369681358337402, + "learning_rate": 5e-05, + "loss": 1.1777, + "num_input_tokens_seen": 567630300, + "step": 8486 + }, + { + "epoch": 0.9629503546099291, + "loss": 1.317685842514038, + "loss_ce": 0.007138952612876892, + "loss_iou": 0.5234375, + "loss_num": 0.0517578125, + "loss_xval": 1.3125, + "num_input_tokens_seen": 567630300, + "step": 8486 + }, + { + "epoch": 0.963063829787234, + "grad_norm": 18.985979080200195, + "learning_rate": 5e-05, + "loss": 0.9988, + "num_input_tokens_seen": 567697044, + "step": 8487 + }, + { + "epoch": 0.963063829787234, + "loss": 1.035060167312622, + "loss_ce": 0.0072280713357031345, + "loss_iou": 0.4296875, + "loss_num": 0.03369140625, + "loss_xval": 1.03125, + "num_input_tokens_seen": 567697044, + "step": 8487 + }, + { + "epoch": 0.963177304964539, + "grad_norm": 31.73249626159668, + "learning_rate": 5e-05, + "loss": 1.0638, + "num_input_tokens_seen": 567763908, + "step": 8488 + }, + { + "epoch": 0.963177304964539, + "loss": 1.1490346193313599, + "loss_ce": 0.004991677589714527, + "loss_iou": 0.42578125, + "loss_num": 0.058349609375, + "loss_xval": 1.140625, + "num_input_tokens_seen": 567763908, + "step": 8488 + }, + { + "epoch": 0.963290780141844, + "grad_norm": 33.22964859008789, + "learning_rate": 5e-05, + "loss": 1.0912, + "num_input_tokens_seen": 567831956, + "step": 8489 + }, + { + "epoch": 0.963290780141844, + "loss": 1.0125946998596191, + "loss_ce": 0.00722366850823164, + "loss_iou": 0.443359375, + "loss_num": 0.0240478515625, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 567831956, + "step": 8489 + }, + { + "epoch": 0.963404255319149, + "grad_norm": 49.180503845214844, + "learning_rate": 5e-05, + "loss": 1.1748, + "num_input_tokens_seen": 567898376, + "step": 8490 + }, + { + "epoch": 0.963404255319149, + "loss": 1.0232285261154175, + "loss_ce": 0.007359371054917574, + "loss_iou": 0.41796875, + "loss_num": 0.03564453125, + "loss_xval": 1.015625, + "num_input_tokens_seen": 567898376, + "step": 8490 + }, + { + "epoch": 0.963517730496454, + "grad_norm": 34.775882720947266, + "learning_rate": 5e-05, + "loss": 1.3883, + "num_input_tokens_seen": 567965856, + "step": 8491 + }, + { + "epoch": 0.963517730496454, + "loss": 1.3666841983795166, + "loss_ce": 0.01023889146745205, + "loss_iou": 0.5625, + "loss_num": 0.04736328125, + "loss_xval": 1.359375, + "num_input_tokens_seen": 567965856, + "step": 8491 + }, + { + "epoch": 0.9636312056737588, + "grad_norm": 17.02838134765625, + "learning_rate": 5e-05, + "loss": 0.9507, + "num_input_tokens_seen": 568032596, + "step": 8492 + }, + { + "epoch": 0.9636312056737588, + "loss": 0.7673616409301758, + "loss_ce": 0.011746378615498543, + "loss_iou": 0.328125, + "loss_num": 0.019775390625, + "loss_xval": 0.75390625, + "num_input_tokens_seen": 568032596, + "step": 8492 + }, + { + "epoch": 0.9637446808510638, + "grad_norm": 26.6237850189209, + "learning_rate": 5e-05, + "loss": 1.0113, + "num_input_tokens_seen": 568099200, + "step": 8493 + }, + { + "epoch": 0.9637446808510638, + "loss": 1.1322762966156006, + "loss_ce": 0.006787997670471668, + "loss_iou": 0.45703125, + "loss_num": 0.042236328125, + "loss_xval": 1.125, + "num_input_tokens_seen": 568099200, + "step": 8493 + }, + { + "epoch": 0.9638581560283688, + "grad_norm": 41.32244110107422, + "learning_rate": 5e-05, + "loss": 1.2196, + "num_input_tokens_seen": 568165816, + "step": 8494 + }, + { + "epoch": 0.9638581560283688, + "loss": 1.2308733463287354, + "loss_ce": 0.005775759927928448, + "loss_iou": 0.51953125, + "loss_num": 0.03662109375, + "loss_xval": 1.2265625, + "num_input_tokens_seen": 568165816, + "step": 8494 + }, + { + "epoch": 0.9639716312056738, + "grad_norm": 36.6297721862793, + "learning_rate": 5e-05, + "loss": 1.3386, + "num_input_tokens_seen": 568233316, + "step": 8495 + }, + { + "epoch": 0.9639716312056738, + "loss": 1.247692584991455, + "loss_ce": 0.008923020213842392, + "loss_iou": 0.5546875, + "loss_num": 0.02587890625, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 568233316, + "step": 8495 + }, + { + "epoch": 0.9640851063829787, + "grad_norm": 29.579923629760742, + "learning_rate": 5e-05, + "loss": 1.0063, + "num_input_tokens_seen": 568301120, + "step": 8496 + }, + { + "epoch": 0.9640851063829787, + "loss": 0.9877645969390869, + "loss_ce": 0.005342686548829079, + "loss_iou": 0.43359375, + "loss_num": 0.0225830078125, + "loss_xval": 0.984375, + "num_input_tokens_seen": 568301120, + "step": 8496 + }, + { + "epoch": 0.9641985815602837, + "grad_norm": 30.12457847595215, + "learning_rate": 5e-05, + "loss": 1.1011, + "num_input_tokens_seen": 568368688, + "step": 8497 + }, + { + "epoch": 0.9641985815602837, + "loss": 1.052663803100586, + "loss_ce": 0.009695141576230526, + "loss_iou": 0.43359375, + "loss_num": 0.03515625, + "loss_xval": 1.046875, + "num_input_tokens_seen": 568368688, + "step": 8497 + }, + { + "epoch": 0.9643120567375887, + "grad_norm": 28.805055618286133, + "learning_rate": 5e-05, + "loss": 1.0647, + "num_input_tokens_seen": 568435700, + "step": 8498 + }, + { + "epoch": 0.9643120567375887, + "loss": 1.083722472190857, + "loss_ce": 0.0036442973650991917, + "loss_iou": 0.46875, + "loss_num": 0.028564453125, + "loss_xval": 1.078125, + "num_input_tokens_seen": 568435700, + "step": 8498 + }, + { + "epoch": 0.9644255319148937, + "grad_norm": 20.91286849975586, + "learning_rate": 5e-05, + "loss": 1.0773, + "num_input_tokens_seen": 568502416, + "step": 8499 + }, + { + "epoch": 0.9644255319148937, + "loss": 1.1876006126403809, + "loss_ce": 0.004983334336429834, + "loss_iou": 0.494140625, + "loss_num": 0.03857421875, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 568502416, + "step": 8499 + }, + { + "epoch": 0.9645390070921985, + "grad_norm": 12.020109176635742, + "learning_rate": 5e-05, + "loss": 1.238, + "num_input_tokens_seen": 568568940, + "step": 8500 + }, + { + "epoch": 0.9645390070921985, + "eval_seeclick_CIoU": 0.4920806288719177, + "eval_seeclick_GIoU": 0.48214516043663025, + "eval_seeclick_IoU": 0.5542092323303223, + "eval_seeclick_MAE_all": 0.13070832937955856, + "eval_seeclick_MAE_h": 0.06050192192196846, + "eval_seeclick_MAE_w": 0.1004745215177536, + "eval_seeclick_MAE_x_boxes": 0.1700640246272087, + "eval_seeclick_MAE_y_boxes": 0.10280213132500648, + "eval_seeclick_NUM_probability": 0.9998638927936554, + "eval_seeclick_inside_bbox": 0.7239583432674408, + "eval_seeclick_loss": 2.120236396789551, + "eval_seeclick_loss_ce": 0.013900938909500837, + "eval_seeclick_loss_iou": 0.7412109375, + "eval_seeclick_loss_num": 0.13109970092773438, + "eval_seeclick_loss_xval": 2.1383056640625, + "eval_seeclick_runtime": 66.1208, + "eval_seeclick_samples_per_second": 0.711, + "eval_seeclick_steps_per_second": 0.03, + "num_input_tokens_seen": 568568940, + "step": 8500 + }, + { + "epoch": 0.9645390070921985, + "eval_icons_CIoU": 0.4980957508087158, + "eval_icons_GIoU": 0.5105675607919693, + "eval_icons_IoU": 0.5404826104640961, + "eval_icons_MAE_all": 0.1295335851609707, + "eval_icons_MAE_h": 0.05382731184363365, + "eval_icons_MAE_w": 0.1413898691534996, + "eval_icons_MAE_x_boxes": 0.13806955888867378, + "eval_icons_MAE_y_boxes": 0.04105625208467245, + "eval_icons_NUM_probability": 0.9999868273735046, + "eval_icons_inside_bbox": 0.7326388955116272, + "eval_icons_loss": 2.2110180854797363, + "eval_icons_loss_ce": 6.815152482886333e-05, + "eval_icons_loss_iou": 0.777587890625, + "eval_icons_loss_num": 0.12941741943359375, + "eval_icons_loss_xval": 2.2021484375, + "eval_icons_runtime": 69.3128, + "eval_icons_samples_per_second": 0.721, + "eval_icons_steps_per_second": 0.029, + "num_input_tokens_seen": 568568940, + "step": 8500 + }, + { + "epoch": 0.9645390070921985, + "eval_screenspot_CIoU": 0.26356201867262524, + "eval_screenspot_GIoU": 0.2184008757273356, + "eval_screenspot_IoU": 0.3624574542045593, + "eval_screenspot_MAE_all": 0.2179192155599594, + "eval_screenspot_MAE_h": 0.14117426673571268, + "eval_screenspot_MAE_w": 0.1656809151172638, + "eval_screenspot_MAE_x_boxes": 0.3079611857732137, + "eval_screenspot_MAE_y_boxes": 0.11669818808635075, + "eval_screenspot_NUM_probability": 0.9995583891868591, + "eval_screenspot_inside_bbox": 0.577916661898295, + "eval_screenspot_loss": 2.9239914417266846, + "eval_screenspot_loss_ce": 0.01259295673420032, + "eval_screenspot_loss_iou": 0.9231770833333334, + "eval_screenspot_loss_num": 0.22679646809895834, + "eval_screenspot_loss_xval": 2.98046875, + "eval_screenspot_runtime": 114.8811, + "eval_screenspot_samples_per_second": 0.775, + "eval_screenspot_steps_per_second": 0.026, + "num_input_tokens_seen": 568568940, + "step": 8500 + }, + { + "epoch": 0.9645390070921985, + "eval_compot_CIoU": 0.22225983440876007, + "eval_compot_GIoU": 0.1768219918012619, + "eval_compot_IoU": 0.334790363907814, + "eval_compot_MAE_all": 0.23575982451438904, + "eval_compot_MAE_h": 0.08927956223487854, + "eval_compot_MAE_w": 0.23818249255418777, + "eval_compot_MAE_x_boxes": 0.2475108653306961, + "eval_compot_MAE_y_boxes": 0.1353021040558815, + "eval_compot_NUM_probability": 0.999486654996872, + "eval_compot_inside_bbox": 0.5, + "eval_compot_loss": 3.0847487449645996, + "eval_compot_loss_ce": 0.008023877162486315, + "eval_compot_loss_iou": 0.957763671875, + "eval_compot_loss_num": 0.237823486328125, + "eval_compot_loss_xval": 3.1044921875, + "eval_compot_runtime": 67.8594, + "eval_compot_samples_per_second": 0.737, + "eval_compot_steps_per_second": 0.029, + "num_input_tokens_seen": 568568940, + "step": 8500 + }, + { + "epoch": 0.9645390070921985, + "loss": 3.184418201446533, + "loss_ce": 0.00863686203956604, + "loss_iou": 0.96484375, + "loss_num": 0.2490234375, + "loss_xval": 3.171875, + "num_input_tokens_seen": 568568940, + "step": 8500 + }, + { + "epoch": 0.9646524822695035, + "grad_norm": 22.386070251464844, + "learning_rate": 5e-05, + "loss": 1.1841, + "num_input_tokens_seen": 568635644, + "step": 8501 + }, + { + "epoch": 0.9646524822695035, + "loss": 1.194843053817749, + "loss_ce": 0.007831304334104061, + "loss_iou": 0.48828125, + "loss_num": 0.0419921875, + "loss_xval": 1.1875, + "num_input_tokens_seen": 568635644, + "step": 8501 + }, + { + "epoch": 0.9647659574468085, + "grad_norm": 28.375551223754883, + "learning_rate": 5e-05, + "loss": 1.1748, + "num_input_tokens_seen": 568703312, + "step": 8502 + }, + { + "epoch": 0.9647659574468085, + "loss": 1.1630454063415527, + "loss_ce": 0.005330463871359825, + "loss_iou": 0.47265625, + "loss_num": 0.042236328125, + "loss_xval": 1.15625, + "num_input_tokens_seen": 568703312, + "step": 8502 + }, + { + "epoch": 0.9648794326241135, + "grad_norm": 31.3717041015625, + "learning_rate": 5e-05, + "loss": 0.9873, + "num_input_tokens_seen": 568770060, + "step": 8503 + }, + { + "epoch": 0.9648794326241135, + "loss": 0.9129906892776489, + "loss_ce": 0.004543466027826071, + "loss_iou": 0.41015625, + "loss_num": 0.0177001953125, + "loss_xval": 0.91015625, + "num_input_tokens_seen": 568770060, + "step": 8503 + }, + { + "epoch": 0.9649929078014184, + "grad_norm": 40.820526123046875, + "learning_rate": 5e-05, + "loss": 1.1115, + "num_input_tokens_seen": 568837280, + "step": 8504 + }, + { + "epoch": 0.9649929078014184, + "loss": 1.0883393287658691, + "loss_ce": 0.0043549248948693275, + "loss_iou": 0.458984375, + "loss_num": 0.032958984375, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 568837280, + "step": 8504 + }, + { + "epoch": 0.9651063829787234, + "grad_norm": 38.285682678222656, + "learning_rate": 5e-05, + "loss": 1.2591, + "num_input_tokens_seen": 568904060, + "step": 8505 + }, + { + "epoch": 0.9651063829787234, + "loss": 1.3817024230957031, + "loss_ce": 0.006702481769025326, + "loss_iou": 0.6015625, + "loss_num": 0.034912109375, + "loss_xval": 1.375, + "num_input_tokens_seen": 568904060, + "step": 8505 + }, + { + "epoch": 0.9652198581560284, + "grad_norm": 14.378414154052734, + "learning_rate": 5e-05, + "loss": 1.0372, + "num_input_tokens_seen": 568971116, + "step": 8506 + }, + { + "epoch": 0.9652198581560284, + "loss": 1.0393023490905762, + "loss_ce": 0.005122619215399027, + "loss_iou": 0.4296875, + "loss_num": 0.03466796875, + "loss_xval": 1.03125, + "num_input_tokens_seen": 568971116, + "step": 8506 + }, + { + "epoch": 0.9653333333333334, + "grad_norm": 10.082069396972656, + "learning_rate": 5e-05, + "loss": 0.9935, + "num_input_tokens_seen": 569037460, + "step": 8507 + }, + { + "epoch": 0.9653333333333334, + "loss": 0.9882411360740662, + "loss_ce": 0.006795820780098438, + "loss_iou": 0.40625, + "loss_num": 0.03369140625, + "loss_xval": 0.98046875, + "num_input_tokens_seen": 569037460, + "step": 8507 + }, + { + "epoch": 0.9654468085106382, + "grad_norm": 22.69618034362793, + "learning_rate": 5e-05, + "loss": 0.9964, + "num_input_tokens_seen": 569104688, + "step": 8508 + }, + { + "epoch": 0.9654468085106382, + "loss": 0.9514611959457397, + "loss_ce": 0.009566687047481537, + "loss_iou": 0.40234375, + "loss_num": 0.0269775390625, + "loss_xval": 0.94140625, + "num_input_tokens_seen": 569104688, + "step": 8508 + }, + { + "epoch": 0.9655602836879432, + "grad_norm": 24.450681686401367, + "learning_rate": 5e-05, + "loss": 1.0763, + "num_input_tokens_seen": 569171564, + "step": 8509 + }, + { + "epoch": 0.9655602836879432, + "loss": 1.0043749809265137, + "loss_ce": 0.004374884068965912, + "loss_iou": 0.421875, + "loss_num": 0.031494140625, + "loss_xval": 1.0, + "num_input_tokens_seen": 569171564, + "step": 8509 + }, + { + "epoch": 0.9656737588652482, + "grad_norm": 18.204330444335938, + "learning_rate": 5e-05, + "loss": 0.9797, + "num_input_tokens_seen": 569238436, + "step": 8510 + }, + { + "epoch": 0.9656737588652482, + "loss": 0.9299611449241638, + "loss_ce": 0.003203329863026738, + "loss_iou": 0.380859375, + "loss_num": 0.032958984375, + "loss_xval": 0.92578125, + "num_input_tokens_seen": 569238436, + "step": 8510 + }, + { + "epoch": 0.9657872340425532, + "grad_norm": 14.835429191589355, + "learning_rate": 5e-05, + "loss": 0.9992, + "num_input_tokens_seen": 569305052, + "step": 8511 + }, + { + "epoch": 0.9657872340425532, + "loss": 1.0413745641708374, + "loss_ce": 0.0091480053961277, + "loss_iou": 0.419921875, + "loss_num": 0.03857421875, + "loss_xval": 1.03125, + "num_input_tokens_seen": 569305052, + "step": 8511 + }, + { + "epoch": 0.9659007092198582, + "grad_norm": 24.385408401489258, + "learning_rate": 5e-05, + "loss": 1.1626, + "num_input_tokens_seen": 569371980, + "step": 8512 + }, + { + "epoch": 0.9659007092198582, + "loss": 1.0577114820480347, + "loss_ce": 0.004183635115623474, + "loss_iou": 0.435546875, + "loss_num": 0.0361328125, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 569371980, + "step": 8512 + }, + { + "epoch": 0.9660141843971631, + "grad_norm": 49.22040939331055, + "learning_rate": 5e-05, + "loss": 1.352, + "num_input_tokens_seen": 569438544, + "step": 8513 + }, + { + "epoch": 0.9660141843971631, + "loss": 1.6405134201049805, + "loss_ce": 0.005747748073190451, + "loss_iou": 0.6640625, + "loss_num": 0.061767578125, + "loss_xval": 1.6328125, + "num_input_tokens_seen": 569438544, + "step": 8513 + }, + { + "epoch": 0.9661276595744681, + "grad_norm": 35.74392318725586, + "learning_rate": 5e-05, + "loss": 1.3409, + "num_input_tokens_seen": 569505992, + "step": 8514 + }, + { + "epoch": 0.9661276595744681, + "loss": 1.4583711624145508, + "loss_ce": 0.00866415910422802, + "loss_iou": 0.609375, + "loss_num": 0.046630859375, + "loss_xval": 1.453125, + "num_input_tokens_seen": 569505992, + "step": 8514 + }, + { + "epoch": 0.9662411347517731, + "grad_norm": 29.171981811523438, + "learning_rate": 5e-05, + "loss": 1.1254, + "num_input_tokens_seen": 569571392, + "step": 8515 + }, + { + "epoch": 0.9662411347517731, + "loss": 0.9365425705909729, + "loss_ce": 0.007343406789004803, + "loss_iou": 0.3984375, + "loss_num": 0.0262451171875, + "loss_xval": 0.9296875, + "num_input_tokens_seen": 569571392, + "step": 8515 + }, + { + "epoch": 0.9663546099290781, + "grad_norm": 22.643112182617188, + "learning_rate": 5e-05, + "loss": 0.9666, + "num_input_tokens_seen": 569637756, + "step": 8516 + }, + { + "epoch": 0.9663546099290781, + "loss": 0.9079878926277161, + "loss_ce": 0.005155866965651512, + "loss_iou": 0.357421875, + "loss_num": 0.03759765625, + "loss_xval": 0.90234375, + "num_input_tokens_seen": 569637756, + "step": 8516 + }, + { + "epoch": 0.9664680851063829, + "grad_norm": 13.810038566589355, + "learning_rate": 5e-05, + "loss": 1.0021, + "num_input_tokens_seen": 569704732, + "step": 8517 + }, + { + "epoch": 0.9664680851063829, + "loss": 1.0581495761871338, + "loss_ce": 0.009077190421521664, + "loss_iou": 0.408203125, + "loss_num": 0.046875, + "loss_xval": 1.046875, + "num_input_tokens_seen": 569704732, + "step": 8517 + }, + { + "epoch": 0.9665815602836879, + "grad_norm": 11.459569931030273, + "learning_rate": 5e-05, + "loss": 1.0368, + "num_input_tokens_seen": 569771480, + "step": 8518 + }, + { + "epoch": 0.9665815602836879, + "loss": 1.1138099431991577, + "loss_ce": 0.0071204365231096745, + "loss_iou": 0.42578125, + "loss_num": 0.051513671875, + "loss_xval": 1.109375, + "num_input_tokens_seen": 569771480, + "step": 8518 + }, + { + "epoch": 0.9666950354609929, + "grad_norm": 16.063377380371094, + "learning_rate": 5e-05, + "loss": 0.8015, + "num_input_tokens_seen": 569837464, + "step": 8519 + }, + { + "epoch": 0.9666950354609929, + "loss": 0.8486765027046204, + "loss_ce": 0.008832765743136406, + "loss_iou": 0.34765625, + "loss_num": 0.0289306640625, + "loss_xval": 0.83984375, + "num_input_tokens_seen": 569837464, + "step": 8519 + }, + { + "epoch": 0.9668085106382979, + "grad_norm": 28.12065315246582, + "learning_rate": 5e-05, + "loss": 1.2409, + "num_input_tokens_seen": 569904284, + "step": 8520 + }, + { + "epoch": 0.9668085106382979, + "loss": 1.215410828590393, + "loss_ce": 0.007891261018812656, + "loss_iou": 0.4921875, + "loss_num": 0.044921875, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 569904284, + "step": 8520 + }, + { + "epoch": 0.9669219858156028, + "grad_norm": 43.553951263427734, + "learning_rate": 5e-05, + "loss": 1.238, + "num_input_tokens_seen": 569970940, + "step": 8521 + }, + { + "epoch": 0.9669219858156028, + "loss": 1.225163459777832, + "loss_ce": 0.00348371802829206, + "loss_iou": 0.53125, + "loss_num": 0.03125, + "loss_xval": 1.21875, + "num_input_tokens_seen": 569970940, + "step": 8521 + }, + { + "epoch": 0.9670354609929078, + "grad_norm": 27.907983779907227, + "learning_rate": 5e-05, + "loss": 1.326, + "num_input_tokens_seen": 570037612, + "step": 8522 + }, + { + "epoch": 0.9670354609929078, + "loss": 1.188056230545044, + "loss_ce": 0.005957775749266148, + "loss_iou": 0.515625, + "loss_num": 0.0296630859375, + "loss_xval": 1.1796875, + "num_input_tokens_seen": 570037612, + "step": 8522 + }, + { + "epoch": 0.9671489361702128, + "grad_norm": 16.075891494750977, + "learning_rate": 5e-05, + "loss": 1.099, + "num_input_tokens_seen": 570105036, + "step": 8523 + }, + { + "epoch": 0.9671489361702128, + "loss": 1.1289585828781128, + "loss_ce": 0.005911675281822681, + "loss_iou": 0.447265625, + "loss_num": 0.045654296875, + "loss_xval": 1.125, + "num_input_tokens_seen": 570105036, + "step": 8523 + }, + { + "epoch": 0.9672624113475178, + "grad_norm": 39.97523498535156, + "learning_rate": 5e-05, + "loss": 1.0974, + "num_input_tokens_seen": 570171868, + "step": 8524 + }, + { + "epoch": 0.9672624113475178, + "loss": 1.0038702487945557, + "loss_ce": 0.006311721634119749, + "loss_iou": 0.421875, + "loss_num": 0.0308837890625, + "loss_xval": 0.99609375, + "num_input_tokens_seen": 570171868, + "step": 8524 + }, + { + "epoch": 0.9673758865248226, + "grad_norm": 57.85662841796875, + "learning_rate": 5e-05, + "loss": 1.5695, + "num_input_tokens_seen": 570239420, + "step": 8525 + }, + { + "epoch": 0.9673758865248226, + "loss": 1.6665596961975098, + "loss_ce": 0.0034738248214125633, + "loss_iou": 0.671875, + "loss_num": 0.064453125, + "loss_xval": 1.6640625, + "num_input_tokens_seen": 570239420, + "step": 8525 + }, + { + "epoch": 0.9674893617021276, + "grad_norm": 45.531028747558594, + "learning_rate": 5e-05, + "loss": 1.5688, + "num_input_tokens_seen": 570305984, + "step": 8526 + }, + { + "epoch": 0.9674893617021276, + "loss": 1.7209515571594238, + "loss_ce": 0.008060875348746777, + "loss_iou": 0.64453125, + "loss_num": 0.08544921875, + "loss_xval": 1.7109375, + "num_input_tokens_seen": 570305984, + "step": 8526 + }, + { + "epoch": 0.9676028368794326, + "grad_norm": 28.700489044189453, + "learning_rate": 5e-05, + "loss": 1.2516, + "num_input_tokens_seen": 570372608, + "step": 8527 + }, + { + "epoch": 0.9676028368794326, + "loss": 1.4503265619277954, + "loss_ce": 0.006478890776634216, + "loss_iou": 0.57421875, + "loss_num": 0.059326171875, + "loss_xval": 1.4453125, + "num_input_tokens_seen": 570372608, + "step": 8527 + }, + { + "epoch": 0.9677163120567376, + "grad_norm": 24.478193283081055, + "learning_rate": 5e-05, + "loss": 1.2406, + "num_input_tokens_seen": 570439048, + "step": 8528 + }, + { + "epoch": 0.9677163120567376, + "loss": 1.2887606620788574, + "loss_ce": 0.003116235602647066, + "loss_iou": 0.546875, + "loss_num": 0.038818359375, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 570439048, + "step": 8528 + }, + { + "epoch": 0.9678297872340426, + "grad_norm": 24.06743049621582, + "learning_rate": 5e-05, + "loss": 1.0511, + "num_input_tokens_seen": 570505952, + "step": 8529 + }, + { + "epoch": 0.9678297872340426, + "loss": 1.0755367279052734, + "loss_ce": 0.006200713105499744, + "loss_iou": 0.404296875, + "loss_num": 0.05224609375, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 570505952, + "step": 8529 + }, + { + "epoch": 0.9679432624113475, + "grad_norm": 28.694250106811523, + "learning_rate": 5e-05, + "loss": 1.0181, + "num_input_tokens_seen": 570573032, + "step": 8530 + }, + { + "epoch": 0.9679432624113475, + "loss": 1.032049298286438, + "loss_ce": 0.007879471406340599, + "loss_iou": 0.375, + "loss_num": 0.0546875, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 570573032, + "step": 8530 + }, + { + "epoch": 0.9680567375886525, + "grad_norm": 23.107810974121094, + "learning_rate": 5e-05, + "loss": 1.0701, + "num_input_tokens_seen": 570639508, + "step": 8531 + }, + { + "epoch": 0.9680567375886525, + "loss": 1.105494737625122, + "loss_ce": 0.0053971512243151665, + "loss_iou": 0.4609375, + "loss_num": 0.03564453125, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 570639508, + "step": 8531 + }, + { + "epoch": 0.9681702127659575, + "grad_norm": 20.98639488220215, + "learning_rate": 5e-05, + "loss": 1.1506, + "num_input_tokens_seen": 570705996, + "step": 8532 + }, + { + "epoch": 0.9681702127659575, + "loss": 1.0829261541366577, + "loss_ce": 0.005533543415367603, + "loss_iou": 0.42578125, + "loss_num": 0.04541015625, + "loss_xval": 1.078125, + "num_input_tokens_seen": 570705996, + "step": 8532 + }, + { + "epoch": 0.9682836879432624, + "grad_norm": 14.54517650604248, + "learning_rate": 5e-05, + "loss": 1.0128, + "num_input_tokens_seen": 570772164, + "step": 8533 + }, + { + "epoch": 0.9682836879432624, + "loss": 0.9879904389381409, + "loss_ce": 0.010451311245560646, + "loss_iou": 0.40234375, + "loss_num": 0.034423828125, + "loss_xval": 0.9765625, + "num_input_tokens_seen": 570772164, + "step": 8533 + }, + { + "epoch": 0.9683971631205673, + "grad_norm": 17.834815979003906, + "learning_rate": 5e-05, + "loss": 0.9573, + "num_input_tokens_seen": 570838456, + "step": 8534 + }, + { + "epoch": 0.9683971631205673, + "loss": 0.9167196750640869, + "loss_ce": 0.008272414095699787, + "loss_iou": 0.3671875, + "loss_num": 0.03466796875, + "loss_xval": 0.91015625, + "num_input_tokens_seen": 570838456, + "step": 8534 + }, + { + "epoch": 0.9685106382978723, + "grad_norm": 56.721946716308594, + "learning_rate": 5e-05, + "loss": 0.8891, + "num_input_tokens_seen": 570904184, + "step": 8535 + }, + { + "epoch": 0.9685106382978723, + "loss": 0.9608098268508911, + "loss_ce": 0.006708239670842886, + "loss_iou": 0.4140625, + "loss_num": 0.025634765625, + "loss_xval": 0.953125, + "num_input_tokens_seen": 570904184, + "step": 8535 + }, + { + "epoch": 0.9686241134751773, + "grad_norm": 27.664169311523438, + "learning_rate": 5e-05, + "loss": 1.1483, + "num_input_tokens_seen": 570971876, + "step": 8536 + }, + { + "epoch": 0.9686241134751773, + "loss": 1.1641111373901367, + "loss_ce": 0.007861117832362652, + "loss_iou": 0.482421875, + "loss_num": 0.03759765625, + "loss_xval": 1.15625, + "num_input_tokens_seen": 570971876, + "step": 8536 + }, + { + "epoch": 0.9687375886524823, + "grad_norm": 25.834930419921875, + "learning_rate": 5e-05, + "loss": 1.1141, + "num_input_tokens_seen": 571039000, + "step": 8537 + }, + { + "epoch": 0.9687375886524823, + "loss": 1.053584337234497, + "loss_ce": 0.0052444348111748695, + "loss_iou": 0.427734375, + "loss_num": 0.03857421875, + "loss_xval": 1.046875, + "num_input_tokens_seen": 571039000, + "step": 8537 + }, + { + "epoch": 0.9688510638297873, + "grad_norm": 33.0144157409668, + "learning_rate": 5e-05, + "loss": 1.2634, + "num_input_tokens_seen": 571105708, + "step": 8538 + }, + { + "epoch": 0.9688510638297873, + "loss": 1.202516794204712, + "loss_ce": 0.008669093251228333, + "loss_iou": 0.45703125, + "loss_num": 0.055908203125, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 571105708, + "step": 8538 + }, + { + "epoch": 0.9689645390070922, + "grad_norm": 36.51931381225586, + "learning_rate": 5e-05, + "loss": 1.3326, + "num_input_tokens_seen": 571173152, + "step": 8539 + }, + { + "epoch": 0.9689645390070922, + "loss": 1.5191326141357422, + "loss_ce": 0.012296689674258232, + "loss_iou": 0.62109375, + "loss_num": 0.05322265625, + "loss_xval": 1.5078125, + "num_input_tokens_seen": 571173152, + "step": 8539 + }, + { + "epoch": 0.9690780141843972, + "grad_norm": 27.985061645507812, + "learning_rate": 5e-05, + "loss": 1.1886, + "num_input_tokens_seen": 571239484, + "step": 8540 + }, + { + "epoch": 0.9690780141843972, + "loss": 1.2218623161315918, + "loss_ce": 0.005065452307462692, + "loss_iou": 0.498046875, + "loss_num": 0.0439453125, + "loss_xval": 1.21875, + "num_input_tokens_seen": 571239484, + "step": 8540 + }, + { + "epoch": 0.9691914893617021, + "grad_norm": 14.184792518615723, + "learning_rate": 5e-05, + "loss": 0.9819, + "num_input_tokens_seen": 571305984, + "step": 8541 + }, + { + "epoch": 0.9691914893617021, + "loss": 0.8624393939971924, + "loss_ce": 0.0069707175716757774, + "loss_iou": 0.3515625, + "loss_num": 0.030517578125, + "loss_xval": 0.85546875, + "num_input_tokens_seen": 571305984, + "step": 8541 + }, + { + "epoch": 0.969304964539007, + "grad_norm": 20.181110382080078, + "learning_rate": 5e-05, + "loss": 1.1442, + "num_input_tokens_seen": 571373224, + "step": 8542 + }, + { + "epoch": 0.969304964539007, + "loss": 1.362884283065796, + "loss_ce": 0.010345254093408585, + "loss_iou": 0.51953125, + "loss_num": 0.062255859375, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 571373224, + "step": 8542 + }, + { + "epoch": 0.969418439716312, + "grad_norm": 28.335161209106445, + "learning_rate": 5e-05, + "loss": 1.0501, + "num_input_tokens_seen": 571439304, + "step": 8543 + }, + { + "epoch": 0.969418439716312, + "loss": 1.0960325002670288, + "loss_ce": 0.007653605658560991, + "loss_iou": 0.427734375, + "loss_num": 0.04638671875, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 571439304, + "step": 8543 + }, + { + "epoch": 0.969531914893617, + "grad_norm": 29.176782608032227, + "learning_rate": 5e-05, + "loss": 1.1283, + "num_input_tokens_seen": 571506612, + "step": 8544 + }, + { + "epoch": 0.969531914893617, + "loss": 1.0139400959014893, + "loss_ce": 0.0042965468019247055, + "loss_iou": 0.4453125, + "loss_num": 0.0238037109375, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 571506612, + "step": 8544 + }, + { + "epoch": 0.969645390070922, + "grad_norm": 21.246196746826172, + "learning_rate": 5e-05, + "loss": 1.0901, + "num_input_tokens_seen": 571573236, + "step": 8545 + }, + { + "epoch": 0.969645390070922, + "loss": 0.988274335861206, + "loss_ce": 0.007317292969673872, + "loss_iou": 0.388671875, + "loss_num": 0.041015625, + "loss_xval": 0.98046875, + "num_input_tokens_seen": 571573236, + "step": 8545 + }, + { + "epoch": 0.969758865248227, + "grad_norm": 31.707889556884766, + "learning_rate": 5e-05, + "loss": 1.1215, + "num_input_tokens_seen": 571640032, + "step": 8546 + }, + { + "epoch": 0.969758865248227, + "loss": 1.364042043685913, + "loss_ce": 0.008084926754236221, + "loss_iou": 0.55859375, + "loss_num": 0.048095703125, + "loss_xval": 1.359375, + "num_input_tokens_seen": 571640032, + "step": 8546 + }, + { + "epoch": 0.9698723404255319, + "grad_norm": 32.41674041748047, + "learning_rate": 5e-05, + "loss": 1.1305, + "num_input_tokens_seen": 571707124, + "step": 8547 + }, + { + "epoch": 0.9698723404255319, + "loss": 1.1474170684814453, + "loss_ce": 0.010209959000349045, + "loss_iou": 0.47265625, + "loss_num": 0.038330078125, + "loss_xval": 1.140625, + "num_input_tokens_seen": 571707124, + "step": 8547 + }, + { + "epoch": 0.9699858156028369, + "grad_norm": 29.087482452392578, + "learning_rate": 5e-05, + "loss": 1.0679, + "num_input_tokens_seen": 571773000, + "step": 8548 + }, + { + "epoch": 0.9699858156028369, + "loss": 0.9343105554580688, + "loss_ce": 0.00779686076566577, + "loss_iou": 0.38671875, + "loss_num": 0.0302734375, + "loss_xval": 0.92578125, + "num_input_tokens_seen": 571773000, + "step": 8548 + }, + { + "epoch": 0.9700992907801419, + "grad_norm": 21.14975357055664, + "learning_rate": 5e-05, + "loss": 1.0288, + "num_input_tokens_seen": 571840404, + "step": 8549 + }, + { + "epoch": 0.9700992907801419, + "loss": 1.2629964351654053, + "loss_ce": 0.00469568558037281, + "loss_iou": 0.48046875, + "loss_num": 0.059326171875, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 571840404, + "step": 8549 + }, + { + "epoch": 0.9702127659574468, + "grad_norm": 26.958948135375977, + "learning_rate": 5e-05, + "loss": 1.0539, + "num_input_tokens_seen": 571906588, + "step": 8550 + }, + { + "epoch": 0.9702127659574468, + "loss": 1.0964807271957397, + "loss_ce": 0.005172123201191425, + "loss_iou": 0.462890625, + "loss_num": 0.032958984375, + "loss_xval": 1.09375, + "num_input_tokens_seen": 571906588, + "step": 8550 + }, + { + "epoch": 0.9703262411347517, + "grad_norm": 45.76063537597656, + "learning_rate": 5e-05, + "loss": 1.1699, + "num_input_tokens_seen": 571973372, + "step": 8551 + }, + { + "epoch": 0.9703262411347517, + "loss": 1.0807852745056152, + "loss_ce": 0.006078123580664396, + "loss_iou": 0.43359375, + "loss_num": 0.041748046875, + "loss_xval": 1.078125, + "num_input_tokens_seen": 571973372, + "step": 8551 + }, + { + "epoch": 0.9704397163120567, + "grad_norm": 36.07223129272461, + "learning_rate": 5e-05, + "loss": 1.4549, + "num_input_tokens_seen": 572040696, + "step": 8552 + }, + { + "epoch": 0.9704397163120567, + "loss": 1.7271302938461304, + "loss_ce": 0.00838030781596899, + "loss_iou": 0.64453125, + "loss_num": 0.08544921875, + "loss_xval": 1.71875, + "num_input_tokens_seen": 572040696, + "step": 8552 + }, + { + "epoch": 0.9705531914893617, + "grad_norm": 16.14217185974121, + "learning_rate": 5e-05, + "loss": 1.1583, + "num_input_tokens_seen": 572107460, + "step": 8553 + }, + { + "epoch": 0.9705531914893617, + "loss": 1.169623613357544, + "loss_ce": 0.009467234835028648, + "loss_iou": 0.4765625, + "loss_num": 0.041259765625, + "loss_xval": 1.15625, + "num_input_tokens_seen": 572107460, + "step": 8553 + }, + { + "epoch": 0.9706666666666667, + "grad_norm": 23.207918167114258, + "learning_rate": 5e-05, + "loss": 1.0869, + "num_input_tokens_seen": 572174000, + "step": 8554 + }, + { + "epoch": 0.9706666666666667, + "loss": 1.134469747543335, + "loss_ce": 0.01239947322756052, + "loss_iou": 0.43359375, + "loss_num": 0.05126953125, + "loss_xval": 1.125, + "num_input_tokens_seen": 572174000, + "step": 8554 + }, + { + "epoch": 0.9707801418439717, + "grad_norm": 40.495357513427734, + "learning_rate": 5e-05, + "loss": 1.3422, + "num_input_tokens_seen": 572241440, + "step": 8555 + }, + { + "epoch": 0.9707801418439717, + "loss": 1.3314762115478516, + "loss_ce": 0.005792642943561077, + "loss_iou": 0.490234375, + "loss_num": 0.06884765625, + "loss_xval": 1.328125, + "num_input_tokens_seen": 572241440, + "step": 8555 + }, + { + "epoch": 0.9708936170212766, + "grad_norm": 35.551090240478516, + "learning_rate": 5e-05, + "loss": 1.369, + "num_input_tokens_seen": 572309056, + "step": 8556 + }, + { + "epoch": 0.9708936170212766, + "loss": 1.1618802547454834, + "loss_ce": 0.0041654352098703384, + "loss_iou": 0.4921875, + "loss_num": 0.03515625, + "loss_xval": 1.15625, + "num_input_tokens_seen": 572309056, + "step": 8556 + }, + { + "epoch": 0.9710070921985816, + "grad_norm": 13.202245712280273, + "learning_rate": 5e-05, + "loss": 1.1319, + "num_input_tokens_seen": 572376696, + "step": 8557 + }, + { + "epoch": 0.9710070921985816, + "loss": 1.0853204727172852, + "loss_ce": 0.0032892338931560516, + "loss_iou": 0.44921875, + "loss_num": 0.037109375, + "loss_xval": 1.078125, + "num_input_tokens_seen": 572376696, + "step": 8557 + }, + { + "epoch": 0.9711205673758865, + "grad_norm": 25.596424102783203, + "learning_rate": 5e-05, + "loss": 0.9997, + "num_input_tokens_seen": 572443268, + "step": 8558 + }, + { + "epoch": 0.9711205673758865, + "loss": 1.080031394958496, + "loss_ce": 0.011183653958141804, + "loss_iou": 0.447265625, + "loss_num": 0.034423828125, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 572443268, + "step": 8558 + }, + { + "epoch": 0.9712340425531915, + "grad_norm": 29.019332885742188, + "learning_rate": 5e-05, + "loss": 1.1211, + "num_input_tokens_seen": 572509992, + "step": 8559 + }, + { + "epoch": 0.9712340425531915, + "loss": 1.3159468173980713, + "loss_ce": 0.011259302496910095, + "loss_iou": 0.4375, + "loss_num": 0.08544921875, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 572509992, + "step": 8559 + }, + { + "epoch": 0.9713475177304964, + "grad_norm": 31.62079620361328, + "learning_rate": 5e-05, + "loss": 1.2351, + "num_input_tokens_seen": 572577504, + "step": 8560 + }, + { + "epoch": 0.9713475177304964, + "loss": 1.2467963695526123, + "loss_ce": 0.007538527715951204, + "loss_iou": 0.5078125, + "loss_num": 0.044677734375, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 572577504, + "step": 8560 + }, + { + "epoch": 0.9714609929078014, + "grad_norm": 37.82951736450195, + "learning_rate": 5e-05, + "loss": 1.1061, + "num_input_tokens_seen": 572645028, + "step": 8561 + }, + { + "epoch": 0.9714609929078014, + "loss": 1.0618280172348022, + "loss_ce": 0.004699109587818384, + "loss_iou": 0.447265625, + "loss_num": 0.03271484375, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 572645028, + "step": 8561 + }, + { + "epoch": 0.9715744680851064, + "grad_norm": 24.334705352783203, + "learning_rate": 5e-05, + "loss": 1.2533, + "num_input_tokens_seen": 572711880, + "step": 8562 + }, + { + "epoch": 0.9715744680851064, + "loss": 1.2609827518463135, + "loss_ce": 0.006099913734942675, + "loss_iou": 0.46484375, + "loss_num": 0.0654296875, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 572711880, + "step": 8562 + }, + { + "epoch": 0.9716879432624114, + "grad_norm": 17.7482967376709, + "learning_rate": 5e-05, + "loss": 0.9702, + "num_input_tokens_seen": 572778952, + "step": 8563 + }, + { + "epoch": 0.9716879432624114, + "loss": 0.9592428207397461, + "loss_ce": 0.0041646999306976795, + "loss_iou": 0.396484375, + "loss_num": 0.0322265625, + "loss_xval": 0.953125, + "num_input_tokens_seen": 572778952, + "step": 8563 + }, + { + "epoch": 0.9718014184397163, + "grad_norm": 24.957815170288086, + "learning_rate": 5e-05, + "loss": 1.0864, + "num_input_tokens_seen": 572845280, + "step": 8564 + }, + { + "epoch": 0.9718014184397163, + "loss": 1.119059681892395, + "loss_ce": 0.004801923409104347, + "loss_iou": 0.447265625, + "loss_num": 0.044189453125, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 572845280, + "step": 8564 + }, + { + "epoch": 0.9719148936170213, + "grad_norm": 26.546152114868164, + "learning_rate": 5e-05, + "loss": 1.1479, + "num_input_tokens_seen": 572913032, + "step": 8565 + }, + { + "epoch": 0.9719148936170213, + "loss": 1.1920552253723145, + "loss_ce": 0.00309033808298409, + "loss_iou": 0.484375, + "loss_num": 0.043701171875, + "loss_xval": 1.1875, + "num_input_tokens_seen": 572913032, + "step": 8565 + }, + { + "epoch": 0.9720283687943262, + "grad_norm": 28.623815536499023, + "learning_rate": 5e-05, + "loss": 1.0189, + "num_input_tokens_seen": 572980188, + "step": 8566 + }, + { + "epoch": 0.9720283687943262, + "loss": 0.9754780530929565, + "loss_ce": 0.008681152015924454, + "loss_iou": 0.40625, + "loss_num": 0.0311279296875, + "loss_xval": 0.96875, + "num_input_tokens_seen": 572980188, + "step": 8566 + }, + { + "epoch": 0.9721418439716312, + "grad_norm": 25.185409545898438, + "learning_rate": 5e-05, + "loss": 1.1154, + "num_input_tokens_seen": 573046420, + "step": 8567 + }, + { + "epoch": 0.9721418439716312, + "loss": 0.9688594341278076, + "loss_ce": 0.002611893229186535, + "loss_iou": 0.384765625, + "loss_num": 0.039306640625, + "loss_xval": 0.96484375, + "num_input_tokens_seen": 573046420, + "step": 8567 + }, + { + "epoch": 0.9722553191489361, + "grad_norm": 29.41708755493164, + "learning_rate": 5e-05, + "loss": 1.0535, + "num_input_tokens_seen": 573113648, + "step": 8568 + }, + { + "epoch": 0.9722553191489361, + "loss": 0.9834406971931458, + "loss_ce": 0.004131613299250603, + "loss_iou": 0.39453125, + "loss_num": 0.038330078125, + "loss_xval": 0.98046875, + "num_input_tokens_seen": 573113648, + "step": 8568 + }, + { + "epoch": 0.9723687943262411, + "grad_norm": 29.451160430908203, + "learning_rate": 5e-05, + "loss": 1.1327, + "num_input_tokens_seen": 573180176, + "step": 8569 + }, + { + "epoch": 0.9723687943262411, + "loss": 1.07769775390625, + "loss_ce": 0.004943829961121082, + "loss_iou": 0.484375, + "loss_num": 0.0206298828125, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 573180176, + "step": 8569 + }, + { + "epoch": 0.9724822695035461, + "grad_norm": 32.562416076660156, + "learning_rate": 5e-05, + "loss": 0.89, + "num_input_tokens_seen": 573246316, + "step": 8570 + }, + { + "epoch": 0.9724822695035461, + "loss": 0.9551855325698853, + "loss_ce": 0.0025488673709332943, + "loss_iou": 0.404296875, + "loss_num": 0.0286865234375, + "loss_xval": 0.953125, + "num_input_tokens_seen": 573246316, + "step": 8570 + }, + { + "epoch": 0.9725957446808511, + "grad_norm": 27.91400718688965, + "learning_rate": 5e-05, + "loss": 1.263, + "num_input_tokens_seen": 573312852, + "step": 8571 + }, + { + "epoch": 0.9725957446808511, + "loss": 1.2660404443740845, + "loss_ce": 0.008227979764342308, + "loss_iou": 0.4453125, + "loss_num": 0.0732421875, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 573312852, + "step": 8571 + }, + { + "epoch": 0.9727092198581561, + "grad_norm": 18.32602882385254, + "learning_rate": 5e-05, + "loss": 1.0079, + "num_input_tokens_seen": 573378564, + "step": 8572 + }, + { + "epoch": 0.9727092198581561, + "loss": 0.7661265134811401, + "loss_ce": 0.005384288262575865, + "loss_iou": 0.3125, + "loss_num": 0.0272216796875, + "loss_xval": 0.76171875, + "num_input_tokens_seen": 573378564, + "step": 8572 + }, + { + "epoch": 0.972822695035461, + "grad_norm": 28.658355712890625, + "learning_rate": 5e-05, + "loss": 1.28, + "num_input_tokens_seen": 573445644, + "step": 8573 + }, + { + "epoch": 0.972822695035461, + "loss": 1.3415420055389404, + "loss_ce": 0.004872138611972332, + "loss_iou": 0.53515625, + "loss_num": 0.05322265625, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 573445644, + "step": 8573 + }, + { + "epoch": 0.9729361702127659, + "grad_norm": 34.929283142089844, + "learning_rate": 5e-05, + "loss": 1.1049, + "num_input_tokens_seen": 573512320, + "step": 8574 + }, + { + "epoch": 0.9729361702127659, + "loss": 1.0815362930297852, + "loss_ce": 0.0048760962672531605, + "loss_iou": 0.447265625, + "loss_num": 0.036376953125, + "loss_xval": 1.078125, + "num_input_tokens_seen": 573512320, + "step": 8574 + }, + { + "epoch": 0.9730496453900709, + "grad_norm": 28.899154663085938, + "learning_rate": 5e-05, + "loss": 1.1447, + "num_input_tokens_seen": 573579744, + "step": 8575 + }, + { + "epoch": 0.9730496453900709, + "loss": 1.1667330265045166, + "loss_ce": 0.007553335279226303, + "loss_iou": 0.466796875, + "loss_num": 0.045654296875, + "loss_xval": 1.15625, + "num_input_tokens_seen": 573579744, + "step": 8575 + }, + { + "epoch": 0.9731631205673759, + "grad_norm": 16.654712677001953, + "learning_rate": 5e-05, + "loss": 1.1407, + "num_input_tokens_seen": 573646792, + "step": 8576 + }, + { + "epoch": 0.9731631205673759, + "loss": 1.3201735019683838, + "loss_ce": 0.007185171823948622, + "loss_iou": 0.515625, + "loss_num": 0.056884765625, + "loss_xval": 1.3125, + "num_input_tokens_seen": 573646792, + "step": 8576 + }, + { + "epoch": 0.9732765957446808, + "grad_norm": 17.839557647705078, + "learning_rate": 5e-05, + "loss": 1.0032, + "num_input_tokens_seen": 573713952, + "step": 8577 + }, + { + "epoch": 0.9732765957446808, + "loss": 1.0464695692062378, + "loss_ce": 0.00740708876401186, + "loss_iou": 0.4140625, + "loss_num": 0.042236328125, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 573713952, + "step": 8577 + }, + { + "epoch": 0.9733900709219858, + "grad_norm": 22.779123306274414, + "learning_rate": 5e-05, + "loss": 1.4421, + "num_input_tokens_seen": 573780464, + "step": 8578 + }, + { + "epoch": 0.9733900709219858, + "loss": 1.5711909532546997, + "loss_ce": 0.012108927592635155, + "loss_iou": 0.5703125, + "loss_num": 0.0830078125, + "loss_xval": 1.5625, + "num_input_tokens_seen": 573780464, + "step": 8578 + }, + { + "epoch": 0.9735035460992908, + "grad_norm": 23.82673454284668, + "learning_rate": 5e-05, + "loss": 0.9753, + "num_input_tokens_seen": 573846640, + "step": 8579 + }, + { + "epoch": 0.9735035460992908, + "loss": 0.9404515027999878, + "loss_ce": 0.008810860104858875, + "loss_iou": 0.39453125, + "loss_num": 0.02880859375, + "loss_xval": 0.9296875, + "num_input_tokens_seen": 573846640, + "step": 8579 + }, + { + "epoch": 0.9736170212765958, + "grad_norm": 18.674976348876953, + "learning_rate": 5e-05, + "loss": 1.1792, + "num_input_tokens_seen": 573913796, + "step": 8580 + }, + { + "epoch": 0.9736170212765958, + "loss": 1.0841376781463623, + "loss_ce": 0.006500902120023966, + "loss_iou": 0.4140625, + "loss_num": 0.05029296875, + "loss_xval": 1.078125, + "num_input_tokens_seen": 573913796, + "step": 8580 + }, + { + "epoch": 0.9737304964539008, + "grad_norm": 31.981090545654297, + "learning_rate": 5e-05, + "loss": 0.9488, + "num_input_tokens_seen": 573980268, + "step": 8581 + }, + { + "epoch": 0.9737304964539008, + "loss": 0.8983425498008728, + "loss_ce": 0.007961705327033997, + "loss_iou": 0.392578125, + "loss_num": 0.0208740234375, + "loss_xval": 0.890625, + "num_input_tokens_seen": 573980268, + "step": 8581 + }, + { + "epoch": 0.9738439716312057, + "grad_norm": 43.14170837402344, + "learning_rate": 5e-05, + "loss": 1.3563, + "num_input_tokens_seen": 574046724, + "step": 8582 + }, + { + "epoch": 0.9738439716312057, + "loss": 1.249158263206482, + "loss_ce": 0.0064824530854821205, + "loss_iou": 0.53515625, + "loss_num": 0.0341796875, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 574046724, + "step": 8582 + }, + { + "epoch": 0.9739574468085106, + "grad_norm": 31.773550033569336, + "learning_rate": 5e-05, + "loss": 1.4375, + "num_input_tokens_seen": 574113516, + "step": 8583 + }, + { + "epoch": 0.9739574468085106, + "loss": 1.4292595386505127, + "loss_ce": 0.0075981467962265015, + "loss_iou": 0.578125, + "loss_num": 0.052978515625, + "loss_xval": 1.421875, + "num_input_tokens_seen": 574113516, + "step": 8583 + }, + { + "epoch": 0.9740709219858156, + "grad_norm": 21.04728126525879, + "learning_rate": 5e-05, + "loss": 1.1547, + "num_input_tokens_seen": 574181080, + "step": 8584 + }, + { + "epoch": 0.9740709219858156, + "loss": 1.0333287715911865, + "loss_ce": 0.00598508445546031, + "loss_iou": 0.431640625, + "loss_num": 0.032958984375, + "loss_xval": 1.03125, + "num_input_tokens_seen": 574181080, + "step": 8584 + }, + { + "epoch": 0.9741843971631206, + "grad_norm": 60.0704345703125, + "learning_rate": 5e-05, + "loss": 1.0783, + "num_input_tokens_seen": 574248132, + "step": 8585 + }, + { + "epoch": 0.9741843971631206, + "loss": 1.054626226425171, + "loss_ce": 0.006286488845944405, + "loss_iou": 0.44140625, + "loss_num": 0.032958984375, + "loss_xval": 1.046875, + "num_input_tokens_seen": 574248132, + "step": 8585 + }, + { + "epoch": 0.9742978723404255, + "grad_norm": 28.96151351928711, + "learning_rate": 5e-05, + "loss": 1.2145, + "num_input_tokens_seen": 574314920, + "step": 8586 + }, + { + "epoch": 0.9742978723404255, + "loss": 1.1329138278961182, + "loss_ce": 0.0059606521390378475, + "loss_iou": 0.470703125, + "loss_num": 0.037353515625, + "loss_xval": 1.125, + "num_input_tokens_seen": 574314920, + "step": 8586 + }, + { + "epoch": 0.9744113475177305, + "grad_norm": 29.458219528198242, + "learning_rate": 5e-05, + "loss": 0.9231, + "num_input_tokens_seen": 574380288, + "step": 8587 + }, + { + "epoch": 0.9744113475177305, + "loss": 1.0400640964508057, + "loss_ce": 0.01088930293917656, + "loss_iou": 0.412109375, + "loss_num": 0.041259765625, + "loss_xval": 1.03125, + "num_input_tokens_seen": 574380288, + "step": 8587 + }, + { + "epoch": 0.9745248226950355, + "grad_norm": 30.70442008972168, + "learning_rate": 5e-05, + "loss": 1.1851, + "num_input_tokens_seen": 574445328, + "step": 8588 + }, + { + "epoch": 0.9745248226950355, + "loss": 1.2099835872650146, + "loss_ce": 0.005393829196691513, + "loss_iou": 0.49609375, + "loss_num": 0.04296875, + "loss_xval": 1.203125, + "num_input_tokens_seen": 574445328, + "step": 8588 + }, + { + "epoch": 0.9746382978723405, + "grad_norm": 30.915136337280273, + "learning_rate": 5e-05, + "loss": 0.8876, + "num_input_tokens_seen": 574511616, + "step": 8589 + }, + { + "epoch": 0.9746382978723405, + "loss": 0.8206194043159485, + "loss_ce": 0.006166296079754829, + "loss_iou": 0.330078125, + "loss_num": 0.0306396484375, + "loss_xval": 0.8125, + "num_input_tokens_seen": 574511616, + "step": 8589 + }, + { + "epoch": 0.9747517730496454, + "grad_norm": 27.74287223815918, + "learning_rate": 5e-05, + "loss": 1.1894, + "num_input_tokens_seen": 574577176, + "step": 8590 + }, + { + "epoch": 0.9747517730496454, + "loss": 1.2166788578033447, + "loss_ce": 0.00525312777608633, + "loss_iou": 0.53515625, + "loss_num": 0.0281982421875, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 574577176, + "step": 8590 + }, + { + "epoch": 0.9748652482269503, + "grad_norm": 27.996868133544922, + "learning_rate": 5e-05, + "loss": 1.0747, + "num_input_tokens_seen": 574643064, + "step": 8591 + }, + { + "epoch": 0.9748652482269503, + "loss": 1.1193476915359497, + "loss_ce": 0.005334004759788513, + "loss_iou": 0.451171875, + "loss_num": 0.04248046875, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 574643064, + "step": 8591 + }, + { + "epoch": 0.9749787234042553, + "grad_norm": 26.67415428161621, + "learning_rate": 5e-05, + "loss": 1.1633, + "num_input_tokens_seen": 574709496, + "step": 8592 + }, + { + "epoch": 0.9749787234042553, + "loss": 1.2639657258987427, + "loss_ce": 0.0056648654863238335, + "loss_iou": 0.54296875, + "loss_num": 0.034912109375, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 574709496, + "step": 8592 + }, + { + "epoch": 0.9750921985815603, + "grad_norm": 38.1026611328125, + "learning_rate": 5e-05, + "loss": 1.0383, + "num_input_tokens_seen": 574775336, + "step": 8593 + }, + { + "epoch": 0.9750921985815603, + "loss": 1.0666414499282837, + "loss_ce": 0.006338663399219513, + "loss_iou": 0.44140625, + "loss_num": 0.03515625, + "loss_xval": 1.0625, + "num_input_tokens_seen": 574775336, + "step": 8593 + }, + { + "epoch": 0.9752056737588652, + "grad_norm": 29.9263858795166, + "learning_rate": 5e-05, + "loss": 1.1312, + "num_input_tokens_seen": 574842376, + "step": 8594 + }, + { + "epoch": 0.9752056737588652, + "loss": 1.1109037399291992, + "loss_ce": 0.006899922154843807, + "loss_iou": 0.453125, + "loss_num": 0.039794921875, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 574842376, + "step": 8594 + }, + { + "epoch": 0.9753191489361702, + "grad_norm": 31.29172706604004, + "learning_rate": 5e-05, + "loss": 1.1677, + "num_input_tokens_seen": 574908876, + "step": 8595 + }, + { + "epoch": 0.9753191489361702, + "loss": 1.164139747619629, + "loss_ce": 0.010819359682500362, + "loss_iou": 0.484375, + "loss_num": 0.037109375, + "loss_xval": 1.15625, + "num_input_tokens_seen": 574908876, + "step": 8595 + }, + { + "epoch": 0.9754326241134752, + "grad_norm": 29.509395599365234, + "learning_rate": 5e-05, + "loss": 1.3435, + "num_input_tokens_seen": 574975636, + "step": 8596 + }, + { + "epoch": 0.9754326241134752, + "loss": 1.2063579559326172, + "loss_ce": 0.007139177061617374, + "loss_iou": 0.49609375, + "loss_num": 0.041748046875, + "loss_xval": 1.203125, + "num_input_tokens_seen": 574975636, + "step": 8596 + }, + { + "epoch": 0.9755460992907802, + "grad_norm": 33.33230209350586, + "learning_rate": 5e-05, + "loss": 1.019, + "num_input_tokens_seen": 575042092, + "step": 8597 + }, + { + "epoch": 0.9755460992907802, + "loss": 1.1081045866012573, + "loss_ce": 0.00507726427167654, + "loss_iou": 0.46484375, + "loss_num": 0.034423828125, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 575042092, + "step": 8597 + }, + { + "epoch": 0.9756595744680852, + "grad_norm": 373.83453369140625, + "learning_rate": 5e-05, + "loss": 1.1008, + "num_input_tokens_seen": 575108660, + "step": 8598 + }, + { + "epoch": 0.9756595744680852, + "loss": 1.1326959133148193, + "loss_ce": 0.005986879579722881, + "loss_iou": 0.462890625, + "loss_num": 0.0400390625, + "loss_xval": 1.125, + "num_input_tokens_seen": 575108660, + "step": 8598 + }, + { + "epoch": 0.97577304964539, + "grad_norm": 23.184669494628906, + "learning_rate": 5e-05, + "loss": 1.0949, + "num_input_tokens_seen": 575175640, + "step": 8599 + }, + { + "epoch": 0.97577304964539, + "loss": 1.1041202545166016, + "loss_ce": 0.008905486203730106, + "loss_iou": 0.470703125, + "loss_num": 0.0306396484375, + "loss_xval": 1.09375, + "num_input_tokens_seen": 575175640, + "step": 8599 + }, + { + "epoch": 0.975886524822695, + "grad_norm": 30.308252334594727, + "learning_rate": 5e-05, + "loss": 1.0922, + "num_input_tokens_seen": 575242204, + "step": 8600 + }, + { + "epoch": 0.975886524822695, + "loss": 1.1103041172027588, + "loss_ce": 0.012159590609371662, + "loss_iou": 0.43359375, + "loss_num": 0.046142578125, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 575242204, + "step": 8600 + }, + { + "epoch": 0.976, + "grad_norm": 48.53287887573242, + "learning_rate": 5e-05, + "loss": 1.0579, + "num_input_tokens_seen": 575309928, + "step": 8601 + }, + { + "epoch": 0.976, + "loss": 1.0377384424209595, + "loss_ce": 0.006488430313766003, + "loss_iou": 0.453125, + "loss_num": 0.025146484375, + "loss_xval": 1.03125, + "num_input_tokens_seen": 575309928, + "step": 8601 + }, + { + "epoch": 0.976113475177305, + "grad_norm": 31.855745315551758, + "learning_rate": 5e-05, + "loss": 1.1421, + "num_input_tokens_seen": 575376896, + "step": 8602 + }, + { + "epoch": 0.976113475177305, + "loss": 1.1200730800628662, + "loss_ce": 0.007524138782173395, + "loss_iou": 0.4765625, + "loss_num": 0.0322265625, + "loss_xval": 1.109375, + "num_input_tokens_seen": 575376896, + "step": 8602 + }, + { + "epoch": 0.9762269503546099, + "grad_norm": 16.50316047668457, + "learning_rate": 5e-05, + "loss": 1.1387, + "num_input_tokens_seen": 575443400, + "step": 8603 + }, + { + "epoch": 0.9762269503546099, + "loss": 0.9468929767608643, + "loss_ce": 0.005974974948912859, + "loss_iou": 0.3984375, + "loss_num": 0.02880859375, + "loss_xval": 0.94140625, + "num_input_tokens_seen": 575443400, + "step": 8603 + }, + { + "epoch": 0.9763404255319149, + "grad_norm": 22.228599548339844, + "learning_rate": 5e-05, + "loss": 1.2796, + "num_input_tokens_seen": 575510120, + "step": 8604 + }, + { + "epoch": 0.9763404255319149, + "loss": 1.4998509883880615, + "loss_ce": 0.009616520255804062, + "loss_iou": 0.546875, + "loss_num": 0.07861328125, + "loss_xval": 1.4921875, + "num_input_tokens_seen": 575510120, + "step": 8604 + }, + { + "epoch": 0.9764539007092199, + "grad_norm": 101.14181518554688, + "learning_rate": 5e-05, + "loss": 1.1853, + "num_input_tokens_seen": 575577956, + "step": 8605 + }, + { + "epoch": 0.9764539007092199, + "loss": 1.1724820137023926, + "loss_ce": 0.005489822942763567, + "loss_iou": 0.486328125, + "loss_num": 0.03857421875, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 575577956, + "step": 8605 + }, + { + "epoch": 0.9765673758865249, + "grad_norm": 27.095924377441406, + "learning_rate": 5e-05, + "loss": 1.222, + "num_input_tokens_seen": 575644212, + "step": 8606 + }, + { + "epoch": 0.9765673758865249, + "loss": 1.1095833778381348, + "loss_ce": 0.0060677677392959595, + "loss_iou": 0.4375, + "loss_num": 0.045654296875, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 575644212, + "step": 8606 + }, + { + "epoch": 0.9766808510638297, + "grad_norm": 33.305137634277344, + "learning_rate": 5e-05, + "loss": 1.1786, + "num_input_tokens_seen": 575711468, + "step": 8607 + }, + { + "epoch": 0.9766808510638297, + "loss": 1.219251275062561, + "loss_ce": 0.004895761609077454, + "loss_iou": 0.47265625, + "loss_num": 0.053466796875, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 575711468, + "step": 8607 + }, + { + "epoch": 0.9767943262411347, + "grad_norm": 25.711652755737305, + "learning_rate": 5e-05, + "loss": 1.2098, + "num_input_tokens_seen": 575778212, + "step": 8608 + }, + { + "epoch": 0.9767943262411347, + "loss": 1.3282102346420288, + "loss_ce": 0.007409379817545414, + "loss_iou": 0.55859375, + "loss_num": 0.041259765625, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 575778212, + "step": 8608 + }, + { + "epoch": 0.9769078014184397, + "grad_norm": 22.865039825439453, + "learning_rate": 5e-05, + "loss": 1.2281, + "num_input_tokens_seen": 575845220, + "step": 8609 + }, + { + "epoch": 0.9769078014184397, + "loss": 1.3260924816131592, + "loss_ce": 0.008709716610610485, + "loss_iou": 0.50390625, + "loss_num": 0.06201171875, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 575845220, + "step": 8609 + }, + { + "epoch": 0.9770212765957447, + "grad_norm": 24.116806030273438, + "learning_rate": 5e-05, + "loss": 1.0571, + "num_input_tokens_seen": 575911772, + "step": 8610 + }, + { + "epoch": 0.9770212765957447, + "loss": 1.1460647583007812, + "loss_ce": 0.006050038151443005, + "loss_iou": 0.44921875, + "loss_num": 0.048095703125, + "loss_xval": 1.140625, + "num_input_tokens_seen": 575911772, + "step": 8610 + }, + { + "epoch": 0.9771347517730496, + "grad_norm": 34.85610580444336, + "learning_rate": 5e-05, + "loss": 1.0545, + "num_input_tokens_seen": 575978428, + "step": 8611 + }, + { + "epoch": 0.9771347517730496, + "loss": 1.013623595237732, + "loss_ce": 0.0062994444742798805, + "loss_iou": 0.43359375, + "loss_num": 0.028076171875, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 575978428, + "step": 8611 + }, + { + "epoch": 0.9772482269503546, + "grad_norm": 31.856884002685547, + "learning_rate": 5e-05, + "loss": 1.3238, + "num_input_tokens_seen": 576045836, + "step": 8612 + }, + { + "epoch": 0.9772482269503546, + "loss": 1.2270541191101074, + "loss_ce": 0.006839183159172535, + "loss_iou": 0.498046875, + "loss_num": 0.04443359375, + "loss_xval": 1.21875, + "num_input_tokens_seen": 576045836, + "step": 8612 + }, + { + "epoch": 0.9773617021276596, + "grad_norm": 41.45634078979492, + "learning_rate": 5e-05, + "loss": 1.1152, + "num_input_tokens_seen": 576113528, + "step": 8613 + }, + { + "epoch": 0.9773617021276596, + "loss": 1.1818829774856567, + "loss_ce": 0.008543111383914948, + "loss_iou": 0.45703125, + "loss_num": 0.052001953125, + "loss_xval": 1.171875, + "num_input_tokens_seen": 576113528, + "step": 8613 + }, + { + "epoch": 0.9774751773049646, + "grad_norm": 24.195228576660156, + "learning_rate": 5e-05, + "loss": 1.3654, + "num_input_tokens_seen": 576180200, + "step": 8614 + }, + { + "epoch": 0.9774751773049646, + "loss": 1.5903575420379639, + "loss_ce": 0.009302876889705658, + "loss_iou": 0.61328125, + "loss_num": 0.0712890625, + "loss_xval": 1.578125, + "num_input_tokens_seen": 576180200, + "step": 8614 + }, + { + "epoch": 0.9775886524822694, + "grad_norm": 20.434175491333008, + "learning_rate": 5e-05, + "loss": 1.1535, + "num_input_tokens_seen": 576247536, + "step": 8615 + }, + { + "epoch": 0.9775886524822694, + "loss": 1.1572800874710083, + "loss_ce": 0.005424587056040764, + "loss_iou": 0.46484375, + "loss_num": 0.04443359375, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 576247536, + "step": 8615 + }, + { + "epoch": 0.9777021276595744, + "grad_norm": 33.14513397216797, + "learning_rate": 5e-05, + "loss": 1.319, + "num_input_tokens_seen": 576314212, + "step": 8616 + }, + { + "epoch": 0.9777021276595744, + "loss": 1.4328880310058594, + "loss_ce": 0.006130224093794823, + "loss_iou": 0.5390625, + "loss_num": 0.0703125, + "loss_xval": 1.4296875, + "num_input_tokens_seen": 576314212, + "step": 8616 + }, + { + "epoch": 0.9778156028368794, + "grad_norm": 20.676666259765625, + "learning_rate": 5e-05, + "loss": 0.9129, + "num_input_tokens_seen": 576381344, + "step": 8617 + }, + { + "epoch": 0.9778156028368794, + "loss": 0.7075753211975098, + "loss_ce": 0.006678072270005941, + "loss_iou": 0.296875, + "loss_num": 0.0218505859375, + "loss_xval": 0.69921875, + "num_input_tokens_seen": 576381344, + "step": 8617 + }, + { + "epoch": 0.9779290780141844, + "grad_norm": 141.63037109375, + "learning_rate": 5e-05, + "loss": 1.1699, + "num_input_tokens_seen": 576446028, + "step": 8618 + }, + { + "epoch": 0.9779290780141844, + "loss": 1.2987160682678223, + "loss_ce": 0.00907367654144764, + "loss_iou": 0.455078125, + "loss_num": 0.076171875, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 576446028, + "step": 8618 + }, + { + "epoch": 0.9780425531914894, + "grad_norm": 27.979814529418945, + "learning_rate": 5e-05, + "loss": 1.2156, + "num_input_tokens_seen": 576513536, + "step": 8619 + }, + { + "epoch": 0.9780425531914894, + "loss": 1.2165019512176514, + "loss_ce": 0.004587856121361256, + "loss_iou": 0.5, + "loss_num": 0.0419921875, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 576513536, + "step": 8619 + }, + { + "epoch": 0.9781560283687943, + "grad_norm": 37.311092376708984, + "learning_rate": 5e-05, + "loss": 1.1911, + "num_input_tokens_seen": 576581176, + "step": 8620 + }, + { + "epoch": 0.9781560283687943, + "loss": 1.2949552536010742, + "loss_ce": 0.011263888329267502, + "loss_iou": 0.5546875, + "loss_num": 0.03466796875, + "loss_xval": 1.28125, + "num_input_tokens_seen": 576581176, + "step": 8620 + }, + { + "epoch": 0.9782695035460993, + "grad_norm": 34.91053771972656, + "learning_rate": 5e-05, + "loss": 1.4595, + "num_input_tokens_seen": 576648724, + "step": 8621 + }, + { + "epoch": 0.9782695035460993, + "loss": 1.4706286191940308, + "loss_ce": 0.005784825887531042, + "loss_iou": 0.6328125, + "loss_num": 0.03955078125, + "loss_xval": 1.46875, + "num_input_tokens_seen": 576648724, + "step": 8621 + }, + { + "epoch": 0.9783829787234043, + "grad_norm": 22.549123764038086, + "learning_rate": 5e-05, + "loss": 1.1538, + "num_input_tokens_seen": 576715780, + "step": 8622 + }, + { + "epoch": 0.9783829787234043, + "loss": 1.3097909688949585, + "loss_ce": 0.004615205340087414, + "loss_iou": 0.484375, + "loss_num": 0.06640625, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 576715780, + "step": 8622 + }, + { + "epoch": 0.9784964539007093, + "grad_norm": 37.656944274902344, + "learning_rate": 5e-05, + "loss": 1.1338, + "num_input_tokens_seen": 576782344, + "step": 8623 + }, + { + "epoch": 0.9784964539007093, + "loss": 1.261322259902954, + "loss_ce": 0.005462884437292814, + "loss_iou": 0.5078125, + "loss_num": 0.04736328125, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 576782344, + "step": 8623 + }, + { + "epoch": 0.9786099290780141, + "grad_norm": 52.67655944824219, + "learning_rate": 5e-05, + "loss": 1.0382, + "num_input_tokens_seen": 576848304, + "step": 8624 + }, + { + "epoch": 0.9786099290780141, + "loss": 1.00998055934906, + "loss_ce": 0.008027490228414536, + "loss_iou": 0.44921875, + "loss_num": 0.020751953125, + "loss_xval": 1.0, + "num_input_tokens_seen": 576848304, + "step": 8624 + }, + { + "epoch": 0.9787234042553191, + "grad_norm": 26.1253662109375, + "learning_rate": 5e-05, + "loss": 1.09, + "num_input_tokens_seen": 576913876, + "step": 8625 + }, + { + "epoch": 0.9787234042553191, + "loss": 1.1071772575378418, + "loss_ce": 0.010497553274035454, + "loss_iou": 0.408203125, + "loss_num": 0.0556640625, + "loss_xval": 1.09375, + "num_input_tokens_seen": 576913876, + "step": 8625 + }, + { + "epoch": 0.9788368794326241, + "grad_norm": 10.934087753295898, + "learning_rate": 5e-05, + "loss": 0.9481, + "num_input_tokens_seen": 576980708, + "step": 8626 + }, + { + "epoch": 0.9788368794326241, + "loss": 1.0060734748840332, + "loss_ce": 0.004913817159831524, + "loss_iou": 0.41015625, + "loss_num": 0.0361328125, + "loss_xval": 1.0, + "num_input_tokens_seen": 576980708, + "step": 8626 + }, + { + "epoch": 0.9789503546099291, + "grad_norm": 20.82501220703125, + "learning_rate": 5e-05, + "loss": 1.1936, + "num_input_tokens_seen": 577047132, + "step": 8627 + }, + { + "epoch": 0.9789503546099291, + "loss": 1.337235689163208, + "loss_ce": 0.008622344583272934, + "loss_iou": 0.4921875, + "loss_num": 0.0693359375, + "loss_xval": 1.328125, + "num_input_tokens_seen": 577047132, + "step": 8627 + }, + { + "epoch": 0.979063829787234, + "grad_norm": 30.911273956298828, + "learning_rate": 5e-05, + "loss": 1.0249, + "num_input_tokens_seen": 577113428, + "step": 8628 + }, + { + "epoch": 0.979063829787234, + "loss": 0.8771306276321411, + "loss_ce": 0.004083775915205479, + "loss_iou": 0.33984375, + "loss_num": 0.03857421875, + "loss_xval": 0.875, + "num_input_tokens_seen": 577113428, + "step": 8628 + }, + { + "epoch": 0.979177304964539, + "grad_norm": 138.38607788085938, + "learning_rate": 5e-05, + "loss": 1.2787, + "num_input_tokens_seen": 577180152, + "step": 8629 + }, + { + "epoch": 0.979177304964539, + "loss": 1.1100952625274658, + "loss_ce": 0.010485908016562462, + "loss_iou": 0.421875, + "loss_num": 0.051513671875, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 577180152, + "step": 8629 + }, + { + "epoch": 0.979290780141844, + "grad_norm": 39.336273193359375, + "learning_rate": 5e-05, + "loss": 1.3269, + "num_input_tokens_seen": 577247896, + "step": 8630 + }, + { + "epoch": 0.979290780141844, + "loss": 1.3617744445800781, + "loss_ce": 0.00532904639840126, + "loss_iou": 0.53125, + "loss_num": 0.05908203125, + "loss_xval": 1.359375, + "num_input_tokens_seen": 577247896, + "step": 8630 + }, + { + "epoch": 0.979404255319149, + "grad_norm": 35.139686584472656, + "learning_rate": 5e-05, + "loss": 0.9885, + "num_input_tokens_seen": 577314860, + "step": 8631 + }, + { + "epoch": 0.979404255319149, + "loss": 0.9130556583404541, + "loss_ce": 0.006317422725260258, + "loss_iou": 0.37109375, + "loss_num": 0.032958984375, + "loss_xval": 0.90625, + "num_input_tokens_seen": 577314860, + "step": 8631 + }, + { + "epoch": 0.9795177304964539, + "grad_norm": 27.108484268188477, + "learning_rate": 5e-05, + "loss": 0.9853, + "num_input_tokens_seen": 577381496, + "step": 8632 + }, + { + "epoch": 0.9795177304964539, + "loss": 0.9196357131004333, + "loss_ce": 0.005573259200900793, + "loss_iou": 0.376953125, + "loss_num": 0.031982421875, + "loss_xval": 0.9140625, + "num_input_tokens_seen": 577381496, + "step": 8632 + }, + { + "epoch": 0.9796312056737588, + "grad_norm": 38.372596740722656, + "learning_rate": 5e-05, + "loss": 0.896, + "num_input_tokens_seen": 577448412, + "step": 8633 + }, + { + "epoch": 0.9796312056737588, + "loss": 0.8485106229782104, + "loss_ce": 0.006347556598484516, + "loss_iou": 0.33203125, + "loss_num": 0.03564453125, + "loss_xval": 0.84375, + "num_input_tokens_seen": 577448412, + "step": 8633 + }, + { + "epoch": 0.9797446808510638, + "grad_norm": 56.6425895690918, + "learning_rate": 5e-05, + "loss": 1.0983, + "num_input_tokens_seen": 577515504, + "step": 8634 + }, + { + "epoch": 0.9797446808510638, + "loss": 1.0824294090270996, + "loss_ce": 0.0043042972683906555, + "loss_iou": 0.435546875, + "loss_num": 0.041259765625, + "loss_xval": 1.078125, + "num_input_tokens_seen": 577515504, + "step": 8634 + }, + { + "epoch": 0.9798581560283688, + "grad_norm": 19.893632888793945, + "learning_rate": 5e-05, + "loss": 1.0555, + "num_input_tokens_seen": 577582836, + "step": 8635 + }, + { + "epoch": 0.9798581560283688, + "loss": 1.057999849319458, + "loss_ce": 0.0038006757386028767, + "loss_iou": 0.392578125, + "loss_num": 0.053466796875, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 577582836, + "step": 8635 + }, + { + "epoch": 0.9799716312056738, + "grad_norm": 20.914674758911133, + "learning_rate": 5e-05, + "loss": 1.1362, + "num_input_tokens_seen": 577649556, + "step": 8636 + }, + { + "epoch": 0.9799716312056738, + "loss": 1.1755205392837524, + "loss_ce": 0.005110422149300575, + "loss_iou": 0.478515625, + "loss_num": 0.04248046875, + "loss_xval": 1.171875, + "num_input_tokens_seen": 577649556, + "step": 8636 + }, + { + "epoch": 0.9800851063829787, + "grad_norm": 25.250595092773438, + "learning_rate": 5e-05, + "loss": 1.1666, + "num_input_tokens_seen": 577716936, + "step": 8637 + }, + { + "epoch": 0.9800851063829787, + "loss": 1.2618017196655273, + "loss_ce": 0.0054539889097213745, + "loss_iou": 0.490234375, + "loss_num": 0.054931640625, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 577716936, + "step": 8637 + }, + { + "epoch": 0.9801985815602837, + "grad_norm": 22.373519897460938, + "learning_rate": 5e-05, + "loss": 1.1915, + "num_input_tokens_seen": 577783368, + "step": 8638 + }, + { + "epoch": 0.9801985815602837, + "loss": 1.1294031143188477, + "loss_ce": 0.007821033708751202, + "loss_iou": 0.466796875, + "loss_num": 0.0380859375, + "loss_xval": 1.125, + "num_input_tokens_seen": 577783368, + "step": 8638 + }, + { + "epoch": 0.9803120567375887, + "grad_norm": 20.44088363647461, + "learning_rate": 5e-05, + "loss": 1.2137, + "num_input_tokens_seen": 577850884, + "step": 8639 + }, + { + "epoch": 0.9803120567375887, + "loss": 1.2184785604476929, + "loss_ce": 0.00851767510175705, + "loss_iou": 0.51171875, + "loss_num": 0.037109375, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 577850884, + "step": 8639 + }, + { + "epoch": 0.9804255319148936, + "grad_norm": 22.952552795410156, + "learning_rate": 5e-05, + "loss": 1.2854, + "num_input_tokens_seen": 577917508, + "step": 8640 + }, + { + "epoch": 0.9804255319148936, + "loss": 1.1424095630645752, + "loss_ce": 0.010573690757155418, + "loss_iou": 0.462890625, + "loss_num": 0.041259765625, + "loss_xval": 1.1328125, + "num_input_tokens_seen": 577917508, + "step": 8640 + }, + { + "epoch": 0.9805390070921985, + "grad_norm": 20.960771560668945, + "learning_rate": 5e-05, + "loss": 1.2753, + "num_input_tokens_seen": 577984960, + "step": 8641 + }, + { + "epoch": 0.9805390070921985, + "loss": 1.2728461027145386, + "loss_ce": 0.006732896901667118, + "loss_iou": 0.5078125, + "loss_num": 0.04931640625, + "loss_xval": 1.265625, + "num_input_tokens_seen": 577984960, + "step": 8641 + }, + { + "epoch": 0.9806524822695035, + "grad_norm": 25.304960250854492, + "learning_rate": 5e-05, + "loss": 1.1887, + "num_input_tokens_seen": 578052584, + "step": 8642 + }, + { + "epoch": 0.9806524822695035, + "loss": 1.2873307466506958, + "loss_ce": 0.0055924286134541035, + "loss_iou": 0.5390625, + "loss_num": 0.040283203125, + "loss_xval": 1.28125, + "num_input_tokens_seen": 578052584, + "step": 8642 + }, + { + "epoch": 0.9807659574468085, + "grad_norm": 32.40913009643555, + "learning_rate": 5e-05, + "loss": 0.9549, + "num_input_tokens_seen": 578118332, + "step": 8643 + }, + { + "epoch": 0.9807659574468085, + "loss": 0.7261736989021301, + "loss_ce": 0.007240619510412216, + "loss_iou": 0.287109375, + "loss_num": 0.0291748046875, + "loss_xval": 0.71875, + "num_input_tokens_seen": 578118332, + "step": 8643 + }, + { + "epoch": 0.9808794326241135, + "grad_norm": 141.16001892089844, + "learning_rate": 5e-05, + "loss": 1.1457, + "num_input_tokens_seen": 578186152, + "step": 8644 + }, + { + "epoch": 0.9808794326241135, + "loss": 1.1484531164169312, + "loss_ce": 0.0029452904127538204, + "loss_iou": 0.49609375, + "loss_num": 0.030517578125, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 578186152, + "step": 8644 + }, + { + "epoch": 0.9809929078014185, + "grad_norm": 111.46839904785156, + "learning_rate": 5e-05, + "loss": 1.2031, + "num_input_tokens_seen": 578253032, + "step": 8645 + }, + { + "epoch": 0.9809929078014185, + "loss": 1.2087148427963257, + "loss_ce": 0.008031254634261131, + "loss_iou": 0.478515625, + "loss_num": 0.049072265625, + "loss_xval": 1.203125, + "num_input_tokens_seen": 578253032, + "step": 8645 + }, + { + "epoch": 0.9811063829787234, + "grad_norm": 29.204687118530273, + "learning_rate": 5e-05, + "loss": 1.327, + "num_input_tokens_seen": 578319720, + "step": 8646 + }, + { + "epoch": 0.9811063829787234, + "loss": 1.2478930950164795, + "loss_ce": 0.0047290632501244545, + "loss_iou": 0.5390625, + "loss_num": 0.03271484375, + "loss_xval": 1.2421875, + "num_input_tokens_seen": 578319720, + "step": 8646 + }, + { + "epoch": 0.9812198581560284, + "grad_norm": 21.459604263305664, + "learning_rate": 5e-05, + "loss": 1.0213, + "num_input_tokens_seen": 578385652, + "step": 8647 + }, + { + "epoch": 0.9812198581560284, + "loss": 0.7540826797485352, + "loss_ce": 0.010064119473099709, + "loss_iou": 0.298828125, + "loss_num": 0.029296875, + "loss_xval": 0.7421875, + "num_input_tokens_seen": 578385652, + "step": 8647 + }, + { + "epoch": 0.9813333333333333, + "grad_norm": 25.822126388549805, + "learning_rate": 5e-05, + "loss": 1.0734, + "num_input_tokens_seen": 578451784, + "step": 8648 + }, + { + "epoch": 0.9813333333333333, + "loss": 1.1259865760803223, + "loss_ce": 0.005381077527999878, + "loss_iou": 0.408203125, + "loss_num": 0.060302734375, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 578451784, + "step": 8648 + }, + { + "epoch": 0.9814468085106383, + "grad_norm": 32.19424057006836, + "learning_rate": 5e-05, + "loss": 1.1267, + "num_input_tokens_seen": 578517960, + "step": 8649 + }, + { + "epoch": 0.9814468085106383, + "loss": 1.1409040689468384, + "loss_ce": 0.012974372133612633, + "loss_iou": 0.373046875, + "loss_num": 0.076171875, + "loss_xval": 1.125, + "num_input_tokens_seen": 578517960, + "step": 8649 + }, + { + "epoch": 0.9815602836879432, + "grad_norm": 35.30428695678711, + "learning_rate": 5e-05, + "loss": 1.0459, + "num_input_tokens_seen": 578583468, + "step": 8650 + }, + { + "epoch": 0.9815602836879432, + "loss": 1.1673805713653564, + "loss_ce": 0.011130666360259056, + "loss_iou": 0.4375, + "loss_num": 0.05615234375, + "loss_xval": 1.15625, + "num_input_tokens_seen": 578583468, + "step": 8650 + }, + { + "epoch": 0.9816737588652482, + "grad_norm": 25.378707885742188, + "learning_rate": 5e-05, + "loss": 0.9846, + "num_input_tokens_seen": 578649852, + "step": 8651 + }, + { + "epoch": 0.9816737588652482, + "loss": 0.7920421361923218, + "loss_ce": 0.00481067132204771, + "loss_iou": 0.318359375, + "loss_num": 0.030029296875, + "loss_xval": 0.7890625, + "num_input_tokens_seen": 578649852, + "step": 8651 + }, + { + "epoch": 0.9817872340425532, + "grad_norm": 30.1999454498291, + "learning_rate": 5e-05, + "loss": 1.0829, + "num_input_tokens_seen": 578716256, + "step": 8652 + }, + { + "epoch": 0.9817872340425532, + "loss": 1.2203946113586426, + "loss_ce": 0.007015644572675228, + "loss_iou": 0.4765625, + "loss_num": 0.051513671875, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 578716256, + "step": 8652 + }, + { + "epoch": 0.9819007092198582, + "grad_norm": 44.873775482177734, + "learning_rate": 5e-05, + "loss": 0.9871, + "num_input_tokens_seen": 578782772, + "step": 8653 + }, + { + "epoch": 0.9819007092198582, + "loss": 1.1550137996673584, + "loss_ce": 0.007736039347946644, + "loss_iou": 0.443359375, + "loss_num": 0.05224609375, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 578782772, + "step": 8653 + }, + { + "epoch": 0.9820141843971631, + "grad_norm": 35.71527862548828, + "learning_rate": 5e-05, + "loss": 1.2858, + "num_input_tokens_seen": 578849792, + "step": 8654 + }, + { + "epoch": 0.9820141843971631, + "loss": 1.3331518173217773, + "loss_ce": 0.0060032750479876995, + "loss_iou": 0.486328125, + "loss_num": 0.07080078125, + "loss_xval": 1.328125, + "num_input_tokens_seen": 578849792, + "step": 8654 + }, + { + "epoch": 0.9821276595744681, + "grad_norm": 36.50852584838867, + "learning_rate": 5e-05, + "loss": 1.0639, + "num_input_tokens_seen": 578916208, + "step": 8655 + }, + { + "epoch": 0.9821276595744681, + "loss": 0.8387337923049927, + "loss_ce": 0.0049935635179281235, + "loss_iou": 0.375, + "loss_num": 0.016845703125, + "loss_xval": 0.83203125, + "num_input_tokens_seen": 578916208, + "step": 8655 + }, + { + "epoch": 0.9822411347517731, + "grad_norm": 29.213403701782227, + "learning_rate": 5e-05, + "loss": 1.3755, + "num_input_tokens_seen": 578982504, + "step": 8656 + }, + { + "epoch": 0.9822411347517731, + "loss": 1.351106882095337, + "loss_ce": 0.011751442216336727, + "loss_iou": 0.55078125, + "loss_num": 0.04736328125, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 578982504, + "step": 8656 + }, + { + "epoch": 0.982354609929078, + "grad_norm": 18.604875564575195, + "learning_rate": 5e-05, + "loss": 1.1744, + "num_input_tokens_seen": 579048944, + "step": 8657 + }, + { + "epoch": 0.982354609929078, + "loss": 1.0570400953292847, + "loss_ce": 0.0052823349833488464, + "loss_iou": 0.439453125, + "loss_num": 0.034423828125, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 579048944, + "step": 8657 + }, + { + "epoch": 0.982468085106383, + "grad_norm": 19.720508575439453, + "learning_rate": 5e-05, + "loss": 1.345, + "num_input_tokens_seen": 579116912, + "step": 8658 + }, + { + "epoch": 0.982468085106383, + "loss": 1.5611611604690552, + "loss_ce": 0.010379936546087265, + "loss_iou": 0.57421875, + "loss_num": 0.08056640625, + "loss_xval": 1.546875, + "num_input_tokens_seen": 579116912, + "step": 8658 + }, + { + "epoch": 0.9825815602836879, + "grad_norm": 36.928916931152344, + "learning_rate": 5e-05, + "loss": 0.9252, + "num_input_tokens_seen": 579183400, + "step": 8659 + }, + { + "epoch": 0.9825815602836879, + "loss": 0.8714860677719116, + "loss_ce": 0.0047868345864117146, + "loss_iou": 0.34375, + "loss_num": 0.035888671875, + "loss_xval": 0.8671875, + "num_input_tokens_seen": 579183400, + "step": 8659 + }, + { + "epoch": 0.9826950354609929, + "grad_norm": 17.715787887573242, + "learning_rate": 5e-05, + "loss": 1.0566, + "num_input_tokens_seen": 579249344, + "step": 8660 + }, + { + "epoch": 0.9826950354609929, + "loss": 1.0897231101989746, + "loss_ce": 0.0042739189229905605, + "loss_iou": 0.435546875, + "loss_num": 0.042724609375, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 579249344, + "step": 8660 + }, + { + "epoch": 0.9828085106382979, + "grad_norm": 13.546467781066895, + "learning_rate": 5e-05, + "loss": 1.1352, + "num_input_tokens_seen": 579315908, + "step": 8661 + }, + { + "epoch": 0.9828085106382979, + "loss": 0.9790879487991333, + "loss_ce": 0.005699201952666044, + "loss_iou": 0.41015625, + "loss_num": 0.0303955078125, + "loss_xval": 0.97265625, + "num_input_tokens_seen": 579315908, + "step": 8661 + }, + { + "epoch": 0.9829219858156029, + "grad_norm": 19.25365447998047, + "learning_rate": 5e-05, + "loss": 1.0898, + "num_input_tokens_seen": 579383016, + "step": 8662 + }, + { + "epoch": 0.9829219858156029, + "loss": 1.1789183616638184, + "loss_ce": 0.004113656003028154, + "loss_iou": 0.490234375, + "loss_num": 0.038818359375, + "loss_xval": 1.171875, + "num_input_tokens_seen": 579383016, + "step": 8662 + }, + { + "epoch": 0.9830354609929078, + "grad_norm": 19.915218353271484, + "learning_rate": 5e-05, + "loss": 0.9795, + "num_input_tokens_seen": 579449048, + "step": 8663 + }, + { + "epoch": 0.9830354609929078, + "loss": 0.8699144721031189, + "loss_ce": 0.003947693854570389, + "loss_iou": 0.357421875, + "loss_num": 0.030517578125, + "loss_xval": 0.8671875, + "num_input_tokens_seen": 579449048, + "step": 8663 + }, + { + "epoch": 0.9831489361702128, + "grad_norm": 17.385286331176758, + "learning_rate": 5e-05, + "loss": 1.075, + "num_input_tokens_seen": 579516524, + "step": 8664 + }, + { + "epoch": 0.9831489361702128, + "loss": 1.1457277536392212, + "loss_ce": 0.005102744325995445, + "loss_iou": 0.4375, + "loss_num": 0.053466796875, + "loss_xval": 1.140625, + "num_input_tokens_seen": 579516524, + "step": 8664 + }, + { + "epoch": 0.9832624113475177, + "grad_norm": 16.56662368774414, + "learning_rate": 5e-05, + "loss": 1.0081, + "num_input_tokens_seen": 579581728, + "step": 8665 + }, + { + "epoch": 0.9832624113475177, + "loss": 0.9260087013244629, + "loss_ce": 0.006331002339720726, + "loss_iou": 0.330078125, + "loss_num": 0.05224609375, + "loss_xval": 0.91796875, + "num_input_tokens_seen": 579581728, + "step": 8665 + }, + { + "epoch": 0.9833758865248227, + "grad_norm": 63.463253021240234, + "learning_rate": 5e-05, + "loss": 1.1309, + "num_input_tokens_seen": 579648508, + "step": 8666 + }, + { + "epoch": 0.9833758865248227, + "loss": 1.2680913209915161, + "loss_ce": 0.00637257844209671, + "loss_iou": 0.51171875, + "loss_num": 0.048583984375, + "loss_xval": 1.265625, + "num_input_tokens_seen": 579648508, + "step": 8666 + }, + { + "epoch": 0.9834893617021276, + "grad_norm": 26.593244552612305, + "learning_rate": 5e-05, + "loss": 0.9327, + "num_input_tokens_seen": 579715240, + "step": 8667 + }, + { + "epoch": 0.9834893617021276, + "loss": 0.8651859164237976, + "loss_ce": 0.003125398885458708, + "loss_iou": 0.35546875, + "loss_num": 0.030029296875, + "loss_xval": 0.86328125, + "num_input_tokens_seen": 579715240, + "step": 8667 + }, + { + "epoch": 0.9836028368794326, + "grad_norm": 61.95829772949219, + "learning_rate": 5e-05, + "loss": 1.2584, + "num_input_tokens_seen": 579781936, + "step": 8668 + }, + { + "epoch": 0.9836028368794326, + "loss": 1.2040975093841553, + "loss_ce": 0.00585534144192934, + "loss_iou": 0.4921875, + "loss_num": 0.042724609375, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 579781936, + "step": 8668 + }, + { + "epoch": 0.9837163120567376, + "grad_norm": 42.9205322265625, + "learning_rate": 5e-05, + "loss": 1.3185, + "num_input_tokens_seen": 579848816, + "step": 8669 + }, + { + "epoch": 0.9837163120567376, + "loss": 1.3060686588287354, + "loss_ce": 0.0038225760217756033, + "loss_iou": 0.54296875, + "loss_num": 0.043701171875, + "loss_xval": 1.3046875, + "num_input_tokens_seen": 579848816, + "step": 8669 + }, + { + "epoch": 0.9838297872340426, + "grad_norm": 35.053524017333984, + "learning_rate": 5e-05, + "loss": 1.2752, + "num_input_tokens_seen": 579916384, + "step": 8670 + }, + { + "epoch": 0.9838297872340426, + "loss": 1.1688158512115479, + "loss_ce": 0.006218187510967255, + "loss_iou": 0.5078125, + "loss_num": 0.030029296875, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 579916384, + "step": 8670 + }, + { + "epoch": 0.9839432624113476, + "grad_norm": 17.026830673217773, + "learning_rate": 5e-05, + "loss": 1.0299, + "num_input_tokens_seen": 579981908, + "step": 8671 + }, + { + "epoch": 0.9839432624113476, + "loss": 1.0162006616592407, + "loss_ce": 0.006435008719563484, + "loss_iou": 0.376953125, + "loss_num": 0.05078125, + "loss_xval": 1.0078125, + "num_input_tokens_seen": 579981908, + "step": 8671 + }, + { + "epoch": 0.9840567375886525, + "grad_norm": 24.283245086669922, + "learning_rate": 5e-05, + "loss": 1.1, + "num_input_tokens_seen": 580048792, + "step": 8672 + }, + { + "epoch": 0.9840567375886525, + "loss": 1.0943853855133057, + "loss_ce": 0.0033209112007170916, + "loss_iou": 0.455078125, + "loss_num": 0.0361328125, + "loss_xval": 1.09375, + "num_input_tokens_seen": 580048792, + "step": 8672 + }, + { + "epoch": 0.9841702127659574, + "grad_norm": 30.241127014160156, + "learning_rate": 5e-05, + "loss": 1.148, + "num_input_tokens_seen": 580115084, + "step": 8673 + }, + { + "epoch": 0.9841702127659574, + "loss": 1.1637920141220093, + "loss_ce": 0.009495127946138382, + "loss_iou": 0.4765625, + "loss_num": 0.040283203125, + "loss_xval": 1.15625, + "num_input_tokens_seen": 580115084, + "step": 8673 + }, + { + "epoch": 0.9842836879432624, + "grad_norm": 31.401464462280273, + "learning_rate": 5e-05, + "loss": 1.1096, + "num_input_tokens_seen": 580181952, + "step": 8674 + }, + { + "epoch": 0.9842836879432624, + "loss": 1.2949856519699097, + "loss_ce": 0.004458323121070862, + "loss_iou": 0.546875, + "loss_num": 0.038818359375, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 580181952, + "step": 8674 + }, + { + "epoch": 0.9843971631205674, + "grad_norm": 58.3362922668457, + "learning_rate": 5e-05, + "loss": 1.1133, + "num_input_tokens_seen": 580248468, + "step": 8675 + }, + { + "epoch": 0.9843971631205674, + "loss": 1.3209048509597778, + "loss_ce": 0.007916541770100594, + "loss_iou": 0.51953125, + "loss_num": 0.054443359375, + "loss_xval": 1.3125, + "num_input_tokens_seen": 580248468, + "step": 8675 + }, + { + "epoch": 0.9845106382978723, + "grad_norm": 23.853830337524414, + "learning_rate": 5e-05, + "loss": 1.1912, + "num_input_tokens_seen": 580316084, + "step": 8676 + }, + { + "epoch": 0.9845106382978723, + "loss": 1.2583637237548828, + "loss_ce": 0.008607877418398857, + "loss_iou": 0.5234375, + "loss_num": 0.0400390625, + "loss_xval": 1.25, + "num_input_tokens_seen": 580316084, + "step": 8676 + }, + { + "epoch": 0.9846241134751773, + "grad_norm": 15.394085884094238, + "learning_rate": 5e-05, + "loss": 1.1418, + "num_input_tokens_seen": 580382544, + "step": 8677 + }, + { + "epoch": 0.9846241134751773, + "loss": 1.209394931793213, + "loss_ce": 0.009687809273600578, + "loss_iou": 0.46484375, + "loss_num": 0.053955078125, + "loss_xval": 1.203125, + "num_input_tokens_seen": 580382544, + "step": 8677 + }, + { + "epoch": 0.9847375886524823, + "grad_norm": 18.95863914489746, + "learning_rate": 5e-05, + "loss": 1.0051, + "num_input_tokens_seen": 580449080, + "step": 8678 + }, + { + "epoch": 0.9847375886524823, + "loss": 1.0704598426818848, + "loss_ce": 0.006495076231658459, + "loss_iou": 0.427734375, + "loss_num": 0.0419921875, + "loss_xval": 1.0625, + "num_input_tokens_seen": 580449080, + "step": 8678 + }, + { + "epoch": 0.9848510638297873, + "grad_norm": 32.18822479248047, + "learning_rate": 5e-05, + "loss": 0.9495, + "num_input_tokens_seen": 580515292, + "step": 8679 + }, + { + "epoch": 0.9848510638297873, + "loss": 1.0238971710205078, + "loss_ce": 0.00705146137624979, + "loss_iou": 0.400390625, + "loss_num": 0.04345703125, + "loss_xval": 1.015625, + "num_input_tokens_seen": 580515292, + "step": 8679 + }, + { + "epoch": 0.9849645390070922, + "grad_norm": 36.8477783203125, + "learning_rate": 5e-05, + "loss": 1.3836, + "num_input_tokens_seen": 580580908, + "step": 8680 + }, + { + "epoch": 0.9849645390070922, + "loss": 1.4235033988952637, + "loss_ce": 0.005534638185054064, + "loss_iou": 0.54296875, + "loss_num": 0.06640625, + "loss_xval": 1.421875, + "num_input_tokens_seen": 580580908, + "step": 8680 + }, + { + "epoch": 0.9850780141843971, + "grad_norm": 37.493770599365234, + "learning_rate": 5e-05, + "loss": 0.9232, + "num_input_tokens_seen": 580647960, + "step": 8681 + }, + { + "epoch": 0.9850780141843971, + "loss": 0.8820132613182068, + "loss_ce": 0.009820906445384026, + "loss_iou": 0.373046875, + "loss_num": 0.0252685546875, + "loss_xval": 0.87109375, + "num_input_tokens_seen": 580647960, + "step": 8681 + }, + { + "epoch": 0.9851914893617021, + "grad_norm": 36.10581588745117, + "learning_rate": 5e-05, + "loss": 1.1718, + "num_input_tokens_seen": 580715364, + "step": 8682 + }, + { + "epoch": 0.9851914893617021, + "loss": 1.3453075885772705, + "loss_ce": 0.006440367549657822, + "loss_iou": 0.546875, + "loss_num": 0.048828125, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 580715364, + "step": 8682 + }, + { + "epoch": 0.9853049645390071, + "grad_norm": 20.84719467163086, + "learning_rate": 5e-05, + "loss": 1.1276, + "num_input_tokens_seen": 580782940, + "step": 8683 + }, + { + "epoch": 0.9853049645390071, + "loss": 1.082522988319397, + "loss_ce": 0.002933182055130601, + "loss_iou": 0.439453125, + "loss_num": 0.040283203125, + "loss_xval": 1.078125, + "num_input_tokens_seen": 580782940, + "step": 8683 + }, + { + "epoch": 0.985418439716312, + "grad_norm": 28.547269821166992, + "learning_rate": 5e-05, + "loss": 1.1573, + "num_input_tokens_seen": 580849472, + "step": 8684 + }, + { + "epoch": 0.985418439716312, + "loss": 1.1540335416793823, + "loss_ce": 0.012431997805833817, + "loss_iou": 0.490234375, + "loss_num": 0.03173828125, + "loss_xval": 1.140625, + "num_input_tokens_seen": 580849472, + "step": 8684 + }, + { + "epoch": 0.985531914893617, + "grad_norm": 37.634700775146484, + "learning_rate": 5e-05, + "loss": 1.3452, + "num_input_tokens_seen": 580917352, + "step": 8685 + }, + { + "epoch": 0.985531914893617, + "loss": 1.2157294750213623, + "loss_ce": 0.0035713070537894964, + "loss_iou": 0.48046875, + "loss_num": 0.050537109375, + "loss_xval": 1.2109375, + "num_input_tokens_seen": 580917352, + "step": 8685 + }, + { + "epoch": 0.985645390070922, + "grad_norm": 29.016164779663086, + "learning_rate": 5e-05, + "loss": 1.5736, + "num_input_tokens_seen": 580984856, + "step": 8686 + }, + { + "epoch": 0.985645390070922, + "loss": 1.4923441410064697, + "loss_ce": 0.005039369687438011, + "loss_iou": 0.640625, + "loss_num": 0.04150390625, + "loss_xval": 1.484375, + "num_input_tokens_seen": 580984856, + "step": 8686 + }, + { + "epoch": 0.985758865248227, + "grad_norm": 27.29472541809082, + "learning_rate": 5e-05, + "loss": 1.258, + "num_input_tokens_seen": 581051232, + "step": 8687 + }, + { + "epoch": 0.985758865248227, + "loss": 1.3866691589355469, + "loss_ce": 0.009227786213159561, + "loss_iou": 0.52734375, + "loss_num": 0.06494140625, + "loss_xval": 1.375, + "num_input_tokens_seen": 581051232, + "step": 8687 + }, + { + "epoch": 0.985872340425532, + "grad_norm": 27.354469299316406, + "learning_rate": 5e-05, + "loss": 1.0814, + "num_input_tokens_seen": 581118000, + "step": 8688 + }, + { + "epoch": 0.985872340425532, + "loss": 0.9039252996444702, + "loss_ce": 0.004755406174808741, + "loss_iou": 0.369140625, + "loss_num": 0.0322265625, + "loss_xval": 0.8984375, + "num_input_tokens_seen": 581118000, + "step": 8688 + }, + { + "epoch": 0.9859858156028369, + "grad_norm": 46.46039962768555, + "learning_rate": 5e-05, + "loss": 1.1979, + "num_input_tokens_seen": 581184564, + "step": 8689 + }, + { + "epoch": 0.9859858156028369, + "loss": 1.3415324687957764, + "loss_ce": 0.004618345759809017, + "loss_iou": 0.5390625, + "loss_num": 0.05224609375, + "loss_xval": 1.3359375, + "num_input_tokens_seen": 581184564, + "step": 8689 + }, + { + "epoch": 0.9860992907801418, + "grad_norm": 26.697160720825195, + "learning_rate": 5e-05, + "loss": 1.1895, + "num_input_tokens_seen": 581251752, + "step": 8690 + }, + { + "epoch": 0.9860992907801418, + "loss": 1.123379111289978, + "loss_ce": 0.008388843387365341, + "loss_iou": 0.4609375, + "loss_num": 0.0390625, + "loss_xval": 1.1171875, + "num_input_tokens_seen": 581251752, + "step": 8690 + }, + { + "epoch": 0.9862127659574468, + "grad_norm": 11.02236557006836, + "learning_rate": 5e-05, + "loss": 1.0976, + "num_input_tokens_seen": 581318328, + "step": 8691 + }, + { + "epoch": 0.9862127659574468, + "loss": 1.0425448417663574, + "loss_ce": 0.009830012917518616, + "loss_iou": 0.416015625, + "loss_num": 0.040771484375, + "loss_xval": 1.03125, + "num_input_tokens_seen": 581318328, + "step": 8691 + }, + { + "epoch": 0.9863262411347518, + "grad_norm": 33.20391082763672, + "learning_rate": 5e-05, + "loss": 1.1161, + "num_input_tokens_seen": 581385052, + "step": 8692 + }, + { + "epoch": 0.9863262411347518, + "loss": 1.21958589553833, + "loss_ce": 0.0047421641647815704, + "loss_iou": 0.48046875, + "loss_num": 0.051025390625, + "loss_xval": 1.21875, + "num_input_tokens_seen": 581385052, + "step": 8692 + }, + { + "epoch": 0.9864397163120567, + "grad_norm": 15.615937232971191, + "learning_rate": 5e-05, + "loss": 1.2434, + "num_input_tokens_seen": 581453332, + "step": 8693 + }, + { + "epoch": 0.9864397163120567, + "loss": 1.2543408870697021, + "loss_ce": 0.006294041872024536, + "loss_iou": 0.47265625, + "loss_num": 0.061279296875, + "loss_xval": 1.25, + "num_input_tokens_seen": 581453332, + "step": 8693 + }, + { + "epoch": 0.9865531914893617, + "grad_norm": 23.925668716430664, + "learning_rate": 5e-05, + "loss": 1.1494, + "num_input_tokens_seen": 581519824, + "step": 8694 + }, + { + "epoch": 0.9865531914893617, + "loss": 1.0603634119033813, + "loss_ce": 0.005187593400478363, + "loss_iou": 0.392578125, + "loss_num": 0.05419921875, + "loss_xval": 1.0546875, + "num_input_tokens_seen": 581519824, + "step": 8694 + }, + { + "epoch": 0.9866666666666667, + "grad_norm": 30.580442428588867, + "learning_rate": 5e-05, + "loss": 1.1541, + "num_input_tokens_seen": 581586628, + "step": 8695 + }, + { + "epoch": 0.9866666666666667, + "loss": 1.4705288410186768, + "loss_ce": 0.004708532709628344, + "loss_iou": 0.56640625, + "loss_num": 0.06689453125, + "loss_xval": 1.46875, + "num_input_tokens_seen": 581586628, + "step": 8695 + }, + { + "epoch": 0.9867801418439717, + "grad_norm": 41.35539245605469, + "learning_rate": 5e-05, + "loss": 0.9205, + "num_input_tokens_seen": 581653344, + "step": 8696 + }, + { + "epoch": 0.9867801418439717, + "loss": 0.9185405373573303, + "loss_ce": 0.008628401905298233, + "loss_iou": 0.38671875, + "loss_num": 0.027587890625, + "loss_xval": 0.91015625, + "num_input_tokens_seen": 581653344, + "step": 8696 + }, + { + "epoch": 0.9868936170212766, + "grad_norm": 23.17797088623047, + "learning_rate": 5e-05, + "loss": 1.1407, + "num_input_tokens_seen": 581719232, + "step": 8697 + }, + { + "epoch": 0.9868936170212766, + "loss": 1.3273985385894775, + "loss_ce": 0.006109504960477352, + "loss_iou": 0.54296875, + "loss_num": 0.0478515625, + "loss_xval": 1.3203125, + "num_input_tokens_seen": 581719232, + "step": 8697 + }, + { + "epoch": 0.9870070921985815, + "grad_norm": 44.99117660522461, + "learning_rate": 5e-05, + "loss": 1.2095, + "num_input_tokens_seen": 581786264, + "step": 8698 + }, + { + "epoch": 0.9870070921985815, + "loss": 1.2237623929977417, + "loss_ce": 0.004524115473031998, + "loss_iou": 0.4921875, + "loss_num": 0.046875, + "loss_xval": 1.21875, + "num_input_tokens_seen": 581786264, + "step": 8698 + }, + { + "epoch": 0.9871205673758865, + "grad_norm": 37.15337371826172, + "learning_rate": 5e-05, + "loss": 1.3871, + "num_input_tokens_seen": 581852908, + "step": 8699 + }, + { + "epoch": 0.9871205673758865, + "loss": 1.5049020051956177, + "loss_ce": 0.012226244434714317, + "loss_iou": 0.58203125, + "loss_num": 0.06591796875, + "loss_xval": 1.4921875, + "num_input_tokens_seen": 581852908, + "step": 8699 + }, + { + "epoch": 0.9872340425531915, + "grad_norm": 26.925762176513672, + "learning_rate": 5e-05, + "loss": 1.2301, + "num_input_tokens_seen": 581920100, + "step": 8700 + }, + { + "epoch": 0.9872340425531915, + "loss": 1.20426607131958, + "loss_ce": 0.003582495031878352, + "loss_iou": 0.48828125, + "loss_num": 0.044677734375, + "loss_xval": 1.203125, + "num_input_tokens_seen": 581920100, + "step": 8700 + }, + { + "epoch": 0.9873475177304964, + "grad_norm": 36.51307678222656, + "learning_rate": 5e-05, + "loss": 1.1831, + "num_input_tokens_seen": 581987792, + "step": 8701 + }, + { + "epoch": 0.9873475177304964, + "loss": 1.127558708190918, + "loss_ce": 0.005976760759949684, + "loss_iou": 0.490234375, + "loss_num": 0.0279541015625, + "loss_xval": 1.125, + "num_input_tokens_seen": 581987792, + "step": 8701 + }, + { + "epoch": 0.9874609929078014, + "grad_norm": 47.803993225097656, + "learning_rate": 5e-05, + "loss": 1.0523, + "num_input_tokens_seen": 582055456, + "step": 8702 + }, + { + "epoch": 0.9874609929078014, + "loss": 1.1168845891952515, + "loss_ce": 0.007509537972509861, + "loss_iou": 0.466796875, + "loss_num": 0.035400390625, + "loss_xval": 1.109375, + "num_input_tokens_seen": 582055456, + "step": 8702 + }, + { + "epoch": 0.9875744680851064, + "grad_norm": 29.559162139892578, + "learning_rate": 5e-05, + "loss": 1.3447, + "num_input_tokens_seen": 582123072, + "step": 8703 + }, + { + "epoch": 0.9875744680851064, + "loss": 1.3649286031723022, + "loss_ce": 0.007018466480076313, + "loss_iou": 0.5703125, + "loss_num": 0.042724609375, + "loss_xval": 1.359375, + "num_input_tokens_seen": 582123072, + "step": 8703 + }, + { + "epoch": 0.9876879432624114, + "grad_norm": 30.53843116760254, + "learning_rate": 5e-05, + "loss": 1.1089, + "num_input_tokens_seen": 582190496, + "step": 8704 + }, + { + "epoch": 0.9876879432624114, + "loss": 1.0237255096435547, + "loss_ce": 0.00517090130597353, + "loss_iou": 0.435546875, + "loss_num": 0.029541015625, + "loss_xval": 1.015625, + "num_input_tokens_seen": 582190496, + "step": 8704 + }, + { + "epoch": 0.9878014184397164, + "grad_norm": 55.8323860168457, + "learning_rate": 5e-05, + "loss": 1.1526, + "num_input_tokens_seen": 582256868, + "step": 8705 + }, + { + "epoch": 0.9878014184397164, + "loss": 1.1725236177444458, + "loss_ce": 0.00455494225025177, + "loss_iou": 0.50390625, + "loss_num": 0.032958984375, + "loss_xval": 1.171875, + "num_input_tokens_seen": 582256868, + "step": 8705 + }, + { + "epoch": 0.9879148936170212, + "grad_norm": 14.720269203186035, + "learning_rate": 5e-05, + "loss": 1.0239, + "num_input_tokens_seen": 582323760, + "step": 8706 + }, + { + "epoch": 0.9879148936170212, + "loss": 0.9705126285552979, + "loss_ce": 0.004448156338185072, + "loss_iou": 0.390625, + "loss_num": 0.036865234375, + "loss_xval": 0.96484375, + "num_input_tokens_seen": 582323760, + "step": 8706 + }, + { + "epoch": 0.9880283687943262, + "grad_norm": 13.718938827514648, + "learning_rate": 5e-05, + "loss": 1.0965, + "num_input_tokens_seen": 582390924, + "step": 8707 + }, + { + "epoch": 0.9880283687943262, + "loss": 1.0534672737121582, + "loss_ce": 0.0041509391739964485, + "loss_iou": 0.44140625, + "loss_num": 0.033447265625, + "loss_xval": 1.046875, + "num_input_tokens_seen": 582390924, + "step": 8707 + }, + { + "epoch": 0.9881418439716312, + "grad_norm": 22.558231353759766, + "learning_rate": 5e-05, + "loss": 0.9653, + "num_input_tokens_seen": 582456888, + "step": 8708 + }, + { + "epoch": 0.9881418439716312, + "loss": 0.8930970430374146, + "loss_ce": 0.008331464603543282, + "loss_iou": 0.3828125, + "loss_num": 0.0235595703125, + "loss_xval": 0.8828125, + "num_input_tokens_seen": 582456888, + "step": 8708 + }, + { + "epoch": 0.9882553191489362, + "grad_norm": 51.55116271972656, + "learning_rate": 5e-05, + "loss": 1.1262, + "num_input_tokens_seen": 582524312, + "step": 8709 + }, + { + "epoch": 0.9882553191489362, + "loss": 0.999828577041626, + "loss_ce": 0.0065729510970413685, + "loss_iou": 0.4140625, + "loss_num": 0.03271484375, + "loss_xval": 0.9921875, + "num_input_tokens_seen": 582524312, + "step": 8709 + }, + { + "epoch": 0.9883687943262411, + "grad_norm": 30.36842918395996, + "learning_rate": 5e-05, + "loss": 1.1082, + "num_input_tokens_seen": 582590892, + "step": 8710 + }, + { + "epoch": 0.9883687943262411, + "loss": 1.202558159828186, + "loss_ce": 0.004804241470992565, + "loss_iou": 0.494140625, + "loss_num": 0.0419921875, + "loss_xval": 1.1953125, + "num_input_tokens_seen": 582590892, + "step": 8710 + }, + { + "epoch": 0.9884822695035461, + "grad_norm": 28.095518112182617, + "learning_rate": 5e-05, + "loss": 1.1407, + "num_input_tokens_seen": 582658044, + "step": 8711 + }, + { + "epoch": 0.9884822695035461, + "loss": 1.0926189422607422, + "loss_ce": 0.007169776596128941, + "loss_iou": 0.4453125, + "loss_num": 0.039306640625, + "loss_xval": 1.0859375, + "num_input_tokens_seen": 582658044, + "step": 8711 + }, + { + "epoch": 0.9885957446808511, + "grad_norm": 36.52587890625, + "learning_rate": 5e-05, + "loss": 1.1561, + "num_input_tokens_seen": 582725536, + "step": 8712 + }, + { + "epoch": 0.9885957446808511, + "loss": 1.2975038290023804, + "loss_ce": 0.005999917630106211, + "loss_iou": 0.4921875, + "loss_num": 0.0615234375, + "loss_xval": 1.2890625, + "num_input_tokens_seen": 582725536, + "step": 8712 + }, + { + "epoch": 0.9887092198581561, + "grad_norm": 77.5499496459961, + "learning_rate": 5e-05, + "loss": 1.0336, + "num_input_tokens_seen": 582792132, + "step": 8713 + }, + { + "epoch": 0.9887092198581561, + "loss": 1.0277628898620605, + "loss_ce": 0.0065226247534155846, + "loss_iou": 0.408203125, + "loss_num": 0.040771484375, + "loss_xval": 1.0234375, + "num_input_tokens_seen": 582792132, + "step": 8713 + }, + { + "epoch": 0.9888226950354609, + "grad_norm": 78.6885986328125, + "learning_rate": 5e-05, + "loss": 1.2137, + "num_input_tokens_seen": 582859316, + "step": 8714 + }, + { + "epoch": 0.9888226950354609, + "loss": 1.2425988912582397, + "loss_ce": 0.007735682651400566, + "loss_iou": 0.478515625, + "loss_num": 0.0556640625, + "loss_xval": 1.234375, + "num_input_tokens_seen": 582859316, + "step": 8714 + }, + { + "epoch": 0.9889361702127659, + "grad_norm": 24.649202346801758, + "learning_rate": 5e-05, + "loss": 1.2165, + "num_input_tokens_seen": 582925592, + "step": 8715 + }, + { + "epoch": 0.9889361702127659, + "loss": 1.2617425918579102, + "loss_ce": 0.007592152804136276, + "loss_iou": 0.5, + "loss_num": 0.051025390625, + "loss_xval": 1.2578125, + "num_input_tokens_seen": 582925592, + "step": 8715 + }, + { + "epoch": 0.9890496453900709, + "grad_norm": 27.599184036254883, + "learning_rate": 5e-05, + "loss": 1.0213, + "num_input_tokens_seen": 582992144, + "step": 8716 + }, + { + "epoch": 0.9890496453900709, + "loss": 0.9405179023742676, + "loss_ce": 0.0032620555721223354, + "loss_iou": 0.3828125, + "loss_num": 0.0341796875, + "loss_xval": 0.9375, + "num_input_tokens_seen": 582992144, + "step": 8716 + }, + { + "epoch": 0.9891631205673759, + "grad_norm": 32.6936149597168, + "learning_rate": 5e-05, + "loss": 1.1613, + "num_input_tokens_seen": 583059044, + "step": 8717 + }, + { + "epoch": 0.9891631205673759, + "loss": 1.1796581745147705, + "loss_ce": 0.009248043410480022, + "loss_iou": 0.43359375, + "loss_num": 0.060546875, + "loss_xval": 1.171875, + "num_input_tokens_seen": 583059044, + "step": 8717 + }, + { + "epoch": 0.9892765957446809, + "grad_norm": 30.122589111328125, + "learning_rate": 5e-05, + "loss": 1.2489, + "num_input_tokens_seen": 583126404, + "step": 8718 + }, + { + "epoch": 0.9892765957446809, + "loss": 1.3337130546569824, + "loss_ce": 0.007052934728562832, + "loss_iou": 0.5390625, + "loss_num": 0.05029296875, + "loss_xval": 1.328125, + "num_input_tokens_seen": 583126404, + "step": 8718 + }, + { + "epoch": 0.9893900709219858, + "grad_norm": 22.603837966918945, + "learning_rate": 5e-05, + "loss": 1.0565, + "num_input_tokens_seen": 583193040, + "step": 8719 + }, + { + "epoch": 0.9893900709219858, + "loss": 1.132197618484497, + "loss_ce": 0.004756229929625988, + "loss_iou": 0.412109375, + "loss_num": 0.060791015625, + "loss_xval": 1.125, + "num_input_tokens_seen": 583193040, + "step": 8719 + }, + { + "epoch": 0.9895035460992908, + "grad_norm": 31.150772094726562, + "learning_rate": 5e-05, + "loss": 1.1056, + "num_input_tokens_seen": 583259776, + "step": 8720 + }, + { + "epoch": 0.9895035460992908, + "loss": 1.0725200176239014, + "loss_ce": 0.005625536199659109, + "loss_iou": 0.41015625, + "loss_num": 0.04931640625, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 583259776, + "step": 8720 + }, + { + "epoch": 0.9896170212765958, + "grad_norm": 34.14767074584961, + "learning_rate": 5e-05, + "loss": 1.0167, + "num_input_tokens_seen": 583327248, + "step": 8721 + }, + { + "epoch": 0.9896170212765958, + "loss": 1.0817947387695312, + "loss_ce": 0.009040847420692444, + "loss_iou": 0.435546875, + "loss_num": 0.040283203125, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 583327248, + "step": 8721 + }, + { + "epoch": 0.9897304964539007, + "grad_norm": 31.270828247070312, + "learning_rate": 5e-05, + "loss": 1.1922, + "num_input_tokens_seen": 583394832, + "step": 8722 + }, + { + "epoch": 0.9897304964539007, + "loss": 1.2729637622833252, + "loss_ce": 0.004409132990986109, + "loss_iou": 0.5390625, + "loss_num": 0.037841796875, + "loss_xval": 1.265625, + "num_input_tokens_seen": 583394832, + "step": 8722 + }, + { + "epoch": 0.9898439716312056, + "grad_norm": 12.672893524169922, + "learning_rate": 5e-05, + "loss": 1.0085, + "num_input_tokens_seen": 583462080, + "step": 8723 + }, + { + "epoch": 0.9898439716312056, + "loss": 1.1039044857025146, + "loss_ce": 0.004783404525369406, + "loss_iou": 0.462890625, + "loss_num": 0.034423828125, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 583462080, + "step": 8723 + }, + { + "epoch": 0.9899574468085106, + "grad_norm": 12.884184837341309, + "learning_rate": 5e-05, + "loss": 0.9793, + "num_input_tokens_seen": 583529588, + "step": 8724 + }, + { + "epoch": 0.9899574468085106, + "loss": 0.9448401927947998, + "loss_ce": 0.004898753948509693, + "loss_iou": 0.38671875, + "loss_num": 0.03369140625, + "loss_xval": 0.94140625, + "num_input_tokens_seen": 583529588, + "step": 8724 + }, + { + "epoch": 0.9900709219858156, + "grad_norm": 20.64093589782715, + "learning_rate": 5e-05, + "loss": 1.1025, + "num_input_tokens_seen": 583596572, + "step": 8725 + }, + { + "epoch": 0.9900709219858156, + "loss": 0.6248800754547119, + "loss_ce": 0.0045187631621956825, + "loss_iou": 0.271484375, + "loss_num": 0.015625, + "loss_xval": 0.62109375, + "num_input_tokens_seen": 583596572, + "step": 8725 + }, + { + "epoch": 0.9901843971631206, + "grad_norm": 21.374069213867188, + "learning_rate": 5e-05, + "loss": 0.7811, + "num_input_tokens_seen": 583662816, + "step": 8726 + }, + { + "epoch": 0.9901843971631206, + "loss": 0.8241082429885864, + "loss_ce": 0.007457836996763945, + "loss_iou": 0.34375, + "loss_num": 0.025634765625, + "loss_xval": 0.81640625, + "num_input_tokens_seen": 583662816, + "step": 8726 + }, + { + "epoch": 0.9902978723404255, + "grad_norm": 24.966934204101562, + "learning_rate": 5e-05, + "loss": 1.0279, + "num_input_tokens_seen": 583729092, + "step": 8727 + }, + { + "epoch": 0.9902978723404255, + "loss": 1.1159780025482178, + "loss_ce": 0.016124412417411804, + "loss_iou": 0.42578125, + "loss_num": 0.050048828125, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 583729092, + "step": 8727 + }, + { + "epoch": 0.9904113475177305, + "grad_norm": 39.59864044189453, + "learning_rate": 5e-05, + "loss": 1.2404, + "num_input_tokens_seen": 583795916, + "step": 8728 + }, + { + "epoch": 0.9904113475177305, + "loss": 1.430055856704712, + "loss_ce": 0.01013404130935669, + "loss_iou": 0.55078125, + "loss_num": 0.0634765625, + "loss_xval": 1.421875, + "num_input_tokens_seen": 583795916, + "step": 8728 + }, + { + "epoch": 0.9905248226950355, + "grad_norm": 34.236976623535156, + "learning_rate": 5e-05, + "loss": 1.1783, + "num_input_tokens_seen": 583863384, + "step": 8729 + }, + { + "epoch": 0.9905248226950355, + "loss": 1.1577889919281006, + "loss_ce": 0.004468725994229317, + "loss_iou": 0.474609375, + "loss_num": 0.041015625, + "loss_xval": 1.15625, + "num_input_tokens_seen": 583863384, + "step": 8729 + }, + { + "epoch": 0.9906382978723405, + "grad_norm": 38.31716537475586, + "learning_rate": 5e-05, + "loss": 1.2907, + "num_input_tokens_seen": 583929952, + "step": 8730 + }, + { + "epoch": 0.9906382978723405, + "loss": 1.360064148902893, + "loss_ce": 0.006548580713570118, + "loss_iou": 0.53515625, + "loss_num": 0.05615234375, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 583929952, + "step": 8730 + }, + { + "epoch": 0.9907517730496453, + "grad_norm": 35.248226165771484, + "learning_rate": 5e-05, + "loss": 1.3746, + "num_input_tokens_seen": 583997484, + "step": 8731 + }, + { + "epoch": 0.9907517730496453, + "loss": 1.407366156578064, + "loss_ce": 0.009416955523192883, + "loss_iou": 0.546875, + "loss_num": 0.060546875, + "loss_xval": 1.3984375, + "num_input_tokens_seen": 583997484, + "step": 8731 + }, + { + "epoch": 0.9908652482269503, + "grad_norm": 19.60110855102539, + "learning_rate": 5e-05, + "loss": 0.9107, + "num_input_tokens_seen": 584063868, + "step": 8732 + }, + { + "epoch": 0.9908652482269503, + "loss": 1.1815364360809326, + "loss_ce": 0.014056013897061348, + "loss_iou": 0.435546875, + "loss_num": 0.059326171875, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 584063868, + "step": 8732 + }, + { + "epoch": 0.9909787234042553, + "grad_norm": 21.523921966552734, + "learning_rate": 5e-05, + "loss": 1.0459, + "num_input_tokens_seen": 584130568, + "step": 8733 + }, + { + "epoch": 0.9909787234042553, + "loss": 1.1724088191986084, + "loss_ce": 0.005904960446059704, + "loss_iou": 0.47265625, + "loss_num": 0.04443359375, + "loss_xval": 1.1640625, + "num_input_tokens_seen": 584130568, + "step": 8733 + }, + { + "epoch": 0.9910921985815603, + "grad_norm": 21.10859489440918, + "learning_rate": 5e-05, + "loss": 1.015, + "num_input_tokens_seen": 584195772, + "step": 8734 + }, + { + "epoch": 0.9910921985815603, + "loss": 0.9184207916259766, + "loss_ce": 0.006799725815653801, + "loss_iou": 0.369140625, + "loss_num": 0.034912109375, + "loss_xval": 0.91015625, + "num_input_tokens_seen": 584195772, + "step": 8734 + }, + { + "epoch": 0.9912056737588653, + "grad_norm": 42.29351043701172, + "learning_rate": 5e-05, + "loss": 1.2674, + "num_input_tokens_seen": 584262864, + "step": 8735 + }, + { + "epoch": 0.9912056737588653, + "loss": 1.3603076934814453, + "loss_ce": 0.006792091764509678, + "loss_iou": 0.55859375, + "loss_num": 0.048095703125, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 584262864, + "step": 8735 + }, + { + "epoch": 0.9913191489361702, + "grad_norm": 48.49894332885742, + "learning_rate": 5e-05, + "loss": 1.1488, + "num_input_tokens_seen": 584329824, + "step": 8736 + }, + { + "epoch": 0.9913191489361702, + "loss": 1.0712206363677979, + "loss_ce": 0.00359365064650774, + "loss_iou": 0.4375, + "loss_num": 0.038330078125, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 584329824, + "step": 8736 + }, + { + "epoch": 0.9914326241134752, + "grad_norm": 25.10237693786621, + "learning_rate": 5e-05, + "loss": 1.253, + "num_input_tokens_seen": 584396748, + "step": 8737 + }, + { + "epoch": 0.9914326241134752, + "loss": 1.158725619316101, + "loss_ce": 0.005893521010875702, + "loss_iou": 0.455078125, + "loss_num": 0.048583984375, + "loss_xval": 1.15625, + "num_input_tokens_seen": 584396748, + "step": 8737 + }, + { + "epoch": 0.9915460992907802, + "grad_norm": 18.04346466064453, + "learning_rate": 5e-05, + "loss": 1.1899, + "num_input_tokens_seen": 584463864, + "step": 8738 + }, + { + "epoch": 0.9915460992907802, + "loss": 1.1974914073944092, + "loss_ce": 0.006817486137151718, + "loss_iou": 0.486328125, + "loss_num": 0.04345703125, + "loss_xval": 1.1875, + "num_input_tokens_seen": 584463864, + "step": 8738 + }, + { + "epoch": 0.9916595744680851, + "grad_norm": 32.3076286315918, + "learning_rate": 5e-05, + "loss": 1.105, + "num_input_tokens_seen": 584530776, + "step": 8739 + }, + { + "epoch": 0.9916595744680851, + "loss": 1.1503781080245972, + "loss_ce": 0.005358581896871328, + "loss_iou": 0.455078125, + "loss_num": 0.046630859375, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 584530776, + "step": 8739 + }, + { + "epoch": 0.99177304964539, + "grad_norm": 33.04276657104492, + "learning_rate": 5e-05, + "loss": 1.3656, + "num_input_tokens_seen": 584597696, + "step": 8740 + }, + { + "epoch": 0.99177304964539, + "loss": 1.3221352100372314, + "loss_ce": 0.005728965625166893, + "loss_iou": 0.5390625, + "loss_num": 0.04736328125, + "loss_xval": 1.3125, + "num_input_tokens_seen": 584597696, + "step": 8740 + }, + { + "epoch": 0.991886524822695, + "grad_norm": 15.57834243774414, + "learning_rate": 5e-05, + "loss": 0.9764, + "num_input_tokens_seen": 584663912, + "step": 8741 + }, + { + "epoch": 0.991886524822695, + "loss": 1.109291434288025, + "loss_ce": 0.006264058407396078, + "loss_iou": 0.435546875, + "loss_num": 0.046142578125, + "loss_xval": 1.1015625, + "num_input_tokens_seen": 584663912, + "step": 8741 + }, + { + "epoch": 0.992, + "grad_norm": 20.02033805847168, + "learning_rate": 5e-05, + "loss": 1.0432, + "num_input_tokens_seen": 584728804, + "step": 8742 + }, + { + "epoch": 0.992, + "loss": 1.1778788566589355, + "loss_ce": 0.0025859333109110594, + "loss_iou": 0.48828125, + "loss_num": 0.03955078125, + "loss_xval": 1.171875, + "num_input_tokens_seen": 584728804, + "step": 8742 + }, + { + "epoch": 0.992113475177305, + "grad_norm": 24.79621696472168, + "learning_rate": 5e-05, + "loss": 1.1468, + "num_input_tokens_seen": 584796516, + "step": 8743 + }, + { + "epoch": 0.992113475177305, + "loss": 1.0772192478179932, + "loss_ce": 0.007150935009121895, + "loss_iou": 0.43359375, + "loss_num": 0.04052734375, + "loss_xval": 1.0703125, + "num_input_tokens_seen": 584796516, + "step": 8743 + }, + { + "epoch": 0.99222695035461, + "grad_norm": 27.777502059936523, + "learning_rate": 5e-05, + "loss": 1.1213, + "num_input_tokens_seen": 584864216, + "step": 8744 + }, + { + "epoch": 0.99222695035461, + "loss": 1.114070177078247, + "loss_ce": 0.004695225041359663, + "loss_iou": 0.42578125, + "loss_num": 0.051025390625, + "loss_xval": 1.109375, + "num_input_tokens_seen": 584864216, + "step": 8744 + }, + { + "epoch": 0.9923404255319149, + "grad_norm": 32.92253494262695, + "learning_rate": 5e-05, + "loss": 1.1275, + "num_input_tokens_seen": 584930796, + "step": 8745 + }, + { + "epoch": 0.9923404255319149, + "loss": 1.0465996265411377, + "loss_ce": 0.005461869295686483, + "loss_iou": 0.40625, + "loss_num": 0.045654296875, + "loss_xval": 1.0390625, + "num_input_tokens_seen": 584930796, + "step": 8745 + }, + { + "epoch": 0.9924539007092199, + "grad_norm": 30.522188186645508, + "learning_rate": 5e-05, + "loss": 1.3186, + "num_input_tokens_seen": 584998508, + "step": 8746 + }, + { + "epoch": 0.9924539007092199, + "loss": 1.3690749406814575, + "loss_ce": 0.014582769945263863, + "loss_iou": 0.5390625, + "loss_num": 0.0546875, + "loss_xval": 1.3515625, + "num_input_tokens_seen": 584998508, + "step": 8746 + }, + { + "epoch": 0.9925673758865248, + "grad_norm": 22.92950439453125, + "learning_rate": 5e-05, + "loss": 1.2013, + "num_input_tokens_seen": 585065244, + "step": 8747 + }, + { + "epoch": 0.9925673758865248, + "loss": 1.0785399675369263, + "loss_ce": 0.00383294140920043, + "loss_iou": 0.419921875, + "loss_num": 0.046630859375, + "loss_xval": 1.078125, + "num_input_tokens_seen": 585065244, + "step": 8747 + }, + { + "epoch": 0.9926808510638298, + "grad_norm": 24.98076820373535, + "learning_rate": 5e-05, + "loss": 1.3266, + "num_input_tokens_seen": 585133132, + "step": 8748 + }, + { + "epoch": 0.9926808510638298, + "loss": 1.1530345678329468, + "loss_ce": 0.00532954279333353, + "loss_iou": 0.4609375, + "loss_num": 0.044921875, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 585133132, + "step": 8748 + }, + { + "epoch": 0.9927943262411347, + "grad_norm": 38.450775146484375, + "learning_rate": 5e-05, + "loss": 1.2259, + "num_input_tokens_seen": 585198628, + "step": 8749 + }, + { + "epoch": 0.9927943262411347, + "loss": 1.153802752494812, + "loss_ce": 0.0025576534681022167, + "loss_iou": 0.443359375, + "loss_num": 0.052978515625, + "loss_xval": 1.1484375, + "num_input_tokens_seen": 585198628, + "step": 8749 + }, + { + "epoch": 0.9929078014184397, + "grad_norm": 32.924560546875, + "learning_rate": 5e-05, + "loss": 1.2287, + "num_input_tokens_seen": 585265164, + "step": 8750 + }, + { + "epoch": 0.9929078014184397, + "eval_seeclick_CIoU": 0.37156708538532257, + "eval_seeclick_GIoU": 0.3655627965927124, + "eval_seeclick_IoU": 0.45458798110485077, + "eval_seeclick_MAE_all": 0.15972497314214706, + "eval_seeclick_MAE_h": 0.07422017119824886, + "eval_seeclick_MAE_w": 0.11219105497002602, + "eval_seeclick_MAE_x_boxes": 0.26565732061862946, + "eval_seeclick_MAE_y_boxes": 0.09619405120611191, + "eval_seeclick_NUM_probability": 0.9999047219753265, + "eval_seeclick_inside_bbox": 0.7083333432674408, + "eval_seeclick_loss": 2.4230947494506836, + "eval_seeclick_loss_ce": 0.013850783929228783, + "eval_seeclick_loss_iou": 0.8037109375, + "eval_seeclick_loss_num": 0.154876708984375, + "eval_seeclick_loss_xval": 2.382080078125, + "eval_seeclick_runtime": 66.0039, + "eval_seeclick_samples_per_second": 0.712, + "eval_seeclick_steps_per_second": 0.03, + "num_input_tokens_seen": 585265164, + "step": 8750 + }, + { + "epoch": 0.9929078014184397, + "eval_icons_CIoU": 0.4868471026420593, + "eval_icons_GIoU": 0.5014166533946991, + "eval_icons_IoU": 0.5300910174846649, + "eval_icons_MAE_all": 0.14084957540035248, + "eval_icons_MAE_h": 0.05935008078813553, + "eval_icons_MAE_w": 0.16302501410245895, + "eval_icons_MAE_x_boxes": 0.13698233664035797, + "eval_icons_MAE_y_boxes": 0.04617361072450876, + "eval_icons_NUM_probability": 0.999902606010437, + "eval_icons_inside_bbox": 0.7482638955116272, + "eval_icons_loss": 2.2825546264648438, + "eval_icons_loss_ce": 5.3063877203385346e-05, + "eval_icons_loss_iou": 0.79150390625, + "eval_icons_loss_num": 0.1401691436767578, + "eval_icons_loss_xval": 2.283203125, + "eval_icons_runtime": 67.4678, + "eval_icons_samples_per_second": 0.741, + "eval_icons_steps_per_second": 0.03, + "num_input_tokens_seen": 585265164, + "step": 8750 + }, + { + "epoch": 0.9929078014184397, + "eval_screenspot_CIoU": 0.22758279740810394, + "eval_screenspot_GIoU": 0.18054530769586563, + "eval_screenspot_IoU": 0.33920039733250934, + "eval_screenspot_MAE_all": 0.23607088128725687, + "eval_screenspot_MAE_h": 0.12164188176393509, + "eval_screenspot_MAE_w": 0.1967698484659195, + "eval_screenspot_MAE_x_boxes": 0.33560438454151154, + "eval_screenspot_MAE_y_boxes": 0.13215451190869013, + "eval_screenspot_NUM_probability": 0.999484638373057, + "eval_screenspot_inside_bbox": 0.5379166603088379, + "eval_screenspot_loss": 3.1118109226226807, + "eval_screenspot_loss_ce": 0.013267907624443373, + "eval_screenspot_loss_iou": 0.9646809895833334, + "eval_screenspot_loss_num": 0.24212646484375, + "eval_screenspot_loss_xval": 3.1412760416666665, + "eval_screenspot_runtime": 128.8549, + "eval_screenspot_samples_per_second": 0.691, + "eval_screenspot_steps_per_second": 0.023, + "num_input_tokens_seen": 585265164, + "step": 8750 + }, + { + "epoch": 0.9929078014184397, + "eval_compot_CIoU": 0.29860712587833405, + "eval_compot_GIoU": 0.2718268781900406, + "eval_compot_IoU": 0.3902575820684433, + "eval_compot_MAE_all": 0.21605830639600754, + "eval_compot_MAE_h": 0.09608334675431252, + "eval_compot_MAE_w": 0.24660079181194305, + "eval_compot_MAE_x_boxes": 0.21571293473243713, + "eval_compot_MAE_y_boxes": 0.10388953238725662, + "eval_compot_NUM_probability": 0.9985102713108063, + "eval_compot_inside_bbox": 0.5625, + "eval_compot_loss": 2.933548927307129, + "eval_compot_loss_ce": 0.009341648314148188, + "eval_compot_loss_iou": 0.945068359375, + "eval_compot_loss_num": 0.20684814453125, + "eval_compot_loss_xval": 2.9248046875, + "eval_compot_runtime": 72.1142, + "eval_compot_samples_per_second": 0.693, + "eval_compot_steps_per_second": 0.028, + "num_input_tokens_seen": 585265164, + "step": 8750 + } + ], + "logging_steps": 1.0, + "max_steps": 8812, + "num_input_tokens_seen": 585265164, + "num_train_epochs": 1, + "save_steps": 250, + "stateful_callbacks": { + "TrainerControl": { + "args": { + "should_epoch_stop": false, + "should_evaluate": false, + "should_log": false, + "should_save": true, + "should_training_stop": false + }, + "attributes": {} + } + }, + "total_flos": 2.7227643503615934e+19, + "train_batch_size": 4, + "trial_name": null, + "trial_params": null +}