|
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 1.3020833333333333,
  "eval_steps": 250,
  "global_step": 500,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0026041666666666665,
      "grad_norm": 29.526924338468568,
      "learning_rate": 5e-06,
      "loss": 0.7869,
      "num_input_tokens_seen": 172856,
      "step": 1
    },
    {
      "epoch": 0.0026041666666666665,
      "loss": 0.8704751133918762,
      "loss_ce": 0.5689004063606262,
      "loss_iou": 0.435546875,
      "loss_num": 0.060302734375,
      "loss_xval": 0.30078125,
      "num_input_tokens_seen": 172856,
      "step": 1
    },
    {
      "epoch": 0.005208333333333333,
      "grad_norm": 70.41913440760779,
      "learning_rate": 5e-06,
      "loss": 0.4046,
      "num_input_tokens_seen": 345648,
      "step": 2
    },
    {
      "epoch": 0.005208333333333333,
      "loss": 0.36316120624542236,
      "loss_ce": 0.12207232415676117,
      "loss_iou": 0.52734375,
      "loss_num": 0.048095703125,
      "loss_xval": 0.2412109375,
      "num_input_tokens_seen": 345648,
      "step": 2
    },
    {
      "epoch": 0.0078125,
      "grad_norm": 27.92164379971255,
      "learning_rate": 5e-06,
      "loss": 1.9793,
      "num_input_tokens_seen": 518228,
      "step": 3
    },
    {
      "epoch": 0.0078125,
      "loss": 1.6886000633239746,
      "loss_ce": 0.06360010802745819,
      "loss_iou": 0.98828125,
      "loss_num": 0.32421875,
      "loss_xval": 1.625,
      "num_input_tokens_seen": 518228,
      "step": 3
    },
    {
      "epoch": 0.010416666666666666,
      "grad_norm": 8.939505658432642,
      "learning_rate": 5e-06,
      "loss": 0.3657,
      "num_input_tokens_seen": 690760,
      "step": 4
    },
    {
      "epoch": 0.010416666666666666,
      "loss": 0.330140084028244,
      "loss_ce": 0.023987744003534317,
      "loss_iou": 0.154296875,
      "loss_num": 0.061279296875,
      "loss_xval": 0.306640625,
      "num_input_tokens_seen": 690760,
      "step": 4
    },
    {
      "epoch": 0.013020833333333334,
      "grad_norm": 19.28831412717169,
      "learning_rate": 5e-06,
      "loss": 0.468,
      "num_input_tokens_seen": 863320,
      "step": 5
    },
    {
      "epoch": 0.013020833333333334,
      "loss": 0.5520405769348145,
      "loss_ce": 0.0034565767273306847,
      "loss_iou": 0.162109375,
      "loss_num": 0.10986328125,
      "loss_xval": 0.546875,
      "num_input_tokens_seen": 863320,
      "step": 5
    },
    {
      "epoch": 0.015625,
      "grad_norm": 10.714818321426886,
      "learning_rate": 5e-06,
      "loss": 0.457,
      "num_input_tokens_seen": 1035776,
      "step": 6
    },
    {
      "epoch": 0.015625,
      "loss": 0.5038242340087891,
      "loss_ce": 0.0006504527991637588,
      "loss_iou": 0.0,
      "loss_num": 0.1005859375,
      "loss_xval": 0.50390625,
      "num_input_tokens_seen": 1035776,
      "step": 6
    },
    {
      "epoch": 0.018229166666666668,
      "grad_norm": 21.33070900107311,
      "learning_rate": 5e-06,
      "loss": 0.3384,
      "num_input_tokens_seen": 1208264,
      "step": 7
    },
    {
      "epoch": 0.018229166666666668,
      "loss": 0.3143744468688965,
      "loss_ce": 0.0038276039995253086,
      "loss_iou": 0.25390625,
      "loss_num": 0.06201171875,
      "loss_xval": 0.310546875,
      "num_input_tokens_seen": 1208264,
      "step": 7
    },
    {
      "epoch": 0.020833333333333332,
      "grad_norm": 15.620099673180961,
      "learning_rate": 5e-06,
      "loss": 0.3601,
      "num_input_tokens_seen": 1380784,
      "step": 8
    },
    {
      "epoch": 0.020833333333333332,
      "loss": 0.37209784984588623,
      "loss_ce": 0.0007599706877954304,
      "loss_iou": 0.0,
      "loss_num": 0.07421875,
      "loss_xval": 0.37109375,
      "num_input_tokens_seen": 1380784,
      "step": 8
    },
    {
      "epoch": 0.0234375,
      "grad_norm": 8.787794677847923,
      "learning_rate": 5e-06,
      "loss": 0.2684,
      "num_input_tokens_seen": 1553796,
      "step": 9
    },
    {
      "epoch": 0.0234375,
      "loss": 0.20013384521007538,
      "loss_ce": 0.0006099226884543896,
      "loss_iou": 0.427734375,
      "loss_num": 0.039794921875,
      "loss_xval": 0.19921875,
      "num_input_tokens_seen": 1553796,
      "step": 9
    },
    {
      "epoch": 0.026041666666666668,
      "grad_norm": 8.085511029078585,
      "learning_rate": 5e-06,
      "loss": 0.2969,
      "num_input_tokens_seen": 1726712,
      "step": 10
    },
    {
      "epoch": 0.026041666666666668,
      "loss": 0.2954822778701782,
      "loss_ce": 0.0004383414634503424,
      "loss_iou": 0.0,
      "loss_num": 0.05908203125,
      "loss_xval": 0.294921875,
      "num_input_tokens_seen": 1726712,
      "step": 10
    },
    {
      "epoch": 0.028645833333333332,
      "grad_norm": 19.923996243710985,
      "learning_rate": 5e-06,
      "loss": 0.3582,
      "num_input_tokens_seen": 1898600,
      "step": 11
    },
    {
      "epoch": 0.028645833333333332,
      "loss": 0.3439289331436157,
      "loss_ce": 0.00030101489392109215,
      "loss_iou": 0.166015625,
      "loss_num": 0.06884765625,
      "loss_xval": 0.34375,
      "num_input_tokens_seen": 1898600,
      "step": 11
    },
    {
      "epoch": 0.03125,
      "grad_norm": 8.414953842541747,
      "learning_rate": 5e-06,
      "loss": 0.2661,
      "num_input_tokens_seen": 2071032,
      "step": 12
    },
    {
      "epoch": 0.03125,
      "loss": 0.2936800718307495,
      "loss_ce": 0.0003450897347647697,
      "loss_iou": 0.23828125,
      "loss_num": 0.05859375,
      "loss_xval": 0.29296875,
      "num_input_tokens_seen": 2071032,
      "step": 12
    },
    {
      "epoch": 0.033854166666666664,
      "grad_norm": 19.99273085290305,
      "learning_rate": 5e-06,
      "loss": 0.3361,
      "num_input_tokens_seen": 2243868,
      "step": 13
    },
    {
      "epoch": 0.033854166666666664,
      "loss": 0.31856128573417664,
      "loss_ce": 0.00044603750575333834,
      "loss_iou": 0.22265625,
      "loss_num": 0.0634765625,
      "loss_xval": 0.318359375,
      "num_input_tokens_seen": 2243868,
      "step": 13
    },
    {
      "epoch": 0.036458333333333336,
      "grad_norm": 8.014341595032883,
      "learning_rate": 5e-06,
      "loss": 0.2489,
      "num_input_tokens_seen": 2415868,
      "step": 14
    },
    {
      "epoch": 0.036458333333333336,
      "loss": 0.17592763900756836,
      "loss_ce": 0.00026846557739190757,
      "loss_iou": 0.578125,
      "loss_num": 0.03515625,
      "loss_xval": 0.17578125,
      "num_input_tokens_seen": 2415868,
      "step": 14
    },
    {
      "epoch": 0.0390625,
      "grad_norm": 14.081056874922753,
      "learning_rate": 5e-06,
      "loss": 0.2469,
      "num_input_tokens_seen": 2588144,
      "step": 15
    },
    {
      "epoch": 0.0390625,
      "loss": 0.3065241575241089,
      "loss_ce": 0.00037182882078923285,
      "loss_iou": 0.298828125,
      "loss_num": 0.061279296875,
      "loss_xval": 0.306640625,
      "num_input_tokens_seen": 2588144,
      "step": 15
    },
    {
      "epoch": 0.041666666666666664,
      "grad_norm": 6.867314736910267,
      "learning_rate": 5e-06,
      "loss": 0.3062,
      "num_input_tokens_seen": 2760456,
      "step": 16
    },
    {
      "epoch": 0.041666666666666664,
      "loss": 0.4008222818374634,
      "loss_ce": 0.0002485612640157342,
      "loss_iou": 0.0,
      "loss_num": 0.080078125,
      "loss_xval": 0.400390625,
      "num_input_tokens_seen": 2760456,
      "step": 16
    },
    {
      "epoch": 0.044270833333333336,
      "grad_norm": 6.841838623253362,
      "learning_rate": 5e-06,
      "loss": 0.195,
      "num_input_tokens_seen": 2933256,
      "step": 17
    },
    {
      "epoch": 0.044270833333333336,
      "loss": 0.20536868274211884,
      "loss_ce": 0.00022953077859710902,
      "loss_iou": 0.53515625,
      "loss_num": 0.041015625,
      "loss_xval": 0.205078125,
      "num_input_tokens_seen": 2933256,
      "step": 17
    },
    {
      "epoch": 0.046875,
      "grad_norm": 8.073482751973284,
      "learning_rate": 5e-06,
      "loss": 0.2924,
      "num_input_tokens_seen": 3105724,
      "step": 18
    },
    {
      "epoch": 0.046875,
      "loss": 0.2312113493680954,
      "loss_ce": 0.000376395124476403,
      "loss_iou": 0.26953125,
      "loss_num": 0.046142578125,
      "loss_xval": 0.23046875,
      "num_input_tokens_seen": 3105724,
      "step": 18
    },
    {
      "epoch": 0.049479166666666664,
      "grad_norm": 7.523822902492111,
      "learning_rate": 5e-06,
      "loss": 0.152,
      "num_input_tokens_seen": 3278360,
      "step": 19
    },
    {
      "epoch": 0.049479166666666664,
      "loss": 0.14852207899093628,
      "loss_ce": 0.00020665550255216658,
      "loss_iou": 0.45703125,
      "loss_num": 0.0296630859375,
      "loss_xval": 0.1484375,
      "num_input_tokens_seen": 3278360,
      "step": 19
    },
    {
      "epoch": 0.052083333333333336,
      "grad_norm": 6.544223523818296,
      "learning_rate": 5e-06,
      "loss": 0.2287,
      "num_input_tokens_seen": 3450936,
      "step": 20
    },
    {
      "epoch": 0.052083333333333336,
      "loss": 0.28778478503227234,
      "loss_ce": 0.00030919513665139675,
      "loss_iou": 0.453125,
      "loss_num": 0.0576171875,
      "loss_xval": 0.287109375,
      "num_input_tokens_seen": 3450936,
      "step": 20
    },
    {
      "epoch": 0.0546875,
      "grad_norm": 11.63193790977644,
      "learning_rate": 5e-06,
      "loss": 0.1695,
      "num_input_tokens_seen": 3623740,
      "step": 21
    },
    {
      "epoch": 0.0546875,
      "loss": 0.19103749096393585,
      "loss_ce": 0.00018056559201795608,
      "loss_iou": 0.306640625,
      "loss_num": 0.0380859375,
      "loss_xval": 0.1904296875,
      "num_input_tokens_seen": 3623740,
      "step": 21
    },
    {
      "epoch": 0.057291666666666664,
      "grad_norm": 7.497321698776006,
      "learning_rate": 5e-06,
      "loss": 0.1598,
      "num_input_tokens_seen": 3796836,
      "step": 22
    },
    {
      "epoch": 0.057291666666666664,
      "loss": 0.1259785294532776,
      "loss_ce": 0.00018507592903915793,
      "loss_iou": 0.5078125,
      "loss_num": 0.025146484375,
      "loss_xval": 0.1259765625,
      "num_input_tokens_seen": 3796836,
      "step": 22
    },
    {
      "epoch": 0.059895833333333336,
      "grad_norm": 30.78448133351319,
      "learning_rate": 5e-06,
      "loss": 0.1943,
      "num_input_tokens_seen": 3969500,
      "step": 23
    },
    {
      "epoch": 0.059895833333333336,
      "loss": 0.15796104073524475,
      "loss_ce": 0.0006429227069020271,
      "loss_iou": 0.55859375,
      "loss_num": 0.031494140625,
      "loss_xval": 0.1572265625,
      "num_input_tokens_seen": 3969500,
      "step": 23
    },
    {
      "epoch": 0.0625,
      "grad_norm": 5.237378782764295,
      "learning_rate": 5e-06,
      "loss": 0.1909,
      "num_input_tokens_seen": 4141940,
      "step": 24
    },
    {
      "epoch": 0.0625,
      "loss": 0.17854130268096924,
      "loss_ce": 0.00025760685093700886,
      "loss_iou": 0.4296875,
      "loss_num": 0.03564453125,
      "loss_xval": 0.1787109375,
      "num_input_tokens_seen": 4141940,
      "step": 24
    },
    {
      "epoch": 0.06510416666666667,
      "grad_norm": 13.60608392035419,
      "learning_rate": 5e-06,
      "loss": 0.1772,
      "num_input_tokens_seen": 4314172,
      "step": 25
    },
    {
      "epoch": 0.06510416666666667,
      "loss": 0.19210708141326904,
      "loss_ce": 0.0002125638711731881,
      "loss_iou": 0.484375,
      "loss_num": 0.038330078125,
      "loss_xval": 0.19140625,
      "num_input_tokens_seen": 4314172,
      "step": 25
    },
    {
      "epoch": 0.06770833333333333,
      "grad_norm": 7.390014761942961,
      "learning_rate": 5e-06,
      "loss": 0.2151,
      "num_input_tokens_seen": 4486776,
      "step": 26
    },
    {
      "epoch": 0.06770833333333333,
      "loss": 0.24654103815555573,
      "loss_ce": 0.00044729292858392,
      "loss_iou": 0.53125,
      "loss_num": 0.04931640625,
      "loss_xval": 0.24609375,
      "num_input_tokens_seen": 4486776,
      "step": 26
    },
    {
      "epoch": 0.0703125,
      "grad_norm": 6.597800961680885,
      "learning_rate": 5e-06,
      "loss": 0.1804,
      "num_input_tokens_seen": 4659796,
      "step": 27
    },
    {
      "epoch": 0.0703125,
      "loss": 0.18685418367385864,
      "loss_ce": 0.00020867137936875224,
      "loss_iou": 0.546875,
      "loss_num": 0.037353515625,
      "loss_xval": 0.1865234375,
      "num_input_tokens_seen": 4659796,
      "step": 27
    },
    {
      "epoch": 0.07291666666666667,
      "grad_norm": 15.848602164160235,
      "learning_rate": 5e-06,
      "loss": 0.1935,
      "num_input_tokens_seen": 4832580,
      "step": 28
    },
    {
      "epoch": 0.07291666666666667,
      "loss": 0.1529167890548706,
      "loss_ce": 0.00038992700865492225,
      "loss_iou": 0.453125,
      "loss_num": 0.030517578125,
      "loss_xval": 0.15234375,
      "num_input_tokens_seen": 4832580,
      "step": 28
    },
    {
      "epoch": 0.07552083333333333,
      "grad_norm": 7.656983950050504,
      "learning_rate": 5e-06,
      "loss": 0.2008,
      "num_input_tokens_seen": 5005204,
      "step": 29
    },
    {
      "epoch": 0.07552083333333333,
      "loss": 0.26389509439468384,
      "loss_ce": 0.00028428525547496974,
      "loss_iou": 0.484375,
      "loss_num": 0.052734375,
      "loss_xval": 0.263671875,
      "num_input_tokens_seen": 5005204,
      "step": 29
    },
    {
      "epoch": 0.078125,
      "grad_norm": 4.507917280431056,
      "learning_rate": 5e-06,
      "loss": 0.1874,
      "num_input_tokens_seen": 5177580,
      "step": 30
    },
    {
      "epoch": 0.078125,
      "loss": 0.13707002997398376,
      "loss_ce": 0.0004123126564081758,
      "loss_iou": 0.515625,
      "loss_num": 0.02734375,
      "loss_xval": 0.13671875,
      "num_input_tokens_seen": 5177580,
      "step": 30
    },
    {
      "epoch": 0.08072916666666667,
      "grad_norm": 10.885923079707904,
      "learning_rate": 5e-06,
      "loss": 0.2153,
      "num_input_tokens_seen": 5350148,
      "step": 31
    },
    {
      "epoch": 0.08072916666666667,
      "loss": 0.2522280216217041,
      "loss_ce": 0.0002138598938472569,
      "loss_iou": 0.59375,
      "loss_num": 0.050537109375,
      "loss_xval": 0.251953125,
      "num_input_tokens_seen": 5350148,
      "step": 31
    },
    {
      "epoch": 0.08333333333333333,
      "grad_norm": 5.223864875647863,
      "learning_rate": 5e-06,
      "loss": 0.2242,
      "num_input_tokens_seen": 5522620,
      "step": 32
    },
    {
      "epoch": 0.08333333333333333,
      "loss": 0.22422021627426147,
      "loss_ce": 0.0002517293323762715,
      "loss_iou": 0.69921875,
      "loss_num": 0.044677734375,
      "loss_xval": 0.2236328125,
      "num_input_tokens_seen": 5522620,
      "step": 32
    },
    {
      "epoch": 0.0859375,
      "grad_norm": 8.823576859140516,
      "learning_rate": 5e-06,
      "loss": 0.1796,
      "num_input_tokens_seen": 5695340,
      "step": 33
    },
    {
      "epoch": 0.0859375,
      "loss": 0.19266511499881744,
      "loss_ce": 0.0003433418460190296,
      "loss_iou": 0.6953125,
      "loss_num": 0.03857421875,
      "loss_xval": 0.1923828125,
      "num_input_tokens_seen": 5695340,
      "step": 33
    },
    {
      "epoch": 0.08854166666666667,
      "grad_norm": 54.77946225550538,
      "learning_rate": 5e-06,
      "loss": 0.1289,
      "num_input_tokens_seen": 5868252,
      "step": 34
    },
    {
      "epoch": 0.08854166666666667,
      "loss": 0.13593435287475586,
      "loss_ce": 0.0002532090584281832,
      "loss_iou": 0.58984375,
      "loss_num": 0.027099609375,
      "loss_xval": 0.1357421875,
      "num_input_tokens_seen": 5868252,
      "step": 34
    },
    {
      "epoch": 0.09114583333333333,
      "grad_norm": 10.116131484083123,
      "learning_rate": 5e-06,
      "loss": 0.132,
      "num_input_tokens_seen": 6041036,
      "step": 35
    },
    {
      "epoch": 0.09114583333333333,
      "loss": 0.15703758597373962,
      "loss_ce": 0.0003603329823818058,
      "loss_iou": 0.6796875,
      "loss_num": 0.03125,
      "loss_xval": 0.15625,
      "num_input_tokens_seen": 6041036,
      "step": 35
    },
    {
      "epoch": 0.09375,
      "grad_norm": 3.6841467905553884,
      "learning_rate": 5e-06,
      "loss": 0.1766,
      "num_input_tokens_seen": 6213444,
      "step": 36
    },
    {
      "epoch": 0.09375,
      "loss": 0.11154159903526306,
      "loss_ce": 0.00021347634901758283,
      "loss_iou": 0.486328125,
      "loss_num": 0.022216796875,
      "loss_xval": 0.111328125,
      "num_input_tokens_seen": 6213444,
      "step": 36
    },
    {
      "epoch": 0.09635416666666667,
      "grad_norm": 7.922965723176142,
      "learning_rate": 5e-06,
      "loss": 0.1496,
      "num_input_tokens_seen": 6386028,
      "step": 37
    },
    {
      "epoch": 0.09635416666666667,
      "loss": 0.14449915289878845,
      "loss_ce": 0.00021204788936302066,
      "loss_iou": 0.6796875,
      "loss_num": 0.02880859375,
      "loss_xval": 0.14453125,
      "num_input_tokens_seen": 6386028,
      "step": 37
    },
    {
      "epoch": 0.09895833333333333,
      "grad_norm": 5.266919761801281,
      "learning_rate": 5e-06,
      "loss": 0.1401,
      "num_input_tokens_seen": 6558240,
      "step": 38
    },
    {
      "epoch": 0.09895833333333333,
      "loss": 0.1739426553249359,
      "loss_ce": 0.00029765223735012114,
      "loss_iou": 0.7578125,
      "loss_num": 0.03466796875,
      "loss_xval": 0.173828125,
      "num_input_tokens_seen": 6558240,
      "step": 38
    },
    {
      "epoch": 0.1015625,
      "grad_norm": 7.135945650156497,
      "learning_rate": 5e-06,
      "loss": 0.1703,
      "num_input_tokens_seen": 6731156,
      "step": 39
    },
    {
      "epoch": 0.1015625,
      "loss": 0.1407906413078308,
      "loss_ce": 0.00022666863515041769,
      "loss_iou": 0.62890625,
      "loss_num": 0.0281982421875,
      "loss_xval": 0.140625,
      "num_input_tokens_seen": 6731156,
      "step": 39
    },
    {
      "epoch": 0.10416666666666667,
      "grad_norm": 14.956590253309306,
      "learning_rate": 5e-06,
      "loss": 0.1741,
      "num_input_tokens_seen": 6903828,
      "step": 40
    },
    {
      "epoch": 0.10416666666666667,
      "loss": 0.14085114002227783,
      "loss_ce": 0.000409250904340297,
      "loss_iou": 0.55078125,
      "loss_num": 0.028076171875,
      "loss_xval": 0.140625,
      "num_input_tokens_seen": 6903828,
      "step": 40
    },
    {
      "epoch": 0.10677083333333333,
      "grad_norm": 6.97548951750633,
      "learning_rate": 5e-06,
      "loss": 0.1407,
      "num_input_tokens_seen": 7076944,
      "step": 41
    },
    {
      "epoch": 0.10677083333333333,
      "loss": 0.13742247223854065,
      "loss_ce": 0.000459590955870226,
      "loss_iou": 0.6484375,
      "loss_num": 0.02734375,
      "loss_xval": 0.13671875,
      "num_input_tokens_seen": 7076944,
      "step": 41
    },
    {
      "epoch": 0.109375,
      "grad_norm": 5.706351230194716,
      "learning_rate": 5e-06,
      "loss": 0.1664,
      "num_input_tokens_seen": 7249880,
      "step": 42
    },
    {
      "epoch": 0.109375,
      "loss": 0.1694188117980957,
      "loss_ce": 0.0002903800050262362,
      "loss_iou": 0.71875,
      "loss_num": 0.03369140625,
      "loss_xval": 0.1689453125,
      "num_input_tokens_seen": 7249880,
      "step": 42
    },
    {
      "epoch": 0.11197916666666667,
      "grad_norm": 7.30786008091978,
      "learning_rate": 5e-06,
      "loss": 0.1396,
      "num_input_tokens_seen": 7422732,
      "step": 43
    },
    {
      "epoch": 0.11197916666666667,
      "loss": 0.12171518802642822,
      "loss_ce": 0.0002552264486439526,
      "loss_iou": 0.609375,
      "loss_num": 0.0242919921875,
      "loss_xval": 0.12158203125,
      "num_input_tokens_seen": 7422732,
      "step": 43
    },
    {
      "epoch": 0.11458333333333333,
      "grad_norm": 10.925715703737882,
      "learning_rate": 5e-06,
      "loss": 0.1589,
      "num_input_tokens_seen": 7595068,
      "step": 44
    },
    {
      "epoch": 0.11458333333333333,
      "loss": 0.13566899299621582,
      "loss_ce": 0.0002930228365585208,
      "loss_iou": 0.58984375,
      "loss_num": 0.027099609375,
      "loss_xval": 0.1357421875,
      "num_input_tokens_seen": 7595068,
      "step": 44
    },
    {
      "epoch": 0.1171875,
      "grad_norm": 5.054139739954058,
      "learning_rate": 5e-06,
      "loss": 0.1463,
      "num_input_tokens_seen": 7767900,
      "step": 45
    },
    {
      "epoch": 0.1171875,
      "loss": 0.12349405884742737,
      "loss_ce": 0.00020303628116380423,
      "loss_iou": 0.7109375,
      "loss_num": 0.024658203125,
      "loss_xval": 0.123046875,
      "num_input_tokens_seen": 7767900,
      "step": 45
    },
    {
      "epoch": 0.11979166666666667,
      "grad_norm": 12.342418471503326,
      "learning_rate": 5e-06,
      "loss": 0.1424,
      "num_input_tokens_seen": 7940544,
      "step": 46
    },
    {
      "epoch": 0.11979166666666667,
      "loss": 0.11029690504074097,
      "loss_ce": 0.00021999998716637492,
      "loss_iou": 0.81640625,
      "loss_num": 0.02197265625,
      "loss_xval": 0.10986328125,
      "num_input_tokens_seen": 7940544,
      "step": 46
    },
    {
      "epoch": 0.12239583333333333,
      "grad_norm": 5.062819394898654,
      "learning_rate": 5e-06,
      "loss": 0.1906,
      "num_input_tokens_seen": 8113664,
      "step": 47
    },
    {
      "epoch": 0.12239583333333333,
      "loss": 0.1845826804637909,
      "loss_ce": 0.0001954784820554778,
      "loss_iou": 0.310546875,
      "loss_num": 0.036865234375,
      "loss_xval": 0.1845703125,
      "num_input_tokens_seen": 8113664,
      "step": 47
    },
    {
      "epoch": 0.125,
      "grad_norm": 9.659514849549943,
      "learning_rate": 5e-06,
      "loss": 0.1428,
      "num_input_tokens_seen": 8286408,
      "step": 48
    },
    {
      "epoch": 0.125,
      "loss": 0.13132745027542114,
      "loss_ce": 0.00022393176914192736,
      "loss_iou": 0.5859375,
      "loss_num": 0.0262451171875,
      "loss_xval": 0.130859375,
      "num_input_tokens_seen": 8286408,
      "step": 48
    },
    {
      "epoch": 0.12760416666666666,
      "grad_norm": 3.4602191470453296,
      "learning_rate": 5e-06,
      "loss": 0.1523,
      "num_input_tokens_seen": 8459480,
      "step": 49
    },
    {
      "epoch": 0.12760416666666666,
      "loss": 0.09740308672189713,
      "loss_ce": 0.00011304817599011585,
      "loss_iou": 0.734375,
      "loss_num": 0.0194091796875,
      "loss_xval": 0.09716796875,
      "num_input_tokens_seen": 8459480,
      "step": 49
    },
    {
      "epoch": 0.13020833333333334,
      "grad_norm": 2.792621267506476,
      "learning_rate": 5e-06,
      "loss": 0.1739,
      "num_input_tokens_seen": 8632048,
      "step": 50
    },
    {
      "epoch": 0.13020833333333334,
      "loss": 0.20529168844223022,
      "loss_ce": 0.00015252322191372514,
      "loss_iou": 0.55078125,
      "loss_num": 0.041015625,
      "loss_xval": 0.205078125,
      "num_input_tokens_seen": 8632048,
      "step": 50
    },
    {
      "epoch": 0.1328125,
      "grad_norm": 112.48651552153446,
      "learning_rate": 5e-06,
      "loss": 0.1474,
      "num_input_tokens_seen": 8804436,
      "step": 51
    },
    {
      "epoch": 0.1328125,
      "loss": 0.14565327763557434,
      "loss_ce": 0.00020651462546084076,
      "loss_iou": 0.796875,
      "loss_num": 0.029052734375,
      "loss_xval": 0.1455078125,
      "num_input_tokens_seen": 8804436,
      "step": 51
    },
    {
      "epoch": 0.13541666666666666,
      "grad_norm": 23.381698600452545,
      "learning_rate": 5e-06,
      "loss": 0.1281,
      "num_input_tokens_seen": 8976692,
      "step": 52
    },
    {
      "epoch": 0.13541666666666666,
      "loss": 0.07739880681037903,
      "loss_ce": 0.0002808899153023958,
      "loss_iou": 0.71484375,
      "loss_num": 0.01544189453125,
      "loss_xval": 0.0771484375,
      "num_input_tokens_seen": 8976692,
      "step": 52
    },
    {
      "epoch": 0.13802083333333334,
      "grad_norm": 20.24541765865236,
      "learning_rate": 5e-06,
      "loss": 0.1416,
      "num_input_tokens_seen": 9149400,
      "step": 53
    },
    {
      "epoch": 0.13802083333333334,
      "loss": 0.09311097115278244,
      "loss_ce": 0.00012390354822855443,
      "loss_iou": 0.7421875,
      "loss_num": 0.0185546875,
      "loss_xval": 0.0927734375,
      "num_input_tokens_seen": 9149400,
      "step": 53
    },
    {
      "epoch": 0.140625,
      "grad_norm": 5.275500097506868,
      "learning_rate": 5e-06,
      "loss": 0.1424,
      "num_input_tokens_seen": 9321876,
      "step": 54
    },
    {
      "epoch": 0.140625,
      "loss": 0.11511102318763733,
      "loss_ce": 0.00018182306666858494,
      "loss_iou": 0.55078125,
      "loss_num": 0.02294921875,
      "loss_xval": 0.11474609375,
      "num_input_tokens_seen": 9321876,
      "step": 54
    },
    {
      "epoch": 0.14322916666666666,
      "grad_norm": 6.68044187324112,
      "learning_rate": 5e-06,
      "loss": 0.1389,
      "num_input_tokens_seen": 9494628,
      "step": 55
    },
    {
      "epoch": 0.14322916666666666,
      "loss": 0.14306305348873138,
      "loss_ce": 0.0001797609293134883,
      "loss_iou": 0.6953125,
      "loss_num": 0.028564453125,
      "loss_xval": 0.142578125,
      "num_input_tokens_seen": 9494628,
      "step": 55
    },
    {
      "epoch": 0.14583333333333334,
      "grad_norm": 6.008068200145323,
      "learning_rate": 5e-06,
      "loss": 0.1457,
      "num_input_tokens_seen": 9666508,
      "step": 56
    },
    {
      "epoch": 0.14583333333333334,
      "loss": 0.10107017308473587,
      "loss_ce": 0.00024009394110180438,
      "loss_iou": 0.0,
      "loss_num": 0.0201416015625,
      "loss_xval": 0.1005859375,
      "num_input_tokens_seen": 9666508,
      "step": 56
    },
    {
      "epoch": 0.1484375,
      "grad_norm": 5.2880560255216436,
      "learning_rate": 5e-06,
      "loss": 0.1537,
      "num_input_tokens_seen": 9839556,
      "step": 57
    },
    {
      "epoch": 0.1484375,
      "loss": 0.12539099156856537,
      "loss_ce": 0.003534301184117794,
      "loss_iou": 0.55078125,
      "loss_num": 0.0244140625,
      "loss_xval": 0.1220703125,
      "num_input_tokens_seen": 9839556,
      "step": 57
    },
    {
      "epoch": 0.15104166666666666,
      "grad_norm": 12.763217046347364,
      "learning_rate": 5e-06,
      "loss": 0.1706,
      "num_input_tokens_seen": 10011988,
      "step": 58
    },
    {
      "epoch": 0.15104166666666666,
      "loss": 0.17848367989063263,
      "loss_ce": 0.00016946055984590203,
      "loss_iou": 0.52734375,
      "loss_num": 0.03564453125,
      "loss_xval": 0.1787109375,
      "num_input_tokens_seen": 10011988,
      "step": 58
    },
    {
      "epoch": 0.15364583333333334,
      "grad_norm": 8.269658303130955,
      "learning_rate": 5e-06,
      "loss": 0.157,
      "num_input_tokens_seen": 10184712,
      "step": 59
    },
    {
      "epoch": 0.15364583333333334,
      "loss": 0.16671502590179443,
      "loss_ce": 0.0010350943775847554,
      "loss_iou": 0.51953125,
      "loss_num": 0.033203125,
      "loss_xval": 0.166015625,
      "num_input_tokens_seen": 10184712,
      "step": 59
    },
    {
      "epoch": 0.15625,
      "grad_norm": 10.823127549550875,
      "learning_rate": 5e-06,
      "loss": 0.1397,
      "num_input_tokens_seen": 10357876,
      "step": 60
    },
    {
      "epoch": 0.15625,
      "loss": 0.15665964782238007,
      "loss_ce": 0.00022653902124147862,
      "loss_iou": 0.5390625,
      "loss_num": 0.03125,
      "loss_xval": 0.15625,
      "num_input_tokens_seen": 10357876,
      "step": 60
    },
    {
      "epoch": 0.15885416666666666,
      "grad_norm": 6.373677246488681,
      "learning_rate": 5e-06,
      "loss": 0.1239,
      "num_input_tokens_seen": 10530560,
      "step": 61
    },
    {
      "epoch": 0.15885416666666666,
      "loss": 0.1182846650481224,
      "loss_ce": 0.0001816382718970999,
      "loss_iou": 0.78515625,
      "loss_num": 0.023681640625,
      "loss_xval": 0.1181640625,
      "num_input_tokens_seen": 10530560,
      "step": 61
    },
    {
      "epoch": 0.16145833333333334,
      "grad_norm": 2.5506045315044688,
      "learning_rate": 5e-06,
      "loss": 0.1465,
      "num_input_tokens_seen": 10702880,
      "step": 62
    },
    {
      "epoch": 0.16145833333333334,
      "loss": 0.11390332132577896,
      "loss_ce": 0.00019482464995235205,
      "loss_iou": 0.890625,
      "loss_num": 0.022705078125,
      "loss_xval": 0.11376953125,
      "num_input_tokens_seen": 10702880,
      "step": 62
    },
    {
      "epoch": 0.1640625,
      "grad_norm": 7.222980659687508,
      "learning_rate": 5e-06,
      "loss": 0.1252,
      "num_input_tokens_seen": 10875396,
      "step": 63
    },
    {
      "epoch": 0.1640625,
      "loss": 0.14197739958763123,
      "loss_ce": 0.00019273148791398853,
      "loss_iou": 0.65234375,
      "loss_num": 0.0283203125,
      "loss_xval": 0.1416015625,
      "num_input_tokens_seen": 10875396,
      "step": 63
    },
    {
      "epoch": 0.16666666666666666,
      "grad_norm": 5.146091397448424,
      "learning_rate": 5e-06,
      "loss": 0.1305,
      "num_input_tokens_seen": 11047776,
      "step": 64
    },
    {
      "epoch": 0.16666666666666666,
      "loss": 0.14921408891677856,
      "loss_ce": 0.00013572629541158676,
      "loss_iou": 0.72265625,
      "loss_num": 0.02978515625,
      "loss_xval": 0.1494140625,
      "num_input_tokens_seen": 11047776,
      "step": 64
    },
    {
      "epoch": 0.16927083333333334,
      "grad_norm": 4.80224094246898,
      "learning_rate": 5e-06,
      "loss": 0.1154,
      "num_input_tokens_seen": 11220188,
      "step": 65
    },
    {
      "epoch": 0.16927083333333334,
      "loss": 0.07668769359588623,
      "loss_ce": 0.001126162358559668,
      "loss_iou": 0.91015625,
      "loss_num": 0.01507568359375,
      "loss_xval": 0.07568359375,
      "num_input_tokens_seen": 11220188,
      "step": 65
    },
    {
      "epoch": 0.171875,
      "grad_norm": 10.700514293201024,
      "learning_rate": 5e-06,
      "loss": 0.1739,
      "num_input_tokens_seen": 11392944,
      "step": 66
    },
    {
      "epoch": 0.171875,
      "loss": 0.22246834635734558,
      "loss_ce": 0.00011727018863894045,
      "loss_iou": 0.75390625,
      "loss_num": 0.04443359375,
      "loss_xval": 0.22265625,
      "num_input_tokens_seen": 11392944,
      "step": 66
    },
    {
      "epoch": 0.17447916666666666,
      "grad_norm": 9.514503857806982,
      "learning_rate": 5e-06,
      "loss": 0.2101,
      "num_input_tokens_seen": 11565084,
      "step": 67
    },
    {
      "epoch": 0.17447916666666666,
      "loss": 0.13935251533985138,
      "loss_ce": 0.0004364975611679256,
      "loss_iou": 0.58203125,
      "loss_num": 0.02783203125,
      "loss_xval": 0.138671875,
      "num_input_tokens_seen": 11565084,
      "step": 67
    },
    {
      "epoch": 0.17708333333333334,
      "grad_norm": 28.845888384168894,
      "learning_rate": 5e-06,
      "loss": 0.1395,
      "num_input_tokens_seen": 11737388,
      "step": 68
    },
    {
      "epoch": 0.17708333333333334,
      "loss": 0.17835211753845215,
      "loss_ce": 0.00019049833645112813,
      "loss_iou": 0.5859375,
      "loss_num": 0.03564453125,
      "loss_xval": 0.177734375,
      "num_input_tokens_seen": 11737388,
      "step": 68
    },
    {
      "epoch": 0.1796875,
      "grad_norm": 12.901299207431718,
      "learning_rate": 5e-06,
      "loss": 0.1475,
      "num_input_tokens_seen": 11910160,
      "step": 69
    },
    {
      "epoch": 0.1796875,
      "loss": 0.14130395650863647,
      "loss_ce": 0.00040430587250739336,
      "loss_iou": 0.71875,
      "loss_num": 0.028076171875,
      "loss_xval": 0.140625,
      "num_input_tokens_seen": 11910160,
      "step": 69
    },
    {
      "epoch": 0.18229166666666666,
      "grad_norm": 4.066104418883702,
      "learning_rate": 5e-06,
      "loss": 0.1535,
      "num_input_tokens_seen": 12083060,
      "step": 70
    },
    {
      "epoch": 0.18229166666666666,
      "loss": 0.22210073471069336,
      "loss_ce": 0.00048208353109657764,
      "loss_iou": 0.52734375,
      "loss_num": 0.044189453125,
      "loss_xval": 0.2216796875,
      "num_input_tokens_seen": 12083060,
      "step": 70
    },
    {
      "epoch": 0.18489583333333334,
      "grad_norm": 7.20629091266797,
      "learning_rate": 5e-06,
      "loss": 0.1526,
      "num_input_tokens_seen": 12255100,
      "step": 71
    },
    {
      "epoch": 0.18489583333333334,
      "loss": 0.10638897120952606,
      "loss_ce": 0.00015728682046756148,
      "loss_iou": 0.0,
      "loss_num": 0.021240234375,
      "loss_xval": 0.1064453125,
      "num_input_tokens_seen": 12255100,
      "step": 71
    },
    {
      "epoch": 0.1875,
      "grad_norm": 5.6974371825888515,
      "learning_rate": 5e-06,
      "loss": 0.1194,
      "num_input_tokens_seen": 12428188,
      "step": 72
    },
    {
      "epoch": 0.1875,
      "loss": 0.17051713168621063,
      "loss_ce": 0.0001069810678018257,
      "loss_iou": 0.77734375,
      "loss_num": 0.0341796875,
      "loss_xval": 0.169921875,
      "num_input_tokens_seen": 12428188,
      "step": 72
    },
    {
      "epoch": 0.19010416666666666,
      "grad_norm": 9.689078279769502,
      "learning_rate": 5e-06,
      "loss": 0.1445,
      "num_input_tokens_seen": 12601004,
      "step": 73
    },
    {
      "epoch": 0.19010416666666666,
      "loss": 0.1433650702238083,
      "loss_ce": 0.00020711585239041597,
      "loss_iou": 0.7578125,
      "loss_num": 0.0286865234375,
      "loss_xval": 0.1435546875,
      "num_input_tokens_seen": 12601004,
      "step": 73
    },
    {
      "epoch": 0.19270833333333334,
      "grad_norm": 5.827672891178693,
      "learning_rate": 5e-06,
      "loss": 0.134,
      "num_input_tokens_seen": 12773320,
      "step": 74
    },
    {
      "epoch": 0.19270833333333334,
      "loss": 0.11652399599552155,
      "loss_ce": 9.94274887489155e-05,
      "loss_iou": 0.0,
      "loss_num": 0.0233154296875,
      "loss_xval": 0.1162109375,
      "num_input_tokens_seen": 12773320,
      "step": 74
    },
    {
      "epoch": 0.1953125,
      "grad_norm": 3.990167602163436,
      "learning_rate": 5e-06,
      "loss": 0.1368,
      "num_input_tokens_seen": 12945852,
      "step": 75
    },
    {
      "epoch": 0.1953125,
      "loss": 0.14618420600891113,
      "loss_ce": 0.0001270811044378206,
      "loss_iou": 0.59765625,
      "loss_num": 0.0291748046875,
      "loss_xval": 0.146484375,
      "num_input_tokens_seen": 12945852,
      "step": 75
    },
    {
      "epoch": 0.19791666666666666,
      "grad_norm": 13.270667333466802,
      "learning_rate": 5e-06,
      "loss": 0.1563,
      "num_input_tokens_seen": 13118484,
      "step": 76
    },
    {
      "epoch": 0.19791666666666666,
      "loss": 0.15230345726013184,
      "loss_ce": 0.00026487442664802074,
      "loss_iou": 0.76953125,
      "loss_num": 0.0303955078125,
      "loss_xval": 0.15234375,
      "num_input_tokens_seen": 13118484,
      "step": 76
    },
    {
      "epoch": 0.20052083333333334,
      "grad_norm": 7.7363268532740745,
      "learning_rate": 5e-06,
      "loss": 0.1283,
      "num_input_tokens_seen": 13291272,
      "step": 77
    },
    {
      "epoch": 0.20052083333333334,
      "loss": 0.14224107563495636,
      "loss_ce": 5.9679325204342604e-05,
      "loss_iou": 0.6796875,
      "loss_num": 0.0284423828125,
      "loss_xval": 0.142578125,
      "num_input_tokens_seen": 13291272,
      "step": 77
    },
    {
      "epoch": 0.203125,
      "grad_norm": 6.61313432355891,
      "learning_rate": 5e-06,
      "loss": 0.124,
      "num_input_tokens_seen": 13464624,
      "step": 78
    },
    {
      "epoch": 0.203125,
      "loss": 0.1373731642961502,
      "loss_ce": 0.001081653987057507,
      "loss_iou": 0.578125,
      "loss_num": 0.0272216796875,
      "loss_xval": 0.13671875,
      "num_input_tokens_seen": 13464624,
      "step": 78
    },
    {
      "epoch": 0.20572916666666666,
      "grad_norm": 8.581672711095537,
      "learning_rate": 5e-06,
      "loss": 0.1277,
      "num_input_tokens_seen": 13637932,
      "step": 79
    },
    {
      "epoch": 0.20572916666666666,
      "loss": 0.07391411066055298,
      "loss_ce": 0.0002141633303835988,
      "loss_iou": 0.921875,
      "loss_num": 0.0147705078125,
      "loss_xval": 0.07373046875,
      "num_input_tokens_seen": 13637932,
      "step": 79
    },
    {
      "epoch": 0.20833333333333334,
      "grad_norm": 9.343129724950805,
      "learning_rate": 5e-06,
      "loss": 0.1228,
      "num_input_tokens_seen": 13810536,
      "step": 80
    },
    {
      "epoch": 0.20833333333333334,
      "loss": 0.1317322701215744,
      "loss_ce": 0.00010995224147336558,
      "loss_iou": 0.5625,
      "loss_num": 0.0263671875,
      "loss_xval": 0.1318359375,
      "num_input_tokens_seen": 13810536,
      "step": 80
    },
    {
      "epoch": 0.2109375,
      "grad_norm": 3.335680846026802,
      "learning_rate": 5e-06,
      "loss": 0.1278,
      "num_input_tokens_seen": 13982952,
      "step": 81
    },
    {
      "epoch": 0.2109375,
      "loss": 0.11719675362110138,
      "loss_ce": 7.029056723695248e-05,
      "loss_iou": 0.72265625,
      "loss_num": 0.0234375,
      "loss_xval": 0.1171875,
      "num_input_tokens_seen": 13982952,
      "step": 81
    },
    {
      "epoch": 0.21354166666666666,
      "grad_norm": 5.015919335288412,
      "learning_rate": 5e-06,
      "loss": 0.1141,
      "num_input_tokens_seen": 14156116,
      "step": 82
    },
    {
      "epoch": 0.21354166666666666,
      "loss": 0.12401551753282547,
      "loss_ce": 0.0005414030747488141,
      "loss_iou": 0.8125,
      "loss_num": 0.024658203125,
      "loss_xval": 0.12353515625,
      "num_input_tokens_seen": 14156116,
      "step": 82
    },
    {
      "epoch": 0.21614583333333334,
      "grad_norm": 2.9623089480232765,
      "learning_rate": 5e-06,
      "loss": 0.1462,
      "num_input_tokens_seen": 14328100,
      "step": 83
    },
    {
      "epoch": 0.21614583333333334,
      "loss": 0.13083317875862122,
      "loss_ce": 6.534742715302855e-05,
      "loss_iou": 0.84765625,
      "loss_num": 0.026123046875,
      "loss_xval": 0.130859375,
      "num_input_tokens_seen": 14328100,
      "step": 83
    },
    {
      "epoch": 0.21875,
      "grad_norm": 10.594036737745725,
      "learning_rate": 5e-06,
      "loss": 0.1571,
      "num_input_tokens_seen": 14501028,
      "step": 84
    },
    {
      "epoch": 0.21875,
      "loss": 0.12298892438411713,
      "loss_ce": 9.464097092859447e-05,
      "loss_iou": 0.765625,
      "loss_num": 0.0245361328125,
      "loss_xval": 0.123046875,
      "num_input_tokens_seen": 14501028,
      "step": 84
    },
    {
      "epoch": 0.22135416666666666,
      "grad_norm": 15.787971676382128,
      "learning_rate": 5e-06,
      "loss": 0.1346,
      "num_input_tokens_seen": 14673688,
      "step": 85
    },
    {
      "epoch": 0.22135416666666666,
      "loss": 0.1362449675798416,
      "loss_ce": 0.00013656073133461177,
      "loss_iou": 0.77734375,
      "loss_num": 0.0272216796875,
      "loss_xval": 0.1357421875,
      "num_input_tokens_seen": 14673688,
      "step": 85
    },
    {
      "epoch": 0.22395833333333334,
      "grad_norm": 17.628757977108236,
      "learning_rate": 5e-06,
      "loss": 0.1309,
      "num_input_tokens_seen": 14846388,
      "step": 86
    },
    {
      "epoch": 0.22395833333333334,
      "loss": 0.2271496057510376,
      "loss_ce": 0.00012933027755934745,
      "loss_iou": 0.6875,
      "loss_num": 0.04541015625,
      "loss_xval": 0.2265625,
      "num_input_tokens_seen": 14846388,
      "step": 86
    },
    {
      "epoch": 0.2265625,
      "grad_norm": 4.200455159585171,
      "learning_rate": 5e-06,
      "loss": 0.1237,
      "num_input_tokens_seen": 15019332,
      "step": 87
    },
    {
      "epoch": 0.2265625,
      "loss": 0.1411372572183609,
      "loss_ce": 0.000146055273944512,
      "loss_iou": 0.7109375,
      "loss_num": 0.0281982421875,
      "loss_xval": 0.140625,
      "num_input_tokens_seen": 15019332,
      "step": 87
    },
    {
      "epoch": 0.22916666666666666,
      "grad_norm": 16.128679810445924,
      "learning_rate": 5e-06,
      "loss": 0.1205,
      "num_input_tokens_seen": 15191728,
      "step": 88
    },
    {
      "epoch": 0.22916666666666666,
      "loss": 0.12598924338817596,
      "loss_ce": 0.0001957894128281623,
      "loss_iou": 0.7265625,
      "loss_num": 0.025146484375,
      "loss_xval": 0.1259765625,
      "num_input_tokens_seen": 15191728,
      "step": 88
    },
    {
      "epoch": 0.23177083333333334,
      "grad_norm": 3.347768447216801,
      "learning_rate": 5e-06,
      "loss": 0.1091,
      "num_input_tokens_seen": 15364328,
      "step": 89
    },
    {
      "epoch": 0.23177083333333334,
      "loss": 0.13717257976531982,
      "loss_ce": 0.00014865108823869377,
      "loss_iou": 0.671875,
      "loss_num": 0.0274658203125,
      "loss_xval": 0.13671875,
      "num_input_tokens_seen": 15364328,
      "step": 89
    },
    {
      "epoch": 0.234375,
      "grad_norm": 14.428792014632464,
      "learning_rate": 5e-06,
      "loss": 0.1169,
      "num_input_tokens_seen": 15537008,
      "step": 90
    },
    {
      "epoch": 0.234375,
      "loss": 0.09786561131477356,
      "loss_ce": 8.728736429475248e-05,
      "loss_iou": 0.8984375,
      "loss_num": 0.01953125,
      "loss_xval": 0.09765625,
      "num_input_tokens_seen": 15537008,
      "step": 90
    },
    {
      "epoch": 0.23697916666666666,
      "grad_norm": 9.593480834109474,
      "learning_rate": 5e-06,
      "loss": 0.1477,
      "num_input_tokens_seen": 15709404,
      "step": 91
    },
    {
      "epoch": 0.23697916666666666,
      "loss": 0.13464778661727905,
      "loss_ce": 0.0004925199900753796,
      "loss_iou": 0.0,
      "loss_num": 0.02685546875,
      "loss_xval": 0.1337890625,
      "num_input_tokens_seen": 15709404,
      "step": 91
    },
    {
      "epoch": 0.23958333333333334,
      "grad_norm": 5.187519307559665,
      "learning_rate": 5e-06,
      "loss": 0.1256,
      "num_input_tokens_seen": 15882256,
      "step": 92
    },
    {
      "epoch": 0.23958333333333334,
      "loss": 0.14495471119880676,
      "loss_ce": 0.00020984606817364693,
      "loss_iou": 0.59375,
      "loss_num": 0.0289306640625,
      "loss_xval": 0.14453125,
      "num_input_tokens_seen": 15882256,
      "step": 92
    },
    {
      "epoch": 0.2421875,
      "grad_norm": 3.797220087224051,
      "learning_rate": 5e-06,
      "loss": 0.0953,
      "num_input_tokens_seen": 16055680,
      "step": 93
    },
    {
      "epoch": 0.2421875,
      "loss": 0.11551543325185776,
      "loss_ce": 0.00012846880417782813,
      "loss_iou": 0.84765625,
      "loss_num": 0.0230712890625,
      "loss_xval": 0.115234375,
      "num_input_tokens_seen": 16055680,
      "step": 93
    },
    {
      "epoch": 0.24479166666666666,
      "grad_norm": 12.640483904974769,
      "learning_rate": 5e-06,
      "loss": 0.1633,
      "num_input_tokens_seen": 16228580,
      "step": 94
    },
    {
      "epoch": 0.24479166666666666,
      "loss": 0.22419461607933044,
      "loss_ce": 7.351529347943142e-05,
      "loss_iou": 0.57421875,
      "loss_num": 0.044921875,
      "loss_xval": 0.224609375,
      "num_input_tokens_seen": 16228580,
      "step": 94
    },
    {
      "epoch": 0.24739583333333334,
      "grad_norm": 5.298357496019875,
      "learning_rate": 5e-06,
      "loss": 0.1399,
      "num_input_tokens_seen": 16401760,
      "step": 95
    },
    {
      "epoch": 0.24739583333333334,
      "loss": 0.13041989505290985,
      "loss_ce": 0.0002624165790621191,
      "loss_iou": 0.86328125,
      "loss_num": 0.0260009765625,
      "loss_xval": 0.1298828125,
      "num_input_tokens_seen": 16401760,
      "step": 95
    },
    {
      "epoch": 0.25,
      "grad_norm": 9.952010853168657,
      "learning_rate": 5e-06,
      "loss": 0.1295,
      "num_input_tokens_seen": 16574496,
      "step": 96
    },
    {
      "epoch": 0.25,
      "loss": 0.1939561665058136,
      "loss_ce": 7.798791921231896e-05,
      "loss_iou": 0.765625,
      "loss_num": 0.038818359375,
      "loss_xval": 0.1943359375,
      "num_input_tokens_seen": 16574496,
      "step": 96
    },
    {
      "epoch": 0.2526041666666667,
      "grad_norm": 3.049823911893114,
      "learning_rate": 5e-06,
      "loss": 0.1111,
      "num_input_tokens_seen": 16747728,
      "step": 97
    },
    {
      "epoch": 0.2526041666666667,
      "loss": 0.09351100027561188,
      "loss_ce": 6.617652252316475e-05,
      "loss_iou": 0.65625,
      "loss_num": 0.0186767578125,
      "loss_xval": 0.09326171875,
      "num_input_tokens_seen": 16747728,
      "step": 97
    },
    {
      "epoch": 0.2552083333333333,
      "grad_norm": 16.778434870585635,
      "learning_rate": 5e-06,
      "loss": 0.1019,
      "num_input_tokens_seen": 16920748,
      "step": 98
    },
    {
      "epoch": 0.2552083333333333,
      "loss": 0.06255725026130676,
      "loss_ce": 0.00036242493661120534,
      "loss_iou": 0.796875,
      "loss_num": 0.012451171875,
      "loss_xval": 0.062255859375,
      "num_input_tokens_seen": 16920748,
      "step": 98
    },
    {
      "epoch": 0.2578125,
      "grad_norm": 3.1538744569690427,
      "learning_rate": 5e-06,
      "loss": 0.0905,
      "num_input_tokens_seen": 17093788,
      "step": 99
    },
    {
      "epoch": 0.2578125,
      "loss": 0.07921752333641052,
      "loss_ce": 5.4928314057178795e-05,
      "loss_iou": 0.75390625,
      "loss_num": 0.015869140625,
      "loss_xval": 0.0791015625,
      "num_input_tokens_seen": 17093788,
      "step": 99
    },
    {
      "epoch": 0.2604166666666667,
      "grad_norm": 5.0492536766824925,
      "learning_rate": 5e-06,
      "loss": 0.1333,
      "num_input_tokens_seen": 17266068,
      "step": 100
    },
    {
      "epoch": 0.2604166666666667,
      "loss": 0.2560691237449646,
      "loss_ce": 7.242064020829275e-05,
      "loss_iou": 0.0,
      "loss_num": 0.05126953125,
      "loss_xval": 0.255859375,
      "num_input_tokens_seen": 17266068,
      "step": 100
    },
    {
      "epoch": 0.2630208333333333,
      "grad_norm": 3.206773313570764,
      "learning_rate": 5e-06,
      "loss": 0.1103,
      "num_input_tokens_seen": 17438252,
      "step": 101
    },
    {
      "epoch": 0.2630208333333333,
      "loss": 0.10068385303020477,
      "loss_ce": 6.739624950569123e-05,
      "loss_iou": 0.66015625,
      "loss_num": 0.0201416015625,
      "loss_xval": 0.1005859375,
      "num_input_tokens_seen": 17438252,
      "step": 101
    },
    {
      "epoch": 0.265625,
      "grad_norm": 4.023617188811506,
      "learning_rate": 5e-06,
      "loss": 0.1235,
      "num_input_tokens_seen": 17611072,
      "step": 102
    },
    {
      "epoch": 0.265625,
      "loss": 0.17407888174057007,
      "loss_ce": 0.0001286847109440714,
      "loss_iou": 0.8125,
      "loss_num": 0.03466796875,
      "loss_xval": 0.173828125,
      "num_input_tokens_seen": 17611072,
      "step": 102
    },
    {
      "epoch": 0.2682291666666667,
      "grad_norm": 4.68999361675466,
      "learning_rate": 5e-06,
      "loss": 0.1292,
      "num_input_tokens_seen": 17784276,
      "step": 103
    },
    {
      "epoch": 0.2682291666666667,
      "loss": 0.14088758826255798,
      "loss_ce": 0.00014050997560843825,
      "loss_iou": 0.7578125,
      "loss_num": 0.028076171875,
      "loss_xval": 0.140625,
      "num_input_tokens_seen": 17784276,
      "step": 103
    },
    {
      "epoch": 0.2708333333333333,
      "grad_norm": 3.670856513287625,
      "learning_rate": 5e-06,
      "loss": 0.1135,
      "num_input_tokens_seen": 17956592,
      "step": 104
    },
    {
      "epoch": 0.2708333333333333,
      "loss": 0.1348290890455246,
      "loss_ce": 0.00012450128269847482,
      "loss_iou": 0.0,
      "loss_num": 0.0269775390625,
      "loss_xval": 0.134765625,
      "num_input_tokens_seen": 17956592,
      "step": 104
    },
    {
      "epoch": 0.2734375,
      "grad_norm": 7.988958032566027,
      "learning_rate": 5e-06,
      "loss": 0.1302,
      "num_input_tokens_seen": 18129536,
      "step": 105
    },
    {
      "epoch": 0.2734375,
      "loss": 0.06641676276922226,
      "loss_ce": 0.0001936157641466707,
      "loss_iou": 0.58203125,
      "loss_num": 0.01324462890625,
      "loss_xval": 0.06640625,
      "num_input_tokens_seen": 18129536,
      "step": 105
    },
    {
      "epoch": 0.2760416666666667,
      "grad_norm": 6.167808656422766,
      "learning_rate": 5e-06,
      "loss": 0.0922,
      "num_input_tokens_seen": 18302572,
      "step": 106
    },
    {
      "epoch": 0.2760416666666667,
      "loss": 0.0900074690580368,
      "loss_ce": 0.00028579036006703973,
      "loss_iou": 0.8828125,
      "loss_num": 0.0179443359375,
      "loss_xval": 0.08984375,
      "num_input_tokens_seen": 18302572,
      "step": 106
    },
    {
      "epoch": 0.2786458333333333,
      "grad_norm": 15.444621610105179,
      "learning_rate": 5e-06,
      "loss": 0.1253,
      "num_input_tokens_seen": 18474752,
      "step": 107
    },
    {
      "epoch": 0.2786458333333333,
      "loss": 0.08840958774089813,
      "loss_ce": 0.0001527518907096237,
      "loss_iou": 0.734375,
      "loss_num": 0.0177001953125,
      "loss_xval": 0.08837890625,
      "num_input_tokens_seen": 18474752,
      "step": 107
    },
    {
      "epoch": 0.28125,
      "grad_norm": 9.197225254514525,
      "learning_rate": 5e-06,
      "loss": 0.1234,
      "num_input_tokens_seen": 18647420,
      "step": 108
    },
    {
      "epoch": 0.28125,
      "loss": 0.1322825402021408,
      "loss_ce": 8.039205567911267e-05,
      "loss_iou": 0.81640625,
      "loss_num": 0.0264892578125,
      "loss_xval": 0.1318359375,
      "num_input_tokens_seen": 18647420,
      "step": 108
    },
    {
      "epoch": 0.2838541666666667,
      "grad_norm": 4.014816953952452,
      "learning_rate": 5e-06,
      "loss": 0.1298,
      "num_input_tokens_seen": 18820060,
      "step": 109
    },
    {
      "epoch": 0.2838541666666667,
      "loss": 0.18935684859752655,
      "loss_ce": 8.681887993589044e-05,
      "loss_iou": 0.703125,
      "loss_num": 0.037841796875,
      "loss_xval": 0.189453125,
      "num_input_tokens_seen": 18820060,
      "step": 109
    },
    {
      "epoch": 0.2864583333333333,
      "grad_norm": 5.301291477011863,
      "learning_rate": 5e-06,
      "loss": 0.123,
      "num_input_tokens_seen": 18992164,
      "step": 110
    },
    {
      "epoch": 0.2864583333333333,
      "loss": 0.1676991879940033,
      "loss_ce": 9.665168181527406e-05,
      "loss_iou": 0.671875,
      "loss_num": 0.03369140625,
      "loss_xval": 0.16796875,
      "num_input_tokens_seen": 18992164,
      "step": 110
    },
    {
      "epoch": 0.2890625,
      "grad_norm": 3.7618362724585865,
      "learning_rate": 5e-06,
      "loss": 0.0973,
      "num_input_tokens_seen": 19164016,
      "step": 111
    },
    {
      "epoch": 0.2890625,
      "loss": 0.05550282821059227,
      "loss_ce": 5.23876296938397e-05,
      "loss_iou": 0.953125,
      "loss_num": 0.0111083984375,
      "loss_xval": 0.055419921875,
      "num_input_tokens_seen": 19164016,
      "step": 111
    },
    {
      "epoch": 0.2916666666666667,
      "grad_norm": 6.877157018975216,
      "learning_rate": 5e-06,
      "loss": 0.1429,
      "num_input_tokens_seen": 19336416,
      "step": 112
    },
    {
      "epoch": 0.2916666666666667,
      "loss": 0.21898075938224792,
      "loss_ce": 0.00023076393699739128,
      "loss_iou": 0.63671875,
      "loss_num": 0.043701171875,
      "loss_xval": 0.21875,
      "num_input_tokens_seen": 19336416,
      "step": 112
    },
    {
      "epoch": 0.2942708333333333,
      "grad_norm": 8.699267895879803,
      "learning_rate": 5e-06,
      "loss": 0.1221,
      "num_input_tokens_seen": 19508784,
      "step": 113
    },
    {
      "epoch": 0.2942708333333333,
      "loss": 0.11330369114875793,
      "loss_ce": 0.00014450862363446504,
      "loss_iou": 0.703125,
      "loss_num": 0.0225830078125,
      "loss_xval": 0.11328125,
      "num_input_tokens_seen": 19508784,
      "step": 113
    },
    {
      "epoch": 0.296875,
      "grad_norm": 6.679175716055245,
      "learning_rate": 5e-06,
      "loss": 0.1118,
      "num_input_tokens_seen": 19681104,
      "step": 114
    },
    {
      "epoch": 0.296875,
      "loss": 0.09517869353294373,
      "loss_ce": 8.592366793891415e-05,
      "loss_iou": 0.77734375,
      "loss_num": 0.01904296875,
      "loss_xval": 0.09521484375,
      "num_input_tokens_seen": 19681104,
      "step": 114
    },
    {
      "epoch": 0.2994791666666667,
      "grad_norm": 18.55418733227958,
      "learning_rate": 5e-06,
      "loss": 0.1435,
      "num_input_tokens_seen": 19853176,
      "step": 115
    },
    {
      "epoch": 0.2994791666666667,
      "loss": 0.1039402186870575,
      "loss_ce": 5.8387617173139006e-05,
      "loss_iou": 0.72265625,
      "loss_num": 0.020751953125,
      "loss_xval": 0.10400390625,
      "num_input_tokens_seen": 19853176,
      "step": 115
    },
    {
      "epoch": 0.3020833333333333,
      "grad_norm": 4.232168331373671,
      "learning_rate": 5e-06,
      "loss": 0.1276,
      "num_input_tokens_seen": 20025704,
      "step": 116
    },
    {
      "epoch": 0.3020833333333333,
      "loss": 0.08785620331764221,
      "loss_ce": 8.76499543664977e-05,
      "loss_iou": 0.69140625,
      "loss_num": 0.017578125,
      "loss_xval": 0.087890625,
      "num_input_tokens_seen": 20025704,
      "step": 116
    },
    {
      "epoch": 0.3046875,
      "grad_norm": 6.847887859581621,
      "learning_rate": 5e-06,
      "loss": 0.1147,
      "num_input_tokens_seen": 20198488,
      "step": 117
    },
    {
      "epoch": 0.3046875,
      "loss": 0.1606612354516983,
      "loss_ce": 7.774594996590167e-05,
      "loss_iou": 0.859375,
      "loss_num": 0.031982421875,
      "loss_xval": 0.16015625,
      "num_input_tokens_seen": 20198488,
      "step": 117
    },
    {
      "epoch": 0.3072916666666667,
      "grad_norm": 4.391317523713796,
      "learning_rate": 5e-06,
      "loss": 0.12,
      "num_input_tokens_seen": 20371684,
      "step": 118
    },
    {
      "epoch": 0.3072916666666667,
      "loss": 0.10194739699363708,
      "loss_ce": 0.00014075401122681797,
      "loss_iou": 0.68359375,
      "loss_num": 0.0203857421875,
      "loss_xval": 0.1015625,
      "num_input_tokens_seen": 20371684,
      "step": 118
    },
    {
      "epoch": 0.3098958333333333,
      "grad_norm": 8.958657986306372,
      "learning_rate": 5e-06,
      "loss": 0.1174,
      "num_input_tokens_seen": 20544172,
      "step": 119
    },
    {
      "epoch": 0.3098958333333333,
      "loss": 0.1237276941537857,
      "loss_ce": 0.0004366845532786101,
      "loss_iou": 0.71484375,
      "loss_num": 0.024658203125,
      "loss_xval": 0.123046875,
      "num_input_tokens_seen": 20544172,
      "step": 119
    },
    {
      "epoch": 0.3125,
      "grad_norm": 3.5170839929817417,
      "learning_rate": 5e-06,
      "loss": 0.1268,
      "num_input_tokens_seen": 20717160,
      "step": 120
    },
    {
      "epoch": 0.3125,
      "loss": 0.15593719482421875,
      "loss_ce": 0.00017548247706145048,
      "loss_iou": 0.890625,
      "loss_num": 0.0311279296875,
      "loss_xval": 0.15625,
      "num_input_tokens_seen": 20717160,
      "step": 120
    },
    {
      "epoch": 0.3151041666666667,
      "grad_norm": 6.739906995090889,
      "learning_rate": 5e-06,
      "loss": 0.1242,
      "num_input_tokens_seen": 20890032,
      "step": 121
    },
    {
      "epoch": 0.3151041666666667,
      "loss": 0.1494368314743042,
      "loss_ce": 0.0012434859527274966,
      "loss_iou": 0.671875,
      "loss_num": 0.0296630859375,
      "loss_xval": 0.1484375,
      "num_input_tokens_seen": 20890032,
      "step": 121
    },
    {
      "epoch": 0.3177083333333333,
      "grad_norm": 6.127165000561302,
      "learning_rate": 5e-06,
      "loss": 0.1151,
      "num_input_tokens_seen": 21062984,
      "step": 122
    },
    {
      "epoch": 0.3177083333333333,
      "loss": 0.09486885368824005,
      "loss_ce": 0.0001422952045686543,
      "loss_iou": 0.80859375,
      "loss_num": 0.0189208984375,
      "loss_xval": 0.0947265625,
      "num_input_tokens_seen": 21062984,
      "step": 122
    },
    {
      "epoch": 0.3203125,
      "grad_norm": 8.718508748737245,
      "learning_rate": 5e-06,
      "loss": 0.1031,
      "num_input_tokens_seen": 21235792,
      "step": 123
    },
    {
      "epoch": 0.3203125,
      "loss": 0.11195512861013412,
      "loss_ce": 0.0001387260272167623,
      "loss_iou": 0.49609375,
      "loss_num": 0.0223388671875,
      "loss_xval": 0.11181640625,
      "num_input_tokens_seen": 21235792,
      "step": 123
    },
    {
      "epoch": 0.3229166666666667,
      "grad_norm": 13.341861393347486,
      "learning_rate": 5e-06,
      "loss": 0.1039,
      "num_input_tokens_seen": 21407888,
      "step": 124
    },
    {
      "epoch": 0.3229166666666667,
      "loss": 0.11872819066047668,
      "loss_ce": 0.0001673972437856719,
      "loss_iou": 0.703125,
      "loss_num": 0.023681640625,
      "loss_xval": 0.11865234375,
      "num_input_tokens_seen": 21407888,
      "step": 124
    },
    {
      "epoch": 0.3255208333333333,
      "grad_norm": 10.748431502763593,
      "learning_rate": 5e-06,
      "loss": 0.1201,
      "num_input_tokens_seen": 21580252,
      "step": 125
    },
    {
      "epoch": 0.3255208333333333,
      "loss": 0.14679506421089172,
      "loss_ce": 6.655443576164544e-05,
      "loss_iou": 0.373046875,
      "loss_num": 0.029296875,
      "loss_xval": 0.146484375,
      "num_input_tokens_seen": 21580252,
      "step": 125
    },
    {
      "epoch": 0.328125,
      "grad_norm": 9.981967396091962,
      "learning_rate": 5e-06,
      "loss": 0.1147,
      "num_input_tokens_seen": 21753052,
      "step": 126
    },
    {
      "epoch": 0.328125,
      "loss": 0.09238539636135101,
      "loss_ce": 0.00010023377399193123,
      "loss_iou": 0.6328125,
      "loss_num": 0.0184326171875,
      "loss_xval": 0.09228515625,
      "num_input_tokens_seen": 21753052,
      "step": 126
    },
    {
      "epoch": 0.3307291666666667,
      "grad_norm": 8.119992313803278,
      "learning_rate": 5e-06,
      "loss": 0.1072,
      "num_input_tokens_seen": 21925632,
      "step": 127
    },
    {
      "epoch": 0.3307291666666667,
      "loss": 0.07983443140983582,
      "loss_ce": 9.200449858326465e-05,
      "loss_iou": 0.8046875,
      "loss_num": 0.0159912109375,
      "loss_xval": 0.07958984375,
      "num_input_tokens_seen": 21925632,
      "step": 127
    },
    {
      "epoch": 0.3333333333333333,
      "grad_norm": 6.9850353772680105,
      "learning_rate": 5e-06,
      "loss": 0.125,
      "num_input_tokens_seen": 22098616,
      "step": 128
    },
    {
      "epoch": 0.3333333333333333,
      "loss": 0.12042608857154846,
      "loss_ce": 0.0001257982075912878,
      "loss_iou": 0.65234375,
      "loss_num": 0.0240478515625,
      "loss_xval": 0.1201171875,
      "num_input_tokens_seen": 22098616,
      "step": 128
    },
    {
      "epoch": 0.3359375,
      "grad_norm": 3.3562574299779073,
      "learning_rate": 5e-06,
      "loss": 0.0891,
      "num_input_tokens_seen": 22270980,
      "step": 129
    },
    {
      "epoch": 0.3359375,
      "loss": 0.07171538472175598,
      "loss_ce": 0.0001516598858870566,
      "loss_iou": 0.9609375,
      "loss_num": 0.01434326171875,
      "loss_xval": 0.07177734375,
      "num_input_tokens_seen": 22270980,
      "step": 129
    },
    {
      "epoch": 0.3385416666666667,
      "grad_norm": 2.474071432452823,
      "learning_rate": 5e-06,
      "loss": 0.0986,
      "num_input_tokens_seen": 22443752,
      "step": 130
    },
    {
      "epoch": 0.3385416666666667,
      "loss": 0.071955606341362,
      "loss_ce": 0.00020878079521935433,
      "loss_iou": 0.7890625,
      "loss_num": 0.01434326171875,
      "loss_xval": 0.07177734375,
      "num_input_tokens_seen": 22443752,
      "step": 130
    },
    {
      "epoch": 0.3411458333333333,
      "grad_norm": 4.769496774720465,
      "learning_rate": 5e-06,
      "loss": 0.0827,
      "num_input_tokens_seen": 22616684,
      "step": 131
    },
    {
      "epoch": 0.3411458333333333,
      "loss": 0.08630406856536865,
      "loss_ce": 0.00021397518867161125,
      "loss_iou": 0.68359375,
      "loss_num": 0.0172119140625,
      "loss_xval": 0.0859375,
      "num_input_tokens_seen": 22616684,
      "step": 131
    },
    {
      "epoch": 0.34375,
      "grad_norm": 14.025079611665472,
      "learning_rate": 5e-06,
      "loss": 0.0834,
      "num_input_tokens_seen": 22789044,
      "step": 132
    },
    {
      "epoch": 0.34375,
      "loss": 0.10616521537303925,
      "loss_ce": 0.00011663565237540752,
      "loss_iou": 0.0,
      "loss_num": 0.021240234375,
      "loss_xval": 0.10595703125,
      "num_input_tokens_seen": 22789044,
      "step": 132
    },
    {
      "epoch": 0.3463541666666667,
      "grad_norm": 3.9669475156886946,
      "learning_rate": 5e-06,
      "loss": 0.1167,
      "num_input_tokens_seen": 22962080,
      "step": 133
    },
    {
      "epoch": 0.3463541666666667,
      "loss": 0.11822222173213959,
      "loss_ce": 0.00014970809570513666,
      "loss_iou": 0.7109375,
      "loss_num": 0.023681640625,
      "loss_xval": 0.1181640625,
      "num_input_tokens_seen": 22962080,
      "step": 133
    },
    {
      "epoch": 0.3489583333333333,
      "grad_norm": 6.482768707064352,
      "learning_rate": 5e-06,
      "loss": 0.1163,
      "num_input_tokens_seen": 23135120,
      "step": 134
    },
    {
      "epoch": 0.3489583333333333,
      "loss": 0.09390418976545334,
      "loss_ce": 0.00012366939336061478,
      "loss_iou": 0.8125,
      "loss_num": 0.018798828125,
      "loss_xval": 0.09375,
      "num_input_tokens_seen": 23135120,
      "step": 134
    },
    {
      "epoch": 0.3515625,
      "grad_norm": 4.75477602454939,
      "learning_rate": 5e-06,
      "loss": 0.1443,
      "num_input_tokens_seen": 23308372,
      "step": 135
    },
    {
      "epoch": 0.3515625,
      "loss": 0.1679096817970276,
      "loss_ce": 6.299919914454222e-05,
      "loss_iou": 0.578125,
      "loss_num": 0.033447265625,
      "loss_xval": 0.16796875,
      "num_input_tokens_seen": 23308372,
      "step": 135
    },
    {
      "epoch": 0.3541666666666667,
      "grad_norm": 2.514900037834426,
      "learning_rate": 5e-06,
      "loss": 0.1001,
      "num_input_tokens_seen": 23480360,
      "step": 136
    },
    {
      "epoch": 0.3541666666666667,
      "loss": 0.10733547061681747,
      "loss_ce": 6.619012128794566e-05,
      "loss_iou": 0.6875,
      "loss_num": 0.021484375,
      "loss_xval": 0.107421875,
      "num_input_tokens_seen": 23480360,
      "step": 136
    },
    {
      "epoch": 0.3567708333333333,
      "grad_norm": 4.934909863394261,
      "learning_rate": 5e-06,
      "loss": 0.1083,
      "num_input_tokens_seen": 23653652,
      "step": 137
    },
    {
      "epoch": 0.3567708333333333,
      "loss": 0.11394178867340088,
      "loss_ce": 8.070516923908144e-05,
      "loss_iou": 0.8125,
      "loss_num": 0.0228271484375,
      "loss_xval": 0.11376953125,
      "num_input_tokens_seen": 23653652,
      "step": 137
    },
    {
      "epoch": 0.359375,
      "grad_norm": 3.707663252931766,
      "learning_rate": 5e-06,
      "loss": 0.0869,
      "num_input_tokens_seen": 23826220,
      "step": 138
    },
    {
      "epoch": 0.359375,
      "loss": 0.08403357863426208,
      "loss_ce": 0.0001407563831890002,
|
"loss_iou": 0.8046875, |
|
"loss_num": 0.0167236328125, |
|
"loss_xval": 0.083984375, |
|
"num_input_tokens_seen": 23826220, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.3619791666666667, |
|
"grad_norm": 5.810148215517029, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0944, |
|
"num_input_tokens_seen": 23998808, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.3619791666666667, |
|
"loss": 0.15188120305538177, |
|
"loss_ce": 8.676404104335234e-05, |
|
"loss_iou": 0.66796875, |
|
"loss_num": 0.0303955078125, |
|
"loss_xval": 0.1513671875, |
|
"num_input_tokens_seen": 23998808, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.3645833333333333, |
|
"grad_norm": 5.097709919840357, |
|
"learning_rate": 5e-06, |
|
"loss": 0.1118, |
|
"num_input_tokens_seen": 24171244, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.3645833333333333, |
|
"loss": 0.0743027776479721, |
|
"loss_ce": 8.402515959460288e-05, |
|
"loss_iou": 0.5859375, |
|
"loss_num": 0.01483154296875, |
|
"loss_xval": 0.07421875, |
|
"num_input_tokens_seen": 24171244, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.3671875, |
|
"grad_norm": 29.485026694205214, |
|
"learning_rate": 5e-06, |
|
"loss": 0.1345, |
|
"num_input_tokens_seen": 24343728, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.3671875, |
|
"loss": 0.20851582288742065, |
|
"loss_ce": 0.00012653246812988073, |
|
"loss_iou": 0.0, |
|
"loss_num": 0.041748046875, |
|
"loss_xval": 0.2080078125, |
|
"num_input_tokens_seen": 24343728, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.3697916666666667, |
|
"grad_norm": 15.306197535117493, |
|
"learning_rate": 5e-06, |
|
"loss": 0.1169, |
|
"num_input_tokens_seen": 24516776, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.3697916666666667, |
|
"loss": 0.06858328729867935, |
|
"loss_ce": 7.132141035981476e-05, |
|
"loss_iou": 0.6328125, |
|
"loss_num": 0.013671875, |
|
"loss_xval": 0.068359375, |
|
"num_input_tokens_seen": 24516776, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.3723958333333333, |
|
"grad_norm": 7.8570075555495205, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0911, |
|
"num_input_tokens_seen": 24689788, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.3723958333333333, |
|
"loss": 0.10039569437503815, |
|
"loss_ce": 8.441291720373556e-05, |
|
"loss_iou": 0.443359375, |
|
"loss_num": 0.02001953125, |
|
"loss_xval": 0.10009765625, |
|
"num_input_tokens_seen": 24689788, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.375, |
|
"grad_norm": 7.50330036811974, |
|
"learning_rate": 5e-06, |
|
"loss": 0.125, |
|
"num_input_tokens_seen": 24862452, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.375, |
|
"loss": 0.06121515482664108, |
|
"loss_ce": 5.792636875412427e-05, |
|
"loss_iou": 0.6875, |
|
"loss_num": 0.01226806640625, |
|
"loss_xval": 0.06103515625, |
|
"num_input_tokens_seen": 24862452, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.3776041666666667, |
|
"grad_norm": 9.259685096230124, |
|
"learning_rate": 5e-06, |
|
"loss": 0.115, |
|
"num_input_tokens_seen": 25035336, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.3776041666666667, |
|
"loss": 0.09985796362161636, |
|
"loss_ce": 0.0002180726150982082, |
|
"loss_iou": 0.79296875, |
|
"loss_num": 0.0198974609375, |
|
"loss_xval": 0.099609375, |
|
"num_input_tokens_seen": 25035336, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.3802083333333333, |
|
"grad_norm": 4.49972816018969, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0953, |
|
"num_input_tokens_seen": 25207968, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.3802083333333333, |
|
"loss": 0.10796058923006058, |
|
"loss_ce": 0.0020951118785887957, |
|
"loss_iou": 0.54296875, |
|
"loss_num": 0.021240234375, |
|
"loss_xval": 0.10595703125, |
|
"num_input_tokens_seen": 25207968, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.3828125, |
|
"grad_norm": 5.73441077024277, |
|
"learning_rate": 5e-06, |
|
"loss": 0.1225, |
|
"num_input_tokens_seen": 25380784, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.3828125, |
|
"loss": 0.09899605065584183, |
|
"loss_ce": 5.806491753901355e-05, |
|
"loss_iou": 0.7578125, |
|
"loss_num": 0.019775390625, |
|
"loss_xval": 0.09912109375, |
|
"num_input_tokens_seen": 25380784, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.3854166666666667, |
|
"grad_norm": 3.6755366051445137, |
|
"learning_rate": 5e-06, |
|
"loss": 0.1046, |
|
"num_input_tokens_seen": 25553764, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.3854166666666667, |
|
"loss": 0.07523618638515472, |
|
"loss_ce": 7.139628723962232e-05, |
|
"loss_iou": 0.71875, |
|
"loss_num": 0.0150146484375, |
|
"loss_xval": 0.0751953125, |
|
"num_input_tokens_seen": 25553764, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.3880208333333333, |
|
"grad_norm": 38.91246982097314, |
|
"learning_rate": 5e-06, |
|
"loss": 0.1093, |
|
"num_input_tokens_seen": 25726456, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.3880208333333333, |
|
"loss": 0.08839882165193558, |
|
"loss_ce": 8.095278462860733e-05, |
|
"loss_iou": 0.80078125, |
|
"loss_num": 0.0177001953125, |
|
"loss_xval": 0.08837890625, |
|
"num_input_tokens_seen": 25726456, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.390625, |
|
"grad_norm": 10.207658282865648, |
|
"learning_rate": 5e-06, |
|
"loss": 0.123, |
|
"num_input_tokens_seen": 25899148, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.390625, |
|
"loss": 0.0730600580573082, |
|
"loss_ce": 0.000214598243474029, |
|
"loss_iou": 0.625, |
|
"loss_num": 0.01458740234375, |
|
"loss_xval": 0.07275390625, |
|
"num_input_tokens_seen": 25899148, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.3932291666666667, |
|
"grad_norm": 4.730292038840616, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0989, |
|
"num_input_tokens_seen": 26072084, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.3932291666666667, |
|
"loss": 0.15038591623306274, |
|
"loss_ce": 0.00011736503802239895, |
|
"loss_iou": 0.578125, |
|
"loss_num": 0.030029296875, |
|
"loss_xval": 0.150390625, |
|
"num_input_tokens_seen": 26072084, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.3958333333333333, |
|
"grad_norm": 3.321333890252999, |
|
"learning_rate": 5e-06, |
|
"loss": 0.103, |
|
"num_input_tokens_seen": 26244756, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.3958333333333333, |
|
"loss": 0.08549217134714127, |
|
"loss_ce": 0.0001497594639658928, |
|
"loss_iou": 0.51953125, |
|
"loss_num": 0.01708984375, |
|
"loss_xval": 0.08544921875, |
|
"num_input_tokens_seen": 26244756, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.3984375, |
|
"grad_norm": 6.087065910058266, |
|
"learning_rate": 5e-06, |
|
"loss": 0.08, |
|
"num_input_tokens_seen": 26417208, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.3984375, |
|
"loss": 0.073136106133461, |
|
"loss_ce": 4.6509514504577965e-05, |
|
"loss_iou": 0.58203125, |
|
"loss_num": 0.0146484375, |
|
"loss_xval": 0.0732421875, |
|
"num_input_tokens_seen": 26417208, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.4010416666666667, |
|
"grad_norm": 4.65746127286459, |
|
"learning_rate": 5e-06, |
|
"loss": 0.114, |
|
"num_input_tokens_seen": 26590204, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.4010416666666667, |
|
"loss": 0.10405319184064865, |
|
"loss_ce": 7.980540976859629e-05, |
|
"loss_iou": 0.6953125, |
|
"loss_num": 0.020751953125, |
|
"loss_xval": 0.10400390625, |
|
"num_input_tokens_seen": 26590204, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.4036458333333333, |
|
"grad_norm": 6.888837612325361, |
|
"learning_rate": 5e-06, |
|
"loss": 0.1096, |
|
"num_input_tokens_seen": 26762676, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.4036458333333333, |
|
"loss": 0.11900262534618378, |
|
"loss_ce": 7.562051177956164e-05, |
|
"loss_iou": 0.0, |
|
"loss_num": 0.0238037109375, |
|
"loss_xval": 0.119140625, |
|
"num_input_tokens_seen": 26762676, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.40625, |
|
"grad_norm": 5.172471219817385, |
|
"learning_rate": 5e-06, |
|
"loss": 0.111, |
|
"num_input_tokens_seen": 26934984, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.40625, |
|
"loss": 0.11719199270009995, |
|
"loss_ce": 6.55300755170174e-05, |
|
"loss_iou": 0.64453125, |
|
"loss_num": 0.0234375, |
|
"loss_xval": 0.1171875, |
|
"num_input_tokens_seen": 26934984, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.4088541666666667, |
|
"grad_norm": 4.328240204635411, |
|
"learning_rate": 5e-06, |
|
"loss": 0.1044, |
|
"num_input_tokens_seen": 27106980, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.4088541666666667, |
|
"loss": 0.18060433864593506, |
|
"loss_ce": 0.0002149457432096824, |
|
"loss_iou": 0.5703125, |
|
"loss_num": 0.0361328125, |
|
"loss_xval": 0.1806640625, |
|
"num_input_tokens_seen": 27106980, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.4114583333333333, |
|
"grad_norm": 24.038857971844152, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0807, |
|
"num_input_tokens_seen": 27279788, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.4114583333333333, |
|
"loss": 0.07357801496982574, |
|
"loss_ce": 0.00018323655240237713, |
|
"loss_iou": 0.0, |
|
"loss_num": 0.01470947265625, |
|
"loss_xval": 0.0732421875, |
|
"num_input_tokens_seen": 27279788, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.4140625, |
|
"grad_norm": 7.8628512106902315, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0787, |
|
"num_input_tokens_seen": 27452544, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.4140625, |
|
"loss": 0.07588262856006622, |
|
"loss_ce": 9.22220351640135e-05, |
|
"loss_iou": 0.55859375, |
|
"loss_num": 0.01519775390625, |
|
"loss_xval": 0.07568359375, |
|
"num_input_tokens_seen": 27452544, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.4166666666666667, |
|
"grad_norm": 13.319740473348578, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0969, |
|
"num_input_tokens_seen": 27625396, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.4166666666666667, |
|
"loss": 0.09103557467460632, |
|
"loss_ce": 6.267878779908642e-05, |
|
"loss_iou": 0.5546875, |
|
"loss_num": 0.0181884765625, |
|
"loss_xval": 0.0908203125, |
|
"num_input_tokens_seen": 27625396, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.4192708333333333, |
|
"grad_norm": 4.7866046147187715, |
|
"learning_rate": 5e-06, |
|
"loss": 0.1014, |
|
"num_input_tokens_seen": 27797456, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.4192708333333333, |
|
"loss": 0.08366774767637253, |
|
"loss_ce": 8.010101737454534e-05, |
|
"loss_iou": 0.5546875, |
|
"loss_num": 0.0167236328125, |
|
"loss_xval": 0.08349609375, |
|
"num_input_tokens_seen": 27797456, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.421875, |
|
"grad_norm": 2.272455331760193, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0512, |
|
"num_input_tokens_seen": 27969760, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.421875, |
|
"loss": 0.03556237369775772, |
|
"loss_ce": 3.9912010834086686e-05, |
|
"loss_iou": 0.7109375, |
|
"loss_num": 0.007110595703125, |
|
"loss_xval": 0.03564453125, |
|
"num_input_tokens_seen": 27969760, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.4244791666666667, |
|
"grad_norm": 14.623479662016367, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0969, |
|
"num_input_tokens_seen": 28141788, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.4244791666666667, |
|
"loss": 0.06145535781979561, |
|
"loss_ce": 0.0001302829186897725, |
|
"loss_iou": 0.478515625, |
|
"loss_num": 0.01226806640625, |
|
"loss_xval": 0.061279296875, |
|
"num_input_tokens_seen": 28141788, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.4270833333333333, |
|
"grad_norm": 5.18949662678828, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0929, |
|
"num_input_tokens_seen": 28314784, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.4270833333333333, |
|
"loss": 0.09928463399410248, |
|
"loss_ce": 7.19891395419836e-05, |
|
"loss_iou": 0.66015625, |
|
"loss_num": 0.0198974609375, |
|
"loss_xval": 0.09912109375, |
|
"num_input_tokens_seen": 28314784, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.4296875, |
|
"grad_norm": 11.297198645176522, |
|
"learning_rate": 5e-06, |
|
"loss": 0.168, |
|
"num_input_tokens_seen": 28488116, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.4296875, |
|
"loss": 0.2097131311893463, |
|
"loss_ce": 0.00036255159648135304, |
|
"loss_iou": 0.58203125, |
|
"loss_num": 0.0419921875, |
|
"loss_xval": 0.208984375, |
|
"num_input_tokens_seen": 28488116, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.4322916666666667, |
|
"grad_norm": 3.7511749963618604, |
|
"learning_rate": 5e-06, |
|
"loss": 0.1104, |
|
"num_input_tokens_seen": 28660644, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.4322916666666667, |
|
"loss": 0.14858925342559814, |
|
"loss_ce": 0.00012123005581088364, |
|
"loss_iou": 0.72265625, |
|
"loss_num": 0.0296630859375, |
|
"loss_xval": 0.1484375, |
|
"num_input_tokens_seen": 28660644, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.4348958333333333, |
|
"grad_norm": 3.143289835870396, |
|
"learning_rate": 5e-06, |
|
"loss": 0.1021, |
|
"num_input_tokens_seen": 28833256, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.4348958333333333, |
|
"loss": 0.07967463880777359, |
|
"loss_ce": 0.00023738775053061545, |
|
"loss_iou": 0.6796875, |
|
"loss_num": 0.015869140625, |
|
"loss_xval": 0.07958984375, |
|
"num_input_tokens_seen": 28833256, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.4375, |
|
"grad_norm": 2.7797894675264336, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0608, |
|
"num_input_tokens_seen": 29005644, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.4375, |
|
"loss": 0.03842185065150261, |
|
"loss_ce": 9.177176252705976e-05, |
|
"loss_iou": 0.6875, |
|
"loss_num": 0.007659912109375, |
|
"loss_xval": 0.038330078125, |
|
"num_input_tokens_seen": 29005644, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.4401041666666667, |
|
"grad_norm": 5.829730930450416, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0882, |
|
"num_input_tokens_seen": 29178140, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.4401041666666667, |
|
"loss": 0.08472438156604767, |
|
"loss_ce": 0.0001754334516590461, |
|
"loss_iou": 0.0, |
|
"loss_num": 0.0169677734375, |
|
"loss_xval": 0.08447265625, |
|
"num_input_tokens_seen": 29178140, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.4427083333333333, |
|
"grad_norm": 19.34918748164043, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0919, |
|
"num_input_tokens_seen": 29350724, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.4427083333333333, |
|
"loss": 0.14068102836608887, |
|
"loss_ce": 0.00017810959252528846, |
|
"loss_iou": 0.68359375, |
|
"loss_num": 0.028076171875, |
|
"loss_xval": 0.140625, |
|
"num_input_tokens_seen": 29350724, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.4453125, |
|
"grad_norm": 11.305442675935751, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0914, |
|
"num_input_tokens_seen": 29523556, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.4453125, |
|
"loss": 0.08168038725852966, |
|
"loss_ce": 0.00010690485214581713, |
|
"loss_iou": 0.4921875, |
|
"loss_num": 0.016357421875, |
|
"loss_xval": 0.08154296875, |
|
"num_input_tokens_seen": 29523556, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.4479166666666667, |
|
"grad_norm": 2.746755888252267, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0956, |
|
"num_input_tokens_seen": 29696232, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.4479166666666667, |
|
"loss": 0.08047676831483841, |
|
"loss_ce": 6.295397179201245e-05, |
|
"loss_iou": 0.40234375, |
|
"loss_num": 0.01611328125, |
|
"loss_xval": 0.08056640625, |
|
"num_input_tokens_seen": 29696232, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.4505208333333333, |
|
"grad_norm": 6.619988685929648, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0578, |
|
"num_input_tokens_seen": 29868892, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.4505208333333333, |
|
"loss": 0.03938157111406326, |
|
"loss_ce": 0.0005632122629322112, |
|
"loss_iou": 0.46875, |
|
"loss_num": 0.00775146484375, |
|
"loss_xval": 0.038818359375, |
|
"num_input_tokens_seen": 29868892, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.453125, |
|
"grad_norm": 11.839215400516537, |
|
"learning_rate": 5e-06, |
|
"loss": 0.1263, |
|
"num_input_tokens_seen": 30041044, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.453125, |
|
"loss": 0.06526083499193192, |
|
"loss_ce": 0.0001820992911234498, |
|
"loss_iou": 0.671875, |
|
"loss_num": 0.01300048828125, |
|
"loss_xval": 0.06494140625, |
|
"num_input_tokens_seen": 30041044, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.4557291666666667, |
|
"grad_norm": 4.532895192393366, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0624, |
|
"num_input_tokens_seen": 30213960, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.4557291666666667, |
|
"loss": 0.05709821730852127, |
|
"loss_ce": 0.00015241916116792709, |
|
"loss_iou": 0.4140625, |
|
"loss_num": 0.0113525390625, |
|
"loss_xval": 0.056884765625, |
|
"num_input_tokens_seen": 30213960, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.4583333333333333, |
|
"grad_norm": 4.373257654750305, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0684, |
|
"num_input_tokens_seen": 30386860, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.4583333333333333, |
|
"loss": 0.048836298286914825, |
|
"loss_ce": 6.920905434526503e-05, |
|
"loss_iou": 0.4296875, |
|
"loss_num": 0.009765625, |
|
"loss_xval": 0.048828125, |
|
"num_input_tokens_seen": 30386860, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.4609375, |
|
"grad_norm": 7.579139401570638, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0843, |
|
"num_input_tokens_seen": 30559656, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.4609375, |
|
"loss": 0.12145084142684937, |
|
"loss_ce": 6.717803626088426e-05, |
|
"loss_iou": 0.0, |
|
"loss_num": 0.0242919921875, |
|
"loss_xval": 0.12158203125, |
|
"num_input_tokens_seen": 30559656, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.4635416666666667, |
|
"grad_norm": 5.807914334628034, |
|
"learning_rate": 5e-06, |
|
"loss": 0.1275, |
|
"num_input_tokens_seen": 30732276, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.4635416666666667, |
|
"loss": 0.10631553828716278, |
|
"loss_ce": 0.00014487920270767063, |
|
"loss_iou": 0.671875, |
|
"loss_num": 0.021240234375, |
|
"loss_xval": 0.10595703125, |
|
"num_input_tokens_seen": 30732276, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.4661458333333333, |
|
"grad_norm": 2.6998654471345827, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0584, |
|
"num_input_tokens_seen": 30905448, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.4661458333333333, |
|
"loss": 0.043617475777864456, |
|
"loss_ce": 0.00012992750271223485, |
|
"loss_iou": 0.67578125, |
|
"loss_num": 0.0086669921875, |
|
"loss_xval": 0.04345703125, |
|
"num_input_tokens_seen": 30905448, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.46875, |
|
"grad_norm": 10.092481931653555, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0841, |
|
"num_input_tokens_seen": 31078192, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.46875, |
|
"loss": 0.07613378763198853, |
|
"loss_ce": 0.00038915983168408275, |
|
"loss_iou": 0.439453125, |
|
"loss_num": 0.01513671875, |
|
"loss_xval": 0.07568359375, |
|
"num_input_tokens_seen": 31078192, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.4713541666666667, |
|
"grad_norm": 4.850400427659922, |
|
"learning_rate": 5e-06, |
|
"loss": 0.1259, |
|
"num_input_tokens_seen": 31250936, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.4713541666666667, |
|
"loss": 0.06517961621284485, |
|
"loss_ce": 5.5098360462579876e-05, |
|
"loss_iou": 0.703125, |
|
"loss_num": 0.01300048828125, |
|
"loss_xval": 0.06494140625, |
|
"num_input_tokens_seen": 31250936, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.4739583333333333, |
|
"grad_norm": 8.116429898780023, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0679, |
|
"num_input_tokens_seen": 31423824, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.4739583333333333, |
|
"loss": 0.04832879453897476, |
|
"loss_ce": 0.00014153837400954217, |
|
"loss_iou": 0.67578125, |
|
"loss_num": 0.0096435546875, |
|
"loss_xval": 0.048095703125, |
|
"num_input_tokens_seen": 31423824, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.4765625, |
|
"grad_norm": 15.778873010591404, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0796, |
|
"num_input_tokens_seen": 31596028, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.4765625, |
|
"loss": 0.058887895196676254, |
|
"loss_ce": 9.577826858730987e-05, |
|
"loss_iou": 0.5703125, |
|
"loss_num": 0.01177978515625, |
|
"loss_xval": 0.058837890625, |
|
"num_input_tokens_seen": 31596028, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.4791666666666667, |
|
"grad_norm": 4.58612996328364, |
|
"learning_rate": 5e-06, |
|
"loss": 0.1133, |
|
"num_input_tokens_seen": 31768480, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.4791666666666667, |
|
"loss": 0.07175838947296143, |
|
"loss_ce": 7.259925041580573e-05, |
|
"loss_iou": 0.625, |
|
"loss_num": 0.01434326171875, |
|
"loss_xval": 0.07177734375, |
|
"num_input_tokens_seen": 31768480, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.4817708333333333, |
|
"grad_norm": 15.298267591347829, |
|
"learning_rate": 5e-06, |
|
"loss": 0.137, |
|
"num_input_tokens_seen": 31941340, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.4817708333333333, |
|
"loss": 0.1580718755722046, |
|
"loss_ce": 0.00023497387883253396, |
|
"loss_iou": 0.6484375, |
|
"loss_num": 0.031494140625, |
|
"loss_xval": 0.158203125, |
|
"num_input_tokens_seen": 31941340, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.484375, |
|
"grad_norm": 9.445985569352896, |
|
"learning_rate": 5e-06, |
|
"loss": 0.1414, |
|
"num_input_tokens_seen": 32114196, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.484375, |
|
"loss": 0.1261276751756668, |
|
"loss_ce": 0.00012059589062118903, |
|
"loss_iou": 0.68359375, |
|
"loss_num": 0.025146484375, |
|
"loss_xval": 0.1259765625, |
|
"num_input_tokens_seen": 32114196, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.4869791666666667, |
|
"grad_norm": 4.074608010814493, |
|
"learning_rate": 5e-06, |
|
"loss": 0.1168, |
|
"num_input_tokens_seen": 32286624, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.4869791666666667, |
|
"loss": 0.08998198807239532, |
|
"loss_ce": 0.0001382330956403166, |
|
"loss_iou": 0.6328125, |
|
"loss_num": 0.0179443359375, |
|
"loss_xval": 0.08984375, |
|
"num_input_tokens_seen": 32286624, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.4895833333333333, |
|
"grad_norm": 3.9575106116123293, |
|
"learning_rate": 5e-06, |
|
"loss": 0.1015, |
|
"num_input_tokens_seen": 32459076, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.4895833333333333, |
|
"loss": 0.10690590739250183, |
|
"loss_ce": 0.00015541848551947623, |
|
"loss_iou": 0.6328125, |
|
"loss_num": 0.0213623046875, |
|
"loss_xval": 0.10693359375, |
|
"num_input_tokens_seen": 32459076, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.4921875, |
|
"grad_norm": 3.7334350922271793, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0944, |
|
"num_input_tokens_seen": 32631908, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.4921875, |
|
"loss": 0.12938711047172546, |
|
"loss_ce": 8.413316390942782e-05, |
|
"loss_iou": 0.5, |
|
"loss_num": 0.02587890625, |
|
"loss_xval": 0.12890625, |
|
"num_input_tokens_seen": 32631908, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.4947916666666667, |
|
"grad_norm": 12.613411687656823, |
|
"learning_rate": 5e-06, |
|
"loss": 0.089, |
|
"num_input_tokens_seen": 32804848, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.4947916666666667, |
|
"loss": 0.17628361284732819, |
|
"loss_ce": 0.00019717792747542262, |
|
"loss_iou": 0.56640625, |
|
"loss_num": 0.03515625, |
|
"loss_xval": 0.17578125, |
|
"num_input_tokens_seen": 32804848, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.4973958333333333, |
|
"grad_norm": 10.089904229108118, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0984, |
|
"num_input_tokens_seen": 32977460, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.4973958333333333, |
|
"loss": 0.09072966128587723, |
|
"loss_ce": 0.00012297437933739275, |
|
"loss_iou": 0.6796875, |
|
"loss_num": 0.0181884765625, |
|
"loss_xval": 0.0908203125, |
|
"num_input_tokens_seen": 32977460, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"grad_norm": 5.567747432819187, |
|
"learning_rate": 5e-06, |
|
"loss": 0.107, |
|
"num_input_tokens_seen": 33150480, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.5, |
|
"loss": 0.08254844695329666, |
|
"loss_ce": 8.995212556328624e-05, |
|
"loss_iou": 0.68359375, |
|
"loss_num": 0.0164794921875, |
|
"loss_xval": 0.08251953125, |
|
"num_input_tokens_seen": 33150480, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.5026041666666666, |
|
"grad_norm": 5.606336333733017, |
|
"learning_rate": 5e-06, |
|
"loss": 0.092, |
|
"num_input_tokens_seen": 33322812, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.5026041666666666, |
|
"loss": 0.12639451026916504, |
|
"loss_ce": 8.224871271522716e-05, |
|
"loss_iou": 0.416015625, |
|
"loss_num": 0.0252685546875, |
|
"loss_xval": 0.1259765625, |
|
"num_input_tokens_seen": 33322812, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.5052083333333334, |
|
"grad_norm": 10.892578547201238, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0985, |
|
"num_input_tokens_seen": 33494972, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.5052083333333334, |
|
"loss": 0.04478445649147034, |
|
"loss_ce": 9.146681259153411e-05, |
|
"loss_iou": 0.50390625, |
|
"loss_num": 0.0089111328125, |
|
"loss_xval": 0.044677734375, |
|
"num_input_tokens_seen": 33494972, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.5078125, |
|
"grad_norm": 6.379235584994378, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0909, |
|
"num_input_tokens_seen": 33667632, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.5078125, |
|
"loss": 0.08593515306711197, |
|
"loss_ce": 8.921044354792684e-05, |
|
"loss_iou": 0.56640625, |
|
"loss_num": 0.0172119140625, |
|
"loss_xval": 0.0859375, |
|
"num_input_tokens_seen": 33667632, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.5104166666666666, |
|
"grad_norm": 9.027964931503206, |
|
"learning_rate": 5e-06, |
|
"loss": 0.1088, |
|
"num_input_tokens_seen": 33840020, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.5104166666666666, |
|
"loss": 0.07991337776184082, |
|
"loss_ce": 0.00010991313320118934, |
|
"loss_iou": 0.7265625, |
|
"loss_num": 0.0159912109375, |
|
"loss_xval": 0.07958984375, |
|
"num_input_tokens_seen": 33840020, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.5130208333333334, |
|
"grad_norm": 7.170409659790098, |
|
"learning_rate": 5e-06, |
|
"loss": 0.1238, |
|
"num_input_tokens_seen": 34013036, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.5130208333333334, |
|
"loss": 0.18947342038154602, |
|
"loss_ce": 0.00014236349670682102, |
|
"loss_iou": 0.60546875, |
|
"loss_num": 0.037841796875, |
|
"loss_xval": 0.189453125, |
|
"num_input_tokens_seen": 34013036, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.515625, |
|
"grad_norm": 4.032339612187944, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0872, |
|
"num_input_tokens_seen": 34186220, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.515625, |
|
"loss": 0.07599274069070816, |
|
"loss_ce": 6.500923336716369e-05, |
|
"loss_iou": 0.5625, |
|
"loss_num": 0.01519775390625, |
|
"loss_xval": 0.076171875, |
|
"num_input_tokens_seen": 34186220, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.5182291666666666, |
|
"grad_norm": 4.904239326276205, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0764, |
|
"num_input_tokens_seen": 34359052, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.5182291666666666, |
|
"loss": 0.08559094369411469, |
|
"loss_ce": 0.0001112048194045201, |
|
"loss_iou": 0.59765625, |
|
"loss_num": 0.01708984375, |
|
"loss_xval": 0.08544921875, |
|
"num_input_tokens_seen": 34359052, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.5208333333333334, |
|
"grad_norm": 6.516342930606259, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0773, |
|
"num_input_tokens_seen": 34531672, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.5208333333333334, |
|
"loss": 0.05880989879369736, |
|
"loss_ce": 0.00010933385055977851, |
|
"loss_iou": 0.66796875, |
|
"loss_num": 0.01171875, |
|
"loss_xval": 0.05859375, |
|
"num_input_tokens_seen": 34531672, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.5234375, |
|
"grad_norm": 3.361383386602773, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0848, |
|
"num_input_tokens_seen": 34704136, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 0.5234375, |
|
"loss": 0.05374922603368759, |
|
"loss_ce": 6.880733417347074e-05, |
|
"loss_iou": 0.484375, |
|
"loss_num": 0.0107421875, |
|
"loss_xval": 0.0537109375, |
|
"num_input_tokens_seen": 34704136, |
|
"step": 201 |
|
}, |
|
{ |
|
"epoch": 0.5260416666666666, |
|
"grad_norm": 11.210671135103166, |
|
"learning_rate": 5e-06, |
|
"loss": 0.1067, |
|
"num_input_tokens_seen": 34877364, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.5260416666666666, |
|
"loss": 0.09961295872926712, |
|
"loss_ce": 6.461787415901199e-05, |
|
"loss_iou": 0.7109375, |
|
"loss_num": 0.0198974609375, |
|
"loss_xval": 0.099609375, |
|
"num_input_tokens_seen": 34877364, |
|
"step": 202 |
|
}, |
|
{ |
|
"epoch": 0.5286458333333334, |
|
"grad_norm": 6.444880899253943, |
|
"learning_rate": 5e-06, |
|
"loss": 0.111, |
|
"num_input_tokens_seen": 35050192, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 0.5286458333333334, |
|
"loss": 0.09913990646600723, |
|
"loss_ce": 7.98513792688027e-05, |
|
"loss_iou": 0.5234375, |
|
"loss_num": 0.019775390625, |
|
"loss_xval": 0.09912109375, |
|
"num_input_tokens_seen": 35050192, |
|
"step": 203 |
|
}, |
|
{ |
|
"epoch": 0.53125, |
|
"grad_norm": 3.8614428868304533, |
|
"learning_rate": 5e-06, |
|
"loss": 0.1063, |
|
"num_input_tokens_seen": 35223020, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.53125, |
|
"loss": 0.06953012943267822, |
|
"loss_ce": 7.212607306428254e-05, |
|
"loss_iou": 0.7421875, |
|
"loss_num": 0.013916015625, |
|
"loss_xval": 0.0693359375, |
|
"num_input_tokens_seen": 35223020, |
|
"step": 204 |
|
}, |
|
{ |
|
"epoch": 0.5338541666666666, |
|
"grad_norm": 6.191654916504458, |
|
"learning_rate": 5e-06, |
|
"loss": 0.091, |
|
"num_input_tokens_seen": 35396176, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.5338541666666666, |
|
"loss": 0.15430204570293427, |
|
"loss_ce": 0.00027982849860563874, |
|
"loss_iou": 0.49609375, |
|
"loss_num": 0.03076171875, |
|
"loss_xval": 0.154296875, |
|
"num_input_tokens_seen": 35396176, |
|
"step": 205 |
|
}, |
|
{ |
|
"epoch": 0.5364583333333334, |
|
"grad_norm": 5.468880474808822, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0667, |
|
"num_input_tokens_seen": 35568912, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.5364583333333334, |
|
"loss": 0.06578241288661957, |
|
"loss_ce": 6.280931120272726e-05, |
|
"loss_iou": 0.55078125, |
|
"loss_num": 0.01318359375, |
|
"loss_xval": 0.06591796875, |
|
"num_input_tokens_seen": 35568912, |
|
"step": 206 |
|
}, |
|
{ |
|
"epoch": 0.5390625, |
|
"grad_norm": 5.886325106674437, |
|
"learning_rate": 5e-06, |
|
"loss": 0.1381, |
|
"num_input_tokens_seen": 35741540, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 0.5390625, |
|
"loss": 0.11603943258523941, |
|
"loss_ce": 0.00010315363761037588, |
|
"loss_iou": 0.765625, |
|
"loss_num": 0.023193359375, |
|
"loss_xval": 0.11572265625, |
|
"num_input_tokens_seen": 35741540, |
|
"step": 207 |
|
}, |
|
{ |
|
"epoch": 0.5416666666666666, |
|
"grad_norm": 4.502393531758672, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0893, |
|
"num_input_tokens_seen": 35914024, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.5416666666666666, |
|
"loss": 0.08899325132369995, |
|
"loss_ce": 6.502882024506107e-05, |
|
"loss_iou": 0.75, |
|
"loss_num": 0.017822265625, |
|
"loss_xval": 0.0888671875, |
|
"num_input_tokens_seen": 35914024, |
|
"step": 208 |
|
}, |
|
{ |
|
"epoch": 0.5442708333333334, |
|
"grad_norm": 10.086026290203142, |
|
"learning_rate": 5e-06, |
|
"loss": 0.1033, |
|
"num_input_tokens_seen": 36087084, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 0.5442708333333334, |
|
"loss": 0.060362037271261215, |
|
"loss_ce": 0.00013559818034991622, |
|
"loss_iou": 0.51953125, |
|
"loss_num": 0.01202392578125, |
|
"loss_xval": 0.060302734375, |
|
"num_input_tokens_seen": 36087084, |
|
"step": 209 |
|
}, |
|
{ |
|
"epoch": 0.546875, |
|
"grad_norm": 6.731766943850301, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0989, |
|
"num_input_tokens_seen": 36259864, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.546875, |
|
"loss": 0.04847151041030884, |
|
"loss_ce": 0.00013166893040761352, |
|
"loss_iou": 0.53515625, |
|
"loss_num": 0.0096435546875, |
|
"loss_xval": 0.04833984375, |
|
"num_input_tokens_seen": 36259864, |
|
"step": 210 |
|
}, |
|
{ |
|
"epoch": 0.5494791666666666, |
|
"grad_norm": 6.316474875770928, |
|
"learning_rate": 5e-06, |
|
"loss": 0.087, |
|
"num_input_tokens_seen": 36433104, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 0.5494791666666666, |
|
"loss": 0.09729330986738205, |
|
"loss_ce": 0.0003694796178024262, |
|
"loss_iou": 0.6015625, |
|
"loss_num": 0.0194091796875, |
|
"loss_xval": 0.0966796875, |
|
"num_input_tokens_seen": 36433104, |
|
"step": 211 |
|
}, |
|
{ |
|
"epoch": 0.5520833333333334, |
|
"grad_norm": 8.68013938900971, |
|
"learning_rate": 5e-06, |
|
"loss": 0.1649, |
|
"num_input_tokens_seen": 36605948, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.5520833333333334, |
|
"loss": 0.10549305379390717, |
|
"loss_ce": 5.4825890401843935e-05, |
|
"loss_iou": 0.48046875, |
|
"loss_num": 0.0211181640625, |
|
"loss_xval": 0.10546875, |
|
"num_input_tokens_seen": 36605948, |
|
"step": 212 |
|
}, |
|
{ |
|
"epoch": 0.5546875, |
|
"grad_norm": 2.9587466587848543, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0597, |
|
"num_input_tokens_seen": 36778360, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 0.5546875, |
|
"loss": 0.054243359714746475, |
|
"loss_ce": 0.00010517801274545491, |
|
"loss_iou": 0.69921875, |
|
"loss_num": 0.01080322265625, |
|
"loss_xval": 0.05419921875, |
|
"num_input_tokens_seen": 36778360, |
|
"step": 213 |
|
}, |
|
{ |
|
"epoch": 0.5572916666666666, |
|
"grad_norm": 3.540440347425946, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0541, |
|
"num_input_tokens_seen": 36950340, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.5572916666666666, |
|
"loss": 0.044868774712085724, |
|
"loss_ce": 0.0001605255965841934, |
|
"loss_iou": 0.5234375, |
|
"loss_num": 0.0089111328125, |
|
"loss_xval": 0.044677734375, |
|
"num_input_tokens_seen": 36950340, |
|
"step": 214 |
|
}, |
|
{ |
|
"epoch": 0.5598958333333334, |
|
"grad_norm": 1.7960907214462793, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0558, |
|
"num_input_tokens_seen": 37123392, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.5598958333333334, |
|
"loss": 0.03447698801755905, |
|
"loss_ce": 0.00014471304893959314, |
|
"loss_iou": 0.5, |
|
"loss_num": 0.006866455078125, |
|
"loss_xval": 0.034423828125, |
|
"num_input_tokens_seen": 37123392, |
|
"step": 215 |
|
}, |
|
{ |
|
"epoch": 0.5625, |
|
"grad_norm": 4.431604970837842, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0965, |
|
"num_input_tokens_seen": 37295368, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.5625, |
|
"loss": 0.1555291712284088, |
|
"loss_ce": 8.788481500232592e-05, |
|
"loss_iou": 0.0, |
|
"loss_num": 0.0311279296875, |
|
"loss_xval": 0.1552734375, |
|
"num_input_tokens_seen": 37295368, |
|
"step": 216 |
|
}, |
|
{ |
|
"epoch": 0.5651041666666666, |
|
"grad_norm": 8.013606775608135, |
|
"learning_rate": 5e-06, |
|
"loss": 0.1017, |
|
"num_input_tokens_seen": 37467908, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 0.5651041666666666, |
|
"loss": 0.12359996885061264, |
|
"loss_ce": 0.00015636239550076425, |
|
"loss_iou": 0.0, |
|
"loss_num": 0.024658203125, |
|
"loss_xval": 0.12353515625, |
|
"num_input_tokens_seen": 37467908, |
|
"step": 217 |
|
}, |
|
{ |
|
"epoch": 0.5677083333333334, |
|
"grad_norm": 9.000183276004282, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0964, |
|
"num_input_tokens_seen": 37640328, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.5677083333333334, |
|
"loss": 0.09344692528247833, |
|
"loss_ce": 6.314014899544418e-05, |
|
"loss_iou": 0.609375, |
|
"loss_num": 0.0186767578125, |
|
"loss_xval": 0.09326171875, |
|
"num_input_tokens_seen": 37640328, |
|
"step": 218 |
|
}, |
|
{ |
|
"epoch": 0.5703125, |
|
"grad_norm": 28.397075300946053, |
|
"learning_rate": 5e-06, |
|
"loss": 0.1042, |
|
"num_input_tokens_seen": 37812984, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 0.5703125, |
|
"loss": 0.11136841773986816, |
|
"loss_ce": 7.081658986862749e-05, |
|
"loss_iou": 0.734375, |
|
"loss_num": 0.022216796875, |
|
"loss_xval": 0.111328125, |
|
"num_input_tokens_seen": 37812984, |
|
"step": 219 |
|
}, |
|
{ |
|
"epoch": 0.5729166666666666, |
|
"grad_norm": 3.6482189206456126, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0685, |
|
"num_input_tokens_seen": 37985152, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.5729166666666666, |
|
"loss": 0.05360790342092514, |
|
"loss_ce": 6.481433956651017e-05, |
|
"loss_iou": 0.5859375, |
|
"loss_num": 0.0107421875, |
|
"loss_xval": 0.053466796875, |
|
"num_input_tokens_seen": 37985152, |
|
"step": 220 |
|
}, |
|
{ |
|
"epoch": 0.5755208333333334, |
|
"grad_norm": 24.217399076672056, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0891, |
|
"num_input_tokens_seen": 38157616, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 0.5755208333333334, |
|
"loss": 0.11579165607690811, |
|
"loss_ce": 0.0005420194938778877, |
|
"loss_iou": 0.3984375, |
|
"loss_num": 0.0230712890625, |
|
"loss_xval": 0.115234375, |
|
"num_input_tokens_seen": 38157616, |
|
"step": 221 |
|
}, |
|
{ |
|
"epoch": 0.578125, |
|
"grad_norm": 3.7151820904220063, |
|
"learning_rate": 5e-06, |
|
"loss": 0.057, |
|
"num_input_tokens_seen": 38330496, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.578125, |
|
"loss": 0.04516543075442314, |
|
"loss_ce": 9.09663358470425e-05, |
|
"loss_iou": 0.515625, |
|
"loss_num": 0.009033203125, |
|
"loss_xval": 0.045166015625, |
|
"num_input_tokens_seen": 38330496, |
|
"step": 222 |
|
}, |
|
{ |
|
"epoch": 0.5807291666666666, |
|
"grad_norm": 15.97315866564612, |
|
"learning_rate": 5e-06, |
|
"loss": 0.1208, |
|
"num_input_tokens_seen": 38503204, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 0.5807291666666666, |
|
"loss": 0.11001908779144287, |
|
"loss_ce": 6.425123137887567e-05, |
|
"loss_iou": 0.6484375, |
|
"loss_num": 0.02197265625, |
|
"loss_xval": 0.10986328125, |
|
"num_input_tokens_seen": 38503204, |
|
"step": 223 |
|
}, |
|
{ |
|
"epoch": 0.5833333333333334, |
|
"grad_norm": 15.54736656112959, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0667, |
|
"num_input_tokens_seen": 38675744, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.5833333333333334, |
|
"loss": 0.11892453581094742, |
|
"loss_ce": 0.00011960987467318773, |
|
"loss_iou": 0.0, |
|
"loss_num": 0.0238037109375, |
|
"loss_xval": 0.11865234375, |
|
"num_input_tokens_seen": 38675744, |
|
"step": 224 |
|
}, |
|
{ |
|
"epoch": 0.5859375, |
|
"grad_norm": 8.708565681630517, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0858, |
|
"num_input_tokens_seen": 38848284, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.5859375, |
|
"loss": 0.06076966971158981, |
|
"loss_ce": 0.000436413218267262, |
|
"loss_iou": 0.0, |
|
"loss_num": 0.0120849609375, |
|
"loss_xval": 0.060302734375, |
|
"num_input_tokens_seen": 38848284, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.5885416666666666, |
|
"grad_norm": 11.665207638748996, |
|
"learning_rate": 5e-06, |
|
"loss": 0.1059, |
|
"num_input_tokens_seen": 39021192, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 0.5885416666666666, |
|
"loss": 0.08464138209819794, |
|
"loss_ce": 0.00010768979700515047, |
|
"loss_iou": 0.57421875, |
|
"loss_num": 0.016845703125, |
|
"loss_xval": 0.08447265625, |
|
"num_input_tokens_seen": 39021192, |
|
"step": 226 |
|
}, |
|
{ |
|
"epoch": 0.5911458333333334, |
|
"grad_norm": 6.122755854158079, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0916, |
|
"num_input_tokens_seen": 39194408, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 0.5911458333333334, |
|
"loss": 0.11753110587596893, |
|
"loss_ce": 6.894973921589553e-05, |
|
"loss_iou": 0.6796875, |
|
"loss_num": 0.0234375, |
|
"loss_xval": 0.11767578125, |
|
"num_input_tokens_seen": 39194408, |
|
"step": 227 |
|
}, |
|
{ |
|
"epoch": 0.59375, |
|
"grad_norm": 25.91736548090707, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0936, |
|
"num_input_tokens_seen": 39366972, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.59375, |
|
"loss": 0.09147345274686813, |
|
"loss_ce": 7.33033666620031e-05, |
|
"loss_iou": 0.578125, |
|
"loss_num": 0.018310546875, |
|
"loss_xval": 0.09130859375, |
|
"num_input_tokens_seen": 39366972, |
|
"step": 228 |
|
}, |
|
{ |
|
"epoch": 0.5963541666666666, |
|
"grad_norm": 22.31114946018542, |
|
"learning_rate": 5e-06, |
|
"loss": 0.094, |
|
"num_input_tokens_seen": 39539944, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 0.5963541666666666, |
|
"loss": 0.11777202785015106, |
|
"loss_ce": 3.521383769111708e-05, |
|
"loss_iou": 0.71484375, |
|
"loss_num": 0.0235595703125, |
|
"loss_xval": 0.11767578125, |
|
"num_input_tokens_seen": 39539944, |
|
"step": 229 |
|
}, |
|
{ |
|
"epoch": 0.5989583333333334, |
|
"grad_norm": 4.025666229457589, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0886, |
|
"num_input_tokens_seen": 39712932, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.5989583333333334, |
|
"loss": 0.1201152354478836, |
|
"loss_ce": 0.00015063578030094504, |
|
"loss_iou": 0.6796875, |
|
"loss_num": 0.02392578125, |
|
"loss_xval": 0.1201171875, |
|
"num_input_tokens_seen": 39712932, |
|
"step": 230 |
|
}, |
|
{ |
|
"epoch": 0.6015625, |
|
"grad_norm": 3.7609078788021097, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0683, |
|
"num_input_tokens_seen": 39885616, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 0.6015625, |
|
"loss": 0.07680265605449677, |
|
"loss_ce": 0.0001730183430481702, |
|
"loss_iou": 0.5859375, |
|
"loss_num": 0.01531982421875, |
|
"loss_xval": 0.07666015625, |
|
"num_input_tokens_seen": 39885616, |
|
"step": 231 |
|
}, |
|
{ |
|
"epoch": 0.6041666666666666, |
|
"grad_norm": 5.989352644027437, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0674, |
|
"num_input_tokens_seen": 40057968, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.6041666666666666, |
|
"loss": 0.1005856841802597, |
|
"loss_ce": 0.00013706949539482594, |
|
"loss_iou": 0.462890625, |
|
"loss_num": 0.02001953125, |
|
"loss_xval": 0.1005859375, |
|
"num_input_tokens_seen": 40057968, |
|
"step": 232 |
|
}, |
|
{ |
|
"epoch": 0.6067708333333334, |
|
"grad_norm": 4.762149494132162, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0698, |
|
"num_input_tokens_seen": 40230848, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 0.6067708333333334, |
|
"loss": 0.10376375913619995, |
|
"loss_ce": 3.4517663152655587e-05, |
|
"loss_iou": 0.63671875, |
|
"loss_num": 0.020751953125, |
|
"loss_xval": 0.103515625, |
|
"num_input_tokens_seen": 40230848, |
|
"step": 233 |
|
}, |
|
{ |
|
"epoch": 0.609375, |
|
"grad_norm": 5.409386698496161, |
|
"learning_rate": 5e-06, |
|
"loss": 0.1021, |
|
"num_input_tokens_seen": 40403276, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 0.609375, |
|
"loss": 0.06936685740947723, |
|
"loss_ce": 0.0003818793629761785, |
|
"loss_iou": 0.482421875, |
|
"loss_num": 0.0137939453125, |
|
"loss_xval": 0.06884765625, |
|
"num_input_tokens_seen": 40403276, |
|
"step": 234 |
|
}, |
|
{ |
|
"epoch": 0.6119791666666666, |
|
"grad_norm": 10.974609444669646, |
|
"learning_rate": 5e-06, |
|
"loss": 0.1001, |
|
"num_input_tokens_seen": 40576292, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.6119791666666666, |
|
"loss": 0.11177849024534225, |
|
"loss_ce": 0.00026726460782811046, |
|
"loss_iou": 0.61328125, |
|
"loss_num": 0.0223388671875, |
|
"loss_xval": 0.111328125, |
|
"num_input_tokens_seen": 40576292, |
|
"step": 235 |
|
}, |
|
{ |
|
"epoch": 0.6145833333333334, |
|
"grad_norm": 3.802157730607013, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0768, |
|
"num_input_tokens_seen": 40749076, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.6145833333333334, |
|
"loss": 0.07335153222084045, |
|
"loss_ce": 0.000506069976836443, |
|
"loss_iou": 0.6171875, |
|
"loss_num": 0.0145263671875, |
|
"loss_xval": 0.07275390625, |
|
"num_input_tokens_seen": 40749076, |
|
"step": 236 |
|
}, |
|
{ |
|
"epoch": 0.6171875, |
|
"grad_norm": 3.5754950924222406, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0497, |
|
"num_input_tokens_seen": 40922288, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 0.6171875, |
|
"loss": 0.02886682003736496, |
|
"loss_ce": 0.00014977881801314652, |
|
"loss_iou": 0.55078125, |
|
"loss_num": 0.0057373046875, |
|
"loss_xval": 0.0286865234375, |
|
"num_input_tokens_seen": 40922288, |
|
"step": 237 |
|
}, |
|
{ |
|
"epoch": 0.6197916666666666, |
|
"grad_norm": 4.288040219675324, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0508, |
|
"num_input_tokens_seen": 41094828, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 0.6197916666666666, |
|
"loss": 0.05992227792739868, |
|
"loss_ce": 0.0003519634483382106, |
|
"loss_iou": 0.640625, |
|
"loss_num": 0.01190185546875, |
|
"loss_xval": 0.0595703125, |
|
"num_input_tokens_seen": 41094828, |
|
"step": 238 |
|
}, |
|
{ |
|
"epoch": 0.6223958333333334, |
|
"grad_norm": 6.504525689859585, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0925, |
|
"num_input_tokens_seen": 41267332, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 0.6223958333333334, |
|
"loss": 0.06373357772827148, |
|
"loss_ce": 4.338783037383109e-05, |
|
"loss_iou": 0.6875, |
|
"loss_num": 0.01275634765625, |
|
"loss_xval": 0.0634765625, |
|
"num_input_tokens_seen": 41267332, |
|
"step": 239 |
|
}, |
|
{ |
|
"epoch": 0.625, |
|
"grad_norm": 5.068763378329545, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0829, |
|
"num_input_tokens_seen": 41439728, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.625, |
|
"loss": 0.08366407454013824, |
|
"loss_ce": 0.00022901550983078778, |
|
"loss_iou": 0.578125, |
|
"loss_num": 0.0167236328125, |
|
"loss_xval": 0.08349609375, |
|
"num_input_tokens_seen": 41439728, |
|
"step": 240 |
|
}, |
|
{ |
|
"epoch": 0.6276041666666666, |
|
"grad_norm": 9.15531863667315, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0909, |
|
"num_input_tokens_seen": 41612180, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 0.6276041666666666, |
|
"loss": 0.11120368540287018, |
|
"loss_ce": 5.867354047950357e-05, |
|
"loss_iou": 0.478515625, |
|
"loss_num": 0.022216796875, |
|
"loss_xval": 0.111328125, |
|
"num_input_tokens_seen": 41612180, |
|
"step": 241 |
|
}, |
|
{ |
|
"epoch": 0.6302083333333334, |
|
"grad_norm": 2.0214181878511566, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0741, |
|
"num_input_tokens_seen": 41784848, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 0.6302083333333334, |
|
"loss": 0.05301050841808319, |
|
"loss_ce": 9.30292735574767e-05, |
|
"loss_iou": 0.51171875, |
|
"loss_num": 0.0106201171875, |
|
"loss_xval": 0.052978515625, |
|
"num_input_tokens_seen": 41784848, |
|
"step": 242 |
|
}, |
|
{ |
|
"epoch": 0.6328125, |
|
"grad_norm": 4.1167075841800385, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0462, |
|
"num_input_tokens_seen": 41957024, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 0.6328125, |
|
"loss": 0.05533324182033539, |
|
"loss_ce": 8.116720709949732e-05, |
|
"loss_iou": 0.498046875, |
|
"loss_num": 0.01104736328125, |
|
"loss_xval": 0.05517578125, |
|
"num_input_tokens_seen": 41957024, |
|
"step": 243 |
|
}, |
|
{ |
|
"epoch": 0.6354166666666666, |
|
"grad_norm": 12.037461080324686, |
|
"learning_rate": 5e-06, |
|
"loss": 0.1285, |
|
"num_input_tokens_seen": 42129920, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 0.6354166666666666, |
|
"loss": 0.07943513244390488, |
|
"loss_ce": 8.94309050636366e-05, |
|
"loss_iou": 0.7265625, |
|
"loss_num": 0.015869140625, |
|
"loss_xval": 0.0791015625, |
|
"num_input_tokens_seen": 42129920, |
|
"step": 244 |
|
}, |
|
{ |
|
"epoch": 0.6380208333333334, |
|
"grad_norm": 6.295206206189768, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0889, |
|
"num_input_tokens_seen": 42302912, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.6380208333333334, |
|
"loss": 0.09156939387321472, |
|
"loss_ce": 4.717556657851674e-05, |
|
"loss_iou": 0.5234375, |
|
"loss_num": 0.018310546875, |
|
"loss_xval": 0.09130859375, |
|
"num_input_tokens_seen": 42302912, |
|
"step": 245 |
|
}, |
|
{ |
|
"epoch": 0.640625, |
|
"grad_norm": 25.409557942414935, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0661, |
|
"num_input_tokens_seen": 42475584, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 0.640625, |
|
"loss": 0.097844198346138, |
|
"loss_ce": 6.587710231542587e-05, |
|
"loss_iou": 0.431640625, |
|
"loss_num": 0.01953125, |
|
"loss_xval": 0.09765625, |
|
"num_input_tokens_seen": 42475584, |
|
"step": 246 |
|
}, |
|
{ |
|
"epoch": 0.6432291666666666, |
|
"grad_norm": 4.450370043022936, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0697, |
|
"num_input_tokens_seen": 42647808, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 0.6432291666666666, |
|
"loss": 0.1061711385846138, |
|
"loss_ce": 0.00015306829300243407, |
|
"loss_iou": 0.5859375, |
|
"loss_num": 0.021240234375, |
|
"loss_xval": 0.10595703125, |
|
"num_input_tokens_seen": 42647808, |
|
"step": 247 |
|
}, |
|
{ |
|
"epoch": 0.6458333333333334, |
|
"grad_norm": 4.116581907360989, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0858, |
|
"num_input_tokens_seen": 42820508, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.6458333333333334, |
|
"loss": 0.07190299779176712, |
|
"loss_ce": 7.987646677065641e-05, |
|
"loss_iou": 0.5546875, |
|
"loss_num": 0.01434326171875, |
|
"loss_xval": 0.07177734375, |
|
"num_input_tokens_seen": 42820508, |
|
"step": 248 |
|
}, |
|
{ |
|
"epoch": 0.6484375, |
|
"grad_norm": 4.147716000593212, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0854, |
|
"num_input_tokens_seen": 42992784, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 0.6484375, |
|
"loss": 0.05028773471713066, |
|
"loss_ce": 5.58009123778902e-05, |
|
"loss_iou": 0.6640625, |
|
"loss_num": 0.01007080078125, |
|
"loss_xval": 0.05029296875, |
|
"num_input_tokens_seen": 42992784, |
|
"step": 249 |
|
}, |
|
{ |
|
"epoch": 0.6510416666666666, |
|
"grad_norm": 16.495817541741257, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0747, |
|
"num_input_tokens_seen": 43165896, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.6510416666666666, |
|
"eval_seeclick_CIoU": 0.4124833643436432, |
|
"eval_seeclick_GIoU": 0.41358618438243866, |
|
"eval_seeclick_IoU": 0.445960208773613, |
|
"eval_seeclick_MAE_all": 0.0724409706890583, |
|
"eval_seeclick_MAE_h": 0.06929375603795052, |
|
"eval_seeclick_MAE_w": 0.09850849956274033, |
|
"eval_seeclick_MAE_x": 0.07914602756500244, |
|
"eval_seeclick_MAE_y": 0.04281560517847538, |
|
"eval_seeclick_NUM_probability": 0.9999896287918091, |
|
"eval_seeclick_inside_bbox": 0.921875, |
|
"eval_seeclick_loss": 0.9194074273109436, |
|
"eval_seeclick_loss_ce": 0.6105623841285706, |
|
"eval_seeclick_loss_iou": 0.67578125, |
|
"eval_seeclick_loss_num": 0.0633697509765625, |
|
"eval_seeclick_loss_xval": 0.31683349609375, |
|
"eval_seeclick_runtime": 73.8784, |
|
"eval_seeclick_samples_per_second": 0.582, |
|
"eval_seeclick_steps_per_second": 0.027, |
|
"num_input_tokens_seen": 43165896, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.6510416666666666, |
|
"eval_icons_CIoU": 0.6936101317405701, |
|
"eval_icons_GIoU": 0.697041928768158, |
|
"eval_icons_IoU": 0.704749345779419, |
|
"eval_icons_MAE_all": 0.039153311401605606, |
|
"eval_icons_MAE_h": 0.04060409218072891, |
|
"eval_icons_MAE_w": 0.05324110668152571, |
|
"eval_icons_MAE_x": 0.03839818201959133, |
|
"eval_icons_MAE_y": 0.02436987590044737, |
|
"eval_icons_NUM_probability": 0.9999879896640778, |
|
"eval_icons_inside_bbox": 0.9565972089767456, |
|
"eval_icons_loss": 0.15028713643550873, |
|
"eval_icons_loss_ce": 0.00046230135194491595, |
|
"eval_icons_loss_iou": 0.600341796875, |
|
"eval_icons_loss_num": 0.02852630615234375, |
|
"eval_icons_loss_xval": 0.142608642578125, |
|
"eval_icons_runtime": 80.0672, |
|
"eval_icons_samples_per_second": 0.624, |
|
"eval_icons_steps_per_second": 0.025, |
|
"num_input_tokens_seen": 43165896, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.6510416666666666, |
|
"eval_screenspot_CIoU": 0.42071565985679626, |
|
"eval_screenspot_GIoU": 0.4120611349741618, |
|
"eval_screenspot_IoU": 0.48129573464393616, |
|
"eval_screenspot_MAE_all": 0.11981111764907837, |
|
"eval_screenspot_MAE_h": 0.08953167746464412, |
|
"eval_screenspot_MAE_w": 0.19297573963801065, |
|
"eval_screenspot_MAE_x": 0.1243693083524704, |
|
"eval_screenspot_MAE_y": 0.07236775507529576, |
|
"eval_screenspot_NUM_probability": 0.9999845623970032, |
|
"eval_screenspot_inside_bbox": 0.7979166706403097, |
|
"eval_screenspot_loss": 0.8879116177558899, |
|
"eval_screenspot_loss_ce": 0.3984930415948232, |
|
"eval_screenspot_loss_iou": 0.5793863932291666, |
|
"eval_screenspot_loss_num": 0.09791056315104167, |
|
"eval_screenspot_loss_xval": 0.4894205729166667, |
|
"eval_screenspot_runtime": 139.1948, |
|
"eval_screenspot_samples_per_second": 0.639, |
|
"eval_screenspot_steps_per_second": 0.022, |
|
"num_input_tokens_seen": 43165896, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.6510416666666666, |
|
"eval_compot_CIoU": 0.8471810519695282, |
|
"eval_compot_GIoU": 0.8459496200084686, |
|
"eval_compot_IoU": 0.8493484258651733, |
|
"eval_compot_MAE_all": 0.01606033928692341, |
|
"eval_compot_MAE_h": 0.015686397906392813, |
|
"eval_compot_MAE_w": 0.024428557604551315, |
|
"eval_compot_MAE_x": 0.013795553240925074, |
|
"eval_compot_MAE_y": 0.01033084886148572, |
|
"eval_compot_NUM_probability": 0.9999726712703705, |
|
"eval_compot_inside_bbox": 1.0, |
|
"eval_compot_loss": 0.07782306522130966, |
|
"eval_compot_loss_ce": 0.0001593182678334415, |
|
"eval_compot_loss_iou": 0.693115234375, |
|
"eval_compot_loss_num": 0.01538848876953125, |
|
"eval_compot_loss_xval": 0.076904296875, |
|
"eval_compot_runtime": 81.1661, |
|
"eval_compot_samples_per_second": 0.616, |
|
"eval_compot_steps_per_second": 0.025, |
|
"num_input_tokens_seen": 43165896, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.6510416666666666, |
|
"loss": 0.0681569054722786, |
|
"loss_ce": 0.00022477866150438786, |
|
"loss_iou": 0.7265625, |
|
"loss_num": 0.01361083984375, |
|
"loss_xval": 0.06787109375, |
|
"num_input_tokens_seen": 43165896, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.6536458333333334, |
|
"grad_norm": 3.6701809821868308, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0838, |
|
"num_input_tokens_seen": 43338688, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 0.6536458333333334, |
|
"loss": 0.06336190551519394, |
|
"loss_ce": 9.896879782900214e-05, |
|
"loss_iou": 0.74609375, |
|
"loss_num": 0.01263427734375, |
|
"loss_xval": 0.0634765625, |
|
"num_input_tokens_seen": 43338688, |
|
"step": 251 |
|
}, |
|
{ |
|
"epoch": 0.65625, |
|
"grad_norm": 1.8275074603928982, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0518, |
|
"num_input_tokens_seen": 43511012, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 0.65625, |
|
"loss": 0.04357748478651047, |
|
"loss_ce": 0.00015097142022568733, |
|
"loss_iou": 0.5078125, |
|
"loss_num": 0.0086669921875, |
|
"loss_xval": 0.04345703125, |
|
"num_input_tokens_seen": 43511012, |
|
"step": 252 |
|
}, |
|
{ |
|
"epoch": 0.6588541666666666, |
|
"grad_norm": 2.1416791842803238, |
|
"learning_rate": 5e-06, |
|
"loss": 0.057, |
|
"num_input_tokens_seen": 43683084, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 0.6588541666666666, |
|
"loss": 0.05978800728917122, |
|
"loss_ce": 9.562318882672116e-05, |
|
"loss_iou": 0.447265625, |
|
"loss_num": 0.011962890625, |
|
"loss_xval": 0.0595703125, |
|
"num_input_tokens_seen": 43683084, |
|
"step": 253 |
|
}, |
|
{ |
|
"epoch": 0.6614583333333334, |
|
"grad_norm": 3.5604873362214167, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0713, |
|
"num_input_tokens_seen": 43855336, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 0.6614583333333334, |
|
"loss": 0.06191530451178551, |
|
"loss_ce": 0.0003155705926474184, |
|
"loss_iou": 0.453125, |
|
"loss_num": 0.0123291015625, |
|
"loss_xval": 0.0615234375, |
|
"num_input_tokens_seen": 43855336, |
|
"step": 254 |
|
}, |
|
{ |
|
"epoch": 0.6640625, |
|
"grad_norm": 7.717445783436579, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0799, |
|
"num_input_tokens_seen": 44028296, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.6640625, |
|
"loss": 0.09458325803279877, |
|
"loss_ce": 3.980396650149487e-05, |
|
"loss_iou": 0.70703125, |
|
"loss_num": 0.0189208984375, |
|
"loss_xval": 0.0947265625, |
|
"num_input_tokens_seen": 44028296, |
|
"step": 255 |
|
}, |
|
{ |
|
"epoch": 0.6666666666666666, |
|
"grad_norm": 15.823495393044448, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0915, |
|
"num_input_tokens_seen": 44200980, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.6666666666666666, |
|
"loss": 0.10397086292505264, |
|
"loss_ce": 5.850698289577849e-05, |
|
"loss_iou": 0.73046875, |
|
"loss_num": 0.020751953125, |
|
"loss_xval": 0.10400390625, |
|
"num_input_tokens_seen": 44200980, |
|
"step": 256 |
|
}, |
|
{ |
|
"epoch": 0.6692708333333334, |
|
"grad_norm": 4.767840698347708, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0808, |
|
"num_input_tokens_seen": 44373548, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 0.6692708333333334, |
|
"loss": 0.04920345917344093, |
|
"loss_ce": 7.015664596110582e-05, |
|
"loss_iou": 0.69140625, |
|
"loss_num": 0.00982666015625, |
|
"loss_xval": 0.049072265625, |
|
"num_input_tokens_seen": 44373548, |
|
"step": 257 |
|
}, |
|
{ |
|
"epoch": 0.671875, |
|
"grad_norm": 7.0061287275719195, |
|
"learning_rate": 5e-06, |
|
"loss": 0.087, |
|
"num_input_tokens_seen": 44545956, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 0.671875, |
|
"loss": 0.12110073864459991, |
|
"loss_ce": 3.750172254513018e-05, |
|
"loss_iou": 0.6015625, |
|
"loss_num": 0.024169921875, |
|
"loss_xval": 0.12109375, |
|
"num_input_tokens_seen": 44545956, |
|
"step": 258 |
|
}, |
|
{ |
|
"epoch": 0.6744791666666666, |
|
"grad_norm": 16.13195894853677, |
|
"learning_rate": 5e-06, |
|
"loss": 0.1143, |
|
"num_input_tokens_seen": 44719164, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 0.6744791666666666, |
|
"loss": 0.15427453815937042, |
|
"loss_ce": 9.973209671443328e-05, |
|
"loss_iou": 0.65234375, |
|
"loss_num": 0.0308837890625, |
|
"loss_xval": 0.154296875, |
|
"num_input_tokens_seen": 44719164, |
|
"step": 259 |
|
}, |
|
{ |
|
"epoch": 0.6770833333333334, |
|
"grad_norm": 3.769620732282852, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0715, |
|
"num_input_tokens_seen": 44891856, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.6770833333333334, |
|
"loss": 0.08834376931190491, |
|
"loss_ce": 8.69391078595072e-05, |
|
"loss_iou": 0.625, |
|
"loss_num": 0.0177001953125, |
|
"loss_xval": 0.08837890625, |
|
"num_input_tokens_seen": 44891856, |
|
"step": 260 |
|
}, |
|
{ |
|
"epoch": 0.6796875, |
|
"grad_norm": 15.395477460165395, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0877, |
|
"num_input_tokens_seen": 45064988, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 0.6796875, |
|
"loss": 0.08463309705257416, |
|
"loss_ce": 0.00025199196534231305, |
|
"loss_iou": 0.390625, |
|
"loss_num": 0.016845703125, |
|
"loss_xval": 0.08447265625, |
|
"num_input_tokens_seen": 45064988, |
|
"step": 261 |
|
}, |
|
{ |
|
"epoch": 0.6822916666666666, |
|
"grad_norm": 1.9846406004820456, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0567, |
|
"num_input_tokens_seen": 45237912, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 0.6822916666666666, |
|
"loss": 0.10574272274971008, |
|
"loss_ce": 4.509550126385875e-05, |
|
"loss_iou": 0.4140625, |
|
"loss_num": 0.0211181640625, |
|
"loss_xval": 0.10546875, |
|
"num_input_tokens_seen": 45237912, |
|
"step": 262 |
|
}, |
|
{ |
|
"epoch": 0.6848958333333334, |
|
"grad_norm": 10.620697806562827, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0748, |
|
"num_input_tokens_seen": 45410408, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 0.6848958333333334, |
|
"loss": 0.0699179470539093, |
|
"loss_ce": 0.0001700148859526962, |
|
"loss_iou": 0.400390625, |
|
"loss_num": 0.013916015625, |
|
"loss_xval": 0.06982421875, |
|
"num_input_tokens_seen": 45410408, |
|
"step": 263 |
|
}, |
|
{ |
|
"epoch": 0.6875, |
|
"grad_norm": 4.4669986375425985, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0521, |
|
"num_input_tokens_seen": 45583132, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 0.6875, |
|
"loss": 0.046352967619895935, |
|
"loss_ce": 0.00013409550592768937, |
|
"loss_iou": 0.78515625, |
|
"loss_num": 0.00921630859375, |
|
"loss_xval": 0.046142578125, |
|
"num_input_tokens_seen": 45583132, |
|
"step": 264 |
|
}, |
|
{ |
|
"epoch": 0.6901041666666666, |
|
"grad_norm": 6.203574811391586, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0928, |
|
"num_input_tokens_seen": 45755472, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.6901041666666666, |
|
"loss": 0.061830393970012665, |
|
"loss_ce": 3.229987487429753e-05, |
|
"loss_iou": 0.6328125, |
|
"loss_num": 0.01239013671875, |
|
"loss_xval": 0.061767578125, |
|
"num_input_tokens_seen": 45755472, |
|
"step": 265 |
|
}, |
|
{ |
|
"epoch": 0.6927083333333334, |
|
"grad_norm": 10.894591035750713, |
|
"learning_rate": 5e-06, |
|
"loss": 0.1005, |
|
"num_input_tokens_seen": 45928200, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 0.6927083333333334, |
|
"loss": 0.1057087630033493, |
|
"loss_ce": 0.00011794811143772677, |
|
"loss_iou": 0.6875, |
|
"loss_num": 0.0211181640625, |
|
"loss_xval": 0.10546875, |
|
"num_input_tokens_seen": 45928200, |
|
"step": 266 |
|
}, |
|
{ |
|
"epoch": 0.6953125, |
|
"grad_norm": 3.559473609758924, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0525, |
|
"num_input_tokens_seen": 46101500, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 0.6953125, |
|
"loss": 0.059102512896060944, |
|
"loss_ce": 5.099709960632026e-05, |
|
"loss_iou": 0.470703125, |
|
"loss_num": 0.0118408203125, |
|
"loss_xval": 0.05908203125, |
|
"num_input_tokens_seen": 46101500, |
|
"step": 267 |
|
}, |
|
{ |
|
"epoch": 0.6979166666666666, |
|
"grad_norm": 32.963299647312084, |
|
"learning_rate": 5e-06, |
|
"loss": 0.1156, |
|
"num_input_tokens_seen": 46273792, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 0.6979166666666666, |
|
"loss": 0.17293627560138702, |
|
"loss_ce": 6.945877976249903e-05, |
|
"loss_iou": 0.5859375, |
|
"loss_num": 0.03466796875, |
|
"loss_xval": 0.1728515625, |
|
"num_input_tokens_seen": 46273792, |
|
"step": 268 |
|
}, |
|
{ |
|
"epoch": 0.7005208333333334, |
|
"grad_norm": 5.379185511033478, |
|
"learning_rate": 5e-06, |
|
"loss": 0.1206, |
|
"num_input_tokens_seen": 46446840, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 0.7005208333333334, |
|
"loss": 0.0702916830778122, |
|
"loss_ce": 4.0215229091700166e-05, |
|
"loss_iou": 0.65625, |
|
"loss_num": 0.0140380859375, |
|
"loss_xval": 0.0703125, |
|
"num_input_tokens_seen": 46446840, |
|
"step": 269 |
|
}, |
|
{ |
|
"epoch": 0.703125, |
|
"grad_norm": 4.833206887807392, |
|
"learning_rate": 5e-06, |
|
"loss": 0.1057, |
|
"num_input_tokens_seen": 46619224, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.703125, |
|
"loss": 0.06372040510177612, |
|
"loss_ce": 4.5477234380086884e-05, |
|
"loss_iou": 0.482421875, |
|
"loss_num": 0.01275634765625, |
|
"loss_xval": 0.0634765625, |
|
"num_input_tokens_seen": 46619224, |
|
"step": 270 |
|
}, |
|
{ |
|
"epoch": 0.7057291666666666, |
|
"grad_norm": 4.9691828426728515, |
|
"learning_rate": 5e-06, |
|
"loss": 0.073, |
|
"num_input_tokens_seen": 46791948, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 0.7057291666666666, |
|
"loss": 0.041718438267707825, |
|
"loss_ce": 0.00013823516201227903, |
|
"loss_iou": 0.66796875, |
|
"loss_num": 0.00830078125, |
|
"loss_xval": 0.04150390625, |
|
"num_input_tokens_seen": 46791948, |
|
"step": 271 |
|
}, |
|
{ |
|
"epoch": 0.7083333333333334, |
|
"grad_norm": 5.792546307908184, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0812, |
|
"num_input_tokens_seen": 46964400, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 0.7083333333333334, |
|
"loss": 0.07085588574409485, |
|
"loss_ce": 0.00013139640213921666, |
|
"loss_iou": 0.671875, |
|
"loss_num": 0.01416015625, |
|
"loss_xval": 0.07080078125, |
|
"num_input_tokens_seen": 46964400, |
|
"step": 272 |
|
}, |
|
{ |
|
"epoch": 0.7109375, |
|
"grad_norm": 8.864261104979098, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0973, |
|
"num_input_tokens_seen": 47137156, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 0.7109375, |
|
"loss": 0.08093667030334473, |
|
"loss_ce": 0.00015664326201658696, |
|
"loss_iou": 0.640625, |
|
"loss_num": 0.0162353515625, |
|
"loss_xval": 0.08056640625, |
|
"num_input_tokens_seen": 47137156, |
|
"step": 273 |
|
}, |
|
{ |
|
"epoch": 0.7135416666666666, |
|
"grad_norm": 3.8762493026111633, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0958, |
|
"num_input_tokens_seen": 47309640, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 0.7135416666666666, |
|
"loss": 0.1435449719429016, |
|
"loss_ce": 5.13083505211398e-05, |
|
"loss_iou": 0.5234375, |
|
"loss_num": 0.0286865234375, |
|
"loss_xval": 0.1435546875, |
|
"num_input_tokens_seen": 47309640, |
|
"step": 274 |
|
}, |
|
{ |
|
"epoch": 0.7161458333333334, |
|
"grad_norm": 4.845607455502515, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0587, |
|
"num_input_tokens_seen": 47482920, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.7161458333333334, |
|
"loss": 0.06996987760066986, |
|
"loss_ce": 0.000145662619615905, |
|
"loss_iou": 0.45703125, |
|
"loss_num": 0.01397705078125, |
|
"loss_xval": 0.06982421875, |
|
"num_input_tokens_seen": 47482920, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.71875, |
|
"grad_norm": 6.023028440412175, |
|
"learning_rate": 5e-06, |
|
"loss": 0.1017, |
|
"num_input_tokens_seen": 47655164, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 0.71875, |
|
"loss": 0.11321437358856201, |
|
"loss_ce": 5.519590195035562e-05, |
|
"loss_iou": 0.58984375, |
|
"loss_num": 0.0225830078125, |
|
"loss_xval": 0.11328125, |
|
"num_input_tokens_seen": 47655164, |
|
"step": 276 |
|
}, |
|
{ |
|
"epoch": 0.7213541666666666, |
|
"grad_norm": 4.375656119857942, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0618, |
|
"num_input_tokens_seen": 47827856, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 0.7213541666666666, |
|
"loss": 0.0524156428873539, |
|
"loss_ce": 0.0002611021918710321, |
|
"loss_iou": 0.73046875, |
|
"loss_num": 0.01043701171875, |
|
"loss_xval": 0.05224609375, |
|
"num_input_tokens_seen": 47827856, |
|
"step": 277 |
|
}, |
|
{ |
|
"epoch": 0.7239583333333334, |
|
"grad_norm": 3.478066675039873, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0642, |
|
"num_input_tokens_seen": 48000956, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 0.7239583333333334, |
|
"loss": 0.03859657049179077, |
|
"loss_ce": 5.2869407227262855e-05, |
|
"loss_iou": 0.474609375, |
|
"loss_num": 0.007720947265625, |
|
"loss_xval": 0.03857421875, |
|
"num_input_tokens_seen": 48000956, |
|
"step": 278 |
|
}, |
|
{ |
|
"epoch": 0.7265625, |
|
"grad_norm": 10.669002227751372, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0673, |
|
"num_input_tokens_seen": 48173420, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 0.7265625, |
|
"loss": 0.04094023257493973, |
|
"loss_ce": 0.00010771260713227093, |
|
"loss_iou": 0.482421875, |
|
"loss_num": 0.0081787109375, |
|
"loss_xval": 0.040771484375, |
|
"num_input_tokens_seen": 48173420, |
|
"step": 279 |
|
}, |
|
{ |
|
"epoch": 0.7291666666666666, |
|
"grad_norm": 6.013727130209973, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0843, |
|
"num_input_tokens_seen": 48346040, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.7291666666666666, |
|
"loss": 0.06400243937969208, |
|
"loss_ce": 6.811654020566493e-05, |
|
"loss_iou": 0.5546875, |
|
"loss_num": 0.0128173828125, |
|
"loss_xval": 0.06396484375, |
|
"num_input_tokens_seen": 48346040, |
|
"step": 280 |
|
}, |
|
{ |
|
"epoch": 0.7317708333333334, |
|
"grad_norm": 6.320025783309937, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0776, |
|
"num_input_tokens_seen": 48518684, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 0.7317708333333334, |
|
"loss": 0.08228301256895065, |
|
"loss_ce": 0.00012969484669156373, |
|
"loss_iou": 0.7421875, |
|
"loss_num": 0.0164794921875, |
|
"loss_xval": 0.08203125, |
|
"num_input_tokens_seen": 48518684, |
|
"step": 281 |
|
}, |
|
{ |
|
"epoch": 0.734375, |
|
"grad_norm": 2.3539480804430353, |
|
"learning_rate": 5e-06, |
|
"loss": 0.064, |
|
"num_input_tokens_seen": 48691296, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 0.734375, |
|
"loss": 0.06360374391078949, |
|
"loss_ce": 5.089196565677412e-05, |
|
"loss_iou": 0.55859375, |
|
"loss_num": 0.0126953125, |
|
"loss_xval": 0.0634765625, |
|
"num_input_tokens_seen": 48691296, |
|
"step": 282 |
|
}, |
|
{ |
|
"epoch": 0.7369791666666666, |
|
"grad_norm": 4.165777643617544, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0581, |
|
"num_input_tokens_seen": 48864252, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 0.7369791666666666, |
|
"loss": 0.07076792418956757, |
|
"loss_ce": 5.869198503205553e-05, |
|
"loss_iou": 0.455078125, |
|
"loss_num": 0.01409912109375, |
|
"loss_xval": 0.07080078125, |
|
"num_input_tokens_seen": 48864252, |
|
"step": 283 |
|
}, |
|
{ |
|
"epoch": 0.7395833333333334, |
|
"grad_norm": 4.530184060910693, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0857, |
|
"num_input_tokens_seen": 49037116, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 0.7395833333333334, |
|
"loss": 0.10114337503910065, |
|
"loss_ce": 6.914998812135309e-05, |
|
"loss_iou": 0.66015625, |
|
"loss_num": 0.020263671875, |
|
"loss_xval": 0.10107421875, |
|
"num_input_tokens_seen": 49037116, |
|
"step": 284 |
|
}, |
|
{ |
|
"epoch": 0.7421875, |
|
"grad_norm": 7.025143291686679, |
|
"learning_rate": 5e-06, |
|
"loss": 0.09, |
|
"num_input_tokens_seen": 49209880, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.7421875, |
|
"loss": 0.07852576673030853, |
|
"loss_ce": 3.455359546933323e-05, |
|
"loss_iou": 0.5859375, |
|
"loss_num": 0.0157470703125, |
|
"loss_xval": 0.07861328125, |
|
"num_input_tokens_seen": 49209880, |
|
"step": 285 |
|
}, |
|
{ |
|
"epoch": 0.7447916666666666, |
|
"grad_norm": 8.858211776100614, |
|
"learning_rate": 5e-06, |
|
"loss": 0.084, |
|
"num_input_tokens_seen": 49381700, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 0.7447916666666666, |
|
"loss": 0.05212024226784706, |
|
"loss_ce": 5.7252564147347584e-05, |
|
"loss_iou": 0.7265625, |
|
"loss_num": 0.01043701171875, |
|
"loss_xval": 0.052001953125, |
|
"num_input_tokens_seen": 49381700, |
|
"step": 286 |
|
}, |
|
{ |
|
"epoch": 0.7473958333333334, |
|
"grad_norm": 3.6537179877047663, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0989, |
|
"num_input_tokens_seen": 49554536, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 0.7473958333333334, |
|
"loss": 0.055037256330251694, |
|
"loss_ce": 2.9322651244001463e-05, |
|
"loss_iou": 0.546875, |
|
"loss_num": 0.010986328125, |
|
"loss_xval": 0.054931640625, |
|
"num_input_tokens_seen": 49554536, |
|
"step": 287 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"grad_norm": 5.570461350284086, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0772, |
|
"num_input_tokens_seen": 49726396, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.75, |
|
"loss": 0.07801353931427002, |
|
"loss_ce": 7.164124690461904e-05, |
|
"loss_iou": 0.451171875, |
|
"loss_num": 0.015625, |
|
"loss_xval": 0.078125, |
|
"num_input_tokens_seen": 49726396, |
|
"step": 288 |
|
}, |
|
{ |
|
"epoch": 0.7526041666666666, |
|
"grad_norm": 5.806990578827175, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0737, |
|
"num_input_tokens_seen": 49899536, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 0.7526041666666666, |
|
"loss": 0.09019728004932404, |
|
"loss_ce": 4.8357818741351366e-05, |
|
"loss_iou": 0.46484375, |
|
"loss_num": 0.01806640625, |
|
"loss_xval": 0.09033203125, |
|
"num_input_tokens_seen": 49899536, |
|
"step": 289 |
|
}, |
|
{ |
|
"epoch": 0.7552083333333334, |
|
"grad_norm": 6.584433746493665, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0753, |
|
"num_input_tokens_seen": 50072028, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.7552083333333334, |
|
"loss": 0.05955757200717926, |
|
"loss_ce": 0.00013984768884256482, |
|
"loss_iou": 0.5, |
|
"loss_num": 0.01190185546875, |
|
"loss_xval": 0.059326171875, |
|
"num_input_tokens_seen": 50072028, |
|
"step": 290 |
|
}, |
|
{ |
|
"epoch": 0.7578125, |
|
"grad_norm": 4.769362882722307, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0836, |
|
"num_input_tokens_seen": 50244788, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 0.7578125, |
|
"loss": 0.08353784680366516, |
|
"loss_ce": 4.175720823695883e-05, |
|
"loss_iou": 0.58203125, |
|
"loss_num": 0.0167236328125, |
|
"loss_xval": 0.08349609375, |
|
"num_input_tokens_seen": 50244788, |
|
"step": 291 |
|
}, |
|
{ |
|
"epoch": 0.7604166666666666, |
|
"grad_norm": 4.630970710069874, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0924, |
|
"num_input_tokens_seen": 50417020, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 0.7604166666666666, |
|
"loss": 0.0867491364479065, |
|
"loss_ce": 6.395512173185125e-05, |
|
"loss_iou": 0.0, |
|
"loss_num": 0.017333984375, |
|
"loss_xval": 0.0869140625, |
|
"num_input_tokens_seen": 50417020, |
|
"step": 292 |
|
}, |
|
{ |
|
"epoch": 0.7630208333333334, |
|
"grad_norm": 4.771052495662392, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0789, |
|
"num_input_tokens_seen": 50589288, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 0.7630208333333334, |
|
"loss": 0.04268595576286316, |
|
"loss_ce": 3.76409079763107e-05, |
|
"loss_iou": 0.62109375, |
|
"loss_num": 0.008544921875, |
|
"loss_xval": 0.042724609375, |
|
"num_input_tokens_seen": 50589288, |
|
"step": 293 |
|
}, |
|
{ |
|
"epoch": 0.765625, |
|
"grad_norm": 5.549980291826297, |
|
"learning_rate": 5e-06, |
|
"loss": 0.1124, |
|
"num_input_tokens_seen": 50762276, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 0.765625, |
|
"loss": 0.08056493103504181, |
|
"loss_ce": 5.956060340395197e-05, |
|
"loss_iou": 0.58203125, |
|
"loss_num": 0.01611328125, |
|
"loss_xval": 0.08056640625, |
|
"num_input_tokens_seen": 50762276, |
|
"step": 294 |
|
}, |
|
{ |
|
"epoch": 0.7682291666666666, |
|
"grad_norm": 58.66835057028341, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0912, |
|
"num_input_tokens_seen": 50935292, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.7682291666666666, |
|
"loss": 0.12624840438365936, |
|
"loss_ce": 5.8218334743287414e-05, |
|
"loss_iou": 0.72265625, |
|
"loss_num": 0.0252685546875, |
|
"loss_xval": 0.1259765625, |
|
"num_input_tokens_seen": 50935292, |
|
"step": 295 |
|
}, |
|
{ |
|
"epoch": 0.7708333333333334, |
|
"grad_norm": 5.644622915739686, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0868, |
|
"num_input_tokens_seen": 51108336, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 0.7708333333333334, |
|
"loss": 0.1262531876564026, |
|
"loss_ce": 9.352029883302748e-05, |
|
"loss_iou": 0.6015625, |
|
"loss_num": 0.0252685546875, |
|
"loss_xval": 0.1259765625, |
|
"num_input_tokens_seen": 51108336, |
|
"step": 296 |
|
}, |
|
{ |
|
"epoch": 0.7734375, |
|
"grad_norm": 9.321237615443232, |
|
"learning_rate": 5e-06, |
|
"loss": 0.1107, |
|
"num_input_tokens_seen": 51280676, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 0.7734375, |
|
"loss": 0.08918002992868423, |
|
"loss_ce": 6.8700457632076e-05, |
|
"loss_iou": 0.6484375, |
|
"loss_num": 0.017822265625, |
|
"loss_xval": 0.0888671875, |
|
"num_input_tokens_seen": 51280676, |
|
"step": 297 |
|
}, |
|
{ |
|
"epoch": 0.7760416666666666, |
|
"grad_norm": 8.413905673909936, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0788, |
|
"num_input_tokens_seen": 51452600, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 0.7760416666666666, |
|
"loss": 0.06944364309310913, |
|
"loss_ce": 7.71840859670192e-05, |
|
"loss_iou": 0.53125, |
|
"loss_num": 0.01385498046875, |
|
"loss_xval": 0.0693359375, |
|
"num_input_tokens_seen": 51452600, |
|
"step": 298 |
|
}, |
|
{ |
|
"epoch": 0.7786458333333334, |
|
"grad_norm": 3.9297688671160738, |
|
"learning_rate": 5e-06, |
|
"loss": 0.08, |
|
"num_input_tokens_seen": 51625560, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 0.7786458333333334, |
|
"loss": 0.12099509686231613, |
|
"loss_ce": 0.00014548808394465595, |
|
"loss_iou": 0.447265625, |
|
"loss_num": 0.024169921875, |
|
"loss_xval": 0.12109375, |
|
"num_input_tokens_seen": 51625560, |
|
"step": 299 |
|
}, |
|
{ |
|
"epoch": 0.78125, |
|
"grad_norm": 8.840803926190146, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0903, |
|
"num_input_tokens_seen": 51797848, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.78125, |
|
"loss": 0.08386749029159546, |
|
"loss_ce": 0.00018828835163731128, |
|
"loss_iou": 0.62890625, |
|
"loss_num": 0.0167236328125, |
|
"loss_xval": 0.08349609375, |
|
"num_input_tokens_seen": 51797848, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.7838541666666666, |
|
"grad_norm": 6.267252913184398, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0768, |
|
"num_input_tokens_seen": 51970968, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 0.7838541666666666, |
|
"loss": 0.08612730354070663, |
|
"loss_ce": 0.000205064527108334, |
|
"loss_iou": 0.470703125, |
|
"loss_num": 0.0172119140625, |
|
"loss_xval": 0.0859375, |
|
"num_input_tokens_seen": 51970968, |
|
"step": 301 |
|
}, |
|
{ |
|
"epoch": 0.7864583333333334, |
|
"grad_norm": 5.712597753331284, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0856, |
|
"num_input_tokens_seen": 52143656, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 0.7864583333333334, |
|
"loss": 0.14908897876739502, |
|
"loss_ce": 0.00016319258429575711, |
|
"loss_iou": 0.6796875, |
|
"loss_num": 0.02978515625, |
|
"loss_xval": 0.1484375, |
|
"num_input_tokens_seen": 52143656, |
|
"step": 302 |
|
}, |
|
{ |
|
"epoch": 0.7890625, |
|
"grad_norm": 8.35751018269278, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0759, |
|
"num_input_tokens_seen": 52316820, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 0.7890625, |
|
"loss": 0.059616073966026306, |
|
"loss_ce": 7.628079038113356e-05, |
|
"loss_iou": 0.5546875, |
|
"loss_num": 0.01190185546875, |
|
"loss_xval": 0.0595703125, |
|
"num_input_tokens_seen": 52316820, |
|
"step": 303 |
|
}, |
|
{ |
|
"epoch": 0.7916666666666666, |
|
"grad_norm": 21.438956075626194, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0912, |
|
"num_input_tokens_seen": 52489896, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 0.7916666666666666, |
|
"loss": 0.10695922374725342, |
|
"loss_ce": 0.00010192444460699335, |
|
"loss_iou": 0.6171875, |
|
"loss_num": 0.0213623046875, |
|
"loss_xval": 0.10693359375, |
|
"num_input_tokens_seen": 52489896, |
|
"step": 304 |
|
}, |
|
{ |
|
"epoch": 0.7942708333333334, |
|
"grad_norm": 11.563280258074105, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0721, |
|
"num_input_tokens_seen": 52662040, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.7942708333333334, |
|
"loss": 0.08778760582208633, |
|
"loss_ce": 4.956443444825709e-05, |
|
"loss_iou": 0.609375, |
|
"loss_num": 0.017578125, |
|
"loss_xval": 0.087890625, |
|
"num_input_tokens_seen": 52662040, |
|
"step": 305 |
|
}, |
|
{ |
|
"epoch": 0.796875, |
|
"grad_norm": 3.1582836546422683, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0762, |
|
"num_input_tokens_seen": 52833528, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 0.796875, |
|
"loss": 0.048152316361665726, |
|
"loss_ce": 5.6614066124893725e-05, |
|
"loss_iou": 0.0, |
|
"loss_num": 0.0096435546875, |
|
"loss_xval": 0.048095703125, |
|
"num_input_tokens_seen": 52833528, |
|
"step": 306 |
|
}, |
|
{ |
|
"epoch": 0.7994791666666666, |
|
"grad_norm": 3.9541505621592403, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0728, |
|
"num_input_tokens_seen": 53006328, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 0.7994791666666666, |
|
"loss": 0.07045421004295349, |
|
"loss_ce": 6.541772745549679e-05, |
|
"loss_iou": 0.51953125, |
|
"loss_num": 0.01409912109375, |
|
"loss_xval": 0.0703125, |
|
"num_input_tokens_seen": 53006328, |
|
"step": 307 |
|
}, |
|
{ |
|
"epoch": 0.8020833333333334, |
|
"grad_norm": 21.04478597433239, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0841, |
|
"num_input_tokens_seen": 53179340, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 0.8020833333333334, |
|
"loss": 0.04340720921754837, |
|
"loss_ce": 7.224958972074091e-05, |
|
"loss_iou": 0.486328125, |
|
"loss_num": 0.0086669921875, |
|
"loss_xval": 0.04345703125, |
|
"num_input_tokens_seen": 53179340, |
|
"step": 308 |
|
}, |
|
{ |
|
"epoch": 0.8046875, |
|
"grad_norm": 36.45731809620038, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0903, |
|
"num_input_tokens_seen": 53352024, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 0.8046875, |
|
"loss": 0.0791003406047821, |
|
"loss_ce": 9.0329660451971e-05, |
|
"loss_iou": 0.59375, |
|
"loss_num": 0.0157470703125, |
|
"loss_xval": 0.0791015625, |
|
"num_input_tokens_seen": 53352024, |
|
"step": 309 |
|
}, |
|
{ |
|
"epoch": 0.8072916666666666, |
|
"grad_norm": 3.6922772893156828, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0982, |
|
"num_input_tokens_seen": 53524908, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.8072916666666666, |
|
"loss": 0.06811343133449554, |
|
"loss_ce": 0.00012026849435642362, |
|
"loss_iou": 0.546875, |
|
"loss_num": 0.01361083984375, |
|
"loss_xval": 0.06787109375, |
|
"num_input_tokens_seen": 53524908, |
|
"step": 310 |
|
}, |
|
{ |
|
"epoch": 0.8098958333333334, |
|
"grad_norm": 27.046913168708976, |
|
"learning_rate": 5e-06, |
|
"loss": 0.081, |
|
"num_input_tokens_seen": 53696732, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 0.8098958333333334, |
|
"loss": 0.0632261261343956, |
|
"loss_ce": 3.94820308429189e-05, |
|
"loss_iou": 0.62109375, |
|
"loss_num": 0.01263427734375, |
|
"loss_xval": 0.06298828125, |
|
"num_input_tokens_seen": 53696732, |
|
"step": 311 |
|
}, |
|
{ |
|
"epoch": 0.8125, |
|
"grad_norm": 14.857627339858754, |
|
"learning_rate": 5e-06, |
|
"loss": 0.1017, |
|
"num_input_tokens_seen": 53869308, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 0.8125, |
|
"loss": 0.07280921936035156, |
|
"loss_ce": 0.0001468673290219158, |
|
"loss_iou": 0.482421875, |
|
"loss_num": 0.0145263671875, |
|
"loss_xval": 0.07275390625, |
|
"num_input_tokens_seen": 53869308, |
|
"step": 312 |
|
}, |
|
{ |
|
"epoch": 0.8151041666666666, |
|
"grad_norm": 4.652815682219442, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0804, |
|
"num_input_tokens_seen": 54042004, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 0.8151041666666666, |
|
"loss": 0.08997043967247009, |
|
"loss_ce": 3.514082345645875e-05, |
|
"loss_iou": 0.73046875, |
|
"loss_num": 0.0179443359375, |
|
"loss_xval": 0.08984375, |
|
"num_input_tokens_seen": 54042004, |
|
"step": 313 |
|
}, |
|
{ |
|
"epoch": 0.8177083333333334, |
|
"grad_norm": 4.472330671881049, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0766, |
|
"num_input_tokens_seen": 54214544, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 0.8177083333333334, |
|
"loss": 0.05066576227545738, |
|
"loss_ce": 3.710209784912877e-05, |
|
"loss_iou": 0.462890625, |
|
"loss_num": 0.0101318359375, |
|
"loss_xval": 0.050537109375, |
|
"num_input_tokens_seen": 54214544, |
|
"step": 314 |
|
}, |
|
{ |
|
"epoch": 0.8203125, |
|
"grad_norm": 14.395534068995472, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0834, |
|
"num_input_tokens_seen": 54387032, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.8203125, |
|
"loss": 0.11046263575553894, |
|
"loss_ce": 5.003847763873637e-05, |
|
"loss_iou": 0.609375, |
|
"loss_num": 0.0220947265625, |
|
"loss_xval": 0.1103515625, |
|
"num_input_tokens_seen": 54387032, |
|
"step": 315 |
|
}, |
|
{ |
|
"epoch": 0.8229166666666666, |
|
"grad_norm": 5.6405315516941545, |
|
"learning_rate": 5e-06, |
|
"loss": 0.071, |
|
"num_input_tokens_seen": 54559764, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 0.8229166666666666, |
|
"loss": 0.07865004241466522, |
|
"loss_ce": 0.00015882565639913082, |
|
"loss_iou": 0.5859375, |
|
"loss_num": 0.0157470703125, |
|
"loss_xval": 0.07861328125, |
|
"num_input_tokens_seen": 54559764, |
|
"step": 316 |
|
}, |
|
{ |
|
"epoch": 0.8255208333333334, |
|
"grad_norm": 8.404403222163058, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0788, |
|
"num_input_tokens_seen": 54732960, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 0.8255208333333334, |
|
"loss": 0.09951162338256836, |
|
"loss_ce": 5.4833071772009134e-05, |
|
"loss_iou": 0.462890625, |
|
"loss_num": 0.0198974609375, |
|
"loss_xval": 0.099609375, |
|
"num_input_tokens_seen": 54732960, |
|
"step": 317 |
|
}, |
|
{ |
|
"epoch": 0.828125, |
|
"grad_norm": 12.856336033562837, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0745, |
|
"num_input_tokens_seen": 54905888, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 0.828125, |
|
"loss": 0.05548687279224396, |
|
"loss_ce": 0.00015850822092033923, |
|
"loss_iou": 0.5078125, |
|
"loss_num": 0.0111083984375, |
|
"loss_xval": 0.055419921875, |
|
"num_input_tokens_seen": 54905888, |
|
"step": 318 |
|
}, |
|
{ |
|
"epoch": 0.8307291666666666, |
|
"grad_norm": 7.5015307945338545, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0814, |
|
"num_input_tokens_seen": 55078584, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 0.8307291666666666, |
|
"loss": 0.09399284422397614, |
|
"loss_ce": 0.00018180246115662158, |
|
"loss_iou": 0.8203125, |
|
"loss_num": 0.018798828125, |
|
"loss_xval": 0.09375, |
|
"num_input_tokens_seen": 55078584, |
|
"step": 319 |
|
}, |
|
{ |
|
"epoch": 0.8333333333333334, |
|
"grad_norm": 3.549717733561083, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0888, |
|
"num_input_tokens_seen": 55251416, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.8333333333333334, |
|
"loss": 0.10571445524692535, |
|
"loss_ce": 0.00012363299902062863, |
|
"loss_iou": 0.390625, |
|
"loss_num": 0.0211181640625, |
|
"loss_xval": 0.10546875, |
|
"num_input_tokens_seen": 55251416, |
|
"step": 320 |
|
}, |
|
{ |
|
"epoch": 0.8359375, |
|
"grad_norm": 15.137913189345245, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0813, |
|
"num_input_tokens_seen": 55424308, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 0.8359375, |
|
"loss": 0.12823191285133362, |
|
"loss_ce": 0.0001038559275912121, |
|
"loss_iou": 0.423828125, |
|
"loss_num": 0.025634765625, |
|
"loss_xval": 0.1279296875, |
|
"num_input_tokens_seen": 55424308, |
|
"step": 321 |
|
}, |
|
{ |
|
"epoch": 0.8385416666666666, |
|
"grad_norm": 4.928878873643115, |
|
"learning_rate": 5e-06, |
|
"loss": 0.1069, |
|
"num_input_tokens_seen": 55597376, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 0.8385416666666666, |
|
"loss": 0.1322542130947113, |
|
"loss_ce": 2.1536015992751345e-05, |
|
"loss_iou": 0.578125, |
|
"loss_num": 0.0264892578125, |
|
"loss_xval": 0.1318359375, |
|
"num_input_tokens_seen": 55597376, |
|
"step": 322 |
|
}, |
|
{ |
|
"epoch": 0.8411458333333334, |
|
"grad_norm": 19.737058147658324, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0898, |
|
"num_input_tokens_seen": 55769600, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 0.8411458333333334, |
|
"loss": 0.17265748977661133, |
|
"loss_ce": 8.058187086135149e-05, |
|
"loss_iou": 0.0, |
|
"loss_num": 0.034423828125, |
|
"loss_xval": 0.1728515625, |
|
"num_input_tokens_seen": 55769600, |
|
"step": 323 |
|
}, |
|
{ |
|
"epoch": 0.84375, |
|
"grad_norm": 7.152491955998749, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0743, |
|
"num_input_tokens_seen": 55942580, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 0.84375, |
|
"loss": 0.059846702963113785, |
|
"loss_ce": 6.276796921156347e-05, |
|
"loss_iou": 0.5234375, |
|
"loss_num": 0.011962890625, |
|
"loss_xval": 0.059814453125, |
|
"num_input_tokens_seen": 55942580, |
|
"step": 324 |
|
}, |
|
{ |
|
"epoch": 0.8463541666666666, |
|
"grad_norm": 6.664096474807532, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0831, |
|
"num_input_tokens_seen": 56115528, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.8463541666666666, |
|
"loss": 0.06182098388671875, |
|
"loss_ce": 3.814647061517462e-05, |
|
"loss_iou": 0.7734375, |
|
"loss_num": 0.01239013671875, |
|
"loss_xval": 0.061767578125, |
|
"num_input_tokens_seen": 56115528, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.8489583333333334, |
|
"grad_norm": 4.533780334308584, |
|
"learning_rate": 5e-06, |
|
"loss": 0.085, |
|
"num_input_tokens_seen": 56288484, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 0.8489583333333334, |
|
"loss": 0.10065165907144547, |
|
"loss_ce": 0.00015727368008811027, |
|
"loss_iou": 0.4453125, |
|
"loss_num": 0.0201416015625, |
|
"loss_xval": 0.1005859375, |
|
"num_input_tokens_seen": 56288484, |
|
"step": 326 |
|
}, |
|
{ |
|
"epoch": 0.8515625, |
|
"grad_norm": 4.639727507170639, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0767, |
|
"num_input_tokens_seen": 56460840, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 0.8515625, |
|
"loss": 0.0594358891248703, |
|
"loss_ce": 7.919950439827517e-05, |
|
"loss_iou": 0.69140625, |
|
"loss_num": 0.01190185546875, |
|
"loss_xval": 0.059326171875, |
|
"num_input_tokens_seen": 56460840, |
|
"step": 327 |
|
}, |
|
{ |
|
"epoch": 0.8541666666666666, |
|
"grad_norm": 4.945822599515496, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0809, |
|
"num_input_tokens_seen": 56633612, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 0.8541666666666666, |
|
"loss": 0.10031691938638687, |
|
"loss_ce": 3.615960304159671e-05, |
|
"loss_iou": 0.462890625, |
|
"loss_num": 0.02001953125, |
|
"loss_xval": 0.10009765625, |
|
"num_input_tokens_seen": 56633612, |
|
"step": 328 |
|
}, |
|
{ |
|
"epoch": 0.8567708333333334, |
|
"grad_norm": 17.721130156863943, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0852, |
|
"num_input_tokens_seen": 56806644, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 0.8567708333333334, |
|
"loss": 0.06605279445648193, |
|
"loss_ce": 0.0002263778733322397, |
|
"loss_iou": 0.60546875, |
|
"loss_num": 0.01318359375, |
|
"loss_xval": 0.06591796875, |
|
"num_input_tokens_seen": 56806644, |
|
"step": 329 |
|
}, |
|
{ |
|
"epoch": 0.859375, |
|
"grad_norm": 4.228842310344442, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0829, |
|
"num_input_tokens_seen": 56978832, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.859375, |
|
"loss": 0.09333358705043793, |
|
"loss_ce": 7.186534639913589e-05, |
|
"loss_iou": 0.48046875, |
|
"loss_num": 0.0186767578125, |
|
"loss_xval": 0.09326171875, |
|
"num_input_tokens_seen": 56978832, |
|
"step": 330 |
|
}, |
|
{ |
|
"epoch": 0.8619791666666666, |
|
"grad_norm": 4.379026646689163, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0708, |
|
"num_input_tokens_seen": 57151476, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 0.8619791666666666, |
|
"loss": 0.05179120972752571, |
|
"loss_ce": 6.39162608422339e-05, |
|
"loss_iou": 0.49609375, |
|
"loss_num": 0.0103759765625, |
|
"loss_xval": 0.0517578125, |
|
"num_input_tokens_seen": 57151476, |
|
"step": 331 |
|
}, |
|
{ |
|
"epoch": 0.8645833333333334, |
|
"grad_norm": 9.125447816364591, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0697, |
|
"num_input_tokens_seen": 57323400, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 0.8645833333333334, |
|
"loss": 0.10480596870183945, |
|
"loss_ce": 6.964314525248483e-05, |
|
"loss_iou": 0.51171875, |
|
"loss_num": 0.02099609375, |
|
"loss_xval": 0.1044921875, |
|
"num_input_tokens_seen": 57323400, |
|
"step": 332 |
|
}, |
|
{ |
|
"epoch": 0.8671875, |
|
"grad_norm": 5.241494036335466, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0617, |
|
"num_input_tokens_seen": 57496068, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 0.8671875, |
|
"loss": 0.06167437136173248, |
|
"loss_ce": 0.00012041500303894281, |
|
"loss_iou": 0.71875, |
|
"loss_num": 0.0123291015625, |
|
"loss_xval": 0.0615234375, |
|
"num_input_tokens_seen": 57496068, |
|
"step": 333 |
|
}, |
|
{ |
|
"epoch": 0.8697916666666666, |
|
"grad_norm": 3.7921802750366664, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0859, |
|
"num_input_tokens_seen": 57669236, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 0.8697916666666666, |
|
"loss": 0.03953123837709427, |
|
"loss_ce": 0.00020934098574798554, |
|
"loss_iou": 0.47265625, |
|
"loss_num": 0.00787353515625, |
|
"loss_xval": 0.039306640625, |
|
"num_input_tokens_seen": 57669236, |
|
"step": 334 |
|
}, |
|
{ |
|
"epoch": 0.8723958333333334, |
|
"grad_norm": 7.046054210110739, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0857, |
|
"num_input_tokens_seen": 57841676, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.8723958333333334, |
|
"loss": 0.13086940348148346, |
|
"loss_ce": 4.054443706991151e-05, |
|
"loss_iou": 0.46875, |
|
"loss_num": 0.026123046875, |
|
"loss_xval": 0.130859375, |
|
"num_input_tokens_seen": 57841676, |
|
"step": 335 |
|
}, |
|
{ |
|
"epoch": 0.875, |
|
"grad_norm": 7.658169223957337, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0927, |
|
"num_input_tokens_seen": 58015076, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 0.875, |
|
"loss": 0.03890954330563545, |
|
"loss_ce": 0.0006252414314076304, |
|
"loss_iou": 0.52734375, |
|
"loss_num": 0.007659912109375, |
|
"loss_xval": 0.038330078125, |
|
"num_input_tokens_seen": 58015076, |
|
"step": 336 |
|
}, |
|
{ |
|
"epoch": 0.8776041666666666, |
|
"grad_norm": 4.319289196507174, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0931, |
|
"num_input_tokens_seen": 58187592, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 0.8776041666666666, |
|
"loss": 0.10160954296588898, |
|
"loss_ce": 4.7046225517988205e-05, |
|
"loss_iou": 0.625, |
|
"loss_num": 0.0203857421875, |
|
"loss_xval": 0.1015625, |
|
"num_input_tokens_seen": 58187592, |
|
"step": 337 |
|
}, |
|
{ |
|
"epoch": 0.8802083333333334, |
|
"grad_norm": 8.26220496195536, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0691, |
|
"num_input_tokens_seen": 58360220, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 0.8802083333333334, |
|
"loss": 0.05522051081061363, |
|
"loss_ce": 4.472649015951902e-05, |
|
"loss_iou": 0.44140625, |
|
"loss_num": 0.01104736328125, |
|
"loss_xval": 0.05517578125, |
|
"num_input_tokens_seen": 58360220, |
|
"step": 338 |
|
}, |
|
{ |
|
"epoch": 0.8828125, |
|
"grad_norm": 3.8822556756341036, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0498, |
|
"num_input_tokens_seen": 58532536, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 0.8828125, |
|
"loss": 0.07371848821640015, |
|
"loss_ce": 7.957669731695205e-05, |
|
"loss_iou": 0.515625, |
|
"loss_num": 0.01470947265625, |
|
"loss_xval": 0.07373046875, |
|
"num_input_tokens_seen": 58532536, |
|
"step": 339 |
|
}, |
|
{ |
|
"epoch": 0.8854166666666666, |
|
"grad_norm": 9.884171334560891, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0594, |
|
"num_input_tokens_seen": 58705004, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.8854166666666666, |
|
"loss": 0.06320229917764664, |
|
"loss_ce": 6.143321661511436e-05, |
|
"loss_iou": 0.44921875, |
|
"loss_num": 0.01263427734375, |
|
"loss_xval": 0.06298828125, |
|
"num_input_tokens_seen": 58705004, |
|
"step": 340 |
|
}, |
|
{ |
|
"epoch": 0.8880208333333334, |
|
"grad_norm": 5.196359666592977, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0404, |
|
"num_input_tokens_seen": 58878152, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 0.8880208333333334, |
|
"loss": 0.03186158090829849, |
|
"loss_ce": 0.0002453709894325584, |
|
"loss_iou": 0.546875, |
|
"loss_num": 0.006317138671875, |
|
"loss_xval": 0.03173828125, |
|
"num_input_tokens_seen": 58878152, |
|
"step": 341 |
|
}, |
|
{ |
|
"epoch": 0.890625, |
|
"grad_norm": 6.15237627529603, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0741, |
|
"num_input_tokens_seen": 59050440, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 0.890625, |
|
"loss": 0.05918329954147339, |
|
"loss_ce": 5.5491131206508726e-05, |
|
"loss_iou": 0.36328125, |
|
"loss_num": 0.0118408203125, |
|
"loss_xval": 0.05908203125, |
|
"num_input_tokens_seen": 59050440, |
|
"step": 342 |
|
}, |
|
{ |
|
"epoch": 0.8932291666666666, |
|
"grad_norm": 2.027289516848372, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0797, |
|
"num_input_tokens_seen": 59223528, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 0.8932291666666666, |
|
"loss": 0.0674634724855423, |
|
"loss_ce": 3.488633592496626e-05, |
|
"loss_iou": 0.443359375, |
|
"loss_num": 0.01348876953125, |
|
"loss_xval": 0.0673828125, |
|
"num_input_tokens_seen": 59223528, |
|
"step": 343 |
|
}, |
|
{ |
|
"epoch": 0.8958333333333334, |
|
"grad_norm": 3.3607059104825554, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0925, |
|
"num_input_tokens_seen": 59396320, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 0.8958333333333334, |
|
"loss": 0.04388820007443428, |
|
"loss_ce": 8.021650137379766e-05, |
|
"loss_iou": 0.0, |
|
"loss_num": 0.0087890625, |
|
"loss_xval": 0.043701171875, |
|
"num_input_tokens_seen": 59396320, |
|
"step": 344 |
|
}, |
|
{ |
|
"epoch": 0.8984375, |
|
"grad_norm": 11.313403126591554, |
|
"learning_rate": 5e-06, |
|
"loss": 0.11, |
|
"num_input_tokens_seen": 59568904, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.8984375, |
|
"loss": 0.059322062879800797, |
|
"loss_ce": 8.744518709136173e-05, |
|
"loss_iou": 0.5, |
|
"loss_num": 0.0118408203125, |
|
"loss_xval": 0.059326171875, |
|
"num_input_tokens_seen": 59568904, |
|
"step": 345 |
|
}, |
|
{ |
|
"epoch": 0.9010416666666666, |
|
"grad_norm": 9.716589837853562, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0927, |
|
"num_input_tokens_seen": 59741504, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 0.9010416666666666, |
|
"loss": 0.08992569893598557, |
|
"loss_ce": 5.143693124409765e-05, |
|
"loss_iou": 0.578125, |
|
"loss_num": 0.0179443359375, |
|
"loss_xval": 0.08984375, |
|
"num_input_tokens_seen": 59741504, |
|
"step": 346 |
|
}, |
|
{ |
|
"epoch": 0.9036458333333334, |
|
"grad_norm": 4.9130642144499985, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0664, |
|
"num_input_tokens_seen": 59913580, |
|
"step": 347 |
|
}, |
|
{ |
|
"epoch": 0.9036458333333334, |
|
"loss": 0.059036046266555786, |
|
"loss_ce": 6.082511754357256e-05, |
|
"loss_iou": 0.51953125, |
|
"loss_num": 0.01177978515625, |
|
"loss_xval": 0.05908203125, |
|
"num_input_tokens_seen": 59913580, |
|
"step": 347 |
|
}, |
|
{ |
|
"epoch": 0.90625, |
|
"grad_norm": 5.535144728019767, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0459, |
|
"num_input_tokens_seen": 60086300, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 0.90625, |
|
"loss": 0.040458932518959045, |
|
"loss_ce": 6.891523662488908e-05, |
|
"loss_iou": 0.62109375, |
|
"loss_num": 0.008056640625, |
|
"loss_xval": 0.040283203125, |
|
"num_input_tokens_seen": 60086300, |
|
"step": 348 |
|
}, |
|
{ |
|
"epoch": 0.9088541666666666, |
|
"grad_norm": 8.128924901708682, |
|
"learning_rate": 5e-06, |
|
"loss": 0.1183, |
|
"num_input_tokens_seen": 60258804, |
|
"step": 349 |
|
}, |
|
{ |
|
"epoch": 0.9088541666666666, |
|
"loss": 0.10604314506053925, |
|
"loss_ce": 2.508011857571546e-05, |
|
"loss_iou": 0.58984375, |
|
"loss_num": 0.021240234375, |
|
"loss_xval": 0.10595703125, |
|
"num_input_tokens_seen": 60258804, |
|
"step": 349 |
|
}, |
|
{ |
|
"epoch": 0.9114583333333334, |
|
"grad_norm": 6.130745719562545, |
|
"learning_rate": 5e-06, |
|
"loss": 0.1182, |
|
"num_input_tokens_seen": 60431928, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.9114583333333334, |
|
"loss": 0.137631356716156, |
|
"loss_ce": 5.8111756516154855e-05, |
|
"loss_iou": 0.671875, |
|
"loss_num": 0.0274658203125, |
|
"loss_xval": 0.1376953125, |
|
"num_input_tokens_seen": 60431928, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.9140625, |
|
"grad_norm": 6.6090310971417345, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0682, |
|
"num_input_tokens_seen": 60604596, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 0.9140625, |
|
"loss": 0.03837839514017105, |
|
"loss_ce": 0.0008875515777617693, |
|
"loss_iou": 0.5078125, |
|
"loss_num": 0.00750732421875, |
|
"loss_xval": 0.03759765625, |
|
"num_input_tokens_seen": 60604596, |
|
"step": 351 |
|
}, |
|
{ |
|
"epoch": 0.9166666666666666, |
|
"grad_norm": 37.977702783393255, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0839, |
|
"num_input_tokens_seen": 60777696, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 0.9166666666666666, |
|
"loss": 0.05776657909154892, |
|
"loss_ce": 7.309722423087806e-05, |
|
"loss_iou": 0.578125, |
|
"loss_num": 0.01153564453125, |
|
"loss_xval": 0.0576171875, |
|
"num_input_tokens_seen": 60777696, |
|
"step": 352 |
|
}, |
|
{ |
|
"epoch": 0.9192708333333334, |
|
"grad_norm": 10.793340791159972, |
|
"learning_rate": 5e-06, |
|
"loss": 0.1036, |
|
"num_input_tokens_seen": 60950176, |
|
"step": 353 |
|
}, |
|
{ |
|
"epoch": 0.9192708333333334, |
|
"loss": 0.10405679047107697, |
|
"loss_ce": 5.2886520279571414e-05, |
|
"loss_iou": 0.51953125, |
|
"loss_num": 0.020751953125, |
|
"loss_xval": 0.10400390625, |
|
"num_input_tokens_seen": 60950176, |
|
"step": 353 |
|
}, |
|
{ |
|
"epoch": 0.921875, |
|
"grad_norm": 4.2624655031129395, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0638, |
|
"num_input_tokens_seen": 61123352, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 0.921875, |
|
"loss": 0.045672204345464706, |
|
"loss_ce": 4.842308408115059e-05, |
|
"loss_iou": 0.640625, |
|
"loss_num": 0.0091552734375, |
|
"loss_xval": 0.045654296875, |
|
"num_input_tokens_seen": 61123352, |
|
"step": 354 |
|
}, |
|
{ |
|
"epoch": 0.9244791666666666, |
|
"grad_norm": 5.263551596545367, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0836, |
|
"num_input_tokens_seen": 61296296, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.9244791666666666, |
|
"loss": 0.07579399645328522, |
|
"loss_ce": 1.8844926671590656e-05, |
|
"loss_iou": 0.484375, |
|
"loss_num": 0.01513671875, |
|
"loss_xval": 0.07568359375, |
|
"num_input_tokens_seen": 61296296, |
|
"step": 355 |
|
}, |
|
{ |
|
"epoch": 0.9270833333333334, |
|
"grad_norm": 4.969020675022387, |
|
"learning_rate": 5e-06, |
|
"loss": 0.1161, |
|
"num_input_tokens_seen": 61468464, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 0.9270833333333334, |
|
"loss": 0.0982382521033287, |
|
"loss_ce": 9.372214117320254e-05, |
|
"loss_iou": 0.474609375, |
|
"loss_num": 0.0196533203125, |
|
"loss_xval": 0.09814453125, |
|
"num_input_tokens_seen": 61468464, |
|
"step": 356 |
|
}, |
|
{ |
|
"epoch": 0.9296875, |
|
"grad_norm": 9.751227404400339, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0667, |
|
"num_input_tokens_seen": 61641104, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 0.9296875, |
|
"loss": 0.08714728057384491, |
|
"loss_ce": 6.537619628943503e-05, |
|
"loss_iou": 0.4921875, |
|
"loss_num": 0.0174560546875, |
|
"loss_xval": 0.0869140625, |
|
"num_input_tokens_seen": 61641104, |
|
"step": 357 |
|
}, |
|
{ |
|
"epoch": 0.9322916666666666, |
|
"grad_norm": 3.705998309698105, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0614, |
|
"num_input_tokens_seen": 61813956, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 0.9322916666666666, |
|
"loss": 0.05996260046958923, |
|
"loss_ce": 7.185334106907248e-05, |
|
"loss_iou": 0.439453125, |
|
"loss_num": 0.011962890625, |
|
"loss_xval": 0.059814453125, |
|
"num_input_tokens_seen": 61813956, |
|
"step": 358 |
|
}, |
|
{ |
|
"epoch": 0.9348958333333334, |
|
"grad_norm": 5.61843483400317, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0953, |
|
"num_input_tokens_seen": 61987068, |
|
"step": 359 |
|
}, |
|
{ |
|
"epoch": 0.9348958333333334, |
|
"loss": 0.1060662716627121, |
|
"loss_ce": 4.819741297978908e-05, |
|
"loss_iou": 0.5078125, |
|
"loss_num": 0.021240234375, |
|
"loss_xval": 0.10595703125, |
|
"num_input_tokens_seen": 61987068, |
|
"step": 359 |
|
}, |
|
{ |
|
"epoch": 0.9375, |
|
"grad_norm": 4.53602826237247, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0859, |
|
"num_input_tokens_seen": 62160000, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.9375, |
|
"loss": 0.12344817072153091, |
|
"loss_ce": 3.5084449336864054e-05, |
|
"loss_iou": 0.5859375, |
|
"loss_num": 0.024658203125, |
|
"loss_xval": 0.12353515625, |
|
"num_input_tokens_seen": 62160000, |
|
"step": 360 |
|
}, |
|
{ |
|
"epoch": 0.9401041666666666, |
|
"grad_norm": 2.382495598116124, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0654, |
|
"num_input_tokens_seen": 62332704, |
|
"step": 361 |
|
}, |
|
{ |
|
"epoch": 0.9401041666666666, |
|
"loss": 0.04234257712960243, |
|
"loss_ce": 9.099017916014418e-05, |
|
"loss_iou": 0.53125, |
|
"loss_num": 0.00848388671875, |
|
"loss_xval": 0.042236328125, |
|
"num_input_tokens_seen": 62332704, |
|
"step": 361 |
|
}, |
|
{ |
|
"epoch": 0.9427083333333334, |
|
"grad_norm": 3.67565808505264, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0921, |
|
"num_input_tokens_seen": 62505472, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 0.9427083333333334, |
|
"loss": 0.1404985636472702, |
|
"loss_ce": 0.0001482181833125651, |
|
"loss_iou": 0.41015625, |
|
"loss_num": 0.028076171875, |
|
"loss_xval": 0.140625, |
|
"num_input_tokens_seen": 62505472, |
|
"step": 362 |
|
}, |
|
{ |
|
"epoch": 0.9453125, |
|
"grad_norm": 4.393117034860246, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0746, |
|
"num_input_tokens_seen": 62677852, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 0.9453125, |
|
"loss": 0.08209509402513504, |
|
"loss_ce": 3.332511550979689e-05, |
|
"loss_iou": 0.57421875, |
|
"loss_num": 0.016357421875, |
|
"loss_xval": 0.08203125, |
|
"num_input_tokens_seen": 62677852, |
|
"step": 363 |
|
}, |
|
{ |
|
"epoch": 0.9479166666666666, |
|
"grad_norm": 56.3211081199482, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0939, |
|
"num_input_tokens_seen": 62850624, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 0.9479166666666666, |
|
"loss": 0.06467482447624207, |
|
"loss_ce": 3.859533171635121e-05, |
|
"loss_iou": 0.53515625, |
|
"loss_num": 0.012939453125, |
|
"loss_xval": 0.064453125, |
|
"num_input_tokens_seen": 62850624, |
|
"step": 364 |
|
}, |
|
{ |
|
"epoch": 0.9505208333333334, |
|
"grad_norm": 4.908757065453886, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0602, |
|
"num_input_tokens_seen": 63022912, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.9505208333333334, |
|
"loss": 0.05544174462556839, |
|
"loss_ce": 0.00011337252362864092, |
|
"loss_iou": 0.578125, |
|
"loss_num": 0.01104736328125, |
|
"loss_xval": 0.055419921875, |
|
"num_input_tokens_seen": 63022912, |
|
"step": 365 |
|
}, |
|
{ |
|
"epoch": 0.953125, |
|
"grad_norm": 7.057560906891319, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0884, |
|
"num_input_tokens_seen": 63195992, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 0.953125, |
|
"loss": 0.08902530372142792, |
|
"loss_ce": 3.6048379115527496e-05, |
|
"loss_iou": 0.51953125, |
|
"loss_num": 0.017822265625, |
|
"loss_xval": 0.0888671875, |
|
"num_input_tokens_seen": 63195992, |
|
"step": 366 |
|
}, |
|
{ |
|
"epoch": 0.9557291666666666, |
|
"grad_norm": 4.46902192772412, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0773, |
|
"num_input_tokens_seen": 63368708, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 0.9557291666666666, |
|
"loss": 0.02907339483499527, |
|
"loss_ce": 3.591889617382549e-05, |
|
"loss_iou": 0.0, |
|
"loss_num": 0.00579833984375, |
|
"loss_xval": 0.029052734375, |
|
"num_input_tokens_seen": 63368708, |
|
"step": 367 |
|
}, |
|
{ |
|
"epoch": 0.9583333333333334, |
|
"grad_norm": 4.189625335712974, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0685, |
|
"num_input_tokens_seen": 63541312, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 0.9583333333333334, |
|
"loss": 0.10639164596796036, |
|
"loss_ce": 8.366195834241807e-05, |
|
"loss_iou": 0.578125, |
|
"loss_num": 0.021240234375, |
|
"loss_xval": 0.1064453125, |
|
"num_input_tokens_seen": 63541312, |
|
"step": 368 |
|
}, |
|
{ |
|
"epoch": 0.9609375, |
|
"grad_norm": 5.657094938839105, |
|
"learning_rate": 5e-06, |
|
"loss": 0.068, |
|
"num_input_tokens_seen": 63713968, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 0.9609375, |
|
"loss": 0.08933991193771362, |
|
"loss_ce": 3.0215423976187594e-05, |
|
"loss_iou": 0.484375, |
|
"loss_num": 0.017822265625, |
|
"loss_xval": 0.08935546875, |
|
"num_input_tokens_seen": 63713968, |
|
"step": 369 |
|
}, |
|
{ |
|
"epoch": 0.9635416666666666, |
|
"grad_norm": 5.423233634302121, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0647, |
|
"num_input_tokens_seen": 63886996, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.9635416666666666, |
|
"loss": 0.06539873778820038, |
|
"loss_ce": 3.008513340319041e-05, |
|
"loss_iou": 0.59765625, |
|
"loss_num": 0.0130615234375, |
|
"loss_xval": 0.0654296875, |
|
"num_input_tokens_seen": 63886996, |
|
"step": 370 |
|
}, |
|
{ |
|
"epoch": 0.9661458333333334, |
|
"grad_norm": 5.395935683494909, |
|
"learning_rate": 5e-06, |
|
"loss": 0.09, |
|
"num_input_tokens_seen": 64059660, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 0.9661458333333334, |
|
"loss": 0.12266229093074799, |
|
"loss_ce": 7.318713323911652e-05, |
|
"loss_iou": 0.5703125, |
|
"loss_num": 0.0245361328125, |
|
"loss_xval": 0.12255859375, |
|
"num_input_tokens_seen": 64059660, |
|
"step": 371 |
|
}, |
|
{ |
|
"epoch": 0.96875, |
|
"grad_norm": 5.856107929033903, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0854, |
|
"num_input_tokens_seen": 64232096, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 0.96875, |
|
"loss": 0.09164264798164368, |
|
"loss_ce": 4.413935312186368e-05, |
|
"loss_iou": 0.0, |
|
"loss_num": 0.018310546875, |
|
"loss_xval": 0.091796875, |
|
"num_input_tokens_seen": 64232096, |
|
"step": 372 |
|
}, |
|
{ |
|
"epoch": 0.9713541666666666, |
|
"grad_norm": 5.870548313752241, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0559, |
|
"num_input_tokens_seen": 64404756, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 0.9713541666666666, |
|
"loss": 0.046882934868335724, |
|
"loss_ce": 2.3196011170512065e-05, |
|
"loss_iou": 0.50390625, |
|
"loss_num": 0.0093994140625, |
|
"loss_xval": 0.046875, |
|
"num_input_tokens_seen": 64404756, |
|
"step": 373 |
|
}, |
|
{ |
|
"epoch": 0.9739583333333334, |
|
"grad_norm": 10.532029531276638, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0822, |
|
"num_input_tokens_seen": 64577476, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 0.9739583333333334, |
|
"loss": 0.07333735376596451, |
|
"loss_ce": 6.464817124651745e-05, |
|
"loss_iou": 0.5859375, |
|
"loss_num": 0.0146484375, |
|
"loss_xval": 0.0732421875, |
|
"num_input_tokens_seen": 64577476, |
|
"step": 374 |
|
}, |
|
{ |
|
"epoch": 0.9765625, |
|
"grad_norm": 8.746459219065029, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0993, |
|
"num_input_tokens_seen": 64750252, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.9765625, |
|
"loss": 0.08189202845096588, |
|
"loss_ce": 2.8627566280192696e-05, |
|
"loss_iou": 0.55078125, |
|
"loss_num": 0.016357421875, |
|
"loss_xval": 0.08203125, |
|
"num_input_tokens_seen": 64750252, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.9791666666666666, |
|
"grad_norm": 4.369734068569422, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0772, |
|
"num_input_tokens_seen": 64922984, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 0.9791666666666666, |
|
"loss": 0.07155308127403259, |
|
"loss_ce": 5.0396010919939727e-05, |
|
"loss_iou": 0.51953125, |
|
"loss_num": 0.01434326171875, |
|
"loss_xval": 0.0712890625, |
|
"num_input_tokens_seen": 64922984, |
|
"step": 376 |
|
}, |
|
{ |
|
"epoch": 0.9817708333333334, |
|
"grad_norm": 5.055558558635633, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0739, |
|
"num_input_tokens_seen": 65095228, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 0.9817708333333334, |
|
"loss": 0.10587326437234879, |
|
"loss_ce": 2.304443478351459e-05, |
|
"loss_iou": 0.37890625, |
|
"loss_num": 0.0211181640625, |
|
"loss_xval": 0.10595703125, |
|
"num_input_tokens_seen": 65095228, |
|
"step": 377 |
|
}, |
|
{ |
|
"epoch": 0.984375, |
|
"grad_norm": 5.286209551414624, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0876, |
|
"num_input_tokens_seen": 65267596, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 0.984375, |
|
"loss": 0.06380397081375122, |
|
"loss_ce": 5.275038711261004e-05, |
|
"loss_iou": 0.59375, |
|
"loss_num": 0.01275634765625, |
|
"loss_xval": 0.06396484375, |
|
"num_input_tokens_seen": 65267596, |
|
"step": 378 |
|
}, |
|
{ |
|
"epoch": 0.9869791666666666, |
|
"grad_norm": 4.779020534428804, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0801, |
|
"num_input_tokens_seen": 65439632, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 0.9869791666666666, |
|
"loss": 0.05044550448656082, |
|
"loss_ce": 3.0469000193988904e-05, |
|
"loss_iou": 0.68359375, |
|
"loss_num": 0.01007080078125, |
|
"loss_xval": 0.05029296875, |
|
"num_input_tokens_seen": 65439632, |
|
"step": 379 |
|
}, |
|
{ |
|
"epoch": 0.9895833333333334, |
|
"grad_norm": 4.685839131970804, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0717, |
|
"num_input_tokens_seen": 65612188, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.9895833333333334, |
|
"loss": 0.05971755087375641, |
|
"loss_ce": 2.5168032152578235e-05, |
|
"loss_iou": 0.5234375, |
|
"loss_num": 0.01190185546875, |
|
"loss_xval": 0.0595703125, |
|
"num_input_tokens_seen": 65612188, |
|
"step": 380 |
|
}, |
|
{ |
|
"epoch": 0.9921875, |
|
"grad_norm": 5.019679075383125, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0852, |
|
"num_input_tokens_seen": 65785132, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 0.9921875, |
|
"loss": 0.0729844868183136, |
|
"loss_ce": 7.799551531206816e-05, |
|
"loss_iou": 0.5390625, |
|
"loss_num": 0.01458740234375, |
|
"loss_xval": 0.07275390625, |
|
"num_input_tokens_seen": 65785132, |
|
"step": 381 |
|
}, |
|
{ |
|
"epoch": 0.9947916666666666, |
|
"grad_norm": 5.2408542210225075, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0618, |
|
"num_input_tokens_seen": 65958084, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 0.9947916666666666, |
|
"loss": 0.07006223499774933, |
|
"loss_ce": 2.439254785713274e-05, |
|
"loss_iou": 0.400390625, |
|
"loss_num": 0.0140380859375, |
|
"loss_xval": 0.06982421875, |
|
"num_input_tokens_seen": 65958084, |
|
"step": 382 |
|
}, |
|
{ |
|
"epoch": 0.9973958333333334, |
|
"grad_norm": 8.43973663796555, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0739, |
|
"num_input_tokens_seen": 66130316, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 0.9973958333333334, |
|
"loss": 0.05812692642211914, |
|
"loss_ce": 0.00012826945749111474, |
|
"loss_iou": 0.55078125, |
|
"loss_num": 0.0115966796875, |
|
"loss_xval": 0.05810546875, |
|
"num_input_tokens_seen": 66130316, |
|
"step": 383 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"grad_norm": 14.446394116068738, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0949, |
|
"num_input_tokens_seen": 66302752, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 1.0, |
|
"loss": 0.12496863305568695, |
|
"loss_ce": 4.493331289268099e-05, |
|
"loss_iou": 0.5703125, |
|
"loss_num": 0.0250244140625, |
|
"loss_xval": 0.125, |
|
"num_input_tokens_seen": 66302752, |
|
"step": 384 |
|
}, |
|
{ |
|
"epoch": 1.0026041666666667, |
|
"grad_norm": 4.289918076646653, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0624, |
|
"num_input_tokens_seen": 66475484, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 1.0026041666666667, |
|
"loss": 0.04102395847439766, |
|
"loss_ce": 2.3592958314111456e-05, |
|
"loss_iou": 0.435546875, |
|
"loss_num": 0.0081787109375, |
|
"loss_xval": 0.041015625, |
|
"num_input_tokens_seen": 66475484, |
|
"step": 385 |
|
}, |
|
{ |
|
"epoch": 1.0052083333333333, |
|
"grad_norm": 4.281041154001583, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0857, |
|
"num_input_tokens_seen": 66647384, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 1.0052083333333333, |
|
"loss": 0.12955166399478912, |
|
"loss_ce": 3.50592345057521e-05, |
|
"loss_iou": 0.5859375, |
|
"loss_num": 0.02587890625, |
|
"loss_xval": 0.1298828125, |
|
"num_input_tokens_seen": 66647384, |
|
"step": 386 |
|
}, |
|
{ |
|
"epoch": 1.0078125, |
|
"grad_norm": 9.266009885978788, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0655, |
|
"num_input_tokens_seen": 66820256, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 1.0078125, |
|
"loss": 0.076566182076931, |
|
"loss_ce": 4.3353000364732e-05, |
|
"loss_iou": 0.48046875, |
|
"loss_num": 0.01531982421875, |
|
"loss_xval": 0.07666015625, |
|
"num_input_tokens_seen": 66820256, |
|
"step": 387 |
|
}, |
|
{ |
|
"epoch": 1.0104166666666667, |
|
"grad_norm": 6.544465032820982, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0798, |
|
"num_input_tokens_seen": 66992440, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 1.0104166666666667, |
|
"loss": 0.0616319440305233, |
|
"loss_ce": 4.7468380216741934e-05, |
|
"loss_iou": 0.609375, |
|
"loss_num": 0.0123291015625, |
|
"loss_xval": 0.0615234375, |
|
"num_input_tokens_seen": 66992440, |
|
"step": 388 |
|
}, |
|
{ |
|
"epoch": 1.0130208333333333, |
|
"grad_norm": 5.606310567972833, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0857, |
|
"num_input_tokens_seen": 67165064, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 1.0130208333333333, |
|
"loss": 0.08174864202737808, |
|
"loss_ce": 2.2569187422050163e-05, |
|
"loss_iou": 0.515625, |
|
"loss_num": 0.016357421875, |
|
"loss_xval": 0.08154296875, |
|
"num_input_tokens_seen": 67165064, |
|
"step": 389 |
|
}, |
|
{ |
|
"epoch": 1.015625, |
|
"grad_norm": 11.386296445247536, |
|
"learning_rate": 5e-06, |
|
"loss": 0.1006, |
|
"num_input_tokens_seen": 67338208, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 1.015625, |
|
"loss": 0.13062095642089844, |
|
"loss_ce": 6.675285840174183e-05, |
|
"loss_iou": 0.625, |
|
"loss_num": 0.026123046875, |
|
"loss_xval": 0.130859375, |
|
"num_input_tokens_seen": 67338208, |
|
"step": 390 |
|
}, |
|
{ |
|
"epoch": 1.0182291666666667, |
|
"grad_norm": 9.218245759030461, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0807, |
|
"num_input_tokens_seen": 67511096, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 1.0182291666666667, |
|
"loss": 0.1230873167514801, |
|
"loss_ce": 4.043774606543593e-05, |
|
"loss_iou": 0.466796875, |
|
"loss_num": 0.024658203125, |
|
"loss_xval": 0.123046875, |
|
"num_input_tokens_seen": 67511096, |
|
"step": 391 |
|
}, |
|
{ |
|
"epoch": 1.0208333333333333, |
|
"grad_norm": 7.360763519108863, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0808, |
|
"num_input_tokens_seen": 67683500, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 1.0208333333333333, |
|
"loss": 0.04704148322343826, |
|
"loss_ce": 7.492824079236016e-05, |
|
"loss_iou": 0.69140625, |
|
"loss_num": 0.0093994140625, |
|
"loss_xval": 0.046875, |
|
"num_input_tokens_seen": 67683500, |
|
"step": 392 |
|
}, |
|
{ |
|
"epoch": 1.0234375, |
|
"grad_norm": 24.398414774258395, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0653, |
|
"num_input_tokens_seen": 67856564, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 1.0234375, |
|
"loss": 0.052695855498313904, |
|
"loss_ce": 6.829424819443375e-05, |
|
"loss_iou": 0.48828125, |
|
"loss_num": 0.01055908203125, |
|
"loss_xval": 0.052734375, |
|
"num_input_tokens_seen": 67856564, |
|
"step": 393 |
|
}, |
|
{ |
|
"epoch": 1.0260416666666667, |
|
"grad_norm": 3.952882418188013, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0773, |
|
"num_input_tokens_seen": 68029008, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 1.0260416666666667, |
|
"loss": 0.05983951687812805, |
|
"loss_ce": 5.558759949053638e-05, |
|
"loss_iou": 0.0, |
|
"loss_num": 0.011962890625, |
|
"loss_xval": 0.059814453125, |
|
"num_input_tokens_seen": 68029008, |
|
"step": 394 |
|
}, |
|
{ |
|
"epoch": 1.0286458333333333, |
|
"grad_norm": 6.537700275238822, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0599, |
|
"num_input_tokens_seen": 68201948, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 1.0286458333333333, |
|
"loss": 0.09255748987197876, |
|
"loss_ce": 4.3449574150145054e-05, |
|
"loss_iou": 0.53515625, |
|
"loss_num": 0.0185546875, |
|
"loss_xval": 0.09228515625, |
|
"num_input_tokens_seen": 68201948, |
|
"step": 395 |
|
}, |
|
{ |
|
"epoch": 1.03125, |
|
"grad_norm": 9.576915699057926, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0537, |
|
"num_input_tokens_seen": 68374640, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 1.03125, |
|
"loss": 0.06341977417469025, |
|
"loss_ce": 6.528654193971306e-05, |
|
"loss_iou": 0.419921875, |
|
"loss_num": 0.0126953125, |
|
"loss_xval": 0.0634765625, |
|
"num_input_tokens_seen": 68374640, |
|
"step": 396 |
|
}, |
|
{ |
|
"epoch": 1.0338541666666667, |
|
"grad_norm": 4.782272117515052, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0954, |
|
"num_input_tokens_seen": 68547628, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 1.0338541666666667, |
|
"loss": 0.05385718494653702, |
|
"loss_ce": 5.469346069730818e-05, |
|
"loss_iou": 0.56640625, |
|
"loss_num": 0.0107421875, |
|
"loss_xval": 0.0537109375, |
|
"num_input_tokens_seen": 68547628, |
|
"step": 397 |
|
}, |
|
{ |
|
"epoch": 1.0364583333333333, |
|
"grad_norm": 5.7075554975649485, |
|
"learning_rate": 5e-06, |
|
"loss": 0.1176, |
|
"num_input_tokens_seen": 68720856, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 1.0364583333333333, |
|
"loss": 0.04491497576236725, |
|
"loss_ce": 2.361964106967207e-05, |
|
"loss_iou": 0.6953125, |
|
"loss_num": 0.00897216796875, |
|
"loss_xval": 0.044921875, |
|
"num_input_tokens_seen": 68720856, |
|
"step": 398 |
|
}, |
|
{ |
|
"epoch": 1.0390625, |
|
"grad_norm": 5.118278327904573, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0574, |
|
"num_input_tokens_seen": 68893560, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 1.0390625, |
|
"loss": 0.057190101593732834, |
|
"loss_ce": 6.119744648458436e-05, |
|
"loss_iou": 0.640625, |
|
"loss_num": 0.01141357421875, |
|
"loss_xval": 0.05712890625, |
|
"num_input_tokens_seen": 68893560, |
|
"step": 399 |
|
}, |
|
{ |
|
"epoch": 1.0416666666666667, |
|
"grad_norm": 4.488918016416647, |
|
"learning_rate": 5e-06, |
|
"loss": 0.064, |
|
"num_input_tokens_seen": 69066468, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.0416666666666667, |
|
"loss": 0.06421714276075363, |
|
"loss_ce": 3.867531631840393e-05, |
|
"loss_iou": 0.57421875, |
|
"loss_num": 0.0128173828125, |
|
"loss_xval": 0.06396484375, |
|
"num_input_tokens_seen": 69066468, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 1.0442708333333333, |
|
"grad_norm": 5.792990925737602, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0838, |
|
"num_input_tokens_seen": 69239188, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 1.0442708333333333, |
|
"loss": 0.0709276869893074, |
|
"loss_ce": 3.5352179111214355e-05, |
|
"loss_iou": 0.5703125, |
|
"loss_num": 0.01416015625, |
|
"loss_xval": 0.07080078125, |
|
"num_input_tokens_seen": 69239188, |
|
"step": 401 |
|
}, |
|
{ |
|
"epoch": 1.046875, |
|
"grad_norm": 5.477877590256074, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0616, |
|
"num_input_tokens_seen": 69412048, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 1.046875, |
|
"loss": 0.05337923392653465, |
|
"loss_ce": 0.00012606415839400142, |
|
"loss_iou": 0.53125, |
|
"loss_num": 0.01068115234375, |
|
"loss_xval": 0.05322265625, |
|
"num_input_tokens_seen": 69412048, |
|
"step": 402 |
|
}, |
|
{ |
|
"epoch": 1.0494791666666667, |
|
"grad_norm": 7.785066348132969, |
|
"learning_rate": 5e-06, |
|
"loss": 0.077, |
|
"num_input_tokens_seen": 69584372, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 1.0494791666666667, |
|
"loss": 0.11938966065645218, |
|
"loss_ce": 0.0002032608463196084, |
|
"loss_iou": 0.404296875, |
|
"loss_num": 0.0238037109375, |
|
"loss_xval": 0.119140625, |
|
"num_input_tokens_seen": 69584372, |
|
"step": 403 |
|
}, |
|
{ |
|
"epoch": 1.0520833333333333, |
|
"grad_norm": 3.487837088264721, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0598, |
|
"num_input_tokens_seen": 69756908, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 1.0520833333333333, |
|
"loss": 0.04861289635300636, |
|
"loss_ce": 5.943168798694387e-05, |
|
"loss_iou": 0.421875, |
|
"loss_num": 0.00970458984375, |
|
"loss_xval": 0.048583984375, |
|
"num_input_tokens_seen": 69756908, |
|
"step": 404 |
|
}, |
|
{ |
|
"epoch": 1.0546875, |
|
"grad_norm": 4.5585273415308505, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0738, |
|
"num_input_tokens_seen": 69929892, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 1.0546875, |
|
"loss": 0.036217886954545975, |
|
"loss_ce": 5.455628706840798e-05, |
|
"loss_iou": 0.494140625, |
|
"loss_num": 0.007232666015625, |
|
"loss_xval": 0.0361328125, |
|
"num_input_tokens_seen": 69929892, |
|
"step": 405 |
|
}, |
|
{ |
|
"epoch": 1.0572916666666667, |
|
"grad_norm": 5.607953623525571, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0589, |
|
"num_input_tokens_seen": 70102304, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 1.0572916666666667, |
|
"loss": 0.08112768828868866, |
|
"loss_ce": 4.247991455486044e-05, |
|
"loss_iou": 0.66796875, |
|
"loss_num": 0.0162353515625, |
|
"loss_xval": 0.0810546875, |
|
"num_input_tokens_seen": 70102304, |
|
"step": 406 |
|
}, |
|
{ |
|
"epoch": 1.0598958333333333, |
|
"grad_norm": 3.224104704302036, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0706, |
|
"num_input_tokens_seen": 70274860, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 1.0598958333333333, |
|
"loss": 0.042282506823539734, |
|
"loss_ce": 3.09214046865236e-05, |
|
"loss_iou": 0.52734375, |
|
"loss_num": 0.0084228515625, |
|
"loss_xval": 0.042236328125, |
|
"num_input_tokens_seen": 70274860, |
|
"step": 407 |
|
}, |
|
{ |
|
"epoch": 1.0625, |
|
"grad_norm": 13.790835085548427, |
|
"learning_rate": 5e-06, |
|
"loss": 0.054, |
|
"num_input_tokens_seen": 70447752, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 1.0625, |
|
"loss": 0.06554967164993286, |
|
"loss_ce": 4.369396629044786e-05, |
|
"loss_iou": 0.453125, |
|
"loss_num": 0.01312255859375, |
|
"loss_xval": 0.0654296875, |
|
"num_input_tokens_seen": 70447752, |
|
"step": 408 |
|
}, |
|
{ |
|
"epoch": 1.0651041666666667, |
|
"grad_norm": 21.170926774013214, |
|
"learning_rate": 5e-06, |
|
"loss": 0.1041, |
|
"num_input_tokens_seen": 70620408, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 1.0651041666666667, |
|
"loss": 0.05410638824105263, |
|
"loss_ce": 2.9238653951324522e-05, |
|
"loss_iou": 0.5625, |
|
"loss_num": 0.01080322265625, |
|
"loss_xval": 0.05419921875, |
|
"num_input_tokens_seen": 70620408, |
|
"step": 409 |
|
}, |
|
{ |
|
"epoch": 1.0677083333333333, |
|
"grad_norm": 4.451906270983918, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0553, |
|
"num_input_tokens_seen": 70792740, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 1.0677083333333333, |
|
"loss": 0.060652364045381546, |
|
"loss_ce": 4.4454794988268986e-05, |
|
"loss_iou": 0.43359375, |
|
"loss_num": 0.0120849609375, |
|
"loss_xval": 0.060546875, |
|
"num_input_tokens_seen": 70792740, |
|
"step": 410 |
|
}, |
|
{ |
|
"epoch": 1.0703125, |
|
"grad_norm": 4.685547616833428, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0872, |
|
"num_input_tokens_seen": 70965912, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 1.0703125, |
|
"loss": 0.038969703018665314, |
|
"loss_ce": 2.9273152904352173e-05, |
|
"loss_iou": 0.578125, |
|
"loss_num": 0.007781982421875, |
|
"loss_xval": 0.0390625, |
|
"num_input_tokens_seen": 70965912, |
|
"step": 411 |
|
}, |
|
{ |
|
"epoch": 1.0729166666666667, |
|
"grad_norm": 4.205176098429634, |
|
"learning_rate": 5e-06, |
|
"loss": 0.097, |
|
"num_input_tokens_seen": 71138240, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 1.0729166666666667, |
|
"loss": 0.06594446301460266, |
|
"loss_ce": 2.6499863452045247e-05, |
|
"loss_iou": 0.490234375, |
|
"loss_num": 0.01318359375, |
|
"loss_xval": 0.06591796875, |
|
"num_input_tokens_seen": 71138240, |
|
"step": 412 |
|
}, |
|
{ |
|
"epoch": 1.0755208333333333, |
|
"grad_norm": 2.6975606542073414, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0503, |
|
"num_input_tokens_seen": 71311316, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 1.0755208333333333, |
|
"loss": 0.04244375228881836, |
|
"loss_ce": 2.4318891519214958e-05, |
|
"loss_iou": 0.5078125, |
|
"loss_num": 0.00848388671875, |
|
"loss_xval": 0.04248046875, |
|
"num_input_tokens_seen": 71311316, |
|
"step": 413 |
|
}, |
|
{ |
|
"epoch": 1.078125, |
|
"grad_norm": 7.285990628964785, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0779, |
|
"num_input_tokens_seen": 71483816, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 1.078125, |
|
"loss": 0.04324822127819061, |
|
"loss_ce": 2.007262264669407e-05, |
|
"loss_iou": 0.5703125, |
|
"loss_num": 0.0086669921875, |
|
"loss_xval": 0.043212890625, |
|
"num_input_tokens_seen": 71483816, |
|
"step": 414 |
|
}, |
|
{ |
|
"epoch": 1.0807291666666667, |
|
"grad_norm": 4.52661538724694, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0641, |
|
"num_input_tokens_seen": 71656340, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 1.0807291666666667, |
|
"loss": 0.08217348903417587, |
|
"loss_ce": 4.30593136115931e-05, |
|
"loss_iou": 0.45703125, |
|
"loss_num": 0.0164794921875, |
|
"loss_xval": 0.08203125, |
|
"num_input_tokens_seen": 71656340, |
|
"step": 415 |
|
}, |
|
{ |
|
"epoch": 1.0833333333333333, |
|
"grad_norm": 4.6125148439773485, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0882, |
|
"num_input_tokens_seen": 71828256, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 1.0833333333333333, |
|
"loss": 0.13281017541885376, |
|
"loss_ce": 2.819242035911884e-05, |
|
"loss_iou": 0.5703125, |
|
"loss_num": 0.0264892578125, |
|
"loss_xval": 0.1328125, |
|
"num_input_tokens_seen": 71828256, |
|
"step": 416 |
|
}, |
|
{ |
|
"epoch": 1.0859375, |
|
"grad_norm": 4.643689845065649, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0627, |
|
"num_input_tokens_seen": 72000720, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 1.0859375, |
|
"loss": 0.11508992314338684, |
|
"loss_ce": 2.3400416466756724e-05, |
|
"loss_iou": 0.0, |
|
"loss_num": 0.0230712890625, |
|
"loss_xval": 0.115234375, |
|
"num_input_tokens_seen": 72000720, |
|
"step": 417 |
|
}, |
|
{ |
|
"epoch": 1.0885416666666667, |
|
"grad_norm": 4.231062348032645, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0809, |
|
"num_input_tokens_seen": 72173088, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 1.0885416666666667, |
|
"loss": 0.10267479717731476, |
|
"loss_ce": 4.418793832883239e-05, |
|
"loss_iou": 0.0, |
|
"loss_num": 0.0205078125, |
|
"loss_xval": 0.1025390625, |
|
"num_input_tokens_seen": 72173088, |
|
"step": 418 |
|
}, |
|
{ |
|
"epoch": 1.0911458333333333, |
|
"grad_norm": 3.198450887714274, |
|
"learning_rate": 5e-06, |
|
"loss": 0.094, |
|
"num_input_tokens_seen": 72345528, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 1.0911458333333333, |
|
"loss": 0.10875082015991211, |
|
"loss_ce": 0.00010823761840583757, |
|
"loss_iou": 0.431640625, |
|
"loss_num": 0.021728515625, |
|
"loss_xval": 0.1083984375, |
|
"num_input_tokens_seen": 72345528, |
|
"step": 419 |
|
}, |
|
{ |
|
"epoch": 1.09375, |
|
"grad_norm": 4.428326611539968, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0439, |
|
"num_input_tokens_seen": 72517624, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 1.09375, |
|
"loss": 0.05049506574869156, |
|
"loss_ce": 4.951009759679437e-05, |
|
"loss_iou": 0.51171875, |
|
"loss_num": 0.01007080078125, |
|
"loss_xval": 0.050537109375, |
|
"num_input_tokens_seen": 72517624, |
|
"step": 420 |
|
}, |
|
{ |
|
"epoch": 1.0963541666666667, |
|
"grad_norm": 5.316131474081422, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0816, |
|
"num_input_tokens_seen": 72690776, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 1.0963541666666667, |
|
"loss": 0.0798894613981247, |
|
"loss_ce": 4.021547283628024e-05, |
|
"loss_iou": 0.515625, |
|
"loss_num": 0.0159912109375, |
|
"loss_xval": 0.080078125, |
|
"num_input_tokens_seen": 72690776, |
|
"step": 421 |
|
}, |
|
{ |
|
"epoch": 1.0989583333333333, |
|
"grad_norm": 4.80233181201779, |
|
"learning_rate": 5e-06, |
|
"loss": 0.099, |
|
"num_input_tokens_seen": 72863552, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 1.0989583333333333, |
|
"loss": 0.09374965727329254, |
|
"loss_ce": 3.0174571293173358e-05, |
|
"loss_iou": 0.6484375, |
|
"loss_num": 0.018798828125, |
|
"loss_xval": 0.09375, |
|
"num_input_tokens_seen": 72863552, |
|
"step": 422 |
|
}, |
|
{ |
|
"epoch": 1.1015625, |
|
"grad_norm": 3.042295910685699, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0756, |
|
"num_input_tokens_seen": 73035716, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 1.1015625, |
|
"loss": 0.05097030848264694, |
|
"loss_ce": 2.1213931177044287e-05, |
|
"loss_iou": 0.57421875, |
|
"loss_num": 0.01019287109375, |
|
"loss_xval": 0.051025390625, |
|
"num_input_tokens_seen": 73035716, |
|
"step": 423 |
|
}, |
|
{ |
|
"epoch": 1.1041666666666667, |
|
"grad_norm": 3.8912805314115473, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0536, |
|
"num_input_tokens_seen": 73208532, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 1.1041666666666667, |
|
"loss": 0.04248078912496567, |
|
"loss_ce": 4.60965748061426e-05, |
|
"loss_iou": 0.61328125, |
|
"loss_num": 0.00848388671875, |
|
"loss_xval": 0.04248046875, |
|
"num_input_tokens_seen": 73208532, |
|
"step": 424 |
|
}, |
|
{ |
|
"epoch": 1.1067708333333333, |
|
"grad_norm": 14.632789864172295, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0658, |
|
"num_input_tokens_seen": 73381164, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 1.1067708333333333, |
|
"loss": 0.052568383514881134, |
|
"loss_ce": 0.000154447479872033, |
|
"loss_iou": 0.47265625, |
|
"loss_num": 0.010498046875, |
|
"loss_xval": 0.052490234375, |
|
"num_input_tokens_seen": 73381164, |
|
"step": 425 |
|
}, |
|
{ |
|
"epoch": 1.109375, |
|
"grad_norm": 4.3783199311299486, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0861, |
|
"num_input_tokens_seen": 73553404, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 1.109375, |
|
"loss": 0.125158429145813, |
|
"loss_ce": 6.687753193546087e-05, |
|
"loss_iou": 0.0, |
|
"loss_num": 0.0250244140625, |
|
"loss_xval": 0.125, |
|
"num_input_tokens_seen": 73553404, |
|
"step": 426 |
|
}, |
|
{ |
|
"epoch": 1.1119791666666667, |
|
"grad_norm": 3.5716619036124766, |
|
"learning_rate": 5e-06, |
|
"loss": 0.075, |
|
"num_input_tokens_seen": 73726020, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 1.1119791666666667, |
|
"loss": 0.14429143071174622, |
|
"loss_ce": 3.4840733860619366e-05, |
|
"loss_iou": 0.0, |
|
"loss_num": 0.0289306640625, |
|
"loss_xval": 0.14453125, |
|
"num_input_tokens_seen": 73726020, |
|
"step": 427 |
|
}, |
|
{ |
|
"epoch": 1.1145833333333333, |
|
"grad_norm": 4.334696158970524, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0636, |
|
"num_input_tokens_seen": 73898692, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 1.1145833333333333, |
|
"loss": 0.08402653783559799, |
|
"loss_ce": 4.216242450638674e-05, |
|
"loss_iou": 0.5546875, |
|
"loss_num": 0.016845703125, |
|
"loss_xval": 0.083984375, |
|
"num_input_tokens_seen": 73898692, |
|
"step": 428 |
|
}, |
|
{ |
|
"epoch": 1.1171875, |
|
"grad_norm": 5.554218703009278, |
|
"learning_rate": 5e-06, |
|
"loss": 0.059, |
|
"num_input_tokens_seen": 74071620, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 1.1171875, |
|
"loss": 0.0587516650557518, |
|
"loss_ce": 3.584453952498734e-05, |
|
"loss_iou": 0.49609375, |
|
"loss_num": 0.01171875, |
|
"loss_xval": 0.05859375, |
|
"num_input_tokens_seen": 74071620, |
|
"step": 429 |
|
}, |
|
{ |
|
"epoch": 1.1197916666666667, |
|
"grad_norm": 4.43058457913269, |
|
"learning_rate": 5e-06, |
|
"loss": 0.111, |
|
"num_input_tokens_seen": 74244416, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 1.1197916666666667, |
|
"loss": 0.1568872630596161, |
|
"loss_ce": 2.6918030926026404e-05, |
|
"loss_iou": 0.71875, |
|
"loss_num": 0.031494140625, |
|
"loss_xval": 0.1572265625, |
|
"num_input_tokens_seen": 74244416, |
|
"step": 430 |
|
}, |
|
{ |
|
"epoch": 1.1223958333333333, |
|
"grad_norm": 5.12234513289191, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0652, |
|
"num_input_tokens_seen": 74416812, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 1.1223958333333333, |
|
"loss": 0.04296587407588959, |
|
"loss_ce": 7.341805758187547e-05, |
|
"loss_iou": 0.0, |
|
"loss_num": 0.00860595703125, |
|
"loss_xval": 0.04296875, |
|
"num_input_tokens_seen": 74416812, |
|
"step": 431 |
|
}, |
|
{ |
|
"epoch": 1.125, |
|
"grad_norm": 12.372100052601173, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0794, |
|
"num_input_tokens_seen": 74589952, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 1.125, |
|
"loss": 0.057425886392593384, |
|
"loss_ce": 3.757977538043633e-05, |
|
"loss_iou": 0.4609375, |
|
"loss_num": 0.011474609375, |
|
"loss_xval": 0.057373046875, |
|
"num_input_tokens_seen": 74589952, |
|
"step": 432 |
|
}, |
|
{ |
|
"epoch": 1.1276041666666667, |
|
"grad_norm": 5.254766938250951, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0813, |
|
"num_input_tokens_seen": 74762884, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 1.1276041666666667, |
|
"loss": 0.07790642231702805, |
|
"loss_ce": 0.00014763849321752787, |
|
"loss_iou": 0.43359375, |
|
"loss_num": 0.01556396484375, |
|
"loss_xval": 0.07763671875, |
|
"num_input_tokens_seen": 74762884, |
|
"step": 433 |
|
}, |
|
{ |
|
"epoch": 1.1302083333333333, |
|
"grad_norm": 4.363985148609402, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0678, |
|
"num_input_tokens_seen": 74935932, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 1.1302083333333333, |
|
"loss": 0.09511469304561615, |
|
"loss_ce": 2.1914216631557792e-05, |
|
"loss_iou": 0.671875, |
|
"loss_num": 0.01904296875, |
|
"loss_xval": 0.09521484375, |
|
"num_input_tokens_seen": 74935932, |
|
"step": 434 |
|
}, |
|
{ |
|
"epoch": 1.1328125, |
|
"grad_norm": 36.76822239657336, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0789, |
|
"num_input_tokens_seen": 75109188, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 1.1328125, |
|
"loss": 0.05521143600344658, |
|
"loss_ce": 3.5653371014632285e-05, |
|
"loss_iou": 0.5234375, |
|
"loss_num": 0.01104736328125, |
|
"loss_xval": 0.05517578125, |
|
"num_input_tokens_seen": 75109188, |
|
"step": 435 |
|
}, |
|
{ |
|
"epoch": 1.1354166666666667, |
|
"grad_norm": 5.8422904737549635, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0833, |
|
"num_input_tokens_seen": 75282080, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 1.1354166666666667, |
|
"loss": 0.1195131167769432, |
|
"loss_ce": 3.679828660096973e-05, |
|
"loss_iou": 0.5078125, |
|
"loss_num": 0.02392578125, |
|
"loss_xval": 0.11962890625, |
|
"num_input_tokens_seen": 75282080, |
|
"step": 436 |
|
}, |
|
{ |
|
"epoch": 1.1380208333333333, |
|
"grad_norm": 5.633890734428066, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0714, |
|
"num_input_tokens_seen": 75454600, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 1.1380208333333333, |
|
"loss": 0.04856128245592117, |
|
"loss_ce": 8.410715963691473e-05, |
|
"loss_iou": 0.482421875, |
|
"loss_num": 0.00970458984375, |
|
"loss_xval": 0.048583984375, |
|
"num_input_tokens_seen": 75454600, |
|
"step": 437 |
|
}, |
|
{ |
|
"epoch": 1.140625, |
|
"grad_norm": 4.6822951947306946, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0673, |
|
"num_input_tokens_seen": 75627104, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 1.140625, |
|
"loss": 0.05475003272294998, |
|
"loss_ce": 4.72724532301072e-05, |
|
"loss_iou": 0.51953125, |
|
"loss_num": 0.01092529296875, |
|
"loss_xval": 0.0546875, |
|
"num_input_tokens_seen": 75627104, |
|
"step": 438 |
|
}, |
|
{ |
|
"epoch": 1.1432291666666667, |
|
"grad_norm": 6.006286624841916, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0639, |
|
"num_input_tokens_seen": 75799808, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 1.1432291666666667, |
|
"loss": 0.0665750578045845, |
|
"loss_ce": 1.621775300009176e-05, |
|
"loss_iou": 0.62890625, |
|
"loss_num": 0.0133056640625, |
|
"loss_xval": 0.06640625, |
|
"num_input_tokens_seen": 75799808, |
|
"step": 439 |
|
}, |
|
{ |
|
"epoch": 1.1458333333333333, |
|
"grad_norm": 28.980298300208794, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0762, |
|
"num_input_tokens_seen": 75972072, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 1.1458333333333333, |
|
"loss": 0.09322504699230194, |
|
"loss_ce": 2.435836722725071e-05, |
|
"loss_iou": 0.71875, |
|
"loss_num": 0.0186767578125, |
|
"loss_xval": 0.09326171875, |
|
"num_input_tokens_seen": 75972072, |
|
"step": 440 |
|
}, |
|
{ |
|
"epoch": 1.1484375, |
|
"grad_norm": 13.46707783271563, |
|
"learning_rate": 5e-06, |
|
"loss": 0.1018, |
|
"num_input_tokens_seen": 76144720, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 1.1484375, |
|
"loss": 0.08068449795246124, |
|
"loss_ce": 7.231286144815385e-05, |
|
"loss_iou": 0.3515625, |
|
"loss_num": 0.01611328125, |
|
"loss_xval": 0.08056640625, |
|
"num_input_tokens_seen": 76144720, |
|
"step": 441 |
|
}, |
|
{ |
|
"epoch": 1.1510416666666667, |
|
"grad_norm": 2.9432117535086357, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0636, |
|
"num_input_tokens_seen": 76316644, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 1.1510416666666667, |
|
"loss": 0.056132424622774124, |
|
"loss_ce": 0.00016318520647473633, |
|
"loss_iou": 0.66796875, |
|
"loss_num": 0.01116943359375, |
|
"loss_xval": 0.055908203125, |
|
"num_input_tokens_seen": 76316644, |
|
"step": 442 |
|
}, |
|
{ |
|
"epoch": 1.1536458333333333, |
|
"grad_norm": 2.439393218897116, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0374, |
|
"num_input_tokens_seen": 76489288, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 1.1536458333333333, |
|
"loss": 0.03558982163667679, |
|
"loss_ce": 2.1583975467365235e-05, |
|
"loss_iou": 0.451171875, |
|
"loss_num": 0.007110595703125, |
|
"loss_xval": 0.03564453125, |
|
"num_input_tokens_seen": 76489288, |
|
"step": 443 |
|
}, |
|
{ |
|
"epoch": 1.15625, |
|
"grad_norm": 14.267748702685116, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0665, |
|
"num_input_tokens_seen": 76661744, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 1.15625, |
|
"loss": 0.11009622365236282, |
|
"loss_ce": 2.694531031011138e-05, |
|
"loss_iou": 0.0, |
|
"loss_num": 0.02197265625, |
|
"loss_xval": 0.10986328125, |
|
"num_input_tokens_seen": 76661744, |
|
"step": 444 |
|
}, |
|
{ |
|
"epoch": 1.1588541666666667, |
|
"grad_norm": 20.903388479491294, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0489, |
|
"num_input_tokens_seen": 76833952, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 1.1588541666666667, |
|
"loss": 0.03838071599602699, |
|
"loss_ce": 3.5379373002797365e-05, |
|
"loss_iou": 0.65234375, |
|
"loss_num": 0.007659912109375, |
|
"loss_xval": 0.038330078125, |
|
"num_input_tokens_seen": 76833952, |
|
"step": 445 |
|
}, |
|
{ |
|
"epoch": 1.1614583333333333, |
|
"grad_norm": 8.469986250177818, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0857, |
|
"num_input_tokens_seen": 77007080, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 1.1614583333333333, |
|
"loss": 0.051541343331336975, |
|
"loss_ce": 2.767006662907079e-05, |
|
"loss_iou": 0.59375, |
|
"loss_num": 0.01031494140625, |
|
"loss_xval": 0.051513671875, |
|
"num_input_tokens_seen": 77007080, |
|
"step": 446 |
|
}, |
|
{ |
|
"epoch": 1.1640625, |
|
"grad_norm": 5.002762010889236, |
|
"learning_rate": 5e-06, |
|
"loss": 0.1031, |
|
"num_input_tokens_seen": 77180040, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 1.1640625, |
|
"loss": 0.20415398478507996, |
|
"loss_ce": 5.2410614443942904e-05, |
|
"loss_iou": 0.45703125, |
|
"loss_num": 0.040771484375, |
|
"loss_xval": 0.2041015625, |
|
"num_input_tokens_seen": 77180040, |
|
"step": 447 |
|
}, |
|
{ |
|
"epoch": 1.1666666666666667, |
|
"grad_norm": 100.8603598057148, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0848, |
|
"num_input_tokens_seen": 77353160, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 1.1666666666666667, |
|
"loss": 0.07399199903011322, |
|
"loss_ce": 7.841931073926389e-05, |
|
"loss_iou": 0.69921875, |
|
"loss_num": 0.0147705078125, |
|
"loss_xval": 0.07373046875, |
|
"num_input_tokens_seen": 77353160, |
|
"step": 448 |
|
}, |
|
{ |
|
"epoch": 1.1692708333333333, |
|
"grad_norm": 3.351767451423234, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0852, |
|
"num_input_tokens_seen": 77526040, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 1.1692708333333333, |
|
"loss": 0.06542657315731049, |
|
"loss_ce": 5.792453157482669e-05, |
|
"loss_iou": 0.4609375, |
|
"loss_num": 0.0130615234375, |
|
"loss_xval": 0.0654296875, |
|
"num_input_tokens_seen": 77526040, |
|
"step": 449 |
|
}, |
|
{ |
|
"epoch": 1.171875, |
|
"grad_norm": 4.456484406427641, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0804, |
|
"num_input_tokens_seen": 77698436, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.171875, |
|
"loss": 0.037512898445129395, |
|
"loss_ce": 6.783234130125493e-05, |
|
"loss_iou": 0.57421875, |
|
"loss_num": 0.00750732421875, |
|
"loss_xval": 0.037353515625, |
|
"num_input_tokens_seen": 77698436, |
|
"step": 450 |
|
}, |
|
{ |
|
"epoch": 1.1744791666666667, |
|
"grad_norm": 5.16567471249552, |
|
"learning_rate": 5e-06, |
|
"loss": 0.076, |
|
"num_input_tokens_seen": 77870996, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 1.1744791666666667, |
|
"loss": 0.1230858787894249, |
|
"loss_ce": 3.9006972656352445e-05, |
|
"loss_iou": 0.52734375, |
|
"loss_num": 0.0245361328125, |
|
"loss_xval": 0.123046875, |
|
"num_input_tokens_seen": 77870996, |
|
"step": 451 |
|
}, |
|
{ |
|
"epoch": 1.1770833333333333, |
|
"grad_norm": 4.701516259626003, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0584, |
|
"num_input_tokens_seen": 78043824, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 1.1770833333333333, |
|
"loss": 0.04219118878245354, |
|
"loss_ce": 6.16741890553385e-05, |
|
"loss_iou": 0.58984375, |
|
"loss_num": 0.0084228515625, |
|
"loss_xval": 0.042236328125, |
|
"num_input_tokens_seen": 78043824, |
|
"step": 452 |
|
}, |
|
{ |
|
"epoch": 1.1796875, |
|
"grad_norm": 5.151886406386116, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0829, |
|
"num_input_tokens_seen": 78216596, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 1.1796875, |
|
"loss": 0.11115504801273346, |
|
"loss_ce": 0.00010158185614272952, |
|
"loss_iou": 0.58984375, |
|
"loss_num": 0.022216796875, |
|
"loss_xval": 0.11083984375, |
|
"num_input_tokens_seen": 78216596, |
|
"step": 453 |
|
}, |
|
{ |
|
"epoch": 1.1822916666666667, |
|
"grad_norm": 5.099784873283209, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0496, |
|
"num_input_tokens_seen": 78389868, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 1.1822916666666667, |
|
"loss": 0.05530470609664917, |
|
"loss_ce": 6.788992322981358e-05, |
|
"loss_iou": 0.57421875, |
|
"loss_num": 0.01104736328125, |
|
"loss_xval": 0.05517578125, |
|
"num_input_tokens_seen": 78389868, |
|
"step": 454 |
|
}, |
|
{ |
|
"epoch": 1.1848958333333333, |
|
"grad_norm": 22.327108090070816, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0734, |
|
"num_input_tokens_seen": 78562732, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 1.1848958333333333, |
|
"loss": 0.1318536400794983, |
|
"loss_ce": 7.873401773395017e-05, |
|
"loss_iou": 0.453125, |
|
"loss_num": 0.0263671875, |
|
"loss_xval": 0.1318359375, |
|
"num_input_tokens_seen": 78562732, |
|
"step": 455 |
|
}, |
|
{ |
|
"epoch": 1.1875, |
|
"grad_norm": 6.402774286125369, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0785, |
|
"num_input_tokens_seen": 78735540, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 1.1875, |
|
"loss": 0.08449774980545044, |
|
"loss_ce": 4.035345773445442e-05, |
|
"loss_iou": 0.359375, |
|
"loss_num": 0.016845703125, |
|
"loss_xval": 0.08447265625, |
|
"num_input_tokens_seen": 78735540, |
|
"step": 456 |
|
}, |
|
{ |
|
"epoch": 1.1901041666666667, |
|
"grad_norm": 3.7359791226023495, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0667, |
|
"num_input_tokens_seen": 78907916, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 1.1901041666666667, |
|
"loss": 0.04169946163892746, |
|
"loss_ce": 4.2965810280293226e-05, |
|
"loss_iou": 0.474609375, |
|
"loss_num": 0.00830078125, |
|
"loss_xval": 0.041748046875, |
|
"num_input_tokens_seen": 78907916, |
|
"step": 457 |
|
}, |
|
{ |
|
"epoch": 1.1927083333333333, |
|
"grad_norm": 7.721272031766003, |
|
"learning_rate": 5e-06, |
|
"loss": 0.1165, |
|
"num_input_tokens_seen": 79080080, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 1.1927083333333333, |
|
"loss": 0.09451837837696075, |
|
"loss_ce": 0.00012751182657666504, |
|
"loss_iou": 0.421875, |
|
"loss_num": 0.0189208984375, |
|
"loss_xval": 0.09423828125, |
|
"num_input_tokens_seen": 79080080, |
|
"step": 458 |
|
}, |
|
{ |
|
"epoch": 1.1953125, |
|
"grad_norm": 3.5323313091644755, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0699, |
|
"num_input_tokens_seen": 79252520, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 1.1953125, |
|
"loss": 0.09058161079883575, |
|
"loss_ce": 2.0699575543403625e-05, |
|
"loss_iou": 0.51953125, |
|
"loss_num": 0.0181884765625, |
|
"loss_xval": 0.09033203125, |
|
"num_input_tokens_seen": 79252520, |
|
"step": 459 |
|
}, |
|
{ |
|
"epoch": 1.1979166666666667, |
|
"grad_norm": 6.54371810379822, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0901, |
|
"num_input_tokens_seen": 79425288, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 1.1979166666666667, |
|
"loss": 0.06754864007234573, |
|
"loss_ce": 4.375486241769977e-05, |
|
"loss_iou": 0.5703125, |
|
"loss_num": 0.01348876953125, |
|
"loss_xval": 0.0673828125, |
|
"num_input_tokens_seen": 79425288, |
|
"step": 460 |
|
}, |
|
{ |
|
"epoch": 1.2005208333333333, |
|
"grad_norm": 4.16776605785161, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0483, |
|
"num_input_tokens_seen": 79597852, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 1.2005208333333333, |
|
"loss": 0.040252070873975754, |
|
"loss_ce": 2.990448228956666e-05, |
|
"loss_iou": 0.458984375, |
|
"loss_num": 0.008056640625, |
|
"loss_xval": 0.040283203125, |
|
"num_input_tokens_seen": 79597852, |
|
"step": 461 |
|
}, |
|
{ |
|
"epoch": 1.203125, |
|
"grad_norm": 10.246051862590502, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0741, |
|
"num_input_tokens_seen": 79770236, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 1.203125, |
|
"loss": 0.04001723229885101, |
|
"loss_ce": 5.4462791013065726e-05, |
|
"loss_iou": 0.5625, |
|
"loss_num": 0.00799560546875, |
|
"loss_xval": 0.0400390625, |
|
"num_input_tokens_seen": 79770236, |
|
"step": 462 |
|
}, |
|
{ |
|
"epoch": 1.2057291666666667, |
|
"grad_norm": 6.092643709859456, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0521, |
|
"num_input_tokens_seen": 79943152, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 1.2057291666666667, |
|
"loss": 0.03291913866996765, |
|
"loss_ce": 2.1190953702898696e-05, |
|
"loss_iou": 0.5390625, |
|
"loss_num": 0.006591796875, |
|
"loss_xval": 0.032958984375, |
|
"num_input_tokens_seen": 79943152, |
|
"step": 463 |
|
}, |
|
{ |
|
"epoch": 1.2083333333333333, |
|
"grad_norm": 4.950298356741838, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0743, |
|
"num_input_tokens_seen": 80115996, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 1.2083333333333333, |
|
"loss": 0.06474019587039948, |
|
"loss_ce": 4.293021993362345e-05, |
|
"loss_iou": 0.50390625, |
|
"loss_num": 0.012939453125, |
|
"loss_xval": 0.064453125, |
|
"num_input_tokens_seen": 80115996, |
|
"step": 464 |
|
}, |
|
{ |
|
"epoch": 1.2109375, |
|
"grad_norm": 6.018163435684629, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0688, |
|
"num_input_tokens_seen": 80288564, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 1.2109375, |
|
"loss": 0.06020050495862961, |
|
"loss_ce": 1.984027767321095e-05, |
|
"loss_iou": 0.59765625, |
|
"loss_num": 0.01202392578125, |
|
"loss_xval": 0.06005859375, |
|
"num_input_tokens_seen": 80288564, |
|
"step": 465 |
|
}, |
|
{ |
|
"epoch": 1.2135416666666667, |
|
"grad_norm": 4.280669670593218, |
|
"learning_rate": 5e-06, |
|
"loss": 0.074, |
|
"num_input_tokens_seen": 80461048, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 1.2135416666666667, |
|
"loss": 0.05621056258678436, |
|
"loss_ce": 2.77029030257836e-05, |
|
"loss_iou": 0.57421875, |
|
"loss_num": 0.01123046875, |
|
"loss_xval": 0.05615234375, |
|
"num_input_tokens_seen": 80461048, |
|
"step": 466 |
|
}, |
|
{ |
|
"epoch": 1.2161458333333333, |
|
"grad_norm": 29.24577961253563, |
|
"learning_rate": 5e-06, |
|
"loss": 0.072, |
|
"num_input_tokens_seen": 80633544, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 1.2161458333333333, |
|
"loss": 0.036091044545173645, |
|
"loss_ce": 2.689398024813272e-05, |
|
"loss_iou": 0.50390625, |
|
"loss_num": 0.007232666015625, |
|
"loss_xval": 0.0361328125, |
|
"num_input_tokens_seen": 80633544, |
|
"step": 467 |
|
}, |
|
{ |
|
"epoch": 1.21875, |
|
"grad_norm": 3.5977371268772007, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0541, |
|
"num_input_tokens_seen": 80805856, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 1.21875, |
|
"loss": 0.05104288086295128, |
|
"loss_ce": 3.274788468843326e-05, |
|
"loss_iou": 0.53515625, |
|
"loss_num": 0.01019287109375, |
|
"loss_xval": 0.051025390625, |
|
"num_input_tokens_seen": 80805856, |
|
"step": 468 |
|
}, |
|
{ |
|
"epoch": 1.2213541666666667, |
|
"grad_norm": 8.075177097555214, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0605, |
|
"num_input_tokens_seen": 80978184, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 1.2213541666666667, |
|
"loss": 0.04464123770594597, |
|
"loss_ce": 2.4538327124901116e-05, |
|
"loss_iou": 0.61328125, |
|
"loss_num": 0.0089111328125, |
|
"loss_xval": 0.044677734375, |
|
"num_input_tokens_seen": 80978184, |
|
"step": 469 |
|
}, |
|
{ |
|
"epoch": 1.2239583333333333, |
|
"grad_norm": 6.996084067501281, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0577, |
|
"num_input_tokens_seen": 81150552, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 1.2239583333333333, |
|
"loss": 0.04917052388191223, |
|
"loss_ce": 5.248059460427612e-05, |
|
"loss_iou": 0.59765625, |
|
"loss_num": 0.00982666015625, |
|
"loss_xval": 0.049072265625, |
|
"num_input_tokens_seen": 81150552, |
|
"step": 470 |
|
}, |
|
{ |
|
"epoch": 1.2265625, |
|
"grad_norm": 9.557588670219046, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0598, |
|
"num_input_tokens_seen": 81323276, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 1.2265625, |
|
"loss": 0.08455046266317368, |
|
"loss_ce": 3.203285814379342e-05, |
|
"loss_iou": 0.404296875, |
|
"loss_num": 0.016845703125, |
|
"loss_xval": 0.08447265625, |
|
"num_input_tokens_seen": 81323276, |
|
"step": 471 |
|
}, |
|
{ |
|
"epoch": 1.2291666666666667, |
|
"grad_norm": 4.882489644855878, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0455, |
|
"num_input_tokens_seen": 81496112, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 1.2291666666666667, |
|
"loss": 0.032756030559539795, |
|
"loss_ce": 0.0001327365607721731, |
|
"loss_iou": 0.53125, |
|
"loss_num": 0.00653076171875, |
|
"loss_xval": 0.03271484375, |
|
"num_input_tokens_seen": 81496112, |
|
"step": 472 |
|
}, |
|
{ |
|
"epoch": 1.2317708333333333, |
|
"grad_norm": 10.129497104665319, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0865, |
|
"num_input_tokens_seen": 81668408, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 1.2317708333333333, |
|
"loss": 0.053121719509363174, |
|
"loss_ce": 2.1133846530574374e-05, |
|
"loss_iou": 0.5703125, |
|
"loss_num": 0.0106201171875, |
|
"loss_xval": 0.05322265625, |
|
"num_input_tokens_seen": 81668408, |
|
"step": 473 |
|
}, |
|
{ |
|
"epoch": 1.234375, |
|
"grad_norm": 34.29728711508608, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0801, |
|
"num_input_tokens_seen": 81841444, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 1.234375, |
|
"loss": 0.1159551739692688, |
|
"loss_ce": 1.8899745555245318e-05, |
|
"loss_iou": 0.46484375, |
|
"loss_num": 0.023193359375, |
|
"loss_xval": 0.11572265625, |
|
"num_input_tokens_seen": 81841444, |
|
"step": 474 |
|
}, |
|
{ |
|
"epoch": 1.2369791666666667, |
|
"grad_norm": 5.248583896165671, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0835, |
|
"num_input_tokens_seen": 82014424, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 1.2369791666666667, |
|
"loss": 0.046804144978523254, |
|
"loss_ce": 2.0697760191978887e-05, |
|
"loss_iou": 0.65234375, |
|
"loss_num": 0.00933837890625, |
|
"loss_xval": 0.046875, |
|
"num_input_tokens_seen": 82014424, |
|
"step": 475 |
|
}, |
|
{ |
|
"epoch": 1.2395833333333333, |
|
"grad_norm": 4.890262555680429, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0638, |
|
"num_input_tokens_seen": 82187060, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 1.2395833333333333, |
|
"loss": 0.05300772190093994, |
|
"loss_ce": 2.9203043595771305e-05, |
|
"loss_iou": 0.54296875, |
|
"loss_num": 0.0106201171875, |
|
"loss_xval": 0.052978515625, |
|
"num_input_tokens_seen": 82187060, |
|
"step": 476 |
|
}, |
|
{ |
|
"epoch": 1.2421875, |
|
"grad_norm": 6.375507009761332, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0746, |
|
"num_input_tokens_seen": 82359884, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 1.2421875, |
|
"loss": 0.08658900111913681, |
|
"loss_ce": 0.00011744195217033848, |
|
"loss_iou": 0.515625, |
|
"loss_num": 0.017333984375, |
|
"loss_xval": 0.08642578125, |
|
"num_input_tokens_seen": 82359884, |
|
"step": 477 |
|
}, |
|
{ |
|
"epoch": 1.2447916666666667, |
|
"grad_norm": 6.190781448434917, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0682, |
|
"num_input_tokens_seen": 82532312, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 1.2447916666666667, |
|
"loss": 0.06419570744037628, |
|
"loss_ce": 1.724117828416638e-05, |
|
"loss_iou": 0.5390625, |
|
"loss_num": 0.0128173828125, |
|
"loss_xval": 0.06396484375, |
|
"num_input_tokens_seen": 82532312, |
|
"step": 478 |
|
}, |
|
{ |
|
"epoch": 1.2473958333333333, |
|
"grad_norm": 4.492503172851453, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0545, |
|
"num_input_tokens_seen": 82705224, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 1.2473958333333333, |
|
"loss": 0.043702684342861176, |
|
"loss_ce": 4.728833300760016e-05, |
|
"loss_iou": 0.4453125, |
|
"loss_num": 0.00872802734375, |
|
"loss_xval": 0.043701171875, |
|
"num_input_tokens_seen": 82705224, |
|
"step": 479 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"grad_norm": 4.614176563274451, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0825, |
|
"num_input_tokens_seen": 82877740, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 1.25, |
|
"loss": 0.0817180722951889, |
|
"loss_ce": 8.355021418537945e-05, |
|
"loss_iou": 0.703125, |
|
"loss_num": 0.016357421875, |
|
"loss_xval": 0.08154296875, |
|
"num_input_tokens_seen": 82877740, |
|
"step": 480 |
|
}, |
|
{ |
|
"epoch": 1.2526041666666667, |
|
"grad_norm": 4.135440424213399, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0639, |
|
"num_input_tokens_seen": 83050904, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 1.2526041666666667, |
|
"loss": 0.045239534229040146, |
|
"loss_ce": 2.774174208752811e-05, |
|
"loss_iou": 0.470703125, |
|
"loss_num": 0.009033203125, |
|
"loss_xval": 0.045166015625, |
|
"num_input_tokens_seen": 83050904, |
|
"step": 481 |
|
}, |
|
{ |
|
"epoch": 1.2552083333333333, |
|
"grad_norm": 5.953250787402434, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0758, |
|
"num_input_tokens_seen": 83223916, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 1.2552083333333333, |
|
"loss": 0.039626024663448334, |
|
"loss_ce": 2.946431777672842e-05, |
|
"loss_iou": 0.60546875, |
|
"loss_num": 0.0079345703125, |
|
"loss_xval": 0.03955078125, |
|
"num_input_tokens_seen": 83223916, |
|
"step": 482 |
|
}, |
|
{ |
|
"epoch": 1.2578125, |
|
"grad_norm": 5.778983695199196, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0476, |
|
"num_input_tokens_seen": 83397368, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 1.2578125, |
|
"loss": 0.041962604969739914, |
|
"loss_ce": 4.6711622417205945e-05, |
|
"loss_iou": 0.5703125, |
|
"loss_num": 0.00836181640625, |
|
"loss_xval": 0.0419921875, |
|
"num_input_tokens_seen": 83397368, |
|
"step": 483 |
|
}, |
|
{ |
|
"epoch": 1.2604166666666667, |
|
"grad_norm": 5.433318803087276, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0785, |
|
"num_input_tokens_seen": 83569504, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 1.2604166666666667, |
|
"loss": 0.05590134114027023, |
|
"loss_ce": 2.3658354621147737e-05, |
|
"loss_iou": 0.58984375, |
|
"loss_num": 0.01116943359375, |
|
"loss_xval": 0.055908203125, |
|
"num_input_tokens_seen": 83569504, |
|
"step": 484 |
|
}, |
|
{ |
|
"epoch": 1.2630208333333333, |
|
"grad_norm": 4.6826104330453955, |
|
"learning_rate": 5e-06, |
|
"loss": 0.054, |
|
"num_input_tokens_seen": 83742676, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 1.2630208333333333, |
|
"loss": 0.03859255462884903, |
|
"loss_ce": 6.411132198991254e-05, |
|
"loss_iou": 0.50390625, |
|
"loss_num": 0.0076904296875, |
|
"loss_xval": 0.03857421875, |
|
"num_input_tokens_seen": 83742676, |
|
"step": 485 |
|
}, |
|
{ |
|
"epoch": 1.265625, |
|
"grad_norm": 4.369179337344076, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0529, |
|
"num_input_tokens_seen": 83915776, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 1.265625, |
|
"loss": 0.04284074157476425, |
|
"loss_ce": 7.035740418359637e-05, |
|
"loss_iou": 0.5234375, |
|
"loss_num": 0.008544921875, |
|
"loss_xval": 0.042724609375, |
|
"num_input_tokens_seen": 83915776, |
|
"step": 486 |
|
}, |
|
{ |
|
"epoch": 1.2682291666666667, |
|
"grad_norm": 4.855681248964782, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0893, |
|
"num_input_tokens_seen": 84088164, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 1.2682291666666667, |
|
"loss": 0.0853077843785286, |
|
"loss_ce": 4.1672632505651563e-05, |
|
"loss_iou": 0.59375, |
|
"loss_num": 0.01708984375, |
|
"loss_xval": 0.08544921875, |
|
"num_input_tokens_seen": 84088164, |
|
"step": 487 |
|
}, |
|
{ |
|
"epoch": 1.2708333333333333, |
|
"grad_norm": 4.574747694340549, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0667, |
|
"num_input_tokens_seen": 84261012, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 1.2708333333333333, |
|
"loss": 0.1186133474111557, |
|
"loss_ce": 3.7304311263142154e-05, |
|
"loss_iou": 0.455078125, |
|
"loss_num": 0.0238037109375, |
|
"loss_xval": 0.11865234375, |
|
"num_input_tokens_seen": 84261012, |
|
"step": 488 |
|
}, |
|
{ |
|
"epoch": 1.2734375, |
|
"grad_norm": 6.201362257140882, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0656, |
|
"num_input_tokens_seen": 84433984, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 1.2734375, |
|
"loss": 0.055373311042785645, |
|
"loss_ce": 4.4942811655346304e-05, |
|
"loss_iou": 0.61328125, |
|
"loss_num": 0.01104736328125, |
|
"loss_xval": 0.055419921875, |
|
"num_input_tokens_seen": 84433984, |
|
"step": 489 |
|
}, |
|
{ |
|
"epoch": 1.2760416666666667, |
|
"grad_norm": 4.576166685047339, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0666, |
|
"num_input_tokens_seen": 84606516, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 1.2760416666666667, |
|
"loss": 0.046068161725997925, |
|
"loss_ce": 4.765454650623724e-05, |
|
"loss_iou": 0.0, |
|
"loss_num": 0.00921630859375, |
|
"loss_xval": 0.0458984375, |
|
"num_input_tokens_seen": 84606516, |
|
"step": 490 |
|
}, |
|
{ |
|
"epoch": 1.2786458333333333, |
|
"grad_norm": 4.832131210851992, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0737, |
|
"num_input_tokens_seen": 84779356, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 1.2786458333333333, |
|
"loss": 0.11677989363670349, |
|
"loss_ce": 1.964074544957839e-05, |
|
"loss_iou": 0.484375, |
|
"loss_num": 0.0233154296875, |
|
"loss_xval": 0.11669921875, |
|
"num_input_tokens_seen": 84779356, |
|
"step": 491 |
|
}, |
|
{ |
|
"epoch": 1.28125, |
|
"grad_norm": 5.262752038477657, |
|
"learning_rate": 5e-06, |
|
"loss": 0.071, |
|
"num_input_tokens_seen": 84952020, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 1.28125, |
|
"loss": 0.0537094846367836, |
|
"loss_ce": 2.9066111892461777e-05, |
|
"loss_iou": 0.58203125, |
|
"loss_num": 0.0107421875, |
|
"loss_xval": 0.0537109375, |
|
"num_input_tokens_seen": 84952020, |
|
"step": 492 |
|
}, |
|
{ |
|
"epoch": 1.2838541666666667, |
|
"grad_norm": 2.9344225414677836, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0495, |
|
"num_input_tokens_seen": 85124876, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 1.2838541666666667, |
|
"loss": 0.03429765999317169, |
|
"loss_ce": 0.00013323240273166448, |
|
"loss_iou": 0.49609375, |
|
"loss_num": 0.0068359375, |
|
"loss_xval": 0.0341796875, |
|
"num_input_tokens_seen": 85124876, |
|
"step": 493 |
|
}, |
|
{ |
|
"epoch": 1.2864583333333333, |
|
"grad_norm": 12.093642895702288, |
|
"learning_rate": 5e-06, |
|
"loss": 0.083, |
|
"num_input_tokens_seen": 85297824, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 1.2864583333333333, |
|
"loss": 0.13448233902454376, |
|
"loss_ce": 3.7149860872887075e-05, |
|
"loss_iou": 0.498046875, |
|
"loss_num": 0.02685546875, |
|
"loss_xval": 0.134765625, |
|
"num_input_tokens_seen": 85297824, |
|
"step": 494 |
|
}, |
|
{ |
|
"epoch": 1.2890625, |
|
"grad_norm": 5.311410396179597, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0622, |
|
"num_input_tokens_seen": 85469896, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 1.2890625, |
|
"loss": 0.10600131750106812, |
|
"loss_ce": 4.428675310919061e-05, |
|
"loss_iou": 0.466796875, |
|
"loss_num": 0.0211181640625, |
|
"loss_xval": 0.10595703125, |
|
"num_input_tokens_seen": 85469896, |
|
"step": 495 |
|
}, |
|
{ |
|
"epoch": 1.2916666666666667, |
|
"grad_norm": 13.126940553733593, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0509, |
|
"num_input_tokens_seen": 85642324, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 1.2916666666666667, |
|
"loss": 0.04261418431997299, |
|
"loss_ce": 0.00010319902503397316, |
|
"loss_iou": 0.53515625, |
|
"loss_num": 0.00848388671875, |
|
"loss_xval": 0.04248046875, |
|
"num_input_tokens_seen": 85642324, |
|
"step": 496 |
|
}, |
|
{ |
|
"epoch": 1.2942708333333333, |
|
"grad_norm": 9.141153643623982, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0514, |
|
"num_input_tokens_seen": 85815252, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 1.2942708333333333, |
|
"loss": 0.04319656640291214, |
|
"loss_ce": 4.471266584005207e-05, |
|
"loss_iou": 0.46875, |
|
"loss_num": 0.0086669921875, |
|
"loss_xval": 0.043212890625, |
|
"num_input_tokens_seen": 85815252, |
|
"step": 497 |
|
}, |
|
{ |
|
"epoch": 1.296875, |
|
"grad_norm": 5.456561002723919, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0744, |
|
"num_input_tokens_seen": 85988240, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 1.296875, |
|
"loss": 0.0488949790596962, |
|
"loss_ce": 3.6337674828246236e-05, |
|
"loss_iou": 0.5859375, |
|
"loss_num": 0.009765625, |
|
"loss_xval": 0.048828125, |
|
"num_input_tokens_seen": 85988240, |
|
"step": 498 |
|
}, |
|
{ |
|
"epoch": 1.2994791666666667, |
|
"grad_norm": 4.197467000151624, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0618, |
|
"num_input_tokens_seen": 86160724, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 1.2994791666666667, |
|
"loss": 0.04255600646138191, |
|
"loss_ce": 2.9761704354314134e-05, |
|
"loss_iou": 0.443359375, |
|
"loss_num": 0.00848388671875, |
|
"loss_xval": 0.04248046875, |
|
"num_input_tokens_seen": 86160724, |
|
"step": 499 |
|
}, |
|
{ |
|
"epoch": 1.3020833333333333, |
|
"grad_norm": 16.018482571236348, |
|
"learning_rate": 5e-06, |
|
"loss": 0.0857, |
|
"num_input_tokens_seen": 86333348, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.3020833333333333, |
|
"eval_seeclick_CIoU": 0.49157558381557465, |
|
"eval_seeclick_GIoU": 0.4883834272623062, |
|
"eval_seeclick_IoU": 0.5341370701789856, |
|
"eval_seeclick_MAE_all": 0.07028103247284889, |
|
"eval_seeclick_MAE_h": 0.05726983770728111, |
|
"eval_seeclick_MAE_w": 0.08522269874811172, |
|
"eval_seeclick_MAE_x": 0.08005227893590927, |
|
"eval_seeclick_MAE_y": 0.058579325675964355, |
|
"eval_seeclick_NUM_probability": 0.9999949038028717, |
|
"eval_seeclick_inside_bbox": 0.8764204680919647, |
|
"eval_seeclick_loss": 0.9519317150115967, |
|
"eval_seeclick_loss_ce": 0.6910622417926788, |
|
"eval_seeclick_loss_iou": 0.6273193359375, |
|
"eval_seeclick_loss_num": 0.053680419921875, |
|
"eval_seeclick_loss_xval": 0.26849365234375, |
|
"eval_seeclick_runtime": 71.7405, |
|
"eval_seeclick_samples_per_second": 0.599, |
|
"eval_seeclick_steps_per_second": 0.028, |
|
"num_input_tokens_seen": 86333348, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.3020833333333333, |
|
"eval_icons_CIoU": 0.7799727618694305, |
|
"eval_icons_GIoU": 0.7758736610412598, |
|
"eval_icons_IoU": 0.7871803939342499, |
|
"eval_icons_MAE_all": 0.026267122477293015, |
|
"eval_icons_MAE_h": 0.024472126737236977, |
|
"eval_icons_MAE_w": 0.029545767232775688, |
|
"eval_icons_MAE_x": 0.02697262354195118, |
|
"eval_icons_MAE_y": 0.024077963083982468, |
|
"eval_icons_NUM_probability": 0.9999885261058807, |
|
"eval_icons_inside_bbox": 1.0, |
|
"eval_icons_loss": 0.07963114976882935, |
|
"eval_icons_loss_ce": 0.0020425044931471348, |
|
"eval_icons_loss_iou": 0.5069580078125, |
|
"eval_icons_loss_num": 0.014467239379882812, |
|
"eval_icons_loss_xval": 0.07232666015625, |
|
"eval_icons_runtime": 80.3553, |
|
"eval_icons_samples_per_second": 0.622, |
|
"eval_icons_steps_per_second": 0.025, |
|
"num_input_tokens_seen": 86333348, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.3020833333333333, |
|
"eval_screenspot_CIoU": 0.3665693998336792, |
|
"eval_screenspot_GIoU": 0.3608221113681793, |
|
"eval_screenspot_IoU": 0.4541289210319519, |
|
"eval_screenspot_MAE_all": 0.13468862076600394, |
|
"eval_screenspot_MAE_h": 0.07963093866904576, |
|
"eval_screenspot_MAE_w": 0.2195572853088379, |
|
"eval_screenspot_MAE_x": 0.16379199425379434, |
|
"eval_screenspot_MAE_y": 0.07577425986528397, |
|
"eval_screenspot_NUM_probability": 0.9999738732973734, |
|
"eval_screenspot_inside_bbox": 0.7116666634877523, |
|
"eval_screenspot_loss": 0.9175184369087219, |
|
"eval_screenspot_loss_ce": 0.42678311467170715, |
|
"eval_screenspot_loss_iou": 0.4466145833333333, |
|
"eval_screenspot_loss_num": 0.09850565592447917, |
|
"eval_screenspot_loss_xval": 0.4925130208333333, |
|
"eval_screenspot_runtime": 149.8949, |
|
"eval_screenspot_samples_per_second": 0.594, |
|
"eval_screenspot_steps_per_second": 0.02, |
|
"num_input_tokens_seen": 86333348, |
|
"step": 500 |
|
}, |
|
{ |
|
"epoch": 1.3020833333333333, |
|
"eval_compot_CIoU": 0.9082967340946198, |
|
"eval_compot_GIoU": 0.9085466265678406, |
|
"eval_compot_IoU": 0.9093170166015625, |
|
"eval_compot_MAE_all": 0.009244627552106977, |
|
"eval_compot_MAE_h": 0.004357840050943196, |
|
"eval_compot_MAE_w": 0.014095565304160118, |
|
"eval_compot_MAE_x": 0.012027833610773087, |
|
"eval_compot_MAE_y": 0.006497269030660391, |
|
"eval_compot_NUM_probability": 0.9999580085277557, |
|
"eval_compot_inside_bbox": 1.0, |
|
"eval_compot_loss": 0.04286140948534012, |
|
"eval_compot_loss_ce": 4.613543933373876e-05, |
|
"eval_compot_loss_iou": 0.507080078125, |
|
"eval_compot_loss_num": 0.009250640869140625, |
|
"eval_compot_loss_xval": 0.0462188720703125, |
|
"eval_compot_runtime": 84.1131, |
|
"eval_compot_samples_per_second": 0.594, |
|
"eval_compot_steps_per_second": 0.024, |
|
"num_input_tokens_seen": 86333348, |
|
"step": 500 |
|
} |
|
], |
|
"logging_steps": 1.0, |
|
"max_steps": 1152, |
|
"num_input_tokens_seen": 86333348, |
|
"num_train_epochs": 3, |
|
"save_steps": 250, |
|
"stateful_callbacks": { |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": false |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 622740728971264.0, |
|
"train_batch_size": 4, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|