Meta-Llama-3-70B-Instruct-2024-05-02-17-33-18-conversation-model/checkpoint-1130/trainer_state.json
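The Trainer state below records one log entry per optimizer step (epoch, gradient norm, learning rate, and training loss). A minimal sketch for inspecting it, assuming the file sits at checkpoint-1130/trainer_state.json relative to the working directory (only the Python standard library is used):

import json

# Load the saved Trainer state and pull out the per-step log records.
with open("checkpoint-1130/trainer_state.json") as f:
    state = json.load(f)

history = [e for e in state["log_history"] if "loss" in e]
steps = [e["step"] for e in history]
losses = [e["loss"] for e in history]
lrs = [e["learning_rate"] for e in history]

# The learning rate rises by a constant 5.78e-07 per step up to the 2e-04
# peak at step 346, then falls by a constant ~5.04e-08 per step, which is
# consistent with linear warmup followed by linear decay.
print(f"{len(history)} logged steps, peak LR = {max(lrs):.2e}")
print(f"loss: {losses[0]:.3f} (step {steps[0]}) -> {losses[-1]:.3f} (step {steps[-1]})")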
{
  "best_metric": null,
  "best_model_checkpoint": null,
  "epoch": 0.7850629613547546,
  "eval_steps": 500,
  "global_step": 1130,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.0006947459834997829,
      "grad_norm": 2.5,
      "learning_rate": 5.780346820809248e-07,
      "loss": 4.1915,
      "step": 1
    },
    {
      "epoch": 0.0013894919669995658,
      "grad_norm": 4.21875,
      "learning_rate": 1.1560693641618497e-06,
      "loss": 3.7445,
      "step": 2
    },
    {
      "epoch": 0.0020842379504993486,
      "grad_norm": 1.4140625,
      "learning_rate": 1.7341040462427746e-06,
      "loss": 3.9999,
      "step": 3
    },
    {
      "epoch": 0.0027789839339991316,
      "grad_norm": 1.1484375,
      "learning_rate": 2.3121387283236993e-06,
      "loss": 4.0444,
      "step": 4
    },
    {
      "epoch": 0.0034737299174989146,
      "grad_norm": 1.3359375,
      "learning_rate": 2.8901734104046244e-06,
      "loss": 4.1148,
      "step": 5
    },
    {
      "epoch": 0.004168475900998697,
      "grad_norm": 1.7578125,
      "learning_rate": 3.468208092485549e-06,
      "loss": 4.5023,
      "step": 6
    },
    {
      "epoch": 0.004863221884498481,
      "grad_norm": 6.90625,
      "learning_rate": 4.046242774566474e-06,
      "loss": 4.0132,
      "step": 7
    },
    {
      "epoch": 0.005557967867998263,
      "grad_norm": 1.3046875,
      "learning_rate": 4.624277456647399e-06,
      "loss": 3.7643,
      "step": 8
    },
    {
      "epoch": 0.006252713851498046,
      "grad_norm": 1.5234375,
      "learning_rate": 5.202312138728324e-06,
      "loss": 3.3901,
      "step": 9
    },
    {
      "epoch": 0.006947459834997829,
      "grad_norm": 0.8984375,
      "learning_rate": 5.780346820809249e-06,
      "loss": 3.249,
      "step": 10
    },
    {
      "epoch": 0.007642205818497612,
      "grad_norm": 2.28125,
      "learning_rate": 6.358381502890173e-06,
      "loss": 3.8669,
      "step": 11
    },
    {
      "epoch": 0.008336951801997394,
      "grad_norm": 1.40625,
      "learning_rate": 6.936416184971098e-06,
      "loss": 3.9794,
      "step": 12
    },
    {
      "epoch": 0.009031697785497178,
      "grad_norm": 0.8515625,
      "learning_rate": 7.514450867052024e-06,
      "loss": 3.3814,
      "step": 13
    },
    {
      "epoch": 0.009726443768996961,
      "grad_norm": 1.1953125,
      "learning_rate": 8.092485549132949e-06,
      "loss": 3.7746,
      "step": 14
    },
    {
      "epoch": 0.010421189752496743,
      "grad_norm": 1.7265625,
      "learning_rate": 8.670520231213873e-06,
      "loss": 3.0555,
      "step": 15
    },
    {
      "epoch": 0.011115935735996526,
      "grad_norm": 1.1796875,
      "learning_rate": 9.248554913294797e-06,
      "loss": 3.6958,
      "step": 16
    },
    {
      "epoch": 0.01181068171949631,
      "grad_norm": 2.25,
      "learning_rate": 9.826589595375723e-06,
      "loss": 5.0544,
      "step": 17
    },
    {
      "epoch": 0.012505427702996091,
      "grad_norm": 4.875,
      "learning_rate": 1.0404624277456647e-05,
      "loss": 5.395,
      "step": 18
    },
    {
      "epoch": 0.013200173686495875,
      "grad_norm": 0.9375,
      "learning_rate": 1.0982658959537573e-05,
      "loss": 3.3306,
      "step": 19
    },
    {
      "epoch": 0.013894919669995658,
      "grad_norm": 1.625,
      "learning_rate": 1.1560693641618498e-05,
      "loss": 4.0474,
      "step": 20
    },
    {
      "epoch": 0.01458966565349544,
      "grad_norm": 1.65625,
      "learning_rate": 1.2138728323699422e-05,
      "loss": 3.6259,
      "step": 21
    },
    {
      "epoch": 0.015284411636995223,
      "grad_norm": 3.0,
      "learning_rate": 1.2716763005780346e-05,
      "loss": 4.0321,
      "step": 22
    },
    {
      "epoch": 0.015979157620495007,
      "grad_norm": 1.9140625,
      "learning_rate": 1.329479768786127e-05,
      "loss": 4.1047,
      "step": 23
    },
    {
      "epoch": 0.01667390360399479,
      "grad_norm": 2.375,
      "learning_rate": 1.3872832369942197e-05,
      "loss": 3.4635,
      "step": 24
    },
    {
      "epoch": 0.017368649587494574,
      "grad_norm": 3.03125,
      "learning_rate": 1.4450867052023123e-05,
      "loss": 5.1757,
      "step": 25
    },
    {
      "epoch": 0.018063395570994355,
      "grad_norm": 7.0,
      "learning_rate": 1.5028901734104049e-05,
      "loss": 4.4566,
      "step": 26
    },
    {
      "epoch": 0.018758141554494137,
      "grad_norm": 2.015625,
      "learning_rate": 1.5606936416184973e-05,
      "loss": 3.9157,
      "step": 27
    },
    {
      "epoch": 0.019452887537993922,
      "grad_norm": 2.40625,
      "learning_rate": 1.6184971098265897e-05,
      "loss": 3.5971,
      "step": 28
    },
    {
      "epoch": 0.020147633521493704,
      "grad_norm": 2.84375,
      "learning_rate": 1.676300578034682e-05,
      "loss": 3.1815,
      "step": 29
    },
    {
      "epoch": 0.020842379504993486,
      "grad_norm": 2.515625,
      "learning_rate": 1.7341040462427746e-05,
      "loss": 3.5147,
      "step": 30
    },
    {
      "epoch": 0.02153712548849327,
      "grad_norm": 2.625,
      "learning_rate": 1.791907514450867e-05,
      "loss": 3.4658,
      "step": 31
    },
    {
      "epoch": 0.022231871471993053,
      "grad_norm": 4.34375,
      "learning_rate": 1.8497109826589594e-05,
      "loss": 3.2493,
      "step": 32
    },
    {
      "epoch": 0.022926617455492834,
      "grad_norm": 1.75,
      "learning_rate": 1.907514450867052e-05,
      "loss": 2.5355,
      "step": 33
    },
    {
      "epoch": 0.02362136343899262,
      "grad_norm": 2.234375,
      "learning_rate": 1.9653179190751446e-05,
      "loss": 3.1388,
      "step": 34
    },
    {
      "epoch": 0.0243161094224924,
      "grad_norm": 5.40625,
      "learning_rate": 2.023121387283237e-05,
      "loss": 2.7128,
      "step": 35
    },
    {
      "epoch": 0.025010855405992183,
      "grad_norm": 2.78125,
      "learning_rate": 2.0809248554913295e-05,
      "loss": 2.9951,
      "step": 36
    },
    {
      "epoch": 0.025705601389491968,
      "grad_norm": 1.9453125,
      "learning_rate": 2.1387283236994223e-05,
      "loss": 2.938,
      "step": 37
    },
    {
      "epoch": 0.02640034737299175,
      "grad_norm": 3.21875,
      "learning_rate": 2.1965317919075147e-05,
      "loss": 2.8222,
      "step": 38
    },
    {
      "epoch": 0.02709509335649153,
      "grad_norm": 0.8359375,
      "learning_rate": 2.254335260115607e-05,
      "loss": 2.4152,
      "step": 39
    },
    {
      "epoch": 0.027789839339991317,
      "grad_norm": 1.7890625,
      "learning_rate": 2.3121387283236996e-05,
      "loss": 2.5405,
      "step": 40
    },
    {
      "epoch": 0.028484585323491098,
      "grad_norm": 1.2421875,
      "learning_rate": 2.369942196531792e-05,
      "loss": 2.6256,
      "step": 41
    },
    {
      "epoch": 0.02917933130699088,
      "grad_norm": 1.0078125,
      "learning_rate": 2.4277456647398844e-05,
      "loss": 2.646,
      "step": 42
    },
    {
      "epoch": 0.029874077290490665,
      "grad_norm": 0.84375,
      "learning_rate": 2.485549132947977e-05,
      "loss": 2.3207,
      "step": 43
    },
    {
      "epoch": 0.030568823273990447,
      "grad_norm": 0.91015625,
      "learning_rate": 2.5433526011560693e-05,
      "loss": 2.3893,
      "step": 44
    },
    {
      "epoch": 0.03126356925749023,
      "grad_norm": 1.546875,
      "learning_rate": 2.6011560693641617e-05,
      "loss": 1.8784,
      "step": 45
    },
    {
      "epoch": 0.031958315240990014,
      "grad_norm": 1.3515625,
      "learning_rate": 2.658959537572254e-05,
      "loss": 2.0503,
      "step": 46
    },
    {
      "epoch": 0.0326530612244898,
      "grad_norm": 0.8515625,
      "learning_rate": 2.7167630057803466e-05,
      "loss": 2.1115,
      "step": 47
    },
    {
      "epoch": 0.03334780720798958,
      "grad_norm": 1.1171875,
      "learning_rate": 2.7745664739884393e-05,
      "loss": 1.92,
      "step": 48
    },
    {
      "epoch": 0.03404255319148936,
      "grad_norm": 0.83984375,
      "learning_rate": 2.832369942196532e-05,
      "loss": 2.1723,
      "step": 49
    },
    {
      "epoch": 0.03473729917498915,
      "grad_norm": 0.78515625,
      "learning_rate": 2.8901734104046245e-05,
      "loss": 2.1771,
      "step": 50
    },
    {
      "epoch": 0.035432045158488926,
      "grad_norm": 1.96875,
      "learning_rate": 2.947976878612717e-05,
      "loss": 2.5537,
      "step": 51
    },
    {
      "epoch": 0.03612679114198871,
      "grad_norm": 0.9140625,
      "learning_rate": 3.0057803468208097e-05,
      "loss": 2.0183,
      "step": 52
    },
    {
      "epoch": 0.036821537125488496,
      "grad_norm": 0.6171875,
      "learning_rate": 3.063583815028902e-05,
      "loss": 2.2641,
      "step": 53
    },
    {
      "epoch": 0.037516283108988274,
      "grad_norm": 1.2890625,
      "learning_rate": 3.1213872832369946e-05,
      "loss": 2.2391,
      "step": 54
    },
    {
      "epoch": 0.03821102909248806,
      "grad_norm": 0.92578125,
      "learning_rate": 3.179190751445087e-05,
      "loss": 2.0064,
      "step": 55
    },
    {
      "epoch": 0.038905775075987845,
      "grad_norm": 1.171875,
      "learning_rate": 3.2369942196531794e-05,
      "loss": 1.6102,
      "step": 56
    },
    {
      "epoch": 0.03960052105948762,
      "grad_norm": 0.4765625,
      "learning_rate": 3.294797687861272e-05,
      "loss": 2.1865,
      "step": 57
    },
    {
      "epoch": 0.04029526704298741,
      "grad_norm": 1.1484375,
      "learning_rate": 3.352601156069364e-05,
      "loss": 1.9878,
      "step": 58
    },
    {
      "epoch": 0.04099001302648719,
      "grad_norm": 1.0234375,
      "learning_rate": 3.410404624277457e-05,
      "loss": 2.1405,
      "step": 59
    },
    {
      "epoch": 0.04168475900998697,
      "grad_norm": 1.625,
      "learning_rate": 3.468208092485549e-05,
      "loss": 2.3983,
      "step": 60
    },
    {
      "epoch": 0.04237950499348676,
      "grad_norm": 0.96875,
      "learning_rate": 3.5260115606936416e-05,
      "loss": 2.3009,
      "step": 61
    },
    {
      "epoch": 0.04307425097698654,
      "grad_norm": 1.0,
      "learning_rate": 3.583815028901734e-05,
      "loss": 1.8959,
      "step": 62
    },
    {
      "epoch": 0.04376899696048632,
      "grad_norm": 1.0390625,
      "learning_rate": 3.6416184971098265e-05,
      "loss": 1.9368,
      "step": 63
    },
    {
      "epoch": 0.044463742943986105,
      "grad_norm": 1.046875,
      "learning_rate": 3.699421965317919e-05,
      "loss": 2.1604,
      "step": 64
    },
    {
      "epoch": 0.04515848892748589,
      "grad_norm": 0.703125,
      "learning_rate": 3.757225433526011e-05,
      "loss": 2.034,
      "step": 65
    },
    {
      "epoch": 0.04585323491098567,
      "grad_norm": 0.8515625,
      "learning_rate": 3.815028901734104e-05,
      "loss": 2.3586,
      "step": 66
    },
    {
      "epoch": 0.046547980894485454,
      "grad_norm": 0.62109375,
      "learning_rate": 3.872832369942196e-05,
      "loss": 1.8835,
      "step": 67
    },
    {
      "epoch": 0.04724272687798524,
      "grad_norm": 0.6328125,
      "learning_rate": 3.930635838150289e-05,
      "loss": 2.1474,
      "step": 68
    },
    {
      "epoch": 0.04793747286148502,
      "grad_norm": 0.7578125,
      "learning_rate": 3.988439306358382e-05,
      "loss": 1.988,
      "step": 69
    },
    {
      "epoch": 0.0486322188449848,
      "grad_norm": 0.6953125,
      "learning_rate": 4.046242774566474e-05,
      "loss": 2.2501,
      "step": 70
    },
    {
      "epoch": 0.04932696482848459,
      "grad_norm": 1.125,
      "learning_rate": 4.1040462427745666e-05,
      "loss": 1.6597,
      "step": 71
    },
    {
      "epoch": 0.050021710811984366,
      "grad_norm": 0.90234375,
      "learning_rate": 4.161849710982659e-05,
      "loss": 2.2616,
      "step": 72
    },
    {
      "epoch": 0.05071645679548415,
      "grad_norm": 1.0390625,
      "learning_rate": 4.2196531791907514e-05,
      "loss": 1.8914,
      "step": 73
    },
    {
      "epoch": 0.051411202778983936,
      "grad_norm": 1.7421875,
      "learning_rate": 4.2774566473988445e-05,
      "loss": 2.0235,
      "step": 74
    },
    {
      "epoch": 0.052105948762483714,
      "grad_norm": 0.66015625,
      "learning_rate": 4.335260115606937e-05,
      "loss": 2.1633,
      "step": 75
    },
    {
      "epoch": 0.0528006947459835,
      "grad_norm": 0.68359375,
      "learning_rate": 4.3930635838150294e-05,
      "loss": 2.1997,
      "step": 76
    },
    {
      "epoch": 0.053495440729483285,
      "grad_norm": 0.98828125,
      "learning_rate": 4.450867052023122e-05,
      "loss": 2.2325,
      "step": 77
    },
    {
      "epoch": 0.05419018671298306,
      "grad_norm": 0.95703125,
      "learning_rate": 4.508670520231214e-05,
      "loss": 1.6797,
      "step": 78
    },
    {
      "epoch": 0.05488493269648285,
      "grad_norm": 0.68359375,
      "learning_rate": 4.566473988439307e-05,
      "loss": 2.0388,
      "step": 79
    },
    {
      "epoch": 0.05557967867998263,
      "grad_norm": 1.34375,
      "learning_rate": 4.624277456647399e-05,
      "loss": 1.8112,
      "step": 80
    },
    {
      "epoch": 0.05627442466348241,
      "grad_norm": 1.2578125,
      "learning_rate": 4.6820809248554915e-05,
      "loss": 1.925,
      "step": 81
    },
    {
      "epoch": 0.056969170646982197,
      "grad_norm": 0.80859375,
      "learning_rate": 4.739884393063584e-05,
      "loss": 1.8969,
      "step": 82
    },
    {
      "epoch": 0.05766391663048198,
      "grad_norm": 1.1171875,
      "learning_rate": 4.7976878612716764e-05,
      "loss": 2.1033,
      "step": 83
    },
    {
      "epoch": 0.05835866261398176,
      "grad_norm": 0.90234375,
      "learning_rate": 4.855491329479769e-05,
      "loss": 2.0978,
      "step": 84
    },
    {
      "epoch": 0.059053408597481545,
      "grad_norm": 0.51953125,
      "learning_rate": 4.913294797687861e-05,
      "loss": 2.0516,
      "step": 85
    },
    {
      "epoch": 0.05974815458098133,
      "grad_norm": 0.474609375,
      "learning_rate": 4.971098265895954e-05,
      "loss": 2.0648,
      "step": 86
    },
    {
      "epoch": 0.06044290056448111,
      "grad_norm": 5.1875,
      "learning_rate": 5.028901734104047e-05,
      "loss": 2.098,
      "step": 87
    },
    {
      "epoch": 0.061137646547980894,
      "grad_norm": 1.25,
      "learning_rate": 5.0867052023121385e-05,
      "loss": 2.1498,
      "step": 88
    },
    {
      "epoch": 0.06183239253148068,
      "grad_norm": 1.375,
      "learning_rate": 5.1445086705202317e-05,
      "loss": 1.8586,
      "step": 89
    },
    {
      "epoch": 0.06252713851498046,
      "grad_norm": 0.83984375,
      "learning_rate": 5.2023121387283234e-05,
      "loss": 1.6702,
      "step": 90
    },
    {
      "epoch": 0.06322188449848025,
      "grad_norm": 0.734375,
      "learning_rate": 5.2601156069364165e-05,
      "loss": 2.1599,
      "step": 91
    },
    {
      "epoch": 0.06391663048198003,
      "grad_norm": 0.85546875,
      "learning_rate": 5.317919075144508e-05,
      "loss": 2.0213,
      "step": 92
    },
    {
      "epoch": 0.0646113764654798,
      "grad_norm": 0.71875,
      "learning_rate": 5.3757225433526014e-05,
      "loss": 2.3254,
      "step": 93
    },
    {
      "epoch": 0.0653061224489796,
      "grad_norm": 0.89453125,
      "learning_rate": 5.433526011560693e-05,
      "loss": 2.0617,
      "step": 94
    },
    {
      "epoch": 0.06600086843247938,
      "grad_norm": 0.62890625,
      "learning_rate": 5.491329479768786e-05,
      "loss": 1.8925,
      "step": 95
    },
    {
      "epoch": 0.06669561441597915,
      "grad_norm": 0.58203125,
      "learning_rate": 5.5491329479768787e-05,
      "loss": 1.886,
      "step": 96
    },
    {
      "epoch": 0.06739036039947895,
      "grad_norm": 0.984375,
      "learning_rate": 5.606936416184971e-05,
      "loss": 2.2635,
      "step": 97
    },
    {
      "epoch": 0.06808510638297872,
      "grad_norm": 1.1796875,
      "learning_rate": 5.664739884393064e-05,
      "loss": 1.8094,
      "step": 98
    },
    {
      "epoch": 0.0687798523664785,
      "grad_norm": 0.8203125,
      "learning_rate": 5.722543352601156e-05,
      "loss": 1.7222,
      "step": 99
    },
    {
      "epoch": 0.0694745983499783,
      "grad_norm": 0.5390625,
      "learning_rate": 5.780346820809249e-05,
      "loss": 2.1751,
      "step": 100
    },
    {
      "epoch": 0.07016934433347807,
      "grad_norm": 0.64453125,
      "learning_rate": 5.8381502890173415e-05,
      "loss": 2.0186,
      "step": 101
    },
    {
      "epoch": 0.07086409031697785,
      "grad_norm": 0.92578125,
      "learning_rate": 5.895953757225434e-05,
      "loss": 1.7453,
      "step": 102
    },
    {
      "epoch": 0.07155883630047764,
      "grad_norm": 0.55078125,
      "learning_rate": 5.9537572254335263e-05,
      "loss": 2.1655,
      "step": 103
    },
    {
      "epoch": 0.07225358228397742,
      "grad_norm": 0.578125,
      "learning_rate": 6.0115606936416195e-05,
      "loss": 2.0565,
      "step": 104
    },
    {
      "epoch": 0.0729483282674772,
      "grad_norm": 0.84765625,
      "learning_rate": 6.069364161849711e-05,
      "loss": 1.9825,
      "step": 105
    },
    {
      "epoch": 0.07364307425097699,
      "grad_norm": 1.515625,
      "learning_rate": 6.127167630057804e-05,
      "loss": 1.4917,
      "step": 106
    },
    {
      "epoch": 0.07433782023447677,
      "grad_norm": 1.546875,
      "learning_rate": 6.184971098265896e-05,
      "loss": 1.7809,
      "step": 107
    },
    {
      "epoch": 0.07503256621797655,
      "grad_norm": 1.640625,
      "learning_rate": 6.242774566473989e-05,
      "loss": 2.2905,
      "step": 108
    },
    {
      "epoch": 0.07572731220147634,
      "grad_norm": 0.71875,
      "learning_rate": 6.300578034682081e-05,
      "loss": 1.8123,
      "step": 109
    },
    {
      "epoch": 0.07642205818497612,
      "grad_norm": 0.81640625,
      "learning_rate": 6.358381502890174e-05,
      "loss": 2.1268,
      "step": 110
    },
    {
      "epoch": 0.0771168041684759,
      "grad_norm": 0.9921875,
      "learning_rate": 6.416184971098266e-05,
      "loss": 1.9522,
      "step": 111
    },
    {
      "epoch": 0.07781155015197569,
      "grad_norm": 0.66796875,
      "learning_rate": 6.473988439306359e-05,
      "loss": 2.0203,
      "step": 112
    },
    {
      "epoch": 0.07850629613547547,
      "grad_norm": 1.7734375,
      "learning_rate": 6.53179190751445e-05,
      "loss": 2.4639,
      "step": 113
    },
    {
      "epoch": 0.07920104211897525,
      "grad_norm": 0.828125,
      "learning_rate": 6.589595375722544e-05,
      "loss": 2.1491,
      "step": 114
    },
    {
      "epoch": 0.07989578810247504,
      "grad_norm": 0.96484375,
      "learning_rate": 6.647398843930635e-05,
      "loss": 2.0459,
      "step": 115
    },
    {
      "epoch": 0.08059053408597482,
      "grad_norm": 1.6640625,
      "learning_rate": 6.705202312138729e-05,
      "loss": 2.0957,
      "step": 116
    },
    {
      "epoch": 0.0812852800694746,
      "grad_norm": 0.69921875,
      "learning_rate": 6.763005780346822e-05,
      "loss": 2.1087,
      "step": 117
    },
    {
      "epoch": 0.08198002605297439,
      "grad_norm": 0.96875,
      "learning_rate": 6.820809248554913e-05,
      "loss": 1.6713,
      "step": 118
    },
    {
      "epoch": 0.08267477203647416,
      "grad_norm": 2.21875,
      "learning_rate": 6.878612716763007e-05,
      "loss": 2.0883,
      "step": 119
    },
    {
      "epoch": 0.08336951801997394,
      "grad_norm": 1.015625,
      "learning_rate": 6.936416184971098e-05,
      "loss": 1.8738,
      "step": 120
    },
    {
      "epoch": 0.08406426400347373,
      "grad_norm": 0.73046875,
      "learning_rate": 6.994219653179191e-05,
      "loss": 2.0907,
      "step": 121
    },
    {
      "epoch": 0.08475900998697351,
      "grad_norm": 0.80078125,
      "learning_rate": 7.052023121387283e-05,
      "loss": 1.7412,
      "step": 122
    },
    {
      "epoch": 0.08545375597047329,
      "grad_norm": 0.87890625,
      "learning_rate": 7.109826589595376e-05,
      "loss": 1.9133,
      "step": 123
    },
    {
      "epoch": 0.08614850195397308,
      "grad_norm": 0.60546875,
      "learning_rate": 7.167630057803468e-05,
      "loss": 1.9658,
      "step": 124
    },
    {
      "epoch": 0.08684324793747286,
      "grad_norm": 1.3203125,
      "learning_rate": 7.225433526011561e-05,
      "loss": 1.947,
      "step": 125
    },
    {
      "epoch": 0.08753799392097264,
      "grad_norm": 1.1484375,
      "learning_rate": 7.283236994219653e-05,
      "loss": 1.912,
      "step": 126
    },
    {
      "epoch": 0.08823273990447243,
      "grad_norm": 0.431640625,
      "learning_rate": 7.341040462427746e-05,
      "loss": 2.0971,
      "step": 127
    },
    {
      "epoch": 0.08892748588797221,
      "grad_norm": 0.6953125,
      "learning_rate": 7.398843930635838e-05,
      "loss": 2.044,
      "step": 128
    },
    {
      "epoch": 0.08962223187147199,
      "grad_norm": 0.8046875,
      "learning_rate": 7.456647398843931e-05,
      "loss": 2.0081,
      "step": 129
    },
    {
      "epoch": 0.09031697785497178,
      "grad_norm": 1.109375,
      "learning_rate": 7.514450867052023e-05,
      "loss": 1.8501,
      "step": 130
    },
    {
      "epoch": 0.09101172383847156,
      "grad_norm": 0.74609375,
      "learning_rate": 7.572254335260116e-05,
      "loss": 1.7543,
      "step": 131
    },
    {
      "epoch": 0.09170646982197134,
      "grad_norm": 0.5703125,
      "learning_rate": 7.630057803468207e-05,
      "loss": 1.9667,
      "step": 132
    },
    {
      "epoch": 0.09240121580547113,
      "grad_norm": 0.82421875,
      "learning_rate": 7.6878612716763e-05,
      "loss": 2.1222,
      "step": 133
    },
    {
      "epoch": 0.09309596178897091,
      "grad_norm": 0.8828125,
      "learning_rate": 7.745664739884392e-05,
      "loss": 1.5583,
      "step": 134
    },
    {
      "epoch": 0.09379070777247069,
      "grad_norm": 1.625,
      "learning_rate": 7.803468208092485e-05,
      "loss": 2.2327,
      "step": 135
    },
    {
      "epoch": 0.09448545375597048,
      "grad_norm": 1.0,
      "learning_rate": 7.861271676300579e-05,
      "loss": 2.3996,
      "step": 136
    },
    {
      "epoch": 0.09518019973947026,
      "grad_norm": 1.0859375,
      "learning_rate": 7.91907514450867e-05,
      "loss": 2.1297,
      "step": 137
    },
    {
      "epoch": 0.09587494572297003,
      "grad_norm": 1.5390625,
      "learning_rate": 7.976878612716763e-05,
      "loss": 1.5923,
      "step": 138
    },
    {
      "epoch": 0.09656969170646983,
      "grad_norm": 1.171875,
      "learning_rate": 8.034682080924855e-05,
      "loss": 1.671,
      "step": 139
    },
    {
      "epoch": 0.0972644376899696,
      "grad_norm": 0.82421875,
      "learning_rate": 8.092485549132948e-05,
      "loss": 1.9664,
      "step": 140
    },
    {
      "epoch": 0.09795918367346938,
      "grad_norm": 1.484375,
      "learning_rate": 8.15028901734104e-05,
      "loss": 1.8026,
      "step": 141
    },
    {
      "epoch": 0.09865392965696917,
      "grad_norm": 2.171875,
      "learning_rate": 8.208092485549133e-05,
      "loss": 2.1695,
      "step": 142
    },
    {
      "epoch": 0.09934867564046895,
      "grad_norm": 0.75390625,
      "learning_rate": 8.265895953757226e-05,
      "loss": 1.9029,
      "step": 143
    },
    {
      "epoch": 0.10004342162396873,
      "grad_norm": 1.0078125,
      "learning_rate": 8.323699421965318e-05,
      "loss": 1.6349,
      "step": 144
    },
    {
      "epoch": 0.10073816760746852,
      "grad_norm": 0.73828125,
      "learning_rate": 8.381502890173411e-05,
      "loss": 2.1295,
      "step": 145
    },
    {
      "epoch": 0.1014329135909683,
      "grad_norm": 2.703125,
      "learning_rate": 8.439306358381503e-05,
      "loss": 1.9088,
      "step": 146
    },
    {
      "epoch": 0.10212765957446808,
      "grad_norm": 1.34375,
      "learning_rate": 8.497109826589596e-05,
      "loss": 2.0262,
      "step": 147
    },
    {
      "epoch": 0.10282240555796787,
      "grad_norm": 4.15625,
      "learning_rate": 8.554913294797689e-05,
      "loss": 1.7243,
      "step": 148
    },
    {
      "epoch": 0.10351715154146765,
      "grad_norm": 0.9921875,
      "learning_rate": 8.612716763005781e-05,
      "loss": 2.2122,
      "step": 149
    },
    {
      "epoch": 0.10421189752496743,
      "grad_norm": 0.7109375,
      "learning_rate": 8.670520231213874e-05,
      "loss": 2.3888,
      "step": 150
    },
    {
      "epoch": 0.10490664350846722,
      "grad_norm": 0.734375,
      "learning_rate": 8.728323699421966e-05,
      "loss": 1.8788,
      "step": 151
    },
    {
      "epoch": 0.105601389491967,
      "grad_norm": 1.3203125,
      "learning_rate": 8.786127167630059e-05,
      "loss": 1.9623,
      "step": 152
    },
    {
      "epoch": 0.10629613547546678,
      "grad_norm": 1.25,
      "learning_rate": 8.84393063583815e-05,
      "loss": 1.7922,
      "step": 153
    },
    {
      "epoch": 0.10699088145896657,
      "grad_norm": 1.3359375,
      "learning_rate": 8.901734104046244e-05,
      "loss": 2.0573,
      "step": 154
    },
    {
      "epoch": 0.10768562744246635,
      "grad_norm": 1.4609375,
      "learning_rate": 8.959537572254337e-05,
      "loss": 1.978,
      "step": 155
    },
    {
      "epoch": 0.10838037342596613,
      "grad_norm": 0.81640625,
      "learning_rate": 9.017341040462428e-05,
      "loss": 2.4477,
      "step": 156
    },
    {
      "epoch": 0.10907511940946592,
      "grad_norm": 0.8125,
      "learning_rate": 9.075144508670522e-05,
      "loss": 2.1113,
      "step": 157
    },
    {
      "epoch": 0.1097698653929657,
      "grad_norm": 1.0078125,
      "learning_rate": 9.132947976878613e-05,
      "loss": 1.7494,
      "step": 158
    },
    {
      "epoch": 0.11046461137646547,
      "grad_norm": 0.81640625,
      "learning_rate": 9.190751445086706e-05,
      "loss": 2.1112,
      "step": 159
    },
    {
      "epoch": 0.11115935735996527,
      "grad_norm": 0.9765625,
      "learning_rate": 9.248554913294798e-05,
      "loss": 1.9607,
      "step": 160
    },
    {
      "epoch": 0.11185410334346504,
      "grad_norm": 0.84765625,
      "learning_rate": 9.306358381502891e-05,
      "loss": 1.8264,
      "step": 161
    },
    {
      "epoch": 0.11254884932696482,
      "grad_norm": 0.8984375,
      "learning_rate": 9.364161849710983e-05,
      "loss": 1.9532,
      "step": 162
    },
    {
      "epoch": 0.11324359531046461,
      "grad_norm": 1.1796875,
      "learning_rate": 9.421965317919076e-05,
      "loss": 1.9819,
      "step": 163
    },
    {
      "epoch": 0.11393834129396439,
      "grad_norm": 0.70703125,
      "learning_rate": 9.479768786127168e-05,
      "loss": 2.0391,
      "step": 164
    },
    {
      "epoch": 0.11463308727746417,
      "grad_norm": 0.92578125,
      "learning_rate": 9.537572254335261e-05,
      "loss": 2.027,
      "step": 165
    },
    {
      "epoch": 0.11532783326096396,
      "grad_norm": 0.76953125,
      "learning_rate": 9.595375722543353e-05,
      "loss": 1.5242,
      "step": 166
    },
    {
      "epoch": 0.11602257924446374,
      "grad_norm": 0.77734375,
      "learning_rate": 9.653179190751446e-05,
      "loss": 1.8081,
      "step": 167
    },
    {
      "epoch": 0.11671732522796352,
      "grad_norm": 1.2734375,
      "learning_rate": 9.710982658959538e-05,
      "loss": 1.8001,
      "step": 168
    },
    {
      "epoch": 0.11741207121146331,
      "grad_norm": 0.82421875,
      "learning_rate": 9.768786127167631e-05,
      "loss": 1.8917,
      "step": 169
    },
    {
      "epoch": 0.11810681719496309,
      "grad_norm": 0.57421875,
      "learning_rate": 9.826589595375723e-05,
      "loss": 2.0557,
      "step": 170
    },
    {
      "epoch": 0.11880156317846287,
      "grad_norm": 0.8046875,
      "learning_rate": 9.884393063583816e-05,
      "loss": 2.0773,
      "step": 171
    },
    {
      "epoch": 0.11949630916196266,
      "grad_norm": 1.015625,
      "learning_rate": 9.942196531791907e-05,
      "loss": 1.566,
      "step": 172
    },
    {
      "epoch": 0.12019105514546244,
      "grad_norm": 0.78125,
      "learning_rate": 0.0001,
      "loss": 2.0485,
      "step": 173
    },
    {
      "epoch": 0.12088580112896222,
      "grad_norm": 0.87109375,
      "learning_rate": 0.00010057803468208094,
      "loss": 1.7415,
      "step": 174
    },
    {
      "epoch": 0.12158054711246201,
      "grad_norm": 0.75390625,
      "learning_rate": 0.00010115606936416187,
      "loss": 2.2481,
      "step": 175
    },
    {
      "epoch": 0.12227529309596179,
      "grad_norm": 1.0,
      "learning_rate": 0.00010173410404624277,
      "loss": 1.878,
      "step": 176
    },
    {
      "epoch": 0.12297003907946157,
      "grad_norm": 1.1328125,
      "learning_rate": 0.0001023121387283237,
      "loss": 1.5244,
      "step": 177
    },
    {
      "epoch": 0.12366478506296136,
      "grad_norm": 0.57421875,
      "learning_rate": 0.00010289017341040463,
      "loss": 2.1072,
      "step": 178
    },
    {
      "epoch": 0.12435953104646114,
      "grad_norm": 0.81640625,
      "learning_rate": 0.00010346820809248556,
      "loss": 1.9914,
      "step": 179
    },
    {
      "epoch": 0.12505427702996091,
      "grad_norm": 0.828125,
      "learning_rate": 0.00010404624277456647,
      "loss": 1.3949,
      "step": 180
    },
    {
      "epoch": 0.1257490230134607,
      "grad_norm": 0.71484375,
      "learning_rate": 0.0001046242774566474,
      "loss": 1.7482,
      "step": 181
    },
    {
      "epoch": 0.1264437689969605,
      "grad_norm": 1.1015625,
      "learning_rate": 0.00010520231213872833,
      "loss": 1.6729,
      "step": 182
    },
    {
      "epoch": 0.12713851498046028,
      "grad_norm": 0.58984375,
      "learning_rate": 0.00010578034682080926,
      "loss": 1.7445,
      "step": 183
    },
    {
      "epoch": 0.12783326096396005,
      "grad_norm": 0.462890625,
      "learning_rate": 0.00010635838150289017,
      "loss": 2.1268,
      "step": 184
    },
    {
      "epoch": 0.12852800694745983,
      "grad_norm": 0.9453125,
      "learning_rate": 0.0001069364161849711,
      "loss": 1.6518,
      "step": 185
    },
    {
      "epoch": 0.1292227529309596,
      "grad_norm": 0.8515625,
      "learning_rate": 0.00010751445086705203,
      "loss": 1.9006,
      "step": 186
    },
    {
      "epoch": 0.1299174989144594,
      "grad_norm": 17.25,
      "learning_rate": 0.00010809248554913296,
      "loss": 2.0701,
      "step": 187
    },
    {
      "epoch": 0.1306122448979592,
      "grad_norm": 0.828125,
      "learning_rate": 0.00010867052023121386,
      "loss": 2.0978,
      "step": 188
    },
    {
      "epoch": 0.13130699088145897,
      "grad_norm": 1.1015625,
      "learning_rate": 0.0001092485549132948,
      "loss": 1.8128,
      "step": 189
    },
    {
      "epoch": 0.13200173686495875,
      "grad_norm": 0.90625,
      "learning_rate": 0.00010982658959537572,
      "loss": 1.9241,
      "step": 190
    },
    {
      "epoch": 0.13269648284845853,
      "grad_norm": 0.5859375,
      "learning_rate": 0.00011040462427745666,
      "loss": 1.7011,
      "step": 191
    },
    {
      "epoch": 0.1333912288319583,
      "grad_norm": 0.6796875,
      "learning_rate": 0.00011098265895953757,
      "loss": 2.2784,
      "step": 192
    },
    {
      "epoch": 0.1340859748154581,
      "grad_norm": 3.078125,
      "learning_rate": 0.00011156069364161849,
      "loss": 2.16,
      "step": 193
    },
    {
      "epoch": 0.1347807207989579,
      "grad_norm": 0.7421875,
      "learning_rate": 0.00011213872832369942,
      "loss": 1.734,
      "step": 194
    },
    {
      "epoch": 0.13547546678245767,
      "grad_norm": 0.734375,
      "learning_rate": 0.00011271676300578035,
      "loss": 1.6558,
      "step": 195
    },
    {
      "epoch": 0.13617021276595745,
      "grad_norm": 0.70703125,
      "learning_rate": 0.00011329479768786128,
      "loss": 1.7766,
      "step": 196
    },
    {
      "epoch": 0.13686495874945723,
      "grad_norm": 0.72265625,
      "learning_rate": 0.0001138728323699422,
      "loss": 1.6735,
      "step": 197
    },
    {
      "epoch": 0.137559704732957,
      "grad_norm": 1.1640625,
      "learning_rate": 0.00011445086705202312,
      "loss": 2.2215,
      "step": 198
    },
    {
      "epoch": 0.13825445071645678,
      "grad_norm": 0.6484375,
      "learning_rate": 0.00011502890173410405,
      "loss": 1.8671,
      "step": 199
    },
    {
      "epoch": 0.1389491966999566,
      "grad_norm": 1.40625,
      "learning_rate": 0.00011560693641618498,
      "loss": 1.9517,
      "step": 200
    },
    {
      "epoch": 0.13964394268345637,
      "grad_norm": 0.67578125,
      "learning_rate": 0.0001161849710982659,
      "loss": 2.0064,
      "step": 201
    },
    {
      "epoch": 0.14033868866695615,
      "grad_norm": 1.0234375,
      "learning_rate": 0.00011676300578034683,
      "loss": 1.7618,
      "step": 202
    },
    {
      "epoch": 0.14103343465045592,
      "grad_norm": 1.3828125,
      "learning_rate": 0.00011734104046242775,
      "loss": 1.746,
      "step": 203
    },
    {
      "epoch": 0.1417281806339557,
      "grad_norm": 1.453125,
      "learning_rate": 0.00011791907514450868,
      "loss": 2.4571,
      "step": 204
    },
    {
      "epoch": 0.14242292661745548,
      "grad_norm": 1.171875,
      "learning_rate": 0.0001184971098265896,
      "loss": 2.0719,
      "step": 205
    },
    {
      "epoch": 0.1431176726009553,
      "grad_norm": 0.93359375,
      "learning_rate": 0.00011907514450867053,
      "loss": 1.892,
      "step": 206
    },
    {
      "epoch": 0.14381241858445507,
      "grad_norm": 0.9375,
      "learning_rate": 0.00011965317919075146,
      "loss": 1.9472,
      "step": 207
    },
    {
      "epoch": 0.14450716456795484,
      "grad_norm": 0.78515625,
      "learning_rate": 0.00012023121387283239,
      "loss": 1.9143,
      "step": 208
    },
    {
      "epoch": 0.14520191055145462,
      "grad_norm": 0.4296875,
      "learning_rate": 0.00012080924855491329,
      "loss": 1.9386,
      "step": 209
    },
    {
      "epoch": 0.1458966565349544,
      "grad_norm": 4.40625,
      "learning_rate": 0.00012138728323699422,
      "loss": 1.6159,
      "step": 210
    },
    {
      "epoch": 0.14659140251845418,
      "grad_norm": 0.59375,
      "learning_rate": 0.00012196531791907516,
      "loss": 1.9722,
      "step": 211
    },
    {
      "epoch": 0.14728614850195398,
      "grad_norm": 1.0703125,
      "learning_rate": 0.00012254335260115609,
      "loss": 2.2088,
      "step": 212
    },
    {
      "epoch": 0.14798089448545376,
      "grad_norm": 0.416015625,
      "learning_rate": 0.00012312138728323702,
      "loss": 2.3268,
      "step": 213
    },
    {
      "epoch": 0.14867564046895354,
      "grad_norm": 0.87109375,
      "learning_rate": 0.00012369942196531792,
      "loss": 1.9869,
      "step": 214
    },
    {
      "epoch": 0.14937038645245332,
      "grad_norm": 0.82421875,
      "learning_rate": 0.00012427745664739885,
      "loss": 1.6529,
      "step": 215
    },
    {
      "epoch": 0.1500651324359531,
      "grad_norm": 1.1796875,
      "learning_rate": 0.00012485549132947978,
      "loss": 1.788,
      "step": 216
    },
    {
      "epoch": 0.15075987841945288,
      "grad_norm": 0.80078125,
      "learning_rate": 0.00012543352601156071,
      "loss": 2.1698,
      "step": 217
    },
    {
      "epoch": 0.15145462440295268,
      "grad_norm": 0.9296875,
      "learning_rate": 0.00012601156069364162,
      "loss": 1.8094,
      "step": 218
    },
    {
      "epoch": 0.15214937038645246,
      "grad_norm": 0.859375,
      "learning_rate": 0.00012658959537572255,
      "loss": 1.5737,
      "step": 219
    },
    {
      "epoch": 0.15284411636995224,
      "grad_norm": 0.75,
      "learning_rate": 0.00012716763005780348,
      "loss": 2.0224,
      "step": 220
    },
    {
      "epoch": 0.15353886235345202,
      "grad_norm": 0.9140625,
      "learning_rate": 0.0001277456647398844,
      "loss": 1.9444,
      "step": 221
    },
    {
      "epoch": 0.1542336083369518,
      "grad_norm": 1.5078125,
      "learning_rate": 0.00012832369942196532,
      "loss": 1.8394,
      "step": 222
    },
    {
      "epoch": 0.15492835432045157,
      "grad_norm": 0.9609375,
      "learning_rate": 0.00012890173410404625,
      "loss": 1.9949,
      "step": 223
    },
    {
      "epoch": 0.15562310030395138,
      "grad_norm": 0.515625,
      "learning_rate": 0.00012947976878612718,
      "loss": 1.8063,
      "step": 224
    },
    {
      "epoch": 0.15631784628745116,
      "grad_norm": 0.76171875,
      "learning_rate": 0.0001300578034682081,
      "loss": 2.108,
      "step": 225
    },
    {
      "epoch": 0.15701259227095093,
      "grad_norm": 0.8203125,
      "learning_rate": 0.000130635838150289,
      "loss": 1.865,
      "step": 226
    },
    {
      "epoch": 0.1577073382544507,
      "grad_norm": 1.171875,
      "learning_rate": 0.00013121387283236994,
      "loss": 1.9677,
      "step": 227
    },
    {
      "epoch": 0.1584020842379505,
      "grad_norm": 0.875,
      "learning_rate": 0.00013179190751445087,
      "loss": 1.5685,
      "step": 228
    },
    {
      "epoch": 0.15909683022145027,
      "grad_norm": 0.609375,
      "learning_rate": 0.0001323699421965318,
      "loss": 1.7492,
      "step": 229
    },
    {
      "epoch": 0.15979157620495008,
      "grad_norm": 1.609375,
      "learning_rate": 0.0001329479768786127,
      "loss": 2.3405,
      "step": 230
    },
    {
      "epoch": 0.16048632218844985,
      "grad_norm": 1.3125,
      "learning_rate": 0.00013352601156069364,
      "loss": 1.8523,
      "step": 231
    },
    {
      "epoch": 0.16118106817194963,
      "grad_norm": 0.7890625,
      "learning_rate": 0.00013410404624277457,
      "loss": 1.6507,
      "step": 232
    },
    {
      "epoch": 0.1618758141554494,
      "grad_norm": 0.70703125,
      "learning_rate": 0.0001346820809248555,
      "loss": 1.9661,
      "step": 233
    },
    {
      "epoch": 0.1625705601389492,
      "grad_norm": 0.65234375,
      "learning_rate": 0.00013526011560693643,
      "loss": 1.8777,
      "step": 234
    },
    {
      "epoch": 0.16326530612244897,
      "grad_norm": 0.78125,
      "learning_rate": 0.00013583815028901734,
      "loss": 1.7511,
      "step": 235
    },
    {
      "epoch": 0.16396005210594877,
      "grad_norm": 0.69140625,
      "learning_rate": 0.00013641618497109827,
      "loss": 2.1641,
      "step": 236
    },
    {
      "epoch": 0.16465479808944855,
      "grad_norm": 0.77734375,
      "learning_rate": 0.0001369942196531792,
      "loss": 2.0807,
      "step": 237
    },
    {
      "epoch": 0.16534954407294833,
      "grad_norm": 0.93359375,
      "learning_rate": 0.00013757225433526013,
      "loss": 1.5565,
      "step": 238
    },
    {
      "epoch": 0.1660442900564481,
      "grad_norm": 0.58984375,
      "learning_rate": 0.00013815028901734104,
      "loss": 2.0807,
      "step": 239
    },
    {
      "epoch": 0.16673903603994789,
      "grad_norm": 0.5078125,
      "learning_rate": 0.00013872832369942197,
      "loss": 1.7981,
      "step": 240
    },
    {
      "epoch": 0.16743378202344766,
      "grad_norm": 0.578125,
      "learning_rate": 0.0001393063583815029,
      "loss": 1.6482,
      "step": 241
    },
    {
      "epoch": 0.16812852800694747,
      "grad_norm": 0.8046875,
      "learning_rate": 0.00013988439306358383,
      "loss": 1.8768,
      "step": 242
    },
    {
      "epoch": 0.16882327399044725,
      "grad_norm": 1.328125,
      "learning_rate": 0.00014046242774566473,
      "loss": 1.6573,
      "step": 243
    },
    {
      "epoch": 0.16951801997394703,
      "grad_norm": 1.0546875,
      "learning_rate": 0.00014104046242774566,
      "loss": 1.6465,
      "step": 244
    },
    {
      "epoch": 0.1702127659574468,
      "grad_norm": 0.859375,
      "learning_rate": 0.0001416184971098266,
      "loss": 2.1823,
      "step": 245
    },
    {
      "epoch": 0.17090751194094658,
      "grad_norm": 1.078125,
      "learning_rate": 0.00014219653179190753,
      "loss": 2.1286,
      "step": 246
    },
    {
      "epoch": 0.17160225792444636,
      "grad_norm": 0.64453125,
      "learning_rate": 0.00014277456647398843,
      "loss": 2.0725,
      "step": 247
    },
    {
      "epoch": 0.17229700390794617,
      "grad_norm": 1.0,
      "learning_rate": 0.00014335260115606936,
      "loss": 1.9063,
      "step": 248
    },
    {
      "epoch": 0.17299174989144595,
      "grad_norm": 0.71484375,
      "learning_rate": 0.0001439306358381503,
      "loss": 2.1774,
      "step": 249
    },
    {
      "epoch": 0.17368649587494572,
      "grad_norm": 0.640625,
      "learning_rate": 0.00014450867052023122,
      "loss": 1.8539,
      "step": 250
    },
    {
      "epoch": 0.1743812418584455,
      "grad_norm": 0.89453125,
      "learning_rate": 0.00014508670520231215,
      "loss": 1.8404,
      "step": 251
    },
    {
      "epoch": 0.17507598784194528,
      "grad_norm": 0.84375,
      "learning_rate": 0.00014566473988439306,
      "loss": 1.8873,
      "step": 252
    },
    {
      "epoch": 0.17577073382544506,
      "grad_norm": 0.6953125,
      "learning_rate": 0.000146242774566474,
      "loss": 1.9706,
      "step": 253
    },
    {
      "epoch": 0.17646547980894486,
      "grad_norm": 0.94921875,
      "learning_rate": 0.00014682080924855492,
      "loss": 1.9709,
      "step": 254
    },
    {
      "epoch": 0.17716022579244464,
      "grad_norm": 0.765625,
      "learning_rate": 0.00014739884393063585,
      "loss": 1.8306,
      "step": 255
    },
    {
      "epoch": 0.17785497177594442,
      "grad_norm": 1.0,
      "learning_rate": 0.00014797687861271676,
      "loss": 2.1021,
      "step": 256
    },
    {
      "epoch": 0.1785497177594442,
      "grad_norm": 0.62890625,
      "learning_rate": 0.00014855491329479769,
      "loss": 2.1095,
      "step": 257
    },
    {
      "epoch": 0.17924446374294398,
      "grad_norm": 0.6328125,
      "learning_rate": 0.00014913294797687862,
      "loss": 2.0039,
      "step": 258
    },
    {
      "epoch": 0.17993920972644378,
      "grad_norm": 1.4765625,
      "learning_rate": 0.00014971098265895955,
      "loss": 1.7203,
      "step": 259
    },
    {
      "epoch": 0.18063395570994356,
      "grad_norm": 0.72265625,
      "learning_rate": 0.00015028901734104045,
      "loss": 1.6399,
      "step": 260
    },
    {
      "epoch": 0.18132870169344334,
      "grad_norm": 0.90625,
      "learning_rate": 0.00015086705202312138,
      "loss": 1.6332,
      "step": 261
    },
    {
      "epoch": 0.18202344767694312,
      "grad_norm": 1.3359375,
      "learning_rate": 0.00015144508670520231,
      "loss": 1.9613,
      "step": 262
    },
    {
      "epoch": 0.1827181936604429,
      "grad_norm": 1.2265625,
      "learning_rate": 0.00015202312138728325,
      "loss": 1.7818,
      "step": 263
    },
    {
      "epoch": 0.18341293964394267,
      "grad_norm": 1.5625,
      "learning_rate": 0.00015260115606936415,
      "loss": 2.2765,
      "step": 264
    },
    {
      "epoch": 0.18410768562744248,
      "grad_norm": 1.625,
      "learning_rate": 0.00015317919075144508,
      "loss": 1.7086,
      "step": 265
    },
    {
      "epoch": 0.18480243161094226,
      "grad_norm": 1.203125,
      "learning_rate": 0.000153757225433526,
      "loss": 1.7275,
      "step": 266
    },
    {
      "epoch": 0.18549717759444204,
      "grad_norm": 0.7890625,
      "learning_rate": 0.00015433526011560694,
      "loss": 2.0655,
      "step": 267
    },
    {
      "epoch": 0.18619192357794181,
      "grad_norm": 1.1796875,
      "learning_rate": 0.00015491329479768785,
      "loss": 1.5989,
      "step": 268
    },
    {
      "epoch": 0.1868866695614416,
      "grad_norm": 1.546875,
      "learning_rate": 0.00015549132947976878,
      "loss": 1.1796,
      "step": 269
    },
    {
      "epoch": 0.18758141554494137,
      "grad_norm": 0.859375,
      "learning_rate": 0.0001560693641618497,
      "loss": 1.8115,
      "step": 270
    },
    {
      "epoch": 0.18827616152844118,
      "grad_norm": 1.359375,
      "learning_rate": 0.00015664739884393064,
      "loss": 1.9066,
      "step": 271
    },
    {
      "epoch": 0.18897090751194096,
      "grad_norm": 1.1875,
      "learning_rate": 0.00015722543352601157,
      "loss": 2.0165,
      "step": 272
    },
    {
      "epoch": 0.18966565349544073,
      "grad_norm": 1.4765625,
      "learning_rate": 0.00015780346820809248,
      "loss": 2.3133,
      "step": 273
    },
    {
      "epoch": 0.1903603994789405,
      "grad_norm": 1.1953125,
      "learning_rate": 0.0001583815028901734,
      "loss": 2.0332,
      "step": 274
    },
    {
      "epoch": 0.1910551454624403,
      "grad_norm": 3.734375,
      "learning_rate": 0.00015895953757225434,
      "loss": 1.9067,
      "step": 275
    },
    {
      "epoch": 0.19174989144594007,
      "grad_norm": 0.83203125,
      "learning_rate": 0.00015953757225433527,
      "loss": 1.9406,
      "step": 276
    },
    {
      "epoch": 0.19244463742943987,
      "grad_norm": 0.66796875,
      "learning_rate": 0.00016011560693641617,
      "loss": 1.8024,
      "step": 277
    },
    {
      "epoch": 0.19313938341293965,
      "grad_norm": 0.76953125,
      "learning_rate": 0.0001606936416184971,
      "loss": 1.803,
      "step": 278
    },
    {
      "epoch": 0.19383412939643943,
      "grad_norm": 0.84375,
      "learning_rate": 0.00016127167630057803,
      "loss": 2.1169,
      "step": 279
    },
    {
      "epoch": 0.1945288753799392,
      "grad_norm": 0.75390625,
      "learning_rate": 0.00016184971098265897,
      "loss": 1.8095,
      "step": 280
    },
    {
      "epoch": 0.195223621363439,
      "grad_norm": 0.671875,
      "learning_rate": 0.0001624277456647399,
      "loss": 2.1282,
      "step": 281
    },
    {
      "epoch": 0.19591836734693877,
      "grad_norm": 0.92578125,
      "learning_rate": 0.0001630057803468208,
      "loss": 1.7845,
      "step": 282
    },
    {
      "epoch": 0.19661311333043857,
      "grad_norm": 1.15625,
      "learning_rate": 0.00016358381502890173,
      "loss": 1.8735,
      "step": 283
    },
    {
      "epoch": 0.19730785931393835,
      "grad_norm": 1.2265625,
      "learning_rate": 0.00016416184971098266,
      "loss": 1.9958,
      "step": 284
    },
    {
      "epoch": 0.19800260529743813,
      "grad_norm": 0.796875,
      "learning_rate": 0.0001647398843930636,
      "loss": 2.1224,
      "step": 285
    },
    {
      "epoch": 0.1986973512809379,
      "grad_norm": 1.0390625,
      "learning_rate": 0.00016531791907514452,
      "loss": 2.1101,
      "step": 286
    },
    {
      "epoch": 0.19939209726443768,
      "grad_norm": 0.69140625,
      "learning_rate": 0.00016589595375722543,
      "loss": 2.1356,
      "step": 287
    },
    {
      "epoch": 0.20008684324793746,
      "grad_norm": 1.2109375,
      "learning_rate": 0.00016647398843930636,
      "loss": 1.6759,
      "step": 288
    },
    {
      "epoch": 0.20078158923143727,
      "grad_norm": 0.94140625,
      "learning_rate": 0.0001670520231213873,
      "loss": 2.1158,
      "step": 289
    },
    {
      "epoch": 0.20147633521493705,
      "grad_norm": 0.66796875,
      "learning_rate": 0.00016763005780346822,
      "loss": 2.1826,
      "step": 290
    },
    {
      "epoch": 0.20217108119843683,
      "grad_norm": 0.91015625,
      "learning_rate": 0.00016820809248554915,
      "loss": 1.8656,
      "step": 291
    },
    {
      "epoch": 0.2028658271819366,
      "grad_norm": 1.03125,
      "learning_rate": 0.00016878612716763006,
      "loss": 2.2818,
      "step": 292
    },
    {
      "epoch": 0.20356057316543638,
      "grad_norm": 0.7109375,
      "learning_rate": 0.000169364161849711,
      "loss": 1.6402,
      "step": 293
    },
    {
      "epoch": 0.20425531914893616,
      "grad_norm": 5.40625,
      "learning_rate": 0.00016994219653179192,
      "loss": 1.8538,
      "step": 294
    },
    {
      "epoch": 0.20495006513243597,
      "grad_norm": 2.859375,
      "learning_rate": 0.00017052023121387285,
      "loss": 1.6352,
      "step": 295
    },
    {
      "epoch": 0.20564481111593574,
      "grad_norm": 0.9453125,
      "learning_rate": 0.00017109826589595378,
      "loss": 1.7506,
      "step": 296
    },
    {
      "epoch": 0.20633955709943552,
      "grad_norm": 0.78125,
      "learning_rate": 0.0001716763005780347,
      "loss": 1.889,
      "step": 297
    },
    {
      "epoch": 0.2070343030829353,
      "grad_norm": 0.703125,
      "learning_rate": 0.00017225433526011562,
      "loss": 2.0453,
      "step": 298
    },
    {
      "epoch": 0.20772904906643508,
      "grad_norm": 1.125,
      "learning_rate": 0.00017283236994219655,
      "loss": 2.182,
      "step": 299
    },
    {
      "epoch": 0.20842379504993486,
      "grad_norm": 0.73828125,
      "learning_rate": 0.00017341040462427748,
      "loss": 1.943,
      "step": 300
    },
    {
      "epoch": 0.20911854103343466,
      "grad_norm": 0.93359375,
      "learning_rate": 0.0001739884393063584,
      "loss": 1.9609,
      "step": 301
    },
    {
      "epoch": 0.20981328701693444,
      "grad_norm": 0.93359375,
      "learning_rate": 0.0001745664739884393,
      "loss": 1.8991,
      "step": 302
    },
    {
      "epoch": 0.21050803300043422,
      "grad_norm": 1.328125,
      "learning_rate": 0.00017514450867052024,
      "loss": 1.8573,
      "step": 303
    },
    {
      "epoch": 0.211202778983934,
      "grad_norm": 0.84375,
      "learning_rate": 0.00017572254335260118,
      "loss": 2.1771,
      "step": 304
    },
    {
      "epoch": 0.21189752496743378,
      "grad_norm": 0.9375,
      "learning_rate": 0.0001763005780346821,
      "loss": 1.9985,
      "step": 305
    },
    {
      "epoch": 0.21259227095093355,
      "grad_norm": 0.62890625,
      "learning_rate": 0.000176878612716763,
      "loss": 1.5911,
      "step": 306
    },
    {
      "epoch": 0.21328701693443336,
      "grad_norm": 0.91796875,
      "learning_rate": 0.00017745664739884394,
      "loss": 2.0792,
      "step": 307
    },
    {
      "epoch": 0.21398176291793314,
      "grad_norm": 0.8046875,
      "learning_rate": 0.00017803468208092487,
      "loss": 1.5303,
      "step": 308
    },
    {
      "epoch": 0.21467650890143292,
      "grad_norm": 0.55078125,
      "learning_rate": 0.0001786127167630058,
      "loss": 2.1808,
      "step": 309
    },
    {
      "epoch": 0.2153712548849327,
      "grad_norm": 0.84765625,
      "learning_rate": 0.00017919075144508673,
      "loss": 1.5205,
      "step": 310
    },
    {
      "epoch": 0.21606600086843247,
      "grad_norm": 0.96875,
      "learning_rate": 0.00017976878612716764,
      "loss": 1.8961,
      "step": 311
    },
    {
      "epoch": 0.21676074685193225,
      "grad_norm": 0.9921875,
      "learning_rate": 0.00018034682080924857,
      "loss": 1.634,
      "step": 312
    },
    {
      "epoch": 0.21745549283543206,
      "grad_norm": 1.1953125,
      "learning_rate": 0.0001809248554913295,
      "loss": 1.9757,
      "step": 313
    },
    {
      "epoch": 0.21815023881893184,
      "grad_norm": 1.2109375,
      "learning_rate": 0.00018150289017341043,
      "loss": 1.6177,
      "step": 314
    },
    {
      "epoch": 0.2188449848024316,
      "grad_norm": 0.76953125,
      "learning_rate": 0.00018208092485549134,
      "loss": 1.5741,
      "step": 315
    },
    {
      "epoch": 0.2195397307859314,
      "grad_norm": 0.61328125,
      "learning_rate": 0.00018265895953757227,
      "loss": 1.8792,
      "step": 316
    },
    {
      "epoch": 0.22023447676943117,
      "grad_norm": 0.984375,
      "learning_rate": 0.0001832369942196532,
      "loss": 1.973,
      "step": 317
    },
    {
      "epoch": 0.22092922275293095,
      "grad_norm": 1.0703125,
      "learning_rate": 0.00018381502890173413,
      "loss": 2.0379,
      "step": 318
    },
    {
      "epoch": 0.22162396873643075,
      "grad_norm": 0.84375,
      "learning_rate": 0.00018439306358381503,
      "loss": 1.9269,
      "step": 319
    },
    {
      "epoch": 0.22231871471993053,
      "grad_norm": 0.8359375,
      "learning_rate": 0.00018497109826589596,
      "loss": 2.2882,
      "step": 320
    },
    {
      "epoch": 0.2230134607034303,
      "grad_norm": 1.09375,
      "learning_rate": 0.0001855491329479769,
      "loss": 1.7531,
      "step": 321
    },
    {
      "epoch": 0.2237082066869301,
      "grad_norm": 0.84765625,
      "learning_rate": 0.00018612716763005783,
      "loss": 2.011,
      "step": 322
    },
    {
      "epoch": 0.22440295267042987,
      "grad_norm": 0.61328125,
      "learning_rate": 0.00018670520231213873,
      "loss": 1.7293,
      "step": 323
    },
    {
      "epoch": 0.22509769865392965,
      "grad_norm": 1.328125,
      "learning_rate": 0.00018728323699421966,
      "loss": 1.8176,
      "step": 324
    },
    {
      "epoch": 0.22579244463742945,
      "grad_norm": 0.75,
      "learning_rate": 0.0001878612716763006,
      "loss": 2.1176,
      "step": 325
    },
    {
      "epoch": 0.22648719062092923,
      "grad_norm": 0.859375,
      "learning_rate": 0.00018843930635838152,
      "loss": 1.9007,
      "step": 326
    },
    {
      "epoch": 0.227181936604429,
      "grad_norm": 0.7421875,
      "learning_rate": 0.00018901734104046245,
      "loss": 1.9553,
      "step": 327
    },
    {
      "epoch": 0.22787668258792879,
      "grad_norm": 2.78125,
      "learning_rate": 0.00018959537572254336,
      "loss": 1.7696,
      "step": 328
    },
    {
      "epoch": 0.22857142857142856,
      "grad_norm": 1.6484375,
      "learning_rate": 0.0001901734104046243,
      "loss": 2.1033,
      "step": 329
    },
    {
      "epoch": 0.22926617455492834,
      "grad_norm": 0.703125,
      "learning_rate": 0.00019075144508670522,
      "loss": 2.0961,
      "step": 330
    },
    {
      "epoch": 0.22996092053842815,
      "grad_norm": 1.0078125,
      "learning_rate": 0.00019132947976878615,
      "loss": 1.8456,
      "step": 331
    },
    {
      "epoch": 0.23065566652192793,
      "grad_norm": 0.81640625,
      "learning_rate": 0.00019190751445086706,
      "loss": 1.6038,
      "step": 332
    },
    {
      "epoch": 0.2313504125054277,
      "grad_norm": 0.625,
      "learning_rate": 0.000192485549132948,
      "loss": 1.9724,
      "step": 333
    },
    {
      "epoch": 0.23204515848892748,
      "grad_norm": 0.71484375,
      "learning_rate": 0.00019306358381502892,
      "loss": 1.9463,
      "step": 334
    },
    {
      "epoch": 0.23273990447242726,
      "grad_norm": 0.65234375,
      "learning_rate": 0.00019364161849710985,
      "loss": 1.8556,
      "step": 335
    },
    {
      "epoch": 0.23343465045592704,
      "grad_norm": 1.375,
      "learning_rate": 0.00019421965317919075,
      "loss": 2.1624,
      "step": 336
    },
    {
      "epoch": 0.23412939643942685,
      "grad_norm": 0.88671875,
      "learning_rate": 0.00019479768786127168,
      "loss": 1.8647,
      "step": 337
    },
    {
      "epoch": 0.23482414242292662,
      "grad_norm": 0.91796875,
      "learning_rate": 0.00019537572254335262,
      "loss": 1.5004,
      "step": 338
    },
    {
      "epoch": 0.2355188884064264,
      "grad_norm": 0.79296875,
      "learning_rate": 0.00019595375722543355,
      "loss": 2.1977,
      "step": 339
    },
    {
      "epoch": 0.23621363438992618,
      "grad_norm": 0.68359375,
      "learning_rate": 0.00019653179190751445,
      "loss": 1.8312,
      "step": 340
    },
    {
      "epoch": 0.23690838037342596,
      "grad_norm": 0.609375,
      "learning_rate": 0.00019710982658959538,
      "loss": 1.9086,
      "step": 341
    },
    {
      "epoch": 0.23760312635692574,
      "grad_norm": 1.453125,
      "learning_rate": 0.0001976878612716763,
      "loss": 1.7357,
      "step": 342
    },
    {
      "epoch": 0.23829787234042554,
      "grad_norm": 0.828125,
      "learning_rate": 0.00019826589595375724,
      "loss": 1.7354,
      "step": 343
    },
    {
      "epoch": 0.23899261832392532,
      "grad_norm": 0.8203125,
      "learning_rate": 0.00019884393063583815,
      "loss": 1.867,
      "step": 344
    },
    {
      "epoch": 0.2396873643074251,
      "grad_norm": 0.94921875,
      "learning_rate": 0.00019942196531791908,
      "loss": 1.9156,
      "step": 345
    },
    {
      "epoch": 0.24038211029092488,
      "grad_norm": 0.953125,
      "learning_rate": 0.0002,
      "loss": 1.9306,
      "step": 346
    },
    {
      "epoch": 0.24107685627442466,
      "grad_norm": 0.69140625,
      "learning_rate": 0.00019994963485268195,
      "loss": 2.1757,
      "step": 347
    },
    {
      "epoch": 0.24177160225792443,
      "grad_norm": 4.09375,
      "learning_rate": 0.0001998992697053639,
      "loss": 1.7667,
      "step": 348
    },
    {
      "epoch": 0.24246634824142424,
      "grad_norm": 0.875,
      "learning_rate": 0.00019984890455804585,
      "loss": 1.9543,
      "step": 349
    },
    {
      "epoch": 0.24316109422492402,
      "grad_norm": 1.234375,
      "learning_rate": 0.00019979853941072778,
      "loss": 1.5744,
      "step": 350
    },
    {
      "epoch": 0.2438558402084238,
      "grad_norm": 0.83203125,
      "learning_rate": 0.00019974817426340972,
      "loss": 1.9601,
      "step": 351
    },
    {
      "epoch": 0.24455058619192357,
      "grad_norm": 1.1015625,
      "learning_rate": 0.00019969780911609168,
      "loss": 2.0014,
      "step": 352
    },
    {
      "epoch": 0.24524533217542335,
      "grad_norm": 0.6484375,
      "learning_rate": 0.00019964744396877362,
      "loss": 1.8605,
      "step": 353
    },
    {
      "epoch": 0.24594007815892313,
      "grad_norm": 1.0859375,
      "learning_rate": 0.00019959707882145555,
      "loss": 1.9851,
      "step": 354
    },
    {
      "epoch": 0.24663482414242294,
      "grad_norm": 0.69921875,
      "learning_rate": 0.00019954671367413752,
      "loss": 2.1791,
      "step": 355
    },
    {
      "epoch": 0.24732957012592272,
      "grad_norm": 0.640625,
      "learning_rate": 0.00019949634852681945,
      "loss": 1.8084,
      "step": 356
    },
    {
      "epoch": 0.2480243161094225,
      "grad_norm": 0.78125,
      "learning_rate": 0.0001994459833795014,
      "loss": 1.9735,
      "step": 357
    },
    {
      "epoch": 0.24871906209292227,
      "grad_norm": 0.9609375,
      "learning_rate": 0.00019939561823218333,
      "loss": 2.0874,
      "step": 358
    },
    {
      "epoch": 0.24941380807642205,
      "grad_norm": 0.91015625,
      "learning_rate": 0.0001993452530848653,
      "loss": 2.0039,
      "step": 359
    },
    {
      "epoch": 0.25010855405992183,
      "grad_norm": 0.734375,
      "learning_rate": 0.00019929488793754723,
      "loss": 1.8483,
      "step": 360
    },
    {
      "epoch": 0.2508033000434216,
      "grad_norm": 0.97265625,
      "learning_rate": 0.00019924452279022916,
      "loss": 1.7729,
      "step": 361
    },
    {
      "epoch": 0.2514980460269214,
      "grad_norm": 0.90234375,
      "learning_rate": 0.00019919415764291113,
      "loss": 1.8132,
      "step": 362
    },
    {
      "epoch": 0.25219279201042116,
      "grad_norm": 1.015625,
      "learning_rate": 0.00019914379249559306,
      "loss": 1.5844,
      "step": 363
    },
    {
      "epoch": 0.252887537993921,
      "grad_norm": 1.4453125,
      "learning_rate": 0.000199093427348275,
      "loss": 2.1404,
      "step": 364
    },
    {
      "epoch": 0.2535822839774208,
      "grad_norm": 2.796875,
      "learning_rate": 0.00019904306220095693,
      "loss": 2.0058,
      "step": 365
    },
    {
      "epoch": 0.25427702996092055,
      "grad_norm": 0.8359375,
      "learning_rate": 0.0001989926970536389,
      "loss": 1.795,
      "step": 366
    },
    {
      "epoch": 0.25497177594442033,
      "grad_norm": 0.83203125,
      "learning_rate": 0.00019894233190632083,
      "loss": 2.0315,
      "step": 367
    },
    {
      "epoch": 0.2556665219279201,
      "grad_norm": 0.66796875,
      "learning_rate": 0.0001988919667590028,
      "loss": 1.8603,
      "step": 368
    },
    {
      "epoch": 0.2563612679114199,
      "grad_norm": 0.6171875,
      "learning_rate": 0.00019884160161168473,
      "loss": 2.0518,
      "step": 369
    },
    {
      "epoch": 0.25705601389491967,
      "grad_norm": 0.62890625,
      "learning_rate": 0.00019879123646436667,
      "loss": 1.4851,
      "step": 370
    },
    {
      "epoch": 0.25775075987841944,
      "grad_norm": 1.2265625,
      "learning_rate": 0.0001987408713170486,
      "loss": 1.5712,
      "step": 371
    },
    {
      "epoch": 0.2584455058619192,
      "grad_norm": 1.0390625,
      "learning_rate": 0.00019869050616973054,
      "loss": 2.2505,
      "step": 372
    },
    {
      "epoch": 0.259140251845419,
      "grad_norm": 0.93359375,
      "learning_rate": 0.0001986401410224125,
      "loss": 2.3709,
      "step": 373
    },
    {
      "epoch": 0.2598349978289188,
      "grad_norm": 0.671875,
      "learning_rate": 0.00019858977587509444,
      "loss": 2.1194,
      "step": 374
    },
    {
      "epoch": 0.26052974381241856,
      "grad_norm": 1.0546875,
      "learning_rate": 0.0001985394107277764,
      "loss": 1.5963,
      "step": 375
    },
    {
      "epoch": 0.2612244897959184,
      "grad_norm": 0.56640625,
      "learning_rate": 0.00019848904558045834,
      "loss": 2.1682,
      "step": 376
    },
    {
      "epoch": 0.26191923577941817,
      "grad_norm": 0.66015625,
      "learning_rate": 0.00019843868043314028,
      "loss": 1.848,
      "step": 377
    },
    {
      "epoch": 0.26261398176291795,
      "grad_norm": 0.55078125,
      "learning_rate": 0.00019838831528582222,
      "loss": 1.5301,
      "step": 378
    },
    {
      "epoch": 0.2633087277464177,
      "grad_norm": 0.83984375,
      "learning_rate": 0.00019833795013850415,
      "loss": 1.6342,
      "step": 379
    },
    {
      "epoch": 0.2640034737299175,
      "grad_norm": 0.8203125,
      "learning_rate": 0.00019828758499118611,
      "loss": 1.7481,
      "step": 380
    },
    {
      "epoch": 0.2646982197134173,
      "grad_norm": 0.875,
      "learning_rate": 0.00019823721984386805,
      "loss": 2.0733,
      "step": 381
    },
    {
      "epoch": 0.26539296569691706,
      "grad_norm": 0.85546875,
      "learning_rate": 0.00019818685469655001,
      "loss": 2.2002,
      "step": 382
    },
    {
      "epoch": 0.26608771168041684,
      "grad_norm": 4.0,
      "learning_rate": 0.00019813648954923195,
      "loss": 1.8585,
      "step": 383
    },
    {
      "epoch": 0.2667824576639166,
      "grad_norm": 0.83203125,
      "learning_rate": 0.0001980861244019139,
      "loss": 1.8138,
      "step": 384
    },
    {
      "epoch": 0.2674772036474164,
      "grad_norm": 1.234375,
      "learning_rate": 0.00019803575925459582,
      "loss": 2.0099,
      "step": 385
    },
    {
      "epoch": 0.2681719496309162,
      "grad_norm": 0.91015625,
      "learning_rate": 0.00019798539410727776,
      "loss": 1.9675,
      "step": 386
    },
    {
      "epoch": 0.26886669561441595,
      "grad_norm": 0.87109375,
      "learning_rate": 0.00019793502895995972,
      "loss": 1.6869,
      "step": 387
    },
    {
      "epoch": 0.2695614415979158,
      "grad_norm": 0.64453125,
      "learning_rate": 0.00019788466381264166,
      "loss": 1.9112,
      "step": 388
    },
    {
      "epoch": 0.27025618758141556,
      "grad_norm": 1.0859375,
      "learning_rate": 0.00019783429866532362,
      "loss": 1.7991,
      "step": 389
}, | |
{ | |
"epoch": 0.27095093356491534, | |
"grad_norm": 0.8828125, | |
"learning_rate": 0.00019778393351800556, | |
"loss": 1.7384, | |
"step": 390 | |
}, | |
{ | |
"epoch": 0.2716456795484151, | |
"grad_norm": 0.66015625, | |
"learning_rate": 0.0001977335683706875, | |
"loss": 1.763, | |
"step": 391 | |
}, | |
{ | |
"epoch": 0.2723404255319149, | |
"grad_norm": 1.3671875, | |
"learning_rate": 0.00019768320322336943, | |
"loss": 1.837, | |
"step": 392 | |
}, | |
{ | |
"epoch": 0.2730351715154147, | |
"grad_norm": 1.4140625, | |
"learning_rate": 0.00019763283807605137, | |
"loss": 1.9485, | |
"step": 393 | |
}, | |
{ | |
"epoch": 0.27372991749891445, | |
"grad_norm": 0.8203125, | |
"learning_rate": 0.00019758247292873333, | |
"loss": 2.2162, | |
"step": 394 | |
}, | |
{ | |
"epoch": 0.27442466348241423, | |
"grad_norm": 1.171875, | |
"learning_rate": 0.00019753210778141527, | |
"loss": 2.1763, | |
"step": 395 | |
}, | |
{ | |
"epoch": 0.275119409465914, | |
"grad_norm": 0.85546875, | |
"learning_rate": 0.00019748174263409723, | |
"loss": 1.8636, | |
"step": 396 | |
}, | |
{ | |
"epoch": 0.2758141554494138, | |
"grad_norm": 1.09375, | |
"learning_rate": 0.00019743137748677917, | |
"loss": 2.0849, | |
"step": 397 | |
}, | |
{ | |
"epoch": 0.27650890143291357, | |
"grad_norm": 1.0078125, | |
"learning_rate": 0.0001973810123394611, | |
"loss": 2.1198, | |
"step": 398 | |
}, | |
{ | |
"epoch": 0.2772036474164134, | |
"grad_norm": 0.5546875, | |
"learning_rate": 0.00019733064719214304, | |
"loss": 1.8171, | |
"step": 399 | |
}, | |
{ | |
"epoch": 0.2778983933999132, | |
"grad_norm": 0.5625, | |
"learning_rate": 0.00019728028204482498, | |
"loss": 2.1271, | |
"step": 400 | |
}, | |
{ | |
"epoch": 0.27859313938341296, | |
"grad_norm": 0.80859375, | |
"learning_rate": 0.00019722991689750694, | |
"loss": 1.8004, | |
"step": 401 | |
}, | |
{ | |
"epoch": 0.27928788536691274, | |
"grad_norm": 1.1328125, | |
"learning_rate": 0.00019717955175018888, | |
"loss": 2.0326, | |
"step": 402 | |
}, | |
{ | |
"epoch": 0.2799826313504125, | |
"grad_norm": 0.8359375, | |
"learning_rate": 0.00019712918660287084, | |
"loss": 1.6586, | |
"step": 403 | |
}, | |
{ | |
"epoch": 0.2806773773339123, | |
"grad_norm": 0.60546875, | |
"learning_rate": 0.00019707882145555278, | |
"loss": 1.7802, | |
"step": 404 | |
}, | |
{ | |
"epoch": 0.28137212331741207, | |
"grad_norm": 0.59375, | |
"learning_rate": 0.0001970284563082347, | |
"loss": 1.9849, | |
"step": 405 | |
}, | |
{ | |
"epoch": 0.28206686930091185, | |
"grad_norm": 0.73828125, | |
"learning_rate": 0.00019697809116091665, | |
"loss": 2.3161, | |
"step": 406 | |
}, | |
{
"epoch": 0.2827616152844116,
"grad_norm": 0.97265625,
"learning_rate": 0.00019692772601359858,
"loss": 1.7711,
"step": 407
},
{
"epoch": 0.2834563612679114,
"grad_norm": 0.66015625,
"learning_rate": 0.00019687736086628055,
"loss": 1.8916,
"step": 408
},
{
"epoch": 0.2841511072514112,
"grad_norm": 0.62890625,
"learning_rate": 0.00019682699571896248,
"loss": 2.0297,
"step": 409
},
{
"epoch": 0.28484585323491096,
"grad_norm": 0.8359375,
"learning_rate": 0.00019677663057164445,
"loss": 1.8274,
"step": 410
},
{
"epoch": 0.2855405992184108,
"grad_norm": 0.69140625,
"learning_rate": 0.00019672626542432638,
"loss": 1.5261,
"step": 411
},
{
"epoch": 0.2862353452019106,
"grad_norm": 0.4921875,
"learning_rate": 0.00019667590027700832,
"loss": 1.9996,
"step": 412
},
{
"epoch": 0.28693009118541035,
"grad_norm": 1.3671875,
"learning_rate": 0.00019662553512969026,
"loss": 2.1384,
"step": 413
},
{
"epoch": 0.28762483716891013,
"grad_norm": 0.90234375,
"learning_rate": 0.0001965751699823722,
"loss": 1.6071,
"step": 414
},
{
"epoch": 0.2883195831524099,
"grad_norm": 0.890625,
"learning_rate": 0.00019652480483505416,
"loss": 2.268,
"step": 415
},
{
"epoch": 0.2890143291359097,
"grad_norm": 0.515625,
"learning_rate": 0.0001964744396877361,
"loss": 1.6901,
"step": 416
},
{
"epoch": 0.28970907511940946,
"grad_norm": 0.953125,
"learning_rate": 0.00019642407454041806,
"loss": 2.0621,
"step": 417
},
{
"epoch": 0.29040382110290924,
"grad_norm": 1.234375,
"learning_rate": 0.0001963737093931,
"loss": 1.7457,
"step": 418
},
{
"epoch": 0.291098567086409,
"grad_norm": 0.84765625,
"learning_rate": 0.00019632334424578193,
"loss": 1.8165,
"step": 419
},
{
"epoch": 0.2917933130699088,
"grad_norm": 0.75390625,
"learning_rate": 0.00019627297909846387,
"loss": 1.8413,
"step": 420
},
{
"epoch": 0.2924880590534086,
"grad_norm": 0.8046875,
"learning_rate": 0.0001962226139511458,
"loss": 2.0921,
"step": 421
},
{
"epoch": 0.29318280503690836,
"grad_norm": 0.72265625,
"learning_rate": 0.00019617224880382777,
"loss": 1.9443,
"step": 422
},
{
"epoch": 0.2938775510204082,
"grad_norm": 2.390625,
"learning_rate": 0.0001961218836565097,
"loss": 1.8594,
"step": 423
},
{
"epoch": 0.29457229700390797,
"grad_norm": 1.6328125,
"learning_rate": 0.00019607151850919166,
"loss": 1.8851,
"step": 424
},
{
"epoch": 0.29526704298740775,
"grad_norm": 0.73828125,
"learning_rate": 0.0001960211533618736,
"loss": 2.0707,
"step": 425
},
{
"epoch": 0.2959617889709075,
"grad_norm": 0.9296875,
"learning_rate": 0.00019597078821455554,
"loss": 2.2316,
"step": 426
},
{
"epoch": 0.2966565349544073,
"grad_norm": 0.84765625,
"learning_rate": 0.00019592042306723747,
"loss": 1.8408,
"step": 427
},
{
"epoch": 0.2973512809379071,
"grad_norm": 0.8515625,
"learning_rate": 0.0001958700579199194,
"loss": 1.893,
"step": 428
},
{
"epoch": 0.29804602692140686,
"grad_norm": 0.7109375,
"learning_rate": 0.00019581969277260137,
"loss": 1.929,
"step": 429
},
{
"epoch": 0.29874077290490664,
"grad_norm": 0.68359375,
"learning_rate": 0.0001957693276252833,
"loss": 1.9385,
"step": 430
},
{
"epoch": 0.2994355188884064,
"grad_norm": 0.875,
"learning_rate": 0.00019571896247796527,
"loss": 1.9914,
"step": 431
},
{
"epoch": 0.3001302648719062,
"grad_norm": 1.1640625,
"learning_rate": 0.0001956685973306472,
"loss": 1.7315,
"step": 432
},
{
"epoch": 0.30082501085540597,
"grad_norm": 0.8046875,
"learning_rate": 0.00019561823218332915,
"loss": 1.5899,
"step": 433
},
{
"epoch": 0.30151975683890575,
"grad_norm": 0.53515625,
"learning_rate": 0.00019556786703601108,
"loss": 2.0665,
"step": 434
},
{
"epoch": 0.3022145028224056,
"grad_norm": 1.25,
"learning_rate": 0.00019551750188869305,
"loss": 1.7969,
"step": 435
},
{
"epoch": 0.30290924880590536,
"grad_norm": 0.6953125,
"learning_rate": 0.00019546713674137498,
"loss": 1.8387,
"step": 436
},
{
"epoch": 0.30360399478940514,
"grad_norm": 1.1015625,
"learning_rate": 0.00019541677159405692,
"loss": 1.8154,
"step": 437
},
{
"epoch": 0.3042987407729049,
"grad_norm": 0.890625,
"learning_rate": 0.00019536640644673888,
"loss": 1.9395,
"step": 438
},
{
"epoch": 0.3049934867564047,
"grad_norm": 0.60546875,
"learning_rate": 0.00019531604129942082,
"loss": 1.8653,
"step": 439
},
{
"epoch": 0.3056882327399045,
"grad_norm": 0.7109375,
"learning_rate": 0.00019526567615210275,
"loss": 1.9524,
"step": 440
},
{
"epoch": 0.30638297872340425,
"grad_norm": 0.75390625,
"learning_rate": 0.0001952153110047847,
"loss": 1.5947,
"step": 441
},
{
"epoch": 0.30707772470690403,
"grad_norm": 1.4765625,
"learning_rate": 0.00019516494585746665,
"loss": 2.1353,
"step": 442
},
{
"epoch": 0.3077724706904038,
"grad_norm": 1.453125,
"learning_rate": 0.0001951145807101486,
"loss": 1.6529,
"step": 443
},
{
"epoch": 0.3084672166739036,
"grad_norm": 1.0234375,
"learning_rate": 0.00019506421556283053,
"loss": 1.9062,
"step": 444
},
{
"epoch": 0.30916196265740337,
"grad_norm": 1.1484375,
"learning_rate": 0.0001950138504155125,
"loss": 2.128,
"step": 445
},
{
"epoch": 0.30985670864090314,
"grad_norm": 0.8203125,
"learning_rate": 0.0001949634852681944,
"loss": 1.9101,
"step": 446
},
{
"epoch": 0.310551454624403,
"grad_norm": 0.73046875,
"learning_rate": 0.00019491312012087636,
"loss": 1.6201,
"step": 447
},
{
"epoch": 0.31124620060790276,
"grad_norm": 1.296875,
"learning_rate": 0.0001948627549735583,
"loss": 2.1286,
"step": 448
},
{
"epoch": 0.31194094659140253,
"grad_norm": 0.65625,
"learning_rate": 0.00019481238982624026,
"loss": 1.7674,
"step": 449
},
{
"epoch": 0.3126356925749023,
"grad_norm": 0.9609375,
"learning_rate": 0.0001947620246789222,
"loss": 2.2924,
"step": 450
},
{
"epoch": 0.3133304385584021,
"grad_norm": 0.50390625,
"learning_rate": 0.00019471165953160413,
"loss": 2.1334,
"step": 451
},
{
"epoch": 0.31402518454190187,
"grad_norm": 1.4140625,
"learning_rate": 0.0001946612943842861,
"loss": 2.1055,
"step": 452
},
{
"epoch": 0.31471993052540165,
"grad_norm": 0.98828125,
"learning_rate": 0.000194610929236968,
"loss": 2.0685,
"step": 453
},
{
"epoch": 0.3154146765089014,
"grad_norm": 0.91796875,
"learning_rate": 0.00019456056408964997,
"loss": 2.2465,
"step": 454
},
{
"epoch": 0.3161094224924012,
"grad_norm": 0.9921875,
"learning_rate": 0.0001945101989423319,
"loss": 1.4865,
"step": 455
},
{
"epoch": 0.316804168475901,
"grad_norm": 0.9453125,
"learning_rate": 0.00019445983379501387,
"loss": 1.9726,
"step": 456
},
{
"epoch": 0.31749891445940076,
"grad_norm": 2.984375,
"learning_rate": 0.0001944094686476958,
"loss": 1.8845,
"step": 457
},
{
"epoch": 0.31819366044290054,
"grad_norm": 0.90625,
"learning_rate": 0.00019435910350037774,
"loss": 1.9736,
"step": 458
},
{
"epoch": 0.3188884064264004,
"grad_norm": 0.53515625,
"learning_rate": 0.0001943087383530597,
"loss": 2.0074,
"step": 459
},
{
"epoch": 0.31958315240990015,
"grad_norm": 0.56640625,
"learning_rate": 0.00019425837320574162,
"loss": 2.0931,
"step": 460
},
{
"epoch": 0.32027789839339993,
"grad_norm": 1.0,
"learning_rate": 0.00019420800805842358,
"loss": 1.9506,
"step": 461
},
{
"epoch": 0.3209726443768997,
"grad_norm": 0.90234375,
"learning_rate": 0.00019415764291110552,
"loss": 2.0672,
"step": 462
},
{
"epoch": 0.3216673903603995,
"grad_norm": 0.94140625,
"learning_rate": 0.00019410727776378748,
"loss": 2.1114,
"step": 463
},
{
"epoch": 0.32236213634389926,
"grad_norm": 1.4296875,
"learning_rate": 0.00019405691261646942,
"loss": 1.8596,
"step": 464
},
{
"epoch": 0.32305688232739904,
"grad_norm": 0.7890625,
"learning_rate": 0.00019400654746915138,
"loss": 2.2623,
"step": 465
},
{
"epoch": 0.3237516283108988,
"grad_norm": 0.515625,
"learning_rate": 0.00019395618232183331,
"loss": 1.9356,
"step": 466
},
{
"epoch": 0.3244463742943986,
"grad_norm": 0.72265625,
"learning_rate": 0.00019390581717451522,
"loss": 2.1368,
"step": 467
},
{
"epoch": 0.3251411202778984,
"grad_norm": 1.046875,
"learning_rate": 0.0001938554520271972,
"loss": 1.9551,
"step": 468
},
{
"epoch": 0.32583586626139815,
"grad_norm": 0.9375,
"learning_rate": 0.00019380508687987912,
"loss": 1.7151,
"step": 469
},
{
"epoch": 0.32653061224489793,
"grad_norm": 0.58984375,
"learning_rate": 0.0001937547217325611,
"loss": 1.6639,
"step": 470
},
{
"epoch": 0.32722535822839777,
"grad_norm": 1.265625,
"learning_rate": 0.00019370435658524302,
"loss": 1.9154,
"step": 471
},
{
"epoch": 0.32792010421189755,
"grad_norm": 1.3984375,
"learning_rate": 0.000193653991437925,
"loss": 1.6919,
"step": 472
},
{
"epoch": 0.3286148501953973,
"grad_norm": 1.1328125,
"learning_rate": 0.00019360362629060692,
"loss": 2.0988,
"step": 473
},
{
"epoch": 0.3293095961788971,
"grad_norm": 1.171875,
"learning_rate": 0.00019355326114328883,
"loss": 2.12,
"step": 474
},
{
"epoch": 0.3300043421623969,
"grad_norm": 0.94140625,
"learning_rate": 0.0001935028959959708,
"loss": 2.1701,
"step": 475
},
{
"epoch": 0.33069908814589666,
"grad_norm": 0.9296875,
"learning_rate": 0.00019345253084865273,
"loss": 2.0642,
"step": 476
},
{
"epoch": 0.33139383412939644,
"grad_norm": 0.85546875,
"learning_rate": 0.0001934021657013347,
"loss": 1.9536,
"step": 477
},
{
"epoch": 0.3320885801128962,
"grad_norm": 1.4453125,
"learning_rate": 0.00019335180055401663,
"loss": 2.023,
"step": 478
},
{
"epoch": 0.332783326096396,
"grad_norm": 0.734375,
"learning_rate": 0.0001933014354066986,
"loss": 1.9748,
"step": 479
},
{
"epoch": 0.33347807207989577,
"grad_norm": 4.15625,
"learning_rate": 0.0001932510702593805,
"loss": 1.7722,
"step": 480
},
{
"epoch": 0.33417281806339555,
"grad_norm": 0.703125,
"learning_rate": 0.00019320070511206244,
"loss": 2.1057,
"step": 481
},
{
"epoch": 0.3348675640468953,
"grad_norm": 0.66796875,
"learning_rate": 0.0001931503399647444,
"loss": 1.6782,
"step": 482
},
{
"epoch": 0.33556231003039516,
"grad_norm": 1.1640625,
"learning_rate": 0.00019309997481742634,
"loss": 2.1179,
"step": 483
},
{
"epoch": 0.33625705601389494,
"grad_norm": 0.9375,
"learning_rate": 0.0001930496096701083,
"loss": 2.192,
"step": 484
},
{
"epoch": 0.3369518019973947,
"grad_norm": 0.77734375,
"learning_rate": 0.00019299924452279024,
"loss": 1.8594,
"step": 485
},
{
"epoch": 0.3376465479808945,
"grad_norm": 0.6484375,
"learning_rate": 0.0001929488793754722,
"loss": 1.9312,
"step": 486
},
{
"epoch": 0.3383412939643943,
"grad_norm": 1.4375,
"learning_rate": 0.0001928985142281541,
"loss": 1.9013,
"step": 487
},
{
"epoch": 0.33903603994789405,
"grad_norm": 0.859375,
"learning_rate": 0.00019284814908083605,
"loss": 1.9257,
"step": 488
},
{
"epoch": 0.33973078593139383,
"grad_norm": 0.7265625,
"learning_rate": 0.000192797783933518,
"loss": 2.0159,
"step": 489
},
{
"epoch": 0.3404255319148936,
"grad_norm": 1.203125,
"learning_rate": 0.00019274741878619995,
"loss": 1.5344,
"step": 490
},
{
"epoch": 0.3411202778983934,
"grad_norm": 0.6953125,
"learning_rate": 0.0001926970536388819,
"loss": 1.5615,
"step": 491
},
{
"epoch": 0.34181502388189317,
"grad_norm": 0.81640625,
"learning_rate": 0.00019264668849156385,
"loss": 1.8554,
"step": 492
},
{
"epoch": 0.34250976986539294,
"grad_norm": 0.79296875,
"learning_rate": 0.0001925963233442458,
"loss": 1.8949,
"step": 493
},
{
"epoch": 0.3432045158488927,
"grad_norm": 1.078125,
"learning_rate": 0.00019254595819692772,
"loss": 1.8137,
"step": 494
},
{
"epoch": 0.34389926183239256,
"grad_norm": 1.2734375,
"learning_rate": 0.00019249559304960968,
"loss": 2.0595,
"step": 495
},
{
"epoch": 0.34459400781589233,
"grad_norm": 0.91015625,
"learning_rate": 0.00019244522790229162,
"loss": 2.2088,
"step": 496
},
{
"epoch": 0.3452887537993921,
"grad_norm": 0.89453125,
"learning_rate": 0.00019239486275497356,
"loss": 1.789,
"step": 497
},
{
"epoch": 0.3459834997828919,
"grad_norm": 1.5078125,
"learning_rate": 0.00019234449760765552,
"loss": 1.7053,
"step": 498
},
{
"epoch": 0.34667824576639167,
"grad_norm": 0.87109375,
"learning_rate": 0.00019229413246033746,
"loss": 1.985,
"step": 499
},
{
"epoch": 0.34737299174989145,
"grad_norm": 0.79296875,
"learning_rate": 0.00019224376731301942,
"loss": 2.0731,
"step": 500
},
{
"epoch": 0.3480677377333912,
"grad_norm": 0.56640625,
"learning_rate": 0.00019219340216570133,
"loss": 2.3603,
"step": 501
},
{
"epoch": 0.348762483716891,
"grad_norm": 1.0703125,
"learning_rate": 0.0001921430370183833,
"loss": 2.2065,
"step": 502
},
{
"epoch": 0.3494572297003908,
"grad_norm": 1.0234375,
"learning_rate": 0.00019209267187106523,
"loss": 2.1569,
"step": 503
},
{
"epoch": 0.35015197568389056,
"grad_norm": 0.71484375,
"learning_rate": 0.00019204230672374717,
"loss": 1.8459,
"step": 504
},
{
"epoch": 0.35084672166739034,
"grad_norm": 0.6953125,
"learning_rate": 0.00019199194157642913,
"loss": 1.8065,
"step": 505
},
{
"epoch": 0.3515414676508901,
"grad_norm": 0.76171875,
"learning_rate": 0.00019194157642911107,
"loss": 2.2484,
"step": 506
},
{
"epoch": 0.35223621363438995,
"grad_norm": 0.6171875,
"learning_rate": 0.00019189121128179303,
"loss": 1.9146,
"step": 507
},
{
"epoch": 0.35293095961788973,
"grad_norm": 1.0390625,
"learning_rate": 0.00019184084613447494,
"loss": 1.7471,
"step": 508
},
{
"epoch": 0.3536257056013895,
"grad_norm": 0.87890625,
"learning_rate": 0.0001917904809871569,
"loss": 1.7203,
"step": 509
},
{
"epoch": 0.3543204515848893,
"grad_norm": 0.71875,
"learning_rate": 0.00019174011583983884,
"loss": 2.0287,
"step": 510
},
{
"epoch": 0.35501519756838906,
"grad_norm": 1.03125,
"learning_rate": 0.00019168975069252077,
"loss": 1.7405,
"step": 511
},
{
"epoch": 0.35570994355188884,
"grad_norm": 1.71875,
"learning_rate": 0.00019163938554520274,
"loss": 1.7935,
"step": 512
},
{
"epoch": 0.3564046895353886,
"grad_norm": 0.98828125,
"learning_rate": 0.00019158902039788467,
"loss": 1.7058,
"step": 513
},
{
"epoch": 0.3570994355188884,
"grad_norm": 1.2578125,
"learning_rate": 0.0001915386552505666,
"loss": 1.6631,
"step": 514
},
{
"epoch": 0.3577941815023882,
"grad_norm": 1.1640625,
"learning_rate": 0.00019148829010324855,
"loss": 2.0297,
"step": 515
},
{
"epoch": 0.35848892748588795,
"grad_norm": 0.69921875,
"learning_rate": 0.0001914379249559305,
"loss": 1.3673,
"step": 516
},
{
"epoch": 0.35918367346938773,
"grad_norm": 0.95703125,
"learning_rate": 0.00019138755980861245,
"loss": 1.7896,
"step": 517
},
{
"epoch": 0.35987841945288757,
"grad_norm": 0.890625,
"learning_rate": 0.00019133719466129438,
"loss": 2.2388,
"step": 518
},
{
"epoch": 0.36057316543638734,
"grad_norm": 0.75390625,
"learning_rate": 0.00019128682951397635,
"loss": 1.8827,
"step": 519
},
{
"epoch": 0.3612679114198871,
"grad_norm": 2.4375,
"learning_rate": 0.00019123646436665828,
"loss": 1.8957,
"step": 520
},
{
"epoch": 0.3619626574033869,
"grad_norm": 0.921875,
"learning_rate": 0.00019118609921934022,
"loss": 1.666,
"step": 521
},
{
"epoch": 0.3626574033868867,
"grad_norm": 0.71484375,
"learning_rate": 0.00019113573407202215,
"loss": 1.6648,
"step": 522
},
{
"epoch": 0.36335214937038646,
"grad_norm": 0.64453125,
"learning_rate": 0.00019108536892470412,
"loss": 1.9235,
"step": 523
},
{
"epoch": 0.36404689535388624,
"grad_norm": 0.65625,
"learning_rate": 0.00019103500377738605,
"loss": 2.1473,
"step": 524
},
{
"epoch": 0.364741641337386,
"grad_norm": 0.8828125,
"learning_rate": 0.000190984638630068,
"loss": 2.0953,
"step": 525
},
{
"epoch": 0.3654363873208858,
"grad_norm": 1.234375,
"learning_rate": 0.00019093427348274995,
"loss": 1.8025,
"step": 526
},
{
"epoch": 0.36613113330438557,
"grad_norm": 1.0546875,
"learning_rate": 0.0001908839083354319,
"loss": 2.0172,
"step": 527
},
{
"epoch": 0.36682587928788535,
"grad_norm": 1.3515625,
"learning_rate": 0.00019083354318811383,
"loss": 1.7116,
"step": 528
},
{
"epoch": 0.3675206252713851,
"grad_norm": 1.0078125,
"learning_rate": 0.00019078317804079576,
"loss": 2.0673,
"step": 529
},
{
"epoch": 0.36821537125488496,
"grad_norm": 1.09375,
"learning_rate": 0.00019073281289347773,
"loss": 2.1515,
"step": 530
},
{
"epoch": 0.36891011723838474,
"grad_norm": 0.6171875,
"learning_rate": 0.00019068244774615966,
"loss": 2.1945,
"step": 531
},
{
"epoch": 0.3696048632218845,
"grad_norm": 0.71484375,
"learning_rate": 0.00019063208259884163,
"loss": 1.9585,
"step": 532
},
{
"epoch": 0.3702996092053843,
"grad_norm": 0.7890625,
"learning_rate": 0.00019058171745152356,
"loss": 2.1528,
"step": 533
},
{
"epoch": 0.3709943551888841,
"grad_norm": 0.8203125,
"learning_rate": 0.0001905313523042055,
"loss": 1.8423,
"step": 534
},
{
"epoch": 0.37168910117238385,
"grad_norm": 1.1015625,
"learning_rate": 0.00019048098715688743,
"loss": 1.9329,
"step": 535
},
{
"epoch": 0.37238384715588363,
"grad_norm": 0.84765625,
"learning_rate": 0.00019043062200956937,
"loss": 1.7946,
"step": 536
},
{
"epoch": 0.3730785931393834,
"grad_norm": 0.84375,
"learning_rate": 0.00019038025686225133,
"loss": 2.1579,
"step": 537
},
{
"epoch": 0.3737733391228832,
"grad_norm": 0.8046875,
"learning_rate": 0.00019032989171493327,
"loss": 1.9882,
"step": 538
},
{
"epoch": 0.37446808510638296,
"grad_norm": 0.81640625,
"learning_rate": 0.00019027952656761523,
"loss": 1.6992,
"step": 539
},
{
"epoch": 0.37516283108988274,
"grad_norm": 0.62890625,
"learning_rate": 0.00019022916142029717,
"loss": 2.003,
"step": 540
},
{
"epoch": 0.3758575770733825,
"grad_norm": 1.5,
"learning_rate": 0.0001901787962729791,
"loss": 2.1709,
"step": 541
},
{
"epoch": 0.37655232305688235,
"grad_norm": 1.0546875,
"learning_rate": 0.00019012843112566104,
"loss": 1.9809,
"step": 542
},
{
"epoch": 0.37724706904038213,
"grad_norm": 0.7734375,
"learning_rate": 0.00019007806597834298,
"loss": 1.956,
"step": 543
},
{
"epoch": 0.3779418150238819,
"grad_norm": 0.953125,
"learning_rate": 0.00019002770083102494,
"loss": 1.619,
"step": 544
},
{
"epoch": 0.3786365610073817,
"grad_norm": 0.83984375,
"learning_rate": 0.00018997733568370688,
"loss": 1.7824,
"step": 545
},
{
"epoch": 0.37933130699088147,
"grad_norm": 0.984375,
"learning_rate": 0.00018992697053638884,
"loss": 1.885,
"step": 546
},
{
"epoch": 0.38002605297438125,
"grad_norm": 1.3125,
"learning_rate": 0.00018987660538907078,
"loss": 2.0227,
"step": 547
},
{
"epoch": 0.380720798957881,
"grad_norm": 0.79296875,
"learning_rate": 0.00018982624024175272,
"loss": 1.7396,
"step": 548
},
{
"epoch": 0.3814155449413808,
"grad_norm": 0.67578125,
"learning_rate": 0.00018977587509443465,
"loss": 1.8219,
"step": 549
},
{
"epoch": 0.3821102909248806,
"grad_norm": 4.65625,
"learning_rate": 0.0001897255099471166,
"loss": 2.2175,
"step": 550
},
{
"epoch": 0.38280503690838036,
"grad_norm": 0.81640625,
"learning_rate": 0.00018967514479979855,
"loss": 1.7872,
"step": 551
},
{
"epoch": 0.38349978289188014,
"grad_norm": 0.98046875,
"learning_rate": 0.0001896247796524805,
"loss": 1.6591,
"step": 552
},
{
"epoch": 0.3841945288753799,
"grad_norm": 1.2421875,
"learning_rate": 0.00018957441450516245,
"loss": 2.0484,
"step": 553
},
{
"epoch": 0.38488927485887975,
"grad_norm": 0.90625,
"learning_rate": 0.0001895240493578444,
"loss": 1.8777,
"step": 554
},
{
"epoch": 0.3855840208423795,
"grad_norm": 0.921875,
"learning_rate": 0.00018947368421052632,
"loss": 2.1238,
"step": 555
},
{
"epoch": 0.3862787668258793,
"grad_norm": 0.60546875,
"learning_rate": 0.00018942331906320826,
"loss": 1.9607,
"step": 556
},
{
"epoch": 0.3869735128093791,
"grad_norm": 0.8125,
"learning_rate": 0.0001893729539158902,
"loss": 1.6038,
"step": 557
},
{
"epoch": 0.38766825879287886,
"grad_norm": 1.359375,
"learning_rate": 0.00018932258876857216,
"loss": 1.7207,
"step": 558
},
{
"epoch": 0.38836300477637864,
"grad_norm": 0.81640625,
"learning_rate": 0.0001892722236212541,
"loss": 1.94,
"step": 559
},
{
"epoch": 0.3890577507598784,
"grad_norm": 1.546875,
"learning_rate": 0.00018922185847393606,
"loss": 1.6762,
"step": 560
},
{
"epoch": 0.3897524967433782,
"grad_norm": 0.8125,
"learning_rate": 0.000189171493326618,
"loss": 1.8562,
"step": 561
},
{
"epoch": 0.390447242726878,
"grad_norm": 0.734375,
"learning_rate": 0.00018912112817929993,
"loss": 2.252,
"step": 562
},
{
"epoch": 0.39114198871037775,
"grad_norm": 1.3359375,
"learning_rate": 0.00018907076303198187,
"loss": 1.7797,
"step": 563
},
{
"epoch": 0.39183673469387753,
"grad_norm": 0.87890625,
"learning_rate": 0.0001890203978846638,
"loss": 2.028,
"step": 564
},
{
"epoch": 0.3925314806773773,
"grad_norm": 0.8515625,
"learning_rate": 0.00018897003273734577,
"loss": 1.9406,
"step": 565
},
{
"epoch": 0.39322622666087714,
"grad_norm": 1.25,
"learning_rate": 0.0001889196675900277,
"loss": 1.4522,
"step": 566
},
{
"epoch": 0.3939209726443769,
"grad_norm": 1.53125,
"learning_rate": 0.00018886930244270967,
"loss": 2.1899,
"step": 567
},
{
"epoch": 0.3946157186278767,
"grad_norm": 0.8828125,
"learning_rate": 0.0001888189372953916,
"loss": 2.0125,
"step": 568
},
{
"epoch": 0.3953104646113765,
"grad_norm": 0.80078125,
"learning_rate": 0.00018876857214807354,
"loss": 1.8498,
"step": 569
},
{
"epoch": 0.39600521059487626,
"grad_norm": 0.85546875,
"learning_rate": 0.00018871820700075548,
"loss": 2.1336,
"step": 570
},
{
"epoch": 0.39669995657837603,
"grad_norm": 0.953125,
"learning_rate": 0.0001886678418534374,
"loss": 1.9395,
"step": 571
},
{
"epoch": 0.3973947025618758,
"grad_norm": 0.8515625,
"learning_rate": 0.00018861747670611938,
"loss": 1.9798,
"step": 572
},
{
"epoch": 0.3980894485453756,
"grad_norm": 2.375,
"learning_rate": 0.0001885671115588013,
"loss": 1.9257,
"step": 573
},
{
"epoch": 0.39878419452887537,
"grad_norm": 1.125,
"learning_rate": 0.00018851674641148328,
"loss": 1.7834,
"step": 574
},
{
"epoch": 0.39947894051237515,
"grad_norm": 1.09375,
"learning_rate": 0.0001884663812641652,
"loss": 2.0443,
"step": 575
},
{
"epoch": 0.4001736864958749,
"grad_norm": 1.109375,
"learning_rate": 0.00018841601611684715,
"loss": 1.6684,
"step": 576
},
{
"epoch": 0.4008684324793747,
"grad_norm": 0.79296875,
"learning_rate": 0.00018836565096952908,
"loss": 1.6101,
"step": 577
},
{
"epoch": 0.40156317846287454,
"grad_norm": 0.7265625,
"learning_rate": 0.00018831528582221102,
"loss": 1.5633,
"step": 578
},
{
"epoch": 0.4022579244463743,
"grad_norm": 0.7890625,
"learning_rate": 0.00018826492067489298,
"loss": 1.9582,
"step": 579
},
{
"epoch": 0.4029526704298741,
"grad_norm": 0.73828125,
"learning_rate": 0.00018821455552757492,
"loss": 2.1624,
"step": 580
},
{
"epoch": 0.40364741641337387,
"grad_norm": 1.078125,
"learning_rate": 0.00018816419038025688,
"loss": 1.8846,
"step": 581
},
{
"epoch": 0.40434216239687365,
"grad_norm": 1.1484375,
"learning_rate": 0.00018811382523293882,
"loss": 1.6138,
"step": 582
},
{
"epoch": 0.40503690838037343,
"grad_norm": 0.93359375,
"learning_rate": 0.00018806346008562076,
"loss": 1.6372,
"step": 583
},
{
"epoch": 0.4057316543638732,
"grad_norm": 0.66796875,
"learning_rate": 0.0001880130949383027,
"loss": 2.1048,
"step": 584
},
{
"epoch": 0.406426400347373,
"grad_norm": 0.94921875,
"learning_rate": 0.00018796272979098463,
"loss": 2.3389,
"step": 585
},
{
"epoch": 0.40712114633087276,
"grad_norm": 0.625,
"learning_rate": 0.0001879123646436666,
"loss": 2.0405,
"step": 586
},
{
"epoch": 0.40781589231437254,
"grad_norm": 0.79296875,
"learning_rate": 0.00018786199949634853,
"loss": 1.8406,
"step": 587
},
{
"epoch": 0.4085106382978723,
"grad_norm": 0.88671875,
"learning_rate": 0.0001878116343490305,
"loss": 2.2409,
"step": 588
},
{
"epoch": 0.4092053842813721,
"grad_norm": 2.421875,
"learning_rate": 0.00018776126920171243,
"loss": 1.8545,
"step": 589
},
{
"epoch": 0.40990013026487193,
"grad_norm": 0.765625,
"learning_rate": 0.00018771090405439437,
"loss": 1.5796,
"step": 590
},
{
"epoch": 0.4105948762483717,
"grad_norm": 0.91015625,
"learning_rate": 0.0001876605389070763,
"loss": 1.3451,
"step": 591
},
{
"epoch": 0.4112896222318715,
"grad_norm": 3.015625,
"learning_rate": 0.00018761017375975824,
"loss": 2.2266,
"step": 592
},
{
"epoch": 0.41198436821537127,
"grad_norm": 0.66796875,
"learning_rate": 0.0001875598086124402,
"loss": 1.9017,
"step": 593
},
{
"epoch": 0.41267911419887104,
"grad_norm": 0.96875,
"learning_rate": 0.00018750944346512214,
"loss": 1.6085,
"step": 594
},
{
"epoch": 0.4133738601823708,
"grad_norm": 0.71484375,
"learning_rate": 0.0001874590783178041,
"loss": 2.0503,
"step": 595
},
{
"epoch": 0.4140686061658706,
"grad_norm": 0.77734375,
"learning_rate": 0.00018740871317048604,
"loss": 1.9612,
"step": 596
},
{
"epoch": 0.4147633521493704,
"grad_norm": 0.6796875,
"learning_rate": 0.00018735834802316797,
"loss": 1.6432,
"step": 597
},
{
"epoch": 0.41545809813287016,
"grad_norm": 0.61328125,
"learning_rate": 0.0001873079828758499,
"loss": 1.855,
"step": 598
},
{
"epoch": 0.41615284411636994,
"grad_norm": 1.2265625,
"learning_rate": 0.00018725761772853187,
"loss": 1.7901,
"step": 599
},
{
"epoch": 0.4168475900998697,
"grad_norm": 0.828125,
"learning_rate": 0.0001872072525812138,
"loss": 1.6167,
"step": 600
},
{
"epoch": 0.4175423360833695,
"grad_norm": 1.046875,
"learning_rate": 0.00018715688743389575,
"loss": 1.6097,
"step": 601
},
{
"epoch": 0.4182370820668693,
"grad_norm": 0.8046875,
"learning_rate": 0.0001871065222865777,
"loss": 1.8758,
"step": 602
},
{
"epoch": 0.4189318280503691,
"grad_norm": 0.84375,
"learning_rate": 0.00018705615713925965,
"loss": 1.8387,
"step": 603
},
{
"epoch": 0.4196265740338689,
"grad_norm": 1.125,
"learning_rate": 0.00018700579199194158,
"loss": 1.5821,
"step": 604
},
{
"epoch": 0.42032132001736866,
"grad_norm": 1.2421875,
"learning_rate": 0.00018695542684462352,
"loss": 1.8596,
"step": 605
},
{
"epoch": 0.42101606600086844,
"grad_norm": 0.8828125,
"learning_rate": 0.00018690506169730548,
"loss": 1.6423,
"step": 606
},
{
"epoch": 0.4217108119843682,
"grad_norm": 0.640625,
"learning_rate": 0.00018685469654998742,
"loss": 1.9369,
"step": 607
},
{
"epoch": 0.422405557967868,
"grad_norm": 0.83984375,
"learning_rate": 0.00018680433140266935,
"loss": 1.7383,
"step": 608
},
{
"epoch": 0.4231003039513678,
"grad_norm": 1.078125,
"learning_rate": 0.00018675396625535132,
"loss": 1.5791,
"step": 609
},
{
"epoch": 0.42379504993486755,
"grad_norm": 1.0234375,
"learning_rate": 0.00018670360110803325,
"loss": 1.8138,
"step": 610
},
{
"epoch": 0.42448979591836733,
"grad_norm": 1.328125,
"learning_rate": 0.0001866532359607152,
"loss": 1.8374,
"step": 611
},
{
"epoch": 0.4251845419018671,
"grad_norm": 1.046875,
"learning_rate": 0.00018660287081339713,
"loss": 2.1108,
"step": 612
},
{
"epoch": 0.4258792878853669,
"grad_norm": 1.0,
"learning_rate": 0.0001865525056660791,
"loss": 1.7101,
"step": 613
},
{
"epoch": 0.4265740338688667,
"grad_norm": 0.7734375,
"learning_rate": 0.00018650214051876103,
"loss": 1.8065,
"step": 614
},
{
"epoch": 0.4272687798523665,
"grad_norm": 0.75390625,
"learning_rate": 0.00018645177537144296,
"loss": 1.7606,
"step": 615
},
{
"epoch": 0.4279635258358663,
"grad_norm": 0.953125,
"learning_rate": 0.00018640141022412493,
"loss": 1.9831,
"step": 616
},
{
"epoch": 0.42865827181936605,
"grad_norm": 1.171875,
"learning_rate": 0.00018635104507680686,
"loss": 2.1788,
"step": 617
},
{
"epoch": 0.42935301780286583,
"grad_norm": 1.0,
"learning_rate": 0.0001863006799294888,
"loss": 1.7741,
"step": 618
},
{
"epoch": 0.4300477637863656,
"grad_norm": 0.55078125,
"learning_rate": 0.00018625031478217073,
"loss": 1.6123,
"step": 619
},
{
"epoch": 0.4307425097698654,
"grad_norm": 1.171875,
"learning_rate": 0.0001861999496348527,
"loss": 2.0256,
"step": 620
},
{
"epoch": 0.43143725575336517,
"grad_norm": 1.3671875,
"learning_rate": 0.00018614958448753463,
"loss": 2.1769,
"step": 621
},
{
"epoch": 0.43213200173686495,
"grad_norm": 0.5859375,
"learning_rate": 0.00018609921934021657,
"loss": 1.765,
"step": 622
},
{
"epoch": 0.4328267477203647,
"grad_norm": 0.75390625,
"learning_rate": 0.00018604885419289853,
"loss": 2.0414,
"step": 623
},
{
"epoch": 0.4335214937038645,
"grad_norm": 0.9375,
"learning_rate": 0.00018599848904558047,
"loss": 1.823,
"step": 624
},
{
"epoch": 0.4342162396873643,
"grad_norm": 0.64453125,
"learning_rate": 0.0001859481238982624,
"loss": 1.9846,
"step": 625
},
{
"epoch": 0.4349109856708641,
"grad_norm": 0.69140625,
"learning_rate": 0.00018589775875094434,
"loss": 2.1152,
"step": 626
},
{
"epoch": 0.4356057316543639,
"grad_norm": 0.91796875,
"learning_rate": 0.0001858473936036263,
"loss": 1.8232,
"step": 627
},
{
"epoch": 0.43630047763786367,
"grad_norm": 0.96484375,
"learning_rate": 0.00018579702845630824,
"loss": 1.4993,
"step": 628
},
{
"epoch": 0.43699522362136345,
"grad_norm": 0.91796875,
"learning_rate": 0.0001857466633089902,
"loss": 1.7799,
"step": 629
},
{
"epoch": 0.4376899696048632,
"grad_norm": 0.75,
"learning_rate": 0.00018569629816167214,
"loss": 1.5612,
"step": 630
},
{
"epoch": 0.438384715588363,
"grad_norm": 1.1015625,
"learning_rate": 0.00018564593301435408,
"loss": 2.114,
"step": 631
},
{
"epoch": 0.4390794615718628,
"grad_norm": 0.74609375,
"learning_rate": 0.00018559556786703602,
"loss": 1.8577,
"step": 632
},
{
"epoch": 0.43977420755536256,
"grad_norm": 1.1015625,
"learning_rate": 0.00018554520271971795,
"loss": 1.9846,
"step": 633
},
{
"epoch": 0.44046895353886234,
"grad_norm": 1.7734375,
"learning_rate": 0.00018549483757239991,
"loss": 1.9394,
"step": 634
},
{
"epoch": 0.4411636995223621,
"grad_norm": 0.71875,
"learning_rate": 0.00018544447242508185,
"loss": 1.6845,
"step": 635
},
{
"epoch": 0.4418584455058619,
"grad_norm": 1.046875,
"learning_rate": 0.00018539410727776381,
"loss": 1.9656,
"step": 636
},
{
"epoch": 0.4425531914893617,
"grad_norm": 0.61328125,
"learning_rate": 0.00018534374213044575,
"loss": 2.2499,
"step": 637
},
{
"epoch": 0.4432479374728615,
"grad_norm": 1.0234375,
"learning_rate": 0.0001852933769831277,
"loss": 1.9494,
"step": 638
},
{
"epoch": 0.4439426834563613,
"grad_norm": 1.1875,
"learning_rate": 0.00018524301183580962,
"loss": 1.2849,
"step": 639
},
{
"epoch": 0.44463742943986106,
"grad_norm": 1.046875,
"learning_rate": 0.00018519264668849156,
"loss": 1.8238,
"step": 640
},
{
"epoch": 0.44533217542336084,
"grad_norm": 0.68359375,
"learning_rate": 0.00018514228154117352,
"loss": 1.8664,
"step": 641
},
{
"epoch": 0.4460269214068606,
"grad_norm": 0.6953125,
"learning_rate": 0.00018509191639385546,
"loss": 1.8457,
"step": 642
},
{
"epoch": 0.4467216673903604,
"grad_norm": 1.1640625,
"learning_rate": 0.00018504155124653742,
"loss": 1.9668,
"step": 643
},
{
"epoch": 0.4474164133738602,
"grad_norm": 0.79296875,
"learning_rate": 0.00018499118609921936,
"loss": 1.5451,
"step": 644
},
{
"epoch": 0.44811115935735996,
"grad_norm": 1.09375,
"learning_rate": 0.0001849408209519013,
"loss": 2.2038,
"step": 645
},
{
"epoch": 0.44880590534085973,
"grad_norm": 0.474609375,
"learning_rate": 0.00018489045580458323,
"loss": 1.7649,
"step": 646
},
{
"epoch": 0.4495006513243595,
"grad_norm": 2.5625,
"learning_rate": 0.00018484009065726517,
"loss": 1.9661,
"step": 647
},
{
"epoch": 0.4501953973078593,
"grad_norm": 0.75390625,
"learning_rate": 0.00018478972550994713,
"loss": 1.7701,
"step": 648
},
{
"epoch": 0.4508901432913591,
"grad_norm": 0.6953125,
"learning_rate": 0.00018473936036262907,
"loss": 1.7554,
"step": 649
},
{
"epoch": 0.4515848892748589,
"grad_norm": 0.5859375,
"learning_rate": 0.00018468899521531103,
"loss": 1.9016,
"step": 650
},
{
"epoch": 0.4522796352583587,
"grad_norm": 0.51953125,
"learning_rate": 0.00018463863006799297,
"loss": 1.9223,
"step": 651
},
{
"epoch": 0.45297438124185846,
"grad_norm": 1.09375,
"learning_rate": 0.0001845882649206749,
"loss": 1.7449,
"step": 652
},
{
"epoch": 0.45366912722535824,
"grad_norm": 0.62890625,
"learning_rate": 0.00018453789977335684,
"loss": 1.9779,
"step": 653
},
{
"epoch": 0.454363873208858,
"grad_norm": 0.8125,
"learning_rate": 0.00018448753462603878,
"loss": 1.7003,
"step": 654
},
{
"epoch": 0.4550586191923578,
"grad_norm": 1.015625,
"learning_rate": 0.00018443716947872074,
"loss": 1.6883,
"step": 655
},
{
"epoch": 0.45575336517585757,
"grad_norm": 0.97265625,
"learning_rate": 0.00018438680433140268,
"loss": 1.9557,
"step": 656
},
{
"epoch": 0.45644811115935735,
"grad_norm": 2.578125,
"learning_rate": 0.00018433643918408464,
"loss": 2.0112,
"step": 657
},
{
"epoch": 0.45714285714285713,
"grad_norm": 0.63671875,
"learning_rate": 0.00018428607403676658,
"loss": 2.0644,
"step": 658
},
{
"epoch": 0.4578376031263569,
"grad_norm": 0.796875,
"learning_rate": 0.0001842357088894485,
"loss": 2.2675,
"step": 659
},
{
"epoch": 0.4585323491098567,
"grad_norm": 0.7578125,
"learning_rate": 0.00018418534374213045,
"loss": 2.0472,
"step": 660
},
{
"epoch": 0.4592270950933565,
"grad_norm": 0.703125,
"learning_rate": 0.00018413497859481238,
"loss": 1.7624,
"step": 661
},
{
"epoch": 0.4599218410768563,
"grad_norm": 1.28125,
"learning_rate": 0.00018408461344749435,
"loss": 1.9001,
"step": 662
},
{
"epoch": 0.4606165870603561,
"grad_norm": 0.5625,
"learning_rate": 0.00018403424830017628,
"loss": 2.1622,
"step": 663
},
{
"epoch": 0.46131133304385585,
"grad_norm": 0.90625,
"learning_rate": 0.00018398388315285825,
"loss": 2.0592,
"step": 664
},
{
"epoch": 0.46200607902735563,
"grad_norm": 0.92578125,
"learning_rate": 0.00018393351800554018,
"loss": 1.97,
"step": 665
},
{
"epoch": 0.4627008250108554,
"grad_norm": 0.8203125,
"learning_rate": 0.00018388315285822212,
"loss": 1.7164,
"step": 666
},
{
"epoch": 0.4633955709943552,
"grad_norm": 1.21875,
"learning_rate": 0.00018383278771090406,
"loss": 2.0458,
"step": 667
},
{
"epoch": 0.46409031697785497,
"grad_norm": 0.90625,
"learning_rate": 0.000183782422563586,
"loss": 2.0149,
"step": 668
},
{
"epoch": 0.46478506296135474,
"grad_norm": 0.9453125,
"learning_rate": 0.00018373205741626796,
"loss": 2.3638,
"step": 669
},
{
"epoch": 0.4654798089448545,
"grad_norm": 1.1796875,
"learning_rate": 0.0001836816922689499,
"loss": 2.0574,
"step": 670
},
{
"epoch": 0.4661745549283543,
"grad_norm": 0.87109375,
"learning_rate": 0.00018363132712163186,
"loss": 1.7857,
"step": 671
},
{
"epoch": 0.4668693009118541,
"grad_norm": 0.734375,
"learning_rate": 0.0001835809619743138,
"loss": 1.8794,
"step": 672
},
{
"epoch": 0.4675640468953539,
"grad_norm": 0.625,
"learning_rate": 0.00018353059682699573,
"loss": 1.7081,
"step": 673
},
{
"epoch": 0.4682587928788537,
"grad_norm": 0.765625,
"learning_rate": 0.00018348023167967767,
"loss": 1.5881,
"step": 674
},
{
"epoch": 0.46895353886235347,
"grad_norm": 1.0546875,
"learning_rate": 0.0001834298665323596,
"loss": 2.0648,
"step": 675
},
{
"epoch": 0.46964828484585325,
"grad_norm": 0.93359375,
"learning_rate": 0.00018337950138504156,
"loss": 1.9419,
"step": 676
},
{
"epoch": 0.470343030829353,
"grad_norm": 0.83984375,
"learning_rate": 0.0001833291362377235,
"loss": 2.1462,
"step": 677
},
{
"epoch": 0.4710377768128528,
"grad_norm": 0.88671875,
"learning_rate": 0.00018327877109040546,
"loss": 2.0603,
"step": 678
},
{
"epoch": 0.4717325227963526,
"grad_norm": 1.296875,
"learning_rate": 0.0001832284059430874,
"loss": 1.9721,
"step": 679
},
{
"epoch": 0.47242726877985236,
"grad_norm": 0.859375,
"learning_rate": 0.00018317804079576934,
"loss": 1.5208,
"step": 680
},
{
"epoch": 0.47312201476335214,
"grad_norm": 1.125,
"learning_rate": 0.00018312767564845127,
"loss": 2.3462,
"step": 681
},
{
"epoch": 0.4738167607468519,
"grad_norm": 0.7890625,
"learning_rate": 0.0001830773105011332,
"loss": 1.8435,
"step": 682
},
{
"epoch": 0.4745115067303517,
"grad_norm": 1.0546875,
"learning_rate": 0.00018302694535381517,
"loss": 2.2692,
"step": 683
},
{
"epoch": 0.4752062527138515,
"grad_norm": 0.91015625,
"learning_rate": 0.0001829765802064971,
"loss": 2.1077,
"step": 684
},
{
"epoch": 0.4759009986973513,
"grad_norm": 0.796875,
"learning_rate": 0.00018292621505917907,
"loss": 1.9386,
"step": 685
},
{
"epoch": 0.4765957446808511,
"grad_norm": 0.78125,
"learning_rate": 0.000182875849911861,
"loss": 1.7375,
"step": 686
},
{
"epoch": 0.47729049066435086,
"grad_norm": 0.875,
"learning_rate": 0.00018282548476454295,
"loss": 1.8864,
"step": 687
},
{
"epoch": 0.47798523664785064,
"grad_norm": 0.7734375,
"learning_rate": 0.00018277511961722488,
"loss": 2.2137,
"step": 688
},
{
"epoch": 0.4786799826313504,
"grad_norm": 1.140625,
"learning_rate": 0.00018272475446990682,
"loss": 2.0135,
"step": 689
},
{
"epoch": 0.4793747286148502,
"grad_norm": 0.66015625,
"learning_rate": 0.00018267438932258878,
"loss": 1.9841,
"step": 690
},
{
"epoch": 0.48006947459835,
"grad_norm": 0.984375,
"learning_rate": 0.00018262402417527072,
"loss": 1.8652,
"step": 691
},
{
"epoch": 0.48076422058184975,
"grad_norm": 0.94921875,
"learning_rate": 0.00018257365902795268,
"loss": 1.533,
"step": 692
},
{
"epoch": 0.48145896656534953,
"grad_norm": 0.97265625,
"learning_rate": 0.00018252329388063462,
"loss": 1.7846,
"step": 693
},
{
"epoch": 0.4821537125488493,
"grad_norm": 0.890625,
"learning_rate": 0.00018247292873331655,
"loss": 1.8461,
"step": 694
},
{
"epoch": 0.4828484585323491,
"grad_norm": 0.953125,
"learning_rate": 0.0001824225635859985,
"loss": 1.8622,
"step": 695
},
{
"epoch": 0.48354320451584887,
"grad_norm": 1.125,
"learning_rate": 0.00018237219843868045,
"loss": 2.1919,
"step": 696
},
{
"epoch": 0.4842379504993487,
"grad_norm": 0.94921875,
"learning_rate": 0.0001823218332913624,
"loss": 1.9689,
"step": 697
},
{
"epoch": 0.4849326964828485,
"grad_norm": 0.953125,
"learning_rate": 0.00018227146814404433,
"loss": 1.5199,
"step": 698
},
{
"epoch": 0.48562744246634826,
"grad_norm": 1.484375,
"learning_rate": 0.0001822211029967263,
"loss": 2.2635,
"step": 699
},
{
"epoch": 0.48632218844984804,
"grad_norm": 0.98828125,
"learning_rate": 0.00018217073784940823,
"loss": 2.2775,
"step": 700
},
{
"epoch": 0.4870169344333478,
"grad_norm": 0.62109375,
"learning_rate": 0.00018212037270209016,
"loss": 1.9399,
"step": 701
},
{
"epoch": 0.4877116804168476,
"grad_norm": 2.9375,
"learning_rate": 0.0001820700075547721,
"loss": 2.0305,
"step": 702
},
{
"epoch": 0.48840642640034737,
"grad_norm": 1.0078125,
"learning_rate": 0.00018201964240745406,
"loss": 2.098,
"step": 703
},
{
"epoch": 0.48910117238384715,
"grad_norm": 1.2578125,
"learning_rate": 0.000181969277260136,
"loss": 2.0633,
"step": 704
},
{
"epoch": 0.4897959183673469,
"grad_norm": 3.0625,
"learning_rate": 0.00018191891211281793,
"loss": 1.8335,
"step": 705
},
{
"epoch": 0.4904906643508467,
"grad_norm": 0.8515625,
"learning_rate": 0.0001818685469654999,
"loss": 1.4018,
"step": 706
},
{
"epoch": 0.4911854103343465,
"grad_norm": 0.69921875,
"learning_rate": 0.00018181818181818183,
"loss": 1.5096,
"step": 707
},
{
"epoch": 0.49188015631784626,
"grad_norm": 0.62890625,
"learning_rate": 0.00018176781667086377,
"loss": 2.0383,
"step": 708
},
{
"epoch": 0.4925749023013461,
"grad_norm": 0.8515625,
"learning_rate": 0.0001817174515235457,
"loss": 2.0676,
"step": 709
},
{
"epoch": 0.4932696482848459,
"grad_norm": 0.9765625,
"learning_rate": 0.00018166708637622767,
"loss": 2.1933,
"step": 710
},
{
"epoch": 0.49396439426834565,
"grad_norm": 0.6953125,
"learning_rate": 0.0001816167212289096,
"loss": 1.7498,
"step": 711
},
{
"epoch": 0.49465914025184543,
"grad_norm": 0.8125,
"learning_rate": 0.00018156635608159154,
"loss": 1.9815,
"step": 712
},
{
"epoch": 0.4953538862353452,
"grad_norm": 0.90625,
"learning_rate": 0.0001815159909342735,
"loss": 2.2162,
"step": 713
},
{
"epoch": 0.496048632218845,
"grad_norm": 1.34375,
"learning_rate": 0.00018146562578695542,
"loss": 1.8575,
"step": 714
},
{
"epoch": 0.49674337820234477,
"grad_norm": 0.78125,
"learning_rate": 0.00018141526063963738,
"loss": 1.8807,
"step": 715
},
{
"epoch": 0.49743812418584454,
"grad_norm": 0.58203125,
"learning_rate": 0.00018136489549231932,
"loss": 1.1918,
"step": 716
},
{
"epoch": 0.4981328701693443,
"grad_norm": 1.03125,
"learning_rate": 0.00018131453034500128,
"loss": 2.1739,
"step": 717
},
{
"epoch": 0.4988276161528441,
"grad_norm": 0.6796875,
"learning_rate": 0.00018126416519768321,
"loss": 1.9931,
"step": 718
},
{
"epoch": 0.4995223621363439,
"grad_norm": 0.99609375,
"learning_rate": 0.00018121380005036515,
"loss": 1.8006,
"step": 719
},
{
"epoch": 0.5002171081198437,
"grad_norm": 0.7109375,
"learning_rate": 0.00018116343490304711,
"loss": 1.9731,
"step": 720
},
{
"epoch": 0.5009118541033435,
"grad_norm": 1.0390625,
"learning_rate": 0.00018111306975572902,
"loss": 1.9277,
"step": 721
},
{
"epoch": 0.5016066000868432,
"grad_norm": 1.8515625,
"learning_rate": 0.000181062704608411,
"loss": 1.8397,
"step": 722
},
{
"epoch": 0.502301346070343,
"grad_norm": 0.90625,
"learning_rate": 0.00018101233946109292,
"loss": 1.6404,
"step": 723
},
{
"epoch": 0.5029960920538428,
"grad_norm": 0.69921875,
"learning_rate": 0.0001809619743137749,
"loss": 1.6856,
"step": 724
},
{
"epoch": 0.5036908380373426,
"grad_norm": 1.1171875,
"learning_rate": 0.00018091160916645682,
"loss": 1.8246,
"step": 725
},
{
"epoch": 0.5043855840208423,
"grad_norm": 0.68359375,
"learning_rate": 0.00018086124401913876,
"loss": 1.9523,
"step": 726
},
{
"epoch": 0.5050803300043422,
"grad_norm": 0.81640625,
"learning_rate": 0.00018081087887182072,
"loss": 1.8332,
"step": 727
},
{
"epoch": 0.505775075987842,
"grad_norm": 0.9296875,
"learning_rate": 0.00018076051372450263,
"loss": 2.008,
"step": 728
},
{
"epoch": 0.5064698219713417,
"grad_norm": 1.0078125,
"learning_rate": 0.0001807101485771846,
"loss": 2.0791,
"step": 729
},
{
"epoch": 0.5071645679548415,
"grad_norm": 0.890625,
"learning_rate": 0.00018065978342986653,
"loss": 2.2381,
"step": 730
},
{
"epoch": 0.5078593139383413,
"grad_norm": 0.7109375,
"learning_rate": 0.0001806094182825485,
"loss": 1.6686,
"step": 731
},
{
"epoch": 0.5085540599218411,
"grad_norm": 1.0859375,
"learning_rate": 0.00018055905313523043,
"loss": 1.9747,
"step": 732
},
{
"epoch": 0.5092488059053408,
"grad_norm": 1.03125,
"learning_rate": 0.0001805086879879124,
"loss": 1.8613,
"step": 733
},
{
"epoch": 0.5099435518888407,
"grad_norm": 1.1953125,
"learning_rate": 0.00018045832284059433,
"loss": 1.6721,
"step": 734
},
{
"epoch": 0.5106382978723404,
"grad_norm": 0.7265625,
"learning_rate": 0.00018040795769327624,
"loss": 1.9698,
"step": 735
},
{
"epoch": 0.5113330438558402,
"grad_norm": 1.703125,
"learning_rate": 0.0001803575925459582,
"loss": 1.9346,
"step": 736
},
{
"epoch": 0.5120277898393399,
"grad_norm": 1.7421875,
"learning_rate": 0.00018030722739864014,
"loss": 1.6338,
"step": 737
},
{
"epoch": 0.5127225358228398,
"grad_norm": 0.828125,
"learning_rate": 0.0001802568622513221,
"loss": 1.7765,
"step": 738
},
{
"epoch": 0.5134172818063396,
"grad_norm": 0.9765625,
"learning_rate": 0.00018020649710400404,
"loss": 1.6058,
"step": 739
},
{
"epoch": 0.5141120277898393,
"grad_norm": 0.80078125,
"learning_rate": 0.000180156131956686,
"loss": 1.7684,
"step": 740
},
{
"epoch": 0.5148067737733392,
"grad_norm": 1.890625,
"learning_rate": 0.00018010576680936794,
"loss": 1.7943,
"step": 741
},
{
"epoch": 0.5155015197568389,
"grad_norm": 1.0625,
"learning_rate": 0.00018005540166204985,
"loss": 2.029,
"step": 742
},
{
"epoch": 0.5161962657403387,
"grad_norm": 13.1875,
"learning_rate": 0.0001800050365147318,
"loss": 2.0415,
"step": 743
},
{
"epoch": 0.5168910117238384,
"grad_norm": 1.0703125,
"learning_rate": 0.00017995467136741375,
"loss": 2.0677,
"step": 744
},
{
"epoch": 0.5175857577073383,
"grad_norm": 0.9609375,
"learning_rate": 0.0001799043062200957,
"loss": 1.7026,
"step": 745
},
{
"epoch": 0.518280503690838,
"grad_norm": 1.21875,
"learning_rate": 0.00017985394107277765,
"loss": 1.7546,
"step": 746
},
{
"epoch": 0.5189752496743378,
"grad_norm": 0.75,
"learning_rate": 0.0001798035759254596,
"loss": 1.7259,
"step": 747
},
{
"epoch": 0.5196699956578376,
"grad_norm": 1.1015625,
"learning_rate": 0.00017975321077814152,
"loss": 1.8416,
"step": 748
},
{
"epoch": 0.5203647416413374,
"grad_norm": 1.0703125,
"learning_rate": 0.00017970284563082346,
"loss": 2.3549,
"step": 749
},
{
"epoch": 0.5210594876248371,
"grad_norm": 1.140625,
"learning_rate": 0.00017965248048350542,
"loss": 1.573,
"step": 750
},
{
"epoch": 0.521754233608337,
"grad_norm": 0.73828125,
"learning_rate": 0.00017960211533618736,
"loss": 1.5468,
"step": 751
},
{
"epoch": 0.5224489795918368,
"grad_norm": 0.91796875,
"learning_rate": 0.00017955175018886932,
"loss": 1.8732,
"step": 752
},
{
"epoch": 0.5231437255753365,
"grad_norm": 0.73046875,
"learning_rate": 0.00017950138504155126,
"loss": 1.8024,
"step": 753
},
{
"epoch": 0.5238384715588363,
"grad_norm": 2.015625,
"learning_rate": 0.00017945101989423322,
"loss": 1.8361,
"step": 754
},
{
"epoch": 0.5245332175423361,
"grad_norm": 36.75,
"learning_rate": 0.00017940065474691513,
"loss": 2.3295,
"step": 755
},
{
"epoch": 0.5252279635258359,
"grad_norm": 2.25,
"learning_rate": 0.00017935028959959707,
"loss": 2.1014,
"step": 756
},
{
"epoch": 0.5259227095093356,
"grad_norm": 0.6484375,
"learning_rate": 0.00017929992445227903,
"loss": 1.6904,
"step": 757
},
{
"epoch": 0.5266174554928355, | |
"grad_norm": 3.25, | |
"learning_rate": 0.00017924955930496097, | |
"loss": 1.8936, | |
"step": 758 | |
}, | |
{ | |
"epoch": 0.5273122014763352, | |
"grad_norm": 0.83203125, | |
"learning_rate": 0.00017919919415764293, | |
"loss": 2.0644, | |
"step": 759 | |
}, | |
{ | |
"epoch": 0.528006947459835, | |
"grad_norm": 1.1484375, | |
"learning_rate": 0.00017914882901032486, | |
"loss": 1.9939, | |
"step": 760 | |
}, | |
{ | |
"epoch": 0.5287016934433347, | |
"grad_norm": 0.86328125, | |
"learning_rate": 0.00017909846386300683, | |
"loss": 1.992, | |
"step": 761 | |
}, | |
{ | |
"epoch": 0.5293964394268346, | |
"grad_norm": 2.0625, | |
"learning_rate": 0.00017904809871568874, | |
"loss": 2.1399, | |
"step": 762 | |
}, | |
{ | |
"epoch": 0.5300911854103344, | |
"grad_norm": 1.09375, | |
"learning_rate": 0.0001789977335683707, | |
"loss": 1.4181, | |
"step": 763 | |
}, | |
{ | |
"epoch": 0.5307859313938341, | |
"grad_norm": 1.109375, | |
"learning_rate": 0.00017894736842105264, | |
"loss": 1.8635, | |
"step": 764 | |
}, | |
{ | |
"epoch": 0.531480677377334, | |
"grad_norm": 1.0390625, | |
"learning_rate": 0.00017889700327373457, | |
"loss": 1.6296, | |
"step": 765 | |
}, | |
{ | |
"epoch": 0.5321754233608337, | |
"grad_norm": 1.0, | |
"learning_rate": 0.00017884663812641654, | |
"loss": 2.0959, | |
"step": 766 | |
}, | |
{ | |
"epoch": 0.5328701693443335, | |
"grad_norm": 0.91796875, | |
"learning_rate": 0.00017879627297909847, | |
"loss": 1.6697, | |
"step": 767 | |
}, | |
{ | |
"epoch": 0.5335649153278332, | |
"grad_norm": 1.0703125, | |
"learning_rate": 0.00017874590783178044, | |
"loss": 2.4245, | |
"step": 768 | |
}, | |
{ | |
"epoch": 0.5342596613113331, | |
"grad_norm": 1.0859375, | |
"learning_rate": 0.00017869554268446235, | |
"loss": 1.5763, | |
"step": 769 | |
}, | |
{ | |
"epoch": 0.5349544072948328, | |
"grad_norm": 1.125, | |
"learning_rate": 0.0001786451775371443, | |
"loss": 1.9295, | |
"step": 770 | |
}, | |
{ | |
"epoch": 0.5356491532783326, | |
"grad_norm": 1.1171875, | |
"learning_rate": 0.00017859481238982625, | |
"loss": 1.3931, | |
"step": 771 | |
}, | |
{ | |
"epoch": 0.5363438992618323, | |
"grad_norm": 0.98046875, | |
"learning_rate": 0.00017854444724250818, | |
"loss": 2.1037, | |
"step": 772 | |
}, | |
{ | |
"epoch": 0.5370386452453322, | |
"grad_norm": 0.74609375, | |
"learning_rate": 0.00017849408209519015, | |
"loss": 1.7615, | |
"step": 773 | |
}, | |
{ | |
"epoch": 0.5377333912288319, | |
"grad_norm": 0.91015625, | |
"learning_rate": 0.00017844371694787208, | |
"loss": 1.7058, | |
"step": 774 | |
}, | |
{ | |
"epoch": 0.5384281372123317, | |
"grad_norm": 1.109375, | |
"learning_rate": 0.00017839335180055405, | |
"loss": 1.9699, | |
"step": 775 | |
}, | |
{ | |
"epoch": 0.5391228831958316, | |
"grad_norm": 0.69921875, | |
"learning_rate": 0.00017834298665323595, | |
"loss": 1.9709, | |
"step": 776 | |
}, | |
{ | |
"epoch": 0.5398176291793313, | |
"grad_norm": 0.88671875, | |
"learning_rate": 0.00017829262150591792, | |
"loss": 1.9188, | |
"step": 777 | |
}, | |
{ | |
"epoch": 0.5405123751628311, | |
"grad_norm": 0.6796875, | |
"learning_rate": 0.00017824225635859985, | |
"loss": 1.658, | |
"step": 778 | |
}, | |
{ | |
"epoch": 0.5412071211463308, | |
"grad_norm": 1.046875, | |
"learning_rate": 0.0001781918912112818, | |
"loss": 1.9932, | |
"step": 779 | |
}, | |
{ | |
"epoch": 0.5419018671298307, | |
"grad_norm": 1.3359375, | |
"learning_rate": 0.00017814152606396375, | |
"loss": 1.9009, | |
"step": 780 | |
}, | |
{ | |
"epoch": 0.5425966131133304, | |
"grad_norm": 0.8046875, | |
"learning_rate": 0.0001780911609166457, | |
"loss": 1.734, | |
"step": 781 | |
}, | |
{ | |
"epoch": 0.5432913590968302, | |
"grad_norm": 1.40625, | |
"learning_rate": 0.00017804079576932763, | |
"loss": 2.1049, | |
"step": 782 | |
}, | |
{ | |
"epoch": 0.54398610508033, | |
"grad_norm": 0.8203125, | |
"learning_rate": 0.00017799043062200956, | |
"loss": 1.6874, | |
"step": 783 | |
}, | |
{ | |
"epoch": 0.5446808510638298, | |
"grad_norm": 0.73046875, | |
"learning_rate": 0.00017794006547469153, | |
"loss": 1.9051, | |
"step": 784 | |
}, | |
{ | |
"epoch": 0.5453755970473295, | |
"grad_norm": 1.25, | |
"learning_rate": 0.00017788970032737346, | |
"loss": 1.8446, | |
"step": 785 | |
}, | |
{ | |
"epoch": 0.5460703430308294, | |
"grad_norm": 0.71484375, | |
"learning_rate": 0.0001778393351800554, | |
"loss": 1.9275, | |
"step": 786 | |
}, | |
{ | |
"epoch": 0.5467650890143292, | |
"grad_norm": 1.0, | |
"learning_rate": 0.00017778897003273736, | |
"loss": 2.1717, | |
"step": 787 | |
}, | |
{ | |
"epoch": 0.5474598349978289, | |
"grad_norm": 0.84375, | |
"learning_rate": 0.0001777386048854193, | |
"loss": 2.5151, | |
"step": 788 | |
}, | |
{ | |
"epoch": 0.5481545809813287, | |
"grad_norm": 1.1015625, | |
"learning_rate": 0.00017768823973810123, | |
"loss": 1.7945, | |
"step": 789 | |
}, | |
{ | |
"epoch": 0.5488493269648285, | |
"grad_norm": 0.98828125, | |
"learning_rate": 0.00017763787459078317, | |
"loss": 1.9144, | |
"step": 790 | |
}, | |
{ | |
"epoch": 0.5495440729483283, | |
"grad_norm": 1.59375, | |
"learning_rate": 0.00017758750944346513, | |
"loss": 1.7331, | |
"step": 791 | |
}, | |
{ | |
"epoch": 0.550238818931828, | |
"grad_norm": 0.62109375, | |
"learning_rate": 0.00017753714429614707, | |
"loss": 1.8174, | |
"step": 792 | |
}, | |
{ | |
"epoch": 0.5509335649153279, | |
"grad_norm": 0.72265625, | |
"learning_rate": 0.00017748677914882903, | |
"loss": 2.1623, | |
"step": 793 | |
}, | |
{ | |
"epoch": 0.5516283108988276, | |
"grad_norm": 1.015625, | |
"learning_rate": 0.00017743641400151097, | |
"loss": 2.0982, | |
"step": 794 | |
}, | |
{ | |
"epoch": 0.5523230568823274, | |
"grad_norm": 0.62890625, | |
"learning_rate": 0.0001773860488541929, | |
"loss": 1.9725, | |
"step": 795 | |
}, | |
{ | |
"epoch": 0.5530178028658271, | |
"grad_norm": 1.1640625, | |
"learning_rate": 0.00017733568370687484, | |
"loss": 1.9904, | |
"step": 796 | |
}, | |
{ | |
"epoch": 0.553712548849327, | |
"grad_norm": 1.046875, | |
"learning_rate": 0.00017728531855955678, | |
"loss": 2.066, | |
"step": 797 | |
}, | |
{ | |
"epoch": 0.5544072948328268, | |
"grad_norm": 0.73046875, | |
"learning_rate": 0.00017723495341223874, | |
"loss": 1.9711, | |
"step": 798 | |
}, | |
{ | |
"epoch": 0.5551020408163265, | |
"grad_norm": 0.59375, | |
"learning_rate": 0.00017718458826492068, | |
"loss": 1.64, | |
"step": 799 | |
}, | |
{ | |
"epoch": 0.5557967867998264, | |
"grad_norm": 0.88671875, | |
"learning_rate": 0.00017713422311760264, | |
"loss": 1.3968, | |
"step": 800 | |
}, | |
{ | |
"epoch": 0.5564915327833261, | |
"grad_norm": 1.203125, | |
"learning_rate": 0.00017708385797028458, | |
"loss": 2.1326, | |
"step": 801 | |
}, | |
{ | |
"epoch": 0.5571862787668259, | |
"grad_norm": 0.5, | |
"learning_rate": 0.00017703349282296652, | |
"loss": 1.8806, | |
"step": 802 | |
}, | |
{ | |
"epoch": 0.5578810247503256, | |
"grad_norm": 1.015625, | |
"learning_rate": 0.00017698312767564845, | |
"loss": 2.022, | |
"step": 803 | |
}, | |
{ | |
"epoch": 0.5585757707338255, | |
"grad_norm": 1.1171875, | |
"learning_rate": 0.0001769327625283304, | |
"loss": 1.79, | |
"step": 804 | |
}, | |
{ | |
"epoch": 0.5592705167173252, | |
"grad_norm": 0.8984375, | |
"learning_rate": 0.00017688239738101235, | |
"loss": 2.2328, | |
"step": 805 | |
}, | |
{ | |
"epoch": 0.559965262700825, | |
"grad_norm": 0.76953125, | |
"learning_rate": 0.0001768320322336943, | |
"loss": 2.0766, | |
"step": 806 | |
}, | |
{ | |
"epoch": 0.5606600086843248, | |
"grad_norm": 1.5234375, | |
"learning_rate": 0.00017678166708637625, | |
"loss": 2.4812, | |
"step": 807 | |
}, | |
{ | |
"epoch": 0.5613547546678246, | |
"grad_norm": 1.03125, | |
"learning_rate": 0.0001767313019390582, | |
"loss": 1.9864, | |
"step": 808 | |
}, | |
{ | |
"epoch": 0.5620495006513243, | |
"grad_norm": 0.82421875, | |
"learning_rate": 0.00017668093679174012, | |
"loss": 2.1901, | |
"step": 809 | |
}, | |
{ | |
"epoch": 0.5627442466348241, | |
"grad_norm": 1.2109375, | |
"learning_rate": 0.00017663057164442206, | |
"loss": 1.9752, | |
"step": 810 | |
}, | |
{ | |
"epoch": 0.563438992618324, | |
"grad_norm": 0.89453125, | |
"learning_rate": 0.000176580206497104, | |
"loss": 1.5796, | |
"step": 811 | |
}, | |
{ | |
"epoch": 0.5641337386018237, | |
"grad_norm": 0.76171875, | |
"learning_rate": 0.00017652984134978596, | |
"loss": 2.244, | |
"step": 812 | |
}, | |
{ | |
"epoch": 0.5648284845853235, | |
"grad_norm": 0.89453125, | |
"learning_rate": 0.0001764794762024679, | |
"loss": 1.7504, | |
"step": 813 | |
}, | |
{ | |
"epoch": 0.5655232305688233, | |
"grad_norm": 1.015625, | |
"learning_rate": 0.00017642911105514986, | |
"loss": 2.24, | |
"step": 814 | |
}, | |
{ | |
"epoch": 0.5662179765523231, | |
"grad_norm": 0.65234375, | |
"learning_rate": 0.0001763787459078318, | |
"loss": 2.1324, | |
"step": 815 | |
}, | |
{ | |
"epoch": 0.5669127225358228, | |
"grad_norm": 1.0078125, | |
"learning_rate": 0.00017632838076051373, | |
"loss": 1.9129, | |
"step": 816 | |
}, | |
{ | |
"epoch": 0.5676074685193226, | |
"grad_norm": 0.81640625, | |
"learning_rate": 0.00017627801561319567, | |
"loss": 1.8645, | |
"step": 817 | |
}, | |
{ | |
"epoch": 0.5683022145028224, | |
"grad_norm": 0.80078125, | |
"learning_rate": 0.0001762276504658776, | |
"loss": 1.8864, | |
"step": 818 | |
}, | |
{ | |
"epoch": 0.5689969604863222, | |
"grad_norm": 0.82421875, | |
"learning_rate": 0.00017617728531855957, | |
"loss": 1.9661, | |
"step": 819 | |
}, | |
{ | |
"epoch": 0.5696917064698219, | |
"grad_norm": 0.9453125, | |
"learning_rate": 0.0001761269201712415, | |
"loss": 2.0306, | |
"step": 820 | |
}, | |
{ | |
"epoch": 0.5703864524533218, | |
"grad_norm": 0.83984375, | |
"learning_rate": 0.00017607655502392347, | |
"loss": 1.8562, | |
"step": 821 | |
}, | |
{ | |
"epoch": 0.5710811984368216, | |
"grad_norm": 1.2265625, | |
"learning_rate": 0.0001760261898766054, | |
"loss": 2.1271, | |
"step": 822 | |
}, | |
{ | |
"epoch": 0.5717759444203213, | |
"grad_norm": 1.03125, | |
"learning_rate": 0.00017597582472928734, | |
"loss": 2.0361, | |
"step": 823 | |
}, | |
{ | |
"epoch": 0.5724706904038211, | |
"grad_norm": 0.8359375, | |
"learning_rate": 0.00017592545958196928, | |
"loss": 1.5519, | |
"step": 824 | |
}, | |
{ | |
"epoch": 0.5731654363873209, | |
"grad_norm": 0.7734375, | |
"learning_rate": 0.0001758750944346512, | |
"loss": 2.0971, | |
"step": 825 | |
}, | |
{ | |
"epoch": 0.5738601823708207, | |
"grad_norm": 1.46875, | |
"learning_rate": 0.00017582472928733318, | |
"loss": 1.9318, | |
"step": 826 | |
}, | |
{ | |
"epoch": 0.5745549283543204, | |
"grad_norm": 0.8046875, | |
"learning_rate": 0.0001757743641400151, | |
"loss": 2.0558, | |
"step": 827 | |
}, | |
{ | |
"epoch": 0.5752496743378203, | |
"grad_norm": 0.91796875, | |
"learning_rate": 0.00017572399899269708, | |
"loss": 1.7626, | |
"step": 828 | |
}, | |
{ | |
"epoch": 0.57594442032132, | |
"grad_norm": 1.125, | |
"learning_rate": 0.000175673633845379, | |
"loss": 2.0998, | |
"step": 829 | |
}, | |
{ | |
"epoch": 0.5766391663048198, | |
"grad_norm": 0.97265625, | |
"learning_rate": 0.00017562326869806095, | |
"loss": 1.6269, | |
"step": 830 | |
}, | |
{ | |
"epoch": 0.5773339122883195, | |
"grad_norm": 1.3671875, | |
"learning_rate": 0.00017557290355074288, | |
"loss": 2.3553, | |
"step": 831 | |
}, | |
{ | |
"epoch": 0.5780286582718194, | |
"grad_norm": 0.83203125, | |
"learning_rate": 0.00017552253840342482, | |
"loss": 1.9309, | |
"step": 832 | |
}, | |
{ | |
"epoch": 0.5787234042553191, | |
"grad_norm": 0.55859375, | |
"learning_rate": 0.00017547217325610678, | |
"loss": 1.7086, | |
"step": 833 | |
}, | |
{ | |
"epoch": 0.5794181502388189, | |
"grad_norm": 0.84765625, | |
"learning_rate": 0.00017542180810878872, | |
"loss": 1.8288, | |
"step": 834 | |
}, | |
{ | |
"epoch": 0.5801128962223188, | |
"grad_norm": 0.70703125, | |
"learning_rate": 0.00017537144296147068, | |
"loss": 1.6589, | |
"step": 835 | |
}, | |
{ | |
"epoch": 0.5808076422058185, | |
"grad_norm": 1.53125, | |
"learning_rate": 0.00017532107781415262, | |
"loss": 1.7197, | |
"step": 836 | |
}, | |
{ | |
"epoch": 0.5815023881893183, | |
"grad_norm": 0.8359375, | |
"learning_rate": 0.00017527071266683456, | |
"loss": 2.2181, | |
"step": 837 | |
}, | |
{ | |
"epoch": 0.582197134172818, | |
"grad_norm": 0.8984375, | |
"learning_rate": 0.0001752203475195165, | |
"loss": 2.1823, | |
"step": 838 | |
}, | |
{ | |
"epoch": 0.5828918801563179, | |
"grad_norm": 0.6328125, | |
"learning_rate": 0.00017516998237219843, | |
"loss": 1.3199, | |
"step": 839 | |
}, | |
{ | |
"epoch": 0.5835866261398176, | |
"grad_norm": 0.87890625, | |
"learning_rate": 0.0001751196172248804, | |
"loss": 1.8895, | |
"step": 840 | |
}, | |
{ | |
"epoch": 0.5842813721233174, | |
"grad_norm": 1.3046875, | |
"learning_rate": 0.00017506925207756233, | |
"loss": 2.1302, | |
"step": 841 | |
}, | |
{ | |
"epoch": 0.5849761181068172, | |
"grad_norm": 0.83984375, | |
"learning_rate": 0.0001750188869302443, | |
"loss": 1.9623, | |
"step": 842 | |
}, | |
{ | |
"epoch": 0.585670864090317, | |
"grad_norm": 1.375, | |
"learning_rate": 0.00017496852178292623, | |
"loss": 1.61, | |
"step": 843 | |
}, | |
{ | |
"epoch": 0.5863656100738167, | |
"grad_norm": 0.9921875, | |
"learning_rate": 0.00017491815663560817, | |
"loss": 1.8907, | |
"step": 844 | |
}, | |
{ | |
"epoch": 0.5870603560573165, | |
"grad_norm": 1.4375, | |
"learning_rate": 0.0001748677914882901, | |
"loss": 1.7086, | |
"step": 845 | |
}, | |
{ | |
"epoch": 0.5877551020408164, | |
"grad_norm": 1.0703125, | |
"learning_rate": 0.00017481742634097204, | |
"loss": 1.718, | |
"step": 846 | |
}, | |
{ | |
"epoch": 0.5884498480243161, | |
"grad_norm": 0.90234375, | |
"learning_rate": 0.000174767061193654, | |
"loss": 2.0364, | |
"step": 847 | |
}, | |
{ | |
"epoch": 0.5891445940078159, | |
"grad_norm": 1.28125, | |
"learning_rate": 0.00017471669604633594, | |
"loss": 2.1759, | |
"step": 848 | |
}, | |
{ | |
"epoch": 0.5898393399913157, | |
"grad_norm": 1.5, | |
"learning_rate": 0.0001746663308990179, | |
"loss": 2.1323, | |
"step": 849 | |
}, | |
{ | |
"epoch": 0.5905340859748155, | |
"grad_norm": 1.28125, | |
"learning_rate": 0.00017461596575169984, | |
"loss": 1.9511, | |
"step": 850 | |
}, | |
{ | |
"epoch": 0.5912288319583152, | |
"grad_norm": 0.625, | |
"learning_rate": 0.00017456560060438177, | |
"loss": 2.0314, | |
"step": 851 | |
}, | |
{ | |
"epoch": 0.591923577941815, | |
"grad_norm": 0.90234375, | |
"learning_rate": 0.0001745152354570637, | |
"loss": 1.5775, | |
"step": 852 | |
}, | |
{ | |
"epoch": 0.5926183239253148, | |
"grad_norm": 0.81640625, | |
"learning_rate": 0.00017446487030974565, | |
"loss": 2.047, | |
"step": 853 | |
}, | |
{ | |
"epoch": 0.5933130699088146, | |
"grad_norm": 1.1328125, | |
"learning_rate": 0.0001744145051624276, | |
"loss": 2.1235, | |
"step": 854 | |
}, | |
{ | |
"epoch": 0.5940078158923143, | |
"grad_norm": 0.80859375, | |
"learning_rate": 0.00017436414001510955, | |
"loss": 1.7731, | |
"step": 855 | |
}, | |
{ | |
"epoch": 0.5947025618758142, | |
"grad_norm": 1.1796875, | |
"learning_rate": 0.0001743137748677915, | |
"loss": 1.9088, | |
"step": 856 | |
}, | |
{ | |
"epoch": 0.5953973078593139, | |
"grad_norm": 0.97265625, | |
"learning_rate": 0.00017426340972047345, | |
"loss": 1.6199, | |
"step": 857 | |
}, | |
{ | |
"epoch": 0.5960920538428137, | |
"grad_norm": 1.7109375, | |
"learning_rate": 0.00017421304457315538, | |
"loss": 1.6654, | |
"step": 858 | |
}, | |
{ | |
"epoch": 0.5967867998263136, | |
"grad_norm": 1.2734375, | |
"learning_rate": 0.00017416267942583732, | |
"loss": 2.0971, | |
"step": 859 | |
}, | |
{ | |
"epoch": 0.5974815458098133, | |
"grad_norm": 0.87109375, | |
"learning_rate": 0.00017411231427851928, | |
"loss": 1.8046, | |
"step": 860 | |
}, | |
{ | |
"epoch": 0.5981762917933131, | |
"grad_norm": 0.97265625, | |
"learning_rate": 0.00017406194913120122, | |
"loss": 2.2793, | |
"step": 861 | |
}, | |
{ | |
"epoch": 0.5988710377768128, | |
"grad_norm": 1.4375, | |
"learning_rate": 0.00017401158398388315, | |
"loss": 1.8008, | |
"step": 862 | |
}, | |
{ | |
"epoch": 0.5995657837603127, | |
"grad_norm": 1.1953125, | |
"learning_rate": 0.00017396121883656512, | |
"loss": 2.0149, | |
"step": 863 | |
}, | |
{ | |
"epoch": 0.6002605297438124, | |
"grad_norm": 0.80078125, | |
"learning_rate": 0.00017391085368924705, | |
"loss": 2.0395, | |
"step": 864 | |
}, | |
{ | |
"epoch": 0.6009552757273122, | |
"grad_norm": 1.0625, | |
"learning_rate": 0.000173860488541929, | |
"loss": 2.0035, | |
"step": 865 | |
}, | |
{ | |
"epoch": 0.6016500217108119, | |
"grad_norm": 1.2578125, | |
"learning_rate": 0.00017381012339461093, | |
"loss": 1.856, | |
"step": 866 | |
}, | |
{ | |
"epoch": 0.6023447676943118, | |
"grad_norm": 4.40625, | |
"learning_rate": 0.0001737597582472929, | |
"loss": 1.8616, | |
"step": 867 | |
}, | |
{ | |
"epoch": 0.6030395136778115, | |
"grad_norm": 1.359375, | |
"learning_rate": 0.00017370939309997483, | |
"loss": 1.829, | |
"step": 868 | |
}, | |
{ | |
"epoch": 0.6037342596613113, | |
"grad_norm": 1.203125, | |
"learning_rate": 0.00017365902795265676, | |
"loss": 2.1977, | |
"step": 869 | |
}, | |
{ | |
"epoch": 0.6044290056448112, | |
"grad_norm": 2.609375, | |
"learning_rate": 0.00017360866280533873, | |
"loss": 2.1831, | |
"step": 870 | |
}, | |
{ | |
"epoch": 0.6051237516283109, | |
"grad_norm": 0.7890625, | |
"learning_rate": 0.00017355829765802066, | |
"loss": 1.5552, | |
"step": 871 | |
}, | |
{ | |
"epoch": 0.6058184976118107, | |
"grad_norm": 0.88671875, | |
"learning_rate": 0.0001735079325107026, | |
"loss": 1.977, | |
"step": 872 | |
}, | |
{ | |
"epoch": 0.6065132435953104, | |
"grad_norm": 0.703125, | |
"learning_rate": 0.00017345756736338453, | |
"loss": 1.9865, | |
"step": 873 | |
}, | |
{ | |
"epoch": 0.6072079895788103, | |
"grad_norm": 1.1796875, | |
"learning_rate": 0.0001734072022160665, | |
"loss": 1.9972, | |
"step": 874 | |
}, | |
{ | |
"epoch": 0.60790273556231, | |
"grad_norm": 0.74609375, | |
"learning_rate": 0.00017335683706874843, | |
"loss": 1.7239, | |
"step": 875 | |
}, | |
{ | |
"epoch": 0.6085974815458098, | |
"grad_norm": 2.046875, | |
"learning_rate": 0.00017330647192143037, | |
"loss": 1.948, | |
"step": 876 | |
}, | |
{ | |
"epoch": 0.6092922275293096, | |
"grad_norm": 0.9609375, | |
"learning_rate": 0.00017325610677411233, | |
"loss": 1.8617, | |
"step": 877 | |
}, | |
{ | |
"epoch": 0.6099869735128094, | |
"grad_norm": 0.953125, | |
"learning_rate": 0.00017320574162679427, | |
"loss": 2.1794, | |
"step": 878 | |
}, | |
{ | |
"epoch": 0.6106817194963091, | |
"grad_norm": 1.0078125, | |
"learning_rate": 0.0001731553764794762, | |
"loss": 1.9517, | |
"step": 879 | |
}, | |
{ | |
"epoch": 0.611376465479809, | |
"grad_norm": 0.98828125, | |
"learning_rate": 0.00017310501133215814, | |
"loss": 1.8475, | |
"step": 880 | |
}, | |
{ | |
"epoch": 0.6120712114633087, | |
"grad_norm": 1.9921875, | |
"learning_rate": 0.0001730546461848401, | |
"loss": 2.4872, | |
"step": 881 | |
}, | |
{ | |
"epoch": 0.6127659574468085, | |
"grad_norm": 0.62109375, | |
"learning_rate": 0.00017300428103752204, | |
"loss": 1.7795, | |
"step": 882 | |
}, | |
{ | |
"epoch": 0.6134607034303083, | |
"grad_norm": 0.890625, | |
"learning_rate": 0.00017295391589020398, | |
"loss": 1.8401, | |
"step": 883 | |
}, | |
{ | |
"epoch": 0.6141554494138081, | |
"grad_norm": 1.0703125, | |
"learning_rate": 0.00017290355074288594, | |
"loss": 1.6861, | |
"step": 884 | |
}, | |
{ | |
"epoch": 0.6148501953973079, | |
"grad_norm": 0.65625, | |
"learning_rate": 0.00017285318559556788, | |
"loss": 2.1298, | |
"step": 885 | |
}, | |
{ | |
"epoch": 0.6155449413808076, | |
"grad_norm": 1.0390625, | |
"learning_rate": 0.00017280282044824982, | |
"loss": 1.9718, | |
"step": 886 | |
}, | |
{ | |
"epoch": 0.6162396873643075, | |
"grad_norm": 0.87109375, | |
"learning_rate": 0.00017275245530093175, | |
"loss": 2.0941, | |
"step": 887 | |
}, | |
{ | |
"epoch": 0.6169344333478072, | |
"grad_norm": 1.0703125, | |
"learning_rate": 0.00017270209015361371, | |
"loss": 1.972, | |
"step": 888 | |
}, | |
{ | |
"epoch": 0.617629179331307, | |
"grad_norm": 1.3125, | |
"learning_rate": 0.00017265172500629565, | |
"loss": 1.6474, | |
"step": 889 | |
}, | |
{ | |
"epoch": 0.6183239253148067, | |
"grad_norm": 0.703125, | |
"learning_rate": 0.0001726013598589776, | |
"loss": 1.9091, | |
"step": 890 | |
}, | |
{ | |
"epoch": 0.6190186712983066, | |
"grad_norm": 1.2109375, | |
"learning_rate": 0.00017255099471165955, | |
"loss": 1.6085, | |
"step": 891 | |
}, | |
{ | |
"epoch": 0.6197134172818063, | |
"grad_norm": 0.69921875, | |
"learning_rate": 0.0001725006295643415, | |
"loss": 1.8403, | |
"step": 892 | |
}, | |
{ | |
"epoch": 0.6204081632653061, | |
"grad_norm": 1.0703125, | |
"learning_rate": 0.00017245026441702342, | |
"loss": 1.6317, | |
"step": 893 | |
}, | |
{ | |
"epoch": 0.621102909248806, | |
"grad_norm": 1.125, | |
"learning_rate": 0.00017239989926970536, | |
"loss": 1.6257, | |
"step": 894 | |
}, | |
{ | |
"epoch": 0.6217976552323057, | |
"grad_norm": 1.953125, | |
"learning_rate": 0.00017234953412238732, | |
"loss": 2.384, | |
"step": 895 | |
}, | |
{ | |
"epoch": 0.6224924012158055, | |
"grad_norm": 0.765625, | |
"learning_rate": 0.00017229916897506926, | |
"loss": 1.8285, | |
"step": 896 | |
}, | |
{ | |
"epoch": 0.6231871471993052, | |
"grad_norm": 1.03125, | |
"learning_rate": 0.00017224880382775122, | |
"loss": 1.9576, | |
"step": 897 | |
}, | |
{ | |
"epoch": 0.6238818931828051, | |
"grad_norm": 1.0234375, | |
"learning_rate": 0.00017219843868043316, | |
"loss": 1.8259, | |
"step": 898 | |
}, | |
{ | |
"epoch": 0.6245766391663048, | |
"grad_norm": 0.9375, | |
"learning_rate": 0.0001721480735331151, | |
"loss": 2.2432, | |
"step": 899 | |
}, | |
{ | |
"epoch": 0.6252713851498046, | |
"grad_norm": 0.83984375, | |
"learning_rate": 0.00017209770838579703, | |
"loss": 1.6558, | |
"step": 900 | |
}, | |
{ | |
"epoch": 0.6259661311333043, | |
"grad_norm": 0.7421875, | |
"learning_rate": 0.00017204734323847897, | |
"loss": 2.0904, | |
"step": 901 | |
}, | |
{ | |
"epoch": 0.6266608771168042, | |
"grad_norm": 0.8671875, | |
"learning_rate": 0.00017199697809116093, | |
"loss": 1.8032, | |
"step": 902 | |
}, | |
{ | |
"epoch": 0.6273556231003039, | |
"grad_norm": 0.92578125, | |
"learning_rate": 0.00017194661294384287, | |
"loss": 1.7511, | |
"step": 903 | |
}, | |
{ | |
"epoch": 0.6280503690838037, | |
"grad_norm": 0.6484375, | |
"learning_rate": 0.00017189624779652483, | |
"loss": 1.8939, | |
"step": 904 | |
}, | |
{ | |
"epoch": 0.6287451150673035, | |
"grad_norm": 0.80078125, | |
"learning_rate": 0.00017184588264920677, | |
"loss": 1.9086, | |
"step": 905 | |
}, | |
{ | |
"epoch": 0.6294398610508033, | |
"grad_norm": 0.83984375, | |
"learning_rate": 0.0001717955175018887, | |
"loss": 1.6633, | |
"step": 906 | |
}, | |
{ | |
"epoch": 0.6301346070343031, | |
"grad_norm": 0.83984375, | |
"learning_rate": 0.00017174515235457064, | |
"loss": 1.9224, | |
"step": 907 | |
}, | |
{ | |
"epoch": 0.6308293530178029, | |
"grad_norm": 0.91015625, | |
"learning_rate": 0.00017169478720725258, | |
"loss": 2.0203, | |
"step": 908 | |
}, | |
{ | |
"epoch": 0.6315240990013027, | |
"grad_norm": 0.6796875, | |
"learning_rate": 0.00017164442205993454, | |
"loss": 1.9564, | |
"step": 909 | |
}, | |
{ | |
"epoch": 0.6322188449848024, | |
"grad_norm": 1.0546875, | |
"learning_rate": 0.00017159405691261648, | |
"loss": 1.8416, | |
"step": 910 | |
}, | |
{ | |
"epoch": 0.6329135909683022, | |
"grad_norm": 1.1640625, | |
"learning_rate": 0.00017154369176529844, | |
"loss": 1.9112, | |
"step": 911 | |
}, | |
{ | |
"epoch": 0.633608336951802, | |
"grad_norm": 0.765625, | |
"learning_rate": 0.00017149332661798038, | |
"loss": 1.9129, | |
"step": 912 | |
}, | |
{ | |
"epoch": 0.6343030829353018, | |
"grad_norm": 0.984375, | |
"learning_rate": 0.0001714429614706623, | |
"loss": 1.7555, | |
"step": 913 | |
}, | |
{ | |
"epoch": 0.6349978289188015, | |
"grad_norm": 2.0625, | |
"learning_rate": 0.00017139259632334425, | |
"loss": 1.7537, | |
"step": 914 | |
}, | |
{ | |
"epoch": 0.6356925749023014, | |
"grad_norm": 0.71484375, | |
"learning_rate": 0.00017134223117602618, | |
"loss": 2.1289, | |
"step": 915 | |
}, | |
{ | |
"epoch": 0.6363873208858011, | |
"grad_norm": 1.2734375, | |
"learning_rate": 0.00017129186602870815, | |
"loss": 2.0833, | |
"step": 916 | |
}, | |
{ | |
"epoch": 0.6370820668693009, | |
"grad_norm": 1.3984375, | |
"learning_rate": 0.00017124150088139008, | |
"loss": 2.0708, | |
"step": 917 | |
}, | |
{ | |
"epoch": 0.6377768128528007, | |
"grad_norm": 0.73046875, | |
"learning_rate": 0.00017119113573407205, | |
"loss": 1.9677, | |
"step": 918 | |
}, | |
{ | |
"epoch": 0.6384715588363005, | |
"grad_norm": 1.265625, | |
"learning_rate": 0.00017114077058675398, | |
"loss": 1.7507, | |
"step": 919 | |
}, | |
{ | |
"epoch": 0.6391663048198003, | |
"grad_norm": 0.85546875, | |
"learning_rate": 0.00017109040543943592, | |
"loss": 2.2454, | |
"step": 920 | |
}, | |
{ | |
"epoch": 0.6398610508033, | |
"grad_norm": 0.9296875, | |
"learning_rate": 0.00017104004029211786, | |
"loss": 2.0876, | |
"step": 921 | |
}, | |
{ | |
"epoch": 0.6405557967867999, | |
"grad_norm": 1.515625, | |
"learning_rate": 0.0001709896751447998, | |
"loss": 1.7415, | |
"step": 922 | |
}, | |
{ | |
"epoch": 0.6412505427702996, | |
"grad_norm": 0.95703125, | |
"learning_rate": 0.00017093930999748176, | |
"loss": 2.4147, | |
"step": 923 | |
}, | |
{ | |
"epoch": 0.6419452887537994, | |
"grad_norm": 0.86328125, | |
"learning_rate": 0.0001708889448501637, | |
"loss": 1.6442, | |
"step": 924 | |
}, | |
{ | |
"epoch": 0.6426400347372991, | |
"grad_norm": 1.0, | |
"learning_rate": 0.00017083857970284566, | |
"loss": 1.9067, | |
"step": 925 | |
}, | |
{ | |
"epoch": 0.643334780720799, | |
"grad_norm": 1.1015625, | |
"learning_rate": 0.0001707882145555276, | |
"loss": 2.1339, | |
"step": 926 | |
}, | |
{ | |
"epoch": 0.6440295267042987, | |
"grad_norm": 0.828125, | |
"learning_rate": 0.00017073784940820953, | |
"loss": 1.7962, | |
"step": 927 | |
}, | |
{ | |
"epoch": 0.6447242726877985, | |
"grad_norm": 0.96875, | |
"learning_rate": 0.00017068748426089147, | |
"loss": 2.1669, | |
"step": 928 | |
}, | |
{ | |
"epoch": 0.6454190186712984, | |
"grad_norm": 0.56640625, | |
"learning_rate": 0.0001706371191135734, | |
"loss": 1.9113, | |
"step": 929 | |
}, | |
{ | |
"epoch": 0.6461137646547981, | |
"grad_norm": 0.90625, | |
"learning_rate": 0.00017058675396625536, | |
"loss": 1.9345, | |
"step": 930 | |
}, | |
{ | |
"epoch": 0.6468085106382979, | |
"grad_norm": 0.8984375, | |
"learning_rate": 0.0001705363888189373, | |
"loss": 2.1483, | |
"step": 931 | |
}, | |
{ | |
"epoch": 0.6475032566217976, | |
"grad_norm": 0.87109375, | |
"learning_rate": 0.00017048602367161926, | |
"loss": 2.1292, | |
"step": 932 | |
}, | |
{ | |
"epoch": 0.6481980026052975, | |
"grad_norm": 0.83984375, | |
"learning_rate": 0.0001704356585243012, | |
"loss": 1.855, | |
"step": 933 | |
}, | |
{ | |
"epoch": 0.6488927485887972, | |
"grad_norm": 0.86328125, | |
"learning_rate": 0.00017038529337698314, | |
"loss": 1.9374, | |
"step": 934 | |
}, | |
{ | |
"epoch": 0.649587494572297, | |
"grad_norm": 0.859375, | |
"learning_rate": 0.00017033492822966507, | |
"loss": 1.9404, | |
"step": 935 | |
}, | |
{ | |
"epoch": 0.6502822405557968, | |
"grad_norm": 0.66796875, | |
"learning_rate": 0.000170284563082347, | |
"loss": 1.6083, | |
"step": 936 | |
}, | |
{ | |
"epoch": 0.6509769865392966, | |
"grad_norm": 1.046875, | |
"learning_rate": 0.00017023419793502897, | |
"loss": 1.8623, | |
"step": 937 | |
}, | |
{ | |
"epoch": 0.6516717325227963, | |
"grad_norm": 1.265625, | |
"learning_rate": 0.0001701838327877109, | |
"loss": 2.0822, | |
"step": 938 | |
}, | |
{ | |
"epoch": 0.6523664785062961, | |
"grad_norm": 0.70703125, | |
"learning_rate": 0.00017013346764039287, | |
"loss": 1.6943, | |
"step": 939 | |
}, | |
{ | |
"epoch": 0.6530612244897959, | |
"grad_norm": 1.1328125, | |
"learning_rate": 0.0001700831024930748, | |
"loss": 1.745, | |
"step": 940 | |
}, | |
{ | |
"epoch": 0.6537559704732957, | |
"grad_norm": 0.96484375, | |
"learning_rate": 0.00017003273734575675, | |
"loss": 1.7084, | |
"step": 941 | |
}, | |
{ | |
"epoch": 0.6544507164567955, | |
"grad_norm": 0.87109375, | |
"learning_rate": 0.00016998237219843868, | |
"loss": 1.6061, | |
"step": 942 | |
}, | |
{ | |
"epoch": 0.6551454624402953, | |
"grad_norm": 0.94140625, | |
"learning_rate": 0.00016993200705112062, | |
"loss": 2.2639, | |
"step": 943 | |
}, | |
{ | |
"epoch": 0.6558402084237951, | |
"grad_norm": 0.7890625, | |
"learning_rate": 0.00016988164190380258, | |
"loss": 1.9709, | |
"step": 944 | |
}, | |
{ | |
"epoch": 0.6565349544072948, | |
"grad_norm": 1.0390625, | |
"learning_rate": 0.00016983127675648452, | |
"loss": 1.9258, | |
"step": 945 | |
}, | |
{ | |
"epoch": 0.6572297003907946, | |
"grad_norm": 1.3046875, | |
"learning_rate": 0.00016978091160916648, | |
"loss": 2.153, | |
"step": 946 | |
}, | |
{ | |
"epoch": 0.6579244463742944, | |
"grad_norm": 1.015625, | |
"learning_rate": 0.00016973054646184842, | |
"loss": 1.7945, | |
"step": 947 | |
}, | |
{ | |
"epoch": 0.6586191923577942, | |
"grad_norm": 0.9453125, | |
"learning_rate": 0.00016968018131453035, | |
"loss": 1.9769, | |
"step": 948 | |
}, | |
{ | |
"epoch": 0.6593139383412939, | |
"grad_norm": 0.8203125, | |
"learning_rate": 0.0001696298161672123, | |
"loss": 1.9792, | |
"step": 949 | |
}, | |
{ | |
"epoch": 0.6600086843247938, | |
"grad_norm": 1.1640625, | |
"learning_rate": 0.00016957945101989423, | |
"loss": 1.9845, | |
"step": 950 | |
}, | |
{ | |
"epoch": 0.6607034303082935, | |
"grad_norm": 1.0234375, | |
"learning_rate": 0.0001695290858725762, | |
"loss": 1.9359, | |
"step": 951 | |
}, | |
{ | |
"epoch": 0.6613981762917933, | |
"grad_norm": 4.1875, | |
"learning_rate": 0.00016947872072525813, | |
"loss": 1.9572, | |
"step": 952 | |
}, | |
{ | |
"epoch": 0.6620929222752931, | |
"grad_norm": 0.70703125, | |
"learning_rate": 0.0001694283555779401, | |
"loss": 1.3144, | |
"step": 953 | |
}, | |
{ | |
"epoch": 0.6627876682587929, | |
"grad_norm": 0.76171875, | |
"learning_rate": 0.00016937799043062203, | |
"loss": 1.4918, | |
"step": 954 | |
}, | |
{ | |
"epoch": 0.6634824142422927, | |
"grad_norm": 0.80859375, | |
"learning_rate": 0.00016932762528330396, | |
"loss": 2.0, | |
"step": 955 | |
}, | |
{ | |
"epoch": 0.6641771602257924, | |
"grad_norm": 0.7890625, | |
"learning_rate": 0.0001692772601359859, | |
"loss": 1.7969, | |
"step": 956 | |
}, | |
{ | |
"epoch": 0.6648719062092923, | |
"grad_norm": 0.62109375, | |
"learning_rate": 0.00016922689498866783, | |
"loss": 1.7975, | |
"step": 957 | |
}, | |
{ | |
"epoch": 0.665566652192792, | |
"grad_norm": 0.703125, | |
"learning_rate": 0.0001691765298413498, | |
"loss": 1.5022, | |
"step": 958 | |
}, | |
{ | |
"epoch": 0.6662613981762918, | |
"grad_norm": 0.921875, | |
"learning_rate": 0.00016912616469403173, | |
"loss": 1.7859, | |
"step": 959 | |
}, | |
{ | |
"epoch": 0.6669561441597915, | |
"grad_norm": 0.8671875, | |
"learning_rate": 0.0001690757995467137, | |
"loss": 2.1235, | |
"step": 960 | |
}, | |
{ | |
"epoch": 0.6676508901432914, | |
"grad_norm": 0.7421875, | |
"learning_rate": 0.00016902543439939563, | |
"loss": 1.8601, | |
"step": 961 | |
}, | |
{ | |
"epoch": 0.6683456361267911, | |
"grad_norm": 0.73828125, | |
"learning_rate": 0.00016897506925207757, | |
"loss": 2.0707, | |
"step": 962 | |
}, | |
{ | |
"epoch": 0.6690403821102909, | |
"grad_norm": 0.87109375, | |
"learning_rate": 0.0001689247041047595, | |
"loss": 1.9595, | |
"step": 963 | |
}, | |
{ | |
"epoch": 0.6697351280937907, | |
"grad_norm": 1.8671875, | |
"learning_rate": 0.00016887433895744147, | |
"loss": 2.1069, | |
"step": 964 | |
}, | |
{ | |
"epoch": 0.6704298740772905, | |
"grad_norm": 0.921875, | |
"learning_rate": 0.0001688239738101234, | |
"loss": 1.6447, | |
"step": 965 | |
}, | |
{ | |
"epoch": 0.6711246200607903, | |
"grad_norm": 0.8203125, | |
"learning_rate": 0.00016877360866280534, | |
"loss": 1.8459, | |
"step": 966 | |
}, | |
{ | |
"epoch": 0.67181936604429, | |
"grad_norm": 2.53125, | |
"learning_rate": 0.0001687232435154873, | |
"loss": 1.9345, | |
"step": 967 | |
}, | |
{ | |
"epoch": 0.6725141120277899, | |
"grad_norm": 1.625, | |
"learning_rate": 0.00016867287836816924, | |
"loss": 1.8392, | |
"step": 968 | |
}, | |
{ | |
"epoch": 0.6732088580112896, | |
"grad_norm": 0.93359375, | |
"learning_rate": 0.00016862251322085118, | |
"loss": 1.8335, | |
"step": 969 | |
}, | |
{ | |
"epoch": 0.6739036039947894, | |
"grad_norm": 0.80859375, | |
"learning_rate": 0.00016857214807353312, | |
"loss": 1.8878, | |
"step": 970 | |
}, | |
{ | |
"epoch": 0.6745983499782892, | |
"grad_norm": 3.390625, | |
"learning_rate": 0.00016852178292621508, | |
"loss": 2.0614, | |
"step": 971 | |
}, | |
{ | |
"epoch": 0.675293095961789, | |
"grad_norm": 0.80078125, | |
"learning_rate": 0.00016847141777889701, | |
"loss": 1.7292, | |
"step": 972 | |
}, | |
{ | |
"epoch": 0.6759878419452887, | |
"grad_norm": 1.34375, | |
"learning_rate": 0.00016842105263157895, | |
"loss": 2.096, | |
"step": 973 | |
}, | |
{ | |
"epoch": 0.6766825879287885, | |
"grad_norm": 1.28125, | |
"learning_rate": 0.00016837068748426091, | |
"loss": 1.7483, | |
"step": 974 | |
}, | |
{ | |
"epoch": 0.6773773339122883, | |
"grad_norm": 0.77734375, | |
"learning_rate": 0.00016832032233694282, | |
"loss": 1.8725, | |
"step": 975 | |
}, | |
{ | |
"epoch": 0.6780720798957881, | |
"grad_norm": 0.9296875, | |
"learning_rate": 0.0001682699571896248, | |
"loss": 2.1252, | |
"step": 976 | |
}, | |
{ | |
"epoch": 0.6787668258792879, | |
"grad_norm": 0.89453125, | |
"learning_rate": 0.00016821959204230672, | |
"loss": 1.6795, | |
"step": 977 | |
}, | |
{ | |
"epoch": 0.6794615718627877, | |
"grad_norm": 1.0859375, | |
"learning_rate": 0.0001681692268949887, | |
"loss": 1.7347, | |
"step": 978 | |
}, | |
{ | |
"epoch": 0.6801563178462875, | |
"grad_norm": 0.7734375, | |
"learning_rate": 0.00016811886174767062, | |
"loss": 2.0712, | |
"step": 979 | |
}, | |
{ | |
"epoch": 0.6808510638297872, | |
"grad_norm": 0.8125, | |
"learning_rate": 0.00016806849660035256, | |
"loss": 1.7589, | |
"step": 980 | |
}, | |
{ | |
"epoch": 0.681545809813287, | |
"grad_norm": 1.1015625, | |
"learning_rate": 0.00016801813145303452, | |
"loss": 2.2129, | |
"step": 981 | |
}, | |
{ | |
"epoch": 0.6822405557967868, | |
"grad_norm": 0.9375, | |
"learning_rate": 0.00016796776630571643, | |
"loss": 2.141, | |
"step": 982 | |
}, | |
{ | |
"epoch": 0.6829353017802866, | |
"grad_norm": 0.97265625, | |
"learning_rate": 0.0001679174011583984, | |
"loss": 1.8133, | |
"step": 983 | |
}, | |
{ | |
"epoch": 0.6836300477637863, | |
"grad_norm": 0.76953125, | |
"learning_rate": 0.00016786703601108033, | |
"loss": 1.7274, | |
"step": 984 | |
}, | |
{ | |
"epoch": 0.6843247937472862, | |
"grad_norm": 0.65625, | |
"learning_rate": 0.0001678166708637623, | |
"loss": 1.7442, | |
"step": 985 | |
}, | |
{ | |
"epoch": 0.6850195397307859, | |
"grad_norm": 0.80859375, | |
"learning_rate": 0.00016776630571644423, | |
"loss": 1.7292, | |
"step": 986 | |
}, | |
{ | |
"epoch": 0.6857142857142857, | |
"grad_norm": 0.875, | |
"learning_rate": 0.00016771594056912617, | |
"loss": 1.8515, | |
"step": 987 | |
}, | |
{ | |
"epoch": 0.6864090316977854, | |
"grad_norm": 0.58984375, | |
"learning_rate": 0.00016766557542180813, | |
"loss": 1.3847, | |
"step": 988 | |
}, | |
{ | |
"epoch": 0.6871037776812853, | |
"grad_norm": 0.703125, | |
"learning_rate": 0.00016761521027449004, | |
"loss": 1.9493, | |
"step": 989 | |
}, | |
{ | |
"epoch": 0.6877985236647851, | |
"grad_norm": 0.765625, | |
"learning_rate": 0.000167564845127172, | |
"loss": 1.8116, | |
"step": 990 | |
}, | |
{ | |
"epoch": 0.6884932696482848, | |
"grad_norm": 1.015625, | |
"learning_rate": 0.00016751447997985394, | |
"loss": 1.5132, | |
"step": 991 | |
}, | |
{ | |
"epoch": 0.6891880156317847, | |
"grad_norm": 0.94921875, | |
"learning_rate": 0.0001674641148325359, | |
"loss": 2.2143, | |
"step": 992 | |
}, | |
{ | |
"epoch": 0.6898827616152844, | |
"grad_norm": 0.859375, | |
"learning_rate": 0.00016741374968521784, | |
"loss": 1.8619, | |
"step": 993 | |
}, | |
{ | |
"epoch": 0.6905775075987842, | |
"grad_norm": 0.81640625, | |
"learning_rate": 0.0001673633845378998, | |
"loss": 1.8431, | |
"step": 994 | |
}, | |
{ | |
"epoch": 0.691272253582284, | |
"grad_norm": 0.75, | |
"learning_rate": 0.00016731301939058174, | |
"loss": 1.942, | |
"step": 995 | |
}, | |
{ | |
"epoch": 0.6919669995657838, | |
"grad_norm": 0.953125, | |
"learning_rate": 0.00016726265424326365, | |
"loss": 2.0124, | |
"step": 996 | |
}, | |
{ | |
"epoch": 0.6926617455492835, | |
"grad_norm": 0.9453125, | |
"learning_rate": 0.0001672122890959456, | |
"loss": 1.9465, | |
"step": 997 | |
}, | |
{ | |
"epoch": 0.6933564915327833, | |
"grad_norm": 0.8984375, | |
"learning_rate": 0.00016716192394862755, | |
"loss": 1.6335, | |
"step": 998 | |
}, | |
{ | |
"epoch": 0.6940512375162831, | |
"grad_norm": 0.89453125, | |
"learning_rate": 0.0001671115588013095, | |
"loss": 1.8172, | |
"step": 999 | |
}, | |
{ | |
"epoch": 0.6947459834997829, | |
"grad_norm": 1.234375, | |
"learning_rate": 0.00016706119365399145, | |
"loss": 1.8174, | |
"step": 1000 | |
}, | |
{ | |
"epoch": 0.6954407294832827, | |
"grad_norm": 0.703125, | |
"learning_rate": 0.0001670108285066734, | |
"loss": 1.8297, | |
"step": 1001 | |
}, | |
{ | |
"epoch": 0.6961354754667824, | |
"grad_norm": 0.73828125, | |
"learning_rate": 0.00016696046335935535, | |
"loss": 1.9633, | |
"step": 1002 | |
}, | |
{ | |
"epoch": 0.6968302214502823, | |
"grad_norm": 1.1171875, | |
"learning_rate": 0.00016691009821203726, | |
"loss": 2.1313, | |
"step": 1003 | |
}, | |
{ | |
"epoch": 0.697524967433782, | |
"grad_norm": 0.83203125, | |
"learning_rate": 0.00016685973306471922, | |
"loss": 1.6867, | |
"step": 1004 | |
}, | |
{ | |
"epoch": 0.6982197134172818, | |
"grad_norm": 0.71484375, | |
"learning_rate": 0.00016680936791740116, | |
"loss": 1.4534, | |
"step": 1005 | |
}, | |
{ | |
"epoch": 0.6989144594007816, | |
"grad_norm": 1.3515625, | |
"learning_rate": 0.00016675900277008312, | |
"loss": 2.0626, | |
"step": 1006 | |
}, | |
{ | |
"epoch": 0.6996092053842814, | |
"grad_norm": 0.859375, | |
"learning_rate": 0.00016670863762276506, | |
"loss": 2.0868, | |
"step": 1007 | |
}, | |
{ | |
"epoch": 0.7003039513677811, | |
"grad_norm": 0.8984375, | |
"learning_rate": 0.00016665827247544702, | |
"loss": 1.6758, | |
"step": 1008 | |
}, | |
{ | |
"epoch": 0.700998697351281, | |
"grad_norm": 0.71484375, | |
"learning_rate": 0.00016660790732812893, | |
"loss": 2.0535, | |
"step": 1009 | |
}, | |
{ | |
"epoch": 0.7016934433347807, | |
"grad_norm": 1.234375, | |
"learning_rate": 0.00016655754218081087, | |
"loss": 1.7197, | |
"step": 1010 | |
}, | |
{ | |
"epoch": 0.7023881893182805, | |
"grad_norm": 1.8359375, | |
"learning_rate": 0.00016650717703349283, | |
"loss": 2.23, | |
"step": 1011 | |
}, | |
{ | |
"epoch": 0.7030829353017802, | |
"grad_norm": 0.9296875, | |
"learning_rate": 0.00016645681188617477, | |
"loss": 1.4958, | |
"step": 1012 | |
}, | |
{ | |
"epoch": 0.7037776812852801, | |
"grad_norm": 1.078125, | |
"learning_rate": 0.00016640644673885673, | |
"loss": 1.569, | |
"step": 1013 | |
}, | |
{ | |
"epoch": 0.7044724272687799, | |
"grad_norm": 1.3671875, | |
"learning_rate": 0.00016635608159153866, | |
"loss": 1.9083, | |
"step": 1014 | |
}, | |
{ | |
"epoch": 0.7051671732522796, | |
"grad_norm": 1.015625, | |
"learning_rate": 0.00016630571644422063, | |
"loss": 2.0236, | |
"step": 1015 | |
}, | |
{ | |
"epoch": 0.7058619192357795, | |
"grad_norm": 1.109375, | |
"learning_rate": 0.00016625535129690254, | |
"loss": 1.8555, | |
"step": 1016 | |
}, | |
{ | |
"epoch": 0.7065566652192792, | |
"grad_norm": 1.1796875, | |
"learning_rate": 0.00016620498614958447, | |
"loss": 2.0917, | |
"step": 1017 | |
}, | |
{ | |
"epoch": 0.707251411202779, | |
"grad_norm": 0.92578125, | |
"learning_rate": 0.00016615462100226644, | |
"loss": 1.9695, | |
"step": 1018 | |
}, | |
{ | |
"epoch": 0.7079461571862787, | |
"grad_norm": 0.486328125, | |
"learning_rate": 0.00016610425585494837, | |
"loss": 1.951, | |
"step": 1019 | |
}, | |
{ | |
"epoch": 0.7086409031697786, | |
"grad_norm": 0.82421875, | |
"learning_rate": 0.00016605389070763034, | |
"loss": 2.0722, | |
"step": 1020 | |
}, | |
{ | |
"epoch": 0.7093356491532783, | |
"grad_norm": 1.109375, | |
"learning_rate": 0.00016600352556031227, | |
"loss": 1.7612, | |
"step": 1021 | |
}, | |
{ | |
"epoch": 0.7100303951367781, | |
"grad_norm": 0.8828125, | |
"learning_rate": 0.00016595316041299424, | |
"loss": 1.5708, | |
"step": 1022 | |
}, | |
{ | |
"epoch": 0.7107251411202778, | |
"grad_norm": 0.9140625, | |
"learning_rate": 0.00016590279526567615, | |
"loss": 2.0463, | |
"step": 1023 | |
}, | |
{ | |
"epoch": 0.7114198871037777, | |
"grad_norm": 1.453125, | |
"learning_rate": 0.00016585243011835808, | |
"loss": 1.9702, | |
"step": 1024 | |
}, | |
{ | |
"epoch": 0.7121146330872775, | |
"grad_norm": 1.015625, | |
"learning_rate": 0.00016580206497104005, | |
"loss": 1.6529, | |
"step": 1025 | |
}, | |
{ | |
"epoch": 0.7128093790707772, | |
"grad_norm": 0.890625, | |
"learning_rate": 0.00016575169982372198, | |
"loss": 1.8015, | |
"step": 1026 | |
}, | |
{ | |
"epoch": 0.7135041250542771, | |
"grad_norm": 0.78125, | |
"learning_rate": 0.00016570133467640395, | |
"loss": 2.2328, | |
"step": 1027 | |
}, | |
{ | |
"epoch": 0.7141988710377768, | |
"grad_norm": 0.9375, | |
"learning_rate": 0.00016565096952908588, | |
"loss": 1.9973, | |
"step": 1028 | |
}, | |
{ | |
"epoch": 0.7148936170212766, | |
"grad_norm": 1.5703125, | |
"learning_rate": 0.00016560060438176784, | |
"loss": 2.0212, | |
"step": 1029 | |
}, | |
{ | |
"epoch": 0.7155883630047764, | |
"grad_norm": 1.078125, | |
"learning_rate": 0.00016555023923444975, | |
"loss": 1.986, | |
"step": 1030 | |
}, | |
{ | |
"epoch": 0.7162831089882762, | |
"grad_norm": 0.62890625, | |
"learning_rate": 0.00016549987408713172, | |
"loss": 2.0747, | |
"step": 1031 | |
}, | |
{ | |
"epoch": 0.7169778549717759, | |
"grad_norm": 0.79296875, | |
"learning_rate": 0.00016544950893981365, | |
"loss": 1.8655, | |
"step": 1032 | |
}, | |
{ | |
"epoch": 0.7176726009552757, | |
"grad_norm": 1.7109375, | |
"learning_rate": 0.0001653991437924956, | |
"loss": 2.2013, | |
"step": 1033 | |
}, | |
{ | |
"epoch": 0.7183673469387755, | |
"grad_norm": 1.1875, | |
"learning_rate": 0.00016534877864517755, | |
"loss": 2.1917, | |
"step": 1034 | |
}, | |
{ | |
"epoch": 0.7190620929222753, | |
"grad_norm": 0.94140625, | |
"learning_rate": 0.0001652984134978595, | |
"loss": 1.8524, | |
"step": 1035 | |
}, | |
{ | |
"epoch": 0.7197568389057751, | |
"grad_norm": 0.63671875, | |
"learning_rate": 0.00016524804835054145, | |
"loss": 2.0421, | |
"step": 1036 | |
}, | |
{ | |
"epoch": 0.7204515848892749, | |
"grad_norm": 0.8515625, | |
"learning_rate": 0.00016519768320322336, | |
"loss": 1.6317, | |
"step": 1037 | |
}, | |
{ | |
"epoch": 0.7211463308727747, | |
"grad_norm": 1.859375, | |
"learning_rate": 0.00016514731805590533, | |
"loss": 1.6442, | |
"step": 1038 | |
}, | |
{ | |
"epoch": 0.7218410768562744, | |
"grad_norm": 0.9453125, | |
"learning_rate": 0.00016509695290858726, | |
"loss": 1.8236, | |
"step": 1039 | |
}, | |
{ | |
"epoch": 0.7225358228397742, | |
"grad_norm": 1.328125, | |
"learning_rate": 0.0001650465877612692, | |
"loss": 1.6599, | |
"step": 1040 | |
}, | |
{ | |
"epoch": 0.723230568823274, | |
"grad_norm": 0.69921875, | |
"learning_rate": 0.00016499622261395116, | |
"loss": 2.0284, | |
"step": 1041 | |
}, | |
{ | |
"epoch": 0.7239253148067738, | |
"grad_norm": 3.515625, | |
"learning_rate": 0.0001649458574666331, | |
"loss": 1.6601, | |
"step": 1042 | |
}, | |
{ | |
"epoch": 0.7246200607902735, | |
"grad_norm": 0.58203125, | |
"learning_rate": 0.00016489549231931503, | |
"loss": 1.3995, | |
"step": 1043 | |
}, | |
{ | |
"epoch": 0.7253148067737734, | |
"grad_norm": 1.3671875, | |
"learning_rate": 0.00016484512717199697, | |
"loss": 1.9502, | |
"step": 1044 | |
}, | |
{ | |
"epoch": 0.7260095527572731, | |
"grad_norm": 0.6484375, | |
"learning_rate": 0.00016479476202467893, | |
"loss": 1.7405, | |
"step": 1045 | |
}, | |
{ | |
"epoch": 0.7267042987407729, | |
"grad_norm": 0.79296875, | |
"learning_rate": 0.00016474439687736087, | |
"loss": 1.6638, | |
"step": 1046 | |
}, | |
{ | |
"epoch": 0.7273990447242726, | |
"grad_norm": 1.546875, | |
"learning_rate": 0.0001646940317300428, | |
"loss": 1.9068, | |
"step": 1047 | |
}, | |
{ | |
"epoch": 0.7280937907077725, | |
"grad_norm": 0.5546875, | |
"learning_rate": 0.00016464366658272477, | |
"loss": 1.9891, | |
"step": 1048 | |
}, | |
{ | |
"epoch": 0.7287885366912723, | |
"grad_norm": 1.328125, | |
"learning_rate": 0.0001645933014354067, | |
"loss": 1.6797, | |
"step": 1049 | |
}, | |
{ | |
"epoch": 0.729483282674772, | |
"grad_norm": 0.859375, | |
"learning_rate": 0.00016454293628808864, | |
"loss": 2.1491, | |
"step": 1050 | |
}, | |
{ | |
"epoch": 0.7301780286582719, | |
"grad_norm": 0.79296875, | |
"learning_rate": 0.00016449257114077058, | |
"loss": 1.6768, | |
"step": 1051 | |
}, | |
{ | |
"epoch": 0.7308727746417716, | |
"grad_norm": 1.21875, | |
"learning_rate": 0.00016444220599345254, | |
"loss": 1.6134, | |
"step": 1052 | |
}, | |
{ | |
"epoch": 0.7315675206252714, | |
"grad_norm": 0.98046875, | |
"learning_rate": 0.00016439184084613448, | |
"loss": 1.4917, | |
"step": 1053 | |
}, | |
{ | |
"epoch": 0.7322622666087711, | |
"grad_norm": 1.0546875, | |
"learning_rate": 0.00016434147569881642, | |
"loss": 1.9061, | |
"step": 1054 | |
}, | |
{ | |
"epoch": 0.732957012592271, | |
"grad_norm": 0.765625, | |
"learning_rate": 0.00016429111055149838, | |
"loss": 2.0705, | |
"step": 1055 | |
}, | |
{ | |
"epoch": 0.7336517585757707, | |
"grad_norm": 0.99609375, | |
"learning_rate": 0.00016424074540418031, | |
"loss": 1.7986, | |
"step": 1056 | |
}, | |
{ | |
"epoch": 0.7343465045592705, | |
"grad_norm": 1.203125, | |
"learning_rate": 0.00016419038025686225, | |
"loss": 1.5941, | |
"step": 1057 | |
}, | |
{ | |
"epoch": 0.7350412505427703, | |
"grad_norm": 1.03125, | |
"learning_rate": 0.0001641400151095442, | |
"loss": 2.0374, | |
"step": 1058 | |
}, | |
{ | |
"epoch": 0.7357359965262701, | |
"grad_norm": 0.8984375, | |
"learning_rate": 0.00016408964996222615, | |
"loss": 1.8155, | |
"step": 1059 | |
}, | |
{ | |
"epoch": 0.7364307425097699, | |
"grad_norm": 1.703125, | |
"learning_rate": 0.0001640392848149081, | |
"loss": 1.7869, | |
"step": 1060 | |
}, | |
{ | |
"epoch": 0.7371254884932696, | |
"grad_norm": 0.9609375, | |
"learning_rate": 0.00016398891966759005, | |
"loss": 2.0092, | |
"step": 1061 | |
}, | |
{ | |
"epoch": 0.7378202344767695, | |
"grad_norm": 0.8125, | |
"learning_rate": 0.000163938554520272, | |
"loss": 1.8563, | |
"step": 1062 | |
}, | |
{ | |
"epoch": 0.7385149804602692, | |
"grad_norm": 3.453125, | |
"learning_rate": 0.00016388818937295392, | |
"loss": 1.93, | |
"step": 1063 | |
}, | |
{ | |
"epoch": 0.739209726443769, | |
"grad_norm": 1.234375, | |
"learning_rate": 0.00016383782422563586, | |
"loss": 1.957, | |
"step": 1064 | |
}, | |
{ | |
"epoch": 0.7399044724272688, | |
"grad_norm": 1.25, | |
"learning_rate": 0.0001637874590783178, | |
"loss": 1.6903, | |
"step": 1065 | |
}, | |
{ | |
"epoch": 0.7405992184107686, | |
"grad_norm": 1.0546875, | |
"learning_rate": 0.00016373709393099976, | |
"loss": 1.7496, | |
"step": 1066 | |
}, | |
{ | |
"epoch": 0.7412939643942683, | |
"grad_norm": 1.15625, | |
"learning_rate": 0.0001636867287836817, | |
"loss": 2.2642, | |
"step": 1067 | |
}, | |
{ | |
"epoch": 0.7419887103777681, | |
"grad_norm": 0.83203125, | |
"learning_rate": 0.00016363636363636366, | |
"loss": 1.8255, | |
"step": 1068 | |
}, | |
{ | |
"epoch": 0.7426834563612679, | |
"grad_norm": 1.0625, | |
"learning_rate": 0.0001635859984890456, | |
"loss": 2.0617, | |
"step": 1069 | |
}, | |
{ | |
"epoch": 0.7433782023447677, | |
"grad_norm": 0.65234375, | |
"learning_rate": 0.00016353563334172753, | |
"loss": 2.2496, | |
"step": 1070 | |
}, | |
{ | |
"epoch": 0.7440729483282674, | |
"grad_norm": 1.5390625, | |
"learning_rate": 0.00016348526819440947, | |
"loss": 2.2341, | |
"step": 1071 | |
}, | |
{ | |
"epoch": 0.7447676943117673, | |
"grad_norm": 0.65234375, | |
"learning_rate": 0.0001634349030470914, | |
"loss": 2.0895, | |
"step": 1072 | |
}, | |
{ | |
"epoch": 0.7454624402952671, | |
"grad_norm": 1.125, | |
"learning_rate": 0.00016338453789977337, | |
"loss": 1.3802, | |
"step": 1073 | |
}, | |
{ | |
"epoch": 0.7461571862787668, | |
"grad_norm": 0.84765625, | |
"learning_rate": 0.0001633341727524553, | |
"loss": 1.874, | |
"step": 1074 | |
}, | |
{ | |
"epoch": 0.7468519322622666, | |
"grad_norm": 1.03125, | |
"learning_rate": 0.00016328380760513727, | |
"loss": 2.2254, | |
"step": 1075 | |
}, | |
{ | |
"epoch": 0.7475466782457664, | |
"grad_norm": 0.9609375, | |
"learning_rate": 0.0001632334424578192, | |
"loss": 1.6083, | |
"step": 1076 | |
}, | |
{ | |
"epoch": 0.7482414242292662, | |
"grad_norm": 1.0625, | |
"learning_rate": 0.00016318307731050114, | |
"loss": 2.1731, | |
"step": 1077 | |
}, | |
{ | |
"epoch": 0.7489361702127659, | |
"grad_norm": 0.71875, | |
"learning_rate": 0.00016313271216318308, | |
"loss": 1.7249, | |
"step": 1078 | |
}, | |
{ | |
"epoch": 0.7496309161962658, | |
"grad_norm": 0.66015625, | |
"learning_rate": 0.000163082347015865, | |
"loss": 1.9163, | |
"step": 1079 | |
}, | |
{ | |
"epoch": 0.7503256621797655, | |
"grad_norm": 0.65625, | |
"learning_rate": 0.00016303198186854698, | |
"loss": 1.8562, | |
"step": 1080 | |
}, | |
{ | |
"epoch": 0.7510204081632653, | |
"grad_norm": 0.6171875, | |
"learning_rate": 0.0001629816167212289, | |
"loss": 1.7651, | |
"step": 1081 | |
}, | |
{ | |
"epoch": 0.751715154146765, | |
"grad_norm": 1.28125, | |
"learning_rate": 0.00016293125157391088, | |
"loss": 2.4086, | |
"step": 1082 | |
}, | |
{ | |
"epoch": 0.7524099001302649, | |
"grad_norm": 0.6015625, | |
"learning_rate": 0.0001628808864265928, | |
"loss": 1.6701, | |
"step": 1083 | |
}, | |
{ | |
"epoch": 0.7531046461137647, | |
"grad_norm": 0.83203125, | |
"learning_rate": 0.00016283052127927475, | |
"loss": 1.7093, | |
"step": 1084 | |
}, | |
{ | |
"epoch": 0.7537993920972644, | |
"grad_norm": 2.1875, | |
"learning_rate": 0.00016278015613195668, | |
"loss": 1.8675, | |
"step": 1085 | |
}, | |
{ | |
"epoch": 0.7544941380807643, | |
"grad_norm": 0.73828125, | |
"learning_rate": 0.00016272979098463862, | |
"loss": 1.8314, | |
"step": 1086 | |
}, | |
{ | |
"epoch": 0.755188884064264, | |
"grad_norm": 0.8046875, | |
"learning_rate": 0.00016267942583732058, | |
"loss": 1.9986, | |
"step": 1087 | |
}, | |
{ | |
"epoch": 0.7558836300477638, | |
"grad_norm": 0.73828125, | |
"learning_rate": 0.00016262906069000252, | |
"loss": 2.0108, | |
"step": 1088 | |
}, | |
{ | |
"epoch": 0.7565783760312635, | |
"grad_norm": 0.68359375, | |
"learning_rate": 0.00016257869554268448, | |
"loss": 1.6761, | |
"step": 1089 | |
}, | |
{ | |
"epoch": 0.7572731220147634, | |
"grad_norm": 0.76953125, | |
"learning_rate": 0.00016252833039536642, | |
"loss": 1.9731, | |
"step": 1090 | |
}, | |
{ | |
"epoch": 0.7579678679982631, | |
"grad_norm": 1.15625, | |
"learning_rate": 0.00016247796524804836, | |
"loss": 1.6051, | |
"step": 1091 | |
}, | |
{ | |
"epoch": 0.7586626139817629, | |
"grad_norm": 0.78515625, | |
"learning_rate": 0.0001624276001007303, | |
"loss": 2.0622, | |
"step": 1092 | |
}, | |
{ | |
"epoch": 0.7593573599652627, | |
"grad_norm": 0.76171875, | |
"learning_rate": 0.00016237723495341223, | |
"loss": 1.8382, | |
"step": 1093 | |
}, | |
{ | |
"epoch": 0.7600521059487625, | |
"grad_norm": 0.8046875, | |
"learning_rate": 0.0001623268698060942, | |
"loss": 2.0037, | |
"step": 1094 | |
}, | |
{ | |
"epoch": 0.7607468519322622, | |
"grad_norm": 0.734375, | |
"learning_rate": 0.00016227650465877613, | |
"loss": 2.0415, | |
"step": 1095 | |
}, | |
{ | |
"epoch": 0.761441597915762, | |
"grad_norm": 0.8515625, | |
"learning_rate": 0.0001622261395114581, | |
"loss": 2.0115, | |
"step": 1096 | |
}, | |
{ | |
"epoch": 0.7621363438992619, | |
"grad_norm": 0.77734375, | |
"learning_rate": 0.00016217577436414003, | |
"loss": 2.1002, | |
"step": 1097 | |
}, | |
{ | |
"epoch": 0.7628310898827616, | |
"grad_norm": 0.9140625, | |
"learning_rate": 0.00016212540921682196, | |
"loss": 1.3513, | |
"step": 1098 | |
}, | |
{ | |
"epoch": 0.7635258358662614, | |
"grad_norm": 0.65625, | |
"learning_rate": 0.0001620750440695039, | |
"loss": 1.8398, | |
"step": 1099 | |
}, | |
{ | |
"epoch": 0.7642205818497612, | |
"grad_norm": 1.3828125, | |
"learning_rate": 0.00016202467892218584, | |
"loss": 2.1121, | |
"step": 1100 | |
}, | |
{ | |
"epoch": 0.764915327833261, | |
"grad_norm": 0.90234375, | |
"learning_rate": 0.0001619743137748678, | |
"loss": 1.3797, | |
"step": 1101 | |
}, | |
{ | |
"epoch": 0.7656100738167607, | |
"grad_norm": 1.046875, | |
"learning_rate": 0.00016192394862754974, | |
"loss": 2.0456, | |
"step": 1102 | |
}, | |
{ | |
"epoch": 0.7663048198002606, | |
"grad_norm": 0.6875, | |
"learning_rate": 0.0001618735834802317, | |
"loss": 1.7388, | |
"step": 1103 | |
}, | |
{ | |
"epoch": 0.7669995657837603, | |
"grad_norm": 0.9609375, | |
"learning_rate": 0.00016182321833291364, | |
"loss": 2.0159, | |
"step": 1104 | |
}, | |
{ | |
"epoch": 0.7676943117672601, | |
"grad_norm": 0.921875, | |
"learning_rate": 0.00016177285318559557, | |
"loss": 1.6245, | |
"step": 1105 | |
}, | |
{ | |
"epoch": 0.7683890577507598, | |
"grad_norm": 1.15625, | |
"learning_rate": 0.0001617224880382775, | |
"loss": 1.9893, | |
"step": 1106 | |
}, | |
{ | |
"epoch": 0.7690838037342597, | |
"grad_norm": 1.015625, | |
"learning_rate": 0.00016167212289095945, | |
"loss": 1.8131, | |
"step": 1107 | |
}, | |
{ | |
"epoch": 0.7697785497177595, | |
"grad_norm": 0.89453125, | |
"learning_rate": 0.0001616217577436414, | |
"loss": 2.1454, | |
"step": 1108 | |
}, | |
{ | |
"epoch": 0.7704732957012592, | |
"grad_norm": 0.93359375, | |
"learning_rate": 0.00016157139259632335, | |
"loss": 1.9464, | |
"step": 1109 | |
}, | |
{ | |
"epoch": 0.771168041684759, | |
"grad_norm": 0.68359375, | |
"learning_rate": 0.0001615210274490053, | |
"loss": 1.5576, | |
"step": 1110 | |
}, | |
{ | |
"epoch": 0.7718627876682588, | |
"grad_norm": 0.8203125, | |
"learning_rate": 0.00016147066230168725, | |
"loss": 1.524, | |
"step": 1111 | |
}, | |
{ | |
"epoch": 0.7725575336517586, | |
"grad_norm": 0.6484375, | |
"learning_rate": 0.00016142029715436918, | |
"loss": 1.597, | |
"step": 1112 | |
}, | |
{ | |
"epoch": 0.7732522796352583, | |
"grad_norm": 1.1953125, | |
"learning_rate": 0.00016136993200705112, | |
"loss": 1.8816, | |
"step": 1113 | |
}, | |
{ | |
"epoch": 0.7739470256187582, | |
"grad_norm": 0.68359375, | |
"learning_rate": 0.00016131956685973305, | |
"loss": 2.019, | |
"step": 1114 | |
}, | |
{ | |
"epoch": 0.7746417716022579, | |
"grad_norm": 0.7890625, | |
"learning_rate": 0.00016126920171241502, | |
"loss": 1.7259, | |
"step": 1115 | |
}, | |
{ | |
"epoch": 0.7753365175857577, | |
"grad_norm": 0.90625, | |
"learning_rate": 0.00016121883656509695, | |
"loss": 1.7952, | |
"step": 1116 | |
}, | |
{ | |
"epoch": 0.7760312635692574, | |
"grad_norm": 2.0, | |
"learning_rate": 0.00016116847141777892, | |
"loss": 1.8621, | |
"step": 1117 | |
}, | |
{ | |
"epoch": 0.7767260095527573, | |
"grad_norm": 1.015625, | |
"learning_rate": 0.00016111810627046085, | |
"loss": 2.1855, | |
"step": 1118 | |
}, | |
{ | |
"epoch": 0.777420755536257, | |
"grad_norm": 1.0234375, | |
"learning_rate": 0.0001610677411231428, | |
"loss": 1.9703, | |
"step": 1119 | |
}, | |
{ | |
"epoch": 0.7781155015197568, | |
"grad_norm": 0.94921875, | |
"learning_rate": 0.00016101737597582473, | |
"loss": 2.0817, | |
"step": 1120 | |
}, | |
{ | |
"epoch": 0.7788102475032567, | |
"grad_norm": 1.2421875, | |
"learning_rate": 0.00016096701082850666, | |
"loss": 2.18, | |
"step": 1121 | |
}, | |
{ | |
"epoch": 0.7795049934867564, | |
"grad_norm": 1.03125, | |
"learning_rate": 0.00016091664568118863, | |
"loss": 1.8113, | |
"step": 1122 | |
}, | |
{ | |
"epoch": 0.7801997394702562, | |
"grad_norm": 0.94140625, | |
"learning_rate": 0.00016086628053387056, | |
"loss": 1.9625, | |
"step": 1123 | |
}, | |
{ | |
"epoch": 0.780894485453756, | |
"grad_norm": 0.8203125, | |
"learning_rate": 0.00016081591538655253, | |
"loss": 1.6948, | |
"step": 1124 | |
}, | |
{ | |
"epoch": 0.7815892314372558, | |
"grad_norm": 0.99609375, | |
"learning_rate": 0.00016076555023923446, | |
"loss": 2.1205, | |
"step": 1125 | |
}, | |
{ | |
"epoch": 0.7822839774207555, | |
"grad_norm": 1.1015625, | |
"learning_rate": 0.0001607151850919164, | |
"loss": 1.8704, | |
"step": 1126 | |
}, | |
{ | |
"epoch": 0.7829787234042553, | |
"grad_norm": 0.87109375, | |
"learning_rate": 0.00016066481994459833, | |
"loss": 1.6526, | |
"step": 1127 | |
}, | |
{ | |
"epoch": 0.7836734693877551, | |
"grad_norm": 0.69140625, | |
"learning_rate": 0.0001606144547972803, | |
"loss": 2.0315, | |
"step": 1128 | |
}, | |
{ | |
"epoch": 0.7843682153712549, | |
"grad_norm": 0.9296875, | |
"learning_rate": 0.00016056408964996223, | |
"loss": 2.0635, | |
"step": 1129 | |
}, | |
{ | |
"epoch": 0.7850629613547546, | |
"grad_norm": 0.859375, | |
"learning_rate": 0.00016051372450264417, | |
"loss": 1.9912, | |
"step": 1130 | |
} | |
], | |
"logging_steps": 1, | |
"max_steps": 4317, | |
"num_input_tokens_seen": 0, | |
"num_train_epochs": 3, | |
"save_steps": 10, | |
"total_flos": 7.924900854625124e+18, | |
"train_batch_size": 1, | |
"trial_name": null, | |
"trial_params": null | |
} | |