|
{ |
|
"best_metric": 2.559772253036499, |
|
"best_model_checkpoint": "miner_id_24/checkpoint-200", |
|
"epoch": 0.23802439750074383, |
|
"eval_steps": 50, |
|
"global_step": 200, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.0011901219875037191, |
|
"grad_norm": 0.7740907073020935, |
|
"learning_rate": 1.0100000000000002e-05, |
|
"loss": 2.8831, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0011901219875037191, |
|
"eval_loss": 3.97263765335083, |
|
"eval_runtime": 212.008, |
|
"eval_samples_per_second": 6.674, |
|
"eval_steps_per_second": 1.67, |
|
"step": 1 |
|
}, |
|
{ |
|
"epoch": 0.0023802439750074383, |
|
"grad_norm": 0.8903758525848389, |
|
"learning_rate": 2.0200000000000003e-05, |
|
"loss": 2.9817, |
|
"step": 2 |
|
}, |
|
{ |
|
"epoch": 0.0035703659625111574, |
|
"grad_norm": 1.0427348613739014, |
|
"learning_rate": 3.0299999999999998e-05, |
|
"loss": 3.1211, |
|
"step": 3 |
|
}, |
|
{ |
|
"epoch": 0.0047604879500148765, |
|
"grad_norm": 1.058449387550354, |
|
"learning_rate": 4.0400000000000006e-05, |
|
"loss": 3.1337, |
|
"step": 4 |
|
}, |
|
{ |
|
"epoch": 0.005950609937518596, |
|
"grad_norm": 1.0233001708984375, |
|
"learning_rate": 5.05e-05, |
|
"loss": 3.1145, |
|
"step": 5 |
|
}, |
|
{ |
|
"epoch": 0.007140731925022315, |
|
"grad_norm": 1.0409022569656372, |
|
"learning_rate": 6.0599999999999996e-05, |
|
"loss": 3.0859, |
|
"step": 6 |
|
}, |
|
{ |
|
"epoch": 0.008330853912526033, |
|
"grad_norm": 1.2061635255813599, |
|
"learning_rate": 7.07e-05, |
|
"loss": 3.042, |
|
"step": 7 |
|
}, |
|
{ |
|
"epoch": 0.009520975900029753, |
|
"grad_norm": 1.5559790134429932, |
|
"learning_rate": 8.080000000000001e-05, |
|
"loss": 3.0282, |
|
"step": 8 |
|
}, |
|
{ |
|
"epoch": 0.010711097887533471, |
|
"grad_norm": 1.5155705213546753, |
|
"learning_rate": 9.09e-05, |
|
"loss": 2.9623, |
|
"step": 9 |
|
}, |
|
{ |
|
"epoch": 0.011901219875037191, |
|
"grad_norm": 1.4185665845870972, |
|
"learning_rate": 0.000101, |
|
"loss": 2.8997, |
|
"step": 10 |
|
}, |
|
{ |
|
"epoch": 0.01309134186254091, |
|
"grad_norm": 1.2759884595870972, |
|
"learning_rate": 0.00010046842105263158, |
|
"loss": 3.0715, |
|
"step": 11 |
|
}, |
|
{ |
|
"epoch": 0.01428146385004463, |
|
"grad_norm": 1.3131742477416992, |
|
"learning_rate": 9.993684210526315e-05, |
|
"loss": 3.1495, |
|
"step": 12 |
|
}, |
|
{ |
|
"epoch": 0.015471585837548348, |
|
"grad_norm": 1.293761968612671, |
|
"learning_rate": 9.940526315789473e-05, |
|
"loss": 2.9229, |
|
"step": 13 |
|
}, |
|
{ |
|
"epoch": 0.016661707825052066, |
|
"grad_norm": 1.2810025215148926, |
|
"learning_rate": 9.887368421052632e-05, |
|
"loss": 3.0321, |
|
"step": 14 |
|
}, |
|
{ |
|
"epoch": 0.017851829812555786, |
|
"grad_norm": 1.2333396673202515, |
|
"learning_rate": 9.83421052631579e-05, |
|
"loss": 2.9467, |
|
"step": 15 |
|
}, |
|
{ |
|
"epoch": 0.019041951800059506, |
|
"grad_norm": 1.2278225421905518, |
|
"learning_rate": 9.781052631578948e-05, |
|
"loss": 2.9293, |
|
"step": 16 |
|
}, |
|
{ |
|
"epoch": 0.020232073787563226, |
|
"grad_norm": 1.2646679878234863, |
|
"learning_rate": 9.727894736842106e-05, |
|
"loss": 2.9282, |
|
"step": 17 |
|
}, |
|
{ |
|
"epoch": 0.021422195775066943, |
|
"grad_norm": 1.4180850982666016, |
|
"learning_rate": 9.674736842105263e-05, |
|
"loss": 2.9221, |
|
"step": 18 |
|
}, |
|
{ |
|
"epoch": 0.022612317762570663, |
|
"grad_norm": 1.4099845886230469, |
|
"learning_rate": 9.621578947368421e-05, |
|
"loss": 2.9182, |
|
"step": 19 |
|
}, |
|
{ |
|
"epoch": 0.023802439750074383, |
|
"grad_norm": 1.5067027807235718, |
|
"learning_rate": 9.568421052631578e-05, |
|
"loss": 2.8319, |
|
"step": 20 |
|
}, |
|
{ |
|
"epoch": 0.024992561737578103, |
|
"grad_norm": 1.4886541366577148, |
|
"learning_rate": 9.515263157894737e-05, |
|
"loss": 2.992, |
|
"step": 21 |
|
}, |
|
{ |
|
"epoch": 0.02618268372508182, |
|
"grad_norm": 1.6468743085861206, |
|
"learning_rate": 9.462105263157895e-05, |
|
"loss": 3.0023, |
|
"step": 22 |
|
}, |
|
{ |
|
"epoch": 0.02737280571258554, |
|
"grad_norm": 1.5920535326004028, |
|
"learning_rate": 9.408947368421054e-05, |
|
"loss": 2.7943, |
|
"step": 23 |
|
}, |
|
{ |
|
"epoch": 0.02856292770008926, |
|
"grad_norm": 1.651477336883545, |
|
"learning_rate": 9.355789473684211e-05, |
|
"loss": 3.0897, |
|
"step": 24 |
|
}, |
|
{ |
|
"epoch": 0.02975304968759298, |
|
"grad_norm": 1.6968199014663696, |
|
"learning_rate": 9.302631578947369e-05, |
|
"loss": 2.9393, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.030943171675096696, |
|
"grad_norm": 1.7623414993286133, |
|
"learning_rate": 9.249473684210526e-05, |
|
"loss": 3.0614, |
|
"step": 26 |
|
}, |
|
{ |
|
"epoch": 0.03213329366260042, |
|
"grad_norm": 1.6790002584457397, |
|
"learning_rate": 9.196315789473685e-05, |
|
"loss": 2.8908, |
|
"step": 27 |
|
}, |
|
{ |
|
"epoch": 0.03332341565010413, |
|
"grad_norm": 1.7653381824493408, |
|
"learning_rate": 9.143157894736843e-05, |
|
"loss": 2.9874, |
|
"step": 28 |
|
}, |
|
{ |
|
"epoch": 0.03451353763760785, |
|
"grad_norm": 1.9056634902954102, |
|
"learning_rate": 9.09e-05, |
|
"loss": 2.9435, |
|
"step": 29 |
|
}, |
|
{ |
|
"epoch": 0.03570365962511157, |
|
"grad_norm": 1.9138984680175781, |
|
"learning_rate": 9.036842105263158e-05, |
|
"loss": 2.9898, |
|
"step": 30 |
|
}, |
|
{ |
|
"epoch": 0.03689378161261529, |
|
"grad_norm": 2.077247142791748, |
|
"learning_rate": 8.983684210526316e-05, |
|
"loss": 3.0406, |
|
"step": 31 |
|
}, |
|
{ |
|
"epoch": 0.03808390360011901, |
|
"grad_norm": 2.173475980758667, |
|
"learning_rate": 8.930526315789474e-05, |
|
"loss": 3.1873, |
|
"step": 32 |
|
}, |
|
{ |
|
"epoch": 0.03927402558762273, |
|
"grad_norm": 2.2418313026428223, |
|
"learning_rate": 8.877368421052632e-05, |
|
"loss": 3.0145, |
|
"step": 33 |
|
}, |
|
{ |
|
"epoch": 0.04046414757512645, |
|
"grad_norm": 2.7017998695373535, |
|
"learning_rate": 8.82421052631579e-05, |
|
"loss": 3.0392, |
|
"step": 34 |
|
}, |
|
{ |
|
"epoch": 0.04165426956263017, |
|
"grad_norm": 2.644977569580078, |
|
"learning_rate": 8.771052631578948e-05, |
|
"loss": 3.065, |
|
"step": 35 |
|
}, |
|
{ |
|
"epoch": 0.042844391550133885, |
|
"grad_norm": 2.72674822807312, |
|
"learning_rate": 8.717894736842105e-05, |
|
"loss": 3.0143, |
|
"step": 36 |
|
}, |
|
{ |
|
"epoch": 0.044034513537637605, |
|
"grad_norm": 2.9149155616760254, |
|
"learning_rate": 8.664736842105263e-05, |
|
"loss": 3.0145, |
|
"step": 37 |
|
}, |
|
{ |
|
"epoch": 0.045224635525141325, |
|
"grad_norm": 2.7656924724578857, |
|
"learning_rate": 8.61157894736842e-05, |
|
"loss": 2.678, |
|
"step": 38 |
|
}, |
|
{ |
|
"epoch": 0.046414757512645045, |
|
"grad_norm": 3.271090507507324, |
|
"learning_rate": 8.55842105263158e-05, |
|
"loss": 2.7121, |
|
"step": 39 |
|
}, |
|
{ |
|
"epoch": 0.047604879500148765, |
|
"grad_norm": 3.187629461288452, |
|
"learning_rate": 8.505263157894737e-05, |
|
"loss": 2.7628, |
|
"step": 40 |
|
}, |
|
{ |
|
"epoch": 0.048795001487652485, |
|
"grad_norm": 3.188955783843994, |
|
"learning_rate": 8.452105263157896e-05, |
|
"loss": 2.3208, |
|
"step": 41 |
|
}, |
|
{ |
|
"epoch": 0.049985123475156205, |
|
"grad_norm": 2.877542734146118, |
|
"learning_rate": 8.398947368421053e-05, |
|
"loss": 2.3759, |
|
"step": 42 |
|
}, |
|
{ |
|
"epoch": 0.051175245462659925, |
|
"grad_norm": 3.292560338973999, |
|
"learning_rate": 8.345789473684211e-05, |
|
"loss": 2.3581, |
|
"step": 43 |
|
}, |
|
{ |
|
"epoch": 0.05236536745016364, |
|
"grad_norm": 3.187638282775879, |
|
"learning_rate": 8.292631578947368e-05, |
|
"loss": 2.5877, |
|
"step": 44 |
|
}, |
|
{ |
|
"epoch": 0.05355548943766736, |
|
"grad_norm": 3.735719680786133, |
|
"learning_rate": 8.239473684210526e-05, |
|
"loss": 2.6599, |
|
"step": 45 |
|
}, |
|
{ |
|
"epoch": 0.05474561142517108, |
|
"grad_norm": 4.3021697998046875, |
|
"learning_rate": 8.186315789473683e-05, |
|
"loss": 2.5517, |
|
"step": 46 |
|
}, |
|
{ |
|
"epoch": 0.0559357334126748, |
|
"grad_norm": 3.9108691215515137, |
|
"learning_rate": 8.133157894736842e-05, |
|
"loss": 2.8813, |
|
"step": 47 |
|
}, |
|
{ |
|
"epoch": 0.05712585540017852, |
|
"grad_norm": 3.6961636543273926, |
|
"learning_rate": 8.080000000000001e-05, |
|
"loss": 2.5407, |
|
"step": 48 |
|
}, |
|
{ |
|
"epoch": 0.05831597738768224, |
|
"grad_norm": 3.648516893386841, |
|
"learning_rate": 8.026842105263159e-05, |
|
"loss": 2.2375, |
|
"step": 49 |
|
}, |
|
{ |
|
"epoch": 0.05950609937518596, |
|
"grad_norm": 5.250331878662109, |
|
"learning_rate": 7.973684210526316e-05, |
|
"loss": 2.8475, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.05950609937518596, |
|
"eval_loss": 3.4745068550109863, |
|
"eval_runtime": 160.7821, |
|
"eval_samples_per_second": 8.801, |
|
"eval_steps_per_second": 2.202, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.06069622136268968, |
|
"grad_norm": 5.582263469696045, |
|
"learning_rate": 7.920526315789474e-05, |
|
"loss": 3.3864, |
|
"step": 51 |
|
}, |
|
{ |
|
"epoch": 0.06188634335019339, |
|
"grad_norm": 3.670656442642212, |
|
"learning_rate": 7.867368421052631e-05, |
|
"loss": 3.1908, |
|
"step": 52 |
|
}, |
|
{ |
|
"epoch": 0.06307646533769712, |
|
"grad_norm": 2.174717426300049, |
|
"learning_rate": 7.814210526315789e-05, |
|
"loss": 3.1241, |
|
"step": 53 |
|
}, |
|
{ |
|
"epoch": 0.06426658732520084, |
|
"grad_norm": 1.5080410242080688, |
|
"learning_rate": 7.761052631578946e-05, |
|
"loss": 3.0446, |
|
"step": 54 |
|
}, |
|
{ |
|
"epoch": 0.06545670931270456, |
|
"grad_norm": 1.178946614265442, |
|
"learning_rate": 7.707894736842105e-05, |
|
"loss": 2.8995, |
|
"step": 55 |
|
}, |
|
{ |
|
"epoch": 0.06664683130020826, |
|
"grad_norm": 1.1536166667938232, |
|
"learning_rate": 7.654736842105264e-05, |
|
"loss": 2.8091, |
|
"step": 56 |
|
}, |
|
{ |
|
"epoch": 0.06783695328771198, |
|
"grad_norm": 1.0446966886520386, |
|
"learning_rate": 7.601578947368422e-05, |
|
"loss": 2.8239, |
|
"step": 57 |
|
}, |
|
{ |
|
"epoch": 0.0690270752752157, |
|
"grad_norm": 0.9518328905105591, |
|
"learning_rate": 7.548421052631579e-05, |
|
"loss": 2.7598, |
|
"step": 58 |
|
}, |
|
{ |
|
"epoch": 0.07021719726271942, |
|
"grad_norm": 1.0942273139953613, |
|
"learning_rate": 7.495263157894737e-05, |
|
"loss": 2.7408, |
|
"step": 59 |
|
}, |
|
{ |
|
"epoch": 0.07140731925022314, |
|
"grad_norm": 1.1928379535675049, |
|
"learning_rate": 7.442105263157894e-05, |
|
"loss": 2.7216, |
|
"step": 60 |
|
}, |
|
{ |
|
"epoch": 0.07259744123772686, |
|
"grad_norm": 1.1087335348129272, |
|
"learning_rate": 7.388947368421053e-05, |
|
"loss": 2.8022, |
|
"step": 61 |
|
}, |
|
{ |
|
"epoch": 0.07378756322523058, |
|
"grad_norm": 1.1715253591537476, |
|
"learning_rate": 7.335789473684211e-05, |
|
"loss": 2.8765, |
|
"step": 62 |
|
}, |
|
{ |
|
"epoch": 0.0749776852127343, |
|
"grad_norm": 1.0879360437393188, |
|
"learning_rate": 7.282631578947368e-05, |
|
"loss": 2.5474, |
|
"step": 63 |
|
}, |
|
{ |
|
"epoch": 0.07616780720023802, |
|
"grad_norm": 1.0960795879364014, |
|
"learning_rate": 7.229473684210527e-05, |
|
"loss": 2.7884, |
|
"step": 64 |
|
}, |
|
{ |
|
"epoch": 0.07735792918774174, |
|
"grad_norm": 1.1867204904556274, |
|
"learning_rate": 7.176315789473685e-05, |
|
"loss": 2.8801, |
|
"step": 65 |
|
}, |
|
{ |
|
"epoch": 0.07854805117524546, |
|
"grad_norm": 1.1470366716384888, |
|
"learning_rate": 7.123157894736842e-05, |
|
"loss": 2.8809, |
|
"step": 66 |
|
}, |
|
{ |
|
"epoch": 0.07973817316274918, |
|
"grad_norm": 1.210035800933838, |
|
"learning_rate": 7.07e-05, |
|
"loss": 2.7099, |
|
"step": 67 |
|
}, |
|
{ |
|
"epoch": 0.0809282951502529, |
|
"grad_norm": 1.1540971994400024, |
|
"learning_rate": 7.016842105263159e-05, |
|
"loss": 2.7081, |
|
"step": 68 |
|
}, |
|
{ |
|
"epoch": 0.08211841713775662, |
|
"grad_norm": 1.1911342144012451, |
|
"learning_rate": 6.963684210526316e-05, |
|
"loss": 2.676, |
|
"step": 69 |
|
}, |
|
{ |
|
"epoch": 0.08330853912526034, |
|
"grad_norm": 1.2271251678466797, |
|
"learning_rate": 6.910526315789474e-05, |
|
"loss": 2.7204, |
|
"step": 70 |
|
}, |
|
{ |
|
"epoch": 0.08449866111276406, |
|
"grad_norm": 1.435076117515564, |
|
"learning_rate": 6.857368421052631e-05, |
|
"loss": 2.7106, |
|
"step": 71 |
|
}, |
|
{ |
|
"epoch": 0.08568878310026777, |
|
"grad_norm": 1.3325750827789307, |
|
"learning_rate": 6.80421052631579e-05, |
|
"loss": 2.7424, |
|
"step": 72 |
|
}, |
|
{ |
|
"epoch": 0.08687890508777149, |
|
"grad_norm": 1.4230831861495972, |
|
"learning_rate": 6.751052631578948e-05, |
|
"loss": 2.952, |
|
"step": 73 |
|
}, |
|
{ |
|
"epoch": 0.08806902707527521, |
|
"grad_norm": 1.4959286451339722, |
|
"learning_rate": 6.697894736842105e-05, |
|
"loss": 2.8185, |
|
"step": 74 |
|
}, |
|
{ |
|
"epoch": 0.08925914906277893, |
|
"grad_norm": 1.5184545516967773, |
|
"learning_rate": 6.644736842105264e-05, |
|
"loss": 2.7411, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.09044927105028265, |
|
"grad_norm": 1.5939208269119263, |
|
"learning_rate": 6.591578947368422e-05, |
|
"loss": 2.8724, |
|
"step": 76 |
|
}, |
|
{ |
|
"epoch": 0.09163939303778637, |
|
"grad_norm": 1.5517206192016602, |
|
"learning_rate": 6.538421052631579e-05, |
|
"loss": 2.9149, |
|
"step": 77 |
|
}, |
|
{ |
|
"epoch": 0.09282951502529009, |
|
"grad_norm": 1.5986747741699219, |
|
"learning_rate": 6.485263157894737e-05, |
|
"loss": 2.7335, |
|
"step": 78 |
|
}, |
|
{ |
|
"epoch": 0.09401963701279381, |
|
"grad_norm": 1.9907118082046509, |
|
"learning_rate": 6.432105263157894e-05, |
|
"loss": 3.0217, |
|
"step": 79 |
|
}, |
|
{ |
|
"epoch": 0.09520975900029753, |
|
"grad_norm": 2.0418686866760254, |
|
"learning_rate": 6.378947368421053e-05, |
|
"loss": 2.8225, |
|
"step": 80 |
|
}, |
|
{ |
|
"epoch": 0.09639988098780125, |
|
"grad_norm": 2.0640804767608643, |
|
"learning_rate": 6.32578947368421e-05, |
|
"loss": 3.1019, |
|
"step": 81 |
|
}, |
|
{ |
|
"epoch": 0.09759000297530497, |
|
"grad_norm": 2.187643527984619, |
|
"learning_rate": 6.27263157894737e-05, |
|
"loss": 2.915, |
|
"step": 82 |
|
}, |
|
{ |
|
"epoch": 0.09878012496280869, |
|
"grad_norm": 2.249582052230835, |
|
"learning_rate": 6.219473684210527e-05, |
|
"loss": 3.0026, |
|
"step": 83 |
|
}, |
|
{ |
|
"epoch": 0.09997024695031241, |
|
"grad_norm": 2.528813362121582, |
|
"learning_rate": 6.166315789473685e-05, |
|
"loss": 2.777, |
|
"step": 84 |
|
}, |
|
{ |
|
"epoch": 0.10116036893781613, |
|
"grad_norm": 2.0678341388702393, |
|
"learning_rate": 6.113157894736842e-05, |
|
"loss": 2.7042, |
|
"step": 85 |
|
}, |
|
{ |
|
"epoch": 0.10235049092531985, |
|
"grad_norm": 2.3720791339874268, |
|
"learning_rate": 6.0599999999999996e-05, |
|
"loss": 2.7912, |
|
"step": 86 |
|
}, |
|
{ |
|
"epoch": 0.10354061291282357, |
|
"grad_norm": 2.4685397148132324, |
|
"learning_rate": 6.006842105263158e-05, |
|
"loss": 2.8001, |
|
"step": 87 |
|
}, |
|
{ |
|
"epoch": 0.10473073490032728, |
|
"grad_norm": 2.406266927719116, |
|
"learning_rate": 5.953684210526315e-05, |
|
"loss": 2.558, |
|
"step": 88 |
|
}, |
|
{ |
|
"epoch": 0.105920856887831, |
|
"grad_norm": 2.5169339179992676, |
|
"learning_rate": 5.900526315789474e-05, |
|
"loss": 2.2928, |
|
"step": 89 |
|
}, |
|
{ |
|
"epoch": 0.10711097887533472, |
|
"grad_norm": 2.6541452407836914, |
|
"learning_rate": 5.847368421052632e-05, |
|
"loss": 2.7278, |
|
"step": 90 |
|
}, |
|
{ |
|
"epoch": 0.10830110086283844, |
|
"grad_norm": 2.8647027015686035, |
|
"learning_rate": 5.79421052631579e-05, |
|
"loss": 2.635, |
|
"step": 91 |
|
}, |
|
{ |
|
"epoch": 0.10949122285034216, |
|
"grad_norm": 3.1823761463165283, |
|
"learning_rate": 5.7410526315789475e-05, |
|
"loss": 2.2292, |
|
"step": 92 |
|
}, |
|
{ |
|
"epoch": 0.11068134483784588, |
|
"grad_norm": 3.207031726837158, |
|
"learning_rate": 5.687894736842105e-05, |
|
"loss": 2.7533, |
|
"step": 93 |
|
}, |
|
{ |
|
"epoch": 0.1118714668253496, |
|
"grad_norm": 3.163825273513794, |
|
"learning_rate": 5.6347368421052625e-05, |
|
"loss": 2.5126, |
|
"step": 94 |
|
}, |
|
{ |
|
"epoch": 0.11306158881285332, |
|
"grad_norm": 3.2235989570617676, |
|
"learning_rate": 5.5815789473684214e-05, |
|
"loss": 2.5196, |
|
"step": 95 |
|
}, |
|
{ |
|
"epoch": 0.11425171080035704, |
|
"grad_norm": 4.001104831695557, |
|
"learning_rate": 5.5284210526315796e-05, |
|
"loss": 3.0249, |
|
"step": 96 |
|
}, |
|
{ |
|
"epoch": 0.11544183278786076, |
|
"grad_norm": 3.1947779655456543, |
|
"learning_rate": 5.475263157894737e-05, |
|
"loss": 2.0786, |
|
"step": 97 |
|
}, |
|
{ |
|
"epoch": 0.11663195477536448, |
|
"grad_norm": 3.7150704860687256, |
|
"learning_rate": 5.422105263157895e-05, |
|
"loss": 2.1846, |
|
"step": 98 |
|
}, |
|
{ |
|
"epoch": 0.1178220767628682, |
|
"grad_norm": 3.942005157470703, |
|
"learning_rate": 5.368947368421053e-05, |
|
"loss": 2.2755, |
|
"step": 99 |
|
}, |
|
{ |
|
"epoch": 0.11901219875037192, |
|
"grad_norm": 8.126349449157715, |
|
"learning_rate": 5.3157894736842104e-05, |
|
"loss": 2.4846, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.11901219875037192, |
|
"eval_loss": 3.5016069412231445, |
|
"eval_runtime": 160.8549, |
|
"eval_samples_per_second": 8.797, |
|
"eval_steps_per_second": 2.201, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.12020232073787564, |
|
"grad_norm": 8.165000915527344, |
|
"learning_rate": 5.262631578947368e-05, |
|
"loss": 3.5609, |
|
"step": 101 |
|
}, |
|
{ |
|
"epoch": 0.12139244272537936, |
|
"grad_norm": 6.8532938957214355, |
|
"learning_rate": 5.209473684210527e-05, |
|
"loss": 3.6081, |
|
"step": 102 |
|
}, |
|
{ |
|
"epoch": 0.12258256471288308, |
|
"grad_norm": 4.252460479736328, |
|
"learning_rate": 5.1563157894736844e-05, |
|
"loss": 3.2864, |
|
"step": 103 |
|
}, |
|
{ |
|
"epoch": 0.12377268670038678, |
|
"grad_norm": 2.2745885848999023, |
|
"learning_rate": 5.1031578947368426e-05, |
|
"loss": 3.0608, |
|
"step": 104 |
|
}, |
|
{ |
|
"epoch": 0.1249628086878905, |
|
"grad_norm": 1.3418879508972168, |
|
"learning_rate": 5.05e-05, |
|
"loss": 2.8344, |
|
"step": 105 |
|
}, |
|
{ |
|
"epoch": 0.12615293067539424, |
|
"grad_norm": 1.0786305665969849, |
|
"learning_rate": 4.9968421052631576e-05, |
|
"loss": 2.8097, |
|
"step": 106 |
|
}, |
|
{ |
|
"epoch": 0.12734305266289794, |
|
"grad_norm": 1.0196248292922974, |
|
"learning_rate": 4.943684210526316e-05, |
|
"loss": 2.7265, |
|
"step": 107 |
|
}, |
|
{ |
|
"epoch": 0.12853317465040168, |
|
"grad_norm": 0.9965652823448181, |
|
"learning_rate": 4.890526315789474e-05, |
|
"loss": 2.785, |
|
"step": 108 |
|
}, |
|
{ |
|
"epoch": 0.12972329663790538, |
|
"grad_norm": 0.9790583252906799, |
|
"learning_rate": 4.8373684210526316e-05, |
|
"loss": 2.704, |
|
"step": 109 |
|
}, |
|
{ |
|
"epoch": 0.13091341862540912, |
|
"grad_norm": 1.0119240283966064, |
|
"learning_rate": 4.784210526315789e-05, |
|
"loss": 2.7151, |
|
"step": 110 |
|
}, |
|
{ |
|
"epoch": 0.13210354061291282, |
|
"grad_norm": 0.9607682228088379, |
|
"learning_rate": 4.731052631578947e-05, |
|
"loss": 2.6745, |
|
"step": 111 |
|
}, |
|
{ |
|
"epoch": 0.13329366260041653, |
|
"grad_norm": 1.0079097747802734, |
|
"learning_rate": 4.6778947368421055e-05, |
|
"loss": 2.6822, |
|
"step": 112 |
|
}, |
|
{ |
|
"epoch": 0.13448378458792026, |
|
"grad_norm": 1.1215417385101318, |
|
"learning_rate": 4.624736842105263e-05, |
|
"loss": 2.6709, |
|
"step": 113 |
|
}, |
|
{ |
|
"epoch": 0.13567390657542397, |
|
"grad_norm": 1.1396487951278687, |
|
"learning_rate": 4.571578947368421e-05, |
|
"loss": 2.7219, |
|
"step": 114 |
|
}, |
|
{ |
|
"epoch": 0.1368640285629277, |
|
"grad_norm": 1.1168203353881836, |
|
"learning_rate": 4.518421052631579e-05, |
|
"loss": 2.6713, |
|
"step": 115 |
|
}, |
|
{ |
|
"epoch": 0.1380541505504314, |
|
"grad_norm": 1.1319602727890015, |
|
"learning_rate": 4.465263157894737e-05, |
|
"loss": 2.7036, |
|
"step": 116 |
|
}, |
|
{ |
|
"epoch": 0.13924427253793514, |
|
"grad_norm": 1.2012885808944702, |
|
"learning_rate": 4.412105263157895e-05, |
|
"loss": 2.7752, |
|
"step": 117 |
|
}, |
|
{ |
|
"epoch": 0.14043439452543885, |
|
"grad_norm": 1.2033405303955078, |
|
"learning_rate": 4.358947368421053e-05, |
|
"loss": 2.7599, |
|
"step": 118 |
|
}, |
|
{ |
|
"epoch": 0.14162451651294258, |
|
"grad_norm": 1.1886316537857056, |
|
"learning_rate": 4.30578947368421e-05, |
|
"loss": 2.6826, |
|
"step": 119 |
|
}, |
|
{ |
|
"epoch": 0.1428146385004463, |
|
"grad_norm": 1.200430154800415, |
|
"learning_rate": 4.2526315789473685e-05, |
|
"loss": 2.7017, |
|
"step": 120 |
|
}, |
|
{ |
|
"epoch": 0.14400476048795002, |
|
"grad_norm": 1.2769813537597656, |
|
"learning_rate": 4.199473684210527e-05, |
|
"loss": 2.7329, |
|
"step": 121 |
|
}, |
|
{ |
|
"epoch": 0.14519488247545373, |
|
"grad_norm": 1.3486050367355347, |
|
"learning_rate": 4.146315789473684e-05, |
|
"loss": 2.5735, |
|
"step": 122 |
|
}, |
|
{ |
|
"epoch": 0.14638500446295746, |
|
"grad_norm": 1.413003921508789, |
|
"learning_rate": 4.093157894736842e-05, |
|
"loss": 2.7845, |
|
"step": 123 |
|
}, |
|
{ |
|
"epoch": 0.14757512645046117, |
|
"grad_norm": 1.3913912773132324, |
|
"learning_rate": 4.0400000000000006e-05, |
|
"loss": 2.6509, |
|
"step": 124 |
|
}, |
|
{ |
|
"epoch": 0.1487652484379649, |
|
"grad_norm": 1.4366058111190796, |
|
"learning_rate": 3.986842105263158e-05, |
|
"loss": 2.6653, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.1499553704254686, |
|
"grad_norm": 1.4925942420959473, |
|
"learning_rate": 3.933684210526316e-05, |
|
"loss": 2.6635, |
|
"step": 126 |
|
}, |
|
{ |
|
"epoch": 0.15114549241297234, |
|
"grad_norm": 1.6500319242477417, |
|
"learning_rate": 3.880526315789473e-05, |
|
"loss": 2.674, |
|
"step": 127 |
|
}, |
|
{ |
|
"epoch": 0.15233561440047605, |
|
"grad_norm": 1.5842981338500977, |
|
"learning_rate": 3.827368421052632e-05, |
|
"loss": 2.7202, |
|
"step": 128 |
|
}, |
|
{ |
|
"epoch": 0.15352573638797976, |
|
"grad_norm": 1.6864389181137085, |
|
"learning_rate": 3.7742105263157896e-05, |
|
"loss": 2.8063, |
|
"step": 129 |
|
}, |
|
{ |
|
"epoch": 0.1547158583754835, |
|
"grad_norm": 1.876000165939331, |
|
"learning_rate": 3.721052631578947e-05, |
|
"loss": 2.644, |
|
"step": 130 |
|
}, |
|
{ |
|
"epoch": 0.1559059803629872, |
|
"grad_norm": 1.9258829355239868, |
|
"learning_rate": 3.6678947368421054e-05, |
|
"loss": 2.8121, |
|
"step": 131 |
|
}, |
|
{ |
|
"epoch": 0.15709610235049093, |
|
"grad_norm": 1.9644100666046143, |
|
"learning_rate": 3.6147368421052636e-05, |
|
"loss": 2.811, |
|
"step": 132 |
|
}, |
|
{ |
|
"epoch": 0.15828622433799464, |
|
"grad_norm": 2.027679681777954, |
|
"learning_rate": 3.561578947368421e-05, |
|
"loss": 2.6938, |
|
"step": 133 |
|
}, |
|
{ |
|
"epoch": 0.15947634632549837, |
|
"grad_norm": 2.0536186695098877, |
|
"learning_rate": 3.508421052631579e-05, |
|
"loss": 2.5575, |
|
"step": 134 |
|
}, |
|
{ |
|
"epoch": 0.16066646831300208, |
|
"grad_norm": 2.3553948402404785, |
|
"learning_rate": 3.455263157894737e-05, |
|
"loss": 2.8297, |
|
"step": 135 |
|
}, |
|
{ |
|
"epoch": 0.1618565903005058, |
|
"grad_norm": 2.237311601638794, |
|
"learning_rate": 3.402105263157895e-05, |
|
"loss": 2.6245, |
|
"step": 136 |
|
}, |
|
{ |
|
"epoch": 0.16304671228800952, |
|
"grad_norm": 2.386514663696289, |
|
"learning_rate": 3.3489473684210526e-05, |
|
"loss": 2.5633, |
|
"step": 137 |
|
}, |
|
{ |
|
"epoch": 0.16423683427551325, |
|
"grad_norm": 2.3817429542541504, |
|
"learning_rate": 3.295789473684211e-05, |
|
"loss": 2.3802, |
|
"step": 138 |
|
}, |
|
{ |
|
"epoch": 0.16542695626301696, |
|
"grad_norm": 2.4430129528045654, |
|
"learning_rate": 3.242631578947368e-05, |
|
"loss": 2.6482, |
|
"step": 139 |
|
}, |
|
{ |
|
"epoch": 0.1666170782505207, |
|
"grad_norm": 2.4865427017211914, |
|
"learning_rate": 3.1894736842105265e-05, |
|
"loss": 1.8909, |
|
"step": 140 |
|
}, |
|
{ |
|
"epoch": 0.1678072002380244, |
|
"grad_norm": 3.3364109992980957, |
|
"learning_rate": 3.136315789473685e-05, |
|
"loss": 2.3072, |
|
"step": 141 |
|
}, |
|
{ |
|
"epoch": 0.16899732222552813, |
|
"grad_norm": 3.3114304542541504, |
|
"learning_rate": 3.083157894736842e-05, |
|
"loss": 2.7235, |
|
"step": 142 |
|
}, |
|
{ |
|
"epoch": 0.17018744421303184, |
|
"grad_norm": 3.0344221591949463, |
|
"learning_rate": 3.0299999999999998e-05, |
|
"loss": 2.2445, |
|
"step": 143 |
|
}, |
|
{ |
|
"epoch": 0.17137756620053554, |
|
"grad_norm": 2.9184038639068604, |
|
"learning_rate": 2.9768421052631577e-05, |
|
"loss": 2.1895, |
|
"step": 144 |
|
}, |
|
{ |
|
"epoch": 0.17256768818803928, |
|
"grad_norm": 3.6383919715881348, |
|
"learning_rate": 2.923684210526316e-05, |
|
"loss": 2.4532, |
|
"step": 145 |
|
}, |
|
{ |
|
"epoch": 0.17375781017554298, |
|
"grad_norm": 3.0095598697662354, |
|
"learning_rate": 2.8705263157894737e-05, |
|
"loss": 2.0861, |
|
"step": 146 |
|
}, |
|
{ |
|
"epoch": 0.17494793216304672, |
|
"grad_norm": 3.4419445991516113, |
|
"learning_rate": 2.8173684210526313e-05, |
|
"loss": 2.3893, |
|
"step": 147 |
|
}, |
|
{ |
|
"epoch": 0.17613805415055042, |
|
"grad_norm": 4.293227195739746, |
|
"learning_rate": 2.7642105263157898e-05, |
|
"loss": 2.0973, |
|
"step": 148 |
|
}, |
|
{ |
|
"epoch": 0.17732817613805416, |
|
"grad_norm": 4.744500637054443, |
|
"learning_rate": 2.7110526315789473e-05, |
|
"loss": 2.3532, |
|
"step": 149 |
|
}, |
|
{ |
|
"epoch": 0.17851829812555786, |
|
"grad_norm": 6.184078216552734, |
|
"learning_rate": 2.6578947368421052e-05, |
|
"loss": 2.7711, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.17851829812555786, |
|
"eval_loss": 2.812873125076294, |
|
"eval_runtime": 160.8573, |
|
"eval_samples_per_second": 8.797, |
|
"eval_steps_per_second": 2.201, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.1797084201130616, |
|
"grad_norm": 2.1665163040161133, |
|
"learning_rate": 2.6047368421052634e-05, |
|
"loss": 2.9363, |
|
"step": 151 |
|
}, |
|
{ |
|
"epoch": 0.1808985421005653, |
|
"grad_norm": 2.231947422027588, |
|
"learning_rate": 2.5515789473684213e-05, |
|
"loss": 2.8189, |
|
"step": 152 |
|
}, |
|
{ |
|
"epoch": 0.18208866408806904, |
|
"grad_norm": 1.9498807191848755, |
|
"learning_rate": 2.4984210526315788e-05, |
|
"loss": 2.8967, |
|
"step": 153 |
|
}, |
|
{ |
|
"epoch": 0.18327878607557274, |
|
"grad_norm": 1.6646301746368408, |
|
"learning_rate": 2.445263157894737e-05, |
|
"loss": 2.9605, |
|
"step": 154 |
|
}, |
|
{ |
|
"epoch": 0.18446890806307648, |
|
"grad_norm": 1.2766884565353394, |
|
"learning_rate": 2.3921052631578946e-05, |
|
"loss": 2.7265, |
|
"step": 155 |
|
}, |
|
{ |
|
"epoch": 0.18565903005058018, |
|
"grad_norm": 1.0804224014282227, |
|
"learning_rate": 2.3389473684210528e-05, |
|
"loss": 2.7534, |
|
"step": 156 |
|
}, |
|
{ |
|
"epoch": 0.18684915203808392, |
|
"grad_norm": 0.9548969268798828, |
|
"learning_rate": 2.2857894736842106e-05, |
|
"loss": 2.7941, |
|
"step": 157 |
|
}, |
|
{ |
|
"epoch": 0.18803927402558762, |
|
"grad_norm": 0.8820357918739319, |
|
"learning_rate": 2.2326315789473685e-05, |
|
"loss": 2.5207, |
|
"step": 158 |
|
}, |
|
{ |
|
"epoch": 0.18922939601309136, |
|
"grad_norm": 0.9637076258659363, |
|
"learning_rate": 2.1794736842105264e-05, |
|
"loss": 2.5984, |
|
"step": 159 |
|
}, |
|
{ |
|
"epoch": 0.19041951800059506, |
|
"grad_norm": 0.9365648627281189, |
|
"learning_rate": 2.1263157894736842e-05, |
|
"loss": 2.6507, |
|
"step": 160 |
|
}, |
|
{ |
|
"epoch": 0.19160963998809877, |
|
"grad_norm": 0.9047538638114929, |
|
"learning_rate": 2.073157894736842e-05, |
|
"loss": 2.5624, |
|
"step": 161 |
|
}, |
|
{ |
|
"epoch": 0.1927997619756025, |
|
"grad_norm": 0.9913797974586487, |
|
"learning_rate": 2.0200000000000003e-05, |
|
"loss": 2.7407, |
|
"step": 162 |
|
}, |
|
{ |
|
"epoch": 0.1939898839631062, |
|
"grad_norm": 0.9947323203086853, |
|
"learning_rate": 1.966842105263158e-05, |
|
"loss": 2.6398, |
|
"step": 163 |
|
}, |
|
{ |
|
"epoch": 0.19518000595060994, |
|
"grad_norm": 0.9551875591278076, |
|
"learning_rate": 1.913684210526316e-05, |
|
"loss": 2.6093, |
|
"step": 164 |
|
}, |
|
{ |
|
"epoch": 0.19637012793811365, |
|
"grad_norm": 0.9988086819648743, |
|
"learning_rate": 1.8605263157894736e-05, |
|
"loss": 2.5585, |
|
"step": 165 |
|
}, |
|
{ |
|
"epoch": 0.19756024992561738, |
|
"grad_norm": 1.087716817855835, |
|
"learning_rate": 1.8073684210526318e-05, |
|
"loss": 2.6282, |
|
"step": 166 |
|
}, |
|
{ |
|
"epoch": 0.1987503719131211, |
|
"grad_norm": 1.0601743459701538, |
|
"learning_rate": 1.7542105263157897e-05, |
|
"loss": 2.6258, |
|
"step": 167 |
|
}, |
|
{ |
|
"epoch": 0.19994049390062482, |
|
"grad_norm": 1.1024737358093262, |
|
"learning_rate": 1.7010526315789475e-05, |
|
"loss": 2.5256, |
|
"step": 168 |
|
}, |
|
{ |
|
"epoch": 0.20113061588812853, |
|
"grad_norm": 1.1294111013412476, |
|
"learning_rate": 1.6478947368421054e-05, |
|
"loss": 2.6306, |
|
"step": 169 |
|
}, |
|
{ |
|
"epoch": 0.20232073787563226, |
|
"grad_norm": 1.1903879642486572, |
|
"learning_rate": 1.5947368421052633e-05, |
|
"loss": 2.6052, |
|
"step": 170 |
|
}, |
|
{ |
|
"epoch": 0.20351085986313597, |
|
"grad_norm": 1.253252387046814, |
|
"learning_rate": 1.541578947368421e-05, |
|
"loss": 2.7537, |
|
"step": 171 |
|
}, |
|
{ |
|
"epoch": 0.2047009818506397, |
|
"grad_norm": 1.3783352375030518, |
|
"learning_rate": 1.4884210526315788e-05, |
|
"loss": 2.5608, |
|
"step": 172 |
|
}, |
|
{ |
|
"epoch": 0.2058911038381434, |
|
"grad_norm": 1.3314725160598755, |
|
"learning_rate": 1.4352631578947369e-05, |
|
"loss": 2.6971, |
|
"step": 173 |
|
}, |
|
{ |
|
"epoch": 0.20708122582564714, |
|
"grad_norm": 1.3991272449493408, |
|
"learning_rate": 1.3821052631578949e-05, |
|
"loss": 2.6963, |
|
"step": 174 |
|
}, |
|
{ |
|
"epoch": 0.20827134781315085, |
|
"grad_norm": 1.5228500366210938, |
|
"learning_rate": 1.3289473684210526e-05, |
|
"loss": 2.5793, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.20946146980065455, |
|
"grad_norm": 1.4984205961227417, |
|
"learning_rate": 1.2757894736842106e-05, |
|
"loss": 2.666, |
|
"step": 176 |
|
}, |
|
{ |
|
"epoch": 0.2106515917881583, |
|
"grad_norm": 1.7694042921066284, |
|
"learning_rate": 1.2226315789473685e-05, |
|
"loss": 2.8852, |
|
"step": 177 |
|
}, |
|
{ |
|
"epoch": 0.211841713775662, |
|
"grad_norm": 1.8036147356033325, |
|
"learning_rate": 1.1694736842105264e-05, |
|
"loss": 2.7626, |
|
"step": 178 |
|
}, |
|
{ |
|
"epoch": 0.21303183576316573, |
|
"grad_norm": 1.7980536222457886, |
|
"learning_rate": 1.1163157894736842e-05, |
|
"loss": 2.7129, |
|
"step": 179 |
|
}, |
|
{ |
|
"epoch": 0.21422195775066943, |
|
"grad_norm": 2.07534122467041, |
|
"learning_rate": 1.0631578947368421e-05, |
|
"loss": 2.9602, |
|
"step": 180 |
|
}, |
|
{ |
|
"epoch": 0.21541207973817317, |
|
"grad_norm": 2.0630900859832764, |
|
"learning_rate": 1.0100000000000002e-05, |
|
"loss": 2.765, |
|
"step": 181 |
|
}, |
|
{ |
|
"epoch": 0.21660220172567687, |
|
"grad_norm": 2.077697992324829, |
|
"learning_rate": 9.56842105263158e-06, |
|
"loss": 2.5767, |
|
"step": 182 |
|
}, |
|
{ |
|
"epoch": 0.2177923237131806, |
|
"grad_norm": 2.0414209365844727, |
|
"learning_rate": 9.036842105263159e-06, |
|
"loss": 2.7381, |
|
"step": 183 |
|
}, |
|
{ |
|
"epoch": 0.21898244570068431, |
|
"grad_norm": 2.3121683597564697, |
|
"learning_rate": 8.505263157894738e-06, |
|
"loss": 2.6139, |
|
"step": 184 |
|
}, |
|
{ |
|
"epoch": 0.22017256768818805, |
|
"grad_norm": 2.3920252323150635, |
|
"learning_rate": 7.973684210526316e-06, |
|
"loss": 2.8152, |
|
"step": 185 |
|
}, |
|
{ |
|
"epoch": 0.22136268967569175, |
|
"grad_norm": 2.3734066486358643, |
|
"learning_rate": 7.442105263157894e-06, |
|
"loss": 2.3302, |
|
"step": 186 |
|
}, |
|
{ |
|
"epoch": 0.2225528116631955, |
|
"grad_norm": 2.291586399078369, |
|
"learning_rate": 6.9105263157894745e-06, |
|
"loss": 2.7101, |
|
"step": 187 |
|
}, |
|
{ |
|
"epoch": 0.2237429336506992, |
|
"grad_norm": 2.3693385124206543, |
|
"learning_rate": 6.378947368421053e-06, |
|
"loss": 2.232, |
|
"step": 188 |
|
}, |
|
{ |
|
"epoch": 0.22493305563820293, |
|
"grad_norm": 3.0694398880004883, |
|
"learning_rate": 5.847368421052632e-06, |
|
"loss": 2.2262, |
|
"step": 189 |
|
}, |
|
{ |
|
"epoch": 0.22612317762570663, |
|
"grad_norm": 2.5530786514282227, |
|
"learning_rate": 5.315789473684211e-06, |
|
"loss": 2.2363, |
|
"step": 190 |
|
}, |
|
{ |
|
"epoch": 0.22731329961321037, |
|
"grad_norm": 2.73111629486084, |
|
"learning_rate": 4.78421052631579e-06, |
|
"loss": 2.3876, |
|
"step": 191 |
|
}, |
|
{ |
|
"epoch": 0.22850342160071407, |
|
"grad_norm": 2.807893753051758, |
|
"learning_rate": 4.252631578947369e-06, |
|
"loss": 2.261, |
|
"step": 192 |
|
}, |
|
{ |
|
"epoch": 0.22969354358821778, |
|
"grad_norm": 2.7763075828552246, |
|
"learning_rate": 3.721052631578947e-06, |
|
"loss": 2.0528, |
|
"step": 193 |
|
}, |
|
{ |
|
"epoch": 0.23088366557572151, |
|
"grad_norm": 3.2379202842712402, |
|
"learning_rate": 3.1894736842105266e-06, |
|
"loss": 2.5698, |
|
"step": 194 |
|
}, |
|
{ |
|
"epoch": 0.23207378756322522, |
|
"grad_norm": 4.352906227111816, |
|
"learning_rate": 2.6578947368421053e-06, |
|
"loss": 2.8109, |
|
"step": 195 |
|
}, |
|
{ |
|
"epoch": 0.23326390955072895, |
|
"grad_norm": 3.5866363048553467, |
|
"learning_rate": 2.1263157894736844e-06, |
|
"loss": 2.2815, |
|
"step": 196 |
|
}, |
|
{ |
|
"epoch": 0.23445403153823266, |
|
"grad_norm": 4.414037227630615, |
|
"learning_rate": 1.5947368421052633e-06, |
|
"loss": 2.4135, |
|
"step": 197 |
|
}, |
|
{ |
|
"epoch": 0.2356441535257364, |
|
"grad_norm": 4.259603500366211, |
|
"learning_rate": 1.0631578947368422e-06, |
|
"loss": 2.0559, |
|
"step": 198 |
|
}, |
|
{ |
|
"epoch": 0.2368342755132401, |
|
"grad_norm": 4.946102619171143, |
|
"learning_rate": 5.315789473684211e-07, |
|
"loss": 1.8129, |
|
"step": 199 |
|
}, |
|
{ |
|
"epoch": 0.23802439750074383, |
|
"grad_norm": 7.032172203063965, |
|
"learning_rate": 0.0, |
|
"loss": 3.0412, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.23802439750074383, |
|
"eval_loss": 2.559772253036499, |
|
"eval_runtime": 160.6933, |
|
"eval_samples_per_second": 8.806, |
|
"eval_steps_per_second": 2.203, |
|
"step": 200 |
|
} |
|
], |
|
"logging_steps": 1, |
|
"max_steps": 200, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 50, |
|
"stateful_callbacks": { |
|
"EarlyStoppingCallback": { |
|
"args": { |
|
"early_stopping_patience": 5, |
|
"early_stopping_threshold": 0.0 |
|
}, |
|
"attributes": { |
|
"early_stopping_patience_counter": 0 |
|
} |
|
}, |
|
"TrainerControl": { |
|
"args": { |
|
"should_epoch_stop": false, |
|
"should_evaluate": false, |
|
"should_log": false, |
|
"should_save": true, |
|
"should_training_stop": true |
|
}, |
|
"attributes": {} |
|
} |
|
}, |
|
"total_flos": 3.0321122932791706e+17, |
|
"train_batch_size": 8, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|