{
  "best_metric": 3.767355442047119,
  "best_model_checkpoint": "miner_id_24/checkpoint-75",
  "epoch": 2.4877250409165304,
  "eval_steps": 25,
  "global_step": 95,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 0.02618657937806874,
      "grad_norm": 0.8365817070007324,
      "learning_rate": 3.3333333333333335e-05,
      "loss": 4.3524,
      "step": 1
    },
    {
      "epoch": 0.02618657937806874,
      "eval_loss": 4.789937496185303,
      "eval_runtime": 0.2765,
      "eval_samples_per_second": 180.833,
      "eval_steps_per_second": 47.017,
      "step": 1
    },
    {
      "epoch": 0.05237315875613748,
      "grad_norm": 1.151999831199646,
      "learning_rate": 6.666666666666667e-05,
      "loss": 4.528,
      "step": 2
    },
    {
      "epoch": 0.07855973813420622,
      "grad_norm": 1.1144686937332153,
      "learning_rate": 0.0001,
      "loss": 4.5486,
      "step": 3
    },
    {
      "epoch": 0.10474631751227496,
      "grad_norm": 0.9449473023414612,
      "learning_rate": 9.997376600647783e-05,
      "loss": 4.6597,
      "step": 4
    },
    {
      "epoch": 0.1309328968903437,
      "grad_norm": 0.8100228905677795,
      "learning_rate": 9.989509461357426e-05,
      "loss": 4.6623,
      "step": 5
    },
    {
      "epoch": 0.15711947626841244,
      "grad_norm": 0.8471514582633972,
      "learning_rate": 9.976407754861426e-05,
      "loss": 4.6003,
      "step": 6
    },
    {
      "epoch": 0.18330605564648117,
      "grad_norm": 0.9861735701560974,
      "learning_rate": 9.958086757163489e-05,
      "loss": 4.5851,
      "step": 7
    },
    {
      "epoch": 0.20949263502454993,
      "grad_norm": 1.1125617027282715,
      "learning_rate": 9.934567829727386e-05,
      "loss": 4.5956,
      "step": 8
    },
    {
      "epoch": 0.23567921440261866,
      "grad_norm": 1.0278071165084839,
      "learning_rate": 9.905878394570453e-05,
      "loss": 4.7214,
      "step": 9
    },
    {
      "epoch": 0.2618657937806874,
      "grad_norm": 2.4233837127685547,
      "learning_rate": 9.872051902290737e-05,
      "loss": 4.0809,
      "step": 10
    },
    {
      "epoch": 0.28805237315875615,
      "grad_norm": 0.6828261613845825,
      "learning_rate": 9.833127793065098e-05,
      "loss": 4.1133,
      "step": 11
    },
    {
      "epoch": 0.3142389525368249,
      "grad_norm": 1.080686330795288,
      "learning_rate": 9.789151450663723e-05,
      "loss": 4.1603,
      "step": 12
    },
    {
      "epoch": 0.3404255319148936,
      "grad_norm": 0.7597512602806091,
      "learning_rate": 9.740174149534693e-05,
      "loss": 4.2075,
      "step": 13
    },
    {
      "epoch": 0.36661211129296234,
      "grad_norm": 0.6978819370269775,
      "learning_rate": 9.686252995020249e-05,
      "loss": 4.155,
      "step": 14
    },
    {
      "epoch": 0.39279869067103107,
      "grad_norm": 0.5029189586639404,
      "learning_rate": 9.627450856774539e-05,
      "loss": 4.2456,
      "step": 15
    },
    {
      "epoch": 0.41898527004909986,
      "grad_norm": 0.49850398302078247,
      "learning_rate": 9.563836295460398e-05,
      "loss": 4.375,
      "step": 16
    },
    {
      "epoch": 0.4451718494271686,
      "grad_norm": 0.6559662222862244,
      "learning_rate": 9.495483482810688e-05,
      "loss": 4.3,
      "step": 17
    },
    {
      "epoch": 0.4713584288052373,
      "grad_norm": 1.1999343633651733,
      "learning_rate": 9.422472115147382e-05,
      "loss": 4.4429,
      "step": 18
    },
    {
      "epoch": 0.49754500818330605,
      "grad_norm": 1.2498600482940674,
      "learning_rate": 9.3448873204592e-05,
      "loss": 4.5719,
      "step": 19
    },
    {
      "epoch": 0.5237315875613748,
      "grad_norm": 0.8708962798118591,
      "learning_rate": 9.2628195591462e-05,
      "loss": 3.7549,
      "step": 20
    },
    {
      "epoch": 0.5499181669394435,
      "grad_norm": 0.6062394380569458,
      "learning_rate": 9.176364518546989e-05,
      "loss": 3.9636,
      "step": 21
    },
    {
      "epoch": 0.5761047463175123,
      "grad_norm": 0.46776407957077026,
      "learning_rate": 9.08562300137157e-05,
      "loss": 4.0005,
      "step": 22
    },
    {
      "epoch": 0.602291325695581,
      "grad_norm": 0.41210252046585083,
      "learning_rate": 8.990700808169889e-05,
      "loss": 3.9414,
      "step": 23
    },
    {
      "epoch": 0.6284779050736498,
      "grad_norm": 0.4796847701072693,
      "learning_rate": 8.891708613973126e-05,
      "loss": 4.0629,
      "step": 24
    },
    {
      "epoch": 0.6546644844517185,
      "grad_norm": 0.42416390776634216,
      "learning_rate": 8.788761839251559e-05,
      "loss": 4.0834,
      "step": 25
    },
    {
      "epoch": 0.6546644844517185,
      "eval_loss": 4.142909526824951,
      "eval_runtime": 0.2882,
      "eval_samples_per_second": 173.488,
      "eval_steps_per_second": 45.107,
      "step": 25
    },
    {
      "epoch": 0.6808510638297872,
      "grad_norm": 0.4640260636806488,
      "learning_rate": 8.681980515339464e-05,
      "loss": 4.2203,
      "step": 26
    },
    {
      "epoch": 0.707037643207856,
      "grad_norm": 0.5100923776626587,
      "learning_rate": 8.571489144483944e-05,
      "loss": 4.181,
      "step": 27
    },
    {
      "epoch": 0.7332242225859247,
      "grad_norm": 0.6287262439727783,
      "learning_rate": 8.457416554680877e-05,
      "loss": 4.3732,
      "step": 28
    },
    {
      "epoch": 0.7594108019639935,
      "grad_norm": 0.5281463265419006,
      "learning_rate": 8.339895749467238e-05,
      "loss": 3.7263,
      "step": 29
    },
    {
      "epoch": 0.7855973813420621,
      "grad_norm": 0.41669386625289917,
      "learning_rate": 8.219063752844926e-05,
      "loss": 3.7891,
      "step": 30
    },
    {
      "epoch": 0.8117839607201309,
      "grad_norm": 0.39933446049690247,
      "learning_rate": 8.095061449516903e-05,
      "loss": 3.8458,
      "step": 31
    },
    {
      "epoch": 0.8379705400981997,
      "grad_norm": 0.42396020889282227,
      "learning_rate": 7.968033420621935e-05,
      "loss": 3.8991,
      "step": 32
    },
    {
      "epoch": 0.8641571194762684,
      "grad_norm": 0.45608440041542053,
      "learning_rate": 7.838127775159452e-05,
      "loss": 3.9113,
      "step": 33
    },
    {
      "epoch": 0.8903436988543372,
      "grad_norm": 0.5071250200271606,
      "learning_rate": 7.705495977301078e-05,
      "loss": 3.9696,
      "step": 34
    },
    {
      "epoch": 0.9165302782324058,
      "grad_norm": 0.408988356590271,
      "learning_rate": 7.570292669790186e-05,
      "loss": 4.0264,
      "step": 35
    },
    {
      "epoch": 0.9427168576104746,
      "grad_norm": 0.41267451643943787,
      "learning_rate": 7.43267549363537e-05,
      "loss": 4.0024,
      "step": 36
    },
    {
      "epoch": 0.9689034369885434,
      "grad_norm": 0.6320325136184692,
      "learning_rate": 7.292804904308087e-05,
      "loss": 4.0503,
      "step": 37
    },
    {
      "epoch": 0.9950900163666121,
      "grad_norm": 0.7328307628631592,
      "learning_rate": 7.150843984658754e-05,
      "loss": 4.1336,
      "step": 38
    },
    {
      "epoch": 1.0212765957446808,
      "grad_norm": 1.987341284751892,
      "learning_rate": 7.006958254769438e-05,
      "loss": 6.7162,
      "step": 39
    },
    {
      "epoch": 1.0474631751227497,
      "grad_norm": 1.042544960975647,
      "learning_rate": 6.861315478964841e-05,
      "loss": 3.6702,
      "step": 40
    },
    {
      "epoch": 1.0736497545008183,
      "grad_norm": 0.6465303301811218,
      "learning_rate": 6.714085470206609e-05,
      "loss": 3.8179,
      "step": 41
    },
    {
      "epoch": 1.099836333878887,
      "grad_norm": 0.3951312005519867,
      "learning_rate": 6.56543989209901e-05,
      "loss": 3.7864,
      "step": 42
    },
    {
      "epoch": 1.1260229132569557,
      "grad_norm": 0.41512835025787354,
      "learning_rate": 6.415552058736854e-05,
      "loss": 3.7806,
      "step": 43
    },
    {
      "epoch": 1.1522094926350246,
      "grad_norm": 0.534665584564209,
      "learning_rate": 6.264596732629e-05,
      "loss": 3.8374,
      "step": 44
    },
    {
      "epoch": 1.1783960720130933,
      "grad_norm": 0.5684964060783386,
      "learning_rate": 6.112749920933111e-05,
      "loss": 3.9551,
      "step": 45
    },
    {
      "epoch": 1.204582651391162,
      "grad_norm": 0.6400809288024902,
      "learning_rate": 5.960188670239154e-05,
      "loss": 4.0032,
      "step": 46
    },
    {
      "epoch": 1.2307692307692308,
      "grad_norm": 0.7185765504837036,
      "learning_rate": 5.80709086014102e-05,
      "loss": 3.9989,
      "step": 47
    },
    {
      "epoch": 1.2569558101472995,
      "grad_norm": 0.5369796752929688,
      "learning_rate": 5.653634995836856e-05,
      "loss": 3.3087,
      "step": 48
    },
    {
      "epoch": 1.2831423895253682,
      "grad_norm": 0.3849294185638428,
      "learning_rate": 5.500000000000001e-05,
      "loss": 3.8981,
      "step": 49
    },
    {
      "epoch": 1.3093289689034369,
      "grad_norm": 0.34771353006362915,
      "learning_rate": 5.346365004163145e-05,
      "loss": 3.6507,
      "step": 50
    },
    {
      "epoch": 1.3093289689034369,
      "eval_loss": 3.856473922729492,
      "eval_runtime": 0.2803,
      "eval_samples_per_second": 178.389,
      "eval_steps_per_second": 46.381,
      "step": 50
    },
    {
      "epoch": 1.3355155482815058,
      "grad_norm": 0.37725579738616943,
      "learning_rate": 5.192909139858981e-05,
      "loss": 3.7317,
      "step": 51
    },
    {
      "epoch": 1.3617021276595744,
      "grad_norm": 0.4553833603858948,
      "learning_rate": 5.0398113297608465e-05,
      "loss": 3.7528,
      "step": 52
    },
    {
      "epoch": 1.3878887070376433,
      "grad_norm": 0.4247891306877136,
      "learning_rate": 4.887250079066892e-05,
      "loss": 3.8082,
      "step": 53
    },
    {
      "epoch": 1.414075286415712,
      "grad_norm": 0.45754310488700867,
      "learning_rate": 4.7354032673710005e-05,
      "loss": 3.8597,
      "step": 54
    },
    {
      "epoch": 1.4402618657937807,
      "grad_norm": 0.40189892053604126,
      "learning_rate": 4.584447941263149e-05,
      "loss": 3.8544,
      "step": 55
    },
    {
      "epoch": 1.4664484451718494,
      "grad_norm": 0.4472440779209137,
      "learning_rate": 4.43456010790099e-05,
      "loss": 3.902,
      "step": 56
    },
    {
      "epoch": 1.492635024549918,
      "grad_norm": 0.7986133694648743,
      "learning_rate": 4.285914529793391e-05,
      "loss": 4.3171,
      "step": 57
    },
    {
      "epoch": 1.518821603927987,
      "grad_norm": 0.47378429770469666,
      "learning_rate": 4.13868452103516e-05,
      "loss": 3.2801,
      "step": 58
    },
    {
      "epoch": 1.5450081833060556,
      "grad_norm": 0.609893798828125,
      "learning_rate": 3.9930417452305626e-05,
      "loss": 3.5901,
      "step": 59
    },
    {
      "epoch": 1.5711947626841245,
      "grad_norm": 0.5858629941940308,
      "learning_rate": 3.8491560153412466e-05,
      "loss": 3.601,
      "step": 60
    },
    {
      "epoch": 1.5973813420621932,
      "grad_norm": 0.37681153416633606,
      "learning_rate": 3.707195095691913e-05,
      "loss": 3.7124,
      "step": 61
    },
    {
      "epoch": 1.6235679214402619,
      "grad_norm": 0.338227778673172,
      "learning_rate": 3.567324506364632e-05,
      "loss": 3.7552,
      "step": 62
    },
    {
      "epoch": 1.6497545008183305,
      "grad_norm": 0.35423874855041504,
      "learning_rate": 3.4297073302098156e-05,
      "loss": 3.7876,
      "step": 63
    },
    {
      "epoch": 1.6759410801963992,
      "grad_norm": 0.4390277862548828,
      "learning_rate": 3.2945040226989244e-05,
      "loss": 3.8012,
      "step": 64
    },
    {
      "epoch": 1.702127659574468,
      "grad_norm": 0.563541829586029,
      "learning_rate": 3.16187222484055e-05,
      "loss": 3.8544,
      "step": 65
    },
    {
      "epoch": 1.7283142389525368,
      "grad_norm": 0.7141799330711365,
      "learning_rate": 3.0319665793780648e-05,
      "loss": 4.0238,
      "step": 66
    },
    {
      "epoch": 1.7545008183306057,
      "grad_norm": 0.4013899862766266,
      "learning_rate": 2.9049385504830985e-05,
      "loss": 3.1346,
      "step": 67
    },
    {
      "epoch": 1.7806873977086743,
      "grad_norm": 0.3977162539958954,
      "learning_rate": 2.7809362471550748e-05,
      "loss": 3.8809,
      "step": 68
    },
    {
      "epoch": 1.806873977086743,
      "grad_norm": 0.35375577211380005,
      "learning_rate": 2.660104250532764e-05,
      "loss": 3.6103,
      "step": 69
    },
    {
      "epoch": 1.8330605564648117,
      "grad_norm": 0.3449622094631195,
      "learning_rate": 2.5425834453191232e-05,
      "loss": 3.6076,
      "step": 70
    },
    {
      "epoch": 1.8592471358428804,
      "grad_norm": 0.3213866651058197,
      "learning_rate": 2.4285108555160577e-05,
      "loss": 3.712,
      "step": 71
    },
    {
      "epoch": 1.8854337152209493,
      "grad_norm": 0.34646686911582947,
      "learning_rate": 2.3180194846605367e-05,
      "loss": 3.7291,
      "step": 72
    },
    {
      "epoch": 1.9116202945990182,
      "grad_norm": 0.3628241717815399,
      "learning_rate": 2.2112381607484417e-05,
      "loss": 3.8293,
      "step": 73
    },
    {
      "epoch": 1.9378068739770868,
      "grad_norm": 0.37866660952568054,
      "learning_rate": 2.1082913860268765e-05,
      "loss": 3.8291,
      "step": 74
    },
    {
      "epoch": 1.9639934533551555,
      "grad_norm": 0.4907776415348053,
      "learning_rate": 2.0092991918301108e-05,
      "loss": 3.8753,
      "step": 75
    },
    {
      "epoch": 1.9639934533551555,
      "eval_loss": 3.767355442047119,
      "eval_runtime": 0.2893,
      "eval_samples_per_second": 172.844,
      "eval_steps_per_second": 44.939,
      "step": 75
    },
    {
      "epoch": 1.9901800327332242,
      "grad_norm": 0.8656964302062988,
      "learning_rate": 1.91437699862843e-05,
      "loss": 4.1986,
      "step": 76
    },
    {
      "epoch": 2.016366612111293,
      "grad_norm": 0.8424877524375916,
      "learning_rate": 1.8236354814530112e-05,
      "loss": 6.5033,
      "step": 77
    },
    {
      "epoch": 2.0425531914893615,
      "grad_norm": 0.6298512816429138,
      "learning_rate": 1.7371804408538024e-05,
      "loss": 3.5305,
      "step": 78
    },
    {
      "epoch": 2.06873977086743,
      "grad_norm": 0.577872633934021,
      "learning_rate": 1.6551126795408016e-05,
      "loss": 3.5686,
      "step": 79
    },
    {
      "epoch": 2.0949263502454993,
      "grad_norm": 0.4558631479740143,
      "learning_rate": 1.577527884852619e-05,
      "loss": 3.619,
      "step": 80
    },
    {
      "epoch": 2.121112929623568,
      "grad_norm": 0.3921566903591156,
      "learning_rate": 1.5045165171893116e-05,
      "loss": 3.6359,
      "step": 81
    },
    {
      "epoch": 2.1472995090016367,
      "grad_norm": 0.3551250398159027,
      "learning_rate": 1.4361637045396029e-05,
      "loss": 3.7263,
      "step": 82
    },
    {
      "epoch": 2.1734860883797054,
      "grad_norm": 0.3237386643886566,
      "learning_rate": 1.3725491432254624e-05,
      "loss": 3.7598,
      "step": 83
    },
    {
      "epoch": 2.199672667757774,
      "grad_norm": 0.4270864725112915,
      "learning_rate": 1.313747004979751e-05,
      "loss": 3.8036,
      "step": 84
    },
    {
      "epoch": 2.2258592471358427,
      "grad_norm": 0.5806615948677063,
      "learning_rate": 1.2598258504653081e-05,
      "loss": 3.9918,
      "step": 85
    },
    {
      "epoch": 2.2520458265139114,
      "grad_norm": 0.4796655774116516,
      "learning_rate": 1.2108485493362765e-05,
      "loss": 2.7358,
      "step": 86
    },
    {
      "epoch": 2.2782324058919805,
      "grad_norm": 0.5247834324836731,
      "learning_rate": 1.1668722069349041e-05,
      "loss": 4.1886,
      "step": 87
    },
    {
      "epoch": 2.304418985270049,
      "grad_norm": 0.400111585855484,
      "learning_rate": 1.1279480977092635e-05,
      "loss": 3.5208,
      "step": 88
    },
    {
      "epoch": 2.330605564648118,
      "grad_norm": 0.35815542936325073,
      "learning_rate": 1.094121605429547e-05,
      "loss": 3.6359,
      "step": 89
    },
    {
      "epoch": 2.3567921440261865,
      "grad_norm": 0.3279394209384918,
      "learning_rate": 1.0654321702726141e-05,
      "loss": 3.6536,
      "step": 90
    },
    {
      "epoch": 2.382978723404255,
      "grad_norm": 0.3094680905342102,
      "learning_rate": 1.0419132428365116e-05,
      "loss": 3.7183,
      "step": 91
    },
    {
      "epoch": 2.409165302782324,
      "grad_norm": 0.3676002025604248,
      "learning_rate": 1.0235922451385733e-05,
      "loss": 3.7482,
      "step": 92
    },
    {
      "epoch": 2.4353518821603926,
      "grad_norm": 0.3828237056732178,
      "learning_rate": 1.0104905386425733e-05,
      "loss": 3.781,
      "step": 93
    },
    {
      "epoch": 2.4615384615384617,
      "grad_norm": 0.5239596366882324,
      "learning_rate": 1.002623399352217e-05,
      "loss": 3.8249,
      "step": 94
    },
    {
      "epoch": 2.4877250409165304,
      "grad_norm": 0.8069905638694763,
      "learning_rate": 1e-05,
      "loss": 4.1793,
      "step": 95
    }
  ],
  "logging_steps": 1,
  "max_steps": 95,
  "num_input_tokens_seen": 0,
  "num_train_epochs": 3,
  "save_steps": 25,
  "stateful_callbacks": {
    "EarlyStoppingCallback": {
      "args": {
        "early_stopping_patience": 1,
        "early_stopping_threshold": 0.0
      },
      "attributes": {
        "early_stopping_patience_counter": 0
      }
    },
    "TrainerControl": {
      "args": {
        "should_epoch_stop": false,
        "should_evaluate": false,
        "should_log": false,
        "should_save": true,
        "should_training_stop": true
      },
      "attributes": {}
    }
  },
  "total_flos": 2.161007627599872e+16,
  "train_batch_size": 1,
  "trial_name": null,
  "trial_params": null
}