|
,loss,learning_rate,epoch,step,eval_loss,eval_accuracy,eval_f1,eval_runtime,eval_samples_per_second,eval_steps_per_second,train_runtime,train_samples_per_second,train_steps_per_second,total_flos,train_loss |
|
0,0.7499,2e-05,1.0,938,,,,,,,,,,, |
|
1,,,1.0,938,0.654792308807373,0.712,0.712652463040488,1.2726,1964.409,62.075,,,,, |
|
2,0.5464,2e-05,2.0,1876,,,,,,,,,,, |
|
3,,,2.0,1876,0.699586033821106,0.7128,0.7073848402995836,1.2761,1959.062,61.906,,,,, |
|
4,0.3782,2e-05,3.0,2814,,,,,,,,,,, |
|
5,,,3.0,2814,0.7840703129768372,0.7028,0.7011891803629126,1.2756,1959.882,61.932,,,,, |
|
6,0.2444,2e-05,4.0,3752,,,,,,,,,,, |
|
7,,,4.0,3752,0.9180415868759155,0.7108,0.7088532209184721,1.2745,1961.551,61.985,,,,, |
|
8,0.1664,2e-05,5.0,4690,,,,,,,,,,, |
|
9,,,5.0,4690,1.0885692834854126,0.7128,0.7127755757268002,1.2786,1955.302,61.788,,,,, |
|
10,,,5.0,4690,,,,,,,328.8299,9123.257,285.254,6502067001188640.0,0.41708059737931436 |
|
|