{
  "best_metric": 0.00030898803379386663,
  "best_model_checkpoint": "AlexWang99/byt5_add/checkpoint-1275",
  "epoch": 51.0,
  "eval_steps": 500,
  "global_step": 1275,
  "is_hyper_param_search": false,
  "is_local_process_zero": true,
  "is_world_process_zero": true,
  "log_history": [
    {
      "epoch": 1.0,
      "eval_loss": 1.747314691543579,
      "eval_runtime": 11.1213,
      "eval_samples_per_second": 899.172,
      "eval_steps_per_second": 1.169,
      "step": 25
    },
    {
      "epoch": 2.0,
      "eval_loss": 1.6477937698364258,
      "eval_runtime": 10.7967,
      "eval_samples_per_second": 926.209,
      "eval_steps_per_second": 1.204,
      "step": 50
    },
    {
      "epoch": 3.0,
      "eval_loss": 1.5999796390533447,
      "eval_runtime": 10.8098,
      "eval_samples_per_second": 925.089,
      "eval_steps_per_second": 1.203,
      "step": 75
    },
    {
      "epoch": 4.0,
      "eval_loss": 1.4885144233703613,
      "eval_runtime": 10.8417,
      "eval_samples_per_second": 922.367,
      "eval_steps_per_second": 1.199,
      "step": 100
    },
    {
      "epoch": 5.0,
      "eval_loss": 1.3953396081924438,
      "eval_runtime": 10.8419,
      "eval_samples_per_second": 922.345,
      "eval_steps_per_second": 1.199,
      "step": 125
    },
    {
      "epoch": 6.0,
      "eval_loss": 1.2306207418441772,
      "eval_runtime": 10.8327,
      "eval_samples_per_second": 923.131,
      "eval_steps_per_second": 1.2,
      "step": 150
    },
    {
      "epoch": 7.0,
      "eval_loss": 1.0172127485275269,
      "eval_runtime": 10.8404,
      "eval_samples_per_second": 922.478,
      "eval_steps_per_second": 1.199,
      "step": 175
    },
    {
      "epoch": 8.0,
      "eval_loss": 0.7508996725082397,
      "eval_runtime": 10.867,
      "eval_samples_per_second": 920.221,
      "eval_steps_per_second": 1.196,
      "step": 200
    },
    {
      "epoch": 9.0,
      "eval_loss": 0.5204245448112488,
      "eval_runtime": 10.837,
      "eval_samples_per_second": 922.761,
      "eval_steps_per_second": 1.2,
      "step": 225
    },
    {
      "epoch": 10.0,
      "eval_loss": 0.3563512861728668,
      "eval_runtime": 11.004,
      "eval_samples_per_second": 908.763,
      "eval_steps_per_second": 1.181,
      "step": 250
    },
    {
      "epoch": 11.0,
      "eval_loss": 0.3062296211719513,
      "eval_runtime": 10.8369,
      "eval_samples_per_second": 922.772,
      "eval_steps_per_second": 1.2,
      "step": 275
    },
    {
      "epoch": 12.0,
      "eval_loss": 0.23057429492473602,
      "eval_runtime": 10.799,
      "eval_samples_per_second": 926.015,
      "eval_steps_per_second": 1.204,
      "step": 300
    },
    {
      "epoch": 13.0,
      "eval_loss": 0.17026692628860474,
      "eval_runtime": 10.8267,
      "eval_samples_per_second": 923.643,
      "eval_steps_per_second": 1.201,
      "step": 325
    },
    {
      "epoch": 14.0,
      "eval_loss": 0.14094401895999908,
      "eval_runtime": 10.8171,
      "eval_samples_per_second": 924.461,
      "eval_steps_per_second": 1.202,
      "step": 350
    },
    {
      "epoch": 15.0,
      "eval_loss": 0.11562483012676239,
      "eval_runtime": 10.8544,
      "eval_samples_per_second": 921.286,
      "eval_steps_per_second": 1.198,
      "step": 375
    },
    {
      "epoch": 16.0,
      "eval_loss": 0.1076672226190567,
      "eval_runtime": 10.7997,
      "eval_samples_per_second": 925.947,
      "eval_steps_per_second": 1.204,
      "step": 400
    },
    {
      "epoch": 17.0,
      "eval_loss": 0.07891710102558136,
      "eval_runtime": 10.8355,
      "eval_samples_per_second": 922.895,
      "eval_steps_per_second": 1.2,
      "step": 425
    },
    {
      "epoch": 18.0,
      "eval_loss": 0.07825633883476257,
      "eval_runtime": 10.9577,
      "eval_samples_per_second": 912.598,
      "eval_steps_per_second": 1.186,
      "step": 450
    },
    {
      "epoch": 19.0,
      "eval_loss": 0.053240709006786346,
      "eval_runtime": 10.8407,
      "eval_samples_per_second": 922.45,
      "eval_steps_per_second": 1.199,
      "step": 475
    },
    {
      "epoch": 20.0,
      "learning_rate": 4.5e-05,
      "loss": 0.8626,
      "step": 500
    },
    {
      "epoch": 20.0,
      "eval_loss": 0.03896724432706833,
      "eval_runtime": 10.8919,
      "eval_samples_per_second": 918.117,
      "eval_steps_per_second": 1.194,
      "step": 500
    },
    {
      "epoch": 21.0,
      "eval_loss": 0.0326126404106617,
      "eval_runtime": 10.9808,
      "eval_samples_per_second": 910.682,
      "eval_steps_per_second": 1.184,
      "step": 525
    },
    {
      "epoch": 22.0,
      "eval_loss": 0.026844095438718796,
      "eval_runtime": 10.8647,
      "eval_samples_per_second": 920.415,
      "eval_steps_per_second": 1.197,
      "step": 550
    },
    {
      "epoch": 23.0,
      "eval_loss": 0.022708676755428314,
      "eval_runtime": 11.1211,
      "eval_samples_per_second": 899.191,
      "eval_steps_per_second": 1.169,
      "step": 575
    },
    {
      "epoch": 24.0,
      "eval_loss": 0.020555635914206505,
      "eval_runtime": 10.8169,
      "eval_samples_per_second": 924.48,
      "eval_steps_per_second": 1.202,
      "step": 600
    },
    {
      "epoch": 25.0,
      "eval_loss": 0.016072452068328857,
      "eval_runtime": 10.8195,
      "eval_samples_per_second": 924.261,
      "eval_steps_per_second": 1.202,
      "step": 625
    },
    {
      "epoch": 26.0,
      "eval_loss": 0.015775442123413086,
      "eval_runtime": 11.0521,
      "eval_samples_per_second": 904.809,
      "eval_steps_per_second": 1.176,
      "step": 650
    },
    {
      "epoch": 27.0,
      "eval_loss": 0.010050756856799126,
      "eval_runtime": 10.96,
      "eval_samples_per_second": 912.407,
      "eval_steps_per_second": 1.186,
      "step": 675
    },
    {
      "epoch": 28.0,
      "eval_loss": 0.009800990112125874,
      "eval_runtime": 10.8085,
      "eval_samples_per_second": 925.196,
      "eval_steps_per_second": 1.203,
      "step": 700
    },
    {
      "epoch": 29.0,
      "eval_loss": 0.0077048842795193195,
      "eval_runtime": 10.9528,
      "eval_samples_per_second": 913.005,
      "eval_steps_per_second": 1.187,
      "step": 725
    },
    {
      "epoch": 30.0,
      "eval_loss": 0.005685885436832905,
      "eval_runtime": 10.9631,
      "eval_samples_per_second": 912.147,
      "eval_steps_per_second": 1.186,
      "step": 750
    },
    {
      "epoch": 31.0,
      "eval_loss": 0.006655455566942692,
      "eval_runtime": 10.8367,
      "eval_samples_per_second": 922.788,
      "eval_steps_per_second": 1.2,
      "step": 775
    },
    {
      "epoch": 32.0,
      "eval_loss": 0.004621443338692188,
      "eval_runtime": 10.8165,
      "eval_samples_per_second": 924.51,
      "eval_steps_per_second": 1.202,
      "step": 800
    },
    {
      "epoch": 33.0,
      "eval_loss": 0.0033882376737892628,
      "eval_runtime": 10.9293,
      "eval_samples_per_second": 914.976,
      "eval_steps_per_second": 1.189,
      "step": 825
    },
    {
      "epoch": 34.0,
      "eval_loss": 0.0038037376943975687,
      "eval_runtime": 10.7973,
      "eval_samples_per_second": 926.155,
      "eval_steps_per_second": 1.204,
      "step": 850
    },
    {
      "epoch": 35.0,
      "eval_loss": 0.003371346276253462,
      "eval_runtime": 10.834,
      "eval_samples_per_second": 923.021,
      "eval_steps_per_second": 1.2,
      "step": 875
    },
    {
      "epoch": 36.0,
      "eval_loss": 0.0024659824557602406,
      "eval_runtime": 10.7902,
      "eval_samples_per_second": 926.766,
      "eval_steps_per_second": 1.205,
      "step": 900
    },
    {
      "epoch": 37.0,
      "eval_loss": 0.0022366114426404238,
      "eval_runtime": 10.8096,
      "eval_samples_per_second": 925.1,
      "eval_steps_per_second": 1.203,
      "step": 925
    },
    {
      "epoch": 38.0,
      "eval_loss": 0.0022026619408279657,
      "eval_runtime": 10.8109,
      "eval_samples_per_second": 924.992,
      "eval_steps_per_second": 1.202,
      "step": 950
    },
    {
      "epoch": 39.0,
      "eval_loss": 0.0024010157212615013,
      "eval_runtime": 11.1034,
      "eval_samples_per_second": 900.623,
      "eval_steps_per_second": 1.171,
      "step": 975
    },
    {
      "epoch": 40.0,
      "learning_rate": 4e-05,
      "loss": 0.0919,
      "step": 1000
    },
    {
      "epoch": 40.0,
      "eval_loss": 0.0013342766324058175,
      "eval_runtime": 10.7511,
      "eval_samples_per_second": 930.139,
      "eval_steps_per_second": 1.209,
      "step": 1000
    },
    {
      "epoch": 41.0,
      "eval_loss": 0.0016493805451318622,
      "eval_runtime": 10.7987,
      "eval_samples_per_second": 926.034,
      "eval_steps_per_second": 1.204,
      "step": 1025
    },
    {
      "epoch": 42.0,
      "eval_loss": 0.001088765449821949,
      "eval_runtime": 10.8106,
      "eval_samples_per_second": 925.017,
      "eval_steps_per_second": 1.203,
      "step": 1050
    },
    {
      "epoch": 43.0,
      "eval_loss": 0.0009081660537049174,
      "eval_runtime": 10.7945,
      "eval_samples_per_second": 926.398,
      "eval_steps_per_second": 1.204,
      "step": 1075
    },
    {
      "epoch": 44.0,
      "eval_loss": 0.0007170450408011675,
      "eval_runtime": 10.9388,
      "eval_samples_per_second": 914.174,
      "eval_steps_per_second": 1.188,
      "step": 1100
    },
    {
      "epoch": 45.0,
      "eval_loss": 0.0006850157515145838,
      "eval_runtime": 10.8231,
      "eval_samples_per_second": 923.948,
      "eval_steps_per_second": 1.201,
      "step": 1125
    },
    {
      "epoch": 46.0,
      "eval_loss": 0.0007588361040689051,
      "eval_runtime": 10.9442,
      "eval_samples_per_second": 913.729,
      "eval_steps_per_second": 1.188,
      "step": 1150
    },
    {
      "epoch": 47.0,
      "eval_loss": 0.0007894792361184955,
      "eval_runtime": 10.9394,
      "eval_samples_per_second": 914.125,
      "eval_steps_per_second": 1.188,
      "step": 1175
    },
    {
      "epoch": 48.0,
      "eval_loss": 0.0004850537225138396,
      "eval_runtime": 10.8141,
      "eval_samples_per_second": 924.722,
      "eval_steps_per_second": 1.202,
      "step": 1200
    },
    {
      "epoch": 49.0,
      "eval_loss": 0.0003986251540482044,
      "eval_runtime": 10.7964,
      "eval_samples_per_second": 926.231,
      "eval_steps_per_second": 1.204,
      "step": 1225
    },
    {
      "epoch": 50.0,
      "eval_loss": 0.0005350292194634676,
      "eval_runtime": 10.9488,
      "eval_samples_per_second": 913.343,
      "eval_steps_per_second": 1.187,
      "step": 1250
    },
    {
      "epoch": 51.0,
      "eval_loss": 0.00030898803379386663,
      "eval_runtime": 10.8156,
      "eval_samples_per_second": 924.594,
      "eval_steps_per_second": 1.202,
      "step": 1275
    }
  ],
  "logging_steps": 500,
  "max_steps": 5000,
  "num_train_epochs": 200,
  "save_steps": 500,
  "total_flos": 2.928514277376e+16,
  "trial_name": null,
  "trial_params": null
}