{ "best_metric": null, "best_model_checkpoint": null, "epoch": 0.997867803837953, "eval_steps": 10, "global_step": 351, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.03, "learning_rate": 1.388888888888889e-05, "loss": 2.3058, "step": 10 }, { "epoch": 0.03, "eval_accuracy": 0.0794, "eval_loss": 2.310584306716919, "eval_runtime": 23.1302, "eval_samples_per_second": 216.168, "eval_steps_per_second": 6.788, "step": 10 }, { "epoch": 0.06, "learning_rate": 2.777777777777778e-05, "loss": 2.3033, "step": 20 }, { "epoch": 0.06, "eval_accuracy": 0.0892, "eval_loss": 2.302612066268921, "eval_runtime": 20.5744, "eval_samples_per_second": 243.02, "eval_steps_per_second": 7.631, "step": 20 }, { "epoch": 0.09, "learning_rate": 4.166666666666667e-05, "loss": 2.3012, "step": 30 }, { "epoch": 0.09, "eval_accuracy": 0.1042, "eval_loss": 2.2971391677856445, "eval_runtime": 19.7054, "eval_samples_per_second": 253.737, "eval_steps_per_second": 7.967, "step": 30 }, { "epoch": 0.11, "learning_rate": 4.936507936507937e-05, "loss": 2.2914, "step": 40 }, { "epoch": 0.11, "eval_accuracy": 0.1254, "eval_loss": 2.2889511585235596, "eval_runtime": 20.5875, "eval_samples_per_second": 242.866, "eval_steps_per_second": 7.626, "step": 40 }, { "epoch": 0.14, "learning_rate": 4.7777777777777784e-05, "loss": 2.2869, "step": 50 }, { "epoch": 0.14, "eval_accuracy": 0.16, "eval_loss": 2.281619071960449, "eval_runtime": 20.7333, "eval_samples_per_second": 241.158, "eval_steps_per_second": 7.572, "step": 50 }, { "epoch": 0.17, "learning_rate": 4.6190476190476194e-05, "loss": 2.2785, "step": 60 }, { "epoch": 0.17, "eval_accuracy": 0.1902, "eval_loss": 2.269974708557129, "eval_runtime": 20.4734, "eval_samples_per_second": 244.22, "eval_steps_per_second": 7.669, "step": 60 }, { "epoch": 0.2, "learning_rate": 4.460317460317461e-05, "loss": 2.2712, "step": 70 }, { "epoch": 0.2, "eval_accuracy": 0.2354, "eval_loss": 2.260219097137451, "eval_runtime": 19.9116, "eval_samples_per_second": 251.11, "eval_steps_per_second": 7.885, "step": 70 }, { "epoch": 0.23, "learning_rate": 4.301587301587302e-05, "loss": 2.2619, "step": 80 }, { "epoch": 0.23, "eval_accuracy": 0.2688, "eval_loss": 2.250117063522339, "eval_runtime": 21.2411, "eval_samples_per_second": 235.392, "eval_steps_per_second": 7.391, "step": 80 }, { "epoch": 0.26, "learning_rate": 4.1428571428571437e-05, "loss": 2.2509, "step": 90 }, { "epoch": 0.26, "eval_accuracy": 0.3022, "eval_loss": 2.2383360862731934, "eval_runtime": 20.8445, "eval_samples_per_second": 239.872, "eval_steps_per_second": 7.532, "step": 90 }, { "epoch": 0.28, "learning_rate": 3.984126984126984e-05, "loss": 2.2382, "step": 100 }, { "epoch": 0.28, "eval_accuracy": 0.3268, "eval_loss": 2.222919225692749, "eval_runtime": 19.2819, "eval_samples_per_second": 259.311, "eval_steps_per_second": 8.142, "step": 100 }, { "epoch": 0.31, "learning_rate": 3.8253968253968256e-05, "loss": 2.2255, "step": 110 }, { "epoch": 0.31, "eval_accuracy": 0.353, "eval_loss": 2.2083821296691895, "eval_runtime": 23.1248, "eval_samples_per_second": 216.218, "eval_steps_per_second": 6.789, "step": 110 }, { "epoch": 0.34, "learning_rate": 3.6666666666666666e-05, "loss": 2.2164, "step": 120 }, { "epoch": 0.34, "eval_accuracy": 0.3608, "eval_loss": 2.1939358711242676, "eval_runtime": 25.8088, "eval_samples_per_second": 193.732, "eval_steps_per_second": 6.083, "step": 120 }, { "epoch": 0.37, "learning_rate": 3.5079365079365075e-05, "loss": 2.2028, "step": 130 }, { "epoch": 0.37, "eval_accuracy": 0.3668, "eval_loss": 2.182861804962158, "eval_runtime": 25.579, "eval_samples_per_second": 195.473, "eval_steps_per_second": 6.138, "step": 130 }, { "epoch": 0.4, "learning_rate": 3.349206349206349e-05, "loss": 2.1977, "step": 140 }, { "epoch": 0.4, "eval_accuracy": 0.401, "eval_loss": 2.164577007293701, "eval_runtime": 25.165, "eval_samples_per_second": 198.688, "eval_steps_per_second": 6.239, "step": 140 }, { "epoch": 0.43, "learning_rate": 3.19047619047619e-05, "loss": 2.1844, "step": 150 }, { "epoch": 0.43, "eval_accuracy": 0.4244, "eval_loss": 2.144054651260376, "eval_runtime": 24.2393, "eval_samples_per_second": 206.277, "eval_steps_per_second": 6.477, "step": 150 }, { "epoch": 0.45, "learning_rate": 3.0317460317460318e-05, "loss": 2.1689, "step": 160 }, { "epoch": 0.45, "eval_accuracy": 0.437, "eval_loss": 2.1322500705718994, "eval_runtime": 25.4332, "eval_samples_per_second": 196.593, "eval_steps_per_second": 6.173, "step": 160 }, { "epoch": 0.48, "learning_rate": 2.8730158730158728e-05, "loss": 2.1555, "step": 170 }, { "epoch": 0.48, "eval_accuracy": 0.4462, "eval_loss": 2.1159207820892334, "eval_runtime": 25.3372, "eval_samples_per_second": 197.338, "eval_steps_per_second": 6.196, "step": 170 }, { "epoch": 0.51, "learning_rate": 2.714285714285714e-05, "loss": 2.1448, "step": 180 }, { "epoch": 0.51, "eval_accuracy": 0.45, "eval_loss": 2.0992112159729004, "eval_runtime": 25.0688, "eval_samples_per_second": 199.451, "eval_steps_per_second": 6.263, "step": 180 }, { "epoch": 0.54, "learning_rate": 2.5555555555555554e-05, "loss": 2.1313, "step": 190 }, { "epoch": 0.54, "eval_accuracy": 0.4642, "eval_loss": 2.080961227416992, "eval_runtime": 25.2129, "eval_samples_per_second": 198.311, "eval_steps_per_second": 6.227, "step": 190 }, { "epoch": 0.57, "learning_rate": 2.396825396825397e-05, "loss": 2.1189, "step": 200 }, { "epoch": 0.57, "eval_accuracy": 0.4708, "eval_loss": 2.0589163303375244, "eval_runtime": 25.6278, "eval_samples_per_second": 195.101, "eval_steps_per_second": 6.126, "step": 200 }, { "epoch": 0.6, "learning_rate": 2.2380952380952384e-05, "loss": 2.1111, "step": 210 }, { "epoch": 0.6, "eval_accuracy": 0.4828, "eval_loss": 2.0430362224578857, "eval_runtime": 19.2415, "eval_samples_per_second": 259.854, "eval_steps_per_second": 8.159, "step": 210 }, { "epoch": 0.63, "learning_rate": 2.0793650793650797e-05, "loss": 2.0905, "step": 220 }, { "epoch": 0.63, "eval_accuracy": 0.4938, "eval_loss": 2.028820753097534, "eval_runtime": 20.6864, "eval_samples_per_second": 241.705, "eval_steps_per_second": 7.59, "step": 220 }, { "epoch": 0.65, "learning_rate": 1.920634920634921e-05, "loss": 2.082, "step": 230 }, { "epoch": 0.65, "eval_accuracy": 0.4938, "eval_loss": 2.008862257003784, "eval_runtime": 20.028, "eval_samples_per_second": 249.65, "eval_steps_per_second": 7.839, "step": 230 }, { "epoch": 0.68, "learning_rate": 1.761904761904762e-05, "loss": 2.0646, "step": 240 }, { "epoch": 0.68, "eval_accuracy": 0.5014, "eval_loss": 1.9969898462295532, "eval_runtime": 20.7247, "eval_samples_per_second": 241.258, "eval_steps_per_second": 7.576, "step": 240 }, { "epoch": 0.71, "learning_rate": 1.6031746031746033e-05, "loss": 2.0636, "step": 250 }, { "epoch": 0.71, "eval_accuracy": 0.4946, "eval_loss": 1.9777544736862183, "eval_runtime": 18.9902, "eval_samples_per_second": 263.294, "eval_steps_per_second": 8.267, "step": 250 }, { "epoch": 0.74, "learning_rate": 1.4444444444444444e-05, "loss": 2.0579, "step": 260 }, { "epoch": 0.74, "eval_accuracy": 0.49, "eval_loss": 1.9608845710754395, "eval_runtime": 18.9333, "eval_samples_per_second": 264.084, "eval_steps_per_second": 8.292, "step": 260 }, { "epoch": 0.77, "learning_rate": 1.2857142857142857e-05, "loss": 2.028, "step": 270 }, { "epoch": 0.77, "eval_accuracy": 0.4862, "eval_loss": 1.960185170173645, "eval_runtime": 18.9535, "eval_samples_per_second": 263.804, "eval_steps_per_second": 8.283, "step": 270 }, { "epoch": 0.8, "learning_rate": 1.126984126984127e-05, "loss": 2.0447, "step": 280 }, { "epoch": 0.8, "eval_accuracy": 0.4934, "eval_loss": 1.9459648132324219, "eval_runtime": 18.8589, "eval_samples_per_second": 265.126, "eval_steps_per_second": 8.325, "step": 280 }, { "epoch": 0.82, "learning_rate": 9.682539682539683e-06, "loss": 2.0168, "step": 290 }, { "epoch": 0.82, "eval_accuracy": 0.505, "eval_loss": 1.9368737936019897, "eval_runtime": 19.0033, "eval_samples_per_second": 263.112, "eval_steps_per_second": 8.262, "step": 290 }, { "epoch": 0.85, "learning_rate": 8.095238095238097e-06, "loss": 2.0126, "step": 300 }, { "epoch": 0.85, "eval_accuracy": 0.4926, "eval_loss": 1.931652545928955, "eval_runtime": 19.1274, "eval_samples_per_second": 261.406, "eval_steps_per_second": 8.208, "step": 300 }, { "epoch": 0.88, "learning_rate": 6.507936507936509e-06, "loss": 2.0099, "step": 310 }, { "epoch": 0.88, "eval_accuracy": 0.4952, "eval_loss": 1.9234933853149414, "eval_runtime": 19.032, "eval_samples_per_second": 262.715, "eval_steps_per_second": 8.249, "step": 310 }, { "epoch": 0.91, "learning_rate": 4.920634920634921e-06, "loss": 1.9978, "step": 320 }, { "epoch": 0.91, "eval_accuracy": 0.4972, "eval_loss": 1.9174150228500366, "eval_runtime": 18.9404, "eval_samples_per_second": 263.986, "eval_steps_per_second": 8.289, "step": 320 }, { "epoch": 0.94, "learning_rate": 3.3333333333333333e-06, "loss": 1.9951, "step": 330 }, { "epoch": 0.94, "eval_accuracy": 0.507, "eval_loss": 1.9119243621826172, "eval_runtime": 18.9559, "eval_samples_per_second": 263.769, "eval_steps_per_second": 8.282, "step": 330 }, { "epoch": 0.97, "learning_rate": 1.7460317460317462e-06, "loss": 1.9823, "step": 340 }, { "epoch": 0.97, "eval_accuracy": 0.4992, "eval_loss": 1.9119775295257568, "eval_runtime": 19.001, "eval_samples_per_second": 263.143, "eval_steps_per_second": 8.263, "step": 340 }, { "epoch": 1.0, "learning_rate": 1.5873015873015874e-07, "loss": 1.985, "step": 350 }, { "epoch": 1.0, "eval_accuracy": 0.5022, "eval_loss": 1.9064103364944458, "eval_runtime": 19.1234, "eval_samples_per_second": 261.459, "eval_steps_per_second": 8.21, "step": 350 }, { "epoch": 1.0, "step": 351, "total_flos": 9.547293521089659e+17, "train_loss": 2.144495400947723, "train_runtime": 1271.552, "train_samples_per_second": 35.39, "train_steps_per_second": 0.276 } ], "logging_steps": 10, "max_steps": 351, "num_train_epochs": 1, "save_steps": 500, "total_flos": 9.547293521089659e+17, "trial_name": null, "trial_params": null }