|
{ |
|
"best_metric": 0.8807787895202637, |
|
"best_model_checkpoint": "org_org_a/org_aug_a/checkpoint-400", |
|
"epoch": 0.24615384615384617, |
|
"eval_steps": 25, |
|
"global_step": 400, |
|
"is_hyper_param_search": false, |
|
"is_local_process_zero": true, |
|
"is_world_process_zero": true, |
|
"log_history": [ |
|
{ |
|
"epoch": 0.015384615384615385, |
|
"grad_norm": 99.7617416381836, |
|
"learning_rate": 9.375e-05, |
|
"loss": 2.2195, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.015384615384615385, |
|
"eval_f1_macro": 0.5521299126846324, |
|
"eval_f1_micro": 0.5691194856224325, |
|
"eval_f1_weighted": 0.569423288164649, |
|
"eval_loss": 1.520703673362732, |
|
"eval_runtime": 1286.7359, |
|
"eval_samples_per_second": 8.703, |
|
"eval_steps_per_second": 0.272, |
|
"step": 25 |
|
}, |
|
{ |
|
"epoch": 0.03076923076923077, |
|
"grad_norm": 40.433555603027344, |
|
"learning_rate": 8.75e-05, |
|
"loss": 1.4371, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.03076923076923077, |
|
"eval_f1_macro": 0.5857301862067549, |
|
"eval_f1_micro": 0.6089480264332917, |
|
"eval_f1_weighted": 0.6051732662908408, |
|
"eval_loss": 1.2746953964233398, |
|
"eval_runtime": 1302.368, |
|
"eval_samples_per_second": 8.598, |
|
"eval_steps_per_second": 0.269, |
|
"step": 50 |
|
}, |
|
{ |
|
"epoch": 0.046153846153846156, |
|
"grad_norm": 50.680335998535156, |
|
"learning_rate": 8.125000000000001e-05, |
|
"loss": 1.2556, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.046153846153846156, |
|
"eval_f1_macro": 0.6036315452406847, |
|
"eval_f1_micro": 0.6303804250759064, |
|
"eval_f1_weighted": 0.6240449220829647, |
|
"eval_loss": 1.1545159816741943, |
|
"eval_runtime": 1308.8563, |
|
"eval_samples_per_second": 8.556, |
|
"eval_steps_per_second": 0.267, |
|
"step": 75 |
|
}, |
|
{ |
|
"epoch": 0.06153846153846154, |
|
"grad_norm": 24.990671157836914, |
|
"learning_rate": 7.500000000000001e-05, |
|
"loss": 1.2415, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.06153846153846154, |
|
"eval_f1_macro": 0.6131831448419165, |
|
"eval_f1_micro": 0.6319878549741025, |
|
"eval_f1_weighted": 0.6300913896752308, |
|
"eval_loss": 1.0690622329711914, |
|
"eval_runtime": 1302.0106, |
|
"eval_samples_per_second": 8.601, |
|
"eval_steps_per_second": 0.269, |
|
"step": 100 |
|
}, |
|
{ |
|
"epoch": 0.07692307692307693, |
|
"grad_norm": 20.941816329956055, |
|
"learning_rate": 6.875e-05, |
|
"loss": 0.9864, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.07692307692307693, |
|
"eval_f1_macro": 0.627800207605976, |
|
"eval_f1_micro": 0.6399357028040722, |
|
"eval_f1_weighted": 0.6411240450638264, |
|
"eval_loss": 1.0263742208480835, |
|
"eval_runtime": 1310.3949, |
|
"eval_samples_per_second": 8.546, |
|
"eval_steps_per_second": 0.267, |
|
"step": 125 |
|
}, |
|
{ |
|
"epoch": 0.09230769230769231, |
|
"grad_norm": 17.243314743041992, |
|
"learning_rate": 6.25e-05, |
|
"loss": 1.0647, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.09230769230769231, |
|
"eval_f1_macro": 0.6265827051287185, |
|
"eval_f1_micro": 0.6510091087694231, |
|
"eval_f1_weighted": 0.6455165794591529, |
|
"eval_loss": 0.9917964339256287, |
|
"eval_runtime": 1292.6103, |
|
"eval_samples_per_second": 8.663, |
|
"eval_steps_per_second": 0.271, |
|
"step": 150 |
|
}, |
|
{ |
|
"epoch": 0.1076923076923077, |
|
"grad_norm": 20.114173889160156, |
|
"learning_rate": 5.6250000000000005e-05, |
|
"loss": 0.9849, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.1076923076923077, |
|
"eval_f1_macro": 0.6317476736951155, |
|
"eval_f1_micro": 0.6576174316842294, |
|
"eval_f1_weighted": 0.6510976948325254, |
|
"eval_loss": 0.9679338932037354, |
|
"eval_runtime": 1305.0812, |
|
"eval_samples_per_second": 8.58, |
|
"eval_steps_per_second": 0.268, |
|
"step": 175 |
|
}, |
|
{ |
|
"epoch": 0.12307692307692308, |
|
"grad_norm": 39.2221565246582, |
|
"learning_rate": 5e-05, |
|
"loss": 1.0067, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.12307692307692308, |
|
"eval_f1_macro": 0.6383959350585475, |
|
"eval_f1_micro": 0.6501160921593142, |
|
"eval_f1_weighted": 0.6513020604373679, |
|
"eval_loss": 0.9382981061935425, |
|
"eval_runtime": 1283.0843, |
|
"eval_samples_per_second": 8.727, |
|
"eval_steps_per_second": 0.273, |
|
"step": 200 |
|
}, |
|
{ |
|
"epoch": 0.13846153846153847, |
|
"grad_norm": 26.992185592651367, |
|
"learning_rate": 4.375e-05, |
|
"loss": 0.8928, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.13846153846153847, |
|
"eval_f1_macro": 0.640450740779414, |
|
"eval_f1_micro": 0.6619932130737631, |
|
"eval_f1_weighted": 0.6578984928748007, |
|
"eval_loss": 0.9242791533470154, |
|
"eval_runtime": 1276.3685, |
|
"eval_samples_per_second": 8.773, |
|
"eval_steps_per_second": 0.274, |
|
"step": 225 |
|
}, |
|
{ |
|
"epoch": 0.15384615384615385, |
|
"grad_norm": 71.61570739746094, |
|
"learning_rate": 3.7500000000000003e-05, |
|
"loss": 0.9858, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.15384615384615385, |
|
"eval_f1_macro": 0.640469116958249, |
|
"eval_f1_micro": 0.6627076263618503, |
|
"eval_f1_weighted": 0.6581960436641718, |
|
"eval_loss": 0.9131789803504944, |
|
"eval_runtime": 1285.3671, |
|
"eval_samples_per_second": 8.712, |
|
"eval_steps_per_second": 0.272, |
|
"step": 250 |
|
}, |
|
{ |
|
"epoch": 0.16923076923076924, |
|
"grad_norm": 51.381019592285156, |
|
"learning_rate": 3.125e-05, |
|
"loss": 0.9085, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.16923076923076924, |
|
"eval_f1_macro": 0.6446344740224741, |
|
"eval_f1_micro": 0.6575281300232184, |
|
"eval_f1_weighted": 0.6580540316041209, |
|
"eval_loss": 0.9010853171348572, |
|
"eval_runtime": 1307.6049, |
|
"eval_samples_per_second": 8.564, |
|
"eval_steps_per_second": 0.268, |
|
"step": 275 |
|
}, |
|
{ |
|
"epoch": 0.18461538461538463, |
|
"grad_norm": 25.47317886352539, |
|
"learning_rate": 2.5e-05, |
|
"loss": 1.0059, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.18461538461538463, |
|
"eval_f1_macro": 0.6435798595814134, |
|
"eval_f1_micro": 0.6686015359885694, |
|
"eval_f1_weighted": 0.662301841928527, |
|
"eval_loss": 0.9018191695213318, |
|
"eval_runtime": 1330.8319, |
|
"eval_samples_per_second": 8.414, |
|
"eval_steps_per_second": 0.263, |
|
"step": 300 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"grad_norm": 16.64508628845215, |
|
"learning_rate": 1.8750000000000002e-05, |
|
"loss": 0.8939, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.2, |
|
"eval_f1_macro": 0.6448244249080287, |
|
"eval_f1_micro": 0.6681550276835149, |
|
"eval_f1_weighted": 0.6628710932570111, |
|
"eval_loss": 0.892798125743866, |
|
"eval_runtime": 1305.0822, |
|
"eval_samples_per_second": 8.58, |
|
"eval_steps_per_second": 0.268, |
|
"step": 325 |
|
}, |
|
{ |
|
"epoch": 0.2153846153846154, |
|
"grad_norm": 32.9316520690918, |
|
"learning_rate": 1.25e-05, |
|
"loss": 0.864, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.2153846153846154, |
|
"eval_f1_macro": 0.6477608498148076, |
|
"eval_f1_micro": 0.6621718163957849, |
|
"eval_f1_weighted": 0.6618855058014609, |
|
"eval_loss": 0.8832775950431824, |
|
"eval_runtime": 1344.9811, |
|
"eval_samples_per_second": 8.326, |
|
"eval_steps_per_second": 0.26, |
|
"step": 350 |
|
}, |
|
{ |
|
"epoch": 0.23076923076923078, |
|
"grad_norm": 63.84526824951172, |
|
"learning_rate": 6.25e-06, |
|
"loss": 0.9499, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.23076923076923078, |
|
"eval_f1_macro": 0.646307301425064, |
|
"eval_f1_micro": 0.6585104482943382, |
|
"eval_f1_weighted": 0.659331813949001, |
|
"eval_loss": 0.8836826682090759, |
|
"eval_runtime": 1287.5574, |
|
"eval_samples_per_second": 8.697, |
|
"eval_steps_per_second": 0.272, |
|
"step": 375 |
|
}, |
|
{ |
|
"epoch": 0.24615384615384617, |
|
"grad_norm": 21.614879608154297, |
|
"learning_rate": 0.0, |
|
"loss": 0.9721, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.24615384615384617, |
|
"eval_f1_macro": 0.6475943444675745, |
|
"eval_f1_micro": 0.6614574031076978, |
|
"eval_f1_weighted": 0.6614511612983146, |
|
"eval_loss": 0.8807787895202637, |
|
"eval_runtime": 1313.0891, |
|
"eval_samples_per_second": 8.528, |
|
"eval_steps_per_second": 0.267, |
|
"step": 400 |
|
}, |
|
{ |
|
"epoch": 0.24615384615384617, |
|
"step": 400, |
|
"total_flos": 1.690284412871639e+17, |
|
"train_loss": 1.104334650039673, |
|
"train_runtime": 22485.3021, |
|
"train_samples_per_second": 0.569, |
|
"train_steps_per_second": 0.018 |
|
}, |
|
{ |
|
"epoch": 0.24615384615384617, |
|
"eval_f1_macro": 0.6475943444675745, |
|
"eval_f1_micro": 0.6614574031076978, |
|
"eval_f1_weighted": 0.6614511612983146, |
|
"eval_loss": 0.8807787895202637, |
|
"eval_runtime": 1299.8097, |
|
"eval_samples_per_second": 8.615, |
|
"eval_steps_per_second": 0.269, |
|
"step": 400 |
|
} |
|
], |
|
"logging_steps": 25, |
|
"max_steps": 400, |
|
"num_input_tokens_seen": 0, |
|
"num_train_epochs": 1, |
|
"save_steps": 25, |
|
"total_flos": 1.690284412871639e+17, |
|
"train_batch_size": 32, |
|
"trial_name": null, |
|
"trial_params": null |
|
} |
|
|