{ "best_metric": 0.02325253002345562, "best_model_checkpoint": "./results/answerdotai/ModernBERT-base/trial-4/checkpoint-3011", "epoch": 1.0, "eval_steps": 500, "global_step": 3011, "is_hyper_param_search": false, "is_local_process_zero": true, "is_world_process_zero": true, "log_history": [ { "epoch": 0.016605778811026237, "grad_norm": 7.4845476150512695, "learning_rate": 1.3209406688296726e-05, "loss": 0.427, "step": 50 }, { "epoch": 0.033211557622052475, "grad_norm": 8.739913940429688, "learning_rate": 1.3184989137392264e-05, "loss": 0.2079, "step": 100 }, { "epoch": 0.04981733643307871, "grad_norm": 10.918631553649902, "learning_rate": 1.31605715864878e-05, "loss": 0.1374, "step": 150 }, { "epoch": 0.06642311524410495, "grad_norm": 0.09207049757242203, "learning_rate": 1.3136154035583336e-05, "loss": 0.0971, "step": 200 }, { "epoch": 0.08302889405513118, "grad_norm": 0.1270512193441391, "learning_rate": 1.3111736484678873e-05, "loss": 0.0431, "step": 250 }, { "epoch": 0.09963467286615742, "grad_norm": 0.01078485231846571, "learning_rate": 1.3087318933774408e-05, "loss": 0.0679, "step": 300 }, { "epoch": 0.11624045167718366, "grad_norm": 0.16803160309791565, "learning_rate": 1.3062901382869945e-05, "loss": 0.0364, "step": 350 }, { "epoch": 0.1328462304882099, "grad_norm": 0.2863476872444153, "learning_rate": 1.303848383196548e-05, "loss": 0.0802, "step": 400 }, { "epoch": 0.14945200929923613, "grad_norm": 0.018498318269848824, "learning_rate": 1.3014066281061019e-05, "loss": 0.0324, "step": 450 }, { "epoch": 0.16605778811026237, "grad_norm": 12.099262237548828, "learning_rate": 1.2989648730156554e-05, "loss": 0.0567, "step": 500 }, { "epoch": 0.1826635669212886, "grad_norm": 0.04201498255133629, "learning_rate": 1.296523117925209e-05, "loss": 0.0265, "step": 550 }, { "epoch": 0.19926934573231483, "grad_norm": 13.225788116455078, "learning_rate": 1.2940813628347628e-05, "loss": 0.027, "step": 600 }, { "epoch": 0.2158751245433411, "grad_norm": 2.1863136291503906, "learning_rate": 1.2916396077443163e-05, "loss": 0.0325, "step": 650 }, { "epoch": 0.23248090335436733, "grad_norm": 0.0031948979012668133, "learning_rate": 1.28919785265387e-05, "loss": 0.0378, "step": 700 }, { "epoch": 0.24908668216539356, "grad_norm": 0.0001850352855399251, "learning_rate": 1.2867560975634237e-05, "loss": 0.0242, "step": 750 }, { "epoch": 0.2656924609764198, "grad_norm": 0.0007033672300167382, "learning_rate": 1.2843143424729772e-05, "loss": 0.0306, "step": 800 }, { "epoch": 0.282298239787446, "grad_norm": 13.938993453979492, "learning_rate": 1.2818725873825309e-05, "loss": 0.0458, "step": 850 }, { "epoch": 0.29890401859847227, "grad_norm": 0.02099405601620674, "learning_rate": 1.2794308322920844e-05, "loss": 0.0306, "step": 900 }, { "epoch": 0.3155097974094985, "grad_norm": 0.024268606677651405, "learning_rate": 1.2769890772016383e-05, "loss": 0.0142, "step": 950 }, { "epoch": 0.33211557622052473, "grad_norm": 0.004759958013892174, "learning_rate": 1.2745473221111918e-05, "loss": 0.0141, "step": 1000 }, { "epoch": 0.348721355031551, "grad_norm": 0.0019629066810011864, "learning_rate": 1.2721055670207453e-05, "loss": 0.0345, "step": 1050 }, { "epoch": 0.3653271338425772, "grad_norm": 0.00019358922145329416, "learning_rate": 1.2696638119302992e-05, "loss": 0.0089, "step": 1100 }, { "epoch": 0.38193291265360346, "grad_norm": 0.0028237327933311462, "learning_rate": 1.2672220568398527e-05, "loss": 0.0239, "step": 1150 }, { "epoch": 0.39853869146462967, "grad_norm": 0.00010467255196999758, "learning_rate": 
1.2647803017494064e-05, "loss": 0.0094, "step": 1200 }, { "epoch": 0.41514447027565593, "grad_norm": 0.05774892866611481, "learning_rate": 1.26233854665896e-05, "loss": 0.0246, "step": 1250 }, { "epoch": 0.4317502490866822, "grad_norm": 0.024394717067480087, "learning_rate": 1.2598967915685136e-05, "loss": 0.0328, "step": 1300 }, { "epoch": 0.4483560278977084, "grad_norm": 2.231964349746704, "learning_rate": 1.2574550364780673e-05, "loss": 0.0204, "step": 1350 }, { "epoch": 0.46496180670873466, "grad_norm": 0.0014322358183562756, "learning_rate": 1.2550132813876208e-05, "loss": 0.0001, "step": 1400 }, { "epoch": 0.48156758551976087, "grad_norm": 0.001744006876833737, "learning_rate": 1.2525715262971747e-05, "loss": 0.0392, "step": 1450 }, { "epoch": 0.4981733643307871, "grad_norm": 0.027050139382481575, "learning_rate": 1.2501297712067282e-05, "loss": 0.0151, "step": 1500 }, { "epoch": 0.5147791431418134, "grad_norm": 0.0001924823591252789, "learning_rate": 1.2476880161162817e-05, "loss": 0.0036, "step": 1550 }, { "epoch": 0.5313849219528396, "grad_norm": 4.767300128936768, "learning_rate": 1.2452462610258356e-05, "loss": 0.0148, "step": 1600 }, { "epoch": 0.5479907007638658, "grad_norm": 0.0022574588656425476, "learning_rate": 1.242804505935389e-05, "loss": 0.0384, "step": 1650 }, { "epoch": 0.564596479574892, "grad_norm": 0.12995891273021698, "learning_rate": 1.2403627508449428e-05, "loss": 0.018, "step": 1700 }, { "epoch": 0.5812022583859183, "grad_norm": 0.0005374422180466354, "learning_rate": 1.2379209957544964e-05, "loss": 0.0039, "step": 1750 }, { "epoch": 0.5978080371969445, "grad_norm": 0.004592420998960733, "learning_rate": 1.23547924066405e-05, "loss": 0.0136, "step": 1800 }, { "epoch": 0.6144138160079707, "grad_norm": 0.0008812470478005707, "learning_rate": 1.2330374855736037e-05, "loss": 0.0167, "step": 1850 }, { "epoch": 0.631019594818997, "grad_norm": 28.337797164916992, "learning_rate": 1.2305957304831572e-05, "loss": 0.0098, "step": 1900 }, { "epoch": 0.6476253736300233, "grad_norm": 0.0003208396374247968, "learning_rate": 1.228153975392711e-05, "loss": 0.0083, "step": 1950 }, { "epoch": 0.6642311524410495, "grad_norm": 0.004917904268950224, "learning_rate": 1.2257122203022646e-05, "loss": 0.012, "step": 2000 }, { "epoch": 0.6808369312520757, "grad_norm": 0.0006444657919928432, "learning_rate": 1.2232704652118182e-05, "loss": 0.0006, "step": 2050 }, { "epoch": 0.697442710063102, "grad_norm": 0.00020880017837043852, "learning_rate": 1.220828710121372e-05, "loss": 0.0169, "step": 2100 }, { "epoch": 0.7140484888741282, "grad_norm": 0.009818737395107746, "learning_rate": 1.2183869550309254e-05, "loss": 0.0143, "step": 2150 }, { "epoch": 0.7306542676851544, "grad_norm": 0.0009041284793056548, "learning_rate": 1.2159451999404791e-05, "loss": 0.0026, "step": 2200 }, { "epoch": 0.7472600464961807, "grad_norm": 2.3109569549560547, "learning_rate": 1.2135034448500328e-05, "loss": 0.0062, "step": 2250 }, { "epoch": 0.7638658253072069, "grad_norm": 9.242107807949651e-06, "learning_rate": 1.2110616897595863e-05, "loss": 0.0029, "step": 2300 }, { "epoch": 0.7804716041182331, "grad_norm": 0.00020709235104732215, "learning_rate": 1.20861993466914e-05, "loss": 0.0, "step": 2350 }, { "epoch": 0.7970773829292593, "grad_norm": 0.0008476360817439854, "learning_rate": 1.2061781795786937e-05, "loss": 0.019, "step": 2400 }, { "epoch": 0.8136831617402857, "grad_norm": 0.0002165739715564996, "learning_rate": 1.2037364244882474e-05, "loss": 0.0, "step": 2450 }, { "epoch": 0.8302889405513119, 
"grad_norm": 0.029956847429275513, "learning_rate": 1.201294669397801e-05, "loss": 0.0012, "step": 2500 }, { "epoch": 0.8468947193623381, "grad_norm": 0.0002400112134637311, "learning_rate": 1.1988529143073546e-05, "loss": 0.0191, "step": 2550 }, { "epoch": 0.8635004981733644, "grad_norm": 0.0070993551053106785, "learning_rate": 1.1964111592169083e-05, "loss": 0.0155, "step": 2600 }, { "epoch": 0.8801062769843906, "grad_norm": 5.127764234202914e-05, "learning_rate": 1.1939694041264618e-05, "loss": 0.0185, "step": 2650 }, { "epoch": 0.8967120557954168, "grad_norm": 0.056577421724796295, "learning_rate": 1.1915276490360155e-05, "loss": 0.0063, "step": 2700 }, { "epoch": 0.913317834606443, "grad_norm": 4.399678437039256e-05, "learning_rate": 1.1890858939455692e-05, "loss": 0.012, "step": 2750 }, { "epoch": 0.9299236134174693, "grad_norm": 6.6589759626367595e-06, "learning_rate": 1.1866441388551227e-05, "loss": 0.0001, "step": 2800 }, { "epoch": 0.9465293922284955, "grad_norm": 0.009270718321204185, "learning_rate": 1.1842023837646764e-05, "loss": 0.0001, "step": 2850 }, { "epoch": 0.9631351710395217, "grad_norm": 6.743930339813232, "learning_rate": 1.1817606286742301e-05, "loss": 0.0019, "step": 2900 }, { "epoch": 0.9797409498505479, "grad_norm": 10.679564476013184, "learning_rate": 1.1793188735837838e-05, "loss": 0.0258, "step": 2950 }, { "epoch": 0.9963467286615743, "grad_norm": 0.0007653234642930329, "learning_rate": 1.1768771184933373e-05, "loss": 0.0018, "step": 3000 }, { "epoch": 1.0, "eval_accuracy": 0.997256679389313, "eval_f1": 0.9972464717374746, "eval_loss": 0.02325253002345562, "eval_precision": 0.997240941740882, "eval_recall": 0.997256679389313, "eval_runtime": 36.6991, "eval_samples_per_second": 228.453, "eval_steps_per_second": 14.278, "step": 3011 } ], "logging_steps": 50, "max_steps": 27099, "num_input_tokens_seen": 0, "num_train_epochs": 9, "save_steps": 500, "stateful_callbacks": { "TrainerControl": { "args": { "should_epoch_stop": false, "should_evaluate": false, "should_log": false, "should_save": true, "should_training_stop": false }, "attributes": {} } }, "total_flos": 1.641430544259072e+16, "train_batch_size": 16, "trial_name": null, "trial_params": null }