IDinsight
/

gemma-2-2b-it-ud

@@ -1,579 +0,0 @@
-{
-  "best_metric": 0.11245531588792801,
-  "best_model_checkpoint": "/home/tonyzhao6/Projects/urgency-detection-finetuning/results/model_training/gemma-2-2b-it-8bit-64-32-v4/checkpoint-700",
-  "epoch": 0.970873786407767,
-  "eval_steps": 100,
-  "global_step": 700,
-  "is_hyper_param_search": false,
-  "is_local_process_zero": true,
-  "is_world_process_zero": true,
-  "log_history": [
-    {
-      "epoch": 0.013869625520110958,
-      "grad_norm": 0.9987179040908813,
-      "learning_rate": 1.834862385321101e-05,
-      "loss": 2.1962,
-      "step": 10
-    },
-    {
-      "epoch": 0.027739251040221916,
-      "grad_norm": 0.7583550810813904,
-      "learning_rate": 3.669724770642202e-05,
-      "loss": 1.6408,
-      "step": 20
-    },
-    {
-      "epoch": 0.04160887656033287,
-      "grad_norm": 0.6101565361022949,
-      "learning_rate": 5.504587155963303e-05,
-      "loss": 0.8461,
-      "step": 30
-    },
-    {
-      "epoch": 0.05547850208044383,
-      "grad_norm": 0.2617435157299042,
-      "learning_rate": 7.339449541284404e-05,
-      "loss": 0.3555,
-      "step": 40
-    },
-    {
-      "epoch": 0.06934812760055478,
-      "grad_norm": 0.17355866730213165,
-      "learning_rate": 9.174311926605506e-05,
-      "loss": 0.2506,
-      "step": 50
-    },
-    {
-      "epoch": 0.08321775312066575,
-      "grad_norm": 0.1640639752149582,
-      "learning_rate": 0.00011009174311926606,
-      "loss": 0.23,
-      "step": 60
-    },
-    {
-      "epoch": 0.0970873786407767,
-      "grad_norm": 0.1592264175415039,
-      "learning_rate": 0.00012844036697247707,
-      "loss": 0.2178,
-      "step": 70
-    },
-    {
-      "epoch": 0.11095700416088766,
-      "grad_norm": 0.13895617425441742,
-      "learning_rate": 0.0001467889908256881,
-      "loss": 0.1953,
-      "step": 80
-    },
-    {
-      "epoch": 0.12482662968099861,
-      "grad_norm": 0.13263335824012756,
-      "learning_rate": 0.0001651376146788991,
-      "loss": 0.1851,
-      "step": 90
-    },
-    {
-      "epoch": 0.13869625520110956,
-      "grad_norm": 0.14112189412117004,
-      "learning_rate": 0.00018348623853211012,
-      "loss": 0.1782,
-      "step": 100
-    },
-    {
-      "epoch": 0.13869625520110956,
-      "eval_loss": 0.1666356474161148,
-      "eval_runtime": 87.2237,
-      "eval_samples_per_second": 14.48,
-      "eval_steps_per_second": 0.906,
-      "step": 100
-    },
-    {
-      "epoch": 0.15256588072122051,
-      "grad_norm": 0.1304333657026291,
-      "learning_rate": 0.00019994279176201374,
-      "loss": 0.1644,
-      "step": 110
-    },
-    {
-      "epoch": 0.1664355062413315,
-      "grad_norm": 0.13556469976902008,
-      "learning_rate": 0.00019937070938215104,
-      "loss": 0.1614,
-      "step": 120
-    },
-    {
-      "epoch": 0.18030513176144244,
-      "grad_norm": 0.12493357062339783,
-      "learning_rate": 0.00019879862700228834,
-      "loss": 0.148,
-      "step": 130
-    },
-    {
-      "epoch": 0.1941747572815534,
-      "grad_norm": 0.12785165011882782,
-      "learning_rate": 0.00019822654462242566,
-      "loss": 0.1526,
-      "step": 140
-    },
-    {
-      "epoch": 0.20804438280166435,
-      "grad_norm": 0.1414169818162918,
-      "learning_rate": 0.00019765446224256295,
-      "loss": 0.1498,
-      "step": 150
-    },
-    {
-      "epoch": 0.22191400832177532,
-      "grad_norm": 0.11336012184619904,
-      "learning_rate": 0.00019708237986270025,
-      "loss": 0.1506,
-      "step": 160
-    },
-    {
-      "epoch": 0.23578363384188628,
-      "grad_norm": 0.11893126368522644,
-      "learning_rate": 0.00019651029748283754,
-      "loss": 0.1343,
-      "step": 170
-    },
-    {
-      "epoch": 0.24965325936199723,
-      "grad_norm": 0.12188615649938583,
-      "learning_rate": 0.00019593821510297484,
-      "loss": 0.1379,
-      "step": 180
-    },
-    {
-      "epoch": 0.2635228848821082,
-      "grad_norm": 0.11430846899747849,
-      "learning_rate": 0.00019536613272311214,
-      "loss": 0.1344,
-      "step": 190
-    },
-    {
-      "epoch": 0.27739251040221913,
-      "grad_norm": 0.11359121650457382,
-      "learning_rate": 0.00019479405034324946,
-      "loss": 0.139,
-      "step": 200
-    },
-    {
-      "epoch": 0.27739251040221913,
-      "eval_loss": 0.13432957231998444,
-      "eval_runtime": 86.9127,
-      "eval_samples_per_second": 14.532,
-      "eval_steps_per_second": 0.909,
-      "step": 200
-    },
-    {
-      "epoch": 0.2912621359223301,
-      "grad_norm": 0.10335998982191086,
-      "learning_rate": 0.00019422196796338675,
-      "loss": 0.1374,
-      "step": 210
-    },
-    {
-      "epoch": 0.30513176144244103,
-      "grad_norm": 0.09991727769374847,
-      "learning_rate": 0.00019364988558352405,
-      "loss": 0.1344,
-      "step": 220
-    },
-    {
-      "epoch": 0.31900138696255204,
-      "grad_norm": 0.10995834320783615,
-      "learning_rate": 0.00019307780320366135,
-      "loss": 0.1394,
-      "step": 230
-    },
-    {
-      "epoch": 0.332871012482663,
-      "grad_norm": 0.10396566987037659,
-      "learning_rate": 0.00019250572082379864,
-      "loss": 0.1223,
-      "step": 240
-    },
-    {
-      "epoch": 0.34674063800277394,
-      "grad_norm": 0.10032226890325546,
-      "learning_rate": 0.00019193363844393594,
-      "loss": 0.1285,
-      "step": 250
-    },
-    {
-      "epoch": 0.3606102635228849,
-      "grad_norm": 0.10445073246955872,
-      "learning_rate": 0.00019136155606407323,
-      "loss": 0.1261,
-      "step": 260
-    },
-    {
-      "epoch": 0.37447988904299584,
-      "grad_norm": 0.11336586624383926,
-      "learning_rate": 0.00019078947368421053,
-      "loss": 0.1286,
-      "step": 270
-    },
-    {
-      "epoch": 0.3883495145631068,
-      "grad_norm": 0.10205301642417908,
-      "learning_rate": 0.00019021739130434782,
-      "loss": 0.1208,
-      "step": 280
-    },
-    {
-      "epoch": 0.40221914008321774,
-      "grad_norm": 0.09567493945360184,
-      "learning_rate": 0.00018964530892448515,
-      "loss": 0.1271,
-      "step": 290
-    },
-    {
-      "epoch": 0.4160887656033287,
-      "grad_norm": 0.10747899860143661,
-      "learning_rate": 0.00018907322654462244,
-      "loss": 0.1233,
-      "step": 300
-    },
-    {
-      "epoch": 0.4160887656033287,
-      "eval_loss": 0.1257271021604538,
-      "eval_runtime": 86.6524,
-      "eval_samples_per_second": 14.575,
-      "eval_steps_per_second": 0.912,
-      "step": 300
-    },
-    {
-      "epoch": 0.42995839112343964,
-      "grad_norm": 0.10108979046344757,
-      "learning_rate": 0.00018850114416475974,
-      "loss": 0.124,
-      "step": 310
-    },
-    {
-      "epoch": 0.44382801664355065,
-      "grad_norm": 0.09316466003656387,
-      "learning_rate": 0.00018792906178489703,
-      "loss": 0.1212,
-      "step": 320
-    },
-    {
-      "epoch": 0.4576976421636616,
-      "grad_norm": 0.10638488829135895,
-      "learning_rate": 0.00018735697940503433,
-      "loss": 0.1288,
-      "step": 330
-    },
-    {
-      "epoch": 0.47156726768377255,
-      "grad_norm": 0.09914766252040863,
-      "learning_rate": 0.00018678489702517162,
-      "loss": 0.1259,
-      "step": 340
-    },
-    {
-      "epoch": 0.4854368932038835,
-      "grad_norm": 0.09707864373922348,
-      "learning_rate": 0.00018621281464530892,
-      "loss": 0.124,
-      "step": 350
-    },
-    {
-      "epoch": 0.49930651872399445,
-      "grad_norm": 0.09507231414318085,
-      "learning_rate": 0.00018564073226544621,
-      "loss": 0.1262,
-      "step": 360
-    },
-    {
-      "epoch": 0.5131761442441054,
-      "grad_norm": 0.09129882603883743,
-      "learning_rate": 0.0001850686498855835,
-      "loss": 0.1211,
-      "step": 370
-    },
-    {
-      "epoch": 0.5270457697642164,
-      "grad_norm": 0.09889239072799683,
-      "learning_rate": 0.00018449656750572083,
-      "loss": 0.1218,
-      "step": 380
-    },
-    {
-      "epoch": 0.5409153952843273,
-      "grad_norm": 0.09886115044355392,
-      "learning_rate": 0.00018392448512585813,
-      "loss": 0.1214,
-      "step": 390
-    },
-    {
-      "epoch": 0.5547850208044383,
-      "grad_norm": 0.09064166992902756,
-      "learning_rate": 0.00018335240274599542,
-      "loss": 0.126,
-      "step": 400
-    },
-    {
-      "epoch": 0.5547850208044383,
-      "eval_loss": 0.12142250686883926,
-      "eval_runtime": 86.64,
-      "eval_samples_per_second": 14.578,
-      "eval_steps_per_second": 0.912,
-      "step": 400
-    },
-    {
-      "epoch": 0.5686546463245492,
-      "grad_norm": 0.10354544222354889,
-      "learning_rate": 0.00018278032036613272,
-      "loss": 0.1253,
-      "step": 410
-    },
-    {
-      "epoch": 0.5825242718446602,
-      "grad_norm": 0.09165250509977341,
-      "learning_rate": 0.00018220823798627001,
-      "loss": 0.1224,
-      "step": 420
-    },
-    {
-      "epoch": 0.5963938973647711,
-      "grad_norm": 0.09138130396604538,
-      "learning_rate": 0.0001816361556064073,
-      "loss": 0.1289,
-      "step": 430
-    },
-    {
-      "epoch": 0.6102635228848821,
-      "grad_norm": 0.09735599905252457,
-      "learning_rate": 0.00018106407322654463,
-      "loss": 0.1181,
-      "step": 440
-    },
-    {
-      "epoch": 0.624133148404993,
-      "grad_norm": 0.09955897927284241,
-      "learning_rate": 0.00018049199084668193,
-      "loss": 0.1207,
-      "step": 450
-    },
-    {
-      "epoch": 0.6380027739251041,
-      "grad_norm": 0.09378518909215927,
-      "learning_rate": 0.00017991990846681922,
-      "loss": 0.1189,
-      "step": 460
-    },
-    {
-      "epoch": 0.651872399445215,
-      "grad_norm": 0.09985518455505371,
-      "learning_rate": 0.00017934782608695652,
-      "loss": 0.1196,
-      "step": 470
-    },
-    {
-      "epoch": 0.665742024965326,
-      "grad_norm": 0.09567826986312866,
-      "learning_rate": 0.00017877574370709382,
-      "loss": 0.1189,
-      "step": 480
-    },
-    {
-      "epoch": 0.6796116504854369,
-      "grad_norm": 0.09133660793304443,
-      "learning_rate": 0.0001782036613272311,
-      "loss": 0.1199,
-      "step": 490
-    },
-    {
-      "epoch": 0.6934812760055479,
-      "grad_norm": 0.07571779191493988,
-      "learning_rate": 0.00017763157894736843,
-      "loss": 0.1199,
-      "step": 500
-    },
-    {
-      "epoch": 0.6934812760055479,
-      "eval_loss": 0.11764033138751984,
-      "eval_runtime": 86.7125,
-      "eval_samples_per_second": 14.565,
-      "eval_steps_per_second": 0.911,
-      "step": 500
-    },
-    {
-      "epoch": 0.7073509015256588,
-      "grad_norm": 0.07904700189828873,
-      "learning_rate": 0.00017705949656750573,
-      "loss": 0.1174,
-      "step": 510
-    },
-    {
-      "epoch": 0.7212205270457698,
-      "grad_norm": 0.0874553844332695,
-      "learning_rate": 0.00017648741418764302,
-      "loss": 0.1191,
-      "step": 520
-    },
-    {
-      "epoch": 0.7350901525658807,
-      "grad_norm": 0.09417985379695892,
-      "learning_rate": 0.00017591533180778032,
-      "loss": 0.1158,
-      "step": 530
-    },
-    {
-      "epoch": 0.7489597780859917,
-      "grad_norm": 0.0866062194108963,
-      "learning_rate": 0.00017534324942791762,
-      "loss": 0.1106,
-      "step": 540
-    },
-    {
-      "epoch": 0.7628294036061026,
-      "grad_norm": 0.08498796820640564,
-      "learning_rate": 0.0001747711670480549,
-      "loss": 0.1124,
-      "step": 550
-    },
-    {
-      "epoch": 0.7766990291262136,
-      "grad_norm": 0.08251694589853287,
-      "learning_rate": 0.00017419908466819223,
-      "loss": 0.1136,
-      "step": 560
-    },
-    {
-      "epoch": 0.7905686546463245,
-      "grad_norm": 0.08275240659713745,
-      "learning_rate": 0.00017362700228832953,
-      "loss": 0.1107,
-      "step": 570
-    },
-    {
-      "epoch": 0.8044382801664355,
-      "grad_norm": 0.08751562237739563,
-      "learning_rate": 0.00017305491990846682,
-      "loss": 0.1169,
-      "step": 580
-    },
-    {
-      "epoch": 0.8183079056865464,
-      "grad_norm": 0.09078636020421982,
-      "learning_rate": 0.00017248283752860412,
-      "loss": 0.1143,
-      "step": 590
-    },
-    {
-      "epoch": 0.8321775312066574,
-      "grad_norm": 0.08412676304578781,
-      "learning_rate": 0.00017191075514874142,
-      "loss": 0.1197,
-      "step": 600
-    },
-    {
-      "epoch": 0.8321775312066574,
-      "eval_loss": 0.11502571403980255,
-      "eval_runtime": 86.625,
-      "eval_samples_per_second": 14.58,
-      "eval_steps_per_second": 0.912,
-      "step": 600
-    },
-    {
-      "epoch": 0.8460471567267683,
-      "grad_norm": 0.08373397588729858,
-      "learning_rate": 0.0001713386727688787,
-      "loss": 0.1205,
-      "step": 610
-    },
-    {
-      "epoch": 0.8599167822468793,
-      "grad_norm": 0.08933025598526001,
-      "learning_rate": 0.00017076659038901603,
-      "loss": 0.1147,
-      "step": 620
-    },
-    {
-      "epoch": 0.8737864077669902,
-      "grad_norm": 0.08800772577524185,
-      "learning_rate": 0.00017019450800915333,
-      "loss": 0.1201,
-      "step": 630
-    },
-    {
-      "epoch": 0.8876560332871013,
-      "grad_norm": 0.08623263984918594,
-      "learning_rate": 0.00016962242562929063,
-      "loss": 0.1144,
-      "step": 640
-    },
-    {
-      "epoch": 0.9015256588072122,
-      "grad_norm": 0.0788191556930542,
-      "learning_rate": 0.00016905034324942792,
-      "loss": 0.1188,
-      "step": 650
-    },
-    {
-      "epoch": 0.9153952843273232,
-      "grad_norm": 0.0787658542394638,
-      "learning_rate": 0.00016847826086956522,
-      "loss": 0.1077,
-      "step": 660
-    },
-    {
-      "epoch": 0.9292649098474342,
-      "grad_norm": 0.08364666253328323,
-      "learning_rate": 0.0001679061784897025,
-      "loss": 0.1072,
-      "step": 670
-    },
-    {
-      "epoch": 0.9431345353675451,
-      "grad_norm": 0.08853990584611893,
-      "learning_rate": 0.00016733409610983983,
-      "loss": 0.1097,
-      "step": 680
-    },
-    {
-      "epoch": 0.957004160887656,
-      "grad_norm": 0.08456674963235855,
-      "learning_rate": 0.00016676201372997713,
-      "loss": 0.1167,
-      "step": 690
-    },
-    {
-      "epoch": 0.970873786407767,
-      "grad_norm": 0.0840703621506691,
-      "learning_rate": 0.00016618993135011443,
-      "loss": 0.1231,
-      "step": 700
-    },
-    {
-      "epoch": 0.970873786407767,
-      "eval_loss": 0.11245531588792801,
-      "eval_runtime": 86.613,
-      "eval_samples_per_second": 14.582,
-      "eval_steps_per_second": 0.912,
-      "step": 700
-    }
-  ],
-  "logging_steps": 10,
-  "max_steps": 3605,
-  "num_input_tokens_seen": 0,
-  "num_train_epochs": 5,
-  "save_steps": 100,
-  "stateful_callbacks": {
-    "TrainerControl": {
-      "args": {
-        "should_epoch_stop": false,
-        "should_evaluate": false,
-        "should_log": false,
-        "should_save": true,
-        "should_training_stop": false
-      },
-      "attributes": {}
-    }
-  },
-  "total_flos": 1.0638251619228058e+17,
-  "train_batch_size": 16,
-  "trial_name": null,
-  "trial_params": null
-}