Colder203
/

phi_orm_3.8b_8ksamples

PEFT

Safetensors

Model card Files Files and versions Community

Colder203 commited on Jan 3

Commit

9d30c4e

verified ·

1 Parent(s): 99793fd

Upload trainer_state.json with huggingface_hub

Browse files

Files changed (1) hide show

trainer_state.json +593 -0

trainer_state.json ADDED Viewed

	@@ -0,0 +1,593 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 0.4708236471176765,
+  "eval_steps": 500,
+  "global_step": 8000,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.005885295588970956,
+      "grad_norm": 29.875,
+      "learning_rate": 3.9215686274509805e-05,
+      "loss": 1.0686,
+      "step": 100
+    },
+    {
+      "epoch": 0.011770591177941912,
+      "grad_norm": 11.375,
+      "learning_rate": 7.843137254901961e-05,
+      "loss": 0.6153,
+      "step": 200
+    },
+    {
+      "epoch": 0.01765588676691287,
+      "grad_norm": 20.25,
+      "learning_rate": 0.00011764705882352942,
+      "loss": 0.5969,
+      "step": 300
+    },
+    {
+      "epoch": 0.023541182355883823,
+      "grad_norm": 4.5,
+      "learning_rate": 0.00015686274509803922,
+      "loss": 0.5478,
+      "step": 400
+    },
+    {
+      "epoch": 0.02942647794485478,
+      "grad_norm": 7.40625,
+      "learning_rate": 0.000196078431372549,
+      "loss": 0.5599,
+      "step": 500
+    },
+    {
+      "epoch": 0.03531177353382574,
+      "grad_norm": 2.71875,
+      "learning_rate": 0.00019998528443307886,
+      "loss": 0.565,
+      "step": 600
+    },
+    {
+      "epoch": 0.04119706912279669,
+      "grad_norm": 30.875,
+      "learning_rate": 0.00019993442136695625,
+      "loss": 0.5501,
+      "step": 700
+    },
+    {
+      "epoch": 0.047082364711767646,
+      "grad_norm": 3.328125,
+      "learning_rate": 0.00019984724760441856,
+      "loss": 0.5355,
+      "step": 800
+    },
+    {
+      "epoch": 0.05296766030073861,
+      "grad_norm": 6.03125,
+      "learning_rate": 0.00019972379481963764,
+      "loss": 0.5344,
+      "step": 900
+    },
+    {
+      "epoch": 0.05885295588970956,
+      "grad_norm": 29.75,
+      "learning_rate": 0.00019956410786859524,
+      "loss": 0.5016,
+      "step": 1000
+    },
+    {
+      "epoch": 0.06473825147868052,
+      "grad_norm": 14.0625,
+      "learning_rate": 0.00019936824477278514,
+      "loss": 0.5091,
+      "step": 1100
+    },
+    {
+      "epoch": 0.07062354706765148,
+      "grad_norm": 24.75,
+      "learning_rate": 0.00019913627669813103,
+      "loss": 0.5005,
+      "step": 1200
+    },
+    {
+      "epoch": 0.07650884265662243,
+      "grad_norm": 3.203125,
+      "learning_rate": 0.00019886828792912894,
+      "loss": 0.4961,
+      "step": 1300
+    },
+    {
+      "epoch": 0.08239413824559338,
+      "grad_norm": 1.875,
+      "learning_rate": 0.0001985643758382227,
+      "loss": 0.4755,
+      "step": 1400
+    },
+    {
+      "epoch": 0.08827943383456434,
+      "grad_norm": 7.46875,
+      "learning_rate": 0.00019822465085042422,
+      "loss": 0.4889,
+      "step": 1500
+    },
+    {
+      "epoch": 0.09416472942353529,
+      "grad_norm": 3.859375,
+      "learning_rate": 0.0001978492364031911,
+      "loss": 0.5024,
+      "step": 1600
+    },
+    {
+      "epoch": 0.10005002501250625,
+      "grad_norm": 14.4375,
+      "learning_rate": 0.00019743826890157614,
+      "loss": 0.4681,
+      "step": 1700
+    },
+    {
+      "epoch": 0.10593532060147721,
+      "grad_norm": 10.375,
+      "learning_rate": 0.0001969918976686652,
+      "loss": 0.488,
+      "step": 1800
+    },
+    {
+      "epoch": 0.11182061619044817,
+      "grad_norm": 9.5625,
+      "learning_rate": 0.00019651028489132147,
+      "loss": 0.4859,
+      "step": 1900
+    },
+    {
+      "epoch": 0.11770591177941912,
+      "grad_norm": 15.125,
+      "learning_rate": 0.0001959936055612557,
+      "loss": 0.5028,
+      "step": 2000
+    },
+    {
+      "epoch": 0.12359120736839008,
+      "grad_norm": 12.5625,
+      "learning_rate": 0.0001954420474114435,
+      "loss": 0.4937,
+      "step": 2100
+    },
+    {
+      "epoch": 0.12947650295736104,
+      "grad_norm": 3.890625,
+      "learning_rate": 0.00019485581084791376,
+      "loss": 0.4801,
+      "step": 2200
+    },
+    {
+      "epoch": 0.13536179854633199,
+      "grad_norm": 19.125,
+      "learning_rate": 0.0001942351088769319,
+      "loss": 0.4853,
+      "step": 2300
+    },
+    {
+      "epoch": 0.14124709413530295,
+      "grad_norm": 11.8125,
+      "learning_rate": 0.0001935801670276052,
+      "loss": 0.4739,
+      "step": 2400
+    },
+    {
+      "epoch": 0.1471323897242739,
+      "grad_norm": 35.5,
+      "learning_rate": 0.00019289122326993777,
+      "loss": 0.4868,
+      "step": 2500
+    },
+    {
+      "epoch": 0.15301768531324486,
+      "grad_norm": 20.875,
+      "learning_rate": 0.00019216852792836516,
+      "loss": 0.4925,
+      "step": 2600
+    },
+    {
+      "epoch": 0.1589029809022158,
+      "grad_norm": 12.5625,
+      "learning_rate": 0.00019141234359080055,
+      "loss": 0.4808,
+      "step": 2700
+    },
+    {
+      "epoch": 0.16478827649118677,
+      "grad_norm": 8.6875,
+      "learning_rate": 0.00019062294501322416,
+      "loss": 0.4757,
+      "step": 2800
+    },
+    {
+      "epoch": 0.17067357208015774,
+      "grad_norm": 20.625,
+      "learning_rate": 0.0001898006190198525,
+      "loss": 0.4805,
+      "step": 2900
+    },
+    {
+      "epoch": 0.17655886766912868,
+      "grad_norm": 10.25,
+      "learning_rate": 0.0001889456643989218,
+      "loss": 0.4832,
+      "step": 3000
+    },
+    {
+      "epoch": 0.18244416325809965,
+      "grad_norm": 20.25,
+      "learning_rate": 0.00018805839179412485,
+      "loss": 0.4559,
+      "step": 3100
+    },
+    {
+      "epoch": 0.18832945884707059,
+      "grad_norm": 8.5625,
+      "learning_rate": 0.00018713912359174,
+      "loss": 0.497,
+      "step": 3200
+    },
+    {
+      "epoch": 0.19421475443604155,
+      "grad_norm": 6.40625,
+      "learning_rate": 0.00018618819380349382,
+      "loss": 0.4776,
+      "step": 3300
+    },
+    {
+      "epoch": 0.2001000500250125,
+      "grad_norm": 12.8125,
+      "learning_rate": 0.00018520594794519941,
+      "loss": 0.4915,
+      "step": 3400
+    },
+    {
+      "epoch": 0.20598534561398346,
+      "grad_norm": 1.84375,
+      "learning_rate": 0.00018419274291121485,
+      "loss": 0.4498,
+      "step": 3500
+    },
+    {
+      "epoch": 0.21187064120295443,
+      "grad_norm": 3.8125,
+      "learning_rate": 0.00018314894684476736,
+      "loss": 0.4625,
+      "step": 3600
+    },
+    {
+      "epoch": 0.21775593679192537,
+      "grad_norm": 19.125,
+      "learning_rate": 0.00018207493900419027,
+      "loss": 0.4625,
+      "step": 3700
+    },
+    {
+      "epoch": 0.22364123238089634,
+      "grad_norm": 11.5,
+      "learning_rate": 0.00018097110962512128,
+      "loss": 0.4655,
+      "step": 3800
+    },
+    {
+      "epoch": 0.22952652796986728,
+      "grad_norm": 6.3125,
+      "learning_rate": 0.00017983785977871209,
+      "loss": 0.4488,
+      "step": 3900
+    },
+    {
+      "epoch": 0.23541182355883825,
+      "grad_norm": 9.875,
+      "learning_rate": 0.00017867560122590125,
+      "loss": 0.4441,
+      "step": 4000
+    },
+    {
+      "epoch": 0.24129711914780919,
+      "grad_norm": 12.875,
+      "learning_rate": 0.00017748475626780277,
+      "loss": 0.4732,
+      "step": 4100
+    },
+    {
+      "epoch": 0.24718241473678015,
+      "grad_norm": 4.21875,
+      "learning_rate": 0.0001762657575922649,
+      "loss": 0.4544,
+      "step": 4200
+    },
+    {
+      "epoch": 0.2530677103257511,
+      "grad_norm": 3.125,
+      "learning_rate": 0.0001750190481166552,
+      "loss": 0.4779,
+      "step": 4300
+    },
+    {
+      "epoch": 0.2589530059147221,
+      "grad_norm": 2.1875,
+      "learning_rate": 0.00017374508082692848,
+      "loss": 0.4661,
+      "step": 4400
+    },
+    {
+      "epoch": 0.26483830150369303,
+      "grad_norm": 26.25,
+      "learning_rate": 0.0001724443186130367,
+      "loss": 0.4916,
+      "step": 4500
+    },
+    {
+      "epoch": 0.27072359709266397,
+      "grad_norm": 8.125,
+      "learning_rate": 0.00017111723410073991,
+      "loss": 0.449,
+      "step": 4600
+    },
+    {
+      "epoch": 0.2766088926816349,
+      "grad_norm": 8.625,
+      "learning_rate": 0.00016976430947988007,
+      "loss": 0.45,
+      "step": 4700
+    },
+    {
+      "epoch": 0.2824941882706059,
+      "grad_norm": 3.59375,
+      "learning_rate": 0.00016838603632917954,
+      "loss": 0.4593,
+      "step": 4800
+    },
+    {
+      "epoch": 0.28837948385957685,
+      "grad_norm": 6.40625,
+      "learning_rate": 0.0001669829154376285,
+      "loss": 0.4847,
+      "step": 4900
+    },
+    {
+      "epoch": 0.2942647794485478,
+      "grad_norm": 13.125,
+      "learning_rate": 0.00016555545662252536,
+      "loss": 0.4576,
+      "step": 5000
+    },
+    {
+      "epoch": 0.3001500750375188,
+      "grad_norm": 14.3125,
+      "learning_rate": 0.00016410417854423735,
+      "loss": 0.4457,
+      "step": 5100
+    },
+    {
+      "epoch": 0.3060353706264897,
+      "grad_norm": 29.0,
+      "learning_rate": 0.00016262960851774752,
+      "loss": 0.4972,
+      "step": 5200
+    },
+    {
+      "epoch": 0.31192066621546066,
+      "grad_norm": 20.75,
+      "learning_rate": 0.00016113228232105757,
+      "loss": 0.4715,
+      "step": 5300
+    },
+    {
+      "epoch": 0.3178059618044316,
+      "grad_norm": 22.5,
+      "learning_rate": 0.0001596127440005152,
+      "loss": 0.4696,
+      "step": 5400
+    },
+    {
+      "epoch": 0.3236912573934026,
+      "grad_norm": 8.1875,
+      "learning_rate": 0.00015807154567313775,
+      "loss": 0.4629,
+      "step": 5500
+    },
+    {
+      "epoch": 0.32957655298237354,
+      "grad_norm": 4.375,
+      "learning_rate": 0.0001565092473260029,
+      "loss": 0.475,
+      "step": 5600
+    },
+    {
+      "epoch": 0.3354618485713445,
+      "grad_norm": 13.5,
+      "learning_rate": 0.00015492641661278005,
+      "loss": 0.4511,
+      "step": 5700
+    },
+    {
+      "epoch": 0.3413471441603155,
+      "grad_norm": 3.5625,
+      "learning_rate": 0.0001533236286474762,
+      "loss": 0.4743,
+      "step": 5800
+    },
+    {
+      "epoch": 0.3472324397492864,
+      "grad_norm": 11.8125,
+      "learning_rate": 0.0001517014657954708,
+      "loss": 0.4418,
+      "step": 5900
+    },
+    {
+      "epoch": 0.35311773533825735,
+      "grad_norm": 26.125,
+      "learning_rate": 0.00015006051746191626,
+      "loss": 0.45,
+      "step": 6000
+    },
+    {
+      "epoch": 0.3590030309272283,
+      "grad_norm": 15.375,
+      "learning_rate": 0.00014840137987758028,
+      "loss": 0.4463,
+      "step": 6100
+    },
+    {
+      "epoch": 0.3648883265161993,
+      "grad_norm": 5.90625,
+      "learning_rate": 0.00014672465588220837,
+      "loss": 0.4559,
+      "step": 6200
+    },
+    {
+      "epoch": 0.37077362210517023,
+      "grad_norm": 12.9375,
+      "learning_rate": 0.0001450309547054846,
+      "loss": 0.4398,
+      "step": 6300
+    },
+    {
+      "epoch": 0.37665891769414117,
+      "grad_norm": 21.875,
+      "learning_rate": 0.00014332089174567126,
+      "loss": 0.4454,
+      "step": 6400
+    },
+    {
+      "epoch": 0.38254421328311217,
+      "grad_norm": 16.875,
+      "learning_rate": 0.00014159508834600657,
+      "loss": 0.4443,
+      "step": 6500
+    },
+    {
+      "epoch": 0.3884295088720831,
+      "grad_norm": 34.25,
+      "learning_rate": 0.00013985417156894267,
+      "loss": 0.4762,
+      "step": 6600
+    },
+    {
+      "epoch": 0.39431480446105405,
+      "grad_norm": 4.5625,
+      "learning_rate": 0.0001380987739683055,
+      "loss": 0.4795,
+      "step": 6700
+    },
+    {
+      "epoch": 0.400200100050025,
+      "grad_norm": 15.75,
+      "learning_rate": 0.00013632953335945927,
+      "loss": 0.4603,
+      "step": 6800
+    },
+    {
+      "epoch": 0.406085395638996,
+      "grad_norm": 5.40625,
+      "learning_rate": 0.00013454709258755942,
+      "loss": 0.4674,
+      "step": 6900
+    },
+    {
+      "epoch": 0.4119706912279669,
+      "grad_norm": 30.125,
+      "learning_rate": 0.00013275209929397775,
+      "loss": 0.4595,
+      "step": 7000
+    },
+    {
+      "epoch": 0.41785598681693786,
+      "grad_norm": 16.875,
+      "learning_rate": 0.0001309452056809851,
+      "loss": 0.4398,
+      "step": 7100
+    },
+    {
+      "epoch": 0.42374128240590886,
+      "grad_norm": 5.6875,
+      "learning_rate": 0.00012912706827477671,
+      "loss": 0.4693,
+      "step": 7200
+    },
+    {
+      "epoch": 0.4296265779948798,
+      "grad_norm": 17.125,
+      "learning_rate": 0.00012729834768692667,
+      "loss": 0.4564,
+      "step": 7300
+    },
+    {
+      "epoch": 0.43551187358385074,
+      "grad_norm": 9.75,
+      "learning_rate": 0.00012545970837435756,
+      "loss": 0.4732,
+      "step": 7400
+    },
+    {
+      "epoch": 0.4413971691728217,
+      "grad_norm": 6.3125,
+      "learning_rate": 0.00012361181839791357,
+      "loss": 0.4647,
+      "step": 7500
+    },
+    {
+      "epoch": 0.4472824647617927,
+      "grad_norm": 19.0,
+      "learning_rate": 0.00012175534917962352,
+      "loss": 0.4697,
+      "step": 7600
+    },
+    {
+      "epoch": 0.4531677603507636,
+      "grad_norm": 19.375,
+      "learning_rate": 0.00011989097525874294,
+      "loss": 0.4814,
+      "step": 7700
+    },
+    {
+      "epoch": 0.45905305593973456,
+      "grad_norm": 2.015625,
+      "learning_rate": 0.00011801937404666336,
+      "loss": 0.4688,
+      "step": 7800
+    },
+    {
+      "epoch": 0.46493835152870555,
+      "grad_norm": 9.625,
+      "learning_rate": 0.00011614122558077828,
+      "loss": 0.4665,
+      "step": 7900
+    },
+    {
+      "epoch": 0.4708236471176765,
+      "grad_norm": 21.875,
+      "learning_rate": 0.00011425721227739465,
+      "loss": 0.472,
+      "step": 8000
+    }
+  ],
+  "logging_steps": 100,
+  "max_steps": 16991,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 1,
+  "save_steps": 4000,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": false
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 1.253476198349144e+18,
+  "train_batch_size": 8,
+  "trial_name": null,
+  "trial_params": null
+}