End of training

Browse files

Files changed (7) hide show

README.md +2 -1
all_results.json +12 -0
eval_results.json +7 -0
train_results.json +8 -0
trainer_state.json +647 -0
training_eval_loss.png +0 -0
training_loss.png +0 -0

README.md CHANGED Viewed

@@ -4,6 +4,7 @@ license: llama3.1
 base_model: meta-llama/Meta-Llama-3.1-8B
 tags:
 - llama-factory
 - generated_from_trainer
 model-index:
 - name: llama3-1_8b_physics_375000_samples
@@ -15,7 +16,7 @@ should probably proofread and complete it, then remove this comment. -->
 # llama3-1_8b_physics_375000_samples
-This model is a fine-tuned version of [meta-llama/Meta-Llama-3.1-8B](https://huggingface.co/meta-llama/Meta-Llama-3.1-8B) on an unknown dataset.
 It achieves the following results on the evaluation set:
 - Loss: 0.7752

 base_model: meta-llama/Meta-Llama-3.1-8B
 tags:
 - llama-factory
+- full
 - generated_from_trainer
 model-index:
 - name: llama3-1_8b_physics_375000_samples
 # llama3-1_8b_physics_375000_samples
+This model is a fine-tuned version of [meta-llama/Meta-Llama-3.1-8B](https://huggingface.co/meta-llama/Meta-Llama-3.1-8B) on the mlfoundations-dev/physics_375000_samples dataset.
 It achieves the following results on the evaluation set:
 - Loss: 0.7752

all_results.json ADDED Viewed

	@@ -0,0 +1,12 @@

+{
+    "epoch": 3.0,
+    "eval_loss": 0.7752296328544617,
+    "eval_runtime": 27.3716,
+    "eval_samples_per_second": 273.423,
+    "eval_steps_per_second": 1.096,
+    "total_flos": 1396981062696960.0,
+    "train_loss": 0.7675551453368555,
+    "train_runtime": 5571.5313,
+    "train_samples_per_second": 76.563,
+    "train_steps_per_second": 0.15
+}

eval_results.json ADDED Viewed

	@@ -0,0 +1,7 @@

+{
+    "epoch": 3.0,
+    "eval_loss": 0.7752296328544617,
+    "eval_runtime": 27.3716,
+    "eval_samples_per_second": 273.423,
+    "eval_steps_per_second": 1.096
+}

train_results.json ADDED Viewed

	@@ -0,0 +1,8 @@

+{
+    "epoch": 3.0,
+    "total_flos": 1396981062696960.0,
+    "train_loss": 0.7675551453368555,
+    "train_runtime": 5571.5313,
+    "train_samples_per_second": 76.563,
+    "train_steps_per_second": 0.15
+}

trainer_state.json ADDED Viewed

	@@ -0,0 +1,647 @@

+{
+  "best_metric": null,
+  "best_model_checkpoint": null,
+  "epoch": 3.0,
+  "eval_steps": 500,
+  "global_step": 834,
+  "is_hyper_param_search": false,
+  "is_local_process_zero": true,
+  "is_world_process_zero": true,
+  "log_history": [
+    {
+      "epoch": 0.03597122302158273,
+      "grad_norm": 26.93948859971876,
+      "learning_rate": 5e-06,
+      "loss": 1.0175,
+      "step": 10
+    },
+    {
+      "epoch": 0.07194244604316546,
+      "grad_norm": 2.6743423167480063,
+      "learning_rate": 5e-06,
+      "loss": 0.9337,
+      "step": 20
+    },
+    {
+      "epoch": 0.1079136690647482,
+      "grad_norm": 1.100805597904231,
+      "learning_rate": 5e-06,
+      "loss": 0.8923,
+      "step": 30
+    },
+    {
+      "epoch": 0.14388489208633093,
+      "grad_norm": 0.8355539701078896,
+      "learning_rate": 5e-06,
+      "loss": 0.8673,
+      "step": 40
+    },
+    {
+      "epoch": 0.17985611510791366,
+      "grad_norm": 0.7157506047100403,
+      "learning_rate": 5e-06,
+      "loss": 0.8553,
+      "step": 50
+    },
+    {
+      "epoch": 0.2158273381294964,
+      "grad_norm": 0.9806631521043339,
+      "learning_rate": 5e-06,
+      "loss": 0.8492,
+      "step": 60
+    },
+    {
+      "epoch": 0.2517985611510791,
+      "grad_norm": 0.8360835611944488,
+      "learning_rate": 5e-06,
+      "loss": 0.8382,
+      "step": 70
+    },
+    {
+      "epoch": 0.28776978417266186,
+      "grad_norm": 0.7078472519601653,
+      "learning_rate": 5e-06,
+      "loss": 0.8318,
+      "step": 80
+    },
+    {
+      "epoch": 0.3237410071942446,
+      "grad_norm": 0.6255785562847258,
+      "learning_rate": 5e-06,
+      "loss": 0.825,
+      "step": 90
+    },
+    {
+      "epoch": 0.3597122302158273,
+      "grad_norm": 0.6950072028339258,
+      "learning_rate": 5e-06,
+      "loss": 0.8225,
+      "step": 100
+    },
+    {
+      "epoch": 0.39568345323741005,
+      "grad_norm": 0.622757689781733,
+      "learning_rate": 5e-06,
+      "loss": 0.8165,
+      "step": 110
+    },
+    {
+      "epoch": 0.4316546762589928,
+      "grad_norm": 0.6855173384055511,
+      "learning_rate": 5e-06,
+      "loss": 0.8162,
+      "step": 120
+    },
+    {
+      "epoch": 0.4676258992805755,
+      "grad_norm": 0.555459004966806,
+      "learning_rate": 5e-06,
+      "loss": 0.8141,
+      "step": 130
+    },
+    {
+      "epoch": 0.5035971223021583,
+      "grad_norm": 0.7189252900166325,
+      "learning_rate": 5e-06,
+      "loss": 0.8113,
+      "step": 140
+    },
+    {
+      "epoch": 0.539568345323741,
+      "grad_norm": 0.8411135438726722,
+      "learning_rate": 5e-06,
+      "loss": 0.8069,
+      "step": 150
+    },
+    {
+      "epoch": 0.5755395683453237,
+      "grad_norm": 0.9141854769887011,
+      "learning_rate": 5e-06,
+      "loss": 0.8087,
+      "step": 160
+    },
+    {
+      "epoch": 0.6115107913669064,
+      "grad_norm": 0.6527584548807389,
+      "learning_rate": 5e-06,
+      "loss": 0.8048,
+      "step": 170
+    },
+    {
+      "epoch": 0.6474820143884892,
+      "grad_norm": 0.6986581112545092,
+      "learning_rate": 5e-06,
+      "loss": 0.8051,
+      "step": 180
+    },
+    {
+      "epoch": 0.6834532374100719,
+      "grad_norm": 0.6094857952430536,
+      "learning_rate": 5e-06,
+      "loss": 0.8044,
+      "step": 190
+    },
+    {
+      "epoch": 0.7194244604316546,
+      "grad_norm": 0.74096920276776,
+      "learning_rate": 5e-06,
+      "loss": 0.7989,
+      "step": 200
+    },
+    {
+      "epoch": 0.7553956834532374,
+      "grad_norm": 0.6584952886572538,
+      "learning_rate": 5e-06,
+      "loss": 0.8025,
+      "step": 210
+    },
+    {
+      "epoch": 0.7913669064748201,
+      "grad_norm": 0.5838446606699556,
+      "learning_rate": 5e-06,
+      "loss": 0.7988,
+      "step": 220
+    },
+    {
+      "epoch": 0.8273381294964028,
+      "grad_norm": 0.5916175411049406,
+      "learning_rate": 5e-06,
+      "loss": 0.7985,
+      "step": 230
+    },
+    {
+      "epoch": 0.8633093525179856,
+      "grad_norm": 0.626471567693148,
+      "learning_rate": 5e-06,
+      "loss": 0.7973,
+      "step": 240
+    },
+    {
+      "epoch": 0.8992805755395683,
+      "grad_norm": 0.6338741269795162,
+      "learning_rate": 5e-06,
+      "loss": 0.7933,
+      "step": 250
+    },
+    {
+      "epoch": 0.935251798561151,
+      "grad_norm": 0.8343555675066444,
+      "learning_rate": 5e-06,
+      "loss": 0.7969,
+      "step": 260
+    },
+    {
+      "epoch": 0.9712230215827338,
+      "grad_norm": 0.6221641429373133,
+      "learning_rate": 5e-06,
+      "loss": 0.7933,
+      "step": 270
+    },
+    {
+      "epoch": 1.0,
+      "eval_loss": 0.7923575043678284,
+      "eval_runtime": 27.9533,
+      "eval_samples_per_second": 267.732,
+      "eval_steps_per_second": 1.073,
+      "step": 278
+    },
+    {
+      "epoch": 1.0071942446043165,
+      "grad_norm": 0.8944971285319924,
+      "learning_rate": 5e-06,
+      "loss": 0.7823,
+      "step": 280
+    },
+    {
+      "epoch": 1.0431654676258992,
+      "grad_norm": 0.7668083853056575,
+      "learning_rate": 5e-06,
+      "loss": 0.7574,
+      "step": 290
+    },
+    {
+      "epoch": 1.079136690647482,
+      "grad_norm": 0.6176816592509634,
+      "learning_rate": 5e-06,
+      "loss": 0.7529,
+      "step": 300
+    },
+    {
+      "epoch": 1.1151079136690647,
+      "grad_norm": 0.6475301176330789,
+      "learning_rate": 5e-06,
+      "loss": 0.7558,
+      "step": 310
+    },
+    {
+      "epoch": 1.1510791366906474,
+      "grad_norm": 0.5811910989874788,
+      "learning_rate": 5e-06,
+      "loss": 0.7623,
+      "step": 320
+    },
+    {
+      "epoch": 1.1870503597122302,
+      "grad_norm": 0.6269454462814978,
+      "learning_rate": 5e-06,
+      "loss": 0.7601,
+      "step": 330
+    },
+    {
+      "epoch": 1.223021582733813,
+      "grad_norm": 0.5423886247053047,
+      "learning_rate": 5e-06,
+      "loss": 0.7535,
+      "step": 340
+    },
+    {
+      "epoch": 1.2589928057553956,
+      "grad_norm": 0.6670401432003603,
+      "learning_rate": 5e-06,
+      "loss": 0.757,
+      "step": 350
+    },
+    {
+      "epoch": 1.2949640287769784,
+      "grad_norm": 0.7095322132659916,
+      "learning_rate": 5e-06,
+      "loss": 0.759,
+      "step": 360
+    },
+    {
+      "epoch": 1.330935251798561,
+      "grad_norm": 0.6870367808903867,
+      "learning_rate": 5e-06,
+      "loss": 0.7567,
+      "step": 370
+    },
+    {
+      "epoch": 1.3669064748201438,
+      "grad_norm": 0.6640094117573664,
+      "learning_rate": 5e-06,
+      "loss": 0.7592,
+      "step": 380
+    },
+    {
+      "epoch": 1.4028776978417266,
+      "grad_norm": 0.5994950619117767,
+      "learning_rate": 5e-06,
+      "loss": 0.7529,
+      "step": 390
+    },
+    {
+      "epoch": 1.4388489208633093,
+      "grad_norm": 0.7392872817621052,
+      "learning_rate": 5e-06,
+      "loss": 0.7554,
+      "step": 400
+    },
+    {
+      "epoch": 1.474820143884892,
+      "grad_norm": 0.5656749568866071,
+      "learning_rate": 5e-06,
+      "loss": 0.7547,
+      "step": 410
+    },
+    {
+      "epoch": 1.5107913669064748,
+      "grad_norm": 0.921484641426356,
+      "learning_rate": 5e-06,
+      "loss": 0.7532,
+      "step": 420
+    },
+    {
+      "epoch": 1.5467625899280577,
+      "grad_norm": 0.540059029380678,
+      "learning_rate": 5e-06,
+      "loss": 0.7585,
+      "step": 430
+    },
+    {
+      "epoch": 1.5827338129496402,
+      "grad_norm": 0.6558652758296812,
+      "learning_rate": 5e-06,
+      "loss": 0.7515,
+      "step": 440
+    },
+    {
+      "epoch": 1.6187050359712232,
+      "grad_norm": 0.57268163367781,
+      "learning_rate": 5e-06,
+      "loss": 0.7562,
+      "step": 450
+    },
+    {
+      "epoch": 1.6546762589928057,
+      "grad_norm": 0.5407189047091853,
+      "learning_rate": 5e-06,
+      "loss": 0.7559,
+      "step": 460
+    },
+    {
+      "epoch": 1.6906474820143886,
+      "grad_norm": 0.6077940984618293,
+      "learning_rate": 5e-06,
+      "loss": 0.757,
+      "step": 470
+    },
+    {
+      "epoch": 1.7266187050359711,
+      "grad_norm": 1.001124812241379,
+      "learning_rate": 5e-06,
+      "loss": 0.7552,
+      "step": 480
+    },
+    {
+      "epoch": 1.762589928057554,
+      "grad_norm": 0.6254013722291123,
+      "learning_rate": 5e-06,
+      "loss": 0.753,
+      "step": 490
+    },
+    {
+      "epoch": 1.7985611510791366,
+      "grad_norm": 0.5767617312575639,
+      "learning_rate": 5e-06,
+      "loss": 0.7594,
+      "step": 500
+    },
+    {
+      "epoch": 1.8345323741007196,
+      "grad_norm": 0.665915353902276,
+      "learning_rate": 5e-06,
+      "loss": 0.7554,
+      "step": 510
+    },
+    {
+      "epoch": 1.870503597122302,
+      "grad_norm": 0.5596777388150926,
+      "learning_rate": 5e-06,
+      "loss": 0.7537,
+      "step": 520
+    },
+    {
+      "epoch": 1.906474820143885,
+      "grad_norm": 0.5547398560915929,
+      "learning_rate": 5e-06,
+      "loss": 0.7555,
+      "step": 530
+    },
+    {
+      "epoch": 1.9424460431654675,
+      "grad_norm": 0.5874602156110944,
+      "learning_rate": 5e-06,
+      "loss": 0.7509,
+      "step": 540
+    },
+    {
+      "epoch": 1.9784172661870505,
+      "grad_norm": 0.6369533697170318,
+      "learning_rate": 5e-06,
+      "loss": 0.7503,
+      "step": 550
+    },
+    {
+      "epoch": 2.0,
+      "eval_loss": 0.7788412570953369,
+      "eval_runtime": 27.8988,
+      "eval_samples_per_second": 268.255,
+      "eval_steps_per_second": 1.075,
+      "step": 556
+    },
+    {
+      "epoch": 2.014388489208633,
+      "grad_norm": 1.0929207520027995,
+      "learning_rate": 5e-06,
+      "loss": 0.735,
+      "step": 560
+    },
+    {
+      "epoch": 2.050359712230216,
+      "grad_norm": 0.687310495052166,
+      "learning_rate": 5e-06,
+      "loss": 0.7131,
+      "step": 570
+    },
+    {
+      "epoch": 2.0863309352517985,
+      "grad_norm": 0.6848749958758751,
+      "learning_rate": 5e-06,
+      "loss": 0.7129,
+      "step": 580
+    },
+    {
+      "epoch": 2.1223021582733814,
+      "grad_norm": 0.9700661070159223,
+      "learning_rate": 5e-06,
+      "loss": 0.7154,
+      "step": 590
+    },
+    {
+      "epoch": 2.158273381294964,
+      "grad_norm": 0.7429316335562708,
+      "learning_rate": 5e-06,
+      "loss": 0.7163,
+      "step": 600
+    },
+    {
+      "epoch": 2.194244604316547,
+      "grad_norm": 0.5731198010767242,
+      "learning_rate": 5e-06,
+      "loss": 0.7197,
+      "step": 610
+    },
+    {
+      "epoch": 2.2302158273381294,
+      "grad_norm": 0.6519774548706885,
+      "learning_rate": 5e-06,
+      "loss": 0.7192,
+      "step": 620
+    },
+    {
+      "epoch": 2.2661870503597124,
+      "grad_norm": 0.7092939571259266,
+      "learning_rate": 5e-06,
+      "loss": 0.717,
+      "step": 630
+    },
+    {
+      "epoch": 2.302158273381295,
+      "grad_norm": 0.8300683342338049,
+      "learning_rate": 5e-06,
+      "loss": 0.7171,
+      "step": 640
+    },
+    {
+      "epoch": 2.338129496402878,
+      "grad_norm": 0.6364079517115279,
+      "learning_rate": 5e-06,
+      "loss": 0.7179,
+      "step": 650
+    },
+    {
+      "epoch": 2.3741007194244603,
+      "grad_norm": 0.6830216482631195,
+      "learning_rate": 5e-06,
+      "loss": 0.7208,
+      "step": 660
+    },
+    {
+      "epoch": 2.4100719424460433,
+      "grad_norm": 0.580810416113199,
+      "learning_rate": 5e-06,
+      "loss": 0.7201,
+      "step": 670
+    },
+    {
+      "epoch": 2.446043165467626,
+      "grad_norm": 0.7709663647446697,
+      "learning_rate": 5e-06,
+      "loss": 0.7165,
+      "step": 680
+    },
+    {
+      "epoch": 2.4820143884892087,
+      "grad_norm": 0.6587806242655105,
+      "learning_rate": 5e-06,
+      "loss": 0.7199,
+      "step": 690
+    },
+    {
+      "epoch": 2.5179856115107913,
+      "grad_norm": 0.6679031168226195,
+      "learning_rate": 5e-06,
+      "loss": 0.7228,
+      "step": 700
+    },
+    {
+      "epoch": 2.553956834532374,
+      "grad_norm": 0.5802019851320436,
+      "learning_rate": 5e-06,
+      "loss": 0.7211,
+      "step": 710
+    },
+    {
+      "epoch": 2.5899280575539567,
+      "grad_norm": 0.633360775543426,
+      "learning_rate": 5e-06,
+      "loss": 0.7192,
+      "step": 720
+    },
+    {
+      "epoch": 2.6258992805755397,
+      "grad_norm": 0.7014721250700231,
+      "learning_rate": 5e-06,
+      "loss": 0.7208,
+      "step": 730
+    },
+    {
+      "epoch": 2.661870503597122,
+      "grad_norm": 0.5972726636881343,
+      "learning_rate": 5e-06,
+      "loss": 0.7184,
+      "step": 740
+    },
+    {
+      "epoch": 2.697841726618705,
+      "grad_norm": 0.5454556975289979,
+      "learning_rate": 5e-06,
+      "loss": 0.7139,
+      "step": 750
+    },
+    {
+      "epoch": 2.7338129496402876,
+      "grad_norm": 0.5626224999737693,
+      "learning_rate": 5e-06,
+      "loss": 0.7207,
+      "step": 760
+    },
+    {
+      "epoch": 2.7697841726618706,
+      "grad_norm": 0.5106193565014756,
+      "learning_rate": 5e-06,
+      "loss": 0.7193,
+      "step": 770
+    },
+    {
+      "epoch": 2.805755395683453,
+      "grad_norm": 0.6138738602878809,
+      "learning_rate": 5e-06,
+      "loss": 0.7185,
+      "step": 780
+    },
+    {
+      "epoch": 2.841726618705036,
+      "grad_norm": 0.6093685279993987,
+      "learning_rate": 5e-06,
+      "loss": 0.7217,
+      "step": 790
+    },
+    {
+      "epoch": 2.8776978417266186,
+      "grad_norm": 0.5564883285882788,
+      "learning_rate": 5e-06,
+      "loss": 0.7213,
+      "step": 800
+    },
+    {
+      "epoch": 2.9136690647482015,
+      "grad_norm": 0.5906548449538034,
+      "learning_rate": 5e-06,
+      "loss": 0.7183,
+      "step": 810
+    },
+    {
+      "epoch": 2.949640287769784,
+      "grad_norm": 0.5460219561244413,
+      "learning_rate": 5e-06,
+      "loss": 0.7216,
+      "step": 820
+    },
+    {
+      "epoch": 2.985611510791367,
+      "grad_norm": 0.6453368774762195,
+      "learning_rate": 5e-06,
+      "loss": 0.7198,
+      "step": 830
+    },
+    {
+      "epoch": 3.0,
+      "eval_loss": 0.7752296328544617,
+      "eval_runtime": 27.5746,
+      "eval_samples_per_second": 271.409,
+      "eval_steps_per_second": 1.088,
+      "step": 834
+    },
+    {
+      "epoch": 3.0,
+      "step": 834,
+      "total_flos": 1396981062696960.0,
+      "train_loss": 0.7675551453368555,
+      "train_runtime": 5571.5313,
+      "train_samples_per_second": 76.563,
+      "train_steps_per_second": 0.15
+    }
+  ],
+  "logging_steps": 10,
+  "max_steps": 834,
+  "num_input_tokens_seen": 0,
+  "num_train_epochs": 3,
+  "save_steps": 500,
+  "stateful_callbacks": {
+    "TrainerControl": {
+      "args": {
+        "should_epoch_stop": false,
+        "should_evaluate": false,
+        "should_log": false,
+        "should_save": true,
+        "should_training_stop": true
+      },
+      "attributes": {}
+    }
+  },
+  "total_flos": 1396981062696960.0,
+  "train_batch_size": 16,
+  "trial_name": null,
+  "trial_params": null
+}

training_eval_loss.png ADDED Viewed

training_loss.png ADDED Viewed