Training in progress, step 500

Browse files

Files changed (15) hide show

model.safetensors +1 -1
run-16/checkpoint-500/model.safetensors +1 -1
run-16/checkpoint-500/optimizer.pt +1 -1
run-16/checkpoint-500/scheduler.pt +1 -1
run-16/checkpoint-500/training_args.bin +1 -1
run-18/checkpoint-500/model.safetensors +1 -1
run-18/checkpoint-500/optimizer.pt +1 -1
run-18/checkpoint-500/scheduler.pt +1 -1
run-18/checkpoint-500/trainer_state.json +14 -14
run-18/checkpoint-500/training_args.bin +1 -1
run-19/checkpoint-1000/trainer_state.json +26 -26
run-19/checkpoint-500/optimizer.pt +1 -1
run-19/checkpoint-500/trainer_state.json +13 -13
run-19/checkpoint-500/training_args.bin +1 -1
training_args.bin +1 -1

model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:94dd719ae3e79ab6db163a2c0423be61fef278b4f047585040f2170742065552
 size 268290900

 version https://git-lfs.github.com/spec/v1
+oid sha256:b64fce98847974a1fbc15a5211620e911f29d0a6fa5bc6a5bf2e0acabc13c361
 size 268290900

run-16/checkpoint-500/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:4b8ad3c9e3788334f5c2adb4229ae87dd8d5043fdb7947dc8f97f35fed88c2e7
 size 268290900

 version https://git-lfs.github.com/spec/v1
+oid sha256:25f9d6eb5310fb82fd6d0ce02178e219ef280bfac553ca1e5a08cd0dd16f2028
 size 268290900

run-16/checkpoint-500/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d7e6ebd7722bad34820adb2b29c8deca0afdcb6d5c67073d609c2c6adcb9ec77
 size 536643898

 version https://git-lfs.github.com/spec/v1
+oid sha256:61b66909a996ee50dd5b57e0c400b5d2288582050b5f926b1c289f7a221255f6
 size 536643898

run-16/checkpoint-500/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:04366f62f8f88f5a8265df59adb051b320463277845db80e7fa43f13110c18c9
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:90dc4637e972cc69c745eebddd8a7560dca27d2318df3e23f8e145abbf236536
 size 1064

run-16/checkpoint-500/training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:dcf27e76d8884c946fcfa9b2e89273479b0e58bfc88fe2cda59dc077e543cf3a
 size 5176

 version https://git-lfs.github.com/spec/v1
+oid sha256:699ed76d9af91edc99d12562d64ce4055f71cfe483dfd9ab44c7bfc8626ae66f
 size 5176

run-18/checkpoint-500/model.safetensors CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:1df6a2e095aa6bbb5d360e7f012cbe4a7929871fd4d4183142dd1f5f93bff5b9
 size 268290900

 version https://git-lfs.github.com/spec/v1
+oid sha256:b64fce98847974a1fbc15a5211620e911f29d0a6fa5bc6a5bf2e0acabc13c361
 size 268290900

run-18/checkpoint-500/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:f255b5aed36ba4abd626288657ee63762aafb521c202746c931ad4d10b96a61e
 size 536643898

 version https://git-lfs.github.com/spec/v1
+oid sha256:8b58f2178d777caeda6638dad5deb905c5d807ad067e89066f0357dbd988962e
 size 536643898

run-18/checkpoint-500/scheduler.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:98041bd7cae455426e290a1a0ee683bd5dd30893f7451fec3a464ae8995b17e4
 size 1064

 version https://git-lfs.github.com/spec/v1
+oid sha256:11e744277c61f87520794334442fae36c5f9ff6e10cb79d4bfee5176ca7eafe2
 size 1064

run-18/checkpoint-500/trainer_state.json CHANGED Viewed

@@ -10,25 +10,25 @@
   "log_history": [
     {
       "epoch": 1.0,
-      "eval_accuracy": 0.5951612903225807,
-      "eval_loss": 0.20783506333827972,
-      "eval_runtime": 1.3749,
-      "eval_samples_per_second": 2254.744,
-      "eval_steps_per_second": 47.277,
       "step": 318
     },
     {
       "epoch": 1.5723270440251573,
-      "grad_norm": 0.5312509536743164,
-      "learning_rate": 1.650593990216632e-05,
-      "loss": 0.3341,
       "step": 500
     }
   ],
   "logging_steps": 500,
-  "max_steps": 2862,
   "num_input_tokens_seen": 0,
-  "num_train_epochs": 9,
   "save_steps": 500,
   "stateful_callbacks": {
     "TrainerControl": {
@@ -42,12 +42,12 @@
       "attributes": {}
     }
   },
-  "total_flos": 189074629692144.0,
   "train_batch_size": 48,
   "trial_name": null,
   "trial_params": {
-    "alpha": 0.40001389391237485,
-    "num_train_epochs": 9,
-    "temperature": 8
   }
 }

   "log_history": [
     {
       "epoch": 1.0,
+      "eval_accuracy": 0.6674193548387096,
+      "eval_loss": 0.416048139333725,
+      "eval_runtime": 1.3562,
+      "eval_samples_per_second": 2285.869,
+      "eval_steps_per_second": 47.93,
       "step": 318
     },
     {
       "epoch": 1.5723270440251573,
+      "grad_norm": 0.9905994534492493,
+      "learning_rate": 1.550763701707098e-05,
+      "loss": 0.6477,
       "step": 500
     }
   ],
   "logging_steps": 500,
+  "max_steps": 2226,
   "num_input_tokens_seen": 0,
+  "num_train_epochs": 7,
   "save_steps": 500,
   "stateful_callbacks": {
     "TrainerControl": {
       "attributes": {}
     }
   },
+  "total_flos": 248936225670636.0,
   "train_batch_size": 48,
   "trial_name": null,
   "trial_params": {
+    "alpha": 0.6352487868101878,
+    "num_train_epochs": 7,
+    "temperature": 2
   }
 }

run-18/checkpoint-500/training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:d563c3fa77ebad6147745c6eb10d98ee4dd83dbdc6caa67346241d873d2dc259
 size 5176

 version https://git-lfs.github.com/spec/v1
+oid sha256:1255ca729d01717aa414fa6756e0d6f9030540b62191575288bf9e49b890e64b
 size 5176

run-19/checkpoint-1000/trainer_state.json CHANGED Viewed

@@ -10,50 +10,50 @@
   "log_history": [
     {
       "epoch": 1.0,
-      "eval_accuracy": 0.6229032258064516,
-      "eval_loss": 0.2478133589029312,
-      "eval_runtime": 1.3731,
-      "eval_samples_per_second": 2257.605,
-      "eval_steps_per_second": 47.337,
       "step": 318
     },
     {
       "epoch": 1.5723270440251573,
-      "grad_norm": 0.6151512861251831,
-      "learning_rate": 1.685534591194969e-05,
-      "loss": 0.398,
       "step": 500
     },
     {
       "epoch": 2.0,
-      "eval_accuracy": 0.844516129032258,
-      "eval_loss": 0.10843393951654434,
-      "eval_runtime": 1.3751,
-      "eval_samples_per_second": 2254.368,
-      "eval_steps_per_second": 47.269,
       "step": 636
     },
     {
       "epoch": 3.0,
-      "eval_accuracy": 0.89,
-      "eval_loss": 0.06599755585193634,
-      "eval_runtime": 1.3802,
-      "eval_samples_per_second": 2246.112,
-      "eval_steps_per_second": 47.096,
       "step": 954
     },
     {
       "epoch": 3.1446540880503147,
-      "grad_norm": 0.5515937209129333,
-      "learning_rate": 1.371069182389937e-05,
-      "loss": 0.1268,
       "step": 1000
     }
   ],
   "logging_steps": 500,
-  "max_steps": 3180,
   "num_input_tokens_seen": 0,
-  "num_train_epochs": 10,
   "save_steps": 500,
   "stateful_callbacks": {
     "TrainerControl": {
@@ -67,12 +67,12 @@
       "attributes": {}
     }
   },
-  "total_flos": 319943755193412.0,
   "train_batch_size": 48,
   "trial_name": null,
   "trial_params": {
-    "alpha": 0.878459838807545,
-    "num_train_epochs": 10,
     "temperature": 4
   }
 }

   "log_history": [
     {
       "epoch": 1.0,
+      "eval_accuracy": 0.62,
+      "eval_loss": 0.249518021941185,
+      "eval_runtime": 1.3797,
+      "eval_samples_per_second": 2246.797,
+      "eval_steps_per_second": 47.11,
       "step": 318
     },
     {
       "epoch": 1.5723270440251573,
+      "grad_norm": 0.6149903535842896,
+      "learning_rate": 1.650593990216632e-05,
+      "loss": 0.3991,
       "step": 500
     },
     {
       "epoch": 2.0,
+      "eval_accuracy": 0.8422580645161291,
+      "eval_loss": 0.11037396639585495,
+      "eval_runtime": 1.3583,
+      "eval_samples_per_second": 2282.186,
+      "eval_steps_per_second": 47.852,
       "step": 636
     },
     {
       "epoch": 3.0,
+      "eval_accuracy": 0.8883870967741936,
+      "eval_loss": 0.0677555724978447,
+      "eval_runtime": 1.3701,
+      "eval_samples_per_second": 2262.648,
+      "eval_steps_per_second": 47.443,
       "step": 954
     },
     {
       "epoch": 3.1446540880503147,
+      "grad_norm": 0.5547620058059692,
+      "learning_rate": 1.3011879804332637e-05,
+      "loss": 0.1288,
       "step": 1000
     }
   ],
   "logging_steps": 500,
+  "max_steps": 2862,
   "num_input_tokens_seen": 0,
+  "num_train_epochs": 9,
   "save_steps": 500,
   "stateful_callbacks": {
     "TrainerControl": {
       "attributes": {}
     }
   },
+  "total_flos": 296801013878124.0,
   "train_batch_size": 48,
   "trial_name": null,
   "trial_params": {
+    "alpha": 0.8975938459167363,
+    "num_train_epochs": 9,
     "temperature": 4
   }
 }

run-19/checkpoint-500/optimizer.pt CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9c39c8c8c93ca935fff2403d4310e8f1a88a48f69672fc74726e178cf1685176
 size 536643898

 version https://git-lfs.github.com/spec/v1
+oid sha256:f632c364b0e8bb9c5aaa1a1d92dc11251e021768e8319ab85e38045e3c31cf61
 size 536643898

run-19/checkpoint-500/trainer_state.json CHANGED Viewed

@@ -10,25 +10,25 @@
   "log_history": [
     {
       "epoch": 1.0,
-      "eval_accuracy": 0.6229032258064516,
-      "eval_loss": 0.2478133589029312,
-      "eval_runtime": 1.3731,
-      "eval_samples_per_second": 2257.605,
-      "eval_steps_per_second": 47.337,
       "step": 318
     },
     {
       "epoch": 1.5723270440251573,
-      "grad_norm": 0.6151512861251831,
-      "learning_rate": 1.685534591194969e-05,
-      "loss": 0.398,
       "step": 500
     }
   ],
   "logging_steps": 500,
-  "max_steps": 3180,
   "num_input_tokens_seen": 0,
-  "num_train_epochs": 10,
   "save_steps": 500,
   "stateful_callbacks": {
     "TrainerControl": {
@@ -42,12 +42,12 @@
       "attributes": {}
     }
   },
-  "total_flos": 189074629692144.0,
   "train_batch_size": 48,
   "trial_name": null,
   "trial_params": {
-    "alpha": 0.878459838807545,
-    "num_train_epochs": 10,
     "temperature": 4
   }
 }

   "log_history": [
     {
       "epoch": 1.0,
+      "eval_accuracy": 0.62,
+      "eval_loss": 0.249518021941185,
+      "eval_runtime": 1.3797,
+      "eval_samples_per_second": 2246.797,
+      "eval_steps_per_second": 47.11,
       "step": 318
     },
     {
       "epoch": 1.5723270440251573,
+      "grad_norm": 0.6149903535842896,
+      "learning_rate": 1.650593990216632e-05,
+      "loss": 0.3991,
       "step": 500
     }
   ],
   "logging_steps": 500,
+  "max_steps": 2862,
   "num_input_tokens_seen": 0,
+  "num_train_epochs": 9,
   "save_steps": 500,
   "stateful_callbacks": {
     "TrainerControl": {
       "attributes": {}
     }
   },
+  "total_flos": 165931888376856.0,
   "train_batch_size": 48,
   "trial_name": null,
   "trial_params": {
+    "alpha": 0.8975938459167363,
+    "num_train_epochs": 9,
     "temperature": 4
   }
 }

run-19/checkpoint-500/training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:43ad3e5230508df5eafe3b8cc807f8ce5e9543207331b4b6a4fd8cdd4dbc0b67
 size 5176

 version https://git-lfs.github.com/spec/v1
+oid sha256:9b2e73103b9db907e09d098899dc83e3dbf1869ac8fe958d6aec7beb5cf0db0d
 size 5176

training_args.bin CHANGED Viewed

@@ -1,3 +1,3 @@
 version https://git-lfs.github.com/spec/v1
-oid sha256:9b2e73103b9db907e09d098899dc83e3dbf1869ac8fe958d6aec7beb5cf0db0d
 size 5176

 version https://git-lfs.github.com/spec/v1
+oid sha256:1255ca729d01717aa414fa6756e0d6f9030540b62191575288bf9e49b890e64b
 size 5176