ncbateman committed on
Commit
e0d086c
·
verified ·
1 Parent(s): 973c5c2

Training in progress, step 865, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:99b129df98bdc306d1bc4565ee000cc0b871ca7381053214c2914dac7ae77608
3
  size 97307544
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:82b7ea91db0b8414ee1ee370f896bbd26f3fbb47a73176b7f3f65b18d197a25b
3
  size 97307544
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cb976eeabc1d40bc1ef543f0d6b42e69810a3b568ef7fec526ce855dcd53f250
3
  size 49846644
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:43083f322054aff43bc6930fa6998ada6fdd635d44556e37099a3e09e1044dd6
3
  size 49846644
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:798b54db949c4ae9de08b62eac89d6111767c04dba8dc38518460f18e2c13d16
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d62a615819a68a35523082dd9af5336db4b9184e8270ea5a330d6ceefb606b95
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a2d74cdd7773895240aff3837ca564e1fc035a5a8a0853fee30d28f2c6ee4c25
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:fc3d8a2771961fb52eec0bdd20d20676362c5357acba011a4424ecf88c9a6ca9
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.112908443869298,
5
  "eval_steps": 386,
6
- "global_step": 860,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -6051,6 +6051,41 @@
6051
  "learning_rate": 8.925627310699275e-05,
6052
  "loss": 0.9271,
6053
  "step": 860
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
6054
  }
6055
  ],
6056
  "logging_steps": 1,
@@ -6070,7 +6105,7 @@
6070
  "attributes": {}
6071
  }
6072
  },
6073
- "total_flos": 9.613560965832376e+17,
6074
  "train_batch_size": 4,
6075
  "trial_name": null,
6076
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.1193788417987707,
5
  "eval_steps": 386,
6
+ "global_step": 865,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
6051
  "learning_rate": 8.925627310699275e-05,
6052
  "loss": 0.9271,
6053
  "step": 860
6054
+ },
6055
+ {
6056
+ "epoch": 1.1142025234551924,
6057
+ "grad_norm": 0.8622956275939941,
6058
+ "learning_rate": 8.92307255942185e-05,
6059
+ "loss": 0.7689,
6060
+ "step": 861
6061
+ },
6062
+ {
6063
+ "epoch": 1.115496603041087,
6064
+ "grad_norm": 0.8629088401794434,
6065
+ "learning_rate": 8.920515140818351e-05,
6066
+ "loss": 0.811,
6067
+ "step": 862
6068
+ },
6069
+ {
6070
+ "epoch": 1.1167906826269816,
6071
+ "grad_norm": 0.9130288362503052,
6072
+ "learning_rate": 8.91795505662759e-05,
6073
+ "loss": 0.8302,
6074
+ "step": 863
6075
+ },
6076
+ {
6077
+ "epoch": 1.118084762212876,
6078
+ "grad_norm": 0.9185603857040405,
6079
+ "learning_rate": 8.915392308590183e-05,
6080
+ "loss": 0.7699,
6081
+ "step": 864
6082
+ },
6083
+ {
6084
+ "epoch": 1.1193788417987707,
6085
+ "grad_norm": 0.8291416168212891,
6086
+ "learning_rate": 8.912826898448561e-05,
6087
+ "loss": 0.6884,
6088
+ "step": 865
6089
  }
6090
  ],
6091
  "logging_steps": 1,
 
6105
  "attributes": {}
6106
  }
6107
  },
6108
+ "total_flos": 9.669457824417055e+17,
6109
  "train_batch_size": 4,
6110
  "trial_name": null,
6111
  "trial_params": null