Rakhman16 committed on
Commit
f19eb5a
·
verified ·
1 Parent(s): 11f1675

Training in progress, step 8500, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:96363d74eb8e505e30e70175f38cf365c3d95b9352b85ccbcdda14b0b97b0604
3
  size 891558696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:23750eae96fff9283e7ae8f0bab6faf349cb0a7eb7d62a4385617e3c105b050a
3
  size 891558696
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9032c06bd1a8e5aba237fba062e9b942284f080fef0916342f268300ad8d5730
3
  size 1783272762
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c090d1b505cdd524b425c56d229c74e521a9cfcd2e83913f4b04cd9a688c0244
3
  size 1783272762
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0a76e94d15e6f47128689bef83934eee553e34dee3554bde049b53753e0e7480
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6d6f9191f2eaf4a4097874de4a821f2a97252f64a38cda2d145c006040151b30
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2c3f10b5ea24e9191e915e4456d16cc3b17d3916946799417c52e3e474e43201
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:432af5fdf516a60295d78030fa17670f1cdc61fda7d1429429265938440f5257
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 3.3250207813798838,
5
  "eval_steps": 500,
6
- "global_step": 8000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -143,6 +143,13 @@
143
  "learning_rate": 6.704904405652536e-06,
144
  "loss": 0.3265,
145
  "step": 8000
 
 
 
 
 
 
 
146
  }
147
  ],
148
  "logging_steps": 500,
@@ -162,7 +169,7 @@
162
  "attributes": {}
163
  }
164
  },
165
- "total_flos": 1.948482562424832e+16,
166
  "train_batch_size": 4,
167
  "trial_name": null,
168
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 3.5328345802161265,
5
  "eval_steps": 500,
6
+ "global_step": 8500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
143
  "learning_rate": 6.704904405652536e-06,
144
  "loss": 0.3265,
145
  "step": 8000
146
+ },
147
+ {
148
+ "epoch": 3.5328345802161265,
149
+ "grad_norm": 0.7057175040245056,
150
+ "learning_rate": 5.873649210307565e-06,
151
+ "loss": 0.3219,
152
+ "step": 8500
153
  }
154
  ],
155
  "logging_steps": 500,
 
169
  "attributes": {}
170
  }
171
  },
172
+ "total_flos": 2.070274140536832e+16,
173
  "train_batch_size": 4,
174
  "trial_name": null,
175
  "trial_params": null