Rakhman16 committed on
Commit
f742c52
·
verified ·
1 Parent(s): 7d2ae2e

Training in progress, step 11500, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:720eceeb2b357ad023a64f886e05d484eb7d9c0097f7eb7db6168cd2d52a3b5b
3
  size 891558696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3866dd2a6475ce2b7385f181f047f2dde98048fd09569a5804ff351fd5e9912f
3
  size 891558696
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c19224722ae663a153f5b72e8e4abd5762e71691698100f741aa39099196b03f
3
  size 1783272762
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9fa541d3192bd288c7edb3660d558e142b5e462f59453619d7373f84590e81f7
3
  size 1783272762
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a6eb93bead6d30b6932224ec7fe6ef202eebeaf7927b1e6638dfda624b533562
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8fe8dfec12d6b343eb28ee2b43b75b45c3ae2d185dfc75ecd757d9ec7c567e31
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:723c1afc9409a31cc02de5b4cfab645904dc7aaa6048019bba9e3ae17f717c52
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2504d9c9451078f79b98032c170c402f3ffeb92d11496a90427e835bd48cd055
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 4.57190357439734,
5
  "eval_steps": 500,
6
- "global_step": 11000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -193,6 +193,13 @@
193
  "learning_rate": 1.7190357439733998e-06,
194
  "loss": 0.3166,
195
  "step": 11000
 
 
 
 
 
 
 
196
  }
197
  ],
198
  "logging_steps": 500,
@@ -212,7 +219,7 @@
212
  "attributes": {}
213
  }
214
  },
215
- "total_flos": 2.679171135307776e+16,
216
  "train_batch_size": 4,
217
  "trial_name": null,
218
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 4.7797173732335825,
5
  "eval_steps": 500,
6
+ "global_step": 11500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
193
  "learning_rate": 1.7190357439733998e-06,
194
  "loss": 0.3166,
195
  "step": 11000
196
+ },
197
+ {
198
+ "epoch": 4.7797173732335825,
199
+ "grad_norm": 0.7600696682929993,
200
+ "learning_rate": 8.89443059019119e-07,
201
+ "loss": 0.3117,
202
+ "step": 11500
203
  }
204
  ],
205
  "logging_steps": 500,
 
219
  "attributes": {}
220
  }
221
  },
222
+ "total_flos": 2.800962713419776e+16,
223
  "train_batch_size": 4,
224
  "trial_name": null,
225
  "trial_params": null