rakhman-llm commited on
Commit
7f8e908
·
verified ·
1 Parent(s): 2659354

Training in progress, step 13000, checkpoint

Browse files
last-checkpoint/model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4ba4172139359a1d497324e8e22e9a06ebeca96155aa0037a8e9fe2bd0bb1861
3
  size 891558696
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:96bd68ad012b25619d7bf581e4457855b3766a688abc4f029e6202ce077b6816
3
  size 891558696
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:669022ae6cb17619adcdc439fe592ba5a330da40a4b03a681212031e9d3c7b25
3
  size 1783272762
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0b754627dd5bef36f869f0c293f197d1d9dab012f8b88d73f786ad25764d9c19
3
  size 1783272762
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6f3dfe1e2f7f2fd5854887c0faffeef0e864d418e8da96df789b7b060dfeefbe
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5e9bd9c0c5f3829618d66a8ccc40a2ee0bf94db351e00ed3ee919d3ea07ee90c
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5db0dbd26a91dafa09171d3a40d283c5443302ab9d18cc5d9752c39b01fbce28
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:589ca601b575cfc7f004c136eb91b382b9a1be92a7e3c7f68f79df4414805284
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 2.597672485453034,
5
  "eval_steps": 500,
6
- "global_step": 12500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -198,6 +198,13 @@
198
  "learning_rate": 2.690495982266556e-06,
199
  "loss": 0.3176,
200
  "step": 12500
 
 
 
 
 
 
 
201
  }
202
  ],
203
  "logging_steps": 500,
@@ -217,7 +224,7 @@
217
  "attributes": {}
218
  }
219
  },
220
- "total_flos": 1.522272934821888e+16,
221
  "train_batch_size": 2,
222
  "trial_name": null,
223
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 2.7015793848711556,
5
  "eval_steps": 500,
6
+ "global_step": 13000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
198
  "learning_rate": 2.690495982266556e-06,
199
  "loss": 0.3176,
200
  "step": 12500
201
+ },
202
+ {
203
+ "epoch": 2.7015793848711556,
204
+ "grad_norm": 1.0274338722229004,
205
+ "learning_rate": 1.99778331947908e-06,
206
+ "loss": 0.3389,
207
+ "step": 13000
208
  }
209
  ],
210
  "logging_steps": 500,
 
224
  "attributes": {}
225
  }
226
  },
227
+ "total_flos": 1.583168723877888e+16,
228
  "train_batch_size": 2,
229
  "trial_name": null,
230
  "trial_params": null