MeedoSam committed on
Commit
5a86835
1 Parent(s): 26cf530

Uploaded checkpoint-22500

Browse files
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f8d86c87b10946be5e19fd09e6c945826193408dd0c45733f14b421ff48c3d03
3
  size 119975656
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1728956bca0b9356ae29462b37ac34654e1484ea8ef2b11bb5247ae5eda898c8
3
  size 119975656
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:de3c6d36565a894febe83bdaf8decd391144d8d18bb0a4d0adc45544c47e3860
3
  size 60477396
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e40cd6ba11d9a0ceaea16d7c58dce9d059685099ee9172ed66890d5851cdfc45
3
  size 60477396
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3788580bf499dcd38db18c9ad678a723e513e1df6a53a304a6b7c2bb74dc2674
3
- size 14180
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e2411394ac968a82b73f6de27417329addc7551a6595e6f8e40c25c5f9f504b1
3
+ size 14244
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3ba0dbbe5ceaff99b526aba1b21b83c949f035061745afe93ca5ff87a34da88f
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:767cf16b1af6a1442d6e9081070ae709d5786da93d28d3f0bf69c41bc72162ac
3
  size 1064
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.5,
5
  "eval_steps": 2500,
6
- "global_step": 20000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -211,6 +211,28 @@
211
  "eval_samples_per_second": 4.961,
212
  "eval_steps_per_second": 4.961,
213
  "step": 20000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
214
  }
215
  ],
216
  "logging_steps": 1000,
@@ -218,7 +240,7 @@
218
  "num_input_tokens_seen": 0,
219
  "num_train_epochs": 1,
220
  "save_steps": 2500,
221
- "total_flos": 3.2204251987968e+17,
222
  "train_batch_size": 1,
223
  "trial_name": null,
224
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.5625,
5
  "eval_steps": 2500,
6
+ "global_step": 22500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
211
  "eval_samples_per_second": 4.961,
212
  "eval_steps_per_second": 4.961,
213
  "step": 20000
214
+ },
215
+ {
216
+ "epoch": 0.53,
217
+ "grad_norm": 6.062647342681885,
218
+ "learning_rate": 8.120579710144927e-06,
219
+ "loss": 1.3637,
220
+ "step": 21000
221
+ },
222
+ {
223
+ "epoch": 0.55,
224
+ "grad_norm": 5.603002071380615,
225
+ "learning_rate": 7.540869565217392e-06,
226
+ "loss": 1.3464,
227
+ "step": 22000
228
+ },
229
+ {
230
+ "epoch": 0.56,
231
+ "eval_loss": 1.3194873332977295,
232
+ "eval_runtime": 201.6454,
233
+ "eval_samples_per_second": 4.959,
234
+ "eval_steps_per_second": 4.959,
235
+ "step": 22500
236
  }
237
  ],
238
  "logging_steps": 1000,
 
240
  "num_input_tokens_seen": 0,
241
  "num_train_epochs": 1,
242
  "save_steps": 2500,
243
+ "total_flos": 3.6229783486464e+17,
244
  "train_batch_size": 1,
245
  "trial_name": null,
246
  "trial_params": null