MeedoSam commited on
Commit
cd26ffe
1 Parent(s): 5a6aed9

Uploaded checkpoint-27500

Browse files
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:04775532bd2e07bf11ebd2051856f8ff3cd14a3b1602476c8cd878dc55108be6
3
  size 119975656
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:897f70228feeb62b32fd6533e7ba27607cf9944dc36bd6809ba46a0584a0ed7c
3
  size 119975656
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:945240ab0506a0e4e3b0b5cf5c427a1d717779a803ea92962e3be770f3863ecd
3
  size 60477396
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:04781c011cca7ba2948408c4dd3d586cf703ca8e7a287bcf6828082753eb7cd9
3
  size 60477396
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6cc6d246693c4c7085670610c3cb24611d2a43f9c316a1c2833732f83270fcef
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9f9a8fc7a3f9973ab500342b710266356bc15e460d8239c7ecffdd7f7b55c419
3
  size 14244
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:05c0c42e10d78b34ca2f7aaf9f72ec3bcc915774ed80b66aaaec9d2cfc880dd7
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:21d9826db2123e1159316c413f79587be4d18b11fad0191a070a36625b9fe97e
3
  size 1064
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.625,
5
  "eval_steps": 2500,
6
- "global_step": 25000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -262,6 +262,28 @@
262
  "eval_samples_per_second": 4.969,
263
  "eval_steps_per_second": 4.969,
264
  "step": 25000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
265
  }
266
  ],
267
  "logging_steps": 1000,
@@ -269,7 +291,7 @@
269
  "num_input_tokens_seen": 0,
270
  "num_train_epochs": 1,
271
  "save_steps": 2500,
272
- "total_flos": 4.025531498496e+17,
273
  "train_batch_size": 1,
274
  "trial_name": null,
275
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.6875,
5
  "eval_steps": 2500,
6
+ "global_step": 27500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
262
  "eval_samples_per_second": 4.969,
263
  "eval_steps_per_second": 4.969,
264
  "step": 25000
265
+ },
266
+ {
267
+ "epoch": 0.65,
268
+ "grad_norm": 2.6946563720703125,
269
+ "learning_rate": 5.222608695652175e-06,
270
+ "loss": 1.3161,
271
+ "step": 26000
272
+ },
273
+ {
274
+ "epoch": 0.68,
275
+ "grad_norm": 10.966987609863281,
276
+ "learning_rate": 4.643478260869566e-06,
277
+ "loss": 1.3253,
278
+ "step": 27000
279
+ },
280
+ {
281
+ "epoch": 0.69,
282
+ "eval_loss": 1.3072532415390015,
283
+ "eval_runtime": 201.6414,
284
+ "eval_samples_per_second": 4.959,
285
+ "eval_steps_per_second": 4.959,
286
+ "step": 27500
287
  }
288
  ],
289
  "logging_steps": 1000,
 
291
  "num_input_tokens_seen": 0,
292
  "num_train_epochs": 1,
293
  "save_steps": 2500,
294
+ "total_flos": 4.4280846483456e+17,
295
  "train_batch_size": 1,
296
  "trial_name": null,
297
  "trial_params": null