MeedoSam committed on
Commit
12dda8c
1 Parent(s): cd26ffe

Uploaded checkpoint-30000

Browse files
adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:897f70228feeb62b32fd6533e7ba27607cf9944dc36bd6809ba46a0584a0ed7c
3
  size 119975656
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:69e556f93d429e40e699d986989b577cb8c22804f241827fcd7d0cce4c58fb33
3
  size 119975656
optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:04781c011cca7ba2948408c4dd3d586cf703ca8e7a287bcf6828082753eb7cd9
3
  size 60477396
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:dd9579d4caf7f2a052e069804a5114ee1cfb406fc8c83bf59d9dd95fdab911ed
3
  size 60477396
rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9f9a8fc7a3f9973ab500342b710266356bc15e460d8239c7ecffdd7f7b55c419
3
- size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:b8b0827c468c23f41a6083f7fc3e6f3ebf585bd2815b57f652696125baed11e8
3
+ size 14180
scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:21d9826db2123e1159316c413f79587be4d18b11fad0191a070a36625b9fe97e
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:89a8f997004f1eea51654cbeb6b0eb881175b2d1ad913ffc6dcbce16821f7297
3
  size 1064
trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.6875,
5
  "eval_steps": 2500,
6
- "global_step": 27500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -284,6 +284,35 @@
284
  "eval_samples_per_second": 4.959,
285
  "eval_steps_per_second": 4.959,
286
  "step": 27500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
287
  }
288
  ],
289
  "logging_steps": 1000,
@@ -291,7 +320,7 @@
291
  "num_input_tokens_seen": 0,
292
  "num_train_epochs": 1,
293
  "save_steps": 2500,
294
- "total_flos": 4.4280846483456e+17,
295
  "train_batch_size": 1,
296
  "trial_name": null,
297
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.75,
5
  "eval_steps": 2500,
6
+ "global_step": 30000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
284
  "eval_samples_per_second": 4.959,
285
  "eval_steps_per_second": 4.959,
286
  "step": 27500
287
+ },
288
+ {
289
+ "epoch": 0.7,
290
+ "grad_norm": 14.545968055725098,
291
+ "learning_rate": 4.06376811594203e-06,
292
+ "loss": 1.3143,
293
+ "step": 28000
294
+ },
295
+ {
296
+ "epoch": 0.72,
297
+ "grad_norm": 9.552095413208008,
298
+ "learning_rate": 3.4840579710144927e-06,
299
+ "loss": 1.3042,
300
+ "step": 29000
301
+ },
302
+ {
303
+ "epoch": 0.75,
304
+ "grad_norm": 2.7875924110412598,
305
+ "learning_rate": 2.905507246376812e-06,
306
+ "loss": 1.2942,
307
+ "step": 30000
308
+ },
309
+ {
310
+ "epoch": 0.75,
311
+ "eval_loss": 1.272377848625183,
312
+ "eval_runtime": 201.643,
313
+ "eval_samples_per_second": 4.959,
314
+ "eval_steps_per_second": 4.959,
315
+ "step": 30000
316
  }
317
  ],
318
  "logging_steps": 1000,
 
320
  "num_input_tokens_seen": 0,
321
  "num_train_epochs": 1,
322
  "save_steps": 2500,
323
+ "total_flos": 4.8306377981952e+17,
324
  "train_batch_size": 1,
325
  "trial_name": null,
326
  "trial_params": null