ashanhr committed on
Commit
d47adc0
·
verified ·
1 Parent(s): f513dfd

Training in progress, step 3000, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:44b2ea87fb45896c5e64675c42319b0c800f3516393fd91fe6189f46278e1a6f
3
  size 4978139416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:aa3f8250eedc0ad91d9bd7bd29190dfcd30c24ddbb981802ff4398fb1b6f8ccb
3
  size 4978139416
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2c31b10f100741c4ebc4d4250ad676d7b466307a2ea83cbc5ede244644ac015c
3
  size 3659223436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d61642ae37b7f2123a78d2dc95232dacaf674afbc5bc78d70d4b6245a6343788
3
  size 3659223436
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:197e645c69e01ac20ea12442834230672eb334b80f902f66c33a4b04dcb6f523
3
  size 17241500333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9aa0dc12fc4c549ade65217632d28f06d688771482b33353c8dcff908cd43500
3
  size 17241500333
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:04efee40e70fc230d3d9de163b7a49660ca999141c4308e72ab88d3a1aa96df5
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b8972e9f2b0f288dfd8ade7d14f3387e82aa534b7b0691b69299c0919f543f0
3
  size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3ae68370e6304e1d4db7600cca11df6aac88b08aee5cd9372946660cf6886038
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:cd47c81b0d91d48642f51b85203435f35325e0ebcfd794d4d6fa6f9f3a411358
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 1.1786992212165859,
5
  "eval_steps": 100,
6
- "global_step": 2800,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -455,6 +455,38 @@
455
  "eval_samples_per_second": 27.838,
456
  "eval_steps_per_second": 3.481,
457
  "step": 2800
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
458
  }
459
  ],
460
  "logging_steps": 100,
@@ -462,7 +494,7 @@
462
  "num_input_tokens_seen": 0,
463
  "num_train_epochs": 30,
464
  "save_steps": 100,
465
- "total_flos": 3.076610148872312e+19,
466
  "train_batch_size": 8,
467
  "trial_name": null,
468
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.2628920227320564,
5
  "eval_steps": 100,
6
+ "global_step": 3000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
455
  "eval_samples_per_second": 27.838,
456
  "eval_steps_per_second": 3.481,
457
  "step": 2800
458
+ },
459
+ {
460
+ "epoch": 1.22,
461
+ "grad_norm": 4.691165924072266,
462
+ "learning_rate": 4.8308833922261485e-05,
463
+ "loss": 2.0012,
464
+ "step": 2900
465
+ },
466
+ {
467
+ "epoch": 1.22,
468
+ "eval_cer": 0.5138047006374409,
469
+ "eval_loss": 3.412604331970215,
470
+ "eval_runtime": 349.7896,
471
+ "eval_samples_per_second": 27.096,
472
+ "eval_steps_per_second": 3.388,
473
+ "step": 2900
474
+ },
475
+ {
476
+ "epoch": 1.26,
477
+ "grad_norm": 2.8883514404296875,
478
+ "learning_rate": 4.823816254416961e-05,
479
+ "loss": 3.5496,
480
+ "step": 3000
481
+ },
482
+ {
483
+ "epoch": 1.26,
484
+ "eval_cer": 0.5417074029173673,
485
+ "eval_loss": 1.7423540353775024,
486
+ "eval_runtime": 339.3199,
487
+ "eval_samples_per_second": 27.932,
488
+ "eval_steps_per_second": 3.492,
489
+ "step": 3000
490
  }
491
  ],
492
  "logging_steps": 100,
 
494
  "num_input_tokens_seen": 0,
495
  "num_train_epochs": 30,
496
  "save_steps": 100,
497
+ "total_flos": 3.298748154606821e+19,
498
  "train_batch_size": 8,
499
  "trial_name": null,
500
  "trial_params": null