ashanhr commited on
Commit
76a8ee0
1 Parent(s): 0da1b7f

Training in progress, step 47700, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:67401216b3029d2622e7e7add3d17e2a39d754ad2562031455d96e2f627ae67b
3
  size 4978139416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c737cef3e8496bd78bd7ec0023f68befe3d9a9c05598534030ccd26c85918ac6
3
  size 4978139416
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6921906aba6ad6cda84ac2dad9392fde118c107f9c07cce5a4b585abe35d446d
3
  size 3659223436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f2723715a10a82482c591b82cb51edd59abae6401d5ba96ea07090b181b9d643
3
  size 3659223436
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:eb52b0ddcb7b49c326f60274849ef6aa3c79a378160d3f09440dbe910b761c44
3
  size 17241500333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9b283c3ae87191de5fca07697a3974a9d234e69848cf8838bc28bee17cf75ce3
3
  size 17241500333
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1c20e682f3fe40fa57d88e22486ddd5912b58563883c794fa3b1d8da05268377
3
- size 14567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:392684c134f5cf29a9c14bb0a154c1d7d4d520741978198c633be27f537faa31
3
+ size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:372a4198b83fdb5e8fb2ecd27baf4c7bfc16f7ae1e02bdddaa16958cd86125f9
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:9916ddee38224af3ba1b5cf7e13befa56143fa30905487537b8489305fe5fcae
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 19.995790359924225,
5
  "eval_steps": 100,
6
- "global_step": 47500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -7607,6 +7607,38 @@
7607
  "eval_samples_per_second": 24.219,
7608
  "eval_steps_per_second": 3.028,
7609
  "step": 47500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7610
  }
7611
  ],
7612
  "logging_steps": 100,
@@ -7614,7 +7646,7 @@
7614
  "num_input_tokens_seen": 0,
7615
  "num_train_epochs": 30,
7616
  "save_steps": 100,
7617
- "total_flos": 5.20459391384764e+20,
7618
  "train_batch_size": 8,
7619
  "trial_name": null,
7620
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 20.079983161439696,
5
  "eval_steps": 100,
6
+ "global_step": 47700,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
7607
  "eval_samples_per_second": 24.219,
7608
  "eval_steps_per_second": 3.028,
7609
  "step": 47500
7610
+ },
7611
+ {
7612
+ "epoch": 20.04,
7613
+ "grad_norm": 2.881988525390625,
7614
+ "learning_rate": 1.6729328621908126e-05,
7615
+ "loss": 0.4024,
7616
+ "step": 47600
7617
+ },
7618
+ {
7619
+ "epoch": 20.04,
7620
+ "eval_cer": 0.3465546908607407,
7621
+ "eval_loss": 2.992396831512451,
7622
+ "eval_runtime": 406.9581,
7623
+ "eval_samples_per_second": 23.29,
7624
+ "eval_steps_per_second": 2.912,
7625
+ "step": 47600
7626
+ },
7627
+ {
7628
+ "epoch": 20.08,
7629
+ "grad_norm": 2.220200777053833,
7630
+ "learning_rate": 1.6658657243816255e-05,
7631
+ "loss": 0.4103,
7632
+ "step": 47700
7633
+ },
7634
+ {
7635
+ "epoch": 20.08,
7636
+ "eval_cer": 0.3478818779085683,
7637
+ "eval_loss": 3.0730020999908447,
7638
+ "eval_runtime": 382.3538,
7639
+ "eval_samples_per_second": 24.789,
7640
+ "eval_steps_per_second": 3.099,
7641
+ "step": 47700
7642
  }
7643
  ],
7644
  "logging_steps": 100,
 
7646
  "num_input_tokens_seen": 0,
7647
  "num_train_epochs": 30,
7648
  "save_steps": 100,
7649
+ "total_flos": 5.227217240325222e+20,
7650
  "train_batch_size": 8,
7651
  "trial_name": null,
7652
  "trial_params": null