ashanhr commited on
Commit
4f6c544
·
verified ·
1 Parent(s): 286a852

Training in progress, step 46700, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:b27f517045a239ca3d4c09cfcbc5d90d49a9dc5fb03fcb2ed0885dce063b5310
3
  size 4978139416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:34c44a9f07e1b8aa6dc225c094543b21169c807ababebf17aecec1cdab52533e
3
  size 4978139416
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:5a4020fe34ad40d60ab49cb79998fc9757fa6962619664a49f924687783b3f01
3
  size 3659223436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8fa8ecef7a478c24fdefb985526ee50f6d66cb28942cfbb7207572dd699188e0
3
  size 3659223436
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:4b3789d043057ca3bdf90a7ba4cfb8ad1bdc5bbc2f61f9921c6b8d8b227b8605
3
  size 17241500333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:264756f803302ce1b7070f07c807bd37de2d9873f549d008ab996618e392720f
3
  size 17241500333
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3497a81af3830b12cea55fc1a002714afd90521c8567293cc3b2539688efc021
3
- size 14567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:14eca18c97605af6ae4689d37dc0ca3a788eaef7ff89fb5e8f8f6afe500d717c
3
+ size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7c96c7193a25b743904e5970326ee4765878ca8dbd521d975c5aaa2469d0561c
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e80af03aafdc9b8ba4ba9d26243edd821da4e02e74dd9d4d85df35d2aa6ecc98
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 19.574826352346875,
5
  "eval_steps": 100,
6
- "global_step": 46500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -7447,6 +7447,38 @@
7447
  "eval_samples_per_second": 25.464,
7448
  "eval_steps_per_second": 3.184,
7449
  "step": 46500
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
7450
  }
7451
  ],
7452
  "logging_steps": 100,
@@ -7454,7 +7486,7 @@
7454
  "num_input_tokens_seen": 0,
7455
  "num_train_epochs": 30,
7456
  "save_steps": 100,
7457
- "total_flos": 5.096052922697529e+20,
7458
  "train_batch_size": 8,
7459
  "trial_name": null,
7460
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 19.659019153862346,
5
  "eval_steps": 100,
6
+ "global_step": 46700,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
7447
  "eval_samples_per_second": 25.464,
7448
  "eval_steps_per_second": 3.184,
7449
  "step": 46500
7450
+ },
7451
+ {
7452
+ "epoch": 19.62,
7453
+ "grad_norm": 2.0272178649902344,
7454
+ "learning_rate": 1.7436042402826856e-05,
7455
+ "loss": 0.4745,
7456
+ "step": 46600
7457
+ },
7458
+ {
7459
+ "epoch": 19.62,
7460
+ "eval_cer": 0.3511057447890188,
7461
+ "eval_loss": 3.3171424865722656,
7462
+ "eval_runtime": 381.183,
7463
+ "eval_samples_per_second": 24.865,
7464
+ "eval_steps_per_second": 3.109,
7465
+ "step": 46600
7466
+ },
7467
+ {
7468
+ "epoch": 19.66,
7469
+ "grad_norm": 4.2316412925720215,
7470
+ "learning_rate": 1.7365371024734984e-05,
7471
+ "loss": 0.4875,
7472
+ "step": 46700
7473
+ },
7474
+ {
7475
+ "epoch": 19.66,
7476
+ "eval_cer": 0.3484904774940362,
7477
+ "eval_loss": 2.8240554332733154,
7478
+ "eval_runtime": 368.9178,
7479
+ "eval_samples_per_second": 25.691,
7480
+ "eval_steps_per_second": 3.212,
7481
+ "step": 46700
7482
  }
7483
  ],
7484
  "logging_steps": 100,
 
7486
  "num_input_tokens_seen": 0,
7487
  "num_train_epochs": 30,
7488
  "save_steps": 100,
7489
+ "total_flos": 5.1179007006614834e+20,
7490
  "train_batch_size": 8,
7491
  "trial_name": null,
7492
  "trial_params": null