ashanhr commited on
Commit
7bcf924
1 Parent(s): f742587

Training in progress, step 21200, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0914d34b789b137bde7b5e425800476cd3b8a0c1c2a2f6dca29e198df8bce8d1
3
  size 4978139416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:64d2e194abb4d33bd2df007c3f2dfe7c3090b6d911c750db68f03541f777960a
3
  size 4978139416
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8b055483df48b82f8f43aae9c1bf44543d9d6b475dd015afb266b79d61079e91
3
  size 3659223436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:a956ba41ba48f1e1424ba9b1b15a3ff3f56d507463416d550ee26a3eaf57770b
3
  size 3659223436
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0d1f86a4256547c5562fc9179cc66f200a56f9f53b2448731cd2d83aadb1bd70
3
  size 17241500333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:bc981ba432a5dc7f3000d740be2b6cd1bdf1ec48f451e86e92d2708e7dd2aa39
3
  size 17241500333
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:73aa4ed4dedc1aae7d9f7534e9a473a7d481fd77c26b1dac6a9acd73c2b3a117
3
  size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:eb8ac8461488f7d31ff771b67510406fd1847a4a10da6092663344858ebfa7ed
3
  size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:73620584f32d0b623d2e9a17c6bb745cf56317c4ff398df3fcc5b050e7d94c8b
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:62c80163459f707d7a13e0f4c1cac2205e273df5ba9942f544868dc9dce2b481
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 8.840244159124396,
5
  "eval_steps": 100,
6
- "global_step": 21000,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -3367,6 +3367,38 @@
3367
  "eval_samples_per_second": 25.442,
3368
  "eval_steps_per_second": 3.181,
3369
  "step": 21000
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
3370
  }
3371
  ],
3372
  "logging_steps": 100,
@@ -3374,7 +3406,7 @@
3374
  "num_input_tokens_seen": 0,
3375
  "num_train_epochs": 30,
3376
  "save_steps": 100,
3377
- "total_flos": 2.300997184346249e+20,
3378
  "train_batch_size": 8,
3379
  "trial_name": null,
3380
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 8.924436960639865,
5
  "eval_steps": 100,
6
+ "global_step": 21200,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
3367
  "eval_samples_per_second": 25.442,
3368
  "eval_steps_per_second": 3.181,
3369
  "step": 21000
3370
+ },
3371
+ {
3372
+ "epoch": 8.88,
3373
+ "grad_norm": 1.8727614879608154,
3374
+ "learning_rate": 3.5450883392226145e-05,
3375
+ "loss": 2.5929,
3376
+ "step": 21100
3377
+ },
3378
+ {
3379
+ "epoch": 8.88,
3380
+ "eval_cer": 0.4467780884595831,
3381
+ "eval_loss": 2.1603004932403564,
3382
+ "eval_runtime": 382.1598,
3383
+ "eval_samples_per_second": 24.801,
3384
+ "eval_steps_per_second": 3.101,
3385
+ "step": 21100
3386
+ },
3387
+ {
3388
+ "epoch": 8.92,
3389
+ "grad_norm": 2.88053560256958,
3390
+ "learning_rate": 3.538021201413428e-05,
3391
+ "loss": 1.2652,
3392
+ "step": 21200
3393
+ },
3394
+ {
3395
+ "epoch": 8.92,
3396
+ "eval_cer": 0.44302872394509407,
3397
+ "eval_loss": 2.7272582054138184,
3398
+ "eval_runtime": 364.629,
3399
+ "eval_samples_per_second": 25.994,
3400
+ "eval_steps_per_second": 3.25,
3401
+ "step": 21200
3402
  }
3403
  ],
3404
  "logging_steps": 100,
 
3406
  "num_input_tokens_seen": 0,
3407
  "num_train_epochs": 30,
3408
  "save_steps": 100,
3409
+ "total_flos": 2.3225360192155995e+20,
3410
  "train_batch_size": 8,
3411
  "trial_name": null,
3412
  "trial_params": null