ashanhr commited on
Commit
5e4e7b6
·
verified ·
1 Parent(s): 395e5b0

Training in progress, step 15900, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:ebec8683b94d94fc8d9612c1c65b13eb71fe6bb5bb5cae5646d15330e2237b15
3
  size 4978139416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1c20ad3beef3f33ef2b4c145c9b9506f9a226a39e2e73693f7ae2fcfc0dcf1cd
3
  size 4978139416
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:59bf8a8143b87e68fd71c8a270a03cfa582fc1bfdd2b29a7d15ff8d8b4213d41
3
  size 3659223436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:74d0557152fa6f6a5c4206d20e2e5e3e65c08a54d396cdd08189203105bb821e
3
  size 3659223436
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:da9ed2936cadc331d13cb99e9dd4c8e3574daaac258fa92af0538fb2ee9612b5
3
  size 17241500333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:49a17f38b1772819369863ac91e47ffc5deca1f2b7e3fbb755cc5669eb6f2884
3
  size 17241500333
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:0a6d63bd7c4e166ca59e260fbbde05182634cc6b1f0a4d12136bc73308ef55e3
3
  size 14567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f938892d18549bfd11854e754003e9c1d26470f2c462d1c3bdac6d13825d7005
3
  size 14567
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:726650f2459dfa6278e31becc2531b3a3d91514d7f35b02bbac401fc857ec409
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:49c14c6d2163c777317ea1561699a4fab0cc691b0ee652db9a2586b628592aa5
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 6.609134918964428,
5
  "eval_steps": 100,
6
- "global_step": 15700,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -2519,6 +2519,38 @@
2519
  "eval_samples_per_second": 25.949,
2520
  "eval_steps_per_second": 3.244,
2521
  "step": 15700
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
2522
  }
2523
  ],
2524
  "logging_steps": 100,
@@ -2526,7 +2558,7 @@
2526
  "num_input_tokens_seen": 0,
2527
  "num_train_epochs": 30,
2528
  "save_steps": 100,
2529
- "total_flos": 1.7201807515766456e+20,
2530
  "train_batch_size": 8,
2531
  "trial_name": null,
2532
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 6.693327720479899,
5
  "eval_steps": 100,
6
+ "global_step": 15900,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
2519
  "eval_samples_per_second": 25.949,
2520
  "eval_steps_per_second": 3.244,
2521
  "step": 15700
2522
+ },
2523
+ {
2524
+ "epoch": 6.65,
2525
+ "grad_norm": 2.1095080375671387,
2526
+ "learning_rate": 3.919505300353357e-05,
2527
+ "loss": 2.2505,
2528
+ "step": 15800
2529
+ },
2530
+ {
2531
+ "epoch": 6.65,
2532
+ "eval_cer": 0.4647232216182394,
2533
+ "eval_loss": 1.8471035957336426,
2534
+ "eval_runtime": 388.913,
2535
+ "eval_samples_per_second": 24.37,
2536
+ "eval_steps_per_second": 3.047,
2537
+ "step": 15800
2538
+ },
2539
+ {
2540
+ "epoch": 6.69,
2541
+ "grad_norm": 3.6614439487457275,
2542
+ "learning_rate": 3.91243816254417e-05,
2543
+ "loss": 1.9341,
2544
+ "step": 15900
2545
+ },
2546
+ {
2547
+ "epoch": 6.69,
2548
+ "eval_cer": 0.4613698134605608,
2549
+ "eval_loss": 2.8519198894500732,
2550
+ "eval_runtime": 363.022,
2551
+ "eval_samples_per_second": 26.109,
2552
+ "eval_steps_per_second": 3.264,
2553
+ "step": 15900
2554
  }
2555
  ],
2556
  "logging_steps": 100,
 
2558
  "num_input_tokens_seen": 0,
2559
  "num_train_epochs": 30,
2560
  "save_steps": 100,
2561
+ "total_flos": 1.7421368190330087e+20,
2562
  "train_batch_size": 8,
2563
  "trial_name": null,
2564
  "trial_params": null