ashanhr commited on
Commit
e7600df
·
verified ·
1 Parent(s): 3d913ce

Training in progress, step 25500, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:92d0f54d94bb40c36aff6996d6fa628c3ad8f35db1778fc27c392e8f1f297c37
3
  size 4978139416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:0ab007dd960c183c2e30e85616514e3da6c3660d56809b3a634e02652595cf69
3
  size 4978139416
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e11244be255828a4851b0092fd87821c5c801f9a5160bbcbb2f0bc57051dd6c0
3
  size 3659223436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6778800cc796c45ea30ad3a22874ae830638e117c355360ea6c92a3dfd474eab
3
  size 3659223436
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:67169462303e6fe8077b6f06672390845332995ce37eb008c9c0bf1f85691316
3
  size 17241500333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e13a3e04c3bc95af16ed8a2dd0f5ca31f7946fe6e95a84fd44ba865570960002
3
  size 17241500333
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:95db0728543ded7d217ea539348d5e5e4f1dbdf964d9771ae6b95d5606964667
3
  size 14567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:846f249b88befad596e3b348fc9d3af50438f51fe4f7aa809cab2c988dc38c0e
3
  size 14567
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:917056f6954f4dd224b2ffcefa138533388ad3cff19d51f30a910abd911033b3
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:3e006367d18ff2557748c2cef8502481c62684745dc5fb7eb9b01c6baf1a2184
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 10.650389391707009,
5
  "eval_steps": 100,
6
- "global_step": 25300,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -4055,6 +4055,38 @@
4055
  "eval_samples_per_second": 25.82,
4056
  "eval_steps_per_second": 3.228,
4057
  "step": 25300
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
4058
  }
4059
  ],
4060
  "logging_steps": 100,
@@ -4062,7 +4094,7 @@
4062
  "num_input_tokens_seen": 0,
4063
  "num_train_epochs": 30,
4064
  "save_steps": 100,
4065
- "total_flos": 2.772097477056956e+20,
4066
  "train_batch_size": 8,
4067
  "trial_name": null,
4068
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 10.73458219322248,
5
  "eval_steps": 100,
6
+ "global_step": 25500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
4055
  "eval_samples_per_second": 25.82,
4056
  "eval_steps_per_second": 3.228,
4057
  "step": 25300
4058
+ },
4059
+ {
4060
+ "epoch": 10.69,
4061
+ "grad_norm": 2.1997592449188232,
4062
+ "learning_rate": 3.241272084805654e-05,
4063
+ "loss": 1.1923,
4064
+ "step": 25400
4065
+ },
4066
+ {
4067
+ "epoch": 10.69,
4068
+ "eval_cer": 0.42187438895623947,
4069
+ "eval_loss": 2.4305357933044434,
4070
+ "eval_runtime": 385.3951,
4071
+ "eval_samples_per_second": 24.593,
4072
+ "eval_steps_per_second": 3.075,
4073
+ "step": 25400
4074
+ },
4075
+ {
4076
+ "epoch": 10.73,
4077
+ "grad_norm": 1.7977826595306396,
4078
+ "learning_rate": 3.234204946996466e-05,
4079
+ "loss": 1.7363,
4080
+ "step": 25500
4081
+ },
4082
+ {
4083
+ "epoch": 10.73,
4084
+ "eval_cer": 0.4337506354855109,
4085
+ "eval_loss": 2.236682176589966,
4086
+ "eval_runtime": 370.2296,
4087
+ "eval_samples_per_second": 25.6,
4088
+ "eval_steps_per_second": 3.201,
4089
+ "step": 25500
4090
  }
4091
  ],
4092
  "logging_steps": 100,
 
4094
  "num_input_tokens_seen": 0,
4095
  "num_train_epochs": 30,
4096
  "save_steps": 100,
4097
+ "total_flos": 2.7941943750609027e+20,
4098
  "train_batch_size": 8,
4099
  "trial_name": null,
4100
  "trial_params": null