ashanhr commited on
Commit
828c2b2
·
verified ·
1 Parent(s): 3caa8d1

Training in progress, step 7500, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:930ff99206b4cefd343133517df19a99faeea6029d6177481ccaec88e3dce2a1
3
  size 4978139416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:161c63173b11d278454f76e096130af0ea2e197a6c275cfa04ebf9e466be9c63
3
  size 4978139416
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:d6c24714dc5073398cfebd333596cf1b0bae2a24c794815e98acfcf729f25e62
3
  size 3659223436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:24a9ff8292338e7bef2a92a787972e8c312dc811b53b5c8f487afd672c88d3a2
3
  size 3659223436
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:a5ace4eb638ee35078677814378e460122eee04667323d7ec66aafc41a0bcf3d
3
  size 17241500333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:effe03ee93685e827f87da010b75e451bf179e605eefd74137f038217c39e61d
3
  size 17241500333
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:abe36de378b59c2f69bfa0fcce3a08cc6146802a272942efe4095319e84db456
3
  size 14567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:070566d04626f1a1199d33a844fca7649870a2f1dc42c459cb3ffc8f9fae256c
3
  size 14567
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:dd87e1ec2272e10b79236e6de3ce1bcd6d1b8483ab6bbbf6487a56197cdb7bd1
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:baab263627df2d83ddbd983f1bff3bce2c66dd722c961b1e37caa6a98898e772
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 3.0730372553146705,
5
  "eval_steps": 100,
6
- "global_step": 7300,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1175,6 +1175,38 @@
1175
  "eval_samples_per_second": 25.996,
1176
  "eval_steps_per_second": 3.25,
1177
  "step": 7300
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1178
  }
1179
  ],
1180
  "logging_steps": 100,
@@ -1182,7 +1214,7 @@
1182
  "num_input_tokens_seen": 0,
1183
  "num_train_epochs": 30,
1184
  "save_steps": 100,
1185
- "total_flos": 8.009534999457974e+19,
1186
  "train_batch_size": 8,
1187
  "trial_name": null,
1188
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 3.157230056830141,
5
  "eval_steps": 100,
6
+ "global_step": 7500,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1175
  "eval_samples_per_second": 25.996,
1176
  "eval_steps_per_second": 3.25,
1177
  "step": 7300
1178
+ },
1179
+ {
1180
+ "epoch": 3.12,
1181
+ "grad_norm": 19.71111488342285,
1182
+ "learning_rate": 4.513003533568905e-05,
1183
+ "loss": 1.7744,
1184
+ "step": 7400
1185
+ },
1186
+ {
1187
+ "epoch": 3.12,
1188
+ "eval_cer": 0.5048052481326503,
1189
+ "eval_loss": 2.699909210205078,
1190
+ "eval_runtime": 377.4252,
1191
+ "eval_samples_per_second": 25.112,
1192
+ "eval_steps_per_second": 3.14,
1193
+ "step": 7400
1194
+ },
1195
+ {
1196
+ "epoch": 3.16,
1197
+ "grad_norm": 3.2357943058013916,
1198
+ "learning_rate": 4.505936395759717e-05,
1199
+ "loss": 1.8872,
1200
+ "step": 7500
1201
+ },
1202
+ {
1203
+ "epoch": 3.16,
1204
+ "eval_cer": 0.5123968558132259,
1205
+ "eval_loss": 2.0029726028442383,
1206
+ "eval_runtime": 372.4224,
1207
+ "eval_samples_per_second": 25.45,
1208
+ "eval_steps_per_second": 3.182,
1209
+ "step": 7500
1210
  }
1211
  ],
1212
  "logging_steps": 100,
 
1214
  "num_input_tokens_seen": 0,
1215
  "num_train_epochs": 30,
1216
  "save_steps": 100,
1217
+ "total_flos": 8.232776432614433e+19,
1218
  "train_batch_size": 8,
1219
  "trial_name": null,
1220
  "trial_params": null