ashanhr commited on
Commit
d43b55a
·
verified ·
1 Parent(s): 00b6962

Training in progress, step 8100, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:fcf23930d595ea17c01a47edb3798852592d31dfd889457b2d9d1c679c37f37c
3
  size 4978139416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:408c6cd9a698b16001a43197f1c14eb97815dc79cf0faf065cdd9fae5f5d7a0c
3
  size 4978139416
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:072b89b705b64c5ce2d1ed54311d307c5b17debef3a5bed90eb6ae9ae9aeeb3c
3
  size 3659223436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:29518292804cc1bbadfb0c2d47c708a9ad2a4899919ddbb39ca62659b229c095
3
  size 3659223436
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:cc97c919648478a76fc77d365b16ccd77e7685605a6c32a7a953ccbca70c1486
3
  size 17241500333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:c599e77c37f648f3f925b89128d4d5cf7e01fb20ae88a5649c4f9e9edbed6487
3
  size 17241500333
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:bd14d107eddb60c68c35f7471eb98fd20b6fccbc6c5557a787e6bfda0de7513c
3
- size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2427345fe81496661e617176cf30f03a179e764f54042a1de82a0b0e5e040e18
3
+ size 14567
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:e743b0e6a41e962bc63d9c92583e4d59314ce7fec2d27ef3526e9f1e5a66f4c2
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:4541330612b9d0a7a65c65620d7604d28710aaed0289ff9c20765e43c3bdf6a7
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 3.3256156598610818,
5
  "eval_steps": 100,
6
- "global_step": 7900,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1271,6 +1271,38 @@
1271
  "eval_samples_per_second": 25.846,
1272
  "eval_steps_per_second": 3.231,
1273
  "step": 7900
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1274
  }
1275
  ],
1276
  "logging_steps": 100,
@@ -1278,7 +1310,7 @@
1278
  "num_input_tokens_seen": 0,
1279
  "num_train_epochs": 30,
1280
  "save_steps": 100,
1281
- "total_flos": 8.66663762734376e+19,
1282
  "train_batch_size": 8,
1283
  "trial_name": null,
1284
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 3.4098084613765525,
5
  "eval_steps": 100,
6
+ "global_step": 8100,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1271
  "eval_samples_per_second": 25.846,
1272
  "eval_steps_per_second": 3.231,
1273
  "step": 7900
1274
+ },
1275
+ {
1276
+ "epoch": 3.37,
1277
+ "grad_norm": 4.01229190826416,
1278
+ "learning_rate": 4.470600706713781e-05,
1279
+ "loss": 2.008,
1280
+ "step": 8000
1281
+ },
1282
+ {
1283
+ "epoch": 3.37,
1284
+ "eval_cer": 0.5001319854522701,
1285
+ "eval_loss": 1.97179114818573,
1286
+ "eval_runtime": 377.7336,
1287
+ "eval_samples_per_second": 25.092,
1288
+ "eval_steps_per_second": 3.137,
1289
+ "step": 8000
1290
+ },
1291
+ {
1292
+ "epoch": 3.41,
1293
+ "grad_norm": 1.3265995979309082,
1294
+ "learning_rate": 4.463533568904594e-05,
1295
+ "loss": 1.7854,
1296
+ "step": 8100
1297
+ },
1298
+ {
1299
+ "epoch": 3.41,
1300
+ "eval_cer": 0.48717785772945915,
1301
+ "eval_loss": 1.6064122915267944,
1302
+ "eval_runtime": 358.3216,
1303
+ "eval_samples_per_second": 26.451,
1304
+ "eval_steps_per_second": 3.307,
1305
+ "step": 8100
1306
  }
1307
  ],
1308
  "logging_steps": 100,
 
1310
  "num_input_tokens_seen": 0,
1311
  "num_train_epochs": 30,
1312
  "save_steps": 100,
1313
+ "total_flos": 8.881279509355615e+19,
1314
  "train_batch_size": 8,
1315
  "trial_name": null,
1316
  "trial_params": null