ashanhr commited on
Commit
4820196
1 Parent(s): 92500c1

Training in progress, step 9300, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:9bbee931196582b3943799fea6fad9126c367e3f66b33f31350eb68c8e369ac5
3
  size 4978139416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:e53d952088e21ee7c18f6fee4b8d7769b753af22ca10b63a69e3f8de2b65b0be
3
  size 4978139416
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:2b5eea1245176ae49a93bbf3af3ce453f09302734a757c1989c08339e3ac6740
3
  size 3659223436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:76dd5b4a8826aefc4d28f9aba3108c5d9d8a9bb5ddbd6054bc88418f54521ed3
3
  size 3659223436
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:1a372c874bbe208d173cdf674b526297d0b78e20ec1684ee1fa2fe9c6285b8b8
3
  size 17241500333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:5bb833d4bc9327bd0e01ea33487a3bf19e25b2de7bacf92eb09217f6b4297dc2
3
  size 17241500333
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:29f4043ffeadfb240b8eac65e4ab4f2b6109a3126928822311083ed99f5376bf
3
- size 14503
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:f7b3c74ecbda77c6abb056be85c9b612f31948352a984f5f4ba2d90f864dc773
3
+ size 14567
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:da5eeff1d9d5c6004df620e11b35d3b63eb34d8e5fdfa4922d3c6f9e09140f6a
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:336d1b7b025618b96c60c17b8c9303a9e5e4f82eb8a6c5c17e6b7e590bbf1488
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 3.8307724689539047,
5
  "eval_steps": 100,
6
- "global_step": 9100,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -1463,6 +1463,38 @@
1463
  "eval_samples_per_second": 25.953,
1464
  "eval_steps_per_second": 3.245,
1465
  "step": 9100
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
1466
  }
1467
  ],
1468
  "logging_steps": 100,
@@ -1470,7 +1502,7 @@
1470
  "num_input_tokens_seen": 0,
1471
  "num_train_epochs": 30,
1472
  "save_steps": 100,
1473
- "total_flos": 9.973966921673019e+19,
1474
  "train_batch_size": 8,
1475
  "trial_name": null,
1476
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 3.914965270469375,
5
  "eval_steps": 100,
6
+ "global_step": 9300,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
1463
  "eval_samples_per_second": 25.953,
1464
  "eval_steps_per_second": 3.245,
1465
  "step": 9100
1466
+ },
1467
+ {
1468
+ "epoch": 3.87,
1469
+ "grad_norm": 1.68573796749115,
1470
+ "learning_rate": 4.3857950530035335e-05,
1471
+ "loss": 2.5474,
1472
+ "step": 9200
1473
+ },
1474
+ {
1475
+ "epoch": 3.87,
1476
+ "eval_cer": 0.5048394665832389,
1477
+ "eval_loss": 2.7017831802368164,
1478
+ "eval_runtime": 374.6095,
1479
+ "eval_samples_per_second": 25.301,
1480
+ "eval_steps_per_second": 3.163,
1481
+ "step": 9200
1482
+ },
1483
+ {
1484
+ "epoch": 3.91,
1485
+ "grad_norm": 4.48902702331543,
1486
+ "learning_rate": 4.378727915194347e-05,
1487
+ "loss": 2.2683,
1488
+ "step": 9300
1489
+ },
1490
+ {
1491
+ "epoch": 3.91,
1492
+ "eval_cer": 0.49138917132689375,
1493
+ "eval_loss": 1.5836848020553589,
1494
+ "eval_runtime": 360.7206,
1495
+ "eval_samples_per_second": 26.275,
1496
+ "eval_steps_per_second": 3.285,
1497
+ "step": 9300
1498
  }
1499
  ],
1500
  "logging_steps": 100,
 
1502
  "num_input_tokens_seen": 0,
1503
  "num_train_epochs": 30,
1504
  "save_steps": 100,
1505
+ "total_flos": 1.0193037614760552e+20,
1506
  "train_batch_size": 8,
1507
  "trial_name": null,
1508
  "trial_params": null