ashanhr commited on
Commit
4cd85c1
1 Parent(s): c197f06

Training in progress, step 600, checkpoint

Browse files
last-checkpoint/model-00001-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8d560c592defc59db71fd1f258216fb9592038915fa8276cf7f58ac806225cb4
3
  size 4978139416
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:653ef63911d6161b88b3f9f32df8efdb866ecf55d97fc3febdbed2c5450a3c6e
3
  size 4978139416
last-checkpoint/model-00002-of-00002.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:8403037ca76855de814e44e294a36c871d07b6ce7eb262004265b47b68c23e7b
3
  size 3659223436
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:6e64cd2b2c62bdc327ea06a22049bd82a06e5ae8faac9465953e7b68978aad26
3
  size 3659223436
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:6857b9d85e05e6482fdfa6f15b895b4892978c2e062e96357cd370bc50bbe430
3
  size 17241500333
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:67fcec210fd34c6bf199fbb322d43b9953bbb316718492511d8d57134ad24c1a
3
  size 17241500333
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:63dfeb92128b97ff4e9751e56ee84873463786fe077e2b5c6477027ff804530b
3
- size 14567
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:d3be477733fd09e276311bb65c01459e6a102c492ba2de21b23224cea823214d
3
+ size 14503
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:f9951f9ad49c35aaa5c7497a99df980cb2ba3cc1a085952af21354635556e2f7
3
  size 623
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:2039042aec5b0843d31e4b45065f8954fece58ce2d7d372ebe3c6462b2a5b07c
3
  size 623
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.16838560303094086,
5
  "eval_steps": 100,
6
- "global_step": 400,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -71,6 +71,38 @@
71
  "eval_samples_per_second": 27.769,
72
  "eval_steps_per_second": 3.472,
73
  "step": 400
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
74
  }
75
  ],
76
  "logging_steps": 100,
@@ -78,7 +110,7 @@
78
  "num_input_tokens_seen": 0,
79
  "num_train_epochs": 30,
80
  "save_steps": 100,
81
- "total_flos": 4.435557986072007e+18,
82
  "train_batch_size": 8,
83
  "trial_name": null,
84
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 0.2525784045464113,
5
  "eval_steps": 100,
6
+ "global_step": 600,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
71
  "eval_samples_per_second": 27.769,
72
  "eval_steps_per_second": 3.472,
73
  "step": 400
74
+ },
75
+ {
76
+ "epoch": 0.21,
77
+ "grad_norm": 4.236645698547363,
78
+ "learning_rate": 4.9500000000000004e-05,
79
+ "loss": 4.2363,
80
+ "step": 500
81
+ },
82
+ {
83
+ "epoch": 0.21,
84
+ "eval_cer": 0.624139650385202,
85
+ "eval_loss": 2.457566261291504,
86
+ "eval_runtime": 356.3471,
87
+ "eval_samples_per_second": 26.598,
88
+ "eval_steps_per_second": 3.325,
89
+ "step": 500
90
+ },
91
+ {
92
+ "epoch": 0.25,
93
+ "grad_norm": 13.769584655761719,
94
+ "learning_rate": 4.9932862190812725e-05,
95
+ "loss": 2.6933,
96
+ "step": 600
97
+ },
98
+ {
99
+ "epoch": 0.25,
100
+ "eval_cer": 0.6483369833013961,
101
+ "eval_loss": 3.4720070362091064,
102
+ "eval_runtime": 345.5879,
103
+ "eval_samples_per_second": 27.426,
104
+ "eval_steps_per_second": 3.429,
105
+ "step": 600
106
  }
107
  ],
108
  "logging_steps": 100,
 
110
  "num_input_tokens_seen": 0,
111
  "num_train_epochs": 30,
112
  "save_steps": 100,
113
+ "total_flos": 6.597028028856881e+18,
114
  "train_batch_size": 8,
115
  "trial_name": null,
116
  "trial_params": null