VERSIL91 commited on
Commit
eee28e7
1 Parent(s): f8a4529

Training in progress, step 22, checkpoint

Browse files
last-checkpoint/adapter_model.safetensors CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:3fa532b40d5c0681f2baf3e51922c8aea3f5f742de78d5404e2c7fb13b9d1384
3
  size 27024
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:994a6c1ec3dcc1a5db23088000de14b21b89c2b42f8184f6c205cb6e0fb92541
3
  size 27024
last-checkpoint/optimizer.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:c51b96be3eb3a7f526346cab91aa0e09347e5f1b0e2ea7802e6818d1e2a82d14
3
  size 63974
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:966b5efc886d7597a65c3d0119ab91d4beaf99e8e9d77cfe0e8c341359f42811
3
  size 63974
last-checkpoint/rng_state.pth CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:eb04243bb25bf686a4dbec102fd9ca7613ad5c31a6a03dfc0541d4df84930100
3
  size 14244
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:8ed09bfe734c9814025ec917462737624591264937583c93cb36af9436c63d48
3
  size 14244
last-checkpoint/scheduler.pt CHANGED
@@ -1,3 +1,3 @@
1
  version https://git-lfs.github.com/spec/v1
2
- oid sha256:7f110d38baddc83b27dc701018da1279ccbbfd00f24dd48bfba755b5837c286c
3
  size 1064
 
1
  version https://git-lfs.github.com/spec/v1
2
+ oid sha256:1565387b96ec5f770ad7b2f4817988cb2fc09f94ceb911096ae3c468010baf3d
3
  size 1064
last-checkpoint/trainer_state.json CHANGED
@@ -1,9 +1,9 @@
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
- "epoch": 0.8545994065281899,
5
  "eval_steps": 6,
6
- "global_step": 18,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
@@ -165,6 +165,34 @@
165
  "eval_samples_per_second": 464.759,
166
  "eval_steps_per_second": 232.379,
167
  "step": 18
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
 
168
  }
169
  ],
170
  "logging_steps": 1,
@@ -179,12 +207,12 @@
179
  "should_evaluate": false,
180
  "should_log": false,
181
  "should_save": true,
182
- "should_training_stop": false
183
  },
184
  "attributes": {}
185
  }
186
  },
187
- "total_flos": 1432098078720.0,
188
  "train_batch_size": 2,
189
  "trial_name": null,
190
  "trial_params": null
 
1
  {
2
  "best_metric": null,
3
  "best_model_checkpoint": null,
4
+ "epoch": 1.0445103857566767,
5
  "eval_steps": 6,
6
+ "global_step": 22,
7
  "is_hyper_param_search": false,
8
  "is_local_process_zero": true,
9
  "is_world_process_zero": true,
 
165
  "eval_samples_per_second": 464.759,
166
  "eval_steps_per_second": 232.379,
167
  "step": 18
168
+ },
169
+ {
170
+ "epoch": 0.9020771513353115,
171
+ "grad_norm": 0.10974813997745514,
172
+ "learning_rate": 1.4644660940672627e-05,
173
+ "loss": 10.3813,
174
+ "step": 19
175
+ },
176
+ {
177
+ "epoch": 0.9495548961424333,
178
+ "grad_norm": 0.11267790198326111,
179
+ "learning_rate": 6.698729810778065e-06,
180
+ "loss": 10.3783,
181
+ "step": 20
182
+ },
183
+ {
184
+ "epoch": 0.9970326409495549,
185
+ "grad_norm": 0.10685980319976807,
186
+ "learning_rate": 1.70370868554659e-06,
187
+ "loss": 10.3776,
188
+ "step": 21
189
+ },
190
+ {
191
+ "epoch": 1.0445103857566767,
192
+ "grad_norm": 0.26596400141716003,
193
+ "learning_rate": 0.0,
194
+ "loss": 20.1393,
195
+ "step": 22
196
  }
197
  ],
198
  "logging_steps": 1,
 
207
  "should_evaluate": false,
208
  "should_log": false,
209
  "should_save": true,
210
+ "should_training_stop": true
211
  },
212
  "attributes": {}
213
  }
214
  },
215
+ "total_flos": 1765068963840.0,
216
  "train_batch_size": 2,
217
  "trial_name": null,
218
  "trial_params": null