Training in progress, epoch 2, checkpoint
Browse files- last-checkpoint/README.md +22 -12
- last-checkpoint/optimizer.pt +1 -1
- last-checkpoint/pytorch_model.bin +1 -1
- last-checkpoint/rng_state.pth +1 -1
- last-checkpoint/scheduler.pt +1 -1
- last-checkpoint/trainer_state.json +153 -3
last-checkpoint/README.md
CHANGED
@@ -538,18 +538,28 @@ You can finetune this model on your own dataset.
|
|
538 |
</details>
|
539 |
|
540 |
### Training Logs
|
541 |
-
| Epoch | Step
|
542 |
-
|
543 |
-
| None | 0
|
544 |
-
| 0.1001 | 734
|
545 |
-
| 0.2001 | 1468
|
546 |
-
| 0.3002 | 2202
|
547 |
-
| 0.4002 | 2936
|
548 |
-
| 0.5003 | 3670
|
549 |
-
| 0.6003 | 4404
|
550 |
-
| 0.7004 | 5138
|
551 |
-
| 0.8004 | 5872
|
552 |
-
| 0.9005 | 6606
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
553 |
|
554 |
|
555 |
### Framework Versions
|
|
|
538 |
</details>
|
539 |
|
540 |
### Training Logs
|
541 |
+
| Epoch | Step | Training Loss | qnli-contrastive loss | nli-pairs loss |
|
542 |
+
|:------:|:-----:|:-------------:|:---------------------:|:--------------:|
|
543 |
+
| None | 0 | - | 20.1737 | 4.0959 |
|
544 |
+
| 0.1001 | 734 | 4.796 | - | - |
|
545 |
+
| 0.2001 | 1468 | 1.3015 | 0.0358 | 0.9115 |
|
546 |
+
| 0.3002 | 2202 | 0.89 | - | - |
|
547 |
+
| 0.4002 | 2936 | 0.716 | 0.0168 | 0.5944 |
|
548 |
+
| 0.5003 | 3670 | 0.6365 | - | - |
|
549 |
+
| 0.6003 | 4404 | 0.5883 | 0.0164 | 0.4975 |
|
550 |
+
| 0.7004 | 5138 | 0.5192 | - | - |
|
551 |
+
| 0.8004 | 5872 | 0.4961 | 0.0288 | 0.4450 |
|
552 |
+
| 0.9005 | 6606 | 0.6035 | - | - |
|
553 |
+
| 1.0005 | 7340 | 0.4733 | 0.0110 | 0.4215 |
|
554 |
+
| 1.1006 | 8074 | 0.4002 | - | - |
|
555 |
+
| 1.2007 | 8808 | 0.3929 | 0.0454 | 0.3796 |
|
556 |
+
| 1.3007 | 9542 | 0.3826 | - | - |
|
557 |
+
| 1.4008 | 10276 | 0.3522 | 0.0178 | 0.3714 |
|
558 |
+
| 1.5008 | 11010 | 0.3627 | - | - |
|
559 |
+
| 1.6009 | 11744 | 0.3553 | 0.0257 | 0.3629 |
|
560 |
+
| 1.7009 | 12478 | 0.3406 | - | - |
|
561 |
+
| 1.8010 | 13212 | 0.3288 | 0.0289 | 0.3575 |
|
562 |
+
| 1.9010 | 13946 | 0.4563 | - | - |
|
563 |
|
564 |
|
565 |
### Framework Versions
|
last-checkpoint/optimizer.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1130520122
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:197eac8569a220596590c77575fe0e465e399356472710a19ab8e45585b2ae6e
|
3 |
size 1130520122
|
last-checkpoint/pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 565251810
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:e6b8f13689928c18ece3856866a5eabeea8661d8106e9cb8141da0943dbf28da
|
3 |
size 565251810
|
last-checkpoint/rng_state.pth
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 14180
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:fd3ebef39758a414d139f600ddff7baf9e47748f2ea2a2338f7aed637daeb655
|
3 |
size 14180
|
last-checkpoint/scheduler.pt
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
size 1064
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:4125f2e2f1951242b702b885a9e93bcaa6d5f26d6f59d55fecae3d75e9474f60
|
3 |
size 1064
|
last-checkpoint/trainer_state.json
CHANGED
@@ -1,9 +1,9 @@
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
-
"epoch":
|
5 |
"eval_steps": 1468,
|
6 |
-
"global_step":
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
@@ -134,6 +134,156 @@
|
|
134 |
"learning_rate": 1.8462745233342613e-05,
|
135 |
"loss": 0.6035,
|
136 |
"step": 6606
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
137 |
}
|
138 |
],
|
139 |
"logging_steps": 734,
|
@@ -148,7 +298,7 @@
|
|
148 |
"should_evaluate": false,
|
149 |
"should_log": false,
|
150 |
"should_save": true,
|
151 |
-
"should_training_stop":
|
152 |
},
|
153 |
"attributes": {}
|
154 |
}
|
|
|
1 |
{
|
2 |
"best_metric": null,
|
3 |
"best_model_checkpoint": null,
|
4 |
+
"epoch": 2.0,
|
5 |
"eval_steps": 1468,
|
6 |
+
"global_step": 14672,
|
7 |
"is_hyper_param_search": false,
|
8 |
"is_local_process_zero": true,
|
9 |
"is_world_process_zero": true,
|
|
|
134 |
"learning_rate": 1.8462745233342613e-05,
|
135 |
"loss": 0.6035,
|
136 |
"step": 6606
|
137 |
+
},
|
138 |
+
{
|
139 |
+
"epoch": 1.000545256270447,
|
140 |
+
"grad_norm": 3.743481397628784,
|
141 |
+
"learning_rate": 1.699267443860664e-05,
|
142 |
+
"loss": 0.4733,
|
143 |
+
"step": 7340
|
144 |
+
},
|
145 |
+
{
|
146 |
+
"epoch": 1.000545256270447,
|
147 |
+
"eval_nli-pairs_loss": 0.4215342402458191,
|
148 |
+
"eval_nli-pairs_runtime": 3.6783,
|
149 |
+
"eval_nli-pairs_samples_per_second": 1850.875,
|
150 |
+
"eval_nli-pairs_steps_per_second": 57.908,
|
151 |
+
"step": 7340
|
152 |
+
},
|
153 |
+
{
|
154 |
+
"epoch": 1.000545256270447,
|
155 |
+
"eval_qnli-contrastive_loss": 0.01100869383662939,
|
156 |
+
"eval_qnli-contrastive_runtime": 3.639,
|
157 |
+
"eval_qnli-contrastive_samples_per_second": 1501.242,
|
158 |
+
"eval_qnli-contrastive_steps_per_second": 46.991,
|
159 |
+
"step": 7340
|
160 |
+
},
|
161 |
+
{
|
162 |
+
"epoch": 1.1005997818974917,
|
163 |
+
"grad_norm": 0.39953914284706116,
|
164 |
+
"learning_rate": 1.513957108680355e-05,
|
165 |
+
"loss": 0.4002,
|
166 |
+
"step": 8074
|
167 |
+
},
|
168 |
+
{
|
169 |
+
"epoch": 1.2006543075245366,
|
170 |
+
"grad_norm": 2.542104482650757,
|
171 |
+
"learning_rate": 1.3004941249978107e-05,
|
172 |
+
"loss": 0.3929,
|
173 |
+
"step": 8808
|
174 |
+
},
|
175 |
+
{
|
176 |
+
"epoch": 1.2006543075245366,
|
177 |
+
"eval_nli-pairs_loss": 0.37960606813430786,
|
178 |
+
"eval_nli-pairs_runtime": 3.5792,
|
179 |
+
"eval_nli-pairs_samples_per_second": 1902.102,
|
180 |
+
"eval_nli-pairs_steps_per_second": 59.511,
|
181 |
+
"step": 8808
|
182 |
+
},
|
183 |
+
{
|
184 |
+
"epoch": 1.2006543075245366,
|
185 |
+
"eval_qnli-contrastive_loss": 0.04537490755319595,
|
186 |
+
"eval_qnli-contrastive_runtime": 3.371,
|
187 |
+
"eval_qnli-contrastive_samples_per_second": 1620.568,
|
188 |
+
"eval_qnli-contrastive_steps_per_second": 50.726,
|
189 |
+
"step": 8808
|
190 |
+
},
|
191 |
+
{
|
192 |
+
"epoch": 1.3007088331515813,
|
193 |
+
"grad_norm": 2.3156607151031494,
|
194 |
+
"learning_rate": 1.0705711968273469e-05,
|
195 |
+
"loss": 0.3826,
|
196 |
+
"step": 9542
|
197 |
+
},
|
198 |
+
{
|
199 |
+
"epoch": 1.400763358778626,
|
200 |
+
"grad_norm": 3.3540971279144287,
|
201 |
+
"learning_rate": 8.370979573663896e-06,
|
202 |
+
"loss": 0.3522,
|
203 |
+
"step": 10276
|
204 |
+
},
|
205 |
+
{
|
206 |
+
"epoch": 1.400763358778626,
|
207 |
+
"eval_nli-pairs_loss": 0.3714284896850586,
|
208 |
+
"eval_nli-pairs_runtime": 3.5826,
|
209 |
+
"eval_nli-pairs_samples_per_second": 1900.32,
|
210 |
+
"eval_nli-pairs_steps_per_second": 59.455,
|
211 |
+
"step": 10276
|
212 |
+
},
|
213 |
+
{
|
214 |
+
"epoch": 1.400763358778626,
|
215 |
+
"eval_qnli-contrastive_loss": 0.017819516360759735,
|
216 |
+
"eval_qnli-contrastive_runtime": 3.4236,
|
217 |
+
"eval_qnli-contrastive_samples_per_second": 1595.701,
|
218 |
+
"eval_qnli-contrastive_steps_per_second": 49.948,
|
219 |
+
"step": 10276
|
220 |
+
},
|
221 |
+
{
|
222 |
+
"epoch": 1.5008178844056705,
|
223 |
+
"grad_norm": 1.3052864074707031,
|
224 |
+
"learning_rate": 6.125236966193413e-06,
|
225 |
+
"loss": 0.3627,
|
226 |
+
"step": 11010
|
227 |
+
},
|
228 |
+
{
|
229 |
+
"epoch": 1.6008724100327154,
|
230 |
+
"grad_norm": 1.00529944896698,
|
231 |
+
"learning_rate": 4.088586072137575e-06,
|
232 |
+
"loss": 0.3553,
|
233 |
+
"step": 11744
|
234 |
+
},
|
235 |
+
{
|
236 |
+
"epoch": 1.6008724100327154,
|
237 |
+
"eval_nli-pairs_loss": 0.3628700077533722,
|
238 |
+
"eval_nli-pairs_runtime": 3.7016,
|
239 |
+
"eval_nli-pairs_samples_per_second": 1839.184,
|
240 |
+
"eval_nli-pairs_steps_per_second": 57.542,
|
241 |
+
"step": 11744
|
242 |
+
},
|
243 |
+
{
|
244 |
+
"epoch": 1.6008724100327154,
|
245 |
+
"eval_qnli-contrastive_loss": 0.025695964694023132,
|
246 |
+
"eval_qnli-contrastive_runtime": 3.365,
|
247 |
+
"eval_qnli-contrastive_samples_per_second": 1623.495,
|
248 |
+
"eval_qnli-contrastive_steps_per_second": 50.818,
|
249 |
+
"step": 11744
|
250 |
+
},
|
251 |
+
{
|
252 |
+
"epoch": 1.70092693565976,
|
253 |
+
"grad_norm": 6.129855632781982,
|
254 |
+
"learning_rate": 2.375740327678049e-06,
|
255 |
+
"loss": 0.3406,
|
256 |
+
"step": 12478
|
257 |
+
},
|
258 |
+
{
|
259 |
+
"epoch": 1.800981461286805,
|
260 |
+
"grad_norm": 0.2667868733406067,
|
261 |
+
"learning_rate": 1.0819684733779468e-06,
|
262 |
+
"loss": 0.3288,
|
263 |
+
"step": 13212
|
264 |
+
},
|
265 |
+
{
|
266 |
+
"epoch": 1.800981461286805,
|
267 |
+
"eval_nli-pairs_loss": 0.3574618101119995,
|
268 |
+
"eval_nli-pairs_runtime": 3.5437,
|
269 |
+
"eval_nli-pairs_samples_per_second": 1921.132,
|
270 |
+
"eval_nli-pairs_steps_per_second": 60.106,
|
271 |
+
"step": 13212
|
272 |
+
},
|
273 |
+
{
|
274 |
+
"epoch": 1.800981461286805,
|
275 |
+
"eval_qnli-contrastive_loss": 0.028892073780298233,
|
276 |
+
"eval_qnli-contrastive_runtime": 3.3304,
|
277 |
+
"eval_qnli-contrastive_samples_per_second": 1640.352,
|
278 |
+
"eval_qnli-contrastive_steps_per_second": 51.345,
|
279 |
+
"step": 13212
|
280 |
+
},
|
281 |
+
{
|
282 |
+
"epoch": 1.9010359869138496,
|
283 |
+
"grad_norm": 5.174046039581299,
|
284 |
+
"learning_rate": 2.746246447818135e-07,
|
285 |
+
"loss": 0.4563,
|
286 |
+
"step": 13946
|
287 |
}
|
288 |
],
|
289 |
"logging_steps": 734,
|
|
|
298 |
"should_evaluate": false,
|
299 |
"should_log": false,
|
300 |
"should_save": true,
|
301 |
+
"should_training_stop": true
|
302 |
},
|
303 |
"attributes": {}
|
304 |
}
|