Seth Kulick
commited on
Commit
·
63a0ab7
1
Parent(s):
dd15378
update test
Browse files- loss.tsv +2 -50
- pytorch_model.bin +2 -2
- training.log +53 -784
loss.tsv
CHANGED
@@ -1,51 +1,3 @@
|
|
1 |
EPOCH TIMESTAMP LEARNING_RATE TRAIN_LOSS DEV_LOSS DEV_PRECISION DEV_RECALL DEV_F1 DEV_ACCURACY
|
2 |
-
1
|
3 |
-
2
|
4 |
-
3 14:39:02 0.0000 0.649921288465607 0.247285857796669 0.9518 0.9518 0.9518 0.9518
|
5 |
-
4 14:47:06 0.0000 0.49171242865394643 0.18006576597690582 0.9648 0.9648 0.9648 0.9648
|
6 |
-
5 14:55:21 0.0000 0.42244408257393484 0.15854212641716003 0.9715 0.9715 0.9715 0.9715
|
7 |
-
6 15:02:59 0.0000 0.38888915953073067 0.1478930115699768 0.9729 0.9729 0.9729 0.9729
|
8 |
-
7 15:10:47 0.0000 0.36700320148296955 0.14240729808807373 0.9717 0.9717 0.9717 0.9717
|
9 |
-
8 15:18:56 0.0000 0.34955964649357035 0.13401205837726593 0.9752 0.9752 0.9752 0.9752
|
10 |
-
9 15:27:27 0.0000 0.3329301695936556 0.14190562069416046 0.9764 0.9764 0.9764 0.9764
|
11 |
-
10 15:35:06 0.0000 0.3320703285608999 0.1481310874223709 0.9734 0.9734 0.9734 0.9734
|
12 |
-
11 15:43:05 0.0000 0.3201111280509732 0.16022486984729767 0.9744 0.9744 0.9744 0.9744
|
13 |
-
12 15:50:40 0.0000 0.3146267273126732 0.17038877308368683 0.9764 0.9764 0.9764 0.9764
|
14 |
-
13 15:58:16 0.0000 0.3056304733563553 0.16180633008480072 0.9766 0.9766 0.9766 0.9766
|
15 |
-
14 16:06:46 0.0000 0.2946385615949501 0.1961415857076645 0.9729 0.9729 0.9729 0.9729
|
16 |
-
15 16:15:05 0.0000 0.29707305155274893 0.21415923535823822 0.9737 0.9737 0.9737 0.9737
|
17 |
-
16 16:23:13 0.0000 0.28580348285942486 0.17488490045070648 0.9764 0.9764 0.9764 0.9764
|
18 |
-
17 16:30:47 0.0000 0.28452900538217474 0.1961992233991623 0.9764 0.9764 0.9764 0.9764
|
19 |
-
18 16:38:27 0.0000 0.286532418628909 0.18113288283348083 0.9781 0.9781 0.9781 0.9781
|
20 |
-
19 16:46:56 0.0000 0.2808003542003455 0.2043328434228897 0.9793 0.9793 0.9793 0.9793
|
21 |
-
20 16:55:58 0.0000 0.28836057751744903 0.17976026237010956 0.9798 0.9798 0.9798 0.9798
|
22 |
-
21 17:03:34 0.0000 0.276102740426533 0.20532046258449554 0.9808 0.9808 0.9808 0.9808
|
23 |
-
22 17:11:12 0.0000 0.27382087732996463 0.20975473523139954 0.9771 0.9771 0.9771 0.9771
|
24 |
-
23 17:18:46 0.0000 0.27393156456791734 0.21456189453601837 0.9796 0.9796 0.9796 0.9796
|
25 |
-
24 17:26:20 0.0000 0.2696315985537938 0.21408958733081818 0.9788 0.9788 0.9788 0.9788
|
26 |
-
25 17:34:18 0.0000 0.2604978712176271 0.20778048038482666 0.9801 0.9801 0.9801 0.9801
|
27 |
-
26 17:42:35 0.0000 0.2631420220409018 0.22401468455791473 0.9786 0.9786 0.9786 0.9786
|
28 |
-
27 17:50:17 0.0000 0.2655839982462426 0.2324327975511551 0.9771 0.9771 0.9771 0.9771
|
29 |
-
28 17:57:50 0.0000 0.2628011544988305 0.21640333533287048 0.9803 0.9803 0.9803 0.9803
|
30 |
-
29 18:05:23 0.0000 0.26550006090015277 0.24248327314853668 0.9796 0.9796 0.9796 0.9796
|
31 |
-
30 18:14:03 0.0000 0.2652689226998264 0.23393450677394867 0.9776 0.9776 0.9776 0.9776
|
32 |
-
31 18:21:37 0.0000 0.25939785844109664 0.22774212062358856 0.9788 0.9788 0.9788 0.9788
|
33 |
-
32 18:29:23 0.0000 0.2645543534505578 0.22920973598957062 0.9793 0.9793 0.9793 0.9793
|
34 |
-
33 18:37:33 0.0000 0.255237703400159 0.23627179861068726 0.9791 0.9791 0.9791 0.9791
|
35 |
-
34 18:45:06 0.0000 0.2595120500430324 0.23955273628234863 0.9796 0.9796 0.9796 0.9796
|
36 |
-
35 18:52:39 0.0000 0.2611494515697348 0.24450713396072388 0.9791 0.9791 0.9791 0.9791
|
37 |
-
36 19:00:48 0.0000 0.25360077463430586 0.2530966103076935 0.9788 0.9788 0.9788 0.9788
|
38 |
-
37 19:08:22 0.0000 0.26495934852662506 0.2624962031841278 0.9781 0.9781 0.9781 0.9781
|
39 |
-
38 19:15:52 0.0000 0.25416150340144184 0.24244999885559082 0.9788 0.9788 0.9788 0.9788
|
40 |
-
39 19:23:23 0.0000 0.257929350459372 0.24615894258022308 0.9798 0.9798 0.9798 0.9798
|
41 |
-
40 19:30:54 0.0000 0.25402286565305776 0.2575598359107971 0.9791 0.9791 0.9791 0.9791
|
42 |
-
41 19:38:24 0.0000 0.2508873656720227 0.25127604603767395 0.9786 0.9786 0.9786 0.9786
|
43 |
-
42 19:45:54 0.0000 0.2584042182083517 0.2509002983570099 0.9776 0.9776 0.9776 0.9776
|
44 |
-
43 19:53:27 0.0000 0.2535730162199338 0.25337928533554077 0.9784 0.9784 0.9784 0.9784
|
45 |
-
44 20:01:00 0.0000 0.24615347105615198 0.25915977358818054 0.9784 0.9784 0.9784 0.9784
|
46 |
-
45 20:08:32 0.0000 0.2502548443814474 0.2550533413887024 0.9788 0.9788 0.9788 0.9788
|
47 |
-
46 20:16:06 0.0000 0.24752661908553505 0.2502936124801636 0.9796 0.9796 0.9796 0.9796
|
48 |
-
47 20:23:38 0.0000 0.25003396999949856 0.25260353088378906 0.9788 0.9788 0.9788 0.9788
|
49 |
-
48 20:31:11 0.0000 0.2562181207417887 0.2517630159854889 0.9793 0.9793 0.9793 0.9793
|
50 |
-
49 20:38:45 0.0000 0.2479874323703076 0.25181668996810913 0.9786 0.9786 0.9786 0.9786
|
51 |
-
50 20:46:19 0.0000 0.2503007775652108 0.2513697147369385 0.9784 0.9784 0.9784 0.9784
|
|
|
1 |
EPOCH TIMESTAMP LEARNING_RATE TRAIN_LOSS DEV_LOSS DEV_PRECISION DEV_RECALL DEV_F1 DEV_ACCURACY
|
2 |
+
1 13:29:52 0.0000 1.7334235176429078 0.3509514629840851 0.9331 0.9331 0.9331 0.9331
|
3 |
+
2 13:38:31 0.0000 0.5678914814638107 0.23018118739128113 0.9562 0.9562 0.9562 0.9562
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
pytorch_model.bin
CHANGED
@@ -1,3 +1,3 @@
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
-
oid sha256:
|
3 |
-
size
|
|
|
1 |
version https://git-lfs.github.com/spec/v1
|
2 |
+
oid sha256:3a3e480a68c134d5e7472d163b3430db9b146ed211912fb183a11d40d3a1d542
|
3 |
+
size 1129933867
|
training.log
CHANGED
@@ -1,5 +1,5 @@
|
|
1 |
-
2023-
|
2 |
-
2023-
|
3 |
(embeddings): TransformerWordEmbeddings(
|
4 |
(model): XLMRobertaModel(
|
5 |
(embeddings): XLMRobertaEmbeddings(
|
@@ -313,785 +313,54 @@
|
|
313 |
(loss_function): ViterbiLoss()
|
314 |
(crf): CRF()
|
315 |
)"
|
316 |
-
2023-
|
317 |
-
2023-
|
318 |
-
2023-
|
319 |
-
2023-
|
320 |
-
2023-
|
321 |
-
2023-
|
322 |
-
2023-
|
323 |
-
2023-
|
324 |
-
2023-
|
325 |
-
2023-
|
326 |
-
2023-
|
327 |
-
2023-
|
328 |
-
2023-
|
329 |
-
2023-
|
330 |
-
2023-
|
331 |
-
2023-
|
332 |
-
2023-
|
333 |
-
2023-
|
334 |
-
2023-
|
335 |
-
2023-
|
336 |
-
2023-
|
337 |
-
2023-
|
338 |
-
2023-
|
339 |
-
2023-
|
340 |
-
2023-
|
341 |
-
2023-
|
342 |
-
2023-
|
343 |
-
2023-
|
344 |
-
2023-
|
345 |
-
2023-
|
346 |
-
2023-
|
347 |
-
2023-
|
348 |
-
2023-
|
349 |
-
2023-
|
350 |
-
2023-
|
351 |
-
2023-
|
352 |
-
2023-
|
353 |
-
2023-
|
354 |
-
2023-
|
355 |
-
2023-
|
356 |
-
2023-
|
357 |
-
2023-
|
358 |
-
2023-
|
359 |
-
2023-
|
360 |
-
2023-
|
361 |
-
2023-
|
362 |
-
2023-
|
363 |
-
2023-
|
364 |
-
2023-
|
365 |
-
2023-
|
366 |
-
2023-
|
367 |
-
2023-07-26 14:32:15,374 epoch 3 - iter 24/243 - loss 0.80478615 - time (sec): 44.63 - samples/sec: 181.44 - lr: 0.000021
|
368 |
-
2023-07-26 14:32:59,817 epoch 3 - iter 48/243 - loss 0.76412570 - time (sec): 89.08 - samples/sec: 179.04 - lr: 0.000022
|
369 |
-
2023-07-26 14:33:44,386 epoch 3 - iter 72/243 - loss 0.74620943 - time (sec): 133.64 - samples/sec: 176.74 - lr: 0.000023
|
370 |
-
2023-07-26 14:34:28,788 epoch 3 - iter 96/243 - loss 0.72917808 - time (sec): 178.05 - samples/sec: 175.92 - lr: 0.000024
|
371 |
-
2023-07-26 14:35:13,386 epoch 3 - iter 120/243 - loss 0.72089137 - time (sec): 222.64 - samples/sec: 176.15 - lr: 0.000025
|
372 |
-
2023-07-26 14:35:57,934 epoch 3 - iter 144/243 - loss 0.70075087 - time (sec): 267.19 - samples/sec: 175.65 - lr: 0.000026
|
373 |
-
2023-07-26 14:36:42,264 epoch 3 - iter 168/243 - loss 0.68433087 - time (sec): 311.52 - samples/sec: 174.95 - lr: 0.000027
|
374 |
-
2023-07-26 14:37:26,778 epoch 3 - iter 192/243 - loss 0.67039041 - time (sec): 356.04 - samples/sec: 175.14 - lr: 0.000028
|
375 |
-
2023-07-26 14:38:11,135 epoch 3 - iter 216/243 - loss 0.66061953 - time (sec): 400.39 - samples/sec: 175.13 - lr: 0.000029
|
376 |
-
2023-07-26 14:38:55,563 epoch 3 - iter 240/243 - loss 0.65094446 - time (sec): 444.82 - samples/sec: 174.77 - lr: 0.000030
|
377 |
-
2023-07-26 14:39:00,596 ----------------------------------------------------------------------------------------------------
|
378 |
-
2023-07-26 14:39:00,596 EPOCH 3 done: loss 0.6499 - lr 0.000030
|
379 |
-
2023-07-26 14:39:02,334 Evaluating as a multi-label problem: False
|
380 |
-
2023-07-26 14:39:02,376 DEV : loss 0.247285857796669 - f1-score (micro avg) 0.9518
|
381 |
-
2023-07-26 14:39:02,385 saving best model
|
382 |
-
2023-07-26 14:39:05,730 ----------------------------------------------------------------------------------------------------
|
383 |
-
2023-07-26 14:39:50,093 epoch 4 - iter 24/243 - loss 0.55472967 - time (sec): 44.36 - samples/sec: 176.66 - lr: 0.000031
|
384 |
-
2023-07-26 14:40:34,562 epoch 4 - iter 48/243 - loss 0.52360637 - time (sec): 88.83 - samples/sec: 175.54 - lr: 0.000032
|
385 |
-
2023-07-26 14:41:19,307 epoch 4 - iter 72/243 - loss 0.51655667 - time (sec): 133.58 - samples/sec: 174.54 - lr: 0.000033
|
386 |
-
2023-07-26 14:42:05,307 epoch 4 - iter 96/243 - loss 0.51891961 - time (sec): 179.58 - samples/sec: 173.86 - lr: 0.000034
|
387 |
-
2023-07-26 14:42:54,480 epoch 4 - iter 120/243 - loss 0.50631556 - time (sec): 228.75 - samples/sec: 171.40 - lr: 0.000035
|
388 |
-
2023-07-26 14:43:43,446 epoch 4 - iter 144/243 - loss 0.50459545 - time (sec): 277.72 - samples/sec: 168.74 - lr: 0.000036
|
389 |
-
2023-07-26 14:44:32,519 epoch 4 - iter 168/243 - loss 0.50045519 - time (sec): 326.79 - samples/sec: 167.35 - lr: 0.000037
|
390 |
-
2023-07-26 14:45:21,599 epoch 4 - iter 192/243 - loss 0.49446570 - time (sec): 375.87 - samples/sec: 166.24 - lr: 0.000038
|
391 |
-
2023-07-26 14:46:10,542 epoch 4 - iter 216/243 - loss 0.49218271 - time (sec): 424.81 - samples/sec: 165.38 - lr: 0.000039
|
392 |
-
2023-07-26 14:46:59,284 epoch 4 - iter 240/243 - loss 0.49159525 - time (sec): 473.55 - samples/sec: 164.09 - lr: 0.000040
|
393 |
-
2023-07-26 14:47:04,893 ----------------------------------------------------------------------------------------------------
|
394 |
-
2023-07-26 14:47:04,893 EPOCH 4 done: loss 0.4917 - lr 0.000040
|
395 |
-
2023-07-26 14:47:06,684 Evaluating as a multi-label problem: False
|
396 |
-
2023-07-26 14:47:06,726 DEV : loss 0.18006576597690582 - f1-score (micro avg) 0.9648
|
397 |
-
2023-07-26 14:47:06,736 saving best model
|
398 |
-
2023-07-26 14:47:10,014 ----------------------------------------------------------------------------------------------------
|
399 |
-
2023-07-26 14:47:54,932 epoch 5 - iter 24/243 - loss 0.45058356 - time (sec): 44.92 - samples/sec: 173.25 - lr: 0.000041
|
400 |
-
2023-07-26 14:48:41,950 epoch 5 - iter 48/243 - loss 0.43329992 - time (sec): 91.94 - samples/sec: 169.29 - lr: 0.000042
|
401 |
-
2023-07-26 14:49:33,377 epoch 5 - iter 72/243 - loss 0.43373609 - time (sec): 143.36 - samples/sec: 163.90 - lr: 0.000043
|
402 |
-
2023-07-26 14:50:24,178 epoch 5 - iter 96/243 - loss 0.43090189 - time (sec): 194.16 - samples/sec: 160.68 - lr: 0.000044
|
403 |
-
2023-07-26 14:51:14,713 epoch 5 - iter 120/243 - loss 0.42730629 - time (sec): 244.70 - samples/sec: 158.36 - lr: 0.000045
|
404 |
-
2023-07-26 14:52:05,519 epoch 5 - iter 144/243 - loss 0.42510607 - time (sec): 295.50 - samples/sec: 157.71 - lr: 0.000046
|
405 |
-
2023-07-26 14:52:56,269 epoch 5 - iter 168/243 - loss 0.42354677 - time (sec): 346.25 - samples/sec: 157.30 - lr: 0.000047
|
406 |
-
2023-07-26 14:53:45,024 epoch 5 - iter 192/243 - loss 0.42562343 - time (sec): 395.01 - samples/sec: 157.75 - lr: 0.000048
|
407 |
-
2023-07-26 14:54:29,614 epoch 5 - iter 216/243 - loss 0.42329549 - time (sec): 439.60 - samples/sec: 159.49 - lr: 0.000049
|
408 |
-
2023-07-26 14:55:14,101 epoch 5 - iter 240/243 - loss 0.42313631 - time (sec): 484.09 - samples/sec: 160.63 - lr: 0.000050
|
409 |
-
2023-07-26 14:55:19,182 ----------------------------------------------------------------------------------------------------
|
410 |
-
2023-07-26 14:55:19,183 EPOCH 5 done: loss 0.4224 - lr 0.000050
|
411 |
-
2023-07-26 14:55:20,964 Evaluating as a multi-label problem: False
|
412 |
-
2023-07-26 14:55:21,010 DEV : loss 0.15854212641716003 - f1-score (micro avg) 0.9715
|
413 |
-
2023-07-26 14:55:21,021 saving best model
|
414 |
-
2023-07-26 14:55:24,373 ----------------------------------------------------------------------------------------------------
|
415 |
-
2023-07-26 14:56:09,000 epoch 6 - iter 24/243 - loss 0.38322411 - time (sec): 44.63 - samples/sec: 170.24 - lr: 0.000050
|
416 |
-
2023-07-26 14:56:53,917 epoch 6 - iter 48/243 - loss 0.38879490 - time (sec): 89.54 - samples/sec: 173.84 - lr: 0.000050
|
417 |
-
2023-07-26 14:57:38,715 epoch 6 - iter 72/243 - loss 0.39501775 - time (sec): 134.34 - samples/sec: 173.59 - lr: 0.000050
|
418 |
-
2023-07-26 14:58:23,414 epoch 6 - iter 96/243 - loss 0.39125526 - time (sec): 179.04 - samples/sec: 172.72 - lr: 0.000050
|
419 |
-
2023-07-26 14:59:08,294 epoch 6 - iter 120/243 - loss 0.38810381 - time (sec): 223.92 - samples/sec: 173.39 - lr: 0.000049
|
420 |
-
2023-07-26 14:59:53,048 epoch 6 - iter 144/243 - loss 0.38859919 - time (sec): 268.67 - samples/sec: 173.20 - lr: 0.000049
|
421 |
-
2023-07-26 15:00:37,709 epoch 6 - iter 168/243 - loss 0.39183603 - time (sec): 313.34 - samples/sec: 172.54 - lr: 0.000049
|
422 |
-
2023-07-26 15:01:22,620 epoch 6 - iter 192/243 - loss 0.39172498 - time (sec): 358.25 - samples/sec: 173.10 - lr: 0.000049
|
423 |
-
2023-07-26 15:02:07,658 epoch 6 - iter 216/243 - loss 0.38755663 - time (sec): 403.28 - samples/sec: 173.50 - lr: 0.000049
|
424 |
-
2023-07-26 15:02:52,481 epoch 6 - iter 240/243 - loss 0.38859503 - time (sec): 448.11 - samples/sec: 173.42 - lr: 0.000049
|
425 |
-
2023-07-26 15:02:57,605 ----------------------------------------------------------------------------------------------------
|
426 |
-
2023-07-26 15:02:57,605 EPOCH 6 done: loss 0.3889 - lr 0.000049
|
427 |
-
2023-07-26 15:02:59,359 Evaluating as a multi-label problem: False
|
428 |
-
2023-07-26 15:02:59,401 DEV : loss 0.1478930115699768 - f1-score (micro avg) 0.9729
|
429 |
-
2023-07-26 15:02:59,411 saving best model
|
430 |
-
2023-07-26 15:03:02,642 ----------------------------------------------------------------------------------------------------
|
431 |
-
2023-07-26 15:03:47,204 epoch 7 - iter 24/243 - loss 0.37119833 - time (sec): 44.56 - samples/sec: 170.57 - lr: 0.000049
|
432 |
-
2023-07-26 15:04:32,257 epoch 7 - iter 48/243 - loss 0.34925497 - time (sec): 89.61 - samples/sec: 170.90 - lr: 0.000049
|
433 |
-
2023-07-26 15:05:17,152 epoch 7 - iter 72/243 - loss 0.36339135 - time (sec): 134.51 - samples/sec: 170.74 - lr: 0.000049
|
434 |
-
2023-07-26 15:06:02,168 epoch 7 - iter 96/243 - loss 0.36053250 - time (sec): 179.53 - samples/sec: 172.30 - lr: 0.000048
|
435 |
-
2023-07-26 15:06:47,283 epoch 7 - iter 120/243 - loss 0.36487615 - time (sec): 224.64 - samples/sec: 173.25 - lr: 0.000048
|
436 |
-
2023-07-26 15:07:32,276 epoch 7 - iter 144/243 - loss 0.36319947 - time (sec): 269.63 - samples/sec: 173.36 - lr: 0.000048
|
437 |
-
2023-07-26 15:08:17,184 epoch 7 - iter 168/243 - loss 0.36321272 - time (sec): 314.54 - samples/sec: 173.50 - lr: 0.000048
|
438 |
-
2023-07-26 15:09:02,085 epoch 7 - iter 192/243 - loss 0.36447693 - time (sec): 359.44 - samples/sec: 173.23 - lr: 0.000048
|
439 |
-
2023-07-26 15:09:51,228 epoch 7 - iter 216/243 - loss 0.36744951 - time (sec): 408.59 - samples/sec: 171.35 - lr: 0.000048
|
440 |
-
2023-07-26 15:10:40,287 epoch 7 - iter 240/243 - loss 0.36634157 - time (sec): 457.64 - samples/sec: 169.91 - lr: 0.000048
|
441 |
-
2023-07-26 15:10:45,862 ----------------------------------------------------------------------------------------------------
|
442 |
-
2023-07-26 15:10:45,863 EPOCH 7 done: loss 0.3670 - lr 0.000048
|
443 |
-
2023-07-26 15:10:47,681 Evaluating as a multi-label problem: False
|
444 |
-
2023-07-26 15:10:47,726 DEV : loss 0.14240729808807373 - f1-score (micro avg) 0.9717
|
445 |
-
2023-07-26 15:10:47,736 ----------------------------------------------------------------------------------------------------
|
446 |
-
2023-07-26 15:11:32,421 epoch 8 - iter 24/243 - loss 0.35991738 - time (sec): 44.68 - samples/sec: 171.16 - lr: 0.000048
|
447 |
-
2023-07-26 15:12:16,856 epoch 8 - iter 48/243 - loss 0.34897131 - time (sec): 89.12 - samples/sec: 171.01 - lr: 0.000048
|
448 |
-
2023-07-26 15:13:01,243 epoch 8 - iter 72/243 - loss 0.34258107 - time (sec): 133.51 - samples/sec: 171.82 - lr: 0.000047
|
449 |
-
2023-07-26 15:13:45,557 epoch 8 - iter 96/243 - loss 0.34457191 - time (sec): 177.82 - samples/sec: 171.15 - lr: 0.000047
|
450 |
-
2023-07-26 15:14:33,081 epoch 8 - iter 120/243 - loss 0.34507195 - time (sec): 225.34 - samples/sec: 168.78 - lr: 0.000047
|
451 |
-
2023-07-26 15:15:23,807 epoch 8 - iter 144/243 - loss 0.34828898 - time (sec): 276.07 - samples/sec: 167.52 - lr: 0.000047
|
452 |
-
2023-07-26 15:16:16,673 epoch 8 - iter 168/243 - loss 0.34938445 - time (sec): 328.94 - samples/sec: 163.83 - lr: 0.000047
|
453 |
-
2023-07-26 15:17:08,647 epoch 8 - iter 192/243 - loss 0.34862273 - time (sec): 380.91 - samples/sec: 162.58 - lr: 0.000047
|
454 |
-
2023-07-26 15:17:59,292 epoch 8 - iter 216/243 - loss 0.34977990 - time (sec): 431.56 - samples/sec: 161.50 - lr: 0.000047
|
455 |
-
2023-07-26 15:18:48,823 epoch 8 - iter 240/243 - loss 0.34875804 - time (sec): 481.09 - samples/sec: 161.18 - lr: 0.000047
|
456 |
-
2023-07-26 15:18:54,694 ----------------------------------------------------------------------------------------------------
|
457 |
-
2023-07-26 15:18:54,694 EPOCH 8 done: loss 0.3496 - lr 0.000047
|
458 |
-
2023-07-26 15:18:56,484 Evaluating as a multi-label problem: False
|
459 |
-
2023-07-26 15:18:56,526 DEV : loss 0.13401205837726593 - f1-score (micro avg) 0.9752
|
460 |
-
2023-07-26 15:18:56,536 saving best model
|
461 |
-
2023-07-26 15:18:59,887 ----------------------------------------------------------------------------------------------------
|
462 |
-
2023-07-26 15:19:45,875 epoch 9 - iter 24/243 - loss 0.33211277 - time (sec): 45.99 - samples/sec: 171.57 - lr: 0.000047
|
463 |
-
2023-07-26 15:20:33,843 epoch 9 - iter 48/243 - loss 0.33508629 - time (sec): 93.96 - samples/sec: 171.82 - lr: 0.000046
|
464 |
-
2023-07-26 15:21:26,038 epoch 9 - iter 72/243 - loss 0.32662985 - time (sec): 146.15 - samples/sec: 162.61 - lr: 0.000046
|
465 |
-
2023-07-26 15:22:17,368 epoch 9 - iter 96/243 - loss 0.32958645 - time (sec): 197.48 - samples/sec: 159.51 - lr: 0.000046
|
466 |
-
2023-07-26 15:23:08,277 epoch 9 - iter 120/243 - loss 0.32364185 - time (sec): 248.39 - samples/sec: 157.62 - lr: 0.000046
|
467 |
-
2023-07-26 15:23:59,015 epoch 9 - iter 144/243 - loss 0.32701429 - time (sec): 299.13 - samples/sec: 156.28 - lr: 0.000046
|
468 |
-
2023-07-26 15:24:49,851 epoch 9 - iter 168/243 - loss 0.33017416 - time (sec): 349.96 - samples/sec: 155.73 - lr: 0.000046
|
469 |
-
2023-07-26 15:25:40,830 epoch 9 - iter 192/243 - loss 0.33104299 - time (sec): 400.94 - samples/sec: 156.11 - lr: 0.000046
|
470 |
-
2023-07-26 15:26:30,943 epoch 9 - iter 216/243 - loss 0.33454509 - time (sec): 451.06 - samples/sec: 155.81 - lr: 0.000046
|
471 |
-
2023-07-26 15:27:20,164 epoch 9 - iter 240/243 - loss 0.33386278 - time (sec): 500.28 - samples/sec: 155.37 - lr: 0.000046
|
472 |
-
2023-07-26 15:27:25,781 ----------------------------------------------------------------------------------------------------
|
473 |
-
2023-07-26 15:27:25,782 EPOCH 9 done: loss 0.3329 - lr 0.000046
|
474 |
-
2023-07-26 15:27:27,595 Evaluating as a multi-label problem: False
|
475 |
-
2023-07-26 15:27:27,637 DEV : loss 0.14190562069416046 - f1-score (micro avg) 0.9764
|
476 |
-
2023-07-26 15:27:27,647 saving best model
|
477 |
-
2023-07-26 15:27:31,002 ----------------------------------------------------------------------------------------------------
|
478 |
-
2023-07-26 15:28:16,088 epoch 10 - iter 24/243 - loss 0.34002265 - time (sec): 45.09 - samples/sec: 170.28 - lr: 0.000045
|
479 |
-
2023-07-26 15:29:00,810 epoch 10 - iter 48/243 - loss 0.33540108 - time (sec): 89.81 - samples/sec: 172.64 - lr: 0.000045
|
480 |
-
2023-07-26 15:29:45,833 epoch 10 - iter 72/243 - loss 0.33399184 - time (sec): 134.83 - samples/sec: 173.50 - lr: 0.000045
|
481 |
-
2023-07-26 15:30:30,533 epoch 10 - iter 96/243 - loss 0.32469492 - time (sec): 179.53 - samples/sec: 173.83 - lr: 0.000045
|
482 |
-
2023-07-26 15:31:15,030 epoch 10 - iter 120/243 - loss 0.32910415 - time (sec): 224.03 - samples/sec: 173.44 - lr: 0.000045
|
483 |
-
2023-07-26 15:31:59,646 epoch 10 - iter 144/243 - loss 0.32899582 - time (sec): 268.64 - samples/sec: 173.64 - lr: 0.000045
|
484 |
-
2023-07-26 15:32:44,609 epoch 10 - iter 168/243 - loss 0.33093813 - time (sec): 313.61 - samples/sec: 174.48 - lr: 0.000045
|
485 |
-
2023-07-26 15:33:29,306 epoch 10 - iter 192/243 - loss 0.33208597 - time (sec): 358.30 - samples/sec: 173.78 - lr: 0.000045
|
486 |
-
2023-07-26 15:34:14,223 epoch 10 - iter 216/243 - loss 0.33175324 - time (sec): 403.22 - samples/sec: 174.07 - lr: 0.000045
|
487 |
-
2023-07-26 15:34:58,900 epoch 10 - iter 240/243 - loss 0.33262740 - time (sec): 447.90 - samples/sec: 173.56 - lr: 0.000044
|
488 |
-
2023-07-26 15:35:04,010 ----------------------------------------------------------------------------------------------------
|
489 |
-
2023-07-26 15:35:04,010 EPOCH 10 done: loss 0.3321 - lr 0.000044
|
490 |
-
2023-07-26 15:35:06,264 Evaluating as a multi-label problem: False
|
491 |
-
2023-07-26 15:35:06,306 DEV : loss 0.1481310874223709 - f1-score (micro avg) 0.9734
|
492 |
-
2023-07-26 15:35:06,316 ----------------------------------------------------------------------------------------------------
|
493 |
-
2023-07-26 15:35:51,091 epoch 11 - iter 24/243 - loss 0.33230355 - time (sec): 44.77 - samples/sec: 172.33 - lr: 0.000044
|
494 |
-
2023-07-26 15:36:36,125 epoch 11 - iter 48/243 - loss 0.32441123 - time (sec): 89.81 - samples/sec: 170.71 - lr: 0.000044
|
495 |
-
2023-07-26 15:37:25,279 epoch 11 - iter 72/243 - loss 0.32514673 - time (sec): 138.96 - samples/sec: 167.78 - lr: 0.000044
|
496 |
-
2023-07-26 15:38:10,516 epoch 11 - iter 96/243 - loss 0.32235685 - time (sec): 184.20 - samples/sec: 169.57 - lr: 0.000044
|
497 |
-
2023-07-26 15:38:58,115 epoch 11 - iter 120/243 - loss 0.31705674 - time (sec): 231.80 - samples/sec: 167.98 - lr: 0.000044
|
498 |
-
2023-07-26 15:39:45,447 epoch 11 - iter 144/243 - loss 0.31351156 - time (sec): 279.13 - samples/sec: 166.74 - lr: 0.000044
|
499 |
-
2023-07-26 15:40:32,843 epoch 11 - iter 168/243 - loss 0.31453443 - time (sec): 326.53 - samples/sec: 166.47 - lr: 0.000044
|
500 |
-
2023-07-26 15:41:20,505 epoch 11 - iter 192/243 - loss 0.32048855 - time (sec): 374.19 - samples/sec: 166.74 - lr: 0.000044
|
501 |
-
2023-07-26 15:42:08,594 epoch 11 - iter 216/243 - loss 0.31914298 - time (sec): 422.28 - samples/sec: 166.07 - lr: 0.000043
|
502 |
-
2023-07-26 15:42:58,015 epoch 11 - iter 240/243 - loss 0.31938530 - time (sec): 471.70 - samples/sec: 164.83 - lr: 0.000043
|
503 |
-
2023-07-26 15:43:03,640 ----------------------------------------------------------------------------------------------------
|
504 |
-
2023-07-26 15:43:03,640 EPOCH 11 done: loss 0.3201 - lr 0.000043
|
505 |
-
2023-07-26 15:43:05,491 Evaluating as a multi-label problem: False
|
506 |
-
2023-07-26 15:43:05,538 DEV : loss 0.16022486984729767 - f1-score (micro avg) 0.9744
|
507 |
-
2023-07-26 15:43:05,549 ----------------------------------------------------------------------------------------------------
|
508 |
-
2023-07-26 15:43:51,010 epoch 12 - iter 24/243 - loss 0.30634651 - time (sec): 45.46 - samples/sec: 169.22 - lr: 0.000043
|
509 |
-
2023-07-26 15:44:35,828 epoch 12 - iter 48/243 - loss 0.32055500 - time (sec): 90.28 - samples/sec: 169.40 - lr: 0.000043
|
510 |
-
2023-07-26 15:45:20,616 epoch 12 - iter 72/243 - loss 0.31591461 - time (sec): 135.07 - samples/sec: 170.20 - lr: 0.000043
|
511 |
-
2023-07-26 15:46:05,323 epoch 12 - iter 96/243 - loss 0.31720616 - time (sec): 179.77 - samples/sec: 171.25 - lr: 0.000043
|
512 |
-
2023-07-26 15:46:50,172 epoch 12 - iter 120/243 - loss 0.31877634 - time (sec): 224.62 - samples/sec: 172.25 - lr: 0.000043
|
513 |
-
2023-07-26 15:47:34,948 epoch 12 - iter 144/243 - loss 0.31817728 - time (sec): 269.40 - samples/sec: 172.60 - lr: 0.000043
|
514 |
-
2023-07-26 15:48:19,648 epoch 12 - iter 168/243 - loss 0.31409341 - time (sec): 314.10 - samples/sec: 173.20 - lr: 0.000043
|
515 |
-
2023-07-26 15:49:04,450 epoch 12 - iter 192/243 - loss 0.31475214 - time (sec): 358.90 - samples/sec: 172.72 - lr: 0.000042
|
516 |
-
2023-07-26 15:49:49,156 epoch 12 - iter 216/243 - loss 0.31439205 - time (sec): 403.61 - samples/sec: 173.13 - lr: 0.000042
|
517 |
-
2023-07-26 15:50:33,925 epoch 12 - iter 240/243 - loss 0.31462372 - time (sec): 448.38 - samples/sec: 173.38 - lr: 0.000042
|
518 |
-
2023-07-26 15:50:39,009 ----------------------------------------------------------------------------------------------------
|
519 |
-
2023-07-26 15:50:39,009 EPOCH 12 done: loss 0.3146 - lr 0.000042
|
520 |
-
2023-07-26 15:50:40,760 Evaluating as a multi-label problem: False
|
521 |
-
2023-07-26 15:50:40,803 DEV : loss 0.17038877308368683 - f1-score (micro avg) 0.9764
|
522 |
-
2023-07-26 15:50:40,813 ----------------------------------------------------------------------------------------------------
|
523 |
-
2023-07-26 15:51:25,228 epoch 13 - iter 24/243 - loss 0.30871471 - time (sec): 44.42 - samples/sec: 169.20 - lr: 0.000042
|
524 |
-
2023-07-26 15:52:09,735 epoch 13 - iter 48/243 - loss 0.30951571 - time (sec): 88.92 - samples/sec: 169.92 - lr: 0.000042
|
525 |
-
2023-07-26 15:52:54,713 epoch 13 - iter 72/243 - loss 0.30146253 - time (sec): 133.90 - samples/sec: 170.69 - lr: 0.000042
|
526 |
-
2023-07-26 15:53:39,688 epoch 13 - iter 96/243 - loss 0.29818491 - time (sec): 178.88 - samples/sec: 171.59 - lr: 0.000042
|
527 |
-
2023-07-26 15:54:24,347 epoch 13 - iter 120/243 - loss 0.29829818 - time (sec): 223.53 - samples/sec: 171.45 - lr: 0.000042
|
528 |
-
2023-07-26 15:55:09,312 epoch 13 - iter 144/243 - loss 0.31111593 - time (sec): 268.50 - samples/sec: 171.76 - lr: 0.000042
|
529 |
-
2023-07-26 15:55:54,240 epoch 13 - iter 168/243 - loss 0.31147702 - time (sec): 313.43 - samples/sec: 171.94 - lr: 0.000041
|
530 |
-
2023-07-26 15:56:39,090 epoch 13 - iter 192/243 - loss 0.30976085 - time (sec): 358.28 - samples/sec: 172.90 - lr: 0.000041
|
531 |
-
2023-07-26 15:57:24,278 epoch 13 - iter 216/243 - loss 0.30904370 - time (sec): 403.46 - samples/sec: 173.00 - lr: 0.000041
|
532 |
-
2023-07-26 15:58:09,133 epoch 13 - iter 240/243 - loss 0.30572837 - time (sec): 448.32 - samples/sec: 173.40 - lr: 0.000041
|
533 |
-
2023-07-26 15:58:14,202 ----------------------------------------------------------------------------------------------------
|
534 |
-
2023-07-26 15:58:14,202 EPOCH 13 done: loss 0.3056 - lr 0.000041
|
535 |
-
2023-07-26 15:58:15,991 Evaluating as a multi-label problem: False
|
536 |
-
2023-07-26 15:58:16,034 DEV : loss 0.16180633008480072 - f1-score (micro avg) 0.9766
|
537 |
-
2023-07-26 15:58:16,044 saving best model
|
538 |
-
2023-07-26 15:58:19,355 ----------------------------------------------------------------------------------------------------
|
539 |
-
2023-07-26 15:59:06,668 epoch 14 - iter 24/243 - loss 0.28577045 - time (sec): 47.31 - samples/sec: 164.50 - lr: 0.000041
|
540 |
-
2023-07-26 15:59:56,998 epoch 14 - iter 48/243 - loss 0.28369661 - time (sec): 97.64 - samples/sec: 158.27 - lr: 0.000041
|
541 |
-
2023-07-26 16:00:51,211 epoch 14 - iter 72/243 - loss 0.29071442 - time (sec): 151.86 - samples/sec: 153.53 - lr: 0.000041
|
542 |
-
2023-07-26 16:01:43,557 epoch 14 - iter 96/243 - loss 0.29219267 - time (sec): 204.20 - samples/sec: 154.01 - lr: 0.000041
|
543 |
-
2023-07-26 16:02:32,810 epoch 14 - iter 120/243 - loss 0.29452027 - time (sec): 253.45 - samples/sec: 154.42 - lr: 0.000041
|
544 |
-
2023-07-26 16:03:22,073 epoch 14 - iter 144/243 - loss 0.28860385 - time (sec): 302.72 - samples/sec: 154.60 - lr: 0.000040
|
545 |
-
2023-07-26 16:04:11,432 epoch 14 - iter 168/243 - loss 0.29040567 - time (sec): 352.08 - samples/sec: 155.10 - lr: 0.000040
|
546 |
-
2023-07-26 16:05:00,439 epoch 14 - iter 192/243 - loss 0.29057669 - time (sec): 401.08 - samples/sec: 155.56 - lr: 0.000040
|
547 |
-
2023-07-26 16:05:49,734 epoch 14 - iter 216/243 - loss 0.29351512 - time (sec): 450.38 - samples/sec: 155.77 - lr: 0.000040
|
548 |
-
2023-07-26 16:06:38,920 epoch 14 - iter 240/243 - loss 0.29475470 - time (sec): 499.56 - samples/sec: 155.77 - lr: 0.000040
|
549 |
-
2023-07-26 16:06:44,452 ----------------------------------------------------------------------------------------------------
|
550 |
-
2023-07-26 16:06:44,452 EPOCH 14 done: loss 0.2946 - lr 0.000040
|
551 |
-
2023-07-26 16:06:46,282 Evaluating as a multi-label problem: False
|
552 |
-
2023-07-26 16:06:46,328 DEV : loss 0.1961415857076645 - f1-score (micro avg) 0.9729
|
553 |
-
2023-07-26 16:06:46,338 ----------------------------------------------------------------------------------------------------
|
554 |
-
2023-07-26 16:07:31,298 epoch 15 - iter 24/243 - loss 0.32628632 - time (sec): 44.96 - samples/sec: 171.98 - lr: 0.000040
|
555 |
-
2023-07-26 16:08:21,094 epoch 15 - iter 48/243 - loss 0.30408958 - time (sec): 94.76 - samples/sec: 164.10 - lr: 0.000040
|
556 |
-
2023-07-26 16:09:15,364 epoch 15 - iter 72/243 - loss 0.29750206 - time (sec): 149.03 - samples/sec: 157.51 - lr: 0.000040
|
557 |
-
2023-07-26 16:10:06,024 epoch 15 - iter 96/243 - loss 0.29760832 - time (sec): 199.69 - samples/sec: 155.97 - lr: 0.000040
|
558 |
-
2023-07-26 16:10:56,205 epoch 15 - iter 120/243 - loss 0.29974418 - time (sec): 249.87 - samples/sec: 155.76 - lr: 0.000039
|
559 |
-
2023-07-26 16:11:43,301 epoch 15 - iter 144/243 - loss 0.29904887 - time (sec): 296.96 - samples/sec: 157.05 - lr: 0.000039
|
560 |
-
2023-07-26 16:12:31,170 epoch 15 - iter 168/243 - loss 0.29894209 - time (sec): 344.83 - samples/sec: 157.73 - lr: 0.000039
|
561 |
-
2023-07-26 16:13:20,187 epoch 15 - iter 192/243 - loss 0.29754010 - time (sec): 393.85 - samples/sec: 157.85 - lr: 0.000039
|
562 |
-
2023-07-26 16:14:09,012 epoch 15 - iter 216/243 - loss 0.29884402 - time (sec): 442.67 - samples/sec: 157.79 - lr: 0.000039
|
563 |
-
2023-07-26 16:14:57,878 epoch 15 - iter 240/243 - loss 0.29706337 - time (sec): 491.54 - samples/sec: 158.08 - lr: 0.000039
|
564 |
-
2023-07-26 16:15:03,351 ----------------------------------------------------------------------------------------------------
|
565 |
-
2023-07-26 16:15:03,351 EPOCH 15 done: loss 0.2971 - lr 0.000039
|
566 |
-
2023-07-26 16:15:05,134 Evaluating as a multi-label problem: False
|
567 |
-
2023-07-26 16:15:05,176 DEV : loss 0.21415923535823822 - f1-score (micro avg) 0.9737
|
568 |
-
2023-07-26 16:15:05,186 ----------------------------------------------------------------------------------------------------
|
569 |
-
2023-07-26 16:15:50,049 epoch 16 - iter 24/243 - loss 0.32918671 - time (sec): 44.86 - samples/sec: 172.79 - lr: 0.000039
|
570 |
-
2023-07-26 16:16:34,768 epoch 16 - iter 48/243 - loss 0.30668793 - time (sec): 89.58 - samples/sec: 172.52 - lr: 0.000039
|
571 |
-
2023-07-26 16:17:19,891 epoch 16 - iter 72/243 - loss 0.30165600 - time (sec): 134.70 - samples/sec: 171.72 - lr: 0.000039
|
572 |
-
2023-07-26 16:18:09,624 epoch 16 - iter 96/243 - loss 0.29977956 - time (sec): 184.44 - samples/sec: 168.02 - lr: 0.000038
|
573 |
-
2023-07-26 16:18:58,935 epoch 16 - iter 120/243 - loss 0.29035278 - time (sec): 233.75 - samples/sec: 165.52 - lr: 0.000038
|
574 |
-
2023-07-26 16:19:48,358 epoch 16 - iter 144/243 - loss 0.28688344 - time (sec): 283.17 - samples/sec: 164.52 - lr: 0.000038
|
575 |
-
2023-07-26 16:20:37,728 epoch 16 - iter 168/243 - loss 0.28573744 - time (sec): 332.54 - samples/sec: 163.65 - lr: 0.000038
|
576 |
-
2023-07-26 16:21:26,994 epoch 16 - iter 192/243 - loss 0.28483557 - time (sec): 381.81 - samples/sec: 162.65 - lr: 0.000038
|
577 |
-
2023-07-26 16:22:16,480 epoch 16 - iter 216/243 - loss 0.28487700 - time (sec): 431.29 - samples/sec: 162.23 - lr: 0.000038
|
578 |
-
2023-07-26 16:23:05,837 epoch 16 - iter 240/243 - loss 0.28570848 - time (sec): 480.65 - samples/sec: 161.78 - lr: 0.000038
|
579 |
-
2023-07-26 16:23:11,437 ----------------------------------------------------------------------------------------------------
|
580 |
-
2023-07-26 16:23:11,437 EPOCH 16 done: loss 0.2858 - lr 0.000038
|
581 |
-
2023-07-26 16:23:13,234 Evaluating as a multi-label problem: False
|
582 |
-
2023-07-26 16:23:13,276 DEV : loss 0.17488490045070648 - f1-score (micro avg) 0.9764
|
583 |
-
2023-07-26 16:23:13,286 ----------------------------------------------------------------------------------------------------
|
584 |
-
2023-07-26 16:23:58,069 epoch 17 - iter 24/243 - loss 0.28223418 - time (sec): 44.78 - samples/sec: 169.35 - lr: 0.000038
|
585 |
-
2023-07-26 16:24:42,914 epoch 17 - iter 48/243 - loss 0.28773045 - time (sec): 89.63 - samples/sec: 170.29 - lr: 0.000038
|
586 |
-
2023-07-26 16:25:28,001 epoch 17 - iter 72/243 - loss 0.28949629 - time (sec): 134.72 - samples/sec: 171.86 - lr: 0.000037
|
587 |
-
2023-07-26 16:26:12,604 epoch 17 - iter 96/243 - loss 0.29081122 - time (sec): 179.32 - samples/sec: 172.97 - lr: 0.000037
|
588 |
-
2023-07-26 16:26:57,287 epoch 17 - iter 120/243 - loss 0.28910214 - time (sec): 224.00 - samples/sec: 173.28 - lr: 0.000037
|
589 |
-
2023-07-26 16:27:41,994 epoch 17 - iter 144/243 - loss 0.28813940 - time (sec): 268.71 - samples/sec: 173.98 - lr: 0.000037
|
590 |
-
2023-07-26 16:28:26,701 epoch 17 - iter 168/243 - loss 0.28649377 - time (sec): 313.42 - samples/sec: 174.08 - lr: 0.000037
|
591 |
-
2023-07-26 16:29:11,540 epoch 17 - iter 192/243 - loss 0.28690817 - time (sec): 358.25 - samples/sec: 174.44 - lr: 0.000037
|
592 |
-
2023-07-26 16:29:56,114 epoch 17 - iter 216/243 - loss 0.28529445 - time (sec): 402.83 - samples/sec: 173.78 - lr: 0.000037
|
593 |
-
2023-07-26 16:30:40,993 epoch 17 - iter 240/243 - loss 0.28495055 - time (sec): 447.71 - samples/sec: 173.50 - lr: 0.000037
|
594 |
-
2023-07-26 16:30:46,121 ----------------------------------------------------------------------------------------------------
|
595 |
-
2023-07-26 16:30:46,122 EPOCH 17 done: loss 0.2845 - lr 0.000037
|
596 |
-
2023-07-26 16:30:47,874 Evaluating as a multi-label problem: False
|
597 |
-
2023-07-26 16:30:47,918 DEV : loss 0.1961992233991623 - f1-score (micro avg) 0.9764
|
598 |
-
2023-07-26 16:30:47,928 ----------------------------------------------------------------------------------------------------
|
599 |
-
2023-07-26 16:31:33,170 epoch 18 - iter 24/243 - loss 0.28778804 - time (sec): 45.24 - samples/sec: 183.77 - lr: 0.000037
|
600 |
-
2023-07-26 16:32:17,561 epoch 18 - iter 48/243 - loss 0.28633144 - time (sec): 89.63 - samples/sec: 178.16 - lr: 0.000036
|
601 |
-
2023-07-26 16:33:02,262 epoch 18 - iter 72/243 - loss 0.28829018 - time (sec): 134.33 - samples/sec: 176.29 - lr: 0.000036
|
602 |
-
2023-07-26 16:33:47,023 epoch 18 - iter 96/243 - loss 0.28737825 - time (sec): 179.10 - samples/sec: 176.55 - lr: 0.000036
|
603 |
-
2023-07-26 16:34:31,632 epoch 18 - iter 120/243 - loss 0.28870528 - time (sec): 223.70 - samples/sec: 176.96 - lr: 0.000036
|
604 |
-
2023-07-26 16:35:16,249 epoch 18 - iter 144/243 - loss 0.28536506 - time (sec): 268.32 - samples/sec: 176.48 - lr: 0.000036
|
605 |
-
2023-07-26 16:36:01,090 epoch 18 - iter 168/243 - loss 0.28612314 - time (sec): 313.16 - samples/sec: 175.92 - lr: 0.000036
|
606 |
-
2023-07-26 16:36:46,062 epoch 18 - iter 192/243 - loss 0.28681958 - time (sec): 358.13 - samples/sec: 174.98 - lr: 0.000036
|
607 |
-
2023-07-26 16:37:31,063 epoch 18 - iter 216/243 - loss 0.28815101 - time (sec): 403.14 - samples/sec: 174.53 - lr: 0.000036
|
608 |
-
2023-07-26 16:38:19,082 epoch 18 - iter 240/243 - loss 0.28697818 - time (sec): 451.15 - samples/sec: 172.46 - lr: 0.000036
|
609 |
-
2023-07-26 16:38:24,758 ----------------------------------------------------------------------------------------------------
|
610 |
-
2023-07-26 16:38:24,759 EPOCH 18 done: loss 0.2865 - lr 0.000036
|
611 |
-
2023-07-26 16:38:27,073 Evaluating as a multi-label problem: False
|
612 |
-
2023-07-26 16:38:27,115 DEV : loss 0.18113288283348083 - f1-score (micro avg) 0.9781
|
613 |
-
2023-07-26 16:38:27,126 saving best model
|
614 |
-
2023-07-26 16:38:30,288 ----------------------------------------------------------------------------------------------------
|
615 |
-
2023-07-26 16:39:21,782 epoch 19 - iter 24/243 - loss 0.28138164 - time (sec): 51.49 - samples/sec: 154.23 - lr: 0.000036
|
616 |
-
2023-07-26 16:40:12,650 epoch 19 - iter 48/243 - loss 0.28992986 - time (sec): 102.36 - samples/sec: 150.35 - lr: 0.000035
|
617 |
-
2023-07-26 16:41:02,164 epoch 19 - iter 72/243 - loss 0.28244605 - time (sec): 151.88 - samples/sec: 152.82 - lr: 0.000035
|
618 |
-
2023-07-26 16:41:52,390 epoch 19 - iter 96/243 - loss 0.28642854 - time (sec): 202.10 - samples/sec: 152.66 - lr: 0.000035
|
619 |
-
2023-07-26 16:42:44,635 epoch 19 - iter 120/243 - loss 0.28768114 - time (sec): 254.35 - samples/sec: 151.96 - lr: 0.000035
|
620 |
-
2023-07-26 16:43:33,907 epoch 19 - iter 144/243 - loss 0.28722806 - time (sec): 303.62 - samples/sec: 153.22 - lr: 0.000035
|
621 |
-
2023-07-26 16:44:23,000 epoch 19 - iter 168/243 - loss 0.28477685 - time (sec): 352.71 - samples/sec: 154.35 - lr: 0.000035
|
622 |
-
2023-07-26 16:45:11,847 epoch 19 - iter 192/243 - loss 0.28564618 - time (sec): 401.56 - samples/sec: 155.01 - lr: 0.000035
|
623 |
-
2023-07-26 16:46:00,662 epoch 19 - iter 216/243 - loss 0.28166734 - time (sec): 450.37 - samples/sec: 155.14 - lr: 0.000035
|
624 |
-
2023-07-26 16:46:49,519 epoch 19 - iter 240/243 - loss 0.28044622 - time (sec): 499.23 - samples/sec: 155.64 - lr: 0.000035
|
625 |
-
2023-07-26 16:46:55,052 ----------------------------------------------------------------------------------------------------
|
626 |
-
2023-07-26 16:46:55,052 EPOCH 19 done: loss 0.2808 - lr 0.000035
|
627 |
-
2023-07-26 16:46:56,840 Evaluating as a multi-label problem: False
|
628 |
-
2023-07-26 16:46:56,881 DEV : loss 0.2043328434228897 - f1-score (micro avg) 0.9793
|
629 |
-
2023-07-26 16:46:56,891 saving best model
|
630 |
-
2023-07-26 16:47:00,311 ----------------------------------------------------------------------------------------------------
|
631 |
-
2023-07-26 16:47:53,948 epoch 20 - iter 24/243 - loss 0.28666954 - time (sec): 53.64 - samples/sec: 145.20 - lr: 0.000034
|
632 |
-
2023-07-26 16:48:47,747 epoch 20 - iter 48/243 - loss 0.29481761 - time (sec): 107.44 - samples/sec: 143.54 - lr: 0.000034
|
633 |
-
2023-07-26 16:49:41,712 epoch 20 - iter 72/243 - loss 0.29914317 - time (sec): 161.40 - samples/sec: 143.67 - lr: 0.000034
|
634 |
-
2023-07-26 16:50:34,164 epoch 20 - iter 96/243 - loss 0.29393948 - time (sec): 213.85 - samples/sec: 144.15 - lr: 0.000034
|
635 |
-
2023-07-26 16:51:26,758 epoch 20 - iter 120/243 - loss 0.29259273 - time (sec): 266.45 - samples/sec: 144.69 - lr: 0.000034
|
636 |
-
2023-07-26 16:52:19,496 epoch 20 - iter 144/243 - loss 0.29189521 - time (sec): 319.18 - samples/sec: 145.56 - lr: 0.000034
|
637 |
-
2023-07-26 16:53:12,248 epoch 20 - iter 168/243 - loss 0.29174956 - time (sec): 371.94 - samples/sec: 146.27 - lr: 0.000034
|
638 |
-
2023-07-26 16:54:04,770 epoch 20 - iter 192/243 - loss 0.28991116 - time (sec): 424.46 - samples/sec: 146.24 - lr: 0.000034
|
639 |
-
2023-07-26 16:54:57,220 epoch 20 - iter 216/243 - loss 0.28908421 - time (sec): 476.91 - samples/sec: 146.03 - lr: 0.000034
|
640 |
-
2023-07-26 16:55:50,110 epoch 20 - iter 240/243 - loss 0.28802142 - time (sec): 529.80 - samples/sec: 146.82 - lr: 0.000033
|
641 |
-
2023-07-26 16:55:56,063 ----------------------------------------------------------------------------------------------------
|
642 |
-
2023-07-26 16:55:56,064 EPOCH 20 done: loss 0.2884 - lr 0.000033
|
643 |
-
2023-07-26 16:55:58,153 Evaluating as a multi-label problem: False
|
644 |
-
2023-07-26 16:55:58,197 DEV : loss 0.17976026237010956 - f1-score (micro avg) 0.9798
|
645 |
-
2023-07-26 16:55:58,210 saving best model
|
646 |
-
2023-07-26 16:56:01,163 ----------------------------------------------------------------------------------------------------
|
647 |
-
2023-07-26 16:56:45,917 epoch 21 - iter 24/243 - loss 0.27074814 - time (sec): 44.75 - samples/sec: 174.64 - lr: 0.000033
|
648 |
-
2023-07-26 16:57:30,503 epoch 21 - iter 48/243 - loss 0.27757152 - time (sec): 89.34 - samples/sec: 172.96 - lr: 0.000033
|
649 |
-
2023-07-26 16:58:15,097 epoch 21 - iter 72/243 - loss 0.27454337 - time (sec): 133.93 - samples/sec: 173.08 - lr: 0.000033
|
650 |
-
2023-07-26 16:58:59,717 epoch 21 - iter 96/243 - loss 0.27609707 - time (sec): 178.55 - samples/sec: 172.80 - lr: 0.000033
|
651 |
-
2023-07-26 16:59:44,372 epoch 21 - iter 120/243 - loss 0.27224083 - time (sec): 223.21 - samples/sec: 172.96 - lr: 0.000033
|
652 |
-
2023-07-26 17:00:29,083 epoch 21 - iter 144/243 - loss 0.27850149 - time (sec): 267.92 - samples/sec: 172.72 - lr: 0.000033
|
653 |
-
2023-07-26 17:01:13,636 epoch 21 - iter 168/243 - loss 0.27696398 - time (sec): 312.47 - samples/sec: 172.79 - lr: 0.000033
|
654 |
-
2023-07-26 17:01:58,291 epoch 21 - iter 192/243 - loss 0.27664755 - time (sec): 357.13 - samples/sec: 172.80 - lr: 0.000033
|
655 |
-
2023-07-26 17:02:43,178 epoch 21 - iter 216/243 - loss 0.27558848 - time (sec): 402.01 - samples/sec: 173.76 - lr: 0.000032
|
656 |
-
2023-07-26 17:03:27,865 epoch 21 - iter 240/243 - loss 0.27583214 - time (sec): 446.70 - samples/sec: 173.99 - lr: 0.000032
|
657 |
-
2023-07-26 17:03:32,964 ----------------------------------------------------------------------------------------------------
|
658 |
-
2023-07-26 17:03:32,964 EPOCH 21 done: loss 0.2761 - lr 0.000032
|
659 |
-
2023-07-26 17:03:34,719 Evaluating as a multi-label problem: False
|
660 |
-
2023-07-26 17:03:34,761 DEV : loss 0.20532046258449554 - f1-score (micro avg) 0.9808
|
661 |
-
2023-07-26 17:03:34,770 saving best model
|
662 |
-
2023-07-26 17:03:38,172 ----------------------------------------------------------------------------------------------------
|
663 |
-
2023-07-26 17:04:22,817 epoch 22 - iter 24/243 - loss 0.27909847 - time (sec): 44.64 - samples/sec: 173.08 - lr: 0.000032
|
664 |
-
2023-07-26 17:05:07,696 epoch 22 - iter 48/243 - loss 0.27692541 - time (sec): 89.52 - samples/sec: 175.94 - lr: 0.000032
|
665 |
-
2023-07-26 17:05:52,516 epoch 22 - iter 72/243 - loss 0.27632545 - time (sec): 134.34 - samples/sec: 175.36 - lr: 0.000032
|
666 |
-
2023-07-26 17:06:37,349 epoch 22 - iter 96/243 - loss 0.27607549 - time (sec): 179.18 - samples/sec: 175.31 - lr: 0.000032
|
667 |
-
2023-07-26 17:07:22,028 epoch 22 - iter 120/243 - loss 0.27687957 - time (sec): 223.85 - samples/sec: 175.34 - lr: 0.000032
|
668 |
-
2023-07-26 17:08:06,628 epoch 22 - iter 144/243 - loss 0.27294774 - time (sec): 268.46 - samples/sec: 174.93 - lr: 0.000032
|
669 |
-
2023-07-26 17:08:51,184 epoch 22 - iter 168/243 - loss 0.27391471 - time (sec): 313.01 - samples/sec: 174.15 - lr: 0.000032
|
670 |
-
2023-07-26 17:09:35,805 epoch 22 - iter 192/243 - loss 0.27352263 - time (sec): 357.63 - samples/sec: 174.01 - lr: 0.000031
|
671 |
-
2023-07-26 17:10:20,566 epoch 22 - iter 216/243 - loss 0.27144978 - time (sec): 402.39 - samples/sec: 174.04 - lr: 0.000031
|
672 |
-
2023-07-26 17:11:05,178 epoch 22 - iter 240/243 - loss 0.27338785 - time (sec): 447.01 - samples/sec: 173.85 - lr: 0.000031
|
673 |
-
2023-07-26 17:11:10,275 ----------------------------------------------------------------------------------------------------
|
674 |
-
2023-07-26 17:11:10,275 EPOCH 22 done: loss 0.2738 - lr 0.000031
|
675 |
-
2023-07-26 17:11:12,042 Evaluating as a multi-label problem: False
|
676 |
-
2023-07-26 17:11:12,084 DEV : loss 0.20975473523139954 - f1-score (micro avg) 0.9771
|
677 |
-
2023-07-26 17:11:12,094 ----------------------------------------------------------------------------------------------------
|
678 |
-
2023-07-26 17:11:57,033 epoch 23 - iter 24/243 - loss 0.28534317 - time (sec): 44.94 - samples/sec: 175.57 - lr: 0.000031
|
679 |
-
2023-07-26 17:12:41,709 epoch 23 - iter 48/243 - loss 0.28084455 - time (sec): 89.61 - samples/sec: 173.42 - lr: 0.000031
|
680 |
-
2023-07-26 17:13:26,426 epoch 23 - iter 72/243 - loss 0.28011749 - time (sec): 134.33 - samples/sec: 173.68 - lr: 0.000031
|
681 |
-
2023-07-26 17:14:10,996 epoch 23 - iter 96/243 - loss 0.28443955 - time (sec): 178.90 - samples/sec: 173.25 - lr: 0.000031
|
682 |
-
2023-07-26 17:14:55,898 epoch 23 - iter 120/243 - loss 0.28290269 - time (sec): 223.80 - samples/sec: 173.90 - lr: 0.000031
|
683 |
-
2023-07-26 17:15:40,508 epoch 23 - iter 144/243 - loss 0.28079246 - time (sec): 268.41 - samples/sec: 173.44 - lr: 0.000031
|
684 |
-
2023-07-26 17:16:25,384 epoch 23 - iter 168/243 - loss 0.27982769 - time (sec): 313.29 - samples/sec: 173.93 - lr: 0.000030
|
685 |
-
2023-07-26 17:17:10,020 epoch 23 - iter 192/243 - loss 0.27685678 - time (sec): 357.93 - samples/sec: 173.50 - lr: 0.000030
|
686 |
-
2023-07-26 17:17:54,847 epoch 23 - iter 216/243 - loss 0.27359946 - time (sec): 402.75 - samples/sec: 173.94 - lr: 0.000030
|
687 |
-
2023-07-26 17:18:39,474 epoch 23 - iter 240/243 - loss 0.27378796 - time (sec): 447.38 - samples/sec: 173.62 - lr: 0.000030
|
688 |
-
2023-07-26 17:18:44,594 ----------------------------------------------------------------------------------------------------
|
689 |
-
2023-07-26 17:18:44,594 EPOCH 23 done: loss 0.2739 - lr 0.000030
|
690 |
-
2023-07-26 17:18:46,344 Evaluating as a multi-label problem: False
|
691 |
-
2023-07-26 17:18:46,386 DEV : loss 0.21456189453601837 - f1-score (micro avg) 0.9796
|
692 |
-
2023-07-26 17:18:46,395 ----------------------------------------------------------------------------------------------------
|
693 |
-
2023-07-26 17:19:31,051 epoch 24 - iter 24/243 - loss 0.28123621 - time (sec): 44.66 - samples/sec: 168.56 - lr: 0.000030
|
694 |
-
2023-07-26 17:20:15,553 epoch 24 - iter 48/243 - loss 0.27128197 - time (sec): 89.16 - samples/sec: 168.93 - lr: 0.000030
|
695 |
-
2023-07-26 17:21:00,218 epoch 24 - iter 72/243 - loss 0.26742573 - time (sec): 133.82 - samples/sec: 169.68 - lr: 0.000030
|
696 |
-
2023-07-26 17:21:44,804 epoch 24 - iter 96/243 - loss 0.27426501 - time (sec): 178.41 - samples/sec: 170.21 - lr: 0.000030
|
697 |
-
2023-07-26 17:22:29,693 epoch 24 - iter 120/243 - loss 0.26958800 - time (sec): 223.30 - samples/sec: 171.86 - lr: 0.000030
|
698 |
-
2023-07-26 17:23:14,736 epoch 24 - iter 144/243 - loss 0.27011544 - time (sec): 268.34 - samples/sec: 174.09 - lr: 0.000029
|
699 |
-
2023-07-26 17:23:59,891 epoch 24 - iter 168/243 - loss 0.26573691 - time (sec): 313.50 - samples/sec: 173.54 - lr: 0.000029
|
700 |
-
2023-07-26 17:24:44,440 epoch 24 - iter 192/243 - loss 0.26424698 - time (sec): 358.04 - samples/sec: 173.71 - lr: 0.000029
|
701 |
-
2023-07-26 17:25:28,792 epoch 24 - iter 216/243 - loss 0.26555746 - time (sec): 402.40 - samples/sec: 173.43 - lr: 0.000029
|
702 |
-
2023-07-26 17:26:13,338 epoch 24 - iter 240/243 - loss 0.26918457 - time (sec): 446.94 - samples/sec: 173.77 - lr: 0.000029
|
703 |
-
2023-07-26 17:26:18,446 ----------------------------------------------------------------------------------------------------
|
704 |
-
2023-07-26 17:26:18,447 EPOCH 24 done: loss 0.2696 - lr 0.000029
|
705 |
-
2023-07-26 17:26:20,206 Evaluating as a multi-label problem: False
|
706 |
-
2023-07-26 17:26:20,252 DEV : loss 0.21408958733081818 - f1-score (micro avg) 0.9788
|
707 |
-
2023-07-26 17:26:20,263 ----------------------------------------------------------------------------------------------------
|
708 |
-
2023-07-26 17:27:04,792 epoch 25 - iter 24/243 - loss 0.26057600 - time (sec): 44.53 - samples/sec: 175.66 - lr: 0.000029
|
709 |
-
2023-07-26 17:27:49,230 epoch 25 - iter 48/243 - loss 0.25988897 - time (sec): 88.97 - samples/sec: 175.54 - lr: 0.000029
|
710 |
-
2023-07-26 17:28:34,576 epoch 25 - iter 72/243 - loss 0.26336622 - time (sec): 134.31 - samples/sec: 174.81 - lr: 0.000029
|
711 |
-
2023-07-26 17:29:19,911 epoch 25 - iter 96/243 - loss 0.26126366 - time (sec): 179.65 - samples/sec: 174.70 - lr: 0.000029
|
712 |
-
2023-07-26 17:30:05,863 epoch 25 - iter 120/243 - loss 0.26114761 - time (sec): 225.60 - samples/sec: 173.32 - lr: 0.000028
|
713 |
-
2023-07-26 17:30:54,836 epoch 25 - iter 144/243 - loss 0.26019042 - time (sec): 274.57 - samples/sec: 170.53 - lr: 0.000028
|
714 |
-
2023-07-26 17:31:44,973 epoch 25 - iter 168/243 - loss 0.26060643 - time (sec): 324.71 - samples/sec: 168.00 - lr: 0.000028
|
715 |
-
2023-07-26 17:32:34,267 epoch 25 - iter 192/243 - loss 0.26158525 - time (sec): 374.00 - samples/sec: 167.12 - lr: 0.000028
|
716 |
-
2023-07-26 17:33:23,148 epoch 25 - iter 216/243 - loss 0.25965178 - time (sec): 422.89 - samples/sec: 165.83 - lr: 0.000028
|
717 |
-
2023-07-26 17:34:11,558 epoch 25 - iter 240/243 - loss 0.25991617 - time (sec): 471.29 - samples/sec: 165.08 - lr: 0.000028
|
718 |
-
2023-07-26 17:34:17,049 ----------------------------------------------------------------------------------------------------
|
719 |
-
2023-07-26 17:34:17,049 EPOCH 25 done: loss 0.2605 - lr 0.000028
|
720 |
-
2023-07-26 17:34:18,858 Evaluating as a multi-label problem: False
|
721 |
-
2023-07-26 17:34:18,901 DEV : loss 0.20778048038482666 - f1-score (micro avg) 0.9801
|
722 |
-
2023-07-26 17:34:18,911 ----------------------------------------------------------------------------------------------------
|
723 |
-
2023-07-26 17:35:06,124 epoch 26 - iter 24/243 - loss 0.25028245 - time (sec): 47.21 - samples/sec: 162.56 - lr: 0.000028
|
724 |
-
2023-07-26 17:35:56,450 epoch 26 - iter 48/243 - loss 0.26759368 - time (sec): 97.54 - samples/sec: 159.39 - lr: 0.000028
|
725 |
-
2023-07-26 17:36:48,254 epoch 26 - iter 72/243 - loss 0.26240750 - time (sec): 149.34 - samples/sec: 155.33 - lr: 0.000028
|
726 |
-
2023-07-26 17:37:37,860 epoch 26 - iter 96/243 - loss 0.26499737 - time (sec): 198.95 - samples/sec: 155.95 - lr: 0.000027
|
727 |
-
2023-07-26 17:38:26,594 epoch 26 - iter 120/243 - loss 0.26765442 - time (sec): 247.68 - samples/sec: 155.61 - lr: 0.000027
|
728 |
-
2023-07-26 17:39:15,951 epoch 26 - iter 144/243 - loss 0.26496660 - time (sec): 297.04 - samples/sec: 155.98 - lr: 0.000027
|
729 |
-
2023-07-26 17:40:04,512 epoch 26 - iter 168/243 - loss 0.26407033 - time (sec): 345.60 - samples/sec: 157.09 - lr: 0.000027
|
730 |
-
2023-07-26 17:40:52,402 epoch 26 - iter 192/243 - loss 0.26463487 - time (sec): 393.49 - samples/sec: 157.76 - lr: 0.000027
|
731 |
-
2023-07-26 17:41:40,356 epoch 26 - iter 216/243 - loss 0.26192074 - time (sec): 441.45 - samples/sec: 158.66 - lr: 0.000027
|
732 |
-
2023-07-26 17:42:28,247 epoch 26 - iter 240/243 - loss 0.26299030 - time (sec): 489.34 - samples/sec: 158.86 - lr: 0.000027
|
733 |
-
2023-07-26 17:42:33,832 ----------------------------------------------------------------------------------------------------
|
734 |
-
2023-07-26 17:42:33,833 EPOCH 26 done: loss 0.2631 - lr 0.000027
|
735 |
-
2023-07-26 17:42:35,630 Evaluating as a multi-label problem: False
|
736 |
-
2023-07-26 17:42:35,672 DEV : loss 0.22401468455791473 - f1-score (micro avg) 0.9786
|
737 |
-
2023-07-26 17:42:35,682 ----------------------------------------------------------------------------------------------------
|
738 |
-
2023-07-26 17:43:20,454 epoch 27 - iter 24/243 - loss 0.26639657 - time (sec): 44.77 - samples/sec: 182.81 - lr: 0.000027
|
739 |
-
2023-07-26 17:44:04,934 epoch 27 - iter 48/243 - loss 0.27451501 - time (sec): 89.25 - samples/sec: 178.70 - lr: 0.000027
|
740 |
-
2023-07-26 17:44:49,425 epoch 27 - iter 72/243 - loss 0.27289399 - time (sec): 133.74 - samples/sec: 176.62 - lr: 0.000026
|
741 |
-
2023-07-26 17:45:33,681 epoch 27 - iter 96/243 - loss 0.27091536 - time (sec): 178.00 - samples/sec: 175.50 - lr: 0.000026
|
742 |
-
2023-07-26 17:46:18,171 epoch 27 - iter 120/243 - loss 0.27191898 - time (sec): 222.49 - samples/sec: 173.82 - lr: 0.000026
|
743 |
-
2023-07-26 17:47:02,640 epoch 27 - iter 144/243 - loss 0.27013358 - time (sec): 266.96 - samples/sec: 173.92 - lr: 0.000026
|
744 |
-
2023-07-26 17:47:47,032 epoch 27 - iter 168/243 - loss 0.26766038 - time (sec): 311.35 - samples/sec: 173.58 - lr: 0.000026
|
745 |
-
2023-07-26 17:48:33,707 epoch 27 - iter 192/243 - loss 0.26602770 - time (sec): 358.02 - samples/sec: 173.06 - lr: 0.000026
|
746 |
-
2023-07-26 17:49:21,690 epoch 27 - iter 216/243 - loss 0.26757355 - time (sec): 406.01 - samples/sec: 171.85 - lr: 0.000026
|
747 |
-
2023-07-26 17:50:09,653 epoch 27 - iter 240/243 - loss 0.26544815 - time (sec): 453.97 - samples/sec: 171.19 - lr: 0.000026
|
748 |
-
2023-07-26 17:50:15,122 ----------------------------------------------------------------------------------------------------
|
749 |
-
2023-07-26 17:50:15,123 EPOCH 27 done: loss 0.2656 - lr 0.000026
|
750 |
-
2023-07-26 17:50:17,372 Evaluating as a multi-label problem: False
|
751 |
-
2023-07-26 17:50:17,414 DEV : loss 0.2324327975511551 - f1-score (micro avg) 0.9771
|
752 |
-
2023-07-26 17:50:17,424 ----------------------------------------------------------------------------------------------------
|
753 |
-
2023-07-26 17:51:02,154 epoch 28 - iter 24/243 - loss 0.26044359 - time (sec): 44.73 - samples/sec: 177.24 - lr: 0.000026
|
754 |
-
2023-07-26 17:51:46,725 epoch 28 - iter 48/243 - loss 0.25192260 - time (sec): 89.30 - samples/sec: 175.55 - lr: 0.000025
|
755 |
-
2023-07-26 17:52:31,357 epoch 28 - iter 72/243 - loss 0.24867911 - time (sec): 133.93 - samples/sec: 175.88 - lr: 0.000025
|
756 |
-
2023-07-26 17:53:15,933 epoch 28 - iter 96/243 - loss 0.25204485 - time (sec): 178.51 - samples/sec: 175.73 - lr: 0.000025
|
757 |
-
2023-07-26 17:54:00,443 epoch 28 - iter 120/243 - loss 0.24981817 - time (sec): 223.02 - samples/sec: 174.90 - lr: 0.000025
|
758 |
-
2023-07-26 17:54:44,958 epoch 28 - iter 144/243 - loss 0.25157168 - time (sec): 267.53 - samples/sec: 174.46 - lr: 0.000025
|
759 |
-
2023-07-26 17:55:29,493 epoch 28 - iter 168/243 - loss 0.25440998 - time (sec): 312.07 - samples/sec: 174.04 - lr: 0.000025
|
760 |
-
2023-07-26 17:56:13,998 epoch 28 - iter 192/243 - loss 0.25791455 - time (sec): 356.57 - samples/sec: 174.06 - lr: 0.000025
|
761 |
-
2023-07-26 17:56:58,663 epoch 28 - iter 216/243 - loss 0.26113615 - time (sec): 401.24 - samples/sec: 173.82 - lr: 0.000025
|
762 |
-
2023-07-26 17:57:43,598 epoch 28 - iter 240/243 - loss 0.26254906 - time (sec): 446.17 - samples/sec: 174.40 - lr: 0.000025
|
763 |
-
2023-07-26 17:57:48,629 ----------------------------------------------------------------------------------------------------
|
764 |
-
2023-07-26 17:57:48,629 EPOCH 28 done: loss 0.2628 - lr 0.000025
|
765 |
-
2023-07-26 17:57:50,384 Evaluating as a multi-label problem: False
|
766 |
-
2023-07-26 17:57:50,427 DEV : loss 0.21640333533287048 - f1-score (micro avg) 0.9803
|
767 |
-
2023-07-26 17:57:50,437 ----------------------------------------------------------------------------------------------------
|
768 |
-
2023-07-26 17:58:34,969 epoch 29 - iter 24/243 - loss 0.24833162 - time (sec): 44.53 - samples/sec: 173.47 - lr: 0.000024
|
769 |
-
2023-07-26 17:59:19,469 epoch 29 - iter 48/243 - loss 0.25554505 - time (sec): 89.03 - samples/sec: 173.26 - lr: 0.000024
|
770 |
-
2023-07-26 18:00:04,033 epoch 29 - iter 72/243 - loss 0.26313723 - time (sec): 133.60 - samples/sec: 173.10 - lr: 0.000024
|
771 |
-
2023-07-26 18:00:48,651 epoch 29 - iter 96/243 - loss 0.26456129 - time (sec): 178.21 - samples/sec: 173.90 - lr: 0.000024
|
772 |
-
2023-07-26 18:01:33,121 epoch 29 - iter 120/243 - loss 0.26539430 - time (sec): 222.68 - samples/sec: 173.48 - lr: 0.000024
|
773 |
-
2023-07-26 18:02:17,661 epoch 29 - iter 144/243 - loss 0.26756174 - time (sec): 267.22 - samples/sec: 173.79 - lr: 0.000024
|
774 |
-
2023-07-26 18:03:02,505 epoch 29 - iter 168/243 - loss 0.26309703 - time (sec): 312.07 - samples/sec: 174.46 - lr: 0.000024
|
775 |
-
2023-07-26 18:03:46,972 epoch 29 - iter 192/243 - loss 0.26532971 - time (sec): 356.53 - samples/sec: 173.68 - lr: 0.000024
|
776 |
-
2023-07-26 18:04:31,621 epoch 29 - iter 216/243 - loss 0.26648227 - time (sec): 401.18 - samples/sec: 173.71 - lr: 0.000024
|
777 |
-
2023-07-26 18:05:16,534 epoch 29 - iter 240/243 - loss 0.26528743 - time (sec): 446.10 - samples/sec: 174.44 - lr: 0.000023
|
778 |
-
2023-07-26 18:05:21,587 ----------------------------------------------------------------------------------------------------
|
779 |
-
2023-07-26 18:05:21,587 EPOCH 29 done: loss 0.2655 - lr 0.000023
|
780 |
-
2023-07-26 18:05:23,600 Evaluating as a multi-label problem: False
|
781 |
-
2023-07-26 18:05:23,646 DEV : loss 0.24248327314853668 - f1-score (micro avg) 0.9796
|
782 |
-
2023-07-26 18:05:23,660 ----------------------------------------------------------------------------------------------------
|
783 |
-
2023-07-26 18:06:12,165 epoch 30 - iter 24/243 - loss 0.26154968 - time (sec): 48.51 - samples/sec: 161.26 - lr: 0.000023
|
784 |
-
2023-07-26 18:07:02,942 epoch 30 - iter 48/243 - loss 0.27126768 - time (sec): 99.28 - samples/sec: 157.70 - lr: 0.000023
|
785 |
-
2023-07-26 18:07:57,979 epoch 30 - iter 72/243 - loss 0.27468039 - time (sec): 154.32 - samples/sec: 150.27 - lr: 0.000023
|
786 |
-
2023-07-26 18:08:53,322 epoch 30 - iter 96/243 - loss 0.27662270 - time (sec): 209.66 - samples/sec: 147.69 - lr: 0.000023
|
787 |
-
2023-07-26 18:09:48,639 epoch 30 - iter 120/243 - loss 0.27403633 - time (sec): 264.98 - samples/sec: 145.88 - lr: 0.000023
|
788 |
-
2023-07-26 18:10:40,050 epoch 30 - iter 144/243 - loss 0.27461637 - time (sec): 316.39 - samples/sec: 146.57 - lr: 0.000023
|
789 |
-
2023-07-26 18:11:29,552 epoch 30 - iter 168/243 - loss 0.26994770 - time (sec): 365.89 - samples/sec: 148.67 - lr: 0.000023
|
790 |
-
2023-07-26 18:12:18,746 epoch 30 - iter 192/243 - loss 0.26952319 - time (sec): 415.09 - samples/sec: 150.29 - lr: 0.000023
|
791 |
-
2023-07-26 18:13:07,757 epoch 30 - iter 216/243 - loss 0.26556592 - time (sec): 464.10 - samples/sec: 151.23 - lr: 0.000022
|
792 |
-
2023-07-26 18:13:56,449 epoch 30 - iter 240/243 - loss 0.26521277 - time (sec): 512.79 - samples/sec: 151.74 - lr: 0.000022
|
793 |
-
2023-07-26 18:14:01,871 ----------------------------------------------------------------------------------------------------
|
794 |
-
2023-07-26 18:14:01,871 EPOCH 30 done: loss 0.2653 - lr 0.000022
|
795 |
-
2023-07-26 18:14:03,693 Evaluating as a multi-label problem: False
|
796 |
-
2023-07-26 18:14:03,735 DEV : loss 0.23393450677394867 - f1-score (micro avg) 0.9776
|
797 |
-
2023-07-26 18:14:03,746 ----------------------------------------------------------------------------------------------------
|
798 |
-
2023-07-26 18:14:48,764 epoch 31 - iter 24/243 - loss 0.24073944 - time (sec): 45.02 - samples/sec: 179.77 - lr: 0.000022
|
799 |
-
2023-07-26 18:15:33,209 epoch 31 - iter 48/243 - loss 0.24507990 - time (sec): 89.46 - samples/sec: 173.00 - lr: 0.000022
|
800 |
-
2023-07-26 18:16:17,809 epoch 31 - iter 72/243 - loss 0.25127541 - time (sec): 134.06 - samples/sec: 173.96 - lr: 0.000022
|
801 |
-
2023-07-26 18:17:02,650 epoch 31 - iter 96/243 - loss 0.25526836 - time (sec): 178.90 - samples/sec: 175.19 - lr: 0.000022
|
802 |
-
2023-07-26 18:17:47,365 epoch 31 - iter 120/243 - loss 0.25884615 - time (sec): 223.62 - samples/sec: 174.90 - lr: 0.000022
|
803 |
-
2023-07-26 18:18:32,093 epoch 31 - iter 144/243 - loss 0.26107421 - time (sec): 268.35 - samples/sec: 174.71 - lr: 0.000022
|
804 |
-
2023-07-26 18:19:16,568 epoch 31 - iter 168/243 - loss 0.25772191 - time (sec): 312.82 - samples/sec: 174.07 - lr: 0.000022
|
805 |
-
2023-07-26 18:20:01,232 epoch 31 - iter 192/243 - loss 0.25843953 - time (sec): 357.49 - samples/sec: 174.12 - lr: 0.000021
|
806 |
-
2023-07-26 18:20:46,098 epoch 31 - iter 216/243 - loss 0.25940033 - time (sec): 402.35 - samples/sec: 174.28 - lr: 0.000021
|
807 |
-
2023-07-26 18:21:30,680 epoch 31 - iter 240/243 - loss 0.25924131 - time (sec): 446.93 - samples/sec: 173.95 - lr: 0.000021
|
808 |
-
2023-07-26 18:21:35,753 ----------------------------------------------------------------------------------------------------
|
809 |
-
2023-07-26 18:21:35,753 EPOCH 31 done: loss 0.2594 - lr 0.000021
|
810 |
-
2023-07-26 18:21:37,502 Evaluating as a multi-label problem: False
|
811 |
-
2023-07-26 18:21:37,544 DEV : loss 0.22774212062358856 - f1-score (micro avg) 0.9788
|
812 |
-
2023-07-26 18:21:37,554 ----------------------------------------------------------------------------------------------------
|
813 |
-
2023-07-26 18:22:22,282 epoch 32 - iter 24/243 - loss 0.25476998 - time (sec): 44.73 - samples/sec: 179.17 - lr: 0.000021
|
814 |
-
2023-07-26 18:23:07,025 epoch 32 - iter 48/243 - loss 0.25629909 - time (sec): 89.47 - samples/sec: 178.31 - lr: 0.000021
|
815 |
-
2023-07-26 18:23:51,761 epoch 32 - iter 72/243 - loss 0.25739595 - time (sec): 134.21 - samples/sec: 177.13 - lr: 0.000021
|
816 |
-
2023-07-26 18:24:36,312 epoch 32 - iter 96/243 - loss 0.26207122 - time (sec): 178.76 - samples/sec: 175.24 - lr: 0.000021
|
817 |
-
2023-07-26 18:25:20,955 epoch 32 - iter 120/243 - loss 0.26238445 - time (sec): 223.40 - samples/sec: 175.45 - lr: 0.000021
|
818 |
-
2023-07-26 18:26:05,680 epoch 32 - iter 144/243 - loss 0.26421827 - time (sec): 268.13 - samples/sec: 174.45 - lr: 0.000021
|
819 |
-
2023-07-26 18:26:50,600 epoch 32 - iter 168/243 - loss 0.26554256 - time (sec): 313.05 - samples/sec: 175.05 - lr: 0.000020
|
820 |
-
2023-07-26 18:27:37,550 epoch 32 - iter 192/243 - loss 0.26682748 - time (sec): 360.00 - samples/sec: 173.67 - lr: 0.000020
|
821 |
-
2023-07-26 18:28:26,938 epoch 32 - iter 216/243 - loss 0.26495455 - time (sec): 409.38 - samples/sec: 172.06 - lr: 0.000020
|
822 |
-
2023-07-26 18:29:15,763 epoch 32 - iter 240/243 - loss 0.26526827 - time (sec): 458.21 - samples/sec: 169.70 - lr: 0.000020
|
823 |
-
2023-07-26 18:29:21,316 ----------------------------------------------------------------------------------------------------
|
824 |
-
2023-07-26 18:29:21,316 EPOCH 32 done: loss 0.2646 - lr 0.000020
|
825 |
-
2023-07-26 18:29:23,143 Evaluating as a multi-label problem: False
|
826 |
-
2023-07-26 18:29:23,187 DEV : loss 0.22920973598957062 - f1-score (micro avg) 0.9793
|
827 |
-
2023-07-26 18:29:23,197 ----------------------------------------------------------------------------------------------------
|
828 |
-
2023-07-26 18:30:10,600 epoch 33 - iter 24/243 - loss 0.26866868 - time (sec): 47.40 - samples/sec: 165.35 - lr: 0.000020
|
829 |
-
2023-07-26 18:30:58,341 epoch 33 - iter 48/243 - loss 0.25914800 - time (sec): 95.14 - samples/sec: 161.12 - lr: 0.000020
|
830 |
-
2023-07-26 18:31:46,238 epoch 33 - iter 72/243 - loss 0.25631313 - time (sec): 143.04 - samples/sec: 161.25 - lr: 0.000020
|
831 |
-
2023-07-26 18:32:38,739 epoch 33 - iter 96/243 - loss 0.25455371 - time (sec): 195.54 - samples/sec: 158.90 - lr: 0.000020
|
832 |
-
2023-07-26 18:33:26,705 epoch 33 - iter 120/243 - loss 0.25585405 - time (sec): 243.51 - samples/sec: 159.48 - lr: 0.000020
|
833 |
-
2023-07-26 18:34:14,895 epoch 33 - iter 144/243 - loss 0.25945055 - time (sec): 291.70 - samples/sec: 159.74 - lr: 0.000019
|
834 |
-
2023-07-26 18:35:02,659 epoch 33 - iter 168/243 - loss 0.25932428 - time (sec): 339.46 - samples/sec: 159.76 - lr: 0.000019
|
835 |
-
2023-07-26 18:35:50,532 epoch 33 - iter 192/243 - loss 0.25851724 - time (sec): 387.33 - samples/sec: 160.31 - lr: 0.000019
|
836 |
-
2023-07-26 18:36:38,327 epoch 33 - iter 216/243 - loss 0.25678080 - time (sec): 435.13 - samples/sec: 160.50 - lr: 0.000019
|
837 |
-
2023-07-26 18:37:26,262 epoch 33 - iter 240/243 - loss 0.25562158 - time (sec): 483.06 - samples/sec: 160.86 - lr: 0.000019
|
838 |
-
2023-07-26 18:37:31,732 ----------------------------------------------------------------------------------------------------
|
839 |
-
2023-07-26 18:37:31,732 EPOCH 33 done: loss 0.2552 - lr 0.000019
|
840 |
-
2023-07-26 18:37:33,524 Evaluating as a multi-label problem: False
|
841 |
-
2023-07-26 18:37:33,566 DEV : loss 0.23627179861068726 - f1-score (micro avg) 0.9791
|
842 |
-
2023-07-26 18:37:33,576 ----------------------------------------------------------------------------------------------------
|
843 |
-
2023-07-26 18:38:18,104 epoch 34 - iter 24/243 - loss 0.27182899 - time (sec): 44.53 - samples/sec: 177.66 - lr: 0.000019
|
844 |
-
2023-07-26 18:39:02,789 epoch 34 - iter 48/243 - loss 0.27027922 - time (sec): 89.21 - samples/sec: 177.42 - lr: 0.000019
|
845 |
-
2023-07-26 18:39:47,401 epoch 34 - iter 72/243 - loss 0.26451951 - time (sec): 133.83 - samples/sec: 176.71 - lr: 0.000019
|
846 |
-
2023-07-26 18:40:31,661 epoch 34 - iter 96/243 - loss 0.26736759 - time (sec): 178.09 - samples/sec: 174.31 - lr: 0.000019
|
847 |
-
2023-07-26 18:41:16,196 epoch 34 - iter 120/243 - loss 0.26439071 - time (sec): 222.62 - samples/sec: 174.62 - lr: 0.000018
|
848 |
-
2023-07-26 18:42:00,770 epoch 34 - iter 144/243 - loss 0.26033732 - time (sec): 267.19 - samples/sec: 174.48 - lr: 0.000018
|
849 |
-
2023-07-26 18:42:45,441 epoch 34 - iter 168/243 - loss 0.25756053 - time (sec): 311.87 - samples/sec: 174.19 - lr: 0.000018
|
850 |
-
2023-07-26 18:43:30,194 epoch 34 - iter 192/243 - loss 0.26053780 - time (sec): 356.62 - samples/sec: 174.51 - lr: 0.000018
|
851 |
-
2023-07-26 18:44:14,725 epoch 34 - iter 216/243 - loss 0.26079037 - time (sec): 401.15 - samples/sec: 174.64 - lr: 0.000018
|
852 |
-
2023-07-26 18:44:59,292 epoch 34 - iter 240/243 - loss 0.25971768 - time (sec): 445.72 - samples/sec: 174.39 - lr: 0.000018
|
853 |
-
2023-07-26 18:45:04,380 ----------------------------------------------------------------------------------------------------
|
854 |
-
2023-07-26 18:45:04,380 EPOCH 34 done: loss 0.2595 - lr 0.000018
|
855 |
-
2023-07-26 18:45:06,131 Evaluating as a multi-label problem: False
|
856 |
-
2023-07-26 18:45:06,173 DEV : loss 0.23955273628234863 - f1-score (micro avg) 0.9796
|
857 |
-
2023-07-26 18:45:06,183 ----------------------------------------------------------------------------------------------------
|
858 |
-
2023-07-26 18:45:50,882 epoch 35 - iter 24/243 - loss 0.26701266 - time (sec): 44.70 - samples/sec: 178.66 - lr: 0.000018
|
859 |
-
2023-07-26 18:46:35,519 epoch 35 - iter 48/243 - loss 0.25211759 - time (sec): 89.34 - samples/sec: 175.67 - lr: 0.000018
|
860 |
-
2023-07-26 18:47:20,251 epoch 35 - iter 72/243 - loss 0.25876122 - time (sec): 134.07 - samples/sec: 175.92 - lr: 0.000018
|
861 |
-
2023-07-26 18:48:04,922 epoch 35 - iter 96/243 - loss 0.25751966 - time (sec): 178.74 - samples/sec: 175.77 - lr: 0.000017
|
862 |
-
2023-07-26 18:48:49,416 epoch 35 - iter 120/243 - loss 0.25782676 - time (sec): 223.23 - samples/sec: 174.59 - lr: 0.000017
|
863 |
-
2023-07-26 18:49:34,049 epoch 35 - iter 144/243 - loss 0.26020302 - time (sec): 267.87 - samples/sec: 174.72 - lr: 0.000017
|
864 |
-
2023-07-26 18:50:18,677 epoch 35 - iter 168/243 - loss 0.26431905 - time (sec): 312.49 - samples/sec: 175.29 - lr: 0.000017
|
865 |
-
2023-07-26 18:51:03,300 epoch 35 - iter 192/243 - loss 0.26060801 - time (sec): 357.12 - samples/sec: 175.10 - lr: 0.000017
|
866 |
-
2023-07-26 18:51:47,857 epoch 35 - iter 216/243 - loss 0.26100924 - time (sec): 401.67 - samples/sec: 174.60 - lr: 0.000017
|
867 |
-
2023-07-26 18:52:32,385 epoch 35 - iter 240/243 - loss 0.26071736 - time (sec): 446.20 - samples/sec: 174.23 - lr: 0.000017
|
868 |
-
2023-07-26 18:52:37,453 ----------------------------------------------------------------------------------------------------
|
869 |
-
2023-07-26 18:52:37,454 EPOCH 35 done: loss 0.2611 - lr 0.000017
|
870 |
-
2023-07-26 18:52:39,658 Evaluating as a multi-label problem: False
|
871 |
-
2023-07-26 18:52:39,699 DEV : loss 0.24450713396072388 - f1-score (micro avg) 0.9791
|
872 |
-
2023-07-26 18:52:39,709 ----------------------------------------------------------------------------------------------------
|
873 |
-
2023-07-26 18:53:24,264 epoch 36 - iter 24/243 - loss 0.27084705 - time (sec): 44.55 - samples/sec: 175.18 - lr: 0.000017
|
874 |
-
2023-07-26 18:54:08,663 epoch 36 - iter 48/243 - loss 0.25947400 - time (sec): 88.95 - samples/sec: 173.11 - lr: 0.000017
|
875 |
-
2023-07-26 18:54:53,501 epoch 36 - iter 72/243 - loss 0.25687195 - time (sec): 133.79 - samples/sec: 175.35 - lr: 0.000016
|
876 |
-
2023-07-26 18:55:37,893 epoch 36 - iter 96/243 - loss 0.25424198 - time (sec): 178.18 - samples/sec: 173.93 - lr: 0.000016
|
877 |
-
2023-07-26 18:56:22,286 epoch 36 - iter 120/243 - loss 0.25557169 - time (sec): 222.58 - samples/sec: 173.34 - lr: 0.000016
|
878 |
-
2023-07-26 18:57:15,000 epoch 36 - iter 144/243 - loss 0.25787383 - time (sec): 275.29 - samples/sec: 168.90 - lr: 0.000016
|
879 |
-
2023-07-26 18:58:08,183 epoch 36 - iter 168/243 - loss 0.25642415 - time (sec): 328.47 - samples/sec: 165.37 - lr: 0.000016
|
880 |
-
2023-07-26 18:59:02,250 epoch 36 - iter 192/243 - loss 0.25543523 - time (sec): 382.54 - samples/sec: 162.77 - lr: 0.000016
|
881 |
-
2023-07-26 18:59:53,084 epoch 36 - iter 216/243 - loss 0.25443060 - time (sec): 433.38 - samples/sec: 161.58 - lr: 0.000016
|
882 |
-
2023-07-26 19:00:41,508 epoch 36 - iter 240/243 - loss 0.25344304 - time (sec): 481.80 - samples/sec: 161.25 - lr: 0.000016
|
883 |
-
2023-07-26 19:00:47,029 ----------------------------------------------------------------------------------------------------
|
884 |
-
2023-07-26 19:00:47,029 EPOCH 36 done: loss 0.2536 - lr 0.000016
|
885 |
-
2023-07-26 19:00:48,817 Evaluating as a multi-label problem: False
|
886 |
-
2023-07-26 19:00:48,859 DEV : loss 0.2530966103076935 - f1-score (micro avg) 0.9788
|
887 |
-
2023-07-26 19:00:48,869 ----------------------------------------------------------------------------------------------------
|
888 |
-
2023-07-26 19:01:33,809 epoch 37 - iter 24/243 - loss 0.27190881 - time (sec): 44.94 - samples/sec: 183.74 - lr: 0.000016
|
889 |
-
2023-07-26 19:02:18,402 epoch 37 - iter 48/243 - loss 0.26681536 - time (sec): 89.53 - samples/sec: 178.86 - lr: 0.000015
|
890 |
-
2023-07-26 19:03:03,153 epoch 37 - iter 72/243 - loss 0.26204165 - time (sec): 134.28 - samples/sec: 177.43 - lr: 0.000015
|
891 |
-
2023-07-26 19:03:47,816 epoch 37 - iter 96/243 - loss 0.25844813 - time (sec): 178.95 - samples/sec: 176.20 - lr: 0.000015
|
892 |
-
2023-07-26 19:04:32,391 epoch 37 - iter 120/243 - loss 0.25889938 - time (sec): 223.52 - samples/sec: 174.82 - lr: 0.000015
|
893 |
-
2023-07-26 19:05:17,029 epoch 37 - iter 144/243 - loss 0.26222809 - time (sec): 268.16 - samples/sec: 175.18 - lr: 0.000015
|
894 |
-
2023-07-26 19:06:01,650 epoch 37 - iter 168/243 - loss 0.26407155 - time (sec): 312.78 - samples/sec: 174.91 - lr: 0.000015
|
895 |
-
2023-07-26 19:06:46,300 epoch 37 - iter 192/243 - loss 0.26361155 - time (sec): 357.43 - samples/sec: 174.83 - lr: 0.000015
|
896 |
-
2023-07-26 19:07:31,061 epoch 37 - iter 216/243 - loss 0.26668156 - time (sec): 402.19 - samples/sec: 174.78 - lr: 0.000015
|
897 |
-
2023-07-26 19:08:15,436 epoch 37 - iter 240/243 - loss 0.26504239 - time (sec): 446.57 - samples/sec: 174.15 - lr: 0.000015
|
898 |
-
2023-07-26 19:08:20,495 ----------------------------------------------------------------------------------------------------
|
899 |
-
2023-07-26 19:08:20,495 EPOCH 37 done: loss 0.2650 - lr 0.000015
|
900 |
-
2023-07-26 19:08:22,330 Evaluating as a multi-label problem: False
|
901 |
-
2023-07-26 19:08:22,374 DEV : loss 0.2624962031841278 - f1-score (micro avg) 0.9781
|
902 |
-
2023-07-26 19:08:22,384 ----------------------------------------------------------------------------------------------------
|
903 |
-
2023-07-26 19:09:06,629 epoch 38 - iter 24/243 - loss 0.26162759 - time (sec): 44.24 - samples/sec: 174.37 - lr: 0.000014
|
904 |
-
2023-07-26 19:09:51,176 epoch 38 - iter 48/243 - loss 0.26085357 - time (sec): 88.79 - samples/sec: 175.87 - lr: 0.000014
|
905 |
-
2023-07-26 19:10:35,702 epoch 38 - iter 72/243 - loss 0.25308808 - time (sec): 133.32 - samples/sec: 176.61 - lr: 0.000014
|
906 |
-
2023-07-26 19:11:19,948 epoch 38 - iter 96/243 - loss 0.25632516 - time (sec): 177.56 - samples/sec: 175.93 - lr: 0.000014
|
907 |
-
2023-07-26 19:12:04,580 epoch 38 - iter 120/243 - loss 0.25358337 - time (sec): 222.20 - samples/sec: 176.70 - lr: 0.000014
|
908 |
-
2023-07-26 19:12:48,992 epoch 38 - iter 144/243 - loss 0.25557088 - time (sec): 266.61 - samples/sec: 176.51 - lr: 0.000014
|
909 |
-
2023-07-26 19:13:33,435 epoch 38 - iter 168/243 - loss 0.25407854 - time (sec): 311.05 - samples/sec: 176.83 - lr: 0.000014
|
910 |
-
2023-07-26 19:14:17,541 epoch 38 - iter 192/243 - loss 0.25597339 - time (sec): 355.16 - samples/sec: 176.02 - lr: 0.000014
|
911 |
-
2023-07-26 19:15:01,826 epoch 38 - iter 216/243 - loss 0.25532730 - time (sec): 399.44 - samples/sec: 175.68 - lr: 0.000014
|
912 |
-
2023-07-26 19:15:45,905 epoch 38 - iter 240/243 - loss 0.25415245 - time (sec): 443.52 - samples/sec: 175.02 - lr: 0.000013
|
913 |
-
2023-07-26 19:15:51,052 ----------------------------------------------------------------------------------------------------
|
914 |
-
2023-07-26 19:15:51,053 EPOCH 38 done: loss 0.2542 - lr 0.000013
|
915 |
-
2023-07-26 19:15:52,801 Evaluating as a multi-label problem: False
|
916 |
-
2023-07-26 19:15:52,845 DEV : loss 0.24244999885559082 - f1-score (micro avg) 0.9788
|
917 |
-
2023-07-26 19:15:52,855 ----------------------------------------------------------------------------------------------------
|
918 |
-
2023-07-26 19:16:37,293 epoch 39 - iter 24/243 - loss 0.25336484 - time (sec): 44.44 - samples/sec: 176.50 - lr: 0.000013
|
919 |
-
2023-07-26 19:17:21,644 epoch 39 - iter 48/243 - loss 0.25897743 - time (sec): 88.79 - samples/sec: 177.38 - lr: 0.000013
|
920 |
-
2023-07-26 19:18:05,772 epoch 39 - iter 72/243 - loss 0.25769549 - time (sec): 132.92 - samples/sec: 175.31 - lr: 0.000013
|
921 |
-
2023-07-26 19:18:50,169 epoch 39 - iter 96/243 - loss 0.25751150 - time (sec): 177.31 - samples/sec: 175.93 - lr: 0.000013
|
922 |
-
2023-07-26 19:19:34,381 epoch 39 - iter 120/243 - loss 0.25315782 - time (sec): 221.53 - samples/sec: 175.24 - lr: 0.000013
|
923 |
-
2023-07-26 19:20:18,559 epoch 39 - iter 144/243 - loss 0.25233489 - time (sec): 265.70 - samples/sec: 174.74 - lr: 0.000013
|
924 |
-
2023-07-26 19:21:03,145 epoch 39 - iter 168/243 - loss 0.25114668 - time (sec): 310.29 - samples/sec: 174.33 - lr: 0.000013
|
925 |
-
2023-07-26 19:21:47,854 epoch 39 - iter 192/243 - loss 0.25185953 - time (sec): 355.00 - samples/sec: 174.12 - lr: 0.000013
|
926 |
-
2023-07-26 19:22:32,507 epoch 39 - iter 216/243 - loss 0.25746349 - time (sec): 399.65 - samples/sec: 174.90 - lr: 0.000012
|
927 |
-
2023-07-26 19:23:16,796 epoch 39 - iter 240/243 - loss 0.25680252 - time (sec): 443.94 - samples/sec: 174.98 - lr: 0.000012
|
928 |
-
2023-07-26 19:23:21,907 ----------------------------------------------------------------------------------------------------
|
929 |
-
2023-07-26 19:23:21,908 EPOCH 39 done: loss 0.2579 - lr 0.000012
|
930 |
-
2023-07-26 19:23:23,678 Evaluating as a multi-label problem: False
|
931 |
-
2023-07-26 19:23:23,719 DEV : loss 0.24615894258022308 - f1-score (micro avg) 0.9798
|
932 |
-
2023-07-26 19:23:23,729 ----------------------------------------------------------------------------------------------------
|
933 |
-
2023-07-26 19:24:08,073 epoch 40 - iter 24/243 - loss 0.24837758 - time (sec): 44.34 - samples/sec: 175.24 - lr: 0.000012
|
934 |
-
2023-07-26 19:24:52,448 epoch 40 - iter 48/243 - loss 0.24725040 - time (sec): 88.72 - samples/sec: 176.39 - lr: 0.000012
|
935 |
-
2023-07-26 19:25:37,011 epoch 40 - iter 72/243 - loss 0.25023824 - time (sec): 133.28 - samples/sec: 176.92 - lr: 0.000012
|
936 |
-
2023-07-26 19:26:21,296 epoch 40 - iter 96/243 - loss 0.24239002 - time (sec): 177.57 - samples/sec: 176.32 - lr: 0.000012
|
937 |
-
2023-07-26 19:27:05,481 epoch 40 - iter 120/243 - loss 0.24524267 - time (sec): 221.75 - samples/sec: 175.34 - lr: 0.000012
|
938 |
-
2023-07-26 19:27:49,791 epoch 40 - iter 144/243 - loss 0.24784591 - time (sec): 266.06 - samples/sec: 175.50 - lr: 0.000012
|
939 |
-
2023-07-26 19:28:34,155 epoch 40 - iter 168/243 - loss 0.24872740 - time (sec): 310.43 - samples/sec: 174.67 - lr: 0.000012
|
940 |
-
2023-07-26 19:29:18,697 epoch 40 - iter 192/243 - loss 0.25012412 - time (sec): 354.97 - samples/sec: 174.67 - lr: 0.000011
|
941 |
-
2023-07-26 19:30:03,191 epoch 40 - iter 216/243 - loss 0.25345259 - time (sec): 399.46 - samples/sec: 174.99 - lr: 0.000011
|
942 |
-
2023-07-26 19:30:47,560 epoch 40 - iter 240/243 - loss 0.25383699 - time (sec): 443.83 - samples/sec: 174.98 - lr: 0.000011
|
943 |
-
2023-07-26 19:30:52,654 ----------------------------------------------------------------------------------------------------
|
944 |
-
2023-07-26 19:30:52,655 EPOCH 40 done: loss 0.2540 - lr 0.000011
|
945 |
-
2023-07-26 19:30:54,396 Evaluating as a multi-label problem: False
|
946 |
-
2023-07-26 19:30:54,438 DEV : loss 0.2575598359107971 - f1-score (micro avg) 0.9791
|
947 |
-
2023-07-26 19:30:54,447 ----------------------------------------------------------------------------------------------------
|
948 |
-
2023-07-26 19:31:38,768 epoch 41 - iter 24/243 - loss 0.24306327 - time (sec): 44.32 - samples/sec: 175.47 - lr: 0.000011
|
949 |
-
2023-07-26 19:32:23,092 epoch 41 - iter 48/243 - loss 0.24156726 - time (sec): 88.64 - samples/sec: 175.50 - lr: 0.000011
|
950 |
-
2023-07-26 19:33:07,508 epoch 41 - iter 72/243 - loss 0.24869032 - time (sec): 133.06 - samples/sec: 177.14 - lr: 0.000011
|
951 |
-
2023-07-26 19:33:51,664 epoch 41 - iter 96/243 - loss 0.25072177 - time (sec): 177.22 - samples/sec: 175.38 - lr: 0.000011
|
952 |
-
2023-07-26 19:34:35,771 epoch 41 - iter 120/243 - loss 0.25396376 - time (sec): 221.32 - samples/sec: 174.35 - lr: 0.000011
|
953 |
-
2023-07-26 19:35:20,071 epoch 41 - iter 144/243 - loss 0.25095812 - time (sec): 265.62 - samples/sec: 174.83 - lr: 0.000011
|
954 |
-
2023-07-26 19:36:04,548 epoch 41 - iter 168/243 - loss 0.24810464 - time (sec): 310.10 - samples/sec: 175.56 - lr: 0.000010
|
955 |
-
2023-07-26 19:36:48,812 epoch 41 - iter 192/243 - loss 0.24879453 - time (sec): 354.36 - samples/sec: 175.37 - lr: 0.000010
|
956 |
-
2023-07-26 19:37:33,241 epoch 41 - iter 216/243 - loss 0.25177431 - time (sec): 398.79 - samples/sec: 175.79 - lr: 0.000010
|
957 |
-
2023-07-26 19:38:17,405 epoch 41 - iter 240/243 - loss 0.25152758 - time (sec): 442.96 - samples/sec: 175.50 - lr: 0.000010
|
958 |
-
2023-07-26 19:38:22,468 ----------------------------------------------------------------------------------------------------
|
959 |
-
2023-07-26 19:38:22,469 EPOCH 41 done: loss 0.2509 - lr 0.000010
|
960 |
-
2023-07-26 19:38:24,215 Evaluating as a multi-label problem: False
|
961 |
-
2023-07-26 19:38:24,257 DEV : loss 0.25127604603767395 - f1-score (micro avg) 0.9786
|
962 |
-
2023-07-26 19:38:24,267 ----------------------------------------------------------------------------------------------------
|
963 |
-
2023-07-26 19:39:08,271 epoch 42 - iter 24/243 - loss 0.25413425 - time (sec): 44.00 - samples/sec: 167.14 - lr: 0.000010
|
964 |
-
2023-07-26 19:39:52,711 epoch 42 - iter 48/243 - loss 0.25771203 - time (sec): 88.44 - samples/sec: 173.59 - lr: 0.000010
|
965 |
-
2023-07-26 19:40:37,013 epoch 42 - iter 72/243 - loss 0.25402986 - time (sec): 132.75 - samples/sec: 174.07 - lr: 0.000010
|
966 |
-
2023-07-26 19:41:21,464 epoch 42 - iter 96/243 - loss 0.25689370 - time (sec): 177.20 - samples/sec: 175.64 - lr: 0.000010
|
967 |
-
2023-07-26 19:42:05,507 epoch 42 - iter 120/243 - loss 0.25635789 - time (sec): 221.24 - samples/sec: 174.08 - lr: 0.000010
|
968 |
-
2023-07-26 19:42:49,881 epoch 42 - iter 144/243 - loss 0.25641142 - time (sec): 265.61 - samples/sec: 174.68 - lr: 0.000009
|
969 |
-
2023-07-26 19:43:34,200 epoch 42 - iter 168/243 - loss 0.25676110 - time (sec): 309.93 - samples/sec: 175.15 - lr: 0.000009
|
970 |
-
2023-07-26 19:44:18,472 epoch 42 - iter 192/243 - loss 0.25789268 - time (sec): 354.20 - samples/sec: 175.15 - lr: 0.000009
|
971 |
-
2023-07-26 19:45:02,833 epoch 42 - iter 216/243 - loss 0.25889165 - time (sec): 398.57 - samples/sec: 175.63 - lr: 0.000009
|
972 |
-
2023-07-26 19:45:47,116 epoch 42 - iter 240/243 - loss 0.25885055 - time (sec): 442.85 - samples/sec: 175.64 - lr: 0.000009
|
973 |
-
2023-07-26 19:45:52,133 ----------------------------------------------------------------------------------------------------
|
974 |
-
2023-07-26 19:45:52,133 EPOCH 42 done: loss 0.2584 - lr 0.000009
|
975 |
-
2023-07-26 19:45:54,001 Evaluating as a multi-label problem: False
|
976 |
-
2023-07-26 19:45:54,045 DEV : loss 0.2509002983570099 - f1-score (micro avg) 0.9776
|
977 |
-
2023-07-26 19:45:54,056 ----------------------------------------------------------------------------------------------------
|
978 |
-
2023-07-26 19:46:38,776 epoch 43 - iter 24/243 - loss 0.25656669 - time (sec): 44.72 - samples/sec: 175.88 - lr: 0.000009
|
979 |
-
2023-07-26 19:47:23,646 epoch 43 - iter 48/243 - loss 0.25713909 - time (sec): 89.59 - samples/sec: 179.17 - lr: 0.000009
|
980 |
-
2023-07-26 19:48:08,109 epoch 43 - iter 72/243 - loss 0.25209780 - time (sec): 134.05 - samples/sec: 176.45 - lr: 0.000009
|
981 |
-
2023-07-26 19:48:52,698 epoch 43 - iter 96/243 - loss 0.24509857 - time (sec): 178.64 - samples/sec: 175.75 - lr: 0.000009
|
982 |
-
2023-07-26 19:49:37,182 epoch 43 - iter 120/243 - loss 0.25000579 - time (sec): 223.13 - samples/sec: 174.94 - lr: 0.000008
|
983 |
-
2023-07-26 19:50:21,736 epoch 43 - iter 144/243 - loss 0.25295949 - time (sec): 267.68 - samples/sec: 175.08 - lr: 0.000008
|
984 |
-
2023-07-26 19:51:06,420 epoch 43 - iter 168/243 - loss 0.25493036 - time (sec): 312.36 - samples/sec: 175.74 - lr: 0.000008
|
985 |
-
2023-07-26 19:51:50,778 epoch 43 - iter 192/243 - loss 0.25313033 - time (sec): 356.72 - samples/sec: 174.71 - lr: 0.000008
|
986 |
-
2023-07-26 19:52:35,121 epoch 43 - iter 216/243 - loss 0.25255837 - time (sec): 401.06 - samples/sec: 174.20 - lr: 0.000008
|
987 |
-
2023-07-26 19:53:19,699 epoch 43 - iter 240/243 - loss 0.25326105 - time (sec): 445.64 - samples/sec: 174.52 - lr: 0.000008
|
988 |
-
2023-07-26 19:53:24,805 ----------------------------------------------------------------------------------------------------
|
989 |
-
2023-07-26 19:53:24,805 EPOCH 43 done: loss 0.2536 - lr 0.000008
|
990 |
-
2023-07-26 19:53:27,059 Evaluating as a multi-label problem: False
|
991 |
-
2023-07-26 19:53:27,103 DEV : loss 0.25337928533554077 - f1-score (micro avg) 0.9784
|
992 |
-
2023-07-26 19:53:27,114 ----------------------------------------------------------------------------------------------------
|
993 |
-
2023-07-26 19:54:11,472 epoch 44 - iter 24/243 - loss 0.22752064 - time (sec): 44.36 - samples/sec: 169.73 - lr: 0.000008
|
994 |
-
2023-07-26 19:54:55,919 epoch 44 - iter 48/243 - loss 0.23951614 - time (sec): 88.80 - samples/sec: 171.20 - lr: 0.000008
|
995 |
-
2023-07-26 19:55:40,452 epoch 44 - iter 72/243 - loss 0.23986022 - time (sec): 133.34 - samples/sec: 171.97 - lr: 0.000008
|
996 |
-
2023-07-26 19:56:25,023 epoch 44 - iter 96/243 - loss 0.24528781 - time (sec): 177.91 - samples/sec: 173.40 - lr: 0.000007
|
997 |
-
2023-07-26 19:57:09,511 epoch 44 - iter 120/243 - loss 0.24572088 - time (sec): 222.40 - samples/sec: 173.40 - lr: 0.000007
|
998 |
-
2023-07-26 19:57:54,163 epoch 44 - iter 144/243 - loss 0.24464183 - time (sec): 267.05 - samples/sec: 173.04 - lr: 0.000007
|
999 |
-
2023-07-26 19:58:39,149 epoch 44 - iter 168/243 - loss 0.24523592 - time (sec): 312.04 - samples/sec: 173.72 - lr: 0.000007
|
1000 |
-
2023-07-26 19:59:23,881 epoch 44 - iter 192/243 - loss 0.24519757 - time (sec): 356.77 - samples/sec: 173.61 - lr: 0.000007
|
1001 |
-
2023-07-26 20:00:08,665 epoch 44 - iter 216/243 - loss 0.24456227 - time (sec): 401.55 - samples/sec: 173.99 - lr: 0.000007
|
1002 |
-
2023-07-26 20:00:53,278 epoch 44 - iter 240/243 - loss 0.24582873 - time (sec): 446.16 - samples/sec: 174.08 - lr: 0.000007
|
1003 |
-
2023-07-26 20:00:58,393 ----------------------------------------------------------------------------------------------------
|
1004 |
-
2023-07-26 20:00:58,393 EPOCH 44 done: loss 0.2462 - lr 0.000007
|
1005 |
-
2023-07-26 20:01:00,158 Evaluating as a multi-label problem: False
|
1006 |
-
2023-07-26 20:01:00,200 DEV : loss 0.25915977358818054 - f1-score (micro avg) 0.9784
|
1007 |
-
2023-07-26 20:01:00,210 ----------------------------------------------------------------------------------------------------
|
1008 |
-
2023-07-26 20:01:44,820 epoch 45 - iter 24/243 - loss 0.26201019 - time (sec): 44.61 - samples/sec: 176.98 - lr: 0.000007
|
1009 |
-
2023-07-26 20:02:29,464 epoch 45 - iter 48/243 - loss 0.24779270 - time (sec): 89.25 - samples/sec: 174.35 - lr: 0.000007
|
1010 |
-
2023-07-26 20:03:13,973 epoch 45 - iter 72/243 - loss 0.25012887 - time (sec): 133.76 - samples/sec: 174.72 - lr: 0.000006
|
1011 |
-
2023-07-26 20:03:58,625 epoch 45 - iter 96/243 - loss 0.25289868 - time (sec): 178.41 - samples/sec: 174.60 - lr: 0.000006
|
1012 |
-
2023-07-26 20:04:43,139 epoch 45 - iter 120/243 - loss 0.25326284 - time (sec): 222.93 - samples/sec: 174.12 - lr: 0.000006
|
1013 |
-
2023-07-26 20:05:27,809 epoch 45 - iter 144/243 - loss 0.25373868 - time (sec): 267.60 - samples/sec: 174.76 - lr: 0.000006
|
1014 |
-
2023-07-26 20:06:12,288 epoch 45 - iter 168/243 - loss 0.25215421 - time (sec): 312.08 - samples/sec: 174.53 - lr: 0.000006
|
1015 |
-
2023-07-26 20:06:56,723 epoch 45 - iter 192/243 - loss 0.25175489 - time (sec): 356.51 - samples/sec: 174.02 - lr: 0.000006
|
1016 |
-
2023-07-26 20:07:41,287 epoch 45 - iter 216/243 - loss 0.24952171 - time (sec): 401.08 - samples/sec: 174.05 - lr: 0.000006
|
1017 |
-
2023-07-26 20:08:25,996 epoch 45 - iter 240/243 - loss 0.25004168 - time (sec): 445.79 - samples/sec: 174.41 - lr: 0.000006
|
1018 |
-
2023-07-26 20:08:31,078 ----------------------------------------------------------------------------------------------------
|
1019 |
-
2023-07-26 20:08:31,079 EPOCH 45 done: loss 0.2503 - lr 0.000006
|
1020 |
-
2023-07-26 20:08:32,834 Evaluating as a multi-label problem: False
|
1021 |
-
2023-07-26 20:08:32,877 DEV : loss 0.2550533413887024 - f1-score (micro avg) 0.9788
|
1022 |
-
2023-07-26 20:08:32,887 ----------------------------------------------------------------------------------------------------
|
1023 |
-
2023-07-26 20:09:17,479 epoch 46 - iter 24/243 - loss 0.24479678 - time (sec): 44.59 - samples/sec: 177.79 - lr: 0.000006
|
1024 |
-
2023-07-26 20:10:02,067 epoch 46 - iter 48/243 - loss 0.24138586 - time (sec): 89.18 - samples/sec: 175.65 - lr: 0.000005
|
1025 |
-
2023-07-26 20:10:46,638 epoch 46 - iter 72/243 - loss 0.24404064 - time (sec): 133.75 - samples/sec: 175.18 - lr: 0.000005
|
1026 |
-
2023-07-26 20:11:31,127 epoch 46 - iter 96/243 - loss 0.24604064 - time (sec): 178.24 - samples/sec: 174.01 - lr: 0.000005
|
1027 |
-
2023-07-26 20:12:15,792 epoch 46 - iter 120/243 - loss 0.24783294 - time (sec): 222.91 - samples/sec: 174.51 - lr: 0.000005
|
1028 |
-
2023-07-26 20:13:00,505 epoch 46 - iter 144/243 - loss 0.24973562 - time (sec): 267.62 - samples/sec: 174.34 - lr: 0.000005
|
1029 |
-
2023-07-26 20:13:45,181 epoch 46 - iter 168/243 - loss 0.24967162 - time (sec): 312.29 - samples/sec: 173.97 - lr: 0.000005
|
1030 |
-
2023-07-26 20:14:30,156 epoch 46 - iter 192/243 - loss 0.25131667 - time (sec): 357.27 - samples/sec: 173.94 - lr: 0.000005
|
1031 |
-
2023-07-26 20:15:14,977 epoch 46 - iter 216/243 - loss 0.25004815 - time (sec): 402.09 - samples/sec: 174.06 - lr: 0.000005
|
1032 |
-
2023-07-26 20:15:59,586 epoch 46 - iter 240/243 - loss 0.24797003 - time (sec): 446.70 - samples/sec: 174.19 - lr: 0.000005
|
1033 |
-
2023-07-26 20:16:04,601 ----------------------------------------------------------------------------------------------------
|
1034 |
-
2023-07-26 20:16:04,602 EPOCH 46 done: loss 0.2475 - lr 0.000005
|
1035 |
-
2023-07-26 20:16:06,359 Evaluating as a multi-label problem: False
|
1036 |
-
2023-07-26 20:16:06,401 DEV : loss 0.2502936124801636 - f1-score (micro avg) 0.9796
|
1037 |
-
2023-07-26 20:16:06,411 ----------------------------------------------------------------------------------------------------
|
1038 |
-
2023-07-26 20:16:50,970 epoch 47 - iter 24/243 - loss 0.24652539 - time (sec): 44.56 - samples/sec: 177.11 - lr: 0.000004
|
1039 |
-
2023-07-26 20:17:35,687 epoch 47 - iter 48/243 - loss 0.25432254 - time (sec): 89.28 - samples/sec: 178.43 - lr: 0.000004
|
1040 |
-
2023-07-26 20:18:20,313 epoch 47 - iter 72/243 - loss 0.24907829 - time (sec): 133.90 - samples/sec: 178.67 - lr: 0.000004
|
1041 |
-
2023-07-26 20:19:04,573 epoch 47 - iter 96/243 - loss 0.25143514 - time (sec): 178.16 - samples/sec: 175.41 - lr: 0.000004
|
1042 |
-
2023-07-26 20:19:49,067 epoch 47 - iter 120/243 - loss 0.25195942 - time (sec): 222.66 - samples/sec: 174.82 - lr: 0.000004
|
1043 |
-
2023-07-26 20:20:33,729 epoch 47 - iter 144/243 - loss 0.25140692 - time (sec): 267.32 - samples/sec: 175.12 - lr: 0.000004
|
1044 |
-
2023-07-26 20:21:18,294 epoch 47 - iter 168/243 - loss 0.25098133 - time (sec): 311.88 - samples/sec: 175.27 - lr: 0.000004
|
1045 |
-
2023-07-26 20:22:02,731 epoch 47 - iter 192/243 - loss 0.24903435 - time (sec): 356.32 - samples/sec: 174.38 - lr: 0.000004
|
1046 |
-
2023-07-26 20:22:47,241 epoch 47 - iter 216/243 - loss 0.24707558 - time (sec): 400.83 - samples/sec: 174.35 - lr: 0.000004
|
1047 |
-
2023-07-26 20:23:31,808 epoch 47 - iter 240/243 - loss 0.24996260 - time (sec): 445.40 - samples/sec: 174.50 - lr: 0.000003
|
1048 |
-
2023-07-26 20:23:36,885 ----------------------------------------------------------------------------------------------------
|
1049 |
-
2023-07-26 20:23:36,885 EPOCH 47 done: loss 0.2500 - lr 0.000003
|
1050 |
-
2023-07-26 20:23:38,718 Evaluating as a multi-label problem: False
|
1051 |
-
2023-07-26 20:23:38,760 DEV : loss 0.25260353088378906 - f1-score (micro avg) 0.9788
|
1052 |
-
2023-07-26 20:23:38,770 ----------------------------------------------------------------------------------------------------
|
1053 |
-
2023-07-26 20:24:23,284 epoch 48 - iter 24/243 - loss 0.26092477 - time (sec): 44.51 - samples/sec: 173.72 - lr: 0.000003
|
1054 |
-
2023-07-26 20:25:07,731 epoch 48 - iter 48/243 - loss 0.26380496 - time (sec): 88.96 - samples/sec: 172.51 - lr: 0.000003
|
1055 |
-
2023-07-26 20:25:52,549 epoch 48 - iter 72/243 - loss 0.26586966 - time (sec): 133.78 - samples/sec: 175.68 - lr: 0.000003
|
1056 |
-
2023-07-26 20:26:37,081 epoch 48 - iter 96/243 - loss 0.26118560 - time (sec): 178.31 - samples/sec: 175.37 - lr: 0.000003
|
1057 |
-
2023-07-26 20:27:21,769 epoch 48 - iter 120/243 - loss 0.25715945 - time (sec): 223.00 - samples/sec: 176.11 - lr: 0.000003
|
1058 |
-
2023-07-26 20:28:06,589 epoch 48 - iter 144/243 - loss 0.25935501 - time (sec): 267.82 - samples/sec: 176.32 - lr: 0.000003
|
1059 |
-
2023-07-26 20:28:51,230 epoch 48 - iter 168/243 - loss 0.25807126 - time (sec): 312.46 - samples/sec: 175.36 - lr: 0.000003
|
1060 |
-
2023-07-26 20:29:35,872 epoch 48 - iter 192/243 - loss 0.25819322 - time (sec): 357.10 - samples/sec: 174.73 - lr: 0.000003
|
1061 |
-
2023-07-26 20:30:20,621 epoch 48 - iter 216/243 - loss 0.25780077 - time (sec): 401.85 - samples/sec: 174.84 - lr: 0.000002
|
1062 |
-
2023-07-26 20:31:05,115 epoch 48 - iter 240/243 - loss 0.25669533 - time (sec): 446.34 - samples/sec: 174.17 - lr: 0.000002
|
1063 |
-
2023-07-26 20:31:10,189 ----------------------------------------------------------------------------------------------------
|
1064 |
-
2023-07-26 20:31:10,189 EPOCH 48 done: loss 0.2562 - lr 0.000002
|
1065 |
-
2023-07-26 20:31:11,946 Evaluating as a multi-label problem: False
|
1066 |
-
2023-07-26 20:31:11,989 DEV : loss 0.2517630159854889 - f1-score (micro avg) 0.9793
|
1067 |
-
2023-07-26 20:31:11,998 ----------------------------------------------------------------------------------------------------
|
1068 |
-
2023-07-26 20:31:56,576 epoch 49 - iter 24/243 - loss 0.27952006 - time (sec): 44.58 - samples/sec: 171.79 - lr: 0.000002
|
1069 |
-
2023-07-26 20:32:41,285 epoch 49 - iter 48/243 - loss 0.26483505 - time (sec): 89.29 - samples/sec: 172.32 - lr: 0.000002
|
1070 |
-
2023-07-26 20:33:25,782 epoch 49 - iter 72/243 - loss 0.25971199 - time (sec): 133.78 - samples/sec: 171.90 - lr: 0.000002
|
1071 |
-
2023-07-26 20:34:10,460 epoch 49 - iter 96/243 - loss 0.25971123 - time (sec): 178.46 - samples/sec: 173.31 - lr: 0.000002
|
1072 |
-
2023-07-26 20:34:55,145 epoch 49 - iter 120/243 - loss 0.25121870 - time (sec): 223.15 - samples/sec: 174.45 - lr: 0.000002
|
1073 |
-
2023-07-26 20:35:39,794 epoch 49 - iter 144/243 - loss 0.24985456 - time (sec): 267.80 - samples/sec: 174.14 - lr: 0.000002
|
1074 |
-
2023-07-26 20:36:24,454 epoch 49 - iter 168/243 - loss 0.25019492 - time (sec): 312.46 - samples/sec: 173.74 - lr: 0.000002
|
1075 |
-
2023-07-26 20:37:09,180 epoch 49 - iter 192/243 - loss 0.24964407 - time (sec): 357.18 - samples/sec: 174.05 - lr: 0.000001
|
1076 |
-
2023-07-26 20:37:53,667 epoch 49 - iter 216/243 - loss 0.24966262 - time (sec): 401.67 - samples/sec: 173.91 - lr: 0.000001
|
1077 |
-
2023-07-26 20:38:38,222 epoch 49 - iter 240/243 - loss 0.24839303 - time (sec): 446.22 - samples/sec: 173.82 - lr: 0.000001
|
1078 |
-
2023-07-26 20:38:43,407 ----------------------------------------------------------------------------------------------------
|
1079 |
-
2023-07-26 20:38:43,407 EPOCH 49 done: loss 0.2480 - lr 0.000001
|
1080 |
-
2023-07-26 20:38:45,164 Evaluating as a multi-label problem: False
|
1081 |
-
2023-07-26 20:38:45,206 DEV : loss 0.25181668996810913 - f1-score (micro avg) 0.9786
|
1082 |
-
2023-07-26 20:38:45,216 ----------------------------------------------------------------------------------------------------
|
1083 |
-
2023-07-26 20:39:30,103 epoch 50 - iter 24/243 - loss 0.26114983 - time (sec): 44.89 - samples/sec: 184.97 - lr: 0.000001
|
1084 |
-
2023-07-26 20:40:14,469 epoch 50 - iter 48/243 - loss 0.24629344 - time (sec): 89.25 - samples/sec: 177.23 - lr: 0.000001
|
1085 |
-
2023-07-26 20:40:58,962 epoch 50 - iter 72/243 - loss 0.24771674 - time (sec): 133.75 - samples/sec: 176.12 - lr: 0.000001
|
1086 |
-
2023-07-26 20:41:43,633 epoch 50 - iter 96/243 - loss 0.24705085 - time (sec): 178.42 - samples/sec: 176.67 - lr: 0.000001
|
1087 |
-
2023-07-26 20:42:28,058 epoch 50 - iter 120/243 - loss 0.24435267 - time (sec): 222.84 - samples/sec: 175.63 - lr: 0.000001
|
1088 |
-
2023-07-26 20:43:12,552 epoch 50 - iter 144/243 - loss 0.24537610 - time (sec): 267.34 - samples/sec: 175.26 - lr: 0.000001
|
1089 |
-
2023-07-26 20:43:57,183 epoch 50 - iter 168/243 - loss 0.24725247 - time (sec): 311.97 - samples/sec: 175.35 - lr: 0.000000
|
1090 |
-
2023-07-26 20:44:42,166 epoch 50 - iter 192/243 - loss 0.24773009 - time (sec): 356.95 - samples/sec: 174.58 - lr: 0.000000
|
1091 |
-
2023-07-26 20:45:27,096 epoch 50 - iter 216/243 - loss 0.24906212 - time (sec): 401.88 - samples/sec: 173.96 - lr: 0.000000
|
1092 |
-
2023-07-26 20:46:12,548 epoch 50 - iter 240/243 - loss 0.24977353 - time (sec): 447.33 - samples/sec: 173.87 - lr: 0.000000
|
1093 |
-
2023-07-26 20:46:17,709 ----------------------------------------------------------------------------------------------------
|
1094 |
-
2023-07-26 20:46:17,709 EPOCH 50 done: loss 0.2503 - lr 0.000000
|
1095 |
-
2023-07-26 20:46:19,451 Evaluating as a multi-label problem: False
|
1096 |
-
2023-07-26 20:46:19,493 DEV : loss 0.2513697147369385 - f1-score (micro avg) 0.9784
|
1097 |
-
2023-07-26 20:46:22,002 Test data not provided setting final score to 0
|
|
|
1 |
+
2023-08-17 13:20:08,970 ----------------------------------------------------------------------------------------------------
|
2 |
+
2023-08-17 13:20:08,977 Model: "SequenceTagger(
|
3 |
(embeddings): TransformerWordEmbeddings(
|
4 |
(model): XLMRobertaModel(
|
5 |
(embeddings): XLMRobertaEmbeddings(
|
|
|
313 |
(loss_function): ViterbiLoss()
|
314 |
(crf): CRF()
|
315 |
)"
|
316 |
+
2023-08-17 13:20:08,995 ----------------------------------------------------------------------------------------------------
|
317 |
+
2023-08-17 13:20:08,996 Corpus: "Corpus: 7767 train + 409 dev + 0 test sentences"
|
318 |
+
2023-08-17 13:20:08,997 ----------------------------------------------------------------------------------------------------
|
319 |
+
2023-08-17 13:20:08,997 Parameters:
|
320 |
+
2023-08-17 13:20:08,997 - learning_rate: "0.000050"
|
321 |
+
2023-08-17 13:20:08,998 - mini_batch_size: "32"
|
322 |
+
2023-08-17 13:20:08,998 - patience: "3"
|
323 |
+
2023-08-17 13:20:08,998 - anneal_factor: "0.5"
|
324 |
+
2023-08-17 13:20:08,999 - max_epochs: "2"
|
325 |
+
2023-08-17 13:20:08,999 - shuffle: "True"
|
326 |
+
2023-08-17 13:20:09,000 - train_with_dev: "False"
|
327 |
+
2023-08-17 13:20:09,000 - batch_growth_annealing: "False"
|
328 |
+
2023-08-17 13:20:09,000 ----------------------------------------------------------------------------------------------------
|
329 |
+
2023-08-17 13:20:09,001 Model training base path: "/scratch/skulick/ppchy-11-pos/xlmb-ck05-yid1/split_final/train"
|
330 |
+
2023-08-17 13:20:09,001 ----------------------------------------------------------------------------------------------------
|
331 |
+
2023-08-17 13:20:09,001 Device: cuda:0
|
332 |
+
2023-08-17 13:20:09,002 ----------------------------------------------------------------------------------------------------
|
333 |
+
2023-08-17 13:20:09,002 Embeddings storage mode: none
|
334 |
+
2023-08-17 13:20:09,002 ----------------------------------------------------------------------------------------------------
|
335 |
+
2023-08-17 13:21:05,834 epoch 1 - iter 24/243 - loss 5.52841502 - time (sec): 56.83 - samples/sec: 131.44 - lr: 0.000025
|
336 |
+
2023-08-17 13:22:03,318 epoch 1 - iter 48/243 - loss 4.70686211 - time (sec): 114.32 - samples/sec: 130.45 - lr: 0.000050
|
337 |
+
2023-08-17 13:23:00,549 epoch 1 - iter 72/243 - loss 3.86110162 - time (sec): 171.55 - samples/sec: 131.94 - lr: 0.000047
|
338 |
+
2023-08-17 13:23:57,695 epoch 1 - iter 96/243 - loss 3.22106003 - time (sec): 228.69 - samples/sec: 132.37 - lr: 0.000045
|
339 |
+
2023-08-17 13:24:55,039 epoch 1 - iter 120/243 - loss 2.77518007 - time (sec): 286.04 - samples/sec: 132.92 - lr: 0.000042
|
340 |
+
2023-08-17 13:25:52,345 epoch 1 - iter 144/243 - loss 2.46009763 - time (sec): 343.34 - samples/sec: 133.06 - lr: 0.000039
|
341 |
+
2023-08-17 13:26:49,831 epoch 1 - iter 168/243 - loss 2.21288400 - time (sec): 400.83 - samples/sec: 134.04 - lr: 0.000036
|
342 |
+
2023-08-17 13:27:47,964 epoch 1 - iter 192/243 - loss 2.01670410 - time (sec): 458.96 - samples/sec: 134.63 - lr: 0.000034
|
343 |
+
2023-08-17 13:28:45,494 epoch 1 - iter 216/243 - loss 1.86783335 - time (sec): 516.49 - samples/sec: 134.47 - lr: 0.000031
|
344 |
+
2023-08-17 13:29:43,119 epoch 1 - iter 240/243 - loss 1.74523925 - time (sec): 574.12 - samples/sec: 135.25 - lr: 0.000028
|
345 |
+
2023-08-17 13:29:50,011 ----------------------------------------------------------------------------------------------------
|
346 |
+
2023-08-17 13:29:50,011 EPOCH 1 done: loss 1.7334 - lr 0.000028
|
347 |
+
2023-08-17 13:29:52,277 Evaluating as a multi-label problem: False
|
348 |
+
2023-08-17 13:29:52,376 DEV : loss 0.3509514629840851 - f1-score (micro avg) 0.9331
|
349 |
+
2023-08-17 13:29:52,410 saving best model
|
350 |
+
2023-08-17 13:29:54,774 ----------------------------------------------------------------------------------------------------
|
351 |
+
2023-08-17 13:30:44,972 epoch 2 - iter 24/243 - loss 0.58877620 - time (sec): 50.20 - samples/sec: 152.66 - lr: 0.000025
|
352 |
+
2023-08-17 13:31:36,455 epoch 2 - iter 48/243 - loss 0.60804646 - time (sec): 101.68 - samples/sec: 152.75 - lr: 0.000022
|
353 |
+
2023-08-17 13:32:27,132 epoch 2 - iter 72/243 - loss 0.60136722 - time (sec): 152.36 - samples/sec: 153.64 - lr: 0.000020
|
354 |
+
2023-08-17 13:33:17,902 epoch 2 - iter 96/243 - loss 0.59255541 - time (sec): 203.13 - samples/sec: 154.55 - lr: 0.000017
|
355 |
+
2023-08-17 13:34:08,949 epoch 2 - iter 120/243 - loss 0.58957421 - time (sec): 254.17 - samples/sec: 154.79 - lr: 0.000014
|
356 |
+
2023-08-17 13:35:00,256 epoch 2 - iter 144/243 - loss 0.58878210 - time (sec): 305.48 - samples/sec: 154.48 - lr: 0.000011
|
357 |
+
2023-08-17 13:35:51,214 epoch 2 - iter 168/243 - loss 0.58168957 - time (sec): 356.44 - samples/sec: 153.84 - lr: 0.000009
|
358 |
+
2023-08-17 13:36:42,167 epoch 2 - iter 192/243 - loss 0.57403444 - time (sec): 407.39 - samples/sec: 153.55 - lr: 0.000006
|
359 |
+
2023-08-17 13:37:32,761 epoch 2 - iter 216/243 - loss 0.57331317 - time (sec): 457.99 - samples/sec: 152.68 - lr: 0.000003
|
360 |
+
2023-08-17 13:38:23,745 epoch 2 - iter 240/243 - loss 0.56849021 - time (sec): 508.97 - samples/sec: 152.71 - lr: 0.000000
|
361 |
+
2023-08-17 13:38:29,500 ----------------------------------------------------------------------------------------------------
|
362 |
+
2023-08-17 13:38:29,500 EPOCH 2 done: loss 0.5679 - lr 0.000000
|
363 |
+
2023-08-17 13:38:31,769 Evaluating as a multi-label problem: False
|
364 |
+
2023-08-17 13:38:31,868 DEV : loss 0.23018118739128113 - f1-score (micro avg) 0.9562
|
365 |
+
2023-08-17 13:38:31,902 saving best model
|
366 |
+
2023-08-17 13:38:37,560 Test data not provided setting final score to 0
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|